From b332828c39326b1dca617f387dd15d12e81cd5f0 Mon Sep 17 00:00:00 2001 From: "K.Prasad" Date: Mon, 1 Jun 2009 23:43:10 +0530 Subject: hw-breakpoints: prepare the code for Hardware Breakpoint interfaces The generic hardware breakpoint interface provides an abstraction of hardware breakpoints in front of specific arch implementations for both kernel and user side breakpoints. This includes execution breakpoints and read/write breakpoints, also known as "watchpoints". This patch introduces header files containing constants, structure definitions and declaration of functions used by the hardware breakpoint core and x86 specific code. It also introduces an array based storage for the debug-register values in 'struct thread_struct', while modifying all users of debugreg member in the structure. [ Impact: add headers for new hardware breakpoint interface ] Original-patch-by: Alan Stern Signed-off-by: K.Prasad Reviewed-by: Alan Stern Signed-off-by: Frederic Weisbecker --- include/asm-generic/hw_breakpoint.h | 139 ++++++++++++++++++++++++++++++++++++ 1 file changed, 139 insertions(+) create mode 100644 include/asm-generic/hw_breakpoint.h (limited to 'include') diff --git a/include/asm-generic/hw_breakpoint.h b/include/asm-generic/hw_breakpoint.h new file mode 100644 index 000000000000..9bf2d12eb74a --- /dev/null +++ b/include/asm-generic/hw_breakpoint.h @@ -0,0 +1,139 @@ +#ifndef _ASM_GENERIC_HW_BREAKPOINT_H +#define _ASM_GENERIC_HW_BREAKPOINT_H + +#ifndef __ARCH_HW_BREAKPOINT_H +#error "Please don't include this file directly" +#endif + +#ifdef __KERNEL__ +#include +#include +#include + +/** + * struct hw_breakpoint - unified kernel/user-space hardware breakpoint + * @triggered: callback invoked after target address access + * @info: arch-specific breakpoint info (address, length, and type) + * + * %hw_breakpoint structures are the kernel's way of representing + * hardware breakpoints. These are data breakpoints + * (also known as "watchpoints", triggered on data access), and the breakpoint's + * target address can be located in either kernel space or user space. + * + * The breakpoint's address, length, and type are highly + * architecture-specific. The values are encoded in the @info field; you + * specify them when registering the breakpoint. To examine the encoded + * values use hw_breakpoint_get_{kaddress,uaddress,len,type}(), declared + * below. + * + * The address is specified as a regular kernel pointer (for kernel-space + * breakponts) or as an %__user pointer (for user-space breakpoints). + * With register_user_hw_breakpoint(), the address must refer to a + * location in user space. The breakpoint will be active only while the + * requested task is running. Conversely with + * register_kernel_hw_breakpoint(), the address must refer to a location + * in kernel space, and the breakpoint will be active on all CPUs + * regardless of the current task. + * + * The length is the breakpoint's extent in bytes, which is subject to + * certain limitations. include/asm/hw_breakpoint.h contains macros + * defining the available lengths for a specific architecture. Note that + * the address's alignment must match the length. The breakpoint will + * catch accesses to any byte in the range from address to address + + * (length - 1). + * + * The breakpoint's type indicates the sort of access that will cause it + * to trigger. Possible values may include: + * + * %HW_BREAKPOINT_RW (triggered on read or write access), + * %HW_BREAKPOINT_WRITE (triggered on write access), and + * %HW_BREAKPOINT_READ (triggered on read access). + * + * Appropriate macros are defined in include/asm/hw_breakpoint.h; not all + * possibilities are available on all architectures. Execute breakpoints + * must have length equal to the special value %HW_BREAKPOINT_LEN_EXECUTE. + * + * When a breakpoint gets hit, the @triggered callback is + * invoked in_interrupt with a pointer to the %hw_breakpoint structure and the + * processor registers. + * Data breakpoints occur after the memory access has taken place. + * Breakpoints are disabled during execution @triggered, to avoid + * recursive traps and allow unhindered access to breakpointed memory. + * + * This sample code sets a breakpoint on pid_max and registers a callback + * function for writes to that variable. Note that it is not portable + * as written, because not all architectures support HW_BREAKPOINT_LEN_4. + * + * ---------------------------------------------------------------------- + * + * #include + * + * struct hw_breakpoint my_bp; + * + * static void my_triggered(struct hw_breakpoint *bp, struct pt_regs *regs) + * { + * printk(KERN_DEBUG "Inside triggered routine of breakpoint exception\n"); + * dump_stack(); + * ............... + * } + * + * static struct hw_breakpoint my_bp; + * + * static int init_module(void) + * { + * ...................... + * my_bp.info.type = HW_BREAKPOINT_WRITE; + * my_bp.info.len = HW_BREAKPOINT_LEN_4; + * + * my_bp.installed = (void *)my_bp_installed; + * + * rc = register_kernel_hw_breakpoint(&my_bp); + * ...................... + * } + * + * static void cleanup_module(void) + * { + * ...................... + * unregister_kernel_hw_breakpoint(&my_bp); + * ...................... + * } + * + * ---------------------------------------------------------------------- + */ +struct hw_breakpoint { + void (*triggered)(struct hw_breakpoint *, struct pt_regs *); + struct arch_hw_breakpoint info; +}; + +/* + * len and type values are defined in include/asm/hw_breakpoint.h. + * Available values vary according to the architecture. On i386 the + * possibilities are: + * + * HW_BREAKPOINT_LEN_1 + * HW_BREAKPOINT_LEN_2 + * HW_BREAKPOINT_LEN_4 + * HW_BREAKPOINT_RW + * HW_BREAKPOINT_READ + * + * On other architectures HW_BREAKPOINT_LEN_8 may be available, and the + * 1-, 2-, and 4-byte lengths may be unavailable. There also may be + * HW_BREAKPOINT_WRITE. You can use #ifdef to check at compile time. + */ + +extern int register_user_hw_breakpoint(struct task_struct *tsk, + struct hw_breakpoint *bp); +extern int modify_user_hw_breakpoint(struct task_struct *tsk, + struct hw_breakpoint *bp); +extern void unregister_user_hw_breakpoint(struct task_struct *tsk, + struct hw_breakpoint *bp); +/* + * Kernel breakpoints are not associated with any particular thread. + */ +extern int register_kernel_hw_breakpoint(struct hw_breakpoint *bp); +extern void unregister_kernel_hw_breakpoint(struct hw_breakpoint *bp); + +extern unsigned int hbp_kernel_pos; + +#endif /* __KERNEL__ */ +#endif /* _ASM_GENERIC_HW_BREAKPOINT_H */ -- cgit v1.3-8-gc7d7 From bd1a5c849bdcc5c89e4a6a18216cd2b9a7a8a78f Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 13 Aug 2009 16:34:53 -0400 Subject: tracing: Ftrace dynamic ftrace_event_call support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add dynamic ftrace_event_call support to ftrace. Trace engines can add new ftrace_event_call to ftrace on the fly. Each operator function of the call takes an ftrace_event_call data structure as an argument, because these functions may be shared among several ftrace_event_calls. Changes from v13: - Define remove_subsystem_dir() always (revirt a2ca5e03), because trace_remove_event_call() uses it. - Modify syscall tracer because of ftrace_event_call change. [fweisbec@gmail.com: Fixed conflict against latest tracing/core] Signed-off-by: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Cc: Avi Kivity Cc: Andi Kleen Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Jason Baron Cc: Jim Keniston Cc: K.Prasad Cc: Lai Jiangshan Cc: Li Zefan Cc: Przemysław Pawełczyk Cc: Roland McGrath Cc: Sam Ravnborg Cc: Srikar Dronamraju Cc: Steven Rostedt Cc: Tom Zanussi Cc: Vegard Nossum LKML-Reference: <20090813203453.31965.71901.stgit@localhost.localdomain> Signed-off-by: Frederic Weisbecker --- include/linux/ftrace_event.h | 19 +++---- include/linux/syscalls.h | 4 +- include/trace/ftrace.h | 16 +++--- include/trace/syscall.h | 11 ++-- kernel/trace/trace_events.c | 121 +++++++++++++++++++++++++++++------------- kernel/trace/trace_export.c | 18 +++---- kernel/trace/trace_syscalls.c | 20 +++---- 7 files changed, 130 insertions(+), 79 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index ace2da9e0a0d..1ab3089b5c59 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -112,12 +112,12 @@ struct ftrace_event_call { struct dentry *dir; struct trace_event *event; int enabled; - int (*regfunc)(void *); - void (*unregfunc)(void *); + int (*regfunc)(struct ftrace_event_call *); + void (*unregfunc)(struct ftrace_event_call *); int id; - int (*raw_init)(void); - int (*show_format)(struct ftrace_event_call *call, - struct trace_seq *s); + int (*raw_init)(struct ftrace_event_call *); + int (*show_format)(struct ftrace_event_call *, + struct trace_seq *); int (*define_fields)(struct ftrace_event_call *); struct list_head fields; int filter_active; @@ -147,11 +147,12 @@ enum { FILTER_PTR_STRING, }; -extern int trace_define_field(struct ftrace_event_call *call, - const char *type, const char *name, - int offset, int size, int is_signed, - int filter_type); extern int trace_define_common_fields(struct ftrace_event_call *call); +extern int trace_define_field(struct ftrace_event_call *call, char *type, + char *name, int offset, int size, int is_signed, + int filter_type); +extern int trace_add_event_call(struct ftrace_event_call *call); +extern void trace_remove_event_call(struct ftrace_event_call *call); #define is_signed_type(type) (((type)(-1)) < 0) diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index f124c8995555..646102eeff92 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -165,7 +165,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *event_call) \ struct trace_event enter_syscall_print_##sname = { \ .trace = print_syscall_enter, \ }; \ - static int init_enter_##sname(void) \ + static int init_enter_##sname(struct ftrace_event_call *call) \ { \ int num, id; \ num = syscall_name_to_nr("sys"#sname); \ @@ -202,7 +202,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *event_call) \ struct trace_event exit_syscall_print_##sname = { \ .trace = print_syscall_exit, \ }; \ - static int init_exit_##sname(void) \ + static int init_exit_##sname(struct ftrace_event_call *call) \ { \ int num, id; \ num = syscall_name_to_nr("sys"#sname); \ diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 360a77ad79e1..f2bd7a8f8e8b 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -434,7 +434,7 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\ * event_trace_printk(_RET_IP_, ": " ); * } * - * static int ftrace_reg_event_(void) + * static int ftrace_reg_event_(struct ftrace_event_call *unused) * { * int ret; * @@ -445,7 +445,7 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\ * return ret; * } * - * static void ftrace_unreg_event_(void) + * static void ftrace_unreg_event_(struct ftrace_event_call *unused) * { * unregister_trace_(ftrace_event_); * } @@ -478,7 +478,7 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\ * trace_current_buffer_unlock_commit(event, irq_flags, pc); * } * - * static int ftrace_raw_reg_event_(void) + * static int ftrace_raw_reg_event_(struct ftrace_event_call *unused) * { * int ret; * @@ -489,7 +489,7 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\ * return ret; * } * - * static void ftrace_unreg_event_(void) + * static void ftrace_unreg_event_(struct ftrace_event_call *unused) * { * unregister_trace_(ftrace_raw_event_); * } @@ -498,7 +498,7 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\ * .trace = ftrace_raw_output_, <-- stage 2 * }; * - * static int ftrace_raw_init_event_(void) + * static int ftrace_raw_init_event_(struct ftrace_event_call *unused) * { * int id; * @@ -592,7 +592,7 @@ static void ftrace_raw_event_##call(proto) \ trace_nowake_buffer_unlock_commit(event, irq_flags, pc); \ } \ \ -static int ftrace_raw_reg_event_##call(void *ptr) \ +static int ftrace_raw_reg_event_##call(struct ftrace_event_call *unused)\ { \ int ret; \ \ @@ -603,7 +603,7 @@ static int ftrace_raw_reg_event_##call(void *ptr) \ return ret; \ } \ \ -static void ftrace_raw_unreg_event_##call(void *ptr) \ +static void ftrace_raw_unreg_event_##call(struct ftrace_event_call *unused)\ { \ unregister_trace_##call(ftrace_raw_event_##call); \ } \ @@ -612,7 +612,7 @@ static struct trace_event ftrace_event_type_##call = { \ .trace = ftrace_raw_output_##call, \ }; \ \ -static int ftrace_raw_init_event_##call(void) \ +static int ftrace_raw_init_event_##call(struct ftrace_event_call *unused)\ { \ int id; \ \ diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 5dc283ba5ae0..e290b86f6167 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -39,16 +39,19 @@ void set_syscall_enter_id(int num, int id); void set_syscall_exit_id(int num, int id); extern struct trace_event event_syscall_enter; extern struct trace_event event_syscall_exit; -extern int reg_event_syscall_enter(void *ptr); -extern void unreg_event_syscall_enter(void *ptr); -extern int reg_event_syscall_exit(void *ptr); -extern void unreg_event_syscall_exit(void *ptr); + extern int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s); extern int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s); extern int syscall_enter_define_fields(struct ftrace_event_call *call); extern int syscall_exit_define_fields(struct ftrace_event_call *call); +extern int reg_event_syscall_enter(struct ftrace_event_call *call); +extern void unreg_event_syscall_enter(struct ftrace_event_call *call); +extern int reg_event_syscall_exit(struct ftrace_event_call *call); +extern void unreg_event_syscall_exit(struct ftrace_event_call *call); +extern int +ftrace_format_syscall(struct ftrace_event_call *call, struct trace_seq *s); enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags); enum print_line_t print_syscall_exit(struct trace_iterator *iter, int flags); #endif diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index d33bcdeffe69..8079bb511c43 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -27,8 +27,8 @@ DEFINE_MUTEX(event_mutex); LIST_HEAD(ftrace_events); -int trace_define_field(struct ftrace_event_call *call, const char *type, - const char *name, int offset, int size, int is_signed, +int trace_define_field(struct ftrace_event_call *call, char *type, + char *name, int offset, int size, int is_signed, int filter_type) { struct ftrace_event_field *field; @@ -92,9 +92,7 @@ int trace_define_common_fields(struct ftrace_event_call *call) } EXPORT_SYMBOL_GPL(trace_define_common_fields); -#ifdef CONFIG_MODULES - -static void trace_destroy_fields(struct ftrace_event_call *call) +void trace_destroy_fields(struct ftrace_event_call *call) { struct ftrace_event_field *field, *next; @@ -106,8 +104,6 @@ static void trace_destroy_fields(struct ftrace_event_call *call) } } -#endif /* CONFIG_MODULES */ - static void ftrace_event_enable_disable(struct ftrace_event_call *call, int enable) { @@ -116,14 +112,14 @@ static void ftrace_event_enable_disable(struct ftrace_event_call *call, if (call->enabled) { call->enabled = 0; tracing_stop_cmdline_record(); - call->unregfunc(call->data); + call->unregfunc(call); } break; case 1: if (!call->enabled) { call->enabled = 1; tracing_start_cmdline_record(); - call->regfunc(call->data); + call->regfunc(call); } break; } @@ -991,27 +987,43 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events, return 0; } -#define for_each_event(event, start, end) \ - for (event = start; \ - (unsigned long)event < (unsigned long)end; \ - event++) +static int __trace_add_event_call(struct ftrace_event_call *call) +{ + struct dentry *d_events; + int ret; -#ifdef CONFIG_MODULES + if (!call->name) + return -EINVAL; -static LIST_HEAD(ftrace_module_file_list); + if (call->raw_init) { + ret = call->raw_init(call); + if (ret < 0) { + if (ret != -ENOSYS) + pr_warning("Could not initialize trace " + "events/%s\n", call->name); + return ret; + } + } -/* - * Modules must own their file_operations to keep up with - * reference counting. - */ -struct ftrace_module_file_ops { - struct list_head list; - struct module *mod; - struct file_operations id; - struct file_operations enable; - struct file_operations format; - struct file_operations filter; -}; + d_events = event_trace_events_dir(); + if (!d_events) + return -ENOENT; + + list_add(&call->list, &ftrace_events); + return event_create_dir(call, d_events, &ftrace_event_id_fops, + &ftrace_enable_fops, &ftrace_event_filter_fops, + &ftrace_event_format_fops); +} + +/* Add an additional event_call dynamically */ +int trace_add_event_call(struct ftrace_event_call *call) +{ + int ret; + mutex_lock(&event_mutex); + ret = __trace_add_event_call(call); + mutex_unlock(&event_mutex); + return ret; +} static void remove_subsystem_dir(const char *name) { @@ -1039,6 +1051,48 @@ static void remove_subsystem_dir(const char *name) } } +static void __trace_remove_event_call(struct ftrace_event_call *call) +{ + ftrace_event_enable_disable(call, 0); + if (call->event) + __unregister_ftrace_event(call->event); + debugfs_remove_recursive(call->dir); + list_del(&call->list); + trace_destroy_fields(call); + destroy_preds(call); + remove_subsystem_dir(call->system); +} + +/* Remove an event_call */ +void trace_remove_event_call(struct ftrace_event_call *call) +{ + mutex_lock(&event_mutex); + __trace_remove_event_call(call); + mutex_unlock(&event_mutex); +} + +#define for_each_event(event, start, end) \ + for (event = start; \ + (unsigned long)event < (unsigned long)end; \ + event++) + +#ifdef CONFIG_MODULES + +static LIST_HEAD(ftrace_module_file_list); + +/* + * Modules must own their file_operations to keep up with + * reference counting. + */ +struct ftrace_module_file_ops { + struct list_head list; + struct module *mod; + struct file_operations id; + struct file_operations enable; + struct file_operations format; + struct file_operations filter; +}; + static struct ftrace_module_file_ops * trace_create_file_ops(struct module *mod) { @@ -1096,7 +1150,7 @@ static void trace_module_add_events(struct module *mod) if (!call->name) continue; if (call->raw_init) { - ret = call->raw_init(); + ret = call->raw_init(call); if (ret < 0) { if (ret != -ENOSYS) pr_warning("Could not initialize trace " @@ -1131,14 +1185,7 @@ static void trace_module_remove_events(struct module *mod) list_for_each_entry_safe(call, p, &ftrace_events, list) { if (call->mod == mod) { found = true; - ftrace_event_enable_disable(call, 0); - if (call->event) - __unregister_ftrace_event(call->event); - debugfs_remove_recursive(call->dir); - list_del(&call->list); - trace_destroy_fields(call); - destroy_preds(call); - remove_subsystem_dir(call->system); + __trace_remove_event_call(call); } } @@ -1256,7 +1303,7 @@ static __init int event_trace_init(void) if (!call->name) continue; if (call->raw_init) { - ret = call->raw_init(); + ret = call->raw_init(call); if (ret < 0) { if (ret != -ENOSYS) pr_warning("Could not initialize trace " diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 029a91f42287..9cbe7f1930ea 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -117,10 +117,16 @@ ftrace_format_##call(struct ftrace_event_call *unused, \ #define TRACE_FIELD_SPECIAL(type_item, item, len, cmd) \ cmd; +static int ftrace_raw_init_event(struct ftrace_event_call *event_call) +{ + INIT_LIST_HEAD(&event_call->fields); + init_preds(event_call); + return 0; +} + #undef TRACE_EVENT_FORMAT #define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ int ftrace_define_fields_##call(struct ftrace_event_call *event_call); \ -static int ftrace_raw_init_event_##call(void); \ \ struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ @@ -128,16 +134,10 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ .name = #call, \ .id = proto, \ .system = __stringify(TRACE_SYSTEM), \ - .raw_init = ftrace_raw_init_event_##call, \ + .raw_init = ftrace_raw_init_event, \ .show_format = ftrace_format_##call, \ .define_fields = ftrace_define_fields_##call, \ -}; \ -static int ftrace_raw_init_event_##call(void) \ -{ \ - INIT_LIST_HEAD(&event_##call.fields); \ - init_preds(&event_##call); \ - return 0; \ -} \ +}; #undef TRACE_EVENT_FORMAT_NOFILTER #define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, \ diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 85291c4de406..5931933587e9 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -193,8 +193,8 @@ int syscall_enter_define_fields(struct ftrace_event_call *call) return ret; for (i = 0; i < meta->nb_args; i++) { - ret = trace_define_field(call, meta->types[i], - meta->args[i], offset, + ret = trace_define_field(call, (char *)meta->types[i], + (char *)meta->args[i], offset, sizeof(unsigned long), 0, FILTER_OTHER); offset += sizeof(unsigned long); @@ -277,13 +277,13 @@ void ftrace_syscall_exit(struct pt_regs *regs, long ret) trace_current_buffer_unlock_commit(event, 0, 0); } -int reg_event_syscall_enter(void *ptr) +int reg_event_syscall_enter(struct ftrace_event_call *call) { int ret = 0; int num; char *name; - name = (char *)ptr; + name = (char *)call->data; num = syscall_name_to_nr(name); if (num < 0 || num >= FTRACE_SYSCALL_MAX) return -ENOSYS; @@ -301,12 +301,12 @@ int reg_event_syscall_enter(void *ptr) return ret; } -void unreg_event_syscall_enter(void *ptr) +void unreg_event_syscall_enter(struct ftrace_event_call *call) { int num; char *name; - name = (char *)ptr; + name = (char *)call->data; num = syscall_name_to_nr(name); if (num < 0 || num >= FTRACE_SYSCALL_MAX) return; @@ -318,13 +318,13 @@ void unreg_event_syscall_enter(void *ptr) mutex_unlock(&syscall_trace_lock); } -int reg_event_syscall_exit(void *ptr) +int reg_event_syscall_exit(struct ftrace_event_call *call) { int ret = 0; int num; char *name; - name = (char *)ptr; + name = (char *)call->data; num = syscall_name_to_nr(name); if (num < 0 || num >= FTRACE_SYSCALL_MAX) return -ENOSYS; @@ -342,12 +342,12 @@ int reg_event_syscall_exit(void *ptr) return ret; } -void unreg_event_syscall_exit(void *ptr) +void unreg_event_syscall_exit(struct ftrace_event_call *call) { int num; char *name; - name = (char *)ptr; + name = (char *)call->data; num = syscall_name_to_nr(name); if (num < 0 || num >= FTRACE_SYSCALL_MAX) return; -- cgit v1.3-8-gc7d7 From 24851d2447830e6cba4c4b641cb73e713f312373 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 26 Aug 2009 23:38:30 +0200 Subject: tracing/kprobes: Dump the culprit kprobe in case of kprobe recursion Kprobes can enter into a probing recursion, ie: a kprobe that does an endless loop because one of its core mechanism function used during probing is also probed itself. This patch helps pinpointing the kprobe that raised such recursion by dumping it and raising a BUG instead of a warning (we also disarm the kprobe to try avoiding recursion in BUG itself). Having a BUG instead of a warning stops the stacktrace in the right place and doesn't pollute the logs with hundreds of traces that eventually end up in a stack overflow. Signed-off-by: Frederic Weisbecker Cc: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli --- arch/x86/kernel/kprobes.c | 8 ++++++-- include/linux/kprobes.h | 2 ++ kernel/kprobes.c | 7 +++++++ 3 files changed, 15 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 16ae9610f6ff..ecee3d23fef8 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -490,9 +490,13 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs, /* A probe has been hit in the codepath leading up * to, or just after, single-stepping of a probed * instruction. This entire codepath should strictly - * reside in .kprobes.text section. Raise a warning - * to highlight this peculiar case. + * reside in .kprobes.text section. + * Raise a BUG or we'll continue in an endless + * reentering loop and eventually a stack overflow. */ + arch_disarm_kprobe(p); + dump_kprobe(p); + BUG(); } default: /* impossible cases */ diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index bcd9c07848be..87eb79c9dd60 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -296,6 +296,8 @@ void recycle_rp_inst(struct kretprobe_instance *ri, struct hlist_head *head); int disable_kprobe(struct kprobe *kp); int enable_kprobe(struct kprobe *kp); +void dump_kprobe(struct kprobe *kp); + #else /* !CONFIG_KPROBES: */ static inline int kprobes_built_in(void) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index ef177d653b2c..f72e96c25a38 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1141,6 +1141,13 @@ static void __kprobes kill_kprobe(struct kprobe *p) arch_remove_kprobe(p); } +void __kprobes dump_kprobe(struct kprobe *kp) +{ + printk(KERN_WARNING "Dumping kprobe:\n"); + printk(KERN_WARNING "Name: %s\nAddress: %p\nOffset: %x\n", + kp->symbol_name, kp->addr, kp->offset); +} + /* Module notifier call back, checking kprobes on the module */ static int __kprobes kprobes_module_callback(struct notifier_block *nb, unsigned long val, void *data) -- cgit v1.3-8-gc7d7 From aeaeae1187d7520f1c5559623f0a149da6a1c96e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 27 Aug 2009 05:09:51 +0200 Subject: tracing: Restore the const qualifier for field names and types definition Restore the const qualifier in field's name and type parameters of trace_define_field that was lost while solving a conflict. Fields names and types are defined as builtin constant strings in static TRACE_EVENTs. But kprobes allocates these dynamically. That said, we still want to always pass these strings as const char * in trace_define_fields() to avoid any further accidental writes on the pointed strings. Reported-by: Li Zefan Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt --- include/linux/ftrace_event.h | 6 +++--- kernel/trace/trace_events.c | 4 ++-- kernel/trace/trace_syscalls.c | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 1ab3089b5c59..73edf5a52e31 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -148,9 +148,9 @@ enum { }; extern int trace_define_common_fields(struct ftrace_event_call *call); -extern int trace_define_field(struct ftrace_event_call *call, char *type, - char *name, int offset, int size, int is_signed, - int filter_type); +extern int trace_define_field(struct ftrace_event_call *call, const char *type, + const char *name, int offset, int size, + int is_signed, int filter_type); extern int trace_add_event_call(struct ftrace_event_call *call); extern void trace_remove_event_call(struct ftrace_event_call *call); diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 8079bb511c43..197cdaa96c43 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -27,8 +27,8 @@ DEFINE_MUTEX(event_mutex); LIST_HEAD(ftrace_events); -int trace_define_field(struct ftrace_event_call *call, char *type, - char *name, int offset, int size, int is_signed, +int trace_define_field(struct ftrace_event_call *call, const char *type, + const char *name, int offset, int size, int is_signed, int filter_type) { struct ftrace_event_field *field; diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 5931933587e9..a928dd004535 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -193,8 +193,8 @@ int syscall_enter_define_fields(struct ftrace_event_call *call) return ret; for (i = 0; i < meta->nb_args; i++) { - ret = trace_define_field(call, (char *)meta->types[i], - (char *)meta->args[i], offset, + ret = trace_define_field(call, meta->types[i], + meta->args[i], offset, sizeof(unsigned long), 0, FILTER_OTHER); offset += sizeof(unsigned long); -- cgit v1.3-8-gc7d7 From 20f3097bfe5fb5ced0b14f9ea2620c4039bf1dde Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 4 Aug 2009 12:07:08 -0700 Subject: intr-remap: generic support for remapping HPET MSIs Generic support for remapping HPET MSI's by parsing the HPET timer block device scope in the ACPI DRHD tables. This is needed for platforms supporting interrupt-remapping and MSI capable HPET timer block. Signed-off-by: Suresh Siddha Cc: David Woodhouse Cc: Jesse Barnes Cc: Venkatesh Pallipadi Cc: Jay Fenlason LKML-Reference: <20090804190729.477649000@intel.com> Signed-off-by: Thomas Gleixner --- drivers/pci/intr_remapping.c | 89 ++++++++++++++++++++++++++++++++++++++++++-- drivers/pci/intr_remapping.h | 7 ++++ include/linux/dmar.h | 10 +++++ include/linux/hpet.h | 2 + 4 files changed, 104 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index 4f5b8712931f..2cc3f70ad425 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -14,7 +15,8 @@ #include "pci.h" static struct ioapic_scope ir_ioapic[MAX_IO_APICS]; -static int ir_ioapic_num; +static struct hpet_scope ir_hpet[MAX_HPET_TBS]; +static int ir_ioapic_num, ir_hpet_num; int intr_remapping_enabled; static int disable_intremap; @@ -351,6 +353,16 @@ int flush_irte(int irq) return rc; } +struct intel_iommu *map_hpet_to_ir(u8 hpet_id) +{ + int i; + + for (i = 0; i < MAX_HPET_TBS; i++) + if (ir_hpet[i].id == hpet_id) + return ir_hpet[i].iommu; + return NULL; +} + struct intel_iommu *map_ioapic_to_ir(int apic) { int i; @@ -478,6 +490,36 @@ int set_ioapic_sid(struct irte *irte, int apic) return 0; } +int set_hpet_sid(struct irte *irte, u8 id) +{ + int i; + u16 sid = 0; + + if (!irte) + return -1; + + for (i = 0; i < MAX_HPET_TBS; i++) { + if (ir_hpet[i].id == id) { + sid = (ir_hpet[i].bus << 8) | ir_hpet[i].devfn; + break; + } + } + + if (sid == 0) { + pr_warning("Failed to set source-id of HPET block (%d)\n", id); + return -1; + } + + /* + * Should really use SQ_ALL_16. Some platforms are broken. + * While we figure out the right quirks for these broken platforms, use + * SQ_13_IGNORE_3 for now. + */ + set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_13_IGNORE_3, sid); + + return 0; +} + int set_msi_sid(struct irte *irte, struct pci_dev *dev) { struct pci_dev *bridge; @@ -711,6 +753,34 @@ error: return -1; } +static void ir_parse_one_hpet_scope(struct acpi_dmar_device_scope *scope, + struct intel_iommu *iommu) +{ + struct acpi_dmar_pci_path *path; + u8 bus; + int count; + + bus = scope->bus; + path = (struct acpi_dmar_pci_path *)(scope + 1); + count = (scope->length - sizeof(struct acpi_dmar_device_scope)) + / sizeof(struct acpi_dmar_pci_path); + + while (--count > 0) { + /* + * Access PCI directly due to the PCI + * subsystem isn't initialized yet. + */ + bus = read_pci_config_byte(bus, path->dev, path->fn, + PCI_SECONDARY_BUS); + path++; + } + ir_hpet[ir_hpet_num].bus = bus; + ir_hpet[ir_hpet_num].devfn = PCI_DEVFN(path->dev, path->fn); + ir_hpet[ir_hpet_num].iommu = iommu; + ir_hpet[ir_hpet_num].id = scope->enumeration_id; + ir_hpet_num++; +} + static void ir_parse_one_ioapic_scope(struct acpi_dmar_device_scope *scope, struct intel_iommu *iommu) { @@ -740,8 +810,8 @@ static void ir_parse_one_ioapic_scope(struct acpi_dmar_device_scope *scope, ir_ioapic_num++; } -static int ir_parse_ioapic_scope(struct acpi_dmar_header *header, - struct intel_iommu *iommu) +static int ir_parse_ioapic_hpet_scope(struct acpi_dmar_header *header, + struct intel_iommu *iommu) { struct acpi_dmar_hardware_unit *drhd; struct acpi_dmar_device_scope *scope; @@ -765,6 +835,17 @@ static int ir_parse_ioapic_scope(struct acpi_dmar_header *header, drhd->address); ir_parse_one_ioapic_scope(scope, iommu); + } else if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_HPET) { + if (ir_hpet_num == MAX_HPET_TBS) { + printk(KERN_WARNING "Exceeded Max HPET blocks\n"); + return -1; + } + + printk(KERN_INFO "HPET id %d under DRHD base" + " 0x%Lx\n", scope->enumeration_id, + drhd->address); + + ir_parse_one_hpet_scope(scope, iommu); } start += scope->length; } @@ -785,7 +866,7 @@ int __init parse_ioapics_under_ir(void) struct intel_iommu *iommu = drhd->iommu; if (ecap_ir_support(iommu->ecap)) { - if (ir_parse_ioapic_scope(drhd->hdr, iommu)) + if (ir_parse_ioapic_hpet_scope(drhd->hdr, iommu)) return -1; ir_supported = 1; diff --git a/drivers/pci/intr_remapping.h b/drivers/pci/intr_remapping.h index 63a263c18415..5662fecfee60 100644 --- a/drivers/pci/intr_remapping.h +++ b/drivers/pci/intr_remapping.h @@ -7,4 +7,11 @@ struct ioapic_scope { unsigned int devfn; /* PCI devfn number */ }; +struct hpet_scope { + struct intel_iommu *iommu; + u8 id; + unsigned int bus; + unsigned int devfn; +}; + #define IR_X2APIC_MODE(mode) (mode ? (1 << 11) : 0) diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 4a2b162c256a..69a6fbac0921 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -126,7 +126,9 @@ extern int free_irte(int irq); extern int irq_remapped(int irq); extern struct intel_iommu *map_dev_to_ir(struct pci_dev *dev); extern struct intel_iommu *map_ioapic_to_ir(int apic); +extern struct intel_iommu *map_hpet_to_ir(u8 id); extern int set_ioapic_sid(struct irte *irte, int apic); +extern int set_hpet_sid(struct irte *irte, u8 id); extern int set_msi_sid(struct irte *irte, struct pci_dev *dev); #else static inline int alloc_irte(struct intel_iommu *iommu, int irq, u16 count) @@ -158,10 +160,18 @@ static inline struct intel_iommu *map_ioapic_to_ir(int apic) { return NULL; } +static inline struct intel_iommu *map_hpet_to_ir(unsigned int hpet_id) +{ + return NULL; +} static inline int set_ioapic_sid(struct irte *irte, int apic) { return 0; } +static inline int set_hpet_sid(struct irte *irte, u8 id) +{ + return -1; +} static inline int set_msi_sid(struct irte *irte, struct pci_dev *dev) { return 0; diff --git a/include/linux/hpet.h b/include/linux/hpet.h index 79f63a27bcef..219ca4f6bea6 100644 --- a/include/linux/hpet.h +++ b/include/linux/hpet.h @@ -126,4 +126,6 @@ struct hpet_info { #define HPET_DPI _IO('h', 0x05) /* disable periodic */ #define HPET_IRQFREQ _IOW('h', 0x6, unsigned long) /* IRQFREQ usec */ +#define MAX_HPET_TBS 8 /* maximum hpet timer blocks */ + #endif /* !__HPET__ */ -- cgit v1.3-8-gc7d7 From c95b4502ad7fe8f3b9954aec794b00ac0046ab3a Mon Sep 17 00:00:00 2001 From: john stultz Date: Thu, 27 Aug 2009 17:04:42 -0700 Subject: ntp: Provide compability defines (You say MOD_NANO, I say ADJ_NANO) MOD_NANO, ADJ_NANO, MOD_NANO, ADJ_NANO! Lets call the whole thing off! But oh! If we call the whole thing off, Then we must part. And oh! If we ever part, Then that might break my heart^H^H^H^Hclock! So, if you like MOD_NANO and I like ADJ_NANO, I'll include MOD_NANO and give up ADJ_NANO (not really!). For we know we need each other, So we better call the calling off off. Let's call the whole thing off! The tumultuous NTP and Linux relationship has hit another snag: Ends up NTPd still uses the "xntp 3.4 compatability names" and when the STA_NANO value was added (along with ADJ_NANO), NTPd expected MOD_NANO to be added and has apparently hit some build errors. Report to ntp hackers: https://lists.ntp.org/pipermail/hackers/2009-August/004455.html Related Bugs: https://support.ntp.org/bugs/show_bug.cgi?id=1219 https://bugzilla.redhat.com/show_bug.cgi?id=505566 So in an effort to make peace, here's a patch to help get things building again. I also have updated the comment to make sure folks don't think the MOD_* values are just legacy constants. Of course, NTPd really uses the glibc-headers, so those will need to be similarly updated before things are working again (the RH bug above should probably cover that). Thanks to Michael Tatarinov and Hal Murray for finding and reporting the issue! Signed-off-by: John Stultz Cc: Miroslav Lichvar Cc: hmurray@megapathdsl.net Cc: Ulrich Drepper Cc: Michael Tatarinov LKML-Reference: <1251417882.7905.42.camel@localhost.localdomain> Signed-off-by: Thomas Gleixner --- include/linux/timex.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/timex.h b/include/linux/timex.h index e6967d10d9e5..782ccd45c0d9 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -115,13 +115,16 @@ struct timex { #define ADJ_OFFSET_SS_READ 0xa001 /* read-only adjtime */ #endif -/* xntp 3.4 compatibility names */ +/* NTP userland likes the MOD_ prefix better */ #define MOD_OFFSET ADJ_OFFSET #define MOD_FREQUENCY ADJ_FREQUENCY #define MOD_MAXERROR ADJ_MAXERROR #define MOD_ESTERROR ADJ_ESTERROR #define MOD_STATUS ADJ_STATUS #define MOD_TIMECONST ADJ_TIMECONST +#define MOD_TAI ADJ_TAI +#define MOD_MICRO ADJ_MICRO +#define MOD_NANO ADJ_NANO /* -- cgit v1.3-8-gc7d7 From 85488037bb9b533b064be66412dbe1dbcd2734d9 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sat, 5 Sep 2009 18:52:16 +0100 Subject: ASoC: Add source argument to PLL configuration More and more devices feature PLLs and FLLs with the ability to select between multiple input clocks. In order to better support these devices a new argument, source, has been added to the set_pll() configuration API. Using set_clkdiv() is often difficult due to the need to stop the PLL/FLL before any reconfiguration can be done. Signed-off-by: Mark Brown --- include/sound/soc-dai.h | 6 +++--- sound/soc/atmel/playpaq_wm8510.c | 2 +- sound/soc/codecs/wm8350.c | 2 +- sound/soc/codecs/wm8400.c | 3 ++- sound/soc/codecs/wm8510.c | 4 ++-- sound/soc/codecs/wm8580.c | 4 ++-- sound/soc/codecs/wm8753.c | 4 ++-- sound/soc/codecs/wm8900.c | 4 ++-- sound/soc/codecs/wm8940.c | 4 ++-- sound/soc/codecs/wm8960.c | 4 ++-- sound/soc/codecs/wm8974.c | 4 ++-- sound/soc/codecs/wm8990.c | 4 ++-- sound/soc/codecs/wm8993.c | 2 +- sound/soc/codecs/wm9713.c | 4 ++-- sound/soc/imx/mx27vis_wm8974.c | 2 +- sound/soc/pxa/magician.c | 2 +- sound/soc/pxa/pxa-ssp.c | 4 ++-- sound/soc/pxa/zylonite.c | 5 +++-- sound/soc/s3c24xx/neo1973_gta02_wm8753.c | 2 +- sound/soc/s3c24xx/neo1973_wm8753.c | 2 +- sound/soc/soc-core.c | 8 +++++--- 21 files changed, 40 insertions(+), 36 deletions(-) (limited to 'include') diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h index 97ca9af414dc..16963d4d5df2 100644 --- a/include/sound/soc-dai.h +++ b/include/sound/soc-dai.h @@ -106,7 +106,7 @@ int snd_soc_dai_set_clkdiv(struct snd_soc_dai *dai, int div_id, int div); int snd_soc_dai_set_pll(struct snd_soc_dai *dai, - int pll_id, unsigned int freq_in, unsigned int freq_out); + int pll_id, int source, unsigned int freq_in, unsigned int freq_out); /* Digital Audio interface formatting */ int snd_soc_dai_set_fmt(struct snd_soc_dai *dai, unsigned int fmt); @@ -136,8 +136,8 @@ struct snd_soc_dai_ops { */ int (*set_sysclk)(struct snd_soc_dai *dai, int clk_id, unsigned int freq, int dir); - int (*set_pll)(struct snd_soc_dai *dai, - int pll_id, unsigned int freq_in, unsigned int freq_out); + int (*set_pll)(struct snd_soc_dai *dai, int pll_id, int source, + unsigned int freq_in, unsigned int freq_out); int (*set_clkdiv)(struct snd_soc_dai *dai, int div_id, int div); /* diff --git a/sound/soc/atmel/playpaq_wm8510.c b/sound/soc/atmel/playpaq_wm8510.c index 9eb610c2ba91..9df4c68ef000 100644 --- a/sound/soc/atmel/playpaq_wm8510.c +++ b/sound/soc/atmel/playpaq_wm8510.c @@ -268,7 +268,7 @@ static int playpaq_wm8510_hw_params(struct snd_pcm_substream *substream, #endif /* CONFIG_SND_AT32_SOC_PLAYPAQ_SLAVE */ - ret = snd_soc_dai_set_pll(codec_dai, 0, + ret = snd_soc_dai_set_pll(codec_dai, 0, 0, clk_get_rate(CODEC_CLK), pll_out); if (ret < 0) { pr_warning("playpaq_wm8510: Failed to set CODEC DAI PLL (%d)\n", diff --git a/sound/soc/codecs/wm8350.c b/sound/soc/codecs/wm8350.c index 71c9c4bb2632..0ebd99b7493e 100644 --- a/sound/soc/codecs/wm8350.c +++ b/sound/soc/codecs/wm8350.c @@ -1101,7 +1101,7 @@ static inline int fll_factors(struct _fll_div *fll_div, unsigned int input, } static int wm8350_set_fll(struct snd_soc_dai *codec_dai, - int pll_id, unsigned int freq_in, + int pll_id, int source, unsigned int freq_in, unsigned int freq_out) { struct snd_soc_codec *codec = codec_dai->codec; diff --git a/sound/soc/codecs/wm8400.c b/sound/soc/codecs/wm8400.c index b9ef4d915221..9cb8e50f0fbb 100644 --- a/sound/soc/codecs/wm8400.c +++ b/sound/soc/codecs/wm8400.c @@ -1011,7 +1011,8 @@ static int fll_factors(struct wm8400_priv *wm8400, struct fll_factors *factors, } static int wm8400_set_dai_pll(struct snd_soc_dai *codec_dai, int pll_id, - unsigned int freq_in, unsigned int freq_out) + int source, unsigned int freq_in, + unsigned int freq_out) { struct snd_soc_codec *codec = codec_dai->codec; struct wm8400_priv *wm8400 = codec->private_data; diff --git a/sound/soc/codecs/wm8510.c b/sound/soc/codecs/wm8510.c index 060d5d06ba95..5702435af81b 100644 --- a/sound/soc/codecs/wm8510.c +++ b/sound/soc/codecs/wm8510.c @@ -271,8 +271,8 @@ static void pll_factors(unsigned int target, unsigned int source) pll_div.k = K; } -static int wm8510_set_dai_pll(struct snd_soc_dai *codec_dai, - int pll_id, unsigned int freq_in, unsigned int freq_out) +static int wm8510_set_dai_pll(struct snd_soc_dai *codec_dai, int pll_id, + int source, unsigned int freq_in, unsigned int freq_out) { struct snd_soc_codec *codec = codec_dai->codec; u16 reg; diff --git a/sound/soc/codecs/wm8580.c b/sound/soc/codecs/wm8580.c index 6bded8c78150..3be5c0b2552c 100644 --- a/sound/soc/codecs/wm8580.c +++ b/sound/soc/codecs/wm8580.c @@ -407,8 +407,8 @@ static int pll_factors(struct _pll_div *pll_div, unsigned int target, return 0; } -static int wm8580_set_dai_pll(struct snd_soc_dai *codec_dai, - int pll_id, unsigned int freq_in, unsigned int freq_out) +static int wm8580_set_dai_pll(struct snd_soc_dai *codec_dai, int pll_id, + int source, unsigned int freq_in, unsigned int freq_out) { int offset; struct snd_soc_codec *codec = codec_dai->codec; diff --git a/sound/soc/codecs/wm8753.c b/sound/soc/codecs/wm8753.c index d80d414cfbbd..f60f3a02d1f8 100644 --- a/sound/soc/codecs/wm8753.c +++ b/sound/soc/codecs/wm8753.c @@ -723,8 +723,8 @@ static void pll_factors(struct _pll_div *pll_div, unsigned int target, pll_div->k = K; } -static int wm8753_set_dai_pll(struct snd_soc_dai *codec_dai, - int pll_id, unsigned int freq_in, unsigned int freq_out) +static int wm8753_set_dai_pll(struct snd_soc_dai *codec_dai, int pll_id, + int source, unsigned int freq_in, unsigned int freq_out) { u16 reg, enable; int offset; diff --git a/sound/soc/codecs/wm8900.c b/sound/soc/codecs/wm8900.c index 5e9c855c0036..882604ef768c 100644 --- a/sound/soc/codecs/wm8900.c +++ b/sound/soc/codecs/wm8900.c @@ -814,8 +814,8 @@ reenable: return 0; } -static int wm8900_set_dai_pll(struct snd_soc_dai *codec_dai, - int pll_id, unsigned int freq_in, unsigned int freq_out) +static int wm8900_set_dai_pll(struct snd_soc_dai *codec_dai, int pll_id, + int source, unsigned int freq_in, unsigned int freq_out) { return wm8900_set_fll(codec_dai->codec, pll_id, freq_in, freq_out); } diff --git a/sound/soc/codecs/wm8940.c b/sound/soc/codecs/wm8940.c index da97aae475a2..914d788a2b76 100644 --- a/sound/soc/codecs/wm8940.c +++ b/sound/soc/codecs/wm8940.c @@ -536,8 +536,8 @@ static void pll_factors(unsigned int target, unsigned int source) } /* Untested at the moment */ -static int wm8940_set_dai_pll(struct snd_soc_dai *codec_dai, - int pll_id, unsigned int freq_in, unsigned int freq_out) +static int wm8940_set_dai_pll(struct snd_soc_dai *codec_dai, int pll_id, + int source, unsigned int freq_in, unsigned int freq_out) { struct snd_soc_codec *codec = codec_dai->codec; u16 reg; diff --git a/sound/soc/codecs/wm8960.c b/sound/soc/codecs/wm8960.c index f59703be61c8..416fb3c17018 100644 --- a/sound/soc/codecs/wm8960.c +++ b/sound/soc/codecs/wm8960.c @@ -540,8 +540,8 @@ static int pll_factors(unsigned int source, unsigned int target, return 0; } -static int wm8960_set_dai_pll(struct snd_soc_dai *codec_dai, - int pll_id, unsigned int freq_in, unsigned int freq_out) +static int wm8960_set_dai_pll(struct snd_soc_dai *codec_dai, int pll_id, + int source, unsigned int freq_in, unsigned int freq_out) { struct snd_soc_codec *codec = codec_dai->codec; u16 reg; diff --git a/sound/soc/codecs/wm8974.c b/sound/soc/codecs/wm8974.c index d8a013ab3177..fa4d85bd048b 100644 --- a/sound/soc/codecs/wm8974.c +++ b/sound/soc/codecs/wm8974.c @@ -329,8 +329,8 @@ static void pll_factors(unsigned int target, unsigned int source) pll_div.k = K; } -static int wm8974_set_dai_pll(struct snd_soc_dai *codec_dai, - int pll_id, unsigned int freq_in, unsigned int freq_out) +static int wm8974_set_dai_pll(struct snd_soc_dai *codec_dai, int pll_id, + int source, unsigned int freq_in, unsigned int freq_out) { struct snd_soc_codec *codec = codec_dai->codec; u16 reg; diff --git a/sound/soc/codecs/wm8990.c b/sound/soc/codecs/wm8990.c index 2d702db4131d..f657e9a5fe26 100644 --- a/sound/soc/codecs/wm8990.c +++ b/sound/soc/codecs/wm8990.c @@ -972,8 +972,8 @@ static void pll_factors(struct _pll_div *pll_div, unsigned int target, pll_div->k = K; } -static int wm8990_set_dai_pll(struct snd_soc_dai *codec_dai, - int pll_id, unsigned int freq_in, unsigned int freq_out) +static int wm8990_set_dai_pll(struct snd_soc_dai *codec_dai, int pll_id, + int source, unsigned int freq_in, unsigned int freq_out) { u16 reg; struct snd_soc_codec *codec = codec_dai->codec; diff --git a/sound/soc/codecs/wm8993.c b/sound/soc/codecs/wm8993.c index d9987999e92c..6b32a2852603 100644 --- a/sound/soc/codecs/wm8993.c +++ b/sound/soc/codecs/wm8993.c @@ -422,7 +422,7 @@ static int fll_factors(struct _fll_div *fll_div, unsigned int Fref, return 0; } -static int wm8993_set_fll(struct snd_soc_dai *dai, int fll_id, +static int wm8993_set_fll(struct snd_soc_dai *dai, int fll_id, int source, unsigned int Fref, unsigned int Fout) { struct snd_soc_codec *codec = dai->codec; diff --git a/sound/soc/codecs/wm9713.c b/sound/soc/codecs/wm9713.c index abed37acf787..ca3d449ed89e 100644 --- a/sound/soc/codecs/wm9713.c +++ b/sound/soc/codecs/wm9713.c @@ -800,8 +800,8 @@ static int wm9713_set_pll(struct snd_soc_codec *codec, return 0; } -static int wm9713_set_dai_pll(struct snd_soc_dai *codec_dai, - int pll_id, unsigned int freq_in, unsigned int freq_out) +static int wm9713_set_dai_pll(struct snd_soc_dai *codec_dai, int pll_id, + int source, unsigned int freq_in, unsigned int freq_out) { struct snd_soc_codec *codec = codec_dai->codec; return wm9713_set_pll(codec, pll_id, freq_in, freq_out); diff --git a/sound/soc/imx/mx27vis_wm8974.c b/sound/soc/imx/mx27vis_wm8974.c index e4dcb539108a..0267d2d91685 100644 --- a/sound/soc/imx/mx27vis_wm8974.c +++ b/sound/soc/imx/mx27vis_wm8974.c @@ -157,7 +157,7 @@ static int mx27vis_hifi_hw_params(struct snd_pcm_substream *substream, /* codec PLL input is 25 MHz */ - ret = codec_dai->ops->set_pll(codec_dai, IGNORED_ARG, + ret = codec_dai->ops->set_pll(codec_dai, IGNORED_ARG, IGNORED_ARG, 25000000, pll_out); if (ret < 0) { printk(KERN_ERR "Error when setting PLL input\n"); diff --git a/sound/soc/pxa/magician.c b/sound/soc/pxa/magician.c index 9f7c61e23daf..4c8d99a8d386 100644 --- a/sound/soc/pxa/magician.c +++ b/sound/soc/pxa/magician.c @@ -213,7 +213,7 @@ static int magician_playback_hw_params(struct snd_pcm_substream *substream, return ret; /* set SSP audio pll clock */ - ret = snd_soc_dai_set_pll(cpu_dai, 0, 0, acps); + ret = snd_soc_dai_set_pll(cpu_dai, 0, 0, 0, acps); if (ret < 0) return ret; diff --git a/sound/soc/pxa/pxa-ssp.c b/sound/soc/pxa/pxa-ssp.c index 5b9ed6464789..57f201c94ca8 100644 --- a/sound/soc/pxa/pxa-ssp.c +++ b/sound/soc/pxa/pxa-ssp.c @@ -305,8 +305,8 @@ static int pxa_ssp_set_dai_clkdiv(struct snd_soc_dai *cpu_dai, /* * Configure the PLL frequency pxa27x and (afaik - pxa320 only) */ -static int pxa_ssp_set_dai_pll(struct snd_soc_dai *cpu_dai, - int pll_id, unsigned int freq_in, unsigned int freq_out) +static int pxa_ssp_set_dai_pll(struct snd_soc_dai *cpu_dai, int pll_id, + int source, unsigned int freq_in, unsigned int freq_out) { struct ssp_priv *priv = cpu_dai->private_data; struct ssp_device *ssp = priv->dev.ssp; diff --git a/sound/soc/pxa/zylonite.c b/sound/soc/pxa/zylonite.c index 9a386b4c4ed1..dd678ae24398 100644 --- a/sound/soc/pxa/zylonite.c +++ b/sound/soc/pxa/zylonite.c @@ -74,7 +74,8 @@ static const struct snd_soc_dapm_route audio_map[] = { static int zylonite_wm9713_init(struct snd_soc_codec *codec) { if (clk_pout) - snd_soc_dai_set_pll(&codec->dai[0], 0, clk_get_rate(pout), 0); + snd_soc_dai_set_pll(&codec->dai[0], 0, 0, + clk_get_rate(pout), 0); snd_soc_dapm_new_controls(codec, zylonite_dapm_widgets, ARRAY_SIZE(zylonite_dapm_widgets)); @@ -128,7 +129,7 @@ static int zylonite_voice_hw_params(struct snd_pcm_substream *substream, if (ret < 0) return ret; - ret = snd_soc_dai_set_pll(cpu_dai, 0, 0, pll_out); + ret = snd_soc_dai_set_pll(cpu_dai, 0, 0, 0, pll_out); if (ret < 0) return ret; diff --git a/sound/soc/s3c24xx/neo1973_gta02_wm8753.c b/sound/soc/s3c24xx/neo1973_gta02_wm8753.c index 0c52e36ddd87..6ddd1b3b16b3 100644 --- a/sound/soc/s3c24xx/neo1973_gta02_wm8753.c +++ b/sound/soc/s3c24xx/neo1973_gta02_wm8753.c @@ -119,7 +119,7 @@ static int neo1973_gta02_hifi_hw_params(struct snd_pcm_substream *substream, return ret; /* codec PLL input is PCLK/4 */ - ret = snd_soc_dai_set_pll(codec_dai, WM8753_PLL1, + ret = snd_soc_dai_set_pll(codec_dai, WM8753_PLL1, 0, iis_clkrate / 4, pll_out); if (ret < 0) return ret; diff --git a/sound/soc/s3c24xx/neo1973_wm8753.c b/sound/soc/s3c24xx/neo1973_wm8753.c index 906709e6dd5f..16009eba9cba 100644 --- a/sound/soc/s3c24xx/neo1973_wm8753.c +++ b/sound/soc/s3c24xx/neo1973_wm8753.c @@ -137,7 +137,7 @@ static int neo1973_hifi_hw_params(struct snd_pcm_substream *substream, return ret; /* codec PLL input is PCLK/4 */ - ret = snd_soc_dai_set_pll(codec_dai, WM8753_PLL1, + ret = snd_soc_dai_set_pll(codec_dai, WM8753_PLL1, 0, iis_clkrate / 4, pll_out); if (ret < 0) return ret; diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 7ff04ad2a97e..05fdc8023da4 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -2197,16 +2197,18 @@ EXPORT_SYMBOL_GPL(snd_soc_dai_set_clkdiv); * snd_soc_dai_set_pll - configure DAI PLL. * @dai: DAI * @pll_id: DAI specific PLL ID + * @source: DAI specific source for the PLL * @freq_in: PLL input clock frequency in Hz * @freq_out: requested PLL output clock frequency in Hz * * Configures and enables PLL to generate output clock based on input clock. */ -int snd_soc_dai_set_pll(struct snd_soc_dai *dai, - int pll_id, unsigned int freq_in, unsigned int freq_out) +int snd_soc_dai_set_pll(struct snd_soc_dai *dai, int pll_id, int source, + unsigned int freq_in, unsigned int freq_out) { if (dai->ops && dai->ops->set_pll) - return dai->ops->set_pll(dai, pll_id, freq_in, freq_out); + return dai->ops->set_pll(dai, pll_id, source, + freq_in, freq_out); else return -EINVAL; } -- cgit v1.3-8-gc7d7 From 215edda3adf502ccdf3a358ab35b616e7abd25ff Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 8 Sep 2009 18:59:05 +0100 Subject: ASoC: Allow per-route connectedness checks for supplies Some chips with complex internal supply (particularly clocking) arragements may have multiple options for some of the supply connections. Since these don't affect user-visible audio routing the expectation would be that they would be managed automatically by one of the drivers. Support these users by allowing routes to have a connected function which is queried before the connectedness of the path is checked as normal. Currently this is only done for supplies, other widgets could be supported but are not currently since the expectation for them is that audio routing will be under the control of userspace. Signed-off-by: Mark Brown --- include/sound/soc-dapm.h | 7 +++++++ sound/soc/soc-dapm.c | 19 ++++++++++++++++--- 2 files changed, 23 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h index c1410e3191e3..67224db60348 100644 --- a/include/sound/soc-dapm.h +++ b/include/sound/soc-dapm.h @@ -333,6 +333,10 @@ struct snd_soc_dapm_route { const char *sink; const char *control; const char *source; + + /* Note: currently only supported for links where source is a supply */ + int (*connected)(struct snd_soc_dapm_widget *source, + struct snd_soc_dapm_widget *sink); }; /* dapm audio path between two widgets */ @@ -349,6 +353,9 @@ struct snd_soc_dapm_path { u32 connect:1; /* source and sink widgets are connected */ u32 walked:1; /* path has been walked */ + int (*connected)(struct snd_soc_dapm_widget *source, + struct snd_soc_dapm_widget *sink); + struct list_head list_source; struct list_head list_sink; struct list_head list; diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 0d8b08ef8731..37f7adeae323 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -718,6 +718,10 @@ static int dapm_supply_check_power(struct snd_soc_dapm_widget *w) /* Check if one of our outputs is connected */ list_for_each_entry(path, &w->sinks, list_source) { + if (path->connected && + !path->connected(path->source, path->sink)) + continue; + if (path->sink && path->sink->power_check && path->sink->power_check(path->sink)) { power = 1; @@ -1136,6 +1140,9 @@ static ssize_t dapm_widget_power_read_file(struct file *file, w->sname); list_for_each_entry(p, &w->sources, list_sink) { + if (p->connected && !p->connected(w, p->sink)) + continue; + if (p->connect) ret += snprintf(buf + ret, PAGE_SIZE - ret, " in %s %s\n", @@ -1143,6 +1150,9 @@ static ssize_t dapm_widget_power_read_file(struct file *file, p->source->name); } list_for_each_entry(p, &w->sinks, list_source) { + if (p->connected && !p->connected(w, p->sink)) + continue; + if (p->connect) ret += snprintf(buf + ret, PAGE_SIZE - ret, " out %s %s\n", @@ -1385,10 +1395,13 @@ int snd_soc_dapm_sync(struct snd_soc_codec *codec) EXPORT_SYMBOL_GPL(snd_soc_dapm_sync); static int snd_soc_dapm_add_route(struct snd_soc_codec *codec, - const char *sink, const char *control, const char *source) + const struct snd_soc_dapm_route *route) { struct snd_soc_dapm_path *path; struct snd_soc_dapm_widget *wsource = NULL, *wsink = NULL, *w; + const char *sink = route->sink; + const char *control = route->control; + const char *source = route->source; int ret = 0; /* find src and dest widgets */ @@ -1412,6 +1425,7 @@ static int snd_soc_dapm_add_route(struct snd_soc_codec *codec, path->source = wsource; path->sink = wsink; + path->connected = route->connected; INIT_LIST_HEAD(&path->list); INIT_LIST_HEAD(&path->list_source); INIT_LIST_HEAD(&path->list_sink); @@ -1512,8 +1526,7 @@ int snd_soc_dapm_add_routes(struct snd_soc_codec *codec, int i, ret; for (i = 0; i < num; i++) { - ret = snd_soc_dapm_add_route(codec, route->sink, - route->control, route->source); + ret = snd_soc_dapm_add_route(codec, route); if (ret < 0) { printk(KERN_ERR "Failed to add route %s->%s\n", route->source, -- cgit v1.3-8-gc7d7 From 472df3cbae8da6a949f1392a11958b8d21383735 Mon Sep 17 00:00:00 2001 From: Barry Song <21cnbao@gmail.com> Date: Sat, 12 Sep 2009 01:16:29 +0800 Subject: ASoC: Provide API for reordering channels The patch adds an interface to set the relationship between audio channel number and slot number. The interface should be really useful because audio channel n doesn't always use slot n in all platforms. And for some devices, the relationship even can change with sound mode switch in 2.1,3.1,4.1,5.1,6.1,7.1 etc. Signed-off-by: Barry Song <21cnbao@gmail.com> Signed-off-by: Mark Brown --- include/sound/soc-dai.h | 7 +++++++ sound/soc/soc-core.c | 24 ++++++++++++++++++++++++ 2 files changed, 31 insertions(+) (limited to 'include') diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h index 16963d4d5df2..e0c7fa7b1060 100644 --- a/include/sound/soc-dai.h +++ b/include/sound/soc-dai.h @@ -114,6 +114,10 @@ int snd_soc_dai_set_fmt(struct snd_soc_dai *dai, unsigned int fmt); int snd_soc_dai_set_tdm_slot(struct snd_soc_dai *dai, unsigned int tx_mask, unsigned int rx_mask, int slots, int slot_width); +int snd_soc_dai_set_channel_map(struct snd_soc_dai *dai, + unsigned int tx_num, unsigned int *tx_slot, + unsigned int rx_num, unsigned int *rx_slot); + int snd_soc_dai_set_tristate(struct snd_soc_dai *dai, int tristate); /* Digital Audio Interface mute */ @@ -148,6 +152,9 @@ struct snd_soc_dai_ops { int (*set_tdm_slot)(struct snd_soc_dai *dai, unsigned int tx_mask, unsigned int rx_mask, int slots, int slot_width); + int (*set_channel_map)(struct snd_soc_dai *dai, + unsigned int tx_num, unsigned int *tx_slot, + unsigned int rx_num, unsigned int *rx_slot); int (*set_tristate)(struct snd_soc_dai *dai, int tristate); /* diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 05fdc8023da4..f5b356f8acfb 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -2252,6 +2252,30 @@ int snd_soc_dai_set_tdm_slot(struct snd_soc_dai *dai, } EXPORT_SYMBOL_GPL(snd_soc_dai_set_tdm_slot); +/** + * snd_soc_dai_set_channel_map - configure DAI audio channel map + * @dai: DAI + * @tx_num: how many TX channels + * @tx_slot: pointer to an array which imply the TX slot number channel + * 0~num-1 uses + * @rx_num: how many RX channels + * @rx_slot: pointer to an array which imply the RX slot number channel + * 0~num-1 uses + * + * configure the relationship between channel number and TDM slot number. + */ +int snd_soc_dai_set_channel_map(struct snd_soc_dai *dai, + unsigned int tx_num, unsigned int *tx_slot, + unsigned int rx_num, unsigned int *rx_slot) +{ + if (dai->ops && dai->ops->set_channel_map) + return dai->ops->set_channel_map(dai, tx_num, tx_slot, + rx_num, rx_slot); + else + return -EINVAL; +} +EXPORT_SYMBOL_GPL(snd_soc_dai_set_channel_map); + /** * snd_soc_dai_set_tristate - configure DAI system or master clock. * @dai: DAI -- cgit v1.3-8-gc7d7 From 8ebc423238341b52912c7295b045a32477b33f09 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 7 Apr 2009 04:19:49 +0200 Subject: reiserfs: kill-the-BKL This patch is an attempt to remove the Bkl based locking scheme from reiserfs and is intended. It is a bit inspired from an old attempt by Peter Zijlstra: http://lkml.indiana.edu/hypermail/linux/kernel/0704.2/2174.html The bkl is heavily used in this filesystem to prevent from concurrent write accesses on the filesystem. Reiserfs makes a deep use of the specific properties of the Bkl: - It can be acqquired recursively by a same task - It is released on the schedule() calls and reacquired when schedule() returns The two properties above are a roadmap for the reiserfs write locking so it's very hard to simply replace it with a common mutex. - We need a recursive-able locking unless we want to restructure several blocks of the code. - We need to identify the sites where the bkl was implictly relaxed (schedule, wait, sync, etc...) so that we can in turn release and reacquire our new lock explicitly. Such implicit releases of the lock are often required to let other resources producer/consumer do their job or we can suffer unexpected starvations or deadlocks. So the new lock that replaces the bkl here is a per superblock mutex with a specific property: it can be acquired recursively by a same task, like the bkl. For such purpose, we integrate a lock owner and a lock depth field on the superblock information structure. The first axis on this patch is to turn reiserfs_write_(un)lock() function into a wrapper to manage this mutex. Also some explicit calls to lock_kernel() have been converted to reiserfs_write_lock() helpers. The second axis is to find the important blocking sites (schedule...(), wait_on_buffer(), sync_dirty_buffer(), etc...) and then apply an explicit release of the write lock on these locations before blocking. Then we can safely wait for those who can give us resources or those who need some. Typically this is a fight between the current writer, the reiserfs workqueue (aka the async commiter) and the pdflush threads. The third axis is a consequence of the second. The write lock is usually on top of a lock dependency chain which can include the journal lock, the flush lock or the commit lock. So it's dangerous to release and trying to reacquire the write lock while we still hold other locks. This is fine with the bkl: T1 T2 lock_kernel() mutex_lock(A) unlock_kernel() // do something lock_kernel() mutex_lock(A) -> already locked by T1 schedule() (and then unlock_kernel()) lock_kernel() mutex_unlock(A) .... This is not fine with a mutex: T1 T2 mutex_lock(write) mutex_lock(A) mutex_unlock(write) // do something mutex_lock(write) mutex_lock(A) -> already locked by T1 schedule() mutex_lock(write) -> already locked by T2 deadlock The solution in this patch is to provide a helper which releases the write lock and sleep a bit if we can't lock a mutex that depend on it. It's another simulation of the bkl behaviour. The last axis is to locate the fs callbacks that are called with the bkl held, according to Documentation/filesystem/Locking. Those are: - reiserfs_remount - reiserfs_fill_super - reiserfs_put_super Reiserfs didn't need to explicitly lock because of the context of these callbacks. But now we must take care of that with the new locking. After this patch, reiserfs suffers from a slight performance regression (for now). On UP, a high volume write with dd reports an average of 27 MB/s instead of 30 MB/s without the patch applied. Signed-off-by: Frederic Weisbecker Reviewed-by: Ingo Molnar Cc: Jeff Mahoney Cc: Peter Zijlstra Cc: Bron Gondwana Cc: Andrew Morton Cc: Linus Torvalds Cc: Alexander Viro LKML-Reference: <1239070789-13354-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- fs/reiserfs/Makefile | 2 +- fs/reiserfs/bitmap.c | 2 + fs/reiserfs/dir.c | 8 +++ fs/reiserfs/fix_node.c | 10 +++ fs/reiserfs/inode.c | 23 +++++-- fs/reiserfs/ioctl.c | 6 +- fs/reiserfs/journal.c | 134 ++++++++++++++++++++++++++++++++--------- fs/reiserfs/lock.c | 63 +++++++++++++++++++ fs/reiserfs/resize.c | 2 + fs/reiserfs/stree.c | 2 + fs/reiserfs/super.c | 37 +++++++++--- include/linux/reiserfs_fs.h | 12 ++-- include/linux/reiserfs_fs_sb.h | 9 +++ 13 files changed, 261 insertions(+), 49 deletions(-) create mode 100644 fs/reiserfs/lock.c (limited to 'include') diff --git a/fs/reiserfs/Makefile b/fs/reiserfs/Makefile index 7c5ab6330dd6..6a9e30c041dd 100644 --- a/fs/reiserfs/Makefile +++ b/fs/reiserfs/Makefile @@ -7,7 +7,7 @@ obj-$(CONFIG_REISERFS_FS) += reiserfs.o reiserfs-objs := bitmap.o do_balan.o namei.o inode.o file.o dir.o fix_node.o \ super.o prints.o objectid.o lbalance.o ibalance.o stree.o \ hashes.o tail_conversion.o journal.o resize.o \ - item_ops.o ioctl.o procfs.o xattr.o + item_ops.o ioctl.o procfs.o xattr.o lock.o ifeq ($(CONFIG_REISERFS_FS_XATTR),y) reiserfs-objs += xattr_user.o xattr_trusted.o diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c index e716161ab325..147033461b87 100644 --- a/fs/reiserfs/bitmap.c +++ b/fs/reiserfs/bitmap.c @@ -1256,7 +1256,9 @@ struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb, else { if (buffer_locked(bh)) { PROC_INFO_INC(sb, scan_bitmap.wait); + reiserfs_write_unlock(sb); __wait_on_buffer(bh); + reiserfs_write_lock(sb); } BUG_ON(!buffer_uptodate(bh)); BUG_ON(atomic_read(&bh->b_count) == 0); diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c index 6d2668fdc384..17f31ad379c8 100644 --- a/fs/reiserfs/dir.c +++ b/fs/reiserfs/dir.c @@ -174,14 +174,22 @@ int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent, // user space buffer is swapped out. At that time // entry can move to somewhere else memcpy(local_buf, d_name, d_reclen); + + /* + * Since filldir might sleep, we can release + * the write lock here for other waiters + */ + reiserfs_write_unlock(inode->i_sb); if (filldir (dirent, local_buf, d_reclen, d_off, d_ino, DT_UNKNOWN) < 0) { + reiserfs_write_lock(inode->i_sb); if (local_buf != small_buf) { kfree(local_buf); } goto end; } + reiserfs_write_lock(inode->i_sb); if (local_buf != small_buf) { kfree(local_buf); } diff --git a/fs/reiserfs/fix_node.c b/fs/reiserfs/fix_node.c index 5e5a4e6fbaf8..bf5f2cbdb063 100644 --- a/fs/reiserfs/fix_node.c +++ b/fs/reiserfs/fix_node.c @@ -1022,7 +1022,11 @@ static int get_far_parent(struct tree_balance *tb, /* Check whether the common parent is locked. */ if (buffer_locked(*pcom_father)) { + + /* Release the write lock while the buffer is busy */ + reiserfs_write_unlock(tb->tb_sb); __wait_on_buffer(*pcom_father); + reiserfs_write_lock(tb->tb_sb); if (FILESYSTEM_CHANGED_TB(tb)) { brelse(*pcom_father); return REPEAT_SEARCH; @@ -1927,7 +1931,9 @@ static int get_direct_parent(struct tree_balance *tb, int h) return REPEAT_SEARCH; if (buffer_locked(bh)) { + reiserfs_write_unlock(tb->tb_sb); __wait_on_buffer(bh); + reiserfs_write_lock(tb->tb_sb); if (FILESYSTEM_CHANGED_TB(tb)) return REPEAT_SEARCH; } @@ -2278,7 +2284,9 @@ static int wait_tb_buffers_until_unlocked(struct tree_balance *tb) REPEAT_SEARCH : CARRY_ON; } #endif + reiserfs_write_unlock(tb->tb_sb); __wait_on_buffer(locked); + reiserfs_write_lock(tb->tb_sb); if (FILESYSTEM_CHANGED_TB(tb)) return REPEAT_SEARCH; } @@ -2349,7 +2357,9 @@ int fix_nodes(int op_mode, struct tree_balance *tb, /* if it possible in indirect_to_direct conversion */ if (buffer_locked(tbS0)) { + reiserfs_write_unlock(tb->tb_sb); __wait_on_buffer(tbS0); + reiserfs_write_lock(tb->tb_sb); if (FILESYSTEM_CHANGED_TB(tb)) return REPEAT_SEARCH; } diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index a14d6cd9eeda..1893c8198439 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -489,10 +489,14 @@ static int reiserfs_get_blocks_direct_io(struct inode *inode, disappeared */ if (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) { int err; - lock_kernel(); + + reiserfs_write_lock(inode->i_sb); + err = reiserfs_commit_for_inode(inode); REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask; - unlock_kernel(); + + reiserfs_write_unlock(inode->i_sb); + if (err < 0) ret = err; } @@ -616,7 +620,6 @@ int reiserfs_get_block(struct inode *inode, sector_t block, loff_t new_offset = (((loff_t) block) << inode->i_sb->s_blocksize_bits) + 1; - /* bad.... */ reiserfs_write_lock(inode->i_sb); version = get_inode_item_key_version(inode); @@ -997,10 +1000,14 @@ int reiserfs_get_block(struct inode *inode, sector_t block, if (retval) goto failure; } - /* inserting indirect pointers for a hole can take a - ** long time. reschedule if needed + /* + * inserting indirect pointers for a hole can take a + * long time. reschedule if needed and also release the write + * lock for others. */ + reiserfs_write_unlock(inode->i_sb); cond_resched(); + reiserfs_write_lock(inode->i_sb); retval = search_for_position_by_key(inode->i_sb, &key, &path); if (retval == IO_ERROR) { @@ -2608,7 +2615,10 @@ int reiserfs_prepare_write(struct file *f, struct page *page, int ret; int old_ref = 0; + reiserfs_write_unlock(inode->i_sb); reiserfs_wait_on_write_block(inode->i_sb); + reiserfs_write_lock(inode->i_sb); + fix_tail_page_for_writing(page); if (reiserfs_transaction_running(inode->i_sb)) { struct reiserfs_transaction_handle *th; @@ -2758,7 +2768,10 @@ int reiserfs_commit_write(struct file *f, struct page *page, int update_sd = 0; struct reiserfs_transaction_handle *th = NULL; + reiserfs_write_unlock(inode->i_sb); reiserfs_wait_on_write_block(inode->i_sb); + reiserfs_write_lock(inode->i_sb); + if (reiserfs_transaction_running(inode->i_sb)) { th = current->journal_info; } diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c index 0ccc3fdda7bf..5e40b0cd4c3d 100644 --- a/fs/reiserfs/ioctl.c +++ b/fs/reiserfs/ioctl.c @@ -141,9 +141,11 @@ long reiserfs_compat_ioctl(struct file *file, unsigned int cmd, default: return -ENOIOCTLCMD; } - lock_kernel(); + + reiserfs_write_lock(inode->i_sb); ret = reiserfs_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg)); - unlock_kernel(); + reiserfs_write_unlock(inode->i_sb); + return ret; } #endif diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 90622200b39c..438c71f0bc91 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -429,21 +429,6 @@ static void clear_prepared_bits(struct buffer_head *bh) clear_buffer_journal_restore_dirty(bh); } -/* utility function to force a BUG if it is called without the big -** kernel lock held. caller is the string printed just before calling BUG() -*/ -void reiserfs_check_lock_depth(struct super_block *sb, char *caller) -{ -#ifdef CONFIG_SMP - if (current->lock_depth < 0) { - reiserfs_panic(sb, "journal-1", "%s called without kernel " - "lock held", caller); - } -#else - ; -#endif -} - /* return a cnode with same dev, block number and size in table, or null if not found */ static inline struct reiserfs_journal_cnode *get_journal_hash_dev(struct super_block @@ -552,11 +537,48 @@ static inline void insert_journal_hash(struct reiserfs_journal_cnode **table, journal_hash(table, cn->sb, cn->blocknr) = cn; } +/* + * Several mutexes depend on the write lock. + * However sometimes we want to relax the write lock while we hold + * these mutexes, according to the release/reacquire on schedule() + * properties of the Bkl that were used. + * Reiserfs performances and locking were based on this scheme. + * Now that the write lock is a mutex and not the bkl anymore, doing so + * may result in a deadlock: + * + * A acquire write_lock + * A acquire j_commit_mutex + * A release write_lock and wait for something + * B acquire write_lock + * B can't acquire j_commit_mutex and sleep + * A can't acquire write lock anymore + * deadlock + * + * What we do here is avoiding such deadlock by playing the same game + * than the Bkl: if we can't acquire a mutex that depends on the write lock, + * we release the write lock, wait a bit and then retry. + * + * The mutexes concerned by this hack are: + * - The commit mutex of a journal list + * - The flush mutex + * - The journal lock + */ +static inline void reiserfs_mutex_lock_safe(struct mutex *m, + struct super_block *s) +{ + while (!mutex_trylock(m)) { + reiserfs_write_unlock(s); + schedule(); + reiserfs_write_lock(s); + } +} + /* lock the current transaction */ static inline void lock_journal(struct super_block *sb) { PROC_INFO_INC(sb, journal.lock_journal); - mutex_lock(&SB_JOURNAL(sb)->j_mutex); + + reiserfs_mutex_lock_safe(&SB_JOURNAL(sb)->j_mutex, sb); } /* unlock the current transaction */ @@ -708,7 +730,9 @@ static void check_barrier_completion(struct super_block *s, disable_barrier(s); set_buffer_uptodate(bh); set_buffer_dirty(bh); + reiserfs_write_unlock(s); sync_dirty_buffer(bh); + reiserfs_write_lock(s); } } @@ -996,8 +1020,13 @@ static int reiserfs_async_progress_wait(struct super_block *s) { DEFINE_WAIT(wait); struct reiserfs_journal *j = SB_JOURNAL(s); - if (atomic_read(&j->j_async_throttle)) + + if (atomic_read(&j->j_async_throttle)) { + reiserfs_write_unlock(s); congestion_wait(BLK_RW_ASYNC, HZ / 10); + reiserfs_write_lock(s); + } + return 0; } @@ -1043,7 +1072,8 @@ static int flush_commit_list(struct super_block *s, } /* make sure nobody is trying to flush this one at the same time */ - mutex_lock(&jl->j_commit_mutex); + reiserfs_mutex_lock_safe(&jl->j_commit_mutex, s); + if (!journal_list_still_alive(s, trans_id)) { mutex_unlock(&jl->j_commit_mutex); goto put_jl; @@ -1061,12 +1091,17 @@ static int flush_commit_list(struct super_block *s, if (!list_empty(&jl->j_bh_list)) { int ret; - unlock_kernel(); + + /* + * We might sleep in numerous places inside + * write_ordered_buffers. Relax the write lock. + */ + reiserfs_write_unlock(s); ret = write_ordered_buffers(&journal->j_dirty_buffers_lock, journal, jl, &jl->j_bh_list); if (ret < 0 && retval == 0) retval = ret; - lock_kernel(); + reiserfs_write_lock(s); } BUG_ON(!list_empty(&jl->j_bh_list)); /* @@ -1114,12 +1149,19 @@ static int flush_commit_list(struct super_block *s, bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + (jl->j_start + i) % SB_ONDISK_JOURNAL_SIZE(s); tbh = journal_find_get_block(s, bn); + + reiserfs_write_unlock(s); wait_on_buffer(tbh); + reiserfs_write_lock(s); // since we're using ll_rw_blk above, it might have skipped over // a locked buffer. Double check here // - if (buffer_dirty(tbh)) /* redundant, sync_dirty_buffer() checks */ + /* redundant, sync_dirty_buffer() checks */ + if (buffer_dirty(tbh)) { + reiserfs_write_unlock(s); sync_dirty_buffer(tbh); + reiserfs_write_lock(s); + } if (unlikely(!buffer_uptodate(tbh))) { #ifdef CONFIG_REISERFS_CHECK reiserfs_warning(s, "journal-601", @@ -1143,10 +1185,15 @@ static int flush_commit_list(struct super_block *s, if (buffer_dirty(jl->j_commit_bh)) BUG(); mark_buffer_dirty(jl->j_commit_bh) ; + reiserfs_write_unlock(s); sync_dirty_buffer(jl->j_commit_bh) ; + reiserfs_write_lock(s); } - } else + } else { + reiserfs_write_unlock(s); wait_on_buffer(jl->j_commit_bh); + reiserfs_write_lock(s); + } check_barrier_completion(s, jl->j_commit_bh); @@ -1286,7 +1333,9 @@ static int _update_journal_header_block(struct super_block *sb, if (trans_id >= journal->j_last_flush_trans_id) { if (buffer_locked((journal->j_header_bh))) { + reiserfs_write_unlock(sb); wait_on_buffer((journal->j_header_bh)); + reiserfs_write_lock(sb); if (unlikely(!buffer_uptodate(journal->j_header_bh))) { #ifdef CONFIG_REISERFS_CHECK reiserfs_warning(sb, "journal-699", @@ -1312,12 +1361,16 @@ static int _update_journal_header_block(struct super_block *sb, disable_barrier(sb); goto sync; } + reiserfs_write_unlock(sb); wait_on_buffer(journal->j_header_bh); + reiserfs_write_lock(sb); check_barrier_completion(sb, journal->j_header_bh); } else { sync: set_buffer_dirty(journal->j_header_bh); + reiserfs_write_unlock(sb); sync_dirty_buffer(journal->j_header_bh); + reiserfs_write_lock(sb); } if (!buffer_uptodate(journal->j_header_bh)) { reiserfs_warning(sb, "journal-837", @@ -1409,7 +1462,7 @@ static int flush_journal_list(struct super_block *s, /* if flushall == 0, the lock is already held */ if (flushall) { - mutex_lock(&journal->j_flush_mutex); + reiserfs_mutex_lock_safe(&journal->j_flush_mutex, s); } else if (mutex_trylock(&journal->j_flush_mutex)) { BUG(); } @@ -1553,7 +1606,11 @@ static int flush_journal_list(struct super_block *s, reiserfs_panic(s, "journal-1011", "cn->bh is NULL"); } + + reiserfs_write_unlock(s); wait_on_buffer(cn->bh); + reiserfs_write_lock(s); + if (!cn->bh) { reiserfs_panic(s, "journal-1012", "cn->bh is NULL"); @@ -1973,11 +2030,19 @@ static int do_journal_release(struct reiserfs_transaction_handle *th, reiserfs_mounted_fs_count--; /* wait for all commits to finish */ cancel_delayed_work(&SB_JOURNAL(sb)->j_work); + + /* + * We must release the write lock here because + * the workqueue job (flush_async_commit) needs this lock + */ + reiserfs_write_unlock(sb); flush_workqueue(commit_wq); + if (!reiserfs_mounted_fs_count) { destroy_workqueue(commit_wq); commit_wq = NULL; } + reiserfs_write_lock(sb); free_journal_ram(sb); @@ -2243,7 +2308,11 @@ static int journal_read_transaction(struct super_block *sb, /* read in the log blocks, memcpy to the corresponding real block */ ll_rw_block(READ, get_desc_trans_len(desc), log_blocks); for (i = 0; i < get_desc_trans_len(desc); i++) { + + reiserfs_write_unlock(sb); wait_on_buffer(log_blocks[i]); + reiserfs_write_lock(sb); + if (!buffer_uptodate(log_blocks[i])) { reiserfs_warning(sb, "journal-1212", "REPLAY FAILURE fsck required! " @@ -2964,8 +3033,11 @@ static void queue_log_writer(struct super_block *s) init_waitqueue_entry(&wait, current); add_wait_queue(&journal->j_join_wait, &wait); set_current_state(TASK_UNINTERRUPTIBLE); - if (test_bit(J_WRITERS_QUEUED, &journal->j_state)) + if (test_bit(J_WRITERS_QUEUED, &journal->j_state)) { + reiserfs_write_unlock(s); schedule(); + reiserfs_write_lock(s); + } __set_current_state(TASK_RUNNING); remove_wait_queue(&journal->j_join_wait, &wait); } @@ -2982,7 +3054,9 @@ static void let_transaction_grow(struct super_block *sb, unsigned int trans_id) struct reiserfs_journal *journal = SB_JOURNAL(sb); unsigned long bcount = journal->j_bcount; while (1) { + reiserfs_write_unlock(sb); schedule_timeout_uninterruptible(1); + reiserfs_write_lock(sb); journal->j_current_jl->j_state |= LIST_COMMIT_PENDING; while ((atomic_read(&journal->j_wcount) > 0 || atomic_read(&journal->j_jlock)) && @@ -3033,7 +3107,9 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th, if (test_bit(J_WRITERS_BLOCKED, &journal->j_state)) { unlock_journal(sb); + reiserfs_write_unlock(sb); reiserfs_wait_on_write_block(sb); + reiserfs_write_lock(sb); PROC_INFO_INC(sb, journal.journal_relock_writers); goto relock; } @@ -3506,14 +3582,14 @@ static void flush_async_commits(struct work_struct *work) struct reiserfs_journal_list *jl; struct list_head *entry; - lock_kernel(); + reiserfs_write_lock(sb); if (!list_empty(&journal->j_journal_list)) { /* last entry is the youngest, commit it and you get everything */ entry = journal->j_journal_list.prev; jl = JOURNAL_LIST_ENTRY(entry); flush_commit_list(sb, jl, 1); } - unlock_kernel(); + reiserfs_write_unlock(sb); } /* @@ -4041,7 +4117,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, * the new transaction is fully setup, and we've already flushed the * ordered bh list */ - mutex_lock(&jl->j_commit_mutex); + reiserfs_mutex_lock_safe(&jl->j_commit_mutex, sb); /* save the transaction id in case we need to commit it later */ commit_trans_id = jl->j_trans_id; @@ -4203,10 +4279,10 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, * is lost. */ if (!list_empty(&jl->j_tail_bh_list)) { - unlock_kernel(); + reiserfs_write_unlock(sb); write_ordered_buffers(&journal->j_dirty_buffers_lock, journal, jl, &jl->j_tail_bh_list); - lock_kernel(); + reiserfs_write_lock(sb); } BUG_ON(!list_empty(&jl->j_tail_bh_list)); mutex_unlock(&jl->j_commit_mutex); diff --git a/fs/reiserfs/lock.c b/fs/reiserfs/lock.c new file mode 100644 index 000000000000..cdd8d9ef048e --- /dev/null +++ b/fs/reiserfs/lock.c @@ -0,0 +1,63 @@ +#include +#include + +/* + * The previous reiserfs locking scheme was heavily based on + * the tricky properties of the Bkl: + * + * - it was acquired recursively by a same task + * - the performances relied on the release-while-schedule() property + * + * Now that we replace it by a mutex, we still want to keep the same + * recursive property to avoid big changes in the code structure. + * We use our own lock_owner here because the owner field on a mutex + * is only available in SMP or mutex debugging, also we only need this field + * for this mutex, no need for a system wide mutex facility. + * + * Also this lock is often released before a call that could block because + * reiserfs performances were partialy based on the release while schedule() + * property of the Bkl. + */ +void reiserfs_write_lock(struct super_block *s) +{ + struct reiserfs_sb_info *sb_i = REISERFS_SB(s); + + if (sb_i->lock_owner != current) { + mutex_lock(&sb_i->lock); + sb_i->lock_owner = current; + } + + /* No need to protect it, only the current task touches it */ + sb_i->lock_depth++; +} + +void reiserfs_write_unlock(struct super_block *s) +{ + struct reiserfs_sb_info *sb_i = REISERFS_SB(s); + + /* + * Are we unlocking without even holding the lock? + * Such a situation could even raise a BUG() if we don't + * want the data become corrupted + */ + WARN_ONCE(sb_i->lock_owner != current, + "Superblock write lock imbalance"); + + if (--sb_i->lock_depth == -1) { + sb_i->lock_owner = NULL; + mutex_unlock(&sb_i->lock); + } +} + +/* + * Utility function to force a BUG if it is called without the superblock + * write lock held. caller is the string printed just before calling BUG() + */ +void reiserfs_check_lock_depth(struct super_block *sb, char *caller) +{ + struct reiserfs_sb_info *sb_i = REISERFS_SB(sb); + + if (sb_i->lock_depth < 0) + reiserfs_panic(sb, "%s called without kernel lock held %d", + caller); +} diff --git a/fs/reiserfs/resize.c b/fs/reiserfs/resize.c index 18b315d3d104..b3a94d20f0fc 100644 --- a/fs/reiserfs/resize.c +++ b/fs/reiserfs/resize.c @@ -141,7 +141,9 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new) set_buffer_uptodate(bh); mark_buffer_dirty(bh); + reiserfs_write_unlock(s); sync_dirty_buffer(bh); + reiserfs_write_lock(s); // update bitmap_info stuff bitmap[i].free_count = sb_blocksize(sb) * 8 - 1; brelse(bh); diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c index d036ee5b1c81..6bd99a99a652 100644 --- a/fs/reiserfs/stree.c +++ b/fs/reiserfs/stree.c @@ -629,7 +629,9 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to s search_by_key_reada(sb, reada_bh, reada_blocks, reada_count); ll_rw_block(READ, 1, &bh); + reiserfs_write_unlock(sb); wait_on_buffer(bh); + reiserfs_write_lock(sb); if (!buffer_uptodate(bh)) goto io_error; } else { diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 7adea74d6a8a..e1cfb80d0bf3 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -465,7 +465,7 @@ static void reiserfs_put_super(struct super_block *s) struct reiserfs_transaction_handle th; th.t_trans_id = 0; - lock_kernel(); + reiserfs_write_lock(s); if (s->s_dirt) reiserfs_write_super(s); @@ -499,10 +499,10 @@ static void reiserfs_put_super(struct super_block *s) reiserfs_proc_info_done(s); + reiserfs_write_unlock(s); + mutex_destroy(&REISERFS_SB(s)->lock); kfree(s->s_fs_info); s->s_fs_info = NULL; - - unlock_kernel(); } static struct kmem_cache *reiserfs_inode_cachep; @@ -1168,11 +1168,14 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) unsigned int qfmt = 0; #ifdef CONFIG_QUOTA int i; +#endif + + reiserfs_write_lock(s); +#ifdef CONFIG_QUOTA memcpy(qf_names, REISERFS_SB(s)->s_qf_names, sizeof(qf_names)); #endif - lock_kernel(); rs = SB_DISK_SUPER_BLOCK(s); if (!reiserfs_parse_options @@ -1295,12 +1298,12 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) out_ok: replace_mount_options(s, new_opts); - unlock_kernel(); + reiserfs_write_unlock(s); return 0; out_err: kfree(new_opts); - unlock_kernel(); + reiserfs_write_unlock(s); return err; } @@ -1404,7 +1407,9 @@ static int read_super_block(struct super_block *s, int offset) static int reread_meta_blocks(struct super_block *s) { ll_rw_block(READ, 1, &(SB_BUFFER_WITH_SB(s))); + reiserfs_write_unlock(s); wait_on_buffer(SB_BUFFER_WITH_SB(s)); + reiserfs_write_lock(s); if (!buffer_uptodate(SB_BUFFER_WITH_SB(s))) { reiserfs_warning(s, "reiserfs-2504", "error reading the super"); return 1; @@ -1613,7 +1618,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) sbi = kzalloc(sizeof(struct reiserfs_sb_info), GFP_KERNEL); if (!sbi) { errval = -ENOMEM; - goto error; + goto error_alloc; } s->s_fs_info = sbi; /* Set default values for options: non-aggressive tails, RO on errors */ @@ -1627,6 +1632,20 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) /* setup default block allocator options */ reiserfs_init_alloc_options(s); + mutex_init(&REISERFS_SB(s)->lock); + REISERFS_SB(s)->lock_depth = -1; + + /* + * This function is called with the bkl, which also was the old + * locking used here. + * do_journal_begin() will soon check if we hold the lock (ie: was the + * bkl). This is likely because do_journal_begin() has several another + * callers because at this time, it doesn't seem to be necessary to + * protect against anything. + * Anyway, let's be conservative and lock for now. + */ + reiserfs_write_lock(s); + jdev_name = NULL; if (reiserfs_parse_options (s, (char *)data, &(sbi->s_mount_opt), &blocks, &jdev_name, @@ -1852,9 +1871,13 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) init_waitqueue_head(&(sbi->s_wait)); spin_lock_init(&sbi->bitmap_lock); + reiserfs_write_unlock(s); + return (0); error: + reiserfs_write_unlock(s); +error_alloc: if (jinit_done) { /* kill the commit thread, free journal ram */ journal_release_error(NULL, s); } diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index dd31e7bae35c..e47328f51801 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -52,11 +52,13 @@ #define REISERFS_IOC32_GETVERSION FS_IOC32_GETVERSION #define REISERFS_IOC32_SETVERSION FS_IOC32_SETVERSION -/* Locking primitives */ -/* Right now we are still falling back to (un)lock_kernel, but eventually that - would evolve into real per-fs locks */ -#define reiserfs_write_lock( sb ) lock_kernel() -#define reiserfs_write_unlock( sb ) unlock_kernel() +/* + * Locking primitives. The write lock is a per superblock + * special mutex that has properties close to the Big Kernel Lock + * which was used in the previous locking scheme. + */ +void reiserfs_write_lock(struct super_block *s); +void reiserfs_write_unlock(struct super_block *s); struct fid; diff --git a/include/linux/reiserfs_fs_sb.h b/include/linux/reiserfs_fs_sb.h index dab68bbed675..045c37213675 100644 --- a/include/linux/reiserfs_fs_sb.h +++ b/include/linux/reiserfs_fs_sb.h @@ -7,6 +7,8 @@ #ifdef __KERNEL__ #include #include +#include +#include #endif typedef enum { @@ -355,6 +357,13 @@ struct reiserfs_sb_info { struct reiserfs_journal *s_journal; /* pointer to journal information */ unsigned short s_mount_state; /* reiserfs state (valid, invalid) */ + /* Serialize writers access, replace the old bkl */ + struct mutex lock; + /* Owner of the lock (can be recursive) */ + struct task_struct *lock_owner; + /* Depth of the lock, start from -1 like the bkl */ + int lock_depth; + /* Comment? -Hans */ void (*end_io_handler) (struct buffer_head *, int); hashf_t s_hash_function; /* pointer to function which is used -- cgit v1.3-8-gc7d7 From daf88c898312a22b5385655bc6e0b064eaa2efba Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 14 Apr 2009 05:34:23 +0200 Subject: kill-the-BKL/reiserfs: provide a tool to lock only once the write lock Sometimes we don't want to recursively hold the per superblock write lock because we want to be sure it is actually released when we come to sleep. This patch introduces the necessary tools for that. reiserfs_write_lock_once() does the same job than reiserfs_write_lock() except that it won't try to acquire recursively the lock if the current task already owns it. Also the lock_depth before the call of this function is returned. reiserfs_write_unlock_once() unlock only if reiserfs_write_lock_once() returned a depth equal to -1, ie: only if it actually locked. Signed-off-by: Frederic Weisbecker Cc: Alessio Igor Bogani Cc: Jeff Mahoney Cc: Alexander Beregalov Cc: Chris Mason LKML-Reference: <1239680065-25013-2-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- fs/reiserfs/lock.c | 26 ++++++++++++++++++++++++++ include/linux/reiserfs_fs.h | 2 ++ 2 files changed, 28 insertions(+) (limited to 'include') diff --git a/fs/reiserfs/lock.c b/fs/reiserfs/lock.c index cdd8d9ef048e..cb1bba3802dd 100644 --- a/fs/reiserfs/lock.c +++ b/fs/reiserfs/lock.c @@ -49,6 +49,32 @@ void reiserfs_write_unlock(struct super_block *s) } } +/* + * If we already own the lock, just exit and don't increase the depth. + * Useful when we don't want to lock more than once. + * + * We always return the lock_depth we had before calling + * this function. + */ +int reiserfs_write_lock_once(struct super_block *s) +{ + struct reiserfs_sb_info *sb_i = REISERFS_SB(s); + + if (sb_i->lock_owner != current) { + mutex_lock(&sb_i->lock); + sb_i->lock_owner = current; + return sb_i->lock_depth++; + } + + return sb_i->lock_depth; +} + +void reiserfs_write_unlock_once(struct super_block *s, int lock_depth) +{ + if (lock_depth == -1) + reiserfs_write_unlock(s); +} + /* * Utility function to force a BUG if it is called without the superblock * write lock held. caller is the string printed just before calling BUG() diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index e47328f51801..4a2df57c8b1d 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -59,6 +59,8 @@ */ void reiserfs_write_lock(struct super_block *s); void reiserfs_write_unlock(struct super_block *s); +int reiserfs_write_lock_once(struct super_block *s); +void reiserfs_write_unlock_once(struct super_block *s, int lock_depth); struct fid; -- cgit v1.3-8-gc7d7 From f32049dc244f4d394c8faa161b4f13cb8c4f5c8c Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 30 Apr 2009 22:05:25 +0200 Subject: kill-the-BKL/reiserfs: release write lock on fs_changed() fs_changed() is a macro used by reiserfs to check whether its tree has been rebalanced. It has been designed to check parallel changes on the tree after calling a sleeping function, which released the Bkl. fs_changed() also calls cond_resched(), so that if rescheduling is needed, we are in the best place to do that, since we check if the tree has changed just after (because of the bkl release on schedule()). Even if we are not anymore using the Bkl, we still want to release the lock while we reschedule, so that other waiters for the lock can acquire it safely, because of the following __fs_changed() check. [ Impact: release the reiserfs write lock when it is not needed ] Cc: Jeff Mahoney Cc: Chris Mason Cc: Alexander Beregalov Signed-off-by: Frederic Weisbecker --- include/linux/reiserfs_fs.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index 4a2df57c8b1d..fa5dbf307c40 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -1333,7 +1333,13 @@ static inline loff_t max_reiserfs_offset(struct inode *inode) #define get_generation(s) atomic_read (&fs_generation(s)) #define FILESYSTEM_CHANGED_TB(tb) (get_generation((tb)->tb_sb) != (tb)->fs_gen) #define __fs_changed(gen,s) (gen != get_generation (s)) -#define fs_changed(gen,s) ({cond_resched(); __fs_changed(gen, s);}) +#define fs_changed(gen,s) \ +({ \ + reiserfs_write_unlock(s); \ + cond_resched(); \ + reiserfs_write_lock(s); \ + __fs_changed(gen, s); \ +}) /***************************************************************************/ /* FIXATE NODES */ -- cgit v1.3-8-gc7d7 From e43d3f21c502dec786f2885a75e25859f18d6ffa Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 7 May 2009 22:51:20 +0200 Subject: kill-the-BKL/reiserfs: add reiserfs_cond_resched() Usually, when we call cond_resched(), we want the write lock to be released and then reacquired once we return from scheduling. Not only does it follow the previous bkl based locking scheme, but it also let other waiters to get the lock. But if we aren't going to reschedule(), such as in !TIF_NEED_RESCHED case, it's useless to release the lock. Worse, if we release and reacquire the lock whereas it is not needed, we create useless contentions. Also if someone takes the lock while we are modifying or reading the tree, there are good chances we'll have to retry our operation, eg if the block we were seeeking has moved. So this patch introduces a helper which only unlock the write lock if we are going to schedule. [ Impact: prepare to inject less lock contention and less tree operation attempts ] Reported-by: Andi Kleen Cc: Jeff Mahoney Cc: Chris Mason Cc: Ingo Molnar Cc: Alexander Beregalov Signed-off-by: Frederic Weisbecker --- include/linux/reiserfs_fs.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index fa5dbf307c40..27f4ecc28180 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -62,6 +62,19 @@ void reiserfs_write_unlock(struct super_block *s); int reiserfs_write_lock_once(struct super_block *s); void reiserfs_write_unlock_once(struct super_block *s, int lock_depth); +/* + * When we schedule, we usually want to also release the write lock, + * according to the previous bkl based locking scheme of reiserfs. + */ +static inline void reiserfs_cond_resched(struct super_block *s) +{ + if (need_resched()) { + reiserfs_write_unlock(s); + schedule(); + reiserfs_write_lock(s); + } +} + struct fid; /* in reading the #defines, it may help to understand that they employ -- cgit v1.3-8-gc7d7 From d663af807d8bb226394cb7e02f4665f6141a8140 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 7 May 2009 23:25:29 +0200 Subject: kill-the-bkl/reiserfs: conditionaly release the write lock on fs_changed() The goal of fs_changed() is to check whether the tree changed during a schedule(). This is a BKL legacy. A recent patch added an explicit unconditional release/reacquire of the write lock around the cond_resched() called inside fs_changed. But it's wasteful to unconditionally do that, we are creating superfluous lock contention in !TIF_NEED_RESCHED case. This patch manage that by calling reiserfs_cond_resched() from fs_changed() which only releases the lock if we are going to reschedule. [ Impact: inject less lock contention and tree job retries ] Cc: Jeff Mahoney Cc: Chris Mason Cc: Ingo Molnar Cc: Alexander Beregalov Signed-off-by: Frederic Weisbecker --- include/linux/reiserfs_fs.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index 27f4ecc28180..508fb523863e 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -1348,9 +1348,7 @@ static inline loff_t max_reiserfs_offset(struct inode *inode) #define __fs_changed(gen,s) (gen != get_generation (s)) #define fs_changed(gen,s) \ ({ \ - reiserfs_write_unlock(s); \ - cond_resched(); \ - reiserfs_write_lock(s); \ + reiserfs_cond_resched(s); \ __fs_changed(gen, s); \ }) -- cgit v1.3-8-gc7d7 From c72e05756b900b3be24cd73a16de52bab80984c0 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 16 May 2009 18:12:08 +0200 Subject: kill-the-bkl/reiserfs: acquire the inode mutex safely While searching a pathname, an inode mutex can be acquired in do_lookup() which calls reiserfs_lookup() which in turn acquires the write lock. On the other side reiserfs_fill_super() can acquire the write_lock and then call reiserfs_lookup_privroot() which can acquire an inode mutex (the root of the mount point). So we theoretically risk an AB - BA lock inversion that could lead to a deadlock. As for other lock dependencies found since the bkl to mutex conversion, the fix is to use reiserfs_mutex_lock_safe() which drops the lock dependency to the write lock. [ Impact: fix a possible deadlock with reiserfs ] Cc: Jeff Mahoney Cc: Chris Mason Cc: Ingo Molnar Cc: Alexander Beregalov Signed-off-by: Frederic Weisbecker --- fs/reiserfs/journal.c | 34 ---------------------------------- fs/reiserfs/xattr.c | 4 ++-- include/linux/reiserfs_fs.h | 35 +++++++++++++++++++++++++++++++++++ 3 files changed, 37 insertions(+), 36 deletions(-) (limited to 'include') diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index e9a972bd0323..d23d6d7a45a6 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -537,40 +537,6 @@ static inline void insert_journal_hash(struct reiserfs_journal_cnode **table, journal_hash(table, cn->sb, cn->blocknr) = cn; } -/* - * Several mutexes depend on the write lock. - * However sometimes we want to relax the write lock while we hold - * these mutexes, according to the release/reacquire on schedule() - * properties of the Bkl that were used. - * Reiserfs performances and locking were based on this scheme. - * Now that the write lock is a mutex and not the bkl anymore, doing so - * may result in a deadlock: - * - * A acquire write_lock - * A acquire j_commit_mutex - * A release write_lock and wait for something - * B acquire write_lock - * B can't acquire j_commit_mutex and sleep - * A can't acquire write lock anymore - * deadlock - * - * What we do here is avoiding such deadlock by playing the same game - * than the Bkl: if we can't acquire a mutex that depends on the write lock, - * we release the write lock, wait a bit and then retry. - * - * The mutexes concerned by this hack are: - * - The commit mutex of a journal list - * - The flush mutex - * - The journal lock - */ -static inline void reiserfs_mutex_lock_safe(struct mutex *m, - struct super_block *s) -{ - reiserfs_write_unlock(s); - mutex_lock(m); - reiserfs_write_lock(s); -} - /* lock the current transaction */ static inline void lock_journal(struct super_block *sb) { diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 6925b835a43b..59870a4751cc 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -975,7 +975,7 @@ int reiserfs_lookup_privroot(struct super_block *s) int err = 0; /* If we don't have the privroot located yet - go find it */ - mutex_lock(&s->s_root->d_inode->i_mutex); + reiserfs_mutex_lock_safe(&s->s_root->d_inode->i_mutex, s); dentry = lookup_one_len(PRIVROOT_NAME, s->s_root, strlen(PRIVROOT_NAME)); if (!IS_ERR(dentry)) { @@ -1011,7 +1011,7 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags) if (privroot->d_inode) { s->s_xattr = reiserfs_xattr_handlers; - mutex_lock(&privroot->d_inode->i_mutex); + reiserfs_mutex_lock_safe(&privroot->d_inode->i_mutex, s); if (!REISERFS_SB(s)->xattr_root) { struct dentry *dentry; dentry = lookup_one_len(XAROOT_NAME, privroot, diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index 508fb523863e..a498d9266d8c 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -62,6 +62,41 @@ void reiserfs_write_unlock(struct super_block *s); int reiserfs_write_lock_once(struct super_block *s); void reiserfs_write_unlock_once(struct super_block *s, int lock_depth); +/* + * Several mutexes depend on the write lock. + * However sometimes we want to relax the write lock while we hold + * these mutexes, according to the release/reacquire on schedule() + * properties of the Bkl that were used. + * Reiserfs performances and locking were based on this scheme. + * Now that the write lock is a mutex and not the bkl anymore, doing so + * may result in a deadlock: + * + * A acquire write_lock + * A acquire j_commit_mutex + * A release write_lock and wait for something + * B acquire write_lock + * B can't acquire j_commit_mutex and sleep + * A can't acquire write lock anymore + * deadlock + * + * What we do here is avoiding such deadlock by playing the same game + * than the Bkl: if we can't acquire a mutex that depends on the write lock, + * we release the write lock, wait a bit and then retry. + * + * The mutexes concerned by this hack are: + * - The commit mutex of a journal list + * - The flush mutex + * - The journal lock + * - The inode mutex + */ +static inline void reiserfs_mutex_lock_safe(struct mutex *m, + struct super_block *s) +{ + reiserfs_write_unlock(s); + mutex_lock(m); + reiserfs_write_lock(s); +} + /* * When we schedule, we usually want to also release the write lock, * according to the previous bkl based locking scheme of reiserfs. -- cgit v1.3-8-gc7d7 From 08f14fc8963e585e65b71212ce8050607b9b6c36 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 16 May 2009 19:10:38 +0200 Subject: kill-the-bkl/reiserfs: move the concurrent tree accesses checks per superblock When do_balance() balances the tree, a trick is performed to provide the ability for other tree writers/readers to check whether do_balance() is executing concurrently (requires CONFIG_REISERFS_CHECK). This is done to protect concurrent accesses to the tree. The trick is the following: When do_balance is called, a unique global variable called cur_tb takes a pointer to the current tree to be rebalanced. Once do_balance finishes its work, cur_tb takes the NULL value. Then, concurrent tree readers/writers just have to check the value of cur_tb to ensure do_balance isn't executing concurrently. If it is, then it proves that schedule() occured on do_balance(), which then relaxed the bkl that protected the tree. Now that the bkl has be turned into a mutex, this check is still fine even though do_balance() becomes preemptible: the write lock will not be automatically released on schedule(), so the tree is still protected. But this is only fine if we have a single reiserfs mountpoint. Indeed, because the bkl is a global lock, it didn't allowed concurrent executions between a tree reader/writer in a mount point and a do_balance() on another tree from another mountpoint. So assuming all these readers/writers weren't supposed to be reentrant, the current check now sometimes detect false positives with the current per-superblock mutex which allows this reentrancy. This patch keeps the concurrent tree accesses check but moves it per superblock, so that only trees from a same mount point are checked to be not accessed concurrently. [ Impact: fix spurious panic while running several reiserfs mount-points ] Cc: Jeff Mahoney Cc: Chris Mason Cc: Ingo Molnar Cc: Alexander Beregalov Signed-off-by: Frederic Weisbecker --- fs/reiserfs/do_balan.c | 17 +++++------------ fs/reiserfs/fix_node.c | 5 +---- fs/reiserfs/prints.c | 4 ---- fs/reiserfs/stree.c | 5 +---- include/linux/reiserfs_fs_sb.h | 11 +++++++++++ 5 files changed, 18 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/fs/reiserfs/do_balan.c b/fs/reiserfs/do_balan.c index 128d3f7c8aa5..60c080440661 100644 --- a/fs/reiserfs/do_balan.c +++ b/fs/reiserfs/do_balan.c @@ -21,14 +21,6 @@ #include #include -#ifdef CONFIG_REISERFS_CHECK - -struct tree_balance *cur_tb = NULL; /* detects whether more than one - copy of tb exists as a means - of checking whether schedule - is interrupting do_balance */ -#endif - static inline void buffer_info_init_left(struct tree_balance *tb, struct buffer_info *bi) { @@ -1840,11 +1832,12 @@ static int check_before_balancing(struct tree_balance *tb) { int retval = 0; - if (cur_tb) { + if (REISERFS_SB(tb->tb_sb)->cur_tb) { reiserfs_panic(tb->tb_sb, "vs-12335", "suspect that schedule " "occurred based on cur_tb not being null at " "this point in code. do_balance cannot properly " - "handle schedule occurring while it runs."); + "handle concurrent tree accesses on a same " + "mount point."); } /* double check that buffers that we will modify are unlocked. (fix_nodes should already have @@ -1986,7 +1979,7 @@ static inline void do_balance_starts(struct tree_balance *tb) "check");*/ RFALSE(check_before_balancing(tb), "PAP-12340: locked buffers in TB"); #ifdef CONFIG_REISERFS_CHECK - cur_tb = tb; + REISERFS_SB(tb->tb_sb)->cur_tb = tb; #endif } @@ -1996,7 +1989,7 @@ static inline void do_balance_completed(struct tree_balance *tb) #ifdef CONFIG_REISERFS_CHECK check_leaf_level(tb); check_internal_levels(tb); - cur_tb = NULL; + REISERFS_SB(tb->tb_sb)->cur_tb = NULL; #endif /* reiserfs_free_block is no longer schedule safe. So, we need to diff --git a/fs/reiserfs/fix_node.c b/fs/reiserfs/fix_node.c index 3a685e3f754f..d2f31330dcae 100644 --- a/fs/reiserfs/fix_node.c +++ b/fs/reiserfs/fix_node.c @@ -563,9 +563,6 @@ static int get_num_ver(int mode, struct tree_balance *tb, int h, return needed_nodes; } -#ifdef CONFIG_REISERFS_CHECK -extern struct tree_balance *cur_tb; -#endif /* Set parameters for balancing. * Performs write of results of analysis of balancing into structure tb, @@ -2368,7 +2365,7 @@ int fix_nodes(int op_mode, struct tree_balance *tb, return REPEAT_SEARCH; } #ifdef CONFIG_REISERFS_CHECK - if (cur_tb) { + if (REISERFS_SB(tb->tb_sb)->cur_tb) { print_cur_tb("fix_nodes"); reiserfs_panic(tb->tb_sb, "PAP-8305", "there is pending do_balance"); diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c index 536eacaeb710..adbc6f538515 100644 --- a/fs/reiserfs/prints.c +++ b/fs/reiserfs/prints.c @@ -349,10 +349,6 @@ void reiserfs_debug(struct super_block *s, int level, const char *fmt, ...) . */ -#ifdef CONFIG_REISERFS_CHECK -extern struct tree_balance *cur_tb; -#endif - void __reiserfs_panic(struct super_block *sb, const char *id, const char *function, const char *fmt, ...) { diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c index 6b025a42d510..5fa7118f04e1 100644 --- a/fs/reiserfs/stree.c +++ b/fs/reiserfs/stree.c @@ -222,9 +222,6 @@ static inline int bin_search(const void *key, /* Key to search for. */ return ITEM_NOT_FOUND; } -#ifdef CONFIG_REISERFS_CHECK -extern struct tree_balance *cur_tb; -#endif /* Minimal possible key. It is never in the tree. */ const struct reiserfs_key MIN_KEY = { 0, 0, {{0, 0},} }; @@ -711,7 +708,7 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to s !key_in_buffer(search_path, key, sb), "PAP-5130: key is not in the buffer"); #ifdef CONFIG_REISERFS_CHECK - if (cur_tb) { + if (REISERFS_SB(sb)->cur_tb) { print_cur_tb("5140"); reiserfs_panic(sb, "PAP-5140", "schedule occurred in do_balance!"); diff --git a/include/linux/reiserfs_fs_sb.h b/include/linux/reiserfs_fs_sb.h index 045c37213675..52c83b6a758a 100644 --- a/include/linux/reiserfs_fs_sb.h +++ b/include/linux/reiserfs_fs_sb.h @@ -417,6 +417,17 @@ struct reiserfs_sb_info { char *s_qf_names[MAXQUOTAS]; int s_jquota_fmt; #endif +#ifdef CONFIG_REISERFS_CHECK + + struct tree_balance *cur_tb; /* + * Detects whether more than one + * copy of tb exists per superblock + * as a means of checking whether + * do_balance is executing concurrently + * against another tree reader/writer + * on a same mount point. + */ +#endif }; /* Definitions of reiserfs on-disk properties: */ -- cgit v1.3-8-gc7d7 From c0a9eedf9acafb083adf3ddbff0a1e4d6d9a6949 Mon Sep 17 00:00:00 2001 From: Pavel Hofman Date: Wed, 16 Sep 2009 22:25:35 +0200 Subject: ALSA: ak4114 - fix errors in output selector bits * the previous version had a typo - values of AK4114_OPS10-12 were identical with AK4114_OPS00-02 * Since no cards actually use this feature, the bug was not identified earlier Signed-off-by: Pavel Hofman Signed-off-by: Takashi Iwai --- include/sound/ak4114.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/sound/ak4114.h b/include/sound/ak4114.h index d293d36a66b8..3ce69fd92523 100644 --- a/include/sound/ak4114.h +++ b/include/sound/ak4114.h @@ -95,13 +95,13 @@ /* AK4114_REG_IO0 */ #define AK4114_TX1E (1<<7) /* TX1 Output Enable (1 = enable) */ -#define AK4114_OPS12 (1<<2) /* Output Though Data Selector for TX1 pin */ -#define AK4114_OPS11 (1<<1) /* Output Though Data Selector for TX1 pin */ -#define AK4114_OPS10 (1<<0) /* Output Though Data Selector for TX1 pin */ +#define AK4114_OPS12 (1<<6) /* Output Data Selector for TX1 pin */ +#define AK4114_OPS11 (1<<5) /* Output Data Selector for TX1 pin */ +#define AK4114_OPS10 (1<<4) /* Output Data Selector for TX1 pin */ #define AK4114_TX0E (1<<3) /* TX0 Output Enable (1 = enable) */ -#define AK4114_OPS02 (1<<2) /* Output Though Data Selector for TX0 pin */ -#define AK4114_OPS01 (1<<1) /* Output Though Data Selector for TX0 pin */ -#define AK4114_OPS00 (1<<0) /* Output Though Data Selector for TX0 pin */ +#define AK4114_OPS02 (1<<2) /* Output Data Selector for TX0 pin */ +#define AK4114_OPS01 (1<<1) /* Output Data Selector for TX0 pin */ +#define AK4114_OPS00 (1<<0) /* Output Data Selector for TX0 pin */ /* AK4114_REG_IO1 */ #define AK4114_EFH1 (1<<7) /* Interrupt 0 pin Hold */ -- cgit v1.3-8-gc7d7 From 8f34692f63d66805b51ff408f4067748d3c1c3fd Mon Sep 17 00:00:00 2001 From: Pavel Hofman Date: Wed, 16 Sep 2009 22:25:36 +0200 Subject: ALSA: ak4620 support, codec regs listed in proc * complete support for ak4620 * codec regs listed in proc for all codecs/chips * adding total regs for each codec * fixing nb. of steps in input attenuation controls Signed-off-by: Pavel Hofman Signed-off-by: Takashi Iwai --- include/sound/ak4xxx-adda.h | 5 +- sound/i2c/other/ak4xxx-adda.c | 136 ++++++++++++++++++++++++++++++++---------- sound/pci/ice1712/juli.c | 21 ------- 3 files changed, 108 insertions(+), 54 deletions(-) (limited to 'include') diff --git a/include/sound/ak4xxx-adda.h b/include/sound/ak4xxx-adda.h index 891cf1aea8b1..030b87c2f6d4 100644 --- a/include/sound/ak4xxx-adda.h +++ b/include/sound/ak4xxx-adda.h @@ -68,7 +68,7 @@ struct snd_akm4xxx { enum { SND_AK4524, SND_AK4528, SND_AK4529, SND_AK4355, SND_AK4358, SND_AK4381, - SND_AK5365 + SND_AK5365, SND_AK4620, } type; /* (array) information of combined codecs */ @@ -76,6 +76,9 @@ struct snd_akm4xxx { const struct snd_akm4xxx_adc_channel *adc_info; struct snd_ak4xxx_ops ops; + unsigned int num_chips; + unsigned int total_regs; + const char *name; }; void snd_akm4xxx_write(struct snd_akm4xxx *ak, int chip, unsigned char reg, diff --git a/sound/i2c/other/ak4xxx-adda.c b/sound/i2c/other/ak4xxx-adda.c index ee47abab764e..1adb8a3c2b62 100644 --- a/sound/i2c/other/ak4xxx-adda.c +++ b/sound/i2c/other/ak4xxx-adda.c @@ -19,7 +19,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * - */ + */ #include #include @@ -29,6 +29,7 @@ #include #include #include +#include MODULE_AUTHOR("Jaroslav Kysela , Takashi Iwai "); MODULE_DESCRIPTION("Routines for control of AK452x / AK43xx AD/DA converters"); @@ -52,26 +53,21 @@ EXPORT_SYMBOL(snd_akm4xxx_write); static void ak4524_reset(struct snd_akm4xxx *ak, int state) { unsigned int chip; - unsigned char reg, maxreg; + unsigned char reg; - if (ak->type == SND_AK4528) - maxreg = 0x06; - else - maxreg = 0x08; for (chip = 0; chip < ak->num_dacs/2; chip++) { snd_akm4xxx_write(ak, chip, 0x01, state ? 0x00 : 0x03); if (state) continue; /* DAC volumes */ - for (reg = 0x04; reg < maxreg; reg++) + for (reg = 0x04; reg < ak->total_regs; reg++) snd_akm4xxx_write(ak, chip, reg, snd_akm4xxx_get(ak, chip, reg)); } } /* reset procedure for AK4355 and AK4358 */ -static void ak435X_reset(struct snd_akm4xxx *ak, int state, - unsigned char total_regs) +static void ak435X_reset(struct snd_akm4xxx *ak, int state) { unsigned char reg; @@ -79,7 +75,7 @@ static void ak435X_reset(struct snd_akm4xxx *ak, int state, snd_akm4xxx_write(ak, 0, 0x01, 0x02); /* reset and soft-mute */ return; } - for (reg = 0x00; reg < total_regs; reg++) + for (reg = 0x00; reg < ak->total_regs; reg++) if (reg != 0x01) snd_akm4xxx_write(ak, 0, reg, snd_akm4xxx_get(ak, 0, reg)); @@ -91,12 +87,11 @@ static void ak4381_reset(struct snd_akm4xxx *ak, int state) { unsigned int chip; unsigned char reg; - for (chip = 0; chip < ak->num_dacs/2; chip++) { snd_akm4xxx_write(ak, chip, 0x00, state ? 0x0c : 0x0f); if (state) continue; - for (reg = 0x01; reg < 0x05; reg++) + for (reg = 0x01; reg < ak->total_regs; reg++) snd_akm4xxx_write(ak, chip, reg, snd_akm4xxx_get(ak, chip, reg)); } @@ -113,16 +108,17 @@ void snd_akm4xxx_reset(struct snd_akm4xxx *ak, int state) switch (ak->type) { case SND_AK4524: case SND_AK4528: + case SND_AK4620: ak4524_reset(ak, state); break; case SND_AK4529: /* FIXME: needed for ak4529? */ break; case SND_AK4355: - ak435X_reset(ak, state, 0x0b); + ak435X_reset(ak, state); break; case SND_AK4358: - ak435X_reset(ak, state, 0x10); + ak435X_reset(ak, state); break; case SND_AK4381: ak4381_reset(ak, state); @@ -139,7 +135,7 @@ EXPORT_SYMBOL(snd_akm4xxx_reset); * Volume conversion table for non-linear volumes * from -63.5dB (mute) to 0dB step 0.5dB * - * Used for AK4524 input/ouput attenuation, AK4528, and + * Used for AK4524/AK4620 input/ouput attenuation, AK4528, and * AK5365 input attenuation */ static const unsigned char vol_cvt_datt[128] = { @@ -259,8 +255,22 @@ void snd_akm4xxx_init(struct snd_akm4xxx *ak) 0x00, 0x0f, /* 0: power-up, un-reset */ 0xff, 0xff }; + static const unsigned char inits_ak4620[] = { + 0x00, 0x07, /* 0: normal */ + 0x01, 0x00, /* 0: reset */ + 0x01, 0x02, /* 1: RSTAD */ + 0x01, 0x03, /* 1: RSTDA */ + 0x01, 0x0f, /* 1: normal */ + 0x02, 0x60, /* 2: 24bit I2S */ + 0x03, 0x01, /* 3: deemphasis off */ + 0x04, 0x00, /* 4: LIN muted */ + 0x05, 0x00, /* 5: RIN muted */ + 0x06, 0x00, /* 6: LOUT muted */ + 0x07, 0x00, /* 7: ROUT muted */ + 0xff, 0xff + }; - int chip, num_chips; + int chip; const unsigned char *ptr, *inits; unsigned char reg, data; @@ -270,42 +280,64 @@ void snd_akm4xxx_init(struct snd_akm4xxx *ak) switch (ak->type) { case SND_AK4524: inits = inits_ak4524; - num_chips = ak->num_dacs / 2; + ak->num_chips = ak->num_dacs / 2; + ak->name = "ak4524"; + ak->total_regs = 0x08; break; case SND_AK4528: inits = inits_ak4528; - num_chips = ak->num_dacs / 2; + ak->num_chips = ak->num_dacs / 2; + ak->name = "ak4528"; + ak->total_regs = 0x06; break; case SND_AK4529: inits = inits_ak4529; - num_chips = 1; + ak->num_chips = 1; + ak->name = "ak4529"; + ak->total_regs = 0x0d; break; case SND_AK4355: inits = inits_ak4355; - num_chips = 1; + ak->num_chips = 1; + ak->name = "ak4355"; + ak->total_regs = 0x0b; break; case SND_AK4358: inits = inits_ak4358; - num_chips = 1; + ak->num_chips = 1; + ak->name = "ak4358"; + ak->total_regs = 0x10; break; case SND_AK4381: inits = inits_ak4381; - num_chips = ak->num_dacs / 2; + ak->num_chips = ak->num_dacs / 2; + ak->name = "ak4381"; + ak->total_regs = 0x05; break; case SND_AK5365: /* FIXME: any init sequence? */ + ak->num_chips = 1; + ak->name = "ak5365"; + ak->total_regs = 0x08; return; + case SND_AK4620: + inits = inits_ak4620; + ak->num_chips = ak->num_dacs / 2; + ak->name = "ak4620"; + ak->total_regs = 0x08; + break; default: snd_BUG(); return; } - for (chip = 0; chip < num_chips; chip++) { + for (chip = 0; chip < ak->num_chips; chip++) { ptr = inits; while (*ptr != 0xff) { reg = *ptr++; data = *ptr++; snd_akm4xxx_write(ak, chip, reg, data); + udelay(10); } } } @@ -688,6 +720,12 @@ static int build_dac_controls(struct snd_akm4xxx *ak) AK_COMPOSE(idx/2, (idx%2) + 3, 0, 255); knew.tlv.p = db_scale_linear; break; + case SND_AK4620: + /* register 6 & 7 */ + knew.private_value = + AK_COMPOSE(idx/2, (idx%2) + 6, 0, 255); + knew.tlv.p = db_scale_linear; + break; default: return -EINVAL; } @@ -704,10 +742,12 @@ static int build_dac_controls(struct snd_akm4xxx *ak) static int build_adc_controls(struct snd_akm4xxx *ak) { - int idx, err, mixer_ch, num_stereo; + int idx, err, mixer_ch, num_stereo, max_steps; struct snd_kcontrol_new knew; mixer_ch = 0; + if (ak->type == SND_AK4528) + return 0; /* no controls */ for (idx = 0; idx < ak->num_adcs;) { memset(&knew, 0, sizeof(knew)); if (! ak->adc_info || ! ak->adc_info[mixer_ch].name) { @@ -733,13 +773,12 @@ static int build_adc_controls(struct snd_akm4xxx *ak) } /* register 4 & 5 */ if (ak->type == SND_AK5365) - knew.private_value = - AK_COMPOSE(idx/2, (idx%2) + 4, 0, 151) | - AK_VOL_CVT | AK_IPGA; + max_steps = 152; else - knew.private_value = - AK_COMPOSE(idx/2, (idx%2) + 4, 0, 163) | - AK_VOL_CVT | AK_IPGA; + max_steps = 164; + knew.private_value = + AK_COMPOSE(idx/2, (idx%2) + 4, 0, max_steps) | + AK_VOL_CVT | AK_IPGA; knew.tlv.p = db_scale_vol_datt; err = snd_ctl_add(ak->card, snd_ctl_new1(&knew, ak)); if (err < 0) @@ -808,6 +847,7 @@ static int build_deemphasis(struct snd_akm4xxx *ak, int num_emphs) switch (ak->type) { case SND_AK4524: case SND_AK4528: + case SND_AK4620: /* register 3 */ knew.private_value = AK_COMPOSE(idx, 3, 0, 0); break; @@ -834,6 +874,35 @@ static int build_deemphasis(struct snd_akm4xxx *ak, int num_emphs) return 0; } +#ifdef CONFIG_PROC_FS +static void proc_regs_read(struct snd_info_entry *entry, + struct snd_info_buffer *buffer) +{ + struct snd_akm4xxx *ak = (struct snd_akm4xxx *)entry->private_data; + int reg, val, chip; + for (chip = 0; chip < ak->num_chips; chip++) { + for (reg = 0; reg < ak->total_regs; reg++) { + val = snd_akm4xxx_get(ak, chip, reg); + snd_iprintf(buffer, "chip %d: 0x%02x = 0x%02x\n", chip, + reg, val); + } + } +} + +static int proc_init(struct snd_akm4xxx *ak) +{ + struct snd_info_entry *entry; + int err; + err = snd_card_proc_new(ak->card, ak->name, &entry); + if (err < 0) + return err; + snd_info_set_text_ops(entry, ak, proc_regs_read); + return 0; +} +#else /* !CONFIG_PROC_FS */ +static int proc_init(struct snd_akm4xxx *ak) {} +#endif + int snd_akm4xxx_build_controls(struct snd_akm4xxx *ak) { int err, num_emphs; @@ -845,18 +914,21 @@ int snd_akm4xxx_build_controls(struct snd_akm4xxx *ak) err = build_adc_controls(ak); if (err < 0) return err; - if (ak->type == SND_AK4355 || ak->type == SND_AK4358) num_emphs = 1; + else if (ak->type == SND_AK4620) + num_emphs = 0; else num_emphs = ak->num_dacs / 2; err = build_deemphasis(ak, num_emphs); + if (err < 0) + return err; + err = proc_init(ak); if (err < 0) return err; return 0; } - EXPORT_SYMBOL(snd_akm4xxx_build_controls); static int __init alsa_akm4xxx_module_init(void) diff --git a/sound/pci/ice1712/juli.c b/sound/pci/ice1712/juli.c index fd948bfd9aef..4789e8bfdc17 100644 --- a/sound/pci/ice1712/juli.c +++ b/sound/pci/ice1712/juli.c @@ -412,25 +412,6 @@ static struct snd_kcontrol_new juli_mute_controls[] __devinitdata = { }, }; - -static void ak4358_proc_regs_read(struct snd_info_entry *entry, - struct snd_info_buffer *buffer) -{ - struct snd_ice1712 *ice = (struct snd_ice1712 *)entry->private_data; - int reg, val; - for (reg = 0; reg <= 0xf; reg++) { - val = snd_akm4xxx_get(ice->akm, 0, reg); - snd_iprintf(buffer, "0x%02x = 0x%02x\n", reg, val); - } -} - -static void ak4358_proc_init(struct snd_ice1712 *ice) -{ - struct snd_info_entry *entry; - if (!snd_card_proc_new(ice->card, "ak4358_codec", &entry)) - snd_info_set_text_ops(entry, ice, ak4358_proc_regs_read); -} - static char *slave_vols[] __devinitdata = { PCM_VOLUME, MONITOR_AN_IN_VOLUME, @@ -496,8 +477,6 @@ static int __devinit juli_add_controls(struct snd_ice1712 *ice) /* only capture SPDIF over AK4114 */ err = snd_ak4114_build(spec->ak4114, NULL, ice->pcm->streams[SNDRV_PCM_STREAM_CAPTURE].substream); - - ak4358_proc_init(ice); if (err < 0) return err; return 0; -- cgit v1.3-8-gc7d7 From 42cfa276aebd28e5cc4350ff6c7d75f1cb84dd98 Mon Sep 17 00:00:00 2001 From: Pavel Hofman Date: Wed, 16 Sep 2009 22:25:37 +0200 Subject: ALSA: ak4113 support * complete support for ak4113 * based on code for ak4114 and ak4117 Signed-off-by: Pavel Hofman Signed-off-by: Takashi Iwai --- include/sound/ak4113.h | 321 ++++++++++++++++++++++++ sound/i2c/other/Makefile | 3 +- sound/i2c/other/ak4113.c | 639 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 962 insertions(+), 1 deletion(-) create mode 100644 include/sound/ak4113.h create mode 100644 sound/i2c/other/ak4113.c (limited to 'include') diff --git a/include/sound/ak4113.h b/include/sound/ak4113.h new file mode 100644 index 000000000000..8988edae1609 --- /dev/null +++ b/include/sound/ak4113.h @@ -0,0 +1,321 @@ +#ifndef __SOUND_AK4113_H +#define __SOUND_AK4113_H + +/* + * Routines for Asahi Kasei AK4113 + * Copyright (c) by Jaroslav Kysela , + * Copyright (c) by Pavel Hofman , + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* AK4113 registers */ +/* power down */ +#define AK4113_REG_PWRDN 0x00 +/* format control */ +#define AK4113_REG_FORMAT 0x01 +/* input/output control */ +#define AK4113_REG_IO0 0x02 +/* input/output control */ +#define AK4113_REG_IO1 0x03 +/* interrupt0 mask */ +#define AK4113_REG_INT0_MASK 0x04 +/* interrupt1 mask */ +#define AK4113_REG_INT1_MASK 0x05 +/* DAT mask & DTS select */ +#define AK4113_REG_DATDTS 0x06 +/* receiver status 0 */ +#define AK4113_REG_RCS0 0x07 +/* receiver status 1 */ +#define AK4113_REG_RCS1 0x08 +/* receiver status 2 */ +#define AK4113_REG_RCS2 0x09 +/* RX channel status byte 0 */ +#define AK4113_REG_RXCSB0 0x0a +/* RX channel status byte 1 */ +#define AK4113_REG_RXCSB1 0x0b +/* RX channel status byte 2 */ +#define AK4113_REG_RXCSB2 0x0c +/* RX channel status byte 3 */ +#define AK4113_REG_RXCSB3 0x0d +/* RX channel status byte 4 */ +#define AK4113_REG_RXCSB4 0x0e +/* burst preamble Pc byte 0 */ +#define AK4113_REG_Pc0 0x0f +/* burst preamble Pc byte 1 */ +#define AK4113_REG_Pc1 0x10 +/* burst preamble Pd byte 0 */ +#define AK4113_REG_Pd0 0x11 +/* burst preamble Pd byte 1 */ +#define AK4113_REG_Pd1 0x12 +/* Q-subcode address + control */ +#define AK4113_REG_QSUB_ADDR 0x13 +/* Q-subcode track */ +#define AK4113_REG_QSUB_TRACK 0x14 +/* Q-subcode index */ +#define AK4113_REG_QSUB_INDEX 0x15 +/* Q-subcode minute */ +#define AK4113_REG_QSUB_MINUTE 0x16 +/* Q-subcode second */ +#define AK4113_REG_QSUB_SECOND 0x17 +/* Q-subcode frame */ +#define AK4113_REG_QSUB_FRAME 0x18 +/* Q-subcode zero */ +#define AK4113_REG_QSUB_ZERO 0x19 +/* Q-subcode absolute minute */ +#define AK4113_REG_QSUB_ABSMIN 0x1a +/* Q-subcode absolute second */ +#define AK4113_REG_QSUB_ABSSEC 0x1b +/* Q-subcode absolute frame */ +#define AK4113_REG_QSUB_ABSFRM 0x1c + +/* sizes */ +#define AK4113_REG_RXCSB_SIZE ((AK4113_REG_RXCSB4-AK4113_REG_RXCSB0)+1) +#define AK4113_REG_QSUB_SIZE ((AK4113_REG_QSUB_ABSFRM-AK4113_REG_QSUB_ADDR)\ + +1) + +#define AK4113_WRITABLE_REGS (AK4113_REG_DATDTS + 1) + +/* AK4113_REG_PWRDN bits */ +/* Channel Status Select */ +#define AK4113_CS12 (1<<7) +/* Block Start & C/U Output Mode */ +#define AK4113_BCU (1<<6) +/* Master Clock Operation Select */ +#define AK4113_CM1 (1<<5) +/* Master Clock Operation Select */ +#define AK4113_CM0 (1<<4) +/* Master Clock Frequency Select */ +#define AK4113_OCKS1 (1<<3) +/* Master Clock Frequency Select */ +#define AK4113_OCKS0 (1<<2) +/* 0 = power down, 1 = normal operation */ +#define AK4113_PWN (1<<1) +/* 0 = reset & initialize (except thisregister), 1 = normal operation */ +#define AK4113_RST (1<<0) + +/* AK4113_REQ_FORMAT bits */ +/* V/TX Output select: 0 = Validity Flag Output, 1 = TX */ +#define AK4113_VTX (1<<7) +/* Audio Data Control */ +#define AK4113_DIF2 (1<<6) +/* Audio Data Control */ +#define AK4113_DIF1 (1<<5) +/* Audio Data Control */ +#define AK4113_DIF0 (1<<4) +/* Deemphasis Autodetect Enable (1 = enable) */ +#define AK4113_DEAU (1<<3) +/* 32kHz-48kHz Deemphasis Control */ +#define AK4113_DEM1 (1<<2) +/* 32kHz-48kHz Deemphasis Control */ +#define AK4113_DEM0 (1<<1) +#define AK4113_DEM_OFF (AK4113_DEM0) +#define AK4113_DEM_44KHZ (0) +#define AK4113_DEM_48KHZ (AK4113_DEM1) +#define AK4113_DEM_32KHZ (AK4113_DEM0|AK4113_DEM1) +/* STDO: 16-bit, right justified */ +#define AK4113_DIF_16R (0) +/* STDO: 18-bit, right justified */ +#define AK4113_DIF_18R (AK4113_DIF0) +/* STDO: 20-bit, right justified */ +#define AK4113_DIF_20R (AK4113_DIF1) +/* STDO: 24-bit, right justified */ +#define AK4113_DIF_24R (AK4113_DIF1|AK4113_DIF0) +/* STDO: 24-bit, left justified */ +#define AK4113_DIF_24L (AK4113_DIF2) +/* STDO: I2S */ +#define AK4113_DIF_24I2S (AK4113_DIF2|AK4113_DIF0) +/* STDO: 24-bit, left justified; LRCLK, BICK = Input */ +#define AK4113_DIF_I24L (AK4113_DIF2|AK4113_DIF1) +/* STDO: I2S; LRCLK, BICK = Input */ +#define AK4113_DIF_I24I2S (AK4113_DIF2|AK4113_DIF1|AK4113_DIF0) + +/* AK4113_REG_IO0 */ +/* XTL1=0,XTL0=0 -> 11.2896Mhz; XTL1=0,XTL0=1 -> 12.288Mhz */ +#define AK4113_XTL1 (1<<6) +/* XTL1=1,XTL0=0 -> 24.576Mhz; XTL1=1,XTL0=1 -> use channel status */ +#define AK4113_XTL0 (1<<5) +/* Block Start Signal Output: 0 = U-bit, 1 = C-bit (req. BCU = 1) */ +#define AK4113_UCE (1<<4) +/* TX Output Enable (1 = enable) */ +#define AK4113_TXE (1<<3) +/* Output Through Data Selector for TX pin */ +#define AK4113_OPS2 (1<<2) +/* Output Through Data Selector for TX pin */ +#define AK4113_OPS1 (1<<1) +/* Output Through Data Selector for TX pin */ +#define AK4113_OPS0 (1<<0) +/* 11.2896 MHz ref. Xtal freq. */ +#define AK4113_XTL_11_2896M (0) +/* 12.288 MHz ref. Xtal freq. */ +#define AK4113_XTL_12_288M (AK4113_XTL0) +/* 24.576 MHz ref. Xtal freq. */ +#define AK4113_XTL_24_576M (AK4113_XTL1) + +/* AK4113_REG_IO1 */ +/* Interrupt 0 pin Hold */ +#define AK4113_EFH1 (1<<7) +/* Interrupt 0 pin Hold */ +#define AK4113_EFH0 (1<<6) +#define AK4113_EFH_512LRCLK (0) +#define AK4113_EFH_1024LRCLK (AK4113_EFH0) +#define AK4113_EFH_2048LRCLK (AK4113_EFH1) +#define AK4113_EFH_4096LRCLK (AK4113_EFH1|AK4113_EFH0) +/* PLL Lock Time: 0 = 384/fs, 1 = 1/fs */ +#define AK4113_FAST (1<<5) +/* MCKO2 Output Select: 0 = CMx/OCKSx, 1 = Xtal */ +#define AK4113_XMCK (1<<4) +/* MCKO2 Output Freq. Select: 0 = x1, 1 = x0.5 (req. XMCK = 1) */ +#define AK4113_DIV (1<<3) +/* Input Recovery Data Select */ +#define AK4113_IPS2 (1<<2) +/* Input Recovery Data Select */ +#define AK4113_IPS1 (1<<1) +/* Input Recovery Data Select */ +#define AK4113_IPS0 (1<<0) +#define AK4113_IPS(x) ((x)&7) + +/* AK4113_REG_INT0_MASK && AK4113_REG_INT1_MASK*/ +/* mask enable for QINT bit */ +#define AK4113_MQI (1<<7) +/* mask enable for AUTO bit */ +#define AK4113_MAUT (1<<6) +/* mask enable for CINT bit */ +#define AK4113_MCIT (1<<5) +/* mask enable for UNLOCK bit */ +#define AK4113_MULK (1<<4) +/* mask enable for V bit */ +#define AK4113_V (1<<3) +/* mask enable for STC bit */ +#define AK4113_STC (1<<2) +/* mask enable for AUDN bit */ +#define AK4113_MAN (1<<1) +/* mask enable for PAR bit */ +#define AK4113_MPR (1<<0) + +/* AK4113_REG_DATDTS */ +/* DAT Start ID Counter */ +#define AK4113_DCNT (1<<4) +/* DTS-CD 16-bit Sync Word Detect */ +#define AK4113_DTS16 (1<<3) +/* DTS-CD 14-bit Sync Word Detect */ +#define AK4113_DTS14 (1<<2) +/* mask enable for DAT bit (if 1, no INT1 effect */ +#define AK4113_MDAT1 (1<<1) +/* mask enable for DAT bit (if 1, no INT0 effect */ +#define AK4113_MDAT0 (1<<0) + +/* AK4113_REG_RCS0 */ +/* Q-subcode buffer interrupt, 0 = no change, 1 = changed */ +#define AK4113_QINT (1<<7) +/* Non-PCM or DTS stream auto detection, 0 = no detect, 1 = detect */ +#define AK4113_AUTO (1<<6) +/* channel status buffer interrupt, 0 = no change, 1 = change */ +#define AK4113_CINT (1<<5) +/* PLL lock status, 0 = lock, 1 = unlock */ +#define AK4113_UNLCK (1<<4) +/* Validity bit, 0 = valid, 1 = invalid */ +#define AK4113_V (1<<3) +/* sampling frequency or Pre-emphasis change, 0 = no detect, 1 = detect */ +#define AK4113_STC (1<<2) +/* audio bit output, 0 = audio, 1 = non-audio */ +#define AK4113_AUDION (1<<1) +/* parity error or biphase error status, 0 = no error, 1 = error */ +#define AK4113_PAR (1<<0) + +/* AK4113_REG_RCS1 */ +/* sampling frequency detection */ +#define AK4113_FS3 (1<<7) +#define AK4113_FS2 (1<<6) +#define AK4113_FS1 (1<<5) +#define AK4113_FS0 (1<<4) +/* Pre-emphasis detect, 0 = OFF, 1 = ON */ +#define AK4113_PEM (1<<3) +/* DAT Start ID Detect, 0 = no detect, 1 = detect */ +#define AK4113_DAT (1<<2) +/* DTS-CD bit audio stream detect, 0 = no detect, 1 = detect */ +#define AK4113_DTSCD (1<<1) +/* Non-PCM bit stream detection, 0 = no detect, 1 = detect */ +#define AK4113_NPCM (1<<0) +#define AK4113_FS_8000HZ (AK4113_FS3|AK4113_FS0) +#define AK4113_FS_11025HZ (AK4113_FS2|AK4113_FS0) +#define AK4113_FS_16000HZ (AK4113_FS2|AK4113_FS1|AK4113_FS0) +#define AK4113_FS_22050HZ (AK4113_FS2) +#define AK4113_FS_24000HZ (AK4113_FS2|AK4113_FS1) +#define AK4113_FS_32000HZ (AK4113_FS1|AK4113_FS0) +#define AK4113_FS_44100HZ (0) +#define AK4113_FS_48000HZ (AK4113_FS1) +#define AK4113_FS_64000HZ (AK4113_FS3|AK4113_FS1|AK4113_FS0) +#define AK4113_FS_88200HZ (AK4113_FS3) +#define AK4113_FS_96000HZ (AK4113_FS3|AK4113_FS1) +#define AK4113_FS_176400HZ (AK4113_FS3|AK4113_FS2) +#define AK4113_FS_192000HZ (AK4113_FS3|AK4113_FS2|AK4113_FS1) + +/* AK4113_REG_RCS2 */ +/* CRC for Q-subcode, 0 = no error, 1 = error */ +#define AK4113_QCRC (1<<1) +/* CRC for channel status, 0 = no error, 1 = error */ +#define AK4113_CCRC (1<<0) + +/* flags for snd_ak4113_check_rate_and_errors() */ +#define AK4113_CHECK_NO_STAT (1<<0) /* no statistics */ +#define AK4113_CHECK_NO_RATE (1<<1) /* no rate check */ + +#define AK4113_CONTROLS 13 + +typedef void (ak4113_write_t)(void *private_data, unsigned char addr, + unsigned char data); +typedef unsigned char (ak4113_read_t)(void *private_data, unsigned char addr); + +struct ak4113 { + struct snd_card *card; + ak4113_write_t *write; + ak4113_read_t *read; + void *private_data; + unsigned int init:1; + spinlock_t lock; + unsigned char regmap[AK4113_WRITABLE_REGS]; + struct snd_kcontrol *kctls[AK4113_CONTROLS]; + struct snd_pcm_substream *substream; + unsigned long parity_errors; + unsigned long v_bit_errors; + unsigned long qcrc_errors; + unsigned long ccrc_errors; + unsigned char rcs0; + unsigned char rcs1; + unsigned char rcs2; + struct delayed_work work; + unsigned int check_flags; + void *change_callback_private; + void (*change_callback)(struct ak4113 *ak4113, unsigned char c0, + unsigned char c1); +}; + +int snd_ak4113_create(struct snd_card *card, ak4113_read_t *read, + ak4113_write_t *write, + const unsigned char pgm[AK4113_WRITABLE_REGS], + void *private_data, struct ak4113 **r_ak4113); +void snd_ak4113_reg_write(struct ak4113 *ak4113, unsigned char reg, + unsigned char mask, unsigned char val); +void snd_ak4113_reinit(struct ak4113 *ak4113); +int snd_ak4113_build(struct ak4113 *ak4113, + struct snd_pcm_substream *capture_substream); +int snd_ak4113_external_rate(struct ak4113 *ak4113); +int snd_ak4113_check_rate_and_errors(struct ak4113 *ak4113, unsigned int flags); + +#endif /* __SOUND_AK4113_H */ + diff --git a/sound/i2c/other/Makefile b/sound/i2c/other/Makefile index 703d954238f4..2dad40f3f622 100644 --- a/sound/i2c/other/Makefile +++ b/sound/i2c/other/Makefile @@ -5,6 +5,7 @@ snd-ak4114-objs := ak4114.o snd-ak4117-objs := ak4117.o +snd-ak4113-objs := ak4113.o snd-ak4xxx-adda-objs := ak4xxx-adda.o snd-pt2258-objs := pt2258.o snd-tea575x-tuner-objs := tea575x-tuner.o @@ -12,5 +13,5 @@ snd-tea575x-tuner-objs := tea575x-tuner.o # Module Dependency obj-$(CONFIG_SND_PDAUDIOCF) += snd-ak4117.o obj-$(CONFIG_SND_ICE1712) += snd-ak4xxx-adda.o -obj-$(CONFIG_SND_ICE1724) += snd-ak4114.o snd-ak4xxx-adda.o snd-pt2258.o +obj-$(CONFIG_SND_ICE1724) += snd-ak4114.o snd-ak4113.o snd-ak4xxx-adda.o snd-pt2258.o obj-$(CONFIG_SND_FM801_TEA575X) += snd-tea575x-tuner.o diff --git a/sound/i2c/other/ak4113.c b/sound/i2c/other/ak4113.c new file mode 100644 index 000000000000..fff62cc8607c --- /dev/null +++ b/sound/i2c/other/ak4113.c @@ -0,0 +1,639 @@ +/* + * Routines for control of the AK4113 via I2C/4-wire serial interface + * IEC958 (S/PDIF) receiver by Asahi Kasei + * Copyright (c) by Jaroslav Kysela + * Copyright (c) by Pavel Hofman + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +MODULE_AUTHOR("Pavel Hofman "); +MODULE_DESCRIPTION("AK4113 IEC958 (S/PDIF) receiver by Asahi Kasei"); +MODULE_LICENSE("GPL"); + +#define AK4113_ADDR 0x00 /* fixed address */ + +static void ak4113_stats(struct work_struct *work); +static void ak4113_init_regs(struct ak4113 *chip); + + +static void reg_write(struct ak4113 *ak4113, unsigned char reg, + unsigned char val) +{ + ak4113->write(ak4113->private_data, reg, val); + if (reg < sizeof(ak4113->regmap)) + ak4113->regmap[reg] = val; +} + +static inline unsigned char reg_read(struct ak4113 *ak4113, unsigned char reg) +{ + return ak4113->read(ak4113->private_data, reg); +} + +static void snd_ak4113_free(struct ak4113 *chip) +{ + chip->init = 1; /* don't schedule new work */ + mb(); + cancel_delayed_work(&chip->work); + flush_scheduled_work(); + kfree(chip); +} + +static int snd_ak4113_dev_free(struct snd_device *device) +{ + struct ak4113 *chip = device->device_data; + snd_ak4113_free(chip); + return 0; +} + +int snd_ak4113_create(struct snd_card *card, ak4113_read_t *read, + ak4113_write_t *write, const unsigned char pgm[5], + void *private_data, struct ak4113 **r_ak4113) +{ + struct ak4113 *chip; + int err = 0; + unsigned char reg; + static struct snd_device_ops ops = { + .dev_free = snd_ak4113_dev_free, + }; + + chip = kzalloc(sizeof(*chip), GFP_KERNEL); + if (chip == NULL) + return -ENOMEM; + spin_lock_init(&chip->lock); + chip->card = card; + chip->read = read; + chip->write = write; + chip->private_data = private_data; + INIT_DELAYED_WORK(&chip->work, ak4113_stats); + + for (reg = 0; reg < AK4113_WRITABLE_REGS ; reg++) + chip->regmap[reg] = pgm[reg]; + ak4113_init_regs(chip); + + chip->rcs0 = reg_read(chip, AK4113_REG_RCS0) & ~(AK4113_QINT | + AK4113_CINT | AK4113_STC); + chip->rcs1 = reg_read(chip, AK4113_REG_RCS1); + chip->rcs2 = reg_read(chip, AK4113_REG_RCS2); + err = snd_device_new(card, SNDRV_DEV_LOWLEVEL, chip, &ops); + if (err < 0) + goto __fail; + + if (r_ak4113) + *r_ak4113 = chip; + return 0; + +__fail: + snd_ak4113_free(chip); + return err < 0 ? err : -EIO; +} +EXPORT_SYMBOL_GPL(snd_ak4113_create); + +void snd_ak4113_reg_write(struct ak4113 *chip, unsigned char reg, + unsigned char mask, unsigned char val) +{ + if (reg >= AK4113_WRITABLE_REGS) + return; + reg_write(chip, reg, (chip->regmap[reg] & ~mask) | val); +} +EXPORT_SYMBOL_GPL(snd_ak4113_reg_write); + +static void ak4113_init_regs(struct ak4113 *chip) +{ + unsigned char old = chip->regmap[AK4113_REG_PWRDN], reg; + + /* bring the chip to reset state and powerdown state */ + reg_write(chip, AK4113_REG_PWRDN, old & ~(AK4113_RST|AK4113_PWN)); + udelay(200); + /* release reset, but leave powerdown */ + reg_write(chip, AK4113_REG_PWRDN, (old | AK4113_RST) & ~AK4113_PWN); + udelay(200); + for (reg = 1; reg < AK4113_WRITABLE_REGS; reg++) + reg_write(chip, reg, chip->regmap[reg]); + /* release powerdown, everything is initialized now */ + reg_write(chip, AK4113_REG_PWRDN, old | AK4113_RST | AK4113_PWN); +} + +void snd_ak4113_reinit(struct ak4113 *chip) +{ + chip->init = 1; + mb(); + flush_scheduled_work(); + ak4113_init_regs(chip); + /* bring up statistics / event queing */ + chip->init = 0; + if (chip->kctls[0]) + schedule_delayed_work(&chip->work, HZ / 10); +} +EXPORT_SYMBOL_GPL(snd_ak4113_reinit); + +static unsigned int external_rate(unsigned char rcs1) +{ + switch (rcs1 & (AK4113_FS0|AK4113_FS1|AK4113_FS2|AK4113_FS3)) { + case AK4113_FS_8000HZ: + return 8000; + case AK4113_FS_11025HZ: + return 11025; + case AK4113_FS_16000HZ: + return 16000; + case AK4113_FS_22050HZ: + return 22050; + case AK4113_FS_24000HZ: + return 24000; + case AK4113_FS_32000HZ: + return 32000; + case AK4113_FS_44100HZ: + return 44100; + case AK4113_FS_48000HZ: + return 48000; + case AK4113_FS_64000HZ: + return 64000; + case AK4113_FS_88200HZ: + return 88200; + case AK4113_FS_96000HZ: + return 96000; + case AK4113_FS_176400HZ: + return 176400; + case AK4113_FS_192000HZ: + return 192000; + default: + return 0; + } +} + +static int snd_ak4113_in_error_info(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_info *uinfo) +{ + uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER; + uinfo->count = 1; + uinfo->value.integer.min = 0; + uinfo->value.integer.max = LONG_MAX; + return 0; +} + +static int snd_ak4113_in_error_get(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct ak4113 *chip = snd_kcontrol_chip(kcontrol); + long *ptr; + + spin_lock_irq(&chip->lock); + ptr = (long *)(((char *)chip) + kcontrol->private_value); + ucontrol->value.integer.value[0] = *ptr; + *ptr = 0; + spin_unlock_irq(&chip->lock); + return 0; +} + +#define snd_ak4113_in_bit_info snd_ctl_boolean_mono_info + +static int snd_ak4113_in_bit_get(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct ak4113 *chip = snd_kcontrol_chip(kcontrol); + unsigned char reg = kcontrol->private_value & 0xff; + unsigned char bit = (kcontrol->private_value >> 8) & 0xff; + unsigned char inv = (kcontrol->private_value >> 31) & 1; + + ucontrol->value.integer.value[0] = + ((reg_read(chip, reg) & (1 << bit)) ? 1 : 0) ^ inv; + return 0; +} + +static int snd_ak4113_rx_info(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_info *uinfo) +{ + uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER; + uinfo->count = 1; + uinfo->value.integer.min = 0; + uinfo->value.integer.max = 5; + return 0; +} + +static int snd_ak4113_rx_get(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct ak4113 *chip = snd_kcontrol_chip(kcontrol); + + ucontrol->value.integer.value[0] = + (AK4113_IPS(chip->regmap[AK4113_REG_IO1])); + return 0; +} + +static int snd_ak4113_rx_put(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct ak4113 *chip = snd_kcontrol_chip(kcontrol); + int change; + u8 old_val; + + spin_lock_irq(&chip->lock); + old_val = chip->regmap[AK4113_REG_IO1]; + change = ucontrol->value.integer.value[0] != AK4113_IPS(old_val); + if (change) + reg_write(chip, AK4113_REG_IO1, + (old_val & (~AK4113_IPS(0xff))) | + (AK4113_IPS(ucontrol->value.integer.value[0]))); + spin_unlock_irq(&chip->lock); + return change; +} + +static int snd_ak4113_rate_info(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_info *uinfo) +{ + uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER; + uinfo->count = 1; + uinfo->value.integer.min = 0; + uinfo->value.integer.max = 192000; + return 0; +} + +static int snd_ak4113_rate_get(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct ak4113 *chip = snd_kcontrol_chip(kcontrol); + + ucontrol->value.integer.value[0] = external_rate(reg_read(chip, + AK4113_REG_RCS1)); + return 0; +} + +static int snd_ak4113_spdif_info(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_info *uinfo) +{ + uinfo->type = SNDRV_CTL_ELEM_TYPE_IEC958; + uinfo->count = 1; + return 0; +} + +static int snd_ak4113_spdif_get(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct ak4113 *chip = snd_kcontrol_chip(kcontrol); + unsigned i; + + for (i = 0; i < AK4113_REG_RXCSB_SIZE; i++) + ucontrol->value.iec958.status[i] = reg_read(chip, + AK4113_REG_RXCSB0 + i); + return 0; +} + +static int snd_ak4113_spdif_mask_info(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_info *uinfo) +{ + uinfo->type = SNDRV_CTL_ELEM_TYPE_IEC958; + uinfo->count = 1; + return 0; +} + +static int snd_ak4113_spdif_mask_get(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + memset(ucontrol->value.iec958.status, 0xff, AK4113_REG_RXCSB_SIZE); + return 0; +} + +static int snd_ak4113_spdif_pinfo(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_info *uinfo) +{ + uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER; + uinfo->value.integer.min = 0; + uinfo->value.integer.max = 0xffff; + uinfo->count = 4; + return 0; +} + +static int snd_ak4113_spdif_pget(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct ak4113 *chip = snd_kcontrol_chip(kcontrol); + unsigned short tmp; + + ucontrol->value.integer.value[0] = 0xf8f2; + ucontrol->value.integer.value[1] = 0x4e1f; + tmp = reg_read(chip, AK4113_REG_Pc0) | + (reg_read(chip, AK4113_REG_Pc1) << 8); + ucontrol->value.integer.value[2] = tmp; + tmp = reg_read(chip, AK4113_REG_Pd0) | + (reg_read(chip, AK4113_REG_Pd1) << 8); + ucontrol->value.integer.value[3] = tmp; + return 0; +} + +static int snd_ak4113_spdif_qinfo(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_info *uinfo) +{ + uinfo->type = SNDRV_CTL_ELEM_TYPE_BYTES; + uinfo->count = AK4113_REG_QSUB_SIZE; + return 0; +} + +static int snd_ak4113_spdif_qget(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct ak4113 *chip = snd_kcontrol_chip(kcontrol); + unsigned i; + + for (i = 0; i < AK4113_REG_QSUB_SIZE; i++) + ucontrol->value.bytes.data[i] = reg_read(chip, + AK4113_REG_QSUB_ADDR + i); + return 0; +} + +/* Don't forget to change AK4113_CONTROLS define!!! */ +static struct snd_kcontrol_new snd_ak4113_iec958_controls[] = { +{ + .iface = SNDRV_CTL_ELEM_IFACE_PCM, + .name = "IEC958 Parity Errors", + .access = SNDRV_CTL_ELEM_ACCESS_READ | + SNDRV_CTL_ELEM_ACCESS_VOLATILE, + .info = snd_ak4113_in_error_info, + .get = snd_ak4113_in_error_get, + .private_value = offsetof(struct ak4113, parity_errors), +}, +{ + .iface = SNDRV_CTL_ELEM_IFACE_PCM, + .name = "IEC958 V-Bit Errors", + .access = SNDRV_CTL_ELEM_ACCESS_READ | + SNDRV_CTL_ELEM_ACCESS_VOLATILE, + .info = snd_ak4113_in_error_info, + .get = snd_ak4113_in_error_get, + .private_value = offsetof(struct ak4113, v_bit_errors), +}, +{ + .iface = SNDRV_CTL_ELEM_IFACE_PCM, + .name = "IEC958 C-CRC Errors", + .access = SNDRV_CTL_ELEM_ACCESS_READ | + SNDRV_CTL_ELEM_ACCESS_VOLATILE, + .info = snd_ak4113_in_error_info, + .get = snd_ak4113_in_error_get, + .private_value = offsetof(struct ak4113, ccrc_errors), +}, +{ + .iface = SNDRV_CTL_ELEM_IFACE_PCM, + .name = "IEC958 Q-CRC Errors", + .access = SNDRV_CTL_ELEM_ACCESS_READ | + SNDRV_CTL_ELEM_ACCESS_VOLATILE, + .info = snd_ak4113_in_error_info, + .get = snd_ak4113_in_error_get, + .private_value = offsetof(struct ak4113, qcrc_errors), +}, +{ + .iface = SNDRV_CTL_ELEM_IFACE_PCM, + .name = "IEC958 External Rate", + .access = SNDRV_CTL_ELEM_ACCESS_READ | + SNDRV_CTL_ELEM_ACCESS_VOLATILE, + .info = snd_ak4113_rate_info, + .get = snd_ak4113_rate_get, +}, +{ + .iface = SNDRV_CTL_ELEM_IFACE_PCM, + .name = SNDRV_CTL_NAME_IEC958("", CAPTURE, MASK), + .access = SNDRV_CTL_ELEM_ACCESS_READ, + .info = snd_ak4113_spdif_mask_info, + .get = snd_ak4113_spdif_mask_get, +}, +{ + .iface = SNDRV_CTL_ELEM_IFACE_PCM, + .name = SNDRV_CTL_NAME_IEC958("", CAPTURE, DEFAULT), + .access = SNDRV_CTL_ELEM_ACCESS_READ | + SNDRV_CTL_ELEM_ACCESS_VOLATILE, + .info = snd_ak4113_spdif_info, + .get = snd_ak4113_spdif_get, +}, +{ + .iface = SNDRV_CTL_ELEM_IFACE_PCM, + .name = "IEC958 Preample Capture Default", + .access = SNDRV_CTL_ELEM_ACCESS_READ | + SNDRV_CTL_ELEM_ACCESS_VOLATILE, + .info = snd_ak4113_spdif_pinfo, + .get = snd_ak4113_spdif_pget, +}, +{ + .iface = SNDRV_CTL_ELEM_IFACE_PCM, + .name = "IEC958 Q-subcode Capture Default", + .access = SNDRV_CTL_ELEM_ACCESS_READ | + SNDRV_CTL_ELEM_ACCESS_VOLATILE, + .info = snd_ak4113_spdif_qinfo, + .get = snd_ak4113_spdif_qget, +}, +{ + .iface = SNDRV_CTL_ELEM_IFACE_PCM, + .name = "IEC958 Audio", + .access = SNDRV_CTL_ELEM_ACCESS_READ | + SNDRV_CTL_ELEM_ACCESS_VOLATILE, + .info = snd_ak4113_in_bit_info, + .get = snd_ak4113_in_bit_get, + .private_value = (1<<31) | (1<<8) | AK4113_REG_RCS0, +}, +{ + .iface = SNDRV_CTL_ELEM_IFACE_PCM, + .name = "IEC958 Non-PCM Bitstream", + .access = SNDRV_CTL_ELEM_ACCESS_READ | + SNDRV_CTL_ELEM_ACCESS_VOLATILE, + .info = snd_ak4113_in_bit_info, + .get = snd_ak4113_in_bit_get, + .private_value = (0<<8) | AK4113_REG_RCS1, +}, +{ + .iface = SNDRV_CTL_ELEM_IFACE_PCM, + .name = "IEC958 DTS Bitstream", + .access = SNDRV_CTL_ELEM_ACCESS_READ | + SNDRV_CTL_ELEM_ACCESS_VOLATILE, + .info = snd_ak4113_in_bit_info, + .get = snd_ak4113_in_bit_get, + .private_value = (1<<8) | AK4113_REG_RCS1, +}, +{ + .iface = SNDRV_CTL_ELEM_IFACE_PCM, + .name = "AK4113 Input Select", + .access = SNDRV_CTL_ELEM_ACCESS_READ | + SNDRV_CTL_ELEM_ACCESS_WRITE, + .info = snd_ak4113_rx_info, + .get = snd_ak4113_rx_get, + .put = snd_ak4113_rx_put, +} +}; + +static void snd_ak4113_proc_regs_read(struct snd_info_entry *entry, + struct snd_info_buffer *buffer) +{ + struct ak4113 *ak4113 = entry->private_data; + int reg, val; + /* all ak4113 registers 0x00 - 0x1c */ + for (reg = 0; reg < 0x1d; reg++) { + val = reg_read(ak4113, reg); + snd_iprintf(buffer, "0x%02x = 0x%02x\n", reg, val); + } +} + +static void snd_ak4113_proc_init(struct ak4113 *ak4113) +{ + struct snd_info_entry *entry; + if (!snd_card_proc_new(ak4113->card, "ak4113", &entry)) + snd_info_set_text_ops(entry, ak4113, snd_ak4113_proc_regs_read); +} + +int snd_ak4113_build(struct ak4113 *ak4113, + struct snd_pcm_substream *cap_substream) +{ + struct snd_kcontrol *kctl; + unsigned int idx; + int err; + + if (snd_BUG_ON(!cap_substream)) + return -EINVAL; + ak4113->substream = cap_substream; + for (idx = 0; idx < AK4113_CONTROLS; idx++) { + kctl = snd_ctl_new1(&snd_ak4113_iec958_controls[idx], ak4113); + if (kctl == NULL) + return -ENOMEM; + kctl->id.device = cap_substream->pcm->device; + kctl->id.subdevice = cap_substream->number; + err = snd_ctl_add(ak4113->card, kctl); + if (err < 0) + return err; + ak4113->kctls[idx] = kctl; + } + snd_ak4113_proc_init(ak4113); + /* trigger workq */ + schedule_delayed_work(&ak4113->work, HZ / 10); + return 0; +} +EXPORT_SYMBOL_GPL(snd_ak4113_build); + +int snd_ak4113_external_rate(struct ak4113 *ak4113) +{ + unsigned char rcs1; + + rcs1 = reg_read(ak4113, AK4113_REG_RCS1); + return external_rate(rcs1); +} +EXPORT_SYMBOL_GPL(snd_ak4113_external_rate); + +int snd_ak4113_check_rate_and_errors(struct ak4113 *ak4113, unsigned int flags) +{ + struct snd_pcm_runtime *runtime = + ak4113->substream ? ak4113->substream->runtime : NULL; + unsigned long _flags; + int res = 0; + unsigned char rcs0, rcs1, rcs2; + unsigned char c0, c1; + + rcs1 = reg_read(ak4113, AK4113_REG_RCS1); + if (flags & AK4113_CHECK_NO_STAT) + goto __rate; + rcs0 = reg_read(ak4113, AK4113_REG_RCS0); + rcs2 = reg_read(ak4113, AK4113_REG_RCS2); + spin_lock_irqsave(&ak4113->lock, _flags); + if (rcs0 & AK4113_PAR) + ak4113->parity_errors++; + if (rcs0 & AK4113_V) + ak4113->v_bit_errors++; + if (rcs2 & AK4113_CCRC) + ak4113->ccrc_errors++; + if (rcs2 & AK4113_QCRC) + ak4113->qcrc_errors++; + c0 = (ak4113->rcs0 & (AK4113_QINT | AK4113_CINT | AK4113_STC | + AK4113_AUDION | AK4113_AUTO | AK4113_UNLCK)) ^ + (rcs0 & (AK4113_QINT | AK4113_CINT | AK4113_STC | + AK4113_AUDION | AK4113_AUTO | AK4113_UNLCK)); + c1 = (ak4113->rcs1 & (AK4113_DTSCD | AK4113_NPCM | AK4113_PEM | + AK4113_DAT | 0xf0)) ^ + (rcs1 & (AK4113_DTSCD | AK4113_NPCM | AK4113_PEM | + AK4113_DAT | 0xf0)); + ak4113->rcs0 = rcs0 & ~(AK4113_QINT | AK4113_CINT | AK4113_STC); + ak4113->rcs1 = rcs1; + ak4113->rcs2 = rcs2; + spin_unlock_irqrestore(&ak4113->lock, _flags); + + if (rcs0 & AK4113_PAR) + snd_ctl_notify(ak4113->card, SNDRV_CTL_EVENT_MASK_VALUE, + &ak4113->kctls[0]->id); + if (rcs0 & AK4113_V) + snd_ctl_notify(ak4113->card, SNDRV_CTL_EVENT_MASK_VALUE, + &ak4113->kctls[1]->id); + if (rcs2 & AK4113_CCRC) + snd_ctl_notify(ak4113->card, SNDRV_CTL_EVENT_MASK_VALUE, + &ak4113->kctls[2]->id); + if (rcs2 & AK4113_QCRC) + snd_ctl_notify(ak4113->card, SNDRV_CTL_EVENT_MASK_VALUE, + &ak4113->kctls[3]->id); + + /* rate change */ + if (c1 & 0xf0) + snd_ctl_notify(ak4113->card, SNDRV_CTL_EVENT_MASK_VALUE, + &ak4113->kctls[4]->id); + + if ((c1 & AK4113_PEM) | (c0 & AK4113_CINT)) + snd_ctl_notify(ak4113->card, SNDRV_CTL_EVENT_MASK_VALUE, + &ak4113->kctls[6]->id); + if (c0 & AK4113_QINT) + snd_ctl_notify(ak4113->card, SNDRV_CTL_EVENT_MASK_VALUE, + &ak4113->kctls[8]->id); + + if (c0 & AK4113_AUDION) + snd_ctl_notify(ak4113->card, SNDRV_CTL_EVENT_MASK_VALUE, + &ak4113->kctls[9]->id); + if (c1 & AK4113_NPCM) + snd_ctl_notify(ak4113->card, SNDRV_CTL_EVENT_MASK_VALUE, + &ak4113->kctls[10]->id); + if (c1 & AK4113_DTSCD) + snd_ctl_notify(ak4113->card, SNDRV_CTL_EVENT_MASK_VALUE, + &ak4113->kctls[11]->id); + + if (ak4113->change_callback && (c0 | c1) != 0) + ak4113->change_callback(ak4113, c0, c1); + +__rate: + /* compare rate */ + res = external_rate(rcs1); + if (!(flags & AK4113_CHECK_NO_RATE) && runtime && + (runtime->rate != res)) { + snd_pcm_stream_lock_irqsave(ak4113->substream, _flags); + if (snd_pcm_running(ak4113->substream)) { + /*printk(KERN_DEBUG "rate changed (%i <- %i)\n", + * runtime->rate, res); */ + snd_pcm_stop(ak4113->substream, + SNDRV_PCM_STATE_DRAINING); + wake_up(&runtime->sleep); + res = 1; + } + snd_pcm_stream_unlock_irqrestore(ak4113->substream, _flags); + } + return res; +} +EXPORT_SYMBOL_GPL(snd_ak4113_check_rate_and_errors); + +static void ak4113_stats(struct work_struct *work) +{ + struct ak4113 *chip = container_of(work, struct ak4113, work.work); + + if (!chip->init) + snd_ak4113_check_rate_and_errors(chip, chip->check_flags); + + schedule_delayed_work(&chip->work, HZ / 10); +} -- cgit v1.3-8-gc7d7 From 979f693def9084a452846365dfde5dcb28366333 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 22 Sep 2009 14:44:11 +0200 Subject: ratelimit: Use per ratelimit context locking I'd like to use printk_ratelimit() in atomic context, but that's not possible right now due to the spinlock usage this commit introduced more than a year ago: 717115e: printk ratelimiting rewrite As a first step push the lock into the ratelimit state structure. This allows us to deal with locking failures to be considered as an event related to that state being too busy. Also clean up the code a bit (without changing functionality): - tidy up the definitions - clean up the code flow This also shrinks the code a tiny bit: text data bss dec hex filename 264 0 4 268 10c ratelimit.o.before 255 0 0 255 ff ratelimit.o.after ( Whole-kernel data size got a bit larger, because we have two ratelimit-state data structures right now. ) Cc: Peter Zijlstra Cc: Andrew Morton Cc: Linus Torvalds Cc: David S. Miller LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/ratelimit.h | 30 ++++++++++++++++++++---------- lib/ratelimit.c | 29 +++++++++++++---------------- 2 files changed, 33 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/include/linux/ratelimit.h b/include/linux/ratelimit.h index 00044b856453..187bc16c1f15 100644 --- a/include/linux/ratelimit.h +++ b/include/linux/ratelimit.h @@ -1,20 +1,30 @@ #ifndef _LINUX_RATELIMIT_H #define _LINUX_RATELIMIT_H + #include +#include -#define DEFAULT_RATELIMIT_INTERVAL (5 * HZ) -#define DEFAULT_RATELIMIT_BURST 10 +#define DEFAULT_RATELIMIT_INTERVAL (5 * HZ) +#define DEFAULT_RATELIMIT_BURST 10 struct ratelimit_state { - int interval; - int burst; - int printed; - int missed; - unsigned long begin; + spinlock_t lock; /* protect the state */ + + int interval; + int burst; + int printed; + int missed; + unsigned long begin; }; -#define DEFINE_RATELIMIT_STATE(name, interval, burst) \ - struct ratelimit_state name = {interval, burst,} +#define DEFINE_RATELIMIT_STATE(name, interval_init, burst_init) \ + \ + struct ratelimit_state name = { \ + .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ + .interval = interval_init, \ + .burst = burst_init, \ + } extern int __ratelimit(struct ratelimit_state *rs); -#endif + +#endif /* _LINUX_RATELIMIT_H */ diff --git a/lib/ratelimit.c b/lib/ratelimit.c index 26187edcc7ea..0e2c28e8a0ca 100644 --- a/lib/ratelimit.c +++ b/lib/ratelimit.c @@ -7,15 +7,12 @@ * parameter. Now every user can use their own standalone ratelimit_state. * * This file is released under the GPLv2. - * */ #include #include #include -static DEFINE_SPINLOCK(ratelimit_lock); - /* * __ratelimit - rate limiting * @rs: ratelimit_state data @@ -26,11 +23,12 @@ static DEFINE_SPINLOCK(ratelimit_lock); int __ratelimit(struct ratelimit_state *rs) { unsigned long flags; + int ret; if (!rs->interval) return 1; - spin_lock_irqsave(&ratelimit_lock, flags); + spin_lock_irqsave(&rs->lock, flags); if (!rs->begin) rs->begin = jiffies; @@ -38,20 +36,19 @@ int __ratelimit(struct ratelimit_state *rs) if (rs->missed) printk(KERN_WARNING "%s: %d callbacks suppressed\n", __func__, rs->missed); - rs->begin = 0; + rs->begin = 0; rs->printed = 0; - rs->missed = 0; + rs->missed = 0; } - if (rs->burst && rs->burst > rs->printed) - goto print; - - rs->missed++; - spin_unlock_irqrestore(&ratelimit_lock, flags); - return 0; + if (rs->burst && rs->burst > rs->printed) { + rs->printed++; + ret = 1; + } else { + rs->missed++; + ret = 0; + } + spin_unlock_irqrestore(&rs->lock, flags); -print: - rs->printed++; - spin_unlock_irqrestore(&ratelimit_lock, flags); - return 1; + return ret; } EXPORT_SYMBOL(__ratelimit); -- cgit v1.3-8-gc7d7 From 3fff4c42bd0a89869a0eb1e7874cc06ffa4aa0f5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 22 Sep 2009 16:18:09 +0200 Subject: printk: Remove ratelimit.h from kernel.h Decouple kernel.h from ratelimit.h: the global declaration of printk's ratelimit_state is not needed, and it leads to messy circular dependencies due to ratelimit.h's (new) adding of a spinlock_types.h include. Cc: Peter Zijlstra Cc: Andrew Morton Cc: Linus Torvalds Cc: David S. Miller LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/kernel.h | 2 -- include/linux/net.h | 1 + kernel/printk.c | 1 + kernel/sysctl.c | 3 +++ lib/ratelimit.c | 2 +- net/core/sysctl_net_core.c | 2 ++ net/core/utils.c | 2 ++ 7 files changed, 10 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 2b5b1e0899a8..3305f33201be 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include @@ -241,7 +240,6 @@ asmlinkage int vprintk(const char *fmt, va_list args) asmlinkage int printk(const char * fmt, ...) __attribute__ ((format (printf, 1, 2))) __cold; -extern struct ratelimit_state printk_ratelimit_state; extern int printk_ratelimit(void); extern bool printk_timed_ratelimit(unsigned long *caller_jiffies, unsigned int interval_msec); diff --git a/include/linux/net.h b/include/linux/net.h index 9040a10584f7..df20f680f455 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -358,6 +358,7 @@ static const struct proto_ops name##_ops = { \ #ifdef CONFIG_SYSCTL #include +#include extern struct ratelimit_state net_ratelimit_state; #endif diff --git a/kernel/printk.c b/kernel/printk.c index 602033acd6c7..b997c893cdcf 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -33,6 +33,7 @@ #include #include #include +#include #include diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 1a631ba684a4..6c37048b9db9 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -155,6 +156,8 @@ extern int no_unaligned_warning; extern int unaligned_dump_stack; #endif +extern struct ratelimit_state printk_ratelimit_state; + #ifdef CONFIG_RT_MUTEXES extern int max_lock_depth; #endif diff --git a/lib/ratelimit.c b/lib/ratelimit.c index 69bfcacda16d..5551731ae1d4 100644 --- a/lib/ratelimit.c +++ b/lib/ratelimit.c @@ -9,7 +9,7 @@ * This file is released under the GPLv2. */ -#include +#include #include #include diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 7db1de0497c6..887c03c4e3c6 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -10,7 +10,9 @@ #include #include #include +#include #include + #include #include diff --git a/net/core/utils.c b/net/core/utils.c index 83221aee7084..838250241d26 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -24,6 +24,8 @@ #include #include #include +#include + #include #include -- cgit v1.3-8-gc7d7 From 96a2c464de07d7c72988db851c029b204fc59108 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 1 Aug 2009 01:34:24 +0200 Subject: tracing/bkl: Add bkl ftrace events Add two events lock_kernel and unlock_kernel() to trace the bkl uses. This opens the door for userspace tools to perform statistics about the callsites that use it, dependencies with other locks (by pairing the trace with lock events), use with recursivity and so on... The {__reacquire,release}_kernel_lock() events are not traced because these are called from schedule, thus the sched events are sufficient to trace them. Example of a trace: hald-addon-stor-4152 [000] 165.875501: unlock_kernel: depth: 0, fs/block_dev.c:1358 __blkdev_put() hald-addon-stor-4152 [000] 167.832974: lock_kernel: depth: 0, fs/block_dev.c:1167 __blkdev_get() How to get the callsites that acquire it recursively: cd /debug/tracing/events/bkl echo "lock_depth > 0" > filter firefox-4951 [001] 206.276967: unlock_kernel: depth: 1, fs/reiserfs/super.c:575 reiserfs_dirty_inode() You can also filter by file and/or line. v2: Use of FILTER_PTR_STRING attribute for files and lines fields to make them traceable. Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Li Zefan --- include/linux/smp_lock.h | 19 ++++++++++++--- include/trace/events/bkl.h | 61 ++++++++++++++++++++++++++++++++++++++++++++++ lib/kernel_lock.c | 11 +++++---- 3 files changed, 82 insertions(+), 9 deletions(-) create mode 100644 include/trace/events/bkl.h (limited to 'include') diff --git a/include/linux/smp_lock.h b/include/linux/smp_lock.h index 813be59bf345..d48cc77ba70d 100644 --- a/include/linux/smp_lock.h +++ b/include/linux/smp_lock.h @@ -3,6 +3,7 @@ #ifdef CONFIG_LOCK_KERNEL #include +#include #define kernel_locked() (current->lock_depth >= 0) @@ -24,8 +25,18 @@ static inline int reacquire_kernel_lock(struct task_struct *task) return 0; } -extern void __lockfunc lock_kernel(void) __acquires(kernel_lock); -extern void __lockfunc unlock_kernel(void) __releases(kernel_lock); +extern void __lockfunc _lock_kernel(void) __acquires(kernel_lock); +extern void __lockfunc _unlock_kernel(void) __releases(kernel_lock); + +#define lock_kernel() { \ + trace_lock_kernel(__func__, __FILE__, __LINE__); \ + _lock_kernel(); \ +} + +#define unlock_kernel() { \ + trace_unlock_kernel(__func__, __FILE__, __LINE__); \ + _unlock_kernel(); \ +} /* * Various legacy drivers don't really need the BKL in a specific @@ -41,8 +52,8 @@ static inline void cycle_kernel_lock(void) #else -#define lock_kernel() do { } while(0) -#define unlock_kernel() do { } while(0) +#define lock_kernel() trace_lock_kernel(__func__, __FILE__, __LINE__); +#define unlock_kernel() trace_unlock_kernel(__func__, __FILE__, __LINE__); #define release_kernel_lock(task) do { } while(0) #define cycle_kernel_lock() do { } while(0) #define reacquire_kernel_lock(task) 0 diff --git a/include/trace/events/bkl.h b/include/trace/events/bkl.h new file mode 100644 index 000000000000..8abd620a490e --- /dev/null +++ b/include/trace/events/bkl.h @@ -0,0 +1,61 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM bkl + +#if !defined(_TRACE_BKL_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_BKL_H + +#include + +TRACE_EVENT(lock_kernel, + + TP_PROTO(const char *func, const char *file, int line), + + TP_ARGS(func, file, line), + + TP_STRUCT__entry( + __field( int, lock_depth ) + __field_ext( const char *, func, FILTER_PTR_STRING ) + __field_ext( const char *, file, FILTER_PTR_STRING ) + __field( int, line ) + ), + + TP_fast_assign( + /* We want to record the lock_depth after lock is acquired */ + __entry->lock_depth = current->lock_depth + 1; + __entry->func = func; + __entry->file = file; + __entry->line = line; + ), + + TP_printk("depth: %d, %s:%d %s()", __entry->lock_depth, + __entry->file, __entry->line, __entry->func) +); + +TRACE_EVENT(unlock_kernel, + + TP_PROTO(const char *func, const char *file, int line), + + TP_ARGS(func, file, line), + + TP_STRUCT__entry( + __field(int, lock_depth) + __field(const char *, func) + __field(const char *, file) + __field(int, line) + ), + + TP_fast_assign( + __entry->lock_depth = current->lock_depth; + __entry->func = func; + __entry->file = file; + __entry->line = line; + ), + + TP_printk("depth: %d, %s:%d %s()", __entry->lock_depth, + __entry->file, __entry->line, __entry->func) +); + +#endif /* _TRACE_BKL_H */ + +/* This part must be outside protection */ +#include diff --git a/lib/kernel_lock.c b/lib/kernel_lock.c index 39f1029e3525..5c10b2e1fd08 100644 --- a/lib/kernel_lock.c +++ b/lib/kernel_lock.c @@ -5,10 +5,11 @@ * relegated to obsolescence, but used by various less * important (or lazy) subsystems. */ -#include #include #include #include +#define CREATE_TRACE_POINTS +#include /* * The 'big kernel lock' @@ -113,7 +114,7 @@ static inline void __unlock_kernel(void) * This cannot happen asynchronously, so we only need to * worry about other CPU's. */ -void __lockfunc lock_kernel(void) +void __lockfunc _lock_kernel(void) { int depth = current->lock_depth+1; if (likely(!depth)) @@ -121,13 +122,13 @@ void __lockfunc lock_kernel(void) current->lock_depth = depth; } -void __lockfunc unlock_kernel(void) +void __lockfunc _unlock_kernel(void) { BUG_ON(current->lock_depth < 0); if (likely(--current->lock_depth < 0)) __unlock_kernel(); } -EXPORT_SYMBOL(lock_kernel); -EXPORT_SYMBOL(unlock_kernel); +EXPORT_SYMBOL(_lock_kernel); +EXPORT_SYMBOL(_unlock_kernel); -- cgit v1.3-8-gc7d7 From 9f0cf4adb6aa0bfccf675c938124e68f7f06349d Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sat, 26 Sep 2009 14:33:01 +0200 Subject: x86: Use __builtin_object_size() to validate the buffer size for copy_from_user() gcc (4.x) supports the __builtin_object_size() builtin, which reports the size of an object that a pointer point to, when known at compile time. If the buffer size is not known at compile time, a constant -1 is returned. This patch uses this feature to add a sanity check to copy_from_user(); if the target buffer is known to be smaller than the copy size, the copy is aborted and a WARNing is emitted in memory debug mode. These extra checks compile away when the object size is not known, or if both the buffer size and the copy length are constants. Signed-off-by: Arjan van de Ven LKML-Reference: <20090926143301.2c396b94@infradead.org> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uaccess_32.h | 19 ++++++++++++++++++- arch/x86/include/asm/uaccess_64.h | 19 ++++++++++++++++++- arch/x86/kernel/x8664_ksyms_64.c | 2 +- arch/x86/lib/copy_user_64.S | 4 ++-- arch/x86/lib/usercopy_32.c | 4 ++-- include/linux/compiler-gcc4.h | 2 ++ include/linux/compiler.h | 4 ++++ 7 files changed, 47 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index 632fb44b4cb5..582d6aef7417 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -187,9 +187,26 @@ __copy_from_user_inatomic_nocache(void *to, const void __user *from, unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long n); -unsigned long __must_check copy_from_user(void *to, +unsigned long __must_check _copy_from_user(void *to, const void __user *from, unsigned long n); + +static inline unsigned long __must_check copy_from_user(void *to, + const void __user *from, + unsigned long n) +{ + int sz = __compiletime_object_size(to); + int ret = -EFAULT; + + if (likely(sz == -1 || sz >= n)) + ret = _copy_from_user(to, from, n); +#ifdef CONFIG_DEBUG_VM + else + WARN(1, "Buffer overflow detected!\n"); +#endif + return ret; +} + long __must_check strncpy_from_user(char *dst, const char __user *src, long count); long __must_check __strncpy_from_user(char *dst, diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index db24b215fc50..ce6fec7ce38d 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -21,10 +21,27 @@ copy_user_generic(void *to, const void *from, unsigned len); __must_check unsigned long copy_to_user(void __user *to, const void *from, unsigned len); __must_check unsigned long -copy_from_user(void *to, const void __user *from, unsigned len); +_copy_from_user(void *to, const void __user *from, unsigned len); __must_check unsigned long copy_in_user(void __user *to, const void __user *from, unsigned len); +static inline unsigned long __must_check copy_from_user(void *to, + const void __user *from, + unsigned long n) +{ + int sz = __compiletime_object_size(to); + int ret = -EFAULT; + + if (likely(sz == -1 || sz >= n)) + ret = _copy_from_user(to, from, n); +#ifdef CONFIG_DEBUG_VM + else + WARN(1, "Buffer overflow detected!\n"); +#endif + return ret; +} + + static __always_inline __must_check int __copy_from_user(void *dst, const void __user *src, unsigned size) { diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index 3909e3ba5ce3..a0cdd8cc1d67 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -30,7 +30,7 @@ EXPORT_SYMBOL(__put_user_8); EXPORT_SYMBOL(copy_user_generic); EXPORT_SYMBOL(__copy_user_nocache); -EXPORT_SYMBOL(copy_from_user); +EXPORT_SYMBOL(_copy_from_user); EXPORT_SYMBOL(copy_to_user); EXPORT_SYMBOL(__copy_from_user_inatomic); diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index 6ba0f7bb85ea..4be3c415b3e9 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -78,7 +78,7 @@ ENTRY(copy_to_user) ENDPROC(copy_to_user) /* Standard copy_from_user with segment limit checking */ -ENTRY(copy_from_user) +ENTRY(_copy_from_user) CFI_STARTPROC GET_THREAD_INFO(%rax) movq %rsi,%rcx @@ -88,7 +88,7 @@ ENTRY(copy_from_user) jae bad_from_user ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string CFI_ENDPROC -ENDPROC(copy_from_user) +ENDPROC(_copy_from_user) ENTRY(copy_user_generic) CFI_STARTPROC diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index 1f118d462acc..8498684e45b0 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c @@ -874,7 +874,7 @@ EXPORT_SYMBOL(copy_to_user); * data to the requested size using zero bytes. */ unsigned long -copy_from_user(void *to, const void __user *from, unsigned long n) +_copy_from_user(void *to, const void __user *from, unsigned long n) { if (access_ok(VERIFY_READ, from, n)) n = __copy_from_user(to, from, n); @@ -882,4 +882,4 @@ copy_from_user(void *to, const void __user *from, unsigned long n) memset(to, 0, n); return n; } -EXPORT_SYMBOL(copy_from_user); +EXPORT_SYMBOL(_copy_from_user); diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h index 450fa597c94d..a3aef5d55dba 100644 --- a/include/linux/compiler-gcc4.h +++ b/include/linux/compiler-gcc4.h @@ -37,3 +37,5 @@ #define __cold __attribute__((__cold__)) #endif + +#define __compiletime_object_size(obj) __builtin_object_size(obj, 0) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 04fb5135b4e1..8e54108688f9 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -266,6 +266,10 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); # define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b)) #endif +/* Compile time object size, -1 for unknown */ +#ifndef __compiletime_object_size +# define __compiletime_object_size(obj) -1 +#endif /* * Prevent the compiler from merging or refetching accesses. The compiler * is also forbidden from reordering successive instances of ACCESS_ONCE(), -- cgit v1.3-8-gc7d7 From be2500b8353d41463399e997fe8562f772dcaaba Mon Sep 17 00:00:00 2001 From: "Lopez Cruz, Misael" Date: Fri, 25 Sep 2009 21:02:49 -0500 Subject: ASoC: Add PDM DAI format definition Add DAI format definition for PDM interfaces. Signed-off-by: Misael Lopez Cruz Signed-off-by: Mark Brown --- include/sound/soc-dai.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h index e0c7fa7b1060..ca24e7f7a3f5 100644 --- a/include/sound/soc-dai.h +++ b/include/sound/soc-dai.h @@ -30,6 +30,7 @@ struct snd_pcm_substream; #define SND_SOC_DAIFMT_DSP_A 3 /* L data MSB after FRM LRC */ #define SND_SOC_DAIFMT_DSP_B 4 /* L data MSB during FRM LRC */ #define SND_SOC_DAIFMT_AC97 5 /* AC97 */ +#define SND_SOC_DAIFMT_PDM 6 /* Pulse density modulation */ /* left and right justified also known as MSB and LSB respectively */ #define SND_SOC_DAIFMT_MSB SND_SOC_DAIFMT_LEFT_J -- cgit v1.3-8-gc7d7 From 925936ebf35a95c290e010b784c962164e6728f3 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 28 Sep 2009 17:12:49 +0200 Subject: tracing: Pushdown the bkl tracepoints calls Currently we are calling the bkl tracepoint callbacks just before the bkl lock/unlock operations, ie the tracepoint call is not inside a lock_kernel() function but inside a lock_kernel() macro. Hence the bkl trace event header must be included from smp_lock.h. This raises some nasty circular header dependencies: linux/smp_lock.h -> trace/events/bkl.h -> trace/define_trace.h -> trace/ftrace.h -> linux/ftrace_event.h -> linux/hardirq.h -> linux/smp_lock.h This results in incomplete event declarations, spurious event definitions and other kind of funny behaviours. This is hardly fixable without ugly workarounds. So instead, we push the file name, line number and function name as lock_kernel() parameters, so that we only deal with the trace event header from lib/kernel_lock.c This adds two parameters to lock_kernel() and unlock_kernel() but it should be fine wrt to performances because this pair dos not seem to be called in fast paths. Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Ingo Molnar Cc: Li Zefan --- include/linux/smp_lock.h | 28 +++++++++++++++------------- lib/kernel_lock.c | 15 +++++++++++---- 2 files changed, 26 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/smp_lock.h b/include/linux/smp_lock.h index d48cc77ba70d..2ea1dd1ba21c 100644 --- a/include/linux/smp_lock.h +++ b/include/linux/smp_lock.h @@ -3,7 +3,6 @@ #ifdef CONFIG_LOCK_KERNEL #include -#include #define kernel_locked() (current->lock_depth >= 0) @@ -25,18 +24,21 @@ static inline int reacquire_kernel_lock(struct task_struct *task) return 0; } -extern void __lockfunc _lock_kernel(void) __acquires(kernel_lock); -extern void __lockfunc _unlock_kernel(void) __releases(kernel_lock); +extern void __lockfunc +_lock_kernel(const char *func, const char *file, int line) +__acquires(kernel_lock); -#define lock_kernel() { \ - trace_lock_kernel(__func__, __FILE__, __LINE__); \ - _lock_kernel(); \ -} +extern void __lockfunc +_unlock_kernel(const char *func, const char *file, int line) +__releases(kernel_lock); -#define unlock_kernel() { \ - trace_unlock_kernel(__func__, __FILE__, __LINE__); \ - _unlock_kernel(); \ -} +#define lock_kernel() do { \ + _lock_kernel(__func__, __FILE__, __LINE__); \ +} while (0) + +#define unlock_kernel() do { \ + _unlock_kernel(__func__, __FILE__, __LINE__); \ +} while (0) /* * Various legacy drivers don't really need the BKL in a specific @@ -52,8 +54,8 @@ static inline void cycle_kernel_lock(void) #else -#define lock_kernel() trace_lock_kernel(__func__, __FILE__, __LINE__); -#define unlock_kernel() trace_unlock_kernel(__func__, __FILE__, __LINE__); +#define lock_kernel() +#define unlock_kernel() #define release_kernel_lock(task) do { } while(0) #define cycle_kernel_lock() do { } while(0) #define reacquire_kernel_lock(task) 0 diff --git a/lib/kernel_lock.c b/lib/kernel_lock.c index 5c10b2e1fd08..4ebfa5a164d7 100644 --- a/lib/kernel_lock.c +++ b/lib/kernel_lock.c @@ -8,9 +8,11 @@ #include #include #include -#define CREATE_TRACE_POINTS #include +#define CREATE_TRACE_POINTS +#include + /* * The 'big kernel lock' * @@ -114,19 +116,24 @@ static inline void __unlock_kernel(void) * This cannot happen asynchronously, so we only need to * worry about other CPU's. */ -void __lockfunc _lock_kernel(void) +void __lockfunc _lock_kernel(const char *func, const char *file, int line) { - int depth = current->lock_depth+1; + int depth = current->lock_depth + 1; + + trace_lock_kernel(func, file, line); + if (likely(!depth)) __lock_kernel(); current->lock_depth = depth; } -void __lockfunc _unlock_kernel(void) +void __lockfunc _unlock_kernel(const char *func, const char *file, int line) { BUG_ON(current->lock_depth < 0); if (likely(--current->lock_depth < 0)) __unlock_kernel(); + + trace_unlock_kernel(func, file, line); } EXPORT_SYMBOL(_lock_kernel); -- cgit v1.3-8-gc7d7 From acd47100914b2896d0699febefd077f85c4dd272 Mon Sep 17 00:00:00 2001 From: Krzysztof Helt Date: Thu, 1 Oct 2009 00:10:34 +0200 Subject: ALSA: sscape: convert to firmware loader framework The conversion solves the problem that firmware size was set to 64KB while non PnP cards have 128KB firmware files. An additional firmware initialization code has been moved from the OSS driver. Signed-off-by: Krzysztof Helt Signed-off-by: Takashi Iwai --- Documentation/sound/alsa/ALSA-Configuration.txt | 8 +- include/sound/sscape_ioctl.h | 21 -- sound/isa/Kconfig | 8 +- sound/isa/sscape.c | 328 ++++++++---------------- 4 files changed, 116 insertions(+), 249 deletions(-) delete mode 100644 include/sound/sscape_ioctl.h (limited to 'include') diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt index 1c8eb4518ce0..cf985257ae40 100644 --- a/Documentation/sound/alsa/ALSA-Configuration.txt +++ b/Documentation/sound/alsa/ALSA-Configuration.txt @@ -1631,7 +1631,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. Module snd-sscape ----------------- - Module for ENSONIQ SoundScape PnP cards. + Module for ENSONIQ SoundScape cards. port - Port # (PnP setup) wss_port - WSS Port # (PnP setup) @@ -1640,9 +1640,9 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. dma - DMA # (PnP setup) dma2 - 2nd DMA # (PnP setup, -1 to disable) - This module supports multiple cards. ISA PnP must be enabled. - You need sscape_ctl tool in alsa-tools package for loading - the microcode. + This module supports multiple cards. + + The driver requires the firmware loader support on kernel. Module snd-sun-amd7930 (on sparc only) -------------------------------------- diff --git a/include/sound/sscape_ioctl.h b/include/sound/sscape_ioctl.h deleted file mode 100644 index 0d8885969c64..000000000000 --- a/include/sound/sscape_ioctl.h +++ /dev/null @@ -1,21 +0,0 @@ -#ifndef SSCAPE_IOCTL_H -#define SSCAPE_IOCTL_H - - -struct sscape_bootblock -{ - unsigned char code[256]; - unsigned version; -}; - -#define SSCAPE_MICROCODE_SIZE 65536 - -struct sscape_microcode -{ - unsigned char __user *code; -}; - -#define SND_SSCAPE_LOAD_BOOTB _IOWR('P', 100, struct sscape_bootblock) -#define SND_SSCAPE_LOAD_MCODE _IOW ('P', 101, struct sscape_microcode) - -#endif diff --git a/sound/isa/Kconfig b/sound/isa/Kconfig index b90fc164a79c..02fe81ca88fd 100644 --- a/sound/isa/Kconfig +++ b/sound/isa/Kconfig @@ -372,9 +372,9 @@ config SND_SGALAXY config SND_SSCAPE tristate "Ensoniq SoundScape driver" - select SND_HWDEP select SND_MPU401_UART select SND_WSS_LIB + select FW_LOADER help Say Y here to include support for Ensoniq SoundScape and Ensoniq OEM soundcards. @@ -382,7 +382,11 @@ config SND_SSCAPE The PCM audio is supported on SoundScape Classic, Elite, PnP and VIVO cards. The supported OEM cards are SPEA Media FX and Reveal SC-600. - The MIDI support is very experimental. + The MIDI support is very experimental and requires binary + firmware files called "scope.cod" and "sndscape.co?" where the + ? is digit 0, 1, 2, 3 or 4. The firmware files can be found + in DOS or Windows driver packages. One has to put the firmware + files into the /lib/firmware directory. To compile this driver as a module, choose M here: the module will be called snd-sscape. diff --git a/sound/isa/sscape.c b/sound/isa/sscape.c index b11c35f6aefe..1ce465cc66a8 100644 --- a/sound/isa/sscape.c +++ b/sound/isa/sscape.c @@ -1,5 +1,5 @@ /* - * Low-level ALSA driver for the ENSONIQ SoundScape PnP + * Low-level ALSA driver for the ENSONIQ SoundScape * Copyright (c) by Chris Rankin * * This driver was written in part using information obtained from @@ -25,22 +25,26 @@ #include #include #include +#include #include #include #include #include #include -#include #include #include #include -#include - MODULE_AUTHOR("Chris Rankin"); -MODULE_DESCRIPTION("ENSONIQ SoundScape PnP driver"); +MODULE_DESCRIPTION("ENSONIQ SoundScape driver"); MODULE_LICENSE("GPL"); +MODULE_FIRMWARE("sndscape.co0"); +MODULE_FIRMWARE("sndscape.co1"); +MODULE_FIRMWARE("sndscape.co2"); +MODULE_FIRMWARE("sndscape.co3"); +MODULE_FIRMWARE("sndscape.co4"); +MODULE_FIRMWARE("scope.cod"); static int index[SNDRV_CARDS] __devinitdata = SNDRV_DEFAULT_IDX; static char* id[SNDRV_CARDS] __devinitdata = SNDRV_DEFAULT_STR; @@ -142,14 +146,12 @@ struct soundscape { struct resource *wss_res; struct snd_wss *chip; struct snd_mpu401 *mpu; - struct snd_hwdep *hw; /* * The MIDI device won't work until we've loaded * its firmware via a hardware-dependent device IOCTL */ spinlock_t fwlock; - int hw_in_use; unsigned long midi_usage; unsigned char midi_vol; }; @@ -167,12 +169,6 @@ static inline struct soundscape *get_mpu401_soundscape(struct snd_mpu401 * mpu) return (struct soundscape *) (mpu->private_data); } -static inline struct soundscape *get_hwdep_soundscape(struct snd_hwdep * hw) -{ - return (struct soundscape *) (hw->private_data); -} - - /* * Allocates some kernel memory that we can use for DMA. * I think this means that the memory has to map to @@ -393,12 +389,12 @@ static int obp_startup_ack(struct soundscape *s, unsigned timeout) do { unsigned long flags; - unsigned char x; + int x; spin_lock_irqsave(&s->lock, flags); - x = inb(HOST_DATA_IO(s->io_base)); + x = host_read_unsafe(s->io_base); spin_unlock_irqrestore(&s->lock, flags); - if ((x & 0xfe) == 0xfe) + if (x == 0xfe || x == 0xff) return 1; msleep(10); @@ -420,10 +416,10 @@ static int host_startup_ack(struct soundscape *s, unsigned timeout) do { unsigned long flags; - unsigned char x; + int x; spin_lock_irqsave(&s->lock, flags); - x = inb(HOST_DATA_IO(s->io_base)); + x = host_read_unsafe(s->io_base); spin_unlock_irqrestore(&s->lock, flags); if (x == 0xfe) return 1; @@ -438,14 +434,14 @@ static int host_startup_ack(struct soundscape *s, unsigned timeout) * Upload a byte-stream into the SoundScape using DMA channel A. */ static int upload_dma_data(struct soundscape *s, - const unsigned char __user *data, + const unsigned char *data, size_t size) { unsigned long flags; struct snd_dma_buffer dma; int ret; - if (!get_dmabuf(&dma, PAGE_ALIGN(size))) + if (!get_dmabuf(&dma, PAGE_ALIGN(32 * 1024))) return -ENOMEM; spin_lock_irqsave(&s->lock, flags); @@ -458,7 +454,6 @@ static int upload_dma_data(struct soundscape *s, /* * Enable the DMA channels and configure them ... */ - sscape_write_unsafe(s->io_base, GA_DMACFG_REG, 0x50); sscape_write_unsafe(s->io_base, GA_DMAA_REG, (s->chip->dma1 << 4) | DMA_8BIT); sscape_write_unsafe(s->io_base, GA_DMAB_REG, 0x20); @@ -468,35 +463,17 @@ static int upload_dma_data(struct soundscape *s, sscape_write_unsafe(s->io_base, GA_HMCTL_REG, sscape_read_unsafe(s->io_base, GA_HMCTL_REG) | 0x80); /* - * Upload the user's data (firmware?) to the SoundScape + * Upload the firmware to the SoundScape * board through the DMA channel ... */ while (size != 0) { unsigned long len; - /* - * Apparently, copying to/from userspace can sleep. - * We are therefore forbidden from holding any - * spinlocks while we copy ... - */ - spin_unlock_irqrestore(&s->lock, flags); - - /* - * Remember that the data that we want to DMA - * comes from USERSPACE. We have already verified - * the userspace pointer ... - */ len = min(size, dma.bytes); - len -= __copy_from_user(dma.area, data, len); + memcpy(dma.area, data, len); data += len; size -= len; - /* - * Grab that spinlock again, now that we've - * finished copying! - */ - spin_lock_irqsave(&s->lock, flags); - snd_dma_program(s->chip->dma1, dma.addr, len, DMA_MODE_WRITE); sscape_start_dma_unsafe(s->io_base, GA_DMAA_REG); if (!sscape_wait_dma_unsafe(s->io_base, GA_DMAA_REG, 5000)) { @@ -512,6 +489,7 @@ static int upload_dma_data(struct soundscape *s, } /* while */ set_host_mode_unsafe(s->io_base); + outb(0x0, s->io_base); /* * Boot the board ... (I think) @@ -537,7 +515,7 @@ _release_dma: /* * NOTE!!! We are NOT holding any spinlocks at this point !!! */ - sscape_write(s, GA_DMAA_REG, (s->ic_type == IC_ODIE ? 0x70 : 0x40)); + sscape_write(s, GA_DMAA_REG, (s->ic_type == IC_OPUS ? 0x40 : 0x70)); free_dmabuf(&dma); return ret; @@ -547,162 +525,69 @@ _release_dma: * Upload the bootblock(?) into the SoundScape. The only * purpose of this block of code seems to be to tell * us which version of the microcode we should be using. - * - * NOTE: The boot-block data resides in USER-SPACE!!! - * However, we have already verified its memory - * addresses by the time we get here. */ -static int sscape_upload_bootblock(struct soundscape *sscape, struct sscape_bootblock __user *bb) +static int sscape_upload_bootblock(struct snd_card *card) { + struct soundscape *sscape = get_card_soundscape(card); unsigned long flags; + const struct firmware *init_fw = NULL; int data = 0; int ret; - ret = upload_dma_data(sscape, bb->code, sizeof(bb->code)); - - spin_lock_irqsave(&sscape->lock, flags); - if (ret == 0) { - data = host_read_ctrl_unsafe(sscape->io_base, 100); - } - set_midi_mode_unsafe(sscape->io_base); - spin_unlock_irqrestore(&sscape->lock, flags); - - if (ret == 0) { - if (data < 0) { - snd_printk(KERN_ERR "sscape: timeout reading firmware version\n"); - ret = -EAGAIN; - } - else if (__copy_to_user(&bb->version, &data, sizeof(bb->version))) { - ret = -EFAULT; - } + ret = request_firmware(&init_fw, "scope.cod", card->dev); + if (ret < 0) { + snd_printk(KERN_ERR "Error loading scope.cod"); + return ret; } + ret = upload_dma_data(sscape, init_fw->data, init_fw->size); - return ret; -} + release_firmware(init_fw); -/* - * Upload the microcode into the SoundScape. The - * microcode is 64K of data, and if we try to copy - * it into a local variable then we will SMASH THE - * KERNEL'S STACK! We therefore leave it in USER - * SPACE, and save ourselves from copying it at all. - */ -static int sscape_upload_microcode(struct soundscape *sscape, - const struct sscape_microcode __user *mc) -{ - unsigned long flags; - char __user *code; - int err; - - /* - * We are going to have to copy this data into a special - * DMA-able buffer before we can upload it. We shall therefore - * just check that the data pointer is valid for now. - * - * NOTE: This buffer is 64K long! That's WAY too big to - * copy into a stack-temporary anyway. - */ - if ( get_user(code, &mc->code) || - !access_ok(VERIFY_READ, code, SSCAPE_MICROCODE_SIZE) ) - return -EFAULT; + spin_lock_irqsave(&sscape->lock, flags); + if (ret == 0) + data = host_read_ctrl_unsafe(sscape->io_base, 100); - if ((err = upload_dma_data(sscape, code, SSCAPE_MICROCODE_SIZE)) == 0) { - snd_printk(KERN_INFO "sscape: MIDI firmware loaded\n"); - } + if (data & 0x10) + sscape_write_unsafe(sscape->io_base, GA_SMCFGA_REG, 0x2f); - spin_lock_irqsave(&sscape->lock, flags); - set_midi_mode_unsafe(sscape->io_base); spin_unlock_irqrestore(&sscape->lock, flags); - initialise_mpu401(sscape->mpu); + data &= 0xf; + if (ret == 0 && data > 7) { + snd_printk(KERN_ERR "timeout reading firmware version\n"); + ret = -EAGAIN; + } - return err; + return (ret == 0) ? data : ret; } /* - * Hardware-specific device functions, to implement special - * IOCTLs for the SoundScape card. This is how we upload - * the microcode into the card, for example, and so we - * must ensure that no two processes can open this device - * simultaneously, and that we can't open it at all if - * someone is using the MIDI device. + * Upload the microcode into the SoundScape. */ -static int sscape_hw_open(struct snd_hwdep * hw, struct file *file) +static int sscape_upload_microcode(struct snd_card *card, int version) { - register struct soundscape *sscape = get_hwdep_soundscape(hw); - unsigned long flags; + struct soundscape *sscape = get_card_soundscape(card); + const struct firmware *init_fw = NULL; + char name[14]; int err; - spin_lock_irqsave(&sscape->fwlock, flags); + snprintf(name, sizeof(name), "sndscape.co%d", version); - if ((sscape->midi_usage != 0) || sscape->hw_in_use) { - err = -EBUSY; - } else { - sscape->hw_in_use = 1; - err = 0; + err = request_firmware(&init_fw, name, card->dev); + if (err < 0) { + snd_printk(KERN_ERR "Error loading sndscape.co%d", version); + return err; } + err = upload_dma_data(sscape, init_fw->data, init_fw->size); + if (err == 0) + snd_printk(KERN_INFO "MIDI firmware loaded %d KBs\n", + init_fw->size >> 10); - spin_unlock_irqrestore(&sscape->fwlock, flags); - return err; -} - -static int sscape_hw_release(struct snd_hwdep * hw, struct file *file) -{ - register struct soundscape *sscape = get_hwdep_soundscape(hw); - unsigned long flags; - - spin_lock_irqsave(&sscape->fwlock, flags); - sscape->hw_in_use = 0; - spin_unlock_irqrestore(&sscape->fwlock, flags); - return 0; -} - -static int sscape_hw_ioctl(struct snd_hwdep * hw, struct file *file, - unsigned int cmd, unsigned long arg) -{ - struct soundscape *sscape = get_hwdep_soundscape(hw); - int err = -EBUSY; - - switch (cmd) { - case SND_SSCAPE_LOAD_BOOTB: - { - register struct sscape_bootblock __user *bb = (struct sscape_bootblock __user *) arg; - - /* - * We are going to have to copy this data into a special - * DMA-able buffer before we can upload it. We shall therefore - * just check that the data pointer is valid for now ... - */ - if ( !access_ok(VERIFY_READ, bb->code, sizeof(bb->code)) ) - return -EFAULT; - - /* - * Now check that we can write the firmware version number too... - */ - if ( !access_ok(VERIFY_WRITE, &bb->version, sizeof(bb->version)) ) - return -EFAULT; - - err = sscape_upload_bootblock(sscape, bb); - } - break; - - case SND_SSCAPE_LOAD_MCODE: - { - register const struct sscape_microcode __user *mc = (const struct sscape_microcode __user *) arg; - - err = sscape_upload_microcode(sscape, mc); - } - break; - - default: - err = -EINVAL; - break; - } /* switch */ + release_firmware(init_fw); return err; } - /* * Mixer control for the SoundScape's MIDI device. */ @@ -920,7 +805,7 @@ static int mpu401_open(struct snd_mpu401 * mpu) spin_lock_irqsave(&sscape->fwlock, flags); - if (sscape->hw_in_use || (sscape->midi_usage == ULONG_MAX)) { + if (sscape->midi_usage == ULONG_MAX) { err = -EBUSY; } else { ++(sscape->midi_usage); @@ -1053,13 +938,6 @@ static int __devinit create_ad1845(struct snd_card *card, unsigned port, } } - strcpy(card->driver, "SoundScape"); - strcpy(card->shortname, pcm->name); - snprintf(card->longname, sizeof(card->longname), - "%s at 0x%lx, IRQ %d, DMA1 %d, DMA2 %d\n", - pcm->name, chip->port, chip->irq, - chip->dma1, chip->dma2); - sscape->chip = chip; } @@ -1162,29 +1040,6 @@ static int __devinit create_sscape(int dev, struct snd_card *card) return -ENXIO; } - if (sscape->type != SSCAPE_VIVO) { - /* - * Now create the hardware-specific device so that we can - * load the microcode into the on-board processor. - * We cannot use the MPU-401 MIDI system until this firmware - * has been loaded into the card. - */ - err = snd_hwdep_new(card, "MC68EC000", 0, &(sscape->hw)); - if (err < 0) { - printk(KERN_ERR "sscape: Failed to create " - "firmware device\n"); - goto _release_dma; - } - strlcpy(sscape->hw->name, "SoundScape M68K", - sizeof(sscape->hw->name)); - sscape->hw->name[sizeof(sscape->hw->name) - 1] = '\0'; - sscape->hw->iface = SNDRV_HWDEP_IFACE_SSCAPE; - sscape->hw->ops.open = sscape_hw_open; - sscape->hw->ops.release = sscape_hw_release; - sscape->hw->ops.ioctl = sscape_hw_ioctl; - sscape->hw->private_data = sscape; - } - /* * Tell the on-board devices where their resources are (I think - * I can't be sure without a datasheet ... So many magic values!) @@ -1222,28 +1077,56 @@ static int __devinit create_sscape(int dev, struct snd_card *card) wss_port[dev], irq[dev]); goto _release_dma; } + strcpy(card->driver, "SoundScape"); + strcpy(card->shortname, name); + snprintf(card->longname, sizeof(card->longname), + "%s at 0x%lx, IRQ %d, DMA1 %d, DMA2 %d\n", + name, sscape->chip->port, sscape->chip->irq, + sscape->chip->dma1, sscape->chip->dma2); + #define MIDI_DEVNUM 0 if (sscape->type != SSCAPE_VIVO) { - err = create_mpu401(card, MIDI_DEVNUM, port[dev], mpu_irq[dev]); - if (err < 0) { - printk(KERN_ERR "sscape: Failed to create " - "MPU-401 device at 0x%lx\n", - port[dev]); - goto _release_dma; - } + err = sscape_upload_bootblock(card); + if (err >= 0) + err = sscape_upload_microcode(card, err); - /* - * Enable the master IRQ ... - */ - sscape_write(sscape, GA_INTENA_REG, 0x80); - - /* - * Initialize mixer - */ - sscape->midi_vol = 0; - host_write_ctrl_unsafe(sscape->io_base, CMD_SET_MIDI_VOL, 100); - host_write_ctrl_unsafe(sscape->io_base, 0, 100); - host_write_ctrl_unsafe(sscape->io_base, CMD_XXX_MIDI_VOL, 100); + if (err == 0) { + err = create_mpu401(card, MIDI_DEVNUM, port[dev], + mpu_irq[dev]); + if (err < 0) { + printk(KERN_ERR "sscape: Failed to create " + "MPU-401 device at 0x%lx\n", + port[dev]); + goto _release_dma; + } + + /* + * Enable the master IRQ ... + */ + sscape_write(sscape, GA_INTENA_REG, 0x80); + + /* + * Initialize mixer + */ + spin_lock_irqsave(&sscape->lock, flags); + sscape->midi_vol = 0; + host_write_ctrl_unsafe(sscape->io_base, + CMD_SET_MIDI_VOL, 100); + host_write_ctrl_unsafe(sscape->io_base, + sscape->midi_vol, 100); + host_write_ctrl_unsafe(sscape->io_base, + CMD_XXX_MIDI_VOL, 100); + host_write_ctrl_unsafe(sscape->io_base, + sscape->midi_vol, 100); + host_write_ctrl_unsafe(sscape->io_base, + CMD_SET_EXTMIDI, 100); + host_write_ctrl_unsafe(sscape->io_base, + 0, 100); + host_write_ctrl_unsafe(sscape->io_base, CMD_ACK, 100); + + set_midi_mode_unsafe(sscape->io_base); + spin_unlock_irqrestore(&sscape->lock, flags); + } } /* @@ -1301,11 +1184,12 @@ static int __devinit snd_sscape_probe(struct device *pdev, unsigned int dev) sscape->type = SSCAPE; dma[dev] &= 0x03; + snd_card_set_dev(card, pdev); + ret = create_sscape(dev, card); if (ret < 0) goto _release_card; - snd_card_set_dev(card, pdev); if ((ret = snd_card_register(card)) < 0) { printk(KERN_ERR "sscape: Failed to register sound card\n"); goto _release_card; @@ -1426,12 +1310,12 @@ static int __devinit sscape_pnp_detect(struct pnp_card_link *pcard, wss_port[idx] = pnp_port_start(dev, 1); dma2[idx] = pnp_dma(dev, 1); } + snd_card_set_dev(card, &pcard->card->dev); ret = create_sscape(idx, card); if (ret < 0) goto _release_card; - snd_card_set_dev(card, &pcard->card->dev); if ((ret = snd_card_register(card)) < 0) { printk(KERN_ERR "sscape: Failed to register sound card\n"); goto _release_card; -- cgit v1.3-8-gc7d7 From 4a3127693001c61a21d1ce680db6340623f52e93 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Wed, 30 Sep 2009 13:05:23 +0200 Subject: x86: Turn the copy_from_user check into an (optional) compile time warning A previous patch added the buffer size check to copy_from_user(). One of the things learned from analyzing the result of the previous patch is that in general, gcc is really good at proving that the code contains sufficient security checks to not need to do a runtime check. But that for those cases where gcc could not prove this, there was a relatively high percentage of real security issues. This patch turns the case of "gcc cannot prove" into a compile time warning, as long as a sufficiently new gcc is in use that supports this. The objective is that these warnings will trigger developers checking new cases out before a security hole enters a linux kernel release. Signed-off-by: Arjan van de Ven Cc: Linus Torvalds Cc: "David S. Miller" Cc: James Morris Cc: Jan Beulich LKML-Reference: <20090930130523.348ae6c4@infradead.org> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uaccess_32.h | 12 +++++++++--- arch/x86/lib/usercopy_32.c | 6 ++++++ include/linux/compiler-gcc4.h | 3 +++ include/linux/compiler.h | 4 ++++ 4 files changed, 22 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index 582d6aef7417..952f9e793c3e 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -191,6 +191,13 @@ unsigned long __must_check _copy_from_user(void *to, const void __user *from, unsigned long n); + +extern void copy_from_user_overflow(void) +#ifdef CONFIG_DEBUG_STACKOVERFLOW + __compiletime_warning("copy_from_user() buffer size is not provably correct") +#endif +; + static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n) @@ -200,10 +207,9 @@ static inline unsigned long __must_check copy_from_user(void *to, if (likely(sz == -1 || sz >= n)) ret = _copy_from_user(to, from, n); -#ifdef CONFIG_DEBUG_VM else - WARN(1, "Buffer overflow detected!\n"); -#endif + copy_from_user_overflow(); + return ret; } diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index 8498684e45b0..e218d5df85ff 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c @@ -883,3 +883,9 @@ _copy_from_user(void *to, const void __user *from, unsigned long n) return n; } EXPORT_SYMBOL(_copy_from_user); + +void copy_from_user_overflow(void) +{ + WARN(1, "Buffer overflow detected!\n"); +} +EXPORT_SYMBOL(copy_from_user_overflow); diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h index a3aef5d55dba..f1709c1f9eae 100644 --- a/include/linux/compiler-gcc4.h +++ b/include/linux/compiler-gcc4.h @@ -39,3 +39,6 @@ #endif #define __compiletime_object_size(obj) __builtin_object_size(obj, 0) +#if __GNUC_MINOR__ >= 4 +#define __compiletime_warning(message) __attribute__((warning(message))) +#endif diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 8e54108688f9..950356311f12 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -270,6 +270,10 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); #ifndef __compiletime_object_size # define __compiletime_object_size(obj) -1 #endif +#ifndef __compiletime_warning +# define __compiletime_warning(message) +#endif + /* * Prevent the compiler from merging or refetching accesses. The compiler * is also forbidden from reordering successive instances of ACCESS_ONCE(), -- cgit v1.3-8-gc7d7 From 88439ac793934a47f47ad285656b63d09f5937c8 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Thu, 1 Oct 2009 10:32:47 +0300 Subject: ASoC: add support for multiple cards/codecs in debugfs In order to support multiple codecs on the same system in the debugfs the directory hierarchy need to be changed by adding directory per codec under the asoc direcorty: debugfs/asoc/{dev_name(socdev->dev)}-{codec->name}/codec_reg /dapm_pop_time /dapm/{widgets} With the original implementation only the debugfs files are only created for the first codec, other codecs loaded later would fail to create the debugfs files (since they are already exist). Furthermore in this situation any of the codecs has been removed, would cause the debugfs entries to disappear, regardless if the codec, which created them are still loaded (the one which loaded first). Signed-off-by: Peter Ujfalusi Signed-off-by: Mark Brown --- include/sound/soc.h | 1 + sound/soc/soc-core.c | 26 +++++++++++++++++++------- 2 files changed, 20 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index 475cb7ed6bec..0b1f917a53ba 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -413,6 +413,7 @@ struct snd_soc_codec { unsigned int num_dai; #ifdef CONFIG_DEBUG_FS + struct dentry *debugfs_codec_root; struct dentry *debugfs_reg; struct dentry *debugfs_pop_time; struct dentry *debugfs_dapm; diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index f5b356f8acfb..e4ab36daf3f7 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -1254,21 +1254,35 @@ static const struct file_operations codec_reg_fops = { static void soc_init_codec_debugfs(struct snd_soc_codec *codec) { + char codec_root[128]; + + snprintf(codec_root, sizeof(codec_root), + "%s-%s", dev_name(codec->socdev->dev), codec->name); + + codec->debugfs_codec_root = debugfs_create_dir(codec_root, + debugfs_root); + if (!codec->debugfs_codec_root) { + printk(KERN_WARNING + "ASoC: Failed to create codec debugfs directory\n"); + return; + } + codec->debugfs_reg = debugfs_create_file("codec_reg", 0644, - debugfs_root, codec, - &codec_reg_fops); + codec->debugfs_codec_root, + codec, &codec_reg_fops); if (!codec->debugfs_reg) printk(KERN_WARNING "ASoC: Failed to create codec register debugfs file\n"); codec->debugfs_pop_time = debugfs_create_u32("dapm_pop_time", 0744, - debugfs_root, + codec->debugfs_codec_root, &codec->pop_time); if (!codec->debugfs_pop_time) printk(KERN_WARNING "Failed to create pop time debugfs file\n"); - codec->debugfs_dapm = debugfs_create_dir("dapm", debugfs_root); + codec->debugfs_dapm = debugfs_create_dir("dapm", + codec->debugfs_codec_root); if (!codec->debugfs_dapm) printk(KERN_WARNING "Failed to create DAPM debugfs directory\n"); @@ -1278,9 +1292,7 @@ static void soc_init_codec_debugfs(struct snd_soc_codec *codec) static void soc_cleanup_codec_debugfs(struct snd_soc_codec *codec) { - debugfs_remove_recursive(codec->debugfs_dapm); - debugfs_remove(codec->debugfs_pop_time); - debugfs_remove(codec->debugfs_reg); + debugfs_remove_recursive(codec->debugfs_codec_root); } #else -- cgit v1.3-8-gc7d7 From 7c68af6e32c73992bad24107311f3433c89016e2 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sat, 19 Sep 2009 09:40:22 +0300 Subject: core, x86: Add user return notifiers Add a general per-cpu notifier that is called whenever the kernel is about to return to userspace. The notifier uses a thread_info flag and existing checks, so there is no impact on user return or context switch fast paths. This will be used initially to speed up KVM task switching by lazily updating MSRs. Signed-off-by: Avi Kivity LKML-Reference: <1253342422-13811-1-git-send-email-avi@redhat.com> Signed-off-by: H. Peter Anvin --- arch/Kconfig | 10 ++++++++ arch/x86/Kconfig | 1 + arch/x86/include/asm/thread_info.h | 7 ++++-- arch/x86/kernel/process.c | 2 ++ arch/x86/kernel/signal.c | 3 +++ include/linux/user-return-notifier.h | 42 ++++++++++++++++++++++++++++++++ kernel/user-return-notifier.c | 46 ++++++++++++++++++++++++++++++++++++ 7 files changed, 109 insertions(+), 2 deletions(-) create mode 100644 include/linux/user-return-notifier.h create mode 100644 kernel/user-return-notifier.c (limited to 'include') diff --git a/arch/Kconfig b/arch/Kconfig index 7f418bbc261a..4e312fffbfd7 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -83,6 +83,13 @@ config KRETPROBES def_bool y depends on KPROBES && HAVE_KRETPROBES +config USER_RETURN_NOTIFIER + bool + depends on HAVE_USER_RETURN_NOTIFIER + help + Provide a kernel-internal notification when a cpu is about to + switch to user mode. + config HAVE_IOREMAP_PROT bool @@ -126,4 +133,7 @@ config HAVE_DMA_API_DEBUG config HAVE_DEFAULT_NO_SPIN_MUTEXES bool +config HAVE_USER_RETURN_NOTIFIER + bool + source "kernel/gcov/Kconfig" diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 8da93745c087..1df175d15aa8 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -50,6 +50,7 @@ config X86 select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_LZMA select HAVE_ARCH_KMEMCHECK + select HAVE_USER_RETURN_NOTIFIER config OUTPUT_FORMAT string diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index d27d0a2fec4c..375c917c37d2 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -83,6 +83,7 @@ struct thread_info { #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ #define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */ +#define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ #define TIF_NOTSC 16 /* TSC is not accessible in userland */ #define TIF_IA32 17 /* 32bit process */ #define TIF_FORK 18 /* ret_from_fork */ @@ -107,6 +108,7 @@ struct thread_info { #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) #define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY) +#define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY) #define _TIF_NOTSC (1 << TIF_NOTSC) #define _TIF_IA32 (1 << TIF_IA32) #define _TIF_FORK (1 << TIF_FORK) @@ -142,13 +144,14 @@ struct thread_info { /* Only used for 64 bit */ #define _TIF_DO_NOTIFY_MASK \ - (_TIF_SIGPENDING|_TIF_MCE_NOTIFY|_TIF_NOTIFY_RESUME) + (_TIF_SIGPENDING | _TIF_MCE_NOTIFY | _TIF_NOTIFY_RESUME | \ + _TIF_USER_RETURN_NOTIFY) /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW \ (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_NOTSC) -#define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW +#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) #define PREEMPT_ACTIVE 0x10000000 diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 5284cd2b5776..e51b056fc88f 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -224,6 +225,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, */ memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); } + propagate_user_return_notify(prev_p, next_p); } int sys_fork(struct pt_regs *regs) diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 6a44a76055ad..c49f90f7957a 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -872,6 +873,8 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) if (current->replacement_session_keyring) key_replace_session_keyring(); } + if (thread_info_flags & _TIF_USER_RETURN_NOTIFY) + fire_user_return_notifiers(); #ifdef CONFIG_X86_32 clear_thread_flag(TIF_IRET); diff --git a/include/linux/user-return-notifier.h b/include/linux/user-return-notifier.h new file mode 100644 index 000000000000..b6ac056291d7 --- /dev/null +++ b/include/linux/user-return-notifier.h @@ -0,0 +1,42 @@ +#ifndef _LINUX_USER_RETURN_NOTIFIER_H +#define _LINUX_USER_RETURN_NOTIFIER_H + +#ifdef CONFIG_USER_RETURN_NOTIFIER + +#include +#include + +struct user_return_notifier { + void (*on_user_return)(struct user_return_notifier *urn); + struct hlist_node link; +}; + + +void user_return_notifier_register(struct user_return_notifier *urn); +void user_return_notifier_unregister(struct user_return_notifier *urn); + +static inline void propagate_user_return_notify(struct task_struct *prev, + struct task_struct *next) +{ + if (test_tsk_thread_flag(prev, TIF_USER_RETURN_NOTIFY)) { + clear_tsk_thread_flag(prev, TIF_USER_RETURN_NOTIFY); + set_tsk_thread_flag(next, TIF_USER_RETURN_NOTIFY); + } +} + +void fire_user_return_notifiers(void); + +#else + +struct user_return_notifier {}; + +static inline void propagate_user_return_notify(struct task_struct *prev, + struct task_struct *next) +{ +} + +static inline void fire_user_return_notifiers(void) {} + +#endif + +#endif diff --git a/kernel/user-return-notifier.c b/kernel/user-return-notifier.c new file mode 100644 index 000000000000..530ccb816513 --- /dev/null +++ b/kernel/user-return-notifier.c @@ -0,0 +1,46 @@ + +#include +#include +#include +#include + +static DEFINE_PER_CPU(struct hlist_head, return_notifier_list); + +#define URN_LIST_HEAD per_cpu(return_notifier_list, raw_smp_processor_id()) + +/* + * Request a notification when the current cpu returns to userspace. Must be + * called in atomic context. The notifier will also be called in atomic + * context. + */ +void user_return_notifier_register(struct user_return_notifier *urn) +{ + set_tsk_thread_flag(current, TIF_USER_RETURN_NOTIFY); + hlist_add_head(&urn->link, &URN_LIST_HEAD); +} +EXPORT_SYMBOL_GPL(user_return_notifier_register); + +/* + * Removes a registered user return notifier. Must be called from atomic + * context, and from the same cpu registration occured in. + */ +void user_return_notifier_unregister(struct user_return_notifier *urn) +{ + hlist_del(&urn->link); + if (hlist_empty(&URN_LIST_HEAD)) + clear_tsk_thread_flag(current, TIF_USER_RETURN_NOTIFY); +} +EXPORT_SYMBOL_GPL(user_return_notifier_unregister); + +/* Calls registered user return notifiers */ +void fire_user_return_notifiers(void) +{ + struct user_return_notifier *urn; + struct hlist_node *tmp1, *tmp2; + struct hlist_head *head; + + head = &get_cpu_var(return_notifier_list); + hlist_for_each_entry_safe(urn, tmp1, tmp2, head, link) + urn->on_user_return(urn); + put_cpu_var(); +} -- cgit v1.3-8-gc7d7 From 1122a26f2abe4245ccdaed95ec23f63fe086b332 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 30 Sep 2009 13:52:12 +0200 Subject: block: use normal I/O path for discard requests prepare_discard_fn() was being called in a place where memory allocation was effectively impossible. This makes it inappropriate for all but the most trivial translations of Linux's DISCARD operation to the block command set. Additionally adding a payload there makes the ownership of the bio backing unclear as it's now allocated by the device driver and not the submitter as usual. It is replaced with QUEUE_FLAG_DISCARD which is used to indicate whether the queue supports discard operations or not. blkdev_issue_discard now allocates a one-page, sector-length payload which is the right thing for the common ATA and SCSI implementations. The mtd implementation of prepare_discard_fn() is replaced with simply checking for the request being a discard. Largely based on a previous patch from Matthew Wilcox which did the prepare_discard_fn but not the different payload allocation yet. Signed-off-by: Christoph Hellwig --- block/blk-barrier.c | 35 ++++++++++++++++++++++++++++++----- block/blk-core.c | 3 +-- block/blk-settings.c | 17 ----------------- drivers/mtd/mtd_blkdevs.c | 19 +++++-------------- drivers/staging/dst/dcore.c | 2 +- include/linux/blkdev.h | 6 ++---- 6 files changed, 39 insertions(+), 43 deletions(-) (limited to 'include') diff --git a/block/blk-barrier.c b/block/blk-barrier.c index 6593ab39cfe9..21f5025c3945 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c @@ -350,6 +350,7 @@ static void blkdev_discard_end_io(struct bio *bio, int err) if (bio->bi_private) complete(bio->bi_private); + __free_page(bio_page(bio)); bio_put(bio); } @@ -372,26 +373,44 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, struct request_queue *q = bdev_get_queue(bdev); int type = flags & DISCARD_FL_BARRIER ? DISCARD_BARRIER : DISCARD_NOBARRIER; + struct bio *bio; + struct page *page; int ret = 0; if (!q) return -ENXIO; - if (!q->prepare_discard_fn) + if (!blk_queue_discard(q)) return -EOPNOTSUPP; while (nr_sects && !ret) { - struct bio *bio = bio_alloc(gfp_mask, 0); - if (!bio) - return -ENOMEM; + unsigned int sector_size = q->limits.logical_block_size; + bio = bio_alloc(gfp_mask, 1); + if (!bio) + goto out; + bio->bi_sector = sector; bio->bi_end_io = blkdev_discard_end_io; bio->bi_bdev = bdev; if (flags & DISCARD_FL_WAIT) bio->bi_private = &wait; - bio->bi_sector = sector; + /* + * Add a zeroed one-sector payload as that's what + * our current implementations need. If we'll ever need + * more the interface will need revisiting. + */ + page = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!page) + goto out_free_bio; + if (bio_add_pc_page(q, bio, page, sector_size, 0) < sector_size) + goto out_free_page; + /* + * And override the bio size - the way discard works we + * touch many more blocks on disk than the actual payload + * length. + */ if (nr_sects > queue_max_hw_sectors(q)) { bio->bi_size = queue_max_hw_sectors(q) << 9; nr_sects -= queue_max_hw_sectors(q); @@ -414,5 +433,11 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, bio_put(bio); } return ret; +out_free_page: + __free_page(page); +out_free_bio: + bio_put(bio); +out: + return -ENOMEM; } EXPORT_SYMBOL(blkdev_issue_discard); diff --git a/block/blk-core.c b/block/blk-core.c index 8135228e4b29..80a020dd1580 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1124,7 +1124,6 @@ void init_request_from_bio(struct request *req, struct bio *bio) req->cmd_flags |= REQ_DISCARD; if (bio_rw_flagged(bio, BIO_RW_BARRIER)) req->cmd_flags |= REQ_SOFTBARRIER; - req->q->prepare_discard_fn(req->q, req); } else if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) req->cmd_flags |= REQ_HARDBARRIER; @@ -1470,7 +1469,7 @@ static inline void __generic_make_request(struct bio *bio) goto end_io; if (bio_rw_flagged(bio, BIO_RW_DISCARD) && - !q->prepare_discard_fn) { + !blk_queue_discard(q)) { err = -EOPNOTSUPP; goto end_io; } diff --git a/block/blk-settings.c b/block/blk-settings.c index eaf122ff5f16..d29498ef1eb5 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -33,23 +33,6 @@ void blk_queue_prep_rq(struct request_queue *q, prep_rq_fn *pfn) } EXPORT_SYMBOL(blk_queue_prep_rq); -/** - * blk_queue_set_discard - set a discard_sectors function for queue - * @q: queue - * @dfn: prepare_discard function - * - * It's possible for a queue to register a discard callback which is used - * to transform a discard request into the appropriate type for the - * hardware. If none is registered, then discard requests are failed - * with %EOPNOTSUPP. - * - */ -void blk_queue_set_discard(struct request_queue *q, prepare_discard_fn *dfn) -{ - q->prepare_discard_fn = dfn; -} -EXPORT_SYMBOL(blk_queue_set_discard); - /** * blk_queue_merge_bvec - set a merge_bvec function for queue * @q: queue diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index 0acbf4f5be50..8ca17a3e96ea 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -32,14 +32,6 @@ struct mtd_blkcore_priv { spinlock_t queue_lock; }; -static int blktrans_discard_request(struct request_queue *q, - struct request *req) -{ - req->cmd_type = REQ_TYPE_LINUX_BLOCK; - req->cmd[0] = REQ_LB_OP_DISCARD; - return 0; -} - static int do_blktrans_request(struct mtd_blktrans_ops *tr, struct mtd_blktrans_dev *dev, struct request *req) @@ -52,10 +44,6 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr, buf = req->buffer; - if (req->cmd_type == REQ_TYPE_LINUX_BLOCK && - req->cmd[0] == REQ_LB_OP_DISCARD) - return tr->discard(dev, block, nsect); - if (!blk_fs_request(req)) return -EIO; @@ -63,6 +51,9 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr, get_capacity(req->rq_disk)) return -EIO; + if (blk_discard_rq(req)) + return tr->discard(dev, block, nsect); + switch(rq_data_dir(req)) { case READ: for (; nsect > 0; nsect--, block++, buf += tr->blksize) @@ -380,8 +371,8 @@ int register_mtd_blktrans(struct mtd_blktrans_ops *tr) tr->blkcore_priv->rq->queuedata = tr; blk_queue_logical_block_size(tr->blkcore_priv->rq, tr->blksize); if (tr->discard) - blk_queue_set_discard(tr->blkcore_priv->rq, - blktrans_discard_request); + queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, + tr->blkcore_priv->rq); tr->blkshift = ffs(tr->blksize) - 1; diff --git a/drivers/staging/dst/dcore.c b/drivers/staging/dst/dcore.c index ac8577358ba0..5e8db0677582 100644 --- a/drivers/staging/dst/dcore.c +++ b/drivers/staging/dst/dcore.c @@ -102,7 +102,7 @@ static int dst_request(struct request_queue *q, struct bio *bio) struct dst_node *n = q->queuedata; int err = -EIO; - if (bio_empty_barrier(bio) && !q->prepare_discard_fn) { + if (bio_empty_barrier(bio) && !blk_queue_discard(q)) { /* * This is a dirty^Wnice hack, but if we complete this * operation with -EOPNOTSUPP like intended, XFS diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e23a86cae5ac..f62d45e87618 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -82,7 +82,6 @@ enum rq_cmd_type_bits { enum { REQ_LB_OP_EJECT = 0x40, /* eject request */ REQ_LB_OP_FLUSH = 0x41, /* flush request */ - REQ_LB_OP_DISCARD = 0x42, /* discard sectors */ }; /* @@ -261,7 +260,6 @@ typedef void (request_fn_proc) (struct request_queue *q); typedef int (make_request_fn) (struct request_queue *q, struct bio *bio); typedef int (prep_rq_fn) (struct request_queue *, struct request *); typedef void (unplug_fn) (struct request_queue *); -typedef int (prepare_discard_fn) (struct request_queue *, struct request *); struct bio_vec; struct bvec_merge_data { @@ -340,7 +338,6 @@ struct request_queue make_request_fn *make_request_fn; prep_rq_fn *prep_rq_fn; unplug_fn *unplug_fn; - prepare_discard_fn *prepare_discard_fn; merge_bvec_fn *merge_bvec_fn; prepare_flush_fn *prepare_flush_fn; softirq_done_fn *softirq_done_fn; @@ -460,6 +457,7 @@ struct request_queue #define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */ #define QUEUE_FLAG_IO_STAT 15 /* do IO stats */ #define QUEUE_FLAG_CQ 16 /* hardware does queuing */ +#define QUEUE_FLAG_DISCARD 17 /* supports DISCARD */ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_CLUSTER) | \ @@ -591,6 +589,7 @@ enum { #define blk_queue_flushing(q) ((q)->ordseq) #define blk_queue_stackable(q) \ test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags) +#define blk_queue_discard(q) test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags) #define blk_fs_request(rq) ((rq)->cmd_type == REQ_TYPE_FS) #define blk_pc_request(rq) ((rq)->cmd_type == REQ_TYPE_BLOCK_PC) @@ -955,7 +954,6 @@ extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *); extern void blk_queue_dma_alignment(struct request_queue *, int); extern void blk_queue_update_dma_alignment(struct request_queue *, int); extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); -extern void blk_queue_set_discard(struct request_queue *, prepare_discard_fn *); extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); -- cgit v1.3-8-gc7d7 From ca80650cfbde5b17a5fa957a261c7973f84599a7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 30 Sep 2009 13:54:20 +0200 Subject: block: allow large discard requests Currently we set the bio size to the byte equivalent of the blocks to be trimmed when submitting the initial DISCARD ioctl. That means it is subject to the max_hw_sectors limitation of the HBA which is much lower than the size of a DISCARD request we can support. Add a separate max_discard_sectors tunable to limit the size for discard requests. We limit the max discard request size in bytes to 32bit as that is the limit for bio->bi_size. This could be much larger if we had a way to pass that information through the block layer. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-barrier.c | 10 ++++++---- block/blk-core.c | 3 ++- block/blk-settings.c | 13 +++++++++++++ include/linux/blkdev.h | 3 +++ 4 files changed, 24 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/block/blk-barrier.c b/block/blk-barrier.c index 21f5025c3945..8873b9b439ff 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c @@ -385,6 +385,8 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, while (nr_sects && !ret) { unsigned int sector_size = q->limits.logical_block_size; + unsigned int max_discard_sectors = + min(q->limits.max_discard_sectors, UINT_MAX >> 9); bio = bio_alloc(gfp_mask, 1); if (!bio) @@ -411,10 +413,10 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, * touch many more blocks on disk than the actual payload * length. */ - if (nr_sects > queue_max_hw_sectors(q)) { - bio->bi_size = queue_max_hw_sectors(q) << 9; - nr_sects -= queue_max_hw_sectors(q); - sector += queue_max_hw_sectors(q); + if (nr_sects > max_discard_sectors) { + bio->bi_size = max_discard_sectors << 9; + nr_sects -= max_discard_sectors; + sector += max_discard_sectors; } else { bio->bi_size = nr_sects << 9; nr_sects = 0; diff --git a/block/blk-core.c b/block/blk-core.c index 80a020dd1580..34504f309728 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1436,7 +1436,8 @@ static inline void __generic_make_request(struct bio *bio) goto end_io; } - if (unlikely(nr_sectors > queue_max_hw_sectors(q))) { + if (unlikely(!bio_rw_flagged(bio, BIO_RW_DISCARD) && + nr_sectors > queue_max_hw_sectors(q))) { printk(KERN_ERR "bio too big device %s (%u > %u)\n", bdevname(bio->bi_bdev, b), bio_sectors(bio), diff --git a/block/blk-settings.c b/block/blk-settings.c index d29498ef1eb5..e0695bca7027 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -96,6 +96,7 @@ void blk_set_default_limits(struct queue_limits *lim) lim->max_segment_size = MAX_SEGMENT_SIZE; lim->max_sectors = BLK_DEF_MAX_SECTORS; lim->max_hw_sectors = INT_MAX; + lim->max_discard_sectors = SAFE_MAX_SECTORS; lim->logical_block_size = lim->physical_block_size = lim->io_min = 512; lim->bounce_pfn = (unsigned long)(BLK_BOUNCE_ANY >> PAGE_SHIFT); lim->alignment_offset = 0; @@ -238,6 +239,18 @@ void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_sectors) } EXPORT_SYMBOL(blk_queue_max_hw_sectors); +/** + * blk_queue_max_discard_sectors - set max sectors for a single discard + * @q: the request queue for the device + * @max_discard: maximum number of sectors to discard + **/ +void blk_queue_max_discard_sectors(struct request_queue *q, + unsigned int max_discard_sectors) +{ + q->limits.max_discard_sectors = max_discard_sectors; +} +EXPORT_SYMBOL(blk_queue_max_discard_sectors); + /** * blk_queue_max_phys_segments - set max phys segments for a request for this queue * @q: the request queue for the device diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f62d45e87618..1a03b715dfad 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -311,6 +311,7 @@ struct queue_limits { unsigned int alignment_offset; unsigned int io_min; unsigned int io_opt; + unsigned int max_discard_sectors; unsigned short logical_block_size; unsigned short max_hw_segments; @@ -928,6 +929,8 @@ extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int); extern void blk_queue_max_phys_segments(struct request_queue *, unsigned short); extern void blk_queue_max_hw_segments(struct request_queue *, unsigned short); extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); +extern void blk_queue_max_discard_sectors(struct request_queue *q, + unsigned int max_discard_sectors); extern void blk_queue_logical_block_size(struct request_queue *, unsigned short); extern void blk_queue_physical_block_size(struct request_queue *, unsigned short); extern void blk_queue_alignment_offset(struct request_queue *q, -- cgit v1.3-8-gc7d7 From 1a35e0f6443f4266dad4c569c55c57a9032596fa Mon Sep 17 00:00:00 2001 From: Jun'ichi Nomura Date: Thu, 1 Oct 2009 21:16:13 +0200 Subject: Add a tracepoint for block request remapping Since 2.6.31 now has request-based device-mapper, it's useful to have a tracepoint for request-remapping as well as bio-remapping. This patch adds a tracepoint for request-remapping, trace_block_rq_remap(). Signed-off-by: Kiyoshi Ueda Signed-off-by: Jun'ichi Nomura Cc: Alasdair G Kergon Cc: Li Zefan Signed-off-by: Jens Axboe --- block/blk-core.c | 1 + include/linux/blktrace_api.h | 2 +- include/trace/events/block.h | 33 +++++++++++++++++++++++++++++++++ kernel/trace/blktrace.c | 34 ++++++++++++++++++++++++++++++++++ 4 files changed, 69 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/block/blk-core.c b/block/blk-core.c index 34504f309728..ddaaea4fdffc 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -34,6 +34,7 @@ #include "blk.h" EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap); +EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete); static int __make_request(struct request_queue *q, struct bio *bio); diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 622939a23299..3b73b9992b26 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -212,7 +212,7 @@ extern struct attribute_group blk_trace_attr_group; # define blk_trace_startstop(q, start) (-ENOTTY) # define blk_trace_remove(q) (-ENOTTY) # define blk_add_trace_msg(q, fmt, ...) do { } while (0) -# define blk_trace_remove_sysfs(struct device *dev) do { } while (0) +# define blk_trace_remove_sysfs(dev) do { } while (0) static inline int blk_trace_init_sysfs(struct device *dev) { return 0; diff --git a/include/trace/events/block.h b/include/trace/events/block.h index d86af94691c2..00405b5f624a 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -488,6 +488,39 @@ TRACE_EVENT(block_remap, (unsigned long long)__entry->old_sector) ); +TRACE_EVENT(block_rq_remap, + + TP_PROTO(struct request_queue *q, struct request *rq, dev_t dev, + sector_t from), + + TP_ARGS(q, rq, dev, from), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __field( dev_t, old_dev ) + __field( sector_t, old_sector ) + __array( char, rwbs, 6 ) + ), + + TP_fast_assign( + __entry->dev = disk_devt(rq->rq_disk); + __entry->sector = blk_rq_pos(rq); + __entry->nr_sector = blk_rq_sectors(rq); + __entry->old_dev = dev; + __entry->old_sector = from; + blk_fill_rwbs_rq(__entry->rwbs, rq); + ), + + TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, + (unsigned long long)__entry->sector, + __entry->nr_sector, + MAJOR(__entry->old_dev), MINOR(__entry->old_dev), + (unsigned long long)__entry->old_sector) +); + #endif /* _TRACE_BLOCK_H */ /* This part must be outside protection */ diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 60b5c5a3d4b4..d9d6206e0b14 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -855,6 +855,37 @@ static void blk_add_trace_remap(struct request_queue *q, struct bio *bio, sizeof(r), &r); } +/** + * blk_add_trace_rq_remap - Add a trace for a request-remap operation + * @q: queue the io is for + * @rq: the source request + * @dev: target device + * @from: source sector + * + * Description: + * Device mapper remaps request to other devices. + * Add a trace for that action. + * + **/ +static void blk_add_trace_rq_remap(struct request_queue *q, + struct request *rq, dev_t dev, + sector_t from) +{ + struct blk_trace *bt = q->blk_trace; + struct blk_io_trace_remap r; + + if (likely(!bt)) + return; + + r.device_from = cpu_to_be32(dev); + r.device_to = cpu_to_be32(disk_devt(rq->rq_disk)); + r.sector_from = cpu_to_be64(from); + + __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), + rq_data_dir(rq), BLK_TA_REMAP, !!rq->errors, + sizeof(r), &r); +} + /** * blk_add_driver_data - Add binary message with driver-specific data * @q: queue the io is for @@ -922,10 +953,13 @@ static void blk_register_tracepoints(void) WARN_ON(ret); ret = register_trace_block_remap(blk_add_trace_remap); WARN_ON(ret); + ret = register_trace_block_rq_remap(blk_add_trace_rq_remap); + WARN_ON(ret); } static void blk_unregister_tracepoints(void) { + unregister_trace_block_rq_remap(blk_add_trace_rq_remap); unregister_trace_block_remap(blk_add_trace_remap); unregister_trace_block_split(blk_add_trace_split); unregister_trace_block_unplug_io(blk_add_trace_unplug_io); -- cgit v1.3-8-gc7d7 From b411b3637fa71fce9cf2acf0639009500f5892fe Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 25 Sep 2009 16:07:19 -0700 Subject: The DRBD driver Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- .../blockdev/drbd/DRBD-8.3-data-packets.svg | 588 +++ Documentation/blockdev/drbd/DRBD-data-packets.svg | 459 ++ Documentation/blockdev/drbd/README.txt | 16 + Documentation/blockdev/drbd/conn-states-8.dot | 18 + Documentation/blockdev/drbd/disk-states-8.dot | 16 + .../drbd/drbd-connection-state-overview.dot | 85 + Documentation/blockdev/drbd/node-states-8.dot | 14 + MAINTAINERS | 13 + drivers/block/Kconfig | 2 + drivers/block/Makefile | 1 + drivers/block/drbd/Kconfig | 82 + drivers/block/drbd/Makefile | 8 + drivers/block/drbd/drbd_actlog.c | 1484 +++++++ drivers/block/drbd/drbd_bitmap.c | 1327 ++++++ drivers/block/drbd/drbd_int.h | 2258 ++++++++++ drivers/block/drbd/drbd_main.c | 3735 ++++++++++++++++ drivers/block/drbd/drbd_nl.c | 2365 +++++++++++ drivers/block/drbd/drbd_proc.c | 266 ++ drivers/block/drbd/drbd_receiver.c | 4456 ++++++++++++++++++++ drivers/block/drbd/drbd_req.c | 1132 +++++ drivers/block/drbd/drbd_req.h | 327 ++ drivers/block/drbd/drbd_strings.c | 113 + drivers/block/drbd/drbd_tracing.c | 752 ++++ drivers/block/drbd/drbd_tracing.h | 87 + drivers/block/drbd/drbd_vli.h | 351 ++ drivers/block/drbd/drbd_worker.c | 1529 +++++++ drivers/block/drbd/drbd_wrappers.h | 91 + include/linux/drbd.h | 349 ++ include/linux/drbd_limits.h | 137 + include/linux/drbd_nl.h | 137 + include/linux/drbd_tag_magic.h | 83 + include/linux/lru_cache.h | 294 ++ lib/Kconfig | 3 + lib/Makefile | 2 + lib/lru_cache.c | 560 +++ 35 files changed, 23140 insertions(+) create mode 100644 Documentation/blockdev/drbd/DRBD-8.3-data-packets.svg create mode 100644 Documentation/blockdev/drbd/DRBD-data-packets.svg create mode 100644 Documentation/blockdev/drbd/README.txt create mode 100644 Documentation/blockdev/drbd/conn-states-8.dot create mode 100644 Documentation/blockdev/drbd/disk-states-8.dot create mode 100644 Documentation/blockdev/drbd/drbd-connection-state-overview.dot create mode 100644 Documentation/blockdev/drbd/node-states-8.dot create mode 100644 drivers/block/drbd/Kconfig create mode 100644 drivers/block/drbd/Makefile create mode 100644 drivers/block/drbd/drbd_actlog.c create mode 100644 drivers/block/drbd/drbd_bitmap.c create mode 100644 drivers/block/drbd/drbd_int.h create mode 100644 drivers/block/drbd/drbd_main.c create mode 100644 drivers/block/drbd/drbd_nl.c create mode 100644 drivers/block/drbd/drbd_proc.c create mode 100644 drivers/block/drbd/drbd_receiver.c create mode 100644 drivers/block/drbd/drbd_req.c create mode 100644 drivers/block/drbd/drbd_req.h create mode 100644 drivers/block/drbd/drbd_strings.c create mode 100644 drivers/block/drbd/drbd_tracing.c create mode 100644 drivers/block/drbd/drbd_tracing.h create mode 100644 drivers/block/drbd/drbd_vli.h create mode 100644 drivers/block/drbd/drbd_worker.c create mode 100644 drivers/block/drbd/drbd_wrappers.h create mode 100644 include/linux/drbd.h create mode 100644 include/linux/drbd_limits.h create mode 100644 include/linux/drbd_nl.h create mode 100644 include/linux/drbd_tag_magic.h create mode 100644 include/linux/lru_cache.h create mode 100644 lib/lru_cache.c (limited to 'include') diff --git a/Documentation/blockdev/drbd/DRBD-8.3-data-packets.svg b/Documentation/blockdev/drbd/DRBD-8.3-data-packets.svg new file mode 100644 index 000000000000..f87cfa0dc2fb --- /dev/null +++ b/Documentation/blockdev/drbd/DRBD-8.3-data-packets.svg @@ -0,0 +1,588 @@ + + + + + + Master slide + + + + + + + + + + RSDataReply + + + + + + + CsumRSRequest + + + + w_make_resync_request() + + + receive_DataRequest() + + + drbd_endio_read_sec() + + + w_e_end_csum_rs_req() + + + receive_RSDataReply() + + + drbd_endio_write_sec() + + + e_end_resync_block() + + + + + + WriteAck + + + + got_BlockAck() + + + Checksum based Resync, case not in sync + + + DRBD-8.3 data flow + + + w_e_send_csum() + + + + + + + + RSIsInSync + + + + + + + CsumRSRequest + + + + receive_DataRequest() + + + drbd_endio_read_sec() + + + w_e_end_csum_rs_req() + + + got_IsInSync() + + + Checksum based Resync, case in sync + + + + + + + + + + OVReply + + + + + + + OVRequest + + + + receive_OVRequest() + + + drbd_endio_read_sec() + + + w_e_end_ov_req() + + + receive_OVReply() + + + drbd_endio_read_sec() + + + w_e_end_ov_reply() + + + + + + OVResult + + + + got_OVResult() + + + Online verify + + + w_make_ov_request() + + + + + + + + drbd_endio_read_sec() + + + w_make_resync_request() + + + w_e_send_csum() + + + + + drbd_endio_read_sec() + + + + + + rs_begin_io() + + + rs_begin_io() + + + rs_begin_io() + + + rs_complete_io() + + + rs_complete_io() + + + rs_complete_io() + + + rs_begin_io() + + + rs_begin_io() + + + rs_begin_io() + + + rs_complete_io() + + + rs_complete_io() + + + rs_complete_io() + + diff --git a/Documentation/blockdev/drbd/DRBD-data-packets.svg b/Documentation/blockdev/drbd/DRBD-data-packets.svg new file mode 100644 index 000000000000..48a1e2165fec --- /dev/null +++ b/Documentation/blockdev/drbd/DRBD-data-packets.svg @@ -0,0 +1,459 @@ + + + + + + Master slide + + + + + + + + + RSDataReply + + + + + RSDataRequest + + + w_make_resync_request() + + + receive_DataRequest() + + + drbd_endio_read_sec() + + + w_e_end_rsdata_req() + + + receive_RSDataReply() + + + drbd_endio_write_sec() + + + e_end_resync_block() + + + + + WriteAck + + + got_BlockAck() + + + Resync blocks, 4-32K + + + + + + + WriteAck + + + + + Data + + + drbd_make_request() + + + receive_Data() + + + drbd_endio_write_sec() + + + e_end_block() + + + got_BlockAck() + + + Regular mirrored write, 512-32K + + + w_send_dblock() + + + + + drbd_endio_write_pri() + + + + + + + DataReply + + + + + DataRequest + + + drbd_make_request() + + + receive_DataRequest() + + + drbd_endio_read_sec() + + + w_e_end_data_req() + + + Drawing + + receive_DataReply() + + + + Diskless read, 512-32K + + + w_send_read_req() + + + DRBD 8 data flow + + + + + + al_begin_io() + + + al_complete_io() + + + rs_begin_io() + + + rs_complete_io() + + + rs_begin_io() + + + rs_complete_io() + + diff --git a/Documentation/blockdev/drbd/README.txt b/Documentation/blockdev/drbd/README.txt new file mode 100644 index 000000000000..627b0a1bf35e --- /dev/null +++ b/Documentation/blockdev/drbd/README.txt @@ -0,0 +1,16 @@ +Description + + DRBD is a shared-nothing, synchronously replicated block device. It + is designed to serve as a building block for high availability + clusters and in this context, is a "drop-in" replacement for shared + storage. Simplistically, you could see it as a network RAID 1. + + Please visit http://www.drbd.org to find out more. + +The here included files are intended to help understand the implementation + +DRBD-8.3-data-packets.svg, DRBD-data-packets.svg + relates some functions, and write packets. + +conn-states-8.dot, disk-states-8.dot, node-states-8.dot + The sub graphs of DRBD's state transitions diff --git a/Documentation/blockdev/drbd/conn-states-8.dot b/Documentation/blockdev/drbd/conn-states-8.dot new file mode 100644 index 000000000000..025e8cf5e64a --- /dev/null +++ b/Documentation/blockdev/drbd/conn-states-8.dot @@ -0,0 +1,18 @@ +digraph conn_states { + StandAllone -> WFConnection [ label = "ioctl_set_net()" ] + WFConnection -> Unconnected [ label = "unable to bind()" ] + WFConnection -> WFReportParams [ label = "in connect() after accept" ] + WFReportParams -> StandAllone [ label = "checks in receive_param()" ] + WFReportParams -> Connected [ label = "in receive_param()" ] + WFReportParams -> WFBitMapS [ label = "sync_handshake()" ] + WFReportParams -> WFBitMapT [ label = "sync_handshake()" ] + WFBitMapS -> SyncSource [ label = "receive_bitmap()" ] + WFBitMapT -> SyncTarget [ label = "receive_bitmap()" ] + SyncSource -> Connected + SyncTarget -> Connected + SyncSource -> PausedSyncS + SyncTarget -> PausedSyncT + PausedSyncS -> SyncSource + PausedSyncT -> SyncTarget + Connected -> WFConnection [ label = "* on network error" ] +} diff --git a/Documentation/blockdev/drbd/disk-states-8.dot b/Documentation/blockdev/drbd/disk-states-8.dot new file mode 100644 index 000000000000..d06cfb46fb98 --- /dev/null +++ b/Documentation/blockdev/drbd/disk-states-8.dot @@ -0,0 +1,16 @@ +digraph disk_states { + Diskless -> Inconsistent [ label = "ioctl_set_disk()" ] + Diskless -> Consistent [ label = "ioctl_set_disk()" ] + Diskless -> Outdated [ label = "ioctl_set_disk()" ] + Consistent -> Outdated [ label = "receive_param()" ] + Consistent -> UpToDate [ label = "receive_param()" ] + Consistent -> Inconsistent [ label = "start resync" ] + Outdated -> Inconsistent [ label = "start resync" ] + UpToDate -> Inconsistent [ label = "ioctl_replicate" ] + Inconsistent -> UpToDate [ label = "resync completed" ] + Consistent -> Failed [ label = "io completion error" ] + Outdated -> Failed [ label = "io completion error" ] + UpToDate -> Failed [ label = "io completion error" ] + Inconsistent -> Failed [ label = "io completion error" ] + Failed -> Diskless [ label = "sending notify to peer" ] +} diff --git a/Documentation/blockdev/drbd/drbd-connection-state-overview.dot b/Documentation/blockdev/drbd/drbd-connection-state-overview.dot new file mode 100644 index 000000000000..6d9cf0a7b11d --- /dev/null +++ b/Documentation/blockdev/drbd/drbd-connection-state-overview.dot @@ -0,0 +1,85 @@ +// vim: set sw=2 sts=2 : +digraph { + rankdir=BT + bgcolor=white + + node [shape=plaintext] + node [fontcolor=black] + + StandAlone [ style=filled,fillcolor=gray,label=StandAlone ] + + node [fontcolor=lightgray] + + Unconnected [ label=Unconnected ] + + CommTrouble [ shape=record, + label="{communication loss|{Timeout|BrokenPipe|NetworkFailure}}" ] + + node [fontcolor=gray] + + subgraph cluster_try_connect { + label="try to connect, handshake" + rank=max + WFConnection [ label=WFConnection ] + WFReportParams [ label=WFReportParams ] + } + + TearDown [ label=TearDown ] + + Connected [ label=Connected,style=filled,fillcolor=green,fontcolor=black ] + + node [fontcolor=lightblue] + + StartingSyncS [ label=StartingSyncS ] + StartingSyncT [ label=StartingSyncT ] + + subgraph cluster_bitmap_exchange { + node [fontcolor=red] + fontcolor=red + label="new application (WRITE?) requests blocked\lwhile bitmap is exchanged" + + WFBitMapT [ label=WFBitMapT ] + WFSyncUUID [ label=WFSyncUUID ] + WFBitMapS [ label=WFBitMapS ] + } + + node [fontcolor=blue] + + cluster_resync [ shape=record,label="{resynchronisation process running\l'concurrent' application requests allowed|{{PausedSyncT\nSyncTarget}|{PausedSyncS\nSyncSource}}}" ] + + node [shape=box,fontcolor=black] + + // drbdadm [label="drbdadm connect"] + // handshake [label="drbd_connect()\ndrbd_do_handshake\ndrbd_sync_handshake() etc."] + // comm_error [label="communication trouble"] + + // + // edges + // -------------------------------------- + + StandAlone -> Unconnected [ label="drbdadm connect" ] + Unconnected -> StandAlone [ label="drbdadm disconnect\lor serious communication trouble" ] + Unconnected -> WFConnection [ label="receiver thread is started" ] + WFConnection -> WFReportParams [ headlabel="accept()\land/or \lconnect()\l" ] + + WFReportParams -> StandAlone [ label="during handshake\lpeers do not agree\labout something essential" ] + WFReportParams -> Connected [ label="data identical\lno sync needed",color=green,fontcolor=green ] + + WFReportParams -> WFBitMapS + WFReportParams -> WFBitMapT + WFBitMapT -> WFSyncUUID [minlen=0.1,constraint=false] + + WFBitMapS -> cluster_resync:S + WFSyncUUID -> cluster_resync:T + + edge [color=green] + cluster_resync:any -> Connected [ label="resnyc done",fontcolor=green ] + + edge [color=red] + WFReportParams -> CommTrouble + Connected -> CommTrouble + cluster_resync:any -> CommTrouble + edge [color=black] + CommTrouble -> Unconnected [label="receiver thread is stopped" ] + +} diff --git a/Documentation/blockdev/drbd/node-states-8.dot b/Documentation/blockdev/drbd/node-states-8.dot new file mode 100644 index 000000000000..4a2b00c23547 --- /dev/null +++ b/Documentation/blockdev/drbd/node-states-8.dot @@ -0,0 +1,14 @@ +digraph node_states { + Secondary -> Primary [ label = "ioctl_set_state()" ] + Primary -> Secondary [ label = "ioctl_set_state()" ] +} + +digraph peer_states { + Secondary -> Primary [ label = "recv state packet" ] + Primary -> Secondary [ label = "recv state packet" ] + Primary -> Unknown [ label = "connection lost" ] + Secondary -> Unknown [ label = "connection lost" ] + Unknown -> Primary [ label = "connected" ] + Unknown -> Secondary [ label = "connected" ] +} + diff --git a/MAINTAINERS b/MAINTAINERS index c450f3abb8c9..ea56bd7a6cba 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1758,6 +1758,19 @@ S: Maintained F: drivers/scsi/dpt* F: drivers/scsi/dpt/ +DRBD DRIVER +P: Philipp Reisner +P: Lars Ellenberg +M: drbd-dev@lists.linbit.com +L: drbd-user@lists.linbit.com +W: http://www.drbd.org +T: git git://git.drbd.org/linux-2.6-drbd.git drbd +T: git git://git.drbd.org/drbd-8.3.git +S: Supported +F: drivers/block/drbd/ +F: lib/lru_cache.c +F: Documentation/blockdev/drbd/ + DRIVER CORE, KOBJECTS, AND SYSFS M: Greg Kroah-Hartman T: quilt kernel.org/pub/linux/kernel/people/gregkh/gregkh-2.6/ diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 1d886e079c58..77bfce52e9ca 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -271,6 +271,8 @@ config BLK_DEV_CRYPTOLOOP instead, which can be configured to be on-disk compatible with the cryptoloop device. +source "drivers/block/drbd/Kconfig" + config BLK_DEV_NBD tristate "Network block device support" depends on NET diff --git a/drivers/block/Makefile b/drivers/block/Makefile index cdaa3f8fddf0..aff5ac925c34 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -36,5 +36,6 @@ obj-$(CONFIG_BLK_DEV_UB) += ub.o obj-$(CONFIG_BLK_DEV_HD) += hd.o obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o +obj-$(CONFIG_BLK_DEV_DRBD) += drbd/ swim_mod-objs := swim.o swim_asm.o diff --git a/drivers/block/drbd/Kconfig b/drivers/block/drbd/Kconfig new file mode 100644 index 000000000000..4e6f90f487c2 --- /dev/null +++ b/drivers/block/drbd/Kconfig @@ -0,0 +1,82 @@ +# +# DRBD device driver configuration +# + +comment "DRBD disabled because PROC_FS, INET or CONNECTOR not selected" + depends on !PROC_FS || !INET || !CONNECTOR + +config BLK_DEV_DRBD + tristate "DRBD Distributed Replicated Block Device support" + depends on PROC_FS && INET && CONNECTOR + select LRU_CACHE + default n + help + + NOTE: In order to authenticate connections you have to select + CRYPTO_HMAC and a hash function as well. + + DRBD is a shared-nothing, synchronously replicated block device. It + is designed to serve as a building block for high availability + clusters and in this context, is a "drop-in" replacement for shared + storage. Simplistically, you could see it as a network RAID 1. + + Each minor device has a role, which can be 'primary' or 'secondary'. + On the node with the primary device the application is supposed to + run and to access the device (/dev/drbdX). Every write is sent to + the local 'lower level block device' and, across the network, to the + node with the device in 'secondary' state. The secondary device + simply writes the data to its lower level block device. + + DRBD can also be used in dual-Primary mode (device writable on both + nodes), which means it can exhibit shared disk semantics in a + shared-nothing cluster. Needless to say, on top of dual-Primary + DRBD utilizing a cluster file system is necessary to maintain for + cache coherency. + + For automatic failover you need a cluster manager (e.g. heartbeat). + See also: http://www.drbd.org/, http://www.linux-ha.org + + If unsure, say N. + +config DRBD_TRACE + tristate "DRBD tracing" + depends on BLK_DEV_DRBD + select TRACEPOINTS + default n + help + + Say Y here if you want to be able to trace various events in DRBD. + + If unsure, say N. + +config DRBD_FAULT_INJECTION + bool "DRBD fault injection" + depends on BLK_DEV_DRBD + help + + Say Y here if you want to simulate IO errors, in order to test DRBD's + behavior. + + The actual simulation of IO errors is done by writing 3 values to + /sys/module/drbd/parameters/ + + enable_faults: bitmask of... + 1 meta data write + 2 read + 4 resync data write + 8 read + 16 data write + 32 data read + 64 read ahead + 128 kmalloc of bitmap + 256 allocation of EE (epoch_entries) + + fault_devs: bitmask of minor numbers + fault_rate: frequency in percent + + Example: Simulate data write errors on /dev/drbd0 with a probability of 5%. + echo 16 > /sys/module/drbd/parameters/enable_faults + echo 1 > /sys/module/drbd/parameters/fault_devs + echo 5 > /sys/module/drbd/parameters/fault_rate + + If unsure, say N. diff --git a/drivers/block/drbd/Makefile b/drivers/block/drbd/Makefile new file mode 100644 index 000000000000..7d86ef8a8b40 --- /dev/null +++ b/drivers/block/drbd/Makefile @@ -0,0 +1,8 @@ +drbd-y := drbd_bitmap.o drbd_proc.o +drbd-y += drbd_worker.o drbd_receiver.o drbd_req.o drbd_actlog.o +drbd-y += drbd_main.o drbd_strings.o drbd_nl.o + +drbd_trace-y := drbd_tracing.o + +obj-$(CONFIG_BLK_DEV_DRBD) += drbd.o +obj-$(CONFIG_DRBD_TRACE) += drbd_trace.o diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c new file mode 100644 index 000000000000..74b4835d3107 --- /dev/null +++ b/drivers/block/drbd/drbd_actlog.c @@ -0,0 +1,1484 @@ +/* + drbd_actlog.c + + This file is part of DRBD by Philipp Reisner and Lars Ellenberg. + + Copyright (C) 2003-2008, LINBIT Information Technologies GmbH. + Copyright (C) 2003-2008, Philipp Reisner . + Copyright (C) 2003-2008, Lars Ellenberg . + + drbd is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + drbd is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with drbd; see the file COPYING. If not, write to + the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + + */ + +#include +#include +#include "drbd_int.h" +#include "drbd_tracing.h" +#include "drbd_wrappers.h" + +/* We maintain a trivial check sum in our on disk activity log. + * With that we can ensure correct operation even when the storage + * device might do a partial (last) sector write while loosing power. + */ +struct __packed al_transaction { + u32 magic; + u32 tr_number; + struct __packed { + u32 pos; + u32 extent; } updates[1 + AL_EXTENTS_PT]; + u32 xor_sum; +}; + +struct update_odbm_work { + struct drbd_work w; + unsigned int enr; +}; + +struct update_al_work { + struct drbd_work w; + struct lc_element *al_ext; + struct completion event; + unsigned int enr; + /* if old_enr != LC_FREE, write corresponding bitmap sector, too */ + unsigned int old_enr; +}; + +struct drbd_atodb_wait { + atomic_t count; + struct completion io_done; + struct drbd_conf *mdev; + int error; +}; + + +int w_al_write_transaction(struct drbd_conf *, struct drbd_work *, int); + +/* The actual tracepoint needs to have constant number of known arguments... + */ +void trace_drbd_resync(struct drbd_conf *mdev, int level, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + trace__drbd_resync(mdev, level, fmt, ap); + va_end(ap); +} + +static int _drbd_md_sync_page_io(struct drbd_conf *mdev, + struct drbd_backing_dev *bdev, + struct page *page, sector_t sector, + int rw, int size) +{ + struct bio *bio; + struct drbd_md_io md_io; + int ok; + + md_io.mdev = mdev; + init_completion(&md_io.event); + md_io.error = 0; + + if ((rw & WRITE) && !test_bit(MD_NO_BARRIER, &mdev->flags)) + rw |= (1 << BIO_RW_BARRIER); + rw |= ((1<bi_bdev = bdev->md_bdev; + bio->bi_sector = sector; + ok = (bio_add_page(bio, page, size, 0) == size); + if (!ok) + goto out; + bio->bi_private = &md_io; + bio->bi_end_io = drbd_md_io_complete; + bio->bi_rw = rw; + + trace_drbd_bio(mdev, "Md", bio, 0, NULL); + + if (FAULT_ACTIVE(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) + bio_endio(bio, -EIO); + else + submit_bio(rw, bio); + wait_for_completion(&md_io.event); + ok = bio_flagged(bio, BIO_UPTODATE) && md_io.error == 0; + + /* check for unsupported barrier op. + * would rather check on EOPNOTSUPP, but that is not reliable. + * don't try again for ANY return value != 0 */ + if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER) && !ok)) { + /* Try again with no barrier */ + dev_warn(DEV, "Barriers not supported on meta data device - disabling\n"); + set_bit(MD_NO_BARRIER, &mdev->flags); + rw &= ~(1 << BIO_RW_BARRIER); + bio_put(bio); + goto retry; + } + out: + bio_put(bio); + return ok; +} + +int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, + sector_t sector, int rw) +{ + int logical_block_size, mask, ok; + int offset = 0; + struct page *iop = mdev->md_io_page; + + D_ASSERT(mutex_is_locked(&mdev->md_io_mutex)); + + BUG_ON(!bdev->md_bdev); + + logical_block_size = bdev_logical_block_size(bdev->md_bdev); + if (logical_block_size == 0) + logical_block_size = MD_SECTOR_SIZE; + + /* in case logical_block_size != 512 [ s390 only? ] */ + if (logical_block_size != MD_SECTOR_SIZE) { + mask = (logical_block_size / MD_SECTOR_SIZE) - 1; + D_ASSERT(mask == 1 || mask == 3 || mask == 7); + D_ASSERT(logical_block_size == (mask+1) * MD_SECTOR_SIZE); + offset = sector & mask; + sector = sector & ~mask; + iop = mdev->md_io_tmpp; + + if (rw & WRITE) { + /* these are GFP_KERNEL pages, pre-allocated + * on device initialization */ + void *p = page_address(mdev->md_io_page); + void *hp = page_address(mdev->md_io_tmpp); + + ok = _drbd_md_sync_page_io(mdev, bdev, iop, sector, + READ, logical_block_size); + + if (unlikely(!ok)) { + dev_err(DEV, "drbd_md_sync_page_io(,%llus," + "READ [logical_block_size!=512]) failed!\n", + (unsigned long long)sector); + return 0; + } + + memcpy(hp + offset*MD_SECTOR_SIZE, p, MD_SECTOR_SIZE); + } + } + + if (sector < drbd_md_first_sector(bdev) || + sector > drbd_md_last_sector(bdev)) + dev_alert(DEV, "%s [%d]:%s(,%llus,%s) out of range md access!\n", + current->comm, current->pid, __func__, + (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ"); + + ok = _drbd_md_sync_page_io(mdev, bdev, iop, sector, rw, logical_block_size); + if (unlikely(!ok)) { + dev_err(DEV, "drbd_md_sync_page_io(,%llus,%s) failed!\n", + (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ"); + return 0; + } + + if (logical_block_size != MD_SECTOR_SIZE && !(rw & WRITE)) { + void *p = page_address(mdev->md_io_page); + void *hp = page_address(mdev->md_io_tmpp); + + memcpy(p, hp + offset*MD_SECTOR_SIZE, MD_SECTOR_SIZE); + } + + return ok; +} + +static struct lc_element *_al_get(struct drbd_conf *mdev, unsigned int enr) +{ + struct lc_element *al_ext; + struct lc_element *tmp; + unsigned long al_flags = 0; + + spin_lock_irq(&mdev->al_lock); + tmp = lc_find(mdev->resync, enr/AL_EXT_PER_BM_SECT); + if (unlikely(tmp != NULL)) { + struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce); + if (test_bit(BME_NO_WRITES, &bm_ext->flags)) { + spin_unlock_irq(&mdev->al_lock); + return NULL; + } + } + al_ext = lc_get(mdev->act_log, enr); + al_flags = mdev->act_log->flags; + spin_unlock_irq(&mdev->al_lock); + + /* + if (!al_ext) { + if (al_flags & LC_STARVING) + dev_warn(DEV, "Have to wait for LRU element (AL too small?)\n"); + if (al_flags & LC_DIRTY) + dev_warn(DEV, "Ongoing AL update (AL device too slow?)\n"); + } + */ + + return al_ext; +} + +void drbd_al_begin_io(struct drbd_conf *mdev, sector_t sector) +{ + unsigned int enr = (sector >> (AL_EXTENT_SHIFT-9)); + struct lc_element *al_ext; + struct update_al_work al_work; + + D_ASSERT(atomic_read(&mdev->local_cnt) > 0); + + trace_drbd_actlog(mdev, sector, "al_begin_io"); + + wait_event(mdev->al_wait, (al_ext = _al_get(mdev, enr))); + + if (al_ext->lc_number != enr) { + /* drbd_al_write_transaction(mdev,al_ext,enr); + * recurses into generic_make_request(), which + * disallows recursion, bios being serialized on the + * current->bio_tail list now. + * we have to delegate updates to the activity log + * to the worker thread. */ + init_completion(&al_work.event); + al_work.al_ext = al_ext; + al_work.enr = enr; + al_work.old_enr = al_ext->lc_number; + al_work.w.cb = w_al_write_transaction; + drbd_queue_work_front(&mdev->data.work, &al_work.w); + wait_for_completion(&al_work.event); + + mdev->al_writ_cnt++; + + spin_lock_irq(&mdev->al_lock); + lc_changed(mdev->act_log, al_ext); + spin_unlock_irq(&mdev->al_lock); + wake_up(&mdev->al_wait); + } +} + +void drbd_al_complete_io(struct drbd_conf *mdev, sector_t sector) +{ + unsigned int enr = (sector >> (AL_EXTENT_SHIFT-9)); + struct lc_element *extent; + unsigned long flags; + + trace_drbd_actlog(mdev, sector, "al_complete_io"); + + spin_lock_irqsave(&mdev->al_lock, flags); + + extent = lc_find(mdev->act_log, enr); + + if (!extent) { + spin_unlock_irqrestore(&mdev->al_lock, flags); + dev_err(DEV, "al_complete_io() called on inactive extent %u\n", enr); + return; + } + + if (lc_put(mdev->act_log, extent) == 0) + wake_up(&mdev->al_wait); + + spin_unlock_irqrestore(&mdev->al_lock, flags); +} + +int +w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused) +{ + struct update_al_work *aw = container_of(w, struct update_al_work, w); + struct lc_element *updated = aw->al_ext; + const unsigned int new_enr = aw->enr; + const unsigned int evicted = aw->old_enr; + struct al_transaction *buffer; + sector_t sector; + int i, n, mx; + unsigned int extent_nr; + u32 xor_sum = 0; + + if (!get_ldev(mdev)) { + dev_err(DEV, "get_ldev() failed in w_al_write_transaction\n"); + complete(&((struct update_al_work *)w)->event); + return 1; + } + /* do we have to do a bitmap write, first? + * TODO reduce maximum latency: + * submit both bios, then wait for both, + * instead of doing two synchronous sector writes. */ + if (mdev->state.conn < C_CONNECTED && evicted != LC_FREE) + drbd_bm_write_sect(mdev, evicted/AL_EXT_PER_BM_SECT); + + mutex_lock(&mdev->md_io_mutex); /* protects md_io_page, al_tr_cycle, ... */ + buffer = (struct al_transaction *)page_address(mdev->md_io_page); + + buffer->magic = __constant_cpu_to_be32(DRBD_MAGIC); + buffer->tr_number = cpu_to_be32(mdev->al_tr_number); + + n = lc_index_of(mdev->act_log, updated); + + buffer->updates[0].pos = cpu_to_be32(n); + buffer->updates[0].extent = cpu_to_be32(new_enr); + + xor_sum ^= new_enr; + + mx = min_t(int, AL_EXTENTS_PT, + mdev->act_log->nr_elements - mdev->al_tr_cycle); + for (i = 0; i < mx; i++) { + unsigned idx = mdev->al_tr_cycle + i; + extent_nr = lc_element_by_index(mdev->act_log, idx)->lc_number; + buffer->updates[i+1].pos = cpu_to_be32(idx); + buffer->updates[i+1].extent = cpu_to_be32(extent_nr); + xor_sum ^= extent_nr; + } + for (; i < AL_EXTENTS_PT; i++) { + buffer->updates[i+1].pos = __constant_cpu_to_be32(-1); + buffer->updates[i+1].extent = __constant_cpu_to_be32(LC_FREE); + xor_sum ^= LC_FREE; + } + mdev->al_tr_cycle += AL_EXTENTS_PT; + if (mdev->al_tr_cycle >= mdev->act_log->nr_elements) + mdev->al_tr_cycle = 0; + + buffer->xor_sum = cpu_to_be32(xor_sum); + + sector = mdev->ldev->md.md_offset + + mdev->ldev->md.al_offset + mdev->al_tr_pos; + + if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) + drbd_chk_io_error(mdev, 1, TRUE); + + if (++mdev->al_tr_pos > + div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT)) + mdev->al_tr_pos = 0; + + D_ASSERT(mdev->al_tr_pos < MD_AL_MAX_SIZE); + mdev->al_tr_number++; + + mutex_unlock(&mdev->md_io_mutex); + + complete(&((struct update_al_work *)w)->event); + put_ldev(mdev); + + return 1; +} + +/** + * drbd_al_read_tr() - Read a single transaction from the on disk activity log + * @mdev: DRBD device. + * @bdev: Block device to read form. + * @b: pointer to an al_transaction. + * @index: On disk slot of the transaction to read. + * + * Returns -1 on IO error, 0 on checksum error and 1 upon success. + */ +static int drbd_al_read_tr(struct drbd_conf *mdev, + struct drbd_backing_dev *bdev, + struct al_transaction *b, + int index) +{ + sector_t sector; + int rv, i; + u32 xor_sum = 0; + + sector = bdev->md.md_offset + bdev->md.al_offset + index; + + /* Dont process error normally, + * as this is done before disk is attached! */ + if (!drbd_md_sync_page_io(mdev, bdev, sector, READ)) + return -1; + + rv = (be32_to_cpu(b->magic) == DRBD_MAGIC); + + for (i = 0; i < AL_EXTENTS_PT + 1; i++) + xor_sum ^= be32_to_cpu(b->updates[i].extent); + rv &= (xor_sum == be32_to_cpu(b->xor_sum)); + + return rv; +} + +/** + * drbd_al_read_log() - Restores the activity log from its on disk representation. + * @mdev: DRBD device. + * @bdev: Block device to read form. + * + * Returns 1 on success, returns 0 when reading the log failed due to IO errors. + */ +int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) +{ + struct al_transaction *buffer; + int i; + int rv; + int mx; + int active_extents = 0; + int transactions = 0; + int found_valid = 0; + int from = 0; + int to = 0; + u32 from_tnr = 0; + u32 to_tnr = 0; + u32 cnr; + + mx = div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT); + + /* lock out all other meta data io for now, + * and make sure the page is mapped. + */ + mutex_lock(&mdev->md_io_mutex); + buffer = page_address(mdev->md_io_page); + + /* Find the valid transaction in the log */ + for (i = 0; i <= mx; i++) { + rv = drbd_al_read_tr(mdev, bdev, buffer, i); + if (rv == 0) + continue; + if (rv == -1) { + mutex_unlock(&mdev->md_io_mutex); + return 0; + } + cnr = be32_to_cpu(buffer->tr_number); + + if (++found_valid == 1) { + from = i; + to = i; + from_tnr = cnr; + to_tnr = cnr; + continue; + } + if ((int)cnr - (int)from_tnr < 0) { + D_ASSERT(from_tnr - cnr + i - from == mx+1); + from = i; + from_tnr = cnr; + } + if ((int)cnr - (int)to_tnr > 0) { + D_ASSERT(cnr - to_tnr == i - to); + to = i; + to_tnr = cnr; + } + } + + if (!found_valid) { + dev_warn(DEV, "No usable activity log found.\n"); + mutex_unlock(&mdev->md_io_mutex); + return 1; + } + + /* Read the valid transactions. + * dev_info(DEV, "Reading from %d to %d.\n",from,to); */ + i = from; + while (1) { + int j, pos; + unsigned int extent_nr; + unsigned int trn; + + rv = drbd_al_read_tr(mdev, bdev, buffer, i); + ERR_IF(rv == 0) goto cancel; + if (rv == -1) { + mutex_unlock(&mdev->md_io_mutex); + return 0; + } + + trn = be32_to_cpu(buffer->tr_number); + + spin_lock_irq(&mdev->al_lock); + + /* This loop runs backwards because in the cyclic + elements there might be an old version of the + updated element (in slot 0). So the element in slot 0 + can overwrite old versions. */ + for (j = AL_EXTENTS_PT; j >= 0; j--) { + pos = be32_to_cpu(buffer->updates[j].pos); + extent_nr = be32_to_cpu(buffer->updates[j].extent); + + if (extent_nr == LC_FREE) + continue; + + lc_set(mdev->act_log, extent_nr, pos); + active_extents++; + } + spin_unlock_irq(&mdev->al_lock); + + transactions++; + +cancel: + if (i == to) + break; + i++; + if (i > mx) + i = 0; + } + + mdev->al_tr_number = to_tnr+1; + mdev->al_tr_pos = to; + if (++mdev->al_tr_pos > + div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT)) + mdev->al_tr_pos = 0; + + /* ok, we are done with it */ + mutex_unlock(&mdev->md_io_mutex); + + dev_info(DEV, "Found %d transactions (%d active extents) in activity log.\n", + transactions, active_extents); + + return 1; +} + +static void atodb_endio(struct bio *bio, int error) +{ + struct drbd_atodb_wait *wc = bio->bi_private; + struct drbd_conf *mdev = wc->mdev; + struct page *page; + int uptodate = bio_flagged(bio, BIO_UPTODATE); + + /* strange behavior of some lower level drivers... + * fail the request by clearing the uptodate flag, + * but do not return any error?! */ + if (!error && !uptodate) + error = -EIO; + + drbd_chk_io_error(mdev, error, TRUE); + if (error && wc->error == 0) + wc->error = error; + + if (atomic_dec_and_test(&wc->count)) + complete(&wc->io_done); + + page = bio->bi_io_vec[0].bv_page; + put_page(page); + bio_put(bio); + mdev->bm_writ_cnt++; + put_ldev(mdev); +} + +#define S2W(s) ((s)<<(BM_EXT_SHIFT-BM_BLOCK_SHIFT-LN2_BPL)) +/* activity log to on disk bitmap -- prepare bio unless that sector + * is already covered by previously prepared bios */ +static int atodb_prepare_unless_covered(struct drbd_conf *mdev, + struct bio **bios, + unsigned int enr, + struct drbd_atodb_wait *wc) __must_hold(local) +{ + struct bio *bio; + struct page *page; + sector_t on_disk_sector = enr + mdev->ldev->md.md_offset + + mdev->ldev->md.bm_offset; + unsigned int page_offset = PAGE_SIZE; + int offset; + int i = 0; + int err = -ENOMEM; + + /* Check if that enr is already covered by an already created bio. + * Caution, bios[] is not NULL terminated, + * but only initialized to all NULL. + * For completely scattered activity log, + * the last invocation iterates over all bios, + * and finds the last NULL entry. + */ + while ((bio = bios[i])) { + if (bio->bi_sector == on_disk_sector) + return 0; + i++; + } + /* bios[i] == NULL, the next not yet used slot */ + + /* GFP_KERNEL, we are not in the write-out path */ + bio = bio_alloc(GFP_KERNEL, 1); + if (bio == NULL) + return -ENOMEM; + + if (i > 0) { + const struct bio_vec *prev_bv = bios[i-1]->bi_io_vec; + page_offset = prev_bv->bv_offset + prev_bv->bv_len; + page = prev_bv->bv_page; + } + if (page_offset == PAGE_SIZE) { + page = alloc_page(__GFP_HIGHMEM); + if (page == NULL) + goto out_bio_put; + page_offset = 0; + } else { + get_page(page); + } + + offset = S2W(enr); + drbd_bm_get_lel(mdev, offset, + min_t(size_t, S2W(1), drbd_bm_words(mdev) - offset), + kmap(page) + page_offset); + kunmap(page); + + bio->bi_private = wc; + bio->bi_end_io = atodb_endio; + bio->bi_bdev = mdev->ldev->md_bdev; + bio->bi_sector = on_disk_sector; + + if (bio_add_page(bio, page, MD_SECTOR_SIZE, page_offset) != MD_SECTOR_SIZE) + goto out_put_page; + + atomic_inc(&wc->count); + /* we already know that we may do this... + * get_ldev_if_state(mdev,D_ATTACHING); + * just get the extra reference, so that the local_cnt reflects + * the number of pending IO requests DRBD at its backing device. + */ + atomic_inc(&mdev->local_cnt); + + bios[i] = bio; + + return 0; + +out_put_page: + err = -EINVAL; + put_page(page); +out_bio_put: + bio_put(bio); + return err; +} + +/** + * drbd_al_to_on_disk_bm() - * Writes bitmap parts covered by active AL extents + * @mdev: DRBD device. + * + * Called when we detach (unconfigure) local storage, + * or when we go from R_PRIMARY to R_SECONDARY role. + */ +void drbd_al_to_on_disk_bm(struct drbd_conf *mdev) +{ + int i, nr_elements; + unsigned int enr; + struct bio **bios; + struct drbd_atodb_wait wc; + + ERR_IF (!get_ldev_if_state(mdev, D_ATTACHING)) + return; /* sorry, I don't have any act_log etc... */ + + wait_event(mdev->al_wait, lc_try_lock(mdev->act_log)); + + nr_elements = mdev->act_log->nr_elements; + + /* GFP_KERNEL, we are not in anyone's write-out path */ + bios = kzalloc(sizeof(struct bio *) * nr_elements, GFP_KERNEL); + if (!bios) + goto submit_one_by_one; + + atomic_set(&wc.count, 0); + init_completion(&wc.io_done); + wc.mdev = mdev; + wc.error = 0; + + for (i = 0; i < nr_elements; i++) { + enr = lc_element_by_index(mdev->act_log, i)->lc_number; + if (enr == LC_FREE) + continue; + /* next statement also does atomic_inc wc.count and local_cnt */ + if (atodb_prepare_unless_covered(mdev, bios, + enr/AL_EXT_PER_BM_SECT, + &wc)) + goto free_bios_submit_one_by_one; + } + + /* unnecessary optimization? */ + lc_unlock(mdev->act_log); + wake_up(&mdev->al_wait); + + /* all prepared, submit them */ + for (i = 0; i < nr_elements; i++) { + if (bios[i] == NULL) + break; + if (FAULT_ACTIVE(mdev, DRBD_FAULT_MD_WR)) { + bios[i]->bi_rw = WRITE; + bio_endio(bios[i], -EIO); + } else { + submit_bio(WRITE, bios[i]); + } + } + + drbd_blk_run_queue(bdev_get_queue(mdev->ldev->md_bdev)); + + /* always (try to) flush bitmap to stable storage */ + drbd_md_flush(mdev); + + /* In case we did not submit a single IO do not wait for + * them to complete. ( Because we would wait forever here. ) + * + * In case we had IOs and they are already complete, there + * is not point in waiting anyways. + * Therefore this if () ... */ + if (atomic_read(&wc.count)) + wait_for_completion(&wc.io_done); + + put_ldev(mdev); + + kfree(bios); + return; + + free_bios_submit_one_by_one: + /* free everything by calling the endio callback directly. */ + for (i = 0; i < nr_elements && bios[i]; i++) + bio_endio(bios[i], 0); + + kfree(bios); + + submit_one_by_one: + dev_warn(DEV, "Using the slow drbd_al_to_on_disk_bm()\n"); + + for (i = 0; i < mdev->act_log->nr_elements; i++) { + enr = lc_element_by_index(mdev->act_log, i)->lc_number; + if (enr == LC_FREE) + continue; + /* Really slow: if we have al-extents 16..19 active, + * sector 4 will be written four times! Synchronous! */ + drbd_bm_write_sect(mdev, enr/AL_EXT_PER_BM_SECT); + } + + lc_unlock(mdev->act_log); + wake_up(&mdev->al_wait); + put_ldev(mdev); +} + +/** + * drbd_al_apply_to_bm() - Sets the bitmap to diry(1) where covered ba active AL extents + * @mdev: DRBD device. + */ +void drbd_al_apply_to_bm(struct drbd_conf *mdev) +{ + unsigned int enr; + unsigned long add = 0; + char ppb[10]; + int i; + + wait_event(mdev->al_wait, lc_try_lock(mdev->act_log)); + + for (i = 0; i < mdev->act_log->nr_elements; i++) { + enr = lc_element_by_index(mdev->act_log, i)->lc_number; + if (enr == LC_FREE) + continue; + add += drbd_bm_ALe_set_all(mdev, enr); + } + + lc_unlock(mdev->act_log); + wake_up(&mdev->al_wait); + + dev_info(DEV, "Marked additional %s as out-of-sync based on AL.\n", + ppsize(ppb, Bit2KB(add))); +} + +static int _try_lc_del(struct drbd_conf *mdev, struct lc_element *al_ext) +{ + int rv; + + spin_lock_irq(&mdev->al_lock); + rv = (al_ext->refcnt == 0); + if (likely(rv)) + lc_del(mdev->act_log, al_ext); + spin_unlock_irq(&mdev->al_lock); + + return rv; +} + +/** + * drbd_al_shrink() - Removes all active extents form the activity log + * @mdev: DRBD device. + * + * Removes all active extents form the activity log, waiting until + * the reference count of each entry dropped to 0 first, of course. + * + * You need to lock mdev->act_log with lc_try_lock() / lc_unlock() + */ +void drbd_al_shrink(struct drbd_conf *mdev) +{ + struct lc_element *al_ext; + int i; + + D_ASSERT(test_bit(__LC_DIRTY, &mdev->act_log->flags)); + + for (i = 0; i < mdev->act_log->nr_elements; i++) { + al_ext = lc_element_by_index(mdev->act_log, i); + if (al_ext->lc_number == LC_FREE) + continue; + wait_event(mdev->al_wait, _try_lc_del(mdev, al_ext)); + } + + wake_up(&mdev->al_wait); +} + +static int w_update_odbm(struct drbd_conf *mdev, struct drbd_work *w, int unused) +{ + struct update_odbm_work *udw = container_of(w, struct update_odbm_work, w); + + if (!get_ldev(mdev)) { + if (__ratelimit(&drbd_ratelimit_state)) + dev_warn(DEV, "Can not update on disk bitmap, local IO disabled.\n"); + kfree(udw); + return 1; + } + + drbd_bm_write_sect(mdev, udw->enr); + put_ldev(mdev); + + kfree(udw); + + if (drbd_bm_total_weight(mdev) <= mdev->rs_failed) { + switch (mdev->state.conn) { + case C_SYNC_SOURCE: case C_SYNC_TARGET: + case C_PAUSED_SYNC_S: case C_PAUSED_SYNC_T: + drbd_resync_finished(mdev); + default: + /* nothing to do */ + break; + } + } + drbd_bcast_sync_progress(mdev); + + return 1; +} + + +/* ATTENTION. The AL's extents are 4MB each, while the extents in the + * resync LRU-cache are 16MB each. + * The caller of this function has to hold an get_ldev() reference. + * + * TODO will be obsoleted once we have a caching lru of the on disk bitmap + */ +static void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector, + int count, int success) +{ + struct lc_element *e; + struct update_odbm_work *udw; + + unsigned int enr; + + D_ASSERT(atomic_read(&mdev->local_cnt)); + + /* I simply assume that a sector/size pair never crosses + * a 16 MB extent border. (Currently this is true...) */ + enr = BM_SECT_TO_EXT(sector); + + e = lc_get(mdev->resync, enr); + if (e) { + struct bm_extent *ext = lc_entry(e, struct bm_extent, lce); + if (ext->lce.lc_number == enr) { + if (success) + ext->rs_left -= count; + else + ext->rs_failed += count; + if (ext->rs_left < ext->rs_failed) { + dev_err(DEV, "BAD! sector=%llus enr=%u rs_left=%d " + "rs_failed=%d count=%d\n", + (unsigned long long)sector, + ext->lce.lc_number, ext->rs_left, + ext->rs_failed, count); + dump_stack(); + + lc_put(mdev->resync, &ext->lce); + drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); + return; + } + } else { + /* Normally this element should be in the cache, + * since drbd_rs_begin_io() pulled it already in. + * + * But maybe an application write finished, and we set + * something outside the resync lru_cache in sync. + */ + int rs_left = drbd_bm_e_weight(mdev, enr); + if (ext->flags != 0) { + dev_warn(DEV, "changing resync lce: %d[%u;%02lx]" + " -> %d[%u;00]\n", + ext->lce.lc_number, ext->rs_left, + ext->flags, enr, rs_left); + ext->flags = 0; + } + if (ext->rs_failed) { + dev_warn(DEV, "Kicking resync_lru element enr=%u " + "out with rs_failed=%d\n", + ext->lce.lc_number, ext->rs_failed); + set_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags); + } + ext->rs_left = rs_left; + ext->rs_failed = success ? 0 : count; + lc_changed(mdev->resync, &ext->lce); + } + lc_put(mdev->resync, &ext->lce); + /* no race, we are within the al_lock! */ + + if (ext->rs_left == ext->rs_failed) { + ext->rs_failed = 0; + + udw = kmalloc(sizeof(*udw), GFP_ATOMIC); + if (udw) { + udw->enr = ext->lce.lc_number; + udw->w.cb = w_update_odbm; + drbd_queue_work_front(&mdev->data.work, &udw->w); + } else { + dev_warn(DEV, "Could not kmalloc an udw\n"); + set_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags); + } + } + } else { + dev_err(DEV, "lc_get() failed! locked=%d/%d flags=%lu\n", + mdev->resync_locked, + mdev->resync->nr_elements, + mdev->resync->flags); + } +} + +/* clear the bit corresponding to the piece of storage in question: + * size byte of data starting from sector. Only clear a bits of the affected + * one ore more _aligned_ BM_BLOCK_SIZE blocks. + * + * called by worker on C_SYNC_TARGET and receiver on SyncSource. + * + */ +void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size, + const char *file, const unsigned int line) +{ + /* Is called from worker and receiver context _only_ */ + unsigned long sbnr, ebnr, lbnr; + unsigned long count = 0; + sector_t esector, nr_sectors; + int wake_up = 0; + unsigned long flags; + + if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_SEGMENT_SIZE) { + dev_err(DEV, "drbd_set_in_sync: sector=%llus size=%d nonsense!\n", + (unsigned long long)sector, size); + return; + } + nr_sectors = drbd_get_capacity(mdev->this_bdev); + esector = sector + (size >> 9) - 1; + + ERR_IF(sector >= nr_sectors) return; + ERR_IF(esector >= nr_sectors) esector = (nr_sectors-1); + + lbnr = BM_SECT_TO_BIT(nr_sectors-1); + + /* we clear it (in sync). + * round up start sector, round down end sector. we make sure we only + * clear full, aligned, BM_BLOCK_SIZE (4K) blocks */ + if (unlikely(esector < BM_SECT_PER_BIT-1)) + return; + if (unlikely(esector == (nr_sectors-1))) + ebnr = lbnr; + else + ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1)); + sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1); + + trace_drbd_resync(mdev, TRACE_LVL_METRICS, + "drbd_set_in_sync: sector=%llus size=%u sbnr=%lu ebnr=%lu\n", + (unsigned long long)sector, size, sbnr, ebnr); + + if (sbnr > ebnr) + return; + + /* + * ok, (capacity & 7) != 0 sometimes, but who cares... + * we count rs_{total,left} in bits, not sectors. + */ + spin_lock_irqsave(&mdev->al_lock, flags); + count = drbd_bm_clear_bits(mdev, sbnr, ebnr); + if (count) { + /* we need the lock for drbd_try_clear_on_disk_bm */ + if (jiffies - mdev->rs_mark_time > HZ*10) { + /* should be rolling marks, + * but we estimate only anyways. */ + if (mdev->rs_mark_left != drbd_bm_total_weight(mdev) && + mdev->state.conn != C_PAUSED_SYNC_T && + mdev->state.conn != C_PAUSED_SYNC_S) { + mdev->rs_mark_time = jiffies; + mdev->rs_mark_left = drbd_bm_total_weight(mdev); + } + } + if (get_ldev(mdev)) { + drbd_try_clear_on_disk_bm(mdev, sector, count, TRUE); + put_ldev(mdev); + } + /* just wake_up unconditional now, various lc_chaged(), + * lc_put() in drbd_try_clear_on_disk_bm(). */ + wake_up = 1; + } + spin_unlock_irqrestore(&mdev->al_lock, flags); + if (wake_up) + wake_up(&mdev->al_wait); +} + +/* + * this is intended to set one request worth of data out of sync. + * affects at least 1 bit, + * and at most 1+DRBD_MAX_SEGMENT_SIZE/BM_BLOCK_SIZE bits. + * + * called by tl_clear and drbd_send_dblock (==drbd_make_request). + * so this can be _any_ process. + */ +void __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size, + const char *file, const unsigned int line) +{ + unsigned long sbnr, ebnr, lbnr, flags; + sector_t esector, nr_sectors; + unsigned int enr, count; + struct lc_element *e; + + if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_SEGMENT_SIZE) { + dev_err(DEV, "sector: %llus, size: %d\n", + (unsigned long long)sector, size); + return; + } + + if (!get_ldev(mdev)) + return; /* no disk, no metadata, no bitmap to set bits in */ + + nr_sectors = drbd_get_capacity(mdev->this_bdev); + esector = sector + (size >> 9) - 1; + + ERR_IF(sector >= nr_sectors) + goto out; + ERR_IF(esector >= nr_sectors) + esector = (nr_sectors-1); + + lbnr = BM_SECT_TO_BIT(nr_sectors-1); + + /* we set it out of sync, + * we do not need to round anything here */ + sbnr = BM_SECT_TO_BIT(sector); + ebnr = BM_SECT_TO_BIT(esector); + + trace_drbd_resync(mdev, TRACE_LVL_METRICS, + "drbd_set_out_of_sync: sector=%llus size=%u sbnr=%lu ebnr=%lu\n", + (unsigned long long)sector, size, sbnr, ebnr); + + /* ok, (capacity & 7) != 0 sometimes, but who cares... + * we count rs_{total,left} in bits, not sectors. */ + spin_lock_irqsave(&mdev->al_lock, flags); + count = drbd_bm_set_bits(mdev, sbnr, ebnr); + + enr = BM_SECT_TO_EXT(sector); + e = lc_find(mdev->resync, enr); + if (e) + lc_entry(e, struct bm_extent, lce)->rs_left += count; + spin_unlock_irqrestore(&mdev->al_lock, flags); + +out: + put_ldev(mdev); +} + +static +struct bm_extent *_bme_get(struct drbd_conf *mdev, unsigned int enr) +{ + struct lc_element *e; + struct bm_extent *bm_ext; + int wakeup = 0; + unsigned long rs_flags; + + spin_lock_irq(&mdev->al_lock); + if (mdev->resync_locked > mdev->resync->nr_elements/2) { + spin_unlock_irq(&mdev->al_lock); + return NULL; + } + e = lc_get(mdev->resync, enr); + bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL; + if (bm_ext) { + if (bm_ext->lce.lc_number != enr) { + bm_ext->rs_left = drbd_bm_e_weight(mdev, enr); + bm_ext->rs_failed = 0; + lc_changed(mdev->resync, &bm_ext->lce); + wakeup = 1; + } + if (bm_ext->lce.refcnt == 1) + mdev->resync_locked++; + set_bit(BME_NO_WRITES, &bm_ext->flags); + } + rs_flags = mdev->resync->flags; + spin_unlock_irq(&mdev->al_lock); + if (wakeup) + wake_up(&mdev->al_wait); + + if (!bm_ext) { + if (rs_flags & LC_STARVING) + dev_warn(DEV, "Have to wait for element" + " (resync LRU too small?)\n"); + BUG_ON(rs_flags & LC_DIRTY); + } + + return bm_ext; +} + +static int _is_in_al(struct drbd_conf *mdev, unsigned int enr) +{ + struct lc_element *al_ext; + int rv = 0; + + spin_lock_irq(&mdev->al_lock); + if (unlikely(enr == mdev->act_log->new_number)) + rv = 1; + else { + al_ext = lc_find(mdev->act_log, enr); + if (al_ext) { + if (al_ext->refcnt) + rv = 1; + } + } + spin_unlock_irq(&mdev->al_lock); + + /* + if (unlikely(rv)) { + dev_info(DEV, "Delaying sync read until app's write is done\n"); + } + */ + return rv; +} + +/** + * drbd_rs_begin_io() - Gets an extent in the resync LRU cache and sets it to BME_LOCKED + * @mdev: DRBD device. + * @sector: The sector number. + * + * This functions sleeps on al_wait. Returns 1 on success, 0 if interrupted. + */ +int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector) +{ + unsigned int enr = BM_SECT_TO_EXT(sector); + struct bm_extent *bm_ext; + int i, sig; + + trace_drbd_resync(mdev, TRACE_LVL_ALL, + "drbd_rs_begin_io: sector=%llus (rs_end=%d)\n", + (unsigned long long)sector, enr); + + sig = wait_event_interruptible(mdev->al_wait, + (bm_ext = _bme_get(mdev, enr))); + if (sig) + return 0; + + if (test_bit(BME_LOCKED, &bm_ext->flags)) + return 1; + + for (i = 0; i < AL_EXT_PER_BM_SECT; i++) { + sig = wait_event_interruptible(mdev->al_wait, + !_is_in_al(mdev, enr * AL_EXT_PER_BM_SECT + i)); + if (sig) { + spin_lock_irq(&mdev->al_lock); + if (lc_put(mdev->resync, &bm_ext->lce) == 0) { + clear_bit(BME_NO_WRITES, &bm_ext->flags); + mdev->resync_locked--; + wake_up(&mdev->al_wait); + } + spin_unlock_irq(&mdev->al_lock); + return 0; + } + } + + set_bit(BME_LOCKED, &bm_ext->flags); + + return 1; +} + +/** + * drbd_try_rs_begin_io() - Gets an extent in the resync LRU cache, does not sleep + * @mdev: DRBD device. + * @sector: The sector number. + * + * Gets an extent in the resync LRU cache, sets it to BME_NO_WRITES, then + * tries to set it to BME_LOCKED. Returns 0 upon success, and -EAGAIN + * if there is still application IO going on in this area. + */ +int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector) +{ + unsigned int enr = BM_SECT_TO_EXT(sector); + const unsigned int al_enr = enr*AL_EXT_PER_BM_SECT; + struct lc_element *e; + struct bm_extent *bm_ext; + int i; + + trace_drbd_resync(mdev, TRACE_LVL_ALL, "drbd_try_rs_begin_io: sector=%llus\n", + (unsigned long long)sector); + + spin_lock_irq(&mdev->al_lock); + if (mdev->resync_wenr != LC_FREE && mdev->resync_wenr != enr) { + /* in case you have very heavy scattered io, it may + * stall the syncer undefined if we give up the ref count + * when we try again and requeue. + * + * if we don't give up the refcount, but the next time + * we are scheduled this extent has been "synced" by new + * application writes, we'd miss the lc_put on the + * extent we keep the refcount on. + * so we remembered which extent we had to try again, and + * if the next requested one is something else, we do + * the lc_put here... + * we also have to wake_up + */ + + trace_drbd_resync(mdev, TRACE_LVL_ALL, + "dropping %u, apparently got 'synced' by application io\n", + mdev->resync_wenr); + + e = lc_find(mdev->resync, mdev->resync_wenr); + bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL; + if (bm_ext) { + D_ASSERT(!test_bit(BME_LOCKED, &bm_ext->flags)); + D_ASSERT(test_bit(BME_NO_WRITES, &bm_ext->flags)); + clear_bit(BME_NO_WRITES, &bm_ext->flags); + mdev->resync_wenr = LC_FREE; + if (lc_put(mdev->resync, &bm_ext->lce) == 0) + mdev->resync_locked--; + wake_up(&mdev->al_wait); + } else { + dev_alert(DEV, "LOGIC BUG\n"); + } + } + /* TRY. */ + e = lc_try_get(mdev->resync, enr); + bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL; + if (bm_ext) { + if (test_bit(BME_LOCKED, &bm_ext->flags)) + goto proceed; + if (!test_and_set_bit(BME_NO_WRITES, &bm_ext->flags)) { + mdev->resync_locked++; + } else { + /* we did set the BME_NO_WRITES, + * but then could not set BME_LOCKED, + * so we tried again. + * drop the extra reference. */ + trace_drbd_resync(mdev, TRACE_LVL_ALL, + "dropping extra reference on %u\n", enr); + + bm_ext->lce.refcnt--; + D_ASSERT(bm_ext->lce.refcnt > 0); + } + goto check_al; + } else { + /* do we rather want to try later? */ + if (mdev->resync_locked > mdev->resync->nr_elements-3) { + trace_drbd_resync(mdev, TRACE_LVL_ALL, + "resync_locked = %u!\n", mdev->resync_locked); + + goto try_again; + } + /* Do or do not. There is no try. -- Yoda */ + e = lc_get(mdev->resync, enr); + bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL; + if (!bm_ext) { + const unsigned long rs_flags = mdev->resync->flags; + if (rs_flags & LC_STARVING) + dev_warn(DEV, "Have to wait for element" + " (resync LRU too small?)\n"); + BUG_ON(rs_flags & LC_DIRTY); + goto try_again; + } + if (bm_ext->lce.lc_number != enr) { + bm_ext->rs_left = drbd_bm_e_weight(mdev, enr); + bm_ext->rs_failed = 0; + lc_changed(mdev->resync, &bm_ext->lce); + wake_up(&mdev->al_wait); + D_ASSERT(test_bit(BME_LOCKED, &bm_ext->flags) == 0); + } + set_bit(BME_NO_WRITES, &bm_ext->flags); + D_ASSERT(bm_ext->lce.refcnt == 1); + mdev->resync_locked++; + goto check_al; + } +check_al: + trace_drbd_resync(mdev, TRACE_LVL_ALL, "checking al for %u\n", enr); + + for (i = 0; i < AL_EXT_PER_BM_SECT; i++) { + if (unlikely(al_enr+i == mdev->act_log->new_number)) + goto try_again; + if (lc_is_used(mdev->act_log, al_enr+i)) + goto try_again; + } + set_bit(BME_LOCKED, &bm_ext->flags); +proceed: + mdev->resync_wenr = LC_FREE; + spin_unlock_irq(&mdev->al_lock); + return 0; + +try_again: + trace_drbd_resync(mdev, TRACE_LVL_ALL, "need to try again for %u\n", enr); + if (bm_ext) + mdev->resync_wenr = enr; + spin_unlock_irq(&mdev->al_lock); + return -EAGAIN; +} + +void drbd_rs_complete_io(struct drbd_conf *mdev, sector_t sector) +{ + unsigned int enr = BM_SECT_TO_EXT(sector); + struct lc_element *e; + struct bm_extent *bm_ext; + unsigned long flags; + + trace_drbd_resync(mdev, TRACE_LVL_ALL, + "drbd_rs_complete_io: sector=%llus (rs_enr=%d)\n", + (long long)sector, enr); + + spin_lock_irqsave(&mdev->al_lock, flags); + e = lc_find(mdev->resync, enr); + bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL; + if (!bm_ext) { + spin_unlock_irqrestore(&mdev->al_lock, flags); + if (__ratelimit(&drbd_ratelimit_state)) + dev_err(DEV, "drbd_rs_complete_io() called, but extent not found\n"); + return; + } + + if (bm_ext->lce.refcnt == 0) { + spin_unlock_irqrestore(&mdev->al_lock, flags); + dev_err(DEV, "drbd_rs_complete_io(,%llu [=%u]) called, " + "but refcnt is 0!?\n", + (unsigned long long)sector, enr); + return; + } + + if (lc_put(mdev->resync, &bm_ext->lce) == 0) { + clear_bit(BME_LOCKED, &bm_ext->flags); + clear_bit(BME_NO_WRITES, &bm_ext->flags); + mdev->resync_locked--; + wake_up(&mdev->al_wait); + } + + spin_unlock_irqrestore(&mdev->al_lock, flags); +} + +/** + * drbd_rs_cancel_all() - Removes all extents from the resync LRU (even BME_LOCKED) + * @mdev: DRBD device. + */ +void drbd_rs_cancel_all(struct drbd_conf *mdev) +{ + trace_drbd_resync(mdev, TRACE_LVL_METRICS, "drbd_rs_cancel_all\n"); + + spin_lock_irq(&mdev->al_lock); + + if (get_ldev_if_state(mdev, D_FAILED)) { /* Makes sure ->resync is there. */ + lc_reset(mdev->resync); + put_ldev(mdev); + } + mdev->resync_locked = 0; + mdev->resync_wenr = LC_FREE; + spin_unlock_irq(&mdev->al_lock); + wake_up(&mdev->al_wait); +} + +/** + * drbd_rs_del_all() - Gracefully remove all extents from the resync LRU + * @mdev: DRBD device. + * + * Returns 0 upon success, -EAGAIN if at least one reference count was + * not zero. + */ +int drbd_rs_del_all(struct drbd_conf *mdev) +{ + struct lc_element *e; + struct bm_extent *bm_ext; + int i; + + trace_drbd_resync(mdev, TRACE_LVL_METRICS, "drbd_rs_del_all\n"); + + spin_lock_irq(&mdev->al_lock); + + if (get_ldev_if_state(mdev, D_FAILED)) { + /* ok, ->resync is there. */ + for (i = 0; i < mdev->resync->nr_elements; i++) { + e = lc_element_by_index(mdev->resync, i); + bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL; + if (bm_ext->lce.lc_number == LC_FREE) + continue; + if (bm_ext->lce.lc_number == mdev->resync_wenr) { + dev_info(DEV, "dropping %u in drbd_rs_del_all, apparently" + " got 'synced' by application io\n", + mdev->resync_wenr); + D_ASSERT(!test_bit(BME_LOCKED, &bm_ext->flags)); + D_ASSERT(test_bit(BME_NO_WRITES, &bm_ext->flags)); + clear_bit(BME_NO_WRITES, &bm_ext->flags); + mdev->resync_wenr = LC_FREE; + lc_put(mdev->resync, &bm_ext->lce); + } + if (bm_ext->lce.refcnt != 0) { + dev_info(DEV, "Retrying drbd_rs_del_all() later. " + "refcnt=%d\n", bm_ext->lce.refcnt); + put_ldev(mdev); + spin_unlock_irq(&mdev->al_lock); + return -EAGAIN; + } + D_ASSERT(!test_bit(BME_LOCKED, &bm_ext->flags)); + D_ASSERT(!test_bit(BME_NO_WRITES, &bm_ext->flags)); + lc_del(mdev->resync, &bm_ext->lce); + } + D_ASSERT(mdev->resync->used == 0); + put_ldev(mdev); + } + spin_unlock_irq(&mdev->al_lock); + + return 0; +} + +/** + * drbd_rs_failed_io() - Record information on a failure to resync the specified blocks + * @mdev: DRBD device. + * @sector: The sector number. + * @size: Size of failed IO operation, in byte. + */ +void drbd_rs_failed_io(struct drbd_conf *mdev, sector_t sector, int size) +{ + /* Is called from worker and receiver context _only_ */ + unsigned long sbnr, ebnr, lbnr; + unsigned long count; + sector_t esector, nr_sectors; + int wake_up = 0; + + trace_drbd_resync(mdev, TRACE_LVL_SUMMARY, + "drbd_rs_failed_io: sector=%llus, size=%u\n", + (unsigned long long)sector, size); + + if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_SEGMENT_SIZE) { + dev_err(DEV, "drbd_rs_failed_io: sector=%llus size=%d nonsense!\n", + (unsigned long long)sector, size); + return; + } + nr_sectors = drbd_get_capacity(mdev->this_bdev); + esector = sector + (size >> 9) - 1; + + ERR_IF(sector >= nr_sectors) return; + ERR_IF(esector >= nr_sectors) esector = (nr_sectors-1); + + lbnr = BM_SECT_TO_BIT(nr_sectors-1); + + /* + * round up start sector, round down end sector. we make sure we only + * handle full, aligned, BM_BLOCK_SIZE (4K) blocks */ + if (unlikely(esector < BM_SECT_PER_BIT-1)) + return; + if (unlikely(esector == (nr_sectors-1))) + ebnr = lbnr; + else + ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1)); + sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1); + + if (sbnr > ebnr) + return; + + /* + * ok, (capacity & 7) != 0 sometimes, but who cares... + * we count rs_{total,left} in bits, not sectors. + */ + spin_lock_irq(&mdev->al_lock); + count = drbd_bm_count_bits(mdev, sbnr, ebnr); + if (count) { + mdev->rs_failed += count; + + if (get_ldev(mdev)) { + drbd_try_clear_on_disk_bm(mdev, sector, count, FALSE); + put_ldev(mdev); + } + + /* just wake_up unconditional now, various lc_chaged(), + * lc_put() in drbd_try_clear_on_disk_bm(). */ + wake_up = 1; + } + spin_unlock_irq(&mdev->al_lock); + if (wake_up) + wake_up(&mdev->al_wait); +} diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c new file mode 100644 index 000000000000..b61057e77882 --- /dev/null +++ b/drivers/block/drbd/drbd_bitmap.c @@ -0,0 +1,1327 @@ +/* + drbd_bitmap.c + + This file is part of DRBD by Philipp Reisner and Lars Ellenberg. + + Copyright (C) 2004-2008, LINBIT Information Technologies GmbH. + Copyright (C) 2004-2008, Philipp Reisner . + Copyright (C) 2004-2008, Lars Ellenberg . + + drbd is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + drbd is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with drbd; see the file COPYING. If not, write to + the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include +#include +#include +#include +#include "drbd_int.h" + +/* OPAQUE outside this file! + * interface defined in drbd_int.h + + * convention: + * function name drbd_bm_... => used elsewhere, "public". + * function name bm_... => internal to implementation, "private". + + * Note that since find_first_bit returns int, at the current granularity of + * the bitmap (4KB per byte), this implementation "only" supports up to + * 1<<(32+12) == 16 TB... + */ + +/* + * NOTE + * Access to the *bm_pages is protected by bm_lock. + * It is safe to read the other members within the lock. + * + * drbd_bm_set_bits is called from bio_endio callbacks, + * We may be called with irq already disabled, + * so we need spin_lock_irqsave(). + * And we need the kmap_atomic. + */ +struct drbd_bitmap { + struct page **bm_pages; + spinlock_t bm_lock; + /* WARNING unsigned long bm_*: + * 32bit number of bit offset is just enough for 512 MB bitmap. + * it will blow up if we make the bitmap bigger... + * not that it makes much sense to have a bitmap that large, + * rather change the granularity to 16k or 64k or something. + * (that implies other problems, however...) + */ + unsigned long bm_set; /* nr of set bits; THINK maybe atomic_t? */ + unsigned long bm_bits; + size_t bm_words; + size_t bm_number_of_pages; + sector_t bm_dev_capacity; + struct semaphore bm_change; /* serializes resize operations */ + + atomic_t bm_async_io; + wait_queue_head_t bm_io_wait; + + unsigned long bm_flags; + + /* debugging aid, in case we are still racy somewhere */ + char *bm_why; + struct task_struct *bm_task; +}; + +/* definition of bits in bm_flags */ +#define BM_LOCKED 0 +#define BM_MD_IO_ERROR 1 +#define BM_P_VMALLOCED 2 + +static int bm_is_locked(struct drbd_bitmap *b) +{ + return test_bit(BM_LOCKED, &b->bm_flags); +} + +#define bm_print_lock_info(m) __bm_print_lock_info(m, __func__) +static void __bm_print_lock_info(struct drbd_conf *mdev, const char *func) +{ + struct drbd_bitmap *b = mdev->bitmap; + if (!__ratelimit(&drbd_ratelimit_state)) + return; + dev_err(DEV, "FIXME %s in %s, bitmap locked for '%s' by %s\n", + current == mdev->receiver.task ? "receiver" : + current == mdev->asender.task ? "asender" : + current == mdev->worker.task ? "worker" : current->comm, + func, b->bm_why ?: "?", + b->bm_task == mdev->receiver.task ? "receiver" : + b->bm_task == mdev->asender.task ? "asender" : + b->bm_task == mdev->worker.task ? "worker" : "?"); +} + +void drbd_bm_lock(struct drbd_conf *mdev, char *why) +{ + struct drbd_bitmap *b = mdev->bitmap; + int trylock_failed; + + if (!b) { + dev_err(DEV, "FIXME no bitmap in drbd_bm_lock!?\n"); + return; + } + + trylock_failed = down_trylock(&b->bm_change); + + if (trylock_failed) { + dev_warn(DEV, "%s going to '%s' but bitmap already locked for '%s' by %s\n", + current == mdev->receiver.task ? "receiver" : + current == mdev->asender.task ? "asender" : + current == mdev->worker.task ? "worker" : current->comm, + why, b->bm_why ?: "?", + b->bm_task == mdev->receiver.task ? "receiver" : + b->bm_task == mdev->asender.task ? "asender" : + b->bm_task == mdev->worker.task ? "worker" : "?"); + down(&b->bm_change); + } + if (__test_and_set_bit(BM_LOCKED, &b->bm_flags)) + dev_err(DEV, "FIXME bitmap already locked in bm_lock\n"); + + b->bm_why = why; + b->bm_task = current; +} + +void drbd_bm_unlock(struct drbd_conf *mdev) +{ + struct drbd_bitmap *b = mdev->bitmap; + if (!b) { + dev_err(DEV, "FIXME no bitmap in drbd_bm_unlock!?\n"); + return; + } + + if (!__test_and_clear_bit(BM_LOCKED, &mdev->bitmap->bm_flags)) + dev_err(DEV, "FIXME bitmap not locked in bm_unlock\n"); + + b->bm_why = NULL; + b->bm_task = NULL; + up(&b->bm_change); +} + +/* word offset to long pointer */ +static unsigned long *__bm_map_paddr(struct drbd_bitmap *b, unsigned long offset, const enum km_type km) +{ + struct page *page; + unsigned long page_nr; + + /* page_nr = (word*sizeof(long)) >> PAGE_SHIFT; */ + page_nr = offset >> (PAGE_SHIFT - LN2_BPL + 3); + BUG_ON(page_nr >= b->bm_number_of_pages); + page = b->bm_pages[page_nr]; + + return (unsigned long *) kmap_atomic(page, km); +} + +static unsigned long * bm_map_paddr(struct drbd_bitmap *b, unsigned long offset) +{ + return __bm_map_paddr(b, offset, KM_IRQ1); +} + +static void __bm_unmap(unsigned long *p_addr, const enum km_type km) +{ + kunmap_atomic(p_addr, km); +}; + +static void bm_unmap(unsigned long *p_addr) +{ + return __bm_unmap(p_addr, KM_IRQ1); +} + +/* long word offset of _bitmap_ sector */ +#define S2W(s) ((s)<<(BM_EXT_SHIFT-BM_BLOCK_SHIFT-LN2_BPL)) +/* word offset from start of bitmap to word number _in_page_ + * modulo longs per page +#define MLPP(X) ((X) % (PAGE_SIZE/sizeof(long)) + hm, well, Philipp thinks gcc might not optimze the % into & (... - 1) + so do it explicitly: + */ +#define MLPP(X) ((X) & ((PAGE_SIZE/sizeof(long))-1)) + +/* Long words per page */ +#define LWPP (PAGE_SIZE/sizeof(long)) + +/* + * actually most functions herein should take a struct drbd_bitmap*, not a + * struct drbd_conf*, but for the debug macros I like to have the mdev around + * to be able to report device specific. + */ + +static void bm_free_pages(struct page **pages, unsigned long number) +{ + unsigned long i; + if (!pages) + return; + + for (i = 0; i < number; i++) { + if (!pages[i]) { + printk(KERN_ALERT "drbd: bm_free_pages tried to free " + "a NULL pointer; i=%lu n=%lu\n", + i, number); + continue; + } + __free_page(pages[i]); + pages[i] = NULL; + } +} + +static void bm_vk_free(void *ptr, int v) +{ + if (v) + vfree(ptr); + else + kfree(ptr); +} + +/* + * "have" and "want" are NUMBER OF PAGES. + */ +static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want) +{ + struct page **old_pages = b->bm_pages; + struct page **new_pages, *page; + unsigned int i, bytes, vmalloced = 0; + unsigned long have = b->bm_number_of_pages; + + BUG_ON(have == 0 && old_pages != NULL); + BUG_ON(have != 0 && old_pages == NULL); + + if (have == want) + return old_pages; + + /* Trying kmalloc first, falling back to vmalloc. + * GFP_KERNEL is ok, as this is done when a lower level disk is + * "attached" to the drbd. Context is receiver thread or cqueue + * thread. As we have no disk yet, we are not in the IO path, + * not even the IO path of the peer. */ + bytes = sizeof(struct page *)*want; + new_pages = kmalloc(bytes, GFP_KERNEL); + if (!new_pages) { + new_pages = vmalloc(bytes); + if (!new_pages) + return NULL; + vmalloced = 1; + } + + memset(new_pages, 0, bytes); + if (want >= have) { + for (i = 0; i < have; i++) + new_pages[i] = old_pages[i]; + for (; i < want; i++) { + page = alloc_page(GFP_HIGHUSER); + if (!page) { + bm_free_pages(new_pages + have, i - have); + bm_vk_free(new_pages, vmalloced); + return NULL; + } + new_pages[i] = page; + } + } else { + for (i = 0; i < want; i++) + new_pages[i] = old_pages[i]; + /* NOT HERE, we are outside the spinlock! + bm_free_pages(old_pages + want, have - want); + */ + } + + if (vmalloced) + set_bit(BM_P_VMALLOCED, &b->bm_flags); + else + clear_bit(BM_P_VMALLOCED, &b->bm_flags); + + return new_pages; +} + +/* + * called on driver init only. TODO call when a device is created. + * allocates the drbd_bitmap, and stores it in mdev->bitmap. + */ +int drbd_bm_init(struct drbd_conf *mdev) +{ + struct drbd_bitmap *b = mdev->bitmap; + WARN_ON(b != NULL); + b = kzalloc(sizeof(struct drbd_bitmap), GFP_KERNEL); + if (!b) + return -ENOMEM; + spin_lock_init(&b->bm_lock); + init_MUTEX(&b->bm_change); + init_waitqueue_head(&b->bm_io_wait); + + mdev->bitmap = b; + + return 0; +} + +sector_t drbd_bm_capacity(struct drbd_conf *mdev) +{ + ERR_IF(!mdev->bitmap) return 0; + return mdev->bitmap->bm_dev_capacity; +} + +/* called on driver unload. TODO: call when a device is destroyed. + */ +void drbd_bm_cleanup(struct drbd_conf *mdev) +{ + ERR_IF (!mdev->bitmap) return; + bm_free_pages(mdev->bitmap->bm_pages, mdev->bitmap->bm_number_of_pages); + bm_vk_free(mdev->bitmap->bm_pages, test_bit(BM_P_VMALLOCED, &mdev->bitmap->bm_flags)); + kfree(mdev->bitmap); + mdev->bitmap = NULL; +} + +/* + * since (b->bm_bits % BITS_PER_LONG) != 0, + * this masks out the remaining bits. + * Returns the number of bits cleared. + */ +static int bm_clear_surplus(struct drbd_bitmap *b) +{ + const unsigned long mask = (1UL << (b->bm_bits & (BITS_PER_LONG-1))) - 1; + size_t w = b->bm_bits >> LN2_BPL; + int cleared = 0; + unsigned long *p_addr, *bm; + + p_addr = bm_map_paddr(b, w); + bm = p_addr + MLPP(w); + if (w < b->bm_words) { + cleared = hweight_long(*bm & ~mask); + *bm &= mask; + w++; bm++; + } + + if (w < b->bm_words) { + cleared += hweight_long(*bm); + *bm = 0; + } + bm_unmap(p_addr); + return cleared; +} + +static void bm_set_surplus(struct drbd_bitmap *b) +{ + const unsigned long mask = (1UL << (b->bm_bits & (BITS_PER_LONG-1))) - 1; + size_t w = b->bm_bits >> LN2_BPL; + unsigned long *p_addr, *bm; + + p_addr = bm_map_paddr(b, w); + bm = p_addr + MLPP(w); + if (w < b->bm_words) { + *bm |= ~mask; + bm++; w++; + } + + if (w < b->bm_words) { + *bm = ~(0UL); + } + bm_unmap(p_addr); +} + +static unsigned long __bm_count_bits(struct drbd_bitmap *b, const int swap_endian) +{ + unsigned long *p_addr, *bm, offset = 0; + unsigned long bits = 0; + unsigned long i, do_now; + + while (offset < b->bm_words) { + i = do_now = min_t(size_t, b->bm_words-offset, LWPP); + p_addr = __bm_map_paddr(b, offset, KM_USER0); + bm = p_addr + MLPP(offset); + while (i--) { +#ifndef __LITTLE_ENDIAN + if (swap_endian) + *bm = lel_to_cpu(*bm); +#endif + bits += hweight_long(*bm++); + } + __bm_unmap(p_addr, KM_USER0); + offset += do_now; + cond_resched(); + } + + return bits; +} + +static unsigned long bm_count_bits(struct drbd_bitmap *b) +{ + return __bm_count_bits(b, 0); +} + +static unsigned long bm_count_bits_swap_endian(struct drbd_bitmap *b) +{ + return __bm_count_bits(b, 1); +} + +/* offset and len in long words.*/ +static void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len) +{ + unsigned long *p_addr, *bm; + size_t do_now, end; + +#define BM_SECTORS_PER_BIT (BM_BLOCK_SIZE/512) + + end = offset + len; + + if (end > b->bm_words) { + printk(KERN_ALERT "drbd: bm_memset end > bm_words\n"); + return; + } + + while (offset < end) { + do_now = min_t(size_t, ALIGN(offset + 1, LWPP), end) - offset; + p_addr = bm_map_paddr(b, offset); + bm = p_addr + MLPP(offset); + if (bm+do_now > p_addr + LWPP) { + printk(KERN_ALERT "drbd: BUG BUG BUG! p_addr:%p bm:%p do_now:%d\n", + p_addr, bm, (int)do_now); + break; /* breaks to after catch_oob_access_end() only! */ + } + memset(bm, c, do_now * sizeof(long)); + bm_unmap(p_addr); + offset += do_now; + } +} + +/* + * make sure the bitmap has enough room for the attached storage, + * if necessary, resize. + * called whenever we may have changed the device size. + * returns -ENOMEM if we could not allocate enough memory, 0 on success. + * In case this is actually a resize, we copy the old bitmap into the new one. + * Otherwise, the bitmap is initialized to all bits set. + */ +int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity) +{ + struct drbd_bitmap *b = mdev->bitmap; + unsigned long bits, words, owords, obits, *p_addr, *bm; + unsigned long want, have, onpages; /* number of pages */ + struct page **npages, **opages = NULL; + int err = 0, growing; + int opages_vmalloced; + + ERR_IF(!b) return -ENOMEM; + + drbd_bm_lock(mdev, "resize"); + + dev_info(DEV, "drbd_bm_resize called with capacity == %llu\n", + (unsigned long long)capacity); + + if (capacity == b->bm_dev_capacity) + goto out; + + opages_vmalloced = test_bit(BM_P_VMALLOCED, &b->bm_flags); + + if (capacity == 0) { + spin_lock_irq(&b->bm_lock); + opages = b->bm_pages; + onpages = b->bm_number_of_pages; + owords = b->bm_words; + b->bm_pages = NULL; + b->bm_number_of_pages = + b->bm_set = + b->bm_bits = + b->bm_words = + b->bm_dev_capacity = 0; + spin_unlock_irq(&b->bm_lock); + bm_free_pages(opages, onpages); + bm_vk_free(opages, opages_vmalloced); + goto out; + } + bits = BM_SECT_TO_BIT(ALIGN(capacity, BM_SECT_PER_BIT)); + + /* if we would use + words = ALIGN(bits,BITS_PER_LONG) >> LN2_BPL; + a 32bit host could present the wrong number of words + to a 64bit host. + */ + words = ALIGN(bits, 64) >> LN2_BPL; + + if (get_ldev(mdev)) { + D_ASSERT((u64)bits <= (((u64)mdev->ldev->md.md_size_sect-MD_BM_OFFSET) << 12)); + put_ldev(mdev); + } + + /* one extra long to catch off by one errors */ + want = ALIGN((words+1)*sizeof(long), PAGE_SIZE) >> PAGE_SHIFT; + have = b->bm_number_of_pages; + if (want == have) { + D_ASSERT(b->bm_pages != NULL); + npages = b->bm_pages; + } else { + if (FAULT_ACTIVE(mdev, DRBD_FAULT_BM_ALLOC)) + npages = NULL; + else + npages = bm_realloc_pages(b, want); + } + + if (!npages) { + err = -ENOMEM; + goto out; + } + + spin_lock_irq(&b->bm_lock); + opages = b->bm_pages; + owords = b->bm_words; + obits = b->bm_bits; + + growing = bits > obits; + if (opages) + bm_set_surplus(b); + + b->bm_pages = npages; + b->bm_number_of_pages = want; + b->bm_bits = bits; + b->bm_words = words; + b->bm_dev_capacity = capacity; + + if (growing) { + bm_memset(b, owords, 0xff, words-owords); + b->bm_set += bits - obits; + } + + if (want < have) { + /* implicit: (opages != NULL) && (opages != npages) */ + bm_free_pages(opages + want, have - want); + } + + p_addr = bm_map_paddr(b, words); + bm = p_addr + MLPP(words); + *bm = DRBD_MAGIC; + bm_unmap(p_addr); + + (void)bm_clear_surplus(b); + + spin_unlock_irq(&b->bm_lock); + if (opages != npages) + bm_vk_free(opages, opages_vmalloced); + if (!growing) + b->bm_set = bm_count_bits(b); + dev_info(DEV, "resync bitmap: bits=%lu words=%lu\n", bits, words); + + out: + drbd_bm_unlock(mdev); + return err; +} + +/* inherently racy: + * if not protected by other means, return value may be out of date when + * leaving this function... + * we still need to lock it, since it is important that this returns + * bm_set == 0 precisely. + * + * maybe bm_set should be atomic_t ? + */ +static unsigned long _drbd_bm_total_weight(struct drbd_conf *mdev) +{ + struct drbd_bitmap *b = mdev->bitmap; + unsigned long s; + unsigned long flags; + + ERR_IF(!b) return 0; + ERR_IF(!b->bm_pages) return 0; + + spin_lock_irqsave(&b->bm_lock, flags); + s = b->bm_set; + spin_unlock_irqrestore(&b->bm_lock, flags); + + return s; +} + +unsigned long drbd_bm_total_weight(struct drbd_conf *mdev) +{ + unsigned long s; + /* if I don't have a disk, I don't know about out-of-sync status */ + if (!get_ldev_if_state(mdev, D_NEGOTIATING)) + return 0; + s = _drbd_bm_total_weight(mdev); + put_ldev(mdev); + return s; +} + +size_t drbd_bm_words(struct drbd_conf *mdev) +{ + struct drbd_bitmap *b = mdev->bitmap; + ERR_IF(!b) return 0; + ERR_IF(!b->bm_pages) return 0; + + return b->bm_words; +} + +unsigned long drbd_bm_bits(struct drbd_conf *mdev) +{ + struct drbd_bitmap *b = mdev->bitmap; + ERR_IF(!b) return 0; + + return b->bm_bits; +} + +/* merge number words from buffer into the bitmap starting at offset. + * buffer[i] is expected to be little endian unsigned long. + * bitmap must be locked by drbd_bm_lock. + * currently only used from receive_bitmap. + */ +void drbd_bm_merge_lel(struct drbd_conf *mdev, size_t offset, size_t number, + unsigned long *buffer) +{ + struct drbd_bitmap *b = mdev->bitmap; + unsigned long *p_addr, *bm; + unsigned long word, bits; + size_t end, do_now; + + end = offset + number; + + ERR_IF(!b) return; + ERR_IF(!b->bm_pages) return; + if (number == 0) + return; + WARN_ON(offset >= b->bm_words); + WARN_ON(end > b->bm_words); + + spin_lock_irq(&b->bm_lock); + while (offset < end) { + do_now = min_t(size_t, ALIGN(offset+1, LWPP), end) - offset; + p_addr = bm_map_paddr(b, offset); + bm = p_addr + MLPP(offset); + offset += do_now; + while (do_now--) { + bits = hweight_long(*bm); + word = *bm | lel_to_cpu(*buffer++); + *bm++ = word; + b->bm_set += hweight_long(word) - bits; + } + bm_unmap(p_addr); + } + /* with 32bit <-> 64bit cross-platform connect + * this is only correct for current usage, + * where we _know_ that we are 64 bit aligned, + * and know that this function is used in this way, too... + */ + if (end == b->bm_words) + b->bm_set -= bm_clear_surplus(b); + + spin_unlock_irq(&b->bm_lock); +} + +/* copy number words from the bitmap starting at offset into the buffer. + * buffer[i] will be little endian unsigned long. + */ +void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset, size_t number, + unsigned long *buffer) +{ + struct drbd_bitmap *b = mdev->bitmap; + unsigned long *p_addr, *bm; + size_t end, do_now; + + end = offset + number; + + ERR_IF(!b) return; + ERR_IF(!b->bm_pages) return; + + spin_lock_irq(&b->bm_lock); + if ((offset >= b->bm_words) || + (end > b->bm_words) || + (number <= 0)) + dev_err(DEV, "offset=%lu number=%lu bm_words=%lu\n", + (unsigned long) offset, + (unsigned long) number, + (unsigned long) b->bm_words); + else { + while (offset < end) { + do_now = min_t(size_t, ALIGN(offset+1, LWPP), end) - offset; + p_addr = bm_map_paddr(b, offset); + bm = p_addr + MLPP(offset); + offset += do_now; + while (do_now--) + *buffer++ = cpu_to_lel(*bm++); + bm_unmap(p_addr); + } + } + spin_unlock_irq(&b->bm_lock); +} + +/* set all bits in the bitmap */ +void drbd_bm_set_all(struct drbd_conf *mdev) +{ + struct drbd_bitmap *b = mdev->bitmap; + ERR_IF(!b) return; + ERR_IF(!b->bm_pages) return; + + spin_lock_irq(&b->bm_lock); + bm_memset(b, 0, 0xff, b->bm_words); + (void)bm_clear_surplus(b); + b->bm_set = b->bm_bits; + spin_unlock_irq(&b->bm_lock); +} + +/* clear all bits in the bitmap */ +void drbd_bm_clear_all(struct drbd_conf *mdev) +{ + struct drbd_bitmap *b = mdev->bitmap; + ERR_IF(!b) return; + ERR_IF(!b->bm_pages) return; + + spin_lock_irq(&b->bm_lock); + bm_memset(b, 0, 0, b->bm_words); + b->bm_set = 0; + spin_unlock_irq(&b->bm_lock); +} + +static void bm_async_io_complete(struct bio *bio, int error) +{ + struct drbd_bitmap *b = bio->bi_private; + int uptodate = bio_flagged(bio, BIO_UPTODATE); + + + /* strange behavior of some lower level drivers... + * fail the request by clearing the uptodate flag, + * but do not return any error?! + * do we want to WARN() on this? */ + if (!error && !uptodate) + error = -EIO; + + if (error) { + /* doh. what now? + * for now, set all bits, and flag MD_IO_ERROR */ + __set_bit(BM_MD_IO_ERROR, &b->bm_flags); + } + if (atomic_dec_and_test(&b->bm_async_io)) + wake_up(&b->bm_io_wait); + + bio_put(bio); +} + +static void bm_page_io_async(struct drbd_conf *mdev, struct drbd_bitmap *b, int page_nr, int rw) __must_hold(local) +{ + /* we are process context. we always get a bio */ + struct bio *bio = bio_alloc(GFP_KERNEL, 1); + unsigned int len; + sector_t on_disk_sector = + mdev->ldev->md.md_offset + mdev->ldev->md.bm_offset; + on_disk_sector += ((sector_t)page_nr) << (PAGE_SHIFT-9); + + /* this might happen with very small + * flexible external meta data device */ + len = min_t(unsigned int, PAGE_SIZE, + (drbd_md_last_sector(mdev->ldev) - on_disk_sector + 1)<<9); + + bio->bi_bdev = mdev->ldev->md_bdev; + bio->bi_sector = on_disk_sector; + bio_add_page(bio, b->bm_pages[page_nr], len, 0); + bio->bi_private = b; + bio->bi_end_io = bm_async_io_complete; + + if (FAULT_ACTIVE(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) { + bio->bi_rw |= rw; + bio_endio(bio, -EIO); + } else { + submit_bio(rw, bio); + } +} + +# if defined(__LITTLE_ENDIAN) + /* nothing to do, on disk == in memory */ +# define bm_cpu_to_lel(x) ((void)0) +# else +void bm_cpu_to_lel(struct drbd_bitmap *b) +{ + /* need to cpu_to_lel all the pages ... + * this may be optimized by using + * cpu_to_lel(-1) == -1 and cpu_to_lel(0) == 0; + * the following is still not optimal, but better than nothing */ + unsigned int i; + unsigned long *p_addr, *bm; + if (b->bm_set == 0) { + /* no page at all; avoid swap if all is 0 */ + i = b->bm_number_of_pages; + } else if (b->bm_set == b->bm_bits) { + /* only the last page */ + i = b->bm_number_of_pages - 1; + } else { + /* all pages */ + i = 0; + } + for (; i < b->bm_number_of_pages; i++) { + p_addr = kmap_atomic(b->bm_pages[i], KM_USER0); + for (bm = p_addr; bm < p_addr + PAGE_SIZE/sizeof(long); bm++) + *bm = cpu_to_lel(*bm); + kunmap_atomic(p_addr, KM_USER0); + } +} +# endif +/* lel_to_cpu == cpu_to_lel */ +# define bm_lel_to_cpu(x) bm_cpu_to_lel(x) + +/* + * bm_rw: read/write the whole bitmap from/to its on disk location. + */ +static int bm_rw(struct drbd_conf *mdev, int rw) __must_hold(local) +{ + struct drbd_bitmap *b = mdev->bitmap; + /* sector_t sector; */ + int bm_words, num_pages, i; + unsigned long now; + char ppb[10]; + int err = 0; + + WARN_ON(!bm_is_locked(b)); + + /* no spinlock here, the drbd_bm_lock should be enough! */ + + bm_words = drbd_bm_words(mdev); + num_pages = (bm_words*sizeof(long) + PAGE_SIZE-1) >> PAGE_SHIFT; + + /* on disk bitmap is little endian */ + if (rw == WRITE) + bm_cpu_to_lel(b); + + now = jiffies; + atomic_set(&b->bm_async_io, num_pages); + __clear_bit(BM_MD_IO_ERROR, &b->bm_flags); + + /* let the layers below us try to merge these bios... */ + for (i = 0; i < num_pages; i++) + bm_page_io_async(mdev, b, i, rw); + + drbd_blk_run_queue(bdev_get_queue(mdev->ldev->md_bdev)); + wait_event(b->bm_io_wait, atomic_read(&b->bm_async_io) == 0); + + if (test_bit(BM_MD_IO_ERROR, &b->bm_flags)) { + dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n"); + drbd_chk_io_error(mdev, 1, TRUE); + err = -EIO; + } + + now = jiffies; + if (rw == WRITE) { + /* swap back endianness */ + bm_lel_to_cpu(b); + /* flush bitmap to stable storage */ + drbd_md_flush(mdev); + } else /* rw == READ */ { + /* just read, if necessary adjust endianness */ + b->bm_set = bm_count_bits_swap_endian(b); + dev_info(DEV, "recounting of set bits took additional %lu jiffies\n", + jiffies - now); + } + now = b->bm_set; + + dev_info(DEV, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n", + ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now); + + return err; +} + +/** + * drbd_bm_read() - Read the whole bitmap from its on disk location. + * @mdev: DRBD device. + */ +int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local) +{ + return bm_rw(mdev, READ); +} + +/** + * drbd_bm_write() - Write the whole bitmap to its on disk location. + * @mdev: DRBD device. + */ +int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local) +{ + return bm_rw(mdev, WRITE); +} + +/** + * drbd_bm_write_sect: Writes a 512 (MD_SECTOR_SIZE) byte piece of the bitmap + * @mdev: DRBD device. + * @enr: Extent number in the resync lru (happens to be sector offset) + * + * The BM_EXT_SIZE is on purpose exactly the amount of the bitmap covered + * by a single sector write. Therefore enr == sector offset from the + * start of the bitmap. + */ +int drbd_bm_write_sect(struct drbd_conf *mdev, unsigned long enr) __must_hold(local) +{ + sector_t on_disk_sector = enr + mdev->ldev->md.md_offset + + mdev->ldev->md.bm_offset; + int bm_words, num_words, offset; + int err = 0; + + mutex_lock(&mdev->md_io_mutex); + bm_words = drbd_bm_words(mdev); + offset = S2W(enr); /* word offset into bitmap */ + num_words = min(S2W(1), bm_words - offset); + if (num_words < S2W(1)) + memset(page_address(mdev->md_io_page), 0, MD_SECTOR_SIZE); + drbd_bm_get_lel(mdev, offset, num_words, + page_address(mdev->md_io_page)); + if (!drbd_md_sync_page_io(mdev, mdev->ldev, on_disk_sector, WRITE)) { + int i; + err = -EIO; + dev_err(DEV, "IO ERROR writing bitmap sector %lu " + "(meta-disk sector %llus)\n", + enr, (unsigned long long)on_disk_sector); + drbd_chk_io_error(mdev, 1, TRUE); + for (i = 0; i < AL_EXT_PER_BM_SECT; i++) + drbd_bm_ALe_set_all(mdev, enr*AL_EXT_PER_BM_SECT+i); + } + mdev->bm_writ_cnt++; + mutex_unlock(&mdev->md_io_mutex); + return err; +} + +/* NOTE + * find_first_bit returns int, we return unsigned long. + * should not make much difference anyways, but ... + * + * this returns a bit number, NOT a sector! + */ +#define BPP_MASK ((1UL << (PAGE_SHIFT+3)) - 1) +static unsigned long __bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo, + const int find_zero_bit, const enum km_type km) +{ + struct drbd_bitmap *b = mdev->bitmap; + unsigned long i = -1UL; + unsigned long *p_addr; + unsigned long bit_offset; /* bit offset of the mapped page. */ + + if (bm_fo > b->bm_bits) { + dev_err(DEV, "bm_fo=%lu bm_bits=%lu\n", bm_fo, b->bm_bits); + } else { + while (bm_fo < b->bm_bits) { + unsigned long offset; + bit_offset = bm_fo & ~BPP_MASK; /* bit offset of the page */ + offset = bit_offset >> LN2_BPL; /* word offset of the page */ + p_addr = __bm_map_paddr(b, offset, km); + + if (find_zero_bit) + i = find_next_zero_bit(p_addr, PAGE_SIZE*8, bm_fo & BPP_MASK); + else + i = find_next_bit(p_addr, PAGE_SIZE*8, bm_fo & BPP_MASK); + + __bm_unmap(p_addr, km); + if (i < PAGE_SIZE*8) { + i = bit_offset + i; + if (i >= b->bm_bits) + break; + goto found; + } + bm_fo = bit_offset + PAGE_SIZE*8; + } + i = -1UL; + } + found: + return i; +} + +static unsigned long bm_find_next(struct drbd_conf *mdev, + unsigned long bm_fo, const int find_zero_bit) +{ + struct drbd_bitmap *b = mdev->bitmap; + unsigned long i = -1UL; + + ERR_IF(!b) return i; + ERR_IF(!b->bm_pages) return i; + + spin_lock_irq(&b->bm_lock); + if (bm_is_locked(b)) + bm_print_lock_info(mdev); + + i = __bm_find_next(mdev, bm_fo, find_zero_bit, KM_IRQ1); + + spin_unlock_irq(&b->bm_lock); + return i; +} + +unsigned long drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo) +{ + return bm_find_next(mdev, bm_fo, 0); +} + +#if 0 +/* not yet needed for anything. */ +unsigned long drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo) +{ + return bm_find_next(mdev, bm_fo, 1); +} +#endif + +/* does not spin_lock_irqsave. + * you must take drbd_bm_lock() first */ +unsigned long _drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo) +{ + /* WARN_ON(!bm_is_locked(mdev)); */ + return __bm_find_next(mdev, bm_fo, 0, KM_USER1); +} + +unsigned long _drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo) +{ + /* WARN_ON(!bm_is_locked(mdev)); */ + return __bm_find_next(mdev, bm_fo, 1, KM_USER1); +} + +/* returns number of bits actually changed. + * for val != 0, we change 0 -> 1, return code positive + * for val == 0, we change 1 -> 0, return code negative + * wants bitnr, not sector. + * expected to be called for only a few bits (e - s about BITS_PER_LONG). + * Must hold bitmap lock already. */ +int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, + unsigned long e, int val, const enum km_type km) +{ + struct drbd_bitmap *b = mdev->bitmap; + unsigned long *p_addr = NULL; + unsigned long bitnr; + unsigned long last_page_nr = -1UL; + int c = 0; + + if (e >= b->bm_bits) { + dev_err(DEV, "ASSERT FAILED: bit_s=%lu bit_e=%lu bm_bits=%lu\n", + s, e, b->bm_bits); + e = b->bm_bits ? b->bm_bits -1 : 0; + } + for (bitnr = s; bitnr <= e; bitnr++) { + unsigned long offset = bitnr>>LN2_BPL; + unsigned long page_nr = offset >> (PAGE_SHIFT - LN2_BPL + 3); + if (page_nr != last_page_nr) { + if (p_addr) + __bm_unmap(p_addr, km); + p_addr = __bm_map_paddr(b, offset, km); + last_page_nr = page_nr; + } + if (val) + c += (0 == __test_and_set_bit(bitnr & BPP_MASK, p_addr)); + else + c -= (0 != __test_and_clear_bit(bitnr & BPP_MASK, p_addr)); + } + if (p_addr) + __bm_unmap(p_addr, km); + b->bm_set += c; + return c; +} + +/* returns number of bits actually changed. + * for val != 0, we change 0 -> 1, return code positive + * for val == 0, we change 1 -> 0, return code negative + * wants bitnr, not sector */ +int bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, + const unsigned long e, int val) +{ + unsigned long flags; + struct drbd_bitmap *b = mdev->bitmap; + int c = 0; + + ERR_IF(!b) return 1; + ERR_IF(!b->bm_pages) return 0; + + spin_lock_irqsave(&b->bm_lock, flags); + if (bm_is_locked(b)) + bm_print_lock_info(mdev); + + c = __bm_change_bits_to(mdev, s, e, val, KM_IRQ1); + + spin_unlock_irqrestore(&b->bm_lock, flags); + return c; +} + +/* returns number of bits changed 0 -> 1 */ +int drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e) +{ + return bm_change_bits_to(mdev, s, e, 1); +} + +/* returns number of bits changed 1 -> 0 */ +int drbd_bm_clear_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e) +{ + return -bm_change_bits_to(mdev, s, e, 0); +} + +/* sets all bits in full words, + * from first_word up to, but not including, last_word */ +static inline void bm_set_full_words_within_one_page(struct drbd_bitmap *b, + int page_nr, int first_word, int last_word) +{ + int i; + int bits; + unsigned long *paddr = kmap_atomic(b->bm_pages[page_nr], KM_USER0); + for (i = first_word; i < last_word; i++) { + bits = hweight_long(paddr[i]); + paddr[i] = ~0UL; + b->bm_set += BITS_PER_LONG - bits; + } + kunmap_atomic(paddr, KM_USER0); +} + +/* Same thing as drbd_bm_set_bits, but without taking the spin_lock_irqsave. + * You must first drbd_bm_lock(). + * Can be called to set the whole bitmap in one go. + * Sets bits from s to e _inclusive_. */ +void _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e) +{ + /* First set_bit from the first bit (s) + * up to the next long boundary (sl), + * then assign full words up to the last long boundary (el), + * then set_bit up to and including the last bit (e). + * + * Do not use memset, because we must account for changes, + * so we need to loop over the words with hweight() anyways. + */ + unsigned long sl = ALIGN(s,BITS_PER_LONG); + unsigned long el = (e+1) & ~((unsigned long)BITS_PER_LONG-1); + int first_page; + int last_page; + int page_nr; + int first_word; + int last_word; + + if (e - s <= 3*BITS_PER_LONG) { + /* don't bother; el and sl may even be wrong. */ + __bm_change_bits_to(mdev, s, e, 1, KM_USER0); + return; + } + + /* difference is large enough that we can trust sl and el */ + + /* bits filling the current long */ + if (sl) + __bm_change_bits_to(mdev, s, sl-1, 1, KM_USER0); + + first_page = sl >> (3 + PAGE_SHIFT); + last_page = el >> (3 + PAGE_SHIFT); + + /* MLPP: modulo longs per page */ + /* LWPP: long words per page */ + first_word = MLPP(sl >> LN2_BPL); + last_word = LWPP; + + /* first and full pages, unless first page == last page */ + for (page_nr = first_page; page_nr < last_page; page_nr++) { + bm_set_full_words_within_one_page(mdev->bitmap, page_nr, first_word, last_word); + cond_resched(); + first_word = 0; + } + + /* last page (respectively only page, for first page == last page) */ + last_word = MLPP(el >> LN2_BPL); + bm_set_full_words_within_one_page(mdev->bitmap, last_page, first_word, last_word); + + /* possibly trailing bits. + * example: (e & 63) == 63, el will be e+1. + * if that even was the very last bit, + * it would trigger an assert in __bm_change_bits_to() + */ + if (el <= e) + __bm_change_bits_to(mdev, el, e, 1, KM_USER0); +} + +/* returns bit state + * wants bitnr, NOT sector. + * inherently racy... area needs to be locked by means of {al,rs}_lru + * 1 ... bit set + * 0 ... bit not set + * -1 ... first out of bounds access, stop testing for bits! + */ +int drbd_bm_test_bit(struct drbd_conf *mdev, const unsigned long bitnr) +{ + unsigned long flags; + struct drbd_bitmap *b = mdev->bitmap; + unsigned long *p_addr; + int i; + + ERR_IF(!b) return 0; + ERR_IF(!b->bm_pages) return 0; + + spin_lock_irqsave(&b->bm_lock, flags); + if (bm_is_locked(b)) + bm_print_lock_info(mdev); + if (bitnr < b->bm_bits) { + unsigned long offset = bitnr>>LN2_BPL; + p_addr = bm_map_paddr(b, offset); + i = test_bit(bitnr & BPP_MASK, p_addr) ? 1 : 0; + bm_unmap(p_addr); + } else if (bitnr == b->bm_bits) { + i = -1; + } else { /* (bitnr > b->bm_bits) */ + dev_err(DEV, "bitnr=%lu > bm_bits=%lu\n", bitnr, b->bm_bits); + i = 0; + } + + spin_unlock_irqrestore(&b->bm_lock, flags); + return i; +} + +/* returns number of bits set in the range [s, e] */ +int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e) +{ + unsigned long flags; + struct drbd_bitmap *b = mdev->bitmap; + unsigned long *p_addr = NULL, page_nr = -1; + unsigned long bitnr; + int c = 0; + size_t w; + + /* If this is called without a bitmap, that is a bug. But just to be + * robust in case we screwed up elsewhere, in that case pretend there + * was one dirty bit in the requested area, so we won't try to do a + * local read there (no bitmap probably implies no disk) */ + ERR_IF(!b) return 1; + ERR_IF(!b->bm_pages) return 1; + + spin_lock_irqsave(&b->bm_lock, flags); + if (bm_is_locked(b)) + bm_print_lock_info(mdev); + for (bitnr = s; bitnr <= e; bitnr++) { + w = bitnr >> LN2_BPL; + if (page_nr != w >> (PAGE_SHIFT - LN2_BPL + 3)) { + page_nr = w >> (PAGE_SHIFT - LN2_BPL + 3); + if (p_addr) + bm_unmap(p_addr); + p_addr = bm_map_paddr(b, w); + } + ERR_IF (bitnr >= b->bm_bits) { + dev_err(DEV, "bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits); + } else { + c += (0 != test_bit(bitnr - (page_nr << (PAGE_SHIFT+3)), p_addr)); + } + } + if (p_addr) + bm_unmap(p_addr); + spin_unlock_irqrestore(&b->bm_lock, flags); + return c; +} + + +/* inherently racy... + * return value may be already out-of-date when this function returns. + * but the general usage is that this is only use during a cstate when bits are + * only cleared, not set, and typically only care for the case when the return + * value is zero, or we already "locked" this "bitmap extent" by other means. + * + * enr is bm-extent number, since we chose to name one sector (512 bytes) + * worth of the bitmap a "bitmap extent". + * + * TODO + * I think since we use it like a reference count, we should use the real + * reference count of some bitmap extent element from some lru instead... + * + */ +int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr) +{ + struct drbd_bitmap *b = mdev->bitmap; + int count, s, e; + unsigned long flags; + unsigned long *p_addr, *bm; + + ERR_IF(!b) return 0; + ERR_IF(!b->bm_pages) return 0; + + spin_lock_irqsave(&b->bm_lock, flags); + if (bm_is_locked(b)) + bm_print_lock_info(mdev); + + s = S2W(enr); + e = min((size_t)S2W(enr+1), b->bm_words); + count = 0; + if (s < b->bm_words) { + int n = e-s; + p_addr = bm_map_paddr(b, s); + bm = p_addr + MLPP(s); + while (n--) + count += hweight_long(*bm++); + bm_unmap(p_addr); + } else { + dev_err(DEV, "start offset (%d) too large in drbd_bm_e_weight\n", s); + } + spin_unlock_irqrestore(&b->bm_lock, flags); + return count; +} + +/* set all bits covered by the AL-extent al_enr */ +unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, unsigned long al_enr) +{ + struct drbd_bitmap *b = mdev->bitmap; + unsigned long *p_addr, *bm; + unsigned long weight; + int count, s, e, i, do_now; + ERR_IF(!b) return 0; + ERR_IF(!b->bm_pages) return 0; + + spin_lock_irq(&b->bm_lock); + if (bm_is_locked(b)) + bm_print_lock_info(mdev); + weight = b->bm_set; + + s = al_enr * BM_WORDS_PER_AL_EXT; + e = min_t(size_t, s + BM_WORDS_PER_AL_EXT, b->bm_words); + /* assert that s and e are on the same page */ + D_ASSERT((e-1) >> (PAGE_SHIFT - LN2_BPL + 3) + == s >> (PAGE_SHIFT - LN2_BPL + 3)); + count = 0; + if (s < b->bm_words) { + i = do_now = e-s; + p_addr = bm_map_paddr(b, s); + bm = p_addr + MLPP(s); + while (i--) { + count += hweight_long(*bm); + *bm = -1UL; + bm++; + } + bm_unmap(p_addr); + b->bm_set += do_now*BITS_PER_LONG - count; + if (e == b->bm_words) + b->bm_set -= bm_clear_surplus(b); + } else { + dev_err(DEV, "start offset (%d) too large in drbd_bm_ALe_set_all\n", s); + } + weight = b->bm_set - weight; + spin_unlock_irq(&b->bm_lock); + return weight; +} diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h new file mode 100644 index 000000000000..8da602e010bb --- /dev/null +++ b/drivers/block/drbd/drbd_int.h @@ -0,0 +1,2258 @@ +/* + drbd_int.h + + This file is part of DRBD by Philipp Reisner and Lars Ellenberg. + + Copyright (C) 2001-2008, LINBIT Information Technologies GmbH. + Copyright (C) 1999-2008, Philipp Reisner . + Copyright (C) 2002-2008, Lars Ellenberg . + + drbd is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + drbd is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with drbd; see the file COPYING. If not, write to + the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + +*/ + +#ifndef _DRBD_INT_H +#define _DRBD_INT_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef __CHECKER__ +# define __protected_by(x) __attribute__((require_context(x,1,999,"rdwr"))) +# define __protected_read_by(x) __attribute__((require_context(x,1,999,"read"))) +# define __protected_write_by(x) __attribute__((require_context(x,1,999,"write"))) +# define __must_hold(x) __attribute__((context(x,1,1), require_context(x,1,999,"call"))) +#else +# define __protected_by(x) +# define __protected_read_by(x) +# define __protected_write_by(x) +# define __must_hold(x) +#endif + +#define __no_warn(lock, stmt) do { __acquire(lock); stmt; __release(lock); } while (0) + +/* module parameter, defined in drbd_main.c */ +extern unsigned int minor_count; +extern int disable_sendpage; +extern int allow_oos; +extern unsigned int cn_idx; + +#ifdef CONFIG_DRBD_FAULT_INJECTION +extern int enable_faults; +extern int fault_rate; +extern int fault_devs; +#endif + +extern char usermode_helper[]; + + +#ifndef TRUE +#define TRUE 1 +#endif +#ifndef FALSE +#define FALSE 0 +#endif + +/* I don't remember why XCPU ... + * This is used to wake the asender, + * and to interrupt sending the sending task + * on disconnect. + */ +#define DRBD_SIG SIGXCPU + +/* This is used to stop/restart our threads. + * Cannot use SIGTERM nor SIGKILL, since these + * are sent out by init on runlevel changes + * I choose SIGHUP for now. + */ +#define DRBD_SIGKILL SIGHUP + +/* All EEs on the free list should have ID_VACANT (== 0) + * freshly allocated EEs get !ID_VACANT (== 1) + * so if it says "cannot dereference null pointer at adress 0x00000001", + * it is most likely one of these :( */ + +#define ID_IN_SYNC (4711ULL) +#define ID_OUT_OF_SYNC (4712ULL) + +#define ID_SYNCER (-1ULL) +#define ID_VACANT 0 +#define is_syncer_block_id(id) ((id) == ID_SYNCER) + +struct drbd_conf; + + +/* to shorten dev_warn(DEV, "msg"); and relatives statements */ +#define DEV (disk_to_dev(mdev->vdisk)) + +#define D_ASSERT(exp) if (!(exp)) \ + dev_err(DEV, "ASSERT( " #exp " ) in %s:%d\n", __FILE__, __LINE__) + +#define ERR_IF(exp) if (({ \ + int _b = (exp) != 0; \ + if (_b) dev_err(DEV, "%s: (%s) in %s:%d\n", \ + __func__, #exp, __FILE__, __LINE__); \ + _b; \ + })) + +/* Defines to control fault insertion */ +enum { + DRBD_FAULT_MD_WR = 0, /* meta data write */ + DRBD_FAULT_MD_RD = 1, /* read */ + DRBD_FAULT_RS_WR = 2, /* resync */ + DRBD_FAULT_RS_RD = 3, + DRBD_FAULT_DT_WR = 4, /* data */ + DRBD_FAULT_DT_RD = 5, + DRBD_FAULT_DT_RA = 6, /* data read ahead */ + DRBD_FAULT_BM_ALLOC = 7, /* bitmap allocation */ + DRBD_FAULT_AL_EE = 8, /* alloc ee */ + + DRBD_FAULT_MAX, +}; + +extern void trace_drbd_resync(struct drbd_conf *mdev, int level, const char *fmt, ...); + +#ifdef CONFIG_DRBD_FAULT_INJECTION +extern unsigned int +_drbd_insert_fault(struct drbd_conf *mdev, unsigned int type); +static inline int +drbd_insert_fault(struct drbd_conf *mdev, unsigned int type) { + return fault_rate && + (enable_faults & (1< P_MAY_IGNORE) ... */ + P_MAX_OPT_CMD = 0x101, + + /* special command ids for handshake */ + + P_HAND_SHAKE_M = 0xfff1, /* First Packet on the MetaSock */ + P_HAND_SHAKE_S = 0xfff2, /* First Packet on the Socket */ + + P_HAND_SHAKE = 0xfffe /* FIXED for the next century! */ +}; + +static inline const char *cmdname(enum drbd_packets cmd) +{ + /* THINK may need to become several global tables + * when we want to support more than + * one PRO_VERSION */ + static const char *cmdnames[] = { + [P_DATA] = "Data", + [P_DATA_REPLY] = "DataReply", + [P_RS_DATA_REPLY] = "RSDataReply", + [P_BARRIER] = "Barrier", + [P_BITMAP] = "ReportBitMap", + [P_BECOME_SYNC_TARGET] = "BecomeSyncTarget", + [P_BECOME_SYNC_SOURCE] = "BecomeSyncSource", + [P_UNPLUG_REMOTE] = "UnplugRemote", + [P_DATA_REQUEST] = "DataRequest", + [P_RS_DATA_REQUEST] = "RSDataRequest", + [P_SYNC_PARAM] = "SyncParam", + [P_SYNC_PARAM89] = "SyncParam89", + [P_PROTOCOL] = "ReportProtocol", + [P_UUIDS] = "ReportUUIDs", + [P_SIZES] = "ReportSizes", + [P_STATE] = "ReportState", + [P_SYNC_UUID] = "ReportSyncUUID", + [P_AUTH_CHALLENGE] = "AuthChallenge", + [P_AUTH_RESPONSE] = "AuthResponse", + [P_PING] = "Ping", + [P_PING_ACK] = "PingAck", + [P_RECV_ACK] = "RecvAck", + [P_WRITE_ACK] = "WriteAck", + [P_RS_WRITE_ACK] = "RSWriteAck", + [P_DISCARD_ACK] = "DiscardAck", + [P_NEG_ACK] = "NegAck", + [P_NEG_DREPLY] = "NegDReply", + [P_NEG_RS_DREPLY] = "NegRSDReply", + [P_BARRIER_ACK] = "BarrierAck", + [P_STATE_CHG_REQ] = "StateChgRequest", + [P_STATE_CHG_REPLY] = "StateChgReply", + [P_OV_REQUEST] = "OVRequest", + [P_OV_REPLY] = "OVReply", + [P_OV_RESULT] = "OVResult", + [P_MAX_CMD] = NULL, + }; + + if (cmd == P_HAND_SHAKE_M) + return "HandShakeM"; + if (cmd == P_HAND_SHAKE_S) + return "HandShakeS"; + if (cmd == P_HAND_SHAKE) + return "HandShake"; + if (cmd >= P_MAX_CMD) + return "Unknown"; + return cmdnames[cmd]; +} + +/* for sending/receiving the bitmap, + * possibly in some encoding scheme */ +struct bm_xfer_ctx { + /* "const" + * stores total bits and long words + * of the bitmap, so we don't need to + * call the accessor functions over and again. */ + unsigned long bm_bits; + unsigned long bm_words; + /* during xfer, current position within the bitmap */ + unsigned long bit_offset; + unsigned long word_offset; + + /* statistics; index: (h->command == P_BITMAP) */ + unsigned packets[2]; + unsigned bytes[2]; +}; + +extern void INFO_bm_xfer_stats(struct drbd_conf *mdev, + const char *direction, struct bm_xfer_ctx *c); + +static inline void bm_xfer_ctx_bit_to_word_offset(struct bm_xfer_ctx *c) +{ + /* word_offset counts "native long words" (32 or 64 bit), + * aligned at 64 bit. + * Encoded packet may end at an unaligned bit offset. + * In case a fallback clear text packet is transmitted in + * between, we adjust this offset back to the last 64bit + * aligned "native long word", which makes coding and decoding + * the plain text bitmap much more convenient. */ +#if BITS_PER_LONG == 64 + c->word_offset = c->bit_offset >> 6; +#elif BITS_PER_LONG == 32 + c->word_offset = c->bit_offset >> 5; + c->word_offset &= ~(1UL); +#else +# error "unsupported BITS_PER_LONG" +#endif +} + +#ifndef __packed +#define __packed __attribute__((packed)) +#endif + +/* This is the layout for a packet on the wire. + * The byteorder is the network byte order. + * (except block_id and barrier fields. + * these are pointers to local structs + * and have no relevance for the partner, + * which just echoes them as received.) + * + * NOTE that the payload starts at a long aligned offset, + * regardless of 32 or 64 bit arch! + */ +struct p_header { + u32 magic; + u16 command; + u16 length; /* bytes of data after this header */ + u8 payload[0]; +} __packed; +/* 8 bytes. packet FIXED for the next century! */ + +/* + * short commands, packets without payload, plain p_header: + * P_PING + * P_PING_ACK + * P_BECOME_SYNC_TARGET + * P_BECOME_SYNC_SOURCE + * P_UNPLUG_REMOTE + */ + +/* + * commands with out-of-struct payload: + * P_BITMAP (no additional fields) + * P_DATA, P_DATA_REPLY (see p_data) + * P_COMPRESSED_BITMAP (see receive_compressed_bitmap) + */ + +/* these defines must not be changed without changing the protocol version */ +#define DP_HARDBARRIER 1 +#define DP_RW_SYNC 2 +#define DP_MAY_SET_IN_SYNC 4 + +struct p_data { + struct p_header head; + u64 sector; /* 64 bits sector number */ + u64 block_id; /* to identify the request in protocol B&C */ + u32 seq_num; + u32 dp_flags; +} __packed; + +/* + * commands which share a struct: + * p_block_ack: + * P_RECV_ACK (proto B), P_WRITE_ACK (proto C), + * P_DISCARD_ACK (proto C, two-primaries conflict detection) + * p_block_req: + * P_DATA_REQUEST, P_RS_DATA_REQUEST + */ +struct p_block_ack { + struct p_header head; + u64 sector; + u64 block_id; + u32 blksize; + u32 seq_num; +} __packed; + + +struct p_block_req { + struct p_header head; + u64 sector; + u64 block_id; + u32 blksize; + u32 pad; /* to multiple of 8 Byte */ +} __packed; + +/* + * commands with their own struct for additional fields: + * P_HAND_SHAKE + * P_BARRIER + * P_BARRIER_ACK + * P_SYNC_PARAM + * ReportParams + */ + +struct p_handshake { + struct p_header head; /* 8 bytes */ + u32 protocol_min; + u32 feature_flags; + u32 protocol_max; + + /* should be more than enough for future enhancements + * for now, feature_flags and the reserverd array shall be zero. + */ + + u32 _pad; + u64 reserverd[7]; +} __packed; +/* 80 bytes, FIXED for the next century */ + +struct p_barrier { + struct p_header head; + u32 barrier; /* barrier number _handle_ only */ + u32 pad; /* to multiple of 8 Byte */ +} __packed; + +struct p_barrier_ack { + struct p_header head; + u32 barrier; + u32 set_size; +} __packed; + +struct p_rs_param { + struct p_header head; + u32 rate; + + /* Since protocol version 88 and higher. */ + char verify_alg[0]; +} __packed; + +struct p_rs_param_89 { + struct p_header head; + u32 rate; + /* protocol version 89: */ + char verify_alg[SHARED_SECRET_MAX]; + char csums_alg[SHARED_SECRET_MAX]; +} __packed; + +struct p_protocol { + struct p_header head; + u32 protocol; + u32 after_sb_0p; + u32 after_sb_1p; + u32 after_sb_2p; + u32 want_lose; + u32 two_primaries; + + /* Since protocol version 87 and higher. */ + char integrity_alg[0]; + +} __packed; + +struct p_uuids { + struct p_header head; + u64 uuid[UI_EXTENDED_SIZE]; +} __packed; + +struct p_rs_uuid { + struct p_header head; + u64 uuid; +} __packed; + +struct p_sizes { + struct p_header head; + u64 d_size; /* size of disk */ + u64 u_size; /* user requested size */ + u64 c_size; /* current exported size */ + u32 max_segment_size; /* Maximal size of a BIO */ + u32 queue_order_type; +} __packed; + +struct p_state { + struct p_header head; + u32 state; +} __packed; + +struct p_req_state { + struct p_header head; + u32 mask; + u32 val; +} __packed; + +struct p_req_state_reply { + struct p_header head; + u32 retcode; +} __packed; + +struct p_drbd06_param { + u64 size; + u32 state; + u32 blksize; + u32 protocol; + u32 version; + u32 gen_cnt[5]; + u32 bit_map_gen[5]; +} __packed; + +struct p_discard { + struct p_header head; + u64 block_id; + u32 seq_num; + u32 pad; +} __packed; + +/* Valid values for the encoding field. + * Bump proto version when changing this. */ +enum drbd_bitmap_code { + /* RLE_VLI_Bytes = 0, + * and other bit variants had been defined during + * algorithm evaluation. */ + RLE_VLI_Bits = 2, +}; + +struct p_compressed_bm { + struct p_header head; + /* (encoding & 0x0f): actual encoding, see enum drbd_bitmap_code + * (encoding & 0x80): polarity (set/unset) of first runlength + * ((encoding >> 4) & 0x07): pad_bits, number of trailing zero bits + * used to pad up to head.length bytes + */ + u8 encoding; + + u8 code[0]; +} __packed; + +/* DCBP: Drbd Compressed Bitmap Packet ... */ +static inline enum drbd_bitmap_code +DCBP_get_code(struct p_compressed_bm *p) +{ + return (enum drbd_bitmap_code)(p->encoding & 0x0f); +} + +static inline void +DCBP_set_code(struct p_compressed_bm *p, enum drbd_bitmap_code code) +{ + BUG_ON(code & ~0xf); + p->encoding = (p->encoding & ~0xf) | code; +} + +static inline int +DCBP_get_start(struct p_compressed_bm *p) +{ + return (p->encoding & 0x80) != 0; +} + +static inline void +DCBP_set_start(struct p_compressed_bm *p, int set) +{ + p->encoding = (p->encoding & ~0x80) | (set ? 0x80 : 0); +} + +static inline int +DCBP_get_pad_bits(struct p_compressed_bm *p) +{ + return (p->encoding >> 4) & 0x7; +} + +static inline void +DCBP_set_pad_bits(struct p_compressed_bm *p, int n) +{ + BUG_ON(n & ~0x7); + p->encoding = (p->encoding & (~0x7 << 4)) | (n << 4); +} + +/* one bitmap packet, including the p_header, + * should fit within one _architecture independend_ page. + * so we need to use the fixed size 4KiB page size + * most architechtures have used for a long time. + */ +#define BM_PACKET_PAYLOAD_BYTES (4096 - sizeof(struct p_header)) +#define BM_PACKET_WORDS (BM_PACKET_PAYLOAD_BYTES/sizeof(long)) +#define BM_PACKET_VLI_BYTES_MAX (4096 - sizeof(struct p_compressed_bm)) +#if (PAGE_SIZE < 4096) +/* drbd_send_bitmap / receive_bitmap would break horribly */ +#error "PAGE_SIZE too small" +#endif + +union p_polymorph { + struct p_header header; + struct p_handshake handshake; + struct p_data data; + struct p_block_ack block_ack; + struct p_barrier barrier; + struct p_barrier_ack barrier_ack; + struct p_rs_param_89 rs_param_89; + struct p_protocol protocol; + struct p_sizes sizes; + struct p_uuids uuids; + struct p_state state; + struct p_req_state req_state; + struct p_req_state_reply req_state_reply; + struct p_block_req block_req; +} __packed; + +/**********************************************************************/ +enum drbd_thread_state { + None, + Running, + Exiting, + Restarting +}; + +struct drbd_thread { + spinlock_t t_lock; + struct task_struct *task; + struct completion stop; + enum drbd_thread_state t_state; + int (*function) (struct drbd_thread *); + struct drbd_conf *mdev; + int reset_cpu_mask; +}; + +static inline enum drbd_thread_state get_t_state(struct drbd_thread *thi) +{ + /* THINK testing the t_state seems to be uncritical in all cases + * (but thread_{start,stop}), so we can read it *without* the lock. + * --lge */ + + smp_rmb(); + return thi->t_state; +} + + +/* + * Having this as the first member of a struct provides sort of "inheritance". + * "derived" structs can be "drbd_queue_work()"ed. + * The callback should know and cast back to the descendant struct. + * drbd_request and drbd_epoch_entry are descendants of drbd_work. + */ +struct drbd_work; +typedef int (*drbd_work_cb)(struct drbd_conf *, struct drbd_work *, int cancel); +struct drbd_work { + struct list_head list; + drbd_work_cb cb; +}; + +struct drbd_tl_epoch; +struct drbd_request { + struct drbd_work w; + struct drbd_conf *mdev; + + /* if local IO is not allowed, will be NULL. + * if local IO _is_ allowed, holds the locally submitted bio clone, + * or, after local IO completion, the ERR_PTR(error). + * see drbd_endio_pri(). */ + struct bio *private_bio; + + struct hlist_node colision; + sector_t sector; + unsigned int size; + unsigned int epoch; /* barrier_nr */ + + /* barrier_nr: used to check on "completion" whether this req was in + * the current epoch, and we therefore have to close it, + * starting a new epoch... + */ + + /* up to here, the struct layout is identical to drbd_epoch_entry; + * we might be able to use that to our advantage... */ + + struct list_head tl_requests; /* ring list in the transfer log */ + struct bio *master_bio; /* master bio pointer */ + unsigned long rq_state; /* see comments above _req_mod() */ + int seq_num; + unsigned long start_time; +}; + +struct drbd_tl_epoch { + struct drbd_work w; + struct list_head requests; /* requests before */ + struct drbd_tl_epoch *next; /* pointer to the next barrier */ + unsigned int br_number; /* the barriers identifier. */ + int n_req; /* number of requests attached before this barrier */ +}; + +struct drbd_request; + +/* These Tl_epoch_entries may be in one of 6 lists: + active_ee .. data packet being written + sync_ee .. syncer block being written + done_ee .. block written, need to send P_WRITE_ACK + read_ee .. [RS]P_DATA_REQUEST being read +*/ + +struct drbd_epoch { + struct list_head list; + unsigned int barrier_nr; + atomic_t epoch_size; /* increased on every request added. */ + atomic_t active; /* increased on every req. added, and dec on every finished. */ + unsigned long flags; +}; + +/* drbd_epoch flag bits */ +enum { + DE_BARRIER_IN_NEXT_EPOCH_ISSUED, + DE_BARRIER_IN_NEXT_EPOCH_DONE, + DE_CONTAINS_A_BARRIER, + DE_HAVE_BARRIER_NUMBER, + DE_IS_FINISHING, +}; + +enum epoch_event { + EV_PUT, + EV_GOT_BARRIER_NR, + EV_BARRIER_DONE, + EV_BECAME_LAST, + EV_TRACE_FLUSH, /* TRACE_ are not real events, only used for tracing */ + EV_TRACE_ADD_BARRIER, /* Doing the first write as a barrier write */ + EV_TRACE_SETTING_BI, /* Barrier is expressed with the first write of the next epoch */ + EV_TRACE_ALLOC, + EV_TRACE_FREE, + EV_CLEANUP = 32, /* used as flag */ +}; + +struct drbd_epoch_entry { + struct drbd_work w; + struct drbd_conf *mdev; + struct bio *private_bio; + struct hlist_node colision; + sector_t sector; + unsigned int size; + struct drbd_epoch *epoch; + + /* up to here, the struct layout is identical to drbd_request; + * we might be able to use that to our advantage... */ + + unsigned int flags; + u64 block_id; +}; + +struct drbd_wq_barrier { + struct drbd_work w; + struct completion done; +}; + +struct digest_info { + int digest_size; + void *digest; +}; + +/* ee flag bits */ +enum { + __EE_CALL_AL_COMPLETE_IO, + __EE_CONFLICT_PENDING, + __EE_MAY_SET_IN_SYNC, + __EE_IS_BARRIER, +}; +#define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO) +#define EE_CONFLICT_PENDING (1<<__EE_CONFLICT_PENDING) +#define EE_MAY_SET_IN_SYNC (1<<__EE_MAY_SET_IN_SYNC) +#define EE_IS_BARRIER (1<<__EE_IS_BARRIER) + +/* global flag bits */ +enum { + CREATE_BARRIER, /* next P_DATA is preceeded by a P_BARRIER */ + SIGNAL_ASENDER, /* whether asender wants to be interrupted */ + SEND_PING, /* whether asender should send a ping asap */ + + STOP_SYNC_TIMER, /* tell timer to cancel itself */ + UNPLUG_QUEUED, /* only relevant with kernel 2.4 */ + UNPLUG_REMOTE, /* sending a "UnplugRemote" could help */ + MD_DIRTY, /* current uuids and flags not yet on disk */ + DISCARD_CONCURRENT, /* Set on one node, cleared on the peer! */ + USE_DEGR_WFC_T, /* degr-wfc-timeout instead of wfc-timeout. */ + CLUSTER_ST_CHANGE, /* Cluster wide state change going on... */ + CL_ST_CHG_SUCCESS, + CL_ST_CHG_FAIL, + CRASHED_PRIMARY, /* This node was a crashed primary. + * Gets cleared when the state.conn + * goes into C_CONNECTED state. */ + WRITE_BM_AFTER_RESYNC, /* A kmalloc() during resync failed */ + NO_BARRIER_SUPP, /* underlying block device doesn't implement barriers */ + CONSIDER_RESYNC, + + MD_NO_BARRIER, /* meta data device does not support barriers, + so don't even try */ + SUSPEND_IO, /* suspend application io */ + BITMAP_IO, /* suspend application io; + once no more io in flight, start bitmap io */ + BITMAP_IO_QUEUED, /* Started bitmap IO */ + RESYNC_AFTER_NEG, /* Resync after online grow after the attach&negotiate finished. */ + NET_CONGESTED, /* The data socket is congested */ + + CONFIG_PENDING, /* serialization of (re)configuration requests. + * if set, also prevents the device from dying */ + DEVICE_DYING, /* device became unconfigured, + * but worker thread is still handling the cleanup. + * reconfiguring (nl_disk_conf, nl_net_conf) is dissalowed, + * while this is set. */ + RESIZE_PENDING, /* Size change detected locally, waiting for the response from + * the peer, if it changed there as well. */ +}; + +struct drbd_bitmap; /* opaque for drbd_conf */ + +/* TODO sort members for performance + * MAYBE group them further */ + +/* THINK maybe we actually want to use the default "event/%s" worker threads + * or similar in linux 2.6, which uses per cpu data and threads. + * + * To be general, this might need a spin_lock member. + * For now, please use the mdev->req_lock to protect list_head, + * see drbd_queue_work below. + */ +struct drbd_work_queue { + struct list_head q; + struct semaphore s; /* producers up it, worker down()s it */ + spinlock_t q_lock; /* to protect the list. */ +}; + +struct drbd_socket { + struct drbd_work_queue work; + struct mutex mutex; + struct socket *socket; + /* this way we get our + * send/receive buffers off the stack */ + union p_polymorph sbuf; + union p_polymorph rbuf; +}; + +struct drbd_md { + u64 md_offset; /* sector offset to 'super' block */ + + u64 la_size_sect; /* last agreed size, unit sectors */ + u64 uuid[UI_SIZE]; + u64 device_uuid; + u32 flags; + u32 md_size_sect; + + s32 al_offset; /* signed relative sector offset to al area */ + s32 bm_offset; /* signed relative sector offset to bitmap */ + + /* u32 al_nr_extents; important for restoring the AL + * is stored into sync_conf.al_extents, which in turn + * gets applied to act_log->nr_elements + */ +}; + +/* for sync_conf and other types... */ +#define NL_PACKET(name, number, fields) struct name { fields }; +#define NL_INTEGER(pn,pr,member) int member; +#define NL_INT64(pn,pr,member) __u64 member; +#define NL_BIT(pn,pr,member) unsigned member:1; +#define NL_STRING(pn,pr,member,len) unsigned char member[len]; int member ## _len; +#include "linux/drbd_nl.h" + +struct drbd_backing_dev { + struct block_device *backing_bdev; + struct block_device *md_bdev; + struct file *lo_file; + struct file *md_file; + struct drbd_md md; + struct disk_conf dc; /* The user provided config... */ + sector_t known_size; /* last known size of that backing device */ +}; + +struct drbd_md_io { + struct drbd_conf *mdev; + struct completion event; + int error; +}; + +struct bm_io_work { + struct drbd_work w; + char *why; + int (*io_fn)(struct drbd_conf *mdev); + void (*done)(struct drbd_conf *mdev, int rv); +}; + +enum write_ordering_e { + WO_none, + WO_drain_io, + WO_bdev_flush, + WO_bio_barrier +}; + +struct drbd_conf { + /* things that are stored as / read from meta data on disk */ + unsigned long flags; + + /* configured by drbdsetup */ + struct net_conf *net_conf; /* protected by get_net_conf() and put_net_conf() */ + struct syncer_conf sync_conf; + struct drbd_backing_dev *ldev __protected_by(local); + + sector_t p_size; /* partner's disk size */ + struct request_queue *rq_queue; + struct block_device *this_bdev; + struct gendisk *vdisk; + + struct drbd_socket data; /* data/barrier/cstate/parameter packets */ + struct drbd_socket meta; /* ping/ack (metadata) packets */ + int agreed_pro_version; /* actually used protocol version */ + unsigned long last_received; /* in jiffies, either socket */ + unsigned int ko_count; + struct drbd_work resync_work, + unplug_work, + md_sync_work; + struct timer_list resync_timer; + struct timer_list md_sync_timer; + + /* Used after attach while negotiating new disk state. */ + union drbd_state new_state_tmp; + + union drbd_state state; + wait_queue_head_t misc_wait; + wait_queue_head_t state_wait; /* upon each state change. */ + unsigned int send_cnt; + unsigned int recv_cnt; + unsigned int read_cnt; + unsigned int writ_cnt; + unsigned int al_writ_cnt; + unsigned int bm_writ_cnt; + atomic_t ap_bio_cnt; /* Requests we need to complete */ + atomic_t ap_pending_cnt; /* AP data packets on the wire, ack expected */ + atomic_t rs_pending_cnt; /* RS request/data packets on the wire */ + atomic_t unacked_cnt; /* Need to send replys for */ + atomic_t local_cnt; /* Waiting for local completion */ + atomic_t net_cnt; /* Users of net_conf */ + spinlock_t req_lock; + struct drbd_tl_epoch *unused_spare_tle; /* for pre-allocation */ + struct drbd_tl_epoch *newest_tle; + struct drbd_tl_epoch *oldest_tle; + struct list_head out_of_sequence_requests; + struct hlist_head *tl_hash; + unsigned int tl_hash_s; + + /* blocks to sync in this run [unit BM_BLOCK_SIZE] */ + unsigned long rs_total; + /* number of sync IOs that failed in this run */ + unsigned long rs_failed; + /* Syncer's start time [unit jiffies] */ + unsigned long rs_start; + /* cumulated time in PausedSyncX state [unit jiffies] */ + unsigned long rs_paused; + /* block not up-to-date at mark [unit BM_BLOCK_SIZE] */ + unsigned long rs_mark_left; + /* marks's time [unit jiffies] */ + unsigned long rs_mark_time; + /* skipped because csum was equeal [unit BM_BLOCK_SIZE] */ + unsigned long rs_same_csum; + + /* where does the admin want us to start? (sector) */ + sector_t ov_start_sector; + /* where are we now? (sector) */ + sector_t ov_position; + /* Start sector of out of sync range (to merge printk reporting). */ + sector_t ov_last_oos_start; + /* size of out-of-sync range in sectors. */ + sector_t ov_last_oos_size; + unsigned long ov_left; /* in bits */ + struct crypto_hash *csums_tfm; + struct crypto_hash *verify_tfm; + + struct drbd_thread receiver; + struct drbd_thread worker; + struct drbd_thread asender; + struct drbd_bitmap *bitmap; + unsigned long bm_resync_fo; /* bit offset for drbd_bm_find_next */ + + /* Used to track operations of resync... */ + struct lru_cache *resync; + /* Number of locked elements in resync LRU */ + unsigned int resync_locked; + /* resync extent number waiting for application requests */ + unsigned int resync_wenr; + + int open_cnt; + u64 *p_uuid; + struct drbd_epoch *current_epoch; + spinlock_t epoch_lock; + unsigned int epochs; + enum write_ordering_e write_ordering; + struct list_head active_ee; /* IO in progress */ + struct list_head sync_ee; /* IO in progress */ + struct list_head done_ee; /* send ack */ + struct list_head read_ee; /* IO in progress */ + struct list_head net_ee; /* zero-copy network send in progress */ + struct hlist_head *ee_hash; /* is proteced by req_lock! */ + unsigned int ee_hash_s; + + /* this one is protected by ee_lock, single thread */ + struct drbd_epoch_entry *last_write_w_barrier; + + int next_barrier_nr; + struct hlist_head *app_reads_hash; /* is proteced by req_lock */ + struct list_head resync_reads; + atomic_t pp_in_use; + wait_queue_head_t ee_wait; + struct page *md_io_page; /* one page buffer for md_io */ + struct page *md_io_tmpp; /* for logical_block_size != 512 */ + struct mutex md_io_mutex; /* protects the md_io_buffer */ + spinlock_t al_lock; + wait_queue_head_t al_wait; + struct lru_cache *act_log; /* activity log */ + unsigned int al_tr_number; + int al_tr_cycle; + int al_tr_pos; /* position of the next transaction in the journal */ + struct crypto_hash *cram_hmac_tfm; + struct crypto_hash *integrity_w_tfm; /* to be used by the worker thread */ + struct crypto_hash *integrity_r_tfm; /* to be used by the receiver thread */ + void *int_dig_out; + void *int_dig_in; + void *int_dig_vv; + wait_queue_head_t seq_wait; + atomic_t packet_seq; + unsigned int peer_seq; + spinlock_t peer_seq_lock; + unsigned int minor; + unsigned long comm_bm_set; /* communicated number of set bits. */ + cpumask_var_t cpu_mask; + struct bm_io_work bm_io_work; + u64 ed_uuid; /* UUID of the exposed data */ + struct mutex state_mutex; + char congestion_reason; /* Why we where congested... */ +}; + +static inline struct drbd_conf *minor_to_mdev(unsigned int minor) +{ + struct drbd_conf *mdev; + + mdev = minor < minor_count ? minor_table[minor] : NULL; + + return mdev; +} + +static inline unsigned int mdev_to_minor(struct drbd_conf *mdev) +{ + return mdev->minor; +} + +/* returns 1 if it was successfull, + * returns 0 if there was no data socket. + * so wherever you are going to use the data.socket, e.g. do + * if (!drbd_get_data_sock(mdev)) + * return 0; + * CODE(); + * drbd_put_data_sock(mdev); + */ +static inline int drbd_get_data_sock(struct drbd_conf *mdev) +{ + mutex_lock(&mdev->data.mutex); + /* drbd_disconnect() could have called drbd_free_sock() + * while we were waiting in down()... */ + if (unlikely(mdev->data.socket == NULL)) { + mutex_unlock(&mdev->data.mutex); + return 0; + } + return 1; +} + +static inline void drbd_put_data_sock(struct drbd_conf *mdev) +{ + mutex_unlock(&mdev->data.mutex); +} + +/* + * function declarations + *************************/ + +/* drbd_main.c */ + +enum chg_state_flags { + CS_HARD = 1, + CS_VERBOSE = 2, + CS_WAIT_COMPLETE = 4, + CS_SERIALIZE = 8, + CS_ORDERED = CS_WAIT_COMPLETE + CS_SERIALIZE, +}; + +extern void drbd_init_set_defaults(struct drbd_conf *mdev); +extern int drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f, + union drbd_state mask, union drbd_state val); +extern void drbd_force_state(struct drbd_conf *, union drbd_state, + union drbd_state); +extern int _drbd_request_state(struct drbd_conf *, union drbd_state, + union drbd_state, enum chg_state_flags); +extern int __drbd_set_state(struct drbd_conf *, union drbd_state, + enum chg_state_flags, struct completion *done); +extern void print_st_err(struct drbd_conf *, union drbd_state, + union drbd_state, int); +extern int drbd_thread_start(struct drbd_thread *thi); +extern void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait); +#ifdef CONFIG_SMP +extern void drbd_thread_current_set_cpu(struct drbd_conf *mdev); +extern void drbd_calc_cpu_mask(struct drbd_conf *mdev); +#else +#define drbd_thread_current_set_cpu(A) ({}) +#define drbd_calc_cpu_mask(A) ({}) +#endif +extern void drbd_free_resources(struct drbd_conf *mdev); +extern void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr, + unsigned int set_size); +extern void tl_clear(struct drbd_conf *mdev); +extern void _tl_add_barrier(struct drbd_conf *, struct drbd_tl_epoch *); +extern void drbd_free_sock(struct drbd_conf *mdev); +extern int drbd_send(struct drbd_conf *mdev, struct socket *sock, + void *buf, size_t size, unsigned msg_flags); +extern int drbd_send_protocol(struct drbd_conf *mdev); +extern int drbd_send_uuids(struct drbd_conf *mdev); +extern int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev); +extern int drbd_send_sync_uuid(struct drbd_conf *mdev, u64 val); +extern int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply); +extern int _drbd_send_state(struct drbd_conf *mdev); +extern int drbd_send_state(struct drbd_conf *mdev); +extern int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, + enum drbd_packets cmd, struct p_header *h, + size_t size, unsigned msg_flags); +#define USE_DATA_SOCKET 1 +#define USE_META_SOCKET 0 +extern int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket, + enum drbd_packets cmd, struct p_header *h, + size_t size); +extern int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packets cmd, + char *data, size_t size); +extern int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc); +extern int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, + u32 set_size); +extern int drbd_send_ack(struct drbd_conf *mdev, enum drbd_packets cmd, + struct drbd_epoch_entry *e); +extern int drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packets cmd, + struct p_block_req *rp); +extern int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packets cmd, + struct p_data *dp); +extern int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packets cmd, + sector_t sector, int blksize, u64 block_id); +extern int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd, + struct drbd_epoch_entry *e); +extern int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req); +extern int _drbd_send_barrier(struct drbd_conf *mdev, + struct drbd_tl_epoch *barrier); +extern int drbd_send_drequest(struct drbd_conf *mdev, int cmd, + sector_t sector, int size, u64 block_id); +extern int drbd_send_drequest_csum(struct drbd_conf *mdev, + sector_t sector,int size, + void *digest, int digest_size, + enum drbd_packets cmd); +extern int drbd_send_ov_request(struct drbd_conf *mdev,sector_t sector,int size); + +extern int drbd_send_bitmap(struct drbd_conf *mdev); +extern int _drbd_send_bitmap(struct drbd_conf *mdev); +extern int drbd_send_sr_reply(struct drbd_conf *mdev, int retcode); +extern void drbd_free_bc(struct drbd_backing_dev *ldev); +extern void drbd_mdev_cleanup(struct drbd_conf *mdev); + +/* drbd_meta-data.c (still in drbd_main.c) */ +extern void drbd_md_sync(struct drbd_conf *mdev); +extern int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev); +/* maybe define them below as inline? */ +extern void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local); +extern void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local); +extern void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local); +extern void _drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local); +extern void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local); +extern void drbd_md_set_flag(struct drbd_conf *mdev, int flags) __must_hold(local); +extern void drbd_md_clear_flag(struct drbd_conf *mdev, int flags)__must_hold(local); +extern int drbd_md_test_flag(struct drbd_backing_dev *, int); +extern void drbd_md_mark_dirty(struct drbd_conf *mdev); +extern void drbd_queue_bitmap_io(struct drbd_conf *mdev, + int (*io_fn)(struct drbd_conf *), + void (*done)(struct drbd_conf *, int), + char *why); +extern int drbd_bmio_set_n_write(struct drbd_conf *mdev); +extern int drbd_bmio_clear_n_write(struct drbd_conf *mdev); +extern int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), char *why); + + +/* Meta data layout + We reserve a 128MB Block (4k aligned) + * either at the end of the backing device + * or on a seperate meta data device. */ + +#define MD_RESERVED_SECT (128LU << 11) /* 128 MB, unit sectors */ +/* The following numbers are sectors */ +#define MD_AL_OFFSET 8 /* 8 Sectors after start of meta area */ +#define MD_AL_MAX_SIZE 64 /* = 32 kb LOG ~ 3776 extents ~ 14 GB Storage */ +/* Allows up to about 3.8TB */ +#define MD_BM_OFFSET (MD_AL_OFFSET + MD_AL_MAX_SIZE) + +/* Since the smalles IO unit is usually 512 byte */ +#define MD_SECTOR_SHIFT 9 +#define MD_SECTOR_SIZE (1< we need 32 KB bitmap. + * Bit 0 ==> local node thinks this block is binary identical on both nodes + * Bit 1 ==> local node thinks this block needs to be synced. + */ + +#define BM_BLOCK_SHIFT 12 /* 4k per bit */ +#define BM_BLOCK_SIZE (1<>(BM_BLOCK_SHIFT-9)) +#define BM_BIT_TO_SECT(x) ((sector_t)(x)<<(BM_BLOCK_SHIFT-9)) +#define BM_SECT_PER_BIT BM_BIT_TO_SECT(1) + +/* bit to represented kilo byte conversion */ +#define Bit2KB(bits) ((bits)<<(BM_BLOCK_SHIFT-10)) + +/* in which _bitmap_ extent (resp. sector) the bit for a certain + * _storage_ sector is located in */ +#define BM_SECT_TO_EXT(x) ((x)>>(BM_EXT_SHIFT-9)) + +/* how much _storage_ sectors we have per bitmap sector */ +#define BM_EXT_TO_SECT(x) ((sector_t)(x) << (BM_EXT_SHIFT-9)) +#define BM_SECT_PER_EXT BM_EXT_TO_SECT(1) + +/* in one sector of the bitmap, we have this many activity_log extents. */ +#define AL_EXT_PER_BM_SECT (1 << (BM_EXT_SHIFT - AL_EXTENT_SHIFT)) +#define BM_WORDS_PER_AL_EXT (1 << (AL_EXTENT_SHIFT-BM_BLOCK_SHIFT-LN2_BPL)) + +#define BM_BLOCKS_PER_BM_EXT_B (BM_EXT_SHIFT - BM_BLOCK_SHIFT) +#define BM_BLOCKS_PER_BM_EXT_MASK ((1<ov_last_oos_size) { + dev_err(DEV, "Out of sync: start=%llu, size=%lu (sectors)\n", + (unsigned long long)mdev->ov_last_oos_start, + (unsigned long)mdev->ov_last_oos_size); + } + mdev->ov_last_oos_size=0; +} + + +extern void drbd_csum(struct drbd_conf *, struct crypto_hash *, struct bio *, void *); +/* worker callbacks */ +extern int w_req_cancel_conflict(struct drbd_conf *, struct drbd_work *, int); +extern int w_read_retry_remote(struct drbd_conf *, struct drbd_work *, int); +extern int w_e_end_data_req(struct drbd_conf *, struct drbd_work *, int); +extern int w_e_end_rsdata_req(struct drbd_conf *, struct drbd_work *, int); +extern int w_e_end_csum_rs_req(struct drbd_conf *, struct drbd_work *, int); +extern int w_e_end_ov_reply(struct drbd_conf *, struct drbd_work *, int); +extern int w_e_end_ov_req(struct drbd_conf *, struct drbd_work *, int); +extern int w_ov_finished(struct drbd_conf *, struct drbd_work *, int); +extern int w_resync_inactive(struct drbd_conf *, struct drbd_work *, int); +extern int w_resume_next_sg(struct drbd_conf *, struct drbd_work *, int); +extern int w_io_error(struct drbd_conf *, struct drbd_work *, int); +extern int w_send_write_hint(struct drbd_conf *, struct drbd_work *, int); +extern int w_make_resync_request(struct drbd_conf *, struct drbd_work *, int); +extern int w_send_dblock(struct drbd_conf *, struct drbd_work *, int); +extern int w_send_barrier(struct drbd_conf *, struct drbd_work *, int); +extern int w_send_read_req(struct drbd_conf *, struct drbd_work *, int); +extern int w_prev_work_done(struct drbd_conf *, struct drbd_work *, int); +extern int w_e_reissue(struct drbd_conf *, struct drbd_work *, int); + +extern void resync_timer_fn(unsigned long data); + +/* drbd_receiver.c */ +extern int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list); +extern struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev, + u64 id, + sector_t sector, + unsigned int data_size, + gfp_t gfp_mask) __must_hold(local); +extern void drbd_free_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e); +extern void drbd_wait_ee_list_empty(struct drbd_conf *mdev, + struct list_head *head); +extern void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, + struct list_head *head); +extern void drbd_set_recv_tcq(struct drbd_conf *mdev, int tcq_enabled); +extern void _drbd_clear_done_ee(struct drbd_conf *mdev, struct list_head *to_be_freed); +extern void drbd_flush_workqueue(struct drbd_conf *mdev); + +/* yes, there is kernel_setsockopt, but only since 2.6.18. we don't need to + * mess with get_fs/set_fs, we know we are KERNEL_DS always. */ +static inline int drbd_setsockopt(struct socket *sock, int level, int optname, + char __user *optval, int optlen) +{ + int err; + if (level == SOL_SOCKET) + err = sock_setsockopt(sock, level, optname, optval, optlen); + else + err = sock->ops->setsockopt(sock, level, optname, optval, + optlen); + return err; +} + +static inline void drbd_tcp_cork(struct socket *sock) +{ + int __user val = 1; + (void) drbd_setsockopt(sock, SOL_TCP, TCP_CORK, + (char __user *)&val, sizeof(val)); +} + +static inline void drbd_tcp_uncork(struct socket *sock) +{ + int __user val = 0; + (void) drbd_setsockopt(sock, SOL_TCP, TCP_CORK, + (char __user *)&val, sizeof(val)); +} + +static inline void drbd_tcp_nodelay(struct socket *sock) +{ + int __user val = 1; + (void) drbd_setsockopt(sock, SOL_TCP, TCP_NODELAY, + (char __user *)&val, sizeof(val)); +} + +static inline void drbd_tcp_quickack(struct socket *sock) +{ + int __user val = 1; + (void) drbd_setsockopt(sock, SOL_TCP, TCP_QUICKACK, + (char __user *)&val, sizeof(val)); +} + +void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo); + +/* drbd_proc.c */ +extern struct proc_dir_entry *drbd_proc; +extern struct file_operations drbd_proc_fops; +extern const char *drbd_conn_str(enum drbd_conns s); +extern const char *drbd_role_str(enum drbd_role s); + +/* drbd_actlog.c */ +extern void drbd_al_begin_io(struct drbd_conf *mdev, sector_t sector); +extern void drbd_al_complete_io(struct drbd_conf *mdev, sector_t sector); +extern void drbd_rs_complete_io(struct drbd_conf *mdev, sector_t sector); +extern int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector); +extern int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector); +extern void drbd_rs_cancel_all(struct drbd_conf *mdev); +extern int drbd_rs_del_all(struct drbd_conf *mdev); +extern void drbd_rs_failed_io(struct drbd_conf *mdev, + sector_t sector, int size); +extern int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *); +extern void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, + int size, const char *file, const unsigned int line); +#define drbd_set_in_sync(mdev, sector, size) \ + __drbd_set_in_sync(mdev, sector, size, __FILE__, __LINE__) +extern void __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, + int size, const char *file, const unsigned int line); +#define drbd_set_out_of_sync(mdev, sector, size) \ + __drbd_set_out_of_sync(mdev, sector, size, __FILE__, __LINE__) +extern void drbd_al_apply_to_bm(struct drbd_conf *mdev); +extern void drbd_al_to_on_disk_bm(struct drbd_conf *mdev); +extern void drbd_al_shrink(struct drbd_conf *mdev); + + +/* drbd_nl.c */ + +void drbd_nl_cleanup(void); +int __init drbd_nl_init(void); +void drbd_bcast_state(struct drbd_conf *mdev, union drbd_state); +void drbd_bcast_sync_progress(struct drbd_conf *mdev); +void drbd_bcast_ee(struct drbd_conf *mdev, + const char *reason, const int dgs, + const char* seen_hash, const char* calc_hash, + const struct drbd_epoch_entry* e); + + +/** + * DOC: DRBD State macros + * + * These macros are used to express state changes in easily readable form. + * + * The NS macros expand to a mask and a value, that can be bit ored onto the + * current state as soon as the spinlock (req_lock) was taken. + * + * The _NS macros are used for state functions that get called with the + * spinlock. These macros expand directly to the new state value. + * + * Besides the basic forms NS() and _NS() additional _?NS[23] are defined + * to express state changes that affect more than one aspect of the state. + * + * E.g. NS2(conn, C_CONNECTED, peer, R_SECONDARY) + * Means that the network connection was established and that the peer + * is in secondary role. + */ +#define role_MASK R_MASK +#define peer_MASK R_MASK +#define disk_MASK D_MASK +#define pdsk_MASK D_MASK +#define conn_MASK C_MASK +#define susp_MASK 1 +#define user_isp_MASK 1 +#define aftr_isp_MASK 1 + +#define NS(T, S) \ + ({ union drbd_state mask; mask.i = 0; mask.T = T##_MASK; mask; }), \ + ({ union drbd_state val; val.i = 0; val.T = (S); val; }) +#define NS2(T1, S1, T2, S2) \ + ({ union drbd_state mask; mask.i = 0; mask.T1 = T1##_MASK; \ + mask.T2 = T2##_MASK; mask; }), \ + ({ union drbd_state val; val.i = 0; val.T1 = (S1); \ + val.T2 = (S2); val; }) +#define NS3(T1, S1, T2, S2, T3, S3) \ + ({ union drbd_state mask; mask.i = 0; mask.T1 = T1##_MASK; \ + mask.T2 = T2##_MASK; mask.T3 = T3##_MASK; mask; }), \ + ({ union drbd_state val; val.i = 0; val.T1 = (S1); \ + val.T2 = (S2); val.T3 = (S3); val; }) + +#define _NS(D, T, S) \ + D, ({ union drbd_state __ns; __ns.i = D->state.i; __ns.T = (S); __ns; }) +#define _NS2(D, T1, S1, T2, S2) \ + D, ({ union drbd_state __ns; __ns.i = D->state.i; __ns.T1 = (S1); \ + __ns.T2 = (S2); __ns; }) +#define _NS3(D, T1, S1, T2, S2, T3, S3) \ + D, ({ union drbd_state __ns; __ns.i = D->state.i; __ns.T1 = (S1); \ + __ns.T2 = (S2); __ns.T3 = (S3); __ns; }) + +/* + * inline helper functions + *************************/ + +static inline void drbd_state_lock(struct drbd_conf *mdev) +{ + wait_event(mdev->misc_wait, + !test_and_set_bit(CLUSTER_ST_CHANGE, &mdev->flags)); +} + +static inline void drbd_state_unlock(struct drbd_conf *mdev) +{ + clear_bit(CLUSTER_ST_CHANGE, &mdev->flags); + wake_up(&mdev->misc_wait); +} + +static inline int _drbd_set_state(struct drbd_conf *mdev, + union drbd_state ns, enum chg_state_flags flags, + struct completion *done) +{ + int rv; + + read_lock(&global_state_lock); + rv = __drbd_set_state(mdev, ns, flags, done); + read_unlock(&global_state_lock); + + return rv; +} + +/** + * drbd_request_state() - Reqest a state change + * @mdev: DRBD device. + * @mask: mask of state bits to change. + * @val: value of new state bits. + * + * This is the most graceful way of requesting a state change. It is verbose + * quite verbose in case the state change is not possible, and all those + * state changes are globally serialized. + */ +static inline int drbd_request_state(struct drbd_conf *mdev, + union drbd_state mask, + union drbd_state val) +{ + return _drbd_request_state(mdev, mask, val, CS_VERBOSE + CS_ORDERED); +} + +#define __drbd_chk_io_error(m,f) __drbd_chk_io_error_(m,f, __func__) +static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, int forcedetach, const char *where) +{ + switch (mdev->ldev->dc.on_io_error) { + case EP_PASS_ON: + if (!forcedetach) { + if (printk_ratelimit()) + dev_err(DEV, "Local IO failed in %s." + "Passing error on...\n", where); + break; + } + /* NOTE fall through to detach case if forcedetach set */ + case EP_DETACH: + case EP_CALL_HELPER: + if (mdev->state.disk > D_FAILED) { + _drbd_set_state(_NS(mdev, disk, D_FAILED), CS_HARD, NULL); + dev_err(DEV, "Local IO failed in %s." + "Detaching...\n", where); + } + break; + } +} + +/** + * drbd_chk_io_error: Handle the on_io_error setting, should be called from all io completion handlers + * @mdev: DRBD device. + * @error: Error code passed to the IO completion callback + * @forcedetach: Force detach. I.e. the error happened while accessing the meta data + * + * See also drbd_main.c:after_state_ch() if (os.disk > D_FAILED && ns.disk == D_FAILED) + */ +#define drbd_chk_io_error(m,e,f) drbd_chk_io_error_(m,e,f, __func__) +static inline void drbd_chk_io_error_(struct drbd_conf *mdev, + int error, int forcedetach, const char *where) +{ + if (error) { + unsigned long flags; + spin_lock_irqsave(&mdev->req_lock, flags); + __drbd_chk_io_error_(mdev, forcedetach, where); + spin_unlock_irqrestore(&mdev->req_lock, flags); + } +} + + +/** + * drbd_md_first_sector() - Returns the first sector number of the meta data area + * @bdev: Meta data block device. + * + * BTW, for internal meta data, this happens to be the maximum capacity + * we could agree upon with our peer node. + */ +static inline sector_t drbd_md_first_sector(struct drbd_backing_dev *bdev) +{ + switch (bdev->dc.meta_dev_idx) { + case DRBD_MD_INDEX_INTERNAL: + case DRBD_MD_INDEX_FLEX_INT: + return bdev->md.md_offset + bdev->md.bm_offset; + case DRBD_MD_INDEX_FLEX_EXT: + default: + return bdev->md.md_offset; + } +} + +/** + * drbd_md_last_sector() - Return the last sector number of the meta data area + * @bdev: Meta data block device. + */ +static inline sector_t drbd_md_last_sector(struct drbd_backing_dev *bdev) +{ + switch (bdev->dc.meta_dev_idx) { + case DRBD_MD_INDEX_INTERNAL: + case DRBD_MD_INDEX_FLEX_INT: + return bdev->md.md_offset + MD_AL_OFFSET - 1; + case DRBD_MD_INDEX_FLEX_EXT: + default: + return bdev->md.md_offset + bdev->md.md_size_sect; + } +} + +/* Returns the number of 512 byte sectors of the device */ +static inline sector_t drbd_get_capacity(struct block_device *bdev) +{ + /* return bdev ? get_capacity(bdev->bd_disk) : 0; */ + return bdev ? bdev->bd_inode->i_size >> 9 : 0; +} + +/** + * drbd_get_max_capacity() - Returns the capacity we announce to out peer + * @bdev: Meta data block device. + * + * returns the capacity we announce to out peer. we clip ourselves at the + * various MAX_SECTORS, because if we don't, current implementation will + * oops sooner or later + */ +static inline sector_t drbd_get_max_capacity(struct drbd_backing_dev *bdev) +{ + sector_t s; + switch (bdev->dc.meta_dev_idx) { + case DRBD_MD_INDEX_INTERNAL: + case DRBD_MD_INDEX_FLEX_INT: + s = drbd_get_capacity(bdev->backing_bdev) + ? min_t(sector_t, DRBD_MAX_SECTORS_FLEX, + drbd_md_first_sector(bdev)) + : 0; + break; + case DRBD_MD_INDEX_FLEX_EXT: + s = min_t(sector_t, DRBD_MAX_SECTORS_FLEX, + drbd_get_capacity(bdev->backing_bdev)); + /* clip at maximum size the meta device can support */ + s = min_t(sector_t, s, + BM_EXT_TO_SECT(bdev->md.md_size_sect + - bdev->md.bm_offset)); + break; + default: + s = min_t(sector_t, DRBD_MAX_SECTORS, + drbd_get_capacity(bdev->backing_bdev)); + } + return s; +} + +/** + * drbd_md_ss__() - Return the sector number of our meta data super block + * @mdev: DRBD device. + * @bdev: Meta data block device. + */ +static inline sector_t drbd_md_ss__(struct drbd_conf *mdev, + struct drbd_backing_dev *bdev) +{ + switch (bdev->dc.meta_dev_idx) { + default: /* external, some index */ + return MD_RESERVED_SECT * bdev->dc.meta_dev_idx; + case DRBD_MD_INDEX_INTERNAL: + /* with drbd08, internal meta data is always "flexible" */ + case DRBD_MD_INDEX_FLEX_INT: + /* sizeof(struct md_on_disk_07) == 4k + * position: last 4k aligned block of 4k size */ + if (!bdev->backing_bdev) { + if (__ratelimit(&drbd_ratelimit_state)) { + dev_err(DEV, "bdev->backing_bdev==NULL\n"); + dump_stack(); + } + return 0; + } + return (drbd_get_capacity(bdev->backing_bdev) & ~7ULL) + - MD_AL_OFFSET; + case DRBD_MD_INDEX_FLEX_EXT: + return 0; + } +} + +static inline void +_drbd_queue_work(struct drbd_work_queue *q, struct drbd_work *w) +{ + list_add_tail(&w->list, &q->q); + up(&q->s); +} + +static inline void +drbd_queue_work_front(struct drbd_work_queue *q, struct drbd_work *w) +{ + unsigned long flags; + spin_lock_irqsave(&q->q_lock, flags); + list_add(&w->list, &q->q); + up(&q->s); /* within the spinlock, + see comment near end of drbd_worker() */ + spin_unlock_irqrestore(&q->q_lock, flags); +} + +static inline void +drbd_queue_work(struct drbd_work_queue *q, struct drbd_work *w) +{ + unsigned long flags; + spin_lock_irqsave(&q->q_lock, flags); + list_add_tail(&w->list, &q->q); + up(&q->s); /* within the spinlock, + see comment near end of drbd_worker() */ + spin_unlock_irqrestore(&q->q_lock, flags); +} + +static inline void wake_asender(struct drbd_conf *mdev) +{ + if (test_bit(SIGNAL_ASENDER, &mdev->flags)) + force_sig(DRBD_SIG, mdev->asender.task); +} + +static inline void request_ping(struct drbd_conf *mdev) +{ + set_bit(SEND_PING, &mdev->flags); + wake_asender(mdev); +} + +static inline int drbd_send_short_cmd(struct drbd_conf *mdev, + enum drbd_packets cmd) +{ + struct p_header h; + return drbd_send_cmd(mdev, USE_DATA_SOCKET, cmd, &h, sizeof(h)); +} + +static inline int drbd_send_ping(struct drbd_conf *mdev) +{ + struct p_header h; + return drbd_send_cmd(mdev, USE_META_SOCKET, P_PING, &h, sizeof(h)); +} + +static inline int drbd_send_ping_ack(struct drbd_conf *mdev) +{ + struct p_header h; + return drbd_send_cmd(mdev, USE_META_SOCKET, P_PING_ACK, &h, sizeof(h)); +} + +static inline void drbd_thread_stop(struct drbd_thread *thi) +{ + _drbd_thread_stop(thi, FALSE, TRUE); +} + +static inline void drbd_thread_stop_nowait(struct drbd_thread *thi) +{ + _drbd_thread_stop(thi, FALSE, FALSE); +} + +static inline void drbd_thread_restart_nowait(struct drbd_thread *thi) +{ + _drbd_thread_stop(thi, TRUE, FALSE); +} + +/* counts how many answer packets packets we expect from our peer, + * for either explicit application requests, + * or implicit barrier packets as necessary. + * increased: + * w_send_barrier + * _req_mod(req, queue_for_net_write or queue_for_net_read); + * it is much easier and equally valid to count what we queue for the + * worker, even before it actually was queued or send. + * (drbd_make_request_common; recovery path on read io-error) + * decreased: + * got_BarrierAck (respective tl_clear, tl_clear_barrier) + * _req_mod(req, data_received) + * [from receive_DataReply] + * _req_mod(req, write_acked_by_peer or recv_acked_by_peer or neg_acked) + * [from got_BlockAck (P_WRITE_ACK, P_RECV_ACK)] + * for some reason it is NOT decreased in got_NegAck, + * but in the resulting cleanup code from report_params. + * we should try to remember the reason for that... + * _req_mod(req, send_failed or send_canceled) + * _req_mod(req, connection_lost_while_pending) + * [from tl_clear_barrier] + */ +static inline void inc_ap_pending(struct drbd_conf *mdev) +{ + atomic_inc(&mdev->ap_pending_cnt); +} + +#define ERR_IF_CNT_IS_NEGATIVE(which) \ + if (atomic_read(&mdev->which) < 0) \ + dev_err(DEV, "in %s:%d: " #which " = %d < 0 !\n", \ + __func__ , __LINE__ , \ + atomic_read(&mdev->which)) + +#define dec_ap_pending(mdev) do { \ + typecheck(struct drbd_conf *, mdev); \ + if (atomic_dec_and_test(&mdev->ap_pending_cnt)) \ + wake_up(&mdev->misc_wait); \ + ERR_IF_CNT_IS_NEGATIVE(ap_pending_cnt); } while (0) + +/* counts how many resync-related answers we still expect from the peer + * increase decrease + * C_SYNC_TARGET sends P_RS_DATA_REQUEST (and expects P_RS_DATA_REPLY) + * C_SYNC_SOURCE sends P_RS_DATA_REPLY (and expects P_WRITE_ACK whith ID_SYNCER) + * (or P_NEG_ACK with ID_SYNCER) + */ +static inline void inc_rs_pending(struct drbd_conf *mdev) +{ + atomic_inc(&mdev->rs_pending_cnt); +} + +#define dec_rs_pending(mdev) do { \ + typecheck(struct drbd_conf *, mdev); \ + atomic_dec(&mdev->rs_pending_cnt); \ + ERR_IF_CNT_IS_NEGATIVE(rs_pending_cnt); } while (0) + +/* counts how many answers we still need to send to the peer. + * increased on + * receive_Data unless protocol A; + * we need to send a P_RECV_ACK (proto B) + * or P_WRITE_ACK (proto C) + * receive_RSDataReply (recv_resync_read) we need to send a P_WRITE_ACK + * receive_DataRequest (receive_RSDataRequest) we need to send back P_DATA + * receive_Barrier_* we need to send a P_BARRIER_ACK + */ +static inline void inc_unacked(struct drbd_conf *mdev) +{ + atomic_inc(&mdev->unacked_cnt); +} + +#define dec_unacked(mdev) do { \ + typecheck(struct drbd_conf *, mdev); \ + atomic_dec(&mdev->unacked_cnt); \ + ERR_IF_CNT_IS_NEGATIVE(unacked_cnt); } while (0) + +#define sub_unacked(mdev, n) do { \ + typecheck(struct drbd_conf *, mdev); \ + atomic_sub(n, &mdev->unacked_cnt); \ + ERR_IF_CNT_IS_NEGATIVE(unacked_cnt); } while (0) + + +static inline void put_net_conf(struct drbd_conf *mdev) +{ + if (atomic_dec_and_test(&mdev->net_cnt)) + wake_up(&mdev->misc_wait); +} + +/** + * get_net_conf() - Increase ref count on mdev->net_conf; Returns 0 if nothing there + * @mdev: DRBD device. + * + * You have to call put_net_conf() when finished working with mdev->net_conf. + */ +static inline int get_net_conf(struct drbd_conf *mdev) +{ + int have_net_conf; + + atomic_inc(&mdev->net_cnt); + have_net_conf = mdev->state.conn >= C_UNCONNECTED; + if (!have_net_conf) + put_net_conf(mdev); + return have_net_conf; +} + +/** + * get_ldev() - Increase the ref count on mdev->ldev. Returns 0 if there is no ldev + * @M: DRBD device. + * + * You have to call put_ldev() when finished working with mdev->ldev. + */ +#define get_ldev(M) __cond_lock(local, _get_ldev_if_state(M,D_INCONSISTENT)) +#define get_ldev_if_state(M,MINS) __cond_lock(local, _get_ldev_if_state(M,MINS)) + +static inline void put_ldev(struct drbd_conf *mdev) +{ + __release(local); + if (atomic_dec_and_test(&mdev->local_cnt)) + wake_up(&mdev->misc_wait); + D_ASSERT(atomic_read(&mdev->local_cnt) >= 0); +} + +#ifndef __CHECKER__ +static inline int _get_ldev_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins) +{ + int io_allowed; + + atomic_inc(&mdev->local_cnt); + io_allowed = (mdev->state.disk >= mins); + if (!io_allowed) + put_ldev(mdev); + return io_allowed; +} +#else +extern int _get_ldev_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins); +#endif + +/* you must have an "get_ldev" reference */ +static inline void drbd_get_syncer_progress(struct drbd_conf *mdev, + unsigned long *bits_left, unsigned int *per_mil_done) +{ + /* + * this is to break it at compile time when we change that + * (we may feel 4TB maximum storage per drbd is not enough) + */ + typecheck(unsigned long, mdev->rs_total); + + /* note: both rs_total and rs_left are in bits, i.e. in + * units of BM_BLOCK_SIZE. + * for the percentage, we don't care. */ + + *bits_left = drbd_bm_total_weight(mdev) - mdev->rs_failed; + /* >> 10 to prevent overflow, + * +1 to prevent division by zero */ + if (*bits_left > mdev->rs_total) { + /* doh. maybe a logic bug somewhere. + * may also be just a race condition + * between this and a disconnect during sync. + * for now, just prevent in-kernel buffer overflow. + */ + smp_rmb(); + dev_warn(DEV, "cs:%s rs_left=%lu > rs_total=%lu (rs_failed %lu)\n", + drbd_conn_str(mdev->state.conn), + *bits_left, mdev->rs_total, mdev->rs_failed); + *per_mil_done = 0; + } else { + /* make sure the calculation happens in long context */ + unsigned long tmp = 1000UL - + (*bits_left >> 10)*1000UL + / ((mdev->rs_total >> 10) + 1UL); + *per_mil_done = tmp; + } +} + + +/* this throttles on-the-fly application requests + * according to max_buffers settings; + * maybe re-implement using semaphores? */ +static inline int drbd_get_max_buffers(struct drbd_conf *mdev) +{ + int mxb = 1000000; /* arbitrary limit on open requests */ + if (get_net_conf(mdev)) { + mxb = mdev->net_conf->max_buffers; + put_net_conf(mdev); + } + return mxb; +} + +static inline int drbd_state_is_stable(union drbd_state s) +{ + + /* DO NOT add a default clause, we want the compiler to warn us + * for any newly introduced state we may have forgotten to add here */ + + switch ((enum drbd_conns)s.conn) { + /* new io only accepted when there is no connection, ... */ + case C_STANDALONE: + case C_WF_CONNECTION: + /* ... or there is a well established connection. */ + case C_CONNECTED: + case C_SYNC_SOURCE: + case C_SYNC_TARGET: + case C_VERIFY_S: + case C_VERIFY_T: + case C_PAUSED_SYNC_S: + case C_PAUSED_SYNC_T: + /* maybe stable, look at the disk state */ + break; + + /* no new io accepted during tansitional states + * like handshake or teardown */ + case C_DISCONNECTING: + case C_UNCONNECTED: + case C_TIMEOUT: + case C_BROKEN_PIPE: + case C_NETWORK_FAILURE: + case C_PROTOCOL_ERROR: + case C_TEAR_DOWN: + case C_WF_REPORT_PARAMS: + case C_STARTING_SYNC_S: + case C_STARTING_SYNC_T: + case C_WF_BITMAP_S: + case C_WF_BITMAP_T: + case C_WF_SYNC_UUID: + case C_MASK: + /* not "stable" */ + return 0; + } + + switch ((enum drbd_disk_state)s.disk) { + case D_DISKLESS: + case D_INCONSISTENT: + case D_OUTDATED: + case D_CONSISTENT: + case D_UP_TO_DATE: + /* disk state is stable as well. */ + break; + + /* no new io accepted during tansitional states */ + case D_ATTACHING: + case D_FAILED: + case D_NEGOTIATING: + case D_UNKNOWN: + case D_MASK: + /* not "stable" */ + return 0; + } + + return 1; +} + +static inline int __inc_ap_bio_cond(struct drbd_conf *mdev) +{ + int mxb = drbd_get_max_buffers(mdev); + + if (mdev->state.susp) + return 0; + if (test_bit(SUSPEND_IO, &mdev->flags)) + return 0; + + /* to avoid potential deadlock or bitmap corruption, + * in various places, we only allow new application io + * to start during "stable" states. */ + + /* no new io accepted when attaching or detaching the disk */ + if (!drbd_state_is_stable(mdev->state)) + return 0; + + /* since some older kernels don't have atomic_add_unless, + * and we are within the spinlock anyways, we have this workaround. */ + if (atomic_read(&mdev->ap_bio_cnt) > mxb) + return 0; + if (test_bit(BITMAP_IO, &mdev->flags)) + return 0; + return 1; +} + +/* I'd like to use wait_event_lock_irq, + * but I'm not sure when it got introduced, + * and not sure when it has 3 or 4 arguments */ +static inline void inc_ap_bio(struct drbd_conf *mdev, int one_or_two) +{ + /* compare with after_state_ch, + * os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S */ + DEFINE_WAIT(wait); + + /* we wait here + * as long as the device is suspended + * until the bitmap is no longer on the fly during connection + * handshake as long as we would exeed the max_buffer limit. + * + * to avoid races with the reconnect code, + * we need to atomic_inc within the spinlock. */ + + spin_lock_irq(&mdev->req_lock); + while (!__inc_ap_bio_cond(mdev)) { + prepare_to_wait(&mdev->misc_wait, &wait, TASK_UNINTERRUPTIBLE); + spin_unlock_irq(&mdev->req_lock); + schedule(); + finish_wait(&mdev->misc_wait, &wait); + spin_lock_irq(&mdev->req_lock); + } + atomic_add(one_or_two, &mdev->ap_bio_cnt); + spin_unlock_irq(&mdev->req_lock); +} + +static inline void dec_ap_bio(struct drbd_conf *mdev) +{ + int mxb = drbd_get_max_buffers(mdev); + int ap_bio = atomic_dec_return(&mdev->ap_bio_cnt); + + D_ASSERT(ap_bio >= 0); + /* this currently does wake_up for every dec_ap_bio! + * maybe rather introduce some type of hysteresis? + * e.g. (ap_bio == mxb/2 || ap_bio == 0) ? */ + if (ap_bio < mxb) + wake_up(&mdev->misc_wait); + if (ap_bio == 0 && test_bit(BITMAP_IO, &mdev->flags)) { + if (!test_and_set_bit(BITMAP_IO_QUEUED, &mdev->flags)) + drbd_queue_work(&mdev->data.work, &mdev->bm_io_work.w); + } +} + +static inline void drbd_set_ed_uuid(struct drbd_conf *mdev, u64 val) +{ + mdev->ed_uuid = val; +} + +static inline int seq_cmp(u32 a, u32 b) +{ + /* we assume wrap around at 32bit. + * for wrap around at 24bit (old atomic_t), + * we'd have to + * a <<= 8; b <<= 8; + */ + return (s32)(a) - (s32)(b); +} +#define seq_lt(a, b) (seq_cmp((a), (b)) < 0) +#define seq_gt(a, b) (seq_cmp((a), (b)) > 0) +#define seq_ge(a, b) (seq_cmp((a), (b)) >= 0) +#define seq_le(a, b) (seq_cmp((a), (b)) <= 0) +/* CAUTION: please no side effects in arguments! */ +#define seq_max(a, b) ((u32)(seq_gt((a), (b)) ? (a) : (b))) + +static inline void update_peer_seq(struct drbd_conf *mdev, unsigned int new_seq) +{ + unsigned int m; + spin_lock(&mdev->peer_seq_lock); + m = seq_max(mdev->peer_seq, new_seq); + mdev->peer_seq = m; + spin_unlock(&mdev->peer_seq_lock); + if (m == new_seq) + wake_up(&mdev->seq_wait); +} + +static inline void drbd_update_congested(struct drbd_conf *mdev) +{ + struct sock *sk = mdev->data.socket->sk; + if (sk->sk_wmem_queued > sk->sk_sndbuf * 4 / 5) + set_bit(NET_CONGESTED, &mdev->flags); +} + +static inline int drbd_queue_order_type(struct drbd_conf *mdev) +{ + /* sorry, we currently have no working implementation + * of distributed TCQ stuff */ +#ifndef QUEUE_ORDERED_NONE +#define QUEUE_ORDERED_NONE 0 +#endif + return QUEUE_ORDERED_NONE; +} + +static inline void drbd_blk_run_queue(struct request_queue *q) +{ + if (q && q->unplug_fn) + q->unplug_fn(q); +} + +static inline void drbd_kick_lo(struct drbd_conf *mdev) +{ + if (get_ldev(mdev)) { + drbd_blk_run_queue(bdev_get_queue(mdev->ldev->backing_bdev)); + put_ldev(mdev); + } +} + +static inline void drbd_md_flush(struct drbd_conf *mdev) +{ + int r; + + if (test_bit(MD_NO_BARRIER, &mdev->flags)) + return; + + r = blkdev_issue_flush(mdev->ldev->md_bdev, NULL); + if (r) { + set_bit(MD_NO_BARRIER, &mdev->flags); + dev_err(DEV, "meta data flush failed with status %d, disabling md-flushes\n", r); + } +} + +#endif diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c new file mode 100644 index 000000000000..edf0b8031e69 --- /dev/null +++ b/drivers/block/drbd/drbd_main.c @@ -0,0 +1,3735 @@ +/* + drbd.c + + This file is part of DRBD by Philipp Reisner and Lars Ellenberg. + + Copyright (C) 2001-2008, LINBIT Information Technologies GmbH. + Copyright (C) 1999-2008, Philipp Reisner . + Copyright (C) 2002-2008, Lars Ellenberg . + + Thanks to Carter Burden, Bart Grantham and Gennadiy Nerubayev + from Logicworks, Inc. for making SDP replication support possible. + + drbd is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + drbd is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with drbd; see the file COPYING. If not, write to + the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define __KERNEL_SYSCALLS__ +#include +#include + +#include +#include "drbd_int.h" +#include "drbd_tracing.h" +#include "drbd_req.h" /* only for _req_mod in tl_release and tl_clear */ + +#include "drbd_vli.h" + +struct after_state_chg_work { + struct drbd_work w; + union drbd_state os; + union drbd_state ns; + enum chg_state_flags flags; + struct completion *done; +}; + +int drbdd_init(struct drbd_thread *); +int drbd_worker(struct drbd_thread *); +int drbd_asender(struct drbd_thread *); + +int drbd_init(void); +static int drbd_open(struct block_device *bdev, fmode_t mode); +static int drbd_release(struct gendisk *gd, fmode_t mode); +static int w_after_state_ch(struct drbd_conf *mdev, struct drbd_work *w, int unused); +static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, + union drbd_state ns, enum chg_state_flags flags); +static int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused); +static void md_sync_timer_fn(unsigned long data); +static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused); + +DEFINE_TRACE(drbd_unplug); +DEFINE_TRACE(drbd_uuid); +DEFINE_TRACE(drbd_ee); +DEFINE_TRACE(drbd_packet); +DEFINE_TRACE(drbd_md_io); +DEFINE_TRACE(drbd_epoch); +DEFINE_TRACE(drbd_netlink); +DEFINE_TRACE(drbd_actlog); +DEFINE_TRACE(drbd_bio); +DEFINE_TRACE(_drbd_resync); +DEFINE_TRACE(drbd_req); + +MODULE_AUTHOR("Philipp Reisner , " + "Lars Ellenberg "); +MODULE_DESCRIPTION("drbd - Distributed Replicated Block Device v" REL_VERSION); +MODULE_VERSION(REL_VERSION); +MODULE_LICENSE("GPL"); +MODULE_PARM_DESC(minor_count, "Maximum number of drbd devices (1-255)"); +MODULE_ALIAS_BLOCKDEV_MAJOR(DRBD_MAJOR); + +#include +/* allow_open_on_secondary */ +MODULE_PARM_DESC(allow_oos, "DONT USE!"); +/* thanks to these macros, if compiled into the kernel (not-module), + * this becomes the boot parameter drbd.minor_count */ +module_param(minor_count, uint, 0444); +module_param(disable_sendpage, bool, 0644); +module_param(allow_oos, bool, 0); +module_param(cn_idx, uint, 0444); +module_param(proc_details, int, 0644); + +#ifdef CONFIG_DRBD_FAULT_INJECTION +int enable_faults; +int fault_rate; +static int fault_count; +int fault_devs; +/* bitmap of enabled faults */ +module_param(enable_faults, int, 0664); +/* fault rate % value - applies to all enabled faults */ +module_param(fault_rate, int, 0664); +/* count of faults inserted */ +module_param(fault_count, int, 0664); +/* bitmap of devices to insert faults on */ +module_param(fault_devs, int, 0644); +#endif + +/* module parameter, defined */ +unsigned int minor_count = 32; +int disable_sendpage; +int allow_oos; +unsigned int cn_idx = CN_IDX_DRBD; +int proc_details; /* Detail level in proc drbd*/ + +/* Module parameter for setting the user mode helper program + * to run. Default is /sbin/drbdadm */ +char usermode_helper[80] = "/sbin/drbdadm"; + +module_param_string(usermode_helper, usermode_helper, sizeof(usermode_helper), 0644); + +/* in 2.6.x, our device mapping and config info contains our virtual gendisks + * as member "struct gendisk *vdisk;" + */ +struct drbd_conf **minor_table; + +struct kmem_cache *drbd_request_cache; +struct kmem_cache *drbd_ee_cache; /* epoch entries */ +struct kmem_cache *drbd_bm_ext_cache; /* bitmap extents */ +struct kmem_cache *drbd_al_ext_cache; /* activity log extents */ +mempool_t *drbd_request_mempool; +mempool_t *drbd_ee_mempool; + +/* I do not use a standard mempool, because: + 1) I want to hand out the pre-allocated objects first. + 2) I want to be able to interrupt sleeping allocation with a signal. + Note: This is a single linked list, the next pointer is the private + member of struct page. + */ +struct page *drbd_pp_pool; +spinlock_t drbd_pp_lock; +int drbd_pp_vacant; +wait_queue_head_t drbd_pp_wait; + +DEFINE_RATELIMIT_STATE(drbd_ratelimit_state, 5 * HZ, 5); + +static struct block_device_operations drbd_ops = { + .owner = THIS_MODULE, + .open = drbd_open, + .release = drbd_release, +}; + +#define ARRY_SIZE(A) (sizeof(A)/sizeof(A[0])) + +#ifdef __CHECKER__ +/* When checking with sparse, and this is an inline function, sparse will + give tons of false positives. When this is a real functions sparse works. + */ +int _get_ldev_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins) +{ + int io_allowed; + + atomic_inc(&mdev->local_cnt); + io_allowed = (mdev->state.disk >= mins); + if (!io_allowed) { + if (atomic_dec_and_test(&mdev->local_cnt)) + wake_up(&mdev->misc_wait); + } + return io_allowed; +} + +#endif + +/** + * DOC: The transfer log + * + * The transfer log is a single linked list of &struct drbd_tl_epoch objects. + * mdev->newest_tle points to the head, mdev->oldest_tle points to the tail + * of the list. There is always at least one &struct drbd_tl_epoch object. + * + * Each &struct drbd_tl_epoch has a circular double linked list of requests + * attached. + */ +static int tl_init(struct drbd_conf *mdev) +{ + struct drbd_tl_epoch *b; + + /* during device minor initialization, we may well use GFP_KERNEL */ + b = kmalloc(sizeof(struct drbd_tl_epoch), GFP_KERNEL); + if (!b) + return 0; + INIT_LIST_HEAD(&b->requests); + INIT_LIST_HEAD(&b->w.list); + b->next = NULL; + b->br_number = 4711; + b->n_req = 0; + b->w.cb = NULL; /* if this is != NULL, we need to dec_ap_pending in tl_clear */ + + mdev->oldest_tle = b; + mdev->newest_tle = b; + INIT_LIST_HEAD(&mdev->out_of_sequence_requests); + + mdev->tl_hash = NULL; + mdev->tl_hash_s = 0; + + return 1; +} + +static void tl_cleanup(struct drbd_conf *mdev) +{ + D_ASSERT(mdev->oldest_tle == mdev->newest_tle); + D_ASSERT(list_empty(&mdev->out_of_sequence_requests)); + kfree(mdev->oldest_tle); + mdev->oldest_tle = NULL; + kfree(mdev->unused_spare_tle); + mdev->unused_spare_tle = NULL; + kfree(mdev->tl_hash); + mdev->tl_hash = NULL; + mdev->tl_hash_s = 0; +} + +/** + * _tl_add_barrier() - Adds a barrier to the transfer log + * @mdev: DRBD device. + * @new: Barrier to be added before the current head of the TL. + * + * The caller must hold the req_lock. + */ +void _tl_add_barrier(struct drbd_conf *mdev, struct drbd_tl_epoch *new) +{ + struct drbd_tl_epoch *newest_before; + + INIT_LIST_HEAD(&new->requests); + INIT_LIST_HEAD(&new->w.list); + new->w.cb = NULL; /* if this is != NULL, we need to dec_ap_pending in tl_clear */ + new->next = NULL; + new->n_req = 0; + + newest_before = mdev->newest_tle; + /* never send a barrier number == 0, because that is special-cased + * when using TCQ for our write ordering code */ + new->br_number = (newest_before->br_number+1) ?: 1; + if (mdev->newest_tle != new) { + mdev->newest_tle->next = new; + mdev->newest_tle = new; + } +} + +/** + * tl_release() - Free or recycle the oldest &struct drbd_tl_epoch object of the TL + * @mdev: DRBD device. + * @barrier_nr: Expected identifier of the DRBD write barrier packet. + * @set_size: Expected number of requests before that barrier. + * + * In case the passed barrier_nr or set_size does not match the oldest + * &struct drbd_tl_epoch objects this function will cause a termination + * of the connection. + */ +void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr, + unsigned int set_size) +{ + struct drbd_tl_epoch *b, *nob; /* next old barrier */ + struct list_head *le, *tle; + struct drbd_request *r; + + spin_lock_irq(&mdev->req_lock); + + b = mdev->oldest_tle; + + /* first some paranoia code */ + if (b == NULL) { + dev_err(DEV, "BAD! BarrierAck #%u received, but no epoch in tl!?\n", + barrier_nr); + goto bail; + } + if (b->br_number != barrier_nr) { + dev_err(DEV, "BAD! BarrierAck #%u received, expected #%u!\n", + barrier_nr, b->br_number); + goto bail; + } + if (b->n_req != set_size) { + dev_err(DEV, "BAD! BarrierAck #%u received with n_req=%u, expected n_req=%u!\n", + barrier_nr, set_size, b->n_req); + goto bail; + } + + /* Clean up list of requests processed during current epoch */ + list_for_each_safe(le, tle, &b->requests) { + r = list_entry(le, struct drbd_request, tl_requests); + _req_mod(r, barrier_acked); + } + /* There could be requests on the list waiting for completion + of the write to the local disk. To avoid corruptions of + slab's data structures we have to remove the lists head. + + Also there could have been a barrier ack out of sequence, overtaking + the write acks - which would be a bug and violating write ordering. + To not deadlock in case we lose connection while such requests are + still pending, we need some way to find them for the + _req_mode(connection_lost_while_pending). + + These have been list_move'd to the out_of_sequence_requests list in + _req_mod(, barrier_acked) above. + */ + list_del_init(&b->requests); + + nob = b->next; + if (test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) { + _tl_add_barrier(mdev, b); + if (nob) + mdev->oldest_tle = nob; + /* if nob == NULL b was the only barrier, and becomes the new + barrier. Therefore mdev->oldest_tle points already to b */ + } else { + D_ASSERT(nob != NULL); + mdev->oldest_tle = nob; + kfree(b); + } + + spin_unlock_irq(&mdev->req_lock); + dec_ap_pending(mdev); + + return; + +bail: + spin_unlock_irq(&mdev->req_lock); + drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR)); +} + + +/** + * tl_clear() - Clears all requests and &struct drbd_tl_epoch objects out of the TL + * @mdev: DRBD device. + * + * This is called after the connection to the peer was lost. The storage covered + * by the requests on the transfer gets marked as our of sync. Called from the + * receiver thread and the worker thread. + */ +void tl_clear(struct drbd_conf *mdev) +{ + struct drbd_tl_epoch *b, *tmp; + struct list_head *le, *tle; + struct drbd_request *r; + int new_initial_bnr = net_random(); + + spin_lock_irq(&mdev->req_lock); + + b = mdev->oldest_tle; + while (b) { + list_for_each_safe(le, tle, &b->requests) { + r = list_entry(le, struct drbd_request, tl_requests); + /* It would be nice to complete outside of spinlock. + * But this is easier for now. */ + _req_mod(r, connection_lost_while_pending); + } + tmp = b->next; + + /* there could still be requests on that ring list, + * in case local io is still pending */ + list_del(&b->requests); + + /* dec_ap_pending corresponding to queue_barrier. + * the newest barrier may not have been queued yet, + * in which case w.cb is still NULL. */ + if (b->w.cb != NULL) + dec_ap_pending(mdev); + + if (b == mdev->newest_tle) { + /* recycle, but reinit! */ + D_ASSERT(tmp == NULL); + INIT_LIST_HEAD(&b->requests); + INIT_LIST_HEAD(&b->w.list); + b->w.cb = NULL; + b->br_number = new_initial_bnr; + b->n_req = 0; + + mdev->oldest_tle = b; + break; + } + kfree(b); + b = tmp; + } + + /* we expect this list to be empty. */ + D_ASSERT(list_empty(&mdev->out_of_sequence_requests)); + + /* but just in case, clean it up anyways! */ + list_for_each_safe(le, tle, &mdev->out_of_sequence_requests) { + r = list_entry(le, struct drbd_request, tl_requests); + /* It would be nice to complete outside of spinlock. + * But this is easier for now. */ + _req_mod(r, connection_lost_while_pending); + } + + /* ensure bit indicating barrier is required is clear */ + clear_bit(CREATE_BARRIER, &mdev->flags); + + spin_unlock_irq(&mdev->req_lock); +} + +/** + * cl_wide_st_chg() - TRUE if the state change is a cluster wide one + * @mdev: DRBD device. + * @os: old (current) state. + * @ns: new (wanted) state. + */ +static int cl_wide_st_chg(struct drbd_conf *mdev, + union drbd_state os, union drbd_state ns) +{ + return (os.conn >= C_CONNECTED && ns.conn >= C_CONNECTED && + ((os.role != R_PRIMARY && ns.role == R_PRIMARY) || + (os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) || + (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S) || + (os.disk != D_DISKLESS && ns.disk == D_DISKLESS))) || + (os.conn >= C_CONNECTED && ns.conn == C_DISCONNECTING) || + (os.conn == C_CONNECTED && ns.conn == C_VERIFY_S); +} + +int drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f, + union drbd_state mask, union drbd_state val) +{ + unsigned long flags; + union drbd_state os, ns; + int rv; + + spin_lock_irqsave(&mdev->req_lock, flags); + os = mdev->state; + ns.i = (os.i & ~mask.i) | val.i; + rv = _drbd_set_state(mdev, ns, f, NULL); + ns = mdev->state; + spin_unlock_irqrestore(&mdev->req_lock, flags); + + return rv; +} + +/** + * drbd_force_state() - Impose a change which happens outside our control on our state + * @mdev: DRBD device. + * @mask: mask of state bits to change. + * @val: value of new state bits. + */ +void drbd_force_state(struct drbd_conf *mdev, + union drbd_state mask, union drbd_state val) +{ + drbd_change_state(mdev, CS_HARD, mask, val); +} + +static int is_valid_state(struct drbd_conf *mdev, union drbd_state ns); +static int is_valid_state_transition(struct drbd_conf *, + union drbd_state, union drbd_state); +static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os, + union drbd_state ns, int *warn_sync_abort); +int drbd_send_state_req(struct drbd_conf *, + union drbd_state, union drbd_state); + +static enum drbd_state_ret_codes _req_st_cond(struct drbd_conf *mdev, + union drbd_state mask, union drbd_state val) +{ + union drbd_state os, ns; + unsigned long flags; + int rv; + + if (test_and_clear_bit(CL_ST_CHG_SUCCESS, &mdev->flags)) + return SS_CW_SUCCESS; + + if (test_and_clear_bit(CL_ST_CHG_FAIL, &mdev->flags)) + return SS_CW_FAILED_BY_PEER; + + rv = 0; + spin_lock_irqsave(&mdev->req_lock, flags); + os = mdev->state; + ns.i = (os.i & ~mask.i) | val.i; + ns = sanitize_state(mdev, os, ns, NULL); + + if (!cl_wide_st_chg(mdev, os, ns)) + rv = SS_CW_NO_NEED; + if (!rv) { + rv = is_valid_state(mdev, ns); + if (rv == SS_SUCCESS) { + rv = is_valid_state_transition(mdev, ns, os); + if (rv == SS_SUCCESS) + rv = 0; /* cont waiting, otherwise fail. */ + } + } + spin_unlock_irqrestore(&mdev->req_lock, flags); + + return rv; +} + +/** + * drbd_req_state() - Perform an eventually cluster wide state change + * @mdev: DRBD device. + * @mask: mask of state bits to change. + * @val: value of new state bits. + * @f: flags + * + * Should not be called directly, use drbd_request_state() or + * _drbd_request_state(). + */ +static int drbd_req_state(struct drbd_conf *mdev, + union drbd_state mask, union drbd_state val, + enum chg_state_flags f) +{ + struct completion done; + unsigned long flags; + union drbd_state os, ns; + int rv; + + init_completion(&done); + + if (f & CS_SERIALIZE) + mutex_lock(&mdev->state_mutex); + + spin_lock_irqsave(&mdev->req_lock, flags); + os = mdev->state; + ns.i = (os.i & ~mask.i) | val.i; + ns = sanitize_state(mdev, os, ns, NULL); + + if (cl_wide_st_chg(mdev, os, ns)) { + rv = is_valid_state(mdev, ns); + if (rv == SS_SUCCESS) + rv = is_valid_state_transition(mdev, ns, os); + spin_unlock_irqrestore(&mdev->req_lock, flags); + + if (rv < SS_SUCCESS) { + if (f & CS_VERBOSE) + print_st_err(mdev, os, ns, rv); + goto abort; + } + + drbd_state_lock(mdev); + if (!drbd_send_state_req(mdev, mask, val)) { + drbd_state_unlock(mdev); + rv = SS_CW_FAILED_BY_PEER; + if (f & CS_VERBOSE) + print_st_err(mdev, os, ns, rv); + goto abort; + } + + wait_event(mdev->state_wait, + (rv = _req_st_cond(mdev, mask, val))); + + if (rv < SS_SUCCESS) { + drbd_state_unlock(mdev); + if (f & CS_VERBOSE) + print_st_err(mdev, os, ns, rv); + goto abort; + } + spin_lock_irqsave(&mdev->req_lock, flags); + os = mdev->state; + ns.i = (os.i & ~mask.i) | val.i; + rv = _drbd_set_state(mdev, ns, f, &done); + drbd_state_unlock(mdev); + } else { + rv = _drbd_set_state(mdev, ns, f, &done); + } + + spin_unlock_irqrestore(&mdev->req_lock, flags); + + if (f & CS_WAIT_COMPLETE && rv == SS_SUCCESS) { + D_ASSERT(current != mdev->worker.task); + wait_for_completion(&done); + } + +abort: + if (f & CS_SERIALIZE) + mutex_unlock(&mdev->state_mutex); + + return rv; +} + +/** + * _drbd_request_state() - Request a state change (with flags) + * @mdev: DRBD device. + * @mask: mask of state bits to change. + * @val: value of new state bits. + * @f: flags + * + * Cousin of drbd_request_state(), useful with the CS_WAIT_COMPLETE + * flag, or when logging of failed state change requests is not desired. + */ +int _drbd_request_state(struct drbd_conf *mdev, union drbd_state mask, + union drbd_state val, enum chg_state_flags f) +{ + int rv; + + wait_event(mdev->state_wait, + (rv = drbd_req_state(mdev, mask, val, f)) != SS_IN_TRANSIENT_STATE); + + return rv; +} + +static void print_st(struct drbd_conf *mdev, char *name, union drbd_state ns) +{ + dev_err(DEV, " %s = { cs:%s ro:%s/%s ds:%s/%s %c%c%c%c }\n", + name, + drbd_conn_str(ns.conn), + drbd_role_str(ns.role), + drbd_role_str(ns.peer), + drbd_disk_str(ns.disk), + drbd_disk_str(ns.pdsk), + ns.susp ? 's' : 'r', + ns.aftr_isp ? 'a' : '-', + ns.peer_isp ? 'p' : '-', + ns.user_isp ? 'u' : '-' + ); +} + +void print_st_err(struct drbd_conf *mdev, + union drbd_state os, union drbd_state ns, int err) +{ + if (err == SS_IN_TRANSIENT_STATE) + return; + dev_err(DEV, "State change failed: %s\n", drbd_set_st_err_str(err)); + print_st(mdev, " state", os); + print_st(mdev, "wanted", ns); +} + + +#define drbd_peer_str drbd_role_str +#define drbd_pdsk_str drbd_disk_str + +#define drbd_susp_str(A) ((A) ? "1" : "0") +#define drbd_aftr_isp_str(A) ((A) ? "1" : "0") +#define drbd_peer_isp_str(A) ((A) ? "1" : "0") +#define drbd_user_isp_str(A) ((A) ? "1" : "0") + +#define PSC(A) \ + ({ if (ns.A != os.A) { \ + pbp += sprintf(pbp, #A "( %s -> %s ) ", \ + drbd_##A##_str(os.A), \ + drbd_##A##_str(ns.A)); \ + } }) + +/** + * is_valid_state() - Returns an SS_ error code if ns is not valid + * @mdev: DRBD device. + * @ns: State to consider. + */ +static int is_valid_state(struct drbd_conf *mdev, union drbd_state ns) +{ + /* See drbd_state_sw_errors in drbd_strings.c */ + + enum drbd_fencing_p fp; + int rv = SS_SUCCESS; + + fp = FP_DONT_CARE; + if (get_ldev(mdev)) { + fp = mdev->ldev->dc.fencing; + put_ldev(mdev); + } + + if (get_net_conf(mdev)) { + if (!mdev->net_conf->two_primaries && + ns.role == R_PRIMARY && ns.peer == R_PRIMARY) + rv = SS_TWO_PRIMARIES; + put_net_conf(mdev); + } + + if (rv <= 0) + /* already found a reason to abort */; + else if (ns.role == R_SECONDARY && mdev->open_cnt) + rv = SS_DEVICE_IN_USE; + + else if (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.disk < D_UP_TO_DATE) + rv = SS_NO_UP_TO_DATE_DISK; + + else if (fp >= FP_RESOURCE && + ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk >= D_UNKNOWN) + rv = SS_PRIMARY_NOP; + + else if (ns.role == R_PRIMARY && ns.disk <= D_INCONSISTENT && ns.pdsk <= D_INCONSISTENT) + rv = SS_NO_UP_TO_DATE_DISK; + + else if (ns.conn > C_CONNECTED && ns.disk < D_INCONSISTENT) + rv = SS_NO_LOCAL_DISK; + + else if (ns.conn > C_CONNECTED && ns.pdsk < D_INCONSISTENT) + rv = SS_NO_REMOTE_DISK; + + else if ((ns.conn == C_CONNECTED || + ns.conn == C_WF_BITMAP_S || + ns.conn == C_SYNC_SOURCE || + ns.conn == C_PAUSED_SYNC_S) && + ns.disk == D_OUTDATED) + rv = SS_CONNECTED_OUTDATES; + + else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && + (mdev->sync_conf.verify_alg[0] == 0)) + rv = SS_NO_VERIFY_ALG; + + else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && + mdev->agreed_pro_version < 88) + rv = SS_NOT_SUPPORTED; + + return rv; +} + +/** + * is_valid_state_transition() - Returns an SS_ error code if the state transition is not possible + * @mdev: DRBD device. + * @ns: new state. + * @os: old state. + */ +static int is_valid_state_transition(struct drbd_conf *mdev, + union drbd_state ns, union drbd_state os) +{ + int rv = SS_SUCCESS; + + if ((ns.conn == C_STARTING_SYNC_T || ns.conn == C_STARTING_SYNC_S) && + os.conn > C_CONNECTED) + rv = SS_RESYNC_RUNNING; + + if (ns.conn == C_DISCONNECTING && os.conn == C_STANDALONE) + rv = SS_ALREADY_STANDALONE; + + if (ns.disk > D_ATTACHING && os.disk == D_DISKLESS) + rv = SS_IS_DISKLESS; + + if (ns.conn == C_WF_CONNECTION && os.conn < C_UNCONNECTED) + rv = SS_NO_NET_CONFIG; + + if (ns.disk == D_OUTDATED && os.disk < D_OUTDATED && os.disk != D_ATTACHING) + rv = SS_LOWER_THAN_OUTDATED; + + if (ns.conn == C_DISCONNECTING && os.conn == C_UNCONNECTED) + rv = SS_IN_TRANSIENT_STATE; + + if (ns.conn == os.conn && ns.conn == C_WF_REPORT_PARAMS) + rv = SS_IN_TRANSIENT_STATE; + + if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && os.conn < C_CONNECTED) + rv = SS_NEED_CONNECTION; + + if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && + ns.conn != os.conn && os.conn > C_CONNECTED) + rv = SS_RESYNC_RUNNING; + + if ((ns.conn == C_STARTING_SYNC_S || ns.conn == C_STARTING_SYNC_T) && + os.conn < C_CONNECTED) + rv = SS_NEED_CONNECTION; + + return rv; +} + +/** + * sanitize_state() - Resolves implicitly necessary additional changes to a state transition + * @mdev: DRBD device. + * @os: old state. + * @ns: new state. + * @warn_sync_abort: + * + * When we loose connection, we have to set the state of the peers disk (pdsk) + * to D_UNKNOWN. This rule and many more along those lines are in this function. + */ +static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os, + union drbd_state ns, int *warn_sync_abort) +{ + enum drbd_fencing_p fp; + + fp = FP_DONT_CARE; + if (get_ldev(mdev)) { + fp = mdev->ldev->dc.fencing; + put_ldev(mdev); + } + + /* Disallow Network errors to configure a device's network part */ + if ((ns.conn >= C_TIMEOUT && ns.conn <= C_TEAR_DOWN) && + os.conn <= C_DISCONNECTING) + ns.conn = os.conn; + + /* After a network error (+C_TEAR_DOWN) only C_UNCONNECTED or C_DISCONNECTING can follow */ + if (os.conn >= C_TIMEOUT && os.conn <= C_TEAR_DOWN && + ns.conn != C_UNCONNECTED && ns.conn != C_DISCONNECTING) + ns.conn = os.conn; + + /* After C_DISCONNECTING only C_STANDALONE may follow */ + if (os.conn == C_DISCONNECTING && ns.conn != C_STANDALONE) + ns.conn = os.conn; + + if (ns.conn < C_CONNECTED) { + ns.peer_isp = 0; + ns.peer = R_UNKNOWN; + if (ns.pdsk > D_UNKNOWN || ns.pdsk < D_INCONSISTENT) + ns.pdsk = D_UNKNOWN; + } + + /* Clear the aftr_isp when becoming unconfigured */ + if (ns.conn == C_STANDALONE && ns.disk == D_DISKLESS && ns.role == R_SECONDARY) + ns.aftr_isp = 0; + + if (ns.conn <= C_DISCONNECTING && ns.disk == D_DISKLESS) + ns.pdsk = D_UNKNOWN; + + /* Abort resync if a disk fails/detaches */ + if (os.conn > C_CONNECTED && ns.conn > C_CONNECTED && + (ns.disk <= D_FAILED || ns.pdsk <= D_FAILED)) { + if (warn_sync_abort) + *warn_sync_abort = 1; + ns.conn = C_CONNECTED; + } + + if (ns.conn >= C_CONNECTED && + ((ns.disk == D_CONSISTENT || ns.disk == D_OUTDATED) || + (ns.disk == D_NEGOTIATING && ns.conn == C_WF_BITMAP_T))) { + switch (ns.conn) { + case C_WF_BITMAP_T: + case C_PAUSED_SYNC_T: + ns.disk = D_OUTDATED; + break; + case C_CONNECTED: + case C_WF_BITMAP_S: + case C_SYNC_SOURCE: + case C_PAUSED_SYNC_S: + ns.disk = D_UP_TO_DATE; + break; + case C_SYNC_TARGET: + ns.disk = D_INCONSISTENT; + dev_warn(DEV, "Implicitly set disk state Inconsistent!\n"); + break; + } + if (os.disk == D_OUTDATED && ns.disk == D_UP_TO_DATE) + dev_warn(DEV, "Implicitly set disk from Outdated to UpToDate\n"); + } + + if (ns.conn >= C_CONNECTED && + (ns.pdsk == D_CONSISTENT || ns.pdsk == D_OUTDATED)) { + switch (ns.conn) { + case C_CONNECTED: + case C_WF_BITMAP_T: + case C_PAUSED_SYNC_T: + case C_SYNC_TARGET: + ns.pdsk = D_UP_TO_DATE; + break; + case C_WF_BITMAP_S: + case C_PAUSED_SYNC_S: + ns.pdsk = D_OUTDATED; + break; + case C_SYNC_SOURCE: + ns.pdsk = D_INCONSISTENT; + dev_warn(DEV, "Implicitly set pdsk Inconsistent!\n"); + break; + } + if (os.pdsk == D_OUTDATED && ns.pdsk == D_UP_TO_DATE) + dev_warn(DEV, "Implicitly set pdsk from Outdated to UpToDate\n"); + } + + /* Connection breaks down before we finished "Negotiating" */ + if (ns.conn < C_CONNECTED && ns.disk == D_NEGOTIATING && + get_ldev_if_state(mdev, D_NEGOTIATING)) { + if (mdev->ed_uuid == mdev->ldev->md.uuid[UI_CURRENT]) { + ns.disk = mdev->new_state_tmp.disk; + ns.pdsk = mdev->new_state_tmp.pdsk; + } else { + dev_alert(DEV, "Connection lost while negotiating, no data!\n"); + ns.disk = D_DISKLESS; + ns.pdsk = D_UNKNOWN; + } + put_ldev(mdev); + } + + if (fp == FP_STONITH && + (ns.role == R_PRIMARY && + ns.conn < C_CONNECTED && + ns.pdsk > D_OUTDATED)) + ns.susp = 1; + + if (ns.aftr_isp || ns.peer_isp || ns.user_isp) { + if (ns.conn == C_SYNC_SOURCE) + ns.conn = C_PAUSED_SYNC_S; + if (ns.conn == C_SYNC_TARGET) + ns.conn = C_PAUSED_SYNC_T; + } else { + if (ns.conn == C_PAUSED_SYNC_S) + ns.conn = C_SYNC_SOURCE; + if (ns.conn == C_PAUSED_SYNC_T) + ns.conn = C_SYNC_TARGET; + } + + return ns; +} + +/* helper for __drbd_set_state */ +static void set_ov_position(struct drbd_conf *mdev, enum drbd_conns cs) +{ + if (cs == C_VERIFY_T) { + /* starting online verify from an arbitrary position + * does not fit well into the existing protocol. + * on C_VERIFY_T, we initialize ov_left and friends + * implicitly in receive_DataRequest once the + * first P_OV_REQUEST is received */ + mdev->ov_start_sector = ~(sector_t)0; + } else { + unsigned long bit = BM_SECT_TO_BIT(mdev->ov_start_sector); + if (bit >= mdev->rs_total) + mdev->ov_start_sector = + BM_BIT_TO_SECT(mdev->rs_total - 1); + mdev->ov_position = mdev->ov_start_sector; + } +} + +/** + * __drbd_set_state() - Set a new DRBD state + * @mdev: DRBD device. + * @ns: new state. + * @flags: Flags + * @done: Optional completion, that will get completed after the after_state_ch() finished + * + * Caller needs to hold req_lock, and global_state_lock. Do not call directly. + */ +int __drbd_set_state(struct drbd_conf *mdev, + union drbd_state ns, enum chg_state_flags flags, + struct completion *done) +{ + union drbd_state os; + int rv = SS_SUCCESS; + int warn_sync_abort = 0; + struct after_state_chg_work *ascw; + + os = mdev->state; + + ns = sanitize_state(mdev, os, ns, &warn_sync_abort); + + if (ns.i == os.i) + return SS_NOTHING_TO_DO; + + if (!(flags & CS_HARD)) { + /* pre-state-change checks ; only look at ns */ + /* See drbd_state_sw_errors in drbd_strings.c */ + + rv = is_valid_state(mdev, ns); + if (rv < SS_SUCCESS) { + /* If the old state was illegal as well, then let + this happen...*/ + + if (is_valid_state(mdev, os) == rv) { + dev_err(DEV, "Considering state change from bad state. " + "Error would be: '%s'\n", + drbd_set_st_err_str(rv)); + print_st(mdev, "old", os); + print_st(mdev, "new", ns); + rv = is_valid_state_transition(mdev, ns, os); + } + } else + rv = is_valid_state_transition(mdev, ns, os); + } + + if (rv < SS_SUCCESS) { + if (flags & CS_VERBOSE) + print_st_err(mdev, os, ns, rv); + return rv; + } + + if (warn_sync_abort) + dev_warn(DEV, "Resync aborted.\n"); + + { + char *pbp, pb[300]; + pbp = pb; + *pbp = 0; + PSC(role); + PSC(peer); + PSC(conn); + PSC(disk); + PSC(pdsk); + PSC(susp); + PSC(aftr_isp); + PSC(peer_isp); + PSC(user_isp); + dev_info(DEV, "%s\n", pb); + } + + /* solve the race between becoming unconfigured, + * worker doing the cleanup, and + * admin reconfiguring us: + * on (re)configure, first set CONFIG_PENDING, + * then wait for a potentially exiting worker, + * start the worker, and schedule one no_op. + * then proceed with configuration. + */ + if (ns.disk == D_DISKLESS && + ns.conn == C_STANDALONE && + ns.role == R_SECONDARY && + !test_and_set_bit(CONFIG_PENDING, &mdev->flags)) + set_bit(DEVICE_DYING, &mdev->flags); + + mdev->state.i = ns.i; + wake_up(&mdev->misc_wait); + wake_up(&mdev->state_wait); + + /* post-state-change actions */ + if (os.conn >= C_SYNC_SOURCE && ns.conn <= C_CONNECTED) { + set_bit(STOP_SYNC_TIMER, &mdev->flags); + mod_timer(&mdev->resync_timer, jiffies); + } + + /* aborted verify run. log the last position */ + if ((os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) && + ns.conn < C_CONNECTED) { + mdev->ov_start_sector = + BM_BIT_TO_SECT(mdev->rs_total - mdev->ov_left); + dev_info(DEV, "Online Verify reached sector %llu\n", + (unsigned long long)mdev->ov_start_sector); + } + + if ((os.conn == C_PAUSED_SYNC_T || os.conn == C_PAUSED_SYNC_S) && + (ns.conn == C_SYNC_TARGET || ns.conn == C_SYNC_SOURCE)) { + dev_info(DEV, "Syncer continues.\n"); + mdev->rs_paused += (long)jiffies-(long)mdev->rs_mark_time; + if (ns.conn == C_SYNC_TARGET) { + if (!test_and_clear_bit(STOP_SYNC_TIMER, &mdev->flags)) + mod_timer(&mdev->resync_timer, jiffies); + /* This if (!test_bit) is only needed for the case + that a device that has ceased to used its timer, + i.e. it is already in drbd_resync_finished() gets + paused and resumed. */ + } + } + + if ((os.conn == C_SYNC_TARGET || os.conn == C_SYNC_SOURCE) && + (ns.conn == C_PAUSED_SYNC_T || ns.conn == C_PAUSED_SYNC_S)) { + dev_info(DEV, "Resync suspended\n"); + mdev->rs_mark_time = jiffies; + if (ns.conn == C_PAUSED_SYNC_T) + set_bit(STOP_SYNC_TIMER, &mdev->flags); + } + + if (os.conn == C_CONNECTED && + (ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T)) { + mdev->ov_position = 0; + mdev->rs_total = + mdev->rs_mark_left = drbd_bm_bits(mdev); + if (mdev->agreed_pro_version >= 90) + set_ov_position(mdev, ns.conn); + else + mdev->ov_start_sector = 0; + mdev->ov_left = mdev->rs_total + - BM_SECT_TO_BIT(mdev->ov_position); + mdev->rs_start = + mdev->rs_mark_time = jiffies; + mdev->ov_last_oos_size = 0; + mdev->ov_last_oos_start = 0; + + if (ns.conn == C_VERIFY_S) { + dev_info(DEV, "Starting Online Verify from sector %llu\n", + (unsigned long long)mdev->ov_position); + mod_timer(&mdev->resync_timer, jiffies); + } + } + + if (get_ldev(mdev)) { + u32 mdf = mdev->ldev->md.flags & ~(MDF_CONSISTENT|MDF_PRIMARY_IND| + MDF_CONNECTED_IND|MDF_WAS_UP_TO_DATE| + MDF_PEER_OUT_DATED|MDF_CRASHED_PRIMARY); + + if (test_bit(CRASHED_PRIMARY, &mdev->flags)) + mdf |= MDF_CRASHED_PRIMARY; + if (mdev->state.role == R_PRIMARY || + (mdev->state.pdsk < D_INCONSISTENT && mdev->state.peer == R_PRIMARY)) + mdf |= MDF_PRIMARY_IND; + if (mdev->state.conn > C_WF_REPORT_PARAMS) + mdf |= MDF_CONNECTED_IND; + if (mdev->state.disk > D_INCONSISTENT) + mdf |= MDF_CONSISTENT; + if (mdev->state.disk > D_OUTDATED) + mdf |= MDF_WAS_UP_TO_DATE; + if (mdev->state.pdsk <= D_OUTDATED && mdev->state.pdsk >= D_INCONSISTENT) + mdf |= MDF_PEER_OUT_DATED; + if (mdf != mdev->ldev->md.flags) { + mdev->ldev->md.flags = mdf; + drbd_md_mark_dirty(mdev); + } + if (os.disk < D_CONSISTENT && ns.disk >= D_CONSISTENT) + drbd_set_ed_uuid(mdev, mdev->ldev->md.uuid[UI_CURRENT]); + put_ldev(mdev); + } + + /* Peer was forced D_UP_TO_DATE & R_PRIMARY, consider to resync */ + if (os.disk == D_INCONSISTENT && os.pdsk == D_INCONSISTENT && + os.peer == R_SECONDARY && ns.peer == R_PRIMARY) + set_bit(CONSIDER_RESYNC, &mdev->flags); + + /* Receiver should clean up itself */ + if (os.conn != C_DISCONNECTING && ns.conn == C_DISCONNECTING) + drbd_thread_stop_nowait(&mdev->receiver); + + /* Now the receiver finished cleaning up itself, it should die */ + if (os.conn != C_STANDALONE && ns.conn == C_STANDALONE) + drbd_thread_stop_nowait(&mdev->receiver); + + /* Upon network failure, we need to restart the receiver. */ + if (os.conn > C_TEAR_DOWN && + ns.conn <= C_TEAR_DOWN && ns.conn >= C_TIMEOUT) + drbd_thread_restart_nowait(&mdev->receiver); + + ascw = kmalloc(sizeof(*ascw), GFP_ATOMIC); + if (ascw) { + ascw->os = os; + ascw->ns = ns; + ascw->flags = flags; + ascw->w.cb = w_after_state_ch; + ascw->done = done; + drbd_queue_work(&mdev->data.work, &ascw->w); + } else { + dev_warn(DEV, "Could not kmalloc an ascw\n"); + } + + return rv; +} + +static int w_after_state_ch(struct drbd_conf *mdev, struct drbd_work *w, int unused) +{ + struct after_state_chg_work *ascw = + container_of(w, struct after_state_chg_work, w); + after_state_ch(mdev, ascw->os, ascw->ns, ascw->flags); + if (ascw->flags & CS_WAIT_COMPLETE) { + D_ASSERT(ascw->done != NULL); + complete(ascw->done); + } + kfree(ascw); + + return 1; +} + +static void abw_start_sync(struct drbd_conf *mdev, int rv) +{ + if (rv) { + dev_err(DEV, "Writing the bitmap failed not starting resync.\n"); + _drbd_request_state(mdev, NS(conn, C_CONNECTED), CS_VERBOSE); + return; + } + + switch (mdev->state.conn) { + case C_STARTING_SYNC_T: + _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE); + break; + case C_STARTING_SYNC_S: + drbd_start_resync(mdev, C_SYNC_SOURCE); + break; + } +} + +/** + * after_state_ch() - Perform after state change actions that may sleep + * @mdev: DRBD device. + * @os: old state. + * @ns: new state. + * @flags: Flags + */ +static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, + union drbd_state ns, enum chg_state_flags flags) +{ + enum drbd_fencing_p fp; + + if (os.conn != C_CONNECTED && ns.conn == C_CONNECTED) { + clear_bit(CRASHED_PRIMARY, &mdev->flags); + if (mdev->p_uuid) + mdev->p_uuid[UI_FLAGS] &= ~((u64)2); + } + + fp = FP_DONT_CARE; + if (get_ldev(mdev)) { + fp = mdev->ldev->dc.fencing; + put_ldev(mdev); + } + + /* Inform userspace about the change... */ + drbd_bcast_state(mdev, ns); + + if (!(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE) && + (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE)) + drbd_khelper(mdev, "pri-on-incon-degr"); + + /* Here we have the actions that are performed after a + state change. This function might sleep */ + + if (fp == FP_STONITH && ns.susp) { + /* case1: The outdate peer handler is successful: + * case2: The connection was established again: */ + if ((os.pdsk > D_OUTDATED && ns.pdsk <= D_OUTDATED) || + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)) { + tl_clear(mdev); + spin_lock_irq(&mdev->req_lock); + _drbd_set_state(_NS(mdev, susp, 0), CS_VERBOSE, NULL); + spin_unlock_irq(&mdev->req_lock); + } + } + /* Do not change the order of the if above and the two below... */ + if (os.pdsk == D_DISKLESS && ns.pdsk > D_DISKLESS) { /* attach on the peer */ + drbd_send_uuids(mdev); + drbd_send_state(mdev); + } + if (os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S) + drbd_queue_bitmap_io(mdev, &drbd_send_bitmap, NULL, "send_bitmap (WFBitMapS)"); + + /* Lost contact to peer's copy of the data */ + if ((os.pdsk >= D_INCONSISTENT && + os.pdsk != D_UNKNOWN && + os.pdsk != D_OUTDATED) + && (ns.pdsk < D_INCONSISTENT || + ns.pdsk == D_UNKNOWN || + ns.pdsk == D_OUTDATED)) { + kfree(mdev->p_uuid); + mdev->p_uuid = NULL; + if (get_ldev(mdev)) { + if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) && + mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) { + drbd_uuid_new_current(mdev); + drbd_send_uuids(mdev); + } + put_ldev(mdev); + } + } + + if (ns.pdsk < D_INCONSISTENT && get_ldev(mdev)) { + if (ns.peer == R_PRIMARY && mdev->ldev->md.uuid[UI_BITMAP] == 0) + drbd_uuid_new_current(mdev); + + /* D_DISKLESS Peer becomes secondary */ + if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY) + drbd_al_to_on_disk_bm(mdev); + put_ldev(mdev); + } + + /* Last part of the attaching process ... */ + if (ns.conn >= C_CONNECTED && + os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) { + kfree(mdev->p_uuid); /* We expect to receive up-to-date UUIDs soon. */ + mdev->p_uuid = NULL; /* ...to not use the old ones in the mean time */ + drbd_send_sizes(mdev, 0); /* to start sync... */ + drbd_send_uuids(mdev); + drbd_send_state(mdev); + } + + /* We want to pause/continue resync, tell peer. */ + if (ns.conn >= C_CONNECTED && + ((os.aftr_isp != ns.aftr_isp) || + (os.user_isp != ns.user_isp))) + drbd_send_state(mdev); + + /* In case one of the isp bits got set, suspend other devices. */ + if ((!os.aftr_isp && !os.peer_isp && !os.user_isp) && + (ns.aftr_isp || ns.peer_isp || ns.user_isp)) + suspend_other_sg(mdev); + + /* Make sure the peer gets informed about eventual state + changes (ISP bits) while we were in WFReportParams. */ + if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED) + drbd_send_state(mdev); + + /* We are in the progress to start a full sync... */ + if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) || + (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S)) + drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, &abw_start_sync, "set_n_write from StartingSync"); + + /* We are invalidating our self... */ + if (os.conn < C_CONNECTED && ns.conn < C_CONNECTED && + os.disk > D_INCONSISTENT && ns.disk == D_INCONSISTENT) + drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL, "set_n_write from invalidate"); + + if (os.disk > D_FAILED && ns.disk == D_FAILED) { + enum drbd_io_error_p eh; + + eh = EP_PASS_ON; + if (get_ldev_if_state(mdev, D_FAILED)) { + eh = mdev->ldev->dc.on_io_error; + put_ldev(mdev); + } + + drbd_rs_cancel_all(mdev); + /* since get_ldev() only works as long as disk>=D_INCONSISTENT, + and it is D_DISKLESS here, local_cnt can only go down, it can + not increase... It will reach zero */ + wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt)); + mdev->rs_total = 0; + mdev->rs_failed = 0; + atomic_set(&mdev->rs_pending_cnt, 0); + + spin_lock_irq(&mdev->req_lock); + _drbd_set_state(_NS(mdev, disk, D_DISKLESS), CS_HARD, NULL); + spin_unlock_irq(&mdev->req_lock); + + if (eh == EP_CALL_HELPER) + drbd_khelper(mdev, "local-io-error"); + } + + if (os.disk > D_DISKLESS && ns.disk == D_DISKLESS) { + + if (os.disk == D_FAILED) /* && ns.disk == D_DISKLESS*/ { + if (drbd_send_state(mdev)) + dev_warn(DEV, "Notified peer that my disk is broken.\n"); + else + dev_err(DEV, "Sending state in drbd_io_error() failed\n"); + } + + lc_destroy(mdev->resync); + mdev->resync = NULL; + lc_destroy(mdev->act_log); + mdev->act_log = NULL; + __no_warn(local, + drbd_free_bc(mdev->ldev); + mdev->ldev = NULL;); + + if (mdev->md_io_tmpp) + __free_page(mdev->md_io_tmpp); + } + + /* Disks got bigger while they were detached */ + if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING && + test_and_clear_bit(RESYNC_AFTER_NEG, &mdev->flags)) { + if (ns.conn == C_CONNECTED) + resync_after_online_grow(mdev); + } + + /* A resync finished or aborted, wake paused devices... */ + if ((os.conn > C_CONNECTED && ns.conn <= C_CONNECTED) || + (os.peer_isp && !ns.peer_isp) || + (os.user_isp && !ns.user_isp)) + resume_next_sg(mdev); + + /* Upon network connection, we need to start the receiver */ + if (os.conn == C_STANDALONE && ns.conn == C_UNCONNECTED) + drbd_thread_start(&mdev->receiver); + + /* Terminate worker thread if we are unconfigured - it will be + restarted as needed... */ + if (ns.disk == D_DISKLESS && + ns.conn == C_STANDALONE && + ns.role == R_SECONDARY) { + if (os.aftr_isp != ns.aftr_isp) + resume_next_sg(mdev); + /* set in __drbd_set_state, unless CONFIG_PENDING was set */ + if (test_bit(DEVICE_DYING, &mdev->flags)) + drbd_thread_stop_nowait(&mdev->worker); + } + + drbd_md_sync(mdev); +} + + +static int drbd_thread_setup(void *arg) +{ + struct drbd_thread *thi = (struct drbd_thread *) arg; + struct drbd_conf *mdev = thi->mdev; + unsigned long flags; + int retval; + +restart: + retval = thi->function(thi); + + spin_lock_irqsave(&thi->t_lock, flags); + + /* if the receiver has been "Exiting", the last thing it did + * was set the conn state to "StandAlone", + * if now a re-connect request comes in, conn state goes C_UNCONNECTED, + * and receiver thread will be "started". + * drbd_thread_start needs to set "Restarting" in that case. + * t_state check and assignment needs to be within the same spinlock, + * so either thread_start sees Exiting, and can remap to Restarting, + * or thread_start see None, and can proceed as normal. + */ + + if (thi->t_state == Restarting) { + dev_info(DEV, "Restarting %s\n", current->comm); + thi->t_state = Running; + spin_unlock_irqrestore(&thi->t_lock, flags); + goto restart; + } + + thi->task = NULL; + thi->t_state = None; + smp_mb(); + complete(&thi->stop); + spin_unlock_irqrestore(&thi->t_lock, flags); + + dev_info(DEV, "Terminating %s\n", current->comm); + + /* Release mod reference taken when thread was started */ + module_put(THIS_MODULE); + return retval; +} + +static void drbd_thread_init(struct drbd_conf *mdev, struct drbd_thread *thi, + int (*func) (struct drbd_thread *)) +{ + spin_lock_init(&thi->t_lock); + thi->task = NULL; + thi->t_state = None; + thi->function = func; + thi->mdev = mdev; +} + +int drbd_thread_start(struct drbd_thread *thi) +{ + struct drbd_conf *mdev = thi->mdev; + struct task_struct *nt; + unsigned long flags; + + const char *me = + thi == &mdev->receiver ? "receiver" : + thi == &mdev->asender ? "asender" : + thi == &mdev->worker ? "worker" : "NONSENSE"; + + /* is used from state engine doing drbd_thread_stop_nowait, + * while holding the req lock irqsave */ + spin_lock_irqsave(&thi->t_lock, flags); + + switch (thi->t_state) { + case None: + dev_info(DEV, "Starting %s thread (from %s [%d])\n", + me, current->comm, current->pid); + + /* Get ref on module for thread - this is released when thread exits */ + if (!try_module_get(THIS_MODULE)) { + dev_err(DEV, "Failed to get module reference in drbd_thread_start\n"); + spin_unlock_irqrestore(&thi->t_lock, flags); + return FALSE; + } + + init_completion(&thi->stop); + D_ASSERT(thi->task == NULL); + thi->reset_cpu_mask = 1; + thi->t_state = Running; + spin_unlock_irqrestore(&thi->t_lock, flags); + flush_signals(current); /* otherw. may get -ERESTARTNOINTR */ + + nt = kthread_create(drbd_thread_setup, (void *) thi, + "drbd%d_%s", mdev_to_minor(mdev), me); + + if (IS_ERR(nt)) { + dev_err(DEV, "Couldn't start thread\n"); + + module_put(THIS_MODULE); + return FALSE; + } + spin_lock_irqsave(&thi->t_lock, flags); + thi->task = nt; + thi->t_state = Running; + spin_unlock_irqrestore(&thi->t_lock, flags); + wake_up_process(nt); + break; + case Exiting: + thi->t_state = Restarting; + dev_info(DEV, "Restarting %s thread (from %s [%d])\n", + me, current->comm, current->pid); + /* fall through */ + case Running: + case Restarting: + default: + spin_unlock_irqrestore(&thi->t_lock, flags); + break; + } + + return TRUE; +} + + +void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait) +{ + unsigned long flags; + + enum drbd_thread_state ns = restart ? Restarting : Exiting; + + /* may be called from state engine, holding the req lock irqsave */ + spin_lock_irqsave(&thi->t_lock, flags); + + if (thi->t_state == None) { + spin_unlock_irqrestore(&thi->t_lock, flags); + if (restart) + drbd_thread_start(thi); + return; + } + + if (thi->t_state != ns) { + if (thi->task == NULL) { + spin_unlock_irqrestore(&thi->t_lock, flags); + return; + } + + thi->t_state = ns; + smp_mb(); + init_completion(&thi->stop); + if (thi->task != current) + force_sig(DRBD_SIGKILL, thi->task); + + } + + spin_unlock_irqrestore(&thi->t_lock, flags); + + if (wait) + wait_for_completion(&thi->stop); +} + +#ifdef CONFIG_SMP +/** + * drbd_calc_cpu_mask() - Generate CPU masks, spread over all CPUs + * @mdev: DRBD device. + * + * Forces all threads of a device onto the same CPU. This is beneficial for + * DRBD's performance. May be overwritten by user's configuration. + */ +void drbd_calc_cpu_mask(struct drbd_conf *mdev) +{ + int ord, cpu; + + /* user override. */ + if (cpumask_weight(mdev->cpu_mask)) + return; + + ord = mdev_to_minor(mdev) % cpumask_weight(cpu_online_mask); + for_each_online_cpu(cpu) { + if (ord-- == 0) { + cpumask_set_cpu(cpu, mdev->cpu_mask); + return; + } + } + /* should not be reached */ + cpumask_setall(mdev->cpu_mask); +} + +/** + * drbd_thread_current_set_cpu() - modifies the cpu mask of the _current_ thread + * @mdev: DRBD device. + * + * call in the "main loop" of _all_ threads, no need for any mutex, current won't die + * prematurely. + */ +void drbd_thread_current_set_cpu(struct drbd_conf *mdev) +{ + struct task_struct *p = current; + struct drbd_thread *thi = + p == mdev->asender.task ? &mdev->asender : + p == mdev->receiver.task ? &mdev->receiver : + p == mdev->worker.task ? &mdev->worker : + NULL; + ERR_IF(thi == NULL) + return; + if (!thi->reset_cpu_mask) + return; + thi->reset_cpu_mask = 0; + set_cpus_allowed_ptr(p, mdev->cpu_mask); +} +#endif + +/* the appropriate socket mutex must be held already */ +int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, + enum drbd_packets cmd, struct p_header *h, + size_t size, unsigned msg_flags) +{ + int sent, ok; + + ERR_IF(!h) return FALSE; + ERR_IF(!size) return FALSE; + + h->magic = BE_DRBD_MAGIC; + h->command = cpu_to_be16(cmd); + h->length = cpu_to_be16(size-sizeof(struct p_header)); + + trace_drbd_packet(mdev, sock, 0, (void *)h, __FILE__, __LINE__); + sent = drbd_send(mdev, sock, h, size, msg_flags); + + ok = (sent == size); + if (!ok) + dev_err(DEV, "short sent %s size=%d sent=%d\n", + cmdname(cmd), (int)size, sent); + return ok; +} + +/* don't pass the socket. we may only look at it + * when we hold the appropriate socket mutex. + */ +int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket, + enum drbd_packets cmd, struct p_header *h, size_t size) +{ + int ok = 0; + struct socket *sock; + + if (use_data_socket) { + mutex_lock(&mdev->data.mutex); + sock = mdev->data.socket; + } else { + mutex_lock(&mdev->meta.mutex); + sock = mdev->meta.socket; + } + + /* drbd_disconnect() could have called drbd_free_sock() + * while we were waiting in down()... */ + if (likely(sock != NULL)) + ok = _drbd_send_cmd(mdev, sock, cmd, h, size, 0); + + if (use_data_socket) + mutex_unlock(&mdev->data.mutex); + else + mutex_unlock(&mdev->meta.mutex); + return ok; +} + +int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packets cmd, char *data, + size_t size) +{ + struct p_header h; + int ok; + + h.magic = BE_DRBD_MAGIC; + h.command = cpu_to_be16(cmd); + h.length = cpu_to_be16(size); + + if (!drbd_get_data_sock(mdev)) + return 0; + + trace_drbd_packet(mdev, mdev->data.socket, 0, (void *)&h, __FILE__, __LINE__); + + ok = (sizeof(h) == + drbd_send(mdev, mdev->data.socket, &h, sizeof(h), 0)); + ok = ok && (size == + drbd_send(mdev, mdev->data.socket, data, size, 0)); + + drbd_put_data_sock(mdev); + + return ok; +} + +int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc) +{ + struct p_rs_param_89 *p; + struct socket *sock; + int size, rv; + const int apv = mdev->agreed_pro_version; + + size = apv <= 87 ? sizeof(struct p_rs_param) + : apv == 88 ? sizeof(struct p_rs_param) + + strlen(mdev->sync_conf.verify_alg) + 1 + : /* 89 */ sizeof(struct p_rs_param_89); + + /* used from admin command context and receiver/worker context. + * to avoid kmalloc, grab the socket right here, + * then use the pre-allocated sbuf there */ + mutex_lock(&mdev->data.mutex); + sock = mdev->data.socket; + + if (likely(sock != NULL)) { + enum drbd_packets cmd = apv >= 89 ? P_SYNC_PARAM89 : P_SYNC_PARAM; + + p = &mdev->data.sbuf.rs_param_89; + + /* initialize verify_alg and csums_alg */ + memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX); + + p->rate = cpu_to_be32(sc->rate); + + if (apv >= 88) + strcpy(p->verify_alg, mdev->sync_conf.verify_alg); + if (apv >= 89) + strcpy(p->csums_alg, mdev->sync_conf.csums_alg); + + rv = _drbd_send_cmd(mdev, sock, cmd, &p->head, size, 0); + } else + rv = 0; /* not ok */ + + mutex_unlock(&mdev->data.mutex); + + return rv; +} + +int drbd_send_protocol(struct drbd_conf *mdev) +{ + struct p_protocol *p; + int size, rv; + + size = sizeof(struct p_protocol); + + if (mdev->agreed_pro_version >= 87) + size += strlen(mdev->net_conf->integrity_alg) + 1; + + /* we must not recurse into our own queue, + * as that is blocked during handshake */ + p = kmalloc(size, GFP_NOIO); + if (p == NULL) + return 0; + + p->protocol = cpu_to_be32(mdev->net_conf->wire_protocol); + p->after_sb_0p = cpu_to_be32(mdev->net_conf->after_sb_0p); + p->after_sb_1p = cpu_to_be32(mdev->net_conf->after_sb_1p); + p->after_sb_2p = cpu_to_be32(mdev->net_conf->after_sb_2p); + p->want_lose = cpu_to_be32(mdev->net_conf->want_lose); + p->two_primaries = cpu_to_be32(mdev->net_conf->two_primaries); + + if (mdev->agreed_pro_version >= 87) + strcpy(p->integrity_alg, mdev->net_conf->integrity_alg); + + rv = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_PROTOCOL, + (struct p_header *)p, size); + kfree(p); + return rv; +} + +int _drbd_send_uuids(struct drbd_conf *mdev, u64 uuid_flags) +{ + struct p_uuids p; + int i; + + if (!get_ldev_if_state(mdev, D_NEGOTIATING)) + return 1; + + for (i = UI_CURRENT; i < UI_SIZE; i++) + p.uuid[i] = mdev->ldev ? cpu_to_be64(mdev->ldev->md.uuid[i]) : 0; + + mdev->comm_bm_set = drbd_bm_total_weight(mdev); + p.uuid[UI_SIZE] = cpu_to_be64(mdev->comm_bm_set); + uuid_flags |= mdev->net_conf->want_lose ? 1 : 0; + uuid_flags |= test_bit(CRASHED_PRIMARY, &mdev->flags) ? 2 : 0; + uuid_flags |= mdev->new_state_tmp.disk == D_INCONSISTENT ? 4 : 0; + p.uuid[UI_FLAGS] = cpu_to_be64(uuid_flags); + + put_ldev(mdev); + + return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_UUIDS, + (struct p_header *)&p, sizeof(p)); +} + +int drbd_send_uuids(struct drbd_conf *mdev) +{ + return _drbd_send_uuids(mdev, 0); +} + +int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev) +{ + return _drbd_send_uuids(mdev, 8); +} + + +int drbd_send_sync_uuid(struct drbd_conf *mdev, u64 val) +{ + struct p_rs_uuid p; + + p.uuid = cpu_to_be64(val); + + return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SYNC_UUID, + (struct p_header *)&p, sizeof(p)); +} + +int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply) +{ + struct p_sizes p; + sector_t d_size, u_size; + int q_order_type; + int ok; + + if (get_ldev_if_state(mdev, D_NEGOTIATING)) { + D_ASSERT(mdev->ldev->backing_bdev); + d_size = drbd_get_max_capacity(mdev->ldev); + u_size = mdev->ldev->dc.disk_size; + q_order_type = drbd_queue_order_type(mdev); + p.queue_order_type = cpu_to_be32(drbd_queue_order_type(mdev)); + put_ldev(mdev); + } else { + d_size = 0; + u_size = 0; + q_order_type = QUEUE_ORDERED_NONE; + } + + p.d_size = cpu_to_be64(d_size); + p.u_size = cpu_to_be64(u_size); + p.c_size = cpu_to_be64(trigger_reply ? 0 : drbd_get_capacity(mdev->this_bdev)); + p.max_segment_size = cpu_to_be32(queue_max_segment_size(mdev->rq_queue)); + p.queue_order_type = cpu_to_be32(q_order_type); + + ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SIZES, + (struct p_header *)&p, sizeof(p)); + return ok; +} + +/** + * drbd_send_state() - Sends the drbd state to the peer + * @mdev: DRBD device. + */ +int drbd_send_state(struct drbd_conf *mdev) +{ + struct socket *sock; + struct p_state p; + int ok = 0; + + /* Grab state lock so we wont send state if we're in the middle + * of a cluster wide state change on another thread */ + drbd_state_lock(mdev); + + mutex_lock(&mdev->data.mutex); + + p.state = cpu_to_be32(mdev->state.i); /* Within the send mutex */ + sock = mdev->data.socket; + + if (likely(sock != NULL)) { + ok = _drbd_send_cmd(mdev, sock, P_STATE, + (struct p_header *)&p, sizeof(p), 0); + } + + mutex_unlock(&mdev->data.mutex); + + drbd_state_unlock(mdev); + return ok; +} + +int drbd_send_state_req(struct drbd_conf *mdev, + union drbd_state mask, union drbd_state val) +{ + struct p_req_state p; + + p.mask = cpu_to_be32(mask.i); + p.val = cpu_to_be32(val.i); + + return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_STATE_CHG_REQ, + (struct p_header *)&p, sizeof(p)); +} + +int drbd_send_sr_reply(struct drbd_conf *mdev, int retcode) +{ + struct p_req_state_reply p; + + p.retcode = cpu_to_be32(retcode); + + return drbd_send_cmd(mdev, USE_META_SOCKET, P_STATE_CHG_REPLY, + (struct p_header *)&p, sizeof(p)); +} + +int fill_bitmap_rle_bits(struct drbd_conf *mdev, + struct p_compressed_bm *p, + struct bm_xfer_ctx *c) +{ + struct bitstream bs; + unsigned long plain_bits; + unsigned long tmp; + unsigned long rl; + unsigned len; + unsigned toggle; + int bits; + + /* may we use this feature? */ + if ((mdev->sync_conf.use_rle == 0) || + (mdev->agreed_pro_version < 90)) + return 0; + + if (c->bit_offset >= c->bm_bits) + return 0; /* nothing to do. */ + + /* use at most thus many bytes */ + bitstream_init(&bs, p->code, BM_PACKET_VLI_BYTES_MAX, 0); + memset(p->code, 0, BM_PACKET_VLI_BYTES_MAX); + /* plain bits covered in this code string */ + plain_bits = 0; + + /* p->encoding & 0x80 stores whether the first run length is set. + * bit offset is implicit. + * start with toggle == 2 to be able to tell the first iteration */ + toggle = 2; + + /* see how much plain bits we can stuff into one packet + * using RLE and VLI. */ + do { + tmp = (toggle == 0) ? _drbd_bm_find_next_zero(mdev, c->bit_offset) + : _drbd_bm_find_next(mdev, c->bit_offset); + if (tmp == -1UL) + tmp = c->bm_bits; + rl = tmp - c->bit_offset; + + if (toggle == 2) { /* first iteration */ + if (rl == 0) { + /* the first checked bit was set, + * store start value, */ + DCBP_set_start(p, 1); + /* but skip encoding of zero run length */ + toggle = !toggle; + continue; + } + DCBP_set_start(p, 0); + } + + /* paranoia: catch zero runlength. + * can only happen if bitmap is modified while we scan it. */ + if (rl == 0) { + dev_err(DEV, "unexpected zero runlength while encoding bitmap " + "t:%u bo:%lu\n", toggle, c->bit_offset); + return -1; + } + + bits = vli_encode_bits(&bs, rl); + if (bits == -ENOBUFS) /* buffer full */ + break; + if (bits <= 0) { + dev_err(DEV, "error while encoding bitmap: %d\n", bits); + return 0; + } + + toggle = !toggle; + plain_bits += rl; + c->bit_offset = tmp; + } while (c->bit_offset < c->bm_bits); + + len = bs.cur.b - p->code + !!bs.cur.bit; + + if (plain_bits < (len << 3)) { + /* incompressible with this method. + * we need to rewind both word and bit position. */ + c->bit_offset -= plain_bits; + bm_xfer_ctx_bit_to_word_offset(c); + c->bit_offset = c->word_offset * BITS_PER_LONG; + return 0; + } + + /* RLE + VLI was able to compress it just fine. + * update c->word_offset. */ + bm_xfer_ctx_bit_to_word_offset(c); + + /* store pad_bits */ + DCBP_set_pad_bits(p, (8 - bs.cur.bit) & 0x7); + + return len; +} + +enum { OK, FAILED, DONE } +send_bitmap_rle_or_plain(struct drbd_conf *mdev, + struct p_header *h, struct bm_xfer_ctx *c) +{ + struct p_compressed_bm *p = (void*)h; + unsigned long num_words; + int len; + int ok; + + len = fill_bitmap_rle_bits(mdev, p, c); + + if (len < 0) + return FAILED; + + if (len) { + DCBP_set_code(p, RLE_VLI_Bits); + ok = _drbd_send_cmd(mdev, mdev->data.socket, P_COMPRESSED_BITMAP, h, + sizeof(*p) + len, 0); + + c->packets[0]++; + c->bytes[0] += sizeof(*p) + len; + + if (c->bit_offset >= c->bm_bits) + len = 0; /* DONE */ + } else { + /* was not compressible. + * send a buffer full of plain text bits instead. */ + num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset); + len = num_words * sizeof(long); + if (len) + drbd_bm_get_lel(mdev, c->word_offset, num_words, (unsigned long*)h->payload); + ok = _drbd_send_cmd(mdev, mdev->data.socket, P_BITMAP, + h, sizeof(struct p_header) + len, 0); + c->word_offset += num_words; + c->bit_offset = c->word_offset * BITS_PER_LONG; + + c->packets[1]++; + c->bytes[1] += sizeof(struct p_header) + len; + + if (c->bit_offset > c->bm_bits) + c->bit_offset = c->bm_bits; + } + ok = ok ? ((len == 0) ? DONE : OK) : FAILED; + + if (ok == DONE) + INFO_bm_xfer_stats(mdev, "send", c); + return ok; +} + +/* See the comment at receive_bitmap() */ +int _drbd_send_bitmap(struct drbd_conf *mdev) +{ + struct bm_xfer_ctx c; + struct p_header *p; + int ret; + + ERR_IF(!mdev->bitmap) return FALSE; + + /* maybe we should use some per thread scratch page, + * and allocate that during initial device creation? */ + p = (struct p_header *) __get_free_page(GFP_NOIO); + if (!p) { + dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__); + return FALSE; + } + + if (get_ldev(mdev)) { + if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC)) { + dev_info(DEV, "Writing the whole bitmap, MDF_FullSync was set.\n"); + drbd_bm_set_all(mdev); + if (drbd_bm_write(mdev)) { + /* write_bm did fail! Leave full sync flag set in Meta P_DATA + * but otherwise process as per normal - need to tell other + * side that a full resync is required! */ + dev_err(DEV, "Failed to write bitmap to disk!\n"); + } else { + drbd_md_clear_flag(mdev, MDF_FULL_SYNC); + drbd_md_sync(mdev); + } + } + put_ldev(mdev); + } + + c = (struct bm_xfer_ctx) { + .bm_bits = drbd_bm_bits(mdev), + .bm_words = drbd_bm_words(mdev), + }; + + do { + ret = send_bitmap_rle_or_plain(mdev, p, &c); + } while (ret == OK); + + free_page((unsigned long) p); + return (ret == DONE); +} + +int drbd_send_bitmap(struct drbd_conf *mdev) +{ + int err; + + if (!drbd_get_data_sock(mdev)) + return -1; + err = !_drbd_send_bitmap(mdev); + drbd_put_data_sock(mdev); + return err; +} + +int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size) +{ + int ok; + struct p_barrier_ack p; + + p.barrier = barrier_nr; + p.set_size = cpu_to_be32(set_size); + + if (mdev->state.conn < C_CONNECTED) + return FALSE; + ok = drbd_send_cmd(mdev, USE_META_SOCKET, P_BARRIER_ACK, + (struct p_header *)&p, sizeof(p)); + return ok; +} + +/** + * _drbd_send_ack() - Sends an ack packet + * @mdev: DRBD device. + * @cmd: Packet command code. + * @sector: sector, needs to be in big endian byte order + * @blksize: size in byte, needs to be in big endian byte order + * @block_id: Id, big endian byte order + */ +static int _drbd_send_ack(struct drbd_conf *mdev, enum drbd_packets cmd, + u64 sector, + u32 blksize, + u64 block_id) +{ + int ok; + struct p_block_ack p; + + p.sector = sector; + p.block_id = block_id; + p.blksize = blksize; + p.seq_num = cpu_to_be32(atomic_add_return(1, &mdev->packet_seq)); + + if (!mdev->meta.socket || mdev->state.conn < C_CONNECTED) + return FALSE; + ok = drbd_send_cmd(mdev, USE_META_SOCKET, cmd, + (struct p_header *)&p, sizeof(p)); + return ok; +} + +int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packets cmd, + struct p_data *dp) +{ + const int header_size = sizeof(struct p_data) + - sizeof(struct p_header); + int data_size = ((struct p_header *)dp)->length - header_size; + + return _drbd_send_ack(mdev, cmd, dp->sector, cpu_to_be32(data_size), + dp->block_id); +} + +int drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packets cmd, + struct p_block_req *rp) +{ + return _drbd_send_ack(mdev, cmd, rp->sector, rp->blksize, rp->block_id); +} + +/** + * drbd_send_ack() - Sends an ack packet + * @mdev: DRBD device. + * @cmd: Packet command code. + * @e: Epoch entry. + */ +int drbd_send_ack(struct drbd_conf *mdev, + enum drbd_packets cmd, struct drbd_epoch_entry *e) +{ + return _drbd_send_ack(mdev, cmd, + cpu_to_be64(e->sector), + cpu_to_be32(e->size), + e->block_id); +} + +/* This function misuses the block_id field to signal if the blocks + * are is sync or not. */ +int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packets cmd, + sector_t sector, int blksize, u64 block_id) +{ + return _drbd_send_ack(mdev, cmd, + cpu_to_be64(sector), + cpu_to_be32(blksize), + cpu_to_be64(block_id)); +} + +int drbd_send_drequest(struct drbd_conf *mdev, int cmd, + sector_t sector, int size, u64 block_id) +{ + int ok; + struct p_block_req p; + + p.sector = cpu_to_be64(sector); + p.block_id = block_id; + p.blksize = cpu_to_be32(size); + + ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, cmd, + (struct p_header *)&p, sizeof(p)); + return ok; +} + +int drbd_send_drequest_csum(struct drbd_conf *mdev, + sector_t sector, int size, + void *digest, int digest_size, + enum drbd_packets cmd) +{ + int ok; + struct p_block_req p; + + p.sector = cpu_to_be64(sector); + p.block_id = BE_DRBD_MAGIC + 0xbeef; + p.blksize = cpu_to_be32(size); + + p.head.magic = BE_DRBD_MAGIC; + p.head.command = cpu_to_be16(cmd); + p.head.length = cpu_to_be16(sizeof(p) - sizeof(struct p_header) + digest_size); + + mutex_lock(&mdev->data.mutex); + + ok = (sizeof(p) == drbd_send(mdev, mdev->data.socket, &p, sizeof(p), 0)); + ok = ok && (digest_size == drbd_send(mdev, mdev->data.socket, digest, digest_size, 0)); + + mutex_unlock(&mdev->data.mutex); + + return ok; +} + +int drbd_send_ov_request(struct drbd_conf *mdev, sector_t sector, int size) +{ + int ok; + struct p_block_req p; + + p.sector = cpu_to_be64(sector); + p.block_id = BE_DRBD_MAGIC + 0xbabe; + p.blksize = cpu_to_be32(size); + + ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_OV_REQUEST, + (struct p_header *)&p, sizeof(p)); + return ok; +} + +/* called on sndtimeo + * returns FALSE if we should retry, + * TRUE if we think connection is dead + */ +static int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket *sock) +{ + int drop_it; + /* long elapsed = (long)(jiffies - mdev->last_received); */ + + drop_it = mdev->meta.socket == sock + || !mdev->asender.task + || get_t_state(&mdev->asender) != Running + || mdev->state.conn < C_CONNECTED; + + if (drop_it) + return TRUE; + + drop_it = !--mdev->ko_count; + if (!drop_it) { + dev_err(DEV, "[%s/%d] sock_sendmsg time expired, ko = %u\n", + current->comm, current->pid, mdev->ko_count); + request_ping(mdev); + } + + return drop_it; /* && (mdev->state == R_PRIMARY) */; +} + +/* The idea of sendpage seems to be to put some kind of reference + * to the page into the skb, and to hand it over to the NIC. In + * this process get_page() gets called. + * + * As soon as the page was really sent over the network put_page() + * gets called by some part of the network layer. [ NIC driver? ] + * + * [ get_page() / put_page() increment/decrement the count. If count + * reaches 0 the page will be freed. ] + * + * This works nicely with pages from FSs. + * But this means that in protocol A we might signal IO completion too early! + * + * In order not to corrupt data during a resync we must make sure + * that we do not reuse our own buffer pages (EEs) to early, therefore + * we have the net_ee list. + * + * XFS seems to have problems, still, it submits pages with page_count == 0! + * As a workaround, we disable sendpage on pages + * with page_count == 0 or PageSlab. + */ +static int _drbd_no_send_page(struct drbd_conf *mdev, struct page *page, + int offset, size_t size) +{ + int sent = drbd_send(mdev, mdev->data.socket, kmap(page) + offset, size, 0); + kunmap(page); + if (sent == size) + mdev->send_cnt += size>>9; + return sent == size; +} + +static int _drbd_send_page(struct drbd_conf *mdev, struct page *page, + int offset, size_t size) +{ + mm_segment_t oldfs = get_fs(); + int sent, ok; + int len = size; + + /* e.g. XFS meta- & log-data is in slab pages, which have a + * page_count of 0 and/or have PageSlab() set. + * we cannot use send_page for those, as that does get_page(); + * put_page(); and would cause either a VM_BUG directly, or + * __page_cache_release a page that would actually still be referenced + * by someone, leading to some obscure delayed Oops somewhere else. */ + if (disable_sendpage || (page_count(page) < 1) || PageSlab(page)) + return _drbd_no_send_page(mdev, page, offset, size); + + drbd_update_congested(mdev); + set_fs(KERNEL_DS); + do { + sent = mdev->data.socket->ops->sendpage(mdev->data.socket, page, + offset, len, + MSG_NOSIGNAL); + if (sent == -EAGAIN) { + if (we_should_drop_the_connection(mdev, + mdev->data.socket)) + break; + else + continue; + } + if (sent <= 0) { + dev_warn(DEV, "%s: size=%d len=%d sent=%d\n", + __func__, (int)size, len, sent); + break; + } + len -= sent; + offset += sent; + } while (len > 0 /* THINK && mdev->cstate >= C_CONNECTED*/); + set_fs(oldfs); + clear_bit(NET_CONGESTED, &mdev->flags); + + ok = (len == 0); + if (likely(ok)) + mdev->send_cnt += size>>9; + return ok; +} + +static int _drbd_send_bio(struct drbd_conf *mdev, struct bio *bio) +{ + struct bio_vec *bvec; + int i; + __bio_for_each_segment(bvec, bio, i, 0) { + if (!_drbd_no_send_page(mdev, bvec->bv_page, + bvec->bv_offset, bvec->bv_len)) + return 0; + } + return 1; +} + +static int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio) +{ + struct bio_vec *bvec; + int i; + __bio_for_each_segment(bvec, bio, i, 0) { + if (!_drbd_send_page(mdev, bvec->bv_page, + bvec->bv_offset, bvec->bv_len)) + return 0; + } + + return 1; +} + +/* Used to send write requests + * R_PRIMARY -> Peer (P_DATA) + */ +int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) +{ + int ok = 1; + struct p_data p; + unsigned int dp_flags = 0; + void *dgb; + int dgs; + + if (!drbd_get_data_sock(mdev)) + return 0; + + dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_w_tfm) ? + crypto_hash_digestsize(mdev->integrity_w_tfm) : 0; + + p.head.magic = BE_DRBD_MAGIC; + p.head.command = cpu_to_be16(P_DATA); + p.head.length = + cpu_to_be16(sizeof(p) - sizeof(struct p_header) + dgs + req->size); + + p.sector = cpu_to_be64(req->sector); + p.block_id = (unsigned long)req; + p.seq_num = cpu_to_be32(req->seq_num = + atomic_add_return(1, &mdev->packet_seq)); + dp_flags = 0; + + /* NOTE: no need to check if barriers supported here as we would + * not pass the test in make_request_common in that case + */ + if (bio_rw_flagged(req->master_bio, BIO_RW_BARRIER)) { + dev_err(DEV, "ASSERT FAILED would have set DP_HARDBARRIER\n"); + /* dp_flags |= DP_HARDBARRIER; */ + } + if (bio_rw_flagged(req->master_bio, BIO_RW_SYNCIO)) + dp_flags |= DP_RW_SYNC; + /* for now handle SYNCIO and UNPLUG + * as if they still were one and the same flag */ + if (bio_rw_flagged(req->master_bio, BIO_RW_UNPLUG)) + dp_flags |= DP_RW_SYNC; + if (mdev->state.conn >= C_SYNC_SOURCE && + mdev->state.conn <= C_PAUSED_SYNC_T) + dp_flags |= DP_MAY_SET_IN_SYNC; + + p.dp_flags = cpu_to_be32(dp_flags); + trace_drbd_packet(mdev, mdev->data.socket, 0, (void *)&p, __FILE__, __LINE__); + set_bit(UNPLUG_REMOTE, &mdev->flags); + ok = (sizeof(p) == + drbd_send(mdev, mdev->data.socket, &p, sizeof(p), MSG_MORE)); + if (ok && dgs) { + dgb = mdev->int_dig_out; + drbd_csum(mdev, mdev->integrity_w_tfm, req->master_bio, dgb); + ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, MSG_MORE); + } + if (ok) { + if (mdev->net_conf->wire_protocol == DRBD_PROT_A) + ok = _drbd_send_bio(mdev, req->master_bio); + else + ok = _drbd_send_zc_bio(mdev, req->master_bio); + } + + drbd_put_data_sock(mdev); + return ok; +} + +/* answer packet, used to send data back for read requests: + * Peer -> (diskless) R_PRIMARY (P_DATA_REPLY) + * C_SYNC_SOURCE -> C_SYNC_TARGET (P_RS_DATA_REPLY) + */ +int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd, + struct drbd_epoch_entry *e) +{ + int ok; + struct p_data p; + void *dgb; + int dgs; + + dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_w_tfm) ? + crypto_hash_digestsize(mdev->integrity_w_tfm) : 0; + + p.head.magic = BE_DRBD_MAGIC; + p.head.command = cpu_to_be16(cmd); + p.head.length = + cpu_to_be16(sizeof(p) - sizeof(struct p_header) + dgs + e->size); + + p.sector = cpu_to_be64(e->sector); + p.block_id = e->block_id; + /* p.seq_num = 0; No sequence numbers here.. */ + + /* Only called by our kernel thread. + * This one may be interrupted by DRBD_SIG and/or DRBD_SIGKILL + * in response to admin command or module unload. + */ + if (!drbd_get_data_sock(mdev)) + return 0; + + trace_drbd_packet(mdev, mdev->data.socket, 0, (void *)&p, __FILE__, __LINE__); + ok = sizeof(p) == drbd_send(mdev, mdev->data.socket, &p, + sizeof(p), MSG_MORE); + if (ok && dgs) { + dgb = mdev->int_dig_out; + drbd_csum(mdev, mdev->integrity_w_tfm, e->private_bio, dgb); + ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, MSG_MORE); + } + if (ok) + ok = _drbd_send_zc_bio(mdev, e->private_bio); + + drbd_put_data_sock(mdev); + return ok; +} + +/* + drbd_send distinguishes two cases: + + Packets sent via the data socket "sock" + and packets sent via the meta data socket "msock" + + sock msock + -----------------+-------------------------+------------------------------ + timeout conf.timeout / 2 conf.timeout / 2 + timeout action send a ping via msock Abort communication + and close all sockets +*/ + +/* + * you must have down()ed the appropriate [m]sock_mutex elsewhere! + */ +int drbd_send(struct drbd_conf *mdev, struct socket *sock, + void *buf, size_t size, unsigned msg_flags) +{ + struct kvec iov; + struct msghdr msg; + int rv, sent = 0; + + if (!sock) + return -1000; + + /* THINK if (signal_pending) return ... ? */ + + iov.iov_base = buf; + iov.iov_len = size; + + msg.msg_name = NULL; + msg.msg_namelen = 0; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = msg_flags | MSG_NOSIGNAL; + + if (sock == mdev->data.socket) { + mdev->ko_count = mdev->net_conf->ko_count; + drbd_update_congested(mdev); + } + do { + /* STRANGE + * tcp_sendmsg does _not_ use its size parameter at all ? + * + * -EAGAIN on timeout, -EINTR on signal. + */ +/* THINK + * do we need to block DRBD_SIG if sock == &meta.socket ?? + * otherwise wake_asender() might interrupt some send_*Ack ! + */ + rv = kernel_sendmsg(sock, &msg, &iov, 1, size); + if (rv == -EAGAIN) { + if (we_should_drop_the_connection(mdev, sock)) + break; + else + continue; + } + D_ASSERT(rv != 0); + if (rv == -EINTR) { + flush_signals(current); + rv = 0; + } + if (rv < 0) + break; + sent += rv; + iov.iov_base += rv; + iov.iov_len -= rv; + } while (sent < size); + + if (sock == mdev->data.socket) + clear_bit(NET_CONGESTED, &mdev->flags); + + if (rv <= 0) { + if (rv != -EAGAIN) { + dev_err(DEV, "%s_sendmsg returned %d\n", + sock == mdev->meta.socket ? "msock" : "sock", + rv); + drbd_force_state(mdev, NS(conn, C_BROKEN_PIPE)); + } else + drbd_force_state(mdev, NS(conn, C_TIMEOUT)); + } + + return sent; +} + +static int drbd_open(struct block_device *bdev, fmode_t mode) +{ + struct drbd_conf *mdev = bdev->bd_disk->private_data; + unsigned long flags; + int rv = 0; + + spin_lock_irqsave(&mdev->req_lock, flags); + /* to have a stable mdev->state.role + * and no race with updating open_cnt */ + + if (mdev->state.role != R_PRIMARY) { + if (mode & FMODE_WRITE) + rv = -EROFS; + else if (!allow_oos) + rv = -EMEDIUMTYPE; + } + + if (!rv) + mdev->open_cnt++; + spin_unlock_irqrestore(&mdev->req_lock, flags); + + return rv; +} + +static int drbd_release(struct gendisk *gd, fmode_t mode) +{ + struct drbd_conf *mdev = gd->private_data; + mdev->open_cnt--; + return 0; +} + +static void drbd_unplug_fn(struct request_queue *q) +{ + struct drbd_conf *mdev = q->queuedata; + + trace_drbd_unplug(mdev, "got unplugged"); + + /* unplug FIRST */ + spin_lock_irq(q->queue_lock); + blk_remove_plug(q); + spin_unlock_irq(q->queue_lock); + + /* only if connected */ + spin_lock_irq(&mdev->req_lock); + if (mdev->state.pdsk >= D_INCONSISTENT && mdev->state.conn >= C_CONNECTED) { + D_ASSERT(mdev->state.role == R_PRIMARY); + if (test_and_clear_bit(UNPLUG_REMOTE, &mdev->flags)) { + /* add to the data.work queue, + * unless already queued. + * XXX this might be a good addition to drbd_queue_work + * anyways, to detect "double queuing" ... */ + if (list_empty(&mdev->unplug_work.list)) + drbd_queue_work(&mdev->data.work, + &mdev->unplug_work); + } + } + spin_unlock_irq(&mdev->req_lock); + + if (mdev->state.disk >= D_INCONSISTENT) + drbd_kick_lo(mdev); +} + +static void drbd_set_defaults(struct drbd_conf *mdev) +{ + mdev->sync_conf.after = DRBD_AFTER_DEF; + mdev->sync_conf.rate = DRBD_RATE_DEF; + mdev->sync_conf.al_extents = DRBD_AL_EXTENTS_DEF; + mdev->state = (union drbd_state) { + { .role = R_SECONDARY, + .peer = R_UNKNOWN, + .conn = C_STANDALONE, + .disk = D_DISKLESS, + .pdsk = D_UNKNOWN, + .susp = 0 + } }; +} + +void drbd_init_set_defaults(struct drbd_conf *mdev) +{ + /* the memset(,0,) did most of this. + * note: only assignments, no allocation in here */ + + drbd_set_defaults(mdev); + + /* for now, we do NOT yet support it, + * even though we start some framework + * to eventually support barriers */ + set_bit(NO_BARRIER_SUPP, &mdev->flags); + + atomic_set(&mdev->ap_bio_cnt, 0); + atomic_set(&mdev->ap_pending_cnt, 0); + atomic_set(&mdev->rs_pending_cnt, 0); + atomic_set(&mdev->unacked_cnt, 0); + atomic_set(&mdev->local_cnt, 0); + atomic_set(&mdev->net_cnt, 0); + atomic_set(&mdev->packet_seq, 0); + atomic_set(&mdev->pp_in_use, 0); + + mutex_init(&mdev->md_io_mutex); + mutex_init(&mdev->data.mutex); + mutex_init(&mdev->meta.mutex); + sema_init(&mdev->data.work.s, 0); + sema_init(&mdev->meta.work.s, 0); + mutex_init(&mdev->state_mutex); + + spin_lock_init(&mdev->data.work.q_lock); + spin_lock_init(&mdev->meta.work.q_lock); + + spin_lock_init(&mdev->al_lock); + spin_lock_init(&mdev->req_lock); + spin_lock_init(&mdev->peer_seq_lock); + spin_lock_init(&mdev->epoch_lock); + + INIT_LIST_HEAD(&mdev->active_ee); + INIT_LIST_HEAD(&mdev->sync_ee); + INIT_LIST_HEAD(&mdev->done_ee); + INIT_LIST_HEAD(&mdev->read_ee); + INIT_LIST_HEAD(&mdev->net_ee); + INIT_LIST_HEAD(&mdev->resync_reads); + INIT_LIST_HEAD(&mdev->data.work.q); + INIT_LIST_HEAD(&mdev->meta.work.q); + INIT_LIST_HEAD(&mdev->resync_work.list); + INIT_LIST_HEAD(&mdev->unplug_work.list); + INIT_LIST_HEAD(&mdev->md_sync_work.list); + INIT_LIST_HEAD(&mdev->bm_io_work.w.list); + mdev->resync_work.cb = w_resync_inactive; + mdev->unplug_work.cb = w_send_write_hint; + mdev->md_sync_work.cb = w_md_sync; + mdev->bm_io_work.w.cb = w_bitmap_io; + init_timer(&mdev->resync_timer); + init_timer(&mdev->md_sync_timer); + mdev->resync_timer.function = resync_timer_fn; + mdev->resync_timer.data = (unsigned long) mdev; + mdev->md_sync_timer.function = md_sync_timer_fn; + mdev->md_sync_timer.data = (unsigned long) mdev; + + init_waitqueue_head(&mdev->misc_wait); + init_waitqueue_head(&mdev->state_wait); + init_waitqueue_head(&mdev->ee_wait); + init_waitqueue_head(&mdev->al_wait); + init_waitqueue_head(&mdev->seq_wait); + + drbd_thread_init(mdev, &mdev->receiver, drbdd_init); + drbd_thread_init(mdev, &mdev->worker, drbd_worker); + drbd_thread_init(mdev, &mdev->asender, drbd_asender); + + mdev->agreed_pro_version = PRO_VERSION_MAX; + mdev->write_ordering = WO_bio_barrier; + mdev->resync_wenr = LC_FREE; +} + +void drbd_mdev_cleanup(struct drbd_conf *mdev) +{ + if (mdev->receiver.t_state != None) + dev_err(DEV, "ASSERT FAILED: receiver t_state == %d expected 0.\n", + mdev->receiver.t_state); + + /* no need to lock it, I'm the only thread alive */ + if (atomic_read(&mdev->current_epoch->epoch_size) != 0) + dev_err(DEV, "epoch_size:%d\n", atomic_read(&mdev->current_epoch->epoch_size)); + mdev->al_writ_cnt = + mdev->bm_writ_cnt = + mdev->read_cnt = + mdev->recv_cnt = + mdev->send_cnt = + mdev->writ_cnt = + mdev->p_size = + mdev->rs_start = + mdev->rs_total = + mdev->rs_failed = + mdev->rs_mark_left = + mdev->rs_mark_time = 0; + D_ASSERT(mdev->net_conf == NULL); + + drbd_set_my_capacity(mdev, 0); + if (mdev->bitmap) { + /* maybe never allocated. */ + drbd_bm_resize(mdev, 0); + drbd_bm_cleanup(mdev); + } + + drbd_free_resources(mdev); + + /* + * currently we drbd_init_ee only on module load, so + * we may do drbd_release_ee only on module unload! + */ + D_ASSERT(list_empty(&mdev->active_ee)); + D_ASSERT(list_empty(&mdev->sync_ee)); + D_ASSERT(list_empty(&mdev->done_ee)); + D_ASSERT(list_empty(&mdev->read_ee)); + D_ASSERT(list_empty(&mdev->net_ee)); + D_ASSERT(list_empty(&mdev->resync_reads)); + D_ASSERT(list_empty(&mdev->data.work.q)); + D_ASSERT(list_empty(&mdev->meta.work.q)); + D_ASSERT(list_empty(&mdev->resync_work.list)); + D_ASSERT(list_empty(&mdev->unplug_work.list)); + +} + + +static void drbd_destroy_mempools(void) +{ + struct page *page; + + while (drbd_pp_pool) { + page = drbd_pp_pool; + drbd_pp_pool = (struct page *)page_private(page); + __free_page(page); + drbd_pp_vacant--; + } + + /* D_ASSERT(atomic_read(&drbd_pp_vacant)==0); */ + + if (drbd_ee_mempool) + mempool_destroy(drbd_ee_mempool); + if (drbd_request_mempool) + mempool_destroy(drbd_request_mempool); + if (drbd_ee_cache) + kmem_cache_destroy(drbd_ee_cache); + if (drbd_request_cache) + kmem_cache_destroy(drbd_request_cache); + if (drbd_bm_ext_cache) + kmem_cache_destroy(drbd_bm_ext_cache); + if (drbd_al_ext_cache) + kmem_cache_destroy(drbd_al_ext_cache); + + drbd_ee_mempool = NULL; + drbd_request_mempool = NULL; + drbd_ee_cache = NULL; + drbd_request_cache = NULL; + drbd_bm_ext_cache = NULL; + drbd_al_ext_cache = NULL; + + return; +} + +static int drbd_create_mempools(void) +{ + struct page *page; + const int number = (DRBD_MAX_SEGMENT_SIZE/PAGE_SIZE) * minor_count; + int i; + + /* prepare our caches and mempools */ + drbd_request_mempool = NULL; + drbd_ee_cache = NULL; + drbd_request_cache = NULL; + drbd_bm_ext_cache = NULL; + drbd_al_ext_cache = NULL; + drbd_pp_pool = NULL; + + /* caches */ + drbd_request_cache = kmem_cache_create( + "drbd_req", sizeof(struct drbd_request), 0, 0, NULL); + if (drbd_request_cache == NULL) + goto Enomem; + + drbd_ee_cache = kmem_cache_create( + "drbd_ee", sizeof(struct drbd_epoch_entry), 0, 0, NULL); + if (drbd_ee_cache == NULL) + goto Enomem; + + drbd_bm_ext_cache = kmem_cache_create( + "drbd_bm", sizeof(struct bm_extent), 0, 0, NULL); + if (drbd_bm_ext_cache == NULL) + goto Enomem; + + drbd_al_ext_cache = kmem_cache_create( + "drbd_al", sizeof(struct lc_element), 0, 0, NULL); + if (drbd_al_ext_cache == NULL) + goto Enomem; + + /* mempools */ + drbd_request_mempool = mempool_create(number, + mempool_alloc_slab, mempool_free_slab, drbd_request_cache); + if (drbd_request_mempool == NULL) + goto Enomem; + + drbd_ee_mempool = mempool_create(number, + mempool_alloc_slab, mempool_free_slab, drbd_ee_cache); + if (drbd_request_mempool == NULL) + goto Enomem; + + /* drbd's page pool */ + spin_lock_init(&drbd_pp_lock); + + for (i = 0; i < number; i++) { + page = alloc_page(GFP_HIGHUSER); + if (!page) + goto Enomem; + set_page_private(page, (unsigned long)drbd_pp_pool); + drbd_pp_pool = page; + } + drbd_pp_vacant = number; + + return 0; + +Enomem: + drbd_destroy_mempools(); /* in case we allocated some */ + return -ENOMEM; +} + +static int drbd_notify_sys(struct notifier_block *this, unsigned long code, + void *unused) +{ + /* just so we have it. you never know what interesting things we + * might want to do here some day... + */ + + return NOTIFY_DONE; +} + +static struct notifier_block drbd_notifier = { + .notifier_call = drbd_notify_sys, +}; + +static void drbd_release_ee_lists(struct drbd_conf *mdev) +{ + int rr; + + rr = drbd_release_ee(mdev, &mdev->active_ee); + if (rr) + dev_err(DEV, "%d EEs in active list found!\n", rr); + + rr = drbd_release_ee(mdev, &mdev->sync_ee); + if (rr) + dev_err(DEV, "%d EEs in sync list found!\n", rr); + + rr = drbd_release_ee(mdev, &mdev->read_ee); + if (rr) + dev_err(DEV, "%d EEs in read list found!\n", rr); + + rr = drbd_release_ee(mdev, &mdev->done_ee); + if (rr) + dev_err(DEV, "%d EEs in done list found!\n", rr); + + rr = drbd_release_ee(mdev, &mdev->net_ee); + if (rr) + dev_err(DEV, "%d EEs in net list found!\n", rr); +} + +/* caution. no locking. + * currently only used from module cleanup code. */ +static void drbd_delete_device(unsigned int minor) +{ + struct drbd_conf *mdev = minor_to_mdev(minor); + + if (!mdev) + return; + + /* paranoia asserts */ + if (mdev->open_cnt != 0) + dev_err(DEV, "open_cnt = %d in %s:%u", mdev->open_cnt, + __FILE__ , __LINE__); + + ERR_IF (!list_empty(&mdev->data.work.q)) { + struct list_head *lp; + list_for_each(lp, &mdev->data.work.q) { + dev_err(DEV, "lp = %p\n", lp); + } + }; + /* end paranoia asserts */ + + del_gendisk(mdev->vdisk); + + /* cleanup stuff that may have been allocated during + * device (re-)configuration or state changes */ + + if (mdev->this_bdev) + bdput(mdev->this_bdev); + + drbd_free_resources(mdev); + + drbd_release_ee_lists(mdev); + + /* should be free'd on disconnect? */ + kfree(mdev->ee_hash); + /* + mdev->ee_hash_s = 0; + mdev->ee_hash = NULL; + */ + + lc_destroy(mdev->act_log); + lc_destroy(mdev->resync); + + kfree(mdev->p_uuid); + /* mdev->p_uuid = NULL; */ + + kfree(mdev->int_dig_out); + kfree(mdev->int_dig_in); + kfree(mdev->int_dig_vv); + + /* cleanup the rest that has been + * allocated from drbd_new_device + * and actually free the mdev itself */ + drbd_free_mdev(mdev); +} + +static void drbd_cleanup(void) +{ + unsigned int i; + + unregister_reboot_notifier(&drbd_notifier); + + drbd_nl_cleanup(); + + if (minor_table) { + if (drbd_proc) + remove_proc_entry("drbd", NULL); + i = minor_count; + while (i--) + drbd_delete_device(i); + drbd_destroy_mempools(); + } + + kfree(minor_table); + + unregister_blkdev(DRBD_MAJOR, "drbd"); + + printk(KERN_INFO "drbd: module cleanup done.\n"); +} + +/** + * drbd_congested() - Callback for pdflush + * @congested_data: User data + * @bdi_bits: Bits pdflush is currently interested in + * + * Returns 1<ldev->backing_bdev); + r = bdi_congested(&q->backing_dev_info, bdi_bits); + put_ldev(mdev); + if (r) + reason = 'b'; + } + + if (bdi_bits & (1 << BDI_async_congested) && test_bit(NET_CONGESTED, &mdev->flags)) { + r |= (1 << BDI_async_congested); + reason = reason == 'b' ? 'a' : 'n'; + } + +out: + mdev->congestion_reason = reason; + return r; +} + +struct drbd_conf *drbd_new_device(unsigned int minor) +{ + struct drbd_conf *mdev; + struct gendisk *disk; + struct request_queue *q; + + /* GFP_KERNEL, we are outside of all write-out paths */ + mdev = kzalloc(sizeof(struct drbd_conf), GFP_KERNEL); + if (!mdev) + return NULL; + if (!zalloc_cpumask_var(&mdev->cpu_mask, GFP_KERNEL)) + goto out_no_cpumask; + + mdev->minor = minor; + + drbd_init_set_defaults(mdev); + + q = blk_alloc_queue(GFP_KERNEL); + if (!q) + goto out_no_q; + mdev->rq_queue = q; + q->queuedata = mdev; + blk_queue_max_segment_size(q, DRBD_MAX_SEGMENT_SIZE); + + disk = alloc_disk(1); + if (!disk) + goto out_no_disk; + mdev->vdisk = disk; + + set_disk_ro(disk, TRUE); + + disk->queue = q; + disk->major = DRBD_MAJOR; + disk->first_minor = minor; + disk->fops = &drbd_ops; + sprintf(disk->disk_name, "drbd%d", minor); + disk->private_data = mdev; + + mdev->this_bdev = bdget(MKDEV(DRBD_MAJOR, minor)); + /* we have no partitions. we contain only ourselves. */ + mdev->this_bdev->bd_contains = mdev->this_bdev; + + q->backing_dev_info.congested_fn = drbd_congested; + q->backing_dev_info.congested_data = mdev; + + blk_queue_make_request(q, drbd_make_request_26); + blk_queue_bounce_limit(q, BLK_BOUNCE_ANY); + blk_queue_merge_bvec(q, drbd_merge_bvec); + q->queue_lock = &mdev->req_lock; /* needed since we use */ + /* plugging on a queue, that actually has no requests! */ + q->unplug_fn = drbd_unplug_fn; + + mdev->md_io_page = alloc_page(GFP_KERNEL); + if (!mdev->md_io_page) + goto out_no_io_page; + + if (drbd_bm_init(mdev)) + goto out_no_bitmap; + /* no need to lock access, we are still initializing this minor device. */ + if (!tl_init(mdev)) + goto out_no_tl; + + mdev->app_reads_hash = kzalloc(APP_R_HSIZE*sizeof(void *), GFP_KERNEL); + if (!mdev->app_reads_hash) + goto out_no_app_reads; + + mdev->current_epoch = kzalloc(sizeof(struct drbd_epoch), GFP_KERNEL); + if (!mdev->current_epoch) + goto out_no_epoch; + + INIT_LIST_HEAD(&mdev->current_epoch->list); + mdev->epochs = 1; + + return mdev; + +/* out_whatever_else: + kfree(mdev->current_epoch); */ +out_no_epoch: + kfree(mdev->app_reads_hash); +out_no_app_reads: + tl_cleanup(mdev); +out_no_tl: + drbd_bm_cleanup(mdev); +out_no_bitmap: + __free_page(mdev->md_io_page); +out_no_io_page: + put_disk(disk); +out_no_disk: + blk_cleanup_queue(q); +out_no_q: + free_cpumask_var(mdev->cpu_mask); +out_no_cpumask: + kfree(mdev); + return NULL; +} + +/* counterpart of drbd_new_device. + * last part of drbd_delete_device. */ +void drbd_free_mdev(struct drbd_conf *mdev) +{ + kfree(mdev->current_epoch); + kfree(mdev->app_reads_hash); + tl_cleanup(mdev); + if (mdev->bitmap) /* should no longer be there. */ + drbd_bm_cleanup(mdev); + __free_page(mdev->md_io_page); + put_disk(mdev->vdisk); + blk_cleanup_queue(mdev->rq_queue); + free_cpumask_var(mdev->cpu_mask); + kfree(mdev); +} + + +int __init drbd_init(void) +{ + int err; + + if (sizeof(struct p_handshake) != 80) { + printk(KERN_ERR + "drbd: never change the size or layout " + "of the HandShake packet.\n"); + return -EINVAL; + } + + if (1 > minor_count || minor_count > 255) { + printk(KERN_ERR + "drbd: invalid minor_count (%d)\n", minor_count); +#ifdef MODULE + return -EINVAL; +#else + minor_count = 8; +#endif + } + + err = drbd_nl_init(); + if (err) + return err; + + err = register_blkdev(DRBD_MAJOR, "drbd"); + if (err) { + printk(KERN_ERR + "drbd: unable to register block device major %d\n", + DRBD_MAJOR); + return err; + } + + register_reboot_notifier(&drbd_notifier); + + /* + * allocate all necessary structs + */ + err = -ENOMEM; + + init_waitqueue_head(&drbd_pp_wait); + + drbd_proc = NULL; /* play safe for drbd_cleanup */ + minor_table = kzalloc(sizeof(struct drbd_conf *)*minor_count, + GFP_KERNEL); + if (!minor_table) + goto Enomem; + + err = drbd_create_mempools(); + if (err) + goto Enomem; + + drbd_proc = proc_create("drbd", S_IFREG | S_IRUGO , NULL, &drbd_proc_fops); + if (!drbd_proc) { + printk(KERN_ERR "drbd: unable to register proc file\n"); + goto Enomem; + } + + rwlock_init(&global_state_lock); + + printk(KERN_INFO "drbd: initialized. " + "Version: " REL_VERSION " (api:%d/proto:%d-%d)\n", + API_VERSION, PRO_VERSION_MIN, PRO_VERSION_MAX); + printk(KERN_INFO "drbd: %s\n", drbd_buildtag()); + printk(KERN_INFO "drbd: registered as block device major %d\n", + DRBD_MAJOR); + printk(KERN_INFO "drbd: minor_table @ 0x%p\n", minor_table); + + return 0; /* Success! */ + +Enomem: + drbd_cleanup(); + if (err == -ENOMEM) + /* currently always the case */ + printk(KERN_ERR "drbd: ran out of memory\n"); + else + printk(KERN_ERR "drbd: initialization failure\n"); + return err; +} + +void drbd_free_bc(struct drbd_backing_dev *ldev) +{ + if (ldev == NULL) + return; + + bd_release(ldev->backing_bdev); + bd_release(ldev->md_bdev); + + fput(ldev->lo_file); + fput(ldev->md_file); + + kfree(ldev); +} + +void drbd_free_sock(struct drbd_conf *mdev) +{ + if (mdev->data.socket) { + kernel_sock_shutdown(mdev->data.socket, SHUT_RDWR); + sock_release(mdev->data.socket); + mdev->data.socket = NULL; + } + if (mdev->meta.socket) { + kernel_sock_shutdown(mdev->meta.socket, SHUT_RDWR); + sock_release(mdev->meta.socket); + mdev->meta.socket = NULL; + } +} + + +void drbd_free_resources(struct drbd_conf *mdev) +{ + crypto_free_hash(mdev->csums_tfm); + mdev->csums_tfm = NULL; + crypto_free_hash(mdev->verify_tfm); + mdev->verify_tfm = NULL; + crypto_free_hash(mdev->cram_hmac_tfm); + mdev->cram_hmac_tfm = NULL; + crypto_free_hash(mdev->integrity_w_tfm); + mdev->integrity_w_tfm = NULL; + crypto_free_hash(mdev->integrity_r_tfm); + mdev->integrity_r_tfm = NULL; + + drbd_free_sock(mdev); + + __no_warn(local, + drbd_free_bc(mdev->ldev); + mdev->ldev = NULL;); +} + +/* meta data management */ + +struct meta_data_on_disk { + u64 la_size; /* last agreed size. */ + u64 uuid[UI_SIZE]; /* UUIDs. */ + u64 device_uuid; + u64 reserved_u64_1; + u32 flags; /* MDF */ + u32 magic; + u32 md_size_sect; + u32 al_offset; /* offset to this block */ + u32 al_nr_extents; /* important for restoring the AL */ + /* `-- act_log->nr_elements <-- sync_conf.al_extents */ + u32 bm_offset; /* offset to the bitmap, from here */ + u32 bm_bytes_per_bit; /* BM_BLOCK_SIZE */ + u32 reserved_u32[4]; + +} __packed; + +/** + * drbd_md_sync() - Writes the meta data super block if the MD_DIRTY flag bit is set + * @mdev: DRBD device. + */ +void drbd_md_sync(struct drbd_conf *mdev) +{ + struct meta_data_on_disk *buffer; + sector_t sector; + int i; + + if (!test_and_clear_bit(MD_DIRTY, &mdev->flags)) + return; + del_timer(&mdev->md_sync_timer); + + /* We use here D_FAILED and not D_ATTACHING because we try to write + * metadata even if we detach due to a disk failure! */ + if (!get_ldev_if_state(mdev, D_FAILED)) + return; + + trace_drbd_md_io(mdev, WRITE, mdev->ldev); + + mutex_lock(&mdev->md_io_mutex); + buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page); + memset(buffer, 0, 512); + + buffer->la_size = cpu_to_be64(drbd_get_capacity(mdev->this_bdev)); + for (i = UI_CURRENT; i < UI_SIZE; i++) + buffer->uuid[i] = cpu_to_be64(mdev->ldev->md.uuid[i]); + buffer->flags = cpu_to_be32(mdev->ldev->md.flags); + buffer->magic = cpu_to_be32(DRBD_MD_MAGIC); + + buffer->md_size_sect = cpu_to_be32(mdev->ldev->md.md_size_sect); + buffer->al_offset = cpu_to_be32(mdev->ldev->md.al_offset); + buffer->al_nr_extents = cpu_to_be32(mdev->act_log->nr_elements); + buffer->bm_bytes_per_bit = cpu_to_be32(BM_BLOCK_SIZE); + buffer->device_uuid = cpu_to_be64(mdev->ldev->md.device_uuid); + + buffer->bm_offset = cpu_to_be32(mdev->ldev->md.bm_offset); + + D_ASSERT(drbd_md_ss__(mdev, mdev->ldev) == mdev->ldev->md.md_offset); + sector = mdev->ldev->md.md_offset; + + if (drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) { + clear_bit(MD_DIRTY, &mdev->flags); + } else { + /* this was a try anyways ... */ + dev_err(DEV, "meta data update failed!\n"); + + drbd_chk_io_error(mdev, 1, TRUE); + } + + /* Update mdev->ldev->md.la_size_sect, + * since we updated it on metadata. */ + mdev->ldev->md.la_size_sect = drbd_get_capacity(mdev->this_bdev); + + mutex_unlock(&mdev->md_io_mutex); + put_ldev(mdev); +} + +/** + * drbd_md_read() - Reads in the meta data super block + * @mdev: DRBD device. + * @bdev: Device from which the meta data should be read in. + * + * Return 0 (NO_ERROR) on success, and an enum drbd_ret_codes in case + * something goes wrong. Currently only: ERR_IO_MD_DISK, ERR_MD_INVALID. + */ +int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) +{ + struct meta_data_on_disk *buffer; + int i, rv = NO_ERROR; + + if (!get_ldev_if_state(mdev, D_ATTACHING)) + return ERR_IO_MD_DISK; + + trace_drbd_md_io(mdev, READ, bdev); + + mutex_lock(&mdev->md_io_mutex); + buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page); + + if (!drbd_md_sync_page_io(mdev, bdev, bdev->md.md_offset, READ)) { + /* NOTE: cant do normal error processing here as this is + called BEFORE disk is attached */ + dev_err(DEV, "Error while reading metadata.\n"); + rv = ERR_IO_MD_DISK; + goto err; + } + + if (be32_to_cpu(buffer->magic) != DRBD_MD_MAGIC) { + dev_err(DEV, "Error while reading metadata, magic not found.\n"); + rv = ERR_MD_INVALID; + goto err; + } + if (be32_to_cpu(buffer->al_offset) != bdev->md.al_offset) { + dev_err(DEV, "unexpected al_offset: %d (expected %d)\n", + be32_to_cpu(buffer->al_offset), bdev->md.al_offset); + rv = ERR_MD_INVALID; + goto err; + } + if (be32_to_cpu(buffer->bm_offset) != bdev->md.bm_offset) { + dev_err(DEV, "unexpected bm_offset: %d (expected %d)\n", + be32_to_cpu(buffer->bm_offset), bdev->md.bm_offset); + rv = ERR_MD_INVALID; + goto err; + } + if (be32_to_cpu(buffer->md_size_sect) != bdev->md.md_size_sect) { + dev_err(DEV, "unexpected md_size: %u (expected %u)\n", + be32_to_cpu(buffer->md_size_sect), bdev->md.md_size_sect); + rv = ERR_MD_INVALID; + goto err; + } + + if (be32_to_cpu(buffer->bm_bytes_per_bit) != BM_BLOCK_SIZE) { + dev_err(DEV, "unexpected bm_bytes_per_bit: %u (expected %u)\n", + be32_to_cpu(buffer->bm_bytes_per_bit), BM_BLOCK_SIZE); + rv = ERR_MD_INVALID; + goto err; + } + + bdev->md.la_size_sect = be64_to_cpu(buffer->la_size); + for (i = UI_CURRENT; i < UI_SIZE; i++) + bdev->md.uuid[i] = be64_to_cpu(buffer->uuid[i]); + bdev->md.flags = be32_to_cpu(buffer->flags); + mdev->sync_conf.al_extents = be32_to_cpu(buffer->al_nr_extents); + bdev->md.device_uuid = be64_to_cpu(buffer->device_uuid); + + if (mdev->sync_conf.al_extents < 7) + mdev->sync_conf.al_extents = 127; + + err: + mutex_unlock(&mdev->md_io_mutex); + put_ldev(mdev); + + return rv; +} + +/** + * drbd_md_mark_dirty() - Mark meta data super block as dirty + * @mdev: DRBD device. + * + * Call this function if you change anything that should be written to + * the meta-data super block. This function sets MD_DIRTY, and starts a + * timer that ensures that within five seconds you have to call drbd_md_sync(). + */ +void drbd_md_mark_dirty(struct drbd_conf *mdev) +{ + set_bit(MD_DIRTY, &mdev->flags); + mod_timer(&mdev->md_sync_timer, jiffies + 5*HZ); +} + + +static void drbd_uuid_move_history(struct drbd_conf *mdev) __must_hold(local) +{ + int i; + + for (i = UI_HISTORY_START; i < UI_HISTORY_END; i++) { + mdev->ldev->md.uuid[i+1] = mdev->ldev->md.uuid[i]; + + trace_drbd_uuid(mdev, i+1); + } +} + +void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local) +{ + if (idx == UI_CURRENT) { + if (mdev->state.role == R_PRIMARY) + val |= 1; + else + val &= ~((u64)1); + + drbd_set_ed_uuid(mdev, val); + } + + mdev->ldev->md.uuid[idx] = val; + trace_drbd_uuid(mdev, idx); + drbd_md_mark_dirty(mdev); +} + + +void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local) +{ + if (mdev->ldev->md.uuid[idx]) { + drbd_uuid_move_history(mdev); + mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[idx]; + trace_drbd_uuid(mdev, UI_HISTORY_START); + } + _drbd_uuid_set(mdev, idx, val); +} + +/** + * drbd_uuid_new_current() - Creates a new current UUID + * @mdev: DRBD device. + * + * Creates a new current UUID, and rotates the old current UUID into + * the bitmap slot. Causes an incremental resync upon next connect. + */ +void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local) +{ + u64 val; + + dev_info(DEV, "Creating new current UUID\n"); + D_ASSERT(mdev->ldev->md.uuid[UI_BITMAP] == 0); + mdev->ldev->md.uuid[UI_BITMAP] = mdev->ldev->md.uuid[UI_CURRENT]; + trace_drbd_uuid(mdev, UI_BITMAP); + + get_random_bytes(&val, sizeof(u64)); + _drbd_uuid_set(mdev, UI_CURRENT, val); +} + +void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local) +{ + if (mdev->ldev->md.uuid[UI_BITMAP] == 0 && val == 0) + return; + + if (val == 0) { + drbd_uuid_move_history(mdev); + mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[UI_BITMAP]; + mdev->ldev->md.uuid[UI_BITMAP] = 0; + trace_drbd_uuid(mdev, UI_HISTORY_START); + trace_drbd_uuid(mdev, UI_BITMAP); + } else { + if (mdev->ldev->md.uuid[UI_BITMAP]) + dev_warn(DEV, "bm UUID already set"); + + mdev->ldev->md.uuid[UI_BITMAP] = val; + mdev->ldev->md.uuid[UI_BITMAP] &= ~((u64)1); + + trace_drbd_uuid(mdev, UI_BITMAP); + } + drbd_md_mark_dirty(mdev); +} + +/** + * drbd_bmio_set_n_write() - io_fn for drbd_queue_bitmap_io() or drbd_bitmap_io() + * @mdev: DRBD device. + * + * Sets all bits in the bitmap and writes the whole bitmap to stable storage. + */ +int drbd_bmio_set_n_write(struct drbd_conf *mdev) +{ + int rv = -EIO; + + if (get_ldev_if_state(mdev, D_ATTACHING)) { + drbd_md_set_flag(mdev, MDF_FULL_SYNC); + drbd_md_sync(mdev); + drbd_bm_set_all(mdev); + + rv = drbd_bm_write(mdev); + + if (!rv) { + drbd_md_clear_flag(mdev, MDF_FULL_SYNC); + drbd_md_sync(mdev); + } + + put_ldev(mdev); + } + + return rv; +} + +/** + * drbd_bmio_clear_n_write() - io_fn for drbd_queue_bitmap_io() or drbd_bitmap_io() + * @mdev: DRBD device. + * + * Clears all bits in the bitmap and writes the whole bitmap to stable storage. + */ +int drbd_bmio_clear_n_write(struct drbd_conf *mdev) +{ + int rv = -EIO; + + if (get_ldev_if_state(mdev, D_ATTACHING)) { + drbd_bm_clear_all(mdev); + rv = drbd_bm_write(mdev); + put_ldev(mdev); + } + + return rv; +} + +static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused) +{ + struct bm_io_work *work = container_of(w, struct bm_io_work, w); + int rv; + + D_ASSERT(atomic_read(&mdev->ap_bio_cnt) == 0); + + drbd_bm_lock(mdev, work->why); + rv = work->io_fn(mdev); + drbd_bm_unlock(mdev); + + clear_bit(BITMAP_IO, &mdev->flags); + wake_up(&mdev->misc_wait); + + if (work->done) + work->done(mdev, rv); + + clear_bit(BITMAP_IO_QUEUED, &mdev->flags); + work->why = NULL; + + return 1; +} + +/** + * drbd_queue_bitmap_io() - Queues an IO operation on the whole bitmap + * @mdev: DRBD device. + * @io_fn: IO callback to be called when bitmap IO is possible + * @done: callback to be called after the bitmap IO was performed + * @why: Descriptive text of the reason for doing the IO + * + * While IO on the bitmap happens we freeze application IO thus we ensure + * that drbd_set_out_of_sync() can not be called. This function MAY ONLY be + * called from worker context. It MUST NOT be used while a previous such + * work is still pending! + */ +void drbd_queue_bitmap_io(struct drbd_conf *mdev, + int (*io_fn)(struct drbd_conf *), + void (*done)(struct drbd_conf *, int), + char *why) +{ + D_ASSERT(current == mdev->worker.task); + + D_ASSERT(!test_bit(BITMAP_IO_QUEUED, &mdev->flags)); + D_ASSERT(!test_bit(BITMAP_IO, &mdev->flags)); + D_ASSERT(list_empty(&mdev->bm_io_work.w.list)); + if (mdev->bm_io_work.why) + dev_err(DEV, "FIXME going to queue '%s' but '%s' still pending?\n", + why, mdev->bm_io_work.why); + + mdev->bm_io_work.io_fn = io_fn; + mdev->bm_io_work.done = done; + mdev->bm_io_work.why = why; + + set_bit(BITMAP_IO, &mdev->flags); + if (atomic_read(&mdev->ap_bio_cnt) == 0) { + if (list_empty(&mdev->bm_io_work.w.list)) { + set_bit(BITMAP_IO_QUEUED, &mdev->flags); + drbd_queue_work(&mdev->data.work, &mdev->bm_io_work.w); + } else + dev_err(DEV, "FIXME avoided double queuing bm_io_work\n"); + } +} + +/** + * drbd_bitmap_io() - Does an IO operation on the whole bitmap + * @mdev: DRBD device. + * @io_fn: IO callback to be called when bitmap IO is possible + * @why: Descriptive text of the reason for doing the IO + * + * freezes application IO while that the actual IO operations runs. This + * functions MAY NOT be called from worker context. + */ +int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), char *why) +{ + int rv; + + D_ASSERT(current != mdev->worker.task); + + drbd_suspend_io(mdev); + + drbd_bm_lock(mdev, why); + rv = io_fn(mdev); + drbd_bm_unlock(mdev); + + drbd_resume_io(mdev); + + return rv; +} + +void drbd_md_set_flag(struct drbd_conf *mdev, int flag) __must_hold(local) +{ + if ((mdev->ldev->md.flags & flag) != flag) { + drbd_md_mark_dirty(mdev); + mdev->ldev->md.flags |= flag; + } +} + +void drbd_md_clear_flag(struct drbd_conf *mdev, int flag) __must_hold(local) +{ + if ((mdev->ldev->md.flags & flag) != 0) { + drbd_md_mark_dirty(mdev); + mdev->ldev->md.flags &= ~flag; + } +} +int drbd_md_test_flag(struct drbd_backing_dev *bdev, int flag) +{ + return (bdev->md.flags & flag) != 0; +} + +static void md_sync_timer_fn(unsigned long data) +{ + struct drbd_conf *mdev = (struct drbd_conf *) data; + + drbd_queue_work_front(&mdev->data.work, &mdev->md_sync_work); +} + +static int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused) +{ + dev_warn(DEV, "md_sync_timer expired! Worker calls drbd_md_sync().\n"); + drbd_md_sync(mdev); + + return 1; +} + +#ifdef CONFIG_DRBD_FAULT_INJECTION +/* Fault insertion support including random number generator shamelessly + * stolen from kernel/rcutorture.c */ +struct fault_random_state { + unsigned long state; + unsigned long count; +}; + +#define FAULT_RANDOM_MULT 39916801 /* prime */ +#define FAULT_RANDOM_ADD 479001701 /* prime */ +#define FAULT_RANDOM_REFRESH 10000 + +/* + * Crude but fast random-number generator. Uses a linear congruential + * generator, with occasional help from get_random_bytes(). + */ +static unsigned long +_drbd_fault_random(struct fault_random_state *rsp) +{ + long refresh; + + if (--rsp->count < 0) { + get_random_bytes(&refresh, sizeof(refresh)); + rsp->state += refresh; + rsp->count = FAULT_RANDOM_REFRESH; + } + rsp->state = rsp->state * FAULT_RANDOM_MULT + FAULT_RANDOM_ADD; + return swahw32(rsp->state); +} + +static char * +_drbd_fault_str(unsigned int type) { + static char *_faults[] = { + [DRBD_FAULT_MD_WR] = "Meta-data write", + [DRBD_FAULT_MD_RD] = "Meta-data read", + [DRBD_FAULT_RS_WR] = "Resync write", + [DRBD_FAULT_RS_RD] = "Resync read", + [DRBD_FAULT_DT_WR] = "Data write", + [DRBD_FAULT_DT_RD] = "Data read", + [DRBD_FAULT_DT_RA] = "Data read ahead", + [DRBD_FAULT_BM_ALLOC] = "BM allocation", + [DRBD_FAULT_AL_EE] = "EE allocation" + }; + + return (type < DRBD_FAULT_MAX) ? _faults[type] : "**Unknown**"; +} + +unsigned int +_drbd_insert_fault(struct drbd_conf *mdev, unsigned int type) +{ + static struct fault_random_state rrs = {0, 0}; + + unsigned int ret = ( + (fault_devs == 0 || + ((1 << mdev_to_minor(mdev)) & fault_devs) != 0) && + (((_drbd_fault_random(&rrs) % 100) + 1) <= fault_rate)); + + if (ret) { + fault_count++; + + if (printk_ratelimit()) + dev_warn(DEV, "***Simulating %s failure\n", + _drbd_fault_str(type)); + } + + return ret; +} +#endif + +const char *drbd_buildtag(void) +{ + /* DRBD built from external sources has here a reference to the + git hash of the source code. */ + + static char buildtag[38] = "\0uilt-in"; + + if (buildtag[0] == 0) { +#ifdef CONFIG_MODULES + if (THIS_MODULE != NULL) + sprintf(buildtag, "srcversion: %-24s", THIS_MODULE->srcversion); + else +#endif + buildtag[0] = 'b'; + } + + return buildtag; +} + +module_init(drbd_init) +module_exit(drbd_cleanup) + +/* For drbd_tracing: */ +EXPORT_SYMBOL(drbd_conn_str); +EXPORT_SYMBOL(drbd_role_str); +EXPORT_SYMBOL(drbd_disk_str); +EXPORT_SYMBOL(drbd_set_st_err_str); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c new file mode 100644 index 000000000000..1927acefe230 --- /dev/null +++ b/drivers/block/drbd/drbd_nl.c @@ -0,0 +1,2365 @@ +/* + drbd_nl.c + + This file is part of DRBD by Philipp Reisner and Lars Ellenberg. + + Copyright (C) 2001-2008, LINBIT Information Technologies GmbH. + Copyright (C) 1999-2008, Philipp Reisner . + Copyright (C) 2002-2008, Lars Ellenberg . + + drbd is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + drbd is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with drbd; see the file COPYING. If not, write to + the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "drbd_int.h" +#include "drbd_tracing.h" +#include "drbd_wrappers.h" +#include +#include +#include + +static unsigned short *tl_add_blob(unsigned short *, enum drbd_tags, const void *, int); +static unsigned short *tl_add_str(unsigned short *, enum drbd_tags, const char *); +static unsigned short *tl_add_int(unsigned short *, enum drbd_tags, const void *); + +/* see get_sb_bdev and bd_claim */ +static char *drbd_m_holder = "Hands off! this is DRBD's meta data device."; + +/* Generate the tag_list to struct functions */ +#define NL_PACKET(name, number, fields) \ +static int name ## _from_tags(struct drbd_conf *mdev, \ + unsigned short *tags, struct name *arg) __attribute__ ((unused)); \ +static int name ## _from_tags(struct drbd_conf *mdev, \ + unsigned short *tags, struct name *arg) \ +{ \ + int tag; \ + int dlen; \ + \ + while ((tag = get_unaligned(tags++)) != TT_END) { \ + dlen = get_unaligned(tags++); \ + switch (tag_number(tag)) { \ + fields \ + default: \ + if (tag & T_MANDATORY) { \ + dev_err(DEV, "Unknown tag: %d\n", tag_number(tag)); \ + return 0; \ + } \ + } \ + tags = (unsigned short *)((char *)tags + dlen); \ + } \ + return 1; \ +} +#define NL_INTEGER(pn, pr, member) \ + case pn: /* D_ASSERT( tag_type(tag) == TT_INTEGER ); */ \ + arg->member = get_unaligned((int *)(tags)); \ + break; +#define NL_INT64(pn, pr, member) \ + case pn: /* D_ASSERT( tag_type(tag) == TT_INT64 ); */ \ + arg->member = get_unaligned((u64 *)(tags)); \ + break; +#define NL_BIT(pn, pr, member) \ + case pn: /* D_ASSERT( tag_type(tag) == TT_BIT ); */ \ + arg->member = *(char *)(tags) ? 1 : 0; \ + break; +#define NL_STRING(pn, pr, member, len) \ + case pn: /* D_ASSERT( tag_type(tag) == TT_STRING ); */ \ + if (dlen > len) { \ + dev_err(DEV, "arg too long: %s (%u wanted, max len: %u bytes)\n", \ + #member, dlen, (unsigned int)len); \ + return 0; \ + } \ + arg->member ## _len = dlen; \ + memcpy(arg->member, tags, min_t(size_t, dlen, len)); \ + break; +#include "linux/drbd_nl.h" + +/* Generate the struct to tag_list functions */ +#define NL_PACKET(name, number, fields) \ +static unsigned short* \ +name ## _to_tags(struct drbd_conf *mdev, \ + struct name *arg, unsigned short *tags) __attribute__ ((unused)); \ +static unsigned short* \ +name ## _to_tags(struct drbd_conf *mdev, \ + struct name *arg, unsigned short *tags) \ +{ \ + fields \ + return tags; \ +} + +#define NL_INTEGER(pn, pr, member) \ + put_unaligned(pn | pr | TT_INTEGER, tags++); \ + put_unaligned(sizeof(int), tags++); \ + put_unaligned(arg->member, (int *)tags); \ + tags = (unsigned short *)((char *)tags+sizeof(int)); +#define NL_INT64(pn, pr, member) \ + put_unaligned(pn | pr | TT_INT64, tags++); \ + put_unaligned(sizeof(u64), tags++); \ + put_unaligned(arg->member, (u64 *)tags); \ + tags = (unsigned short *)((char *)tags+sizeof(u64)); +#define NL_BIT(pn, pr, member) \ + put_unaligned(pn | pr | TT_BIT, tags++); \ + put_unaligned(sizeof(char), tags++); \ + *(char *)tags = arg->member; \ + tags = (unsigned short *)((char *)tags+sizeof(char)); +#define NL_STRING(pn, pr, member, len) \ + put_unaligned(pn | pr | TT_STRING, tags++); \ + put_unaligned(arg->member ## _len, tags++); \ + memcpy(tags, arg->member, arg->member ## _len); \ + tags = (unsigned short *)((char *)tags + arg->member ## _len); +#include "linux/drbd_nl.h" + +void drbd_bcast_ev_helper(struct drbd_conf *mdev, char *helper_name); +void drbd_nl_send_reply(struct cn_msg *, int); + +int drbd_khelper(struct drbd_conf *mdev, char *cmd) +{ + char *envp[] = { "HOME=/", + "TERM=linux", + "PATH=/sbin:/usr/sbin:/bin:/usr/bin", + NULL, /* Will be set to address family */ + NULL, /* Will be set to address */ + NULL }; + + char mb[12], af[20], ad[60], *afs; + char *argv[] = {usermode_helper, cmd, mb, NULL }; + int ret; + + snprintf(mb, 12, "minor-%d", mdev_to_minor(mdev)); + + if (get_net_conf(mdev)) { + switch (((struct sockaddr *)mdev->net_conf->peer_addr)->sa_family) { + case AF_INET6: + afs = "ipv6"; + snprintf(ad, 60, "DRBD_PEER_ADDRESS=%pI6", + &((struct sockaddr_in6 *)mdev->net_conf->peer_addr)->sin6_addr); + break; + case AF_INET: + afs = "ipv4"; + snprintf(ad, 60, "DRBD_PEER_ADDRESS=%pI4", + &((struct sockaddr_in *)mdev->net_conf->peer_addr)->sin_addr); + break; + default: + afs = "ssocks"; + snprintf(ad, 60, "DRBD_PEER_ADDRESS=%pI4", + &((struct sockaddr_in *)mdev->net_conf->peer_addr)->sin_addr); + } + snprintf(af, 20, "DRBD_PEER_AF=%s", afs); + envp[3]=af; + envp[4]=ad; + put_net_conf(mdev); + } + + dev_info(DEV, "helper command: %s %s %s\n", usermode_helper, cmd, mb); + + drbd_bcast_ev_helper(mdev, cmd); + ret = call_usermodehelper(usermode_helper, argv, envp, 1); + if (ret) + dev_warn(DEV, "helper command: %s %s %s exit code %u (0x%x)\n", + usermode_helper, cmd, mb, + (ret >> 8) & 0xff, ret); + else + dev_info(DEV, "helper command: %s %s %s exit code %u (0x%x)\n", + usermode_helper, cmd, mb, + (ret >> 8) & 0xff, ret); + + if (ret < 0) /* Ignore any ERRNOs we got. */ + ret = 0; + + return ret; +} + +enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev) +{ + char *ex_to_string; + int r; + enum drbd_disk_state nps; + enum drbd_fencing_p fp; + + D_ASSERT(mdev->state.pdsk == D_UNKNOWN); + + if (get_ldev_if_state(mdev, D_CONSISTENT)) { + fp = mdev->ldev->dc.fencing; + put_ldev(mdev); + } else { + dev_warn(DEV, "Not fencing peer, I'm not even Consistent myself.\n"); + return mdev->state.pdsk; + } + + if (fp == FP_STONITH) + _drbd_request_state(mdev, NS(susp, 1), CS_WAIT_COMPLETE); + + r = drbd_khelper(mdev, "fence-peer"); + + switch ((r>>8) & 0xff) { + case 3: /* peer is inconsistent */ + ex_to_string = "peer is inconsistent or worse"; + nps = D_INCONSISTENT; + break; + case 4: /* peer got outdated, or was already outdated */ + ex_to_string = "peer was fenced"; + nps = D_OUTDATED; + break; + case 5: /* peer was down */ + if (mdev->state.disk == D_UP_TO_DATE) { + /* we will(have) create(d) a new UUID anyways... */ + ex_to_string = "peer is unreachable, assumed to be dead"; + nps = D_OUTDATED; + } else { + ex_to_string = "peer unreachable, doing nothing since disk != UpToDate"; + nps = mdev->state.pdsk; + } + break; + case 6: /* Peer is primary, voluntarily outdate myself. + * This is useful when an unconnected R_SECONDARY is asked to + * become R_PRIMARY, but finds the other peer being active. */ + ex_to_string = "peer is active"; + dev_warn(DEV, "Peer is primary, outdating myself.\n"); + nps = D_UNKNOWN; + _drbd_request_state(mdev, NS(disk, D_OUTDATED), CS_WAIT_COMPLETE); + break; + case 7: + if (fp != FP_STONITH) + dev_err(DEV, "fence-peer() = 7 && fencing != Stonith !!!\n"); + ex_to_string = "peer was stonithed"; + nps = D_OUTDATED; + break; + default: + /* The script is broken ... */ + nps = D_UNKNOWN; + dev_err(DEV, "fence-peer helper broken, returned %d\n", (r>>8)&0xff); + return nps; + } + + dev_info(DEV, "fence-peer helper returned %d (%s)\n", + (r>>8) & 0xff, ex_to_string); + return nps; +} + + +int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) +{ + const int max_tries = 4; + int r = 0; + int try = 0; + int forced = 0; + union drbd_state mask, val; + enum drbd_disk_state nps; + + if (new_role == R_PRIMARY) + request_ping(mdev); /* Detect a dead peer ASAP */ + + mutex_lock(&mdev->state_mutex); + + mask.i = 0; mask.role = R_MASK; + val.i = 0; val.role = new_role; + + while (try++ < max_tries) { + r = _drbd_request_state(mdev, mask, val, CS_WAIT_COMPLETE); + + /* in case we first succeeded to outdate, + * but now suddenly could establish a connection */ + if (r == SS_CW_FAILED_BY_PEER && mask.pdsk != 0) { + val.pdsk = 0; + mask.pdsk = 0; + continue; + } + + if (r == SS_NO_UP_TO_DATE_DISK && force && + (mdev->state.disk == D_INCONSISTENT || + mdev->state.disk == D_OUTDATED)) { + mask.disk = D_MASK; + val.disk = D_UP_TO_DATE; + forced = 1; + continue; + } + + if (r == SS_NO_UP_TO_DATE_DISK && + mdev->state.disk == D_CONSISTENT && mask.pdsk == 0) { + D_ASSERT(mdev->state.pdsk == D_UNKNOWN); + nps = drbd_try_outdate_peer(mdev); + + if (nps == D_OUTDATED || nps == D_INCONSISTENT) { + val.disk = D_UP_TO_DATE; + mask.disk = D_MASK; + } + + val.pdsk = nps; + mask.pdsk = D_MASK; + + continue; + } + + if (r == SS_NOTHING_TO_DO) + goto fail; + if (r == SS_PRIMARY_NOP && mask.pdsk == 0) { + nps = drbd_try_outdate_peer(mdev); + + if (force && nps > D_OUTDATED) { + dev_warn(DEV, "Forced into split brain situation!\n"); + nps = D_OUTDATED; + } + + mask.pdsk = D_MASK; + val.pdsk = nps; + + continue; + } + if (r == SS_TWO_PRIMARIES) { + /* Maybe the peer is detected as dead very soon... + retry at most once more in this case. */ + __set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout((mdev->net_conf->ping_timeo+1)*HZ/10); + if (try < max_tries) + try = max_tries - 1; + continue; + } + if (r < SS_SUCCESS) { + r = _drbd_request_state(mdev, mask, val, + CS_VERBOSE + CS_WAIT_COMPLETE); + if (r < SS_SUCCESS) + goto fail; + } + break; + } + + if (r < SS_SUCCESS) + goto fail; + + if (forced) + dev_warn(DEV, "Forced to consider local data as UpToDate!\n"); + + /* Wait until nothing is on the fly :) */ + wait_event(mdev->misc_wait, atomic_read(&mdev->ap_pending_cnt) == 0); + + if (new_role == R_SECONDARY) { + set_disk_ro(mdev->vdisk, TRUE); + if (get_ldev(mdev)) { + mdev->ldev->md.uuid[UI_CURRENT] &= ~(u64)1; + put_ldev(mdev); + } + } else { + if (get_net_conf(mdev)) { + mdev->net_conf->want_lose = 0; + put_net_conf(mdev); + } + set_disk_ro(mdev->vdisk, FALSE); + if (get_ldev(mdev)) { + if (((mdev->state.conn < C_CONNECTED || + mdev->state.pdsk <= D_FAILED) + && mdev->ldev->md.uuid[UI_BITMAP] == 0) || forced) + drbd_uuid_new_current(mdev); + + mdev->ldev->md.uuid[UI_CURRENT] |= (u64)1; + put_ldev(mdev); + } + } + + if ((new_role == R_SECONDARY) && get_ldev(mdev)) { + drbd_al_to_on_disk_bm(mdev); + put_ldev(mdev); + } + + if (mdev->state.conn >= C_WF_REPORT_PARAMS) { + /* if this was forced, we should consider sync */ + if (forced) + drbd_send_uuids(mdev); + drbd_send_state(mdev); + } + + drbd_md_sync(mdev); + + kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); + fail: + mutex_unlock(&mdev->state_mutex); + return r; +} + + +static int drbd_nl_primary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, + struct drbd_nl_cfg_reply *reply) +{ + struct primary primary_args; + + memset(&primary_args, 0, sizeof(struct primary)); + if (!primary_from_tags(mdev, nlp->tag_list, &primary_args)) { + reply->ret_code = ERR_MANDATORY_TAG; + return 0; + } + + reply->ret_code = + drbd_set_role(mdev, R_PRIMARY, primary_args.overwrite_peer); + + return 0; +} + +static int drbd_nl_secondary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, + struct drbd_nl_cfg_reply *reply) +{ + reply->ret_code = drbd_set_role(mdev, R_SECONDARY, 0); + + return 0; +} + +/* initializes the md.*_offset members, so we are able to find + * the on disk meta data */ +static void drbd_md_set_sector_offsets(struct drbd_conf *mdev, + struct drbd_backing_dev *bdev) +{ + sector_t md_size_sect = 0; + switch (bdev->dc.meta_dev_idx) { + default: + /* v07 style fixed size indexed meta data */ + bdev->md.md_size_sect = MD_RESERVED_SECT; + bdev->md.md_offset = drbd_md_ss__(mdev, bdev); + bdev->md.al_offset = MD_AL_OFFSET; + bdev->md.bm_offset = MD_BM_OFFSET; + break; + case DRBD_MD_INDEX_FLEX_EXT: + /* just occupy the full device; unit: sectors */ + bdev->md.md_size_sect = drbd_get_capacity(bdev->md_bdev); + bdev->md.md_offset = 0; + bdev->md.al_offset = MD_AL_OFFSET; + bdev->md.bm_offset = MD_BM_OFFSET; + break; + case DRBD_MD_INDEX_INTERNAL: + case DRBD_MD_INDEX_FLEX_INT: + bdev->md.md_offset = drbd_md_ss__(mdev, bdev); + /* al size is still fixed */ + bdev->md.al_offset = -MD_AL_MAX_SIZE; + /* we need (slightly less than) ~ this much bitmap sectors: */ + md_size_sect = drbd_get_capacity(bdev->backing_bdev); + md_size_sect = ALIGN(md_size_sect, BM_SECT_PER_EXT); + md_size_sect = BM_SECT_TO_EXT(md_size_sect); + md_size_sect = ALIGN(md_size_sect, 8); + + /* plus the "drbd meta data super block", + * and the activity log; */ + md_size_sect += MD_BM_OFFSET; + + bdev->md.md_size_sect = md_size_sect; + /* bitmap offset is adjusted by 'super' block size */ + bdev->md.bm_offset = -md_size_sect + MD_AL_OFFSET; + break; + } +} + +char *ppsize(char *buf, unsigned long long size) +{ + /* Needs 9 bytes at max. */ + static char units[] = { 'K', 'M', 'G', 'T', 'P', 'E' }; + int base = 0; + while (size >= 10000) { + /* shift + round */ + size = (size >> 10) + !!(size & (1<<9)); + base++; + } + sprintf(buf, "%lu %cB", (long)size, units[base]); + + return buf; +} + +/* there is still a theoretical deadlock when called from receiver + * on an D_INCONSISTENT R_PRIMARY: + * remote READ does inc_ap_bio, receiver would need to receive answer + * packet from remote to dec_ap_bio again. + * receiver receive_sizes(), comes here, + * waits for ap_bio_cnt == 0. -> deadlock. + * but this cannot happen, actually, because: + * R_PRIMARY D_INCONSISTENT, and peer's disk is unreachable + * (not connected, or bad/no disk on peer): + * see drbd_fail_request_early, ap_bio_cnt is zero. + * R_PRIMARY D_INCONSISTENT, and C_SYNC_TARGET: + * peer may not initiate a resize. + */ +void drbd_suspend_io(struct drbd_conf *mdev) +{ + set_bit(SUSPEND_IO, &mdev->flags); + wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_bio_cnt)); +} + +void drbd_resume_io(struct drbd_conf *mdev) +{ + clear_bit(SUSPEND_IO, &mdev->flags); + wake_up(&mdev->misc_wait); +} + +/** + * drbd_determine_dev_size() - Sets the right device size obeying all constraints + * @mdev: DRBD device. + * + * Returns 0 on success, negative return values indicate errors. + * You should call drbd_md_sync() after calling this function. + */ +enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *mdev) __must_hold(local) +{ + sector_t prev_first_sect, prev_size; /* previous meta location */ + sector_t la_size; + sector_t size; + char ppb[10]; + + int md_moved, la_size_changed; + enum determine_dev_size rv = unchanged; + + /* race: + * application request passes inc_ap_bio, + * but then cannot get an AL-reference. + * this function later may wait on ap_bio_cnt == 0. -> deadlock. + * + * to avoid that: + * Suspend IO right here. + * still lock the act_log to not trigger ASSERTs there. + */ + drbd_suspend_io(mdev); + + /* no wait necessary anymore, actually we could assert that */ + wait_event(mdev->al_wait, lc_try_lock(mdev->act_log)); + + prev_first_sect = drbd_md_first_sector(mdev->ldev); + prev_size = mdev->ldev->md.md_size_sect; + la_size = mdev->ldev->md.la_size_sect; + + /* TODO: should only be some assert here, not (re)init... */ + drbd_md_set_sector_offsets(mdev, mdev->ldev); + + size = drbd_new_dev_size(mdev, mdev->ldev); + + if (drbd_get_capacity(mdev->this_bdev) != size || + drbd_bm_capacity(mdev) != size) { + int err; + err = drbd_bm_resize(mdev, size); + if (unlikely(err)) { + /* currently there is only one error: ENOMEM! */ + size = drbd_bm_capacity(mdev)>>1; + if (size == 0) { + dev_err(DEV, "OUT OF MEMORY! " + "Could not allocate bitmap!\n"); + } else { + dev_err(DEV, "BM resizing failed. " + "Leaving size unchanged at size = %lu KB\n", + (unsigned long)size); + } + rv = dev_size_error; + } + /* racy, see comments above. */ + drbd_set_my_capacity(mdev, size); + mdev->ldev->md.la_size_sect = size; + dev_info(DEV, "size = %s (%llu KB)\n", ppsize(ppb, size>>1), + (unsigned long long)size>>1); + } + if (rv == dev_size_error) + goto out; + + la_size_changed = (la_size != mdev->ldev->md.la_size_sect); + + md_moved = prev_first_sect != drbd_md_first_sector(mdev->ldev) + || prev_size != mdev->ldev->md.md_size_sect; + + if (la_size_changed || md_moved) { + drbd_al_shrink(mdev); /* All extents inactive. */ + dev_info(DEV, "Writing the whole bitmap, %s\n", + la_size_changed && md_moved ? "size changed and md moved" : + la_size_changed ? "size changed" : "md moved"); + rv = drbd_bitmap_io(mdev, &drbd_bm_write, "size changed"); /* does drbd_resume_io() ! */ + drbd_md_mark_dirty(mdev); + } + + if (size > la_size) + rv = grew; + if (size < la_size) + rv = shrunk; +out: + lc_unlock(mdev->act_log); + wake_up(&mdev->al_wait); + drbd_resume_io(mdev); + + return rv; +} + +sector_t +drbd_new_dev_size(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) +{ + sector_t p_size = mdev->p_size; /* partner's disk size. */ + sector_t la_size = bdev->md.la_size_sect; /* last agreed size. */ + sector_t m_size; /* my size */ + sector_t u_size = bdev->dc.disk_size; /* size requested by user. */ + sector_t size = 0; + + m_size = drbd_get_max_capacity(bdev); + + if (p_size && m_size) { + size = min_t(sector_t, p_size, m_size); + } else { + if (la_size) { + size = la_size; + if (m_size && m_size < size) + size = m_size; + if (p_size && p_size < size) + size = p_size; + } else { + if (m_size) + size = m_size; + if (p_size) + size = p_size; + } + } + + if (size == 0) + dev_err(DEV, "Both nodes diskless!\n"); + + if (u_size) { + if (u_size > size) + dev_err(DEV, "Requested disk size is too big (%lu > %lu)\n", + (unsigned long)u_size>>1, (unsigned long)size>>1); + else + size = u_size; + } + + return size; +} + +/** + * drbd_check_al_size() - Ensures that the AL is of the right size + * @mdev: DRBD device. + * + * Returns -EBUSY if current al lru is still used, -ENOMEM when allocation + * failed, and 0 on success. You should call drbd_md_sync() after you called + * this function. + */ +static int drbd_check_al_size(struct drbd_conf *mdev) +{ + struct lru_cache *n, *t; + struct lc_element *e; + unsigned int in_use; + int i; + + ERR_IF(mdev->sync_conf.al_extents < 7) + mdev->sync_conf.al_extents = 127; + + if (mdev->act_log && + mdev->act_log->nr_elements == mdev->sync_conf.al_extents) + return 0; + + in_use = 0; + t = mdev->act_log; + n = lc_create("act_log", drbd_al_ext_cache, + mdev->sync_conf.al_extents, sizeof(struct lc_element), 0); + + if (n == NULL) { + dev_err(DEV, "Cannot allocate act_log lru!\n"); + return -ENOMEM; + } + spin_lock_irq(&mdev->al_lock); + if (t) { + for (i = 0; i < t->nr_elements; i++) { + e = lc_element_by_index(t, i); + if (e->refcnt) + dev_err(DEV, "refcnt(%d)==%d\n", + e->lc_number, e->refcnt); + in_use += e->refcnt; + } + } + if (!in_use) + mdev->act_log = n; + spin_unlock_irq(&mdev->al_lock); + if (in_use) { + dev_err(DEV, "Activity log still in use!\n"); + lc_destroy(n); + return -EBUSY; + } else { + if (t) + lc_destroy(t); + } + drbd_md_mark_dirty(mdev); /* we changed mdev->act_log->nr_elemens */ + return 0; +} + +void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_seg_s) __must_hold(local) +{ + struct request_queue * const q = mdev->rq_queue; + struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue; + int max_segments = mdev->ldev->dc.max_bio_bvecs; + + if (b->merge_bvec_fn && !mdev->ldev->dc.use_bmbv) + max_seg_s = PAGE_SIZE; + + max_seg_s = min(queue_max_sectors(b) * queue_logical_block_size(b), max_seg_s); + + blk_queue_max_sectors(q, max_seg_s >> 9); + blk_queue_max_phys_segments(q, max_segments ? max_segments : MAX_PHYS_SEGMENTS); + blk_queue_max_hw_segments(q, max_segments ? max_segments : MAX_HW_SEGMENTS); + blk_queue_max_segment_size(q, max_seg_s); + blk_queue_logical_block_size(q, 512); + blk_queue_segment_boundary(q, PAGE_SIZE-1); + blk_stack_limits(&q->limits, &b->limits, 0); + + if (b->merge_bvec_fn) + dev_warn(DEV, "Backing device's merge_bvec_fn() = %p\n", + b->merge_bvec_fn); + dev_info(DEV, "max_segment_size ( = BIO size ) = %u\n", queue_max_segment_size(q)); + + if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) { + dev_info(DEV, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n", + q->backing_dev_info.ra_pages, + b->backing_dev_info.ra_pages); + q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages; + } +} + +/* serialize deconfig (worker exiting, doing cleanup) + * and reconfig (drbdsetup disk, drbdsetup net) + * + * wait for a potentially exiting worker, then restart it, + * or start a new one. + */ +static void drbd_reconfig_start(struct drbd_conf *mdev) +{ + wait_event(mdev->state_wait, test_and_set_bit(CONFIG_PENDING, &mdev->flags)); + wait_event(mdev->state_wait, !test_bit(DEVICE_DYING, &mdev->flags)); + drbd_thread_start(&mdev->worker); +} + +/* if still unconfigured, stops worker again. + * if configured now, clears CONFIG_PENDING. + * wakes potential waiters */ +static void drbd_reconfig_done(struct drbd_conf *mdev) +{ + spin_lock_irq(&mdev->req_lock); + if (mdev->state.disk == D_DISKLESS && + mdev->state.conn == C_STANDALONE && + mdev->state.role == R_SECONDARY) { + set_bit(DEVICE_DYING, &mdev->flags); + drbd_thread_stop_nowait(&mdev->worker); + } else + clear_bit(CONFIG_PENDING, &mdev->flags); + spin_unlock_irq(&mdev->req_lock); + wake_up(&mdev->state_wait); +} + +/* does always return 0; + * interesting return code is in reply->ret_code */ +static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, + struct drbd_nl_cfg_reply *reply) +{ + enum drbd_ret_codes retcode; + enum determine_dev_size dd; + sector_t max_possible_sectors; + sector_t min_md_device_sectors; + struct drbd_backing_dev *nbc = NULL; /* new_backing_conf */ + struct inode *inode, *inode2; + struct lru_cache *resync_lru = NULL; + union drbd_state ns, os; + int rv; + int cp_discovered = 0; + int logical_block_size; + + drbd_reconfig_start(mdev); + + /* if you want to reconfigure, please tear down first */ + if (mdev->state.disk > D_DISKLESS) { + retcode = ERR_DISK_CONFIGURED; + goto fail; + } + + /* allocation not in the IO path, cqueue thread context */ + nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL); + if (!nbc) { + retcode = ERR_NOMEM; + goto fail; + } + + nbc->dc.disk_size = DRBD_DISK_SIZE_SECT_DEF; + nbc->dc.on_io_error = DRBD_ON_IO_ERROR_DEF; + nbc->dc.fencing = DRBD_FENCING_DEF; + nbc->dc.max_bio_bvecs = DRBD_MAX_BIO_BVECS_DEF; + + if (!disk_conf_from_tags(mdev, nlp->tag_list, &nbc->dc)) { + retcode = ERR_MANDATORY_TAG; + goto fail; + } + + if (nbc->dc.meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) { + retcode = ERR_MD_IDX_INVALID; + goto fail; + } + + nbc->lo_file = filp_open(nbc->dc.backing_dev, O_RDWR, 0); + if (IS_ERR(nbc->lo_file)) { + dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.backing_dev, + PTR_ERR(nbc->lo_file)); + nbc->lo_file = NULL; + retcode = ERR_OPEN_DISK; + goto fail; + } + + inode = nbc->lo_file->f_dentry->d_inode; + + if (!S_ISBLK(inode->i_mode)) { + retcode = ERR_DISK_NOT_BDEV; + goto fail; + } + + nbc->md_file = filp_open(nbc->dc.meta_dev, O_RDWR, 0); + if (IS_ERR(nbc->md_file)) { + dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.meta_dev, + PTR_ERR(nbc->md_file)); + nbc->md_file = NULL; + retcode = ERR_OPEN_MD_DISK; + goto fail; + } + + inode2 = nbc->md_file->f_dentry->d_inode; + + if (!S_ISBLK(inode2->i_mode)) { + retcode = ERR_MD_NOT_BDEV; + goto fail; + } + + nbc->backing_bdev = inode->i_bdev; + if (bd_claim(nbc->backing_bdev, mdev)) { + printk(KERN_ERR "drbd: bd_claim(%p,%p); failed [%p;%p;%u]\n", + nbc->backing_bdev, mdev, + nbc->backing_bdev->bd_holder, + nbc->backing_bdev->bd_contains->bd_holder, + nbc->backing_bdev->bd_holders); + retcode = ERR_BDCLAIM_DISK; + goto fail; + } + + resync_lru = lc_create("resync", drbd_bm_ext_cache, + 61, sizeof(struct bm_extent), + offsetof(struct bm_extent, lce)); + if (!resync_lru) { + retcode = ERR_NOMEM; + goto release_bdev_fail; + } + + /* meta_dev_idx >= 0: external fixed size, + * possibly multiple drbd sharing one meta device. + * TODO in that case, paranoia check that [md_bdev, meta_dev_idx] is + * not yet used by some other drbd minor! + * (if you use drbd.conf + drbdadm, + * that should check it for you already; but if you don't, or someone + * fooled it, we need to double check here) */ + nbc->md_bdev = inode2->i_bdev; + if (bd_claim(nbc->md_bdev, (nbc->dc.meta_dev_idx < 0) ? (void *)mdev + : (void *) drbd_m_holder)) { + retcode = ERR_BDCLAIM_MD_DISK; + goto release_bdev_fail; + } + + if ((nbc->backing_bdev == nbc->md_bdev) != + (nbc->dc.meta_dev_idx == DRBD_MD_INDEX_INTERNAL || + nbc->dc.meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) { + retcode = ERR_MD_IDX_INVALID; + goto release_bdev2_fail; + } + + /* RT - for drbd_get_max_capacity() DRBD_MD_INDEX_FLEX_INT */ + drbd_md_set_sector_offsets(mdev, nbc); + + if (drbd_get_max_capacity(nbc) < nbc->dc.disk_size) { + dev_err(DEV, "max capacity %llu smaller than disk size %llu\n", + (unsigned long long) drbd_get_max_capacity(nbc), + (unsigned long long) nbc->dc.disk_size); + retcode = ERR_DISK_TO_SMALL; + goto release_bdev2_fail; + } + + if (nbc->dc.meta_dev_idx < 0) { + max_possible_sectors = DRBD_MAX_SECTORS_FLEX; + /* at least one MB, otherwise it does not make sense */ + min_md_device_sectors = (2<<10); + } else { + max_possible_sectors = DRBD_MAX_SECTORS; + min_md_device_sectors = MD_RESERVED_SECT * (nbc->dc.meta_dev_idx + 1); + } + + if (drbd_get_capacity(nbc->md_bdev) > max_possible_sectors) + dev_warn(DEV, "truncating very big lower level device " + "to currently maximum possible %llu sectors\n", + (unsigned long long) max_possible_sectors); + + if (drbd_get_capacity(nbc->md_bdev) < min_md_device_sectors) { + retcode = ERR_MD_DISK_TO_SMALL; + dev_warn(DEV, "refusing attach: md-device too small, " + "at least %llu sectors needed for this meta-disk type\n", + (unsigned long long) min_md_device_sectors); + goto release_bdev2_fail; + } + + /* Make sure the new disk is big enough + * (we may currently be R_PRIMARY with no local disk...) */ + if (drbd_get_max_capacity(nbc) < + drbd_get_capacity(mdev->this_bdev)) { + retcode = ERR_DISK_TO_SMALL; + goto release_bdev2_fail; + } + + nbc->known_size = drbd_get_capacity(nbc->backing_bdev); + + drbd_suspend_io(mdev); + /* also wait for the last barrier ack. */ + wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_pending_cnt)); + /* and for any other previously queued work */ + drbd_flush_workqueue(mdev); + + retcode = _drbd_request_state(mdev, NS(disk, D_ATTACHING), CS_VERBOSE); + drbd_resume_io(mdev); + if (retcode < SS_SUCCESS) + goto release_bdev2_fail; + + if (!get_ldev_if_state(mdev, D_ATTACHING)) + goto force_diskless; + + drbd_md_set_sector_offsets(mdev, nbc); + + if (!mdev->bitmap) { + if (drbd_bm_init(mdev)) { + retcode = ERR_NOMEM; + goto force_diskless_dec; + } + } + + retcode = drbd_md_read(mdev, nbc); + if (retcode != NO_ERROR) + goto force_diskless_dec; + + if (mdev->state.conn < C_CONNECTED && + mdev->state.role == R_PRIMARY && + (mdev->ed_uuid & ~((u64)1)) != (nbc->md.uuid[UI_CURRENT] & ~((u64)1))) { + dev_err(DEV, "Can only attach to data with current UUID=%016llX\n", + (unsigned long long)mdev->ed_uuid); + retcode = ERR_DATA_NOT_CURRENT; + goto force_diskless_dec; + } + + /* Since we are diskless, fix the activity log first... */ + if (drbd_check_al_size(mdev)) { + retcode = ERR_NOMEM; + goto force_diskless_dec; + } + + /* Prevent shrinking of consistent devices ! */ + if (drbd_md_test_flag(nbc, MDF_CONSISTENT) && + drbd_new_dev_size(mdev, nbc) < nbc->md.la_size_sect) { + dev_warn(DEV, "refusing to truncate a consistent device\n"); + retcode = ERR_DISK_TO_SMALL; + goto force_diskless_dec; + } + + if (!drbd_al_read_log(mdev, nbc)) { + retcode = ERR_IO_MD_DISK; + goto force_diskless_dec; + } + + /* allocate a second IO page if logical_block_size != 512 */ + logical_block_size = bdev_logical_block_size(nbc->md_bdev); + if (logical_block_size == 0) + logical_block_size = MD_SECTOR_SIZE; + + if (logical_block_size != MD_SECTOR_SIZE) { + if (!mdev->md_io_tmpp) { + struct page *page = alloc_page(GFP_NOIO); + if (!page) + goto force_diskless_dec; + + dev_warn(DEV, "Meta data's bdev logical_block_size = %d != %d\n", + logical_block_size, MD_SECTOR_SIZE); + dev_warn(DEV, "Workaround engaged (has performance impact).\n"); + + mdev->md_io_tmpp = page; + } + } + + /* Reset the "barriers don't work" bits here, then force meta data to + * be written, to ensure we determine if barriers are supported. */ + if (nbc->dc.no_md_flush) + set_bit(MD_NO_BARRIER, &mdev->flags); + else + clear_bit(MD_NO_BARRIER, &mdev->flags); + + /* Point of no return reached. + * Devices and memory are no longer released by error cleanup below. + * now mdev takes over responsibility, and the state engine should + * clean it up somewhere. */ + D_ASSERT(mdev->ldev == NULL); + mdev->ldev = nbc; + mdev->resync = resync_lru; + nbc = NULL; + resync_lru = NULL; + + mdev->write_ordering = WO_bio_barrier; + drbd_bump_write_ordering(mdev, WO_bio_barrier); + + if (drbd_md_test_flag(mdev->ldev, MDF_CRASHED_PRIMARY)) + set_bit(CRASHED_PRIMARY, &mdev->flags); + else + clear_bit(CRASHED_PRIMARY, &mdev->flags); + + if (drbd_md_test_flag(mdev->ldev, MDF_PRIMARY_IND)) { + set_bit(CRASHED_PRIMARY, &mdev->flags); + cp_discovered = 1; + } + + mdev->send_cnt = 0; + mdev->recv_cnt = 0; + mdev->read_cnt = 0; + mdev->writ_cnt = 0; + + drbd_setup_queue_param(mdev, DRBD_MAX_SEGMENT_SIZE); + + /* If I am currently not R_PRIMARY, + * but meta data primary indicator is set, + * I just now recover from a hard crash, + * and have been R_PRIMARY before that crash. + * + * Now, if I had no connection before that crash + * (have been degraded R_PRIMARY), chances are that + * I won't find my peer now either. + * + * In that case, and _only_ in that case, + * we use the degr-wfc-timeout instead of the default, + * so we can automatically recover from a crash of a + * degraded but active "cluster" after a certain timeout. + */ + clear_bit(USE_DEGR_WFC_T, &mdev->flags); + if (mdev->state.role != R_PRIMARY && + drbd_md_test_flag(mdev->ldev, MDF_PRIMARY_IND) && + !drbd_md_test_flag(mdev->ldev, MDF_CONNECTED_IND)) + set_bit(USE_DEGR_WFC_T, &mdev->flags); + + dd = drbd_determin_dev_size(mdev); + if (dd == dev_size_error) { + retcode = ERR_NOMEM_BITMAP; + goto force_diskless_dec; + } else if (dd == grew) + set_bit(RESYNC_AFTER_NEG, &mdev->flags); + + if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC)) { + dev_info(DEV, "Assuming that all blocks are out of sync " + "(aka FullSync)\n"); + if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from attaching")) { + retcode = ERR_IO_MD_DISK; + goto force_diskless_dec; + } + } else { + if (drbd_bitmap_io(mdev, &drbd_bm_read, "read from attaching") < 0) { + retcode = ERR_IO_MD_DISK; + goto force_diskless_dec; + } + } + + if (cp_discovered) { + drbd_al_apply_to_bm(mdev); + drbd_al_to_on_disk_bm(mdev); + } + + spin_lock_irq(&mdev->req_lock); + os = mdev->state; + ns.i = os.i; + /* If MDF_CONSISTENT is not set go into inconsistent state, + otherwise investigate MDF_WasUpToDate... + If MDF_WAS_UP_TO_DATE is not set go into D_OUTDATED disk state, + otherwise into D_CONSISTENT state. + */ + if (drbd_md_test_flag(mdev->ldev, MDF_CONSISTENT)) { + if (drbd_md_test_flag(mdev->ldev, MDF_WAS_UP_TO_DATE)) + ns.disk = D_CONSISTENT; + else + ns.disk = D_OUTDATED; + } else { + ns.disk = D_INCONSISTENT; + } + + if (drbd_md_test_flag(mdev->ldev, MDF_PEER_OUT_DATED)) + ns.pdsk = D_OUTDATED; + + if ( ns.disk == D_CONSISTENT && + (ns.pdsk == D_OUTDATED || mdev->ldev->dc.fencing == FP_DONT_CARE)) + ns.disk = D_UP_TO_DATE; + + /* All tests on MDF_PRIMARY_IND, MDF_CONNECTED_IND, + MDF_CONSISTENT and MDF_WAS_UP_TO_DATE must happen before + this point, because drbd_request_state() modifies these + flags. */ + + /* In case we are C_CONNECTED postpone any decision on the new disk + state after the negotiation phase. */ + if (mdev->state.conn == C_CONNECTED) { + mdev->new_state_tmp.i = ns.i; + ns.i = os.i; + ns.disk = D_NEGOTIATING; + } + + rv = _drbd_set_state(mdev, ns, CS_VERBOSE, NULL); + ns = mdev->state; + spin_unlock_irq(&mdev->req_lock); + + if (rv < SS_SUCCESS) + goto force_diskless_dec; + + if (mdev->state.role == R_PRIMARY) + mdev->ldev->md.uuid[UI_CURRENT] |= (u64)1; + else + mdev->ldev->md.uuid[UI_CURRENT] &= ~(u64)1; + + drbd_md_mark_dirty(mdev); + drbd_md_sync(mdev); + + kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); + put_ldev(mdev); + reply->ret_code = retcode; + drbd_reconfig_done(mdev); + return 0; + + force_diskless_dec: + put_ldev(mdev); + force_diskless: + drbd_force_state(mdev, NS(disk, D_DISKLESS)); + drbd_md_sync(mdev); + release_bdev2_fail: + if (nbc) + bd_release(nbc->md_bdev); + release_bdev_fail: + if (nbc) + bd_release(nbc->backing_bdev); + fail: + if (nbc) { + if (nbc->lo_file) + fput(nbc->lo_file); + if (nbc->md_file) + fput(nbc->md_file); + kfree(nbc); + } + lc_destroy(resync_lru); + + reply->ret_code = retcode; + drbd_reconfig_done(mdev); + return 0; +} + +static int drbd_nl_detach(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, + struct drbd_nl_cfg_reply *reply) +{ + reply->ret_code = drbd_request_state(mdev, NS(disk, D_DISKLESS)); + return 0; +} + +static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, + struct drbd_nl_cfg_reply *reply) +{ + int i, ns; + enum drbd_ret_codes retcode; + struct net_conf *new_conf = NULL; + struct crypto_hash *tfm = NULL; + struct crypto_hash *integrity_w_tfm = NULL; + struct crypto_hash *integrity_r_tfm = NULL; + struct hlist_head *new_tl_hash = NULL; + struct hlist_head *new_ee_hash = NULL; + struct drbd_conf *odev; + char hmac_name[CRYPTO_MAX_ALG_NAME]; + void *int_dig_out = NULL; + void *int_dig_in = NULL; + void *int_dig_vv = NULL; + struct sockaddr *new_my_addr, *new_peer_addr, *taken_addr; + + drbd_reconfig_start(mdev); + + if (mdev->state.conn > C_STANDALONE) { + retcode = ERR_NET_CONFIGURED; + goto fail; + } + + /* allocation not in the IO path, cqueue thread context */ + new_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL); + if (!new_conf) { + retcode = ERR_NOMEM; + goto fail; + } + + memset(new_conf, 0, sizeof(struct net_conf)); + new_conf->timeout = DRBD_TIMEOUT_DEF; + new_conf->try_connect_int = DRBD_CONNECT_INT_DEF; + new_conf->ping_int = DRBD_PING_INT_DEF; + new_conf->max_epoch_size = DRBD_MAX_EPOCH_SIZE_DEF; + new_conf->max_buffers = DRBD_MAX_BUFFERS_DEF; + new_conf->unplug_watermark = DRBD_UNPLUG_WATERMARK_DEF; + new_conf->sndbuf_size = DRBD_SNDBUF_SIZE_DEF; + new_conf->rcvbuf_size = DRBD_RCVBUF_SIZE_DEF; + new_conf->ko_count = DRBD_KO_COUNT_DEF; + new_conf->after_sb_0p = DRBD_AFTER_SB_0P_DEF; + new_conf->after_sb_1p = DRBD_AFTER_SB_1P_DEF; + new_conf->after_sb_2p = DRBD_AFTER_SB_2P_DEF; + new_conf->want_lose = 0; + new_conf->two_primaries = 0; + new_conf->wire_protocol = DRBD_PROT_C; + new_conf->ping_timeo = DRBD_PING_TIMEO_DEF; + new_conf->rr_conflict = DRBD_RR_CONFLICT_DEF; + + if (!net_conf_from_tags(mdev, nlp->tag_list, new_conf)) { + retcode = ERR_MANDATORY_TAG; + goto fail; + } + + if (new_conf->two_primaries + && (new_conf->wire_protocol != DRBD_PROT_C)) { + retcode = ERR_NOT_PROTO_C; + goto fail; + }; + + if (mdev->state.role == R_PRIMARY && new_conf->want_lose) { + retcode = ERR_DISCARD; + goto fail; + } + + retcode = NO_ERROR; + + new_my_addr = (struct sockaddr *)&new_conf->my_addr; + new_peer_addr = (struct sockaddr *)&new_conf->peer_addr; + for (i = 0; i < minor_count; i++) { + odev = minor_to_mdev(i); + if (!odev || odev == mdev) + continue; + if (get_net_conf(odev)) { + taken_addr = (struct sockaddr *)&odev->net_conf->my_addr; + if (new_conf->my_addr_len == odev->net_conf->my_addr_len && + !memcmp(new_my_addr, taken_addr, new_conf->my_addr_len)) + retcode = ERR_LOCAL_ADDR; + + taken_addr = (struct sockaddr *)&odev->net_conf->peer_addr; + if (new_conf->peer_addr_len == odev->net_conf->peer_addr_len && + !memcmp(new_peer_addr, taken_addr, new_conf->peer_addr_len)) + retcode = ERR_PEER_ADDR; + + put_net_conf(odev); + if (retcode != NO_ERROR) + goto fail; + } + } + + if (new_conf->cram_hmac_alg[0] != 0) { + snprintf(hmac_name, CRYPTO_MAX_ALG_NAME, "hmac(%s)", + new_conf->cram_hmac_alg); + tfm = crypto_alloc_hash(hmac_name, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm)) { + tfm = NULL; + retcode = ERR_AUTH_ALG; + goto fail; + } + + if (crypto_tfm_alg_type(crypto_hash_tfm(tfm)) + != CRYPTO_ALG_TYPE_HASH) { + retcode = ERR_AUTH_ALG_ND; + goto fail; + } + } + + if (new_conf->integrity_alg[0]) { + integrity_w_tfm = crypto_alloc_hash(new_conf->integrity_alg, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(integrity_w_tfm)) { + integrity_w_tfm = NULL; + retcode=ERR_INTEGRITY_ALG; + goto fail; + } + + if (!drbd_crypto_is_hash(crypto_hash_tfm(integrity_w_tfm))) { + retcode=ERR_INTEGRITY_ALG_ND; + goto fail; + } + + integrity_r_tfm = crypto_alloc_hash(new_conf->integrity_alg, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(integrity_r_tfm)) { + integrity_r_tfm = NULL; + retcode=ERR_INTEGRITY_ALG; + goto fail; + } + } + + ns = new_conf->max_epoch_size/8; + if (mdev->tl_hash_s != ns) { + new_tl_hash = kzalloc(ns*sizeof(void *), GFP_KERNEL); + if (!new_tl_hash) { + retcode = ERR_NOMEM; + goto fail; + } + } + + ns = new_conf->max_buffers/8; + if (new_conf->two_primaries && (mdev->ee_hash_s != ns)) { + new_ee_hash = kzalloc(ns*sizeof(void *), GFP_KERNEL); + if (!new_ee_hash) { + retcode = ERR_NOMEM; + goto fail; + } + } + + ((char *)new_conf->shared_secret)[SHARED_SECRET_MAX-1] = 0; + + if (integrity_w_tfm) { + i = crypto_hash_digestsize(integrity_w_tfm); + int_dig_out = kmalloc(i, GFP_KERNEL); + if (!int_dig_out) { + retcode = ERR_NOMEM; + goto fail; + } + int_dig_in = kmalloc(i, GFP_KERNEL); + if (!int_dig_in) { + retcode = ERR_NOMEM; + goto fail; + } + int_dig_vv = kmalloc(i, GFP_KERNEL); + if (!int_dig_vv) { + retcode = ERR_NOMEM; + goto fail; + } + } + + if (!mdev->bitmap) { + if(drbd_bm_init(mdev)) { + retcode = ERR_NOMEM; + goto fail; + } + } + + spin_lock_irq(&mdev->req_lock); + if (mdev->net_conf != NULL) { + retcode = ERR_NET_CONFIGURED; + spin_unlock_irq(&mdev->req_lock); + goto fail; + } + mdev->net_conf = new_conf; + + mdev->send_cnt = 0; + mdev->recv_cnt = 0; + + if (new_tl_hash) { + kfree(mdev->tl_hash); + mdev->tl_hash_s = mdev->net_conf->max_epoch_size/8; + mdev->tl_hash = new_tl_hash; + } + + if (new_ee_hash) { + kfree(mdev->ee_hash); + mdev->ee_hash_s = mdev->net_conf->max_buffers/8; + mdev->ee_hash = new_ee_hash; + } + + crypto_free_hash(mdev->cram_hmac_tfm); + mdev->cram_hmac_tfm = tfm; + + crypto_free_hash(mdev->integrity_w_tfm); + mdev->integrity_w_tfm = integrity_w_tfm; + + crypto_free_hash(mdev->integrity_r_tfm); + mdev->integrity_r_tfm = integrity_r_tfm; + + kfree(mdev->int_dig_out); + kfree(mdev->int_dig_in); + kfree(mdev->int_dig_vv); + mdev->int_dig_out=int_dig_out; + mdev->int_dig_in=int_dig_in; + mdev->int_dig_vv=int_dig_vv; + spin_unlock_irq(&mdev->req_lock); + + retcode = _drbd_request_state(mdev, NS(conn, C_UNCONNECTED), CS_VERBOSE); + + kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); + reply->ret_code = retcode; + drbd_reconfig_done(mdev); + return 0; + +fail: + kfree(int_dig_out); + kfree(int_dig_in); + kfree(int_dig_vv); + crypto_free_hash(tfm); + crypto_free_hash(integrity_w_tfm); + crypto_free_hash(integrity_r_tfm); + kfree(new_tl_hash); + kfree(new_ee_hash); + kfree(new_conf); + + reply->ret_code = retcode; + drbd_reconfig_done(mdev); + return 0; +} + +static int drbd_nl_disconnect(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, + struct drbd_nl_cfg_reply *reply) +{ + int retcode; + + retcode = _drbd_request_state(mdev, NS(conn, C_DISCONNECTING), CS_ORDERED); + + if (retcode == SS_NOTHING_TO_DO) + goto done; + else if (retcode == SS_ALREADY_STANDALONE) + goto done; + else if (retcode == SS_PRIMARY_NOP) { + /* Our statche checking code wants to see the peer outdated. */ + retcode = drbd_request_state(mdev, NS2(conn, C_DISCONNECTING, + pdsk, D_OUTDATED)); + } else if (retcode == SS_CW_FAILED_BY_PEER) { + /* The peer probably wants to see us outdated. */ + retcode = _drbd_request_state(mdev, NS2(conn, C_DISCONNECTING, + disk, D_OUTDATED), + CS_ORDERED); + if (retcode == SS_IS_DISKLESS || retcode == SS_LOWER_THAN_OUTDATED) { + drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); + retcode = SS_SUCCESS; + } + } + + if (retcode < SS_SUCCESS) + goto fail; + + if (wait_event_interruptible(mdev->state_wait, + mdev->state.conn != C_DISCONNECTING)) { + /* Do not test for mdev->state.conn == C_STANDALONE, since + someone else might connect us in the mean time! */ + retcode = ERR_INTR; + goto fail; + } + + done: + retcode = NO_ERROR; + fail: + drbd_md_sync(mdev); + reply->ret_code = retcode; + return 0; +} + +void resync_after_online_grow(struct drbd_conf *mdev) +{ + int iass; /* I am sync source */ + + dev_info(DEV, "Resync of new storage after online grow\n"); + if (mdev->state.role != mdev->state.peer) + iass = (mdev->state.role == R_PRIMARY); + else + iass = test_bit(DISCARD_CONCURRENT, &mdev->flags); + + if (iass) + drbd_start_resync(mdev, C_SYNC_SOURCE); + else + _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE + CS_SERIALIZE); +} + +static int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, + struct drbd_nl_cfg_reply *reply) +{ + struct resize rs; + int retcode = NO_ERROR; + int ldsc = 0; /* local disk size changed */ + enum determine_dev_size dd; + + memset(&rs, 0, sizeof(struct resize)); + if (!resize_from_tags(mdev, nlp->tag_list, &rs)) { + retcode = ERR_MANDATORY_TAG; + goto fail; + } + + if (mdev->state.conn > C_CONNECTED) { + retcode = ERR_RESIZE_RESYNC; + goto fail; + } + + if (mdev->state.role == R_SECONDARY && + mdev->state.peer == R_SECONDARY) { + retcode = ERR_NO_PRIMARY; + goto fail; + } + + if (!get_ldev(mdev)) { + retcode = ERR_NO_DISK; + goto fail; + } + + if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) { + mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev); + ldsc = 1; + } + + mdev->ldev->dc.disk_size = (sector_t)rs.resize_size; + dd = drbd_determin_dev_size(mdev); + drbd_md_sync(mdev); + put_ldev(mdev); + if (dd == dev_size_error) { + retcode = ERR_NOMEM_BITMAP; + goto fail; + } + + if (mdev->state.conn == C_CONNECTED && (dd != unchanged || ldsc)) { + if (dd == grew) + set_bit(RESIZE_PENDING, &mdev->flags); + + drbd_send_uuids(mdev); + drbd_send_sizes(mdev, 1); + } + + fail: + reply->ret_code = retcode; + return 0; +} + +static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, + struct drbd_nl_cfg_reply *reply) +{ + int retcode = NO_ERROR; + int err; + int ovr; /* online verify running */ + int rsr; /* re-sync running */ + struct crypto_hash *verify_tfm = NULL; + struct crypto_hash *csums_tfm = NULL; + struct syncer_conf sc; + cpumask_var_t new_cpu_mask; + + if (!zalloc_cpumask_var(&new_cpu_mask, GFP_KERNEL)) { + retcode = ERR_NOMEM; + goto fail; + } + + if (nlp->flags & DRBD_NL_SET_DEFAULTS) { + memset(&sc, 0, sizeof(struct syncer_conf)); + sc.rate = DRBD_RATE_DEF; + sc.after = DRBD_AFTER_DEF; + sc.al_extents = DRBD_AL_EXTENTS_DEF; + } else + memcpy(&sc, &mdev->sync_conf, sizeof(struct syncer_conf)); + + if (!syncer_conf_from_tags(mdev, nlp->tag_list, &sc)) { + retcode = ERR_MANDATORY_TAG; + goto fail; + } + + /* re-sync running */ + rsr = ( mdev->state.conn == C_SYNC_SOURCE || + mdev->state.conn == C_SYNC_TARGET || + mdev->state.conn == C_PAUSED_SYNC_S || + mdev->state.conn == C_PAUSED_SYNC_T ); + + if (rsr && strcmp(sc.csums_alg, mdev->sync_conf.csums_alg)) { + retcode = ERR_CSUMS_RESYNC_RUNNING; + goto fail; + } + + if (!rsr && sc.csums_alg[0]) { + csums_tfm = crypto_alloc_hash(sc.csums_alg, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(csums_tfm)) { + csums_tfm = NULL; + retcode = ERR_CSUMS_ALG; + goto fail; + } + + if (!drbd_crypto_is_hash(crypto_hash_tfm(csums_tfm))) { + retcode = ERR_CSUMS_ALG_ND; + goto fail; + } + } + + /* online verify running */ + ovr = (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T); + + if (ovr) { + if (strcmp(sc.verify_alg, mdev->sync_conf.verify_alg)) { + retcode = ERR_VERIFY_RUNNING; + goto fail; + } + } + + if (!ovr && sc.verify_alg[0]) { + verify_tfm = crypto_alloc_hash(sc.verify_alg, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(verify_tfm)) { + verify_tfm = NULL; + retcode = ERR_VERIFY_ALG; + goto fail; + } + + if (!drbd_crypto_is_hash(crypto_hash_tfm(verify_tfm))) { + retcode = ERR_VERIFY_ALG_ND; + goto fail; + } + } + + /* silently ignore cpu mask on UP kernel */ + if (nr_cpu_ids > 1 && sc.cpu_mask[0] != 0) { + err = __bitmap_parse(sc.cpu_mask, 32, 0, + cpumask_bits(new_cpu_mask), nr_cpu_ids); + if (err) { + dev_warn(DEV, "__bitmap_parse() failed with %d\n", err); + retcode = ERR_CPU_MASK_PARSE; + goto fail; + } + } + + ERR_IF (sc.rate < 1) sc.rate = 1; + ERR_IF (sc.al_extents < 7) sc.al_extents = 127; /* arbitrary minimum */ +#define AL_MAX ((MD_AL_MAX_SIZE-1) * AL_EXTENTS_PT) + if (sc.al_extents > AL_MAX) { + dev_err(DEV, "sc.al_extents > %d\n", AL_MAX); + sc.al_extents = AL_MAX; + } +#undef AL_MAX + + /* most sanity checks done, try to assign the new sync-after + * dependency. need to hold the global lock in there, + * to avoid a race in the dependency loop check. */ + retcode = drbd_alter_sa(mdev, sc.after); + if (retcode != NO_ERROR) + goto fail; + + /* ok, assign the rest of it as well. + * lock against receive_SyncParam() */ + spin_lock(&mdev->peer_seq_lock); + mdev->sync_conf = sc; + + if (!rsr) { + crypto_free_hash(mdev->csums_tfm); + mdev->csums_tfm = csums_tfm; + csums_tfm = NULL; + } + + if (!ovr) { + crypto_free_hash(mdev->verify_tfm); + mdev->verify_tfm = verify_tfm; + verify_tfm = NULL; + } + spin_unlock(&mdev->peer_seq_lock); + + if (get_ldev(mdev)) { + wait_event(mdev->al_wait, lc_try_lock(mdev->act_log)); + drbd_al_shrink(mdev); + err = drbd_check_al_size(mdev); + lc_unlock(mdev->act_log); + wake_up(&mdev->al_wait); + + put_ldev(mdev); + drbd_md_sync(mdev); + + if (err) { + retcode = ERR_NOMEM; + goto fail; + } + } + + if (mdev->state.conn >= C_CONNECTED) + drbd_send_sync_param(mdev, &sc); + + if (!cpumask_equal(mdev->cpu_mask, new_cpu_mask)) { + cpumask_copy(mdev->cpu_mask, new_cpu_mask); + drbd_calc_cpu_mask(mdev); + mdev->receiver.reset_cpu_mask = 1; + mdev->asender.reset_cpu_mask = 1; + mdev->worker.reset_cpu_mask = 1; + } + + kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); +fail: + free_cpumask_var(new_cpu_mask); + crypto_free_hash(csums_tfm); + crypto_free_hash(verify_tfm); + reply->ret_code = retcode; + return 0; +} + +static int drbd_nl_invalidate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, + struct drbd_nl_cfg_reply *reply) +{ + int retcode; + + retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T), CS_ORDERED); + + if (retcode < SS_SUCCESS && retcode != SS_NEED_CONNECTION) + retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T)); + + while (retcode == SS_NEED_CONNECTION) { + spin_lock_irq(&mdev->req_lock); + if (mdev->state.conn < C_CONNECTED) + retcode = _drbd_set_state(_NS(mdev, disk, D_INCONSISTENT), CS_VERBOSE, NULL); + spin_unlock_irq(&mdev->req_lock); + + if (retcode != SS_NEED_CONNECTION) + break; + + retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T)); + } + + reply->ret_code = retcode; + return 0; +} + +static int drbd_nl_invalidate_peer(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, + struct drbd_nl_cfg_reply *reply) +{ + + reply->ret_code = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S)); + + return 0; +} + +static int drbd_nl_pause_sync(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, + struct drbd_nl_cfg_reply *reply) +{ + int retcode = NO_ERROR; + + if (drbd_request_state(mdev, NS(user_isp, 1)) == SS_NOTHING_TO_DO) + retcode = ERR_PAUSE_IS_SET; + + reply->ret_code = retcode; + return 0; +} + +static int drbd_nl_resume_sync(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, + struct drbd_nl_cfg_reply *reply) +{ + int retcode = NO_ERROR; + + if (drbd_request_state(mdev, NS(user_isp, 0)) == SS_NOTHING_TO_DO) + retcode = ERR_PAUSE_IS_CLEAR; + + reply->ret_code = retcode; + return 0; +} + +static int drbd_nl_suspend_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, + struct drbd_nl_cfg_reply *reply) +{ + reply->ret_code = drbd_request_state(mdev, NS(susp, 1)); + + return 0; +} + +static int drbd_nl_resume_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, + struct drbd_nl_cfg_reply *reply) +{ + reply->ret_code = drbd_request_state(mdev, NS(susp, 0)); + return 0; +} + +static int drbd_nl_outdate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, + struct drbd_nl_cfg_reply *reply) +{ + reply->ret_code = drbd_request_state(mdev, NS(disk, D_OUTDATED)); + return 0; +} + +static int drbd_nl_get_config(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, + struct drbd_nl_cfg_reply *reply) +{ + unsigned short *tl; + + tl = reply->tag_list; + + if (get_ldev(mdev)) { + tl = disk_conf_to_tags(mdev, &mdev->ldev->dc, tl); + put_ldev(mdev); + } + + if (get_net_conf(mdev)) { + tl = net_conf_to_tags(mdev, mdev->net_conf, tl); + put_net_conf(mdev); + } + tl = syncer_conf_to_tags(mdev, &mdev->sync_conf, tl); + + put_unaligned(TT_END, tl++); /* Close the tag list */ + + return (int)((char *)tl - (char *)reply->tag_list); +} + +static int drbd_nl_get_state(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, + struct drbd_nl_cfg_reply *reply) +{ + unsigned short *tl = reply->tag_list; + union drbd_state s = mdev->state; + unsigned long rs_left; + unsigned int res; + + tl = get_state_to_tags(mdev, (struct get_state *)&s, tl); + + /* no local ref, no bitmap, no syncer progress. */ + if (s.conn >= C_SYNC_SOURCE && s.conn <= C_PAUSED_SYNC_T) { + if (get_ldev(mdev)) { + drbd_get_syncer_progress(mdev, &rs_left, &res); + tl = tl_add_int(tl, T_sync_progress, &res); + put_ldev(mdev); + } + } + put_unaligned(TT_END, tl++); /* Close the tag list */ + + return (int)((char *)tl - (char *)reply->tag_list); +} + +static int drbd_nl_get_uuids(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, + struct drbd_nl_cfg_reply *reply) +{ + unsigned short *tl; + + tl = reply->tag_list; + + if (get_ldev(mdev)) { + tl = tl_add_blob(tl, T_uuids, mdev->ldev->md.uuid, UI_SIZE*sizeof(u64)); + tl = tl_add_int(tl, T_uuids_flags, &mdev->ldev->md.flags); + put_ldev(mdev); + } + put_unaligned(TT_END, tl++); /* Close the tag list */ + + return (int)((char *)tl - (char *)reply->tag_list); +} + +/** + * drbd_nl_get_timeout_flag() - Used by drbdsetup to find out which timeout value to use + * @mdev: DRBD device. + * @nlp: Netlink/connector packet from drbdsetup + * @reply: Reply packet for drbdsetup + */ +static int drbd_nl_get_timeout_flag(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, + struct drbd_nl_cfg_reply *reply) +{ + unsigned short *tl; + char rv; + + tl = reply->tag_list; + + rv = mdev->state.pdsk == D_OUTDATED ? UT_PEER_OUTDATED : + test_bit(USE_DEGR_WFC_T, &mdev->flags) ? UT_DEGRADED : UT_DEFAULT; + + tl = tl_add_blob(tl, T_use_degraded, &rv, sizeof(rv)); + put_unaligned(TT_END, tl++); /* Close the tag list */ + + return (int)((char *)tl - (char *)reply->tag_list); +} + +static int drbd_nl_start_ov(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, + struct drbd_nl_cfg_reply *reply) +{ + /* default to resume from last known position, if possible */ + struct start_ov args = + { .start_sector = mdev->ov_start_sector }; + + if (!start_ov_from_tags(mdev, nlp->tag_list, &args)) { + reply->ret_code = ERR_MANDATORY_TAG; + return 0; + } + /* w_make_ov_request expects position to be aligned */ + mdev->ov_start_sector = args.start_sector & ~BM_SECT_PER_BIT; + reply->ret_code = drbd_request_state(mdev,NS(conn,C_VERIFY_S)); + return 0; +} + + +static int drbd_nl_new_c_uuid(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, + struct drbd_nl_cfg_reply *reply) +{ + int retcode = NO_ERROR; + int skip_initial_sync = 0; + int err; + + struct new_c_uuid args; + + memset(&args, 0, sizeof(struct new_c_uuid)); + if (!new_c_uuid_from_tags(mdev, nlp->tag_list, &args)) { + reply->ret_code = ERR_MANDATORY_TAG; + return 0; + } + + mutex_lock(&mdev->state_mutex); /* Protects us against serialized state changes. */ + + if (!get_ldev(mdev)) { + retcode = ERR_NO_DISK; + goto out; + } + + /* this is "skip initial sync", assume to be clean */ + if (mdev->state.conn == C_CONNECTED && mdev->agreed_pro_version >= 90 && + mdev->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && args.clear_bm) { + dev_info(DEV, "Preparing to skip initial sync\n"); + skip_initial_sync = 1; + } else if (mdev->state.conn != C_STANDALONE) { + retcode = ERR_CONNECTED; + goto out_dec; + } + + drbd_uuid_set(mdev, UI_BITMAP, 0); /* Rotate UI_BITMAP to History 1, etc... */ + drbd_uuid_new_current(mdev); /* New current, previous to UI_BITMAP */ + + if (args.clear_bm) { + err = drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write, "clear_n_write from new_c_uuid"); + if (err) { + dev_err(DEV, "Writing bitmap failed with %d\n",err); + retcode = ERR_IO_MD_DISK; + } + if (skip_initial_sync) { + drbd_send_uuids_skip_initial_sync(mdev); + _drbd_uuid_set(mdev, UI_BITMAP, 0); + spin_lock_irq(&mdev->req_lock); + _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE), + CS_VERBOSE, NULL); + spin_unlock_irq(&mdev->req_lock); + } + } + + drbd_md_sync(mdev); +out_dec: + put_ldev(mdev); +out: + mutex_unlock(&mdev->state_mutex); + + reply->ret_code = retcode; + return 0; +} + +static struct drbd_conf *ensure_mdev(struct drbd_nl_cfg_req *nlp) +{ + struct drbd_conf *mdev; + + if (nlp->drbd_minor >= minor_count) + return NULL; + + mdev = minor_to_mdev(nlp->drbd_minor); + + if (!mdev && (nlp->flags & DRBD_NL_CREATE_DEVICE)) { + struct gendisk *disk = NULL; + mdev = drbd_new_device(nlp->drbd_minor); + + spin_lock_irq(&drbd_pp_lock); + if (minor_table[nlp->drbd_minor] == NULL) { + minor_table[nlp->drbd_minor] = mdev; + disk = mdev->vdisk; + mdev = NULL; + } /* else: we lost the race */ + spin_unlock_irq(&drbd_pp_lock); + + if (disk) /* we won the race above */ + /* in case we ever add a drbd_delete_device(), + * don't forget the del_gendisk! */ + add_disk(disk); + else /* we lost the race above */ + drbd_free_mdev(mdev); + + mdev = minor_to_mdev(nlp->drbd_minor); + } + + return mdev; +} + +struct cn_handler_struct { + int (*function)(struct drbd_conf *, + struct drbd_nl_cfg_req *, + struct drbd_nl_cfg_reply *); + int reply_body_size; +}; + +static struct cn_handler_struct cnd_table[] = { + [ P_primary ] = { &drbd_nl_primary, 0 }, + [ P_secondary ] = { &drbd_nl_secondary, 0 }, + [ P_disk_conf ] = { &drbd_nl_disk_conf, 0 }, + [ P_detach ] = { &drbd_nl_detach, 0 }, + [ P_net_conf ] = { &drbd_nl_net_conf, 0 }, + [ P_disconnect ] = { &drbd_nl_disconnect, 0 }, + [ P_resize ] = { &drbd_nl_resize, 0 }, + [ P_syncer_conf ] = { &drbd_nl_syncer_conf, 0 }, + [ P_invalidate ] = { &drbd_nl_invalidate, 0 }, + [ P_invalidate_peer ] = { &drbd_nl_invalidate_peer, 0 }, + [ P_pause_sync ] = { &drbd_nl_pause_sync, 0 }, + [ P_resume_sync ] = { &drbd_nl_resume_sync, 0 }, + [ P_suspend_io ] = { &drbd_nl_suspend_io, 0 }, + [ P_resume_io ] = { &drbd_nl_resume_io, 0 }, + [ P_outdate ] = { &drbd_nl_outdate, 0 }, + [ P_get_config ] = { &drbd_nl_get_config, + sizeof(struct syncer_conf_tag_len_struct) + + sizeof(struct disk_conf_tag_len_struct) + + sizeof(struct net_conf_tag_len_struct) }, + [ P_get_state ] = { &drbd_nl_get_state, + sizeof(struct get_state_tag_len_struct) + + sizeof(struct sync_progress_tag_len_struct) }, + [ P_get_uuids ] = { &drbd_nl_get_uuids, + sizeof(struct get_uuids_tag_len_struct) }, + [ P_get_timeout_flag ] = { &drbd_nl_get_timeout_flag, + sizeof(struct get_timeout_flag_tag_len_struct)}, + [ P_start_ov ] = { &drbd_nl_start_ov, 0 }, + [ P_new_c_uuid ] = { &drbd_nl_new_c_uuid, 0 }, +}; + +static void drbd_connector_callback(struct cn_msg *req) +{ + struct drbd_nl_cfg_req *nlp = (struct drbd_nl_cfg_req *)req->data; + struct cn_handler_struct *cm; + struct cn_msg *cn_reply; + struct drbd_nl_cfg_reply *reply; + struct drbd_conf *mdev; + int retcode, rr; + int reply_size = sizeof(struct cn_msg) + + sizeof(struct drbd_nl_cfg_reply) + + sizeof(short int); + + if (!try_module_get(THIS_MODULE)) { + printk(KERN_ERR "drbd: try_module_get() failed!\n"); + return; + } + + mdev = ensure_mdev(nlp); + if (!mdev) { + retcode = ERR_MINOR_INVALID; + goto fail; + } + + trace_drbd_netlink(req, 1); + + if (nlp->packet_type >= P_nl_after_last_packet) { + retcode = ERR_PACKET_NR; + goto fail; + } + + cm = cnd_table + nlp->packet_type; + + /* This may happen if packet number is 0: */ + if (cm->function == NULL) { + retcode = ERR_PACKET_NR; + goto fail; + } + + reply_size += cm->reply_body_size; + + /* allocation not in the IO path, cqueue thread context */ + cn_reply = kmalloc(reply_size, GFP_KERNEL); + if (!cn_reply) { + retcode = ERR_NOMEM; + goto fail; + } + reply = (struct drbd_nl_cfg_reply *) cn_reply->data; + + reply->packet_type = + cm->reply_body_size ? nlp->packet_type : P_nl_after_last_packet; + reply->minor = nlp->drbd_minor; + reply->ret_code = NO_ERROR; /* Might by modified by cm->function. */ + /* reply->tag_list; might be modified by cm->function. */ + + rr = cm->function(mdev, nlp, reply); + + cn_reply->id = req->id; + cn_reply->seq = req->seq; + cn_reply->ack = req->ack + 1; + cn_reply->len = sizeof(struct drbd_nl_cfg_reply) + rr; + cn_reply->flags = 0; + + trace_drbd_netlink(cn_reply, 0); + rr = cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_KERNEL); + if (rr && rr != -ESRCH) + printk(KERN_INFO "drbd: cn_netlink_send()=%d\n", rr); + + kfree(cn_reply); + module_put(THIS_MODULE); + return; + fail: + drbd_nl_send_reply(req, retcode); + module_put(THIS_MODULE); +} + +static atomic_t drbd_nl_seq = ATOMIC_INIT(2); /* two. */ + +static unsigned short * +__tl_add_blob(unsigned short *tl, enum drbd_tags tag, const void *data, + unsigned short len, int nul_terminated) +{ + unsigned short l = tag_descriptions[tag_number(tag)].max_len; + len = (len < l) ? len : l; + put_unaligned(tag, tl++); + put_unaligned(len, tl++); + memcpy(tl, data, len); + tl = (unsigned short*)((char*)tl + len); + if (nul_terminated) + *((char*)tl - 1) = 0; + return tl; +} + +static unsigned short * +tl_add_blob(unsigned short *tl, enum drbd_tags tag, const void *data, int len) +{ + return __tl_add_blob(tl, tag, data, len, 0); +} + +static unsigned short * +tl_add_str(unsigned short *tl, enum drbd_tags tag, const char *str) +{ + return __tl_add_blob(tl, tag, str, strlen(str)+1, 0); +} + +static unsigned short * +tl_add_int(unsigned short *tl, enum drbd_tags tag, const void *val) +{ + put_unaligned(tag, tl++); + switch(tag_type(tag)) { + case TT_INTEGER: + put_unaligned(sizeof(int), tl++); + put_unaligned(*(int *)val, (int *)tl); + tl = (unsigned short*)((char*)tl+sizeof(int)); + break; + case TT_INT64: + put_unaligned(sizeof(u64), tl++); + put_unaligned(*(u64 *)val, (u64 *)tl); + tl = (unsigned short*)((char*)tl+sizeof(u64)); + break; + default: + /* someone did something stupid. */ + ; + } + return tl; +} + +void drbd_bcast_state(struct drbd_conf *mdev, union drbd_state state) +{ + char buffer[sizeof(struct cn_msg)+ + sizeof(struct drbd_nl_cfg_reply)+ + sizeof(struct get_state_tag_len_struct)+ + sizeof(short int)]; + struct cn_msg *cn_reply = (struct cn_msg *) buffer; + struct drbd_nl_cfg_reply *reply = + (struct drbd_nl_cfg_reply *)cn_reply->data; + unsigned short *tl = reply->tag_list; + + /* dev_warn(DEV, "drbd_bcast_state() got called\n"); */ + + tl = get_state_to_tags(mdev, (struct get_state *)&state, tl); + + put_unaligned(TT_END, tl++); /* Close the tag list */ + + cn_reply->id.idx = CN_IDX_DRBD; + cn_reply->id.val = CN_VAL_DRBD; + + cn_reply->seq = atomic_add_return(1, &drbd_nl_seq); + cn_reply->ack = 0; /* not used here. */ + cn_reply->len = sizeof(struct drbd_nl_cfg_reply) + + (int)((char *)tl - (char *)reply->tag_list); + cn_reply->flags = 0; + + reply->packet_type = P_get_state; + reply->minor = mdev_to_minor(mdev); + reply->ret_code = NO_ERROR; + + trace_drbd_netlink(cn_reply, 0); + cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO); +} + +void drbd_bcast_ev_helper(struct drbd_conf *mdev, char *helper_name) +{ + char buffer[sizeof(struct cn_msg)+ + sizeof(struct drbd_nl_cfg_reply)+ + sizeof(struct call_helper_tag_len_struct)+ + sizeof(short int)]; + struct cn_msg *cn_reply = (struct cn_msg *) buffer; + struct drbd_nl_cfg_reply *reply = + (struct drbd_nl_cfg_reply *)cn_reply->data; + unsigned short *tl = reply->tag_list; + + /* dev_warn(DEV, "drbd_bcast_state() got called\n"); */ + + tl = tl_add_str(tl, T_helper, helper_name); + put_unaligned(TT_END, tl++); /* Close the tag list */ + + cn_reply->id.idx = CN_IDX_DRBD; + cn_reply->id.val = CN_VAL_DRBD; + + cn_reply->seq = atomic_add_return(1, &drbd_nl_seq); + cn_reply->ack = 0; /* not used here. */ + cn_reply->len = sizeof(struct drbd_nl_cfg_reply) + + (int)((char *)tl - (char *)reply->tag_list); + cn_reply->flags = 0; + + reply->packet_type = P_call_helper; + reply->minor = mdev_to_minor(mdev); + reply->ret_code = NO_ERROR; + + trace_drbd_netlink(cn_reply, 0); + cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO); +} + +void drbd_bcast_ee(struct drbd_conf *mdev, + const char *reason, const int dgs, + const char* seen_hash, const char* calc_hash, + const struct drbd_epoch_entry* e) +{ + struct cn_msg *cn_reply; + struct drbd_nl_cfg_reply *reply; + struct bio_vec *bvec; + unsigned short *tl; + int i; + + if (!e) + return; + if (!reason || !reason[0]) + return; + + /* apparently we have to memcpy twice, first to prepare the data for the + * struct cn_msg, then within cn_netlink_send from the cn_msg to the + * netlink skb. */ + /* receiver thread context, which is not in the writeout path (of this node), + * but may be in the writeout path of the _other_ node. + * GFP_NOIO to avoid potential "distributed deadlock". */ + cn_reply = kmalloc( + sizeof(struct cn_msg)+ + sizeof(struct drbd_nl_cfg_reply)+ + sizeof(struct dump_ee_tag_len_struct)+ + sizeof(short int), + GFP_NOIO); + + if (!cn_reply) { + dev_err(DEV, "could not kmalloc buffer for drbd_bcast_ee, sector %llu, size %u\n", + (unsigned long long)e->sector, e->size); + return; + } + + reply = (struct drbd_nl_cfg_reply*)cn_reply->data; + tl = reply->tag_list; + + tl = tl_add_str(tl, T_dump_ee_reason, reason); + tl = tl_add_blob(tl, T_seen_digest, seen_hash, dgs); + tl = tl_add_blob(tl, T_calc_digest, calc_hash, dgs); + tl = tl_add_int(tl, T_ee_sector, &e->sector); + tl = tl_add_int(tl, T_ee_block_id, &e->block_id); + + put_unaligned(T_ee_data, tl++); + put_unaligned(e->size, tl++); + + __bio_for_each_segment(bvec, e->private_bio, i, 0) { + void *d = kmap(bvec->bv_page); + memcpy(tl, d + bvec->bv_offset, bvec->bv_len); + kunmap(bvec->bv_page); + tl=(unsigned short*)((char*)tl + bvec->bv_len); + } + put_unaligned(TT_END, tl++); /* Close the tag list */ + + cn_reply->id.idx = CN_IDX_DRBD; + cn_reply->id.val = CN_VAL_DRBD; + + cn_reply->seq = atomic_add_return(1,&drbd_nl_seq); + cn_reply->ack = 0; // not used here. + cn_reply->len = sizeof(struct drbd_nl_cfg_reply) + + (int)((char*)tl - (char*)reply->tag_list); + cn_reply->flags = 0; + + reply->packet_type = P_dump_ee; + reply->minor = mdev_to_minor(mdev); + reply->ret_code = NO_ERROR; + + trace_drbd_netlink(cn_reply, 0); + cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO); + kfree(cn_reply); +} + +void drbd_bcast_sync_progress(struct drbd_conf *mdev) +{ + char buffer[sizeof(struct cn_msg)+ + sizeof(struct drbd_nl_cfg_reply)+ + sizeof(struct sync_progress_tag_len_struct)+ + sizeof(short int)]; + struct cn_msg *cn_reply = (struct cn_msg *) buffer; + struct drbd_nl_cfg_reply *reply = + (struct drbd_nl_cfg_reply *)cn_reply->data; + unsigned short *tl = reply->tag_list; + unsigned long rs_left; + unsigned int res; + + /* no local ref, no bitmap, no syncer progress, no broadcast. */ + if (!get_ldev(mdev)) + return; + drbd_get_syncer_progress(mdev, &rs_left, &res); + put_ldev(mdev); + + tl = tl_add_int(tl, T_sync_progress, &res); + put_unaligned(TT_END, tl++); /* Close the tag list */ + + cn_reply->id.idx = CN_IDX_DRBD; + cn_reply->id.val = CN_VAL_DRBD; + + cn_reply->seq = atomic_add_return(1, &drbd_nl_seq); + cn_reply->ack = 0; /* not used here. */ + cn_reply->len = sizeof(struct drbd_nl_cfg_reply) + + (int)((char *)tl - (char *)reply->tag_list); + cn_reply->flags = 0; + + reply->packet_type = P_sync_progress; + reply->minor = mdev_to_minor(mdev); + reply->ret_code = NO_ERROR; + + trace_drbd_netlink(cn_reply, 0); + cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO); +} + +int __init drbd_nl_init(void) +{ + static struct cb_id cn_id_drbd; + int err, try=10; + + cn_id_drbd.val = CN_VAL_DRBD; + do { + cn_id_drbd.idx = cn_idx; + err = cn_add_callback(&cn_id_drbd, "cn_drbd", &drbd_connector_callback); + if (!err) + break; + cn_idx = (cn_idx + CN_IDX_STEP); + } while (try--); + + if (err) { + printk(KERN_ERR "drbd: cn_drbd failed to register\n"); + return err; + } + + return 0; +} + +void drbd_nl_cleanup(void) +{ + static struct cb_id cn_id_drbd; + + cn_id_drbd.idx = cn_idx; + cn_id_drbd.val = CN_VAL_DRBD; + + cn_del_callback(&cn_id_drbd); +} + +void drbd_nl_send_reply(struct cn_msg *req, int ret_code) +{ + char buffer[sizeof(struct cn_msg)+sizeof(struct drbd_nl_cfg_reply)]; + struct cn_msg *cn_reply = (struct cn_msg *) buffer; + struct drbd_nl_cfg_reply *reply = + (struct drbd_nl_cfg_reply *)cn_reply->data; + int rr; + + cn_reply->id = req->id; + + cn_reply->seq = req->seq; + cn_reply->ack = req->ack + 1; + cn_reply->len = sizeof(struct drbd_nl_cfg_reply); + cn_reply->flags = 0; + + reply->minor = ((struct drbd_nl_cfg_req *)req->data)->drbd_minor; + reply->ret_code = ret_code; + + trace_drbd_netlink(cn_reply, 0); + rr = cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO); + if (rr && rr != -ESRCH) + printk(KERN_INFO "drbd: cn_netlink_send()=%d\n", rr); +} + diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c new file mode 100644 index 000000000000..98fcb7450c76 --- /dev/null +++ b/drivers/block/drbd/drbd_proc.c @@ -0,0 +1,266 @@ +/* + drbd_proc.c + + This file is part of DRBD by Philipp Reisner and Lars Ellenberg. + + Copyright (C) 2001-2008, LINBIT Information Technologies GmbH. + Copyright (C) 1999-2008, Philipp Reisner . + Copyright (C) 2002-2008, Lars Ellenberg . + + drbd is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + drbd is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with drbd; see the file COPYING. If not, write to + the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + + */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include "drbd_int.h" + +static int drbd_proc_open(struct inode *inode, struct file *file); + + +struct proc_dir_entry *drbd_proc; +struct file_operations drbd_proc_fops = { + .owner = THIS_MODULE, + .open = drbd_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + + +/*lge + * progress bars shamelessly adapted from driver/md/md.c + * output looks like + * [=====>..............] 33.5% (23456/123456) + * finish: 2:20:20 speed: 6,345 (6,456) K/sec + */ +static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq) +{ + unsigned long db, dt, dbdt, rt, rs_left; + unsigned int res; + int i, x, y; + + drbd_get_syncer_progress(mdev, &rs_left, &res); + + x = res/50; + y = 20-x; + seq_printf(seq, "\t["); + for (i = 1; i < x; i++) + seq_printf(seq, "="); + seq_printf(seq, ">"); + for (i = 0; i < y; i++) + seq_printf(seq, "."); + seq_printf(seq, "] "); + + seq_printf(seq, "sync'ed:%3u.%u%% ", res / 10, res % 10); + /* if more than 1 GB display in MB */ + if (mdev->rs_total > 0x100000L) + seq_printf(seq, "(%lu/%lu)M\n\t", + (unsigned long) Bit2KB(rs_left >> 10), + (unsigned long) Bit2KB(mdev->rs_total >> 10)); + else + seq_printf(seq, "(%lu/%lu)K\n\t", + (unsigned long) Bit2KB(rs_left), + (unsigned long) Bit2KB(mdev->rs_total)); + + /* see drivers/md/md.c + * We do not want to overflow, so the order of operands and + * the * 100 / 100 trick are important. We do a +1 to be + * safe against division by zero. We only estimate anyway. + * + * dt: time from mark until now + * db: blocks written from mark until now + * rt: remaining time + */ + dt = (jiffies - mdev->rs_mark_time) / HZ; + + if (dt > 20) { + /* if we made no update to rs_mark_time for too long, + * we are stalled. show that. */ + seq_printf(seq, "stalled\n"); + return; + } + + if (!dt) + dt++; + db = mdev->rs_mark_left - rs_left; + rt = (dt * (rs_left / (db/100+1)))/100; /* seconds */ + + seq_printf(seq, "finish: %lu:%02lu:%02lu", + rt / 3600, (rt % 3600) / 60, rt % 60); + + /* current speed average over (SYNC_MARKS * SYNC_MARK_STEP) jiffies */ + dbdt = Bit2KB(db/dt); + if (dbdt > 1000) + seq_printf(seq, " speed: %ld,%03ld", + dbdt/1000, dbdt % 1000); + else + seq_printf(seq, " speed: %ld", dbdt); + + /* mean speed since syncer started + * we do account for PausedSync periods */ + dt = (jiffies - mdev->rs_start - mdev->rs_paused) / HZ; + if (dt <= 0) + dt = 1; + db = mdev->rs_total - rs_left; + dbdt = Bit2KB(db/dt); + if (dbdt > 1000) + seq_printf(seq, " (%ld,%03ld)", + dbdt/1000, dbdt % 1000); + else + seq_printf(seq, " (%ld)", dbdt); + + seq_printf(seq, " K/sec\n"); +} + +static void resync_dump_detail(struct seq_file *seq, struct lc_element *e) +{ + struct bm_extent *bme = lc_entry(e, struct bm_extent, lce); + + seq_printf(seq, "%5d %s %s\n", bme->rs_left, + bme->flags & BME_NO_WRITES ? "NO_WRITES" : "---------", + bme->flags & BME_LOCKED ? "LOCKED" : "------" + ); +} + +static int drbd_seq_show(struct seq_file *seq, void *v) +{ + int i, hole = 0; + const char *sn; + struct drbd_conf *mdev; + + static char write_ordering_chars[] = { + [WO_none] = 'n', + [WO_drain_io] = 'd', + [WO_bdev_flush] = 'f', + [WO_bio_barrier] = 'b', + }; + + seq_printf(seq, "version: " REL_VERSION " (api:%d/proto:%d-%d)\n%s\n", + API_VERSION, PRO_VERSION_MIN, PRO_VERSION_MAX, drbd_buildtag()); + + /* + cs .. connection state + ro .. node role (local/remote) + ds .. disk state (local/remote) + protocol + various flags + ns .. network send + nr .. network receive + dw .. disk write + dr .. disk read + al .. activity log write count + bm .. bitmap update write count + pe .. pending (waiting for ack or data reply) + ua .. unack'd (still need to send ack or data reply) + ap .. application requests accepted, but not yet completed + ep .. number of epochs currently "on the fly", P_BARRIER_ACK pending + wo .. write ordering mode currently in use + oos .. known out-of-sync kB + */ + + for (i = 0; i < minor_count; i++) { + mdev = minor_to_mdev(i); + if (!mdev) { + hole = 1; + continue; + } + if (hole) { + hole = 0; + seq_printf(seq, "\n"); + } + + sn = drbd_conn_str(mdev->state.conn); + + if (mdev->state.conn == C_STANDALONE && + mdev->state.disk == D_DISKLESS && + mdev->state.role == R_SECONDARY) { + seq_printf(seq, "%2d: cs:Unconfigured\n", i); + } else { + seq_printf(seq, + "%2d: cs:%s ro:%s/%s ds:%s/%s %c %c%c%c%c%c\n" + " ns:%u nr:%u dw:%u dr:%u al:%u bm:%u " + "lo:%d pe:%d ua:%d ap:%d ep:%d wo:%c", + i, sn, + drbd_role_str(mdev->state.role), + drbd_role_str(mdev->state.peer), + drbd_disk_str(mdev->state.disk), + drbd_disk_str(mdev->state.pdsk), + (mdev->net_conf == NULL ? ' ' : + (mdev->net_conf->wire_protocol - DRBD_PROT_A+'A')), + mdev->state.susp ? 's' : 'r', + mdev->state.aftr_isp ? 'a' : '-', + mdev->state.peer_isp ? 'p' : '-', + mdev->state.user_isp ? 'u' : '-', + mdev->congestion_reason ?: '-', + mdev->send_cnt/2, + mdev->recv_cnt/2, + mdev->writ_cnt/2, + mdev->read_cnt/2, + mdev->al_writ_cnt, + mdev->bm_writ_cnt, + atomic_read(&mdev->local_cnt), + atomic_read(&mdev->ap_pending_cnt) + + atomic_read(&mdev->rs_pending_cnt), + atomic_read(&mdev->unacked_cnt), + atomic_read(&mdev->ap_bio_cnt), + mdev->epochs, + write_ordering_chars[mdev->write_ordering] + ); + seq_printf(seq, " oos:%lu\n", + Bit2KB(drbd_bm_total_weight(mdev))); + } + if (mdev->state.conn == C_SYNC_SOURCE || + mdev->state.conn == C_SYNC_TARGET) + drbd_syncer_progress(mdev, seq); + + if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T) + seq_printf(seq, "\t%3d%% %lu/%lu\n", + (int)((mdev->rs_total-mdev->ov_left) / + (mdev->rs_total/100+1)), + mdev->rs_total - mdev->ov_left, + mdev->rs_total); + + if (proc_details >= 1 && get_ldev_if_state(mdev, D_FAILED)) { + lc_seq_printf_stats(seq, mdev->resync); + lc_seq_printf_stats(seq, mdev->act_log); + put_ldev(mdev); + } + + if (proc_details >= 2) { + if (mdev->resync) { + lc_seq_dump_details(seq, mdev->resync, "rs_left", + resync_dump_detail); + } + } + } + + return 0; +} + +static int drbd_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, drbd_seq_show, PDE(inode)->data); +} + +/* PROC FS stuff end */ diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c new file mode 100644 index 000000000000..63686c4d85cf --- /dev/null +++ b/drivers/block/drbd/drbd_receiver.c @@ -0,0 +1,4456 @@ +/* + drbd_receiver.c + + This file is part of DRBD by Philipp Reisner and Lars Ellenberg. + + Copyright (C) 2001-2008, LINBIT Information Technologies GmbH. + Copyright (C) 1999-2008, Philipp Reisner . + Copyright (C) 2002-2008, Lars Ellenberg . + + drbd is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + drbd is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with drbd; see the file COPYING. If not, write to + the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + */ + + +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#define __KERNEL_SYSCALLS__ +#include +#include +#include +#include +#include +#include +#include "drbd_int.h" +#include "drbd_tracing.h" +#include "drbd_req.h" + +#include "drbd_vli.h" + +struct flush_work { + struct drbd_work w; + struct drbd_epoch *epoch; +}; + +enum finish_epoch { + FE_STILL_LIVE, + FE_DESTROYED, + FE_RECYCLED, +}; + +static int drbd_do_handshake(struct drbd_conf *mdev); +static int drbd_do_auth(struct drbd_conf *mdev); + +static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *, struct drbd_epoch *, enum epoch_event); +static int e_end_block(struct drbd_conf *, struct drbd_work *, int); + +static struct drbd_epoch *previous_epoch(struct drbd_conf *mdev, struct drbd_epoch *epoch) +{ + struct drbd_epoch *prev; + spin_lock(&mdev->epoch_lock); + prev = list_entry(epoch->list.prev, struct drbd_epoch, list); + if (prev == epoch || prev == mdev->current_epoch) + prev = NULL; + spin_unlock(&mdev->epoch_lock); + return prev; +} + +#define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN) + +static struct page *drbd_pp_first_page_or_try_alloc(struct drbd_conf *mdev) +{ + struct page *page = NULL; + + /* Yes, testing drbd_pp_vacant outside the lock is racy. + * So what. It saves a spin_lock. */ + if (drbd_pp_vacant > 0) { + spin_lock(&drbd_pp_lock); + page = drbd_pp_pool; + if (page) { + drbd_pp_pool = (struct page *)page_private(page); + set_page_private(page, 0); /* just to be polite */ + drbd_pp_vacant--; + } + spin_unlock(&drbd_pp_lock); + } + /* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD + * "criss-cross" setup, that might cause write-out on some other DRBD, + * which in turn might block on the other node at this very place. */ + if (!page) + page = alloc_page(GFP_TRY); + if (page) + atomic_inc(&mdev->pp_in_use); + return page; +} + +/* kick lower level device, if we have more than (arbitrary number) + * reference counts on it, which typically are locally submitted io + * requests. don't use unacked_cnt, so we speed up proto A and B, too. */ +static void maybe_kick_lo(struct drbd_conf *mdev) +{ + if (atomic_read(&mdev->local_cnt) >= mdev->net_conf->unplug_watermark) + drbd_kick_lo(mdev); +} + +static void reclaim_net_ee(struct drbd_conf *mdev, struct list_head *to_be_freed) +{ + struct drbd_epoch_entry *e; + struct list_head *le, *tle; + + /* The EEs are always appended to the end of the list. Since + they are sent in order over the wire, they have to finish + in order. As soon as we see the first not finished we can + stop to examine the list... */ + + list_for_each_safe(le, tle, &mdev->net_ee) { + e = list_entry(le, struct drbd_epoch_entry, w.list); + if (drbd_bio_has_active_page(e->private_bio)) + break; + list_move(le, to_be_freed); + } +} + +static void drbd_kick_lo_and_reclaim_net(struct drbd_conf *mdev) +{ + LIST_HEAD(reclaimed); + struct drbd_epoch_entry *e, *t; + + maybe_kick_lo(mdev); + spin_lock_irq(&mdev->req_lock); + reclaim_net_ee(mdev, &reclaimed); + spin_unlock_irq(&mdev->req_lock); + + list_for_each_entry_safe(e, t, &reclaimed, w.list) + drbd_free_ee(mdev, e); +} + +/** + * drbd_pp_alloc() - Returns a page, fails only if a signal comes in + * @mdev: DRBD device. + * @retry: whether or not to retry allocation forever (or until signalled) + * + * Tries to allocate a page, first from our own page pool, then from the + * kernel, unless this allocation would exceed the max_buffers setting. + * If @retry is non-zero, retry until DRBD frees a page somewhere else. + */ +static struct page *drbd_pp_alloc(struct drbd_conf *mdev, int retry) +{ + struct page *page = NULL; + DEFINE_WAIT(wait); + + if (atomic_read(&mdev->pp_in_use) < mdev->net_conf->max_buffers) { + page = drbd_pp_first_page_or_try_alloc(mdev); + if (page) + return page; + } + + for (;;) { + prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE); + + drbd_kick_lo_and_reclaim_net(mdev); + + if (atomic_read(&mdev->pp_in_use) < mdev->net_conf->max_buffers) { + page = drbd_pp_first_page_or_try_alloc(mdev); + if (page) + break; + } + + if (!retry) + break; + + if (signal_pending(current)) { + dev_warn(DEV, "drbd_pp_alloc interrupted!\n"); + break; + } + + schedule(); + } + finish_wait(&drbd_pp_wait, &wait); + + return page; +} + +/* Must not be used from irq, as that may deadlock: see drbd_pp_alloc. + * Is also used from inside an other spin_lock_irq(&mdev->req_lock) */ +static void drbd_pp_free(struct drbd_conf *mdev, struct page *page) +{ + int free_it; + + spin_lock(&drbd_pp_lock); + if (drbd_pp_vacant > (DRBD_MAX_SEGMENT_SIZE/PAGE_SIZE)*minor_count) { + free_it = 1; + } else { + set_page_private(page, (unsigned long)drbd_pp_pool); + drbd_pp_pool = page; + drbd_pp_vacant++; + free_it = 0; + } + spin_unlock(&drbd_pp_lock); + + atomic_dec(&mdev->pp_in_use); + + if (free_it) + __free_page(page); + + wake_up(&drbd_pp_wait); +} + +static void drbd_pp_free_bio_pages(struct drbd_conf *mdev, struct bio *bio) +{ + struct page *p_to_be_freed = NULL; + struct page *page; + struct bio_vec *bvec; + int i; + + spin_lock(&drbd_pp_lock); + __bio_for_each_segment(bvec, bio, i, 0) { + if (drbd_pp_vacant > (DRBD_MAX_SEGMENT_SIZE/PAGE_SIZE)*minor_count) { + set_page_private(bvec->bv_page, (unsigned long)p_to_be_freed); + p_to_be_freed = bvec->bv_page; + } else { + set_page_private(bvec->bv_page, (unsigned long)drbd_pp_pool); + drbd_pp_pool = bvec->bv_page; + drbd_pp_vacant++; + } + } + spin_unlock(&drbd_pp_lock); + atomic_sub(bio->bi_vcnt, &mdev->pp_in_use); + + while (p_to_be_freed) { + page = p_to_be_freed; + p_to_be_freed = (struct page *)page_private(page); + set_page_private(page, 0); /* just to be polite */ + put_page(page); + } + + wake_up(&drbd_pp_wait); +} + +/* +You need to hold the req_lock: + _drbd_wait_ee_list_empty() + +You must not have the req_lock: + drbd_free_ee() + drbd_alloc_ee() + drbd_init_ee() + drbd_release_ee() + drbd_ee_fix_bhs() + drbd_process_done_ee() + drbd_clear_done_ee() + drbd_wait_ee_list_empty() +*/ + +struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev, + u64 id, + sector_t sector, + unsigned int data_size, + gfp_t gfp_mask) __must_hold(local) +{ + struct request_queue *q; + struct drbd_epoch_entry *e; + struct page *page; + struct bio *bio; + unsigned int ds; + + if (FAULT_ACTIVE(mdev, DRBD_FAULT_AL_EE)) + return NULL; + + e = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM); + if (!e) { + if (!(gfp_mask & __GFP_NOWARN)) + dev_err(DEV, "alloc_ee: Allocation of an EE failed\n"); + return NULL; + } + + bio = bio_alloc(gfp_mask & ~__GFP_HIGHMEM, div_ceil(data_size, PAGE_SIZE)); + if (!bio) { + if (!(gfp_mask & __GFP_NOWARN)) + dev_err(DEV, "alloc_ee: Allocation of a bio failed\n"); + goto fail1; + } + + bio->bi_bdev = mdev->ldev->backing_bdev; + bio->bi_sector = sector; + + ds = data_size; + while (ds) { + page = drbd_pp_alloc(mdev, (gfp_mask & __GFP_WAIT)); + if (!page) { + if (!(gfp_mask & __GFP_NOWARN)) + dev_err(DEV, "alloc_ee: Allocation of a page failed\n"); + goto fail2; + } + if (!bio_add_page(bio, page, min_t(int, ds, PAGE_SIZE), 0)) { + drbd_pp_free(mdev, page); + dev_err(DEV, "alloc_ee: bio_add_page(s=%llu," + "data_size=%u,ds=%u) failed\n", + (unsigned long long)sector, data_size, ds); + + q = bdev_get_queue(bio->bi_bdev); + if (q->merge_bvec_fn) { + struct bvec_merge_data bvm = { + .bi_bdev = bio->bi_bdev, + .bi_sector = bio->bi_sector, + .bi_size = bio->bi_size, + .bi_rw = bio->bi_rw, + }; + int l = q->merge_bvec_fn(q, &bvm, + &bio->bi_io_vec[bio->bi_vcnt]); + dev_err(DEV, "merge_bvec_fn() = %d\n", l); + } + + /* dump more of the bio. */ + dev_err(DEV, "bio->bi_max_vecs = %d\n", bio->bi_max_vecs); + dev_err(DEV, "bio->bi_vcnt = %d\n", bio->bi_vcnt); + dev_err(DEV, "bio->bi_size = %d\n", bio->bi_size); + dev_err(DEV, "bio->bi_phys_segments = %d\n", bio->bi_phys_segments); + + goto fail2; + break; + } + ds -= min_t(int, ds, PAGE_SIZE); + } + + D_ASSERT(data_size == bio->bi_size); + + bio->bi_private = e; + e->mdev = mdev; + e->sector = sector; + e->size = bio->bi_size; + + e->private_bio = bio; + e->block_id = id; + INIT_HLIST_NODE(&e->colision); + e->epoch = NULL; + e->flags = 0; + + trace_drbd_ee(mdev, e, "allocated"); + + return e; + + fail2: + drbd_pp_free_bio_pages(mdev, bio); + bio_put(bio); + fail1: + mempool_free(e, drbd_ee_mempool); + + return NULL; +} + +void drbd_free_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e) +{ + struct bio *bio = e->private_bio; + trace_drbd_ee(mdev, e, "freed"); + drbd_pp_free_bio_pages(mdev, bio); + bio_put(bio); + D_ASSERT(hlist_unhashed(&e->colision)); + mempool_free(e, drbd_ee_mempool); +} + +int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list) +{ + LIST_HEAD(work_list); + struct drbd_epoch_entry *e, *t; + int count = 0; + + spin_lock_irq(&mdev->req_lock); + list_splice_init(list, &work_list); + spin_unlock_irq(&mdev->req_lock); + + list_for_each_entry_safe(e, t, &work_list, w.list) { + drbd_free_ee(mdev, e); + count++; + } + return count; +} + + +/* + * This function is called from _asender only_ + * but see also comments in _req_mod(,barrier_acked) + * and receive_Barrier. + * + * Move entries from net_ee to done_ee, if ready. + * Grab done_ee, call all callbacks, free the entries. + * The callbacks typically send out ACKs. + */ +static int drbd_process_done_ee(struct drbd_conf *mdev) +{ + LIST_HEAD(work_list); + LIST_HEAD(reclaimed); + struct drbd_epoch_entry *e, *t; + int ok = (mdev->state.conn >= C_WF_REPORT_PARAMS); + + spin_lock_irq(&mdev->req_lock); + reclaim_net_ee(mdev, &reclaimed); + list_splice_init(&mdev->done_ee, &work_list); + spin_unlock_irq(&mdev->req_lock); + + list_for_each_entry_safe(e, t, &reclaimed, w.list) + drbd_free_ee(mdev, e); + + /* possible callbacks here: + * e_end_block, and e_end_resync_block, e_send_discard_ack. + * all ignore the last argument. + */ + list_for_each_entry_safe(e, t, &work_list, w.list) { + trace_drbd_ee(mdev, e, "process_done_ee"); + /* list_del not necessary, next/prev members not touched */ + ok = e->w.cb(mdev, &e->w, !ok) && ok; + drbd_free_ee(mdev, e); + } + wake_up(&mdev->ee_wait); + + return ok; +} + +void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head) +{ + DEFINE_WAIT(wait); + + /* avoids spin_lock/unlock + * and calling prepare_to_wait in the fast path */ + while (!list_empty(head)) { + prepare_to_wait(&mdev->ee_wait, &wait, TASK_UNINTERRUPTIBLE); + spin_unlock_irq(&mdev->req_lock); + drbd_kick_lo(mdev); + schedule(); + finish_wait(&mdev->ee_wait, &wait); + spin_lock_irq(&mdev->req_lock); + } +} + +void drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head) +{ + spin_lock_irq(&mdev->req_lock); + _drbd_wait_ee_list_empty(mdev, head); + spin_unlock_irq(&mdev->req_lock); +} + +/* see also kernel_accept; which is only present since 2.6.18. + * also we want to log which part of it failed, exactly */ +static int drbd_accept(struct drbd_conf *mdev, const char **what, + struct socket *sock, struct socket **newsock) +{ + struct sock *sk = sock->sk; + int err = 0; + + *what = "listen"; + err = sock->ops->listen(sock, 5); + if (err < 0) + goto out; + + *what = "sock_create_lite"; + err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol, + newsock); + if (err < 0) + goto out; + + *what = "accept"; + err = sock->ops->accept(sock, *newsock, 0); + if (err < 0) { + sock_release(*newsock); + *newsock = NULL; + goto out; + } + (*newsock)->ops = sock->ops; + +out: + return err; +} + +static int drbd_recv_short(struct drbd_conf *mdev, struct socket *sock, + void *buf, size_t size, int flags) +{ + mm_segment_t oldfs; + struct kvec iov = { + .iov_base = buf, + .iov_len = size, + }; + struct msghdr msg = { + .msg_iovlen = 1, + .msg_iov = (struct iovec *)&iov, + .msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL) + }; + int rv; + + oldfs = get_fs(); + set_fs(KERNEL_DS); + rv = sock_recvmsg(sock, &msg, size, msg.msg_flags); + set_fs(oldfs); + + return rv; +} + +static int drbd_recv(struct drbd_conf *mdev, void *buf, size_t size) +{ + mm_segment_t oldfs; + struct kvec iov = { + .iov_base = buf, + .iov_len = size, + }; + struct msghdr msg = { + .msg_iovlen = 1, + .msg_iov = (struct iovec *)&iov, + .msg_flags = MSG_WAITALL | MSG_NOSIGNAL + }; + int rv; + + oldfs = get_fs(); + set_fs(KERNEL_DS); + + for (;;) { + rv = sock_recvmsg(mdev->data.socket, &msg, size, msg.msg_flags); + if (rv == size) + break; + + /* Note: + * ECONNRESET other side closed the connection + * ERESTARTSYS (on sock) we got a signal + */ + + if (rv < 0) { + if (rv == -ECONNRESET) + dev_info(DEV, "sock was reset by peer\n"); + else if (rv != -ERESTARTSYS) + dev_err(DEV, "sock_recvmsg returned %d\n", rv); + break; + } else if (rv == 0) { + dev_info(DEV, "sock was shut down by peer\n"); + break; + } else { + /* signal came in, or peer/link went down, + * after we read a partial message + */ + /* D_ASSERT(signal_pending(current)); */ + break; + } + }; + + set_fs(oldfs); + + if (rv != size) + drbd_force_state(mdev, NS(conn, C_BROKEN_PIPE)); + + return rv; +} + +static struct socket *drbd_try_connect(struct drbd_conf *mdev) +{ + const char *what; + struct socket *sock; + struct sockaddr_in6 src_in6; + int err; + int disconnect_on_error = 1; + + if (!get_net_conf(mdev)) + return NULL; + + what = "sock_create_kern"; + err = sock_create_kern(((struct sockaddr *)mdev->net_conf->my_addr)->sa_family, + SOCK_STREAM, IPPROTO_TCP, &sock); + if (err < 0) { + sock = NULL; + goto out; + } + + sock->sk->sk_rcvtimeo = + sock->sk->sk_sndtimeo = mdev->net_conf->try_connect_int*HZ; + + /* explicitly bind to the configured IP as source IP + * for the outgoing connections. + * This is needed for multihomed hosts and to be + * able to use lo: interfaces for drbd. + * Make sure to use 0 as port number, so linux selects + * a free one dynamically. + */ + memcpy(&src_in6, mdev->net_conf->my_addr, + min_t(int, mdev->net_conf->my_addr_len, sizeof(src_in6))); + if (((struct sockaddr *)mdev->net_conf->my_addr)->sa_family == AF_INET6) + src_in6.sin6_port = 0; + else + ((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */ + + what = "bind before connect"; + err = sock->ops->bind(sock, + (struct sockaddr *) &src_in6, + mdev->net_conf->my_addr_len); + if (err < 0) + goto out; + + /* connect may fail, peer not yet available. + * stay C_WF_CONNECTION, don't go Disconnecting! */ + disconnect_on_error = 0; + what = "connect"; + err = sock->ops->connect(sock, + (struct sockaddr *)mdev->net_conf->peer_addr, + mdev->net_conf->peer_addr_len, 0); + +out: + if (err < 0) { + if (sock) { + sock_release(sock); + sock = NULL; + } + switch (-err) { + /* timeout, busy, signal pending */ + case ETIMEDOUT: case EAGAIN: case EINPROGRESS: + case EINTR: case ERESTARTSYS: + /* peer not (yet) available, network problem */ + case ECONNREFUSED: case ENETUNREACH: + case EHOSTDOWN: case EHOSTUNREACH: + disconnect_on_error = 0; + break; + default: + dev_err(DEV, "%s failed, err = %d\n", what, err); + } + if (disconnect_on_error) + drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); + } + put_net_conf(mdev); + return sock; +} + +static struct socket *drbd_wait_for_connect(struct drbd_conf *mdev) +{ + int timeo, err; + struct socket *s_estab = NULL, *s_listen; + const char *what; + + if (!get_net_conf(mdev)) + return NULL; + + what = "sock_create_kern"; + err = sock_create_kern(((struct sockaddr *)mdev->net_conf->my_addr)->sa_family, + SOCK_STREAM, IPPROTO_TCP, &s_listen); + if (err) { + s_listen = NULL; + goto out; + } + + timeo = mdev->net_conf->try_connect_int * HZ; + timeo += (random32() & 1) ? timeo / 7 : -timeo / 7; /* 28.5% random jitter */ + + s_listen->sk->sk_reuse = 1; /* SO_REUSEADDR */ + s_listen->sk->sk_rcvtimeo = timeo; + s_listen->sk->sk_sndtimeo = timeo; + + what = "bind before listen"; + err = s_listen->ops->bind(s_listen, + (struct sockaddr *) mdev->net_conf->my_addr, + mdev->net_conf->my_addr_len); + if (err < 0) + goto out; + + err = drbd_accept(mdev, &what, s_listen, &s_estab); + +out: + if (s_listen) + sock_release(s_listen); + if (err < 0) { + if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) { + dev_err(DEV, "%s failed, err = %d\n", what, err); + drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); + } + } + put_net_conf(mdev); + + return s_estab; +} + +static int drbd_send_fp(struct drbd_conf *mdev, + struct socket *sock, enum drbd_packets cmd) +{ + struct p_header *h = (struct p_header *) &mdev->data.sbuf.header; + + return _drbd_send_cmd(mdev, sock, cmd, h, sizeof(*h), 0); +} + +static enum drbd_packets drbd_recv_fp(struct drbd_conf *mdev, struct socket *sock) +{ + struct p_header *h = (struct p_header *) &mdev->data.sbuf.header; + int rr; + + rr = drbd_recv_short(mdev, sock, h, sizeof(*h), 0); + + if (rr == sizeof(*h) && h->magic == BE_DRBD_MAGIC) + return be16_to_cpu(h->command); + + return 0xffff; +} + +/** + * drbd_socket_okay() - Free the socket if its connection is not okay + * @mdev: DRBD device. + * @sock: pointer to the pointer to the socket. + */ +static int drbd_socket_okay(struct drbd_conf *mdev, struct socket **sock) +{ + int rr; + char tb[4]; + + if (!*sock) + return FALSE; + + rr = drbd_recv_short(mdev, *sock, tb, 4, MSG_DONTWAIT | MSG_PEEK); + + if (rr > 0 || rr == -EAGAIN) { + return TRUE; + } else { + sock_release(*sock); + *sock = NULL; + return FALSE; + } +} + +/* + * return values: + * 1 yes, we have a valid connection + * 0 oops, did not work out, please try again + * -1 peer talks different language, + * no point in trying again, please go standalone. + * -2 We do not have a network config... + */ +static int drbd_connect(struct drbd_conf *mdev) +{ + struct socket *s, *sock, *msock; + int try, h, ok; + + D_ASSERT(!mdev->data.socket); + + if (test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) + dev_err(DEV, "CREATE_BARRIER flag was set in drbd_connect - now cleared!\n"); + + if (drbd_request_state(mdev, NS(conn, C_WF_CONNECTION)) < SS_SUCCESS) + return -2; + + clear_bit(DISCARD_CONCURRENT, &mdev->flags); + + sock = NULL; + msock = NULL; + + do { + for (try = 0;;) { + /* 3 tries, this should take less than a second! */ + s = drbd_try_connect(mdev); + if (s || ++try >= 3) + break; + /* give the other side time to call bind() & listen() */ + __set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(HZ / 10); + } + + if (s) { + if (!sock) { + drbd_send_fp(mdev, s, P_HAND_SHAKE_S); + sock = s; + s = NULL; + } else if (!msock) { + drbd_send_fp(mdev, s, P_HAND_SHAKE_M); + msock = s; + s = NULL; + } else { + dev_err(DEV, "Logic error in drbd_connect()\n"); + goto out_release_sockets; + } + } + + if (sock && msock) { + __set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(HZ / 10); + ok = drbd_socket_okay(mdev, &sock); + ok = drbd_socket_okay(mdev, &msock) && ok; + if (ok) + break; + } + +retry: + s = drbd_wait_for_connect(mdev); + if (s) { + try = drbd_recv_fp(mdev, s); + drbd_socket_okay(mdev, &sock); + drbd_socket_okay(mdev, &msock); + switch (try) { + case P_HAND_SHAKE_S: + if (sock) { + dev_warn(DEV, "initial packet S crossed\n"); + sock_release(sock); + } + sock = s; + break; + case P_HAND_SHAKE_M: + if (msock) { + dev_warn(DEV, "initial packet M crossed\n"); + sock_release(msock); + } + msock = s; + set_bit(DISCARD_CONCURRENT, &mdev->flags); + break; + default: + dev_warn(DEV, "Error receiving initial packet\n"); + sock_release(s); + if (random32() & 1) + goto retry; + } + } + + if (mdev->state.conn <= C_DISCONNECTING) + goto out_release_sockets; + if (signal_pending(current)) { + flush_signals(current); + smp_rmb(); + if (get_t_state(&mdev->receiver) == Exiting) + goto out_release_sockets; + } + + if (sock && msock) { + ok = drbd_socket_okay(mdev, &sock); + ok = drbd_socket_okay(mdev, &msock) && ok; + if (ok) + break; + } + } while (1); + + msock->sk->sk_reuse = 1; /* SO_REUSEADDR */ + sock->sk->sk_reuse = 1; /* SO_REUSEADDR */ + + sock->sk->sk_allocation = GFP_NOIO; + msock->sk->sk_allocation = GFP_NOIO; + + sock->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK; + msock->sk->sk_priority = TC_PRIO_INTERACTIVE; + + if (mdev->net_conf->sndbuf_size) { + sock->sk->sk_sndbuf = mdev->net_conf->sndbuf_size; + sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK; + } + + if (mdev->net_conf->rcvbuf_size) { + sock->sk->sk_rcvbuf = mdev->net_conf->rcvbuf_size; + sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK; + } + + /* NOT YET ... + * sock->sk->sk_sndtimeo = mdev->net_conf->timeout*HZ/10; + * sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; + * first set it to the P_HAND_SHAKE timeout, + * which we set to 4x the configured ping_timeout. */ + sock->sk->sk_sndtimeo = + sock->sk->sk_rcvtimeo = mdev->net_conf->ping_timeo*4*HZ/10; + + msock->sk->sk_sndtimeo = mdev->net_conf->timeout*HZ/10; + msock->sk->sk_rcvtimeo = mdev->net_conf->ping_int*HZ; + + /* we don't want delays. + * we use TCP_CORK where apropriate, though */ + drbd_tcp_nodelay(sock); + drbd_tcp_nodelay(msock); + + mdev->data.socket = sock; + mdev->meta.socket = msock; + mdev->last_received = jiffies; + + D_ASSERT(mdev->asender.task == NULL); + + h = drbd_do_handshake(mdev); + if (h <= 0) + return h; + + if (mdev->cram_hmac_tfm) { + /* drbd_request_state(mdev, NS(conn, WFAuth)); */ + if (!drbd_do_auth(mdev)) { + dev_err(DEV, "Authentication of peer failed\n"); + return -1; + } + } + + if (drbd_request_state(mdev, NS(conn, C_WF_REPORT_PARAMS)) < SS_SUCCESS) + return 0; + + sock->sk->sk_sndtimeo = mdev->net_conf->timeout*HZ/10; + sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; + + atomic_set(&mdev->packet_seq, 0); + mdev->peer_seq = 0; + + drbd_thread_start(&mdev->asender); + + drbd_send_protocol(mdev); + drbd_send_sync_param(mdev, &mdev->sync_conf); + drbd_send_sizes(mdev, 0); + drbd_send_uuids(mdev); + drbd_send_state(mdev); + clear_bit(USE_DEGR_WFC_T, &mdev->flags); + clear_bit(RESIZE_PENDING, &mdev->flags); + + return 1; + +out_release_sockets: + if (sock) + sock_release(sock); + if (msock) + sock_release(msock); + return -1; +} + +static int drbd_recv_header(struct drbd_conf *mdev, struct p_header *h) +{ + int r; + + r = drbd_recv(mdev, h, sizeof(*h)); + + if (unlikely(r != sizeof(*h))) { + dev_err(DEV, "short read expecting header on sock: r=%d\n", r); + return FALSE; + }; + h->command = be16_to_cpu(h->command); + h->length = be16_to_cpu(h->length); + if (unlikely(h->magic != BE_DRBD_MAGIC)) { + dev_err(DEV, "magic?? on data m: 0x%lx c: %d l: %d\n", + (long)be32_to_cpu(h->magic), + h->command, h->length); + return FALSE; + } + mdev->last_received = jiffies; + + return TRUE; +} + +static enum finish_epoch drbd_flush_after_epoch(struct drbd_conf *mdev, struct drbd_epoch *epoch) +{ + int rv; + + if (mdev->write_ordering >= WO_bdev_flush && get_ldev(mdev)) { + rv = blkdev_issue_flush(mdev->ldev->backing_bdev, NULL); + if (rv) { + dev_err(DEV, "local disk flush failed with status %d\n", rv); + /* would rather check on EOPNOTSUPP, but that is not reliable. + * don't try again for ANY return value != 0 + * if (rv == -EOPNOTSUPP) */ + drbd_bump_write_ordering(mdev, WO_drain_io); + } + put_ldev(mdev); + } + + return drbd_may_finish_epoch(mdev, epoch, EV_BARRIER_DONE); +} + +static int w_flush(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +{ + struct flush_work *fw = (struct flush_work *)w; + struct drbd_epoch *epoch = fw->epoch; + + kfree(w); + + if (!test_and_set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags)) + drbd_flush_after_epoch(mdev, epoch); + + drbd_may_finish_epoch(mdev, epoch, EV_PUT | + (mdev->state.conn < C_CONNECTED ? EV_CLEANUP : 0)); + + return 1; +} + +/** + * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it. + * @mdev: DRBD device. + * @epoch: Epoch object. + * @ev: Epoch event. + */ +static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev, + struct drbd_epoch *epoch, + enum epoch_event ev) +{ + int finish, epoch_size; + struct drbd_epoch *next_epoch; + int schedule_flush = 0; + enum finish_epoch rv = FE_STILL_LIVE; + + spin_lock(&mdev->epoch_lock); + do { + next_epoch = NULL; + finish = 0; + + epoch_size = atomic_read(&epoch->epoch_size); + + switch (ev & ~EV_CLEANUP) { + case EV_PUT: + atomic_dec(&epoch->active); + break; + case EV_GOT_BARRIER_NR: + set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags); + + /* Special case: If we just switched from WO_bio_barrier to + WO_bdev_flush we should not finish the current epoch */ + if (test_bit(DE_CONTAINS_A_BARRIER, &epoch->flags) && epoch_size == 1 && + mdev->write_ordering != WO_bio_barrier && + epoch == mdev->current_epoch) + clear_bit(DE_CONTAINS_A_BARRIER, &epoch->flags); + break; + case EV_BARRIER_DONE: + set_bit(DE_BARRIER_IN_NEXT_EPOCH_DONE, &epoch->flags); + break; + case EV_BECAME_LAST: + /* nothing to do*/ + break; + } + + trace_drbd_epoch(mdev, epoch, ev); + + if (epoch_size != 0 && + atomic_read(&epoch->active) == 0 && + test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) && + epoch->list.prev == &mdev->current_epoch->list && + !test_bit(DE_IS_FINISHING, &epoch->flags)) { + /* Nearly all conditions are met to finish that epoch... */ + if (test_bit(DE_BARRIER_IN_NEXT_EPOCH_DONE, &epoch->flags) || + mdev->write_ordering == WO_none || + (epoch_size == 1 && test_bit(DE_CONTAINS_A_BARRIER, &epoch->flags)) || + ev & EV_CLEANUP) { + finish = 1; + set_bit(DE_IS_FINISHING, &epoch->flags); + } else if (!test_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags) && + mdev->write_ordering == WO_bio_barrier) { + atomic_inc(&epoch->active); + schedule_flush = 1; + } + } + if (finish) { + if (!(ev & EV_CLEANUP)) { + spin_unlock(&mdev->epoch_lock); + drbd_send_b_ack(mdev, epoch->barrier_nr, epoch_size); + spin_lock(&mdev->epoch_lock); + } + dec_unacked(mdev); + + if (mdev->current_epoch != epoch) { + next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list); + list_del(&epoch->list); + ev = EV_BECAME_LAST | (ev & EV_CLEANUP); + mdev->epochs--; + trace_drbd_epoch(mdev, epoch, EV_TRACE_FREE); + kfree(epoch); + + if (rv == FE_STILL_LIVE) + rv = FE_DESTROYED; + } else { + epoch->flags = 0; + atomic_set(&epoch->epoch_size, 0); + /* atomic_set(&epoch->active, 0); is alrady zero */ + if (rv == FE_STILL_LIVE) + rv = FE_RECYCLED; + } + } + + if (!next_epoch) + break; + + epoch = next_epoch; + } while (1); + + spin_unlock(&mdev->epoch_lock); + + if (schedule_flush) { + struct flush_work *fw; + fw = kmalloc(sizeof(*fw), GFP_ATOMIC); + if (fw) { + trace_drbd_epoch(mdev, epoch, EV_TRACE_FLUSH); + fw->w.cb = w_flush; + fw->epoch = epoch; + drbd_queue_work(&mdev->data.work, &fw->w); + } else { + dev_warn(DEV, "Could not kmalloc a flush_work obj\n"); + set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags); + /* That is not a recursion, only one level */ + drbd_may_finish_epoch(mdev, epoch, EV_BARRIER_DONE); + drbd_may_finish_epoch(mdev, epoch, EV_PUT); + } + } + + return rv; +} + +/** + * drbd_bump_write_ordering() - Fall back to an other write ordering method + * @mdev: DRBD device. + * @wo: Write ordering method to try. + */ +void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo) __must_hold(local) +{ + enum write_ordering_e pwo; + static char *write_ordering_str[] = { + [WO_none] = "none", + [WO_drain_io] = "drain", + [WO_bdev_flush] = "flush", + [WO_bio_barrier] = "barrier", + }; + + pwo = mdev->write_ordering; + wo = min(pwo, wo); + if (wo == WO_bio_barrier && mdev->ldev->dc.no_disk_barrier) + wo = WO_bdev_flush; + if (wo == WO_bdev_flush && mdev->ldev->dc.no_disk_flush) + wo = WO_drain_io; + if (wo == WO_drain_io && mdev->ldev->dc.no_disk_drain) + wo = WO_none; + mdev->write_ordering = wo; + if (pwo != mdev->write_ordering || wo == WO_bio_barrier) + dev_info(DEV, "Method to ensure write ordering: %s\n", write_ordering_str[mdev->write_ordering]); +} + +/** + * w_e_reissue() - Worker callback; Resubmit a bio, without BIO_RW_BARRIER set + * @mdev: DRBD device. + * @w: work object. + * @cancel: The connection will be closed anyways (unused in this callback) + */ +int w_e_reissue(struct drbd_conf *mdev, struct drbd_work *w, int cancel) __releases(local) +{ + struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w; + struct bio *bio = e->private_bio; + + /* We leave DE_CONTAINS_A_BARRIER and EE_IS_BARRIER in place, + (and DE_BARRIER_IN_NEXT_EPOCH_ISSUED in the previous Epoch) + so that we can finish that epoch in drbd_may_finish_epoch(). + That is necessary if we already have a long chain of Epochs, before + we realize that BIO_RW_BARRIER is actually not supported */ + + /* As long as the -ENOTSUPP on the barrier is reported immediately + that will never trigger. If it is reported late, we will just + print that warning and continue correctly for all future requests + with WO_bdev_flush */ + if (previous_epoch(mdev, e->epoch)) + dev_warn(DEV, "Write ordering was not enforced (one time event)\n"); + + /* prepare bio for re-submit, + * re-init volatile members */ + /* we still have a local reference, + * get_ldev was done in receive_Data. */ + bio->bi_bdev = mdev->ldev->backing_bdev; + bio->bi_sector = e->sector; + bio->bi_size = e->size; + bio->bi_idx = 0; + + bio->bi_flags &= ~(BIO_POOL_MASK - 1); + bio->bi_flags |= 1 << BIO_UPTODATE; + + /* don't know whether this is necessary: */ + bio->bi_phys_segments = 0; + bio->bi_next = NULL; + + /* these should be unchanged: */ + /* bio->bi_end_io = drbd_endio_write_sec; */ + /* bio->bi_vcnt = whatever; */ + + e->w.cb = e_end_block; + + /* This is no longer a barrier request. */ + bio->bi_rw &= ~(1UL << BIO_RW_BARRIER); + + drbd_generic_make_request(mdev, DRBD_FAULT_DT_WR, bio); + + return 1; +} + +static int receive_Barrier(struct drbd_conf *mdev, struct p_header *h) +{ + int rv, issue_flush; + struct p_barrier *p = (struct p_barrier *)h; + struct drbd_epoch *epoch; + + ERR_IF(h->length != (sizeof(*p)-sizeof(*h))) return FALSE; + + rv = drbd_recv(mdev, h->payload, h->length); + ERR_IF(rv != h->length) return FALSE; + + inc_unacked(mdev); + + if (mdev->net_conf->wire_protocol != DRBD_PROT_C) + drbd_kick_lo(mdev); + + mdev->current_epoch->barrier_nr = p->barrier; + rv = drbd_may_finish_epoch(mdev, mdev->current_epoch, EV_GOT_BARRIER_NR); + + /* P_BARRIER_ACK may imply that the corresponding extent is dropped from + * the activity log, which means it would not be resynced in case the + * R_PRIMARY crashes now. + * Therefore we must send the barrier_ack after the barrier request was + * completed. */ + switch (mdev->write_ordering) { + case WO_bio_barrier: + case WO_none: + if (rv == FE_RECYCLED) + return TRUE; + break; + + case WO_bdev_flush: + case WO_drain_io: + D_ASSERT(rv == FE_STILL_LIVE); + set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &mdev->current_epoch->flags); + drbd_wait_ee_list_empty(mdev, &mdev->active_ee); + rv = drbd_flush_after_epoch(mdev, mdev->current_epoch); + if (rv == FE_RECYCLED) + return TRUE; + + /* The asender will send all the ACKs and barrier ACKs out, since + all EEs moved from the active_ee to the done_ee. We need to + provide a new epoch object for the EEs that come in soon */ + break; + } + + /* receiver context, in the writeout path of the other node. + * avoid potential distributed deadlock */ + epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO); + if (!epoch) { + dev_warn(DEV, "Allocation of an epoch failed, slowing down\n"); + issue_flush = !test_and_set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags); + drbd_wait_ee_list_empty(mdev, &mdev->active_ee); + if (issue_flush) { + rv = drbd_flush_after_epoch(mdev, mdev->current_epoch); + if (rv == FE_RECYCLED) + return TRUE; + } + + drbd_wait_ee_list_empty(mdev, &mdev->done_ee); + + return TRUE; + } + + epoch->flags = 0; + atomic_set(&epoch->epoch_size, 0); + atomic_set(&epoch->active, 0); + + spin_lock(&mdev->epoch_lock); + if (atomic_read(&mdev->current_epoch->epoch_size)) { + list_add(&epoch->list, &mdev->current_epoch->list); + mdev->current_epoch = epoch; + mdev->epochs++; + trace_drbd_epoch(mdev, epoch, EV_TRACE_ALLOC); + } else { + /* The current_epoch got recycled while we allocated this one... */ + kfree(epoch); + } + spin_unlock(&mdev->epoch_lock); + + return TRUE; +} + +/* used from receive_RSDataReply (recv_resync_read) + * and from receive_Data */ +static struct drbd_epoch_entry * +read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, int data_size) __must_hold(local) +{ + struct drbd_epoch_entry *e; + struct bio_vec *bvec; + struct page *page; + struct bio *bio; + int dgs, ds, i, rr; + void *dig_in = mdev->int_dig_in; + void *dig_vv = mdev->int_dig_vv; + + dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_r_tfm) ? + crypto_hash_digestsize(mdev->integrity_r_tfm) : 0; + + if (dgs) { + rr = drbd_recv(mdev, dig_in, dgs); + if (rr != dgs) { + dev_warn(DEV, "short read receiving data digest: read %d expected %d\n", + rr, dgs); + return NULL; + } + } + + data_size -= dgs; + + ERR_IF(data_size & 0x1ff) return NULL; + ERR_IF(data_size > DRBD_MAX_SEGMENT_SIZE) return NULL; + + /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD + * "criss-cross" setup, that might cause write-out on some other DRBD, + * which in turn might block on the other node at this very place. */ + e = drbd_alloc_ee(mdev, id, sector, data_size, GFP_NOIO); + if (!e) + return NULL; + bio = e->private_bio; + ds = data_size; + bio_for_each_segment(bvec, bio, i) { + page = bvec->bv_page; + rr = drbd_recv(mdev, kmap(page), min_t(int, ds, PAGE_SIZE)); + kunmap(page); + if (rr != min_t(int, ds, PAGE_SIZE)) { + drbd_free_ee(mdev, e); + dev_warn(DEV, "short read receiving data: read %d expected %d\n", + rr, min_t(int, ds, PAGE_SIZE)); + return NULL; + } + ds -= rr; + } + + if (dgs) { + drbd_csum(mdev, mdev->integrity_r_tfm, bio, dig_vv); + if (memcmp(dig_in, dig_vv, dgs)) { + dev_err(DEV, "Digest integrity check FAILED.\n"); + drbd_bcast_ee(mdev, "digest failed", + dgs, dig_in, dig_vv, e); + drbd_free_ee(mdev, e); + return NULL; + } + } + mdev->recv_cnt += data_size>>9; + return e; +} + +/* drbd_drain_block() just takes a data block + * out of the socket input buffer, and discards it. + */ +static int drbd_drain_block(struct drbd_conf *mdev, int data_size) +{ + struct page *page; + int rr, rv = 1; + void *data; + + page = drbd_pp_alloc(mdev, 1); + + data = kmap(page); + while (data_size) { + rr = drbd_recv(mdev, data, min_t(int, data_size, PAGE_SIZE)); + if (rr != min_t(int, data_size, PAGE_SIZE)) { + rv = 0; + dev_warn(DEV, "short read receiving data: read %d expected %d\n", + rr, min_t(int, data_size, PAGE_SIZE)); + break; + } + data_size -= rr; + } + kunmap(page); + drbd_pp_free(mdev, page); + return rv; +} + +static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req, + sector_t sector, int data_size) +{ + struct bio_vec *bvec; + struct bio *bio; + int dgs, rr, i, expect; + void *dig_in = mdev->int_dig_in; + void *dig_vv = mdev->int_dig_vv; + + dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_r_tfm) ? + crypto_hash_digestsize(mdev->integrity_r_tfm) : 0; + + if (dgs) { + rr = drbd_recv(mdev, dig_in, dgs); + if (rr != dgs) { + dev_warn(DEV, "short read receiving data reply digest: read %d expected %d\n", + rr, dgs); + return 0; + } + } + + data_size -= dgs; + + /* optimistically update recv_cnt. if receiving fails below, + * we disconnect anyways, and counters will be reset. */ + mdev->recv_cnt += data_size>>9; + + bio = req->master_bio; + D_ASSERT(sector == bio->bi_sector); + + bio_for_each_segment(bvec, bio, i) { + expect = min_t(int, data_size, bvec->bv_len); + rr = drbd_recv(mdev, + kmap(bvec->bv_page)+bvec->bv_offset, + expect); + kunmap(bvec->bv_page); + if (rr != expect) { + dev_warn(DEV, "short read receiving data reply: " + "read %d expected %d\n", + rr, expect); + return 0; + } + data_size -= rr; + } + + if (dgs) { + drbd_csum(mdev, mdev->integrity_r_tfm, bio, dig_vv); + if (memcmp(dig_in, dig_vv, dgs)) { + dev_err(DEV, "Digest integrity check FAILED. Broken NICs?\n"); + return 0; + } + } + + D_ASSERT(data_size == 0); + return 1; +} + +/* e_end_resync_block() is called via + * drbd_process_done_ee() by asender only */ +static int e_end_resync_block(struct drbd_conf *mdev, struct drbd_work *w, int unused) +{ + struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w; + sector_t sector = e->sector; + int ok; + + D_ASSERT(hlist_unhashed(&e->colision)); + + if (likely(drbd_bio_uptodate(e->private_bio))) { + drbd_set_in_sync(mdev, sector, e->size); + ok = drbd_send_ack(mdev, P_RS_WRITE_ACK, e); + } else { + /* Record failure to sync */ + drbd_rs_failed_io(mdev, sector, e->size); + + ok = drbd_send_ack(mdev, P_NEG_ACK, e); + } + dec_unacked(mdev); + + return ok; +} + +static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_size) __releases(local) +{ + struct drbd_epoch_entry *e; + + e = read_in_block(mdev, ID_SYNCER, sector, data_size); + if (!e) { + put_ldev(mdev); + return FALSE; + } + + dec_rs_pending(mdev); + + e->private_bio->bi_end_io = drbd_endio_write_sec; + e->private_bio->bi_rw = WRITE; + e->w.cb = e_end_resync_block; + + inc_unacked(mdev); + /* corresponding dec_unacked() in e_end_resync_block() + * respective _drbd_clear_done_ee */ + + spin_lock_irq(&mdev->req_lock); + list_add(&e->w.list, &mdev->sync_ee); + spin_unlock_irq(&mdev->req_lock); + + trace_drbd_ee(mdev, e, "submitting for (rs)write"); + trace_drbd_bio(mdev, "Sec", e->private_bio, 0, NULL); + drbd_generic_make_request(mdev, DRBD_FAULT_RS_WR, e->private_bio); + /* accounting done in endio */ + + maybe_kick_lo(mdev); + return TRUE; +} + +static int receive_DataReply(struct drbd_conf *mdev, struct p_header *h) +{ + struct drbd_request *req; + sector_t sector; + unsigned int header_size, data_size; + int ok; + struct p_data *p = (struct p_data *)h; + + header_size = sizeof(*p) - sizeof(*h); + data_size = h->length - header_size; + + ERR_IF(data_size == 0) return FALSE; + + if (drbd_recv(mdev, h->payload, header_size) != header_size) + return FALSE; + + sector = be64_to_cpu(p->sector); + + spin_lock_irq(&mdev->req_lock); + req = _ar_id_to_req(mdev, p->block_id, sector); + spin_unlock_irq(&mdev->req_lock); + if (unlikely(!req)) { + dev_err(DEV, "Got a corrupt block_id/sector pair(1).\n"); + return FALSE; + } + + /* hlist_del(&req->colision) is done in _req_may_be_done, to avoid + * special casing it there for the various failure cases. + * still no race with drbd_fail_pending_reads */ + ok = recv_dless_read(mdev, req, sector, data_size); + + if (ok) + req_mod(req, data_received); + /* else: nothing. handled from drbd_disconnect... + * I don't think we may complete this just yet + * in case we are "on-disconnect: freeze" */ + + return ok; +} + +static int receive_RSDataReply(struct drbd_conf *mdev, struct p_header *h) +{ + sector_t sector; + unsigned int header_size, data_size; + int ok; + struct p_data *p = (struct p_data *)h; + + header_size = sizeof(*p) - sizeof(*h); + data_size = h->length - header_size; + + ERR_IF(data_size == 0) return FALSE; + + if (drbd_recv(mdev, h->payload, header_size) != header_size) + return FALSE; + + sector = be64_to_cpu(p->sector); + D_ASSERT(p->block_id == ID_SYNCER); + + if (get_ldev(mdev)) { + /* data is submitted to disk within recv_resync_read. + * corresponding put_ldev done below on error, + * or in drbd_endio_write_sec. */ + ok = recv_resync_read(mdev, sector, data_size); + } else { + if (__ratelimit(&drbd_ratelimit_state)) + dev_err(DEV, "Can not write resync data to local disk.\n"); + + ok = drbd_drain_block(mdev, data_size); + + drbd_send_ack_dp(mdev, P_NEG_ACK, p); + } + + return ok; +} + +/* e_end_block() is called via drbd_process_done_ee(). + * this means this function only runs in the asender thread + */ +static int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +{ + struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w; + sector_t sector = e->sector; + struct drbd_epoch *epoch; + int ok = 1, pcmd; + + if (e->flags & EE_IS_BARRIER) { + epoch = previous_epoch(mdev, e->epoch); + if (epoch) + drbd_may_finish_epoch(mdev, epoch, EV_BARRIER_DONE + (cancel ? EV_CLEANUP : 0)); + } + + if (mdev->net_conf->wire_protocol == DRBD_PROT_C) { + if (likely(drbd_bio_uptodate(e->private_bio))) { + pcmd = (mdev->state.conn >= C_SYNC_SOURCE && + mdev->state.conn <= C_PAUSED_SYNC_T && + e->flags & EE_MAY_SET_IN_SYNC) ? + P_RS_WRITE_ACK : P_WRITE_ACK; + ok &= drbd_send_ack(mdev, pcmd, e); + if (pcmd == P_RS_WRITE_ACK) + drbd_set_in_sync(mdev, sector, e->size); + } else { + ok = drbd_send_ack(mdev, P_NEG_ACK, e); + /* we expect it to be marked out of sync anyways... + * maybe assert this? */ + } + dec_unacked(mdev); + } + /* we delete from the conflict detection hash _after_ we sent out the + * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */ + if (mdev->net_conf->two_primaries) { + spin_lock_irq(&mdev->req_lock); + D_ASSERT(!hlist_unhashed(&e->colision)); + hlist_del_init(&e->colision); + spin_unlock_irq(&mdev->req_lock); + } else { + D_ASSERT(hlist_unhashed(&e->colision)); + } + + drbd_may_finish_epoch(mdev, e->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0)); + + return ok; +} + +static int e_send_discard_ack(struct drbd_conf *mdev, struct drbd_work *w, int unused) +{ + struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w; + int ok = 1; + + D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C); + ok = drbd_send_ack(mdev, P_DISCARD_ACK, e); + + spin_lock_irq(&mdev->req_lock); + D_ASSERT(!hlist_unhashed(&e->colision)); + hlist_del_init(&e->colision); + spin_unlock_irq(&mdev->req_lock); + + dec_unacked(mdev); + + return ok; +} + +/* Called from receive_Data. + * Synchronize packets on sock with packets on msock. + * + * This is here so even when a P_DATA packet traveling via sock overtook an Ack + * packet traveling on msock, they are still processed in the order they have + * been sent. + * + * Note: we don't care for Ack packets overtaking P_DATA packets. + * + * In case packet_seq is larger than mdev->peer_seq number, there are + * outstanding packets on the msock. We wait for them to arrive. + * In case we are the logically next packet, we update mdev->peer_seq + * ourselves. Correctly handles 32bit wrap around. + * + * Assume we have a 10 GBit connection, that is about 1<<30 byte per second, + * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds + * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have + * 1<<9 == 512 seconds aka ages for the 32bit wrap around... + * + * returns 0 if we may process the packet, + * -ERESTARTSYS if we were interrupted (by disconnect signal). */ +static int drbd_wait_peer_seq(struct drbd_conf *mdev, const u32 packet_seq) +{ + DEFINE_WAIT(wait); + unsigned int p_seq; + long timeout; + int ret = 0; + spin_lock(&mdev->peer_seq_lock); + for (;;) { + prepare_to_wait(&mdev->seq_wait, &wait, TASK_INTERRUPTIBLE); + if (seq_le(packet_seq, mdev->peer_seq+1)) + break; + if (signal_pending(current)) { + ret = -ERESTARTSYS; + break; + } + p_seq = mdev->peer_seq; + spin_unlock(&mdev->peer_seq_lock); + timeout = schedule_timeout(30*HZ); + spin_lock(&mdev->peer_seq_lock); + if (timeout == 0 && p_seq == mdev->peer_seq) { + ret = -ETIMEDOUT; + dev_err(DEV, "ASSERT FAILED waited 30 seconds for sequence update, forcing reconnect\n"); + break; + } + } + finish_wait(&mdev->seq_wait, &wait); + if (mdev->peer_seq+1 == packet_seq) + mdev->peer_seq++; + spin_unlock(&mdev->peer_seq_lock); + return ret; +} + +/* mirrored write */ +static int receive_Data(struct drbd_conf *mdev, struct p_header *h) +{ + sector_t sector; + struct drbd_epoch_entry *e; + struct p_data *p = (struct p_data *)h; + int header_size, data_size; + int rw = WRITE; + u32 dp_flags; + + header_size = sizeof(*p) - sizeof(*h); + data_size = h->length - header_size; + + ERR_IF(data_size == 0) return FALSE; + + if (drbd_recv(mdev, h->payload, header_size) != header_size) + return FALSE; + + if (!get_ldev(mdev)) { + if (__ratelimit(&drbd_ratelimit_state)) + dev_err(DEV, "Can not write mirrored data block " + "to local disk.\n"); + spin_lock(&mdev->peer_seq_lock); + if (mdev->peer_seq+1 == be32_to_cpu(p->seq_num)) + mdev->peer_seq++; + spin_unlock(&mdev->peer_seq_lock); + + drbd_send_ack_dp(mdev, P_NEG_ACK, p); + atomic_inc(&mdev->current_epoch->epoch_size); + return drbd_drain_block(mdev, data_size); + } + + /* get_ldev(mdev) successful. + * Corresponding put_ldev done either below (on various errors), + * or in drbd_endio_write_sec, if we successfully submit the data at + * the end of this function. */ + + sector = be64_to_cpu(p->sector); + e = read_in_block(mdev, p->block_id, sector, data_size); + if (!e) { + put_ldev(mdev); + return FALSE; + } + + e->private_bio->bi_end_io = drbd_endio_write_sec; + e->w.cb = e_end_block; + + spin_lock(&mdev->epoch_lock); + e->epoch = mdev->current_epoch; + atomic_inc(&e->epoch->epoch_size); + atomic_inc(&e->epoch->active); + + if (mdev->write_ordering == WO_bio_barrier && atomic_read(&e->epoch->epoch_size) == 1) { + struct drbd_epoch *epoch; + /* Issue a barrier if we start a new epoch, and the previous epoch + was not a epoch containing a single request which already was + a Barrier. */ + epoch = list_entry(e->epoch->list.prev, struct drbd_epoch, list); + if (epoch == e->epoch) { + set_bit(DE_CONTAINS_A_BARRIER, &e->epoch->flags); + trace_drbd_epoch(mdev, e->epoch, EV_TRACE_ADD_BARRIER); + rw |= (1<flags |= EE_IS_BARRIER; + } else { + if (atomic_read(&epoch->epoch_size) > 1 || + !test_bit(DE_CONTAINS_A_BARRIER, &epoch->flags)) { + set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags); + trace_drbd_epoch(mdev, epoch, EV_TRACE_SETTING_BI); + set_bit(DE_CONTAINS_A_BARRIER, &e->epoch->flags); + trace_drbd_epoch(mdev, e->epoch, EV_TRACE_ADD_BARRIER); + rw |= (1<flags |= EE_IS_BARRIER; + } + } + } + spin_unlock(&mdev->epoch_lock); + + dp_flags = be32_to_cpu(p->dp_flags); + if (dp_flags & DP_HARDBARRIER) { + dev_err(DEV, "ASSERT FAILED would have submitted barrier request\n"); + /* rw |= (1<flags |= EE_MAY_SET_IN_SYNC; + + /* I'm the receiver, I do hold a net_cnt reference. */ + if (!mdev->net_conf->two_primaries) { + spin_lock_irq(&mdev->req_lock); + } else { + /* don't get the req_lock yet, + * we may sleep in drbd_wait_peer_seq */ + const int size = e->size; + const int discard = test_bit(DISCARD_CONCURRENT, &mdev->flags); + DEFINE_WAIT(wait); + struct drbd_request *i; + struct hlist_node *n; + struct hlist_head *slot; + int first; + + D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C); + BUG_ON(mdev->ee_hash == NULL); + BUG_ON(mdev->tl_hash == NULL); + + /* conflict detection and handling: + * 1. wait on the sequence number, + * in case this data packet overtook ACK packets. + * 2. check our hash tables for conflicting requests. + * we only need to walk the tl_hash, since an ee can not + * have a conflict with an other ee: on the submitting + * node, the corresponding req had already been conflicting, + * and a conflicting req is never sent. + * + * Note: for two_primaries, we are protocol C, + * so there cannot be any request that is DONE + * but still on the transfer log. + * + * unconditionally add to the ee_hash. + * + * if no conflicting request is found: + * submit. + * + * if any conflicting request is found + * that has not yet been acked, + * AND I have the "discard concurrent writes" flag: + * queue (via done_ee) the P_DISCARD_ACK; OUT. + * + * if any conflicting request is found: + * block the receiver, waiting on misc_wait + * until no more conflicting requests are there, + * or we get interrupted (disconnect). + * + * we do not just write after local io completion of those + * requests, but only after req is done completely, i.e. + * we wait for the P_DISCARD_ACK to arrive! + * + * then proceed normally, i.e. submit. + */ + if (drbd_wait_peer_seq(mdev, be32_to_cpu(p->seq_num))) + goto out_interrupted; + + spin_lock_irq(&mdev->req_lock); + + hlist_add_head(&e->colision, ee_hash_slot(mdev, sector)); + +#define OVERLAPS overlaps(i->sector, i->size, sector, size) + slot = tl_hash_slot(mdev, sector); + first = 1; + for (;;) { + int have_unacked = 0; + int have_conflict = 0; + prepare_to_wait(&mdev->misc_wait, &wait, + TASK_INTERRUPTIBLE); + hlist_for_each_entry(i, n, slot, colision) { + if (OVERLAPS) { + /* only ALERT on first iteration, + * we may be woken up early... */ + if (first) + dev_alert(DEV, "%s[%u] Concurrent local write detected!" + " new: %llus +%u; pending: %llus +%u\n", + current->comm, current->pid, + (unsigned long long)sector, size, + (unsigned long long)i->sector, i->size); + if (i->rq_state & RQ_NET_PENDING) + ++have_unacked; + ++have_conflict; + } + } +#undef OVERLAPS + if (!have_conflict) + break; + + /* Discard Ack only for the _first_ iteration */ + if (first && discard && have_unacked) { + dev_alert(DEV, "Concurrent write! [DISCARD BY FLAG] sec=%llus\n", + (unsigned long long)sector); + inc_unacked(mdev); + e->w.cb = e_send_discard_ack; + list_add_tail(&e->w.list, &mdev->done_ee); + + spin_unlock_irq(&mdev->req_lock); + + /* we could probably send that P_DISCARD_ACK ourselves, + * but I don't like the receiver using the msock */ + + put_ldev(mdev); + wake_asender(mdev); + finish_wait(&mdev->misc_wait, &wait); + return TRUE; + } + + if (signal_pending(current)) { + hlist_del_init(&e->colision); + + spin_unlock_irq(&mdev->req_lock); + + finish_wait(&mdev->misc_wait, &wait); + goto out_interrupted; + } + + spin_unlock_irq(&mdev->req_lock); + if (first) { + first = 0; + dev_alert(DEV, "Concurrent write! [W AFTERWARDS] " + "sec=%llus\n", (unsigned long long)sector); + } else if (discard) { + /* we had none on the first iteration. + * there must be none now. */ + D_ASSERT(have_unacked == 0); + } + schedule(); + spin_lock_irq(&mdev->req_lock); + } + finish_wait(&mdev->misc_wait, &wait); + } + + list_add(&e->w.list, &mdev->active_ee); + spin_unlock_irq(&mdev->req_lock); + + switch (mdev->net_conf->wire_protocol) { + case DRBD_PROT_C: + inc_unacked(mdev); + /* corresponding dec_unacked() in e_end_block() + * respective _drbd_clear_done_ee */ + break; + case DRBD_PROT_B: + /* I really don't like it that the receiver thread + * sends on the msock, but anyways */ + drbd_send_ack(mdev, P_RECV_ACK, e); + break; + case DRBD_PROT_A: + /* nothing to do */ + break; + } + + if (mdev->state.pdsk == D_DISKLESS) { + /* In case we have the only disk of the cluster, */ + drbd_set_out_of_sync(mdev, e->sector, e->size); + e->flags |= EE_CALL_AL_COMPLETE_IO; + drbd_al_begin_io(mdev, e->sector); + } + + e->private_bio->bi_rw = rw; + trace_drbd_ee(mdev, e, "submitting for (data)write"); + trace_drbd_bio(mdev, "Sec", e->private_bio, 0, NULL); + drbd_generic_make_request(mdev, DRBD_FAULT_DT_WR, e->private_bio); + /* accounting done in endio */ + + maybe_kick_lo(mdev); + return TRUE; + +out_interrupted: + /* yes, the epoch_size now is imbalanced. + * but we drop the connection anyways, so we don't have a chance to + * receive a barrier... atomic_inc(&mdev->epoch_size); */ + put_ldev(mdev); + drbd_free_ee(mdev, e); + return FALSE; +} + +static int receive_DataRequest(struct drbd_conf *mdev, struct p_header *h) +{ + sector_t sector; + const sector_t capacity = drbd_get_capacity(mdev->this_bdev); + struct drbd_epoch_entry *e; + struct digest_info *di = NULL; + int size, digest_size; + unsigned int fault_type; + struct p_block_req *p = + (struct p_block_req *)h; + const int brps = sizeof(*p)-sizeof(*h); + + if (drbd_recv(mdev, h->payload, brps) != brps) + return FALSE; + + sector = be64_to_cpu(p->sector); + size = be32_to_cpu(p->blksize); + + if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_SEGMENT_SIZE) { + dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__, + (unsigned long long)sector, size); + return FALSE; + } + if (sector + (size>>9) > capacity) { + dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__, + (unsigned long long)sector, size); + return FALSE; + } + + if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) { + if (__ratelimit(&drbd_ratelimit_state)) + dev_err(DEV, "Can not satisfy peer's read request, " + "no local data.\n"); + drbd_send_ack_rp(mdev, h->command == P_DATA_REQUEST ? P_NEG_DREPLY : + P_NEG_RS_DREPLY , p); + return TRUE; + } + + /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD + * "criss-cross" setup, that might cause write-out on some other DRBD, + * which in turn might block on the other node at this very place. */ + e = drbd_alloc_ee(mdev, p->block_id, sector, size, GFP_NOIO); + if (!e) { + put_ldev(mdev); + return FALSE; + } + + e->private_bio->bi_rw = READ; + e->private_bio->bi_end_io = drbd_endio_read_sec; + + switch (h->command) { + case P_DATA_REQUEST: + e->w.cb = w_e_end_data_req; + fault_type = DRBD_FAULT_DT_RD; + break; + case P_RS_DATA_REQUEST: + e->w.cb = w_e_end_rsdata_req; + fault_type = DRBD_FAULT_RS_RD; + /* Eventually this should become asynchronously. Currently it + * blocks the whole receiver just to delay the reading of a + * resync data block. + * the drbd_work_queue mechanism is made for this... + */ + if (!drbd_rs_begin_io(mdev, sector)) { + /* we have been interrupted, + * probably connection lost! */ + D_ASSERT(signal_pending(current)); + goto out_free_e; + } + break; + + case P_OV_REPLY: + case P_CSUM_RS_REQUEST: + fault_type = DRBD_FAULT_RS_RD; + digest_size = h->length - brps ; + di = kmalloc(sizeof(*di) + digest_size, GFP_NOIO); + if (!di) + goto out_free_e; + + di->digest_size = digest_size; + di->digest = (((char *)di)+sizeof(struct digest_info)); + + if (drbd_recv(mdev, di->digest, digest_size) != digest_size) + goto out_free_e; + + e->block_id = (u64)(unsigned long)di; + if (h->command == P_CSUM_RS_REQUEST) { + D_ASSERT(mdev->agreed_pro_version >= 89); + e->w.cb = w_e_end_csum_rs_req; + } else if (h->command == P_OV_REPLY) { + e->w.cb = w_e_end_ov_reply; + dec_rs_pending(mdev); + break; + } + + if (!drbd_rs_begin_io(mdev, sector)) { + /* we have been interrupted, probably connection lost! */ + D_ASSERT(signal_pending(current)); + goto out_free_e; + } + break; + + case P_OV_REQUEST: + if (mdev->state.conn >= C_CONNECTED && + mdev->state.conn != C_VERIFY_T) + dev_warn(DEV, "ASSERT FAILED: got P_OV_REQUEST while being %s\n", + drbd_conn_str(mdev->state.conn)); + if (mdev->ov_start_sector == ~(sector_t)0 && + mdev->agreed_pro_version >= 90) { + mdev->ov_start_sector = sector; + mdev->ov_position = sector; + mdev->ov_left = mdev->rs_total - BM_SECT_TO_BIT(sector); + dev_info(DEV, "Online Verify start sector: %llu\n", + (unsigned long long)sector); + } + e->w.cb = w_e_end_ov_req; + fault_type = DRBD_FAULT_RS_RD; + /* Eventually this should become asynchronous. Currently it + * blocks the whole receiver just to delay the reading of a + * resync data block. + * the drbd_work_queue mechanism is made for this... + */ + if (!drbd_rs_begin_io(mdev, sector)) { + /* we have been interrupted, + * probably connection lost! */ + D_ASSERT(signal_pending(current)); + goto out_free_e; + } + break; + + + default: + dev_err(DEV, "unexpected command (%s) in receive_DataRequest\n", + cmdname(h->command)); + fault_type = DRBD_FAULT_MAX; + } + + spin_lock_irq(&mdev->req_lock); + list_add(&e->w.list, &mdev->read_ee); + spin_unlock_irq(&mdev->req_lock); + + inc_unacked(mdev); + + trace_drbd_ee(mdev, e, "submitting for read"); + trace_drbd_bio(mdev, "Sec", e->private_bio, 0, NULL); + drbd_generic_make_request(mdev, fault_type, e->private_bio); + maybe_kick_lo(mdev); + + return TRUE; + +out_free_e: + kfree(di); + put_ldev(mdev); + drbd_free_ee(mdev, e); + return FALSE; +} + +static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local) +{ + int self, peer, rv = -100; + unsigned long ch_self, ch_peer; + + self = mdev->ldev->md.uuid[UI_BITMAP] & 1; + peer = mdev->p_uuid[UI_BITMAP] & 1; + + ch_peer = mdev->p_uuid[UI_SIZE]; + ch_self = mdev->comm_bm_set; + + switch (mdev->net_conf->after_sb_0p) { + case ASB_CONSENSUS: + case ASB_DISCARD_SECONDARY: + case ASB_CALL_HELPER: + dev_err(DEV, "Configuration error.\n"); + break; + case ASB_DISCONNECT: + break; + case ASB_DISCARD_YOUNGER_PRI: + if (self == 0 && peer == 1) { + rv = -1; + break; + } + if (self == 1 && peer == 0) { + rv = 1; + break; + } + /* Else fall through to one of the other strategies... */ + case ASB_DISCARD_OLDER_PRI: + if (self == 0 && peer == 1) { + rv = 1; + break; + } + if (self == 1 && peer == 0) { + rv = -1; + break; + } + /* Else fall through to one of the other strategies... */ + dev_warn(DEV, "Discard younger/older primary did not found a decision\n" + "Using discard-least-changes instead\n"); + case ASB_DISCARD_ZERO_CHG: + if (ch_peer == 0 && ch_self == 0) { + rv = test_bit(DISCARD_CONCURRENT, &mdev->flags) + ? -1 : 1; + break; + } else { + if (ch_peer == 0) { rv = 1; break; } + if (ch_self == 0) { rv = -1; break; } + } + if (mdev->net_conf->after_sb_0p == ASB_DISCARD_ZERO_CHG) + break; + case ASB_DISCARD_LEAST_CHG: + if (ch_self < ch_peer) + rv = -1; + else if (ch_self > ch_peer) + rv = 1; + else /* ( ch_self == ch_peer ) */ + /* Well, then use something else. */ + rv = test_bit(DISCARD_CONCURRENT, &mdev->flags) + ? -1 : 1; + break; + case ASB_DISCARD_LOCAL: + rv = -1; + break; + case ASB_DISCARD_REMOTE: + rv = 1; + } + + return rv; +} + +static int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local) +{ + int self, peer, hg, rv = -100; + + self = mdev->ldev->md.uuid[UI_BITMAP] & 1; + peer = mdev->p_uuid[UI_BITMAP] & 1; + + switch (mdev->net_conf->after_sb_1p) { + case ASB_DISCARD_YOUNGER_PRI: + case ASB_DISCARD_OLDER_PRI: + case ASB_DISCARD_LEAST_CHG: + case ASB_DISCARD_LOCAL: + case ASB_DISCARD_REMOTE: + dev_err(DEV, "Configuration error.\n"); + break; + case ASB_DISCONNECT: + break; + case ASB_CONSENSUS: + hg = drbd_asb_recover_0p(mdev); + if (hg == -1 && mdev->state.role == R_SECONDARY) + rv = hg; + if (hg == 1 && mdev->state.role == R_PRIMARY) + rv = hg; + break; + case ASB_VIOLENTLY: + rv = drbd_asb_recover_0p(mdev); + break; + case ASB_DISCARD_SECONDARY: + return mdev->state.role == R_PRIMARY ? 1 : -1; + case ASB_CALL_HELPER: + hg = drbd_asb_recover_0p(mdev); + if (hg == -1 && mdev->state.role == R_PRIMARY) { + self = drbd_set_role(mdev, R_SECONDARY, 0); + /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE, + * we might be here in C_WF_REPORT_PARAMS which is transient. + * we do not need to wait for the after state change work either. */ + self = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY)); + if (self != SS_SUCCESS) { + drbd_khelper(mdev, "pri-lost-after-sb"); + } else { + dev_warn(DEV, "Successfully gave up primary role.\n"); + rv = hg; + } + } else + rv = hg; + } + + return rv; +} + +static int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local) +{ + int self, peer, hg, rv = -100; + + self = mdev->ldev->md.uuid[UI_BITMAP] & 1; + peer = mdev->p_uuid[UI_BITMAP] & 1; + + switch (mdev->net_conf->after_sb_2p) { + case ASB_DISCARD_YOUNGER_PRI: + case ASB_DISCARD_OLDER_PRI: + case ASB_DISCARD_LEAST_CHG: + case ASB_DISCARD_LOCAL: + case ASB_DISCARD_REMOTE: + case ASB_CONSENSUS: + case ASB_DISCARD_SECONDARY: + dev_err(DEV, "Configuration error.\n"); + break; + case ASB_VIOLENTLY: + rv = drbd_asb_recover_0p(mdev); + break; + case ASB_DISCONNECT: + break; + case ASB_CALL_HELPER: + hg = drbd_asb_recover_0p(mdev); + if (hg == -1) { + /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE, + * we might be here in C_WF_REPORT_PARAMS which is transient. + * we do not need to wait for the after state change work either. */ + self = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY)); + if (self != SS_SUCCESS) { + drbd_khelper(mdev, "pri-lost-after-sb"); + } else { + dev_warn(DEV, "Successfully gave up primary role.\n"); + rv = hg; + } + } else + rv = hg; + } + + return rv; +} + +static void drbd_uuid_dump(struct drbd_conf *mdev, char *text, u64 *uuid, + u64 bits, u64 flags) +{ + if (!uuid) { + dev_info(DEV, "%s uuid info vanished while I was looking!\n", text); + return; + } + dev_info(DEV, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n", + text, + (unsigned long long)uuid[UI_CURRENT], + (unsigned long long)uuid[UI_BITMAP], + (unsigned long long)uuid[UI_HISTORY_START], + (unsigned long long)uuid[UI_HISTORY_END], + (unsigned long long)bits, + (unsigned long long)flags); +} + +/* + 100 after split brain try auto recover + 2 C_SYNC_SOURCE set BitMap + 1 C_SYNC_SOURCE use BitMap + 0 no Sync + -1 C_SYNC_TARGET use BitMap + -2 C_SYNC_TARGET set BitMap + -100 after split brain, disconnect +-1000 unrelated data + */ +static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(local) +{ + u64 self, peer; + int i, j; + + self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1); + peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1); + + *rule_nr = 10; + if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED) + return 0; + + *rule_nr = 20; + if ((self == UUID_JUST_CREATED || self == (u64)0) && + peer != UUID_JUST_CREATED) + return -2; + + *rule_nr = 30; + if (self != UUID_JUST_CREATED && + (peer == UUID_JUST_CREATED || peer == (u64)0)) + return 2; + + if (self == peer) { + int rct, dc; /* roles at crash time */ + + if (mdev->p_uuid[UI_BITMAP] == (u64)0 && mdev->ldev->md.uuid[UI_BITMAP] != (u64)0) { + + if (mdev->agreed_pro_version < 91) + return -1001; + + if ((mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) && + (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) { + dev_info(DEV, "was SyncSource, missed the resync finished event, corrected myself:\n"); + drbd_uuid_set_bm(mdev, 0UL); + + drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid, + mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0); + *rule_nr = 34; + } else { + dev_info(DEV, "was SyncSource (peer failed to write sync_uuid)\n"); + *rule_nr = 36; + } + + return 1; + } + + if (mdev->ldev->md.uuid[UI_BITMAP] == (u64)0 && mdev->p_uuid[UI_BITMAP] != (u64)0) { + + if (mdev->agreed_pro_version < 91) + return -1001; + + if ((mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_BITMAP] & ~((u64)1)) && + (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1))) { + dev_info(DEV, "was SyncTarget, peer missed the resync finished event, corrected peer:\n"); + + mdev->p_uuid[UI_HISTORY_START + 1] = mdev->p_uuid[UI_HISTORY_START]; + mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_BITMAP]; + mdev->p_uuid[UI_BITMAP] = 0UL; + + drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]); + *rule_nr = 35; + } else { + dev_info(DEV, "was SyncTarget (failed to write sync_uuid)\n"); + *rule_nr = 37; + } + + return -1; + } + + /* Common power [off|failure] */ + rct = (test_bit(CRASHED_PRIMARY, &mdev->flags) ? 1 : 0) + + (mdev->p_uuid[UI_FLAGS] & 2); + /* lowest bit is set when we were primary, + * next bit (weight 2) is set when peer was primary */ + *rule_nr = 40; + + switch (rct) { + case 0: /* !self_pri && !peer_pri */ return 0; + case 1: /* self_pri && !peer_pri */ return 1; + case 2: /* !self_pri && peer_pri */ return -1; + case 3: /* self_pri && peer_pri */ + dc = test_bit(DISCARD_CONCURRENT, &mdev->flags); + return dc ? -1 : 1; + } + } + + *rule_nr = 50; + peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1); + if (self == peer) + return -1; + + *rule_nr = 51; + peer = mdev->p_uuid[UI_HISTORY_START] & ~((u64)1); + if (self == peer) { + self = mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1); + peer = mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1); + if (self == peer) { + /* The last P_SYNC_UUID did not get though. Undo the last start of + resync as sync source modifications of the peer's UUIDs. */ + + if (mdev->agreed_pro_version < 91) + return -1001; + + mdev->p_uuid[UI_BITMAP] = mdev->p_uuid[UI_HISTORY_START]; + mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_HISTORY_START + 1]; + return -1; + } + } + + *rule_nr = 60; + self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1); + for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) { + peer = mdev->p_uuid[i] & ~((u64)1); + if (self == peer) + return -2; + } + + *rule_nr = 70; + self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1); + peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1); + if (self == peer) + return 1; + + *rule_nr = 71; + self = mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1); + if (self == peer) { + self = mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1); + peer = mdev->p_uuid[UI_HISTORY_START] & ~((u64)1); + if (self == peer) { + /* The last P_SYNC_UUID did not get though. Undo the last start of + resync as sync source modifications of our UUIDs. */ + + if (mdev->agreed_pro_version < 91) + return -1001; + + _drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_HISTORY_START]); + _drbd_uuid_set(mdev, UI_HISTORY_START, mdev->ldev->md.uuid[UI_HISTORY_START + 1]); + + dev_info(DEV, "Undid last start of resync:\n"); + + drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid, + mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0); + + return 1; + } + } + + + *rule_nr = 80; + for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) { + self = mdev->ldev->md.uuid[i] & ~((u64)1); + if (self == peer) + return 2; + } + + *rule_nr = 90; + self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1); + peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1); + if (self == peer && self != ((u64)0)) + return 100; + + *rule_nr = 100; + for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) { + self = mdev->ldev->md.uuid[i] & ~((u64)1); + for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) { + peer = mdev->p_uuid[j] & ~((u64)1); + if (self == peer) + return -100; + } + } + + return -1000; +} + +/* drbd_sync_handshake() returns the new conn state on success, or + CONN_MASK (-1) on failure. + */ +static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_role peer_role, + enum drbd_disk_state peer_disk) __must_hold(local) +{ + int hg, rule_nr; + enum drbd_conns rv = C_MASK; + enum drbd_disk_state mydisk; + + mydisk = mdev->state.disk; + if (mydisk == D_NEGOTIATING) + mydisk = mdev->new_state_tmp.disk; + + dev_info(DEV, "drbd_sync_handshake:\n"); + drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid, mdev->comm_bm_set, 0); + drbd_uuid_dump(mdev, "peer", mdev->p_uuid, + mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]); + + hg = drbd_uuid_compare(mdev, &rule_nr); + + dev_info(DEV, "uuid_compare()=%d by rule %d\n", hg, rule_nr); + + if (hg == -1000) { + dev_alert(DEV, "Unrelated data, aborting!\n"); + return C_MASK; + } + if (hg == -1001) { + dev_alert(DEV, "To resolve this both sides have to support at least protocol\n"); + return C_MASK; + } + + if ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) || + (peer_disk == D_INCONSISTENT && mydisk > D_INCONSISTENT)) { + int f = (hg == -100) || abs(hg) == 2; + hg = mydisk > D_INCONSISTENT ? 1 : -1; + if (f) + hg = hg*2; + dev_info(DEV, "Becoming sync %s due to disk states.\n", + hg > 0 ? "source" : "target"); + } + + if (hg == 100 || (hg == -100 && mdev->net_conf->always_asbp)) { + int pcount = (mdev->state.role == R_PRIMARY) + + (peer_role == R_PRIMARY); + int forced = (hg == -100); + + switch (pcount) { + case 0: + hg = drbd_asb_recover_0p(mdev); + break; + case 1: + hg = drbd_asb_recover_1p(mdev); + break; + case 2: + hg = drbd_asb_recover_2p(mdev); + break; + } + if (abs(hg) < 100) { + dev_warn(DEV, "Split-Brain detected, %d primaries, " + "automatically solved. Sync from %s node\n", + pcount, (hg < 0) ? "peer" : "this"); + if (forced) { + dev_warn(DEV, "Doing a full sync, since" + " UUIDs where ambiguous.\n"); + hg = hg*2; + } + } + } + + if (hg == -100) { + if (mdev->net_conf->want_lose && !(mdev->p_uuid[UI_FLAGS]&1)) + hg = -1; + if (!mdev->net_conf->want_lose && (mdev->p_uuid[UI_FLAGS]&1)) + hg = 1; + + if (abs(hg) < 100) + dev_warn(DEV, "Split-Brain detected, manually solved. " + "Sync from %s node\n", + (hg < 0) ? "peer" : "this"); + } + + if (hg == -100) { + dev_alert(DEV, "Split-Brain detected, dropping connection!\n"); + drbd_khelper(mdev, "split-brain"); + return C_MASK; + } + + if (hg > 0 && mydisk <= D_INCONSISTENT) { + dev_err(DEV, "I shall become SyncSource, but I am inconsistent!\n"); + return C_MASK; + } + + if (hg < 0 && /* by intention we do not use mydisk here. */ + mdev->state.role == R_PRIMARY && mdev->state.disk >= D_CONSISTENT) { + switch (mdev->net_conf->rr_conflict) { + case ASB_CALL_HELPER: + drbd_khelper(mdev, "pri-lost"); + /* fall through */ + case ASB_DISCONNECT: + dev_err(DEV, "I shall become SyncTarget, but I am primary!\n"); + return C_MASK; + case ASB_VIOLENTLY: + dev_warn(DEV, "Becoming SyncTarget, violating the stable-data" + "assumption\n"); + } + } + + if (abs(hg) >= 2) { + dev_info(DEV, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n"); + if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from sync_handshake")) + return C_MASK; + } + + if (hg > 0) { /* become sync source. */ + rv = C_WF_BITMAP_S; + } else if (hg < 0) { /* become sync target */ + rv = C_WF_BITMAP_T; + } else { + rv = C_CONNECTED; + if (drbd_bm_total_weight(mdev)) { + dev_info(DEV, "No resync, but %lu bits in bitmap!\n", + drbd_bm_total_weight(mdev)); + } + } + + return rv; +} + +/* returns 1 if invalid */ +static int cmp_after_sb(enum drbd_after_sb_p peer, enum drbd_after_sb_p self) +{ + /* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */ + if ((peer == ASB_DISCARD_REMOTE && self == ASB_DISCARD_LOCAL) || + (self == ASB_DISCARD_REMOTE && peer == ASB_DISCARD_LOCAL)) + return 0; + + /* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */ + if (peer == ASB_DISCARD_REMOTE || peer == ASB_DISCARD_LOCAL || + self == ASB_DISCARD_REMOTE || self == ASB_DISCARD_LOCAL) + return 1; + + /* everything else is valid if they are equal on both sides. */ + if (peer == self) + return 0; + + /* everything es is invalid. */ + return 1; +} + +static int receive_protocol(struct drbd_conf *mdev, struct p_header *h) +{ + struct p_protocol *p = (struct p_protocol *)h; + int header_size, data_size; + int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p; + int p_want_lose, p_two_primaries; + char p_integrity_alg[SHARED_SECRET_MAX] = ""; + + header_size = sizeof(*p) - sizeof(*h); + data_size = h->length - header_size; + + if (drbd_recv(mdev, h->payload, header_size) != header_size) + return FALSE; + + p_proto = be32_to_cpu(p->protocol); + p_after_sb_0p = be32_to_cpu(p->after_sb_0p); + p_after_sb_1p = be32_to_cpu(p->after_sb_1p); + p_after_sb_2p = be32_to_cpu(p->after_sb_2p); + p_want_lose = be32_to_cpu(p->want_lose); + p_two_primaries = be32_to_cpu(p->two_primaries); + + if (p_proto != mdev->net_conf->wire_protocol) { + dev_err(DEV, "incompatible communication protocols\n"); + goto disconnect; + } + + if (cmp_after_sb(p_after_sb_0p, mdev->net_conf->after_sb_0p)) { + dev_err(DEV, "incompatible after-sb-0pri settings\n"); + goto disconnect; + } + + if (cmp_after_sb(p_after_sb_1p, mdev->net_conf->after_sb_1p)) { + dev_err(DEV, "incompatible after-sb-1pri settings\n"); + goto disconnect; + } + + if (cmp_after_sb(p_after_sb_2p, mdev->net_conf->after_sb_2p)) { + dev_err(DEV, "incompatible after-sb-2pri settings\n"); + goto disconnect; + } + + if (p_want_lose && mdev->net_conf->want_lose) { + dev_err(DEV, "both sides have the 'want_lose' flag set\n"); + goto disconnect; + } + + if (p_two_primaries != mdev->net_conf->two_primaries) { + dev_err(DEV, "incompatible setting of the two-primaries options\n"); + goto disconnect; + } + + if (mdev->agreed_pro_version >= 87) { + unsigned char *my_alg = mdev->net_conf->integrity_alg; + + if (drbd_recv(mdev, p_integrity_alg, data_size) != data_size) + return FALSE; + + p_integrity_alg[SHARED_SECRET_MAX-1] = 0; + if (strcmp(p_integrity_alg, my_alg)) { + dev_err(DEV, "incompatible setting of the data-integrity-alg\n"); + goto disconnect; + } + dev_info(DEV, "data-integrity-alg: %s\n", + my_alg[0] ? my_alg : (unsigned char *)""); + } + + return TRUE; + +disconnect: + drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); + return FALSE; +} + +/* helper function + * input: alg name, feature name + * return: NULL (alg name was "") + * ERR_PTR(error) if something goes wrong + * or the crypto hash ptr, if it worked out ok. */ +struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_conf *mdev, + const char *alg, const char *name) +{ + struct crypto_hash *tfm; + + if (!alg[0]) + return NULL; + + tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm)) { + dev_err(DEV, "Can not allocate \"%s\" as %s (reason: %ld)\n", + alg, name, PTR_ERR(tfm)); + return tfm; + } + if (!drbd_crypto_is_hash(crypto_hash_tfm(tfm))) { + crypto_free_hash(tfm); + dev_err(DEV, "\"%s\" is not a digest (%s)\n", alg, name); + return ERR_PTR(-EINVAL); + } + return tfm; +} + +static int receive_SyncParam(struct drbd_conf *mdev, struct p_header *h) +{ + int ok = TRUE; + struct p_rs_param_89 *p = (struct p_rs_param_89 *)h; + unsigned int header_size, data_size, exp_max_sz; + struct crypto_hash *verify_tfm = NULL; + struct crypto_hash *csums_tfm = NULL; + const int apv = mdev->agreed_pro_version; + + exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param) + : apv == 88 ? sizeof(struct p_rs_param) + + SHARED_SECRET_MAX + : /* 89 */ sizeof(struct p_rs_param_89); + + if (h->length > exp_max_sz) { + dev_err(DEV, "SyncParam packet too long: received %u, expected <= %u bytes\n", + h->length, exp_max_sz); + return FALSE; + } + + if (apv <= 88) { + header_size = sizeof(struct p_rs_param) - sizeof(*h); + data_size = h->length - header_size; + } else /* apv >= 89 */ { + header_size = sizeof(struct p_rs_param_89) - sizeof(*h); + data_size = h->length - header_size; + D_ASSERT(data_size == 0); + } + + /* initialize verify_alg and csums_alg */ + memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX); + + if (drbd_recv(mdev, h->payload, header_size) != header_size) + return FALSE; + + mdev->sync_conf.rate = be32_to_cpu(p->rate); + + if (apv >= 88) { + if (apv == 88) { + if (data_size > SHARED_SECRET_MAX) { + dev_err(DEV, "verify-alg too long, " + "peer wants %u, accepting only %u byte\n", + data_size, SHARED_SECRET_MAX); + return FALSE; + } + + if (drbd_recv(mdev, p->verify_alg, data_size) != data_size) + return FALSE; + + /* we expect NUL terminated string */ + /* but just in case someone tries to be evil */ + D_ASSERT(p->verify_alg[data_size-1] == 0); + p->verify_alg[data_size-1] = 0; + + } else /* apv >= 89 */ { + /* we still expect NUL terminated strings */ + /* but just in case someone tries to be evil */ + D_ASSERT(p->verify_alg[SHARED_SECRET_MAX-1] == 0); + D_ASSERT(p->csums_alg[SHARED_SECRET_MAX-1] == 0); + p->verify_alg[SHARED_SECRET_MAX-1] = 0; + p->csums_alg[SHARED_SECRET_MAX-1] = 0; + } + + if (strcmp(mdev->sync_conf.verify_alg, p->verify_alg)) { + if (mdev->state.conn == C_WF_REPORT_PARAMS) { + dev_err(DEV, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n", + mdev->sync_conf.verify_alg, p->verify_alg); + goto disconnect; + } + verify_tfm = drbd_crypto_alloc_digest_safe(mdev, + p->verify_alg, "verify-alg"); + if (IS_ERR(verify_tfm)) { + verify_tfm = NULL; + goto disconnect; + } + } + + if (apv >= 89 && strcmp(mdev->sync_conf.csums_alg, p->csums_alg)) { + if (mdev->state.conn == C_WF_REPORT_PARAMS) { + dev_err(DEV, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n", + mdev->sync_conf.csums_alg, p->csums_alg); + goto disconnect; + } + csums_tfm = drbd_crypto_alloc_digest_safe(mdev, + p->csums_alg, "csums-alg"); + if (IS_ERR(csums_tfm)) { + csums_tfm = NULL; + goto disconnect; + } + } + + + spin_lock(&mdev->peer_seq_lock); + /* lock against drbd_nl_syncer_conf() */ + if (verify_tfm) { + strcpy(mdev->sync_conf.verify_alg, p->verify_alg); + mdev->sync_conf.verify_alg_len = strlen(p->verify_alg) + 1; + crypto_free_hash(mdev->verify_tfm); + mdev->verify_tfm = verify_tfm; + dev_info(DEV, "using verify-alg: \"%s\"\n", p->verify_alg); + } + if (csums_tfm) { + strcpy(mdev->sync_conf.csums_alg, p->csums_alg); + mdev->sync_conf.csums_alg_len = strlen(p->csums_alg) + 1; + crypto_free_hash(mdev->csums_tfm); + mdev->csums_tfm = csums_tfm; + dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg); + } + spin_unlock(&mdev->peer_seq_lock); + } + + return ok; +disconnect: + /* just for completeness: actually not needed, + * as this is not reached if csums_tfm was ok. */ + crypto_free_hash(csums_tfm); + /* but free the verify_tfm again, if csums_tfm did not work out */ + crypto_free_hash(verify_tfm); + drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); + return FALSE; +} + +static void drbd_setup_order_type(struct drbd_conf *mdev, int peer) +{ + /* sorry, we currently have no working implementation + * of distributed TCQ */ +} + +/* warn if the arguments differ by more than 12.5% */ +static void warn_if_differ_considerably(struct drbd_conf *mdev, + const char *s, sector_t a, sector_t b) +{ + sector_t d; + if (a == 0 || b == 0) + return; + d = (a > b) ? (a - b) : (b - a); + if (d > (a>>3) || d > (b>>3)) + dev_warn(DEV, "Considerable difference in %s: %llus vs. %llus\n", s, + (unsigned long long)a, (unsigned long long)b); +} + +static int receive_sizes(struct drbd_conf *mdev, struct p_header *h) +{ + struct p_sizes *p = (struct p_sizes *)h; + enum determine_dev_size dd = unchanged; + unsigned int max_seg_s; + sector_t p_size, p_usize, my_usize; + int ldsc = 0; /* local disk size changed */ + enum drbd_conns nconn; + + ERR_IF(h->length != (sizeof(*p)-sizeof(*h))) return FALSE; + if (drbd_recv(mdev, h->payload, h->length) != h->length) + return FALSE; + + p_size = be64_to_cpu(p->d_size); + p_usize = be64_to_cpu(p->u_size); + + if (p_size == 0 && mdev->state.disk == D_DISKLESS) { + dev_err(DEV, "some backing storage is needed\n"); + drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); + return FALSE; + } + + /* just store the peer's disk size for now. + * we still need to figure out whether we accept that. */ + mdev->p_size = p_size; + +#define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r)) + if (get_ldev(mdev)) { + warn_if_differ_considerably(mdev, "lower level device sizes", + p_size, drbd_get_max_capacity(mdev->ldev)); + warn_if_differ_considerably(mdev, "user requested size", + p_usize, mdev->ldev->dc.disk_size); + + /* if this is the first connect, or an otherwise expected + * param exchange, choose the minimum */ + if (mdev->state.conn == C_WF_REPORT_PARAMS) + p_usize = min_not_zero((sector_t)mdev->ldev->dc.disk_size, + p_usize); + + my_usize = mdev->ldev->dc.disk_size; + + if (mdev->ldev->dc.disk_size != p_usize) { + mdev->ldev->dc.disk_size = p_usize; + dev_info(DEV, "Peer sets u_size to %lu sectors\n", + (unsigned long)mdev->ldev->dc.disk_size); + } + + /* Never shrink a device with usable data during connect. + But allow online shrinking if we are connected. */ + if (drbd_new_dev_size(mdev, mdev->ldev) < + drbd_get_capacity(mdev->this_bdev) && + mdev->state.disk >= D_OUTDATED && + mdev->state.conn < C_CONNECTED) { + dev_err(DEV, "The peer's disk size is too small!\n"); + drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); + mdev->ldev->dc.disk_size = my_usize; + put_ldev(mdev); + return FALSE; + } + put_ldev(mdev); + } +#undef min_not_zero + + if (get_ldev(mdev)) { + dd = drbd_determin_dev_size(mdev); + put_ldev(mdev); + if (dd == dev_size_error) + return FALSE; + drbd_md_sync(mdev); + } else { + /* I am diskless, need to accept the peer's size. */ + drbd_set_my_capacity(mdev, p_size); + } + + if (mdev->p_uuid && mdev->state.conn <= C_CONNECTED && get_ldev(mdev)) { + nconn = drbd_sync_handshake(mdev, + mdev->state.peer, mdev->state.pdsk); + put_ldev(mdev); + + if (nconn == C_MASK) { + drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); + return FALSE; + } + + if (drbd_request_state(mdev, NS(conn, nconn)) < SS_SUCCESS) { + drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); + return FALSE; + } + } + + if (get_ldev(mdev)) { + if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) { + mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev); + ldsc = 1; + } + + max_seg_s = be32_to_cpu(p->max_segment_size); + if (max_seg_s != queue_max_segment_size(mdev->rq_queue)) + drbd_setup_queue_param(mdev, max_seg_s); + + drbd_setup_order_type(mdev, be32_to_cpu(p->queue_order_type)); + put_ldev(mdev); + } + + if (mdev->state.conn > C_WF_REPORT_PARAMS) { + if (be64_to_cpu(p->c_size) != + drbd_get_capacity(mdev->this_bdev) || ldsc) { + /* we have different sizes, probably peer + * needs to know my new size... */ + drbd_send_sizes(mdev, 0); + } + if (test_and_clear_bit(RESIZE_PENDING, &mdev->flags) || + (dd == grew && mdev->state.conn == C_CONNECTED)) { + if (mdev->state.pdsk >= D_INCONSISTENT && + mdev->state.disk >= D_INCONSISTENT) + resync_after_online_grow(mdev); + else + set_bit(RESYNC_AFTER_NEG, &mdev->flags); + } + } + + return TRUE; +} + +static int receive_uuids(struct drbd_conf *mdev, struct p_header *h) +{ + struct p_uuids *p = (struct p_uuids *)h; + u64 *p_uuid; + int i; + + ERR_IF(h->length != (sizeof(*p)-sizeof(*h))) return FALSE; + if (drbd_recv(mdev, h->payload, h->length) != h->length) + return FALSE; + + p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO); + + for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++) + p_uuid[i] = be64_to_cpu(p->uuid[i]); + + kfree(mdev->p_uuid); + mdev->p_uuid = p_uuid; + + if (mdev->state.conn < C_CONNECTED && + mdev->state.disk < D_INCONSISTENT && + mdev->state.role == R_PRIMARY && + (mdev->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) { + dev_err(DEV, "Can only connect to data with current UUID=%016llX\n", + (unsigned long long)mdev->ed_uuid); + drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); + return FALSE; + } + + if (get_ldev(mdev)) { + int skip_initial_sync = + mdev->state.conn == C_CONNECTED && + mdev->agreed_pro_version >= 90 && + mdev->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && + (p_uuid[UI_FLAGS] & 8); + if (skip_initial_sync) { + dev_info(DEV, "Accepted new current UUID, preparing to skip initial sync\n"); + drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write, + "clear_n_write from receive_uuids"); + _drbd_uuid_set(mdev, UI_CURRENT, p_uuid[UI_CURRENT]); + _drbd_uuid_set(mdev, UI_BITMAP, 0); + _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE), + CS_VERBOSE, NULL); + drbd_md_sync(mdev); + } + put_ldev(mdev); + } + + /* Before we test for the disk state, we should wait until an eventually + ongoing cluster wide state change is finished. That is important if + we are primary and are detaching from our disk. We need to see the + new disk state... */ + wait_event(mdev->misc_wait, !test_bit(CLUSTER_ST_CHANGE, &mdev->flags)); + if (mdev->state.conn >= C_CONNECTED && mdev->state.disk < D_INCONSISTENT) + drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]); + + return TRUE; +} + +/** + * convert_state() - Converts the peer's view of the cluster state to our point of view + * @ps: The state as seen by the peer. + */ +static union drbd_state convert_state(union drbd_state ps) +{ + union drbd_state ms; + + static enum drbd_conns c_tab[] = { + [C_CONNECTED] = C_CONNECTED, + + [C_STARTING_SYNC_S] = C_STARTING_SYNC_T, + [C_STARTING_SYNC_T] = C_STARTING_SYNC_S, + [C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */ + [C_VERIFY_S] = C_VERIFY_T, + [C_MASK] = C_MASK, + }; + + ms.i = ps.i; + + ms.conn = c_tab[ps.conn]; + ms.peer = ps.role; + ms.role = ps.peer; + ms.pdsk = ps.disk; + ms.disk = ps.pdsk; + ms.peer_isp = (ps.aftr_isp | ps.user_isp); + + return ms; +} + +static int receive_req_state(struct drbd_conf *mdev, struct p_header *h) +{ + struct p_req_state *p = (struct p_req_state *)h; + union drbd_state mask, val; + int rv; + + ERR_IF(h->length != (sizeof(*p)-sizeof(*h))) return FALSE; + if (drbd_recv(mdev, h->payload, h->length) != h->length) + return FALSE; + + mask.i = be32_to_cpu(p->mask); + val.i = be32_to_cpu(p->val); + + if (test_bit(DISCARD_CONCURRENT, &mdev->flags) && + test_bit(CLUSTER_ST_CHANGE, &mdev->flags)) { + drbd_send_sr_reply(mdev, SS_CONCURRENT_ST_CHG); + return TRUE; + } + + mask = convert_state(mask); + val = convert_state(val); + + rv = drbd_change_state(mdev, CS_VERBOSE, mask, val); + + drbd_send_sr_reply(mdev, rv); + drbd_md_sync(mdev); + + return TRUE; +} + +static int receive_state(struct drbd_conf *mdev, struct p_header *h) +{ + struct p_state *p = (struct p_state *)h; + enum drbd_conns nconn, oconn; + union drbd_state ns, peer_state; + enum drbd_disk_state real_peer_disk; + int rv; + + ERR_IF(h->length != (sizeof(*p)-sizeof(*h))) + return FALSE; + + if (drbd_recv(mdev, h->payload, h->length) != h->length) + return FALSE; + + peer_state.i = be32_to_cpu(p->state); + + real_peer_disk = peer_state.disk; + if (peer_state.disk == D_NEGOTIATING) { + real_peer_disk = mdev->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT; + dev_info(DEV, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk)); + } + + spin_lock_irq(&mdev->req_lock); + retry: + oconn = nconn = mdev->state.conn; + spin_unlock_irq(&mdev->req_lock); + + if (nconn == C_WF_REPORT_PARAMS) + nconn = C_CONNECTED; + + if (mdev->p_uuid && peer_state.disk >= D_NEGOTIATING && + get_ldev_if_state(mdev, D_NEGOTIATING)) { + int cr; /* consider resync */ + + /* if we established a new connection */ + cr = (oconn < C_CONNECTED); + /* if we had an established connection + * and one of the nodes newly attaches a disk */ + cr |= (oconn == C_CONNECTED && + (peer_state.disk == D_NEGOTIATING || + mdev->state.disk == D_NEGOTIATING)); + /* if we have both been inconsistent, and the peer has been + * forced to be UpToDate with --overwrite-data */ + cr |= test_bit(CONSIDER_RESYNC, &mdev->flags); + /* if we had been plain connected, and the admin requested to + * start a sync by "invalidate" or "invalidate-remote" */ + cr |= (oconn == C_CONNECTED && + (peer_state.conn >= C_STARTING_SYNC_S && + peer_state.conn <= C_WF_BITMAP_T)); + + if (cr) + nconn = drbd_sync_handshake(mdev, peer_state.role, real_peer_disk); + + put_ldev(mdev); + if (nconn == C_MASK) { + if (mdev->state.disk == D_NEGOTIATING) { + drbd_force_state(mdev, NS(disk, D_DISKLESS)); + nconn = C_CONNECTED; + } else if (peer_state.disk == D_NEGOTIATING) { + dev_err(DEV, "Disk attach process on the peer node was aborted.\n"); + peer_state.disk = D_DISKLESS; + } else { + D_ASSERT(oconn == C_WF_REPORT_PARAMS); + drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); + return FALSE; + } + } + } + + spin_lock_irq(&mdev->req_lock); + if (mdev->state.conn != oconn) + goto retry; + clear_bit(CONSIDER_RESYNC, &mdev->flags); + ns.i = mdev->state.i; + ns.conn = nconn; + ns.peer = peer_state.role; + ns.pdsk = real_peer_disk; + ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp); + if ((nconn == C_CONNECTED || nconn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING) + ns.disk = mdev->new_state_tmp.disk; + + rv = _drbd_set_state(mdev, ns, CS_VERBOSE | CS_HARD, NULL); + ns = mdev->state; + spin_unlock_irq(&mdev->req_lock); + + if (rv < SS_SUCCESS) { + drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); + return FALSE; + } + + if (oconn > C_WF_REPORT_PARAMS) { + if (nconn > C_CONNECTED && peer_state.conn <= C_CONNECTED && + peer_state.disk != D_NEGOTIATING ) { + /* we want resync, peer has not yet decided to sync... */ + /* Nowadays only used when forcing a node into primary role and + setting its disk to UpToDate with that */ + drbd_send_uuids(mdev); + drbd_send_state(mdev); + } + } + + mdev->net_conf->want_lose = 0; + + drbd_md_sync(mdev); /* update connected indicator, la_size, ... */ + + return TRUE; +} + +static int receive_sync_uuid(struct drbd_conf *mdev, struct p_header *h) +{ + struct p_rs_uuid *p = (struct p_rs_uuid *)h; + + wait_event(mdev->misc_wait, + mdev->state.conn == C_WF_SYNC_UUID || + mdev->state.conn < C_CONNECTED || + mdev->state.disk < D_NEGOTIATING); + + /* D_ASSERT( mdev->state.conn == C_WF_SYNC_UUID ); */ + + ERR_IF(h->length != (sizeof(*p)-sizeof(*h))) return FALSE; + if (drbd_recv(mdev, h->payload, h->length) != h->length) + return FALSE; + + /* Here the _drbd_uuid_ functions are right, current should + _not_ be rotated into the history */ + if (get_ldev_if_state(mdev, D_NEGOTIATING)) { + _drbd_uuid_set(mdev, UI_CURRENT, be64_to_cpu(p->uuid)); + _drbd_uuid_set(mdev, UI_BITMAP, 0UL); + + drbd_start_resync(mdev, C_SYNC_TARGET); + + put_ldev(mdev); + } else + dev_err(DEV, "Ignoring SyncUUID packet!\n"); + + return TRUE; +} + +enum receive_bitmap_ret { OK, DONE, FAILED }; + +static enum receive_bitmap_ret +receive_bitmap_plain(struct drbd_conf *mdev, struct p_header *h, + unsigned long *buffer, struct bm_xfer_ctx *c) +{ + unsigned num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset); + unsigned want = num_words * sizeof(long); + + if (want != h->length) { + dev_err(DEV, "%s:want (%u) != h->length (%u)\n", __func__, want, h->length); + return FAILED; + } + if (want == 0) + return DONE; + if (drbd_recv(mdev, buffer, want) != want) + return FAILED; + + drbd_bm_merge_lel(mdev, c->word_offset, num_words, buffer); + + c->word_offset += num_words; + c->bit_offset = c->word_offset * BITS_PER_LONG; + if (c->bit_offset > c->bm_bits) + c->bit_offset = c->bm_bits; + + return OK; +} + +static enum receive_bitmap_ret +recv_bm_rle_bits(struct drbd_conf *mdev, + struct p_compressed_bm *p, + struct bm_xfer_ctx *c) +{ + struct bitstream bs; + u64 look_ahead; + u64 rl; + u64 tmp; + unsigned long s = c->bit_offset; + unsigned long e; + int len = p->head.length - (sizeof(*p) - sizeof(p->head)); + int toggle = DCBP_get_start(p); + int have; + int bits; + + bitstream_init(&bs, p->code, len, DCBP_get_pad_bits(p)); + + bits = bitstream_get_bits(&bs, &look_ahead, 64); + if (bits < 0) + return FAILED; + + for (have = bits; have > 0; s += rl, toggle = !toggle) { + bits = vli_decode_bits(&rl, look_ahead); + if (bits <= 0) + return FAILED; + + if (toggle) { + e = s + rl -1; + if (e >= c->bm_bits) { + dev_err(DEV, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e); + return FAILED; + } + _drbd_bm_set_bits(mdev, s, e); + } + + if (have < bits) { + dev_err(DEV, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n", + have, bits, look_ahead, + (unsigned int)(bs.cur.b - p->code), + (unsigned int)bs.buf_len); + return FAILED; + } + look_ahead >>= bits; + have -= bits; + + bits = bitstream_get_bits(&bs, &tmp, 64 - have); + if (bits < 0) + return FAILED; + look_ahead |= tmp << have; + have += bits; + } + + c->bit_offset = s; + bm_xfer_ctx_bit_to_word_offset(c); + + return (s == c->bm_bits) ? DONE : OK; +} + +static enum receive_bitmap_ret +decode_bitmap_c(struct drbd_conf *mdev, + struct p_compressed_bm *p, + struct bm_xfer_ctx *c) +{ + if (DCBP_get_code(p) == RLE_VLI_Bits) + return recv_bm_rle_bits(mdev, p, c); + + /* other variants had been implemented for evaluation, + * but have been dropped as this one turned out to be "best" + * during all our tests. */ + + dev_err(DEV, "receive_bitmap_c: unknown encoding %u\n", p->encoding); + drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR)); + return FAILED; +} + +void INFO_bm_xfer_stats(struct drbd_conf *mdev, + const char *direction, struct bm_xfer_ctx *c) +{ + /* what would it take to transfer it "plaintext" */ + unsigned plain = sizeof(struct p_header) * + ((c->bm_words+BM_PACKET_WORDS-1)/BM_PACKET_WORDS+1) + + c->bm_words * sizeof(long); + unsigned total = c->bytes[0] + c->bytes[1]; + unsigned r; + + /* total can not be zero. but just in case: */ + if (total == 0) + return; + + /* don't report if not compressed */ + if (total >= plain) + return; + + /* total < plain. check for overflow, still */ + r = (total > UINT_MAX/1000) ? (total / (plain/1000)) + : (1000 * total / plain); + + if (r > 1000) + r = 1000; + + r = 1000 - r; + dev_info(DEV, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), " + "total %u; compression: %u.%u%%\n", + direction, + c->bytes[1], c->packets[1], + c->bytes[0], c->packets[0], + total, r/10, r % 10); +} + +/* Since we are processing the bitfield from lower addresses to higher, + it does not matter if the process it in 32 bit chunks or 64 bit + chunks as long as it is little endian. (Understand it as byte stream, + beginning with the lowest byte...) If we would use big endian + we would need to process it from the highest address to the lowest, + in order to be agnostic to the 32 vs 64 bits issue. + + returns 0 on failure, 1 if we successfully received it. */ +static int receive_bitmap(struct drbd_conf *mdev, struct p_header *h) +{ + struct bm_xfer_ctx c; + void *buffer; + enum receive_bitmap_ret ret; + int ok = FALSE; + + wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_bio_cnt)); + + drbd_bm_lock(mdev, "receive bitmap"); + + /* maybe we should use some per thread scratch page, + * and allocate that during initial device creation? */ + buffer = (unsigned long *) __get_free_page(GFP_NOIO); + if (!buffer) { + dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__); + goto out; + } + + c = (struct bm_xfer_ctx) { + .bm_bits = drbd_bm_bits(mdev), + .bm_words = drbd_bm_words(mdev), + }; + + do { + if (h->command == P_BITMAP) { + ret = receive_bitmap_plain(mdev, h, buffer, &c); + } else if (h->command == P_COMPRESSED_BITMAP) { + /* MAYBE: sanity check that we speak proto >= 90, + * and the feature is enabled! */ + struct p_compressed_bm *p; + + if (h->length > BM_PACKET_PAYLOAD_BYTES) { + dev_err(DEV, "ReportCBitmap packet too large\n"); + goto out; + } + /* use the page buff */ + p = buffer; + memcpy(p, h, sizeof(*h)); + if (drbd_recv(mdev, p->head.payload, h->length) != h->length) + goto out; + if (p->head.length <= (sizeof(*p) - sizeof(p->head))) { + dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", p->head.length); + return FAILED; + } + ret = decode_bitmap_c(mdev, p, &c); + } else { + dev_warn(DEV, "receive_bitmap: h->command neither ReportBitMap nor ReportCBitMap (is 0x%x)", h->command); + goto out; + } + + c.packets[h->command == P_BITMAP]++; + c.bytes[h->command == P_BITMAP] += sizeof(struct p_header) + h->length; + + if (ret != OK) + break; + + if (!drbd_recv_header(mdev, h)) + goto out; + } while (ret == OK); + if (ret == FAILED) + goto out; + + INFO_bm_xfer_stats(mdev, "receive", &c); + + if (mdev->state.conn == C_WF_BITMAP_T) { + ok = !drbd_send_bitmap(mdev); + if (!ok) + goto out; + /* Omit CS_ORDERED with this state transition to avoid deadlocks. */ + ok = _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE); + D_ASSERT(ok == SS_SUCCESS); + } else if (mdev->state.conn != C_WF_BITMAP_S) { + /* admin may have requested C_DISCONNECTING, + * other threads may have noticed network errors */ + dev_info(DEV, "unexpected cstate (%s) in receive_bitmap\n", + drbd_conn_str(mdev->state.conn)); + } + + ok = TRUE; + out: + drbd_bm_unlock(mdev); + if (ok && mdev->state.conn == C_WF_BITMAP_S) + drbd_start_resync(mdev, C_SYNC_SOURCE); + free_page((unsigned long) buffer); + return ok; +} + +static int receive_skip(struct drbd_conf *mdev, struct p_header *h) +{ + /* TODO zero copy sink :) */ + static char sink[128]; + int size, want, r; + + dev_warn(DEV, "skipping unknown optional packet type %d, l: %d!\n", + h->command, h->length); + + size = h->length; + while (size > 0) { + want = min_t(int, size, sizeof(sink)); + r = drbd_recv(mdev, sink, want); + ERR_IF(r <= 0) break; + size -= r; + } + return size == 0; +} + +static int receive_UnplugRemote(struct drbd_conf *mdev, struct p_header *h) +{ + if (mdev->state.disk >= D_INCONSISTENT) + drbd_kick_lo(mdev); + + /* Make sure we've acked all the TCP data associated + * with the data requests being unplugged */ + drbd_tcp_quickack(mdev->data.socket); + + return TRUE; +} + +typedef int (*drbd_cmd_handler_f)(struct drbd_conf *, struct p_header *); + +static drbd_cmd_handler_f drbd_default_handler[] = { + [P_DATA] = receive_Data, + [P_DATA_REPLY] = receive_DataReply, + [P_RS_DATA_REPLY] = receive_RSDataReply, + [P_BARRIER] = receive_Barrier, + [P_BITMAP] = receive_bitmap, + [P_COMPRESSED_BITMAP] = receive_bitmap, + [P_UNPLUG_REMOTE] = receive_UnplugRemote, + [P_DATA_REQUEST] = receive_DataRequest, + [P_RS_DATA_REQUEST] = receive_DataRequest, + [P_SYNC_PARAM] = receive_SyncParam, + [P_SYNC_PARAM89] = receive_SyncParam, + [P_PROTOCOL] = receive_protocol, + [P_UUIDS] = receive_uuids, + [P_SIZES] = receive_sizes, + [P_STATE] = receive_state, + [P_STATE_CHG_REQ] = receive_req_state, + [P_SYNC_UUID] = receive_sync_uuid, + [P_OV_REQUEST] = receive_DataRequest, + [P_OV_REPLY] = receive_DataRequest, + [P_CSUM_RS_REQUEST] = receive_DataRequest, + /* anything missing from this table is in + * the asender_tbl, see get_asender_cmd */ + [P_MAX_CMD] = NULL, +}; + +static drbd_cmd_handler_f *drbd_cmd_handler = drbd_default_handler; +static drbd_cmd_handler_f *drbd_opt_cmd_handler; + +static void drbdd(struct drbd_conf *mdev) +{ + drbd_cmd_handler_f handler; + struct p_header *header = &mdev->data.rbuf.header; + + while (get_t_state(&mdev->receiver) == Running) { + drbd_thread_current_set_cpu(mdev); + if (!drbd_recv_header(mdev, header)) + break; + + if (header->command < P_MAX_CMD) + handler = drbd_cmd_handler[header->command]; + else if (P_MAY_IGNORE < header->command + && header->command < P_MAX_OPT_CMD) + handler = drbd_opt_cmd_handler[header->command-P_MAY_IGNORE]; + else if (header->command > P_MAX_OPT_CMD) + handler = receive_skip; + else + handler = NULL; + + if (unlikely(!handler)) { + dev_err(DEV, "unknown packet type %d, l: %d!\n", + header->command, header->length); + drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR)); + break; + } + if (unlikely(!handler(mdev, header))) { + dev_err(DEV, "error receiving %s, l: %d!\n", + cmdname(header->command), header->length); + drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR)); + break; + } + + trace_drbd_packet(mdev, mdev->data.socket, 2, &mdev->data.rbuf, + __FILE__, __LINE__); + } +} + +static void drbd_fail_pending_reads(struct drbd_conf *mdev) +{ + struct hlist_head *slot; + struct hlist_node *pos; + struct hlist_node *tmp; + struct drbd_request *req; + int i; + + /* + * Application READ requests + */ + spin_lock_irq(&mdev->req_lock); + for (i = 0; i < APP_R_HSIZE; i++) { + slot = mdev->app_reads_hash+i; + hlist_for_each_entry_safe(req, pos, tmp, slot, colision) { + /* it may (but should not any longer!) + * be on the work queue; if that assert triggers, + * we need to also grab the + * spin_lock_irq(&mdev->data.work.q_lock); + * and list_del_init here. */ + D_ASSERT(list_empty(&req->w.list)); + /* It would be nice to complete outside of spinlock. + * But this is easier for now. */ + _req_mod(req, connection_lost_while_pending); + } + } + for (i = 0; i < APP_R_HSIZE; i++) + if (!hlist_empty(mdev->app_reads_hash+i)) + dev_warn(DEV, "ASSERT FAILED: app_reads_hash[%d].first: " + "%p, should be NULL\n", i, mdev->app_reads_hash[i].first); + + memset(mdev->app_reads_hash, 0, APP_R_HSIZE*sizeof(void *)); + spin_unlock_irq(&mdev->req_lock); +} + +void drbd_flush_workqueue(struct drbd_conf *mdev) +{ + struct drbd_wq_barrier barr; + + barr.w.cb = w_prev_work_done; + init_completion(&barr.done); + drbd_queue_work(&mdev->data.work, &barr.w); + wait_for_completion(&barr.done); +} + +static void drbd_disconnect(struct drbd_conf *mdev) +{ + enum drbd_fencing_p fp; + union drbd_state os, ns; + int rv = SS_UNKNOWN_ERROR; + unsigned int i; + + if (mdev->state.conn == C_STANDALONE) + return; + if (mdev->state.conn >= C_WF_CONNECTION) + dev_err(DEV, "ASSERT FAILED cstate = %s, expected < WFConnection\n", + drbd_conn_str(mdev->state.conn)); + + /* asender does not clean up anything. it must not interfere, either */ + drbd_thread_stop(&mdev->asender); + + mutex_lock(&mdev->data.mutex); + drbd_free_sock(mdev); + mutex_unlock(&mdev->data.mutex); + + spin_lock_irq(&mdev->req_lock); + _drbd_wait_ee_list_empty(mdev, &mdev->active_ee); + _drbd_wait_ee_list_empty(mdev, &mdev->sync_ee); + _drbd_wait_ee_list_empty(mdev, &mdev->read_ee); + spin_unlock_irq(&mdev->req_lock); + + /* We do not have data structures that would allow us to + * get the rs_pending_cnt down to 0 again. + * * On C_SYNC_TARGET we do not have any data structures describing + * the pending RSDataRequest's we have sent. + * * On C_SYNC_SOURCE there is no data structure that tracks + * the P_RS_DATA_REPLY blocks that we sent to the SyncTarget. + * And no, it is not the sum of the reference counts in the + * resync_LRU. The resync_LRU tracks the whole operation including + * the disk-IO, while the rs_pending_cnt only tracks the blocks + * on the fly. */ + drbd_rs_cancel_all(mdev); + mdev->rs_total = 0; + mdev->rs_failed = 0; + atomic_set(&mdev->rs_pending_cnt, 0); + wake_up(&mdev->misc_wait); + + /* make sure syncer is stopped and w_resume_next_sg queued */ + del_timer_sync(&mdev->resync_timer); + set_bit(STOP_SYNC_TIMER, &mdev->flags); + resync_timer_fn((unsigned long)mdev); + + /* so we can be sure that all remote or resync reads + * made it at least to net_ee */ + wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt)); + + /* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier, + * w_make_resync_request etc. which may still be on the worker queue + * to be "canceled" */ + drbd_flush_workqueue(mdev); + + /* This also does reclaim_net_ee(). If we do this too early, we might + * miss some resync ee and pages.*/ + drbd_process_done_ee(mdev); + + kfree(mdev->p_uuid); + mdev->p_uuid = NULL; + + if (!mdev->state.susp) + tl_clear(mdev); + + drbd_fail_pending_reads(mdev); + + dev_info(DEV, "Connection closed\n"); + + drbd_md_sync(mdev); + + fp = FP_DONT_CARE; + if (get_ldev(mdev)) { + fp = mdev->ldev->dc.fencing; + put_ldev(mdev); + } + + if (mdev->state.role == R_PRIMARY) { + if (fp >= FP_RESOURCE && mdev->state.pdsk >= D_UNKNOWN) { + enum drbd_disk_state nps = drbd_try_outdate_peer(mdev); + drbd_request_state(mdev, NS(pdsk, nps)); + } + } + + spin_lock_irq(&mdev->req_lock); + os = mdev->state; + if (os.conn >= C_UNCONNECTED) { + /* Do not restart in case we are C_DISCONNECTING */ + ns = os; + ns.conn = C_UNCONNECTED; + rv = _drbd_set_state(mdev, ns, CS_VERBOSE, NULL); + } + spin_unlock_irq(&mdev->req_lock); + + if (os.conn == C_DISCONNECTING) { + struct hlist_head *h; + wait_event(mdev->misc_wait, atomic_read(&mdev->net_cnt) == 0); + + /* we must not free the tl_hash + * while application io is still on the fly */ + wait_event(mdev->misc_wait, atomic_read(&mdev->ap_bio_cnt) == 0); + + spin_lock_irq(&mdev->req_lock); + /* paranoia code */ + for (h = mdev->ee_hash; h < mdev->ee_hash + mdev->ee_hash_s; h++) + if (h->first) + dev_err(DEV, "ASSERT FAILED ee_hash[%u].first == %p, expected NULL\n", + (int)(h - mdev->ee_hash), h->first); + kfree(mdev->ee_hash); + mdev->ee_hash = NULL; + mdev->ee_hash_s = 0; + + /* paranoia code */ + for (h = mdev->tl_hash; h < mdev->tl_hash + mdev->tl_hash_s; h++) + if (h->first) + dev_err(DEV, "ASSERT FAILED tl_hash[%u] == %p, expected NULL\n", + (int)(h - mdev->tl_hash), h->first); + kfree(mdev->tl_hash); + mdev->tl_hash = NULL; + mdev->tl_hash_s = 0; + spin_unlock_irq(&mdev->req_lock); + + crypto_free_hash(mdev->cram_hmac_tfm); + mdev->cram_hmac_tfm = NULL; + + kfree(mdev->net_conf); + mdev->net_conf = NULL; + drbd_request_state(mdev, NS(conn, C_STANDALONE)); + } + + /* tcp_close and release of sendpage pages can be deferred. I don't + * want to use SO_LINGER, because apparently it can be deferred for + * more than 20 seconds (longest time I checked). + * + * Actually we don't care for exactly when the network stack does its + * put_page(), but release our reference on these pages right here. + */ + i = drbd_release_ee(mdev, &mdev->net_ee); + if (i) + dev_info(DEV, "net_ee not empty, killed %u entries\n", i); + i = atomic_read(&mdev->pp_in_use); + if (i) + dev_info(DEV, "pp_in_use = %u, expected 0\n", i); + + D_ASSERT(list_empty(&mdev->read_ee)); + D_ASSERT(list_empty(&mdev->active_ee)); + D_ASSERT(list_empty(&mdev->sync_ee)); + D_ASSERT(list_empty(&mdev->done_ee)); + + /* ok, no more ee's on the fly, it is safe to reset the epoch_size */ + atomic_set(&mdev->current_epoch->epoch_size, 0); + D_ASSERT(list_empty(&mdev->current_epoch->list)); +} + +/* + * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version + * we can agree on is stored in agreed_pro_version. + * + * feature flags and the reserved array should be enough room for future + * enhancements of the handshake protocol, and possible plugins... + * + * for now, they are expected to be zero, but ignored. + */ +static int drbd_send_handshake(struct drbd_conf *mdev) +{ + /* ASSERT current == mdev->receiver ... */ + struct p_handshake *p = &mdev->data.sbuf.handshake; + int ok; + + if (mutex_lock_interruptible(&mdev->data.mutex)) { + dev_err(DEV, "interrupted during initial handshake\n"); + return 0; /* interrupted. not ok. */ + } + + if (mdev->data.socket == NULL) { + mutex_unlock(&mdev->data.mutex); + return 0; + } + + memset(p, 0, sizeof(*p)); + p->protocol_min = cpu_to_be32(PRO_VERSION_MIN); + p->protocol_max = cpu_to_be32(PRO_VERSION_MAX); + ok = _drbd_send_cmd( mdev, mdev->data.socket, P_HAND_SHAKE, + (struct p_header *)p, sizeof(*p), 0 ); + mutex_unlock(&mdev->data.mutex); + return ok; +} + +/* + * return values: + * 1 yes, we have a valid connection + * 0 oops, did not work out, please try again + * -1 peer talks different language, + * no point in trying again, please go standalone. + */ +static int drbd_do_handshake(struct drbd_conf *mdev) +{ + /* ASSERT current == mdev->receiver ... */ + struct p_handshake *p = &mdev->data.rbuf.handshake; + const int expect = sizeof(struct p_handshake) + -sizeof(struct p_header); + int rv; + + rv = drbd_send_handshake(mdev); + if (!rv) + return 0; + + rv = drbd_recv_header(mdev, &p->head); + if (!rv) + return 0; + + if (p->head.command != P_HAND_SHAKE) { + dev_err(DEV, "expected HandShake packet, received: %s (0x%04x)\n", + cmdname(p->head.command), p->head.command); + return -1; + } + + if (p->head.length != expect) { + dev_err(DEV, "expected HandShake length: %u, received: %u\n", + expect, p->head.length); + return -1; + } + + rv = drbd_recv(mdev, &p->head.payload, expect); + + if (rv != expect) { + dev_err(DEV, "short read receiving handshake packet: l=%u\n", rv); + return 0; + } + + trace_drbd_packet(mdev, mdev->data.socket, 2, &mdev->data.rbuf, + __FILE__, __LINE__); + + p->protocol_min = be32_to_cpu(p->protocol_min); + p->protocol_max = be32_to_cpu(p->protocol_max); + if (p->protocol_max == 0) + p->protocol_max = p->protocol_min; + + if (PRO_VERSION_MAX < p->protocol_min || + PRO_VERSION_MIN > p->protocol_max) + goto incompat; + + mdev->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max); + + dev_info(DEV, "Handshake successful: " + "Agreed network protocol version %d\n", mdev->agreed_pro_version); + + return 1; + + incompat: + dev_err(DEV, "incompatible DRBD dialects: " + "I support %d-%d, peer supports %d-%d\n", + PRO_VERSION_MIN, PRO_VERSION_MAX, + p->protocol_min, p->protocol_max); + return -1; +} + +#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE) +static int drbd_do_auth(struct drbd_conf *mdev) +{ + dev_err(DEV, "This kernel was build without CONFIG_CRYPTO_HMAC.\n"); + dev_err(DEV, "You need to disable 'cram-hmac-alg' in drbd.conf.\n"); + return 0; +} +#else +#define CHALLENGE_LEN 64 +static int drbd_do_auth(struct drbd_conf *mdev) +{ + char my_challenge[CHALLENGE_LEN]; /* 64 Bytes... */ + struct scatterlist sg; + char *response = NULL; + char *right_response = NULL; + char *peers_ch = NULL; + struct p_header p; + unsigned int key_len = strlen(mdev->net_conf->shared_secret); + unsigned int resp_size; + struct hash_desc desc; + int rv; + + desc.tfm = mdev->cram_hmac_tfm; + desc.flags = 0; + + rv = crypto_hash_setkey(mdev->cram_hmac_tfm, + (u8 *)mdev->net_conf->shared_secret, key_len); + if (rv) { + dev_err(DEV, "crypto_hash_setkey() failed with %d\n", rv); + rv = 0; + goto fail; + } + + get_random_bytes(my_challenge, CHALLENGE_LEN); + + rv = drbd_send_cmd2(mdev, P_AUTH_CHALLENGE, my_challenge, CHALLENGE_LEN); + if (!rv) + goto fail; + + rv = drbd_recv_header(mdev, &p); + if (!rv) + goto fail; + + if (p.command != P_AUTH_CHALLENGE) { + dev_err(DEV, "expected AuthChallenge packet, received: %s (0x%04x)\n", + cmdname(p.command), p.command); + rv = 0; + goto fail; + } + + if (p.length > CHALLENGE_LEN*2) { + dev_err(DEV, "expected AuthChallenge payload too big.\n"); + rv = 0; + goto fail; + } + + peers_ch = kmalloc(p.length, GFP_NOIO); + if (peers_ch == NULL) { + dev_err(DEV, "kmalloc of peers_ch failed\n"); + rv = 0; + goto fail; + } + + rv = drbd_recv(mdev, peers_ch, p.length); + + if (rv != p.length) { + dev_err(DEV, "short read AuthChallenge: l=%u\n", rv); + rv = 0; + goto fail; + } + + resp_size = crypto_hash_digestsize(mdev->cram_hmac_tfm); + response = kmalloc(resp_size, GFP_NOIO); + if (response == NULL) { + dev_err(DEV, "kmalloc of response failed\n"); + rv = 0; + goto fail; + } + + sg_init_table(&sg, 1); + sg_set_buf(&sg, peers_ch, p.length); + + rv = crypto_hash_digest(&desc, &sg, sg.length, response); + if (rv) { + dev_err(DEV, "crypto_hash_digest() failed with %d\n", rv); + rv = 0; + goto fail; + } + + rv = drbd_send_cmd2(mdev, P_AUTH_RESPONSE, response, resp_size); + if (!rv) + goto fail; + + rv = drbd_recv_header(mdev, &p); + if (!rv) + goto fail; + + if (p.command != P_AUTH_RESPONSE) { + dev_err(DEV, "expected AuthResponse packet, received: %s (0x%04x)\n", + cmdname(p.command), p.command); + rv = 0; + goto fail; + } + + if (p.length != resp_size) { + dev_err(DEV, "expected AuthResponse payload of wrong size\n"); + rv = 0; + goto fail; + } + + rv = drbd_recv(mdev, response , resp_size); + + if (rv != resp_size) { + dev_err(DEV, "short read receiving AuthResponse: l=%u\n", rv); + rv = 0; + goto fail; + } + + right_response = kmalloc(resp_size, GFP_NOIO); + if (response == NULL) { + dev_err(DEV, "kmalloc of right_response failed\n"); + rv = 0; + goto fail; + } + + sg_set_buf(&sg, my_challenge, CHALLENGE_LEN); + + rv = crypto_hash_digest(&desc, &sg, sg.length, right_response); + if (rv) { + dev_err(DEV, "crypto_hash_digest() failed with %d\n", rv); + rv = 0; + goto fail; + } + + rv = !memcmp(response, right_response, resp_size); + + if (rv) + dev_info(DEV, "Peer authenticated using %d bytes of '%s' HMAC\n", + resp_size, mdev->net_conf->cram_hmac_alg); + + fail: + kfree(peers_ch); + kfree(response); + kfree(right_response); + + return rv; +} +#endif + +int drbdd_init(struct drbd_thread *thi) +{ + struct drbd_conf *mdev = thi->mdev; + unsigned int minor = mdev_to_minor(mdev); + int h; + + sprintf(current->comm, "drbd%d_receiver", minor); + + dev_info(DEV, "receiver (re)started\n"); + + do { + h = drbd_connect(mdev); + if (h == 0) { + drbd_disconnect(mdev); + __set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(HZ); + } + if (h == -1) { + dev_warn(DEV, "Discarding network configuration.\n"); + drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); + } + } while (h == 0); + + if (h > 0) { + if (get_net_conf(mdev)) { + drbdd(mdev); + put_net_conf(mdev); + } + } + + drbd_disconnect(mdev); + + dev_info(DEV, "receiver terminated\n"); + return 0; +} + +/* ********* acknowledge sender ******** */ + +static int got_RqSReply(struct drbd_conf *mdev, struct p_header *h) +{ + struct p_req_state_reply *p = (struct p_req_state_reply *)h; + + int retcode = be32_to_cpu(p->retcode); + + if (retcode >= SS_SUCCESS) { + set_bit(CL_ST_CHG_SUCCESS, &mdev->flags); + } else { + set_bit(CL_ST_CHG_FAIL, &mdev->flags); + dev_err(DEV, "Requested state change failed by peer: %s (%d)\n", + drbd_set_st_err_str(retcode), retcode); + } + wake_up(&mdev->state_wait); + + return TRUE; +} + +static int got_Ping(struct drbd_conf *mdev, struct p_header *h) +{ + return drbd_send_ping_ack(mdev); + +} + +static int got_PingAck(struct drbd_conf *mdev, struct p_header *h) +{ + /* restore idle timeout */ + mdev->meta.socket->sk->sk_rcvtimeo = mdev->net_conf->ping_int*HZ; + + return TRUE; +} + +static int got_IsInSync(struct drbd_conf *mdev, struct p_header *h) +{ + struct p_block_ack *p = (struct p_block_ack *)h; + sector_t sector = be64_to_cpu(p->sector); + int blksize = be32_to_cpu(p->blksize); + + D_ASSERT(mdev->agreed_pro_version >= 89); + + update_peer_seq(mdev, be32_to_cpu(p->seq_num)); + + drbd_rs_complete_io(mdev, sector); + drbd_set_in_sync(mdev, sector, blksize); + /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */ + mdev->rs_same_csum += (blksize >> BM_BLOCK_SHIFT); + dec_rs_pending(mdev); + + return TRUE; +} + +/* when we receive the ACK for a write request, + * verify that we actually know about it */ +static struct drbd_request *_ack_id_to_req(struct drbd_conf *mdev, + u64 id, sector_t sector) +{ + struct hlist_head *slot = tl_hash_slot(mdev, sector); + struct hlist_node *n; + struct drbd_request *req; + + hlist_for_each_entry(req, n, slot, colision) { + if ((unsigned long)req == (unsigned long)id) { + if (req->sector != sector) { + dev_err(DEV, "_ack_id_to_req: found req %p but it has " + "wrong sector (%llus versus %llus)\n", req, + (unsigned long long)req->sector, + (unsigned long long)sector); + break; + } + return req; + } + } + dev_err(DEV, "_ack_id_to_req: failed to find req %p, sector %llus in list\n", + (void *)(unsigned long)id, (unsigned long long)sector); + return NULL; +} + +typedef struct drbd_request *(req_validator_fn) + (struct drbd_conf *mdev, u64 id, sector_t sector); + +static int validate_req_change_req_state(struct drbd_conf *mdev, + u64 id, sector_t sector, req_validator_fn validator, + const char *func, enum drbd_req_event what) +{ + struct drbd_request *req; + struct bio_and_error m; + + spin_lock_irq(&mdev->req_lock); + req = validator(mdev, id, sector); + if (unlikely(!req)) { + spin_unlock_irq(&mdev->req_lock); + dev_err(DEV, "%s: got a corrupt block_id/sector pair\n", func); + return FALSE; + } + __req_mod(req, what, &m); + spin_unlock_irq(&mdev->req_lock); + + if (m.bio) + complete_master_bio(mdev, &m); + return TRUE; +} + +static int got_BlockAck(struct drbd_conf *mdev, struct p_header *h) +{ + struct p_block_ack *p = (struct p_block_ack *)h; + sector_t sector = be64_to_cpu(p->sector); + int blksize = be32_to_cpu(p->blksize); + enum drbd_req_event what; + + update_peer_seq(mdev, be32_to_cpu(p->seq_num)); + + if (is_syncer_block_id(p->block_id)) { + drbd_set_in_sync(mdev, sector, blksize); + dec_rs_pending(mdev); + return TRUE; + } + switch (be16_to_cpu(h->command)) { + case P_RS_WRITE_ACK: + D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C); + what = write_acked_by_peer_and_sis; + break; + case P_WRITE_ACK: + D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C); + what = write_acked_by_peer; + break; + case P_RECV_ACK: + D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_B); + what = recv_acked_by_peer; + break; + case P_DISCARD_ACK: + D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C); + what = conflict_discarded_by_peer; + break; + default: + D_ASSERT(0); + return FALSE; + } + + return validate_req_change_req_state(mdev, p->block_id, sector, + _ack_id_to_req, __func__ , what); +} + +static int got_NegAck(struct drbd_conf *mdev, struct p_header *h) +{ + struct p_block_ack *p = (struct p_block_ack *)h; + sector_t sector = be64_to_cpu(p->sector); + + if (__ratelimit(&drbd_ratelimit_state)) + dev_warn(DEV, "Got NegAck packet. Peer is in troubles?\n"); + + update_peer_seq(mdev, be32_to_cpu(p->seq_num)); + + if (is_syncer_block_id(p->block_id)) { + int size = be32_to_cpu(p->blksize); + dec_rs_pending(mdev); + drbd_rs_failed_io(mdev, sector, size); + return TRUE; + } + return validate_req_change_req_state(mdev, p->block_id, sector, + _ack_id_to_req, __func__ , neg_acked); +} + +static int got_NegDReply(struct drbd_conf *mdev, struct p_header *h) +{ + struct p_block_ack *p = (struct p_block_ack *)h; + sector_t sector = be64_to_cpu(p->sector); + + update_peer_seq(mdev, be32_to_cpu(p->seq_num)); + dev_err(DEV, "Got NegDReply; Sector %llus, len %u; Fail original request.\n", + (unsigned long long)sector, be32_to_cpu(p->blksize)); + + return validate_req_change_req_state(mdev, p->block_id, sector, + _ar_id_to_req, __func__ , neg_acked); +} + +static int got_NegRSDReply(struct drbd_conf *mdev, struct p_header *h) +{ + sector_t sector; + int size; + struct p_block_ack *p = (struct p_block_ack *)h; + + sector = be64_to_cpu(p->sector); + size = be32_to_cpu(p->blksize); + D_ASSERT(p->block_id == ID_SYNCER); + + update_peer_seq(mdev, be32_to_cpu(p->seq_num)); + + dec_rs_pending(mdev); + + if (get_ldev_if_state(mdev, D_FAILED)) { + drbd_rs_complete_io(mdev, sector); + drbd_rs_failed_io(mdev, sector, size); + put_ldev(mdev); + } + + return TRUE; +} + +static int got_BarrierAck(struct drbd_conf *mdev, struct p_header *h) +{ + struct p_barrier_ack *p = (struct p_barrier_ack *)h; + + tl_release(mdev, p->barrier, be32_to_cpu(p->set_size)); + + return TRUE; +} + +static int got_OVResult(struct drbd_conf *mdev, struct p_header *h) +{ + struct p_block_ack *p = (struct p_block_ack *)h; + struct drbd_work *w; + sector_t sector; + int size; + + sector = be64_to_cpu(p->sector); + size = be32_to_cpu(p->blksize); + + update_peer_seq(mdev, be32_to_cpu(p->seq_num)); + + if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC) + drbd_ov_oos_found(mdev, sector, size); + else + ov_oos_print(mdev); + + drbd_rs_complete_io(mdev, sector); + dec_rs_pending(mdev); + + if (--mdev->ov_left == 0) { + w = kmalloc(sizeof(*w), GFP_NOIO); + if (w) { + w->cb = w_ov_finished; + drbd_queue_work_front(&mdev->data.work, w); + } else { + dev_err(DEV, "kmalloc(w) failed."); + ov_oos_print(mdev); + drbd_resync_finished(mdev); + } + } + return TRUE; +} + +struct asender_cmd { + size_t pkt_size; + int (*process)(struct drbd_conf *mdev, struct p_header *h); +}; + +static struct asender_cmd *get_asender_cmd(int cmd) +{ + static struct asender_cmd asender_tbl[] = { + /* anything missing from this table is in + * the drbd_cmd_handler (drbd_default_handler) table, + * see the beginning of drbdd() */ + [P_PING] = { sizeof(struct p_header), got_Ping }, + [P_PING_ACK] = { sizeof(struct p_header), got_PingAck }, + [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, + [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, + [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, + [P_DISCARD_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, + [P_NEG_ACK] = { sizeof(struct p_block_ack), got_NegAck }, + [P_NEG_DREPLY] = { sizeof(struct p_block_ack), got_NegDReply }, + [P_NEG_RS_DREPLY] = { sizeof(struct p_block_ack), got_NegRSDReply}, + [P_OV_RESULT] = { sizeof(struct p_block_ack), got_OVResult }, + [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), got_BarrierAck }, + [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply }, + [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync }, + [P_MAX_CMD] = { 0, NULL }, + }; + if (cmd > P_MAX_CMD || asender_tbl[cmd].process == NULL) + return NULL; + return &asender_tbl[cmd]; +} + +int drbd_asender(struct drbd_thread *thi) +{ + struct drbd_conf *mdev = thi->mdev; + struct p_header *h = &mdev->meta.rbuf.header; + struct asender_cmd *cmd = NULL; + + int rv, len; + void *buf = h; + int received = 0; + int expect = sizeof(struct p_header); + int empty; + + sprintf(current->comm, "drbd%d_asender", mdev_to_minor(mdev)); + + current->policy = SCHED_RR; /* Make this a realtime task! */ + current->rt_priority = 2; /* more important than all other tasks */ + + while (get_t_state(thi) == Running) { + drbd_thread_current_set_cpu(mdev); + if (test_and_clear_bit(SEND_PING, &mdev->flags)) { + ERR_IF(!drbd_send_ping(mdev)) goto reconnect; + mdev->meta.socket->sk->sk_rcvtimeo = + mdev->net_conf->ping_timeo*HZ/10; + } + + /* conditionally cork; + * it may hurt latency if we cork without much to send */ + if (!mdev->net_conf->no_cork && + 3 < atomic_read(&mdev->unacked_cnt)) + drbd_tcp_cork(mdev->meta.socket); + while (1) { + clear_bit(SIGNAL_ASENDER, &mdev->flags); + flush_signals(current); + if (!drbd_process_done_ee(mdev)) { + dev_err(DEV, "process_done_ee() = NOT_OK\n"); + goto reconnect; + } + /* to avoid race with newly queued ACKs */ + set_bit(SIGNAL_ASENDER, &mdev->flags); + spin_lock_irq(&mdev->req_lock); + empty = list_empty(&mdev->done_ee); + spin_unlock_irq(&mdev->req_lock); + /* new ack may have been queued right here, + * but then there is also a signal pending, + * and we start over... */ + if (empty) + break; + } + /* but unconditionally uncork unless disabled */ + if (!mdev->net_conf->no_cork) + drbd_tcp_uncork(mdev->meta.socket); + + /* short circuit, recv_msg would return EINTR anyways. */ + if (signal_pending(current)) + continue; + + rv = drbd_recv_short(mdev, mdev->meta.socket, + buf, expect-received, 0); + clear_bit(SIGNAL_ASENDER, &mdev->flags); + + flush_signals(current); + + /* Note: + * -EINTR (on meta) we got a signal + * -EAGAIN (on meta) rcvtimeo expired + * -ECONNRESET other side closed the connection + * -ERESTARTSYS (on data) we got a signal + * rv < 0 other than above: unexpected error! + * rv == expected: full header or command + * rv < expected: "woken" by signal during receive + * rv == 0 : "connection shut down by peer" + */ + if (likely(rv > 0)) { + received += rv; + buf += rv; + } else if (rv == 0) { + dev_err(DEV, "meta connection shut down by peer.\n"); + goto reconnect; + } else if (rv == -EAGAIN) { + if (mdev->meta.socket->sk->sk_rcvtimeo == + mdev->net_conf->ping_timeo*HZ/10) { + dev_err(DEV, "PingAck did not arrive in time.\n"); + goto reconnect; + } + set_bit(SEND_PING, &mdev->flags); + continue; + } else if (rv == -EINTR) { + continue; + } else { + dev_err(DEV, "sock_recvmsg returned %d\n", rv); + goto reconnect; + } + + if (received == expect && cmd == NULL) { + if (unlikely(h->magic != BE_DRBD_MAGIC)) { + dev_err(DEV, "magic?? on meta m: 0x%lx c: %d l: %d\n", + (long)be32_to_cpu(h->magic), + h->command, h->length); + goto reconnect; + } + cmd = get_asender_cmd(be16_to_cpu(h->command)); + len = be16_to_cpu(h->length); + if (unlikely(cmd == NULL)) { + dev_err(DEV, "unknown command?? on meta m: 0x%lx c: %d l: %d\n", + (long)be32_to_cpu(h->magic), + h->command, h->length); + goto disconnect; + } + expect = cmd->pkt_size; + ERR_IF(len != expect-sizeof(struct p_header)) { + trace_drbd_packet(mdev, mdev->meta.socket, 1, (void *)h, __FILE__, __LINE__); + goto reconnect; + } + } + if (received == expect) { + D_ASSERT(cmd != NULL); + trace_drbd_packet(mdev, mdev->meta.socket, 1, (void *)h, __FILE__, __LINE__); + if (!cmd->process(mdev, h)) + goto reconnect; + + buf = h; + received = 0; + expect = sizeof(struct p_header); + cmd = NULL; + } + } + + if (0) { +reconnect: + drbd_force_state(mdev, NS(conn, C_NETWORK_FAILURE)); + } + if (0) { +disconnect: + drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); + } + clear_bit(SIGNAL_ASENDER, &mdev->flags); + + D_ASSERT(mdev->state.conn < C_CONNECTED); + dev_info(DEV, "asender terminated\n"); + + return 0; +} diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c new file mode 100644 index 000000000000..0656cf1edd57 --- /dev/null +++ b/drivers/block/drbd/drbd_req.c @@ -0,0 +1,1132 @@ +/* + drbd_req.c + + This file is part of DRBD by Philipp Reisner and Lars Ellenberg. + + Copyright (C) 2001-2008, LINBIT Information Technologies GmbH. + Copyright (C) 1999-2008, Philipp Reisner . + Copyright (C) 2002-2008, Lars Ellenberg . + + drbd is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + drbd is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with drbd; see the file COPYING. If not, write to + the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + + */ + +#include +#include + +#include +#include +#include "drbd_int.h" +#include "drbd_tracing.h" +#include "drbd_req.h" + + +/* Update disk stats at start of I/O request */ +static void _drbd_start_io_acct(struct drbd_conf *mdev, struct drbd_request *req, struct bio *bio) +{ + const int rw = bio_data_dir(bio); + int cpu; + cpu = part_stat_lock(); + part_stat_inc(cpu, &mdev->vdisk->part0, ios[rw]); + part_stat_add(cpu, &mdev->vdisk->part0, sectors[rw], bio_sectors(bio)); + part_stat_unlock(); + mdev->vdisk->part0.in_flight[rw]++; +} + +/* Update disk stats when completing request upwards */ +static void _drbd_end_io_acct(struct drbd_conf *mdev, struct drbd_request *req) +{ + int rw = bio_data_dir(req->master_bio); + unsigned long duration = jiffies - req->start_time; + int cpu; + cpu = part_stat_lock(); + part_stat_add(cpu, &mdev->vdisk->part0, ticks[rw], duration); + part_round_stats(cpu, &mdev->vdisk->part0); + part_stat_unlock(); + mdev->vdisk->part0.in_flight[rw]--; +} + +static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const int rw) +{ + const unsigned long s = req->rq_state; + /* if it was a write, we may have to set the corresponding + * bit(s) out-of-sync first. If it had a local part, we need to + * release the reference to the activity log. */ + if (rw == WRITE) { + /* remove it from the transfer log. + * well, only if it had been there in the first + * place... if it had not (local only or conflicting + * and never sent), it should still be "empty" as + * initialized in drbd_req_new(), so we can list_del() it + * here unconditionally */ + list_del(&req->tl_requests); + /* Set out-of-sync unless both OK flags are set + * (local only or remote failed). + * Other places where we set out-of-sync: + * READ with local io-error */ + if (!(s & RQ_NET_OK) || !(s & RQ_LOCAL_OK)) + drbd_set_out_of_sync(mdev, req->sector, req->size); + + if ((s & RQ_NET_OK) && (s & RQ_LOCAL_OK) && (s & RQ_NET_SIS)) + drbd_set_in_sync(mdev, req->sector, req->size); + + /* one might be tempted to move the drbd_al_complete_io + * to the local io completion callback drbd_endio_pri. + * but, if this was a mirror write, we may only + * drbd_al_complete_io after this is RQ_NET_DONE, + * otherwise the extent could be dropped from the al + * before it has actually been written on the peer. + * if we crash before our peer knows about the request, + * but after the extent has been dropped from the al, + * we would forget to resync the corresponding extent. + */ + if (s & RQ_LOCAL_MASK) { + if (get_ldev_if_state(mdev, D_FAILED)) { + drbd_al_complete_io(mdev, req->sector); + put_ldev(mdev); + } else if (__ratelimit(&drbd_ratelimit_state)) { + dev_warn(DEV, "Should have called drbd_al_complete_io(, %llu), " + "but my Disk seems to have failed :(\n", + (unsigned long long) req->sector); + } + } + } + + /* if it was a local io error, we want to notify our + * peer about that, and see if we need to + * detach the disk and stuff. + * to avoid allocating some special work + * struct, reuse the request. */ + + /* THINK + * why do we do this not when we detect the error, + * but delay it until it is "done", i.e. possibly + * until the next barrier ack? */ + + if (rw == WRITE && + ((s & RQ_LOCAL_MASK) && !(s & RQ_LOCAL_OK))) { + if (!(req->w.list.next == LIST_POISON1 || + list_empty(&req->w.list))) { + /* DEBUG ASSERT only; if this triggers, we + * probably corrupt the worker list here */ + dev_err(DEV, "req->w.list.next = %p\n", req->w.list.next); + dev_err(DEV, "req->w.list.prev = %p\n", req->w.list.prev); + } + req->w.cb = w_io_error; + drbd_queue_work(&mdev->data.work, &req->w); + /* drbd_req_free() is done in w_io_error */ + } else { + drbd_req_free(req); + } +} + +static void queue_barrier(struct drbd_conf *mdev) +{ + struct drbd_tl_epoch *b; + + /* We are within the req_lock. Once we queued the barrier for sending, + * we set the CREATE_BARRIER bit. It is cleared as soon as a new + * barrier/epoch object is added. This is the only place this bit is + * set. It indicates that the barrier for this epoch is already queued, + * and no new epoch has been created yet. */ + if (test_bit(CREATE_BARRIER, &mdev->flags)) + return; + + b = mdev->newest_tle; + b->w.cb = w_send_barrier; + /* inc_ap_pending done here, so we won't + * get imbalanced on connection loss. + * dec_ap_pending will be done in got_BarrierAck + * or (on connection loss) in tl_clear. */ + inc_ap_pending(mdev); + drbd_queue_work(&mdev->data.work, &b->w); + set_bit(CREATE_BARRIER, &mdev->flags); +} + +static void _about_to_complete_local_write(struct drbd_conf *mdev, + struct drbd_request *req) +{ + const unsigned long s = req->rq_state; + struct drbd_request *i; + struct drbd_epoch_entry *e; + struct hlist_node *n; + struct hlist_head *slot; + + /* before we can signal completion to the upper layers, + * we may need to close the current epoch */ + if (mdev->state.conn >= C_CONNECTED && + req->epoch == mdev->newest_tle->br_number) + queue_barrier(mdev); + + /* we need to do the conflict detection stuff, + * if we have the ee_hash (two_primaries) and + * this has been on the network */ + if ((s & RQ_NET_DONE) && mdev->ee_hash != NULL) { + const sector_t sector = req->sector; + const int size = req->size; + + /* ASSERT: + * there must be no conflicting requests, since + * they must have been failed on the spot */ +#define OVERLAPS overlaps(sector, size, i->sector, i->size) + slot = tl_hash_slot(mdev, sector); + hlist_for_each_entry(i, n, slot, colision) { + if (OVERLAPS) { + dev_alert(DEV, "LOGIC BUG: completed: %p %llus +%u; " + "other: %p %llus +%u\n", + req, (unsigned long long)sector, size, + i, (unsigned long long)i->sector, i->size); + } + } + + /* maybe "wake" those conflicting epoch entries + * that wait for this request to finish. + * + * currently, there can be only _one_ such ee + * (well, or some more, which would be pending + * P_DISCARD_ACK not yet sent by the asender...), + * since we block the receiver thread upon the + * first conflict detection, which will wait on + * misc_wait. maybe we want to assert that? + * + * anyways, if we found one, + * we just have to do a wake_up. */ +#undef OVERLAPS +#define OVERLAPS overlaps(sector, size, e->sector, e->size) + slot = ee_hash_slot(mdev, req->sector); + hlist_for_each_entry(e, n, slot, colision) { + if (OVERLAPS) { + wake_up(&mdev->misc_wait); + break; + } + } + } +#undef OVERLAPS +} + +void complete_master_bio(struct drbd_conf *mdev, + struct bio_and_error *m) +{ + trace_drbd_bio(mdev, "Rq", m->bio, 1, NULL); + bio_endio(m->bio, m->error); + dec_ap_bio(mdev); +} + +/* Helper for __req_mod(). + * Set m->bio to the master bio, if it is fit to be completed, + * or leave it alone (it is initialized to NULL in __req_mod), + * if it has already been completed, or cannot be completed yet. + * If m->bio is set, the error status to be returned is placed in m->error. + */ +void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) +{ + const unsigned long s = req->rq_state; + struct drbd_conf *mdev = req->mdev; + /* only WRITES may end up here without a master bio (on barrier ack) */ + int rw = req->master_bio ? bio_data_dir(req->master_bio) : WRITE; + + trace_drbd_req(req, nothing, "_req_may_be_done"); + + /* we must not complete the master bio, while it is + * still being processed by _drbd_send_zc_bio (drbd_send_dblock) + * not yet acknowledged by the peer + * not yet completed by the local io subsystem + * these flags may get cleared in any order by + * the worker, + * the receiver, + * the bio_endio completion callbacks. + */ + if (s & RQ_NET_QUEUED) + return; + if (s & RQ_NET_PENDING) + return; + if (s & RQ_LOCAL_PENDING) + return; + + if (req->master_bio) { + /* this is data_received (remote read) + * or protocol C P_WRITE_ACK + * or protocol B P_RECV_ACK + * or protocol A "handed_over_to_network" (SendAck) + * or canceled or failed, + * or killed from the transfer log due to connection loss. + */ + + /* + * figure out whether to report success or failure. + * + * report success when at least one of the operations succeeded. + * or, to put the other way, + * only report failure, when both operations failed. + * + * what to do about the failures is handled elsewhere. + * what we need to do here is just: complete the master_bio. + * + * local completion error, if any, has been stored as ERR_PTR + * in private_bio within drbd_endio_pri. + */ + int ok = (s & RQ_LOCAL_OK) || (s & RQ_NET_OK); + int error = PTR_ERR(req->private_bio); + + /* remove the request from the conflict detection + * respective block_id verification hash */ + if (!hlist_unhashed(&req->colision)) + hlist_del(&req->colision); + else + D_ASSERT((s & RQ_NET_MASK) == 0); + + /* for writes we need to do some extra housekeeping */ + if (rw == WRITE) + _about_to_complete_local_write(mdev, req); + + /* Update disk stats */ + _drbd_end_io_acct(mdev, req); + + m->error = ok ? 0 : (error ?: -EIO); + m->bio = req->master_bio; + req->master_bio = NULL; + } + + if ((s & RQ_NET_MASK) == 0 || (s & RQ_NET_DONE)) { + /* this is disconnected (local only) operation, + * or protocol C P_WRITE_ACK, + * or protocol A or B P_BARRIER_ACK, + * or killed from the transfer log due to connection loss. */ + _req_is_done(mdev, req, rw); + } + /* else: network part and not DONE yet. that is + * protocol A or B, barrier ack still pending... */ +} + +/* + * checks whether there was an overlapping request + * or ee already registered. + * + * if so, return 1, in which case this request is completed on the spot, + * without ever being submitted or send. + * + * return 0 if it is ok to submit this request. + * + * NOTE: + * paranoia: assume something above us is broken, and issues different write + * requests for the same block simultaneously... + * + * To ensure these won't be reordered differently on both nodes, resulting in + * diverging data sets, we discard the later one(s). Not that this is supposed + * to happen, but this is the rationale why we also have to check for + * conflicting requests with local origin, and why we have to do so regardless + * of whether we allowed multiple primaries. + * + * BTW, in case we only have one primary, the ee_hash is empty anyways, and the + * second hlist_for_each_entry becomes a noop. This is even simpler than to + * grab a reference on the net_conf, and check for the two_primaries flag... + */ +static int _req_conflicts(struct drbd_request *req) +{ + struct drbd_conf *mdev = req->mdev; + const sector_t sector = req->sector; + const int size = req->size; + struct drbd_request *i; + struct drbd_epoch_entry *e; + struct hlist_node *n; + struct hlist_head *slot; + + D_ASSERT(hlist_unhashed(&req->colision)); + + if (!get_net_conf(mdev)) + return 0; + + /* BUG_ON */ + ERR_IF (mdev->tl_hash_s == 0) + goto out_no_conflict; + BUG_ON(mdev->tl_hash == NULL); + +#define OVERLAPS overlaps(i->sector, i->size, sector, size) + slot = tl_hash_slot(mdev, sector); + hlist_for_each_entry(i, n, slot, colision) { + if (OVERLAPS) { + dev_alert(DEV, "%s[%u] Concurrent local write detected! " + "[DISCARD L] new: %llus +%u; " + "pending: %llus +%u\n", + current->comm, current->pid, + (unsigned long long)sector, size, + (unsigned long long)i->sector, i->size); + goto out_conflict; + } + } + + if (mdev->ee_hash_s) { + /* now, check for overlapping requests with remote origin */ + BUG_ON(mdev->ee_hash == NULL); +#undef OVERLAPS +#define OVERLAPS overlaps(e->sector, e->size, sector, size) + slot = ee_hash_slot(mdev, sector); + hlist_for_each_entry(e, n, slot, colision) { + if (OVERLAPS) { + dev_alert(DEV, "%s[%u] Concurrent remote write detected!" + " [DISCARD L] new: %llus +%u; " + "pending: %llus +%u\n", + current->comm, current->pid, + (unsigned long long)sector, size, + (unsigned long long)e->sector, e->size); + goto out_conflict; + } + } + } +#undef OVERLAPS + +out_no_conflict: + /* this is like it should be, and what we expected. + * our users do behave after all... */ + put_net_conf(mdev); + return 0; + +out_conflict: + put_net_conf(mdev); + return 1; +} + +/* obviously this could be coded as many single functions + * instead of one huge switch, + * or by putting the code directly in the respective locations + * (as it has been before). + * + * but having it this way + * enforces that it is all in this one place, where it is easier to audit, + * it makes it obvious that whatever "event" "happens" to a request should + * happen "atomically" within the req_lock, + * and it enforces that we have to think in a very structured manner + * about the "events" that may happen to a request during its life time ... + */ +void __req_mod(struct drbd_request *req, enum drbd_req_event what, + struct bio_and_error *m) +{ + struct drbd_conf *mdev = req->mdev; + m->bio = NULL; + + trace_drbd_req(req, what, NULL); + + switch (what) { + default: + dev_err(DEV, "LOGIC BUG in %s:%u\n", __FILE__ , __LINE__); + break; + + /* does not happen... + * initialization done in drbd_req_new + case created: + break; + */ + + case to_be_send: /* via network */ + /* reached via drbd_make_request_common + * and from w_read_retry_remote */ + D_ASSERT(!(req->rq_state & RQ_NET_MASK)); + req->rq_state |= RQ_NET_PENDING; + inc_ap_pending(mdev); + break; + + case to_be_submitted: /* locally */ + /* reached via drbd_make_request_common */ + D_ASSERT(!(req->rq_state & RQ_LOCAL_MASK)); + req->rq_state |= RQ_LOCAL_PENDING; + break; + + case completed_ok: + if (bio_data_dir(req->master_bio) == WRITE) + mdev->writ_cnt += req->size>>9; + else + mdev->read_cnt += req->size>>9; + + req->rq_state |= (RQ_LOCAL_COMPLETED|RQ_LOCAL_OK); + req->rq_state &= ~RQ_LOCAL_PENDING; + + _req_may_be_done(req, m); + put_ldev(mdev); + break; + + case write_completed_with_error: + req->rq_state |= RQ_LOCAL_COMPLETED; + req->rq_state &= ~RQ_LOCAL_PENDING; + + dev_alert(DEV, "Local WRITE failed sec=%llus size=%u\n", + (unsigned long long)req->sector, req->size); + /* and now: check how to handle local io error. */ + __drbd_chk_io_error(mdev, FALSE); + _req_may_be_done(req, m); + put_ldev(mdev); + break; + + case read_ahead_completed_with_error: + /* it is legal to fail READA */ + req->rq_state |= RQ_LOCAL_COMPLETED; + req->rq_state &= ~RQ_LOCAL_PENDING; + _req_may_be_done(req, m); + put_ldev(mdev); + break; + + case read_completed_with_error: + drbd_set_out_of_sync(mdev, req->sector, req->size); + + req->rq_state |= RQ_LOCAL_COMPLETED; + req->rq_state &= ~RQ_LOCAL_PENDING; + + dev_alert(DEV, "Local READ failed sec=%llus size=%u\n", + (unsigned long long)req->sector, req->size); + /* _req_mod(req,to_be_send); oops, recursion... */ + D_ASSERT(!(req->rq_state & RQ_NET_MASK)); + req->rq_state |= RQ_NET_PENDING; + inc_ap_pending(mdev); + + __drbd_chk_io_error(mdev, FALSE); + put_ldev(mdev); + /* NOTE: if we have no connection, + * or know the peer has no good data either, + * then we don't actually need to "queue_for_net_read", + * but we do so anyways, since the drbd_io_error() + * and the potential state change to "Diskless" + * needs to be done from process context */ + + /* fall through: _req_mod(req,queue_for_net_read); */ + + case queue_for_net_read: + /* READ or READA, and + * no local disk, + * or target area marked as invalid, + * or just got an io-error. */ + /* from drbd_make_request_common + * or from bio_endio during read io-error recovery */ + + /* so we can verify the handle in the answer packet + * corresponding hlist_del is in _req_may_be_done() */ + hlist_add_head(&req->colision, ar_hash_slot(mdev, req->sector)); + + set_bit(UNPLUG_REMOTE, &mdev->flags); /* why? */ + + D_ASSERT(req->rq_state & RQ_NET_PENDING); + req->rq_state |= RQ_NET_QUEUED; + req->w.cb = (req->rq_state & RQ_LOCAL_MASK) + ? w_read_retry_remote + : w_send_read_req; + drbd_queue_work(&mdev->data.work, &req->w); + break; + + case queue_for_net_write: + /* assert something? */ + /* from drbd_make_request_common only */ + + hlist_add_head(&req->colision, tl_hash_slot(mdev, req->sector)); + /* corresponding hlist_del is in _req_may_be_done() */ + + /* NOTE + * In case the req ended up on the transfer log before being + * queued on the worker, it could lead to this request being + * missed during cleanup after connection loss. + * So we have to do both operations here, + * within the same lock that protects the transfer log. + * + * _req_add_to_epoch(req); this has to be after the + * _maybe_start_new_epoch(req); which happened in + * drbd_make_request_common, because we now may set the bit + * again ourselves to close the current epoch. + * + * Add req to the (now) current epoch (barrier). */ + + /* see drbd_make_request_common, + * just after it grabs the req_lock */ + D_ASSERT(test_bit(CREATE_BARRIER, &mdev->flags) == 0); + + req->epoch = mdev->newest_tle->br_number; + list_add_tail(&req->tl_requests, + &mdev->newest_tle->requests); + + /* increment size of current epoch */ + mdev->newest_tle->n_req++; + + /* queue work item to send data */ + D_ASSERT(req->rq_state & RQ_NET_PENDING); + req->rq_state |= RQ_NET_QUEUED; + req->w.cb = w_send_dblock; + drbd_queue_work(&mdev->data.work, &req->w); + + /* close the epoch, in case it outgrew the limit */ + if (mdev->newest_tle->n_req >= mdev->net_conf->max_epoch_size) + queue_barrier(mdev); + + break; + + case send_canceled: + /* treat it the same */ + case send_failed: + /* real cleanup will be done from tl_clear. just update flags + * so it is no longer marked as on the worker queue */ + req->rq_state &= ~RQ_NET_QUEUED; + /* if we did it right, tl_clear should be scheduled only after + * this, so this should not be necessary! */ + _req_may_be_done(req, m); + break; + + case handed_over_to_network: + /* assert something? */ + if (bio_data_dir(req->master_bio) == WRITE && + mdev->net_conf->wire_protocol == DRBD_PROT_A) { + /* this is what is dangerous about protocol A: + * pretend it was successfully written on the peer. */ + if (req->rq_state & RQ_NET_PENDING) { + dec_ap_pending(mdev); + req->rq_state &= ~RQ_NET_PENDING; + req->rq_state |= RQ_NET_OK; + } /* else: neg-ack was faster... */ + /* it is still not yet RQ_NET_DONE until the + * corresponding epoch barrier got acked as well, + * so we know what to dirty on connection loss */ + } + req->rq_state &= ~RQ_NET_QUEUED; + req->rq_state |= RQ_NET_SENT; + /* because _drbd_send_zc_bio could sleep, and may want to + * dereference the bio even after the "write_acked_by_peer" and + * "completed_ok" events came in, once we return from + * _drbd_send_zc_bio (drbd_send_dblock), we have to check + * whether it is done already, and end it. */ + _req_may_be_done(req, m); + break; + + case connection_lost_while_pending: + /* transfer log cleanup after connection loss */ + /* assert something? */ + if (req->rq_state & RQ_NET_PENDING) + dec_ap_pending(mdev); + req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING); + req->rq_state |= RQ_NET_DONE; + /* if it is still queued, we may not complete it here. + * it will be canceled soon. */ + if (!(req->rq_state & RQ_NET_QUEUED)) + _req_may_be_done(req, m); + break; + + case write_acked_by_peer_and_sis: + req->rq_state |= RQ_NET_SIS; + case conflict_discarded_by_peer: + /* for discarded conflicting writes of multiple primaries, + * there is no need to keep anything in the tl, potential + * node crashes are covered by the activity log. */ + if (what == conflict_discarded_by_peer) + dev_alert(DEV, "Got DiscardAck packet %llus +%u!" + " DRBD is not a random data generator!\n", + (unsigned long long)req->sector, req->size); + req->rq_state |= RQ_NET_DONE; + /* fall through */ + case write_acked_by_peer: + /* protocol C; successfully written on peer. + * Nothing to do here. + * We want to keep the tl in place for all protocols, to cater + * for volatile write-back caches on lower level devices. + * + * A barrier request is expected to have forced all prior + * requests onto stable storage, so completion of a barrier + * request could set NET_DONE right here, and not wait for the + * P_BARRIER_ACK, but that is an unnecessary optimization. */ + + /* this makes it effectively the same as for: */ + case recv_acked_by_peer: + /* protocol B; pretends to be successfully written on peer. + * see also notes above in handed_over_to_network about + * protocol != C */ + req->rq_state |= RQ_NET_OK; + D_ASSERT(req->rq_state & RQ_NET_PENDING); + dec_ap_pending(mdev); + req->rq_state &= ~RQ_NET_PENDING; + _req_may_be_done(req, m); + break; + + case neg_acked: + /* assert something? */ + if (req->rq_state & RQ_NET_PENDING) + dec_ap_pending(mdev); + req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING); + + req->rq_state |= RQ_NET_DONE; + _req_may_be_done(req, m); + /* else: done by handed_over_to_network */ + break; + + case barrier_acked: + if (req->rq_state & RQ_NET_PENDING) { + /* barrier came in before all requests have been acked. + * this is bad, because if the connection is lost now, + * we won't be able to clean them up... */ + dev_err(DEV, "FIXME (barrier_acked but pending)\n"); + trace_drbd_req(req, nothing, "FIXME (barrier_acked but pending)"); + list_move(&req->tl_requests, &mdev->out_of_sequence_requests); + } + D_ASSERT(req->rq_state & RQ_NET_SENT); + req->rq_state |= RQ_NET_DONE; + _req_may_be_done(req, m); + break; + + case data_received: + D_ASSERT(req->rq_state & RQ_NET_PENDING); + dec_ap_pending(mdev); + req->rq_state &= ~RQ_NET_PENDING; + req->rq_state |= (RQ_NET_OK|RQ_NET_DONE); + _req_may_be_done(req, m); + break; + }; +} + +/* we may do a local read if: + * - we are consistent (of course), + * - or we are generally inconsistent, + * BUT we are still/already IN SYNC for this area. + * since size may be bigger than BM_BLOCK_SIZE, + * we may need to check several bits. + */ +static int drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int size) +{ + unsigned long sbnr, ebnr; + sector_t esector, nr_sectors; + + if (mdev->state.disk == D_UP_TO_DATE) + return 1; + if (mdev->state.disk >= D_OUTDATED) + return 0; + if (mdev->state.disk < D_INCONSISTENT) + return 0; + /* state.disk == D_INCONSISTENT We will have a look at the BitMap */ + nr_sectors = drbd_get_capacity(mdev->this_bdev); + esector = sector + (size >> 9) - 1; + + D_ASSERT(sector < nr_sectors); + D_ASSERT(esector < nr_sectors); + + sbnr = BM_SECT_TO_BIT(sector); + ebnr = BM_SECT_TO_BIT(esector); + + return 0 == drbd_bm_count_bits(mdev, sbnr, ebnr); +} + +static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio) +{ + const int rw = bio_rw(bio); + const int size = bio->bi_size; + const sector_t sector = bio->bi_sector; + struct drbd_tl_epoch *b = NULL; + struct drbd_request *req; + int local, remote; + int err = -EIO; + + /* allocate outside of all locks; */ + req = drbd_req_new(mdev, bio); + if (!req) { + dec_ap_bio(mdev); + /* only pass the error to the upper layers. + * if user cannot handle io errors, that's not our business. */ + dev_err(DEV, "could not kmalloc() req\n"); + bio_endio(bio, -ENOMEM); + return 0; + } + + trace_drbd_bio(mdev, "Rq", bio, 0, req); + + local = get_ldev(mdev); + if (!local) { + bio_put(req->private_bio); /* or we get a bio leak */ + req->private_bio = NULL; + } + if (rw == WRITE) { + remote = 1; + } else { + /* READ || READA */ + if (local) { + if (!drbd_may_do_local_read(mdev, sector, size)) { + /* we could kick the syncer to + * sync this extent asap, wait for + * it, then continue locally. + * Or just issue the request remotely. + */ + local = 0; + bio_put(req->private_bio); + req->private_bio = NULL; + put_ldev(mdev); + } + } + remote = !local && mdev->state.pdsk >= D_UP_TO_DATE; + } + + /* If we have a disk, but a READA request is mapped to remote, + * we are R_PRIMARY, D_INCONSISTENT, SyncTarget. + * Just fail that READA request right here. + * + * THINK: maybe fail all READA when not local? + * or make this configurable... + * if network is slow, READA won't do any good. + */ + if (rw == READA && mdev->state.disk >= D_INCONSISTENT && !local) { + err = -EWOULDBLOCK; + goto fail_and_free_req; + } + + /* For WRITES going to the local disk, grab a reference on the target + * extent. This waits for any resync activity in the corresponding + * resync extent to finish, and, if necessary, pulls in the target + * extent into the activity log, which involves further disk io because + * of transactional on-disk meta data updates. */ + if (rw == WRITE && local) + drbd_al_begin_io(mdev, sector); + + remote = remote && (mdev->state.pdsk == D_UP_TO_DATE || + (mdev->state.pdsk == D_INCONSISTENT && + mdev->state.conn >= C_CONNECTED)); + + if (!(local || remote)) { + dev_err(DEV, "IO ERROR: neither local nor remote disk\n"); + goto fail_free_complete; + } + + /* For WRITE request, we have to make sure that we have an + * unused_spare_tle, in case we need to start a new epoch. + * I try to be smart and avoid to pre-allocate always "just in case", + * but there is a race between testing the bit and pointer outside the + * spinlock, and grabbing the spinlock. + * if we lost that race, we retry. */ + if (rw == WRITE && remote && + mdev->unused_spare_tle == NULL && + test_bit(CREATE_BARRIER, &mdev->flags)) { +allocate_barrier: + b = kmalloc(sizeof(struct drbd_tl_epoch), GFP_NOIO); + if (!b) { + dev_err(DEV, "Failed to alloc barrier.\n"); + err = -ENOMEM; + goto fail_free_complete; + } + } + + /* GOOD, everything prepared, grab the spin_lock */ + spin_lock_irq(&mdev->req_lock); + + if (remote) { + remote = (mdev->state.pdsk == D_UP_TO_DATE || + (mdev->state.pdsk == D_INCONSISTENT && + mdev->state.conn >= C_CONNECTED)); + if (!remote) + dev_warn(DEV, "lost connection while grabbing the req_lock!\n"); + if (!(local || remote)) { + dev_err(DEV, "IO ERROR: neither local nor remote disk\n"); + spin_unlock_irq(&mdev->req_lock); + goto fail_free_complete; + } + } + + if (b && mdev->unused_spare_tle == NULL) { + mdev->unused_spare_tle = b; + b = NULL; + } + if (rw == WRITE && remote && + mdev->unused_spare_tle == NULL && + test_bit(CREATE_BARRIER, &mdev->flags)) { + /* someone closed the current epoch + * while we were grabbing the spinlock */ + spin_unlock_irq(&mdev->req_lock); + goto allocate_barrier; + } + + + /* Update disk stats */ + _drbd_start_io_acct(mdev, req, bio); + + /* _maybe_start_new_epoch(mdev); + * If we need to generate a write barrier packet, we have to add the + * new epoch (barrier) object, and queue the barrier packet for sending, + * and queue the req's data after it _within the same lock_, otherwise + * we have race conditions were the reorder domains could be mixed up. + * + * Even read requests may start a new epoch and queue the corresponding + * barrier packet. To get the write ordering right, we only have to + * make sure that, if this is a write request and it triggered a + * barrier packet, this request is queued within the same spinlock. */ + if (remote && mdev->unused_spare_tle && + test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) { + _tl_add_barrier(mdev, mdev->unused_spare_tle); + mdev->unused_spare_tle = NULL; + } else { + D_ASSERT(!(remote && rw == WRITE && + test_bit(CREATE_BARRIER, &mdev->flags))); + } + + /* NOTE + * Actually, 'local' may be wrong here already, since we may have failed + * to write to the meta data, and may become wrong anytime because of + * local io-error for some other request, which would lead to us + * "detaching" the local disk. + * + * 'remote' may become wrong any time because the network could fail. + * + * This is a harmless race condition, though, since it is handled + * correctly at the appropriate places; so it just defers the failure + * of the respective operation. + */ + + /* mark them early for readability. + * this just sets some state flags. */ + if (remote) + _req_mod(req, to_be_send); + if (local) + _req_mod(req, to_be_submitted); + + /* check this request on the collision detection hash tables. + * if we have a conflict, just complete it here. + * THINK do we want to check reads, too? (I don't think so...) */ + if (rw == WRITE && _req_conflicts(req)) { + /* this is a conflicting request. + * even though it may have been only _partially_ + * overlapping with one of the currently pending requests, + * without even submitting or sending it, we will + * pretend that it was successfully served right now. + */ + if (local) { + bio_put(req->private_bio); + req->private_bio = NULL; + drbd_al_complete_io(mdev, req->sector); + put_ldev(mdev); + local = 0; + } + if (remote) + dec_ap_pending(mdev); + _drbd_end_io_acct(mdev, req); + /* THINK: do we want to fail it (-EIO), or pretend success? */ + bio_endio(req->master_bio, 0); + req->master_bio = NULL; + dec_ap_bio(mdev); + drbd_req_free(req); + remote = 0; + } + + /* NOTE remote first: to get the concurrent write detection right, + * we must register the request before start of local IO. */ + if (remote) { + /* either WRITE and C_CONNECTED, + * or READ, and no local disk, + * or READ, but not in sync. + */ + _req_mod(req, (rw == WRITE) + ? queue_for_net_write + : queue_for_net_read); + } + spin_unlock_irq(&mdev->req_lock); + kfree(b); /* if someone else has beaten us to it... */ + + if (local) { + req->private_bio->bi_bdev = mdev->ldev->backing_bdev; + + trace_drbd_bio(mdev, "Pri", req->private_bio, 0, NULL); + + if (FAULT_ACTIVE(mdev, rw == WRITE ? DRBD_FAULT_DT_WR + : rw == READ ? DRBD_FAULT_DT_RD + : DRBD_FAULT_DT_RA)) + bio_endio(req->private_bio, -EIO); + else + generic_make_request(req->private_bio); + } + + /* we need to plug ALWAYS since we possibly need to kick lo_dev. + * we plug after submit, so we won't miss an unplug event */ + drbd_plug_device(mdev); + + return 0; + +fail_free_complete: + if (rw == WRITE && local) + drbd_al_complete_io(mdev, sector); +fail_and_free_req: + if (local) { + bio_put(req->private_bio); + req->private_bio = NULL; + put_ldev(mdev); + } + bio_endio(bio, err); + drbd_req_free(req); + dec_ap_bio(mdev); + kfree(b); + + return 0; +} + +/* helper function for drbd_make_request + * if we can determine just by the mdev (state) that this request will fail, + * return 1 + * otherwise return 0 + */ +static int drbd_fail_request_early(struct drbd_conf *mdev, int is_write) +{ + /* Unconfigured */ + if (mdev->state.conn == C_DISCONNECTING && + mdev->state.disk == D_DISKLESS) + return 1; + + if (mdev->state.role != R_PRIMARY && + (!allow_oos || is_write)) { + if (__ratelimit(&drbd_ratelimit_state)) { + dev_err(DEV, "Process %s[%u] tried to %s; " + "since we are not in Primary state, " + "we cannot allow this\n", + current->comm, current->pid, + is_write ? "WRITE" : "READ"); + } + return 1; + } + + /* + * Paranoia: we might have been primary, but sync target, or + * even diskless, then lost the connection. + * This should have been handled (panic? suspend?) somewhere + * else. But maybe it was not, so check again here. + * Caution: as long as we do not have a read/write lock on mdev, + * to serialize state changes, this is racy, since we may lose + * the connection *after* we test for the cstate. + */ + if (mdev->state.disk < D_UP_TO_DATE && mdev->state.pdsk < D_UP_TO_DATE) { + if (__ratelimit(&drbd_ratelimit_state)) + dev_err(DEV, "Sorry, I have no access to good data anymore.\n"); + return 1; + } + + return 0; +} + +int drbd_make_request_26(struct request_queue *q, struct bio *bio) +{ + unsigned int s_enr, e_enr; + struct drbd_conf *mdev = (struct drbd_conf *) q->queuedata; + + if (drbd_fail_request_early(mdev, bio_data_dir(bio) & WRITE)) { + bio_endio(bio, -EPERM); + return 0; + } + + /* Reject barrier requests if we know the underlying device does + * not support them. + * XXX: Need to get this info from peer as well some how so we + * XXX: reject if EITHER side/data/metadata area does not support them. + * + * because of those XXX, this is not yet enabled, + * i.e. in drbd_init_set_defaults we set the NO_BARRIER_SUPP bit. + */ + if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER) && test_bit(NO_BARRIER_SUPP, &mdev->flags))) { + /* dev_warn(DEV, "Rejecting barrier request as underlying device does not support\n"); */ + bio_endio(bio, -EOPNOTSUPP); + return 0; + } + + /* + * what we "blindly" assume: + */ + D_ASSERT(bio->bi_size > 0); + D_ASSERT((bio->bi_size & 0x1ff) == 0); + D_ASSERT(bio->bi_idx == 0); + + /* to make some things easier, force alignment of requests within the + * granularity of our hash tables */ + s_enr = bio->bi_sector >> HT_SHIFT; + e_enr = (bio->bi_sector+(bio->bi_size>>9)-1) >> HT_SHIFT; + + if (likely(s_enr == e_enr)) { + inc_ap_bio(mdev, 1); + return drbd_make_request_common(mdev, bio); + } + + /* can this bio be split generically? + * Maybe add our own split-arbitrary-bios function. */ + if (bio->bi_vcnt != 1 || bio->bi_idx != 0 || bio->bi_size > DRBD_MAX_SEGMENT_SIZE) { + /* rather error out here than BUG in bio_split */ + dev_err(DEV, "bio would need to, but cannot, be split: " + "(vcnt=%u,idx=%u,size=%u,sector=%llu)\n", + bio->bi_vcnt, bio->bi_idx, bio->bi_size, + (unsigned long long)bio->bi_sector); + bio_endio(bio, -EINVAL); + } else { + /* This bio crosses some boundary, so we have to split it. */ + struct bio_pair *bp; + /* works for the "do not cross hash slot boundaries" case + * e.g. sector 262269, size 4096 + * s_enr = 262269 >> 6 = 4097 + * e_enr = (262269+8-1) >> 6 = 4098 + * HT_SHIFT = 6 + * sps = 64, mask = 63 + * first_sectors = 64 - (262269 & 63) = 3 + */ + const sector_t sect = bio->bi_sector; + const int sps = 1 << HT_SHIFT; /* sectors per slot */ + const int mask = sps - 1; + const sector_t first_sectors = sps - (sect & mask); + bp = bio_split(bio, +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) + bio_split_pool, +#endif + first_sectors); + + /* we need to get a "reference count" (ap_bio_cnt) + * to avoid races with the disconnect/reconnect/suspend code. + * In case we need to split the bio here, we need to get two references + * atomically, otherwise we might deadlock when trying to submit the + * second one! */ + inc_ap_bio(mdev, 2); + + D_ASSERT(e_enr == s_enr + 1); + + drbd_make_request_common(mdev, &bp->bio1); + drbd_make_request_common(mdev, &bp->bio2); + bio_pair_release(bp); + } + return 0; +} + +/* This is called by bio_add_page(). With this function we reduce + * the number of BIOs that span over multiple DRBD_MAX_SEGMENT_SIZEs + * units (was AL_EXTENTs). + * + * we do the calculation within the lower 32bit of the byte offsets, + * since we don't care for actual offset, but only check whether it + * would cross "activity log extent" boundaries. + * + * As long as the BIO is empty we have to allow at least one bvec, + * regardless of size and offset. so the resulting bio may still + * cross extent boundaries. those are dealt with (bio_split) in + * drbd_make_request_26. + */ +int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct bio_vec *bvec) +{ + struct drbd_conf *mdev = (struct drbd_conf *) q->queuedata; + unsigned int bio_offset = + (unsigned int)bvm->bi_sector << 9; /* 32 bit */ + unsigned int bio_size = bvm->bi_size; + int limit, backing_limit; + + limit = DRBD_MAX_SEGMENT_SIZE + - ((bio_offset & (DRBD_MAX_SEGMENT_SIZE-1)) + bio_size); + if (limit < 0) + limit = 0; + if (bio_size == 0) { + if (limit <= bvec->bv_len) + limit = bvec->bv_len; + } else if (limit && get_ldev(mdev)) { + struct request_queue * const b = + mdev->ldev->backing_bdev->bd_disk->queue; + if (b->merge_bvec_fn && mdev->ldev->dc.use_bmbv) { + backing_limit = b->merge_bvec_fn(b, bvm, bvec); + limit = min(limit, backing_limit); + } + put_ldev(mdev); + } + return limit; +} diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h new file mode 100644 index 000000000000..d37ab57f1209 --- /dev/null +++ b/drivers/block/drbd/drbd_req.h @@ -0,0 +1,327 @@ +/* + drbd_req.h + + This file is part of DRBD by Philipp Reisner and Lars Ellenberg. + + Copyright (C) 2006-2008, LINBIT Information Technologies GmbH. + Copyright (C) 2006-2008, Lars Ellenberg . + Copyright (C) 2006-2008, Philipp Reisner . + + DRBD is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + DRBD is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with drbd; see the file COPYING. If not, write to + the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef _DRBD_REQ_H +#define _DRBD_REQ_H + +#include +#include + +#include +#include +#include "drbd_int.h" +#include "drbd_wrappers.h" + +/* The request callbacks will be called in irq context by the IDE drivers, + and in Softirqs/Tasklets/BH context by the SCSI drivers, + and by the receiver and worker in kernel-thread context. + Try to get the locking right :) */ + +/* + * Objects of type struct drbd_request do only exist on a R_PRIMARY node, and are + * associated with IO requests originating from the block layer above us. + * + * There are quite a few things that may happen to a drbd request + * during its lifetime. + * + * It will be created. + * It will be marked with the intention to be + * submitted to local disk and/or + * send via the network. + * + * It has to be placed on the transfer log and other housekeeping lists, + * In case we have a network connection. + * + * It may be identified as a concurrent (write) request + * and be handled accordingly. + * + * It may me handed over to the local disk subsystem. + * It may be completed by the local disk subsystem, + * either sucessfully or with io-error. + * In case it is a READ request, and it failed locally, + * it may be retried remotely. + * + * It may be queued for sending. + * It may be handed over to the network stack, + * which may fail. + * It may be acknowledged by the "peer" according to the wire_protocol in use. + * this may be a negative ack. + * It may receive a faked ack when the network connection is lost and the + * transfer log is cleaned up. + * Sending may be canceled due to network connection loss. + * When it finally has outlived its time, + * corresponding dirty bits in the resync-bitmap may be cleared or set, + * it will be destroyed, + * and completion will be signalled to the originator, + * with or without "success". + */ + +enum drbd_req_event { + created, + to_be_send, + to_be_submitted, + + /* XXX yes, now I am inconsistent... + * these two are not "events" but "actions" + * oh, well... */ + queue_for_net_write, + queue_for_net_read, + + send_canceled, + send_failed, + handed_over_to_network, + connection_lost_while_pending, + recv_acked_by_peer, + write_acked_by_peer, + write_acked_by_peer_and_sis, /* and set_in_sync */ + conflict_discarded_by_peer, + neg_acked, + barrier_acked, /* in protocol A and B */ + data_received, /* (remote read) */ + + read_completed_with_error, + read_ahead_completed_with_error, + write_completed_with_error, + completed_ok, + nothing, /* for tracing only */ +}; + +/* encoding of request states for now. we don't actually need that many bits. + * we don't need to do atomic bit operations either, since most of the time we + * need to look at the connection state and/or manipulate some lists at the + * same time, so we should hold the request lock anyways. + */ +enum drbd_req_state_bits { + /* 210 + * 000: no local possible + * 001: to be submitted + * UNUSED, we could map: 011: submitted, completion still pending + * 110: completed ok + * 010: completed with error + */ + __RQ_LOCAL_PENDING, + __RQ_LOCAL_COMPLETED, + __RQ_LOCAL_OK, + + /* 76543 + * 00000: no network possible + * 00001: to be send + * 00011: to be send, on worker queue + * 00101: sent, expecting recv_ack (B) or write_ack (C) + * 11101: sent, + * recv_ack (B) or implicit "ack" (A), + * still waiting for the barrier ack. + * master_bio may already be completed and invalidated. + * 11100: write_acked (C), + * data_received (for remote read, any protocol) + * or finally the barrier ack has arrived (B,A)... + * request can be freed + * 01100: neg-acked (write, protocol C) + * or neg-d-acked (read, any protocol) + * or killed from the transfer log + * during cleanup after connection loss + * request can be freed + * 01000: canceled or send failed... + * request can be freed + */ + + /* if "SENT" is not set, yet, this can still fail or be canceled. + * if "SENT" is set already, we still wait for an Ack packet. + * when cleared, the master_bio may be completed. + * in (B,A) the request object may still linger on the transaction log + * until the corresponding barrier ack comes in */ + __RQ_NET_PENDING, + + /* If it is QUEUED, and it is a WRITE, it is also registered in the + * transfer log. Currently we need this flag to avoid conflicts between + * worker canceling the request and tl_clear_barrier killing it from + * transfer log. We should restructure the code so this conflict does + * no longer occur. */ + __RQ_NET_QUEUED, + + /* well, actually only "handed over to the network stack". + * + * TODO can potentially be dropped because of the similar meaning + * of RQ_NET_SENT and ~RQ_NET_QUEUED. + * however it is not exactly the same. before we drop it + * we must ensure that we can tell a request with network part + * from a request without, regardless of what happens to it. */ + __RQ_NET_SENT, + + /* when set, the request may be freed (if RQ_NET_QUEUED is clear). + * basically this means the corresponding P_BARRIER_ACK was received */ + __RQ_NET_DONE, + + /* whether or not we know (C) or pretend (B,A) that the write + * was successfully written on the peer. + */ + __RQ_NET_OK, + + /* peer called drbd_set_in_sync() for this write */ + __RQ_NET_SIS, + + /* keep this last, its for the RQ_NET_MASK */ + __RQ_NET_MAX, +}; + +#define RQ_LOCAL_PENDING (1UL << __RQ_LOCAL_PENDING) +#define RQ_LOCAL_COMPLETED (1UL << __RQ_LOCAL_COMPLETED) +#define RQ_LOCAL_OK (1UL << __RQ_LOCAL_OK) + +#define RQ_LOCAL_MASK ((RQ_LOCAL_OK << 1)-1) /* 0x07 */ + +#define RQ_NET_PENDING (1UL << __RQ_NET_PENDING) +#define RQ_NET_QUEUED (1UL << __RQ_NET_QUEUED) +#define RQ_NET_SENT (1UL << __RQ_NET_SENT) +#define RQ_NET_DONE (1UL << __RQ_NET_DONE) +#define RQ_NET_OK (1UL << __RQ_NET_OK) +#define RQ_NET_SIS (1UL << __RQ_NET_SIS) + +/* 0x1f8 */ +#define RQ_NET_MASK (((1UL << __RQ_NET_MAX)-1) & ~RQ_LOCAL_MASK) + +/* epoch entries */ +static inline +struct hlist_head *ee_hash_slot(struct drbd_conf *mdev, sector_t sector) +{ + BUG_ON(mdev->ee_hash_s == 0); + return mdev->ee_hash + + ((unsigned int)(sector>>HT_SHIFT) % mdev->ee_hash_s); +} + +/* transfer log (drbd_request objects) */ +static inline +struct hlist_head *tl_hash_slot(struct drbd_conf *mdev, sector_t sector) +{ + BUG_ON(mdev->tl_hash_s == 0); + return mdev->tl_hash + + ((unsigned int)(sector>>HT_SHIFT) % mdev->tl_hash_s); +} + +/* application reads (drbd_request objects) */ +static struct hlist_head *ar_hash_slot(struct drbd_conf *mdev, sector_t sector) +{ + return mdev->app_reads_hash + + ((unsigned int)(sector) % APP_R_HSIZE); +} + +/* when we receive the answer for a read request, + * verify that we actually know about it */ +static inline struct drbd_request *_ar_id_to_req(struct drbd_conf *mdev, + u64 id, sector_t sector) +{ + struct hlist_head *slot = ar_hash_slot(mdev, sector); + struct hlist_node *n; + struct drbd_request *req; + + hlist_for_each_entry(req, n, slot, colision) { + if ((unsigned long)req == (unsigned long)id) { + D_ASSERT(req->sector == sector); + return req; + } + } + return NULL; +} + +static inline struct drbd_request *drbd_req_new(struct drbd_conf *mdev, + struct bio *bio_src) +{ + struct bio *bio; + struct drbd_request *req = + mempool_alloc(drbd_request_mempool, GFP_NOIO); + if (likely(req)) { + bio = bio_clone(bio_src, GFP_NOIO); /* XXX cannot fail?? */ + + req->rq_state = 0; + req->mdev = mdev; + req->master_bio = bio_src; + req->private_bio = bio; + req->epoch = 0; + req->sector = bio->bi_sector; + req->size = bio->bi_size; + req->start_time = jiffies; + INIT_HLIST_NODE(&req->colision); + INIT_LIST_HEAD(&req->tl_requests); + INIT_LIST_HEAD(&req->w.list); + + bio->bi_private = req; + bio->bi_end_io = drbd_endio_pri; + bio->bi_next = NULL; + } + return req; +} + +static inline void drbd_req_free(struct drbd_request *req) +{ + mempool_free(req, drbd_request_mempool); +} + +static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2) +{ + return !((s1 + (l1>>9) <= s2) || (s1 >= s2 + (l2>>9))); +} + +/* Short lived temporary struct on the stack. + * We could squirrel the error to be returned into + * bio->bi_size, or similar. But that would be too ugly. */ +struct bio_and_error { + struct bio *bio; + int error; +}; + +extern void _req_may_be_done(struct drbd_request *req, + struct bio_and_error *m); +extern void __req_mod(struct drbd_request *req, enum drbd_req_event what, + struct bio_and_error *m); +extern void complete_master_bio(struct drbd_conf *mdev, + struct bio_and_error *m); + +/* use this if you don't want to deal with calling complete_master_bio() + * outside the spinlock, e.g. when walking some list on cleanup. */ +static inline void _req_mod(struct drbd_request *req, enum drbd_req_event what) +{ + struct drbd_conf *mdev = req->mdev; + struct bio_and_error m; + + /* __req_mod possibly frees req, do not touch req after that! */ + __req_mod(req, what, &m); + if (m.bio) + complete_master_bio(mdev, &m); +} + +/* completion of master bio is outside of spinlock. + * If you need it irqsave, do it your self! */ +static inline void req_mod(struct drbd_request *req, + enum drbd_req_event what) +{ + struct drbd_conf *mdev = req->mdev; + struct bio_and_error m; + spin_lock_irq(&mdev->req_lock); + __req_mod(req, what, &m); + spin_unlock_irq(&mdev->req_lock); + + if (m.bio) + complete_master_bio(mdev, &m); +} +#endif diff --git a/drivers/block/drbd/drbd_strings.c b/drivers/block/drbd/drbd_strings.c new file mode 100644 index 000000000000..76863e3f05be --- /dev/null +++ b/drivers/block/drbd/drbd_strings.c @@ -0,0 +1,113 @@ +/* + drbd.h + + This file is part of DRBD by Philipp Reisner and Lars Ellenberg. + + Copyright (C) 2003-2008, LINBIT Information Technologies GmbH. + Copyright (C) 2003-2008, Philipp Reisner . + Copyright (C) 2003-2008, Lars Ellenberg . + + drbd is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + drbd is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with drbd; see the file COPYING. If not, write to + the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + +*/ + +#include + +static const char *drbd_conn_s_names[] = { + [C_STANDALONE] = "StandAlone", + [C_DISCONNECTING] = "Disconnecting", + [C_UNCONNECTED] = "Unconnected", + [C_TIMEOUT] = "Timeout", + [C_BROKEN_PIPE] = "BrokenPipe", + [C_NETWORK_FAILURE] = "NetworkFailure", + [C_PROTOCOL_ERROR] = "ProtocolError", + [C_WF_CONNECTION] = "WFConnection", + [C_WF_REPORT_PARAMS] = "WFReportParams", + [C_TEAR_DOWN] = "TearDown", + [C_CONNECTED] = "Connected", + [C_STARTING_SYNC_S] = "StartingSyncS", + [C_STARTING_SYNC_T] = "StartingSyncT", + [C_WF_BITMAP_S] = "WFBitMapS", + [C_WF_BITMAP_T] = "WFBitMapT", + [C_WF_SYNC_UUID] = "WFSyncUUID", + [C_SYNC_SOURCE] = "SyncSource", + [C_SYNC_TARGET] = "SyncTarget", + [C_PAUSED_SYNC_S] = "PausedSyncS", + [C_PAUSED_SYNC_T] = "PausedSyncT", + [C_VERIFY_S] = "VerifyS", + [C_VERIFY_T] = "VerifyT", +}; + +static const char *drbd_role_s_names[] = { + [R_PRIMARY] = "Primary", + [R_SECONDARY] = "Secondary", + [R_UNKNOWN] = "Unknown" +}; + +static const char *drbd_disk_s_names[] = { + [D_DISKLESS] = "Diskless", + [D_ATTACHING] = "Attaching", + [D_FAILED] = "Failed", + [D_NEGOTIATING] = "Negotiating", + [D_INCONSISTENT] = "Inconsistent", + [D_OUTDATED] = "Outdated", + [D_UNKNOWN] = "DUnknown", + [D_CONSISTENT] = "Consistent", + [D_UP_TO_DATE] = "UpToDate", +}; + +static const char *drbd_state_sw_errors[] = { + [-SS_TWO_PRIMARIES] = "Multiple primaries not allowed by config", + [-SS_NO_UP_TO_DATE_DISK] = "Refusing to be Primary without at least one UpToDate disk", + [-SS_NO_LOCAL_DISK] = "Can not resync without local disk", + [-SS_NO_REMOTE_DISK] = "Can not resync without remote disk", + [-SS_CONNECTED_OUTDATES] = "Refusing to be Outdated while Connected", + [-SS_PRIMARY_NOP] = "Refusing to be Primary while peer is not outdated", + [-SS_RESYNC_RUNNING] = "Can not start OV/resync since it is already active", + [-SS_ALREADY_STANDALONE] = "Can not disconnect a StandAlone device", + [-SS_CW_FAILED_BY_PEER] = "State change was refused by peer node", + [-SS_IS_DISKLESS] = "Device is diskless, the requested operation requires a disk", + [-SS_DEVICE_IN_USE] = "Device is held open by someone", + [-SS_NO_NET_CONFIG] = "Have no net/connection configuration", + [-SS_NO_VERIFY_ALG] = "Need a verify algorithm to start online verify", + [-SS_NEED_CONNECTION] = "Need a connection to start verify or resync", + [-SS_NOT_SUPPORTED] = "Peer does not support protocol", + [-SS_LOWER_THAN_OUTDATED] = "Disk state is lower than outdated", + [-SS_IN_TRANSIENT_STATE] = "In transient state, retry after next state change", + [-SS_CONCURRENT_ST_CHG] = "Concurrent state changes detected and aborted", +}; + +const char *drbd_conn_str(enum drbd_conns s) +{ + /* enums are unsigned... */ + return s > C_PAUSED_SYNC_T ? "TOO_LARGE" : drbd_conn_s_names[s]; +} + +const char *drbd_role_str(enum drbd_role s) +{ + return s > R_SECONDARY ? "TOO_LARGE" : drbd_role_s_names[s]; +} + +const char *drbd_disk_str(enum drbd_disk_state s) +{ + return s > D_UP_TO_DATE ? "TOO_LARGE" : drbd_disk_s_names[s]; +} + +const char *drbd_set_st_err_str(enum drbd_state_ret_codes err) +{ + return err <= SS_AFTER_LAST_ERROR ? "TOO_SMALL" : + err > SS_TWO_PRIMARIES ? "TOO_LARGE" + : drbd_state_sw_errors[-err]; +} diff --git a/drivers/block/drbd/drbd_tracing.c b/drivers/block/drbd/drbd_tracing.c new file mode 100644 index 000000000000..d18d4f7b4bef --- /dev/null +++ b/drivers/block/drbd/drbd_tracing.c @@ -0,0 +1,752 @@ +/* + drbd_tracing.c + + This file is part of DRBD by Philipp Reisner and Lars Ellenberg. + + Copyright (C) 2003-2008, LINBIT Information Technologies GmbH. + Copyright (C) 2003-2008, Philipp Reisner . + Copyright (C) 2003-2008, Lars Ellenberg . + + drbd is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + drbd is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with drbd; see the file COPYING. If not, write to + the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + + */ + +#include +#include +#include +#include "drbd_int.h" +#include "drbd_tracing.h" +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Philipp Reisner, Lars Ellenberg"); +MODULE_DESCRIPTION("DRBD tracepoint probes"); +MODULE_PARM_DESC(trace_mask, "Bitmap of events to trace see drbd_tracing.c"); +MODULE_PARM_DESC(trace_level, "Current tracing level (changeable in /sys)"); +MODULE_PARM_DESC(trace_devs, "Bitmap of devices to trace (changeable in /sys)"); + +unsigned int trace_mask = 0; /* Bitmap of events to trace */ +int trace_level; /* Current trace level */ +int trace_devs; /* Bitmap of devices to trace */ + +module_param(trace_mask, uint, 0444); +module_param(trace_level, int, 0644); +module_param(trace_devs, int, 0644); + +enum { + TRACE_PACKET = 0x0001, + TRACE_RQ = 0x0002, + TRACE_UUID = 0x0004, + TRACE_RESYNC = 0x0008, + TRACE_EE = 0x0010, + TRACE_UNPLUG = 0x0020, + TRACE_NL = 0x0040, + TRACE_AL_EXT = 0x0080, + TRACE_INT_RQ = 0x0100, + TRACE_MD_IO = 0x0200, + TRACE_EPOCH = 0x0400, +}; + +/* Buffer printing support + * dbg_print_flags: used for Flags arg to drbd_print_buffer + * - DBGPRINT_BUFFADDR; if set, each line starts with the + * virtual address of the line being output. If clear, + * each line starts with the offset from the beginning + * of the buffer. */ +enum dbg_print_flags { + DBGPRINT_BUFFADDR = 0x0001, +}; + +/* Macro stuff */ +static char *nl_packet_name(int packet_type) +{ +/* Generate packet type strings */ +#define NL_PACKET(name, number, fields) \ + [P_ ## name] = # name, +#define NL_INTEGER Argh! +#define NL_BIT Argh! +#define NL_INT64 Argh! +#define NL_STRING Argh! + + static char *nl_tag_name[P_nl_after_last_packet] = { +#include "linux/drbd_nl.h" + }; + + return (packet_type < sizeof(nl_tag_name)/sizeof(nl_tag_name[0])) ? + nl_tag_name[packet_type] : "*Unknown*"; +} +/* /Macro stuff */ + +static inline int is_mdev_trace(struct drbd_conf *mdev, unsigned int level) +{ + return trace_level >= level && ((1 << mdev_to_minor(mdev)) & trace_devs); +} + +static void probe_drbd_unplug(struct drbd_conf *mdev, char *msg) +{ + if (!is_mdev_trace(mdev, TRACE_LVL_ALWAYS)) + return; + + dev_info(DEV, "%s, ap_bio_count=%d\n", msg, atomic_read(&mdev->ap_bio_cnt)); +} + +static void probe_drbd_uuid(struct drbd_conf *mdev, enum drbd_uuid_index index) +{ + static char *uuid_str[UI_EXTENDED_SIZE] = { + [UI_CURRENT] = "CURRENT", + [UI_BITMAP] = "BITMAP", + [UI_HISTORY_START] = "HISTORY_START", + [UI_HISTORY_END] = "HISTORY_END", + [UI_SIZE] = "SIZE", + [UI_FLAGS] = "FLAGS", + }; + + if (!is_mdev_trace(mdev, TRACE_LVL_ALWAYS)) + return; + + if (index >= UI_EXTENDED_SIZE) { + dev_warn(DEV, " uuid_index >= EXTENDED_SIZE\n"); + return; + } + + dev_info(DEV, " uuid[%s] now %016llX\n", + uuid_str[index], + (unsigned long long)mdev->ldev->md.uuid[index]); +} + +static void probe_drbd_md_io(struct drbd_conf *mdev, int rw, + struct drbd_backing_dev *bdev) +{ + if (!is_mdev_trace(mdev, TRACE_LVL_ALWAYS)) + return; + + dev_info(DEV, " %s metadata superblock now\n", + rw == READ ? "Reading" : "Writing"); +} + +static void probe_drbd_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, char* msg) +{ + if (!is_mdev_trace(mdev, TRACE_LVL_ALWAYS)) + return; + + dev_info(DEV, "EE %s sec=%llus size=%u e=%p\n", + msg, (unsigned long long)e->sector, e->size, e); +} + +static void probe_drbd_epoch(struct drbd_conf *mdev, struct drbd_epoch *epoch, + enum epoch_event ev) +{ + static char *epoch_event_str[] = { + [EV_PUT] = "put", + [EV_GOT_BARRIER_NR] = "got_barrier_nr", + [EV_BARRIER_DONE] = "barrier_done", + [EV_BECAME_LAST] = "became_last", + [EV_TRACE_FLUSH] = "issuing_flush", + [EV_TRACE_ADD_BARRIER] = "added_barrier", + [EV_TRACE_SETTING_BI] = "just set barrier_in_next_epoch", + }; + + if (!is_mdev_trace(mdev, TRACE_LVL_ALWAYS)) + return; + + ev &= ~EV_CLEANUP; + + switch (ev) { + case EV_TRACE_ALLOC: + dev_info(DEV, "Allocate epoch %p/xxxx { } nr_epochs=%d\n", epoch, mdev->epochs); + break; + case EV_TRACE_FREE: + dev_info(DEV, "Freeing epoch %p/%d { size=%d } nr_epochs=%d\n", + epoch, epoch->barrier_nr, atomic_read(&epoch->epoch_size), + mdev->epochs); + break; + default: + dev_info(DEV, "Update epoch %p/%d { size=%d active=%d %c%c n%c%c } ev=%s\n", + epoch, epoch->barrier_nr, atomic_read(&epoch->epoch_size), + atomic_read(&epoch->active), + test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) ? 'n' : '-', + test_bit(DE_CONTAINS_A_BARRIER, &epoch->flags) ? 'b' : '-', + test_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags) ? 'i' : '-', + test_bit(DE_BARRIER_IN_NEXT_EPOCH_DONE, &epoch->flags) ? 'd' : '-', + epoch_event_str[ev]); + } +} + +static void probe_drbd_netlink(void *data, int is_req) +{ + struct cn_msg *msg = data; + + if (is_req) { + struct drbd_nl_cfg_req *nlp = (struct drbd_nl_cfg_req *)msg->data; + + printk(KERN_INFO "drbd%d: " + "Netlink: << %s (%d) - seq: %x, ack: %x, len: %x\n", + nlp->drbd_minor, + nl_packet_name(nlp->packet_type), + nlp->packet_type, + msg->seq, msg->ack, msg->len); + } else { + struct drbd_nl_cfg_reply *nlp = (struct drbd_nl_cfg_reply *)msg->data; + + printk(KERN_INFO "drbd%d: " + "Netlink: >> %s (%d) - seq: %x, ack: %x, len: %x\n", + nlp->minor, + nlp->packet_type == P_nl_after_last_packet ? + "Empty-Reply" : nl_packet_name(nlp->packet_type), + nlp->packet_type, + msg->seq, msg->ack, msg->len); + } +} + +static void probe_drbd_actlog(struct drbd_conf *mdev, sector_t sector, char* msg) +{ + unsigned int enr = (sector >> (AL_EXTENT_SHIFT-9)); + + if (!is_mdev_trace(mdev, TRACE_LVL_ALWAYS)) + return; + + dev_info(DEV, "%s (sec=%llus, al_enr=%u, rs_enr=%d)\n", + msg, (unsigned long long) sector, enr, + (int)BM_SECT_TO_EXT(sector)); +} + +/** + * drbd_print_buffer() - Hexdump arbitrary binary data into a buffer + * @prefix: String is output at the beginning of each line output. + * @flags: Currently only defined flag: DBGPRINT_BUFFADDR; if set, each + * line starts with the virtual address of the line being + * output. If clear, each line starts with the offset from the + * beginning of the buffer. + * @size: Indicates the size of each entry in the buffer. Supported + * values are sizeof(char), sizeof(short) and sizeof(int) + * @buffer: Start address of buffer + * @buffer_va: Virtual address of start of buffer (normally the same + * as Buffer, but having it separate allows it to hold + * file address for example) + * @length: length of buffer + */ +static void drbd_print_buffer(const char *prefix, unsigned int flags, int size, + const void *buffer, const void *buffer_va, + unsigned int length) + +#define LINE_SIZE 16 +#define LINE_ENTRIES (int)(LINE_SIZE/size) +{ + const unsigned char *pstart; + const unsigned char *pstart_va; + const unsigned char *pend; + char bytes_str[LINE_SIZE*3+8], ascii_str[LINE_SIZE+8]; + char *pbytes = bytes_str, *pascii = ascii_str; + int offset = 0; + long sizemask; + int field_width; + int index; + const unsigned char *pend_str; + const unsigned char *p; + int count; + + /* verify size parameter */ + if (size != sizeof(char) && + size != sizeof(short) && + size != sizeof(int)) { + printk(KERN_DEBUG "drbd_print_buffer: " + "ERROR invalid size %d\n", size); + return; + } + + sizemask = size-1; + field_width = size*2; + + /* Adjust start/end to be on appropriate boundary for size */ + buffer = (const char *)((long)buffer & ~sizemask); + pend = (const unsigned char *) + (((long)buffer + length + sizemask) & ~sizemask); + + if (flags & DBGPRINT_BUFFADDR) { + /* Move start back to nearest multiple of line size, + * if printing address. This results in nicely formatted output + * with addresses being on line size (16) byte boundaries */ + pstart = (const unsigned char *)((long)buffer & ~(LINE_SIZE-1)); + } else { + pstart = (const unsigned char *)buffer; + } + + /* Set value of start VA to print if addresses asked for */ + pstart_va = (const unsigned char *)buffer_va + - ((const unsigned char *)buffer-pstart); + + /* Calculate end position to nicely align right hand side */ + pend_str = pstart + (((pend-pstart) + LINE_SIZE-1) & ~(LINE_SIZE-1)); + + /* Init strings */ + *pbytes = *pascii = '\0'; + + /* Start at beginning of first line */ + p = pstart; + count = 0; + + while (p < pend_str) { + if (p < (const unsigned char *)buffer || p >= pend) { + /* Before start of buffer or after end- print spaces */ + pbytes += sprintf(pbytes, "%*c ", field_width, ' '); + pascii += sprintf(pascii, "%*c", size, ' '); + p += size; + } else { + /* Add hex and ascii to strings */ + int val; + switch (size) { + default: + case 1: + val = *(unsigned char *)p; + break; + case 2: + val = *(unsigned short *)p; + break; + case 4: + val = *(unsigned int *)p; + break; + } + + pbytes += sprintf(pbytes, "%0*x ", field_width, val); + + for (index = size; index; index--) { + *pascii++ = isprint(*p) ? *p : '.'; + p++; + } + } + + count++; + + if (count == LINE_ENTRIES || p >= pend_str) { + /* Null terminate and print record */ + *pascii = '\0'; + printk(KERN_DEBUG "%s%8.8lx: %*s|%*s|\n", + prefix, + (flags & DBGPRINT_BUFFADDR) + ? (long)pstart_va:(long)offset, + LINE_ENTRIES*(field_width+1), bytes_str, + LINE_SIZE, ascii_str); + + /* Move onto next line */ + pstart_va += (p-pstart); + pstart = p; + count = 0; + offset += LINE_SIZE; + + /* Re-init strings */ + pbytes = bytes_str; + pascii = ascii_str; + *pbytes = *pascii = '\0'; + } + } +} + +static void probe_drbd_resync(struct drbd_conf *mdev, int level, const char *fmt, va_list args) +{ + char str[256]; + + if (!is_mdev_trace(mdev, level)) + return; + + if (vsnprintf(str, 256, fmt, args) >= 256) + str[255] = 0; + + printk(KERN_INFO "%s %s: %s", dev_driver_string(disk_to_dev(mdev->vdisk)), + dev_name(disk_to_dev(mdev->vdisk)), str); +} + +static void probe_drbd_bio(struct drbd_conf *mdev, const char *pfx, struct bio *bio, int complete, + struct drbd_request *r) +{ +#if defined(CONFIG_LBDAF) || defined(CONFIG_LBD) +#define SECTOR_FORMAT "%Lx" +#else +#define SECTOR_FORMAT "%lx" +#endif +#define SECTOR_SHIFT 9 + + unsigned long lowaddr = (unsigned long)(bio->bi_sector << SECTOR_SHIFT); + char *faddr = (char *)(lowaddr); + char rb[sizeof(void *)*2+6] = { 0, }; + struct bio_vec *bvec; + int segno; + + const int rw = bio->bi_rw; + const int biorw = (rw & (RW_MASK|RWA_MASK)); + const int biobarrier = (rw & (1<>>", + pfx, + biorw == WRITE ? "Write" : "Read", + biobarrier ? " : B" : "", + biosync ? " : S" : "", + bio, + rb, + complete ? (bio_flagged(bio, BIO_UPTODATE) ? "Success, " : "Failed, ") : "", + bio->bi_sector << SECTOR_SHIFT, + bio->bi_size); + + if (trace_level >= TRACE_LVL_METRICS && + ((biorw == WRITE) ^ complete)) { + printk(KERN_DEBUG " ind page offset length\n"); + __bio_for_each_segment(bvec, bio, segno, 0) { + printk(KERN_DEBUG " [%d] %p %8.8x %8.8x\n", segno, + bvec->bv_page, bvec->bv_offset, bvec->bv_len); + + if (trace_level >= TRACE_LVL_ALL) { + char *bvec_buf; + unsigned long flags; + + bvec_buf = bvec_kmap_irq(bvec, &flags); + + drbd_print_buffer(" ", DBGPRINT_BUFFADDR, 1, + bvec_buf, + faddr, + (bvec->bv_len <= 0x80) + ? bvec->bv_len : 0x80); + + bvec_kunmap_irq(bvec_buf, &flags); + + if (bvec->bv_len > 0x40) + printk(KERN_DEBUG " ....\n"); + + faddr += bvec->bv_len; + } + } + } +} + +static void probe_drbd_req(struct drbd_request *req, enum drbd_req_event what, char *msg) +{ + static const char *rq_event_names[] = { + [created] = "created", + [to_be_send] = "to_be_send", + [to_be_submitted] = "to_be_submitted", + [queue_for_net_write] = "queue_for_net_write", + [queue_for_net_read] = "queue_for_net_read", + [send_canceled] = "send_canceled", + [send_failed] = "send_failed", + [handed_over_to_network] = "handed_over_to_network", + [connection_lost_while_pending] = + "connection_lost_while_pending", + [recv_acked_by_peer] = "recv_acked_by_peer", + [write_acked_by_peer] = "write_acked_by_peer", + [neg_acked] = "neg_acked", + [conflict_discarded_by_peer] = "conflict_discarded_by_peer", + [barrier_acked] = "barrier_acked", + [data_received] = "data_received", + [read_completed_with_error] = "read_completed_with_error", + [read_ahead_completed_with_error] = "reada_completed_with_error", + [write_completed_with_error] = "write_completed_with_error", + [completed_ok] = "completed_ok", + }; + + struct drbd_conf *mdev = req->mdev; + + const int rw = (req->master_bio == NULL || + bio_data_dir(req->master_bio) == WRITE) ? + 'W' : 'R'; + const unsigned long s = req->rq_state; + + if (what != nothing) { + dev_info(DEV, "__req_mod(%p %c ,%s)\n", req, rw, rq_event_names[what]); + } else { + dev_info(DEV, "%s %p %c L%c%c%cN%c%c%c%c%c %u (%llus +%u) %s\n", + msg, req, rw, + s & RQ_LOCAL_PENDING ? 'p' : '-', + s & RQ_LOCAL_COMPLETED ? 'c' : '-', + s & RQ_LOCAL_OK ? 'o' : '-', + s & RQ_NET_PENDING ? 'p' : '-', + s & RQ_NET_QUEUED ? 'q' : '-', + s & RQ_NET_SENT ? 's' : '-', + s & RQ_NET_DONE ? 'd' : '-', + s & RQ_NET_OK ? 'o' : '-', + req->epoch, + (unsigned long long)req->sector, + req->size, + drbd_conn_str(mdev->state.conn)); + } +} + + +#define drbd_peer_str drbd_role_str +#define drbd_pdsk_str drbd_disk_str + +#define PSM(A) \ +do { \ + if (mask.A) { \ + int i = snprintf(p, len, " " #A "( %s )", \ + drbd_##A##_str(val.A)); \ + if (i >= len) \ + return op; \ + p += i; \ + len -= i; \ + } \ +} while (0) + +static char *dump_st(char *p, int len, union drbd_state mask, union drbd_state val) +{ + char *op = p; + *p = '\0'; + PSM(role); + PSM(peer); + PSM(conn); + PSM(disk); + PSM(pdsk); + + return op; +} + +#define INFOP(fmt, args...) \ +do { \ + if (trace_level >= TRACE_LVL_ALL) { \ + dev_info(DEV, "%s:%d: %s [%d] %s %s " fmt , \ + file, line, current->comm, current->pid, \ + sockname, recv ? "<<<" : ">>>" , \ + ## args); \ + } else { \ + dev_info(DEV, "%s %s " fmt, sockname, \ + recv ? "<<<" : ">>>" , \ + ## args); \ + } \ +} while (0) + +static char *_dump_block_id(u64 block_id, char *buff) +{ + if (is_syncer_block_id(block_id)) + strcpy(buff, "SyncerId"); + else + sprintf(buff, "%llx", (unsigned long long)block_id); + + return buff; +} + +static void probe_drbd_packet(struct drbd_conf *mdev, struct socket *sock, + int recv, union p_polymorph *p, char *file, int line) +{ + char *sockname = sock == mdev->meta.socket ? "meta" : "data"; + int cmd = (recv == 2) ? p->header.command : be16_to_cpu(p->header.command); + char tmp[300]; + union drbd_state m, v; + + switch (cmd) { + case P_HAND_SHAKE: + INFOP("%s (protocol %u-%u)\n", cmdname(cmd), + be32_to_cpu(p->handshake.protocol_min), + be32_to_cpu(p->handshake.protocol_max)); + break; + + case P_BITMAP: /* don't report this */ + case P_COMPRESSED_BITMAP: /* don't report this */ + break; + + case P_DATA: + INFOP("%s (sector %llus, id %s, seq %u, f %x)\n", cmdname(cmd), + (unsigned long long)be64_to_cpu(p->data.sector), + _dump_block_id(p->data.block_id, tmp), + be32_to_cpu(p->data.seq_num), + be32_to_cpu(p->data.dp_flags) + ); + break; + + case P_DATA_REPLY: + case P_RS_DATA_REPLY: + INFOP("%s (sector %llus, id %s)\n", cmdname(cmd), + (unsigned long long)be64_to_cpu(p->data.sector), + _dump_block_id(p->data.block_id, tmp) + ); + break; + + case P_RECV_ACK: + case P_WRITE_ACK: + case P_RS_WRITE_ACK: + case P_DISCARD_ACK: + case P_NEG_ACK: + case P_NEG_RS_DREPLY: + INFOP("%s (sector %llus, size %u, id %s, seq %u)\n", + cmdname(cmd), + (long long)be64_to_cpu(p->block_ack.sector), + be32_to_cpu(p->block_ack.blksize), + _dump_block_id(p->block_ack.block_id, tmp), + be32_to_cpu(p->block_ack.seq_num) + ); + break; + + case P_DATA_REQUEST: + case P_RS_DATA_REQUEST: + INFOP("%s (sector %llus, size %u, id %s)\n", cmdname(cmd), + (long long)be64_to_cpu(p->block_req.sector), + be32_to_cpu(p->block_req.blksize), + _dump_block_id(p->block_req.block_id, tmp) + ); + break; + + case P_BARRIER: + case P_BARRIER_ACK: + INFOP("%s (barrier %u)\n", cmdname(cmd), p->barrier.barrier); + break; + + case P_SYNC_PARAM: + case P_SYNC_PARAM89: + INFOP("%s (rate %u, verify-alg \"%.64s\", csums-alg \"%.64s\")\n", + cmdname(cmd), be32_to_cpu(p->rs_param_89.rate), + p->rs_param_89.verify_alg, p->rs_param_89.csums_alg); + break; + + case P_UUIDS: + INFOP("%s Curr:%016llX, Bitmap:%016llX, " + "HisSt:%016llX, HisEnd:%016llX\n", + cmdname(cmd), + (unsigned long long)be64_to_cpu(p->uuids.uuid[UI_CURRENT]), + (unsigned long long)be64_to_cpu(p->uuids.uuid[UI_BITMAP]), + (unsigned long long)be64_to_cpu(p->uuids.uuid[UI_HISTORY_START]), + (unsigned long long)be64_to_cpu(p->uuids.uuid[UI_HISTORY_END])); + break; + + case P_SIZES: + INFOP("%s (d %lluMiB, u %lluMiB, c %lldMiB, " + "max bio %x, q order %x)\n", + cmdname(cmd), + (long long)(be64_to_cpu(p->sizes.d_size)>>(20-9)), + (long long)(be64_to_cpu(p->sizes.u_size)>>(20-9)), + (long long)(be64_to_cpu(p->sizes.c_size)>>(20-9)), + be32_to_cpu(p->sizes.max_segment_size), + be32_to_cpu(p->sizes.queue_order_type)); + break; + + case P_STATE: + v.i = be32_to_cpu(p->state.state); + m.i = 0xffffffff; + dump_st(tmp, sizeof(tmp), m, v); + INFOP("%s (s %x {%s})\n", cmdname(cmd), v.i, tmp); + break; + + case P_STATE_CHG_REQ: + m.i = be32_to_cpu(p->req_state.mask); + v.i = be32_to_cpu(p->req_state.val); + dump_st(tmp, sizeof(tmp), m, v); + INFOP("%s (m %x v %x {%s})\n", cmdname(cmd), m.i, v.i, tmp); + break; + + case P_STATE_CHG_REPLY: + INFOP("%s (ret %x)\n", cmdname(cmd), + be32_to_cpu(p->req_state_reply.retcode)); + break; + + case P_PING: + case P_PING_ACK: + /* + * Dont trace pings at summary level + */ + if (trace_level < TRACE_LVL_ALL) + break; + /* fall through... */ + default: + INFOP("%s (%u)\n", cmdname(cmd), cmd); + break; + } +} + + +static int __init drbd_trace_init(void) +{ + int ret; + + if (trace_mask & TRACE_UNPLUG) { + ret = register_trace_drbd_unplug(probe_drbd_unplug); + WARN_ON(ret); + } + if (trace_mask & TRACE_UUID) { + ret = register_trace_drbd_uuid(probe_drbd_uuid); + WARN_ON(ret); + } + if (trace_mask & TRACE_EE) { + ret = register_trace_drbd_ee(probe_drbd_ee); + WARN_ON(ret); + } + if (trace_mask & TRACE_PACKET) { + ret = register_trace_drbd_packet(probe_drbd_packet); + WARN_ON(ret); + } + if (trace_mask & TRACE_MD_IO) { + ret = register_trace_drbd_md_io(probe_drbd_md_io); + WARN_ON(ret); + } + if (trace_mask & TRACE_EPOCH) { + ret = register_trace_drbd_epoch(probe_drbd_epoch); + WARN_ON(ret); + } + if (trace_mask & TRACE_NL) { + ret = register_trace_drbd_netlink(probe_drbd_netlink); + WARN_ON(ret); + } + if (trace_mask & TRACE_AL_EXT) { + ret = register_trace_drbd_actlog(probe_drbd_actlog); + WARN_ON(ret); + } + if (trace_mask & TRACE_RQ) { + ret = register_trace_drbd_bio(probe_drbd_bio); + WARN_ON(ret); + } + if (trace_mask & TRACE_INT_RQ) { + ret = register_trace_drbd_req(probe_drbd_req); + WARN_ON(ret); + } + if (trace_mask & TRACE_RESYNC) { + ret = register_trace__drbd_resync(probe_drbd_resync); + WARN_ON(ret); + } + return 0; +} + +module_init(drbd_trace_init); + +static void __exit drbd_trace_exit(void) +{ + if (trace_mask & TRACE_UNPLUG) + unregister_trace_drbd_unplug(probe_drbd_unplug); + if (trace_mask & TRACE_UUID) + unregister_trace_drbd_uuid(probe_drbd_uuid); + if (trace_mask & TRACE_EE) + unregister_trace_drbd_ee(probe_drbd_ee); + if (trace_mask & TRACE_PACKET) + unregister_trace_drbd_packet(probe_drbd_packet); + if (trace_mask & TRACE_MD_IO) + unregister_trace_drbd_md_io(probe_drbd_md_io); + if (trace_mask & TRACE_EPOCH) + unregister_trace_drbd_epoch(probe_drbd_epoch); + if (trace_mask & TRACE_NL) + unregister_trace_drbd_netlink(probe_drbd_netlink); + if (trace_mask & TRACE_AL_EXT) + unregister_trace_drbd_actlog(probe_drbd_actlog); + if (trace_mask & TRACE_RQ) + unregister_trace_drbd_bio(probe_drbd_bio); + if (trace_mask & TRACE_INT_RQ) + unregister_trace_drbd_req(probe_drbd_req); + if (trace_mask & TRACE_RESYNC) + unregister_trace__drbd_resync(probe_drbd_resync); + + tracepoint_synchronize_unregister(); +} + +module_exit(drbd_trace_exit); diff --git a/drivers/block/drbd/drbd_tracing.h b/drivers/block/drbd/drbd_tracing.h new file mode 100644 index 000000000000..c4531a137f65 --- /dev/null +++ b/drivers/block/drbd/drbd_tracing.h @@ -0,0 +1,87 @@ +/* + drbd_tracing.h + + This file is part of DRBD by Philipp Reisner and Lars Ellenberg. + + Copyright (C) 2003-2008, LINBIT Information Technologies GmbH. + Copyright (C) 2003-2008, Philipp Reisner . + Copyright (C) 2003-2008, Lars Ellenberg . + + drbd is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + drbd is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with drbd; see the file COPYING. If not, write to + the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + + */ + +#ifndef DRBD_TRACING_H +#define DRBD_TRACING_H + +#include +#include "drbd_int.h" +#include "drbd_req.h" + +enum { + TRACE_LVL_ALWAYS = 0, + TRACE_LVL_SUMMARY, + TRACE_LVL_METRICS, + TRACE_LVL_ALL, + TRACE_LVL_MAX +}; + +DECLARE_TRACE(drbd_unplug, + TP_PROTO(struct drbd_conf *mdev, char* msg), + TP_ARGS(mdev, msg)); + +DECLARE_TRACE(drbd_uuid, + TP_PROTO(struct drbd_conf *mdev, enum drbd_uuid_index index), + TP_ARGS(mdev, index)); + +DECLARE_TRACE(drbd_ee, + TP_PROTO(struct drbd_conf *mdev, struct drbd_epoch_entry *e, char* msg), + TP_ARGS(mdev, e, msg)); + +DECLARE_TRACE(drbd_md_io, + TP_PROTO(struct drbd_conf *mdev, int rw, struct drbd_backing_dev *bdev), + TP_ARGS(mdev, rw, bdev)); + +DECLARE_TRACE(drbd_epoch, + TP_PROTO(struct drbd_conf *mdev, struct drbd_epoch *epoch, enum epoch_event ev), + TP_ARGS(mdev, epoch, ev)); + +DECLARE_TRACE(drbd_netlink, + TP_PROTO(void *data, int is_req), + TP_ARGS(data, is_req)); + +DECLARE_TRACE(drbd_actlog, + TP_PROTO(struct drbd_conf *mdev, sector_t sector, char* msg), + TP_ARGS(mdev, sector, msg)); + +DECLARE_TRACE(drbd_bio, + TP_PROTO(struct drbd_conf *mdev, const char *pfx, struct bio *bio, int complete, + struct drbd_request *r), + TP_ARGS(mdev, pfx, bio, complete, r)); + +DECLARE_TRACE(drbd_req, + TP_PROTO(struct drbd_request *req, enum drbd_req_event what, char *msg), + TP_ARGS(req, what, msg)); + +DECLARE_TRACE(drbd_packet, + TP_PROTO(struct drbd_conf *mdev, struct socket *sock, + int recv, union p_polymorph *p, char *file, int line), + TP_ARGS(mdev, sock, recv, p, file, line)); + +DECLARE_TRACE(_drbd_resync, + TP_PROTO(struct drbd_conf *mdev, int level, const char *fmt, va_list args), + TP_ARGS(mdev, level, fmt, args)); + +#endif diff --git a/drivers/block/drbd/drbd_vli.h b/drivers/block/drbd/drbd_vli.h new file mode 100644 index 000000000000..fc824006e721 --- /dev/null +++ b/drivers/block/drbd/drbd_vli.h @@ -0,0 +1,351 @@ +/* +-*- linux-c -*- + drbd_receiver.c + This file is part of DRBD by Philipp Reisner and Lars Ellenberg. + + Copyright (C) 2001-2008, LINBIT Information Technologies GmbH. + Copyright (C) 1999-2008, Philipp Reisner . + Copyright (C) 2002-2008, Lars Ellenberg . + + drbd is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + drbd is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with drbd; see the file COPYING. If not, write to + the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef _DRBD_VLI_H +#define _DRBD_VLI_H + +/* + * At a granularity of 4KiB storage represented per bit, + * and stroage sizes of several TiB, + * and possibly small-bandwidth replication, + * the bitmap transfer time can take much too long, + * if transmitted in plain text. + * + * We try to reduce the transfered bitmap information + * by encoding runlengths of bit polarity. + * + * We never actually need to encode a "zero" (runlengths are positive). + * But then we have to store the value of the first bit. + * The first bit of information thus shall encode if the first runlength + * gives the number of set or unset bits. + * + * We assume that large areas are either completely set or unset, + * which gives good compression with any runlength method, + * even when encoding the runlength as fixed size 32bit/64bit integers. + * + * Still, there may be areas where the polarity flips every few bits, + * and encoding the runlength sequence of those areas with fix size + * integers would be much worse than plaintext. + * + * We want to encode small runlength values with minimum code length, + * while still being able to encode a Huge run of all zeros. + * + * Thus we need a Variable Length Integer encoding, VLI. + * + * For some cases, we produce more code bits than plaintext input. + * We need to send incompressible chunks as plaintext, skip over them + * and then see if the next chunk compresses better. + * + * We don't care too much about "excellent" compression ratio for large + * runlengths (all set/all clear): whether we achieve a factor of 100 + * or 1000 is not that much of an issue. + * We do not want to waste too much on short runlengths in the "noisy" + * parts of the bitmap, though. + * + * There are endless variants of VLI, we experimented with: + * * simple byte-based + * * various bit based with different code word length. + * + * To avoid yet an other configuration parameter (choice of bitmap compression + * algorithm) which was difficult to explain and tune, we just chose the one + * variant that turned out best in all test cases. + * Based on real world usage patterns, with device sizes ranging from a few GiB + * to several TiB, file server/mailserver/webserver/mysql/postgress, + * mostly idle to really busy, the all time winner (though sometimes only + * marginally better) is: + */ + +/* + * encoding is "visualised" as + * __little endian__ bitstream, least significant bit first (left most) + * + * this particular encoding is chosen so that the prefix code + * starts as unary encoding the level, then modified so that + * 10 levels can be described in 8bit, with minimal overhead + * for the smaller levels. + * + * Number of data bits follow fibonacci sequence, with the exception of the + * last level (+1 data bit, so it makes 64bit total). The only worse code when + * encoding bit polarity runlength is 1 plain bits => 2 code bits. +prefix data bits max val Nº data bits +0 x 0x2 1 +10 x 0x4 1 +110 xx 0x8 2 +1110 xxx 0x10 3 +11110 xxx xx 0x30 5 +111110 xx xxxxxx 0x130 8 +11111100 xxxxxxxx xxxxx 0x2130 13 +11111110 xxxxxxxx xxxxxxxx xxxxx 0x202130 21 +11111101 xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xx 0x400202130 34 +11111111 xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx 56 + * maximum encodable value: 0x100000400202130 == 2**56 + some */ + +/* compression "table": + transmitted x 0.29 + as plaintext x ........................ + x ........................ + x ........................ + x 0.59 0.21........................ + x ........................................................ + x .. c ................................................... + x 0.44.. o ................................................... + x .......... d ................................................... + x .......... e ................................................... + X............. ................................................... + x.............. b ................................................... +2.0x............... i ................................................... + #X................ t ................................................... + #................. s ........................... plain bits .......... +-+----------------------------------------------------------------------- + 1 16 32 64 +*/ + +/* LEVEL: (total bits, prefix bits, prefix value), + * sorted ascending by number of total bits. + * The rest of the code table is calculated at compiletime from this. */ + +/* fibonacci data 1, 1, ... */ +#define VLI_L_1_1() do { \ + LEVEL( 2, 1, 0x00); \ + LEVEL( 3, 2, 0x01); \ + LEVEL( 5, 3, 0x03); \ + LEVEL( 7, 4, 0x07); \ + LEVEL(10, 5, 0x0f); \ + LEVEL(14, 6, 0x1f); \ + LEVEL(21, 8, 0x3f); \ + LEVEL(29, 8, 0x7f); \ + LEVEL(42, 8, 0xbf); \ + LEVEL(64, 8, 0xff); \ + } while (0) + +/* finds a suitable level to decode the least significant part of in. + * returns number of bits consumed. + * + * BUG() for bad input, as that would mean a buggy code table. */ +static inline int vli_decode_bits(u64 *out, const u64 in) +{ + u64 adj = 1; + +#define LEVEL(t,b,v) \ + do { \ + if ((in & ((1 << b) -1)) == v) { \ + *out = ((in & ((~0ULL) >> (64-t))) >> b) + adj; \ + return t; \ + } \ + adj += 1ULL << (t - b); \ + } while (0) + + VLI_L_1_1(); + + /* NOT REACHED, if VLI_LEVELS code table is defined properly */ + BUG(); +#undef LEVEL +} + +/* return number of code bits needed, + * or negative error number */ +static inline int __vli_encode_bits(u64 *out, const u64 in) +{ + u64 max = 0; + u64 adj = 1; + + if (in == 0) + return -EINVAL; + +#define LEVEL(t,b,v) do { \ + max += 1ULL << (t - b); \ + if (in <= max) { \ + if (out) \ + *out = ((in - adj) << b) | v; \ + return t; \ + } \ + adj = max + 1; \ + } while (0) + + VLI_L_1_1(); + + return -EOVERFLOW; +#undef LEVEL +} + +#undef VLI_L_1_1 + +/* code from here down is independend of actually used bit code */ + +/* + * Code length is determined by some unique (e.g. unary) prefix. + * This encodes arbitrary bit length, not whole bytes: we have a bit-stream, + * not a byte stream. + */ + +/* for the bitstream, we need a cursor */ +struct bitstream_cursor { + /* the current byte */ + u8 *b; + /* the current bit within *b, nomalized: 0..7 */ + unsigned int bit; +}; + +/* initialize cursor to point to first bit of stream */ +static inline void bitstream_cursor_reset(struct bitstream_cursor *cur, void *s) +{ + cur->b = s; + cur->bit = 0; +} + +/* advance cursor by that many bits; maximum expected input value: 64, + * but depending on VLI implementation, it may be more. */ +static inline void bitstream_cursor_advance(struct bitstream_cursor *cur, unsigned int bits) +{ + bits += cur->bit; + cur->b = cur->b + (bits >> 3); + cur->bit = bits & 7; +} + +/* the bitstream itself knows its length */ +struct bitstream { + struct bitstream_cursor cur; + unsigned char *buf; + size_t buf_len; /* in bytes */ + + /* for input stream: + * number of trailing 0 bits for padding + * total number of valid bits in stream: buf_len * 8 - pad_bits */ + unsigned int pad_bits; +}; + +static inline void bitstream_init(struct bitstream *bs, void *s, size_t len, unsigned int pad_bits) +{ + bs->buf = s; + bs->buf_len = len; + bs->pad_bits = pad_bits; + bitstream_cursor_reset(&bs->cur, bs->buf); +} + +static inline void bitstream_rewind(struct bitstream *bs) +{ + bitstream_cursor_reset(&bs->cur, bs->buf); + memset(bs->buf, 0, bs->buf_len); +} + +/* Put (at most 64) least significant bits of val into bitstream, and advance cursor. + * Ignores "pad_bits". + * Returns zero if bits == 0 (nothing to do). + * Returns number of bits used if successful. + * + * If there is not enough room left in bitstream, + * leaves bitstream unchanged and returns -ENOBUFS. + */ +static inline int bitstream_put_bits(struct bitstream *bs, u64 val, const unsigned int bits) +{ + unsigned char *b = bs->cur.b; + unsigned int tmp; + + if (bits == 0) + return 0; + + if ((bs->cur.b + ((bs->cur.bit + bits -1) >> 3)) - bs->buf >= bs->buf_len) + return -ENOBUFS; + + /* paranoia: strip off hi bits; they should not be set anyways. */ + if (bits < 64) + val &= ~0ULL >> (64 - bits); + + *b++ |= (val & 0xff) << bs->cur.bit; + + for (tmp = 8 - bs->cur.bit; tmp < bits; tmp += 8) + *b++ |= (val >> tmp) & 0xff; + + bitstream_cursor_advance(&bs->cur, bits); + return bits; +} + +/* Fetch (at most 64) bits from bitstream into *out, and advance cursor. + * + * If more than 64 bits are requested, returns -EINVAL and leave *out unchanged. + * + * If there are less than the requested number of valid bits left in the + * bitstream, still fetches all available bits. + * + * Returns number of actually fetched bits. + */ +static inline int bitstream_get_bits(struct bitstream *bs, u64 *out, int bits) +{ + u64 val; + unsigned int n; + + if (bits > 64) + return -EINVAL; + + if (bs->cur.b + ((bs->cur.bit + bs->pad_bits + bits -1) >> 3) - bs->buf >= bs->buf_len) + bits = ((bs->buf_len - (bs->cur.b - bs->buf)) << 3) + - bs->cur.bit - bs->pad_bits; + + if (bits == 0) { + *out = 0; + return 0; + } + + /* get the high bits */ + val = 0; + n = (bs->cur.bit + bits + 7) >> 3; + /* n may be at most 9, if cur.bit + bits > 64 */ + /* which means this copies at most 8 byte */ + if (n) { + memcpy(&val, bs->cur.b+1, n - 1); + val = le64_to_cpu(val) << (8 - bs->cur.bit); + } + + /* we still need the low bits */ + val |= bs->cur.b[0] >> bs->cur.bit; + + /* and mask out bits we don't want */ + val &= ~0ULL >> (64 - bits); + + bitstream_cursor_advance(&bs->cur, bits); + *out = val; + + return bits; +} + +/* encodes @in as vli into @bs; + + * return values + * > 0: number of bits successfully stored in bitstream + * -ENOBUFS @bs is full + * -EINVAL input zero (invalid) + * -EOVERFLOW input too large for this vli code (invalid) + */ +static inline int vli_encode_bits(struct bitstream *bs, u64 in) +{ + u64 code = code; + int bits = __vli_encode_bits(&code, in); + + if (bits <= 0) + return bits; + + return bitstream_put_bits(bs, code, bits); +} + +#endif diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c new file mode 100644 index 000000000000..212e9545e634 --- /dev/null +++ b/drivers/block/drbd/drbd_worker.c @@ -0,0 +1,1529 @@ +/* + drbd_worker.c + + This file is part of DRBD by Philipp Reisner and Lars Ellenberg. + + Copyright (C) 2001-2008, LINBIT Information Technologies GmbH. + Copyright (C) 1999-2008, Philipp Reisner . + Copyright (C) 2002-2008, Lars Ellenberg . + + drbd is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + drbd is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with drbd; see the file COPYING. If not, write to + the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "drbd_int.h" +#include "drbd_req.h" +#include "drbd_tracing.h" + +#define SLEEP_TIME (HZ/10) + +static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int cancel); + + + +/* defined here: + drbd_md_io_complete + drbd_endio_write_sec + drbd_endio_read_sec + drbd_endio_pri + + * more endio handlers: + atodb_endio in drbd_actlog.c + drbd_bm_async_io_complete in drbd_bitmap.c + + * For all these callbacks, note the following: + * The callbacks will be called in irq context by the IDE drivers, + * and in Softirqs/Tasklets/BH context by the SCSI drivers. + * Try to get the locking right :) + * + */ + + +/* About the global_state_lock + Each state transition on an device holds a read lock. In case we have + to evaluate the sync after dependencies, we grab a write lock, because + we need stable states on all devices for that. */ +rwlock_t global_state_lock; + +/* used for synchronous meta data and bitmap IO + * submitted by drbd_md_sync_page_io() + */ +void drbd_md_io_complete(struct bio *bio, int error) +{ + struct drbd_md_io *md_io; + + md_io = (struct drbd_md_io *)bio->bi_private; + md_io->error = error; + + trace_drbd_bio(md_io->mdev, "Md", bio, 1, NULL); + + complete(&md_io->event); +} + +/* reads on behalf of the partner, + * "submitted" by the receiver + */ +void drbd_endio_read_sec(struct bio *bio, int error) __releases(local) +{ + unsigned long flags = 0; + struct drbd_epoch_entry *e = NULL; + struct drbd_conf *mdev; + int uptodate = bio_flagged(bio, BIO_UPTODATE); + + e = bio->bi_private; + mdev = e->mdev; + + if (error) + dev_warn(DEV, "read: error=%d s=%llus\n", error, + (unsigned long long)e->sector); + if (!error && !uptodate) { + dev_warn(DEV, "read: setting error to -EIO s=%llus\n", + (unsigned long long)e->sector); + /* strange behavior of some lower level drivers... + * fail the request by clearing the uptodate flag, + * but do not return any error?! */ + error = -EIO; + } + + D_ASSERT(e->block_id != ID_VACANT); + + trace_drbd_bio(mdev, "Sec", bio, 1, NULL); + + spin_lock_irqsave(&mdev->req_lock, flags); + mdev->read_cnt += e->size >> 9; + list_del(&e->w.list); + if (list_empty(&mdev->read_ee)) + wake_up(&mdev->ee_wait); + spin_unlock_irqrestore(&mdev->req_lock, flags); + + drbd_chk_io_error(mdev, error, FALSE); + drbd_queue_work(&mdev->data.work, &e->w); + put_ldev(mdev); + + trace_drbd_ee(mdev, e, "read completed"); +} + +/* writes on behalf of the partner, or resync writes, + * "submitted" by the receiver. + */ +void drbd_endio_write_sec(struct bio *bio, int error) __releases(local) +{ + unsigned long flags = 0; + struct drbd_epoch_entry *e = NULL; + struct drbd_conf *mdev; + sector_t e_sector; + int do_wake; + int is_syncer_req; + int do_al_complete_io; + int uptodate = bio_flagged(bio, BIO_UPTODATE); + int is_barrier = bio_rw_flagged(bio, BIO_RW_BARRIER); + + e = bio->bi_private; + mdev = e->mdev; + + if (error) + dev_warn(DEV, "write: error=%d s=%llus\n", error, + (unsigned long long)e->sector); + if (!error && !uptodate) { + dev_warn(DEV, "write: setting error to -EIO s=%llus\n", + (unsigned long long)e->sector); + /* strange behavior of some lower level drivers... + * fail the request by clearing the uptodate flag, + * but do not return any error?! */ + error = -EIO; + } + + /* error == -ENOTSUPP would be a better test, + * alas it is not reliable */ + if (error && is_barrier && e->flags & EE_IS_BARRIER) { + drbd_bump_write_ordering(mdev, WO_bdev_flush); + spin_lock_irqsave(&mdev->req_lock, flags); + list_del(&e->w.list); + e->w.cb = w_e_reissue; + /* put_ldev actually happens below, once we come here again. */ + __release(local); + spin_unlock_irqrestore(&mdev->req_lock, flags); + drbd_queue_work(&mdev->data.work, &e->w); + return; + } + + D_ASSERT(e->block_id != ID_VACANT); + + trace_drbd_bio(mdev, "Sec", bio, 1, NULL); + + spin_lock_irqsave(&mdev->req_lock, flags); + mdev->writ_cnt += e->size >> 9; + is_syncer_req = is_syncer_block_id(e->block_id); + + /* after we moved e to done_ee, + * we may no longer access it, + * it may be freed/reused already! + * (as soon as we release the req_lock) */ + e_sector = e->sector; + do_al_complete_io = e->flags & EE_CALL_AL_COMPLETE_IO; + + list_del(&e->w.list); /* has been on active_ee or sync_ee */ + list_add_tail(&e->w.list, &mdev->done_ee); + + trace_drbd_ee(mdev, e, "write completed"); + + /* No hlist_del_init(&e->colision) here, we did not send the Ack yet, + * neither did we wake possibly waiting conflicting requests. + * done from "drbd_process_done_ee" within the appropriate w.cb + * (e_end_block/e_end_resync_block) or from _drbd_clear_done_ee */ + + do_wake = is_syncer_req + ? list_empty(&mdev->sync_ee) + : list_empty(&mdev->active_ee); + + if (error) + __drbd_chk_io_error(mdev, FALSE); + spin_unlock_irqrestore(&mdev->req_lock, flags); + + if (is_syncer_req) + drbd_rs_complete_io(mdev, e_sector); + + if (do_wake) + wake_up(&mdev->ee_wait); + + if (do_al_complete_io) + drbd_al_complete_io(mdev, e_sector); + + wake_asender(mdev); + put_ldev(mdev); + +} + +/* read, readA or write requests on R_PRIMARY coming from drbd_make_request + */ +void drbd_endio_pri(struct bio *bio, int error) +{ + unsigned long flags; + struct drbd_request *req = bio->bi_private; + struct drbd_conf *mdev = req->mdev; + struct bio_and_error m; + enum drbd_req_event what; + int uptodate = bio_flagged(bio, BIO_UPTODATE); + + if (error) + dev_warn(DEV, "p %s: error=%d\n", + bio_data_dir(bio) == WRITE ? "write" : "read", error); + if (!error && !uptodate) { + dev_warn(DEV, "p %s: setting error to -EIO\n", + bio_data_dir(bio) == WRITE ? "write" : "read"); + /* strange behavior of some lower level drivers... + * fail the request by clearing the uptodate flag, + * but do not return any error?! */ + error = -EIO; + } + + trace_drbd_bio(mdev, "Pri", bio, 1, NULL); + + /* to avoid recursion in __req_mod */ + if (unlikely(error)) { + what = (bio_data_dir(bio) == WRITE) + ? write_completed_with_error + : (bio_rw(bio) == READA) + ? read_completed_with_error + : read_ahead_completed_with_error; + } else + what = completed_ok; + + bio_put(req->private_bio); + req->private_bio = ERR_PTR(error); + + spin_lock_irqsave(&mdev->req_lock, flags); + __req_mod(req, what, &m); + spin_unlock_irqrestore(&mdev->req_lock, flags); + + if (m.bio) + complete_master_bio(mdev, &m); +} + +int w_io_error(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +{ + struct drbd_request *req = container_of(w, struct drbd_request, w); + + /* NOTE: mdev->ldev can be NULL by the time we get here! */ + /* D_ASSERT(mdev->ldev->dc.on_io_error != EP_PASS_ON); */ + + /* the only way this callback is scheduled is from _req_may_be_done, + * when it is done and had a local write error, see comments there */ + drbd_req_free(req); + + return TRUE; +} + +int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +{ + struct drbd_request *req = container_of(w, struct drbd_request, w); + + /* We should not detach for read io-error, + * but try to WRITE the P_DATA_REPLY to the failed location, + * to give the disk the chance to relocate that block */ + + spin_lock_irq(&mdev->req_lock); + if (cancel || + mdev->state.conn < C_CONNECTED || + mdev->state.pdsk <= D_INCONSISTENT) { + _req_mod(req, send_canceled); + spin_unlock_irq(&mdev->req_lock); + dev_alert(DEV, "WE ARE LOST. Local IO failure, no peer.\n"); + return 1; + } + spin_unlock_irq(&mdev->req_lock); + + return w_send_read_req(mdev, w, 0); +} + +int w_resync_inactive(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +{ + ERR_IF(cancel) return 1; + dev_err(DEV, "resync inactive, but callback triggered??\n"); + return 1; /* Simply ignore this! */ +} + +void drbd_csum(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bio *bio, void *digest) +{ + struct hash_desc desc; + struct scatterlist sg; + struct bio_vec *bvec; + int i; + + desc.tfm = tfm; + desc.flags = 0; + + sg_init_table(&sg, 1); + crypto_hash_init(&desc); + + __bio_for_each_segment(bvec, bio, i, 0) { + sg_set_page(&sg, bvec->bv_page, bvec->bv_len, bvec->bv_offset); + crypto_hash_update(&desc, &sg, sg.length); + } + crypto_hash_final(&desc, digest); +} + +static int w_e_send_csum(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +{ + struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w); + int digest_size; + void *digest; + int ok; + + D_ASSERT(e->block_id == DRBD_MAGIC + 0xbeef); + + if (unlikely(cancel)) { + drbd_free_ee(mdev, e); + return 1; + } + + if (likely(drbd_bio_uptodate(e->private_bio))) { + digest_size = crypto_hash_digestsize(mdev->csums_tfm); + digest = kmalloc(digest_size, GFP_NOIO); + if (digest) { + drbd_csum(mdev, mdev->csums_tfm, e->private_bio, digest); + + inc_rs_pending(mdev); + ok = drbd_send_drequest_csum(mdev, + e->sector, + e->size, + digest, + digest_size, + P_CSUM_RS_REQUEST); + kfree(digest); + } else { + dev_err(DEV, "kmalloc() of digest failed.\n"); + ok = 0; + } + } else + ok = 1; + + drbd_free_ee(mdev, e); + + if (unlikely(!ok)) + dev_err(DEV, "drbd_send_drequest(..., csum) failed\n"); + return ok; +} + +#define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN) + +static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size) +{ + struct drbd_epoch_entry *e; + + if (!get_ldev(mdev)) + return 0; + + /* GFP_TRY, because if there is no memory available right now, this may + * be rescheduled for later. It is "only" background resync, after all. */ + e = drbd_alloc_ee(mdev, DRBD_MAGIC+0xbeef, sector, size, GFP_TRY); + if (!e) { + put_ldev(mdev); + return 2; + } + + spin_lock_irq(&mdev->req_lock); + list_add(&e->w.list, &mdev->read_ee); + spin_unlock_irq(&mdev->req_lock); + + e->private_bio->bi_end_io = drbd_endio_read_sec; + e->private_bio->bi_rw = READ; + e->w.cb = w_e_send_csum; + + mdev->read_cnt += size >> 9; + drbd_generic_make_request(mdev, DRBD_FAULT_RS_RD, e->private_bio); + + return 1; +} + +void resync_timer_fn(unsigned long data) +{ + unsigned long flags; + struct drbd_conf *mdev = (struct drbd_conf *) data; + int queue; + + spin_lock_irqsave(&mdev->req_lock, flags); + + if (likely(!test_and_clear_bit(STOP_SYNC_TIMER, &mdev->flags))) { + queue = 1; + if (mdev->state.conn == C_VERIFY_S) + mdev->resync_work.cb = w_make_ov_request; + else + mdev->resync_work.cb = w_make_resync_request; + } else { + queue = 0; + mdev->resync_work.cb = w_resync_inactive; + } + + spin_unlock_irqrestore(&mdev->req_lock, flags); + + /* harmless race: list_empty outside data.work.q_lock */ + if (list_empty(&mdev->resync_work.list) && queue) + drbd_queue_work(&mdev->data.work, &mdev->resync_work); +} + +int w_make_resync_request(struct drbd_conf *mdev, + struct drbd_work *w, int cancel) +{ + unsigned long bit; + sector_t sector; + const sector_t capacity = drbd_get_capacity(mdev->this_bdev); + int max_segment_size = queue_max_segment_size(mdev->rq_queue); + int number, i, size, pe, mx; + int align, queued, sndbuf; + + if (unlikely(cancel)) + return 1; + + if (unlikely(mdev->state.conn < C_CONNECTED)) { + dev_err(DEV, "Confused in w_make_resync_request()! cstate < Connected"); + return 0; + } + + if (mdev->state.conn != C_SYNC_TARGET) + dev_err(DEV, "%s in w_make_resync_request\n", + drbd_conn_str(mdev->state.conn)); + + if (!get_ldev(mdev)) { + /* Since we only need to access mdev->rsync a + get_ldev_if_state(mdev,D_FAILED) would be sufficient, but + to continue resync with a broken disk makes no sense at + all */ + dev_err(DEV, "Disk broke down during resync!\n"); + mdev->resync_work.cb = w_resync_inactive; + return 1; + } + + number = SLEEP_TIME * mdev->sync_conf.rate / ((BM_BLOCK_SIZE/1024)*HZ); + pe = atomic_read(&mdev->rs_pending_cnt); + + mutex_lock(&mdev->data.mutex); + if (mdev->data.socket) + mx = mdev->data.socket->sk->sk_rcvbuf / sizeof(struct p_block_req); + else + mx = 1; + mutex_unlock(&mdev->data.mutex); + + /* For resync rates >160MB/sec, allow more pending RS requests */ + if (number > mx) + mx = number; + + /* Limit the number of pending RS requests to no more than the peer's receive buffer */ + if ((pe + number) > mx) { + number = mx - pe; + } + + for (i = 0; i < number; i++) { + /* Stop generating RS requests, when half of the send buffer is filled */ + mutex_lock(&mdev->data.mutex); + if (mdev->data.socket) { + queued = mdev->data.socket->sk->sk_wmem_queued; + sndbuf = mdev->data.socket->sk->sk_sndbuf; + } else { + queued = 1; + sndbuf = 0; + } + mutex_unlock(&mdev->data.mutex); + if (queued > sndbuf / 2) + goto requeue; + +next_sector: + size = BM_BLOCK_SIZE; + bit = drbd_bm_find_next(mdev, mdev->bm_resync_fo); + + if (bit == -1UL) { + mdev->bm_resync_fo = drbd_bm_bits(mdev); + mdev->resync_work.cb = w_resync_inactive; + put_ldev(mdev); + return 1; + } + + sector = BM_BIT_TO_SECT(bit); + + if (drbd_try_rs_begin_io(mdev, sector)) { + mdev->bm_resync_fo = bit; + goto requeue; + } + mdev->bm_resync_fo = bit + 1; + + if (unlikely(drbd_bm_test_bit(mdev, bit) == 0)) { + drbd_rs_complete_io(mdev, sector); + goto next_sector; + } + +#if DRBD_MAX_SEGMENT_SIZE > BM_BLOCK_SIZE + /* try to find some adjacent bits. + * we stop if we have already the maximum req size. + * + * Additionally always align bigger requests, in order to + * be prepared for all stripe sizes of software RAIDs. + * + * we _do_ care about the agreed-upon q->max_segment_size + * here, as splitting up the requests on the other side is more + * difficult. the consequence is, that on lvm and md and other + * "indirect" devices, this is dead code, since + * q->max_segment_size will be PAGE_SIZE. + */ + align = 1; + for (;;) { + if (size + BM_BLOCK_SIZE > max_segment_size) + break; + + /* Be always aligned */ + if (sector & ((1<<(align+3))-1)) + break; + + /* do not cross extent boundaries */ + if (((bit+1) & BM_BLOCKS_PER_BM_EXT_MASK) == 0) + break; + /* now, is it actually dirty, after all? + * caution, drbd_bm_test_bit is tri-state for some + * obscure reason; ( b == 0 ) would get the out-of-band + * only accidentally right because of the "oddly sized" + * adjustment below */ + if (drbd_bm_test_bit(mdev, bit+1) != 1) + break; + bit++; + size += BM_BLOCK_SIZE; + if ((BM_BLOCK_SIZE << align) <= size) + align++; + i++; + } + /* if we merged some, + * reset the offset to start the next drbd_bm_find_next from */ + if (size > BM_BLOCK_SIZE) + mdev->bm_resync_fo = bit + 1; +#endif + + /* adjust very last sectors, in case we are oddly sized */ + if (sector + (size>>9) > capacity) + size = (capacity-sector)<<9; + if (mdev->agreed_pro_version >= 89 && mdev->csums_tfm) { + switch (read_for_csum(mdev, sector, size)) { + case 0: /* Disk failure*/ + put_ldev(mdev); + return 0; + case 2: /* Allocation failed */ + drbd_rs_complete_io(mdev, sector); + mdev->bm_resync_fo = BM_SECT_TO_BIT(sector); + goto requeue; + /* case 1: everything ok */ + } + } else { + inc_rs_pending(mdev); + if (!drbd_send_drequest(mdev, P_RS_DATA_REQUEST, + sector, size, ID_SYNCER)) { + dev_err(DEV, "drbd_send_drequest() failed, aborting...\n"); + dec_rs_pending(mdev); + put_ldev(mdev); + return 0; + } + } + } + + if (mdev->bm_resync_fo >= drbd_bm_bits(mdev)) { + /* last syncer _request_ was sent, + * but the P_RS_DATA_REPLY not yet received. sync will end (and + * next sync group will resume), as soon as we receive the last + * resync data block, and the last bit is cleared. + * until then resync "work" is "inactive" ... + */ + mdev->resync_work.cb = w_resync_inactive; + put_ldev(mdev); + return 1; + } + + requeue: + mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME); + put_ldev(mdev); + return 1; +} + +static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +{ + int number, i, size; + sector_t sector; + const sector_t capacity = drbd_get_capacity(mdev->this_bdev); + + if (unlikely(cancel)) + return 1; + + if (unlikely(mdev->state.conn < C_CONNECTED)) { + dev_err(DEV, "Confused in w_make_ov_request()! cstate < Connected"); + return 0; + } + + number = SLEEP_TIME*mdev->sync_conf.rate / ((BM_BLOCK_SIZE/1024)*HZ); + if (atomic_read(&mdev->rs_pending_cnt) > number) + goto requeue; + + number -= atomic_read(&mdev->rs_pending_cnt); + + sector = mdev->ov_position; + for (i = 0; i < number; i++) { + if (sector >= capacity) { + mdev->resync_work.cb = w_resync_inactive; + return 1; + } + + size = BM_BLOCK_SIZE; + + if (drbd_try_rs_begin_io(mdev, sector)) { + mdev->ov_position = sector; + goto requeue; + } + + if (sector + (size>>9) > capacity) + size = (capacity-sector)<<9; + + inc_rs_pending(mdev); + if (!drbd_send_ov_request(mdev, sector, size)) { + dec_rs_pending(mdev); + return 0; + } + sector += BM_SECT_PER_BIT; + } + mdev->ov_position = sector; + + requeue: + mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME); + return 1; +} + + +int w_ov_finished(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +{ + kfree(w); + ov_oos_print(mdev); + drbd_resync_finished(mdev); + + return 1; +} + +static int w_resync_finished(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +{ + kfree(w); + + drbd_resync_finished(mdev); + + return 1; +} + +int drbd_resync_finished(struct drbd_conf *mdev) +{ + unsigned long db, dt, dbdt; + unsigned long n_oos; + union drbd_state os, ns; + struct drbd_work *w; + char *khelper_cmd = NULL; + + /* Remove all elements from the resync LRU. Since future actions + * might set bits in the (main) bitmap, then the entries in the + * resync LRU would be wrong. */ + if (drbd_rs_del_all(mdev)) { + /* In case this is not possible now, most probably because + * there are P_RS_DATA_REPLY Packets lingering on the worker's + * queue (or even the read operations for those packets + * is not finished by now). Retry in 100ms. */ + + drbd_kick_lo(mdev); + __set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(HZ / 10); + w = kmalloc(sizeof(struct drbd_work), GFP_ATOMIC); + if (w) { + w->cb = w_resync_finished; + drbd_queue_work(&mdev->data.work, w); + return 1; + } + dev_err(DEV, "Warn failed to drbd_rs_del_all() and to kmalloc(w).\n"); + } + + dt = (jiffies - mdev->rs_start - mdev->rs_paused) / HZ; + if (dt <= 0) + dt = 1; + db = mdev->rs_total; + dbdt = Bit2KB(db/dt); + mdev->rs_paused /= HZ; + + if (!get_ldev(mdev)) + goto out; + + spin_lock_irq(&mdev->req_lock); + os = mdev->state; + + /* This protects us against multiple calls (that can happen in the presence + of application IO), and against connectivity loss just before we arrive here. */ + if (os.conn <= C_CONNECTED) + goto out_unlock; + + ns = os; + ns.conn = C_CONNECTED; + + dev_info(DEV, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n", + (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) ? + "Online verify " : "Resync", + dt + mdev->rs_paused, mdev->rs_paused, dbdt); + + n_oos = drbd_bm_total_weight(mdev); + + if (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) { + if (n_oos) { + dev_alert(DEV, "Online verify found %lu %dk block out of sync!\n", + n_oos, Bit2KB(1)); + khelper_cmd = "out-of-sync"; + } + } else { + D_ASSERT((n_oos - mdev->rs_failed) == 0); + + if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) + khelper_cmd = "after-resync-target"; + + if (mdev->csums_tfm && mdev->rs_total) { + const unsigned long s = mdev->rs_same_csum; + const unsigned long t = mdev->rs_total; + const int ratio = + (t == 0) ? 0 : + (t < 100000) ? ((s*100)/t) : (s/(t/100)); + dev_info(DEV, "%u %% had equal check sums, eliminated: %luK; " + "transferred %luK total %luK\n", + ratio, + Bit2KB(mdev->rs_same_csum), + Bit2KB(mdev->rs_total - mdev->rs_same_csum), + Bit2KB(mdev->rs_total)); + } + } + + if (mdev->rs_failed) { + dev_info(DEV, " %lu failed blocks\n", mdev->rs_failed); + + if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) { + ns.disk = D_INCONSISTENT; + ns.pdsk = D_UP_TO_DATE; + } else { + ns.disk = D_UP_TO_DATE; + ns.pdsk = D_INCONSISTENT; + } + } else { + ns.disk = D_UP_TO_DATE; + ns.pdsk = D_UP_TO_DATE; + + if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) { + if (mdev->p_uuid) { + int i; + for (i = UI_BITMAP ; i <= UI_HISTORY_END ; i++) + _drbd_uuid_set(mdev, i, mdev->p_uuid[i]); + drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_CURRENT]); + _drbd_uuid_set(mdev, UI_CURRENT, mdev->p_uuid[UI_CURRENT]); + } else { + dev_err(DEV, "mdev->p_uuid is NULL! BUG\n"); + } + } + + drbd_uuid_set_bm(mdev, 0UL); + + if (mdev->p_uuid) { + /* Now the two UUID sets are equal, update what we + * know of the peer. */ + int i; + for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++) + mdev->p_uuid[i] = mdev->ldev->md.uuid[i]; + } + } + + _drbd_set_state(mdev, ns, CS_VERBOSE, NULL); +out_unlock: + spin_unlock_irq(&mdev->req_lock); + put_ldev(mdev); +out: + mdev->rs_total = 0; + mdev->rs_failed = 0; + mdev->rs_paused = 0; + mdev->ov_start_sector = 0; + + if (test_and_clear_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags)) { + dev_warn(DEV, "Writing the whole bitmap, due to failed kmalloc\n"); + drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL, "write from resync_finished"); + } + + if (khelper_cmd) + drbd_khelper(mdev, khelper_cmd); + + return 1; +} + +/* helper */ +static void move_to_net_ee_or_free(struct drbd_conf *mdev, struct drbd_epoch_entry *e) +{ + if (drbd_bio_has_active_page(e->private_bio)) { + /* This might happen if sendpage() has not finished */ + spin_lock_irq(&mdev->req_lock); + list_add_tail(&e->w.list, &mdev->net_ee); + spin_unlock_irq(&mdev->req_lock); + } else + drbd_free_ee(mdev, e); +} + +/** + * w_e_end_data_req() - Worker callback, to send a P_DATA_REPLY packet in response to a P_DATA_REQUEST + * @mdev: DRBD device. + * @w: work object. + * @cancel: The connection will be closed anyways + */ +int w_e_end_data_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +{ + struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w); + int ok; + + if (unlikely(cancel)) { + drbd_free_ee(mdev, e); + dec_unacked(mdev); + return 1; + } + + if (likely(drbd_bio_uptodate(e->private_bio))) { + ok = drbd_send_block(mdev, P_DATA_REPLY, e); + } else { + if (__ratelimit(&drbd_ratelimit_state)) + dev_err(DEV, "Sending NegDReply. sector=%llus.\n", + (unsigned long long)e->sector); + + ok = drbd_send_ack(mdev, P_NEG_DREPLY, e); + } + + dec_unacked(mdev); + + move_to_net_ee_or_free(mdev, e); + + if (unlikely(!ok)) + dev_err(DEV, "drbd_send_block() failed\n"); + return ok; +} + +/** + * w_e_end_rsdata_req() - Worker callback to send a P_RS_DATA_REPLY packet in response to a P_RS_DATA_REQUESTRS + * @mdev: DRBD device. + * @w: work object. + * @cancel: The connection will be closed anyways + */ +int w_e_end_rsdata_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +{ + struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w); + int ok; + + if (unlikely(cancel)) { + drbd_free_ee(mdev, e); + dec_unacked(mdev); + return 1; + } + + if (get_ldev_if_state(mdev, D_FAILED)) { + drbd_rs_complete_io(mdev, e->sector); + put_ldev(mdev); + } + + if (likely(drbd_bio_uptodate(e->private_bio))) { + if (likely(mdev->state.pdsk >= D_INCONSISTENT)) { + inc_rs_pending(mdev); + ok = drbd_send_block(mdev, P_RS_DATA_REPLY, e); + } else { + if (__ratelimit(&drbd_ratelimit_state)) + dev_err(DEV, "Not sending RSDataReply, " + "partner DISKLESS!\n"); + ok = 1; + } + } else { + if (__ratelimit(&drbd_ratelimit_state)) + dev_err(DEV, "Sending NegRSDReply. sector %llus.\n", + (unsigned long long)e->sector); + + ok = drbd_send_ack(mdev, P_NEG_RS_DREPLY, e); + + /* update resync data with failure */ + drbd_rs_failed_io(mdev, e->sector, e->size); + } + + dec_unacked(mdev); + + move_to_net_ee_or_free(mdev, e); + + if (unlikely(!ok)) + dev_err(DEV, "drbd_send_block() failed\n"); + return ok; +} + +int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +{ + struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w); + struct digest_info *di; + int digest_size; + void *digest = NULL; + int ok, eq = 0; + + if (unlikely(cancel)) { + drbd_free_ee(mdev, e); + dec_unacked(mdev); + return 1; + } + + drbd_rs_complete_io(mdev, e->sector); + + di = (struct digest_info *)(unsigned long)e->block_id; + + if (likely(drbd_bio_uptodate(e->private_bio))) { + /* quick hack to try to avoid a race against reconfiguration. + * a real fix would be much more involved, + * introducing more locking mechanisms */ + if (mdev->csums_tfm) { + digest_size = crypto_hash_digestsize(mdev->csums_tfm); + D_ASSERT(digest_size == di->digest_size); + digest = kmalloc(digest_size, GFP_NOIO); + } + if (digest) { + drbd_csum(mdev, mdev->csums_tfm, e->private_bio, digest); + eq = !memcmp(digest, di->digest, digest_size); + kfree(digest); + } + + if (eq) { + drbd_set_in_sync(mdev, e->sector, e->size); + mdev->rs_same_csum++; + ok = drbd_send_ack(mdev, P_RS_IS_IN_SYNC, e); + } else { + inc_rs_pending(mdev); + e->block_id = ID_SYNCER; + ok = drbd_send_block(mdev, P_RS_DATA_REPLY, e); + } + } else { + ok = drbd_send_ack(mdev, P_NEG_RS_DREPLY, e); + if (__ratelimit(&drbd_ratelimit_state)) + dev_err(DEV, "Sending NegDReply. I guess it gets messy.\n"); + } + + dec_unacked(mdev); + + kfree(di); + + move_to_net_ee_or_free(mdev, e); + + if (unlikely(!ok)) + dev_err(DEV, "drbd_send_block/ack() failed\n"); + return ok; +} + +int w_e_end_ov_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +{ + struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w); + int digest_size; + void *digest; + int ok = 1; + + if (unlikely(cancel)) + goto out; + + if (unlikely(!drbd_bio_uptodate(e->private_bio))) + goto out; + + digest_size = crypto_hash_digestsize(mdev->verify_tfm); + /* FIXME if this allocation fails, online verify will not terminate! */ + digest = kmalloc(digest_size, GFP_NOIO); + if (digest) { + drbd_csum(mdev, mdev->verify_tfm, e->private_bio, digest); + inc_rs_pending(mdev); + ok = drbd_send_drequest_csum(mdev, e->sector, e->size, + digest, digest_size, P_OV_REPLY); + if (!ok) + dec_rs_pending(mdev); + kfree(digest); + } + +out: + drbd_free_ee(mdev, e); + + dec_unacked(mdev); + + return ok; +} + +void drbd_ov_oos_found(struct drbd_conf *mdev, sector_t sector, int size) +{ + if (mdev->ov_last_oos_start + mdev->ov_last_oos_size == sector) { + mdev->ov_last_oos_size += size>>9; + } else { + mdev->ov_last_oos_start = sector; + mdev->ov_last_oos_size = size>>9; + } + drbd_set_out_of_sync(mdev, sector, size); + set_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags); +} + +int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +{ + struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w); + struct digest_info *di; + int digest_size; + void *digest; + int ok, eq = 0; + + if (unlikely(cancel)) { + drbd_free_ee(mdev, e); + dec_unacked(mdev); + return 1; + } + + /* after "cancel", because after drbd_disconnect/drbd_rs_cancel_all + * the resync lru has been cleaned up already */ + drbd_rs_complete_io(mdev, e->sector); + + di = (struct digest_info *)(unsigned long)e->block_id; + + if (likely(drbd_bio_uptodate(e->private_bio))) { + digest_size = crypto_hash_digestsize(mdev->verify_tfm); + digest = kmalloc(digest_size, GFP_NOIO); + if (digest) { + drbd_csum(mdev, mdev->verify_tfm, e->private_bio, digest); + + D_ASSERT(digest_size == di->digest_size); + eq = !memcmp(digest, di->digest, digest_size); + kfree(digest); + } + } else { + ok = drbd_send_ack(mdev, P_NEG_RS_DREPLY, e); + if (__ratelimit(&drbd_ratelimit_state)) + dev_err(DEV, "Sending NegDReply. I guess it gets messy.\n"); + } + + dec_unacked(mdev); + + kfree(di); + + if (!eq) + drbd_ov_oos_found(mdev, e->sector, e->size); + else + ov_oos_print(mdev); + + ok = drbd_send_ack_ex(mdev, P_OV_RESULT, e->sector, e->size, + eq ? ID_IN_SYNC : ID_OUT_OF_SYNC); + + drbd_free_ee(mdev, e); + + if (--mdev->ov_left == 0) { + ov_oos_print(mdev); + drbd_resync_finished(mdev); + } + + return ok; +} + +int w_prev_work_done(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +{ + struct drbd_wq_barrier *b = container_of(w, struct drbd_wq_barrier, w); + complete(&b->done); + return 1; +} + +int w_send_barrier(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +{ + struct drbd_tl_epoch *b = container_of(w, struct drbd_tl_epoch, w); + struct p_barrier *p = &mdev->data.sbuf.barrier; + int ok = 1; + + /* really avoid racing with tl_clear. w.cb may have been referenced + * just before it was reassigned and re-queued, so double check that. + * actually, this race was harmless, since we only try to send the + * barrier packet here, and otherwise do nothing with the object. + * but compare with the head of w_clear_epoch */ + spin_lock_irq(&mdev->req_lock); + if (w->cb != w_send_barrier || mdev->state.conn < C_CONNECTED) + cancel = 1; + spin_unlock_irq(&mdev->req_lock); + if (cancel) + return 1; + + if (!drbd_get_data_sock(mdev)) + return 0; + p->barrier = b->br_number; + /* inc_ap_pending was done where this was queued. + * dec_ap_pending will be done in got_BarrierAck + * or (on connection loss) in w_clear_epoch. */ + ok = _drbd_send_cmd(mdev, mdev->data.socket, P_BARRIER, + (struct p_header *)p, sizeof(*p), 0); + drbd_put_data_sock(mdev); + + return ok; +} + +int w_send_write_hint(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +{ + if (cancel) + return 1; + return drbd_send_short_cmd(mdev, P_UNPLUG_REMOTE); +} + +/** + * w_send_dblock() - Worker callback to send a P_DATA packet in order to mirror a write request + * @mdev: DRBD device. + * @w: work object. + * @cancel: The connection will be closed anyways + */ +int w_send_dblock(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +{ + struct drbd_request *req = container_of(w, struct drbd_request, w); + int ok; + + if (unlikely(cancel)) { + req_mod(req, send_canceled); + return 1; + } + + ok = drbd_send_dblock(mdev, req); + req_mod(req, ok ? handed_over_to_network : send_failed); + + return ok; +} + +/** + * w_send_read_req() - Worker callback to send a read request (P_DATA_REQUEST) packet + * @mdev: DRBD device. + * @w: work object. + * @cancel: The connection will be closed anyways + */ +int w_send_read_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +{ + struct drbd_request *req = container_of(w, struct drbd_request, w); + int ok; + + if (unlikely(cancel)) { + req_mod(req, send_canceled); + return 1; + } + + ok = drbd_send_drequest(mdev, P_DATA_REQUEST, req->sector, req->size, + (unsigned long)req); + + if (!ok) { + /* ?? we set C_TIMEOUT or C_BROKEN_PIPE in drbd_send(); + * so this is probably redundant */ + if (mdev->state.conn >= C_CONNECTED) + drbd_force_state(mdev, NS(conn, C_NETWORK_FAILURE)); + } + req_mod(req, ok ? handed_over_to_network : send_failed); + + return ok; +} + +static int _drbd_may_sync_now(struct drbd_conf *mdev) +{ + struct drbd_conf *odev = mdev; + + while (1) { + if (odev->sync_conf.after == -1) + return 1; + odev = minor_to_mdev(odev->sync_conf.after); + ERR_IF(!odev) return 1; + if ((odev->state.conn >= C_SYNC_SOURCE && + odev->state.conn <= C_PAUSED_SYNC_T) || + odev->state.aftr_isp || odev->state.peer_isp || + odev->state.user_isp) + return 0; + } +} + +/** + * _drbd_pause_after() - Pause resync on all devices that may not resync now + * @mdev: DRBD device. + * + * Called from process context only (admin command and after_state_ch). + */ +static int _drbd_pause_after(struct drbd_conf *mdev) +{ + struct drbd_conf *odev; + int i, rv = 0; + + for (i = 0; i < minor_count; i++) { + odev = minor_to_mdev(i); + if (!odev) + continue; + if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS) + continue; + if (!_drbd_may_sync_now(odev)) + rv |= (__drbd_set_state(_NS(odev, aftr_isp, 1), CS_HARD, NULL) + != SS_NOTHING_TO_DO); + } + + return rv; +} + +/** + * _drbd_resume_next() - Resume resync on all devices that may resync now + * @mdev: DRBD device. + * + * Called from process context only (admin command and worker). + */ +static int _drbd_resume_next(struct drbd_conf *mdev) +{ + struct drbd_conf *odev; + int i, rv = 0; + + for (i = 0; i < minor_count; i++) { + odev = minor_to_mdev(i); + if (!odev) + continue; + if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS) + continue; + if (odev->state.aftr_isp) { + if (_drbd_may_sync_now(odev)) + rv |= (__drbd_set_state(_NS(odev, aftr_isp, 0), + CS_HARD, NULL) + != SS_NOTHING_TO_DO) ; + } + } + return rv; +} + +void resume_next_sg(struct drbd_conf *mdev) +{ + write_lock_irq(&global_state_lock); + _drbd_resume_next(mdev); + write_unlock_irq(&global_state_lock); +} + +void suspend_other_sg(struct drbd_conf *mdev) +{ + write_lock_irq(&global_state_lock); + _drbd_pause_after(mdev); + write_unlock_irq(&global_state_lock); +} + +static int sync_after_error(struct drbd_conf *mdev, int o_minor) +{ + struct drbd_conf *odev; + + if (o_minor == -1) + return NO_ERROR; + if (o_minor < -1 || minor_to_mdev(o_minor) == NULL) + return ERR_SYNC_AFTER; + + /* check for loops */ + odev = minor_to_mdev(o_minor); + while (1) { + if (odev == mdev) + return ERR_SYNC_AFTER_CYCLE; + + /* dependency chain ends here, no cycles. */ + if (odev->sync_conf.after == -1) + return NO_ERROR; + + /* follow the dependency chain */ + odev = minor_to_mdev(odev->sync_conf.after); + } +} + +int drbd_alter_sa(struct drbd_conf *mdev, int na) +{ + int changes; + int retcode; + + write_lock_irq(&global_state_lock); + retcode = sync_after_error(mdev, na); + if (retcode == NO_ERROR) { + mdev->sync_conf.after = na; + do { + changes = _drbd_pause_after(mdev); + changes |= _drbd_resume_next(mdev); + } while (changes); + } + write_unlock_irq(&global_state_lock); + return retcode; +} + +/** + * drbd_start_resync() - Start the resync process + * @mdev: DRBD device. + * @side: Either C_SYNC_SOURCE or C_SYNC_TARGET + * + * This function might bring you directly into one of the + * C_PAUSED_SYNC_* states. + */ +void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) +{ + union drbd_state ns; + int r; + + if (mdev->state.conn >= C_SYNC_SOURCE) { + dev_err(DEV, "Resync already running!\n"); + return; + } + + trace_drbd_resync(mdev, TRACE_LVL_SUMMARY, "Resync starting: side=%s\n", + side == C_SYNC_TARGET ? "SyncTarget" : "SyncSource"); + + /* In case a previous resync run was aborted by an IO error/detach on the peer. */ + drbd_rs_cancel_all(mdev); + + if (side == C_SYNC_TARGET) { + /* Since application IO was locked out during C_WF_BITMAP_T and + C_WF_SYNC_UUID we are still unmodified. Before going to C_SYNC_TARGET + we check that we might make the data inconsistent. */ + r = drbd_khelper(mdev, "before-resync-target"); + r = (r >> 8) & 0xff; + if (r > 0) { + dev_info(DEV, "before-resync-target handler returned %d, " + "dropping connection.\n", r); + drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); + return; + } + } + + drbd_state_lock(mdev); + + if (!get_ldev_if_state(mdev, D_NEGOTIATING)) { + drbd_state_unlock(mdev); + return; + } + + if (side == C_SYNC_TARGET) { + mdev->bm_resync_fo = 0; + } else /* side == C_SYNC_SOURCE */ { + u64 uuid; + + get_random_bytes(&uuid, sizeof(u64)); + drbd_uuid_set(mdev, UI_BITMAP, uuid); + drbd_send_sync_uuid(mdev, uuid); + + D_ASSERT(mdev->state.disk == D_UP_TO_DATE); + } + + write_lock_irq(&global_state_lock); + ns = mdev->state; + + ns.aftr_isp = !_drbd_may_sync_now(mdev); + + ns.conn = side; + + if (side == C_SYNC_TARGET) + ns.disk = D_INCONSISTENT; + else /* side == C_SYNC_SOURCE */ + ns.pdsk = D_INCONSISTENT; + + r = __drbd_set_state(mdev, ns, CS_VERBOSE, NULL); + ns = mdev->state; + + if (ns.conn < C_CONNECTED) + r = SS_UNKNOWN_ERROR; + + if (r == SS_SUCCESS) { + mdev->rs_total = + mdev->rs_mark_left = drbd_bm_total_weight(mdev); + mdev->rs_failed = 0; + mdev->rs_paused = 0; + mdev->rs_start = + mdev->rs_mark_time = jiffies; + mdev->rs_same_csum = 0; + _drbd_pause_after(mdev); + } + write_unlock_irq(&global_state_lock); + drbd_state_unlock(mdev); + put_ldev(mdev); + + if (r == SS_SUCCESS) { + dev_info(DEV, "Began resync as %s (will sync %lu KB [%lu bits set]).\n", + drbd_conn_str(ns.conn), + (unsigned long) mdev->rs_total << (BM_BLOCK_SHIFT-10), + (unsigned long) mdev->rs_total); + + if (mdev->rs_total == 0) { + /* Peer still reachable? Beware of failing before-resync-target handlers! */ + request_ping(mdev); + __set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(mdev->net_conf->ping_timeo*HZ/9); /* 9 instead 10 */ + drbd_resync_finished(mdev); + return; + } + + /* ns.conn may already be != mdev->state.conn, + * we may have been paused in between, or become paused until + * the timer triggers. + * No matter, that is handled in resync_timer_fn() */ + if (ns.conn == C_SYNC_TARGET) + mod_timer(&mdev->resync_timer, jiffies); + + drbd_md_sync(mdev); + } +} + +int drbd_worker(struct drbd_thread *thi) +{ + struct drbd_conf *mdev = thi->mdev; + struct drbd_work *w = NULL; + LIST_HEAD(work_list); + int intr = 0, i; + + sprintf(current->comm, "drbd%d_worker", mdev_to_minor(mdev)); + + while (get_t_state(thi) == Running) { + drbd_thread_current_set_cpu(mdev); + + if (down_trylock(&mdev->data.work.s)) { + mutex_lock(&mdev->data.mutex); + if (mdev->data.socket && !mdev->net_conf->no_cork) + drbd_tcp_uncork(mdev->data.socket); + mutex_unlock(&mdev->data.mutex); + + intr = down_interruptible(&mdev->data.work.s); + + mutex_lock(&mdev->data.mutex); + if (mdev->data.socket && !mdev->net_conf->no_cork) + drbd_tcp_cork(mdev->data.socket); + mutex_unlock(&mdev->data.mutex); + } + + if (intr) { + D_ASSERT(intr == -EINTR); + flush_signals(current); + ERR_IF (get_t_state(thi) == Running) + continue; + break; + } + + if (get_t_state(thi) != Running) + break; + /* With this break, we have done a down() but not consumed + the entry from the list. The cleanup code takes care of + this... */ + + w = NULL; + spin_lock_irq(&mdev->data.work.q_lock); + ERR_IF(list_empty(&mdev->data.work.q)) { + /* something terribly wrong in our logic. + * we were able to down() the semaphore, + * but the list is empty... doh. + * + * what is the best thing to do now? + * try again from scratch, restarting the receiver, + * asender, whatnot? could break even more ugly, + * e.g. when we are primary, but no good local data. + * + * I'll try to get away just starting over this loop. + */ + spin_unlock_irq(&mdev->data.work.q_lock); + continue; + } + w = list_entry(mdev->data.work.q.next, struct drbd_work, list); + list_del_init(&w->list); + spin_unlock_irq(&mdev->data.work.q_lock); + + if (!w->cb(mdev, w, mdev->state.conn < C_CONNECTED)) { + /* dev_warn(DEV, "worker: a callback failed! \n"); */ + if (mdev->state.conn >= C_CONNECTED) + drbd_force_state(mdev, + NS(conn, C_NETWORK_FAILURE)); + } + } + D_ASSERT(test_bit(DEVICE_DYING, &mdev->flags)); + D_ASSERT(test_bit(CONFIG_PENDING, &mdev->flags)); + + spin_lock_irq(&mdev->data.work.q_lock); + i = 0; + while (!list_empty(&mdev->data.work.q)) { + list_splice_init(&mdev->data.work.q, &work_list); + spin_unlock_irq(&mdev->data.work.q_lock); + + while (!list_empty(&work_list)) { + w = list_entry(work_list.next, struct drbd_work, list); + list_del_init(&w->list); + w->cb(mdev, w, 1); + i++; /* dead debugging code */ + } + + spin_lock_irq(&mdev->data.work.q_lock); + } + sema_init(&mdev->data.work.s, 0); + /* DANGEROUS race: if someone did queue his work within the spinlock, + * but up() ed outside the spinlock, we could get an up() on the + * semaphore without corresponding list entry. + * So don't do that. + */ + spin_unlock_irq(&mdev->data.work.q_lock); + + D_ASSERT(mdev->state.disk == D_DISKLESS && mdev->state.conn == C_STANDALONE); + /* _drbd_set_state only uses stop_nowait. + * wait here for the Exiting receiver. */ + drbd_thread_stop(&mdev->receiver); + drbd_mdev_cleanup(mdev); + + dev_info(DEV, "worker terminated\n"); + + clear_bit(DEVICE_DYING, &mdev->flags); + clear_bit(CONFIG_PENDING, &mdev->flags); + wake_up(&mdev->state_wait); + + return 0; +} diff --git a/drivers/block/drbd/drbd_wrappers.h b/drivers/block/drbd/drbd_wrappers.h new file mode 100644 index 000000000000..f93fa111ce50 --- /dev/null +++ b/drivers/block/drbd/drbd_wrappers.h @@ -0,0 +1,91 @@ +#ifndef _DRBD_WRAPPERS_H +#define _DRBD_WRAPPERS_H + +#include +#include + +/* see get_sb_bdev and bd_claim */ +extern char *drbd_sec_holder; + +/* sets the number of 512 byte sectors of our virtual device */ +static inline void drbd_set_my_capacity(struct drbd_conf *mdev, + sector_t size) +{ + /* set_capacity(mdev->this_bdev->bd_disk, size); */ + set_capacity(mdev->vdisk, size); + mdev->this_bdev->bd_inode->i_size = (loff_t)size << 9; +} + +#define drbd_bio_uptodate(bio) bio_flagged(bio, BIO_UPTODATE) + +static inline int drbd_bio_has_active_page(struct bio *bio) +{ + struct bio_vec *bvec; + int i; + + __bio_for_each_segment(bvec, bio, i, 0) { + if (page_count(bvec->bv_page) > 1) + return 1; + } + + return 0; +} + +/* bi_end_io handlers */ +extern void drbd_md_io_complete(struct bio *bio, int error); +extern void drbd_endio_read_sec(struct bio *bio, int error); +extern void drbd_endio_write_sec(struct bio *bio, int error); +extern void drbd_endio_pri(struct bio *bio, int error); + +/* + * used to submit our private bio + */ +static inline void drbd_generic_make_request(struct drbd_conf *mdev, + int fault_type, struct bio *bio) +{ + __release(local); + if (!bio->bi_bdev) { + printk(KERN_ERR "drbd%d: drbd_generic_make_request: " + "bio->bi_bdev == NULL\n", + mdev_to_minor(mdev)); + dump_stack(); + bio_endio(bio, -ENODEV); + return; + } + + if (FAULT_ACTIVE(mdev, fault_type)) + bio_endio(bio, -EIO); + else + generic_make_request(bio); +} + +static inline void drbd_plug_device(struct drbd_conf *mdev) +{ + struct request_queue *q; + q = bdev_get_queue(mdev->this_bdev); + + spin_lock_irq(q->queue_lock); + +/* XXX the check on !blk_queue_plugged is redundant, + * implicitly checked in blk_plug_device */ + + if (!blk_queue_plugged(q)) { + blk_plug_device(q); + del_timer(&q->unplug_timer); + /* unplugging should not happen automatically... */ + } + spin_unlock_irq(q->queue_lock); +} + +static inline int drbd_crypto_is_hash(struct crypto_tfm *tfm) +{ + return (crypto_tfm_alg_type(tfm) & CRYPTO_ALG_TYPE_HASH_MASK) + == CRYPTO_ALG_TYPE_HASH; +} + +#ifndef __CHECKER__ +# undef __cond_lock +# define __cond_lock(x,c) (c) +#endif + +#endif diff --git a/include/linux/drbd.h b/include/linux/drbd.h new file mode 100644 index 000000000000..69dc711f37b3 --- /dev/null +++ b/include/linux/drbd.h @@ -0,0 +1,349 @@ +/* + drbd.h + Kernel module for 2.6.x Kernels + + This file is part of DRBD by Philipp Reisner and Lars Ellenberg. + + Copyright (C) 2001-2008, LINBIT Information Technologies GmbH. + Copyright (C) 2001-2008, Philipp Reisner . + Copyright (C) 2001-2008, Lars Ellenberg . + + drbd is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + drbd is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with drbd; see the file COPYING. If not, write to + the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + +*/ +#ifndef DRBD_H +#define DRBD_H +#include +#include + +#ifdef __KERNEL__ +#include +#include +#else +#include +#include +#include + +/* Altough the Linux source code makes a difference between + generic endianness and the bitfields' endianness, there is no + architecture as of Linux-2.6.24-rc4 where the bitfileds' endianness + does not match the generic endianness. */ + +#if __BYTE_ORDER == __LITTLE_ENDIAN +#define __LITTLE_ENDIAN_BITFIELD +#elif __BYTE_ORDER == __BIG_ENDIAN +#define __BIG_ENDIAN_BITFIELD +#else +# error "sorry, weird endianness on this box" +#endif + +#endif + + +extern const char *drbd_buildtag(void); +#define REL_VERSION "8.3.3rc2" +#define API_VERSION 88 +#define PRO_VERSION_MIN 86 +#define PRO_VERSION_MAX 91 + + +enum drbd_io_error_p { + EP_PASS_ON, /* FIXME should the better be named "Ignore"? */ + EP_CALL_HELPER, + EP_DETACH +}; + +enum drbd_fencing_p { + FP_DONT_CARE, + FP_RESOURCE, + FP_STONITH +}; + +enum drbd_disconnect_p { + DP_RECONNECT, + DP_DROP_NET_CONF, + DP_FREEZE_IO +}; + +enum drbd_after_sb_p { + ASB_DISCONNECT, + ASB_DISCARD_YOUNGER_PRI, + ASB_DISCARD_OLDER_PRI, + ASB_DISCARD_ZERO_CHG, + ASB_DISCARD_LEAST_CHG, + ASB_DISCARD_LOCAL, + ASB_DISCARD_REMOTE, + ASB_CONSENSUS, + ASB_DISCARD_SECONDARY, + ASB_CALL_HELPER, + ASB_VIOLENTLY +}; + +/* KEEP the order, do not delete or insert. Only append. */ +enum drbd_ret_codes { + ERR_CODE_BASE = 100, + NO_ERROR = 101, + ERR_LOCAL_ADDR = 102, + ERR_PEER_ADDR = 103, + ERR_OPEN_DISK = 104, + ERR_OPEN_MD_DISK = 105, + ERR_DISK_NOT_BDEV = 107, + ERR_MD_NOT_BDEV = 108, + ERR_DISK_TO_SMALL = 111, + ERR_MD_DISK_TO_SMALL = 112, + ERR_BDCLAIM_DISK = 114, + ERR_BDCLAIM_MD_DISK = 115, + ERR_MD_IDX_INVALID = 116, + ERR_IO_MD_DISK = 118, + ERR_MD_INVALID = 119, + ERR_AUTH_ALG = 120, + ERR_AUTH_ALG_ND = 121, + ERR_NOMEM = 122, + ERR_DISCARD = 123, + ERR_DISK_CONFIGURED = 124, + ERR_NET_CONFIGURED = 125, + ERR_MANDATORY_TAG = 126, + ERR_MINOR_INVALID = 127, + ERR_INTR = 129, /* EINTR */ + ERR_RESIZE_RESYNC = 130, + ERR_NO_PRIMARY = 131, + ERR_SYNC_AFTER = 132, + ERR_SYNC_AFTER_CYCLE = 133, + ERR_PAUSE_IS_SET = 134, + ERR_PAUSE_IS_CLEAR = 135, + ERR_PACKET_NR = 137, + ERR_NO_DISK = 138, + ERR_NOT_PROTO_C = 139, + ERR_NOMEM_BITMAP = 140, + ERR_INTEGRITY_ALG = 141, /* DRBD 8.2 only */ + ERR_INTEGRITY_ALG_ND = 142, /* DRBD 8.2 only */ + ERR_CPU_MASK_PARSE = 143, /* DRBD 8.2 only */ + ERR_CSUMS_ALG = 144, /* DRBD 8.2 only */ + ERR_CSUMS_ALG_ND = 145, /* DRBD 8.2 only */ + ERR_VERIFY_ALG = 146, /* DRBD 8.2 only */ + ERR_VERIFY_ALG_ND = 147, /* DRBD 8.2 only */ + ERR_CSUMS_RESYNC_RUNNING= 148, /* DRBD 8.2 only */ + ERR_VERIFY_RUNNING = 149, /* DRBD 8.2 only */ + ERR_DATA_NOT_CURRENT = 150, + ERR_CONNECTED = 151, /* DRBD 8.3 only */ + + /* insert new ones above this line */ + AFTER_LAST_ERR_CODE +}; + +#define DRBD_PROT_A 1 +#define DRBD_PROT_B 2 +#define DRBD_PROT_C 3 + +enum drbd_role { + R_UNKNOWN = 0, + R_PRIMARY = 1, /* role */ + R_SECONDARY = 2, /* role */ + R_MASK = 3, +}; + +/* The order of these constants is important. + * The lower ones (=C_WF_REPORT_PARAMS ==> There is a socket + */ +enum drbd_conns { + C_STANDALONE, + C_DISCONNECTING, /* Temporal state on the way to StandAlone. */ + C_UNCONNECTED, /* >= C_UNCONNECTED -> inc_net() succeeds */ + + /* These temporal states are all used on the way + * from >= C_CONNECTED to Unconnected. + * The 'disconnect reason' states + * I do not allow to change beween them. */ + C_TIMEOUT, + C_BROKEN_PIPE, + C_NETWORK_FAILURE, + C_PROTOCOL_ERROR, + C_TEAR_DOWN, + + C_WF_CONNECTION, + C_WF_REPORT_PARAMS, /* we have a socket */ + C_CONNECTED, /* we have introduced each other */ + C_STARTING_SYNC_S, /* starting full sync by admin request. */ + C_STARTING_SYNC_T, /* stariing full sync by admin request. */ + C_WF_BITMAP_S, + C_WF_BITMAP_T, + C_WF_SYNC_UUID, + + /* All SyncStates are tested with this comparison + * xx >= C_SYNC_SOURCE && xx <= C_PAUSED_SYNC_T */ + C_SYNC_SOURCE, + C_SYNC_TARGET, + C_VERIFY_S, + C_VERIFY_T, + C_PAUSED_SYNC_S, + C_PAUSED_SYNC_T, + C_MASK = 31 +}; + +enum drbd_disk_state { + D_DISKLESS, + D_ATTACHING, /* In the process of reading the meta-data */ + D_FAILED, /* Becomes D_DISKLESS as soon as we told it the peer */ + /* when >= D_FAILED it is legal to access mdev->bc */ + D_NEGOTIATING, /* Late attaching state, we need to talk to the peer */ + D_INCONSISTENT, + D_OUTDATED, + D_UNKNOWN, /* Only used for the peer, never for myself */ + D_CONSISTENT, /* Might be D_OUTDATED, might be D_UP_TO_DATE ... */ + D_UP_TO_DATE, /* Only this disk state allows applications' IO ! */ + D_MASK = 15 +}; + +union drbd_state { +/* According to gcc's docs is the ... + * The order of allocation of bit-fields within a unit (C90 6.5.2.1, C99 6.7.2.1). + * Determined by ABI. + * pointed out by Maxim Uvarov q + * even though we transmit as "cpu_to_be32(state)", + * the offsets of the bitfields still need to be swapped + * on different endianess. + */ + struct { +#if defined(__LITTLE_ENDIAN_BITFIELD) + unsigned role:2 ; /* 3/4 primary/secondary/unknown */ + unsigned peer:2 ; /* 3/4 primary/secondary/unknown */ + unsigned conn:5 ; /* 17/32 cstates */ + unsigned disk:4 ; /* 8/16 from D_DISKLESS to D_UP_TO_DATE */ + unsigned pdsk:4 ; /* 8/16 from D_DISKLESS to D_UP_TO_DATE */ + unsigned susp:1 ; /* 2/2 IO suspended no/yes */ + unsigned aftr_isp:1 ; /* isp .. imposed sync pause */ + unsigned peer_isp:1 ; + unsigned user_isp:1 ; + unsigned _pad:11; /* 0 unused */ +#elif defined(__BIG_ENDIAN_BITFIELD) + unsigned _pad:11; /* 0 unused */ + unsigned user_isp:1 ; + unsigned peer_isp:1 ; + unsigned aftr_isp:1 ; /* isp .. imposed sync pause */ + unsigned susp:1 ; /* 2/2 IO suspended no/yes */ + unsigned pdsk:4 ; /* 8/16 from D_DISKLESS to D_UP_TO_DATE */ + unsigned disk:4 ; /* 8/16 from D_DISKLESS to D_UP_TO_DATE */ + unsigned conn:5 ; /* 17/32 cstates */ + unsigned peer:2 ; /* 3/4 primary/secondary/unknown */ + unsigned role:2 ; /* 3/4 primary/secondary/unknown */ +#else +# error "this endianess is not supported" +#endif + }; + unsigned int i; +}; + +enum drbd_state_ret_codes { + SS_CW_NO_NEED = 4, + SS_CW_SUCCESS = 3, + SS_NOTHING_TO_DO = 2, + SS_SUCCESS = 1, + SS_UNKNOWN_ERROR = 0, /* Used to sleep longer in _drbd_request_state */ + SS_TWO_PRIMARIES = -1, + SS_NO_UP_TO_DATE_DISK = -2, + SS_NO_LOCAL_DISK = -4, + SS_NO_REMOTE_DISK = -5, + SS_CONNECTED_OUTDATES = -6, + SS_PRIMARY_NOP = -7, + SS_RESYNC_RUNNING = -8, + SS_ALREADY_STANDALONE = -9, + SS_CW_FAILED_BY_PEER = -10, + SS_IS_DISKLESS = -11, + SS_DEVICE_IN_USE = -12, + SS_NO_NET_CONFIG = -13, + SS_NO_VERIFY_ALG = -14, /* drbd-8.2 only */ + SS_NEED_CONNECTION = -15, /* drbd-8.2 only */ + SS_LOWER_THAN_OUTDATED = -16, + SS_NOT_SUPPORTED = -17, /* drbd-8.2 only */ + SS_IN_TRANSIENT_STATE = -18, /* Retry after the next state change */ + SS_CONCURRENT_ST_CHG = -19, /* Concurrent cluster side state change! */ + SS_AFTER_LAST_ERROR = -20, /* Keep this at bottom */ +}; + +/* from drbd_strings.c */ +extern const char *drbd_conn_str(enum drbd_conns); +extern const char *drbd_role_str(enum drbd_role); +extern const char *drbd_disk_str(enum drbd_disk_state); +extern const char *drbd_set_st_err_str(enum drbd_state_ret_codes); + +#define SHARED_SECRET_MAX 64 + +#define MDF_CONSISTENT (1 << 0) +#define MDF_PRIMARY_IND (1 << 1) +#define MDF_CONNECTED_IND (1 << 2) +#define MDF_FULL_SYNC (1 << 3) +#define MDF_WAS_UP_TO_DATE (1 << 4) +#define MDF_PEER_OUT_DATED (1 << 5) +#define MDF_CRASHED_PRIMARY (1 << 6) + +enum drbd_uuid_index { + UI_CURRENT, + UI_BITMAP, + UI_HISTORY_START, + UI_HISTORY_END, + UI_SIZE, /* nl-packet: number of dirty bits */ + UI_FLAGS, /* nl-packet: flags */ + UI_EXTENDED_SIZE /* Everything. */ +}; + +enum drbd_timeout_flag { + UT_DEFAULT = 0, + UT_DEGRADED = 1, + UT_PEER_OUTDATED = 2, +}; + +#define UUID_JUST_CREATED ((__u64)4) + +#define DRBD_MAGIC 0x83740267 +#define BE_DRBD_MAGIC __constant_cpu_to_be32(DRBD_MAGIC) + +/* these are of type "int" */ +#define DRBD_MD_INDEX_INTERNAL -1 +#define DRBD_MD_INDEX_FLEX_EXT -2 +#define DRBD_MD_INDEX_FLEX_INT -3 + +/* Start of the new netlink/connector stuff */ + +#define DRBD_NL_CREATE_DEVICE 0x01 +#define DRBD_NL_SET_DEFAULTS 0x02 + +/* The following line should be moved over to linux/connector.h + * when the time comes */ +#ifndef CN_IDX_DRBD +# define CN_IDX_DRBD 0x4 +/* Ubuntu "intrepid ibex" release defined CN_IDX_DRBD as 0x6 */ +#endif +#define CN_VAL_DRBD 0x1 + +/* For searching a vacant cn_idx value */ +#define CN_IDX_STEP 6977 + +struct drbd_nl_cfg_req { + int packet_type; + unsigned int drbd_minor; + int flags; + unsigned short tag_list[]; +}; + +struct drbd_nl_cfg_reply { + int packet_type; + unsigned int minor; + int ret_code; /* enum ret_code or set_st_err_t */ + unsigned short tag_list[]; /* only used with get_* calls */ +}; + +#endif diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h new file mode 100644 index 000000000000..9d067ce46960 --- /dev/null +++ b/include/linux/drbd_limits.h @@ -0,0 +1,137 @@ +/* + drbd_limits.h + This file is part of DRBD by Philipp Reisner and Lars Ellenberg. +*/ + +/* + * Our current limitations. + * Some of them are hard limits, + * some of them are arbitrary range limits, that make it easier to provide + * feedback about nonsense settings for certain configurable values. + */ + +#ifndef DRBD_LIMITS_H +#define DRBD_LIMITS_H 1 + +#define DEBUG_RANGE_CHECK 0 + +#define DRBD_MINOR_COUNT_MIN 1 +#define DRBD_MINOR_COUNT_MAX 255 + +#define DRBD_DIALOG_REFRESH_MIN 0 +#define DRBD_DIALOG_REFRESH_MAX 600 + +/* valid port number */ +#define DRBD_PORT_MIN 1 +#define DRBD_PORT_MAX 0xffff + +/* startup { */ + /* if you want more than 3.4 days, disable */ +#define DRBD_WFC_TIMEOUT_MIN 0 +#define DRBD_WFC_TIMEOUT_MAX 300000 +#define DRBD_WFC_TIMEOUT_DEF 0 + +#define DRBD_DEGR_WFC_TIMEOUT_MIN 0 +#define DRBD_DEGR_WFC_TIMEOUT_MAX 300000 +#define DRBD_DEGR_WFC_TIMEOUT_DEF 0 + +#define DRBD_OUTDATED_WFC_TIMEOUT_MIN 0 +#define DRBD_OUTDATED_WFC_TIMEOUT_MAX 300000 +#define DRBD_OUTDATED_WFC_TIMEOUT_DEF 0 +/* }*/ + +/* net { */ + /* timeout, unit centi seconds + * more than one minute timeout is not usefull */ +#define DRBD_TIMEOUT_MIN 1 +#define DRBD_TIMEOUT_MAX 600 +#define DRBD_TIMEOUT_DEF 60 /* 6 seconds */ + + /* active connection retries when C_WF_CONNECTION */ +#define DRBD_CONNECT_INT_MIN 1 +#define DRBD_CONNECT_INT_MAX 120 +#define DRBD_CONNECT_INT_DEF 10 /* seconds */ + + /* keep-alive probes when idle */ +#define DRBD_PING_INT_MIN 1 +#define DRBD_PING_INT_MAX 120 +#define DRBD_PING_INT_DEF 10 + + /* timeout for the ping packets.*/ +#define DRBD_PING_TIMEO_MIN 1 +#define DRBD_PING_TIMEO_MAX 100 +#define DRBD_PING_TIMEO_DEF 5 + + /* max number of write requests between write barriers */ +#define DRBD_MAX_EPOCH_SIZE_MIN 1 +#define DRBD_MAX_EPOCH_SIZE_MAX 20000 +#define DRBD_MAX_EPOCH_SIZE_DEF 2048 + + /* I don't think that a tcp send buffer of more than 10M is usefull */ +#define DRBD_SNDBUF_SIZE_MIN 0 +#define DRBD_SNDBUF_SIZE_MAX (10<<20) +#define DRBD_SNDBUF_SIZE_DEF (2*65535) + +#define DRBD_RCVBUF_SIZE_MIN 0 +#define DRBD_RCVBUF_SIZE_MAX (10<<20) +#define DRBD_RCVBUF_SIZE_DEF (2*65535) + + /* @4k PageSize -> 128kB - 512MB */ +#define DRBD_MAX_BUFFERS_MIN 32 +#define DRBD_MAX_BUFFERS_MAX 131072 +#define DRBD_MAX_BUFFERS_DEF 2048 + + /* @4k PageSize -> 4kB - 512MB */ +#define DRBD_UNPLUG_WATERMARK_MIN 1 +#define DRBD_UNPLUG_WATERMARK_MAX 131072 +#define DRBD_UNPLUG_WATERMARK_DEF (DRBD_MAX_BUFFERS_DEF/16) + + /* 0 is disabled. + * 200 should be more than enough even for very short timeouts */ +#define DRBD_KO_COUNT_MIN 0 +#define DRBD_KO_COUNT_MAX 200 +#define DRBD_KO_COUNT_DEF 0 +/* } */ + +/* syncer { */ + /* FIXME allow rate to be zero? */ +#define DRBD_RATE_MIN 1 +/* channel bonding 10 GbE, or other hardware */ +#define DRBD_RATE_MAX (4 << 20) +#define DRBD_RATE_DEF 250 /* kb/second */ + + /* less than 7 would hit performance unneccessarily. + * 3833 is the largest prime that still does fit + * into 64 sectors of activity log */ +#define DRBD_AL_EXTENTS_MIN 7 +#define DRBD_AL_EXTENTS_MAX 3833 +#define DRBD_AL_EXTENTS_DEF 127 + +#define DRBD_AFTER_MIN -1 +#define DRBD_AFTER_MAX 255 +#define DRBD_AFTER_DEF -1 + +/* } */ + +/* drbdsetup XY resize -d Z + * you are free to reduce the device size to nothing, if you want to. + * the upper limit with 64bit kernel, enough ram and flexible meta data + * is 16 TB, currently. */ +/* DRBD_MAX_SECTORS */ +#define DRBD_DISK_SIZE_SECT_MIN 0 +#define DRBD_DISK_SIZE_SECT_MAX (16 * (2LLU << 30)) +#define DRBD_DISK_SIZE_SECT_DEF 0 /* = disabled = no user size... */ + +#define DRBD_ON_IO_ERROR_DEF EP_PASS_ON +#define DRBD_FENCING_DEF FP_DONT_CARE +#define DRBD_AFTER_SB_0P_DEF ASB_DISCONNECT +#define DRBD_AFTER_SB_1P_DEF ASB_DISCONNECT +#define DRBD_AFTER_SB_2P_DEF ASB_DISCONNECT +#define DRBD_RR_CONFLICT_DEF ASB_DISCONNECT + +#define DRBD_MAX_BIO_BVECS_MIN 0 +#define DRBD_MAX_BIO_BVECS_MAX 128 +#define DRBD_MAX_BIO_BVECS_DEF 0 + +#undef RANGE +#endif diff --git a/include/linux/drbd_nl.h b/include/linux/drbd_nl.h new file mode 100644 index 000000000000..db5721ad50d1 --- /dev/null +++ b/include/linux/drbd_nl.h @@ -0,0 +1,137 @@ +/* + PAKET( name, + TYPE ( pn, pr, member ) + ... + ) + + You may never reissue one of the pn arguments +*/ + +#if !defined(NL_PACKET) || !defined(NL_STRING) || !defined(NL_INTEGER) || !defined(NL_BIT) || !defined(NL_INT64) +#error "The macros NL_PACKET, NL_STRING, NL_INTEGER, NL_INT64 and NL_BIT needs to be defined" +#endif + +NL_PACKET(primary, 1, + NL_BIT( 1, T_MAY_IGNORE, overwrite_peer) +) + +NL_PACKET(secondary, 2, ) + +NL_PACKET(disk_conf, 3, + NL_INT64( 2, T_MAY_IGNORE, disk_size) + NL_STRING( 3, T_MANDATORY, backing_dev, 128) + NL_STRING( 4, T_MANDATORY, meta_dev, 128) + NL_INTEGER( 5, T_MANDATORY, meta_dev_idx) + NL_INTEGER( 6, T_MAY_IGNORE, on_io_error) + NL_INTEGER( 7, T_MAY_IGNORE, fencing) + NL_BIT( 37, T_MAY_IGNORE, use_bmbv) + NL_BIT( 53, T_MAY_IGNORE, no_disk_flush) + NL_BIT( 54, T_MAY_IGNORE, no_md_flush) + /* 55 max_bio_size was available in 8.2.6rc2 */ + NL_INTEGER( 56, T_MAY_IGNORE, max_bio_bvecs) + NL_BIT( 57, T_MAY_IGNORE, no_disk_barrier) + NL_BIT( 58, T_MAY_IGNORE, no_disk_drain) +) + +NL_PACKET(detach, 4, ) + +NL_PACKET(net_conf, 5, + NL_STRING( 8, T_MANDATORY, my_addr, 128) + NL_STRING( 9, T_MANDATORY, peer_addr, 128) + NL_STRING( 10, T_MAY_IGNORE, shared_secret, SHARED_SECRET_MAX) + NL_STRING( 11, T_MAY_IGNORE, cram_hmac_alg, SHARED_SECRET_MAX) + NL_STRING( 44, T_MAY_IGNORE, integrity_alg, SHARED_SECRET_MAX) + NL_INTEGER( 14, T_MAY_IGNORE, timeout) + NL_INTEGER( 15, T_MANDATORY, wire_protocol) + NL_INTEGER( 16, T_MAY_IGNORE, try_connect_int) + NL_INTEGER( 17, T_MAY_IGNORE, ping_int) + NL_INTEGER( 18, T_MAY_IGNORE, max_epoch_size) + NL_INTEGER( 19, T_MAY_IGNORE, max_buffers) + NL_INTEGER( 20, T_MAY_IGNORE, unplug_watermark) + NL_INTEGER( 21, T_MAY_IGNORE, sndbuf_size) + NL_INTEGER( 22, T_MAY_IGNORE, ko_count) + NL_INTEGER( 24, T_MAY_IGNORE, after_sb_0p) + NL_INTEGER( 25, T_MAY_IGNORE, after_sb_1p) + NL_INTEGER( 26, T_MAY_IGNORE, after_sb_2p) + NL_INTEGER( 39, T_MAY_IGNORE, rr_conflict) + NL_INTEGER( 40, T_MAY_IGNORE, ping_timeo) + NL_INTEGER( 67, T_MAY_IGNORE, rcvbuf_size) + /* 59 addr_family was available in GIT, never released */ + NL_BIT( 60, T_MANDATORY, mind_af) + NL_BIT( 27, T_MAY_IGNORE, want_lose) + NL_BIT( 28, T_MAY_IGNORE, two_primaries) + NL_BIT( 41, T_MAY_IGNORE, always_asbp) + NL_BIT( 61, T_MAY_IGNORE, no_cork) + NL_BIT( 62, T_MANDATORY, auto_sndbuf_size) +) + +NL_PACKET(disconnect, 6, ) + +NL_PACKET(resize, 7, + NL_INT64( 29, T_MAY_IGNORE, resize_size) +) + +NL_PACKET(syncer_conf, 8, + NL_INTEGER( 30, T_MAY_IGNORE, rate) + NL_INTEGER( 31, T_MAY_IGNORE, after) + NL_INTEGER( 32, T_MAY_IGNORE, al_extents) + NL_STRING( 52, T_MAY_IGNORE, verify_alg, SHARED_SECRET_MAX) + NL_STRING( 51, T_MAY_IGNORE, cpu_mask, 32) + NL_STRING( 64, T_MAY_IGNORE, csums_alg, SHARED_SECRET_MAX) + NL_BIT( 65, T_MAY_IGNORE, use_rle) +) + +NL_PACKET(invalidate, 9, ) +NL_PACKET(invalidate_peer, 10, ) +NL_PACKET(pause_sync, 11, ) +NL_PACKET(resume_sync, 12, ) +NL_PACKET(suspend_io, 13, ) +NL_PACKET(resume_io, 14, ) +NL_PACKET(outdate, 15, ) +NL_PACKET(get_config, 16, ) +NL_PACKET(get_state, 17, + NL_INTEGER( 33, T_MAY_IGNORE, state_i) +) + +NL_PACKET(get_uuids, 18, + NL_STRING( 34, T_MAY_IGNORE, uuids, (UI_SIZE*sizeof(__u64))) + NL_INTEGER( 35, T_MAY_IGNORE, uuids_flags) +) + +NL_PACKET(get_timeout_flag, 19, + NL_BIT( 36, T_MAY_IGNORE, use_degraded) +) + +NL_PACKET(call_helper, 20, + NL_STRING( 38, T_MAY_IGNORE, helper, 32) +) + +/* Tag nr 42 already allocated in drbd-8.1 development. */ + +NL_PACKET(sync_progress, 23, + NL_INTEGER( 43, T_MAY_IGNORE, sync_progress) +) + +NL_PACKET(dump_ee, 24, + NL_STRING( 45, T_MAY_IGNORE, dump_ee_reason, 32) + NL_STRING( 46, T_MAY_IGNORE, seen_digest, SHARED_SECRET_MAX) + NL_STRING( 47, T_MAY_IGNORE, calc_digest, SHARED_SECRET_MAX) + NL_INT64( 48, T_MAY_IGNORE, ee_sector) + NL_INT64( 49, T_MAY_IGNORE, ee_block_id) + NL_STRING( 50, T_MAY_IGNORE, ee_data, 32 << 10) +) + +NL_PACKET(start_ov, 25, + NL_INT64( 66, T_MAY_IGNORE, start_sector) +) + +NL_PACKET(new_c_uuid, 26, + NL_BIT( 63, T_MANDATORY, clear_bm) +) + +#undef NL_PACKET +#undef NL_INTEGER +#undef NL_INT64 +#undef NL_BIT +#undef NL_STRING + diff --git a/include/linux/drbd_tag_magic.h b/include/linux/drbd_tag_magic.h new file mode 100644 index 000000000000..fcdff8410e99 --- /dev/null +++ b/include/linux/drbd_tag_magic.h @@ -0,0 +1,83 @@ +#ifndef DRBD_TAG_MAGIC_H +#define DRBD_TAG_MAGIC_H + +#define TT_END 0 +#define TT_REMOVED 0xE000 + +/* declare packet_type enums */ +enum packet_types { +#define NL_PACKET(name, number, fields) P_ ## name = number, +#define NL_INTEGER(pn, pr, member) +#define NL_INT64(pn, pr, member) +#define NL_BIT(pn, pr, member) +#define NL_STRING(pn, pr, member, len) +#include "drbd_nl.h" + P_nl_after_last_packet, +}; + +/* These struct are used to deduce the size of the tag lists: */ +#define NL_PACKET(name, number, fields) \ + struct name ## _tag_len_struct { fields }; +#define NL_INTEGER(pn, pr, member) \ + int member; int tag_and_len ## member; +#define NL_INT64(pn, pr, member) \ + __u64 member; int tag_and_len ## member; +#define NL_BIT(pn, pr, member) \ + unsigned char member:1; int tag_and_len ## member; +#define NL_STRING(pn, pr, member, len) \ + unsigned char member[len]; int member ## _len; \ + int tag_and_len ## member; +#include "linux/drbd_nl.h" + +/* declate tag-list-sizes */ +static const int tag_list_sizes[] = { +#define NL_PACKET(name, number, fields) 2 fields , +#define NL_INTEGER(pn, pr, member) + 4 + 4 +#define NL_INT64(pn, pr, member) + 4 + 8 +#define NL_BIT(pn, pr, member) + 4 + 1 +#define NL_STRING(pn, pr, member, len) + 4 + (len) +#include "drbd_nl.h" +}; + +/* The two highest bits are used for the tag type */ +#define TT_MASK 0xC000 +#define TT_INTEGER 0x0000 +#define TT_INT64 0x4000 +#define TT_BIT 0x8000 +#define TT_STRING 0xC000 +/* The next bit indicates if processing of the tag is mandatory */ +#define T_MANDATORY 0x2000 +#define T_MAY_IGNORE 0x0000 +#define TN_MASK 0x1fff +/* The remaining 13 bits are used to enumerate the tags */ + +#define tag_type(T) ((T) & TT_MASK) +#define tag_number(T) ((T) & TN_MASK) + +/* declare tag enums */ +#define NL_PACKET(name, number, fields) fields +enum drbd_tags { +#define NL_INTEGER(pn, pr, member) T_ ## member = pn | TT_INTEGER | pr , +#define NL_INT64(pn, pr, member) T_ ## member = pn | TT_INT64 | pr , +#define NL_BIT(pn, pr, member) T_ ## member = pn | TT_BIT | pr , +#define NL_STRING(pn, pr, member, len) T_ ## member = pn | TT_STRING | pr , +#include "drbd_nl.h" +}; + +struct tag { + const char *name; + int type_n_flags; + int max_len; +}; + +/* declare tag names */ +#define NL_PACKET(name, number, fields) fields +static const struct tag tag_descriptions[] = { +#define NL_INTEGER(pn, pr, member) [ pn ] = { #member, TT_INTEGER | pr, sizeof(int) }, +#define NL_INT64(pn, pr, member) [ pn ] = { #member, TT_INT64 | pr, sizeof(__u64) }, +#define NL_BIT(pn, pr, member) [ pn ] = { #member, TT_BIT | pr, sizeof(int) }, +#define NL_STRING(pn, pr, member, len) [ pn ] = { #member, TT_STRING | pr, (len) }, +#include "drbd_nl.h" +}; + +#endif diff --git a/include/linux/lru_cache.h b/include/linux/lru_cache.h new file mode 100644 index 000000000000..3a2b2d9b0472 --- /dev/null +++ b/include/linux/lru_cache.h @@ -0,0 +1,294 @@ +/* + lru_cache.c + + This file is part of DRBD by Philipp Reisner and Lars Ellenberg. + + Copyright (C) 2003-2008, LINBIT Information Technologies GmbH. + Copyright (C) 2003-2008, Philipp Reisner . + Copyright (C) 2003-2008, Lars Ellenberg . + + drbd is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + drbd is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with drbd; see the file COPYING. If not, write to + the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + + */ + +#ifndef LRU_CACHE_H +#define LRU_CACHE_H + +#include +#include +#include +#include /* for memset */ +#include + +/* +This header file (and its .c file; kernel-doc of functions see there) + define a helper framework to easily keep track of index:label associations, + and changes to an "active set" of objects, as well as pending transactions, + to persistently record those changes. + + We use an LRU policy if it is necessary to "cool down" a region currently in + the active set before we can "heat" a previously unused region. + + Because of this later property, it is called "lru_cache". + As it actually Tracks Objects in an Active SeT, we could also call it + toast (incidentally that is what may happen to the data on the + backend storage uppon next resync, if we don't get it right). + +What for? + +We replicate IO (more or less synchronously) to local and remote disk. + +For crash recovery after replication node failure, + we need to resync all regions that have been target of in-flight WRITE IO + (in use, or "hot", regions), as we don't know wether or not those WRITEs have + made it to stable storage. + + To avoid a "full resync", we need to persistently track these regions. + + This is known as "write intent log", and can be implemented as on-disk + (coarse or fine grained) bitmap, or other meta data. + + To avoid the overhead of frequent extra writes to this meta data area, + usually the condition is softened to regions that _may_ have been target of + in-flight WRITE IO, e.g. by only lazily clearing the on-disk write-intent + bitmap, trading frequency of meta data transactions against amount of + (possibly unneccessary) resync traffic. + + If we set a hard limit on the area that may be "hot" at any given time, we + limit the amount of resync traffic needed for crash recovery. + +For recovery after replication link failure, + we need to resync all blocks that have been changed on the other replica + in the mean time, or, if both replica have been changed independently [*], + all blocks that have been changed on either replica in the mean time. + [*] usually as a result of a cluster split-brain and insufficient protection. + but there are valid use cases to do this on purpose. + + Tracking those blocks can be implemented as "dirty bitmap". + Having it fine-grained reduces the amount of resync traffic. + It should also be persistent, to allow for reboots (or crashes) + while the replication link is down. + +There are various possible implementations for persistently storing +write intent log information, three of which are mentioned here. + +"Chunk dirtying" + The on-disk "dirty bitmap" may be re-used as "write-intent" bitmap as well. + To reduce the frequency of bitmap updates for write-intent log purposes, + one could dirty "chunks" (of some size) at a time of the (fine grained) + on-disk bitmap, while keeping the in-memory "dirty" bitmap as clean as + possible, flushing it to disk again when a previously "hot" (and on-disk + dirtied as full chunk) area "cools down" again (no IO in flight anymore, + and none expected in the near future either). + +"Explicit (coarse) write intent bitmap" + An other implementation could chose a (probably coarse) explicit bitmap, + for write-intent log purposes, additionally to the fine grained dirty bitmap. + +"Activity log" + Yet an other implementation may keep track of the hot regions, by starting + with an empty set, and writing down a journal of region numbers that have + become "hot", or have "cooled down" again. + + To be able to use a ring buffer for this journal of changes to the active + set, we not only record the actual changes to that set, but also record the + not changing members of the set in a round robin fashion. To do so, we use a + fixed (but configurable) number of slots which we can identify by index, and + associate region numbers (labels) with these indices. + For each transaction recording a change to the active set, we record the + change itself (index: -old_label, +new_label), and which index is associated + with which label (index: current_label) within a certain sliding window that + is moved further over the available indices with each such transaction. + + Thus, for crash recovery, if the ringbuffer is sufficiently large, we can + accurately reconstruct the active set. + + Sufficiently large depends only on maximum number of active objects, and the + size of the sliding window recording "index: current_label" associations within + each transaction. + + This is what we call the "activity log". + + Currently we need one activity log transaction per single label change, which + does not give much benefit over the "dirty chunks of bitmap" approach, other + than potentially less seeks. + + We plan to change the transaction format to support multiple changes per + transaction, which then would reduce several (disjoint, "random") updates to + the bitmap into one transaction to the activity log ring buffer. +*/ + +/* this defines an element in a tracked set + * .colision is for hash table lookup. + * When we process a new IO request, we know its sector, thus can deduce the + * region number (label) easily. To do the label -> object lookup without a + * full list walk, we use a simple hash table. + * + * .list is on one of three lists: + * in_use: currently in use (refcnt > 0, lc_number != LC_FREE) + * lru: unused but ready to be reused or recycled + * (ts_refcnt == 0, lc_number != LC_FREE), + * free: unused but ready to be recycled + * (ts_refcnt == 0, lc_number == LC_FREE), + * + * an element is said to be "in the active set", + * if either on "in_use" or "lru", i.e. lc_number != LC_FREE. + * + * DRBD currently (May 2009) only uses 61 elements on the resync lru_cache + * (total memory usage 2 pages), and up to 3833 elements on the act_log + * lru_cache, totalling ~215 kB for 64bit architechture, ~53 pages. + * + * We usually do not actually free these objects again, but only "recycle" + * them, as the change "index: -old_label, +LC_FREE" would need a transaction + * as well. Which also means that using a kmem_cache to allocate the objects + * from wastes some resources. + * But it avoids high order page allocations in kmalloc. + */ +struct lc_element { + struct hlist_node colision; + struct list_head list; /* LRU list or free list */ + unsigned refcnt; + /* back "pointer" into ts_cache->element[index], + * for paranoia, and for "ts_element_to_index" */ + unsigned lc_index; + /* if we want to track a larger set of objects, + * it needs to become arch independend u64 */ + unsigned lc_number; + + /* special label when on free list */ +#define LC_FREE (~0U) +}; + +struct lru_cache { + /* the least recently used item is kept at lru->prev */ + struct list_head lru; + struct list_head free; + struct list_head in_use; + + /* the pre-created kmem cache to allocate the objects from */ + struct kmem_cache *lc_cache; + + /* size of tracked objects, used to memset(,0,) them in lc_reset */ + size_t element_size; + /* offset of struct lc_element member in the tracked object */ + size_t element_off; + + /* number of elements (indices) */ + unsigned int nr_elements; + /* Arbitrary limit on maximum tracked objects. Practical limit is much + * lower due to allocation failures, probably. For typical use cases, + * nr_elements should be a few thousand at most. + * This also limits the maximum value of ts_element.ts_index, allowing the + * 8 high bits of .ts_index to be overloaded with flags in the future. */ +#define LC_MAX_ACTIVE (1<<24) + + /* statistics */ + unsigned used; /* number of lelements currently on in_use list */ + unsigned long hits, misses, starving, dirty, changed; + + /* see below: flag-bits for lru_cache */ + unsigned long flags; + + /* when changing the label of an index element */ + unsigned int new_number; + + /* for paranoia when changing the label of an index element */ + struct lc_element *changing_element; + + void *lc_private; + const char *name; + + /* nr_elements there */ + struct hlist_head *lc_slot; + struct lc_element **lc_element; +}; + + +/* flag-bits for lru_cache */ +enum { + /* debugging aid, to catch concurrent access early. + * user needs to guarantee exclusive access by proper locking! */ + __LC_PARANOIA, + /* if we need to change the set, but currently there is a changing + * transaction pending, we are "dirty", and must deferr further + * changing requests */ + __LC_DIRTY, + /* if we need to change the set, but currently there is no free nor + * unused element available, we are "starving", and must not give out + * further references, to guarantee that eventually some refcnt will + * drop to zero and we will be able to make progress again, changing + * the set, writing the transaction. + * if the statistics say we are frequently starving, + * nr_elements is too small. */ + __LC_STARVING, +}; +#define LC_PARANOIA (1<<__LC_PARANOIA) +#define LC_DIRTY (1<<__LC_DIRTY) +#define LC_STARVING (1<<__LC_STARVING) + +extern struct lru_cache *lc_create(const char *name, struct kmem_cache *cache, + unsigned e_count, size_t e_size, size_t e_off); +extern void lc_reset(struct lru_cache *lc); +extern void lc_destroy(struct lru_cache *lc); +extern void lc_set(struct lru_cache *lc, unsigned int enr, int index); +extern void lc_del(struct lru_cache *lc, struct lc_element *element); + +extern struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr); +extern struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr); +extern struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr); +extern unsigned int lc_put(struct lru_cache *lc, struct lc_element *e); +extern void lc_changed(struct lru_cache *lc, struct lc_element *e); + +struct seq_file; +extern size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc); + +extern void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char *utext, + void (*detail) (struct seq_file *, struct lc_element *)); + +/** + * lc_try_lock - can be used to stop lc_get() from changing the tracked set + * @lc: the lru cache to operate on + * + * Note that the reference counts and order on the active and lru lists may + * still change. Returns true if we aquired the lock. + */ +static inline int lc_try_lock(struct lru_cache *lc) +{ + return !test_and_set_bit(__LC_DIRTY, &lc->flags); +} + +/** + * lc_unlock - unlock @lc, allow lc_get() to change the set again + * @lc: the lru cache to operate on + */ +static inline void lc_unlock(struct lru_cache *lc) +{ + clear_bit(__LC_DIRTY, &lc->flags); + smp_mb__after_clear_bit(); +} + +static inline int lc_is_used(struct lru_cache *lc, unsigned int enr) +{ + struct lc_element *e = lc_find(lc, enr); + return e && e->refcnt; +} + +#define lc_entry(ptr, type, member) \ + container_of(ptr, type, member) + +extern struct lc_element *lc_element_by_index(struct lru_cache *lc, unsigned i); +extern unsigned int lc_index_of(struct lru_cache *lc, struct lc_element *e); + +#endif diff --git a/lib/Kconfig b/lib/Kconfig index bb1326d3839c..1cfe51628e1b 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -200,4 +200,7 @@ config NLATTR config GENERIC_ATOMIC64 bool +config LRU_CACHE + tristate + endmenu diff --git a/lib/Makefile b/lib/Makefile index 2e78277eff9d..347ad8db29d3 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -91,6 +91,8 @@ obj-$(CONFIG_DYNAMIC_DEBUG) += dynamic_debug.o obj-$(CONFIG_NLATTR) += nlattr.o +obj-$(CONFIG_LRU_CACHE) += lru_cache.o + obj-$(CONFIG_DMA_API_DEBUG) += dma-debug.o obj-$(CONFIG_GENERIC_CSUM) += checksum.o diff --git a/lib/lru_cache.c b/lib/lru_cache.c new file mode 100644 index 000000000000..270de9d31b8c --- /dev/null +++ b/lib/lru_cache.c @@ -0,0 +1,560 @@ +/* + lru_cache.c + + This file is part of DRBD by Philipp Reisner and Lars Ellenberg. + + Copyright (C) 2003-2008, LINBIT Information Technologies GmbH. + Copyright (C) 2003-2008, Philipp Reisner . + Copyright (C) 2003-2008, Lars Ellenberg . + + drbd is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + drbd is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with drbd; see the file COPYING. If not, write to + the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + + */ + +#include +#include +#include +#include /* for memset */ +#include /* for seq_printf */ +#include + +MODULE_AUTHOR("Philipp Reisner , " + "Lars Ellenberg "); +MODULE_DESCRIPTION("lru_cache - Track sets of hot objects"); +MODULE_LICENSE("GPL"); + +/* this is developers aid only. + * it catches concurrent access (lack of locking on the users part) */ +#define PARANOIA_ENTRY() do { \ + BUG_ON(!lc); \ + BUG_ON(!lc->nr_elements); \ + BUG_ON(test_and_set_bit(__LC_PARANOIA, &lc->flags)); \ +} while (0) + +#define RETURN(x...) do { \ + clear_bit(__LC_PARANOIA, &lc->flags); \ + smp_mb__after_clear_bit(); return x ; } while (0) + +/* BUG() if e is not one of the elements tracked by lc */ +#define PARANOIA_LC_ELEMENT(lc, e) do { \ + struct lru_cache *lc_ = (lc); \ + struct lc_element *e_ = (e); \ + unsigned i = e_->lc_index; \ + BUG_ON(i >= lc_->nr_elements); \ + BUG_ON(lc_->lc_element[i] != e_); } while (0) + +/** + * lc_create - prepares to track objects in an active set + * @name: descriptive name only used in lc_seq_printf_stats and lc_seq_dump_details + * @e_count: number of elements allowed to be active simultaneously + * @e_size: size of the tracked objects + * @e_off: offset to the &struct lc_element member in a tracked object + * + * Returns a pointer to a newly initialized struct lru_cache on success, + * or NULL on (allocation) failure. + */ +struct lru_cache *lc_create(const char *name, struct kmem_cache *cache, + unsigned e_count, size_t e_size, size_t e_off) +{ + struct hlist_head *slot = NULL; + struct lc_element **element = NULL; + struct lru_cache *lc; + struct lc_element *e; + unsigned cache_obj_size = kmem_cache_size(cache); + unsigned i; + + WARN_ON(cache_obj_size < e_size); + if (cache_obj_size < e_size) + return NULL; + + /* e_count too big; would probably fail the allocation below anyways. + * for typical use cases, e_count should be few thousand at most. */ + if (e_count > LC_MAX_ACTIVE) + return NULL; + + slot = kzalloc(e_count * sizeof(struct hlist_head*), GFP_KERNEL); + if (!slot) + goto out_fail; + element = kzalloc(e_count * sizeof(struct lc_element *), GFP_KERNEL); + if (!element) + goto out_fail; + + lc = kzalloc(sizeof(*lc), GFP_KERNEL); + if (!lc) + goto out_fail; + + INIT_LIST_HEAD(&lc->in_use); + INIT_LIST_HEAD(&lc->lru); + INIT_LIST_HEAD(&lc->free); + + lc->name = name; + lc->element_size = e_size; + lc->element_off = e_off; + lc->nr_elements = e_count; + lc->new_number = LC_FREE; + lc->lc_cache = cache; + lc->lc_element = element; + lc->lc_slot = slot; + + /* preallocate all objects */ + for (i = 0; i < e_count; i++) { + void *p = kmem_cache_alloc(cache, GFP_KERNEL); + if (!p) + break; + memset(p, 0, lc->element_size); + e = p + e_off; + e->lc_index = i; + e->lc_number = LC_FREE; + list_add(&e->list, &lc->free); + element[i] = e; + } + if (i == e_count) + return lc; + + /* else: could not allocate all elements, give up */ + for (i--; i; i--) { + void *p = element[i]; + kmem_cache_free(cache, p - e_off); + } + kfree(lc); +out_fail: + kfree(element); + kfree(slot); + return NULL; +} + +void lc_free_by_index(struct lru_cache *lc, unsigned i) +{ + void *p = lc->lc_element[i]; + WARN_ON(!p); + if (p) { + p -= lc->element_off; + kmem_cache_free(lc->lc_cache, p); + } +} + +/** + * lc_destroy - frees memory allocated by lc_create() + * @lc: the lru cache to destroy + */ +void lc_destroy(struct lru_cache *lc) +{ + unsigned i; + if (!lc) + return; + for (i = 0; i < lc->nr_elements; i++) + lc_free_by_index(lc, i); + kfree(lc->lc_element); + kfree(lc->lc_slot); + kfree(lc); +} + +/** + * lc_reset - does a full reset for @lc and the hash table slots. + * @lc: the lru cache to operate on + * + * It is roughly the equivalent of re-allocating a fresh lru_cache object, + * basically a short cut to lc_destroy(lc); lc = lc_create(...); + */ +void lc_reset(struct lru_cache *lc) +{ + unsigned i; + + INIT_LIST_HEAD(&lc->in_use); + INIT_LIST_HEAD(&lc->lru); + INIT_LIST_HEAD(&lc->free); + lc->used = 0; + lc->hits = 0; + lc->misses = 0; + lc->starving = 0; + lc->dirty = 0; + lc->changed = 0; + lc->flags = 0; + lc->changing_element = NULL; + lc->new_number = LC_FREE; + memset(lc->lc_slot, 0, sizeof(struct hlist_head) * lc->nr_elements); + + for (i = 0; i < lc->nr_elements; i++) { + struct lc_element *e = lc->lc_element[i]; + void *p = e; + p -= lc->element_off; + memset(p, 0, lc->element_size); + /* re-init it */ + e->lc_index = i; + e->lc_number = LC_FREE; + list_add(&e->list, &lc->free); + } +} + +/** + * lc_seq_printf_stats - print stats about @lc into @seq + * @seq: the seq_file to print into + * @lc: the lru cache to print statistics of + */ +size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc) +{ + /* NOTE: + * total calls to lc_get are + * (starving + hits + misses) + * misses include "dirty" count (update from an other thread in + * progress) and "changed", when this in fact lead to an successful + * update of the cache. + */ + return seq_printf(seq, "\t%s: used:%u/%u " + "hits:%lu misses:%lu starving:%lu dirty:%lu changed:%lu\n", + lc->name, lc->used, lc->nr_elements, + lc->hits, lc->misses, lc->starving, lc->dirty, lc->changed); +} + +static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr) +{ + return lc->lc_slot + (enr % lc->nr_elements); +} + + +/** + * lc_find - find element by label, if present in the hash table + * @lc: The lru_cache object + * @enr: element number + * + * Returns the pointer to an element, if the element with the requested + * "label" or element number is present in the hash table, + * or NULL if not found. Does not change the refcnt. + */ +struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr) +{ + struct hlist_node *n; + struct lc_element *e; + + BUG_ON(!lc); + BUG_ON(!lc->nr_elements); + hlist_for_each_entry(e, n, lc_hash_slot(lc, enr), colision) { + if (e->lc_number == enr) + return e; + } + return NULL; +} + +/* returned element will be "recycled" immediately */ +static struct lc_element *lc_evict(struct lru_cache *lc) +{ + struct list_head *n; + struct lc_element *e; + + if (list_empty(&lc->lru)) + return NULL; + + n = lc->lru.prev; + e = list_entry(n, struct lc_element, list); + + PARANOIA_LC_ELEMENT(lc, e); + + list_del(&e->list); + hlist_del(&e->colision); + return e; +} + +/** + * lc_del - removes an element from the cache + * @lc: The lru_cache object + * @e: The element to remove + * + * @e must be unused (refcnt == 0). Moves @e from "lru" to "free" list, + * sets @e->enr to %LC_FREE. + */ +void lc_del(struct lru_cache *lc, struct lc_element *e) +{ + PARANOIA_ENTRY(); + PARANOIA_LC_ELEMENT(lc, e); + BUG_ON(e->refcnt); + + e->lc_number = LC_FREE; + hlist_del_init(&e->colision); + list_move(&e->list, &lc->free); + RETURN(); +} + +static struct lc_element *lc_get_unused_element(struct lru_cache *lc) +{ + struct list_head *n; + + if (list_empty(&lc->free)) + return lc_evict(lc); + + n = lc->free.next; + list_del(n); + return list_entry(n, struct lc_element, list); +} + +static int lc_unused_element_available(struct lru_cache *lc) +{ + if (!list_empty(&lc->free)) + return 1; /* something on the free list */ + if (!list_empty(&lc->lru)) + return 1; /* something to evict */ + + return 0; +} + + +/** + * lc_get - get element by label, maybe change the active set + * @lc: the lru cache to operate on + * @enr: the label to look up + * + * Finds an element in the cache, increases its usage count, + * "touches" and returns it. + * + * In case the requested number is not present, it needs to be added to the + * cache. Therefore it is possible that an other element becomes evicted from + * the cache. In either case, the user is notified so he is able to e.g. keep + * a persistent log of the cache changes, and therefore the objects in use. + * + * Return values: + * NULL + * The cache was marked %LC_STARVING, + * or the requested label was not in the active set + * and a changing transaction is still pending (@lc was marked %LC_DIRTY). + * Or no unused or free element could be recycled (@lc will be marked as + * %LC_STARVING, blocking further lc_get() operations). + * + * pointer to the element with the REQUESTED element number. + * In this case, it can be used right away + * + * pointer to an UNUSED element with some different element number, + * where that different number may also be %LC_FREE. + * + * In this case, the cache is marked %LC_DIRTY (blocking further changes), + * and the returned element pointer is removed from the lru list and + * hash collision chains. The user now should do whatever housekeeping + * is necessary. + * Then he must call lc_changed(lc,element_pointer), to finish + * the change. + * + * NOTE: The user needs to check the lc_number on EACH use, so he recognizes + * any cache set change. + */ +struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr) +{ + struct lc_element *e; + + PARANOIA_ENTRY(); + if (lc->flags & LC_STARVING) { + ++lc->starving; + RETURN(NULL); + } + + e = lc_find(lc, enr); + if (e) { + ++lc->hits; + if (e->refcnt++ == 0) + lc->used++; + list_move(&e->list, &lc->in_use); /* Not evictable... */ + RETURN(e); + } + + ++lc->misses; + + /* In case there is nothing available and we can not kick out + * the LRU element, we have to wait ... + */ + if (!lc_unused_element_available(lc)) { + __set_bit(__LC_STARVING, &lc->flags); + RETURN(NULL); + } + + /* it was not present in the active set. + * we are going to recycle an unused (or even "free") element. + * user may need to commit a transaction to record that change. + * we serialize on flags & TF_DIRTY */ + if (test_and_set_bit(__LC_DIRTY, &lc->flags)) { + ++lc->dirty; + RETURN(NULL); + } + + e = lc_get_unused_element(lc); + BUG_ON(!e); + + clear_bit(__LC_STARVING, &lc->flags); + BUG_ON(++e->refcnt != 1); + lc->used++; + + lc->changing_element = e; + lc->new_number = enr; + + RETURN(e); +} + +/* similar to lc_get, + * but only gets a new reference on an existing element. + * you either get the requested element, or NULL. + * will be consolidated into one function. + */ +struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr) +{ + struct lc_element *e; + + PARANOIA_ENTRY(); + if (lc->flags & LC_STARVING) { + ++lc->starving; + RETURN(NULL); + } + + e = lc_find(lc, enr); + if (e) { + ++lc->hits; + if (e->refcnt++ == 0) + lc->used++; + list_move(&e->list, &lc->in_use); /* Not evictable... */ + } + RETURN(e); +} + +/** + * lc_changed - tell @lc that the change has been recorded + * @lc: the lru cache to operate on + * @e: the element pending label change + */ +void lc_changed(struct lru_cache *lc, struct lc_element *e) +{ + PARANOIA_ENTRY(); + BUG_ON(e != lc->changing_element); + PARANOIA_LC_ELEMENT(lc, e); + ++lc->changed; + e->lc_number = lc->new_number; + list_add(&e->list, &lc->in_use); + hlist_add_head(&e->colision, lc_hash_slot(lc, lc->new_number)); + lc->changing_element = NULL; + lc->new_number = LC_FREE; + clear_bit(__LC_DIRTY, &lc->flags); + smp_mb__after_clear_bit(); + RETURN(); +} + + +/** + * lc_put - give up refcnt of @e + * @lc: the lru cache to operate on + * @e: the element to put + * + * If refcnt reaches zero, the element is moved to the lru list, + * and a %LC_STARVING (if set) is cleared. + * Returns the new (post-decrement) refcnt. + */ +unsigned int lc_put(struct lru_cache *lc, struct lc_element *e) +{ + PARANOIA_ENTRY(); + PARANOIA_LC_ELEMENT(lc, e); + BUG_ON(e->refcnt == 0); + BUG_ON(e == lc->changing_element); + if (--e->refcnt == 0) { + /* move it to the front of LRU. */ + list_move(&e->list, &lc->lru); + lc->used--; + clear_bit(__LC_STARVING, &lc->flags); + smp_mb__after_clear_bit(); + } + RETURN(e->refcnt); +} + +/** + * lc_element_by_index + * @lc: the lru cache to operate on + * @i: the index of the element to return + */ +struct lc_element *lc_element_by_index(struct lru_cache *lc, unsigned i) +{ + BUG_ON(i >= lc->nr_elements); + BUG_ON(lc->lc_element[i] == NULL); + BUG_ON(lc->lc_element[i]->lc_index != i); + return lc->lc_element[i]; +} + +/** + * lc_index_of + * @lc: the lru cache to operate on + * @e: the element to query for its index position in lc->element + */ +unsigned int lc_index_of(struct lru_cache *lc, struct lc_element *e) +{ + PARANOIA_LC_ELEMENT(lc, e); + return e->lc_index; +} + +/** + * lc_set - associate index with label + * @lc: the lru cache to operate on + * @enr: the label to set + * @index: the element index to associate label with. + * + * Used to initialize the active set to some previously recorded state. + */ +void lc_set(struct lru_cache *lc, unsigned int enr, int index) +{ + struct lc_element *e; + + if (index < 0 || index >= lc->nr_elements) + return; + + e = lc_element_by_index(lc, index); + e->lc_number = enr; + + hlist_del_init(&e->colision); + hlist_add_head(&e->colision, lc_hash_slot(lc, enr)); + list_move(&e->list, e->refcnt ? &lc->in_use : &lc->lru); +} + +/** + * lc_dump - Dump a complete LRU cache to seq in textual form. + * @lc: the lru cache to operate on + * @seq: the &struct seq_file pointer to seq_printf into + * @utext: user supplied "heading" or other info + * @detail: function pointer the user may provide to dump further details + * of the object the lc_element is embedded in. + */ +void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char *utext, + void (*detail) (struct seq_file *, struct lc_element *)) +{ + unsigned int nr_elements = lc->nr_elements; + struct lc_element *e; + int i; + + seq_printf(seq, "\tnn: lc_number refcnt %s\n ", utext); + for (i = 0; i < nr_elements; i++) { + e = lc_element_by_index(lc, i); + if (e->lc_number == LC_FREE) { + seq_printf(seq, "\t%2d: FREE\n", i); + } else { + seq_printf(seq, "\t%2d: %4u %4u ", i, + e->lc_number, e->refcnt); + detail(seq, e); + } + } +} + +EXPORT_SYMBOL(lc_create); +EXPORT_SYMBOL(lc_reset); +EXPORT_SYMBOL(lc_destroy); +EXPORT_SYMBOL(lc_set); +EXPORT_SYMBOL(lc_del); +EXPORT_SYMBOL(lc_try_get); +EXPORT_SYMBOL(lc_find); +EXPORT_SYMBOL(lc_get); +EXPORT_SYMBOL(lc_put); +EXPORT_SYMBOL(lc_changed); +EXPORT_SYMBOL(lc_element_by_index); +EXPORT_SYMBOL(lc_index_of); +EXPORT_SYMBOL(lc_seq_printf_stats); +EXPORT_SYMBOL(lc_seq_dump_details); -- cgit v1.3-8-gc7d7 From 7c824f4b69316df55fe243c5a6c7dba2b62285c1 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 2 Oct 2009 07:22:58 +0200 Subject: ALSA: sscape - Remove sscap_ioctl.h from include/sound/Kbuild Signed-off-by: Takashi Iwai --- include/sound/Kbuild | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/sound/Kbuild b/include/sound/Kbuild index fd054a344324..e9dd9369ecb9 100644 --- a/include/sound/Kbuild +++ b/include/sound/Kbuild @@ -2,7 +2,6 @@ header-y += asound_fm.h header-y += hdsp.h header-y += hdspm.h header-y += sfnt_info.h -header-y += sscape_ioctl.h unifdef-y += asequencer.h unifdef-y += asound.h -- cgit v1.3-8-gc7d7 From 63312b6a6faae3f2e5577f2b001e3b504f10a2aa Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Fri, 2 Oct 2009 07:50:50 -0700 Subject: x86: Add a Kconfig option to turn the copy_from_user warnings into errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For automated testing it is useful to have the option to turn the warnings on copy_from_user() etc checks into errors: In function ‘copy_from_user’, inlined from ‘fd_copyin’ at drivers/block/floppy.c:3080, inlined from ‘fd_ioctl’ at drivers/block/floppy.c:3503: linux/arch/x86/include/asm/uaccess_32.h:213: error: call to ‘copy_from_user_overflow’ declared with attribute error: copy_from_user buffer size is not provably correct Signed-off-by: Arjan van de Ven Cc: Linus Torvalds Cc: Andrew Morton LKML-Reference: <20091002075050.4e9f7641@infradead.org> Signed-off-by: Ingo Molnar --- arch/x86/Kconfig.debug | 14 ++++++++++++++ arch/x86/include/asm/uaccess_32.h | 4 +++- include/linux/compiler-gcc4.h | 1 + include/linux/compiler.h | 3 +++ 4 files changed, 21 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index d105f29bb6bb..1bd2e36f1538 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -287,4 +287,18 @@ config OPTIMIZE_INLINING If unsure, say N. +config DEBUG_STRICT_USER_COPY_CHECKS + bool "Strict copy size checks" + depends on DEBUG_KERNEL + ---help--- + Enabling this option turns a certain set of sanity checks for user + copy operations into compile time failures. + + The copy_from_user() etc checks are there to help test if there + are sufficient security checks on the length argument of + the copy operation, by having gcc prove that the argument is + within bounds. + + If unsure, or if you run an older (pre 4.4) gcc, say N. + endmenu diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index 952f9e793c3e..0c9825e97f36 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -193,7 +193,9 @@ unsigned long __must_check _copy_from_user(void *to, extern void copy_from_user_overflow(void) -#ifdef CONFIG_DEBUG_STACKOVERFLOW +#ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS + __compiletime_error("copy_from_user() buffer size is not provably correct") +#else __compiletime_warning("copy_from_user() buffer size is not provably correct") #endif ; diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h index f1709c1f9eae..77542c57e20a 100644 --- a/include/linux/compiler-gcc4.h +++ b/include/linux/compiler-gcc4.h @@ -41,4 +41,5 @@ #define __compiletime_object_size(obj) __builtin_object_size(obj, 0) #if __GNUC_MINOR__ >= 4 #define __compiletime_warning(message) __attribute__((warning(message))) +#define __compiletime_error(message) __attribute__((error(message))) #endif diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 950356311f12..88fd4b673cb4 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -273,6 +273,9 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); #ifndef __compiletime_warning # define __compiletime_warning(message) #endif +#ifndef __compiletime_error +# define __compiletime_error(message) +#endif /* * Prevent the compiler from merging or refetching accesses. The compiler -- cgit v1.3-8-gc7d7 From a9828ec6bc0b7e19a65f7e13daa8bd35a926a753 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 1 Oct 2009 11:33:03 +0000 Subject: ethtool: Remove support for obsolete string query operations The in-tree implementations have all been converted to get_sset_count(). Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- include/linux/ethtool.h | 4 ---- net/core/ethtool.c | 58 +++++++++---------------------------------------- 2 files changed, 10 insertions(+), 52 deletions(-) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 15e4eb713694..aa0dcb3833d1 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -495,10 +495,6 @@ struct ethtool_ops { u32 (*get_priv_flags)(struct net_device *); int (*set_priv_flags)(struct net_device *, u32); int (*get_sset_count)(struct net_device *, int); - - /* the following hooks are obsolete */ - int (*self_test_count)(struct net_device *);/* use get_sset_count */ - int (*get_stats_count)(struct net_device *);/* use get_sset_count */ int (*get_rxnfc)(struct net_device *, struct ethtool_rxnfc *, void *); int (*set_rxnfc)(struct net_device *, struct ethtool_rxnfc *); int (*flash_device)(struct net_device *, struct ethtool_flash *); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 4c12ddb5f5ee..e1951084b973 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -198,13 +198,6 @@ static int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr) rc = ops->get_sset_count(dev, ETH_SS_PRIV_FLAGS); if (rc >= 0) info.n_priv_flags = rc; - } else { - /* code path for obsolete hooks */ - - if (ops->self_test_count) - info.testinfo_len = ops->self_test_count(dev); - if (ops->get_stats_count) - info.n_stats = ops->get_stats_count(dev); } if (ops->get_regs_len) info.regdump_len = ops->get_regs_len(dev); @@ -684,16 +677,10 @@ static int ethtool_self_test(struct net_device *dev, char __user *useraddr) u64 *data; int ret, test_len; - if (!ops->self_test) - return -EOPNOTSUPP; - if (!ops->get_sset_count && !ops->self_test_count) + if (!ops->self_test || !ops->get_sset_count) return -EOPNOTSUPP; - if (ops->get_sset_count) - test_len = ops->get_sset_count(dev, ETH_SS_TEST); - else - /* code path for obsolete hook */ - test_len = ops->self_test_count(dev); + test_len = ops->get_sset_count(dev, ETH_SS_TEST); if (test_len < 0) return test_len; WARN_ON(test_len == 0); @@ -728,36 +715,17 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr) u8 *data; int ret; - if (!ops->get_strings) + if (!ops->get_strings || !ops->get_sset_count) return -EOPNOTSUPP; if (copy_from_user(&gstrings, useraddr, sizeof(gstrings))) return -EFAULT; - if (ops->get_sset_count) { - ret = ops->get_sset_count(dev, gstrings.string_set); - if (ret < 0) - return ret; - - gstrings.len = ret; - } else { - /* code path for obsolete hooks */ - - switch (gstrings.string_set) { - case ETH_SS_TEST: - if (!ops->self_test_count) - return -EOPNOTSUPP; - gstrings.len = ops->self_test_count(dev); - break; - case ETH_SS_STATS: - if (!ops->get_stats_count) - return -EOPNOTSUPP; - gstrings.len = ops->get_stats_count(dev); - break; - default: - return -EINVAL; - } - } + ret = ops->get_sset_count(dev, gstrings.string_set); + if (ret < 0) + return ret; + + gstrings.len = ret; data = kmalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER); if (!data) @@ -798,16 +766,10 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr) u64 *data; int ret, n_stats; - if (!ops->get_ethtool_stats) - return -EOPNOTSUPP; - if (!ops->get_sset_count && !ops->get_stats_count) + if (!ops->get_ethtool_stats || !ops->get_sset_count) return -EOPNOTSUPP; - if (ops->get_sset_count) - n_stats = ops->get_sset_count(dev, ETH_SS_STATS); - else - /* code path for obsolete hook */ - n_stats = ops->get_stats_count(dev); + n_stats = ops->get_sset_count(dev, ETH_SS_STATS); if (n_stats < 0) return n_stats; WARN_ON(n_stats == 0); -- cgit v1.3-8-gc7d7 From 977750076d98c7ff6cbda51858bb5a5894a9d9ab Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Fri, 2 Oct 2009 06:56:41 +0000 Subject: af_packet: add interframe drop cmsg (v6) Add Ancilliary data to better represent loss information I've had a few requests recently to provide more detail regarding frame loss during an AF_PACKET packet capture session. Specifically the requestors want to see where in a packet sequence frames were lost, i.e. they want to see that 40 frames were lost between frames 302 and 303 in a packet capture file. In order to do this we need: 1) The kernel to export this data to user space 2) The applications to make use of it This patch addresses item (1). It does this by doing the following: A) Anytime we drop a frame for which we would increment po->stats.tp_drops, we also no increment a stats called po->stats.tp_gap. B) Every time we successfully enqueue a frame to sk_receive_queue, we record the value of po->stats.tp_gap in skb->mark. skb->cb would nominally be the place to record this, but since all the space there is used up, we're overloading skb->mark. Its safe to do since any enqueued packet is guaranteed to be unshared at this point, and skb->mark isn't used for anything else in the rx path to the application. After we record tp_gap in the skb, we zero po->stats.tp_gap. This allows us to keep a counter of the number of frames lost between any two enqueued packets C) When the application goes to dequeue a frame from the packet socket, we look at skb->mark for that frame. If it is non-zero, we add a cmsg chunk to the msghdr of level SOL_PACKET and type PACKET_GAPDATA. Its a 32 bit integer that represents the number of frames lost between this packet and the last previous frame received. Note there is a chance that if there is frame loss after a receive, and then the socket is closed, some gap data might be lost. This is covered by the use of the PACKET_AUXDATA socket option, which gives total loss data. With a bit of math, the final gap can be determined that way. I've tested this patch myself, and it works well. Signed-off-by: Neil Horman Signed-off-by: Eric Dumazet include/linux/if_packet.h | 2 ++ net/packet/af_packet.c | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) Signed-off-by: David S. Miller --- include/linux/if_packet.h | 2 ++ net/packet/af_packet.c | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) (limited to 'include') diff --git a/include/linux/if_packet.h b/include/linux/if_packet.h index dea7d6b7cf98..e5d200f53fc3 100644 --- a/include/linux/if_packet.h +++ b/include/linux/if_packet.h @@ -48,11 +48,13 @@ struct sockaddr_ll #define PACKET_RESERVE 12 #define PACKET_TX_RING 13 #define PACKET_LOSS 14 +#define PACKET_GAPDATA 15 struct tpacket_stats { unsigned int tp_packets; unsigned int tp_drops; + unsigned int tp_gap; }; struct tpacket_auxdata diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index d7ecca0a0c07..d398a9bf6903 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -523,6 +523,31 @@ static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk, return res; } +/* + * If we've lost frames since the last time we queued one to the + * sk_receive_queue, we need to record it here. + * This must be called under the protection of the socket lock + * to prevent racing with other softirqs and user space + */ +static inline void record_packet_gap(struct sk_buff *skb, + struct packet_sock *po) +{ + /* + * We overload the mark field here, since we're about + * to enqueue to a receive queue and no body else will + * use this field at this point + */ + skb->mark = po->stats.tp_gap; + po->stats.tp_gap = 0; + return; + +} + +static inline __u32 check_packet_gap(struct sk_buff *skb) +{ + return skb->mark; +} + /* This function makes lazy skb cloning in hope that most of packets are discarded by BPF. @@ -626,6 +651,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, spin_lock(&sk->sk_receive_queue.lock); po->stats.tp_packets++; + record_packet_gap(skb, po); __skb_queue_tail(&sk->sk_receive_queue, skb); spin_unlock(&sk->sk_receive_queue.lock); sk->sk_data_ready(sk, skb->len); @@ -634,6 +660,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, drop_n_acct: spin_lock(&sk->sk_receive_queue.lock); po->stats.tp_drops++; + po->stats.tp_gap++; spin_unlock(&sk->sk_receive_queue.lock); drop_n_restore: @@ -811,6 +838,7 @@ drop: ring_is_full: po->stats.tp_drops++; + po->stats.tp_gap++; spin_unlock(&sk->sk_receive_queue.lock); sk->sk_data_ready(sk, 0); @@ -1418,6 +1446,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, struct sk_buff *skb; int copied, err; struct sockaddr_ll *sll; + __u32 gap; err = -EINVAL; if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT)) @@ -1496,6 +1525,10 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux); } + gap = check_packet_gap(skb); + if (gap) + put_cmsg(msg, SOL_PACKET, PACKET_GAPDATA, sizeof(__u32), &gap); + /* * Free or return the buffer as appropriate. Again this * hides all the races and re-entrancy issues from us. -- cgit v1.3-8-gc7d7 From 16c6cf8bb471392fd09b48b7c27e7d83a446b4bc Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sun, 20 Sep 2009 10:35:36 +0000 Subject: ipv4: fib table algorithm performance improvement The FIB algorithim for IPV4 is set at compile time, but kernel goes through the overhead of function call indirection at runtime. Save some cycles by turning the indirect calls to direct calls to either hash or trie code. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/net/ip_fib.h | 25 ++++++++++++++----------- net/ipv4/fib_frontend.c | 26 +++++++++++++------------- net/ipv4/fib_hash.c | 25 ++++++++++--------------- net/ipv4/fib_rules.c | 2 +- net/ipv4/fib_trie.c | 26 ++++++++++---------------- 5 files changed, 48 insertions(+), 56 deletions(-) (limited to 'include') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index ef91fe924ba4..68fd5ebd0949 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -144,18 +144,21 @@ struct fib_table { struct hlist_node tb_hlist; u32 tb_id; int tb_default; - int (*tb_lookup)(struct fib_table *tb, const struct flowi *flp, struct fib_result *res); - int (*tb_insert)(struct fib_table *, struct fib_config *); - int (*tb_delete)(struct fib_table *, struct fib_config *); - int (*tb_dump)(struct fib_table *table, struct sk_buff *skb, - struct netlink_callback *cb); - int (*tb_flush)(struct fib_table *table); - void (*tb_select_default)(struct fib_table *table, - const struct flowi *flp, struct fib_result *res); - unsigned char tb_data[0]; }; +extern int fib_table_lookup(struct fib_table *tb, const struct flowi *flp, + struct fib_result *res); +extern int fib_table_insert(struct fib_table *, struct fib_config *); +extern int fib_table_delete(struct fib_table *, struct fib_config *); +extern int fib_table_dump(struct fib_table *table, struct sk_buff *skb, + struct netlink_callback *cb); +extern int fib_table_flush(struct fib_table *table); +extern void fib_table_select_default(struct fib_table *table, + const struct flowi *flp, + struct fib_result *res); + + #ifndef CONFIG_IP_MULTIPLE_TABLES #define TABLE_LOCAL_INDEX 0 @@ -182,11 +185,11 @@ static inline int fib_lookup(struct net *net, const struct flowi *flp, struct fib_table *table; table = fib_get_table(net, RT_TABLE_LOCAL); - if (!table->tb_lookup(table, flp, res)) + if (!fib_table_lookup(table, flp, res)) return 0; table = fib_get_table(net, RT_TABLE_MAIN); - if (!table->tb_lookup(table, flp, res)) + if (!fib_table_lookup(table, flp, res)) return 0; return -ENETUNREACH; } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index e2f950592566..f73dbed0f0d7 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -125,7 +125,7 @@ void fib_select_default(struct net *net, #endif tb = fib_get_table(net, table); if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) - tb->tb_select_default(tb, flp, res); + fib_table_select_default(tb, flp, res); } static void fib_flush(struct net *net) @@ -139,7 +139,7 @@ static void fib_flush(struct net *net) for (h = 0; h < FIB_TABLE_HASHSZ; h++) { head = &net->ipv4.fib_table_hash[h]; hlist_for_each_entry(tb, node, head, tb_hlist) - flushed += tb->tb_flush(tb); + flushed += fib_table_flush(tb); } if (flushed) @@ -162,7 +162,7 @@ struct net_device * ip_dev_find(struct net *net, __be32 addr) #endif local_table = fib_get_table(net, RT_TABLE_LOCAL); - if (!local_table || local_table->tb_lookup(local_table, &fl, &res)) + if (!local_table || fib_table_lookup(local_table, &fl, &res)) return NULL; if (res.type != RTN_LOCAL) goto out; @@ -200,7 +200,7 @@ static inline unsigned __inet_dev_addr_type(struct net *net, local_table = fib_get_table(net, RT_TABLE_LOCAL); if (local_table) { ret = RTN_UNICAST; - if (!local_table->tb_lookup(local_table, &fl, &res)) { + if (!fib_table_lookup(local_table, &fl, &res)) { if (!dev || dev == res.fi->fib_dev) ret = res.type; fib_res_put(&res); @@ -473,13 +473,13 @@ int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg) if (cmd == SIOCDELRT) { tb = fib_get_table(net, cfg.fc_table); if (tb) - err = tb->tb_delete(tb, &cfg); + err = fib_table_delete(tb, &cfg); else err = -ESRCH; } else { tb = fib_new_table(net, cfg.fc_table); if (tb) - err = tb->tb_insert(tb, &cfg); + err = fib_table_insert(tb, &cfg); else err = -ENOBUFS; } @@ -594,7 +594,7 @@ static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *ar goto errout; } - err = tb->tb_delete(tb, &cfg); + err = fib_table_delete(tb, &cfg); errout: return err; } @@ -616,7 +616,7 @@ static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *ar goto errout; } - err = tb->tb_insert(tb, &cfg); + err = fib_table_insert(tb, &cfg); errout: return err; } @@ -647,7 +647,7 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) if (dumped) memset(&cb->args[2], 0, sizeof(cb->args) - 2 * sizeof(cb->args[0])); - if (tb->tb_dump(tb, skb, cb) < 0) + if (fib_table_dump(tb, skb, cb) < 0) goto out; dumped = 1; next: @@ -701,9 +701,9 @@ static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifad cfg.fc_scope = RT_SCOPE_HOST; if (cmd == RTM_NEWROUTE) - tb->tb_insert(tb, &cfg); + fib_table_insert(tb, &cfg); else - tb->tb_delete(tb, &cfg); + fib_table_delete(tb, &cfg); } void fib_add_ifaddr(struct in_ifaddr *ifa) @@ -832,7 +832,7 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb ) local_bh_disable(); frn->tb_id = tb->tb_id; - frn->err = tb->tb_lookup(tb, &fl, &res); + frn->err = fib_table_lookup(tb, &fl, &res); if (!frn->err) { frn->prefixlen = res.prefixlen; @@ -1009,7 +1009,7 @@ static void __net_exit ip_fib_net_exit(struct net *net) head = &net->ipv4.fib_table_hash[i]; hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) { hlist_del(node); - tb->tb_flush(tb); + fib_table_flush(tb); kfree(tb); } } diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c index ecd39454235c..14972017b9c2 100644 --- a/net/ipv4/fib_hash.c +++ b/net/ipv4/fib_hash.c @@ -242,8 +242,8 @@ fn_new_zone(struct fn_hash *table, int z) return fz; } -static int -fn_hash_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result *res) +int fib_table_lookup(struct fib_table *tb, + const struct flowi *flp, struct fib_result *res) { int err; struct fn_zone *fz; @@ -274,8 +274,8 @@ out: return err; } -static void -fn_hash_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res) +void fib_table_select_default(struct fib_table *tb, + const struct flowi *flp, struct fib_result *res) { int order, last_idx; struct hlist_node *node; @@ -366,7 +366,7 @@ static struct fib_node *fib_find_node(struct fn_zone *fz, __be32 key) return NULL; } -static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg) +int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) { struct fn_hash *table = (struct fn_hash *) tb->tb_data; struct fib_node *new_f = NULL; @@ -544,8 +544,7 @@ out: return err; } - -static int fn_hash_delete(struct fib_table *tb, struct fib_config *cfg) +int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) { struct fn_hash *table = (struct fn_hash *)tb->tb_data; struct fib_node *f; @@ -662,7 +661,7 @@ static int fn_flush_list(struct fn_zone *fz, int idx) return found; } -static int fn_hash_flush(struct fib_table *tb) +int fib_table_flush(struct fib_table *tb) { struct fn_hash *table = (struct fn_hash *) tb->tb_data; struct fn_zone *fz; @@ -743,7 +742,8 @@ fn_hash_dump_zone(struct sk_buff *skb, struct netlink_callback *cb, return skb->len; } -static int fn_hash_dump(struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb) +int fib_table_dump(struct fib_table *tb, struct sk_buff *skb, + struct netlink_callback *cb) { int m, s_m; struct fn_zone *fz; @@ -787,12 +787,7 @@ struct fib_table *fib_hash_table(u32 id) tb->tb_id = id; tb->tb_default = -1; - tb->tb_lookup = fn_hash_lookup; - tb->tb_insert = fn_hash_insert; - tb->tb_delete = fn_hash_delete; - tb->tb_flush = fn_hash_flush; - tb->tb_select_default = fn_hash_select_default; - tb->tb_dump = fn_hash_dump; + memset(tb->tb_data, 0, sizeof(struct fn_hash)); return tb; } diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 92d9d97ec5e3..835262c2b867 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -94,7 +94,7 @@ static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp, if ((tbl = fib_get_table(rule->fr_net, rule->table)) == NULL) goto errout; - err = tbl->tb_lookup(tbl, flp, (struct fib_result *) arg->result); + err = fib_table_lookup(tbl, flp, (struct fib_result *) arg->result); if (err > 0) err = -EAGAIN; errout: diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 291bdf50a21f..af5d89792860 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1174,7 +1174,7 @@ done: /* * Caller must hold RTNL. */ -static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg) +int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) { struct trie *t = (struct trie *) tb->tb_data; struct fib_alias *fa, *new_fa; @@ -1373,8 +1373,8 @@ static int check_leaf(struct trie *t, struct leaf *l, return 1; } -static int fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, - struct fib_result *res) +int fib_table_lookup(struct fib_table *tb, const struct flowi *flp, + struct fib_result *res) { struct trie *t = (struct trie *) tb->tb_data; int ret; @@ -1595,7 +1595,7 @@ static void trie_leaf_remove(struct trie *t, struct leaf *l) /* * Caller must hold RTNL. */ -static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg) +int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) { struct trie *t = (struct trie *) tb->tb_data; u32 key, mask; @@ -1786,7 +1786,7 @@ static struct leaf *trie_leafindex(struct trie *t, int index) /* * Caller must hold RTNL. */ -static int fn_trie_flush(struct fib_table *tb) +int fib_table_flush(struct fib_table *tb) { struct trie *t = (struct trie *) tb->tb_data; struct leaf *l, *ll = NULL; @@ -1807,9 +1807,9 @@ static int fn_trie_flush(struct fib_table *tb) return found; } -static void fn_trie_select_default(struct fib_table *tb, - const struct flowi *flp, - struct fib_result *res) +void fib_table_select_default(struct fib_table *tb, + const struct flowi *flp, + struct fib_result *res) { struct trie *t = (struct trie *) tb->tb_data; int order, last_idx; @@ -1952,8 +1952,8 @@ static int fn_trie_dump_leaf(struct leaf *l, struct fib_table *tb, return skb->len; } -static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb, - struct netlink_callback *cb) +int fib_table_dump(struct fib_table *tb, struct sk_buff *skb, + struct netlink_callback *cb) { struct leaf *l; struct trie *t = (struct trie *) tb->tb_data; @@ -2020,12 +2020,6 @@ struct fib_table *fib_hash_table(u32 id) tb->tb_id = id; tb->tb_default = -1; - tb->tb_lookup = fn_trie_lookup; - tb->tb_insert = fn_trie_insert; - tb->tb_delete = fn_trie_delete; - tb->tb_flush = fn_trie_flush; - tb->tb_select_default = fn_trie_select_default; - tb->tb_dump = fn_trie_dump; t = (struct trie *) tb->tb_data; memset(t, 0, sizeof(*t)); -- cgit v1.3-8-gc7d7 From 0bfbedb14a8a96c529341bec88991a92b41fac72 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 5 Oct 2009 00:11:22 -0700 Subject: tunnels: Optimize tx path We currently dirty a cache line to update tunnel device stats (tx_packets/tx_bytes). We better use the txq->tx_bytes/tx_packets counters that already are present in cpu cache, in the cache line shared with txq->_xmit_lock This patch extends IPTUNNEL_XMIT() macro to use txq pointer provided by the caller. Also &tunnel->dev->stats can be replaced by &dev->stats Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ipip.h | 6 +++--- net/ipv4/ip_gre.c | 5 +++-- net/ipv4/ipip.c | 5 +++-- net/ipv6/sit.c | 5 +++-- 4 files changed, 12 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/net/ipip.h b/include/net/ipip.h index 87acf8f3a155..0159221a8509 100644 --- a/include/net/ipip.h +++ b/include/net/ipip.h @@ -42,9 +42,9 @@ struct ip_tunnel_prl_entry ip_select_ident(iph, &rt->u.dst, NULL); \ \ err = ip_local_out(skb); \ - if (net_xmit_eval(err) == 0) { \ - stats->tx_bytes += pkt_len; \ - stats->tx_packets++; \ + if (likely(net_xmit_eval(err) == 0)) { \ + txq->tx_bytes += pkt_len; \ + txq->tx_packets++; \ } else { \ stats->tx_errors++; \ stats->tx_aborted_errors++; \ diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 41ada9904d31..89ff9d5b1500 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -662,7 +662,8 @@ drop_nolock: static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); - struct net_device_stats *stats = &tunnel->dev->stats; + struct net_device_stats *stats = &dev->stats; + struct netdev_queue *txq = netdev_get_tx_queue(dev, 0); struct iphdr *old_iph = ip_hdr(skb); struct iphdr *tiph; u8 tos; @@ -810,7 +811,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); if (!new_skb) { ip_rt_put(rt); - stats->tx_dropped++; + txq->tx_dropped++; dev_kfree_skb(skb); return NETDEV_TX_OK; } diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 08ccd344de7a..6a5539236ab3 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -390,7 +390,8 @@ static int ipip_rcv(struct sk_buff *skb) static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); - struct net_device_stats *stats = &tunnel->dev->stats; + struct net_device_stats *stats = &dev->stats; + struct netdev_queue *txq = netdev_get_tx_queue(dev, 0); struct iphdr *tiph = &tunnel->parms.iph; u8 tos = tunnel->parms.iph.tos; __be16 df = tiph->frag_off; @@ -478,7 +479,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); if (!new_skb) { ip_rt_put(rt); - stats->tx_dropped++; + txq->tx_dropped++; dev_kfree_skb(skb); return NETDEV_TX_OK; } diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index dbd19a78ca73..99da272951dc 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -555,7 +555,8 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); - struct net_device_stats *stats = &tunnel->dev->stats; + struct net_device_stats *stats = &dev->stats; + struct netdev_queue *txq = netdev_get_tx_queue(dev, 0); struct iphdr *tiph = &tunnel->parms.iph; struct ipv6hdr *iph6 = ipv6_hdr(skb); u8 tos = tunnel->parms.iph.tos; @@ -688,7 +689,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); if (!new_skb) { ip_rt_put(rt); - stats->tx_dropped++; + txq->tx_dropped++; dev_kfree_skb(skb); return NETDEV_TX_OK; } -- cgit v1.3-8-gc7d7 From e1e499eef2200c2a7120c9ebf297d48b195cf887 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Fri, 2 Oct 2009 05:15:25 +0000 Subject: usbnet: Use wwan%d interface name for mobile broadband devices Add support for usbnet based devices like CDC-Ether to indicate that they are actually mobile broadband devices. In that case use wwan%d as default interface name. Signed-off-by: Marcel Holtmann Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ether.c | 20 ++++++++++++++------ drivers/net/usb/usbnet.c | 3 +++ include/linux/usb/usbnet.h | 1 + 3 files changed, 18 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index 4a6aff579403..71e65fc10e6f 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -420,6 +420,14 @@ static const struct driver_info cdc_info = { .status = cdc_status, }; +static const struct driver_info mbm_info = { + .description = "Mobile Broadband Network Device", + .flags = FLAG_WWAN, + .bind = cdc_bind, + .unbind = usbnet_cdc_unbind, + .status = cdc_status, +}; + /*-------------------------------------------------------------------------*/ @@ -532,32 +540,32 @@ static const struct usb_device_id products [] = { /* Ericsson F3507g */ USB_DEVICE_AND_INTERFACE_INFO(0x0bdb, 0x1900, USB_CLASS_COMM, USB_CDC_SUBCLASS_MDLM, USB_CDC_PROTO_NONE), - .driver_info = (unsigned long) &cdc_info, + .driver_info = (unsigned long) &mbm_info, }, { /* Ericsson F3507g ver. 2 */ USB_DEVICE_AND_INTERFACE_INFO(0x0bdb, 0x1902, USB_CLASS_COMM, USB_CDC_SUBCLASS_MDLM, USB_CDC_PROTO_NONE), - .driver_info = (unsigned long) &cdc_info, + .driver_info = (unsigned long) &mbm_info, }, { /* Ericsson F3607gw */ USB_DEVICE_AND_INTERFACE_INFO(0x0bdb, 0x1904, USB_CLASS_COMM, USB_CDC_SUBCLASS_MDLM, USB_CDC_PROTO_NONE), - .driver_info = (unsigned long) &cdc_info, + .driver_info = (unsigned long) &mbm_info, }, { /* Ericsson F3307 */ USB_DEVICE_AND_INTERFACE_INFO(0x0bdb, 0x1906, USB_CLASS_COMM, USB_CDC_SUBCLASS_MDLM, USB_CDC_PROTO_NONE), - .driver_info = (unsigned long) &cdc_info, + .driver_info = (unsigned long) &mbm_info, }, { /* Toshiba F3507g */ USB_DEVICE_AND_INTERFACE_INFO(0x0930, 0x130b, USB_CLASS_COMM, USB_CDC_SUBCLASS_MDLM, USB_CDC_PROTO_NONE), - .driver_info = (unsigned long) &cdc_info, + .driver_info = (unsigned long) &mbm_info, }, { /* Dell F3507g */ USB_DEVICE_AND_INTERFACE_INFO(0x413c, 0x8147, USB_CLASS_COMM, USB_CDC_SUBCLASS_MDLM, USB_CDC_PROTO_NONE), - .driver_info = (unsigned long) &cdc_info, + .driver_info = (unsigned long) &mbm_info, }, { }, // END }; diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index ca5ca5ae061d..8124cf16259f 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -1295,6 +1295,9 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod) /* WLAN devices should always be named "wlan%d" */ if ((dev->driver_info->flags & FLAG_WLAN) != 0) strcpy(net->name, "wlan%d"); + /* WWAN devices should always be named "wwan%d" */ + if ((dev->driver_info->flags & FLAG_WWAN) != 0) + strcpy(net->name, "wwan%d"); /* maybe the remote can't receive an Ethernet MTU */ if (net->mtu > (dev->hard_mtu - net->hard_header_len)) diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index f81473052059..86c31b753266 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -90,6 +90,7 @@ struct driver_info { #define FLAG_WLAN 0x0080 /* use "wlan%d" names */ #define FLAG_AVOID_UNLINK_URBS 0x0100 /* don't unlink urbs at usbnet_stop() */ #define FLAG_SEND_ZLP 0x0200 /* hw requires ZLPs are sent */ +#define FLAG_WWAN 0x0400 /* use "wwan%d" names */ /* init device ... can sleep, or cause probe() failure */ -- cgit v1.3-8-gc7d7 From 7ffbe3fdace0bdfcdab8dc6c77506feda0871f79 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 2 Oct 2009 05:15:27 +0000 Subject: net: introduce NETDEV_POST_INIT notifier For various purposes including a wireless extensions bugfix, we need to hook into the netdev creation before before netdev_register_kobject(). This will also ease doing the dev type assignment that Marcel was working on for cfg80211 drivers w/o touching them all. Signed-off-by: Johannes Berg Signed-off-by: Marcel Holtmann Signed-off-by: David S. Miller --- include/linux/notifier.h | 1 + net/core/dev.c | 6 ++++++ 2 files changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/notifier.h b/include/linux/notifier.h index 44428d247dbe..29714b8441b1 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -201,6 +201,7 @@ static inline int notifier_to_errno(int ret) #define NETDEV_PRE_UP 0x000D #define NETDEV_BONDING_OLDTYPE 0x000E #define NETDEV_BONDING_NEWTYPE 0x000F +#define NETDEV_POST_INIT 0x0010 #define SYS_DOWN 0x0001 /* Notify of system down */ #define SYS_RESTART SYS_DOWN diff --git a/net/core/dev.c b/net/core/dev.c index b8f74cfb1bfd..a74c8fd69556 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4836,6 +4836,12 @@ int register_netdevice(struct net_device *dev) dev->features |= NETIF_F_GSO; netdev_initialize_kobject(dev); + + ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev); + ret = notifier_to_errno(ret); + if (ret) + goto err_uninit; + ret = netdev_register_kobject(dev); if (ret) goto err_uninit; -- cgit v1.3-8-gc7d7 From a092ff0f90cae22b2ac8028ecd2c6f6c1a9e4601 Mon Sep 17 00:00:00 2001 From: john stultz Date: Fri, 2 Oct 2009 16:17:53 -0700 Subject: time: Implement logarithmic time accumulation Accumulating one tick at a time works well unless we're using NOHZ. Then it can be an issue, since we may have to run through the loop a few thousand times, which can increase timer interrupt caused latency. The current solution was to accumulate in half-second intervals with NOHZ. This kept the number of loops down, however it did slightly change how we make NTP adjustments. While not an issue with NTPd users, as NTPd makes adjustments over a longer period of time, other adjtimex() users have noticed the half-second granularity with which we can apply frequency changes to the clock. For instance, if a application tries to apply a 100ppm frequency correction for 20ms to correct a 2us offset, with NOHZ they either get no correction, or a 50us correction. Now, there will always be some granularity error for applying frequency corrections. However with users sensitive to this error have seen a 50-500x increase with NOHZ compared to running without NOHZ. So I figured I'd try another approach then just simply increasing the interval. My approach is to consume the time interval logarithmically. This reduces the number of times through the loop needed keeping latency down, while still preserving the original granularity error for adjtimex() changes. Further, this change allows us to remove the xtime_cache code (patch to follow), as xtime is always within one tick of the current time, instead of the half-second updates it saw before. An earlier version of this patch has been shipping to x86 users in the RedHat MRG releases for awhile without issue, but I've reworked this version to be even more careful about avoiding possible overflows if the shift value gets too large. Signed-off-by: John Stultz Acked-by: Thomas Gleixner Reviewed-by: John Kacur Cc: Clark Williams Cc: Martin Schwidefsky Cc: Andrew Morton LKML-Reference: <1254525473.7741.88.camel@localhost.localdomain> Signed-off-by: Ingo Molnar --- include/linux/timex.h | 4 --- kernel/time/timekeeping.c | 85 +++++++++++++++++++++++++++++++++-------------- 2 files changed, 60 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/include/linux/timex.h b/include/linux/timex.h index e6967d10d9e5..0c0ef7d4db7c 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -261,11 +261,7 @@ static inline int ntp_synced(void) #define NTP_SCALE_SHIFT 32 -#ifdef CONFIG_NO_HZ -#define NTP_INTERVAL_FREQ (2) -#else #define NTP_INTERVAL_FREQ (HZ) -#endif #define NTP_INTERVAL_LENGTH (NSEC_PER_SEC/NTP_INTERVAL_FREQ) /* Returns how long ticks are at present, in ns / 2^NTP_SCALE_SHIFT. */ diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index fb0f46fa1ecd..5fdd78e0858a 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -721,6 +721,51 @@ static void timekeeping_adjust(s64 offset) timekeeper.ntp_error_shift; } + +/** + * logarithmic_accumulation - shifted accumulation of cycles + * + * This functions accumulates a shifted interval of cycles into + * into a shifted interval nanoseconds. Allows for O(log) accumulation + * loop. + * + * Returns the unconsumed cycles. + */ +static cycle_t logarithmic_accumulation(cycle_t offset, int shift) +{ + u64 nsecps = (u64)NSEC_PER_SEC << timekeeper.shift; + + /* If the offset is smaller then a shifted interval, do nothing */ + if (offset < timekeeper.cycle_interval<cycle_last += timekeeper.cycle_interval << shift; + + timekeeper.xtime_nsec += timekeeper.xtime_interval << shift; + while (timekeeper.xtime_nsec >= nsecps) { + timekeeper.xtime_nsec -= nsecps; + xtime.tv_sec++; + second_overflow(); + } + + /* Accumulate into raw time */ + raw_time.tv_nsec += timekeeper.raw_interval << shift;; + while (raw_time.tv_nsec >= NSEC_PER_SEC) { + raw_time.tv_nsec -= NSEC_PER_SEC; + raw_time.tv_sec++; + } + + /* Accumulate error between NTP and clock interval */ + timekeeper.ntp_error += tick_length << shift; + timekeeper.ntp_error -= timekeeper.xtime_interval << + (timekeeper.ntp_error_shift + shift); + + return offset; +} + + /** * update_wall_time - Uses the current clocksource to increment the wall time * @@ -731,6 +776,7 @@ void update_wall_time(void) struct clocksource *clock; cycle_t offset; u64 nsecs; + int shift = 0, maxshift; /* Make sure we're fully resumed: */ if (unlikely(timekeeping_suspended)) @@ -744,33 +790,22 @@ void update_wall_time(void) #endif timekeeper.xtime_nsec = (s64)xtime.tv_nsec << timekeeper.shift; - /* normally this loop will run just once, however in the - * case of lost or late ticks, it will accumulate correctly. + /* + * With NO_HZ we may have to accumulate many cycle_intervals + * (think "ticks") worth of time at once. To do this efficiently, + * we calculate the largest doubling multiple of cycle_intervals + * that is smaller then the offset. We then accumulate that + * chunk in one go, and then try to consume the next smaller + * doubled multiple. */ + shift = ilog2(offset) - ilog2(timekeeper.cycle_interval); + shift = max(0, shift); + /* Bound shift to one less then what overflows tick_length */ + maxshift = (8*sizeof(tick_length) - (ilog2(tick_length)+1)) - 1; + shift = min(shift, maxshift); while (offset >= timekeeper.cycle_interval) { - u64 nsecps = (u64)NSEC_PER_SEC << timekeeper.shift; - - /* accumulate one interval */ - offset -= timekeeper.cycle_interval; - clock->cycle_last += timekeeper.cycle_interval; - - timekeeper.xtime_nsec += timekeeper.xtime_interval; - if (timekeeper.xtime_nsec >= nsecps) { - timekeeper.xtime_nsec -= nsecps; - xtime.tv_sec++; - second_overflow(); - } - - raw_time.tv_nsec += timekeeper.raw_interval; - if (raw_time.tv_nsec >= NSEC_PER_SEC) { - raw_time.tv_nsec -= NSEC_PER_SEC; - raw_time.tv_sec++; - } - - /* accumulate error between NTP and clock interval */ - timekeeper.ntp_error += tick_length; - timekeeper.ntp_error -= timekeeper.xtime_interval << - timekeeper.ntp_error_shift; + offset = logarithmic_accumulation(offset, shift); + shift--; } /* correct the clock when NTP error is too big */ -- cgit v1.3-8-gc7d7 From 9f5180e5c331d7b3ccc35e1a78072235d38f9f34 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 6 Oct 2009 09:30:14 +0200 Subject: drbd: Work on permission enforcement Now we have the capabilities of the sending process available, use them to enforce CAP_SYS_ADMIN. Signed-off-by: Philipp Reisner Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_nl.c | 7 ++++++- include/linux/drbd.h | 1 + 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 73c55ccb629a..22538d9628f1 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2000,7 +2000,7 @@ static struct cn_handler_struct cnd_table[] = { [ P_new_c_uuid ] = { &drbd_nl_new_c_uuid, 0 }, }; -static void drbd_connector_callback(struct cn_msg *req) +static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms *nsp) { struct drbd_nl_cfg_req *nlp = (struct drbd_nl_cfg_req *)req->data; struct cn_handler_struct *cm; @@ -2017,6 +2017,11 @@ static void drbd_connector_callback(struct cn_msg *req) return; } + if (!cap_raised(nsp->eff_cap, CAP_SYS_ADMIN)) { + retcode = ERR_PERM; + goto fail; + } + mdev = ensure_mdev(nlp); if (!mdev) { retcode = ERR_MINOR_INVALID; diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 69dc711f37b3..233db5c18b86 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -138,6 +138,7 @@ enum drbd_ret_codes { ERR_VERIFY_RUNNING = 149, /* DRBD 8.2 only */ ERR_DATA_NOT_CURRENT = 150, ERR_CONNECTED = 151, /* DRBD 8.3 only */ + ERR_PERM = 152, /* insert new ones above this line */ AFTER_LAST_ERR_CODE -- cgit v1.3-8-gc7d7 From 26a50744b21fff65bd754874072857bee8967f4d Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Tue, 6 Oct 2009 01:09:50 -0500 Subject: tracing/events: Add 'signed' field to format files The sign info used for filters in the kernel is also useful to applications that process the trace stream. Add it to the format files and make it available to userspace. Signed-off-by: Tom Zanussi Acked-by: Frederic Weisbecker Cc: rostedt@goodmis.org Cc: lizf@cn.fujitsu.com Cc: hch@infradead.org Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: <1254809398-8078-2-git-send-email-tzanussi@gmail.com> Signed-off-by: Ingo Molnar --- include/trace/ftrace.h | 15 +++++++++------ kernel/trace/ring_buffer.c | 15 +++++++++------ kernel/trace/trace_events.c | 24 ++++++++++++------------ kernel/trace/trace_export.c | 25 ++++++++++++++----------- kernel/trace/trace_syscalls.c | 20 +++++++++++++------- tools/perf/util/trace-event-parse.c | 24 ++++++++++++++++++++++++ tools/perf/util/trace-event.h | 1 + 7 files changed, 82 insertions(+), 42 deletions(-) (limited to 'include') diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index cc0d9667e182..c9bbcab95fbe 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -120,9 +120,10 @@ #undef __field #define __field(type, item) \ ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ - "offset:%u;\tsize:%u;\n", \ + "offset:%u;\tsize:%u;\tsigned:%u;\n", \ (unsigned int)offsetof(typeof(field), item), \ - (unsigned int)sizeof(field.item)); \ + (unsigned int)sizeof(field.item), \ + (unsigned int)is_signed_type(type)); \ if (!ret) \ return 0; @@ -132,19 +133,21 @@ #undef __array #define __array(type, item, len) \ ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ - "offset:%u;\tsize:%u;\n", \ + "offset:%u;\tsize:%u;\tsigned:%u;\n", \ (unsigned int)offsetof(typeof(field), item), \ - (unsigned int)sizeof(field.item)); \ + (unsigned int)sizeof(field.item), \ + (unsigned int)is_signed_type(type)); \ if (!ret) \ return 0; #undef __dynamic_array #define __dynamic_array(type, item, len) \ ret = trace_seq_printf(s, "\tfield:__data_loc " #type "[] " #item ";\t"\ - "offset:%u;\tsize:%u;\n", \ + "offset:%u;\tsize:%u;\tsigned:%u;\n", \ (unsigned int)offsetof(typeof(field), \ __data_loc_##item), \ - (unsigned int)sizeof(field.__data_loc_##item)); \ + (unsigned int)sizeof(field.__data_loc_##item), \ + (unsigned int)is_signed_type(type)); \ if (!ret) \ return 0; diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index d4ff01970547..e43c928356ee 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -397,18 +397,21 @@ int ring_buffer_print_page_header(struct trace_seq *s) int ret; ret = trace_seq_printf(s, "\tfield: u64 timestamp;\t" - "offset:0;\tsize:%u;\n", - (unsigned int)sizeof(field.time_stamp)); + "offset:0;\tsize:%u;\tsigned:%u;\n", + (unsigned int)sizeof(field.time_stamp), + (unsigned int)is_signed_type(u64)); ret = trace_seq_printf(s, "\tfield: local_t commit;\t" - "offset:%u;\tsize:%u;\n", + "offset:%u;\tsize:%u;\tsigned:%u;\n", (unsigned int)offsetof(typeof(field), commit), - (unsigned int)sizeof(field.commit)); + (unsigned int)sizeof(field.commit), + (unsigned int)is_signed_type(long)); ret = trace_seq_printf(s, "\tfield: char data;\t" - "offset:%u;\tsize:%u;\n", + "offset:%u;\tsize:%u;\tsigned:%u;\n", (unsigned int)offsetof(typeof(field), data), - (unsigned int)BUF_PAGE_SIZE); + (unsigned int)BUF_PAGE_SIZE, + (unsigned int)is_signed_type(char)); return ret; } diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index d128f65778e6..cf3cabf6ce14 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -507,7 +507,7 @@ extern char *__bad_type_size(void); #define FIELD(type, name) \ sizeof(type) != sizeof(field.name) ? __bad_type_size() : \ #type, "common_" #name, offsetof(typeof(field), name), \ - sizeof(field.name) + sizeof(field.name), is_signed_type(type) static int trace_write_header(struct trace_seq *s) { @@ -515,17 +515,17 @@ static int trace_write_header(struct trace_seq *s) /* struct trace_entry */ return trace_seq_printf(s, - "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" - "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" - "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" - "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" - "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" - "\n", - FIELD(unsigned short, type), - FIELD(unsigned char, flags), - FIELD(unsigned char, preempt_count), - FIELD(int, pid), - FIELD(int, lock_depth)); + "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n" + "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n" + "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n" + "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n" + "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n" + "\n", + FIELD(unsigned short, type), + FIELD(unsigned char, flags), + FIELD(unsigned char, preempt_count), + FIELD(int, pid), + FIELD(int, lock_depth)); } static ssize_t diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 9753fcc61bc5..31da218ee10f 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -66,44 +66,47 @@ static void __used ____ftrace_check_##name(void) \ #undef __field #define __field(type, item) \ ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ - "offset:%zu;\tsize:%zu;\n", \ + "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \ offsetof(typeof(field), item), \ - sizeof(field.item)); \ + sizeof(field.item), is_signed_type(type)); \ if (!ret) \ return 0; #undef __field_desc #define __field_desc(type, container, item) \ ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ - "offset:%zu;\tsize:%zu;\n", \ + "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \ offsetof(typeof(field), container.item), \ - sizeof(field.container.item)); \ + sizeof(field.container.item), \ + is_signed_type(type)); \ if (!ret) \ return 0; #undef __array #define __array(type, item, len) \ ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ - "offset:%zu;\tsize:%zu;\n", \ - offsetof(typeof(field), item), \ - sizeof(field.item)); \ + "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \ + offsetof(typeof(field), item), \ + sizeof(field.item), is_signed_type(type)); \ if (!ret) \ return 0; #undef __array_desc #define __array_desc(type, container, item, len) \ ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ - "offset:%zu;\tsize:%zu;\n", \ + "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \ offsetof(typeof(field), container.item), \ - sizeof(field.container.item)); \ + sizeof(field.container.item), \ + is_signed_type(type)); \ if (!ret) \ return 0; #undef __dynamic_array #define __dynamic_array(type, item) \ ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ - "offset:%zu;\tsize:0;\n", \ - offsetof(typeof(field), item)); \ + "offset:%zu;\tsize:0;\tsigned:%u;\n", \ + offsetof(typeof(field), item), \ + is_signed_type(type)); \ if (!ret) \ return 0; diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 527e17eae575..d99abc427c39 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -103,7 +103,8 @@ extern char *__bad_type_size(void); #define SYSCALL_FIELD(type, name) \ sizeof(type) != sizeof(trace.name) ? \ __bad_type_size() : \ - #type, #name, offsetof(typeof(trace), name), sizeof(trace.name) + #type, #name, offsetof(typeof(trace), name), \ + sizeof(trace.name), is_signed_type(type) int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s) { @@ -120,7 +121,8 @@ int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s) if (!entry) return 0; - ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n", + ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;" + "\tsigned:%u;\n", SYSCALL_FIELD(int, nr)); if (!ret) return 0; @@ -130,8 +132,10 @@ int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s) entry->args[i]); if (!ret) return 0; - ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;\n", offset, - sizeof(unsigned long)); + ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;" + "\tsigned:%u;\n", offset, + sizeof(unsigned long), + is_signed_type(unsigned long)); if (!ret) return 0; offset += sizeof(unsigned long); @@ -163,8 +167,10 @@ int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s) struct syscall_trace_exit trace; ret = trace_seq_printf(s, - "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" - "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n", + "\tfield:%s %s;\toffset:%zu;\tsize:%zu;" + "\tsigned:%u;\n" + "\tfield:%s %s;\toffset:%zu;\tsize:%zu;" + "\tsigned:%u;\n", SYSCALL_FIELD(int, nr), SYSCALL_FIELD(long, ret)); if (!ret) @@ -212,7 +218,7 @@ int syscall_exit_define_fields(struct ftrace_event_call *call) if (ret) return ret; - ret = trace_define_field(call, SYSCALL_FIELD(long, ret), 0, + ret = trace_define_field(call, SYSCALL_FIELD(long, ret), FILTER_OTHER); return ret; diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index 55b41b9e3834..be8412d699a1 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -894,6 +894,21 @@ static int event_read_fields(struct event *event, struct format_field **fields) field->size = strtoul(token, NULL, 0); free_token(token); + if (read_expected(EVENT_OP, (char *)";") < 0) + goto fail_expect; + + if (read_expected(EVENT_ITEM, (char *)"signed") < 0) + goto fail_expect; + + if (read_expected(EVENT_OP, (char *)":") < 0) + goto fail_expect; + + if (read_expect_type(EVENT_ITEM, &token)) + goto fail; + if (strtoul(token, NULL, 0)) + field->flags |= FIELD_IS_SIGNED; + free_token(token); + if (read_expected(EVENT_OP, (char *)";") < 0) goto fail_expect; @@ -2843,6 +2858,15 @@ static void parse_header_field(char *type, return; *size = atoi(token); free_token(token); + if (read_expected(EVENT_OP, (char *)";") < 0) + return; + if (read_expected(EVENT_ITEM, (char *)"signed") < 0) + return; + if (read_expected(EVENT_OP, (char *)":") < 0) + return; + if (read_expect_type(EVENT_ITEM, &token) < 0) + return; + free_token(token); if (read_expected(EVENT_OP, (char *)";") < 0) return; if (read_expect_type(EVENT_NEWLINE, &token) < 0) diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index 162c3e6deb93..00b440df66d8 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -26,6 +26,7 @@ enum { enum format_flags { FIELD_IS_ARRAY = 1, FIELD_IS_POINTER = 2, + FIELD_IS_SIGNED = 4, }; struct format_field { -- cgit v1.3-8-gc7d7 From d2b247a8be57647d1745535acd58169fbcbe431a Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 6 Oct 2009 15:21:04 +0100 Subject: ASoC: Add virtual enumeration support for DAPM muxes Sometimes it is desirable to have a mux which does not reflect any direct register configuration but which will instead only have an effect implicitly (for example, as a result of changing which parts of the device are powered up). Provide a virtual mux for this purpose. Signed-off-by: Mark Brown --- include/sound/soc-dapm.h | 10 ++++++++++ sound/soc/soc-dapm.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+) (limited to 'include') diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h index 67224db60348..c5c95e1da65b 100644 --- a/include/sound/soc-dapm.h +++ b/include/sound/soc-dapm.h @@ -206,6 +206,12 @@ .get = snd_soc_dapm_get_enum_double, \ .put = snd_soc_dapm_put_enum_double, \ .private_value = (unsigned long)&xenum } +#define SOC_DAPM_ENUM_VIRT(xname, xenum) \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, \ + .info = snd_soc_info_enum_double, \ + .get = snd_soc_dapm_get_enum_virt, \ + .put = snd_soc_dapm_put_enum_virt, \ + .private_value = (unsigned long)&xenum } #define SOC_DAPM_VALUE_ENUM(xname, xenum) \ { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, \ .info = snd_soc_info_enum_double, \ @@ -260,6 +266,10 @@ int snd_soc_dapm_get_enum_double(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol); int snd_soc_dapm_put_enum_double(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol); +int snd_soc_dapm_get_enum_virt(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol); +int snd_soc_dapm_put_enum_virt(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol); int snd_soc_dapm_get_value_enum_double(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol); int snd_soc_dapm_put_value_enum_double(struct snd_kcontrol *kcontrol, diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 311467b95afb..d2af872e4771 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -1807,6 +1807,54 @@ out: } EXPORT_SYMBOL_GPL(snd_soc_dapm_put_enum_double); +/** + * snd_soc_dapm_get_enum_virt - Get virtual DAPM mux + * @kcontrol: mixer control + * @ucontrol: control element information + * + * Returns 0 for success. + */ +int snd_soc_dapm_get_enum_virt(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_dapm_widget *widget = snd_kcontrol_chip(kcontrol); + + ucontrol->value.enumerated.item[0] = widget->value; + + return 0; +} +EXPORT_SYMBOL_GPL(snd_soc_dapm_get_enum_virt); + +/** + * snd_soc_dapm_put_enum_virt - Set virtual DAPM mux + * @kcontrol: mixer control + * @ucontrol: control element information + * + * Returns 0 for success. + */ +int snd_soc_dapm_put_enum_virt(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_dapm_widget *widget = snd_kcontrol_chip(kcontrol); + struct soc_enum *e = + (struct soc_enum *)kcontrol->private_value; + int change; + int ret = 0; + + if (ucontrol->value.enumerated.item[0] >= e->max) + return -EINVAL; + + mutex_lock(&widget->codec->mutex); + + change = widget->value != ucontrol->value.enumerated.item[0]; + widget->value = ucontrol->value.enumerated.item[0]; + dapm_mux_update_power(widget, kcontrol, change, widget->value, e); + + mutex_unlock(&widget->codec->mutex); + return ret; +} +EXPORT_SYMBOL_GPL(snd_soc_dapm_put_enum_virt); + /** * snd_soc_dapm_get_value_enum_double - dapm semi enumerated double mixer get * callback -- cgit v1.3-8-gc7d7 From bcdce7195e0eab55b37dbd53be53057f38006380 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 6 Oct 2009 17:28:29 -0700 Subject: net: speedup sk_wake_async() An incoming datagram must bring into cpu cache *lot* of cache lines, in particular : (other parts omitted (hash chains, ip route cache...)) On 32bit arches : offsetof(struct sock, sk_rcvbuf) =0x30 (read) offsetof(struct sock, sk_lock) =0x34 (rw) offsetof(struct sock, sk_sleep) =0x50 (read) offsetof(struct sock, sk_rmem_alloc) =0x64 (rw) offsetof(struct sock, sk_receive_queue)=0x74 (rw) offsetof(struct sock, sk_forward_alloc)=0x98 (rw) offsetof(struct sock, sk_callback_lock)=0xcc (rw) offsetof(struct sock, sk_drops) =0xd8 (read if we add dropcount support, rw if frame dropped) offsetof(struct sock, sk_filter) =0xf8 (read) offsetof(struct sock, sk_socket) =0x138 (read) offsetof(struct sock, sk_data_ready) =0x15c (read) We can avoid sk->sk_socket and socket->fasync_list referencing on sockets with no fasync() structures. (socket->fasync_list ptr is probably already in cache because it shares a cache line with socket->wait, ie location pointed by sk->sk_sleep) This avoids one cache line load per incoming packet for common cases (no fasync()) We can leave (or even move in a future patch) sk->sk_socket in a cold location Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 3 ++- net/socket.c | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 1621935aad5b..98398bdec57d 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -504,6 +504,7 @@ enum sock_flags { SOCK_TIMESTAMPING_SOFTWARE, /* %SOF_TIMESTAMPING_SOFTWARE */ SOCK_TIMESTAMPING_RAW_HARDWARE, /* %SOF_TIMESTAMPING_RAW_HARDWARE */ SOCK_TIMESTAMPING_SYS_HARDWARE, /* %SOF_TIMESTAMPING_SYS_HARDWARE */ + SOCK_FASYNC, /* fasync() active */ }; static inline void sock_copy_flags(struct sock *nsk, struct sock *osk) @@ -1396,7 +1397,7 @@ static inline unsigned long sock_wspace(struct sock *sk) static inline void sk_wake_async(struct sock *sk, int how, int band) { - if (sk->sk_socket && sk->sk_socket->fasync_list) + if (sock_flag(sk, SOCK_FASYNC)) sock_wake_async(sk->sk_socket, how, band); } diff --git a/net/socket.c b/net/socket.c index 75655365b5fd..d53ad11558c3 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1100,11 +1100,14 @@ static int sock_fasync(int fd, struct file *filp, int on) fna->fa_next = sock->fasync_list; write_lock_bh(&sk->sk_callback_lock); sock->fasync_list = fna; + sock_set_flag(sk, SOCK_FASYNC); write_unlock_bh(&sk->sk_callback_lock); } else { if (fa != NULL) { write_lock_bh(&sk->sk_callback_lock); *prev = fa->fa_next; + if (!sock->fasync_list) + sock_reset_flag(sk, SOCK_FASYNC); write_unlock_bh(&sk->sk_callback_lock); kfree(fa); } -- cgit v1.3-8-gc7d7 From ee5e81f00051b5c373c8de16e3604fd6d3be699e Mon Sep 17 00:00:00 2001 From: Ilia K Date: Wed, 16 Sep 2009 05:53:07 +0000 Subject: add vif using local interface index instead of IP When routing daemon wants to enable forwarding of multicast traffic it performs something like: struct vifctl vc = { .vifc_vifi = 1, .vifc_flags = 0, .vifc_threshold = 1, .vifc_rate_limit = 0, .vifc_lcl_addr = ip, /* <--- ip address of physical interface, e.g. eth0 */ .vifc_rmt_addr.s_addr = htonl(INADDR_ANY), }; setsockopt(fd, IPPROTO_IP, MRT_ADD_VIF, &vc, sizeof(vc)); This leads (in the kernel) to calling vif_add() function call which search the (physical) device using assigned IP address: dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr); The current API (struct vifctl) does not allow to specify an interface other way than using it's IP, and if there are more than a single interface with specified IP only the first one will be found. The attached patch (against 2.6.30.4) allows to specify an interface by its index, instead of IP address: struct vifctl vc = { .vifc_vifi = 1, .vifc_flags = VIFF_USE_IFINDEX, /* NEW */ .vifc_threshold = 1, .vifc_rate_limit = 0, .vifc_lcl_ifindex = if_nametoindex("eth0"), /* NEW */ .vifc_rmt_addr.s_addr = htonl(INADDR_ANY), }; setsockopt(fd, IPPROTO_IP, MRT_ADD_VIF, &vc, sizeof(vc)); Signed-off-by: Ilia K. === modified file 'include/linux/mroute.h' Signed-off-by: David S. Miller --- include/linux/mroute.h | 13 +++++++++---- net/ipv4/ipmr.c | 12 +++++++++++- 2 files changed, 20 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/mroute.h b/include/linux/mroute.h index 08bc776d05e2..d5f69151f692 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -59,13 +59,18 @@ struct vifctl { unsigned char vifc_flags; /* VIFF_ flags */ unsigned char vifc_threshold; /* ttl limit */ unsigned int vifc_rate_limit; /* Rate limiter values (NI) */ - struct in_addr vifc_lcl_addr; /* Our address */ + union { + struct in_addr vifc_lcl_addr; /* Local interface address */ + int vifc_lcl_ifindex; /* Local interface index */ + }; struct in_addr vifc_rmt_addr; /* IPIP tunnel addr */ }; -#define VIFF_TUNNEL 0x1 /* IPIP tunnel */ -#define VIFF_SRCRT 0x2 /* NI */ -#define VIFF_REGISTER 0x4 /* register vif */ +#define VIFF_TUNNEL 0x1 /* IPIP tunnel */ +#define VIFF_SRCRT 0x2 /* NI */ +#define VIFF_REGISTER 0x4 /* register vif */ +#define VIFF_USE_IFINDEX 0x8 /* use vifc_lcl_ifindex instead of + vifc_lcl_addr to find an interface */ /* * Cache manipulation structures for mrouted and PIMd diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 630a56df7b47..c757f0b4b74c 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -469,8 +469,18 @@ static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock) return err; } break; + + case VIFF_USE_IFINDEX: case 0: - dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr); + if (vifc->vifc_flags == VIFF_USE_IFINDEX) { + dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex); + if (dev && dev->ip_ptr == NULL) { + dev_put(dev); + return -EADDRNOTAVAIL; + } + } else + dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr); + if (!dev) return -EADDRNOTAVAIL; err = dev_set_allmulti(dev, 1); -- cgit v1.3-8-gc7d7 From fa857afcf77da669eb6b7031ec07ad14b912c307 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki / 吉藤英明 Date: Tue, 22 Sep 2009 23:43:14 +0000 Subject: ipv6 sit: 6rd (IPv6 Rapid Deployment) Support. IPv6 Rapid Deployment (6rd; draft-ietf-softwire-ipv6-6rd) builds upon mechanisms of 6to4 (RFC3056) to enable a service provider to rapidly deploy IPv6 unicast service to IPv4 sites to which it provides customer premise equipment. Like 6to4, it utilizes stateless IPv6 in IPv4 encapsulation in order to transit IPv4-only network infrastructure. Unlike 6to4, a 6rd service provider uses an IPv6 prefix of its own in place of the fixed 6to4 prefix. With this option enabled, the SIT driver offers 6rd functionality by providing additional ioctl API to configure the IPv6 Prefix for in stead of static 2002::/16 for 6to4. Original patch was done by Alexandre Cassen based on old Internet-Draft. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/linux/if_tunnel.h | 11 ++++ include/net/ipip.h | 13 +++++ net/ipv6/Kconfig | 19 +++++++ net/ipv6/sit.c | 124 +++++++++++++++++++++++++++++++++++++++++++--- 4 files changed, 159 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/if_tunnel.h b/include/linux/if_tunnel.h index 5a9aae4adb44..c53c8e016940 100644 --- a/include/linux/if_tunnel.h +++ b/include/linux/if_tunnel.h @@ -15,6 +15,10 @@ #define SIOCADDPRL (SIOCDEVPRIVATE + 5) #define SIOCDELPRL (SIOCDEVPRIVATE + 6) #define SIOCCHGPRL (SIOCDEVPRIVATE + 7) +#define SIOCGET6RD (SIOCDEVPRIVATE + 8) +#define SIOCADD6RD (SIOCDEVPRIVATE + 9) +#define SIOCDEL6RD (SIOCDEVPRIVATE + 10) +#define SIOCCHG6RD (SIOCDEVPRIVATE + 11) #define GRE_CSUM __cpu_to_be16(0x8000) #define GRE_ROUTING __cpu_to_be16(0x4000) @@ -51,6 +55,13 @@ struct ip_tunnel_prl { /* PRL flags */ #define PRL_DEFAULT 0x0001 +struct ip_tunnel_6rd { + struct in6_addr prefix; + __be32 relay_prefix; + __u16 prefixlen; + __u16 relay_prefixlen; +}; + enum { IFLA_GRE_UNSPEC, diff --git a/include/net/ipip.h b/include/net/ipip.h index 0159221a8509..86f1c8bd040c 100644 --- a/include/net/ipip.h +++ b/include/net/ipip.h @@ -7,6 +7,15 @@ /* Keep error state on tunnel for 30 sec */ #define IPTUNNEL_ERR_TIMEO (30*HZ) +/* 6rd prefix/relay information */ +struct ip_tunnel_6rd_parm +{ + struct in6_addr prefix; + __be32 relay_prefix; + u16 prefixlen; + u16 relay_prefixlen; +}; + struct ip_tunnel { struct ip_tunnel *next; @@ -23,6 +32,10 @@ struct ip_tunnel struct ip_tunnel_parm parms; + /* for SIT */ +#ifdef CONFIG_IPV6_SIT_6RD + struct ip_tunnel_6rd_parm ip6rd; +#endif struct ip_tunnel_prl_entry *prl; /* potential router list */ unsigned int prl_count; /* # of entries in PRL */ }; diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index ead6c7a42f44..f56199827452 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -170,6 +170,25 @@ config IPV6_SIT Saying M here will produce a module called sit. If unsure, say Y. +config IPV6_SIT_6RD + bool "IPv6: IPv6 Rapid Development (6RD) (EXPERIMENTAL)" + depends on IPV6_SIT && EXPERIMENTAL + default n + ---help--- + IPv6 Rapid Deployment (6rd; draft-ietf-softwire-ipv6-6rd) builds upon + mechanisms of 6to4 (RFC3056) to enable a service provider to rapidly + deploy IPv6 unicast service to IPv4 sites to which it provides + customer premise equipment. Like 6to4, it utilizes stateless IPv6 in + IPv4 encapsulation in order to transit IPv4-only network + infrastructure. Unlike 6to4, a 6rd service provider uses an IPv6 + prefix of its own in place of the fixed 6to4 prefix. + + With this option enabled, the SIT driver offers 6rd functionality by + providing additional ioctl API to configure the IPv6 Prefix for in + stead of static 2002::/16 for 6to4. + + If unsure, say N. + config IPV6_NDISC_NODETYPE bool diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 99da272951dc..6955654262a5 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -161,6 +161,21 @@ static void ipip6_tunnel_link(struct sit_net *sitn, struct ip_tunnel *t) write_unlock_bh(&ipip6_lock); } +static void ipip6_tunnel_clone_6rd(struct ip_tunnel *t, struct sit_net *sitn) +{ +#ifdef CONFIG_IPV6_SIT_6RD + if (t->dev == sitn->fb_tunnel_dev) { + ipv6_addr_set(&t->ip6rd.prefix, htonl(0x20020000), 0, 0, 0); + t->ip6rd.relay_prefix = 0; + t->ip6rd.prefixlen = 16; + t->ip6rd.relay_prefixlen = 0; + } else { + struct ip_tunnel *t0 = netdev_priv(sitn->fb_tunnel_dev); + memcpy(&t->ip6rd, &t0->ip6rd, sizeof(t->ip6rd)); + } +#endif +} + static struct ip_tunnel * ipip6_tunnel_locate(struct net *net, struct ip_tunnel_parm *parms, int create) { @@ -213,6 +228,8 @@ static struct ip_tunnel * ipip6_tunnel_locate(struct net *net, dev_hold(dev); + ipip6_tunnel_clone_6rd(t, sitn); + ipip6_tunnel_link(sitn, nt); return nt; @@ -532,17 +549,41 @@ out: return 0; } -/* Returns the embedded IPv4 address if the IPv6 address - comes from 6to4 (RFC 3056) addr space */ - -static inline __be32 try_6to4(struct in6_addr *v6dst) +/* + * Returns the embedded IPv4 address if the IPv6 address + * comes from 6rd / 6to4 (RFC 3056) addr space. + */ +static inline +__be32 try_6rd(struct in6_addr *v6dst, struct ip_tunnel *tunnel) { __be32 dst = 0; +#ifdef CONFIG_IPV6_SIT_6RD + if (ipv6_prefix_equal(v6dst, &tunnel->ip6rd.prefix, + tunnel->ip6rd.prefixlen)) { + unsigned pbw0, pbi0; + int pbi1; + u32 d; + + pbw0 = tunnel->ip6rd.prefixlen >> 5; + pbi0 = tunnel->ip6rd.prefixlen & 0x1f; + + d = (ntohl(tunnel->ip6rd.prefix.s6_addr32[pbw0]) << pbi0) >> + tunnel->ip6rd.relay_prefixlen; + + pbi1 = pbi0 - tunnel->ip6rd.relay_prefixlen; + if (pbi1 > 0) + d |= ntohl(tunnel->ip6rd.prefix.s6_addr32[pbw0 + 1]) >> + (32 - pbi1); + + dst = tunnel->ip6rd.relay_prefix | htonl(d); + } +#else if (v6dst->s6_addr16[0] == htons(0x2002)) { /* 6to4 v6 addr has 16 bits prefix, 32 v4addr, 16 SLA, ... */ memcpy(&dst, &v6dst->s6_addr16[1], 4); } +#endif return dst; } @@ -596,7 +637,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, } if (!dst) - dst = try_6to4(&iph6->daddr); + dst = try_6rd(&iph6->daddr, tunnel); if (!dst) { struct neighbour *neigh = NULL; @@ -786,9 +827,15 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) struct ip_tunnel *t; struct net *net = dev_net(dev); struct sit_net *sitn = net_generic(net, sit_net_id); +#ifdef CONFIG_IPV6_SIT_6RD + struct ip_tunnel_6rd ip6rd; +#endif switch (cmd) { case SIOCGETTUNNEL: +#ifdef CONFIG_IPV6_SIT_6RD + case SIOCGET6RD: +#endif t = NULL; if (dev == sitn->fb_tunnel_dev) { if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { @@ -799,9 +846,25 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) } if (t == NULL) t = netdev_priv(dev); - memcpy(&p, &t->parms, sizeof(p)); - if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) - err = -EFAULT; + + err = -EFAULT; + if (cmd == SIOCGETTUNNEL) { + memcpy(&p, &t->parms, sizeof(p)); + if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, + sizeof(p))) + goto done; +#ifdef CONFIG_IPV6_SIT_6RD + } else { + ipv6_addr_copy(&ip6rd.prefix, &t->ip6rd.prefix); + ip6rd.relay_prefix = t->ip6rd.relay_prefix; + ip6rd.prefixlen = t->ip6rd.prefixlen; + ip6rd.relay_prefixlen = t->ip6rd.relay_prefixlen; + if (copy_to_user(ifr->ifr_ifru.ifru_data, &ip6rd, + sizeof(ip6rd))) + goto done; +#endif + } + err = 0; break; case SIOCADDTUNNEL: @@ -922,6 +985,51 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) netdev_state_change(dev); break; +#ifdef CONFIG_IPV6_SIT_6RD + case SIOCADD6RD: + case SIOCCHG6RD: + case SIOCDEL6RD: + err = -EPERM; + if (!capable(CAP_NET_ADMIN)) + goto done; + + err = -EFAULT; + if (copy_from_user(&ip6rd, ifr->ifr_ifru.ifru_data, + sizeof(ip6rd))) + goto done; + + t = netdev_priv(dev); + + if (cmd != SIOCDEL6RD) { + struct in6_addr prefix; + __be32 relay_prefix; + + err = -EINVAL; + if (ip6rd.relay_prefixlen > 32 || + ip6rd.prefixlen + (32 - ip6rd.relay_prefixlen) > 64) + goto done; + + ipv6_addr_prefix(&prefix, &ip6rd.prefix, + ip6rd.prefixlen); + if (!ipv6_addr_equal(&prefix, &ip6rd.prefix)) + goto done; + relay_prefix = ip6rd.relay_prefix & + htonl(0xffffffffUL << + (32 - ip6rd.relay_prefixlen)); + if (relay_prefix != ip6rd.relay_prefix) + goto done; + + ipv6_addr_copy(&t->ip6rd.prefix, &prefix); + t->ip6rd.relay_prefix = relay_prefix; + t->ip6rd.prefixlen = ip6rd.prefixlen; + t->ip6rd.relay_prefixlen = ip6rd.relay_prefixlen; + } else + ipip6_tunnel_clone_6rd(t, sitn); + + err = 0; + break; +#endif + default: err = -EINVAL; } -- cgit v1.3-8-gc7d7 From d250a5f90e53f5e150618186230795352d154c88 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 2 Oct 2009 10:32:18 +0000 Subject: pkt_sched: gen_estimator: Dont report fake rate estimators MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Jarek Poplawski a écrit : > > > Hmm... So you made me to do some "real" work here, and guess what?: > there is one serious checkpatch warning! ;-) Plus, this new parameter > should be added to the function description. Otherwise: > Signed-off-by: Jarek Poplawski > > Thanks, > Jarek P. > > PS: I guess full "Don't" would show we really mean it... Okay :) Here is the last round, before the night ! Thanks again [RFC] pkt_sched: gen_estimator: Don't report fake rate estimators We currently send TCA_STATS_RATE_EST elements to netlink users, even if no estimator is running. # tc -s -d qdisc qdisc pfifo_fast 0: dev eth0 root bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1 Sent 112833764978 bytes 1495081739 pkt (dropped 0, overlimits 0 requeues 0) rate 0bit 0pps backlog 0b 0p requeues 0 User has no way to tell if the "rate 0bit 0pps" is a real estimation, or a fake one (because no estimator is active) After this patch, tc command output is : $ tc -s -d qdisc qdisc pfifo_fast 0: dev eth0 root bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1 Sent 561075 bytes 1196 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 We add a parameter to gnet_stats_copy_rate_est() function so that it can use gen_estimator_active(bstats, r), as suggested by Jarek. This parameter can be NULL if check is not necessary, (htb for example has a mandatory rate estimator) Signed-off-by: Eric Dumazet Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- include/net/gen_stats.h | 1 + net/core/gen_stats.c | 8 +++++++- net/sched/act_api.c | 3 ++- net/sched/sch_api.c | 2 +- net/sched/sch_cbq.c | 2 +- net/sched/sch_drr.c | 2 +- net/sched/sch_hfsc.c | 2 +- net/sched/sch_htb.c | 2 +- 8 files changed, 15 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h index c1488553e349..eb87a1447ae1 100644 --- a/include/net/gen_stats.h +++ b/include/net/gen_stats.h @@ -30,6 +30,7 @@ extern int gnet_stats_start_copy_compat(struct sk_buff *skb, int type, extern int gnet_stats_copy_basic(struct gnet_dump *d, struct gnet_stats_basic_packed *b); extern int gnet_stats_copy_rate_est(struct gnet_dump *d, + const struct gnet_stats_basic_packed *b, struct gnet_stats_rate_est *r); extern int gnet_stats_copy_queue(struct gnet_dump *d, struct gnet_stats_queue *q); diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c index 8569310268ab..393b1d8618e2 100644 --- a/net/core/gen_stats.c +++ b/net/core/gen_stats.c @@ -127,6 +127,7 @@ gnet_stats_copy_basic(struct gnet_dump *d, struct gnet_stats_basic_packed *b) /** * gnet_stats_copy_rate_est - copy rate estimator statistics into statistics TLV * @d: dumping handle + * @b: basic statistics * @r: rate estimator statistics * * Appends the rate estimator statistics to the top level TLV created by @@ -136,8 +137,13 @@ gnet_stats_copy_basic(struct gnet_dump *d, struct gnet_stats_basic_packed *b) * if the room in the socket buffer was not sufficient. */ int -gnet_stats_copy_rate_est(struct gnet_dump *d, struct gnet_stats_rate_est *r) +gnet_stats_copy_rate_est(struct gnet_dump *d, + const struct gnet_stats_basic_packed *b, + struct gnet_stats_rate_est *r) { + if (b && !gen_estimator_active(b, r)) + return 0; + if (d->compat_tc_stats) { d->tc_stats.bps = r->bps; d->tc_stats.pps = r->pps; diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 2dfb3e7a040d..ca2e1fd2bf69 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -618,7 +618,8 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a, goto errout; if (gnet_stats_copy_basic(&d, &h->tcf_bstats) < 0 || - gnet_stats_copy_rate_est(&d, &h->tcf_rate_est) < 0 || + gnet_stats_copy_rate_est(&d, &h->tcf_bstats, + &h->tcf_rate_est) < 0 || gnet_stats_copy_queue(&d, &h->tcf_qstats) < 0) goto errout; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 903e4188b6ca..1acfd29cc826 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1179,7 +1179,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, goto nla_put_failure; if (gnet_stats_copy_basic(&d, &q->bstats) < 0 || - gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 || + gnet_stats_copy_rate_est(&d, &q->bstats, &q->rate_est) < 0 || gnet_stats_copy_queue(&d, &q->qstats) < 0) goto nla_put_failure; diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 5b132c473264..3846d65bc03e 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1609,7 +1609,7 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg, cl->xstats.undertime = cl->undertime - q->now; if (gnet_stats_copy_basic(d, &cl->bstats) < 0 || - gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || + gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 || gnet_stats_copy_queue(d, &cl->qstats) < 0) return -1; diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index 5a888af7e5da..a65604f8f2b8 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -280,7 +280,7 @@ static int drr_dump_class_stats(struct Qdisc *sch, unsigned long arg, } if (gnet_stats_copy_basic(d, &cl->bstats) < 0 || - gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || + gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 || gnet_stats_copy_queue(d, &cl->qdisc->qstats) < 0) return -1; diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 2c5c76be18f8..b38b39c60752 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1375,7 +1375,7 @@ hfsc_dump_class_stats(struct Qdisc *sch, unsigned long arg, xstats.rtwork = cl->cl_cumul; if (gnet_stats_copy_basic(d, &cl->bstats) < 0 || - gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || + gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 || gnet_stats_copy_queue(d, &cl->qstats) < 0) return -1; diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 85acab9dc6fd..2e38d1abd830 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1105,7 +1105,7 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d) cl->xstats.ctokens = cl->ctokens; if (gnet_stats_copy_basic(d, &cl->bstats) < 0 || - gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || + gnet_stats_copy_rate_est(d, NULL, &cl->rate_est) < 0 || gnet_stats_copy_queue(d, &cl->qstats) < 0) return -1; -- cgit v1.3-8-gc7d7 From d73d3a8cb4723e161589864741d8528d70b350eb Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 5 Oct 2009 10:59:58 +0000 Subject: ethtool: Add reset operation After updating firmware stored in flash, users may wish to reset the relevant hardware and start the new firmware immediately. This should not be completely automatic as it may be disruptive. A selective reset may also be useful for debugging or diagnostics. This adds a separate reset operation which takes flags indicating the components to be reset. Drivers are allowed to reset only a subset of those requested, and must indicate the actual subset. This allows the use of generic component masks and some future expansion. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- include/linux/ethtool.h | 32 ++++++++++++++++++++++++++++++++ net/core/ethtool.c | 23 +++++++++++++++++++++++ 2 files changed, 55 insertions(+) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index aa0dcb3833d1..eb1a48da2d43 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -498,6 +498,7 @@ struct ethtool_ops { int (*get_rxnfc)(struct net_device *, struct ethtool_rxnfc *, void *); int (*set_rxnfc)(struct net_device *, struct ethtool_rxnfc *); int (*flash_device)(struct net_device *, struct ethtool_flash *); + int (*reset)(struct net_device *, u32 *); }; #endif /* __KERNEL__ */ @@ -555,6 +556,7 @@ struct ethtool_ops { #define ETHTOOL_SRXCLSRLDEL 0x00000031 /* Delete RX classification rule */ #define ETHTOOL_SRXCLSRLINS 0x00000032 /* Insert RX classification rule */ #define ETHTOOL_FLASHDEV 0x00000033 /* Flash firmware to device */ +#define ETHTOOL_RESET 0x00000034 /* Reset hardware */ /* compatibility with older code */ #define SPARC_ETH_GSET ETHTOOL_GSET @@ -685,4 +687,34 @@ struct ethtool_ops { #define RX_CLS_FLOW_DISC 0xffffffffffffffffULL +/* Reset flags */ +/* The reset() operation must clear the flags for the components which + * were actually reset. On successful return, the flags indicate the + * components which were not reset, either because they do not exist + * in the hardware or because they cannot be reset independently. The + * driver must never reset any components that were not requested. + */ +enum ethtool_reset_flags { + /* These flags represent components dedicated to the interface + * the command is addressed to. Shift any flag left by + * ETH_RESET_SHARED_SHIFT to reset a shared component of the + * same type. + */ + ETH_RESET_MGMT = 1 << 0, /* Management processor */ + ETH_RESET_IRQ = 1 << 1, /* Interrupt requester */ + ETH_RESET_DMA = 1 << 2, /* DMA engine */ + ETH_RESET_FILTER = 1 << 3, /* Filtering/flow direction */ + ETH_RESET_OFFLOAD = 1 << 4, /* Protocol offload */ + ETH_RESET_MAC = 1 << 5, /* Media access controller */ + ETH_RESET_PHY = 1 << 6, /* Transceiver/PHY */ + ETH_RESET_RAM = 1 << 7, /* RAM shared between + * multiple components */ + + ETH_RESET_DEDICATED = 0x0000ffff, /* All components dedicated to + * this interface */ + ETH_RESET_ALL = 0xffffffff, /* All components used by this + * interface, even if shared */ +}; +#define ETH_RESET_SHARED_SHIFT 16 + #endif /* _LINUX_ETHTOOL_H */ diff --git a/net/core/ethtool.c b/net/core/ethtool.c index e1951084b973..d8aee584e8d1 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -302,6 +302,26 @@ static int ethtool_get_regs(struct net_device *dev, char __user *useraddr) return ret; } +static int ethtool_reset(struct net_device *dev, char __user *useraddr) +{ + struct ethtool_value reset; + int ret; + + if (!dev->ethtool_ops->reset) + return -EOPNOTSUPP; + + if (copy_from_user(&reset, useraddr, sizeof(reset))) + return -EFAULT; + + ret = dev->ethtool_ops->reset(dev, &reset.data); + if (ret) + return ret; + + if (copy_to_user(useraddr, &reset, sizeof(reset))) + return -EFAULT; + return 0; +} + static int ethtool_get_wol(struct net_device *dev, char __user *useraddr) { struct ethtool_wolinfo wol = { ETHTOOL_GWOL }; @@ -1089,6 +1109,9 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_FLASHDEV: rc = ethtool_flash_device(dev, useraddr); break; + case ETHTOOL_RESET: + rc = ethtool_reset(dev, useraddr); + break; default: rc = -EOPNOTSUPP; } -- cgit v1.3-8-gc7d7 From f7734fdf61ec6bb848e0bafc1fb8bad2c124bb50 Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Fri, 2 Oct 2009 11:39:15 +0000 Subject: make TLLAO option for NA packets configurable On Friday 02 October 2009 20:53:51 you wrote: > This is good although I would have shortened the name. Ah, I knew I forgot something :) Here is v4. tavi >From 24d96d825b9fa832b22878cc6c990d5711968734 Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Fri, 2 Oct 2009 00:51:15 +0300 Subject: [PATCH] ipv6: new sysctl for sending TLLAO with unicast NAs Neighbor advertisements responding to unicast neighbor solicitations did not include the target link-layer address option. This patch adds a new sysctl option (disabled by default) which controls whether this option should be sent even with unicast NAs. The need for this arose because certain routers expect the TLLAO in some situations even as a response to unicast NS packets. Moreover, RFC 2461 recommends sending this to avoid a race condition (section 4.4, Target link-layer address) Signed-off-by: Cosmin Ratiu Signed-off-by: Octavian Purdila Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 18 ++++++++++++++++++ include/linux/ipv6.h | 1 + net/ipv6/addrconf.c | 8 ++++++++ net/ipv6/ndisc.c | 1 + 4 files changed, 28 insertions(+) (limited to 'include') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index fbe427a6580c..a0e134dd2523 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1086,6 +1086,24 @@ accept_dad - INTEGER 2: Enable DAD, and disable IPv6 operation if MAC-based duplicate link-local address has been found. +force_tllao - BOOLEAN + Enable sending the target link-layer address option even when + responding to a unicast neighbor solicitation. + Default: FALSE + + Quoting from RFC 2461, section 4.4, Target link-layer address: + + "The option MUST be included for multicast solicitations in order to + avoid infinite Neighbor Solicitation "recursion" when the peer node + does not have a cache entry to return a Neighbor Advertisements + message. When responding to unicast solicitations, the option can be + omitted since the sender of the solicitation has the correct link- + layer address; otherwise it would not have be able to send the unicast + solicitation in the first place. However, including the link-layer + address in this case adds little overhead and eliminates a potential + race condition where the sender deletes the cached link-layer address + prior to receiving a response to a previous solicitation." + icmp/*: ratelimit - INTEGER Limit the maximal rates for sending ICMPv6 packets. diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index c662efa68289..ae74ede1abe7 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -167,6 +167,7 @@ struct ipv6_devconf { #endif __s32 disable_ipv6; __s32 accept_dad; + __s32 force_tllao; void *sysctl; }; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 1fd0a3d775d2..bdcee6981c60 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4352,6 +4352,14 @@ static struct addrconf_sysctl_table .mode = 0644, .proc_handler = proc_dointvec, }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "force_tllao", + .data = &ipv6_devconf.force_tllao, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { .ctl_name = 0, /* sentinel */ } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index f74e4e2cdd06..3507cfe1e7a2 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -598,6 +598,7 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, icmp6h.icmp6_solicited = solicited; icmp6h.icmp6_override = override; + inc_opt |= ifp->idev->cnf.force_tllao; __ndisc_send(dev, neigh, daddr, src_addr, &icmp6h, solicited_addr, inc_opt ? ND_OPT_TARGET_LL_ADDR : 0); -- cgit v1.3-8-gc7d7 From ec1b4cf74c81bfd0fbe5bf62bafc86c45917e72f Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 5 Oct 2009 05:58:39 +0000 Subject: net: mark net_proto_ops as const All usages of structure net_proto_ops should be declared const. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/isdn/mISDN/socket.c | 3 +-- drivers/net/pppox.c | 2 +- include/net/bluetooth/bluetooth.h | 2 +- net/appletalk/ddp.c | 2 +- net/atm/pvc.c | 2 +- net/atm/svc.c | 2 +- net/ax25/af_ax25.c | 2 +- net/bluetooth/af_bluetooth.c | 4 ++-- net/bluetooth/bnep/sock.c | 2 +- net/bluetooth/cmtp/sock.c | 2 +- net/bluetooth/hci_sock.c | 2 +- net/bluetooth/hidp/sock.c | 2 +- net/bluetooth/l2cap.c | 2 +- net/bluetooth/rfcomm/sock.c | 2 +- net/bluetooth/sco.c | 2 +- net/can/af_can.c | 2 +- net/decnet/af_decnet.c | 2 +- net/econet/af_econet.c | 2 +- net/ieee802154/af_ieee802154.c | 2 +- net/ipv4/af_inet.c | 2 +- net/ipv6/af_inet6.c | 2 +- net/ipx/af_ipx.c | 2 +- net/irda/af_irda.c | 2 +- net/iucv/af_iucv.c | 2 +- net/key/af_key.c | 2 +- net/llc/af_llc.c | 2 +- net/netlink/af_netlink.c | 2 +- net/netrom/af_netrom.c | 2 +- net/packet/af_packet.c | 2 +- net/phonet/af_phonet.c | 2 +- net/rds/af_rds.c | 2 +- net/rose/af_rose.c | 2 +- net/rxrpc/af_rxrpc.c | 2 +- net/unix/af_unix.c | 2 +- net/x25/af_x25.c | 2 +- 35 files changed, 36 insertions(+), 37 deletions(-) (limited to 'include') diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c index feb0fa45b664..28182ed8dea1 100644 --- a/drivers/isdn/mISDN/socket.c +++ b/drivers/isdn/mISDN/socket.c @@ -808,8 +808,7 @@ mISDN_sock_create(struct net *net, struct socket *sock, int proto) return err; } -static struct -net_proto_family mISDN_sock_family_ops = { +static const struct net_proto_family mISDN_sock_family_ops = { .owner = THIS_MODULE, .family = PF_ISDN, .create = mISDN_sock_create, diff --git a/drivers/net/pppox.c b/drivers/net/pppox.c index 4f6d33fbc673..c14ee24c05a8 100644 --- a/drivers/net/pppox.c +++ b/drivers/net/pppox.c @@ -125,7 +125,7 @@ out: return rc; } -static struct net_proto_family pppox_proto_family = { +static const struct net_proto_family pppox_proto_family = { .family = PF_PPPOX, .create = pppox_create, .owner = THIS_MODULE, diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 718394e2c01e..04a6908e38d2 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -121,7 +121,7 @@ struct bt_sock_list { rwlock_t lock; }; -int bt_sock_register(int proto, struct net_proto_family *ops); +int bt_sock_register(int proto, const struct net_proto_family *ops); int bt_sock_unregister(int proto); void bt_sock_link(struct bt_sock_list *l, struct sock *s); void bt_sock_unlink(struct bt_sock_list *l, struct sock *s); diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index b1a4290996b5..abe38014b7fd 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1821,7 +1821,7 @@ static int atalk_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned lo #endif -static struct net_proto_family atalk_family_ops = { +static const struct net_proto_family atalk_family_ops = { .family = PF_APPLETALK, .create = atalk_create, .owner = THIS_MODULE, diff --git a/net/atm/pvc.c b/net/atm/pvc.c index d4c024504f99..a6e1fdbae87f 100644 --- a/net/atm/pvc.c +++ b/net/atm/pvc.c @@ -137,7 +137,7 @@ static int pvc_create(struct net *net, struct socket *sock,int protocol) } -static struct net_proto_family pvc_family_ops = { +static const struct net_proto_family pvc_family_ops = { .family = PF_ATMPVC, .create = pvc_create, .owner = THIS_MODULE, diff --git a/net/atm/svc.c b/net/atm/svc.c index f90d143c4b25..819354233318 100644 --- a/net/atm/svc.c +++ b/net/atm/svc.c @@ -666,7 +666,7 @@ static int svc_create(struct net *net, struct socket *sock,int protocol) } -static struct net_proto_family svc_family_ops = { +static const struct net_proto_family svc_family_ops = { .family = PF_ATMSVC, .create = svc_create, .owner = THIS_MODULE, diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index f45460730371..f05306f168fa 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1961,7 +1961,7 @@ static const struct file_operations ax25_info_fops = { #endif -static struct net_proto_family ax25_family_ops = { +static const struct net_proto_family ax25_family_ops = { .family = PF_AX25, .create = ax25_create, .owner = THIS_MODULE, diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 8cfb5a849841..1f6e49c1cde8 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -45,7 +45,7 @@ /* Bluetooth sockets */ #define BT_MAX_PROTO 8 -static struct net_proto_family *bt_proto[BT_MAX_PROTO]; +static const struct net_proto_family *bt_proto[BT_MAX_PROTO]; static DEFINE_RWLOCK(bt_proto_lock); static struct lock_class_key bt_lock_key[BT_MAX_PROTO]; @@ -86,7 +86,7 @@ static inline void bt_sock_reclassify_lock(struct socket *sock, int proto) bt_key_strings[proto], &bt_lock_key[proto]); } -int bt_sock_register(int proto, struct net_proto_family *ops) +int bt_sock_register(int proto, const struct net_proto_family *ops) { int err = 0; diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c index e857628b0b27..0a2c5460bb48 100644 --- a/net/bluetooth/bnep/sock.c +++ b/net/bluetooth/bnep/sock.c @@ -222,7 +222,7 @@ static int bnep_sock_create(struct net *net, struct socket *sock, int protocol) return 0; } -static struct net_proto_family bnep_sock_family_ops = { +static const struct net_proto_family bnep_sock_family_ops = { .family = PF_BLUETOOTH, .owner = THIS_MODULE, .create = bnep_sock_create diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c index 16b0fad74f6e..de7c8040bc56 100644 --- a/net/bluetooth/cmtp/sock.c +++ b/net/bluetooth/cmtp/sock.c @@ -217,7 +217,7 @@ static int cmtp_sock_create(struct net *net, struct socket *sock, int protocol) return 0; } -static struct net_proto_family cmtp_sock_family_ops = { +static const struct net_proto_family cmtp_sock_family_ops = { .family = PF_BLUETOOTH, .owner = THIS_MODULE, .create = cmtp_sock_create diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 75302a986067..e7395f231989 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -687,7 +687,7 @@ static int hci_sock_dev_event(struct notifier_block *this, unsigned long event, return NOTIFY_DONE; } -static struct net_proto_family hci_sock_family_ops = { +static const struct net_proto_family hci_sock_family_ops = { .family = PF_BLUETOOTH, .owner = THIS_MODULE, .create = hci_sock_create, diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c index 37c9d7d2e688..4beb6a7a2953 100644 --- a/net/bluetooth/hidp/sock.c +++ b/net/bluetooth/hidp/sock.c @@ -268,7 +268,7 @@ static int hidp_sock_create(struct net *net, struct socket *sock, int protocol) return 0; } -static struct net_proto_family hidp_sock_family_ops = { +static const struct net_proto_family hidp_sock_family_ops = { .family = PF_BLUETOOTH, .owner = THIS_MODULE, .create = hidp_sock_create diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 555d9da1869b..4b66bd579f4a 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -3916,7 +3916,7 @@ static const struct proto_ops l2cap_sock_ops = { .getsockopt = l2cap_sock_getsockopt }; -static struct net_proto_family l2cap_sock_family_ops = { +static const struct net_proto_family l2cap_sock_family_ops = { .family = PF_BLUETOOTH, .owner = THIS_MODULE, .create = l2cap_sock_create, diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 8a20aaf1f231..c70786503850 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -1101,7 +1101,7 @@ static const struct proto_ops rfcomm_sock_ops = { .mmap = sock_no_mmap }; -static struct net_proto_family rfcomm_sock_family_ops = { +static const struct net_proto_family rfcomm_sock_family_ops = { .family = PF_BLUETOOTH, .owner = THIS_MODULE, .create = rfcomm_sock_create diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 77f4153bdb5e..694a65541b73 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -993,7 +993,7 @@ static const struct proto_ops sco_sock_ops = { .getsockopt = sco_sock_getsockopt }; -static struct net_proto_family sco_sock_family_ops = { +static const struct net_proto_family sco_sock_family_ops = { .family = PF_BLUETOOTH, .owner = THIS_MODULE, .create = sco_sock_create, diff --git a/net/can/af_can.c b/net/can/af_can.c index 606832115674..3f2eb27e1ffb 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -842,7 +842,7 @@ static struct packet_type can_packet __read_mostly = { .func = can_rcv, }; -static struct net_proto_family can_family_ops __read_mostly = { +static const struct net_proto_family can_family_ops = { .family = PF_CAN, .create = can_create, .owner = THIS_MODULE, diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 7a58c87baf17..4d3060660a14 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -2325,7 +2325,7 @@ static const struct file_operations dn_socket_seq_fops = { }; #endif -static struct net_proto_family dn_family_ops = { +static const struct net_proto_family dn_family_ops = { .family = AF_DECnet, .create = dn_create, .owner = THIS_MODULE, diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index 0e0254fd767d..6529be3a18b7 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -742,7 +742,7 @@ static int econet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg return 0; } -static struct net_proto_family econet_family_ops = { +static const struct net_proto_family econet_family_ops = { .family = PF_ECONET, .create = econet_create, .owner = THIS_MODULE, diff --git a/net/ieee802154/af_ieee802154.c b/net/ieee802154/af_ieee802154.c index cd949d5e451b..309348fba72b 100644 --- a/net/ieee802154/af_ieee802154.c +++ b/net/ieee802154/af_ieee802154.c @@ -285,7 +285,7 @@ out: return rc; } -static struct net_proto_family ieee802154_family_ops = { +static const struct net_proto_family ieee802154_family_ops = { .family = PF_IEEE802154, .create = ieee802154_create, .owner = THIS_MODULE, diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 57737b8d1711..1deff48b122e 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -931,7 +931,7 @@ static const struct proto_ops inet_sockraw_ops = { #endif }; -static struct net_proto_family inet_family_ops = { +static const struct net_proto_family inet_family_ops = { .family = PF_INET, .create = inet_create, .owner = THIS_MODULE, diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index da36497ae647..94216519873c 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -552,7 +552,7 @@ const struct proto_ops inet6_dgram_ops = { #endif }; -static struct net_proto_family inet6_family_ops = { +static const struct net_proto_family inet6_family_ops = { .family = PF_INET6, .create = inet6_create, .owner = THIS_MODULE, diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 66c7a20011f3..6481ee4bdf72 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -1927,7 +1927,7 @@ static int ipx_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long * Socket family declarations */ -static struct net_proto_family ipx_family_ops = { +static const struct net_proto_family ipx_family_ops = { .family = PF_IPX, .create = ipx_create, .owner = THIS_MODULE, diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index dd35641835f4..9429e4002bca 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -2463,7 +2463,7 @@ bed: return 0; } -static struct net_proto_family irda_family_ops = { +static const struct net_proto_family irda_family_ops = { .family = PF_IRDA, .create = irda_create, .owner = THIS_MODULE, diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index bada1b9c670b..004134b60d86 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1715,7 +1715,7 @@ static const struct proto_ops iucv_sock_ops = { .getsockopt = iucv_sock_getsockopt, }; -static struct net_proto_family iucv_sock_family_ops = { +static const struct net_proto_family iucv_sock_family_ops = { .family = AF_IUCV, .owner = THIS_MODULE, .create = iucv_sock_create, diff --git a/net/key/af_key.c b/net/key/af_key.c index 4e98193dfa0f..c078ae6e975b 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -3644,7 +3644,7 @@ static const struct proto_ops pfkey_ops = { .recvmsg = pfkey_recvmsg, }; -static struct net_proto_family pfkey_family_ops = { +static const struct net_proto_family pfkey_family_ops = { .family = PF_KEY, .create = pfkey_create, .owner = THIS_MODULE, diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 7aa4fd170104..4866b4fb0c27 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -1092,7 +1092,7 @@ out: return rc; } -static struct net_proto_family llc_ui_family_ops = { +static const struct net_proto_family llc_ui_family_ops = { .family = PF_LLC, .create = llc_ui_create, .owner = THIS_MODULE, diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 19e98007691c..0cd2d8829313 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2050,7 +2050,7 @@ static const struct proto_ops netlink_ops = { .sendpage = sock_no_sendpage, }; -static struct net_proto_family netlink_family_ops = { +static const struct net_proto_family netlink_family_ops = { .family = PF_NETLINK, .create = netlink_create, .owner = THIS_MODULE, /* for consistency 8) */ diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 7a834952f67f..281fa597cae5 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -1372,7 +1372,7 @@ static const struct file_operations nr_info_fops = { }; #endif /* CONFIG_PROC_FS */ -static struct net_proto_family nr_family_ops = { +static const struct net_proto_family nr_family_ops = { .family = PF_NETROM, .create = nr_create, .owner = THIS_MODULE, diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index efc1174af716..70073a0dea5d 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2399,7 +2399,7 @@ static const struct proto_ops packet_ops = { .sendpage = sock_no_sendpage, }; -static struct net_proto_family packet_family_ops = { +static const struct net_proto_family packet_family_ops = { .family = PF_PACKET, .create = packet_create, .owner = THIS_MODULE, diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index f60c0c2aacba..c711d58b4bb5 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -118,7 +118,7 @@ out: return err; } -static struct net_proto_family phonet_proto_family = { +static const struct net_proto_family phonet_proto_family = { .family = PF_PHONET, .create = pn_socket_create, .owner = THIS_MODULE, diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index 98e05382fd3c..a202e5b36079 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -431,7 +431,7 @@ void rds_sock_put(struct rds_sock *rs) sock_put(rds_rs_to_sk(rs)); } -static struct net_proto_family rds_family_ops = { +static const struct net_proto_family rds_family_ops = { .family = AF_RDS, .create = rds_create, .owner = THIS_MODULE, diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 502cce76621d..c17734c2ce89 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1509,7 +1509,7 @@ static const struct file_operations rose_info_fops = { }; #endif /* CONFIG_PROC_FS */ -static struct net_proto_family rose_family_ops = { +static const struct net_proto_family rose_family_ops = { .family = PF_ROSE, .create = rose_create, .owner = THIS_MODULE, diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index a86afceaa94f..6817c9781ef3 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -777,7 +777,7 @@ static struct proto rxrpc_proto = { .max_header = sizeof(struct rxrpc_header), }; -static struct net_proto_family rxrpc_family_ops = { +static const struct net_proto_family rxrpc_family_ops = { .family = PF_RXRPC, .create = rxrpc_create, .owner = THIS_MODULE, diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 51ab497115eb..0f133c5a8d3c 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2214,7 +2214,7 @@ static const struct file_operations unix_seq_fops = { #endif -static struct net_proto_family unix_family_ops = { +static const struct net_proto_family unix_family_ops = { .family = PF_UNIX, .create = unix_create, .owner = THIS_MODULE, diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 7fa9c7ad3d3b..ebbfe6bbbff9 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -1476,7 +1476,7 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) return rc; } -static struct net_proto_family x25_family_ops = { +static const struct net_proto_family x25_family_ops = { .family = AF_X25, .create = x25_create, .owner = THIS_MODULE, -- cgit v1.3-8-gc7d7 From 32953543221cfe2bf0a24205fab225e5b8ed81a0 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 5 Oct 2009 06:01:03 +0000 Subject: dcb: data center bridging ops should be r/o The data center bridging ops structure can be const Signed-off-by: Stephen Hemminger Acked-by: Peter P Waskiewicz Jr Signed-off-by: David S. Miller --- drivers/net/ixgbe/ixgbe.h | 2 +- drivers/net/ixgbe/ixgbe_dcb_nl.c | 2 +- include/linux/netdevice.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/net/ixgbe/ixgbe.h b/drivers/net/ixgbe/ixgbe.h index 385be6016667..28f32da794dd 100644 --- a/drivers/net/ixgbe/ixgbe.h +++ b/drivers/net/ixgbe/ixgbe.h @@ -397,7 +397,7 @@ enum ixgbe_boards { extern struct ixgbe_info ixgbe_82598_info; extern struct ixgbe_info ixgbe_82599_info; #ifdef CONFIG_IXGBE_DCB -extern struct dcbnl_rtnl_ops dcbnl_ops; +extern const struct dcbnl_rtnl_ops dcbnl_ops; extern int ixgbe_copy_dcb_cfg(struct ixgbe_dcb_config *src_dcb_cfg, struct ixgbe_dcb_config *dst_dcb_cfg, int tc_max); diff --git a/drivers/net/ixgbe/ixgbe_dcb_nl.c b/drivers/net/ixgbe/ixgbe_dcb_nl.c index a6bc1ef28f92..3c7a79a7d7c6 100644 --- a/drivers/net/ixgbe/ixgbe_dcb_nl.c +++ b/drivers/net/ixgbe/ixgbe_dcb_nl.c @@ -563,7 +563,7 @@ static u8 ixgbe_dcbnl_setapp(struct net_device *netdev, return rval; } -struct dcbnl_rtnl_ops dcbnl_ops = { +const struct dcbnl_rtnl_ops dcbnl_ops = { .getstate = ixgbe_dcbnl_get_state, .setstate = ixgbe_dcbnl_set_state, .getpermhwaddr = ixgbe_dcbnl_get_perm_hw_addr, diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 94958c109761..b332eefebb1b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -909,7 +909,7 @@ struct net_device #ifdef CONFIG_DCB /* Data Center Bridging netlink ops */ - struct dcbnl_rtnl_ops *dcbnl_ops; + const struct dcbnl_rtnl_ops *dcbnl_ops; #endif #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) -- cgit v1.3-8-gc7d7 From 7c89606e24cdabaceb8ca9b3c7ab866c6bcc9e38 Mon Sep 17 00:00:00 2001 From: Holger Schurig Date: Thu, 24 Sep 2009 12:21:01 +0200 Subject: nl80211: report age of scan results Linux keeps scan results up to 15 seconds. This can be a problem for fast moving clients: they get back stale data. But if the kernel reports the age of the BSS items, then user-space can simply weed out old entries by itself. Signed-off-by: Holger Schurig Acked-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 2 ++ net/wireless/nl80211.c | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index a8d71ed43a0e..50afca3dcff1 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -1277,6 +1277,7 @@ enum nl80211_channel_type { * @NL80211_BSS_SIGNAL_UNSPEC: signal strength of the probe response/beacon * in unspecified units, scaled to 0..100 (u8) * @NL80211_BSS_STATUS: status, if this BSS is "used" + * @NL80211_BSS_SEEN_MS_AGO: age of this BSS entry in ms * @__NL80211_BSS_AFTER_LAST: internal * @NL80211_BSS_MAX: highest BSS attribute */ @@ -1291,6 +1292,7 @@ enum nl80211_bss { NL80211_BSS_SIGNAL_MBM, NL80211_BSS_SIGNAL_UNSPEC, NL80211_BSS_STATUS, + NL80211_BSS_SEEN_MS_AGO, /* keep last */ __NL80211_BSS_AFTER_LAST, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index eddab097435c..e0ecc9f153d4 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3105,6 +3105,8 @@ static int nl80211_send_bss(struct sk_buff *msg, u32 pid, u32 seq, int flags, NLA_PUT_U16(msg, NL80211_BSS_BEACON_INTERVAL, res->beacon_interval); NLA_PUT_U16(msg, NL80211_BSS_CAPABILITY, res->capability); NLA_PUT_U32(msg, NL80211_BSS_FREQUENCY, res->channel->center_freq); + NLA_PUT_U32(msg, NL80211_BSS_SEEN_MS_AGO, + jiffies_to_msecs(jiffies - intbss->ts)); switch (rdev->wiphy.signal_type) { case CFG80211_SIGNAL_TYPE_MBM: -- cgit v1.3-8-gc7d7 From 3d23e349d807177eaf519d444677cee86b1a04cf Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 29 Sep 2009 23:27:28 +0200 Subject: wext: refactor Refactor wext to * split out iwpriv handling * split out iwspy handling * split out procfs support * allow cfg80211 to have wireless extensions compat code w/o CONFIG_WIRELESS_EXT After this, drivers need to - select WIRELESS_EXT - for wext support - select WEXT_PRIV - for iwpriv support - select WEXT_SPY - for iwspy support except cfg80211 -- which gets new hooks in wext-core.c and can then get wext handlers without CONFIG_WIRELESS_EXT. Wireless extensions procfs support is auto-selected based on PROC_FS and anything that requires the wext core (i.e. WIRELESS_EXT or CFG80211_WEXT). Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/Kconfig | 29 +- drivers/net/wireless/hostap/Kconfig | 2 + drivers/net/wireless/ipw2x00/Kconfig | 5 + drivers/net/wireless/orinoco/Kconfig | 2 + include/net/cfg80211.h | 6 +- include/net/iw_handler.h | 14 +- include/net/net_namespace.h | 2 +- include/net/wext.h | 49 +- net/core/net-sysfs.c | 6 +- net/socket.c | 4 +- net/wireless/Kconfig | 50 +- net/wireless/Makefile | 8 +- net/wireless/core.c | 14 +- net/wireless/ibss.c | 10 +- net/wireless/mlme.c | 2 +- net/wireless/nl80211.c | 4 +- net/wireless/scan.c | 6 +- net/wireless/sme.c | 12 +- net/wireless/wext-core.c | 1063 ++++++++++++++++++++ net/wireless/wext-priv.c | 248 +++++ net/wireless/wext-proc.c | 155 +++ net/wireless/wext-spy.c | 231 +++++ net/wireless/wext.c | 1775 ---------------------------------- 23 files changed, 1850 insertions(+), 1847 deletions(-) create mode 100644 net/wireless/wext-core.c create mode 100644 net/wireless/wext-priv.c create mode 100644 net/wireless/wext-proc.c create mode 100644 net/wireless/wext-spy.c delete mode 100644 net/wireless/wext.c (limited to 'include') diff --git a/drivers/net/wireless/Kconfig b/drivers/net/wireless/Kconfig index d7a764a2fc1a..c9829c59fd98 100644 --- a/drivers/net/wireless/Kconfig +++ b/drivers/net/wireless/Kconfig @@ -67,6 +67,8 @@ config WAVELAN tristate "AT&T/Lucent old WaveLAN & DEC RoamAbout DS ISA support" depends on ISA && WLAN_PRE80211 select WIRELESS_EXT + select WEXT_SPY + select WEXT_PRIV ---help--- The Lucent WaveLAN (formerly NCR and AT&T; or DEC RoamAbout DS) is a Radio LAN (wireless Ethernet-like Local Area Network) using the @@ -90,6 +92,8 @@ config PCMCIA_WAVELAN tristate "AT&T/Lucent old WaveLAN Pcmcia wireless support" depends on PCMCIA && WLAN_PRE80211 select WIRELESS_EXT + select WEXT_SPY + select WEXT_PRIV help Say Y here if you intend to attach an AT&T/Lucent Wavelan PCMCIA (PC-card) wireless Ethernet networking card to your computer. This @@ -102,6 +106,7 @@ config PCMCIA_NETWAVE tristate "Xircom Netwave AirSurfer Pcmcia wireless support" depends on PCMCIA && WLAN_PRE80211 select WIRELESS_EXT + select WEXT_PRIV help Say Y here if you intend to attach this type of PCMCIA (PC-card) wireless Ethernet networking card to your computer. @@ -123,6 +128,8 @@ config PCMCIA_RAYCS tristate "Aviator/Raytheon 2.4GHz wireless support" depends on PCMCIA && WLAN_80211 select WIRELESS_EXT + select WEXT_SPY + select WEXT_PRIV ---help--- Say Y here if you intend to attach an Aviator/Raytheon PCMCIA (PC-card) wireless Ethernet networking card to your computer. @@ -136,6 +143,7 @@ config LIBERTAS tristate "Marvell 8xxx Libertas WLAN driver support" depends on WLAN_80211 select WIRELESS_EXT + select WEXT_SPY select LIB80211 select FW_LOADER ---help--- @@ -190,6 +198,8 @@ config AIRO depends on ISA_DMA_API && WLAN_80211 && (PCI || BROKEN) select WIRELESS_EXT select CRYPTO + select WEXT_SPY + select WEXT_PRIV ---help--- This is the standard Linux driver to support Cisco/Aironet ISA and PCI 802.11 wireless cards. @@ -207,6 +217,7 @@ config ATMEL tristate "Atmel at76c50x chipset 802.11b support" depends on (PCI || PCMCIA) && WLAN_80211 select WIRELESS_EXT + select WEXT_PRIV select FW_LOADER select CRC32 ---help--- @@ -266,18 +277,21 @@ config AIRO_CS Cisco Linux utilities can be used to configure the card. config PCMCIA_WL3501 - tristate "Planet WL3501 PCMCIA cards" - depends on EXPERIMENTAL && PCMCIA && WLAN_80211 - select WIRELESS_EXT - ---help--- - A driver for WL3501 PCMCIA 802.11 wireless cards made by Planet. - It has basic support for Linux wireless extensions and initial - micro support for ethtool. + tristate "Planet WL3501 PCMCIA cards" + depends on EXPERIMENTAL && PCMCIA && WLAN_80211 + select WIRELESS_EXT + select WEXT_SPY + help + A driver for WL3501 PCMCIA 802.11 wireless cards made by Planet. + It has basic support for Linux wireless extensions and initial + micro support for ethtool. config PRISM54 tristate 'Intersil Prism GT/Duette/Indigo PCI/Cardbus (DEPRECATED)' depends on PCI && EXPERIMENTAL && WLAN_80211 select WIRELESS_EXT + select WEXT_SPY + select WEXT_PRIV select FW_LOADER ---help--- This enables support for FullMAC PCI/Cardbus prism54 devices. This @@ -300,6 +314,7 @@ config USB_ZD1201 tristate "USB ZD1201 based Wireless device support" depends on USB && WLAN_80211 select WIRELESS_EXT + select WEXT_PRIV select FW_LOADER ---help--- Say Y if you want to use wireless LAN adapters based on the ZyDAS diff --git a/drivers/net/wireless/hostap/Kconfig b/drivers/net/wireless/hostap/Kconfig index c15db2293515..08f1e989653d 100644 --- a/drivers/net/wireless/hostap/Kconfig +++ b/drivers/net/wireless/hostap/Kconfig @@ -2,6 +2,8 @@ config HOSTAP tristate "IEEE 802.11 for Host AP (Prism2/2.5/3 and WEP/TKIP/CCMP)" depends on WLAN_80211 select WIRELESS_EXT + select WEXT_SPY + select WEXT_PRIV select CRYPTO select CRYPTO_ARC4 select CRYPTO_ECB diff --git a/drivers/net/wireless/ipw2x00/Kconfig b/drivers/net/wireless/ipw2x00/Kconfig index a8131384c6b9..56fab79dc365 100644 --- a/drivers/net/wireless/ipw2x00/Kconfig +++ b/drivers/net/wireless/ipw2x00/Kconfig @@ -6,6 +6,8 @@ config IPW2100 tristate "Intel PRO/Wireless 2100 Network Connection" depends on PCI && WLAN_80211 && CFG80211 select WIRELESS_EXT + select WEXT_SPY + select WEXT_PRIV select FW_LOADER select LIB80211 select LIBIPW @@ -65,6 +67,8 @@ config IPW2200 tristate "Intel PRO/Wireless 2200BG and 2915ABG Network Connection" depends on PCI && WLAN_80211 && CFG80211 select WIRELESS_EXT + select WEXT_SPY + select WEXT_PRIV select FW_LOADER select LIB80211 select LIBIPW @@ -152,6 +156,7 @@ config LIBIPW tristate depends on PCI && WLAN_80211 && CFG80211 select WIRELESS_EXT + select WEXT_SPY select CRYPTO select CRYPTO_ARC4 select CRYPTO_ECB diff --git a/drivers/net/wireless/orinoco/Kconfig b/drivers/net/wireless/orinoco/Kconfig index 83b635fd7784..13b03b3e8fce 100644 --- a/drivers/net/wireless/orinoco/Kconfig +++ b/drivers/net/wireless/orinoco/Kconfig @@ -3,6 +3,8 @@ config HERMES depends on (PPC_PMAC || PCI || PCMCIA) && WLAN_80211 depends on CFG80211 select WIRELESS_EXT + select WEXT_SPY + select WEXT_PRIV select FW_LOADER select CRYPTO select CRYPTO_MICHAEL_MIC diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 3d874c620219..241ea14d6df8 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1171,6 +1171,10 @@ struct wiphy { struct net *_net; #endif +#ifdef CONFIG_CFG80211_WEXT + const struct iw_handler_def *wext; +#endif + char priv[0] __attribute__((__aligned__(NETDEV_ALIGN))); }; @@ -1345,7 +1349,7 @@ struct wireless_dev { struct cfg80211_internal_bss *auth_bsses[MAX_AUTH_BSSES]; struct cfg80211_internal_bss *current_bss; /* associated / joined */ -#ifdef CONFIG_WIRELESS_EXT +#ifdef CONFIG_CFG80211_WEXT /* wext data */ struct { struct cfg80211_ibss_params ibss; diff --git a/include/net/iw_handler.h b/include/net/iw_handler.h index e9054a283fde..d5d337170a56 100644 --- a/include/net/iw_handler.h +++ b/include/net/iw_handler.h @@ -323,18 +323,19 @@ typedef int (*iw_handler)(struct net_device *dev, struct iw_request_info *info, */ struct iw_handler_def { - /* Number of handlers defined (more precisely, index of the - * last defined handler + 1) */ - __u16 num_standard; - __u16 num_private; - /* Number of private arg description */ - __u16 num_private_args; /* Array of handlers for standard ioctls * We will call dev->wireless_handlers->standard[ioctl - SIOCSIWCOMMIT] */ const iw_handler * standard; + /* Number of handlers defined (more precisely, index of the + * last defined handler + 1) */ + __u16 num_standard; +#ifdef CONFIG_WEXT_PRIV + __u16 num_private; + /* Number of private arg description */ + __u16 num_private_args; /* Array of handlers for private ioctls * Will call dev->wireless_handlers->private[ioctl - SIOCIWFIRSTPRIV] */ @@ -344,6 +345,7 @@ struct iw_handler_def * can put it in any order you want and should not leave holes... * We will automatically export that to user space... */ const struct iw_priv_args * private_args; +#endif /* New location of get_wireless_stats, to de-bloat struct net_device. * The old pointer in struct net_device will be gradually phased diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index a1202841aadd..699410142bfa 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -80,7 +80,7 @@ struct net { #ifdef CONFIG_XFRM struct netns_xfrm xfrm; #endif -#ifdef CONFIG_WIRELESS_EXT +#ifdef CONFIG_WEXT_CORE struct sk_buff_head wext_nlevents; #endif struct net_generic *gen; diff --git a/include/net/wext.h b/include/net/wext.h index 3f2b94de2cfa..4f6e7423174c 100644 --- a/include/net/wext.h +++ b/include/net/wext.h @@ -1,29 +1,19 @@ #ifndef __NET_WEXT_H #define __NET_WEXT_H -/* - * wireless extensions interface to the core code - */ +#include struct net; -#ifdef CONFIG_WIRELESS_EXT -extern int wext_proc_init(struct net *net); -extern void wext_proc_exit(struct net *net); +#ifdef CONFIG_WEXT_CORE extern int wext_handle_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd, void __user *arg); extern int compat_wext_handle_ioctl(struct net *net, unsigned int cmd, unsigned long arg); + extern struct iw_statistics *get_wireless_stats(struct net_device *dev); +extern int call_commit_handler(struct net_device *dev); #else -static inline int wext_proc_init(struct net *net) -{ - return 0; -} -static inline void wext_proc_exit(struct net *net) -{ - return; -} static inline int wext_handle_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd, void __user *arg) { @@ -36,4 +26,35 @@ static inline int compat_wext_handle_ioctl(struct net *net, unsigned int cmd, } #endif +#ifdef CONFIG_WEXT_PROC +extern int wext_proc_init(struct net *net); +extern void wext_proc_exit(struct net *net); +#else +static inline int wext_proc_init(struct net *net) +{ + return 0; +} +static inline void wext_proc_exit(struct net *net) +{ + return; +} +#endif + +#ifdef CONFIG_WEXT_PRIV +int ioctl_private_call(struct net_device *dev, struct iwreq *iwr, + unsigned int cmd, struct iw_request_info *info, + iw_handler handler); +int compat_private_call(struct net_device *dev, struct iwreq *iwr, + unsigned int cmd, struct iw_request_info *info, + iw_handler handler); +int iw_handler_get_private(struct net_device * dev, + struct iw_request_info * info, + union iwreq_data * wrqu, + char * extra); +#else +#define ioctl_private_call NULL +#define compat_private_call NULL +#endif + + #endif /* __NET_WEXT_H */ diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index effb78410eb2..9b07535c2889 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -543,8 +543,12 @@ int netdev_register_kobject(struct net_device *net) *groups++ = &netstat_group; #ifdef CONFIG_WIRELESS_EXT_SYSFS - if (net->wireless_handlers || net->ieee80211_ptr) + if (net->ieee80211_ptr) *groups++ = &wireless_group; +#ifdef CONFIG_WIRELESS_EXT + else if (net->wireless_handlers) + *groups++ = &wireless_group; +#endif #endif #endif /* CONFIG_SYSFS */ diff --git a/net/socket.c b/net/socket.c index 75655365b5fd..92a56709fd7d 100644 --- a/net/socket.c +++ b/net/socket.c @@ -905,11 +905,11 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) { err = dev_ioctl(net, cmd, argp); } else -#ifdef CONFIG_WIRELESS_EXT +#ifdef CONFIG_WEXT_CORE if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { err = dev_ioctl(net, cmd, argp); } else -#endif /* CONFIG_WIRELESS_EXT */ +#endif switch (cmd) { case FIOSETOWN: case SIOCSPGRP: diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index abf7ca3f9ff9..614bdcec1c80 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -1,3 +1,21 @@ +config WIRELESS_EXT + bool + +config WEXT_CORE + def_bool y + depends on CFG80211_WEXT || WIRELESS_EXT + +config WEXT_PROC + def_bool y + depends on PROC_FS + depends on WEXT_CORE + +config WEXT_SPY + bool + +config WEXT_PRIV + bool + config CFG80211 tristate "cfg80211 - wireless configuration API" depends on RFKILL || !RFKILL @@ -56,6 +74,12 @@ config CFG80211_REG_DEBUG If unsure, say N. +config CFG80211_DEFAULT_PS_VALUE + int + default 1 if CFG80211_DEFAULT_PS + default 0 + depends on CFG80211 + config CFG80211_DEFAULT_PS bool "enable powersave by default" depends on CFG80211 @@ -67,14 +91,10 @@ config CFG80211_DEFAULT_PS applications instead -- they need to register their network latency requirement, see Documentation/power/pm_qos_interface.txt. -config CFG80211_DEFAULT_PS_VALUE - int - default 1 if CFG80211_DEFAULT_PS - default 0 - config CFG80211_DEBUGFS bool "cfg80211 DebugFS entries" - depends on CFG80211 && DEBUG_FS + depends on CFG80211 + depends on DEBUG_FS ---help--- You can enable this if you want to debugfs entries for cfg80211. @@ -83,6 +103,7 @@ config CFG80211_DEBUGFS config WIRELESS_OLD_REGULATORY bool "Old wireless static regulatory definitions" default n + depends on CFG80211 ---help--- This option enables the old static regulatory information and uses it within the new framework. This option is available @@ -94,20 +115,19 @@ config WIRELESS_OLD_REGULATORY Say N and if you say Y, please tell us why. The default is N. -config WIRELESS_EXT - bool "Wireless extensions" +config CFG80211_WEXT + bool "cfg80211 wireless extensions compatibility" + depends on CFG80211 + select WEXT_CORE default y - ---help--- - This option enables the legacy wireless extensions - (wireless network interface configuration via ioctls.) - - Say Y unless you've upgraded all your userspace to use - nl80211 instead of wireless extensions. + help + Enable this option if you need old userspace for wireless + extensions with cfg80211-based drivers. config WIRELESS_EXT_SYSFS bool "Wireless extensions sysfs files" default y - depends on WIRELESS_EXT && SYSFS + depends on WEXT_CORE && SYSFS help This option enables the deprecated wireless statistics files in /sys/class/net/*/wireless/. The same information diff --git a/net/wireless/Makefile b/net/wireless/Makefile index 3ecaa9179977..c8141505a83a 100644 --- a/net/wireless/Makefile +++ b/net/wireless/Makefile @@ -1,13 +1,17 @@ -obj-$(CONFIG_WIRELESS_EXT) += wext.o obj-$(CONFIG_CFG80211) += cfg80211.o obj-$(CONFIG_LIB80211) += lib80211.o obj-$(CONFIG_LIB80211_CRYPT_WEP) += lib80211_crypt_wep.o obj-$(CONFIG_LIB80211_CRYPT_CCMP) += lib80211_crypt_ccmp.o obj-$(CONFIG_LIB80211_CRYPT_TKIP) += lib80211_crypt_tkip.o +obj-$(CONFIG_WEXT_CORE) += wext-core.o +obj-$(CONFIG_WEXT_PROC) += wext-proc.o +obj-$(CONFIG_WEXT_SPY) += wext-spy.o +obj-$(CONFIG_WEXT_PRIV) += wext-priv.o + cfg80211-y += core.o sysfs.o radiotap.o util.o reg.o scan.o nl80211.o cfg80211-y += mlme.o ibss.o sme.o chan.o cfg80211-$(CONFIG_CFG80211_DEBUGFS) += debugfs.o -cfg80211-$(CONFIG_WIRELESS_EXT) += wext-compat.o wext-sme.o +cfg80211-$(CONFIG_CFG80211_WEXT) += wext-compat.o wext-sme.o ccflags-y += -D__CHECK_ENDIAN__ diff --git a/net/wireless/core.c b/net/wireless/core.c index e6f02e98e5fd..eb0bb24b99c3 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -358,6 +358,10 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) INIT_LIST_HEAD(&rdev->bss_list); INIT_WORK(&rdev->scan_done_wk, __cfg80211_scan_done); +#ifdef CONFIG_CFG80211_WEXT + rdev->wiphy.wext = &cfg80211_wext_handler; +#endif + device_initialize(&rdev->wiphy.dev); rdev->wiphy.dev.class = &ieee80211_class; rdev->wiphy.dev.platform_data = rdev; @@ -672,9 +676,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, wdev->netdev = dev; wdev->sme_state = CFG80211_SME_IDLE; mutex_unlock(&rdev->devlist_mtx); -#ifdef CONFIG_WIRELESS_EXT - if (!dev->wireless_handlers) - dev->wireless_handlers = &cfg80211_wext_handler; +#ifdef CONFIG_CFG80211_WEXT wdev->wext.default_key = -1; wdev->wext.default_mgmt_key = -1; wdev->wext.connect.auth_type = NL80211_AUTHTYPE_AUTOMATIC; @@ -696,7 +698,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, break; case NL80211_IFTYPE_STATION: wdev_lock(wdev); -#ifdef CONFIG_WIRELESS_EXT +#ifdef CONFIG_CFG80211_WEXT kfree(wdev->wext.ie); wdev->wext.ie = NULL; wdev->wext.ie_len = 0; @@ -728,7 +730,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, mutex_unlock(&rdev->devlist_mtx); dev_put(dev); } -#ifdef CONFIG_WIRELESS_EXT +#ifdef CONFIG_CFG80211_WEXT cfg80211_lock_rdev(rdev); mutex_lock(&rdev->devlist_mtx); wdev_lock(wdev); @@ -766,7 +768,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, sysfs_remove_link(&dev->dev.kobj, "phy80211"); list_del_init(&wdev->list); rdev->devlist_generation++; -#ifdef CONFIG_WIRELESS_EXT +#ifdef CONFIG_CFG80211_WEXT kfree(wdev->wext.keys); #endif } diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index c88338911979..39b6d92e2828 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -15,7 +15,7 @@ void __cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_bss *bss; -#ifdef CONFIG_WIRELESS_EXT +#ifdef CONFIG_CFG80211_WEXT union iwreq_data wrqu; #endif @@ -44,7 +44,7 @@ void __cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid) nl80211_send_ibss_bssid(wiphy_to_dev(wdev->wiphy), dev, bssid, GFP_KERNEL); -#ifdef CONFIG_WIRELESS_EXT +#ifdef CONFIG_CFG80211_WEXT memset(&wrqu, 0, sizeof(wrqu)); memcpy(wrqu.ap_addr.sa_data, bssid, ETH_ALEN); wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL); @@ -96,7 +96,7 @@ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, kfree(wdev->connect_keys); wdev->connect_keys = connkeys; -#ifdef CONFIG_WIRELESS_EXT +#ifdef CONFIG_CFG80211_WEXT wdev->wext.ibss.channel = params->channel; #endif err = rdev->ops->join_ibss(&rdev->wiphy, dev, params); @@ -154,7 +154,7 @@ static void __cfg80211_clear_ibss(struct net_device *dev, bool nowext) wdev->current_bss = NULL; wdev->ssid_len = 0; -#ifdef CONFIG_WIRELESS_EXT +#ifdef CONFIG_CFG80211_WEXT if (!nowext) wdev->wext.ibss.ssid_len = 0; #endif @@ -203,7 +203,7 @@ int cfg80211_leave_ibss(struct cfg80211_registered_device *rdev, return err; } -#ifdef CONFIG_WIRELESS_EXT +#ifdef CONFIG_CFG80211_WEXT int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev) { diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 79d2eec54cec..ceb2c14c8f47 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -331,7 +331,7 @@ void cfg80211_michael_mic_failure(struct net_device *dev, const u8 *addr, { struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); -#ifdef CONFIG_WIRELESS_EXT +#ifdef CONFIG_CFG80211_WEXT union iwreq_data wrqu; char *buf = kmalloc(128, gfp); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index e0ecc9f153d4..14004e2ebd62 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1264,7 +1264,7 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info) if (!err) err = func(&rdev->wiphy, dev, key.idx); -#ifdef CONFIG_WIRELESS_EXT +#ifdef CONFIG_CFG80211_WEXT if (!err) { if (func == rdev->ops->set_default_key) dev->ieee80211_ptr->wext.default_key = key.idx; @@ -1365,7 +1365,7 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info) if (!err) err = rdev->ops->del_key(&rdev->wiphy, dev, key.idx, mac_addr); -#ifdef CONFIG_WIRELESS_EXT +#ifdef CONFIG_CFG80211_WEXT if (!err) { if (key.idx == dev->ieee80211_ptr->wext.default_key) dev->ieee80211_ptr->wext.default_key = -1; diff --git a/net/wireless/scan.c b/net/wireless/scan.c index e5f92ee758f4..2e8c515f3c5c 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -22,7 +22,7 @@ void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, bool leak) { struct cfg80211_scan_request *request; struct net_device *dev; -#ifdef CONFIG_WIRELESS_EXT +#ifdef CONFIG_CFG80211_WEXT union iwreq_data wrqu; #endif @@ -47,7 +47,7 @@ void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, bool leak) else nl80211_send_scan_done(rdev, dev); -#ifdef CONFIG_WIRELESS_EXT +#ifdef CONFIG_CFG80211_WEXT if (!request->aborted) { memset(&wrqu, 0, sizeof(wrqu)); @@ -592,7 +592,7 @@ void cfg80211_unlink_bss(struct wiphy *wiphy, struct cfg80211_bss *pub) } EXPORT_SYMBOL(cfg80211_unlink_bss); -#ifdef CONFIG_WIRELESS_EXT +#ifdef CONFIG_CFG80211_WEXT int cfg80211_wext_siwscan(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 93c3ed329204..d3624152f7f7 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -345,7 +345,7 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, { struct wireless_dev *wdev = dev->ieee80211_ptr; u8 *country_ie; -#ifdef CONFIG_WIRELESS_EXT +#ifdef CONFIG_CFG80211_WEXT union iwreq_data wrqu; #endif @@ -362,7 +362,7 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, resp_ie, resp_ie_len, status, GFP_KERNEL); -#ifdef CONFIG_WIRELESS_EXT +#ifdef CONFIG_CFG80211_WEXT if (wextev) { if (req_ie && status == WLAN_STATUS_SUCCESS) { memset(&wrqu, 0, sizeof(wrqu)); @@ -477,7 +477,7 @@ void __cfg80211_roamed(struct wireless_dev *wdev, const u8 *bssid, const u8 *resp_ie, size_t resp_ie_len) { struct cfg80211_bss *bss; -#ifdef CONFIG_WIRELESS_EXT +#ifdef CONFIG_CFG80211_WEXT union iwreq_data wrqu; #endif @@ -512,7 +512,7 @@ void __cfg80211_roamed(struct wireless_dev *wdev, const u8 *bssid, req_ie, req_ie_len, resp_ie, resp_ie_len, GFP_KERNEL); -#ifdef CONFIG_WIRELESS_EXT +#ifdef CONFIG_CFG80211_WEXT if (req_ie) { memset(&wrqu, 0, sizeof(wrqu)); wrqu.data.length = req_ie_len; @@ -573,7 +573,7 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); int i; -#ifdef CONFIG_WIRELESS_EXT +#ifdef CONFIG_CFG80211_WEXT union iwreq_data wrqu; #endif @@ -631,7 +631,7 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, for (i = 0; i < 6; i++) rdev->ops->del_key(wdev->wiphy, dev, i, NULL); -#ifdef CONFIG_WIRELESS_EXT +#ifdef CONFIG_CFG80211_WEXT memset(&wrqu, 0, sizeof(wrqu)); wrqu.ap_addr.sa_family = ARPHRD_ETHER; wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL); diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c new file mode 100644 index 000000000000..a4e5ddc8d4f5 --- /dev/null +++ b/net/wireless/wext-core.c @@ -0,0 +1,1063 @@ +/* + * This file implement the Wireless Extensions core API. + * + * Authors : Jean Tourrilhes - HPL - + * Copyright (c) 1997-2007 Jean Tourrilhes, All Rights Reserved. + * Copyright 2009 Johannes Berg + * + * (As all part of the Linux kernel, this file is GPL) + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +typedef int (*wext_ioctl_func)(struct net_device *, struct iwreq *, + unsigned int, struct iw_request_info *, + iw_handler); + + +/* + * Meta-data about all the standard Wireless Extension request we + * know about. + */ +static const struct iw_ioctl_description standard_ioctl[] = { + [SIOCSIWCOMMIT - SIOCIWFIRST] = { + .header_type = IW_HEADER_TYPE_NULL, + }, + [SIOCGIWNAME - SIOCIWFIRST] = { + .header_type = IW_HEADER_TYPE_CHAR, + .flags = IW_DESCR_FLAG_DUMP, + }, + [SIOCSIWNWID - SIOCIWFIRST] = { + .header_type = IW_HEADER_TYPE_PARAM, + .flags = IW_DESCR_FLAG_EVENT, + }, + [SIOCGIWNWID - SIOCIWFIRST] = { + .header_type = IW_HEADER_TYPE_PARAM, + .flags = IW_DESCR_FLAG_DUMP, + }, + [SIOCSIWFREQ - SIOCIWFIRST] = { + .header_type = IW_HEADER_TYPE_FREQ, + .flags = IW_DESCR_FLAG_EVENT, + }, + [SIOCGIWFREQ - SIOCIWFIRST] = { + .header_type = IW_HEADER_TYPE_FREQ, + .flags = IW_DESCR_FLAG_DUMP, + }, + [SIOCSIWMODE - SIOCIWFIRST] = { + .header_type = IW_HEADER_TYPE_UINT, + .flags = IW_DESCR_FLAG_EVENT, + }, + [SIOCGIWMODE - SIOCIWFIRST] = { + .header_type = IW_HEADER_TYPE_UINT, + .flags = IW_DESCR_FLAG_DUMP, + }, + [SIOCSIWSENS - SIOCIWFIRST] = { + .header_type = IW_HEADER_TYPE_PARAM, + }, + [SIOCGIWSENS - SIOCIWFIRST] = { + .header_type = IW_HEADER_TYPE_PARAM, + }, + [SIOCSIWRANGE - SIOCIWFIRST] = { + .header_type = IW_HEADER_TYPE_NULL, + }, + [SIOCGIWRANGE - SIOCIWFIRST] = { + .header_type = IW_HEADER_TYPE_POINT, + .token_size = 1, + .max_tokens = sizeof(struct iw_range), + .flags = IW_DESCR_FLAG_DUMP, + }, + [SIOCSIWPRIV - SIOCIWFIRST] = { + .header_type = IW_HEADER_TYPE_NULL, + }, + [SIOCGIWPRIV - SIOCIWFIRST] = { /* (handled directly by us) */ + .header_type = IW_HEADER_TYPE_POINT, + .token_size = sizeof(struct iw_priv_args), + .max_tokens = 16, + .flags = IW_DESCR_FLAG_NOMAX, + }, + [SIOCSIWSTATS - SIOCIWFIRST] = { + .header_type = IW_HEADER_TYPE_NULL, + }, + [SIOCGIWSTATS - SIOCIWFIRST] = { /* (handled directly by us) */ + .header_type = IW_HEADER_TYPE_POINT, + .token_size = 1, + .max_tokens = sizeof(struct iw_statistics), + .flags = IW_DESCR_FLAG_DUMP, + }, + [SIOCSIWSPY - SIOCIWFIRST] = { + .header_type = IW_HEADER_TYPE_POINT, + .token_size = sizeof(struct sockaddr), + .max_tokens = IW_MAX_SPY, + }, + [SIOCGIWSPY - SIOCIWFIRST] = { + .header_type = IW_HEADER_TYPE_POINT, + .token_size = sizeof(struct sockaddr) + + sizeof(struct iw_quality), + .max_tokens = IW_MAX_SPY, + }, + [SIOCSIWTHRSPY - SIOCIWFIRST] = { + .header_type = IW_HEADER_TYPE_POINT, + .token_size = sizeof(struct iw_thrspy), + .min_tokens = 1, + .max_tokens = 1, + }, + [SIOCGIWTHRSPY - SIOCIWFIRST] = { + .header_type = IW_HEADER_TYPE_POINT, + .token_size = sizeof(struct iw_thrspy), + .min_tokens = 1, + .max_tokens = 1, + }, + [SIOCSIWAP - SIOCIWFIRST] = { + .header_type = IW_HEADER_TYPE_ADDR, + }, + [SIOCGIWAP - SIOCIWFIRST] = { + .header_type = IW_HEADER_TYPE_ADDR, + .flags = IW_DESCR_FLAG_DUMP, + }, + [SIOCSIWMLME - SIOCIWFIRST] = { + .header_type = IW_HEADER_TYPE_POINT, + .token_size = 1, + .min_tokens = sizeof(struct iw_mlme), + .max_tokens = sizeof(struct iw_mlme), + }, + [SIOCGIWAPLIST - SIOCIWFIRST] = { + .header_type = IW_HEADER_TYPE_POINT, + .token_size = sizeof(struct sockaddr) + + sizeof(struct iw_quality), + .max_tokens = IW_MAX_AP, + .flags = IW_DESCR_FLAG_NOMAX, + }, + [SIOCSIWSCAN - SIOCIWFIRST] = { + .header_type = IW_HEADER_TYPE_POINT, + .token_size = 1, + .min_tokens = 0, + .max_tokens = sizeof(struct iw_scan_req), + }, + [SIOCGIWSCAN - SIOCIWFIRST] = { + .header_type = IW_HEADER_TYPE_POINT, + .token_size = 1, + .max_tokens = IW_SCAN_MAX_DATA, + .flags = IW_DESCR_FLAG_NOMAX, + }, + [SIOCSIWESSID - SIOCIWFIRST] = { + .header_type = IW_HEADER_TYPE_POINT, + .token_size = 1, + .max_tokens = IW_ESSID_MAX_SIZE, + .flags = IW_DESCR_FLAG_EVENT, + }, + [SIOCGIWESSID - SIOCIWFIRST] = { + .header_type = IW_HEADER_TYPE_POINT, + .token_size = 1, + .max_tokens = IW_ESSID_MAX_SIZE, + .flags = IW_DESCR_FLAG_DUMP, + }, + [SIOCSIWNICKN - SIOCIWFIRST] = { + .header_type = IW_HEADER_TYPE_POINT, + .token_size = 1, + .max_tokens = IW_ESSID_MAX_SIZE, + }, + [SIOCGIWNICKN - SIOCIWFIRST] = { + .header_type = IW_HEADER_TYPE_POINT, + .token_size = 1, + .max_tokens = IW_ESSID_MAX_SIZE, + }, + [SIOCSIWRATE - SIOCIWFIRST] = { + .header_type = IW_HEADER_TYPE_PARAM, + }, + [SIOCGIWRATE - SIOCIWFIRST] = { + .header_type = IW_HEADER_TYPE_PARAM, + }, + [SIOCSIWRTS - SIOCIWFIRST] = { + .header_type = IW_HEADER_TYPE_PARAM, + }, + [SIOCGIWRTS - SIOCIWFIRST] = { + .header_type = IW_HEADER_TYPE_PARAM, + }, + [SIOCSIWFRAG - SIOCIWFIRST] = { + .header_type = IW_HEADER_TYPE_PARAM, + }, + [SIOCGIWFRAG - SIOCIWFIRST] = { + .header_type = IW_HEADER_TYPE_PARAM, + }, + [SIOCSIWTXPOW - SIOCIWFIRST] = { + .header_type = IW_HEADER_TYPE_PARAM, + }, + [SIOCGIWTXPOW - SIOCIWFIRST] = { + .header_type = IW_HEADER_TYPE_PARAM, + }, + [SIOCSIWRETRY - SIOCIWFIRST] = { + .header_type = IW_HEADER_TYPE_PARAM, + }, + [SIOCGIWRETRY - SIOCIWFIRST] = { + .header_type = IW_HEADER_TYPE_PARAM, + }, + [SIOCSIWENCODE - SIOCIWFIRST] = { + .header_type = IW_HEADER_TYPE_POINT, + .token_size = 1, + .max_tokens = IW_ENCODING_TOKEN_MAX, + .flags = IW_DESCR_FLAG_EVENT | IW_DESCR_FLAG_RESTRICT, + }, + [SIOCGIWENCODE - SIOCIWFIRST] = { + .header_type = IW_HEADER_TYPE_POINT, + .token_size = 1, + .max_tokens = IW_ENCODING_TOKEN_MAX, + .flags = IW_DESCR_FLAG_DUMP | IW_DESCR_FLAG_RESTRICT, + }, + [SIOCSIWPOWER - SIOCIWFIRST] = { + .header_type = IW_HEADER_TYPE_PARAM, + }, + [SIOCGIWPOWER - SIOCIWFIRST] = { + .header_type = IW_HEADER_TYPE_PARAM, + }, + [SIOCSIWGENIE - SIOCIWFIRST] = { + .header_type = IW_HEADER_TYPE_POINT, + .token_size = 1, + .max_tokens = IW_GENERIC_IE_MAX, + }, + [SIOCGIWGENIE - SIOCIWFIRST] = { + .header_type = IW_HEADER_TYPE_POINT, + .token_size = 1, + .max_tokens = IW_GENERIC_IE_MAX, + }, + [SIOCSIWAUTH - SIOCIWFIRST] = { + .header_type = IW_HEADER_TYPE_PARAM, + }, + [SIOCGIWAUTH - SIOCIWFIRST] = { + .header_type = IW_HEADER_TYPE_PARAM, + }, + [SIOCSIWENCODEEXT - SIOCIWFIRST] = { + .header_type = IW_HEADER_TYPE_POINT, + .token_size = 1, + .min_tokens = sizeof(struct iw_encode_ext), + .max_tokens = sizeof(struct iw_encode_ext) + + IW_ENCODING_TOKEN_MAX, + }, + [SIOCGIWENCODEEXT - SIOCIWFIRST] = { + .header_type = IW_HEADER_TYPE_POINT, + .token_size = 1, + .min_tokens = sizeof(struct iw_encode_ext), + .max_tokens = sizeof(struct iw_encode_ext) + + IW_ENCODING_TOKEN_MAX, + }, + [SIOCSIWPMKSA - SIOCIWFIRST] = { + .header_type = IW_HEADER_TYPE_POINT, + .token_size = 1, + .min_tokens = sizeof(struct iw_pmksa), + .max_tokens = sizeof(struct iw_pmksa), + }, +}; +static const unsigned standard_ioctl_num = ARRAY_SIZE(standard_ioctl); + +/* + * Meta-data about all the additional standard Wireless Extension events + * we know about. + */ +static const struct iw_ioctl_description standard_event[] = { + [IWEVTXDROP - IWEVFIRST] = { + .header_type = IW_HEADER_TYPE_ADDR, + }, + [IWEVQUAL - IWEVFIRST] = { + .header_type = IW_HEADER_TYPE_QUAL, + }, + [IWEVCUSTOM - IWEVFIRST] = { + .header_type = IW_HEADER_TYPE_POINT, + .token_size = 1, + .max_tokens = IW_CUSTOM_MAX, + }, + [IWEVREGISTERED - IWEVFIRST] = { + .header_type = IW_HEADER_TYPE_ADDR, + }, + [IWEVEXPIRED - IWEVFIRST] = { + .header_type = IW_HEADER_TYPE_ADDR, + }, + [IWEVGENIE - IWEVFIRST] = { + .header_type = IW_HEADER_TYPE_POINT, + .token_size = 1, + .max_tokens = IW_GENERIC_IE_MAX, + }, + [IWEVMICHAELMICFAILURE - IWEVFIRST] = { + .header_type = IW_HEADER_TYPE_POINT, + .token_size = 1, + .max_tokens = sizeof(struct iw_michaelmicfailure), + }, + [IWEVASSOCREQIE - IWEVFIRST] = { + .header_type = IW_HEADER_TYPE_POINT, + .token_size = 1, + .max_tokens = IW_GENERIC_IE_MAX, + }, + [IWEVASSOCRESPIE - IWEVFIRST] = { + .header_type = IW_HEADER_TYPE_POINT, + .token_size = 1, + .max_tokens = IW_GENERIC_IE_MAX, + }, + [IWEVPMKIDCAND - IWEVFIRST] = { + .header_type = IW_HEADER_TYPE_POINT, + .token_size = 1, + .max_tokens = sizeof(struct iw_pmkid_cand), + }, +}; +static const unsigned standard_event_num = ARRAY_SIZE(standard_event); + +/* Size (in bytes) of various events */ +static const int event_type_size[] = { + IW_EV_LCP_LEN, /* IW_HEADER_TYPE_NULL */ + 0, + IW_EV_CHAR_LEN, /* IW_HEADER_TYPE_CHAR */ + 0, + IW_EV_UINT_LEN, /* IW_HEADER_TYPE_UINT */ + IW_EV_FREQ_LEN, /* IW_HEADER_TYPE_FREQ */ + IW_EV_ADDR_LEN, /* IW_HEADER_TYPE_ADDR */ + 0, + IW_EV_POINT_LEN, /* Without variable payload */ + IW_EV_PARAM_LEN, /* IW_HEADER_TYPE_PARAM */ + IW_EV_QUAL_LEN, /* IW_HEADER_TYPE_QUAL */ +}; + +#ifdef CONFIG_COMPAT +static const int compat_event_type_size[] = { + IW_EV_COMPAT_LCP_LEN, /* IW_HEADER_TYPE_NULL */ + 0, + IW_EV_COMPAT_CHAR_LEN, /* IW_HEADER_TYPE_CHAR */ + 0, + IW_EV_COMPAT_UINT_LEN, /* IW_HEADER_TYPE_UINT */ + IW_EV_COMPAT_FREQ_LEN, /* IW_HEADER_TYPE_FREQ */ + IW_EV_COMPAT_ADDR_LEN, /* IW_HEADER_TYPE_ADDR */ + 0, + IW_EV_COMPAT_POINT_LEN, /* Without variable payload */ + IW_EV_COMPAT_PARAM_LEN, /* IW_HEADER_TYPE_PARAM */ + IW_EV_COMPAT_QUAL_LEN, /* IW_HEADER_TYPE_QUAL */ +}; +#endif + + +/* IW event code */ + +static int __net_init wext_pernet_init(struct net *net) +{ + skb_queue_head_init(&net->wext_nlevents); + return 0; +} + +static void __net_exit wext_pernet_exit(struct net *net) +{ + skb_queue_purge(&net->wext_nlevents); +} + +static struct pernet_operations wext_pernet_ops = { + .init = wext_pernet_init, + .exit = wext_pernet_exit, +}; + +static int __init wireless_nlevent_init(void) +{ + return register_pernet_subsys(&wext_pernet_ops); +} + +subsys_initcall(wireless_nlevent_init); + +/* Process events generated by the wireless layer or the driver. */ +static void wireless_nlevent_process(struct work_struct *work) +{ + struct sk_buff *skb; + struct net *net; + + rtnl_lock(); + + for_each_net(net) { + while ((skb = skb_dequeue(&net->wext_nlevents))) + rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, + GFP_KERNEL); + } + + rtnl_unlock(); +} + +static DECLARE_WORK(wireless_nlevent_work, wireless_nlevent_process); + +static struct nlmsghdr *rtnetlink_ifinfo_prep(struct net_device *dev, + struct sk_buff *skb) +{ + struct ifinfomsg *r; + struct nlmsghdr *nlh; + + nlh = nlmsg_put(skb, 0, 0, RTM_NEWLINK, sizeof(*r), 0); + if (!nlh) + return NULL; + + r = nlmsg_data(nlh); + r->ifi_family = AF_UNSPEC; + r->__ifi_pad = 0; + r->ifi_type = dev->type; + r->ifi_index = dev->ifindex; + r->ifi_flags = dev_get_flags(dev); + r->ifi_change = 0; /* Wireless changes don't affect those flags */ + + NLA_PUT_STRING(skb, IFLA_IFNAME, dev->name); + + return nlh; + nla_put_failure: + nlmsg_cancel(skb, nlh); + return NULL; +} + + +/* + * Main event dispatcher. Called from other parts and drivers. + * Send the event on the appropriate channels. + * May be called from interrupt context. + */ +void wireless_send_event(struct net_device * dev, + unsigned int cmd, + union iwreq_data * wrqu, + const char * extra) +{ + const struct iw_ioctl_description * descr = NULL; + int extra_len = 0; + struct iw_event *event; /* Mallocated whole event */ + int event_len; /* Its size */ + int hdr_len; /* Size of the event header */ + int wrqu_off = 0; /* Offset in wrqu */ + /* Don't "optimise" the following variable, it will crash */ + unsigned cmd_index; /* *MUST* be unsigned */ + struct sk_buff *skb; + struct nlmsghdr *nlh; + struct nlattr *nla; +#ifdef CONFIG_COMPAT + struct __compat_iw_event *compat_event; + struct compat_iw_point compat_wrqu; + struct sk_buff *compskb; +#endif + + /* + * Nothing in the kernel sends scan events with data, be safe. + * This is necessary because we cannot fix up scan event data + * for compat, due to being contained in 'extra', but normally + * applications are required to retrieve the scan data anyway + * and no data is included in the event, this codifies that + * practice. + */ + if (WARN_ON(cmd == SIOCGIWSCAN && extra)) + extra = NULL; + + /* Get the description of the Event */ + if (cmd <= SIOCIWLAST) { + cmd_index = cmd - SIOCIWFIRST; + if (cmd_index < standard_ioctl_num) + descr = &(standard_ioctl[cmd_index]); + } else { + cmd_index = cmd - IWEVFIRST; + if (cmd_index < standard_event_num) + descr = &(standard_event[cmd_index]); + } + /* Don't accept unknown events */ + if (descr == NULL) { + /* Note : we don't return an error to the driver, because + * the driver would not know what to do about it. It can't + * return an error to the user, because the event is not + * initiated by a user request. + * The best the driver could do is to log an error message. + * We will do it ourselves instead... + */ + printk(KERN_ERR "%s (WE) : Invalid/Unknown Wireless Event (0x%04X)\n", + dev->name, cmd); + return; + } + + /* Check extra parameters and set extra_len */ + if (descr->header_type == IW_HEADER_TYPE_POINT) { + /* Check if number of token fits within bounds */ + if (wrqu->data.length > descr->max_tokens) { + printk(KERN_ERR "%s (WE) : Wireless Event too big (%d)\n", dev->name, wrqu->data.length); + return; + } + if (wrqu->data.length < descr->min_tokens) { + printk(KERN_ERR "%s (WE) : Wireless Event too small (%d)\n", dev->name, wrqu->data.length); + return; + } + /* Calculate extra_len - extra is NULL for restricted events */ + if (extra != NULL) + extra_len = wrqu->data.length * descr->token_size; + /* Always at an offset in wrqu */ + wrqu_off = IW_EV_POINT_OFF; + } + + /* Total length of the event */ + hdr_len = event_type_size[descr->header_type]; + event_len = hdr_len + extra_len; + + /* + * The problem for 64/32 bit. + * + * On 64-bit, a regular event is laid out as follows: + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | + * | event.len | event.cmd | p a d d i n g | + * | wrqu data ... (with the correct size) | + * + * This padding exists because we manipulate event->u, + * and 'event' is not packed. + * + * An iw_point event is laid out like this instead: + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | + * | event.len | event.cmd | p a d d i n g | + * | iwpnt.len | iwpnt.flg | p a d d i n g | + * | extra data ... + * + * The second padding exists because struct iw_point is extended, + * but this depends on the platform... + * + * On 32-bit, all the padding shouldn't be there. + */ + + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); + if (!skb) + return; + + /* Send via the RtNetlink event channel */ + nlh = rtnetlink_ifinfo_prep(dev, skb); + if (WARN_ON(!nlh)) { + kfree_skb(skb); + return; + } + + /* Add the wireless events in the netlink packet */ + nla = nla_reserve(skb, IFLA_WIRELESS, event_len); + if (!nla) { + kfree_skb(skb); + return; + } + event = nla_data(nla); + + /* Fill event - first clear to avoid data leaking */ + memset(event, 0, hdr_len); + event->len = event_len; + event->cmd = cmd; + memcpy(&event->u, ((char *) wrqu) + wrqu_off, hdr_len - IW_EV_LCP_LEN); + if (extra_len) + memcpy(((char *) event) + hdr_len, extra, extra_len); + + nlmsg_end(skb, nlh); +#ifdef CONFIG_COMPAT + hdr_len = compat_event_type_size[descr->header_type]; + event_len = hdr_len + extra_len; + + compskb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); + if (!compskb) { + kfree_skb(skb); + return; + } + + /* Send via the RtNetlink event channel */ + nlh = rtnetlink_ifinfo_prep(dev, compskb); + if (WARN_ON(!nlh)) { + kfree_skb(skb); + kfree_skb(compskb); + return; + } + + /* Add the wireless events in the netlink packet */ + nla = nla_reserve(compskb, IFLA_WIRELESS, event_len); + if (!nla) { + kfree_skb(skb); + kfree_skb(compskb); + return; + } + compat_event = nla_data(nla); + + compat_event->len = event_len; + compat_event->cmd = cmd; + if (descr->header_type == IW_HEADER_TYPE_POINT) { + compat_wrqu.length = wrqu->data.length; + compat_wrqu.flags = wrqu->data.flags; + memcpy(&compat_event->pointer, + ((char *) &compat_wrqu) + IW_EV_COMPAT_POINT_OFF, + hdr_len - IW_EV_COMPAT_LCP_LEN); + if (extra_len) + memcpy(((char *) compat_event) + hdr_len, + extra, extra_len); + } else { + /* extra_len must be zero, so no if (extra) needed */ + memcpy(&compat_event->pointer, wrqu, + hdr_len - IW_EV_COMPAT_LCP_LEN); + } + + nlmsg_end(compskb, nlh); + + skb_shinfo(skb)->frag_list = compskb; +#endif + skb_queue_tail(&dev_net(dev)->wext_nlevents, skb); + schedule_work(&wireless_nlevent_work); +} +EXPORT_SYMBOL(wireless_send_event); + + + +/* IW handlers */ + +struct iw_statistics *get_wireless_stats(struct net_device *dev) +{ +#ifdef CONFIG_WIRELESS_EXT + if ((dev->wireless_handlers != NULL) && + (dev->wireless_handlers->get_wireless_stats != NULL)) + return dev->wireless_handlers->get_wireless_stats(dev); +#endif + +#ifdef CONFIG_CFG80211_WEXT + if (dev->ieee80211_ptr && dev->ieee80211_ptr && + dev->ieee80211_ptr->wiphy && + dev->ieee80211_ptr->wiphy->wext && + dev->ieee80211_ptr->wiphy->wext->get_wireless_stats) + return dev->ieee80211_ptr->wiphy->wext->get_wireless_stats(dev); +#endif + + /* not found */ + return NULL; +} + +static int iw_handler_get_iwstats(struct net_device * dev, + struct iw_request_info * info, + union iwreq_data * wrqu, + char * extra) +{ + /* Get stats from the driver */ + struct iw_statistics *stats; + + stats = get_wireless_stats(dev); + if (stats) { + /* Copy statistics to extra */ + memcpy(extra, stats, sizeof(struct iw_statistics)); + wrqu->data.length = sizeof(struct iw_statistics); + + /* Check if we need to clear the updated flag */ + if (wrqu->data.flags != 0) + stats->qual.updated &= ~IW_QUAL_ALL_UPDATED; + return 0; + } else + return -EOPNOTSUPP; +} + +static iw_handler get_handler(struct net_device *dev, unsigned int cmd) +{ + /* Don't "optimise" the following variable, it will crash */ + unsigned int index; /* *MUST* be unsigned */ + const struct iw_handler_def *handlers = NULL; + +#ifdef CONFIG_CFG80211_WEXT + if (dev->ieee80211_ptr && dev->ieee80211_ptr->wiphy) + handlers = dev->ieee80211_ptr->wiphy->wext; +#endif +#ifdef CONFIG_WIRELESS_EXT + if (dev->wireless_handlers) + handlers = dev->wireless_handlers; +#endif + + if (!handlers) + return NULL; + + /* Try as a standard command */ + index = cmd - SIOCIWFIRST; + if (index < handlers->num_standard) + return handlers->standard[index]; + +#ifdef CONFIG_WEXT_PRIV + /* Try as a private command */ + index = cmd - SIOCIWFIRSTPRIV; + if (index < handlers->num_private) + return handlers->private[index]; +#endif + + /* Not found */ + return NULL; +} + +static int ioctl_standard_iw_point(struct iw_point *iwp, unsigned int cmd, + const struct iw_ioctl_description *descr, + iw_handler handler, struct net_device *dev, + struct iw_request_info *info) +{ + int err, extra_size, user_length = 0, essid_compat = 0; + char *extra; + + /* Calculate space needed by arguments. Always allocate + * for max space. + */ + extra_size = descr->max_tokens * descr->token_size; + + /* Check need for ESSID compatibility for WE < 21 */ + switch (cmd) { + case SIOCSIWESSID: + case SIOCGIWESSID: + case SIOCSIWNICKN: + case SIOCGIWNICKN: + if (iwp->length == descr->max_tokens + 1) + essid_compat = 1; + else if (IW_IS_SET(cmd) && (iwp->length != 0)) { + char essid[IW_ESSID_MAX_SIZE + 1]; + unsigned int len; + len = iwp->length * descr->token_size; + + if (len > IW_ESSID_MAX_SIZE) + return -EFAULT; + + err = copy_from_user(essid, iwp->pointer, len); + if (err) + return -EFAULT; + + if (essid[iwp->length - 1] == '\0') + essid_compat = 1; + } + break; + default: + break; + } + + iwp->length -= essid_compat; + + /* Check what user space is giving us */ + if (IW_IS_SET(cmd)) { + /* Check NULL pointer */ + if (!iwp->pointer && iwp->length != 0) + return -EFAULT; + /* Check if number of token fits within bounds */ + if (iwp->length > descr->max_tokens) + return -E2BIG; + if (iwp->length < descr->min_tokens) + return -EINVAL; + } else { + /* Check NULL pointer */ + if (!iwp->pointer) + return -EFAULT; + /* Save user space buffer size for checking */ + user_length = iwp->length; + + /* Don't check if user_length > max to allow forward + * compatibility. The test user_length < min is + * implied by the test at the end. + */ + + /* Support for very large requests */ + if ((descr->flags & IW_DESCR_FLAG_NOMAX) && + (user_length > descr->max_tokens)) { + /* Allow userspace to GET more than max so + * we can support any size GET requests. + * There is still a limit : -ENOMEM. + */ + extra_size = user_length * descr->token_size; + + /* Note : user_length is originally a __u16, + * and token_size is controlled by us, + * so extra_size won't get negative and + * won't overflow... + */ + } + } + + /* kzalloc() ensures NULL-termination for essid_compat. */ + extra = kzalloc(extra_size, GFP_KERNEL); + if (!extra) + return -ENOMEM; + + /* If it is a SET, get all the extra data in here */ + if (IW_IS_SET(cmd) && (iwp->length != 0)) { + if (copy_from_user(extra, iwp->pointer, + iwp->length * + descr->token_size)) { + err = -EFAULT; + goto out; + } + + if (cmd == SIOCSIWENCODEEXT) { + struct iw_encode_ext *ee = (void *) extra; + + if (iwp->length < sizeof(*ee) + ee->key_len) + return -EFAULT; + } + } + + err = handler(dev, info, (union iwreq_data *) iwp, extra); + + iwp->length += essid_compat; + + /* If we have something to return to the user */ + if (!err && IW_IS_GET(cmd)) { + /* Check if there is enough buffer up there */ + if (user_length < iwp->length) { + err = -E2BIG; + goto out; + } + + if (copy_to_user(iwp->pointer, extra, + iwp->length * + descr->token_size)) { + err = -EFAULT; + goto out; + } + } + + /* Generate an event to notify listeners of the change */ + if ((descr->flags & IW_DESCR_FLAG_EVENT) && err == -EIWCOMMIT) { + union iwreq_data *data = (union iwreq_data *) iwp; + + if (descr->flags & IW_DESCR_FLAG_RESTRICT) + /* If the event is restricted, don't + * export the payload. + */ + wireless_send_event(dev, cmd, data, NULL); + else + wireless_send_event(dev, cmd, data, extra); + } + +out: + kfree(extra); + return err; +} + +/* + * Call the commit handler in the driver + * (if exist and if conditions are right) + * + * Note : our current commit strategy is currently pretty dumb, + * but we will be able to improve on that... + * The goal is to try to agreagate as many changes as possible + * before doing the commit. Drivers that will define a commit handler + * are usually those that need a reset after changing parameters, so + * we want to minimise the number of reset. + * A cool idea is to use a timer : at each "set" command, we re-set the + * timer, when the timer eventually fires, we call the driver. + * Hopefully, more on that later. + * + * Also, I'm waiting to see how many people will complain about the + * netif_running(dev) test. I'm open on that one... + * Hopefully, the driver will remember to do a commit in "open()" ;-) + */ +int call_commit_handler(struct net_device *dev) +{ +#ifdef CONFIG_WIRELESS_EXT + if ((netif_running(dev)) && + (dev->wireless_handlers->standard[0] != NULL)) + /* Call the commit handler on the driver */ + return dev->wireless_handlers->standard[0](dev, NULL, + NULL, NULL); + else + return 0; /* Command completed successfully */ +#else + /* cfg80211 has no commit */ + return 0; +#endif +} + +/* + * Main IOCTl dispatcher. + * Check the type of IOCTL and call the appropriate wrapper... + */ +static int wireless_process_ioctl(struct net *net, struct ifreq *ifr, + unsigned int cmd, + struct iw_request_info *info, + wext_ioctl_func standard, + wext_ioctl_func private) +{ + struct iwreq *iwr = (struct iwreq *) ifr; + struct net_device *dev; + iw_handler handler; + + /* Permissions are already checked in dev_ioctl() before calling us. + * The copy_to/from_user() of ifr is also dealt with in there */ + + /* Make sure the device exist */ + if ((dev = __dev_get_by_name(net, ifr->ifr_name)) == NULL) + return -ENODEV; + + /* A bunch of special cases, then the generic case... + * Note that 'cmd' is already filtered in dev_ioctl() with + * (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) */ + if (cmd == SIOCGIWSTATS) + return standard(dev, iwr, cmd, info, + &iw_handler_get_iwstats); + +#ifdef CONFIG_WEXT_PRIV + if (cmd == SIOCGIWPRIV && dev->wireless_handlers) + return standard(dev, iwr, cmd, info, + iw_handler_get_private); +#endif + + /* Basic check */ + if (!netif_device_present(dev)) + return -ENODEV; + + /* New driver API : try to find the handler */ + handler = get_handler(dev, cmd); + if (handler) { + /* Standard and private are not the same */ + if (cmd < SIOCIWFIRSTPRIV) + return standard(dev, iwr, cmd, info, handler); + else if (private) + return private(dev, iwr, cmd, info, handler); + } + /* Old driver API : call driver ioctl handler */ + if (dev->netdev_ops->ndo_do_ioctl) + return dev->netdev_ops->ndo_do_ioctl(dev, ifr, cmd); + return -EOPNOTSUPP; +} + +/* If command is `set a parameter', or `get the encoding parameters', + * check if the user has the right to do it. + */ +static int wext_permission_check(unsigned int cmd) +{ + if ((IW_IS_SET(cmd) || cmd == SIOCGIWENCODE || cmd == SIOCGIWENCODEEXT) + && !capable(CAP_NET_ADMIN)) + return -EPERM; + + return 0; +} + +/* entry point from dev ioctl */ +static int wext_ioctl_dispatch(struct net *net, struct ifreq *ifr, + unsigned int cmd, struct iw_request_info *info, + wext_ioctl_func standard, + wext_ioctl_func private) +{ + int ret = wext_permission_check(cmd); + + if (ret) + return ret; + + dev_load(net, ifr->ifr_name); + rtnl_lock(); + ret = wireless_process_ioctl(net, ifr, cmd, info, standard, private); + rtnl_unlock(); + + return ret; +} + +/* + * Wrapper to call a standard Wireless Extension handler. + * We do various checks and also take care of moving data between + * user space and kernel space. + */ +static int ioctl_standard_call(struct net_device * dev, + struct iwreq *iwr, + unsigned int cmd, + struct iw_request_info *info, + iw_handler handler) +{ + const struct iw_ioctl_description * descr; + int ret = -EINVAL; + + /* Get the description of the IOCTL */ + if ((cmd - SIOCIWFIRST) >= standard_ioctl_num) + return -EOPNOTSUPP; + descr = &(standard_ioctl[cmd - SIOCIWFIRST]); + + /* Check if we have a pointer to user space data or not */ + if (descr->header_type != IW_HEADER_TYPE_POINT) { + + /* No extra arguments. Trivial to handle */ + ret = handler(dev, info, &(iwr->u), NULL); + + /* Generate an event to notify listeners of the change */ + if ((descr->flags & IW_DESCR_FLAG_EVENT) && + ((ret == 0) || (ret == -EIWCOMMIT))) + wireless_send_event(dev, cmd, &(iwr->u), NULL); + } else { + ret = ioctl_standard_iw_point(&iwr->u.data, cmd, descr, + handler, dev, info); + } + + /* Call commit handler if needed and defined */ + if (ret == -EIWCOMMIT) + ret = call_commit_handler(dev); + + /* Here, we will generate the appropriate event if needed */ + + return ret; +} + + +int wext_handle_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd, + void __user *arg) +{ + struct iw_request_info info = { .cmd = cmd, .flags = 0 }; + int ret; + + ret = wext_ioctl_dispatch(net, ifr, cmd, &info, + ioctl_standard_call, + ioctl_private_call); + if (ret >= 0 && + IW_IS_GET(cmd) && + copy_to_user(arg, ifr, sizeof(struct iwreq))) + return -EFAULT; + + return ret; +} + +#ifdef CONFIG_COMPAT +static int compat_standard_call(struct net_device *dev, + struct iwreq *iwr, + unsigned int cmd, + struct iw_request_info *info, + iw_handler handler) +{ + const struct iw_ioctl_description *descr; + struct compat_iw_point *iwp_compat; + struct iw_point iwp; + int err; + + descr = standard_ioctl + (cmd - SIOCIWFIRST); + + if (descr->header_type != IW_HEADER_TYPE_POINT) + return ioctl_standard_call(dev, iwr, cmd, info, handler); + + iwp_compat = (struct compat_iw_point *) &iwr->u.data; + iwp.pointer = compat_ptr(iwp_compat->pointer); + iwp.length = iwp_compat->length; + iwp.flags = iwp_compat->flags; + + err = ioctl_standard_iw_point(&iwp, cmd, descr, handler, dev, info); + + iwp_compat->pointer = ptr_to_compat(iwp.pointer); + iwp_compat->length = iwp.length; + iwp_compat->flags = iwp.flags; + + return err; +} + +int compat_wext_handle_ioctl(struct net *net, unsigned int cmd, + unsigned long arg) +{ + void __user *argp = (void __user *)arg; + struct iw_request_info info; + struct iwreq iwr; + char *colon; + int ret; + + if (copy_from_user(&iwr, argp, sizeof(struct iwreq))) + return -EFAULT; + + iwr.ifr_name[IFNAMSIZ-1] = 0; + colon = strchr(iwr.ifr_name, ':'); + if (colon) + *colon = 0; + + info.cmd = cmd; + info.flags = IW_REQUEST_FLAG_COMPAT; + + ret = wext_ioctl_dispatch(net, (struct ifreq *) &iwr, cmd, &info, + compat_standard_call, + compat_private_call); + + if (ret >= 0 && + IW_IS_GET(cmd) && + copy_to_user(argp, &iwr, sizeof(struct iwreq))) + return -EFAULT; + + return ret; +} +#endif diff --git a/net/wireless/wext-priv.c b/net/wireless/wext-priv.c new file mode 100644 index 000000000000..a3c2277de9e5 --- /dev/null +++ b/net/wireless/wext-priv.c @@ -0,0 +1,248 @@ +/* + * This file implement the Wireless Extensions priv API. + * + * Authors : Jean Tourrilhes - HPL - + * Copyright (c) 1997-2007 Jean Tourrilhes, All Rights Reserved. + * Copyright 2009 Johannes Berg + * + * (As all part of the Linux kernel, this file is GPL) + */ +#include +#include +#include +#include + +int iw_handler_get_private(struct net_device * dev, + struct iw_request_info * info, + union iwreq_data * wrqu, + char * extra) +{ + /* Check if the driver has something to export */ + if ((dev->wireless_handlers->num_private_args == 0) || + (dev->wireless_handlers->private_args == NULL)) + return -EOPNOTSUPP; + + /* Check if there is enough buffer up there */ + if (wrqu->data.length < dev->wireless_handlers->num_private_args) { + /* User space can't know in advance how large the buffer + * needs to be. Give it a hint, so that we can support + * any size buffer we want somewhat efficiently... */ + wrqu->data.length = dev->wireless_handlers->num_private_args; + return -E2BIG; + } + + /* Set the number of available ioctls. */ + wrqu->data.length = dev->wireless_handlers->num_private_args; + + /* Copy structure to the user buffer. */ + memcpy(extra, dev->wireless_handlers->private_args, + sizeof(struct iw_priv_args) * wrqu->data.length); + + return 0; +} + +/* Size (in bytes) of the various private data types */ +static const char iw_priv_type_size[] = { + 0, /* IW_PRIV_TYPE_NONE */ + 1, /* IW_PRIV_TYPE_BYTE */ + 1, /* IW_PRIV_TYPE_CHAR */ + 0, /* Not defined */ + sizeof(__u32), /* IW_PRIV_TYPE_INT */ + sizeof(struct iw_freq), /* IW_PRIV_TYPE_FLOAT */ + sizeof(struct sockaddr), /* IW_PRIV_TYPE_ADDR */ + 0, /* Not defined */ +}; + +static int get_priv_size(__u16 args) +{ + int num = args & IW_PRIV_SIZE_MASK; + int type = (args & IW_PRIV_TYPE_MASK) >> 12; + + return num * iw_priv_type_size[type]; +} + +static int adjust_priv_size(__u16 args, struct iw_point *iwp) +{ + int num = iwp->length; + int max = args & IW_PRIV_SIZE_MASK; + int type = (args & IW_PRIV_TYPE_MASK) >> 12; + + /* Make sure the driver doesn't goof up */ + if (max < num) + num = max; + + return num * iw_priv_type_size[type]; +} + +/* + * Wrapper to call a private Wireless Extension handler. + * We do various checks and also take care of moving data between + * user space and kernel space. + * It's not as nice and slimline as the standard wrapper. The cause + * is struct iw_priv_args, which was not really designed for the + * job we are going here. + * + * IMPORTANT : This function prevent to set and get data on the same + * IOCTL and enforce the SET/GET convention. Not doing it would be + * far too hairy... + * If you need to set and get data at the same time, please don't use + * a iw_handler but process it in your ioctl handler (i.e. use the + * old driver API). + */ +static int get_priv_descr_and_size(struct net_device *dev, unsigned int cmd, + const struct iw_priv_args **descrp) +{ + const struct iw_priv_args *descr; + int i, extra_size; + + descr = NULL; + for (i = 0; i < dev->wireless_handlers->num_private_args; i++) { + if (cmd == dev->wireless_handlers->private_args[i].cmd) { + descr = &dev->wireless_handlers->private_args[i]; + break; + } + } + + extra_size = 0; + if (descr) { + if (IW_IS_SET(cmd)) { + int offset = 0; /* For sub-ioctls */ + /* Check for sub-ioctl handler */ + if (descr->name[0] == '\0') + /* Reserve one int for sub-ioctl index */ + offset = sizeof(__u32); + + /* Size of set arguments */ + extra_size = get_priv_size(descr->set_args); + + /* Does it fits in iwr ? */ + if ((descr->set_args & IW_PRIV_SIZE_FIXED) && + ((extra_size + offset) <= IFNAMSIZ)) + extra_size = 0; + } else { + /* Size of get arguments */ + extra_size = get_priv_size(descr->get_args); + + /* Does it fits in iwr ? */ + if ((descr->get_args & IW_PRIV_SIZE_FIXED) && + (extra_size <= IFNAMSIZ)) + extra_size = 0; + } + } + *descrp = descr; + return extra_size; +} + +static int ioctl_private_iw_point(struct iw_point *iwp, unsigned int cmd, + const struct iw_priv_args *descr, + iw_handler handler, struct net_device *dev, + struct iw_request_info *info, int extra_size) +{ + char *extra; + int err; + + /* Check what user space is giving us */ + if (IW_IS_SET(cmd)) { + if (!iwp->pointer && iwp->length != 0) + return -EFAULT; + + if (iwp->length > (descr->set_args & IW_PRIV_SIZE_MASK)) + return -E2BIG; + } else if (!iwp->pointer) + return -EFAULT; + + extra = kmalloc(extra_size, GFP_KERNEL); + if (!extra) + return -ENOMEM; + + /* If it is a SET, get all the extra data in here */ + if (IW_IS_SET(cmd) && (iwp->length != 0)) { + if (copy_from_user(extra, iwp->pointer, extra_size)) { + err = -EFAULT; + goto out; + } + } + + /* Call the handler */ + err = handler(dev, info, (union iwreq_data *) iwp, extra); + + /* If we have something to return to the user */ + if (!err && IW_IS_GET(cmd)) { + /* Adjust for the actual length if it's variable, + * avoid leaking kernel bits outside. + */ + if (!(descr->get_args & IW_PRIV_SIZE_FIXED)) + extra_size = adjust_priv_size(descr->get_args, iwp); + + if (copy_to_user(iwp->pointer, extra, extra_size)) + err = -EFAULT; + } + +out: + kfree(extra); + return err; +} + +int ioctl_private_call(struct net_device *dev, struct iwreq *iwr, + unsigned int cmd, struct iw_request_info *info, + iw_handler handler) +{ + int extra_size = 0, ret = -EINVAL; + const struct iw_priv_args *descr; + + extra_size = get_priv_descr_and_size(dev, cmd, &descr); + + /* Check if we have a pointer to user space data or not. */ + if (extra_size == 0) { + /* No extra arguments. Trivial to handle */ + ret = handler(dev, info, &(iwr->u), (char *) &(iwr->u)); + } else { + ret = ioctl_private_iw_point(&iwr->u.data, cmd, descr, + handler, dev, info, extra_size); + } + + /* Call commit handler if needed and defined */ + if (ret == -EIWCOMMIT) + ret = call_commit_handler(dev); + + return ret; +} + +#ifdef CONFIG_COMPAT +int compat_private_call(struct net_device *dev, struct iwreq *iwr, + unsigned int cmd, struct iw_request_info *info, + iw_handler handler) +{ + const struct iw_priv_args *descr; + int ret, extra_size; + + extra_size = get_priv_descr_and_size(dev, cmd, &descr); + + /* Check if we have a pointer to user space data or not. */ + if (extra_size == 0) { + /* No extra arguments. Trivial to handle */ + ret = handler(dev, info, &(iwr->u), (char *) &(iwr->u)); + } else { + struct compat_iw_point *iwp_compat; + struct iw_point iwp; + + iwp_compat = (struct compat_iw_point *) &iwr->u.data; + iwp.pointer = compat_ptr(iwp_compat->pointer); + iwp.length = iwp_compat->length; + iwp.flags = iwp_compat->flags; + + ret = ioctl_private_iw_point(&iwp, cmd, descr, + handler, dev, info, extra_size); + + iwp_compat->pointer = ptr_to_compat(iwp.pointer); + iwp_compat->length = iwp.length; + iwp_compat->flags = iwp.flags; + } + + /* Call commit handler if needed and defined */ + if (ret == -EIWCOMMIT) + ret = call_commit_handler(dev); + + return ret; +} +#endif diff --git a/net/wireless/wext-proc.c b/net/wireless/wext-proc.c new file mode 100644 index 000000000000..273a7f77c834 --- /dev/null +++ b/net/wireless/wext-proc.c @@ -0,0 +1,155 @@ +/* + * This file implement the Wireless Extensions proc API. + * + * Authors : Jean Tourrilhes - HPL - + * Copyright (c) 1997-2007 Jean Tourrilhes, All Rights Reserved. + * + * (As all part of the Linux kernel, this file is GPL) + */ + +/* + * The /proc/net/wireless file is a human readable user-space interface + * exporting various wireless specific statistics from the wireless devices. + * This is the most popular part of the Wireless Extensions ;-) + * + * This interface is a pure clone of /proc/net/dev (in net/core/dev.c). + * The content of the file is basically the content of "struct iw_statistics". + */ + +#include +#include +#include +#include +#include +#include +#include +#include + + +static void wireless_seq_printf_stats(struct seq_file *seq, + struct net_device *dev) +{ + /* Get stats from the driver */ + struct iw_statistics *stats = get_wireless_stats(dev); + static struct iw_statistics nullstats = {}; + + /* show device if it's wireless regardless of current stats */ + if (!stats) { +#ifdef CONFIG_WIRELESS_EXT + if (dev->wireless_handlers) + stats = &nullstats; +#endif +#ifdef CONFIG_CFG80211 + if (dev->ieee80211_ptr) + stats = &nullstats; +#endif + } + + if (stats) { + seq_printf(seq, "%6s: %04x %3d%c %3d%c %3d%c %6d %6d %6d " + "%6d %6d %6d\n", + dev->name, stats->status, stats->qual.qual, + stats->qual.updated & IW_QUAL_QUAL_UPDATED + ? '.' : ' ', + ((__s32) stats->qual.level) - + ((stats->qual.updated & IW_QUAL_DBM) ? 0x100 : 0), + stats->qual.updated & IW_QUAL_LEVEL_UPDATED + ? '.' : ' ', + ((__s32) stats->qual.noise) - + ((stats->qual.updated & IW_QUAL_DBM) ? 0x100 : 0), + stats->qual.updated & IW_QUAL_NOISE_UPDATED + ? '.' : ' ', + stats->discard.nwid, stats->discard.code, + stats->discard.fragment, stats->discard.retries, + stats->discard.misc, stats->miss.beacon); + + if (stats != &nullstats) + stats->qual.updated &= ~IW_QUAL_ALL_UPDATED; + } +} + +/* ---------------------------------------------------------------- */ +/* + * Print info for /proc/net/wireless (print all entries) + */ +static int wireless_dev_seq_show(struct seq_file *seq, void *v) +{ + might_sleep(); + + if (v == SEQ_START_TOKEN) + seq_printf(seq, "Inter-| sta-| Quality | Discarded " + "packets | Missed | WE\n" + " face | tus | link level noise | nwid " + "crypt frag retry misc | beacon | %d\n", + WIRELESS_EXT); + else + wireless_seq_printf_stats(seq, v); + return 0; +} + +static void *wireless_dev_seq_start(struct seq_file *seq, loff_t *pos) +{ + struct net *net = seq_file_net(seq); + loff_t off; + struct net_device *dev; + + rtnl_lock(); + if (!*pos) + return SEQ_START_TOKEN; + + off = 1; + for_each_netdev(net, dev) + if (off++ == *pos) + return dev; + return NULL; +} + +static void *wireless_dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + struct net *net = seq_file_net(seq); + + ++*pos; + + return v == SEQ_START_TOKEN ? + first_net_device(net) : next_net_device(v); +} + +static void wireless_dev_seq_stop(struct seq_file *seq, void *v) +{ + rtnl_unlock(); +} + +static const struct seq_operations wireless_seq_ops = { + .start = wireless_dev_seq_start, + .next = wireless_dev_seq_next, + .stop = wireless_dev_seq_stop, + .show = wireless_dev_seq_show, +}; + +static int seq_open_wireless(struct inode *inode, struct file *file) +{ + return seq_open_net(inode, file, &wireless_seq_ops, + sizeof(struct seq_net_private)); +} + +static const struct file_operations wireless_seq_fops = { + .owner = THIS_MODULE, + .open = seq_open_wireless, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_net, +}; + +int wext_proc_init(struct net *net) +{ + /* Create /proc/net/wireless entry */ + if (!proc_net_fops_create(net, "wireless", S_IRUGO, &wireless_seq_fops)) + return -ENOMEM; + + return 0; +} + +void wext_proc_exit(struct net *net) +{ + proc_net_remove(net, "wireless"); +} diff --git a/net/wireless/wext-spy.c b/net/wireless/wext-spy.c new file mode 100644 index 000000000000..6dcfe65a2d1a --- /dev/null +++ b/net/wireless/wext-spy.c @@ -0,0 +1,231 @@ +/* + * This file implement the Wireless Extensions spy API. + * + * Authors : Jean Tourrilhes - HPL - + * Copyright (c) 1997-2007 Jean Tourrilhes, All Rights Reserved. + * + * (As all part of the Linux kernel, this file is GPL) + */ + +#include +#include +#include +#include +#include +#include + +static inline struct iw_spy_data *get_spydata(struct net_device *dev) +{ + /* This is the new way */ + if (dev->wireless_data) + return dev->wireless_data->spy_data; + return NULL; +} + +int iw_handler_set_spy(struct net_device * dev, + struct iw_request_info * info, + union iwreq_data * wrqu, + char * extra) +{ + struct iw_spy_data * spydata = get_spydata(dev); + struct sockaddr * address = (struct sockaddr *) extra; + + /* Make sure driver is not buggy or using the old API */ + if (!spydata) + return -EOPNOTSUPP; + + /* Disable spy collection while we copy the addresses. + * While we copy addresses, any call to wireless_spy_update() + * will NOP. This is OK, as anyway the addresses are changing. */ + spydata->spy_number = 0; + + /* We want to operate without locking, because wireless_spy_update() + * most likely will happen in the interrupt handler, and therefore + * have its own locking constraints and needs performance. + * The rtnl_lock() make sure we don't race with the other iw_handlers. + * This make sure wireless_spy_update() "see" that the spy list + * is temporarily disabled. */ + smp_wmb(); + + /* Are there are addresses to copy? */ + if (wrqu->data.length > 0) { + int i; + + /* Copy addresses */ + for (i = 0; i < wrqu->data.length; i++) + memcpy(spydata->spy_address[i], address[i].sa_data, + ETH_ALEN); + /* Reset stats */ + memset(spydata->spy_stat, 0, + sizeof(struct iw_quality) * IW_MAX_SPY); + } + + /* Make sure above is updated before re-enabling */ + smp_wmb(); + + /* Enable addresses */ + spydata->spy_number = wrqu->data.length; + + return 0; +} +EXPORT_SYMBOL(iw_handler_set_spy); + +int iw_handler_get_spy(struct net_device * dev, + struct iw_request_info * info, + union iwreq_data * wrqu, + char * extra) +{ + struct iw_spy_data * spydata = get_spydata(dev); + struct sockaddr * address = (struct sockaddr *) extra; + int i; + + /* Make sure driver is not buggy or using the old API */ + if (!spydata) + return -EOPNOTSUPP; + + wrqu->data.length = spydata->spy_number; + + /* Copy addresses. */ + for (i = 0; i < spydata->spy_number; i++) { + memcpy(address[i].sa_data, spydata->spy_address[i], ETH_ALEN); + address[i].sa_family = AF_UNIX; + } + /* Copy stats to the user buffer (just after). */ + if (spydata->spy_number > 0) + memcpy(extra + (sizeof(struct sockaddr) *spydata->spy_number), + spydata->spy_stat, + sizeof(struct iw_quality) * spydata->spy_number); + /* Reset updated flags. */ + for (i = 0; i < spydata->spy_number; i++) + spydata->spy_stat[i].updated &= ~IW_QUAL_ALL_UPDATED; + return 0; +} +EXPORT_SYMBOL(iw_handler_get_spy); + +/*------------------------------------------------------------------*/ +/* + * Standard Wireless Handler : set spy threshold + */ +int iw_handler_set_thrspy(struct net_device * dev, + struct iw_request_info *info, + union iwreq_data * wrqu, + char * extra) +{ + struct iw_spy_data * spydata = get_spydata(dev); + struct iw_thrspy * threshold = (struct iw_thrspy *) extra; + + /* Make sure driver is not buggy or using the old API */ + if (!spydata) + return -EOPNOTSUPP; + + /* Just do it */ + memcpy(&(spydata->spy_thr_low), &(threshold->low), + 2 * sizeof(struct iw_quality)); + + /* Clear flag */ + memset(spydata->spy_thr_under, '\0', sizeof(spydata->spy_thr_under)); + + return 0; +} +EXPORT_SYMBOL(iw_handler_set_thrspy); + +/*------------------------------------------------------------------*/ +/* + * Standard Wireless Handler : get spy threshold + */ +int iw_handler_get_thrspy(struct net_device * dev, + struct iw_request_info *info, + union iwreq_data * wrqu, + char * extra) +{ + struct iw_spy_data * spydata = get_spydata(dev); + struct iw_thrspy * threshold = (struct iw_thrspy *) extra; + + /* Make sure driver is not buggy or using the old API */ + if (!spydata) + return -EOPNOTSUPP; + + /* Just do it */ + memcpy(&(threshold->low), &(spydata->spy_thr_low), + 2 * sizeof(struct iw_quality)); + + return 0; +} +EXPORT_SYMBOL(iw_handler_get_thrspy); + +/*------------------------------------------------------------------*/ +/* + * Prepare and send a Spy Threshold event + */ +static void iw_send_thrspy_event(struct net_device * dev, + struct iw_spy_data * spydata, + unsigned char * address, + struct iw_quality * wstats) +{ + union iwreq_data wrqu; + struct iw_thrspy threshold; + + /* Init */ + wrqu.data.length = 1; + wrqu.data.flags = 0; + /* Copy address */ + memcpy(threshold.addr.sa_data, address, ETH_ALEN); + threshold.addr.sa_family = ARPHRD_ETHER; + /* Copy stats */ + memcpy(&(threshold.qual), wstats, sizeof(struct iw_quality)); + /* Copy also thresholds */ + memcpy(&(threshold.low), &(spydata->spy_thr_low), + 2 * sizeof(struct iw_quality)); + + /* Send event to user space */ + wireless_send_event(dev, SIOCGIWTHRSPY, &wrqu, (char *) &threshold); +} + +/* ---------------------------------------------------------------- */ +/* + * Call for the driver to update the spy data. + * For now, the spy data is a simple array. As the size of the array is + * small, this is good enough. If we wanted to support larger number of + * spy addresses, we should use something more efficient... + */ +void wireless_spy_update(struct net_device * dev, + unsigned char * address, + struct iw_quality * wstats) +{ + struct iw_spy_data * spydata = get_spydata(dev); + int i; + int match = -1; + + /* Make sure driver is not buggy or using the old API */ + if (!spydata) + return; + + /* Update all records that match */ + for (i = 0; i < spydata->spy_number; i++) + if (!compare_ether_addr(address, spydata->spy_address[i])) { + memcpy(&(spydata->spy_stat[i]), wstats, + sizeof(struct iw_quality)); + match = i; + } + + /* Generate an event if we cross the spy threshold. + * To avoid event storms, we have a simple hysteresis : we generate + * event only when we go under the low threshold or above the + * high threshold. */ + if (match >= 0) { + if (spydata->spy_thr_under[match]) { + if (wstats->level > spydata->spy_thr_high.level) { + spydata->spy_thr_under[match] = 0; + iw_send_thrspy_event(dev, spydata, + address, wstats); + } + } else { + if (wstats->level < spydata->spy_thr_low.level) { + spydata->spy_thr_under[match] = 1; + iw_send_thrspy_event(dev, spydata, + address, wstats); + } + } + } +} +EXPORT_SYMBOL(wireless_spy_update); diff --git a/net/wireless/wext.c b/net/wireless/wext.c deleted file mode 100644 index 60fe57761ca9..000000000000 --- a/net/wireless/wext.c +++ /dev/null @@ -1,1775 +0,0 @@ -/* - * This file implement the Wireless Extensions APIs. - * - * Authors : Jean Tourrilhes - HPL - - * Copyright (c) 1997-2007 Jean Tourrilhes, All Rights Reserved. - * - * (As all part of the Linux kernel, this file is GPL) - */ - -/************************** DOCUMENTATION **************************/ -/* - * API definition : - * -------------- - * See for details of the APIs and the rest. - * - * History : - * ------- - * - * v1 - 5.12.01 - Jean II - * o Created this file. - * - * v2 - 13.12.01 - Jean II - * o Move /proc/net/wireless stuff from net/core/dev.c to here - * o Make Wireless Extension IOCTLs go through here - * o Added iw_handler handling ;-) - * o Added standard ioctl description - * o Initial dumb commit strategy based on orinoco.c - * - * v3 - 19.12.01 - Jean II - * o Make sure we don't go out of standard_ioctl[] in ioctl_standard_call - * o Add event dispatcher function - * o Add event description - * o Propagate events as rtnetlink IFLA_WIRELESS option - * o Generate event on selected SET requests - * - * v4 - 18.04.02 - Jean II - * o Fix stupid off by one in iw_ioctl_description : IW_ESSID_MAX_SIZE + 1 - * - * v5 - 21.06.02 - Jean II - * o Add IW_PRIV_TYPE_ADDR in priv_type_size (+cleanup) - * o Reshuffle IW_HEADER_TYPE_XXX to map IW_PRIV_TYPE_XXX changes - * o Add IWEVCUSTOM for driver specific event/scanning token - * o Turn on WE_STRICT_WRITE by default + kernel warning - * o Fix WE_STRICT_WRITE in ioctl_export_private() (32 => iw_num) - * o Fix off-by-one in test (extra_size <= IFNAMSIZ) - * - * v6 - 9.01.03 - Jean II - * o Add common spy support : iw_handler_set_spy(), wireless_spy_update() - * o Add enhanced spy support : iw_handler_set_thrspy() and event. - * o Add WIRELESS_EXT version display in /proc/net/wireless - * - * v6 - 18.06.04 - Jean II - * o Change get_spydata() method for added safety - * o Remove spy #ifdef, they are always on -> cleaner code - * o Allow any size GET request if user specifies length > max - * and if request has IW_DESCR_FLAG_NOMAX flag or is SIOCGIWPRIV - * o Start migrating get_wireless_stats to struct iw_handler_def - * o Add wmb() in iw_handler_set_spy() for non-coherent archs/cpus - * Based on patch from Pavel Roskin : - * o Fix kernel data leak to user space in private handler handling - * - * v7 - 18.3.05 - Jean II - * o Remove (struct iw_point *)->pointer from events and streams - * o Remove spy_offset from struct iw_handler_def - * o Start deprecating dev->get_wireless_stats, output a warning - * o If IW_QUAL_DBM is set, show dBm values in /proc/net/wireless - * o Don't lose INVALID/DBM flags when clearing UPDATED flags (iwstats) - * - * v8 - 17.02.06 - Jean II - * o RtNetlink requests support (SET/GET) - * - * v8b - 03.08.06 - Herbert Xu - * o Fix Wireless Event locking issues. - * - * v9 - 14.3.06 - Jean II - * o Change length in ESSID and NICK to strlen() instead of strlen()+1 - * o Make standard_ioctl_num and standard_event_num unsigned - * o Remove (struct net_device *)->get_wireless_stats() - * - * v10 - 16.3.07 - Jean II - * o Prevent leaking of kernel space in stream on 64 bits. - */ - -/***************************** INCLUDES *****************************/ - -#include -#include /* off_t */ -#include /* struct ifreq, dev_get_by_name() */ -#include -#include /* rtnetlink stuff */ -#include -#include /* for __init */ -#include /* ARPHRD_ETHER */ -#include /* compare_ether_addr */ -#include -#include - -#include /* Pretty obvious */ -#include /* New driver API */ -#include -#include - -#include /* copy_to_user() */ - -/************************* GLOBAL VARIABLES *************************/ -/* - * You should not use global variables, because of re-entrancy. - * On our case, it's only const, so it's OK... - */ -/* - * Meta-data about all the standard Wireless Extension request we - * know about. - */ -static const struct iw_ioctl_description standard_ioctl[] = { - [SIOCSIWCOMMIT - SIOCIWFIRST] = { - .header_type = IW_HEADER_TYPE_NULL, - }, - [SIOCGIWNAME - SIOCIWFIRST] = { - .header_type = IW_HEADER_TYPE_CHAR, - .flags = IW_DESCR_FLAG_DUMP, - }, - [SIOCSIWNWID - SIOCIWFIRST] = { - .header_type = IW_HEADER_TYPE_PARAM, - .flags = IW_DESCR_FLAG_EVENT, - }, - [SIOCGIWNWID - SIOCIWFIRST] = { - .header_type = IW_HEADER_TYPE_PARAM, - .flags = IW_DESCR_FLAG_DUMP, - }, - [SIOCSIWFREQ - SIOCIWFIRST] = { - .header_type = IW_HEADER_TYPE_FREQ, - .flags = IW_DESCR_FLAG_EVENT, - }, - [SIOCGIWFREQ - SIOCIWFIRST] = { - .header_type = IW_HEADER_TYPE_FREQ, - .flags = IW_DESCR_FLAG_DUMP, - }, - [SIOCSIWMODE - SIOCIWFIRST] = { - .header_type = IW_HEADER_TYPE_UINT, - .flags = IW_DESCR_FLAG_EVENT, - }, - [SIOCGIWMODE - SIOCIWFIRST] = { - .header_type = IW_HEADER_TYPE_UINT, - .flags = IW_DESCR_FLAG_DUMP, - }, - [SIOCSIWSENS - SIOCIWFIRST] = { - .header_type = IW_HEADER_TYPE_PARAM, - }, - [SIOCGIWSENS - SIOCIWFIRST] = { - .header_type = IW_HEADER_TYPE_PARAM, - }, - [SIOCSIWRANGE - SIOCIWFIRST] = { - .header_type = IW_HEADER_TYPE_NULL, - }, - [SIOCGIWRANGE - SIOCIWFIRST] = { - .header_type = IW_HEADER_TYPE_POINT, - .token_size = 1, - .max_tokens = sizeof(struct iw_range), - .flags = IW_DESCR_FLAG_DUMP, - }, - [SIOCSIWPRIV - SIOCIWFIRST] = { - .header_type = IW_HEADER_TYPE_NULL, - }, - [SIOCGIWPRIV - SIOCIWFIRST] = { /* (handled directly by us) */ - .header_type = IW_HEADER_TYPE_POINT, - .token_size = sizeof(struct iw_priv_args), - .max_tokens = 16, - .flags = IW_DESCR_FLAG_NOMAX, - }, - [SIOCSIWSTATS - SIOCIWFIRST] = { - .header_type = IW_HEADER_TYPE_NULL, - }, - [SIOCGIWSTATS - SIOCIWFIRST] = { /* (handled directly by us) */ - .header_type = IW_HEADER_TYPE_POINT, - .token_size = 1, - .max_tokens = sizeof(struct iw_statistics), - .flags = IW_DESCR_FLAG_DUMP, - }, - [SIOCSIWSPY - SIOCIWFIRST] = { - .header_type = IW_HEADER_TYPE_POINT, - .token_size = sizeof(struct sockaddr), - .max_tokens = IW_MAX_SPY, - }, - [SIOCGIWSPY - SIOCIWFIRST] = { - .header_type = IW_HEADER_TYPE_POINT, - .token_size = sizeof(struct sockaddr) + - sizeof(struct iw_quality), - .max_tokens = IW_MAX_SPY, - }, - [SIOCSIWTHRSPY - SIOCIWFIRST] = { - .header_type = IW_HEADER_TYPE_POINT, - .token_size = sizeof(struct iw_thrspy), - .min_tokens = 1, - .max_tokens = 1, - }, - [SIOCGIWTHRSPY - SIOCIWFIRST] = { - .header_type = IW_HEADER_TYPE_POINT, - .token_size = sizeof(struct iw_thrspy), - .min_tokens = 1, - .max_tokens = 1, - }, - [SIOCSIWAP - SIOCIWFIRST] = { - .header_type = IW_HEADER_TYPE_ADDR, - }, - [SIOCGIWAP - SIOCIWFIRST] = { - .header_type = IW_HEADER_TYPE_ADDR, - .flags = IW_DESCR_FLAG_DUMP, - }, - [SIOCSIWMLME - SIOCIWFIRST] = { - .header_type = IW_HEADER_TYPE_POINT, - .token_size = 1, - .min_tokens = sizeof(struct iw_mlme), - .max_tokens = sizeof(struct iw_mlme), - }, - [SIOCGIWAPLIST - SIOCIWFIRST] = { - .header_type = IW_HEADER_TYPE_POINT, - .token_size = sizeof(struct sockaddr) + - sizeof(struct iw_quality), - .max_tokens = IW_MAX_AP, - .flags = IW_DESCR_FLAG_NOMAX, - }, - [SIOCSIWSCAN - SIOCIWFIRST] = { - .header_type = IW_HEADER_TYPE_POINT, - .token_size = 1, - .min_tokens = 0, - .max_tokens = sizeof(struct iw_scan_req), - }, - [SIOCGIWSCAN - SIOCIWFIRST] = { - .header_type = IW_HEADER_TYPE_POINT, - .token_size = 1, - .max_tokens = IW_SCAN_MAX_DATA, - .flags = IW_DESCR_FLAG_NOMAX, - }, - [SIOCSIWESSID - SIOCIWFIRST] = { - .header_type = IW_HEADER_TYPE_POINT, - .token_size = 1, - .max_tokens = IW_ESSID_MAX_SIZE, - .flags = IW_DESCR_FLAG_EVENT, - }, - [SIOCGIWESSID - SIOCIWFIRST] = { - .header_type = IW_HEADER_TYPE_POINT, - .token_size = 1, - .max_tokens = IW_ESSID_MAX_SIZE, - .flags = IW_DESCR_FLAG_DUMP, - }, - [SIOCSIWNICKN - SIOCIWFIRST] = { - .header_type = IW_HEADER_TYPE_POINT, - .token_size = 1, - .max_tokens = IW_ESSID_MAX_SIZE, - }, - [SIOCGIWNICKN - SIOCIWFIRST] = { - .header_type = IW_HEADER_TYPE_POINT, - .token_size = 1, - .max_tokens = IW_ESSID_MAX_SIZE, - }, - [SIOCSIWRATE - SIOCIWFIRST] = { - .header_type = IW_HEADER_TYPE_PARAM, - }, - [SIOCGIWRATE - SIOCIWFIRST] = { - .header_type = IW_HEADER_TYPE_PARAM, - }, - [SIOCSIWRTS - SIOCIWFIRST] = { - .header_type = IW_HEADER_TYPE_PARAM, - }, - [SIOCGIWRTS - SIOCIWFIRST] = { - .header_type = IW_HEADER_TYPE_PARAM, - }, - [SIOCSIWFRAG - SIOCIWFIRST] = { - .header_type = IW_HEADER_TYPE_PARAM, - }, - [SIOCGIWFRAG - SIOCIWFIRST] = { - .header_type = IW_HEADER_TYPE_PARAM, - }, - [SIOCSIWTXPOW - SIOCIWFIRST] = { - .header_type = IW_HEADER_TYPE_PARAM, - }, - [SIOCGIWTXPOW - SIOCIWFIRST] = { - .header_type = IW_HEADER_TYPE_PARAM, - }, - [SIOCSIWRETRY - SIOCIWFIRST] = { - .header_type = IW_HEADER_TYPE_PARAM, - }, - [SIOCGIWRETRY - SIOCIWFIRST] = { - .header_type = IW_HEADER_TYPE_PARAM, - }, - [SIOCSIWENCODE - SIOCIWFIRST] = { - .header_type = IW_HEADER_TYPE_POINT, - .token_size = 1, - .max_tokens = IW_ENCODING_TOKEN_MAX, - .flags = IW_DESCR_FLAG_EVENT | IW_DESCR_FLAG_RESTRICT, - }, - [SIOCGIWENCODE - SIOCIWFIRST] = { - .header_type = IW_HEADER_TYPE_POINT, - .token_size = 1, - .max_tokens = IW_ENCODING_TOKEN_MAX, - .flags = IW_DESCR_FLAG_DUMP | IW_DESCR_FLAG_RESTRICT, - }, - [SIOCSIWPOWER - SIOCIWFIRST] = { - .header_type = IW_HEADER_TYPE_PARAM, - }, - [SIOCGIWPOWER - SIOCIWFIRST] = { - .header_type = IW_HEADER_TYPE_PARAM, - }, - [SIOCSIWGENIE - SIOCIWFIRST] = { - .header_type = IW_HEADER_TYPE_POINT, - .token_size = 1, - .max_tokens = IW_GENERIC_IE_MAX, - }, - [SIOCGIWGENIE - SIOCIWFIRST] = { - .header_type = IW_HEADER_TYPE_POINT, - .token_size = 1, - .max_tokens = IW_GENERIC_IE_MAX, - }, - [SIOCSIWAUTH - SIOCIWFIRST] = { - .header_type = IW_HEADER_TYPE_PARAM, - }, - [SIOCGIWAUTH - SIOCIWFIRST] = { - .header_type = IW_HEADER_TYPE_PARAM, - }, - [SIOCSIWENCODEEXT - SIOCIWFIRST] = { - .header_type = IW_HEADER_TYPE_POINT, - .token_size = 1, - .min_tokens = sizeof(struct iw_encode_ext), - .max_tokens = sizeof(struct iw_encode_ext) + - IW_ENCODING_TOKEN_MAX, - }, - [SIOCGIWENCODEEXT - SIOCIWFIRST] = { - .header_type = IW_HEADER_TYPE_POINT, - .token_size = 1, - .min_tokens = sizeof(struct iw_encode_ext), - .max_tokens = sizeof(struct iw_encode_ext) + - IW_ENCODING_TOKEN_MAX, - }, - [SIOCSIWPMKSA - SIOCIWFIRST] = { - .header_type = IW_HEADER_TYPE_POINT, - .token_size = 1, - .min_tokens = sizeof(struct iw_pmksa), - .max_tokens = sizeof(struct iw_pmksa), - }, -}; -static const unsigned standard_ioctl_num = ARRAY_SIZE(standard_ioctl); - -/* - * Meta-data about all the additional standard Wireless Extension events - * we know about. - */ -static const struct iw_ioctl_description standard_event[] = { - [IWEVTXDROP - IWEVFIRST] = { - .header_type = IW_HEADER_TYPE_ADDR, - }, - [IWEVQUAL - IWEVFIRST] = { - .header_type = IW_HEADER_TYPE_QUAL, - }, - [IWEVCUSTOM - IWEVFIRST] = { - .header_type = IW_HEADER_TYPE_POINT, - .token_size = 1, - .max_tokens = IW_CUSTOM_MAX, - }, - [IWEVREGISTERED - IWEVFIRST] = { - .header_type = IW_HEADER_TYPE_ADDR, - }, - [IWEVEXPIRED - IWEVFIRST] = { - .header_type = IW_HEADER_TYPE_ADDR, - }, - [IWEVGENIE - IWEVFIRST] = { - .header_type = IW_HEADER_TYPE_POINT, - .token_size = 1, - .max_tokens = IW_GENERIC_IE_MAX, - }, - [IWEVMICHAELMICFAILURE - IWEVFIRST] = { - .header_type = IW_HEADER_TYPE_POINT, - .token_size = 1, - .max_tokens = sizeof(struct iw_michaelmicfailure), - }, - [IWEVASSOCREQIE - IWEVFIRST] = { - .header_type = IW_HEADER_TYPE_POINT, - .token_size = 1, - .max_tokens = IW_GENERIC_IE_MAX, - }, - [IWEVASSOCRESPIE - IWEVFIRST] = { - .header_type = IW_HEADER_TYPE_POINT, - .token_size = 1, - .max_tokens = IW_GENERIC_IE_MAX, - }, - [IWEVPMKIDCAND - IWEVFIRST] = { - .header_type = IW_HEADER_TYPE_POINT, - .token_size = 1, - .max_tokens = sizeof(struct iw_pmkid_cand), - }, -}; -static const unsigned standard_event_num = ARRAY_SIZE(standard_event); - -/* Size (in bytes) of the various private data types */ -static const char iw_priv_type_size[] = { - 0, /* IW_PRIV_TYPE_NONE */ - 1, /* IW_PRIV_TYPE_BYTE */ - 1, /* IW_PRIV_TYPE_CHAR */ - 0, /* Not defined */ - sizeof(__u32), /* IW_PRIV_TYPE_INT */ - sizeof(struct iw_freq), /* IW_PRIV_TYPE_FLOAT */ - sizeof(struct sockaddr), /* IW_PRIV_TYPE_ADDR */ - 0, /* Not defined */ -}; - -/* Size (in bytes) of various events */ -static const int event_type_size[] = { - IW_EV_LCP_LEN, /* IW_HEADER_TYPE_NULL */ - 0, - IW_EV_CHAR_LEN, /* IW_HEADER_TYPE_CHAR */ - 0, - IW_EV_UINT_LEN, /* IW_HEADER_TYPE_UINT */ - IW_EV_FREQ_LEN, /* IW_HEADER_TYPE_FREQ */ - IW_EV_ADDR_LEN, /* IW_HEADER_TYPE_ADDR */ - 0, - IW_EV_POINT_LEN, /* Without variable payload */ - IW_EV_PARAM_LEN, /* IW_HEADER_TYPE_PARAM */ - IW_EV_QUAL_LEN, /* IW_HEADER_TYPE_QUAL */ -}; - -#ifdef CONFIG_COMPAT -static const int compat_event_type_size[] = { - IW_EV_COMPAT_LCP_LEN, /* IW_HEADER_TYPE_NULL */ - 0, - IW_EV_COMPAT_CHAR_LEN, /* IW_HEADER_TYPE_CHAR */ - 0, - IW_EV_COMPAT_UINT_LEN, /* IW_HEADER_TYPE_UINT */ - IW_EV_COMPAT_FREQ_LEN, /* IW_HEADER_TYPE_FREQ */ - IW_EV_COMPAT_ADDR_LEN, /* IW_HEADER_TYPE_ADDR */ - 0, - IW_EV_COMPAT_POINT_LEN, /* Without variable payload */ - IW_EV_COMPAT_PARAM_LEN, /* IW_HEADER_TYPE_PARAM */ - IW_EV_COMPAT_QUAL_LEN, /* IW_HEADER_TYPE_QUAL */ -}; -#endif - -/************************ COMMON SUBROUTINES ************************/ -/* - * Stuff that may be used in various place or doesn't fit in one - * of the section below. - */ - -/* ---------------------------------------------------------------- */ -/* - * Return the driver handler associated with a specific Wireless Extension. - */ -static iw_handler get_handler(struct net_device *dev, unsigned int cmd) -{ - /* Don't "optimise" the following variable, it will crash */ - unsigned int index; /* *MUST* be unsigned */ - - /* Check if we have some wireless handlers defined */ - if (dev->wireless_handlers == NULL) - return NULL; - - /* Try as a standard command */ - index = cmd - SIOCIWFIRST; - if (index < dev->wireless_handlers->num_standard) - return dev->wireless_handlers->standard[index]; - - /* Try as a private command */ - index = cmd - SIOCIWFIRSTPRIV; - if (index < dev->wireless_handlers->num_private) - return dev->wireless_handlers->private[index]; - - /* Not found */ - return NULL; -} - -/* ---------------------------------------------------------------- */ -/* - * Get statistics out of the driver - */ -struct iw_statistics *get_wireless_stats(struct net_device *dev) -{ - /* New location */ - if ((dev->wireless_handlers != NULL) && - (dev->wireless_handlers->get_wireless_stats != NULL)) - return dev->wireless_handlers->get_wireless_stats(dev); - - /* Not found */ - return NULL; -} - -/* ---------------------------------------------------------------- */ -/* - * Call the commit handler in the driver - * (if exist and if conditions are right) - * - * Note : our current commit strategy is currently pretty dumb, - * but we will be able to improve on that... - * The goal is to try to agreagate as many changes as possible - * before doing the commit. Drivers that will define a commit handler - * are usually those that need a reset after changing parameters, so - * we want to minimise the number of reset. - * A cool idea is to use a timer : at each "set" command, we re-set the - * timer, when the timer eventually fires, we call the driver. - * Hopefully, more on that later. - * - * Also, I'm waiting to see how many people will complain about the - * netif_running(dev) test. I'm open on that one... - * Hopefully, the driver will remember to do a commit in "open()" ;-) - */ -static int call_commit_handler(struct net_device *dev) -{ - if ((netif_running(dev)) && - (dev->wireless_handlers->standard[0] != NULL)) - /* Call the commit handler on the driver */ - return dev->wireless_handlers->standard[0](dev, NULL, - NULL, NULL); - else - return 0; /* Command completed successfully */ -} - -/* ---------------------------------------------------------------- */ -/* - * Calculate size of private arguments - */ -static int get_priv_size(__u16 args) -{ - int num = args & IW_PRIV_SIZE_MASK; - int type = (args & IW_PRIV_TYPE_MASK) >> 12; - - return num * iw_priv_type_size[type]; -} - -/* ---------------------------------------------------------------- */ -/* - * Re-calculate the size of private arguments - */ -static int adjust_priv_size(__u16 args, struct iw_point *iwp) -{ - int num = iwp->length; - int max = args & IW_PRIV_SIZE_MASK; - int type = (args & IW_PRIV_TYPE_MASK) >> 12; - - /* Make sure the driver doesn't goof up */ - if (max < num) - num = max; - - return num * iw_priv_type_size[type]; -} - -/* ---------------------------------------------------------------- */ -/* - * Standard Wireless Handler : get wireless stats - * Allow programatic access to /proc/net/wireless even if /proc - * doesn't exist... Also more efficient... - */ -static int iw_handler_get_iwstats(struct net_device * dev, - struct iw_request_info * info, - union iwreq_data * wrqu, - char * extra) -{ - /* Get stats from the driver */ - struct iw_statistics *stats; - - stats = get_wireless_stats(dev); - if (stats) { - /* Copy statistics to extra */ - memcpy(extra, stats, sizeof(struct iw_statistics)); - wrqu->data.length = sizeof(struct iw_statistics); - - /* Check if we need to clear the updated flag */ - if (wrqu->data.flags != 0) - stats->qual.updated &= ~IW_QUAL_ALL_UPDATED; - return 0; - } else - return -EOPNOTSUPP; -} - -/* ---------------------------------------------------------------- */ -/* - * Standard Wireless Handler : get iwpriv definitions - * Export the driver private handler definition - * They will be picked up by tools like iwpriv... - */ -static int iw_handler_get_private(struct net_device * dev, - struct iw_request_info * info, - union iwreq_data * wrqu, - char * extra) -{ - /* Check if the driver has something to export */ - if ((dev->wireless_handlers->num_private_args == 0) || - (dev->wireless_handlers->private_args == NULL)) - return -EOPNOTSUPP; - - /* Check if there is enough buffer up there */ - if (wrqu->data.length < dev->wireless_handlers->num_private_args) { - /* User space can't know in advance how large the buffer - * needs to be. Give it a hint, so that we can support - * any size buffer we want somewhat efficiently... */ - wrqu->data.length = dev->wireless_handlers->num_private_args; - return -E2BIG; - } - - /* Set the number of available ioctls. */ - wrqu->data.length = dev->wireless_handlers->num_private_args; - - /* Copy structure to the user buffer. */ - memcpy(extra, dev->wireless_handlers->private_args, - sizeof(struct iw_priv_args) * wrqu->data.length); - - return 0; -} - - -/******************** /proc/net/wireless SUPPORT ********************/ -/* - * The /proc/net/wireless file is a human readable user-space interface - * exporting various wireless specific statistics from the wireless devices. - * This is the most popular part of the Wireless Extensions ;-) - * - * This interface is a pure clone of /proc/net/dev (in net/core/dev.c). - * The content of the file is basically the content of "struct iw_statistics". - */ - -#ifdef CONFIG_PROC_FS - -/* ---------------------------------------------------------------- */ -/* - * Print one entry (line) of /proc/net/wireless - */ -static void wireless_seq_printf_stats(struct seq_file *seq, - struct net_device *dev) -{ - /* Get stats from the driver */ - struct iw_statistics *stats = get_wireless_stats(dev); - static struct iw_statistics nullstats = {}; - - /* show device if it's wireless regardless of current stats */ - if (!stats && dev->wireless_handlers) - stats = &nullstats; - - if (stats) { - seq_printf(seq, "%6s: %04x %3d%c %3d%c %3d%c %6d %6d %6d " - "%6d %6d %6d\n", - dev->name, stats->status, stats->qual.qual, - stats->qual.updated & IW_QUAL_QUAL_UPDATED - ? '.' : ' ', - ((__s32) stats->qual.level) - - ((stats->qual.updated & IW_QUAL_DBM) ? 0x100 : 0), - stats->qual.updated & IW_QUAL_LEVEL_UPDATED - ? '.' : ' ', - ((__s32) stats->qual.noise) - - ((stats->qual.updated & IW_QUAL_DBM) ? 0x100 : 0), - stats->qual.updated & IW_QUAL_NOISE_UPDATED - ? '.' : ' ', - stats->discard.nwid, stats->discard.code, - stats->discard.fragment, stats->discard.retries, - stats->discard.misc, stats->miss.beacon); - - if (stats != &nullstats) - stats->qual.updated &= ~IW_QUAL_ALL_UPDATED; - } -} - -/* ---------------------------------------------------------------- */ -/* - * Print info for /proc/net/wireless (print all entries) - */ -static int wireless_dev_seq_show(struct seq_file *seq, void *v) -{ - might_sleep(); - - if (v == SEQ_START_TOKEN) - seq_printf(seq, "Inter-| sta-| Quality | Discarded " - "packets | Missed | WE\n" - " face | tus | link level noise | nwid " - "crypt frag retry misc | beacon | %d\n", - WIRELESS_EXT); - else - wireless_seq_printf_stats(seq, v); - return 0; -} - -static void *wireless_dev_seq_start(struct seq_file *seq, loff_t *pos) -{ - struct net *net = seq_file_net(seq); - loff_t off; - struct net_device *dev; - - rtnl_lock(); - if (!*pos) - return SEQ_START_TOKEN; - - off = 1; - for_each_netdev(net, dev) - if (off++ == *pos) - return dev; - return NULL; -} - -static void *wireless_dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - struct net *net = seq_file_net(seq); - - ++*pos; - - return v == SEQ_START_TOKEN ? - first_net_device(net) : next_net_device(v); -} - -static void wireless_dev_seq_stop(struct seq_file *seq, void *v) -{ - rtnl_unlock(); -} - -static const struct seq_operations wireless_seq_ops = { - .start = wireless_dev_seq_start, - .next = wireless_dev_seq_next, - .stop = wireless_dev_seq_stop, - .show = wireless_dev_seq_show, -}; - -static int seq_open_wireless(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &wireless_seq_ops, - sizeof(struct seq_net_private)); -} - -static const struct file_operations wireless_seq_fops = { - .owner = THIS_MODULE, - .open = seq_open_wireless, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - -int wext_proc_init(struct net *net) -{ - /* Create /proc/net/wireless entry */ - if (!proc_net_fops_create(net, "wireless", S_IRUGO, &wireless_seq_fops)) - return -ENOMEM; - - return 0; -} - -void wext_proc_exit(struct net *net) -{ - proc_net_remove(net, "wireless"); -} -#endif /* CONFIG_PROC_FS */ - -/************************** IOCTL SUPPORT **************************/ -/* - * The original user space API to configure all those Wireless Extensions - * is through IOCTLs. - * In there, we check if we need to call the new driver API (iw_handler) - * or just call the driver ioctl handler. - */ - -/* ---------------------------------------------------------------- */ -static int ioctl_standard_iw_point(struct iw_point *iwp, unsigned int cmd, - const struct iw_ioctl_description *descr, - iw_handler handler, struct net_device *dev, - struct iw_request_info *info) -{ - int err, extra_size, user_length = 0, essid_compat = 0; - char *extra; - - /* Calculate space needed by arguments. Always allocate - * for max space. - */ - extra_size = descr->max_tokens * descr->token_size; - - /* Check need for ESSID compatibility for WE < 21 */ - switch (cmd) { - case SIOCSIWESSID: - case SIOCGIWESSID: - case SIOCSIWNICKN: - case SIOCGIWNICKN: - if (iwp->length == descr->max_tokens + 1) - essid_compat = 1; - else if (IW_IS_SET(cmd) && (iwp->length != 0)) { - char essid[IW_ESSID_MAX_SIZE + 1]; - unsigned int len; - len = iwp->length * descr->token_size; - - if (len > IW_ESSID_MAX_SIZE) - return -EFAULT; - - err = copy_from_user(essid, iwp->pointer, len); - if (err) - return -EFAULT; - - if (essid[iwp->length - 1] == '\0') - essid_compat = 1; - } - break; - default: - break; - } - - iwp->length -= essid_compat; - - /* Check what user space is giving us */ - if (IW_IS_SET(cmd)) { - /* Check NULL pointer */ - if (!iwp->pointer && iwp->length != 0) - return -EFAULT; - /* Check if number of token fits within bounds */ - if (iwp->length > descr->max_tokens) - return -E2BIG; - if (iwp->length < descr->min_tokens) - return -EINVAL; - } else { - /* Check NULL pointer */ - if (!iwp->pointer) - return -EFAULT; - /* Save user space buffer size for checking */ - user_length = iwp->length; - - /* Don't check if user_length > max to allow forward - * compatibility. The test user_length < min is - * implied by the test at the end. - */ - - /* Support for very large requests */ - if ((descr->flags & IW_DESCR_FLAG_NOMAX) && - (user_length > descr->max_tokens)) { - /* Allow userspace to GET more than max so - * we can support any size GET requests. - * There is still a limit : -ENOMEM. - */ - extra_size = user_length * descr->token_size; - - /* Note : user_length is originally a __u16, - * and token_size is controlled by us, - * so extra_size won't get negative and - * won't overflow... - */ - } - } - - /* kzalloc() ensures NULL-termination for essid_compat. */ - extra = kzalloc(extra_size, GFP_KERNEL); - if (!extra) - return -ENOMEM; - - /* If it is a SET, get all the extra data in here */ - if (IW_IS_SET(cmd) && (iwp->length != 0)) { - if (copy_from_user(extra, iwp->pointer, - iwp->length * - descr->token_size)) { - err = -EFAULT; - goto out; - } - - if (cmd == SIOCSIWENCODEEXT) { - struct iw_encode_ext *ee = (void *) extra; - - if (iwp->length < sizeof(*ee) + ee->key_len) - return -EFAULT; - } - } - - err = handler(dev, info, (union iwreq_data *) iwp, extra); - - iwp->length += essid_compat; - - /* If we have something to return to the user */ - if (!err && IW_IS_GET(cmd)) { - /* Check if there is enough buffer up there */ - if (user_length < iwp->length) { - err = -E2BIG; - goto out; - } - - if (copy_to_user(iwp->pointer, extra, - iwp->length * - descr->token_size)) { - err = -EFAULT; - goto out; - } - } - - /* Generate an event to notify listeners of the change */ - if ((descr->flags & IW_DESCR_FLAG_EVENT) && err == -EIWCOMMIT) { - union iwreq_data *data = (union iwreq_data *) iwp; - - if (descr->flags & IW_DESCR_FLAG_RESTRICT) - /* If the event is restricted, don't - * export the payload. - */ - wireless_send_event(dev, cmd, data, NULL); - else - wireless_send_event(dev, cmd, data, extra); - } - -out: - kfree(extra); - return err; -} - -/* - * Wrapper to call a standard Wireless Extension handler. - * We do various checks and also take care of moving data between - * user space and kernel space. - */ -static int ioctl_standard_call(struct net_device * dev, - struct iwreq *iwr, - unsigned int cmd, - struct iw_request_info *info, - iw_handler handler) -{ - const struct iw_ioctl_description * descr; - int ret = -EINVAL; - - /* Get the description of the IOCTL */ - if ((cmd - SIOCIWFIRST) >= standard_ioctl_num) - return -EOPNOTSUPP; - descr = &(standard_ioctl[cmd - SIOCIWFIRST]); - - /* Check if we have a pointer to user space data or not */ - if (descr->header_type != IW_HEADER_TYPE_POINT) { - - /* No extra arguments. Trivial to handle */ - ret = handler(dev, info, &(iwr->u), NULL); - - /* Generate an event to notify listeners of the change */ - if ((descr->flags & IW_DESCR_FLAG_EVENT) && - ((ret == 0) || (ret == -EIWCOMMIT))) - wireless_send_event(dev, cmd, &(iwr->u), NULL); - } else { - ret = ioctl_standard_iw_point(&iwr->u.data, cmd, descr, - handler, dev, info); - } - - /* Call commit handler if needed and defined */ - if (ret == -EIWCOMMIT) - ret = call_commit_handler(dev); - - /* Here, we will generate the appropriate event if needed */ - - return ret; -} - -/* ---------------------------------------------------------------- */ -/* - * Wrapper to call a private Wireless Extension handler. - * We do various checks and also take care of moving data between - * user space and kernel space. - * It's not as nice and slimline as the standard wrapper. The cause - * is struct iw_priv_args, which was not really designed for the - * job we are going here. - * - * IMPORTANT : This function prevent to set and get data on the same - * IOCTL and enforce the SET/GET convention. Not doing it would be - * far too hairy... - * If you need to set and get data at the same time, please don't use - * a iw_handler but process it in your ioctl handler (i.e. use the - * old driver API). - */ -static int get_priv_descr_and_size(struct net_device *dev, unsigned int cmd, - const struct iw_priv_args **descrp) -{ - const struct iw_priv_args *descr; - int i, extra_size; - - descr = NULL; - for (i = 0; i < dev->wireless_handlers->num_private_args; i++) { - if (cmd == dev->wireless_handlers->private_args[i].cmd) { - descr = &dev->wireless_handlers->private_args[i]; - break; - } - } - - extra_size = 0; - if (descr) { - if (IW_IS_SET(cmd)) { - int offset = 0; /* For sub-ioctls */ - /* Check for sub-ioctl handler */ - if (descr->name[0] == '\0') - /* Reserve one int for sub-ioctl index */ - offset = sizeof(__u32); - - /* Size of set arguments */ - extra_size = get_priv_size(descr->set_args); - - /* Does it fits in iwr ? */ - if ((descr->set_args & IW_PRIV_SIZE_FIXED) && - ((extra_size + offset) <= IFNAMSIZ)) - extra_size = 0; - } else { - /* Size of get arguments */ - extra_size = get_priv_size(descr->get_args); - - /* Does it fits in iwr ? */ - if ((descr->get_args & IW_PRIV_SIZE_FIXED) && - (extra_size <= IFNAMSIZ)) - extra_size = 0; - } - } - *descrp = descr; - return extra_size; -} - -static int ioctl_private_iw_point(struct iw_point *iwp, unsigned int cmd, - const struct iw_priv_args *descr, - iw_handler handler, struct net_device *dev, - struct iw_request_info *info, int extra_size) -{ - char *extra; - int err; - - /* Check what user space is giving us */ - if (IW_IS_SET(cmd)) { - if (!iwp->pointer && iwp->length != 0) - return -EFAULT; - - if (iwp->length > (descr->set_args & IW_PRIV_SIZE_MASK)) - return -E2BIG; - } else if (!iwp->pointer) - return -EFAULT; - - extra = kmalloc(extra_size, GFP_KERNEL); - if (!extra) - return -ENOMEM; - - /* If it is a SET, get all the extra data in here */ - if (IW_IS_SET(cmd) && (iwp->length != 0)) { - if (copy_from_user(extra, iwp->pointer, extra_size)) { - err = -EFAULT; - goto out; - } - } - - /* Call the handler */ - err = handler(dev, info, (union iwreq_data *) iwp, extra); - - /* If we have something to return to the user */ - if (!err && IW_IS_GET(cmd)) { - /* Adjust for the actual length if it's variable, - * avoid leaking kernel bits outside. - */ - if (!(descr->get_args & IW_PRIV_SIZE_FIXED)) - extra_size = adjust_priv_size(descr->get_args, iwp); - - if (copy_to_user(iwp->pointer, extra, extra_size)) - err = -EFAULT; - } - -out: - kfree(extra); - return err; -} - -static int ioctl_private_call(struct net_device *dev, struct iwreq *iwr, - unsigned int cmd, struct iw_request_info *info, - iw_handler handler) -{ - int extra_size = 0, ret = -EINVAL; - const struct iw_priv_args *descr; - - extra_size = get_priv_descr_and_size(dev, cmd, &descr); - - /* Check if we have a pointer to user space data or not. */ - if (extra_size == 0) { - /* No extra arguments. Trivial to handle */ - ret = handler(dev, info, &(iwr->u), (char *) &(iwr->u)); - } else { - ret = ioctl_private_iw_point(&iwr->u.data, cmd, descr, - handler, dev, info, extra_size); - } - - /* Call commit handler if needed and defined */ - if (ret == -EIWCOMMIT) - ret = call_commit_handler(dev); - - return ret; -} - -/* ---------------------------------------------------------------- */ -typedef int (*wext_ioctl_func)(struct net_device *, struct iwreq *, - unsigned int, struct iw_request_info *, - iw_handler); - -/* - * Main IOCTl dispatcher. - * Check the type of IOCTL and call the appropriate wrapper... - */ -static int wireless_process_ioctl(struct net *net, struct ifreq *ifr, - unsigned int cmd, - struct iw_request_info *info, - wext_ioctl_func standard, - wext_ioctl_func private) -{ - struct iwreq *iwr = (struct iwreq *) ifr; - struct net_device *dev; - iw_handler handler; - - /* Permissions are already checked in dev_ioctl() before calling us. - * The copy_to/from_user() of ifr is also dealt with in there */ - - /* Make sure the device exist */ - if ((dev = __dev_get_by_name(net, ifr->ifr_name)) == NULL) - return -ENODEV; - - /* A bunch of special cases, then the generic case... - * Note that 'cmd' is already filtered in dev_ioctl() with - * (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) */ - if (cmd == SIOCGIWSTATS) - return standard(dev, iwr, cmd, info, - &iw_handler_get_iwstats); - - if (cmd == SIOCGIWPRIV && dev->wireless_handlers) - return standard(dev, iwr, cmd, info, - &iw_handler_get_private); - - /* Basic check */ - if (!netif_device_present(dev)) - return -ENODEV; - - /* New driver API : try to find the handler */ - handler = get_handler(dev, cmd); - if (handler) { - /* Standard and private are not the same */ - if (cmd < SIOCIWFIRSTPRIV) - return standard(dev, iwr, cmd, info, handler); - else - return private(dev, iwr, cmd, info, handler); - } - /* Old driver API : call driver ioctl handler */ - if (dev->netdev_ops->ndo_do_ioctl) - return dev->netdev_ops->ndo_do_ioctl(dev, ifr, cmd); - return -EOPNOTSUPP; -} - -/* If command is `set a parameter', or `get the encoding parameters', - * check if the user has the right to do it. - */ -static int wext_permission_check(unsigned int cmd) -{ - if ((IW_IS_SET(cmd) || cmd == SIOCGIWENCODE || cmd == SIOCGIWENCODEEXT) - && !capable(CAP_NET_ADMIN)) - return -EPERM; - - return 0; -} - -/* entry point from dev ioctl */ -static int wext_ioctl_dispatch(struct net *net, struct ifreq *ifr, - unsigned int cmd, struct iw_request_info *info, - wext_ioctl_func standard, - wext_ioctl_func private) -{ - int ret = wext_permission_check(cmd); - - if (ret) - return ret; - - dev_load(net, ifr->ifr_name); - rtnl_lock(); - ret = wireless_process_ioctl(net, ifr, cmd, info, standard, private); - rtnl_unlock(); - - return ret; -} - -int wext_handle_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd, - void __user *arg) -{ - struct iw_request_info info = { .cmd = cmd, .flags = 0 }; - int ret; - - ret = wext_ioctl_dispatch(net, ifr, cmd, &info, - ioctl_standard_call, - ioctl_private_call); - if (ret >= 0 && - IW_IS_GET(cmd) && - copy_to_user(arg, ifr, sizeof(struct iwreq))) - return -EFAULT; - - return ret; -} - -#ifdef CONFIG_COMPAT -static int compat_standard_call(struct net_device *dev, - struct iwreq *iwr, - unsigned int cmd, - struct iw_request_info *info, - iw_handler handler) -{ - const struct iw_ioctl_description *descr; - struct compat_iw_point *iwp_compat; - struct iw_point iwp; - int err; - - descr = standard_ioctl + (cmd - SIOCIWFIRST); - - if (descr->header_type != IW_HEADER_TYPE_POINT) - return ioctl_standard_call(dev, iwr, cmd, info, handler); - - iwp_compat = (struct compat_iw_point *) &iwr->u.data; - iwp.pointer = compat_ptr(iwp_compat->pointer); - iwp.length = iwp_compat->length; - iwp.flags = iwp_compat->flags; - - err = ioctl_standard_iw_point(&iwp, cmd, descr, handler, dev, info); - - iwp_compat->pointer = ptr_to_compat(iwp.pointer); - iwp_compat->length = iwp.length; - iwp_compat->flags = iwp.flags; - - return err; -} - -static int compat_private_call(struct net_device *dev, struct iwreq *iwr, - unsigned int cmd, struct iw_request_info *info, - iw_handler handler) -{ - const struct iw_priv_args *descr; - int ret, extra_size; - - extra_size = get_priv_descr_and_size(dev, cmd, &descr); - - /* Check if we have a pointer to user space data or not. */ - if (extra_size == 0) { - /* No extra arguments. Trivial to handle */ - ret = handler(dev, info, &(iwr->u), (char *) &(iwr->u)); - } else { - struct compat_iw_point *iwp_compat; - struct iw_point iwp; - - iwp_compat = (struct compat_iw_point *) &iwr->u.data; - iwp.pointer = compat_ptr(iwp_compat->pointer); - iwp.length = iwp_compat->length; - iwp.flags = iwp_compat->flags; - - ret = ioctl_private_iw_point(&iwp, cmd, descr, - handler, dev, info, extra_size); - - iwp_compat->pointer = ptr_to_compat(iwp.pointer); - iwp_compat->length = iwp.length; - iwp_compat->flags = iwp.flags; - } - - /* Call commit handler if needed and defined */ - if (ret == -EIWCOMMIT) - ret = call_commit_handler(dev); - - return ret; -} - -int compat_wext_handle_ioctl(struct net *net, unsigned int cmd, - unsigned long arg) -{ - void __user *argp = (void __user *)arg; - struct iw_request_info info; - struct iwreq iwr; - char *colon; - int ret; - - if (copy_from_user(&iwr, argp, sizeof(struct iwreq))) - return -EFAULT; - - iwr.ifr_name[IFNAMSIZ-1] = 0; - colon = strchr(iwr.ifr_name, ':'); - if (colon) - *colon = 0; - - info.cmd = cmd; - info.flags = IW_REQUEST_FLAG_COMPAT; - - ret = wext_ioctl_dispatch(net, (struct ifreq *) &iwr, cmd, &info, - compat_standard_call, - compat_private_call); - - if (ret >= 0 && - IW_IS_GET(cmd) && - copy_to_user(argp, &iwr, sizeof(struct iwreq))) - return -EFAULT; - - return ret; -} -#endif - -static int __net_init wext_pernet_init(struct net *net) -{ - skb_queue_head_init(&net->wext_nlevents); - return 0; -} - -static void __net_exit wext_pernet_exit(struct net *net) -{ - skb_queue_purge(&net->wext_nlevents); -} - -static struct pernet_operations wext_pernet_ops = { - .init = wext_pernet_init, - .exit = wext_pernet_exit, -}; - -static int __init wireless_nlevent_init(void) -{ - return register_pernet_subsys(&wext_pernet_ops); -} - -subsys_initcall(wireless_nlevent_init); - -/* Process events generated by the wireless layer or the driver. */ -static void wireless_nlevent_process(struct work_struct *work) -{ - struct sk_buff *skb; - struct net *net; - - rtnl_lock(); - - for_each_net(net) { - while ((skb = skb_dequeue(&net->wext_nlevents))) - rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, - GFP_KERNEL); - } - - rtnl_unlock(); -} - -static DECLARE_WORK(wireless_nlevent_work, wireless_nlevent_process); - -static struct nlmsghdr *rtnetlink_ifinfo_prep(struct net_device *dev, - struct sk_buff *skb) -{ - struct ifinfomsg *r; - struct nlmsghdr *nlh; - - nlh = nlmsg_put(skb, 0, 0, RTM_NEWLINK, sizeof(*r), 0); - if (!nlh) - return NULL; - - r = nlmsg_data(nlh); - r->ifi_family = AF_UNSPEC; - r->__ifi_pad = 0; - r->ifi_type = dev->type; - r->ifi_index = dev->ifindex; - r->ifi_flags = dev_get_flags(dev); - r->ifi_change = 0; /* Wireless changes don't affect those flags */ - - NLA_PUT_STRING(skb, IFLA_IFNAME, dev->name); - - return nlh; - nla_put_failure: - nlmsg_cancel(skb, nlh); - return NULL; -} - - -/* - * Main event dispatcher. Called from other parts and drivers. - * Send the event on the appropriate channels. - * May be called from interrupt context. - */ -void wireless_send_event(struct net_device * dev, - unsigned int cmd, - union iwreq_data * wrqu, - const char * extra) -{ - const struct iw_ioctl_description * descr = NULL; - int extra_len = 0; - struct iw_event *event; /* Mallocated whole event */ - int event_len; /* Its size */ - int hdr_len; /* Size of the event header */ - int wrqu_off = 0; /* Offset in wrqu */ - /* Don't "optimise" the following variable, it will crash */ - unsigned cmd_index; /* *MUST* be unsigned */ - struct sk_buff *skb; - struct nlmsghdr *nlh; - struct nlattr *nla; -#ifdef CONFIG_COMPAT - struct __compat_iw_event *compat_event; - struct compat_iw_point compat_wrqu; - struct sk_buff *compskb; -#endif - - /* - * Nothing in the kernel sends scan events with data, be safe. - * This is necessary because we cannot fix up scan event data - * for compat, due to being contained in 'extra', but normally - * applications are required to retrieve the scan data anyway - * and no data is included in the event, this codifies that - * practice. - */ - if (WARN_ON(cmd == SIOCGIWSCAN && extra)) - extra = NULL; - - /* Get the description of the Event */ - if (cmd <= SIOCIWLAST) { - cmd_index = cmd - SIOCIWFIRST; - if (cmd_index < standard_ioctl_num) - descr = &(standard_ioctl[cmd_index]); - } else { - cmd_index = cmd - IWEVFIRST; - if (cmd_index < standard_event_num) - descr = &(standard_event[cmd_index]); - } - /* Don't accept unknown events */ - if (descr == NULL) { - /* Note : we don't return an error to the driver, because - * the driver would not know what to do about it. It can't - * return an error to the user, because the event is not - * initiated by a user request. - * The best the driver could do is to log an error message. - * We will do it ourselves instead... - */ - printk(KERN_ERR "%s (WE) : Invalid/Unknown Wireless Event (0x%04X)\n", - dev->name, cmd); - return; - } - - /* Check extra parameters and set extra_len */ - if (descr->header_type == IW_HEADER_TYPE_POINT) { - /* Check if number of token fits within bounds */ - if (wrqu->data.length > descr->max_tokens) { - printk(KERN_ERR "%s (WE) : Wireless Event too big (%d)\n", dev->name, wrqu->data.length); - return; - } - if (wrqu->data.length < descr->min_tokens) { - printk(KERN_ERR "%s (WE) : Wireless Event too small (%d)\n", dev->name, wrqu->data.length); - return; - } - /* Calculate extra_len - extra is NULL for restricted events */ - if (extra != NULL) - extra_len = wrqu->data.length * descr->token_size; - /* Always at an offset in wrqu */ - wrqu_off = IW_EV_POINT_OFF; - } - - /* Total length of the event */ - hdr_len = event_type_size[descr->header_type]; - event_len = hdr_len + extra_len; - - /* - * The problem for 64/32 bit. - * - * On 64-bit, a regular event is laid out as follows: - * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | - * | event.len | event.cmd | p a d d i n g | - * | wrqu data ... (with the correct size) | - * - * This padding exists because we manipulate event->u, - * and 'event' is not packed. - * - * An iw_point event is laid out like this instead: - * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | - * | event.len | event.cmd | p a d d i n g | - * | iwpnt.len | iwpnt.flg | p a d d i n g | - * | extra data ... - * - * The second padding exists because struct iw_point is extended, - * but this depends on the platform... - * - * On 32-bit, all the padding shouldn't be there. - */ - - skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); - if (!skb) - return; - - /* Send via the RtNetlink event channel */ - nlh = rtnetlink_ifinfo_prep(dev, skb); - if (WARN_ON(!nlh)) { - kfree_skb(skb); - return; - } - - /* Add the wireless events in the netlink packet */ - nla = nla_reserve(skb, IFLA_WIRELESS, event_len); - if (!nla) { - kfree_skb(skb); - return; - } - event = nla_data(nla); - - /* Fill event - first clear to avoid data leaking */ - memset(event, 0, hdr_len); - event->len = event_len; - event->cmd = cmd; - memcpy(&event->u, ((char *) wrqu) + wrqu_off, hdr_len - IW_EV_LCP_LEN); - if (extra_len) - memcpy(((char *) event) + hdr_len, extra, extra_len); - - nlmsg_end(skb, nlh); -#ifdef CONFIG_COMPAT - hdr_len = compat_event_type_size[descr->header_type]; - event_len = hdr_len + extra_len; - - compskb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); - if (!compskb) { - kfree_skb(skb); - return; - } - - /* Send via the RtNetlink event channel */ - nlh = rtnetlink_ifinfo_prep(dev, compskb); - if (WARN_ON(!nlh)) { - kfree_skb(skb); - kfree_skb(compskb); - return; - } - - /* Add the wireless events in the netlink packet */ - nla = nla_reserve(compskb, IFLA_WIRELESS, event_len); - if (!nla) { - kfree_skb(skb); - kfree_skb(compskb); - return; - } - compat_event = nla_data(nla); - - compat_event->len = event_len; - compat_event->cmd = cmd; - if (descr->header_type == IW_HEADER_TYPE_POINT) { - compat_wrqu.length = wrqu->data.length; - compat_wrqu.flags = wrqu->data.flags; - memcpy(&compat_event->pointer, - ((char *) &compat_wrqu) + IW_EV_COMPAT_POINT_OFF, - hdr_len - IW_EV_COMPAT_LCP_LEN); - if (extra_len) - memcpy(((char *) compat_event) + hdr_len, - extra, extra_len); - } else { - /* extra_len must be zero, so no if (extra) needed */ - memcpy(&compat_event->pointer, wrqu, - hdr_len - IW_EV_COMPAT_LCP_LEN); - } - - nlmsg_end(compskb, nlh); - - skb_shinfo(skb)->frag_list = compskb; -#endif - skb_queue_tail(&dev_net(dev)->wext_nlevents, skb); - schedule_work(&wireless_nlevent_work); -} -EXPORT_SYMBOL(wireless_send_event); - -/********************** ENHANCED IWSPY SUPPORT **********************/ -/* - * In the old days, the driver was handling spy support all by itself. - * Now, the driver can delegate this task to Wireless Extensions. - * It needs to use those standard spy iw_handler in struct iw_handler_def, - * push data to us via wireless_spy_update() and include struct iw_spy_data - * in its private part (and export it in net_device->wireless_data->spy_data). - * One of the main advantage of centralising spy support here is that - * it becomes much easier to improve and extend it without having to touch - * the drivers. One example is the addition of the Spy-Threshold events. - */ - -/* ---------------------------------------------------------------- */ -/* - * Return the pointer to the spy data in the driver. - * Because this is called on the Rx path via wireless_spy_update(), - * we want it to be efficient... - */ -static inline struct iw_spy_data *get_spydata(struct net_device *dev) -{ - /* This is the new way */ - if (dev->wireless_data) - return dev->wireless_data->spy_data; - return NULL; -} - -/*------------------------------------------------------------------*/ -/* - * Standard Wireless Handler : set Spy List - */ -int iw_handler_set_spy(struct net_device * dev, - struct iw_request_info * info, - union iwreq_data * wrqu, - char * extra) -{ - struct iw_spy_data * spydata = get_spydata(dev); - struct sockaddr * address = (struct sockaddr *) extra; - - /* Make sure driver is not buggy or using the old API */ - if (!spydata) - return -EOPNOTSUPP; - - /* Disable spy collection while we copy the addresses. - * While we copy addresses, any call to wireless_spy_update() - * will NOP. This is OK, as anyway the addresses are changing. */ - spydata->spy_number = 0; - - /* We want to operate without locking, because wireless_spy_update() - * most likely will happen in the interrupt handler, and therefore - * have its own locking constraints and needs performance. - * The rtnl_lock() make sure we don't race with the other iw_handlers. - * This make sure wireless_spy_update() "see" that the spy list - * is temporarily disabled. */ - smp_wmb(); - - /* Are there are addresses to copy? */ - if (wrqu->data.length > 0) { - int i; - - /* Copy addresses */ - for (i = 0; i < wrqu->data.length; i++) - memcpy(spydata->spy_address[i], address[i].sa_data, - ETH_ALEN); - /* Reset stats */ - memset(spydata->spy_stat, 0, - sizeof(struct iw_quality) * IW_MAX_SPY); - } - - /* Make sure above is updated before re-enabling */ - smp_wmb(); - - /* Enable addresses */ - spydata->spy_number = wrqu->data.length; - - return 0; -} -EXPORT_SYMBOL(iw_handler_set_spy); - -/*------------------------------------------------------------------*/ -/* - * Standard Wireless Handler : get Spy List - */ -int iw_handler_get_spy(struct net_device * dev, - struct iw_request_info * info, - union iwreq_data * wrqu, - char * extra) -{ - struct iw_spy_data * spydata = get_spydata(dev); - struct sockaddr * address = (struct sockaddr *) extra; - int i; - - /* Make sure driver is not buggy or using the old API */ - if (!spydata) - return -EOPNOTSUPP; - - wrqu->data.length = spydata->spy_number; - - /* Copy addresses. */ - for (i = 0; i < spydata->spy_number; i++) { - memcpy(address[i].sa_data, spydata->spy_address[i], ETH_ALEN); - address[i].sa_family = AF_UNIX; - } - /* Copy stats to the user buffer (just after). */ - if (spydata->spy_number > 0) - memcpy(extra + (sizeof(struct sockaddr) *spydata->spy_number), - spydata->spy_stat, - sizeof(struct iw_quality) * spydata->spy_number); - /* Reset updated flags. */ - for (i = 0; i < spydata->spy_number; i++) - spydata->spy_stat[i].updated &= ~IW_QUAL_ALL_UPDATED; - return 0; -} -EXPORT_SYMBOL(iw_handler_get_spy); - -/*------------------------------------------------------------------*/ -/* - * Standard Wireless Handler : set spy threshold - */ -int iw_handler_set_thrspy(struct net_device * dev, - struct iw_request_info *info, - union iwreq_data * wrqu, - char * extra) -{ - struct iw_spy_data * spydata = get_spydata(dev); - struct iw_thrspy * threshold = (struct iw_thrspy *) extra; - - /* Make sure driver is not buggy or using the old API */ - if (!spydata) - return -EOPNOTSUPP; - - /* Just do it */ - memcpy(&(spydata->spy_thr_low), &(threshold->low), - 2 * sizeof(struct iw_quality)); - - /* Clear flag */ - memset(spydata->spy_thr_under, '\0', sizeof(spydata->spy_thr_under)); - - return 0; -} -EXPORT_SYMBOL(iw_handler_set_thrspy); - -/*------------------------------------------------------------------*/ -/* - * Standard Wireless Handler : get spy threshold - */ -int iw_handler_get_thrspy(struct net_device * dev, - struct iw_request_info *info, - union iwreq_data * wrqu, - char * extra) -{ - struct iw_spy_data * spydata = get_spydata(dev); - struct iw_thrspy * threshold = (struct iw_thrspy *) extra; - - /* Make sure driver is not buggy or using the old API */ - if (!spydata) - return -EOPNOTSUPP; - - /* Just do it */ - memcpy(&(threshold->low), &(spydata->spy_thr_low), - 2 * sizeof(struct iw_quality)); - - return 0; -} -EXPORT_SYMBOL(iw_handler_get_thrspy); - -/*------------------------------------------------------------------*/ -/* - * Prepare and send a Spy Threshold event - */ -static void iw_send_thrspy_event(struct net_device * dev, - struct iw_spy_data * spydata, - unsigned char * address, - struct iw_quality * wstats) -{ - union iwreq_data wrqu; - struct iw_thrspy threshold; - - /* Init */ - wrqu.data.length = 1; - wrqu.data.flags = 0; - /* Copy address */ - memcpy(threshold.addr.sa_data, address, ETH_ALEN); - threshold.addr.sa_family = ARPHRD_ETHER; - /* Copy stats */ - memcpy(&(threshold.qual), wstats, sizeof(struct iw_quality)); - /* Copy also thresholds */ - memcpy(&(threshold.low), &(spydata->spy_thr_low), - 2 * sizeof(struct iw_quality)); - - /* Send event to user space */ - wireless_send_event(dev, SIOCGIWTHRSPY, &wrqu, (char *) &threshold); -} - -/* ---------------------------------------------------------------- */ -/* - * Call for the driver to update the spy data. - * For now, the spy data is a simple array. As the size of the array is - * small, this is good enough. If we wanted to support larger number of - * spy addresses, we should use something more efficient... - */ -void wireless_spy_update(struct net_device * dev, - unsigned char * address, - struct iw_quality * wstats) -{ - struct iw_spy_data * spydata = get_spydata(dev); - int i; - int match = -1; - - /* Make sure driver is not buggy or using the old API */ - if (!spydata) - return; - - /* Update all records that match */ - for (i = 0; i < spydata->spy_number; i++) - if (!compare_ether_addr(address, spydata->spy_address[i])) { - memcpy(&(spydata->spy_stat[i]), wstats, - sizeof(struct iw_quality)); - match = i; - } - - /* Generate an event if we cross the spy threshold. - * To avoid event storms, we have a simple hysteresis : we generate - * event only when we go under the low threshold or above the - * high threshold. */ - if (match >= 0) { - if (spydata->spy_thr_under[match]) { - if (wstats->level > spydata->spy_thr_high.level) { - spydata->spy_thr_under[match] = 0; - iw_send_thrspy_event(dev, spydata, - address, wstats); - } - } else { - if (wstats->level < spydata->spy_thr_low.level) { - spydata->spy_thr_under[match] = 1; - iw_send_thrspy_event(dev, spydata, - address, wstats); - } - } - } -} -EXPORT_SYMBOL(wireless_spy_update); -- cgit v1.3-8-gc7d7 From dfce95f51fe34fa18c87a7d0bea53594b9bf1b9a Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Thu, 24 Sep 2009 11:02:42 -0700 Subject: cfg80211: add firmware and hardware version to wiphy It's useful to provide firmware and hardware version to user space and have a generic interface to retrieve them. Users can provide the version information in bug reports etc. Add fields for firmware and hardware version to struct wiphy. (Dropped nl80211 bits for now and modified remaining bits in favor of ethtool. -- JWL) Cc: Kalle Valo Signed-off-by: John W. Linville --- include/net/cfg80211.h | 3 +++ net/wireless/ethtool.c | 23 ++++++++++++++++++++++- 2 files changed, 25 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 241ea14d6df8..6f4862b3ec2c 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1142,6 +1142,9 @@ struct wiphy { u32 frag_threshold; u32 rts_threshold; + char fw_version[ETHTOOL_BUSINFO_LEN]; + u32 hw_version; + /* If multiple wiphys are registered and you're handed e.g. * a regular netdev with assigned ieee80211_ptr, you won't * know whether it points to a wiphy your driver has registered diff --git a/net/wireless/ethtool.c b/net/wireless/ethtool.c index 80d6d0d31f12..ca4c825be93d 100644 --- a/net/wireless/ethtool.c +++ b/net/wireless/ethtool.c @@ -12,13 +12,34 @@ static void cfg80211_get_drvinfo(struct net_device *dev, strlcpy(info->version, init_utsname()->release, sizeof(info->version)); - strlcpy(info->fw_version, "N/A", sizeof(info->fw_version)); + if (wdev->wiphy->fw_version[0]) + strncpy(info->fw_version, wdev->wiphy->fw_version, + sizeof(info->fw_version)); + else + strncpy(info->fw_version, "N/A", sizeof(info->fw_version)); strlcpy(info->bus_info, dev_name(wiphy_dev(wdev->wiphy)), sizeof(info->bus_info)); } +static int cfg80211_get_regs_len(struct net_device *dev) +{ + /* For now, return 0... */ + return 0; +} + +static void cfg80211_get_regs(struct net_device *dev, struct ethtool_regs *regs, + void *data) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + + regs->version = wdev->wiphy->hw_version; + regs->len = 0; +} + const struct ethtool_ops cfg80211_ethtool_ops = { .get_drvinfo = cfg80211_get_drvinfo, + .get_regs_len = cfg80211_get_regs_len, + .get_regs = cfg80211_get_regs, .get_link = ethtool_op_get_link, }; -- cgit v1.3-8-gc7d7 From 125a77ed9fbd21d1277f53e9ed6b39ad3d34e613 Mon Sep 17 00:00:00 2001 From: Brian Haley Date: Wed, 7 Oct 2009 13:57:10 -0700 Subject: IPv6: Fix 6RD build error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix build error introduced in commit fa857afcf - ipv6 sit: 6rd (IPv6 Rapid Deployment) Support. Struct in6_addr is the issue. I'm only seeing this on x86_64 systems, not on 32-bit with same IPv6 config options, so it could be there's a missing forward declaration somewhere, but including the correct header file fixes the problem too. CC [M] net/ipv6/ip6_tunnel.o In file included from net/ipv6/ip6_tunnel.c:31: include/linux/if_tunnel.h:59: error: field ‘prefix’ has incomplete type make[2]: *** [net/ipv6/ip6_tunnel.o] Error 1 make[1]: *** [net/ipv6] Error 2 Signed-off-by: Brian Haley Signed-off-by: David S. Miller --- include/linux/if_tunnel.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/if_tunnel.h b/include/linux/if_tunnel.h index c53c8e016940..8d76cb4c86fa 100644 --- a/include/linux/if_tunnel.h +++ b/include/linux/if_tunnel.h @@ -5,6 +5,7 @@ #ifdef __KERNEL__ #include +#include #endif #define SIOCGETTUNNEL (SIOCDEVPRIVATE + 0) -- cgit v1.3-8-gc7d7 From f86dcc5aa8c7908f2c287e7a211228df599e3e71 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 7 Oct 2009 00:37:59 +0000 Subject: udp: dynamically size hash tables at boot time UDP_HTABLE_SIZE was initialy defined to 128, which is a bit small for several setups. 4000 active UDP sockets -> 32 sockets per chain in average. An incoming frame has to lookup all sockets to find best match, so long chains hurt latency. Instead of a fixed size hash table that cant be perfect for every needs, let UDP stack choose its table size at boot time like tcp/ip route, using alloc_large_system_hash() helper Add an optional boot parameter, uhash_entries=x so that an admin can force a size between 256 and 65536 if needed, like thash_entries and rhash_entries. dmesg logs two new lines : [ 0.647039] UDP hash table entries: 512 (order: 0, 4096 bytes) [ 0.647099] UDP Lite hash table entries: 512 (order: 0, 4096 bytes) Maximal size on 64bit arches would be 65536 slots, ie 1 MBytes for non debugging spinlocks. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- Documentation/kernel-parameters.txt | 3 ++ include/linux/udp.h | 6 +-- include/net/udp.h | 13 ++++-- net/ipv4/udp.c | 91 +++++++++++++++++++++++++++---------- net/ipv4/udplite.c | 4 +- net/ipv6/udp.c | 6 +-- 6 files changed, 87 insertions(+), 36 deletions(-) (limited to 'include') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 6fa7292947e5..02df20be7764 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2589,6 +2589,9 @@ and is between 256 and 4096 characters. It is defined in the file uart6850= [HW,OSS] Format: , + uhash_entries= [KNL,NET] + Set number of hash buckets for UDP/UDP-Lite connections + uhci-hcd.ignore_oc= [USB] Ignore overcurrent events (default N). Some badly-designed motherboards generate lots of diff --git a/include/linux/udp.h b/include/linux/udp.h index 0cf5c4c0ec81..832361e3e596 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -45,11 +45,11 @@ static inline struct udphdr *udp_hdr(const struct sk_buff *skb) return (struct udphdr *)skb_transport_header(skb); } -#define UDP_HTABLE_SIZE 128 +#define UDP_HTABLE_SIZE_MIN (CONFIG_BASE_SMALL ? 128 : 256) -static inline int udp_hashfn(struct net *net, const unsigned num) +static inline int udp_hashfn(struct net *net, unsigned num, unsigned mask) { - return (num + net_hash_mix(net)) & (UDP_HTABLE_SIZE - 1); + return (num + net_hash_mix(net)) & mask; } struct udp_sock { diff --git a/include/net/udp.h b/include/net/udp.h index f98abd2ce709..22aa2e7eb1d7 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -54,12 +54,19 @@ struct udp_hslot { struct hlist_nulls_head head; spinlock_t lock; } __attribute__((aligned(2 * sizeof(long)))); + struct udp_table { - struct udp_hslot hash[UDP_HTABLE_SIZE]; + struct udp_hslot *hash; + unsigned int mask; + unsigned int log; }; extern struct udp_table udp_table; -extern void udp_table_init(struct udp_table *); - +extern void udp_table_init(struct udp_table *, const char *); +static inline struct udp_hslot *udp_hashslot(struct udp_table *table, + struct net *net, unsigned num) +{ + return &table->hash[udp_hashfn(net, num, table->mask)]; +} /* Note: this must match 'valbool' in sock_setsockopt */ #define UDP_CSUM_NOXMIT 1 diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 6ec6a8a4a224..194bcdc6d9fc 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -106,7 +106,7 @@ #include #include "udp_impl.h" -struct udp_table udp_table; +struct udp_table udp_table __read_mostly; EXPORT_SYMBOL(udp_table); int sysctl_udp_mem[3] __read_mostly; @@ -121,14 +121,16 @@ EXPORT_SYMBOL(sysctl_udp_wmem_min); atomic_t udp_memory_allocated; EXPORT_SYMBOL(udp_memory_allocated); -#define PORTS_PER_CHAIN (65536 / UDP_HTABLE_SIZE) +#define MAX_UDP_PORTS 65536 +#define PORTS_PER_CHAIN (MAX_UDP_PORTS / UDP_HTABLE_SIZE_MIN) static int udp_lib_lport_inuse(struct net *net, __u16 num, const struct udp_hslot *hslot, unsigned long *bitmap, struct sock *sk, int (*saddr_comp)(const struct sock *sk1, - const struct sock *sk2)) + const struct sock *sk2), + unsigned int log) { struct sock *sk2; struct hlist_nulls_node *node; @@ -142,8 +144,7 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num, || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && (*saddr_comp)(sk, sk2)) { if (bitmap) - __set_bit(sk2->sk_hash / UDP_HTABLE_SIZE, - bitmap); + __set_bit(sk2->sk_hash >> log, bitmap); else return 1; } @@ -180,13 +181,15 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, /* * force rand to be an odd multiple of UDP_HTABLE_SIZE */ - rand = (rand | 1) * UDP_HTABLE_SIZE; - for (last = first + UDP_HTABLE_SIZE; first != last; first++) { - hslot = &udptable->hash[udp_hashfn(net, first)]; + rand = (rand | 1) * (udptable->mask + 1); + for (last = first + udptable->mask + 1; + first != last; + first++) { + hslot = udp_hashslot(udptable, net, first); bitmap_zero(bitmap, PORTS_PER_CHAIN); spin_lock_bh(&hslot->lock); udp_lib_lport_inuse(net, snum, hslot, bitmap, sk, - saddr_comp); + saddr_comp, udptable->log); snum = first; /* @@ -196,7 +199,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, */ do { if (low <= snum && snum <= high && - !test_bit(snum / UDP_HTABLE_SIZE, bitmap)) + !test_bit(snum >> udptable->log, bitmap)) goto found; snum += rand; } while (snum != first); @@ -204,9 +207,10 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, } goto fail; } else { - hslot = &udptable->hash[udp_hashfn(net, snum)]; + hslot = udp_hashslot(udptable, net, snum); spin_lock_bh(&hslot->lock); - if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk, saddr_comp)) + if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk, + saddr_comp, 0)) goto fail_unlock; } found: @@ -283,7 +287,7 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, struct sock *sk, *result; struct hlist_nulls_node *node; unsigned short hnum = ntohs(dport); - unsigned int hash = udp_hashfn(net, hnum); + unsigned int hash = udp_hashfn(net, hnum, udptable->mask); struct udp_hslot *hslot = &udptable->hash[hash]; int score, badness; @@ -1013,8 +1017,8 @@ void udp_lib_unhash(struct sock *sk) { if (sk_hashed(sk)) { struct udp_table *udptable = sk->sk_prot->h.udp_table; - unsigned int hash = udp_hashfn(sock_net(sk), sk->sk_hash); - struct udp_hslot *hslot = &udptable->hash[hash]; + struct udp_hslot *hslot = udp_hashslot(udptable, sock_net(sk), + sk->sk_hash); spin_lock_bh(&hslot->lock); if (sk_nulls_del_node_init_rcu(sk)) { @@ -1169,7 +1173,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, struct udp_table *udptable) { struct sock *sk; - struct udp_hslot *hslot = &udptable->hash[udp_hashfn(net, ntohs(uh->dest))]; + struct udp_hslot *hslot = udp_hashslot(udptable, net, ntohs(uh->dest)); int dif; spin_lock(&hslot->lock); @@ -1609,9 +1613,14 @@ static struct sock *udp_get_first(struct seq_file *seq, int start) struct udp_iter_state *state = seq->private; struct net *net = seq_file_net(seq); - for (state->bucket = start; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) { + for (state->bucket = start; state->bucket <= state->udp_table->mask; + ++state->bucket) { struct hlist_nulls_node *node; struct udp_hslot *hslot = &state->udp_table->hash[state->bucket]; + + if (hlist_nulls_empty(&hslot->head)) + continue; + spin_lock_bh(&hslot->lock); sk_nulls_for_each(sk, node, &hslot->head) { if (!net_eq(sock_net(sk), net)) @@ -1636,7 +1645,7 @@ static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk) } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family)); if (!sk) { - if (state->bucket < UDP_HTABLE_SIZE) + if (state->bucket <= state->udp_table->mask) spin_unlock_bh(&state->udp_table->hash[state->bucket].lock); return udp_get_first(seq, state->bucket + 1); } @@ -1656,7 +1665,7 @@ static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos) static void *udp_seq_start(struct seq_file *seq, loff_t *pos) { struct udp_iter_state *state = seq->private; - state->bucket = UDP_HTABLE_SIZE; + state->bucket = MAX_UDP_PORTS; return *pos ? udp_get_idx(seq, *pos-1) : SEQ_START_TOKEN; } @@ -1678,7 +1687,7 @@ static void udp_seq_stop(struct seq_file *seq, void *v) { struct udp_iter_state *state = seq->private; - if (state->bucket < UDP_HTABLE_SIZE) + if (state->bucket <= state->udp_table->mask) spin_unlock_bh(&state->udp_table->hash[state->bucket].lock); } @@ -1738,7 +1747,7 @@ static void udp4_format_sock(struct sock *sp, struct seq_file *f, __u16 destp = ntohs(inet->dport); __u16 srcp = ntohs(inet->sport); - seq_printf(f, "%4d: %08X:%04X %08X:%04X" + seq_printf(f, "%5d: %08X:%04X %08X:%04X" " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d%n", bucket, src, srcp, dest, destp, sp->sk_state, sk_wmem_alloc_get(sp), @@ -1804,11 +1813,43 @@ void udp4_proc_exit(void) } #endif /* CONFIG_PROC_FS */ -void __init udp_table_init(struct udp_table *table) +static __initdata unsigned long uhash_entries; +static int __init set_uhash_entries(char *str) { - int i; + if (!str) + return 0; + uhash_entries = simple_strtoul(str, &str, 0); + if (uhash_entries && uhash_entries < UDP_HTABLE_SIZE_MIN) + uhash_entries = UDP_HTABLE_SIZE_MIN; + return 1; +} +__setup("uhash_entries=", set_uhash_entries); - for (i = 0; i < UDP_HTABLE_SIZE; i++) { +void __init udp_table_init(struct udp_table *table, const char *name) +{ + unsigned int i; + + if (!CONFIG_BASE_SMALL) + table->hash = alloc_large_system_hash(name, + sizeof(struct udp_hslot), + uhash_entries, + 21, /* one slot per 2 MB */ + 0, + &table->log, + &table->mask, + 64 * 1024); + /* + * Make sure hash table has the minimum size + */ + if (CONFIG_BASE_SMALL || table->mask < UDP_HTABLE_SIZE_MIN - 1) { + table->hash = kmalloc(UDP_HTABLE_SIZE_MIN * + sizeof(struct udp_hslot), GFP_KERNEL); + if (!table->hash) + panic(name); + table->log = ilog2(UDP_HTABLE_SIZE_MIN); + table->mask = UDP_HTABLE_SIZE_MIN - 1; + } + for (i = 0; i <= table->mask; i++) { INIT_HLIST_NULLS_HEAD(&table->hash[i].head, i); spin_lock_init(&table->hash[i].lock); } @@ -1818,7 +1859,7 @@ void __init udp_init(void) { unsigned long nr_pages, limit; - udp_table_init(&udp_table); + udp_table_init(&udp_table, "UDP"); /* Set the pressure threshold up by the same strategy of TCP. It is a * fraction of global memory that is up to 1/2 at 256 MB, decreasing * toward zero with the amount of memory, with a floor of 128 pages. diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index 95248d7f75ec..470c504b9554 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -12,7 +12,7 @@ */ #include "udp_impl.h" -struct udp_table udplite_table; +struct udp_table udplite_table __read_mostly; EXPORT_SYMBOL(udplite_table); static int udplite_rcv(struct sk_buff *skb) @@ -110,7 +110,7 @@ static inline int udplite4_proc_init(void) void __init udplite4_register(void) { - udp_table_init(&udplite_table); + udp_table_init(&udplite_table, "UDP-Lite"); if (proto_register(&udplite_prot, 1)) goto out_register_err; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index c6a303ec834c..ff778c172ef2 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -132,7 +132,7 @@ static struct sock *__udp6_lib_lookup(struct net *net, struct sock *sk, *result; struct hlist_nulls_node *node; unsigned short hnum = ntohs(dport); - unsigned int hash = udp_hashfn(net, hnum); + unsigned int hash = udp_hashfn(net, hnum, udptable->mask); struct udp_hslot *hslot = &udptable->hash[hash]; int score, badness; @@ -452,7 +452,7 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, { struct sock *sk, *sk2; const struct udphdr *uh = udp_hdr(skb); - struct udp_hslot *hslot = &udptable->hash[udp_hashfn(net, ntohs(uh->dest))]; + struct udp_hslot *hslot = udp_hashslot(udptable, net, ntohs(uh->dest)); int dif; spin_lock(&hslot->lock); @@ -1197,7 +1197,7 @@ static void udp6_sock_seq_show(struct seq_file *seq, struct sock *sp, int bucket destp = ntohs(inet->dport); srcp = ntohs(inet->sport); seq_printf(seq, - "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " + "%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n", bucket, src->s6_addr32[0], src->s6_addr32[1], -- cgit v1.3-8-gc7d7 From 3758bf25db8caeec667e4e56e030da0ec3060529 Mon Sep 17 00:00:00 2001 From: Anant Gole Date: Wed, 7 Oct 2009 02:59:47 +0000 Subject: can: add TI CAN (HECC) driver TI HECC (High End CAN Controller) module is found on many TI devices. It has 32 hardware mailboxes with full implementation of CAN protocol 2.0B with bus speeds up to 1Mbps. Specifications of the module are available on TI web Signed-off-by: Anant Gole Signed-off-by: David S. Miller --- drivers/net/can/Kconfig | 7 + drivers/net/can/Makefile | 1 + drivers/net/can/ti_hecc.c | 1006 ++++++++++++++++++++++++++++++++++ include/linux/can/platform/ti_hecc.h | 40 ++ 4 files changed, 1054 insertions(+) create mode 100644 drivers/net/can/ti_hecc.c create mode 100644 include/linux/can/platform/ti_hecc.h (limited to 'include') diff --git a/drivers/net/can/Kconfig b/drivers/net/can/Kconfig index df32c109b7ac..26d77cc0ded7 100644 --- a/drivers/net/can/Kconfig +++ b/drivers/net/can/Kconfig @@ -95,6 +95,13 @@ config CAN_AT91 ---help--- This is a driver for the SoC CAN controller in Atmel's AT91SAM9263. +config CAN_TI_HECC + depends on CAN_DEV && ARCH_OMAP3 + tristate "TI High End CAN Controller" + ---help--- + Driver for TI HECC (High End CAN Controller) module found on many + TI devices. The device specifications are available from www.ti.com + config CAN_DEBUG_DEVICES bool "CAN devices debugging messages" depends on CAN diff --git a/drivers/net/can/Makefile b/drivers/net/can/Makefile index 0dea62721f2f..31f4ab5df28b 100644 --- a/drivers/net/can/Makefile +++ b/drivers/net/can/Makefile @@ -11,5 +11,6 @@ obj-y += usb/ obj-$(CONFIG_CAN_SJA1000) += sja1000/ obj-$(CONFIG_CAN_AT91) += at91_can.o +obj-$(CONFIG_CAN_TI_HECC) += ti_hecc.o ccflags-$(CONFIG_CAN_DEBUG_DEVICES) := -DDEBUG diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c new file mode 100644 index 000000000000..814e6c5c6386 --- /dev/null +++ b/drivers/net/can/ti_hecc.c @@ -0,0 +1,1006 @@ +/* + * TI HECC (CAN) device driver + * + * This driver supports TI's HECC (High End CAN Controller module) and the + * specs for the same is available at + * + * Copyright (C) 2009 Texas Instruments Incorporated - http://www.ti.com/ + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation version 2. + * + * This program is distributed as is WITHOUT ANY WARRANTY of any + * kind, whether express or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +/* + * Your platform definitions should specify module ram offsets and interrupt + * number to use as follows: + * + * static struct ti_hecc_platform_data am3517_evm_hecc_pdata = { + * .scc_hecc_offset = 0, + * .scc_ram_offset = 0x3000, + * .hecc_ram_offset = 0x3000, + * .mbx_offset = 0x2000, + * .int_line = 0, + * .revision = 1, + * }; + * + * Please see include/can/platform/ti_hecc.h for description of above fields + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#define DRV_NAME "ti_hecc" +#define HECC_MODULE_VERSION "0.7" +MODULE_VERSION(HECC_MODULE_VERSION); +#define DRV_DESC "TI High End CAN Controller Driver " HECC_MODULE_VERSION + +/* TX / RX Mailbox Configuration */ +#define HECC_MAX_MAILBOXES 32 /* hardware mailboxes - do not change */ +#define MAX_TX_PRIO 0x3F /* hardware value - do not change */ + +/* + * Important Note: TX mailbox configuration + * TX mailboxes should be restricted to the number of SKB buffers to avoid + * maintaining SKB buffers separately. TX mailboxes should be a power of 2 + * for the mailbox logic to work. Top mailbox numbers are reserved for RX + * and lower mailboxes for TX. + * + * HECC_MAX_TX_MBOX HECC_MB_TX_SHIFT + * 4 (default) 2 + * 8 3 + * 16 4 + */ +#define HECC_MB_TX_SHIFT 2 /* as per table above */ +#define HECC_MAX_TX_MBOX BIT(HECC_MB_TX_SHIFT) + +#if (HECC_MAX_TX_MBOX > CAN_ECHO_SKB_MAX) +#error "HECC: MAX TX mailboxes should be equal or less than CAN_ECHO_SKB_MAX" +#endif + +#define HECC_TX_PRIO_SHIFT (HECC_MB_TX_SHIFT) +#define HECC_TX_PRIO_MASK (MAX_TX_PRIO << HECC_MB_TX_SHIFT) +#define HECC_TX_MB_MASK (HECC_MAX_TX_MBOX - 1) +#define HECC_TX_MASK ((HECC_MAX_TX_MBOX - 1) | HECC_TX_PRIO_MASK) +#define HECC_TX_MBOX_MASK (~(BIT(HECC_MAX_TX_MBOX) - 1)) +#define HECC_DEF_NAPI_WEIGHT HECC_MAX_RX_MBOX + +/* + * Important Note: RX mailbox configuration + * RX mailboxes are further logically split into two - main and buffer + * mailboxes. The goal is to get all packets into main mailboxes as + * driven by mailbox number and receive priority (higher to lower) and + * buffer mailboxes are used to receive pkts while main mailboxes are being + * processed. This ensures in-order packet reception. + * + * Here are the recommended values for buffer mailbox. Note that RX mailboxes + * start after TX mailboxes: + * + * HECC_MAX_RX_MBOX HECC_RX_BUFFER_MBOX No of buffer mailboxes + * 28 12 8 + * 16 20 4 + */ + +#define HECC_MAX_RX_MBOX (HECC_MAX_MAILBOXES - HECC_MAX_TX_MBOX) +#define HECC_RX_BUFFER_MBOX 12 /* as per table above */ +#define HECC_RX_FIRST_MBOX (HECC_MAX_MAILBOXES - 1) +#define HECC_RX_HIGH_MBOX_MASK (~(BIT(HECC_RX_BUFFER_MBOX) - 1)) + +/* TI HECC module registers */ +#define HECC_CANME 0x0 /* Mailbox enable */ +#define HECC_CANMD 0x4 /* Mailbox direction */ +#define HECC_CANTRS 0x8 /* Transmit request set */ +#define HECC_CANTRR 0xC /* Transmit request */ +#define HECC_CANTA 0x10 /* Transmission acknowledge */ +#define HECC_CANAA 0x14 /* Abort acknowledge */ +#define HECC_CANRMP 0x18 /* Receive message pending */ +#define HECC_CANRML 0x1C /* Remote message lost */ +#define HECC_CANRFP 0x20 /* Remote frame pending */ +#define HECC_CANGAM 0x24 /* SECC only:Global acceptance mask */ +#define HECC_CANMC 0x28 /* Master control */ +#define HECC_CANBTC 0x2C /* Bit timing configuration */ +#define HECC_CANES 0x30 /* Error and status */ +#define HECC_CANTEC 0x34 /* Transmit error counter */ +#define HECC_CANREC 0x38 /* Receive error counter */ +#define HECC_CANGIF0 0x3C /* Global interrupt flag 0 */ +#define HECC_CANGIM 0x40 /* Global interrupt mask */ +#define HECC_CANGIF1 0x44 /* Global interrupt flag 1 */ +#define HECC_CANMIM 0x48 /* Mailbox interrupt mask */ +#define HECC_CANMIL 0x4C /* Mailbox interrupt level */ +#define HECC_CANOPC 0x50 /* Overwrite protection control */ +#define HECC_CANTIOC 0x54 /* Transmit I/O control */ +#define HECC_CANRIOC 0x58 /* Receive I/O control */ +#define HECC_CANLNT 0x5C /* HECC only: Local network time */ +#define HECC_CANTOC 0x60 /* HECC only: Time-out control */ +#define HECC_CANTOS 0x64 /* HECC only: Time-out status */ +#define HECC_CANTIOCE 0x68 /* SCC only:Enhanced TX I/O control */ +#define HECC_CANRIOCE 0x6C /* SCC only:Enhanced RX I/O control */ + +/* Mailbox registers */ +#define HECC_CANMID 0x0 +#define HECC_CANMCF 0x4 +#define HECC_CANMDL 0x8 +#define HECC_CANMDH 0xC + +#define HECC_SET_REG 0xFFFFFFFF +#define HECC_CANID_MASK 0x3FF /* 18 bits mask for extended id's */ +#define HECC_CCE_WAIT_COUNT 100 /* Wait for ~1 sec for CCE bit */ + +#define HECC_CANMC_SCM BIT(13) /* SCC compat mode */ +#define HECC_CANMC_CCR BIT(12) /* Change config request */ +#define HECC_CANMC_PDR BIT(11) /* Local Power down - for sleep mode */ +#define HECC_CANMC_ABO BIT(7) /* Auto Bus On */ +#define HECC_CANMC_STM BIT(6) /* Self test mode - loopback */ +#define HECC_CANMC_SRES BIT(5) /* Software reset */ + +#define HECC_CANTIOC_EN BIT(3) /* Enable CAN TX I/O pin */ +#define HECC_CANRIOC_EN BIT(3) /* Enable CAN RX I/O pin */ + +#define HECC_CANMID_IDE BIT(31) /* Extended frame format */ +#define HECC_CANMID_AME BIT(30) /* Acceptance mask enable */ +#define HECC_CANMID_AAM BIT(29) /* Auto answer mode */ + +#define HECC_CANES_FE BIT(24) /* form error */ +#define HECC_CANES_BE BIT(23) /* bit error */ +#define HECC_CANES_SA1 BIT(22) /* stuck at dominant error */ +#define HECC_CANES_CRCE BIT(21) /* CRC error */ +#define HECC_CANES_SE BIT(20) /* stuff bit error */ +#define HECC_CANES_ACKE BIT(19) /* ack error */ +#define HECC_CANES_BO BIT(18) /* Bus off status */ +#define HECC_CANES_EP BIT(17) /* Error passive status */ +#define HECC_CANES_EW BIT(16) /* Error warning status */ +#define HECC_CANES_SMA BIT(5) /* suspend mode ack */ +#define HECC_CANES_CCE BIT(4) /* Change config enabled */ +#define HECC_CANES_PDA BIT(3) /* Power down mode ack */ + +#define HECC_CANBTC_SAM BIT(7) /* sample points */ + +#define HECC_BUS_ERROR (HECC_CANES_FE | HECC_CANES_BE |\ + HECC_CANES_CRCE | HECC_CANES_SE |\ + HECC_CANES_ACKE) + +#define HECC_CANMCF_RTR BIT(4) /* Remote transmit request */ + +#define HECC_CANGIF_MAIF BIT(17) /* Message alarm interrupt */ +#define HECC_CANGIF_TCOIF BIT(16) /* Timer counter overflow int */ +#define HECC_CANGIF_GMIF BIT(15) /* Global mailbox interrupt */ +#define HECC_CANGIF_AAIF BIT(14) /* Abort ack interrupt */ +#define HECC_CANGIF_WDIF BIT(13) /* Write denied interrupt */ +#define HECC_CANGIF_WUIF BIT(12) /* Wake up interrupt */ +#define HECC_CANGIF_RMLIF BIT(11) /* Receive message lost interrupt */ +#define HECC_CANGIF_BOIF BIT(10) /* Bus off interrupt */ +#define HECC_CANGIF_EPIF BIT(9) /* Error passive interrupt */ +#define HECC_CANGIF_WLIF BIT(8) /* Warning level interrupt */ +#define HECC_CANGIF_MBOX_MASK 0x1F /* Mailbox number mask */ +#define HECC_CANGIM_I1EN BIT(1) /* Int line 1 enable */ +#define HECC_CANGIM_I0EN BIT(0) /* Int line 0 enable */ +#define HECC_CANGIM_DEF_MASK 0x700 /* only busoff/warning/passive */ +#define HECC_CANGIM_SIL BIT(2) /* system interrupts to int line 1 */ + +/* CAN Bittiming constants as per HECC specs */ +static struct can_bittiming_const ti_hecc_bittiming_const = { + .name = DRV_NAME, + .tseg1_min = 1, + .tseg1_max = 16, + .tseg2_min = 1, + .tseg2_max = 8, + .sjw_max = 4, + .brp_min = 1, + .brp_max = 256, + .brp_inc = 1, +}; + +struct ti_hecc_priv { + struct can_priv can; /* MUST be first member/field */ + struct napi_struct napi; + struct net_device *ndev; + struct clk *clk; + void __iomem *base; + u32 scc_ram_offset; + u32 hecc_ram_offset; + u32 mbx_offset; + u32 int_line; + spinlock_t mbx_lock; /* CANME register needs protection */ + u32 tx_head; + u32 tx_tail; + u32 rx_next; +}; + +static inline int get_tx_head_mb(struct ti_hecc_priv *priv) +{ + return priv->tx_head & HECC_TX_MB_MASK; +} + +static inline int get_tx_tail_mb(struct ti_hecc_priv *priv) +{ + return priv->tx_tail & HECC_TX_MB_MASK; +} + +static inline int get_tx_head_prio(struct ti_hecc_priv *priv) +{ + return (priv->tx_head >> HECC_TX_PRIO_SHIFT) & MAX_TX_PRIO; +} + +static inline void hecc_write_lam(struct ti_hecc_priv *priv, u32 mbxno, u32 val) +{ + __raw_writel(val, priv->base + priv->hecc_ram_offset + mbxno * 4); +} + +static inline void hecc_write_mbx(struct ti_hecc_priv *priv, u32 mbxno, + u32 reg, u32 val) +{ + __raw_writel(val, priv->base + priv->mbx_offset + mbxno * 0x10 + + reg); +} + +static inline u32 hecc_read_mbx(struct ti_hecc_priv *priv, u32 mbxno, u32 reg) +{ + return __raw_readl(priv->base + priv->mbx_offset + mbxno * 0x10 + + reg); +} + +static inline void hecc_write(struct ti_hecc_priv *priv, u32 reg, u32 val) +{ + __raw_writel(val, priv->base + reg); +} + +static inline u32 hecc_read(struct ti_hecc_priv *priv, int reg) +{ + return __raw_readl(priv->base + reg); +} + +static inline void hecc_set_bit(struct ti_hecc_priv *priv, int reg, + u32 bit_mask) +{ + hecc_write(priv, reg, hecc_read(priv, reg) | bit_mask); +} + +static inline void hecc_clear_bit(struct ti_hecc_priv *priv, int reg, + u32 bit_mask) +{ + hecc_write(priv, reg, hecc_read(priv, reg) & ~bit_mask); +} + +static inline u32 hecc_get_bit(struct ti_hecc_priv *priv, int reg, u32 bit_mask) +{ + return (hecc_read(priv, reg) & bit_mask) ? 1 : 0; +} + +static int ti_hecc_get_state(const struct net_device *ndev, + enum can_state *state) +{ + struct ti_hecc_priv *priv = netdev_priv(ndev); + + *state = priv->can.state; + return 0; +} + +static int ti_hecc_set_btc(struct ti_hecc_priv *priv) +{ + struct can_bittiming *bit_timing = &priv->can.bittiming; + u32 can_btc; + + can_btc = (bit_timing->phase_seg2 - 1) & 0x7; + can_btc |= ((bit_timing->phase_seg1 + bit_timing->prop_seg - 1) + & 0xF) << 3; + if (priv->can.ctrlmode & CAN_CTRLMODE_3_SAMPLES) { + if (bit_timing->brp > 4) + can_btc |= HECC_CANBTC_SAM; + else + dev_warn(priv->ndev->dev.parent, "WARN: Triple" \ + "sampling not set due to h/w limitations"); + } + can_btc |= ((bit_timing->sjw - 1) & 0x3) << 8; + can_btc |= ((bit_timing->brp - 1) & 0xFF) << 16; + + /* ERM being set to 0 by default meaning resync at falling edge */ + + hecc_write(priv, HECC_CANBTC, can_btc); + dev_info(priv->ndev->dev.parent, "setting CANBTC=%#x\n", can_btc); + + return 0; +} + +static void ti_hecc_reset(struct net_device *ndev) +{ + u32 cnt; + struct ti_hecc_priv *priv = netdev_priv(ndev); + + dev_dbg(ndev->dev.parent, "resetting hecc ...\n"); + hecc_set_bit(priv, HECC_CANMC, HECC_CANMC_SRES); + + /* Set change control request and wait till enabled */ + hecc_set_bit(priv, HECC_CANMC, HECC_CANMC_CCR); + + /* + * INFO: It has been observed that at times CCE bit may not be + * set and hw seems to be ok even if this bit is not set so + * timing out with a timing of 1ms to respect the specs + */ + cnt = HECC_CCE_WAIT_COUNT; + while (!hecc_get_bit(priv, HECC_CANES, HECC_CANES_CCE) && cnt != 0) { + --cnt; + udelay(10); + } + + /* + * Note: On HECC, BTC can be programmed only in initialization mode, so + * it is expected that the can bittiming parameters are set via ip + * utility before the device is opened + */ + ti_hecc_set_btc(priv); + + /* Clear CCR (and CANMC register) and wait for CCE = 0 enable */ + hecc_write(priv, HECC_CANMC, 0); + + /* + * INFO: CAN net stack handles bus off and hence disabling auto-bus-on + * hecc_set_bit(priv, HECC_CANMC, HECC_CANMC_ABO); + */ + + /* + * INFO: It has been observed that at times CCE bit may not be + * set and hw seems to be ok even if this bit is not set so + */ + cnt = HECC_CCE_WAIT_COUNT; + while (hecc_get_bit(priv, HECC_CANES, HECC_CANES_CCE) && cnt != 0) { + --cnt; + udelay(10); + } + + /* Enable TX and RX I/O Control pins */ + hecc_write(priv, HECC_CANTIOC, HECC_CANTIOC_EN); + hecc_write(priv, HECC_CANRIOC, HECC_CANRIOC_EN); + + /* Clear registers for clean operation */ + hecc_write(priv, HECC_CANTA, HECC_SET_REG); + hecc_write(priv, HECC_CANRMP, HECC_SET_REG); + hecc_write(priv, HECC_CANGIF0, HECC_SET_REG); + hecc_write(priv, HECC_CANGIF1, HECC_SET_REG); + hecc_write(priv, HECC_CANME, 0); + hecc_write(priv, HECC_CANMD, 0); + + /* SCC compat mode NOT supported (and not needed too) */ + hecc_set_bit(priv, HECC_CANMC, HECC_CANMC_SCM); +} + +static void ti_hecc_start(struct net_device *ndev) +{ + struct ti_hecc_priv *priv = netdev_priv(ndev); + u32 cnt, mbxno, mbx_mask; + + /* put HECC in initialization mode and set btc */ + ti_hecc_reset(ndev); + + priv->tx_head = priv->tx_tail = HECC_TX_MASK; + priv->rx_next = HECC_RX_FIRST_MBOX; + + /* Enable local and global acceptance mask registers */ + hecc_write(priv, HECC_CANGAM, HECC_SET_REG); + + /* Prepare configured mailboxes to receive messages */ + for (cnt = 0; cnt < HECC_MAX_RX_MBOX; cnt++) { + mbxno = HECC_MAX_MAILBOXES - 1 - cnt; + mbx_mask = BIT(mbxno); + hecc_clear_bit(priv, HECC_CANME, mbx_mask); + hecc_write_mbx(priv, mbxno, HECC_CANMID, HECC_CANMID_AME); + hecc_write_lam(priv, mbxno, HECC_SET_REG); + hecc_set_bit(priv, HECC_CANMD, mbx_mask); + hecc_set_bit(priv, HECC_CANME, mbx_mask); + hecc_set_bit(priv, HECC_CANMIM, mbx_mask); + } + + /* Prevent message over-write & Enable interrupts */ + hecc_write(priv, HECC_CANOPC, HECC_SET_REG); + if (priv->int_line) { + hecc_write(priv, HECC_CANMIL, HECC_SET_REG); + hecc_write(priv, HECC_CANGIM, HECC_CANGIM_DEF_MASK | + HECC_CANGIM_I1EN | HECC_CANGIM_SIL); + } else { + hecc_write(priv, HECC_CANMIL, 0); + hecc_write(priv, HECC_CANGIM, + HECC_CANGIM_DEF_MASK | HECC_CANGIM_I0EN); + } + priv->can.state = CAN_STATE_ERROR_ACTIVE; +} + +static void ti_hecc_stop(struct net_device *ndev) +{ + struct ti_hecc_priv *priv = netdev_priv(ndev); + + /* Disable interrupts and disable mailboxes */ + hecc_write(priv, HECC_CANGIM, 0); + hecc_write(priv, HECC_CANMIM, 0); + hecc_write(priv, HECC_CANME, 0); + priv->can.state = CAN_STATE_STOPPED; +} + +static int ti_hecc_do_set_mode(struct net_device *ndev, enum can_mode mode) +{ + int ret = 0; + + switch (mode) { + case CAN_MODE_START: + ti_hecc_start(ndev); + netif_wake_queue(ndev); + break; + default: + ret = -EOPNOTSUPP; + break; + } + + return ret; +} + +/* + * ti_hecc_xmit: HECC Transmit + * + * The transmit mailboxes start from 0 to HECC_MAX_TX_MBOX. In HECC the + * priority of the mailbox for tranmission is dependent upon priority setting + * field in mailbox registers. The mailbox with highest value in priority field + * is transmitted first. Only when two mailboxes have the same value in + * priority field the highest numbered mailbox is transmitted first. + * + * To utilize the HECC priority feature as described above we start with the + * highest numbered mailbox with highest priority level and move on to the next + * mailbox with the same priority level and so on. Once we loop through all the + * transmit mailboxes we choose the next priority level (lower) and so on + * until we reach the lowest priority level on the lowest numbered mailbox + * when we stop transmission until all mailboxes are transmitted and then + * restart at highest numbered mailbox with highest priority. + * + * Two counters (head and tail) are used to track the next mailbox to transmit + * and to track the echo buffer for already transmitted mailbox. The queue + * is stopped when all the mailboxes are busy or when there is a priority + * value roll-over happens. + */ +static netdev_tx_t ti_hecc_xmit(struct sk_buff *skb, struct net_device *ndev) +{ + struct ti_hecc_priv *priv = netdev_priv(ndev); + struct can_frame *cf = (struct can_frame *)skb->data; + u32 mbxno, mbx_mask, data; + unsigned long flags; + + mbxno = get_tx_head_mb(priv); + mbx_mask = BIT(mbxno); + spin_lock_irqsave(&priv->mbx_lock, flags); + if (unlikely(hecc_read(priv, HECC_CANME) & mbx_mask)) { + spin_unlock_irqrestore(&priv->mbx_lock, flags); + netif_stop_queue(ndev); + dev_err(priv->ndev->dev.parent, + "BUG: TX mbx not ready tx_head=%08X, tx_tail=%08X\n", + priv->tx_head, priv->tx_tail); + return NETDEV_TX_BUSY; + } + spin_unlock_irqrestore(&priv->mbx_lock, flags); + + /* Prepare mailbox for transmission */ + data = min_t(u8, cf->can_dlc, 8); + if (cf->can_id & CAN_RTR_FLAG) /* Remote transmission request */ + data |= HECC_CANMCF_RTR; + data |= get_tx_head_prio(priv) << 8; + hecc_write_mbx(priv, mbxno, HECC_CANMCF, data); + + if (cf->can_id & CAN_EFF_FLAG) /* Extended frame format */ + data = (cf->can_id & CAN_EFF_MASK) | HECC_CANMID_IDE; + else /* Standard frame format */ + data = (cf->can_id & CAN_SFF_MASK) << 18; + hecc_write_mbx(priv, mbxno, HECC_CANMID, data); + hecc_write_mbx(priv, mbxno, HECC_CANMDL, + be32_to_cpu(*(u32 *)(cf->data))); + if (cf->can_dlc > 4) + hecc_write_mbx(priv, mbxno, HECC_CANMDH, + be32_to_cpu(*(u32 *)(cf->data + 4))); + else + *(u32 *)(cf->data + 4) = 0; + can_put_echo_skb(skb, ndev, mbxno); + + spin_lock_irqsave(&priv->mbx_lock, flags); + --priv->tx_head; + if ((hecc_read(priv, HECC_CANME) & BIT(get_tx_head_mb(priv))) || + (priv->tx_head & HECC_TX_MASK) == HECC_TX_MASK) { + netif_stop_queue(ndev); + } + hecc_set_bit(priv, HECC_CANME, mbx_mask); + spin_unlock_irqrestore(&priv->mbx_lock, flags); + + hecc_clear_bit(priv, HECC_CANMD, mbx_mask); + hecc_set_bit(priv, HECC_CANMIM, mbx_mask); + hecc_write(priv, HECC_CANTRS, mbx_mask); + + return NETDEV_TX_OK; +} + +static int ti_hecc_rx_pkt(struct ti_hecc_priv *priv, int mbxno) +{ + struct net_device_stats *stats = &priv->ndev->stats; + struct can_frame *cf; + struct sk_buff *skb; + u32 data, mbx_mask; + unsigned long flags; + + skb = netdev_alloc_skb(priv->ndev, sizeof(struct can_frame)); + if (!skb) { + if (printk_ratelimit()) + dev_err(priv->ndev->dev.parent, + "ti_hecc_rx_pkt: netdev_alloc_skb() failed\n"); + return -ENOMEM; + } + skb->protocol = __constant_htons(ETH_P_CAN); + skb->ip_summed = CHECKSUM_UNNECESSARY; + + mbx_mask = BIT(mbxno); + cf = (struct can_frame *)skb_put(skb, sizeof(struct can_frame)); + data = hecc_read_mbx(priv, mbxno, HECC_CANMID); + if (data & HECC_CANMID_IDE) + cf->can_id = (data & CAN_EFF_MASK) | CAN_EFF_FLAG; + else + cf->can_id = (data >> 18) & CAN_SFF_MASK; + data = hecc_read_mbx(priv, mbxno, HECC_CANMCF); + if (data & HECC_CANMCF_RTR) + cf->can_id |= CAN_RTR_FLAG; + cf->can_dlc = data & 0xF; + data = hecc_read_mbx(priv, mbxno, HECC_CANMDL); + *(u32 *)(cf->data) = cpu_to_be32(data); + if (cf->can_dlc > 4) { + data = hecc_read_mbx(priv, mbxno, HECC_CANMDH); + *(u32 *)(cf->data + 4) = cpu_to_be32(data); + } else { + *(u32 *)(cf->data + 4) = 0; + } + spin_lock_irqsave(&priv->mbx_lock, flags); + hecc_clear_bit(priv, HECC_CANME, mbx_mask); + hecc_write(priv, HECC_CANRMP, mbx_mask); + /* enable mailbox only if it is part of rx buffer mailboxes */ + if (priv->rx_next < HECC_RX_BUFFER_MBOX) + hecc_set_bit(priv, HECC_CANME, mbx_mask); + spin_unlock_irqrestore(&priv->mbx_lock, flags); + + stats->rx_bytes += cf->can_dlc; + netif_receive_skb(skb); + stats->rx_packets++; + + return 0; +} + +/* + * ti_hecc_rx_poll - HECC receive pkts + * + * The receive mailboxes start from highest numbered mailbox till last xmit + * mailbox. On CAN frame reception the hardware places the data into highest + * numbered mailbox that matches the CAN ID filter. Since all receive mailboxes + * have same filtering (ALL CAN frames) packets will arrive in the highest + * available RX mailbox and we need to ensure in-order packet reception. + * + * To ensure the packets are received in the right order we logically divide + * the RX mailboxes into main and buffer mailboxes. Packets are received as per + * mailbox priotity (higher to lower) in the main bank and once it is full we + * disable further reception into main mailboxes. While the main mailboxes are + * processed in NAPI, further packets are received in buffer mailboxes. + * + * We maintain a RX next mailbox counter to process packets and once all main + * mailboxe packets are passed to the upper stack we enable all of them but + * continue to process packets received in buffer mailboxes. With each packet + * received from buffer mailbox we enable it immediately so as to handle the + * overflow from higher mailboxes. + */ +static int ti_hecc_rx_poll(struct napi_struct *napi, int quota) +{ + struct net_device *ndev = napi->dev; + struct ti_hecc_priv *priv = netdev_priv(ndev); + u32 num_pkts = 0; + u32 mbx_mask; + unsigned long pending_pkts, flags; + + if (!netif_running(ndev)) + return 0; + + while ((pending_pkts = hecc_read(priv, HECC_CANRMP)) && + num_pkts < quota) { + mbx_mask = BIT(priv->rx_next); /* next rx mailbox to process */ + if (mbx_mask & pending_pkts) { + if (ti_hecc_rx_pkt(priv, priv->rx_next) < 0) + return num_pkts; + ++num_pkts; + } else if (priv->rx_next > HECC_RX_BUFFER_MBOX) { + break; /* pkt not received yet */ + } + --priv->rx_next; + if (priv->rx_next == HECC_RX_BUFFER_MBOX) { + /* enable high bank mailboxes */ + spin_lock_irqsave(&priv->mbx_lock, flags); + mbx_mask = hecc_read(priv, HECC_CANME); + mbx_mask |= HECC_RX_HIGH_MBOX_MASK; + hecc_write(priv, HECC_CANME, mbx_mask); + spin_unlock_irqrestore(&priv->mbx_lock, flags); + } else if (priv->rx_next == HECC_MAX_TX_MBOX - 1) { + priv->rx_next = HECC_RX_FIRST_MBOX; + break; + } + } + + /* Enable packet interrupt if all pkts are handled */ + if (hecc_read(priv, HECC_CANRMP) == 0) { + napi_complete(napi); + /* Re-enable RX mailbox interrupts */ + mbx_mask = hecc_read(priv, HECC_CANMIM); + mbx_mask |= HECC_TX_MBOX_MASK; + hecc_write(priv, HECC_CANMIM, mbx_mask); + } + + return num_pkts; +} + +static int ti_hecc_error(struct net_device *ndev, int int_status, + int err_status) +{ + struct ti_hecc_priv *priv = netdev_priv(ndev); + struct net_device_stats *stats = &ndev->stats; + struct can_frame *cf; + struct sk_buff *skb; + + /* propogate the error condition to the can stack */ + skb = netdev_alloc_skb(ndev, sizeof(struct can_frame)); + if (!skb) { + if (printk_ratelimit()) + dev_err(priv->ndev->dev.parent, + "ti_hecc_error: netdev_alloc_skb() failed\n"); + return -ENOMEM; + } + skb->protocol = __constant_htons(ETH_P_CAN); + skb->ip_summed = CHECKSUM_UNNECESSARY; + cf = (struct can_frame *)skb_put(skb, sizeof(struct can_frame)); + memset(cf, 0, sizeof(struct can_frame)); + cf->can_id = CAN_ERR_FLAG; + cf->can_dlc = CAN_ERR_DLC; + + if (int_status & HECC_CANGIF_WLIF) { /* warning level int */ + if ((int_status & HECC_CANGIF_BOIF) == 0) { + priv->can.state = CAN_STATE_ERROR_WARNING; + ++priv->can.can_stats.error_warning; + cf->can_id |= CAN_ERR_CRTL; + if (hecc_read(priv, HECC_CANTEC) > 96) + cf->data[1] |= CAN_ERR_CRTL_TX_WARNING; + if (hecc_read(priv, HECC_CANREC) > 96) + cf->data[1] |= CAN_ERR_CRTL_RX_WARNING; + } + hecc_set_bit(priv, HECC_CANES, HECC_CANES_EW); + dev_dbg(priv->ndev->dev.parent, "Error Warning interrupt\n"); + hecc_clear_bit(priv, HECC_CANMC, HECC_CANMC_CCR); + } + + if (int_status & HECC_CANGIF_EPIF) { /* error passive int */ + if ((int_status & HECC_CANGIF_BOIF) == 0) { + priv->can.state = CAN_STATE_ERROR_PASSIVE; + ++priv->can.can_stats.error_passive; + cf->can_id |= CAN_ERR_CRTL; + if (hecc_read(priv, HECC_CANTEC) > 127) + cf->data[1] |= CAN_ERR_CRTL_TX_PASSIVE; + if (hecc_read(priv, HECC_CANREC) > 127) + cf->data[1] |= CAN_ERR_CRTL_RX_PASSIVE; + } + hecc_set_bit(priv, HECC_CANES, HECC_CANES_EP); + dev_dbg(priv->ndev->dev.parent, "Error passive interrupt\n"); + hecc_clear_bit(priv, HECC_CANMC, HECC_CANMC_CCR); + } + + /* + * Need to check busoff condition in error status register too to + * ensure warning interrupts don't hog the system + */ + if ((int_status & HECC_CANGIF_BOIF) || (err_status & HECC_CANES_BO)) { + priv->can.state = CAN_STATE_BUS_OFF; + cf->can_id |= CAN_ERR_BUSOFF; + hecc_set_bit(priv, HECC_CANES, HECC_CANES_BO); + hecc_clear_bit(priv, HECC_CANMC, HECC_CANMC_CCR); + /* Disable all interrupts in bus-off to avoid int hog */ + hecc_write(priv, HECC_CANGIM, 0); + can_bus_off(ndev); + } + + if (err_status & HECC_BUS_ERROR) { + ++priv->can.can_stats.bus_error; + cf->can_id |= CAN_ERR_BUSERROR | CAN_ERR_PROT; + cf->data[2] |= CAN_ERR_PROT_UNSPEC; + if (err_status & HECC_CANES_FE) { + hecc_set_bit(priv, HECC_CANES, HECC_CANES_FE); + cf->data[2] |= CAN_ERR_PROT_FORM; + } + if (err_status & HECC_CANES_BE) { + hecc_set_bit(priv, HECC_CANES, HECC_CANES_BE); + cf->data[2] |= CAN_ERR_PROT_BIT; + } + if (err_status & HECC_CANES_SE) { + hecc_set_bit(priv, HECC_CANES, HECC_CANES_SE); + cf->data[2] |= CAN_ERR_PROT_STUFF; + } + if (err_status & HECC_CANES_CRCE) { + hecc_set_bit(priv, HECC_CANES, HECC_CANES_CRCE); + cf->data[2] |= CAN_ERR_PROT_LOC_CRC_SEQ | + CAN_ERR_PROT_LOC_CRC_DEL; + } + if (err_status & HECC_CANES_ACKE) { + hecc_set_bit(priv, HECC_CANES, HECC_CANES_ACKE); + cf->data[2] |= CAN_ERR_PROT_LOC_ACK | + CAN_ERR_PROT_LOC_ACK_DEL; + } + } + + netif_receive_skb(skb); + stats->rx_packets++; + stats->rx_bytes += cf->can_dlc; + return 0; +} + +static irqreturn_t ti_hecc_interrupt(int irq, void *dev_id) +{ + struct net_device *ndev = (struct net_device *)dev_id; + struct ti_hecc_priv *priv = netdev_priv(ndev); + struct net_device_stats *stats = &ndev->stats; + u32 mbxno, mbx_mask, int_status, err_status; + unsigned long ack, flags; + + int_status = hecc_read(priv, + (priv->int_line) ? HECC_CANGIF1 : HECC_CANGIF0); + + if (!int_status) + return IRQ_NONE; + + err_status = hecc_read(priv, HECC_CANES); + if (err_status & (HECC_BUS_ERROR | HECC_CANES_BO | + HECC_CANES_EP | HECC_CANES_EW)) + ti_hecc_error(ndev, int_status, err_status); + + if (int_status & HECC_CANGIF_GMIF) { + while (priv->tx_tail - priv->tx_head > 0) { + mbxno = get_tx_tail_mb(priv); + mbx_mask = BIT(mbxno); + if (!(mbx_mask & hecc_read(priv, HECC_CANTA))) + break; + hecc_clear_bit(priv, HECC_CANMIM, mbx_mask); + hecc_write(priv, HECC_CANTA, mbx_mask); + spin_lock_irqsave(&priv->mbx_lock, flags); + hecc_clear_bit(priv, HECC_CANME, mbx_mask); + spin_unlock_irqrestore(&priv->mbx_lock, flags); + stats->tx_bytes += hecc_read_mbx(priv, mbxno, + HECC_CANMCF) & 0xF; + stats->tx_packets++; + can_get_echo_skb(ndev, mbxno); + --priv->tx_tail; + } + + /* restart queue if wrap-up or if queue stalled on last pkt */ + if (((priv->tx_head == priv->tx_tail) && + ((priv->tx_head & HECC_TX_MASK) != HECC_TX_MASK)) || + (((priv->tx_tail & HECC_TX_MASK) == HECC_TX_MASK) && + ((priv->tx_head & HECC_TX_MASK) == HECC_TX_MASK))) + netif_wake_queue(ndev); + + /* Disable RX mailbox interrupts and let NAPI reenable them */ + if (hecc_read(priv, HECC_CANRMP)) { + ack = hecc_read(priv, HECC_CANMIM); + ack &= BIT(HECC_MAX_TX_MBOX) - 1; + hecc_write(priv, HECC_CANMIM, ack); + napi_schedule(&priv->napi); + } + } + + /* clear all interrupt conditions - read back to avoid spurious ints */ + if (priv->int_line) { + hecc_write(priv, HECC_CANGIF1, HECC_SET_REG); + int_status = hecc_read(priv, HECC_CANGIF1); + } else { + hecc_write(priv, HECC_CANGIF0, HECC_SET_REG); + int_status = hecc_read(priv, HECC_CANGIF0); + } + + return IRQ_HANDLED; +} + +static int ti_hecc_open(struct net_device *ndev) +{ + struct ti_hecc_priv *priv = netdev_priv(ndev); + int err; + + err = request_irq(ndev->irq, ti_hecc_interrupt, IRQF_SHARED, + ndev->name, ndev); + if (err) { + dev_err(ndev->dev.parent, "error requesting interrupt\n"); + return err; + } + + /* Open common can device */ + err = open_candev(ndev); + if (err) { + dev_err(ndev->dev.parent, "open_candev() failed %d\n", err); + free_irq(ndev->irq, ndev); + return err; + } + + clk_enable(priv->clk); + ti_hecc_start(ndev); + napi_enable(&priv->napi); + netif_start_queue(ndev); + + return 0; +} + +static int ti_hecc_close(struct net_device *ndev) +{ + struct ti_hecc_priv *priv = netdev_priv(ndev); + + netif_stop_queue(ndev); + napi_disable(&priv->napi); + ti_hecc_stop(ndev); + free_irq(ndev->irq, ndev); + clk_disable(priv->clk); + close_candev(ndev); + + return 0; +} + +static const struct net_device_ops ti_hecc_netdev_ops = { + .ndo_open = ti_hecc_open, + .ndo_stop = ti_hecc_close, + .ndo_start_xmit = ti_hecc_xmit, +}; + +static int ti_hecc_probe(struct platform_device *pdev) +{ + struct net_device *ndev = (struct net_device *)0; + struct ti_hecc_priv *priv; + struct ti_hecc_platform_data *pdata; + struct resource *mem, *irq; + void __iomem *addr; + int err = -ENODEV; + + pdata = pdev->dev.platform_data; + if (!pdata) { + dev_err(&pdev->dev, "No platform data\n"); + goto probe_exit; + } + + mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!mem) { + dev_err(&pdev->dev, "No mem resources\n"); + goto probe_exit; + } + irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0); + if (!irq) { + dev_err(&pdev->dev, "No irq resource\n"); + goto probe_exit; + } + if (!request_mem_region(mem->start, resource_size(mem), pdev->name)) { + dev_err(&pdev->dev, "HECC region already claimed\n"); + err = -EBUSY; + goto probe_exit; + } + addr = ioremap(mem->start, resource_size(mem)); + if (!addr) { + dev_err(&pdev->dev, "ioremap failed\n"); + err = -ENOMEM; + goto probe_exit_free_region; + } + + ndev = alloc_candev(sizeof(struct ti_hecc_priv)); + if (!ndev) { + dev_err(&pdev->dev, "alloc_candev failed\n"); + err = -ENOMEM; + goto probe_exit_iounmap; + } + + priv = netdev_priv(ndev); + priv->ndev = ndev; + priv->base = addr; + priv->scc_ram_offset = pdata->scc_ram_offset; + priv->hecc_ram_offset = pdata->hecc_ram_offset; + priv->mbx_offset = pdata->mbx_offset; + priv->int_line = pdata->int_line; + + priv->can.bittiming_const = &ti_hecc_bittiming_const; + priv->can.do_set_mode = ti_hecc_do_set_mode; + priv->can.do_get_state = ti_hecc_get_state; + + ndev->irq = irq->start; + ndev->flags |= IFF_ECHO; + platform_set_drvdata(pdev, ndev); + SET_NETDEV_DEV(ndev, &pdev->dev); + ndev->netdev_ops = &ti_hecc_netdev_ops; + + priv->clk = clk_get(&pdev->dev, "hecc_ck"); + if (IS_ERR(priv->clk)) { + dev_err(&pdev->dev, "No clock available\n"); + err = PTR_ERR(priv->clk); + priv->clk = NULL; + goto probe_exit_candev; + } + priv->can.clock.freq = clk_get_rate(priv->clk); + netif_napi_add(ndev, &priv->napi, ti_hecc_rx_poll, + HECC_DEF_NAPI_WEIGHT); + + err = register_candev(ndev); + if (err) { + dev_err(&pdev->dev, "register_candev() failed\n"); + goto probe_exit_clk; + } + dev_info(&pdev->dev, "device registered (reg_base=%p, irq=%u)\n", + priv->base, (u32) ndev->irq); + + return 0; + +probe_exit_clk: + clk_put(priv->clk); +probe_exit_candev: + free_candev(ndev); +probe_exit_iounmap: + iounmap(addr); +probe_exit_free_region: + release_mem_region(mem->start, resource_size(mem)); +probe_exit: + return err; +} + +static int __devexit ti_hecc_remove(struct platform_device *pdev) +{ + struct resource *res; + struct net_device *ndev = platform_get_drvdata(pdev); + struct ti_hecc_priv *priv = netdev_priv(ndev); + + clk_put(priv->clk); + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + iounmap(priv->base); + release_mem_region(res->start, resource_size(res)); + unregister_candev(ndev); + free_candev(ndev); + platform_set_drvdata(pdev, NULL); + + return 0; +} + +/* TI HECC netdevice driver: platform driver structure */ +static struct platform_driver ti_hecc_driver = { + .driver = { + .name = DRV_NAME, + .owner = THIS_MODULE, + }, + .probe = ti_hecc_probe, + .remove = __devexit_p(ti_hecc_remove), +}; + +static int __init ti_hecc_init_driver(void) +{ + printk(KERN_INFO DRV_DESC "\n"); + return platform_driver_register(&ti_hecc_driver); +} +module_init(ti_hecc_init_driver); + +static void __exit ti_hecc_exit_driver(void) +{ + printk(KERN_INFO DRV_DESC " unloaded\n"); + platform_driver_unregister(&ti_hecc_driver); +} +module_exit(ti_hecc_exit_driver); + +MODULE_AUTHOR("Anant Gole "); +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION(DRV_DESC); diff --git a/include/linux/can/platform/ti_hecc.h b/include/linux/can/platform/ti_hecc.h new file mode 100644 index 000000000000..4688c7bb1bd1 --- /dev/null +++ b/include/linux/can/platform/ti_hecc.h @@ -0,0 +1,40 @@ +/* + * TI HECC (High End CAN Controller) driver platform header + * + * Copyright (C) 2009 Texas Instruments Incorporated - http://www.ti.com/ + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation version 2. + * + * This program is distributed as is WITHOUT ANY WARRANTY of any + * kind, whether express or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +/** + * struct hecc_platform_data - HECC Platform Data + * + * @scc_hecc_offset: mostly 0 - should really never change + * @scc_ram_offset: SCC RAM offset + * @hecc_ram_offset: HECC RAM offset + * @mbx_offset: Mailbox RAM offset + * @int_line: Interrupt line to use - 0 or 1 + * @version: version for future use + * + * Platform data structure to get all platform specific settings. + * this structure also accounts the fact that the IP may have different + * RAM and mailbox offsets for different SOC's + */ +struct ti_hecc_platform_data { + u32 scc_hecc_offset; + u32 scc_ram_offset; + u32 hecc_ram_offset; + u32 mbx_offset; + u32 int_line; + u32 version; +}; + + -- cgit v1.3-8-gc7d7 From e0e6f55d298af03ab88bfe8455b671d29d78f426 Mon Sep 17 00:00:00 2001 From: Jin Dongming Date: Thu, 8 Oct 2009 22:44:47 -0700 Subject: ipv6: Fix the size overflow of addrconf_sysctl array (This patch fixes bug of commit f7734fdf61ec6bb848e0bafc1fb8bad2c124bb50 title "make TLLAO option for NA packets configurable") When the IPV6 conf is used, the function sysctl_set_parent is called and the array addrconf_sysctl is used as a parameter of the function. The above patch added new conf "force_tllao" into the array addrconf_sysctl, but the size of the array was not modified, the static allocated size is DEVCONF_MAX + 1 but the real size is DEVCONF_MAX + 2, so the problem is that the function sysctl_set_parent accessed wrong address. I got the following information. Call Trace: [] sysctl_set_parent+0x29/0x3e [] sysctl_set_parent+0x29/0x3e [] sysctl_set_parent+0x29/0x3e [] sysctl_set_parent+0x29/0x3e [] sysctl_set_parent+0x29/0x3e [] __register_sysctl_paths+0xde/0x272 [] ? __kmalloc_track_caller+0x16e/0x180 [] ? __addrconf_sysctl_register+0xc5/0x144 [ipv6] [] register_net_sysctl_table+0x48/0x4b [] __addrconf_sysctl_register+0xf7/0x144 [ipv6] [] addrconf_init_net+0xd4/0x104 [ipv6] [] setup_net+0x35/0x82 [] copy_net_ns+0x76/0xe0 [] create_new_namespaces+0xf0/0x16e [] copy_namespaces+0x65/0x9f [] copy_process+0xb2c/0x12c3 [] do_fork+0x14b/0x2d2 [] ? up_read+0xe/0x10 [] ? do_page_fault+0x27a/0x2aa [] sys_clone+0x28/0x2a [] stub_clone+0x13/0x20 [] ? system_call_fastpath+0x16/0x1b And the information of IPV6 in .config is as following. IPV6 in .config: CONFIG_IPV6=m CONFIG_IPV6_PRIVACY=y CONFIG_IPV6_ROUTER_PREF=y CONFIG_IPV6_ROUTE_INFO=y CONFIG_IPV6_OPTIMISTIC_DAD=y CONFIG_IPV6_MIP6=m CONFIG_IPV6_SIT=m # CONFIG_IPV6_SIT_6RD is not set CONFIG_IPV6_NDISC_NODETYPE=y CONFIG_IPV6_TUNNEL=m CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_IPV6_SUBTREES=y CONFIG_IPV6_MROUTE=y CONFIG_IPV6_PIMSM_V2=y # CONFIG_IP_VS_IPV6 is not set CONFIG_NF_CONNTRACK_IPV6=m CONFIG_IP6_NF_MATCH_IPV6HEADER=m I confirmed this patch fixes this problem. Signed-off-by: Jin Dongming Signed-off-by: David S. Miller --- include/linux/ipv6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index ae74ede1abe7..56404251248c 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -208,6 +208,7 @@ enum { DEVCONF_MC_FORWARDING, DEVCONF_DISABLE_IPV6, DEVCONF_ACCEPT_DAD, + DEVCONF_FORCE_TLLAO, DEVCONF_MAX }; -- cgit v1.3-8-gc7d7 From 493b67efffc462703d583389aca96f850c18d3b3 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Fri, 9 Oct 2009 15:55:41 +0300 Subject: ASoC: TPA6130A2 amplifier driver Driver for Texas Instruments TPA6130A2 stereo headphone amplifier. The driver provides playback gain control and also pre-defined DAPM_HP widgets and DAPM routings for power management. The DAPM_HP widget names are: "TPA6130A2 Headphone Left" "TPA6130A2 Headphone Right" From soc machine drivers to use with the tpa6130a2 amplifier, the tpa6130a2_add_controls has to be called, which adds the alsa controls and the DAPM routing needed for the tpa6130a2. After that the machine driver can connect the codec's output with 'TPA6130A2 Left' and 'TPA6130A2 Right': {"TPA6130A2 Left", NULL, "CODEC LEFT OUT"}, {"TPA6130A2 Right", NULL, "CODEC RIGHT OUT"}, Internally the left and right channels are powered separately. When none of the channels are needed the amplifier is powered down: hard power: valid GPIO number is passed within platform data soft power: Using the software shutdown of the amplifier Signed-off-by: Peter Ujfalusi Signed-off-by: Mark Brown --- include/sound/tpa6130a2-plat.h | 30 +++ sound/soc/codecs/Kconfig | 4 + sound/soc/codecs/Makefile | 2 + sound/soc/codecs/tpa6130a2.c | 463 +++++++++++++++++++++++++++++++++++++++++ sound/soc/codecs/tpa6130a2.h | 62 ++++++ 5 files changed, 561 insertions(+) create mode 100644 include/sound/tpa6130a2-plat.h create mode 100644 sound/soc/codecs/tpa6130a2.c create mode 100644 sound/soc/codecs/tpa6130a2.h (limited to 'include') diff --git a/include/sound/tpa6130a2-plat.h b/include/sound/tpa6130a2-plat.h new file mode 100644 index 000000000000..e8c901e749d8 --- /dev/null +++ b/include/sound/tpa6130a2-plat.h @@ -0,0 +1,30 @@ +/* + * TPA6130A2 driver platform header + * + * Copyright (C) Nokia Corporation + * + * Written by Peter Ujfalusi + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +#ifndef TPA6130A2_PLAT_H +#define TPA6130A2_PLAT_H + +struct tpa6130a2_platform_data { + int power_gpio; +}; + +#endif diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig index 3c46f34928ec..fab01c991828 100644 --- a/sound/soc/codecs/Kconfig +++ b/sound/soc/codecs/Kconfig @@ -29,6 +29,7 @@ config SND_SOC_ALL_CODECS select SND_SOC_TLV320AIC23 if I2C select SND_SOC_TLV320AIC26 if SPI_MASTER select SND_SOC_TLV320AIC3X if I2C + select SND_SOC_TPA6130A2 if I2C select SND_SOC_TWL4030 if TWL4030_CORE select SND_SOC_UDA134X select SND_SOC_UDA1380 if I2C @@ -228,3 +229,6 @@ config SND_SOC_WM9713 # Amp config SND_SOC_MAX9877 tristate + +config SND_SOC_TPA6130A2 + tristate diff --git a/sound/soc/codecs/Makefile b/sound/soc/codecs/Makefile index fc1c458cbe2f..2f14391b96f9 100644 --- a/sound/soc/codecs/Makefile +++ b/sound/soc/codecs/Makefile @@ -49,6 +49,7 @@ snd-soc-wm-hubs-objs := wm_hubs.o # Amp snd-soc-max9877-objs := max9877.o +snd-soc-tpa6130a2-objs := tpa6130a2.o obj-$(CONFIG_SND_SOC_AC97_CODEC) += snd-soc-ac97.o obj-$(CONFIG_SND_SOC_AD1836) += snd-soc-ad1836.o @@ -101,3 +102,4 @@ obj-$(CONFIG_SND_SOC_WM_HUBS) += snd-soc-wm-hubs.o # Amp obj-$(CONFIG_SND_SOC_MAX9877) += snd-soc-max9877.o +obj-$(CONFIG_SND_SOC_TPA6130A2) += snd-soc-tpa6130a2.o diff --git a/sound/soc/codecs/tpa6130a2.c b/sound/soc/codecs/tpa6130a2.c new file mode 100644 index 000000000000..1b77c959e2dc --- /dev/null +++ b/sound/soc/codecs/tpa6130a2.c @@ -0,0 +1,463 @@ +/* + * ALSA SoC Texas Instruments TPA6130A2 headset stereo amplifier driver + * + * Copyright (C) Nokia Corporation + * + * Author: Peter Ujfalusi + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "tpa6130a2.h" + +struct i2c_client *tpa6130a2_client; + +/* This struct is used to save the context */ +struct tpa6130a2_data { + struct mutex mutex; + unsigned char regs[TPA6130A2_CACHEREGNUM]; + int power_gpio; + unsigned char power_state; +}; + +static int tpa6130a2_i2c_read(int reg) +{ + struct tpa6130a2_data *data; + int val; + + BUG_ON(tpa6130a2_client == NULL); + data = i2c_get_clientdata(tpa6130a2_client); + + /* If powered off, return the cached value */ + if (data->power_state) { + val = i2c_smbus_read_byte_data(tpa6130a2_client, reg); + if (val < 0) + dev_err(&tpa6130a2_client->dev, "Read failed\n"); + else + data->regs[reg] = val; + } else { + val = data->regs[reg]; + } + + return val; +} + +static int tpa6130a2_i2c_write(int reg, u8 value) +{ + struct tpa6130a2_data *data; + int val = 0; + + BUG_ON(tpa6130a2_client == NULL); + data = i2c_get_clientdata(tpa6130a2_client); + + if (data->power_state) { + val = i2c_smbus_write_byte_data(tpa6130a2_client, reg, value); + if (val < 0) + dev_err(&tpa6130a2_client->dev, "Write failed\n"); + } + + /* Either powered on or off, we save the context */ + data->regs[reg] = value; + + return val; +} + +static u8 tpa6130a2_read(int reg) +{ + struct tpa6130a2_data *data; + + BUG_ON(tpa6130a2_client == NULL); + data = i2c_get_clientdata(tpa6130a2_client); + + return data->regs[reg]; +} + +static void tpa6130a2_initialize(void) +{ + struct tpa6130a2_data *data; + int i; + + BUG_ON(tpa6130a2_client == NULL); + data = i2c_get_clientdata(tpa6130a2_client); + + for (i = 1; i < TPA6130A2_REG_VERSION; i++) + tpa6130a2_i2c_write(i, data->regs[i]); +} + +void tpa6130a2_power(int power) +{ + struct tpa6130a2_data *data; + u8 val; + + BUG_ON(tpa6130a2_client == NULL); + data = i2c_get_clientdata(tpa6130a2_client); + + mutex_lock(&data->mutex); + if (power) { + /* Power on */ + if (data->power_gpio >= 0) { + gpio_set_value(data->power_gpio, 1); + data->power_state = 1; + tpa6130a2_initialize(); + } + /* Clear SWS */ + val = tpa6130a2_read(TPA6130A2_REG_CONTROL); + val &= ~TPA6130A2_SWS; + tpa6130a2_i2c_write(TPA6130A2_REG_CONTROL, val); + } else { + /* set SWS */ + val = tpa6130a2_read(TPA6130A2_REG_CONTROL); + val |= TPA6130A2_SWS; + tpa6130a2_i2c_write(TPA6130A2_REG_CONTROL, val); + /* Power off */ + if (data->power_gpio >= 0) { + gpio_set_value(data->power_gpio, 0); + data->power_state = 0; + } + } + mutex_unlock(&data->mutex); +} + +static int tpa6130a2_get_reg(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct soc_mixer_control *mc = + (struct soc_mixer_control *)kcontrol->private_value; + struct tpa6130a2_data *data; + unsigned int reg = mc->reg; + unsigned int shift = mc->shift; + unsigned int mask = mc->max; + unsigned int invert = mc->invert; + + BUG_ON(tpa6130a2_client == NULL); + data = i2c_get_clientdata(tpa6130a2_client); + + mutex_lock(&data->mutex); + + ucontrol->value.integer.value[0] = + (tpa6130a2_read(reg) >> shift) & mask; + + if (invert) + ucontrol->value.integer.value[0] = + mask - ucontrol->value.integer.value[0]; + + mutex_unlock(&data->mutex); + return 0; +} + +static int tpa6130a2_set_reg(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct soc_mixer_control *mc = + (struct soc_mixer_control *)kcontrol->private_value; + struct tpa6130a2_data *data; + unsigned int reg = mc->reg; + unsigned int shift = mc->shift; + unsigned int mask = mc->max; + unsigned int invert = mc->invert; + unsigned int val = (ucontrol->value.integer.value[0] & mask); + unsigned int val_reg; + + BUG_ON(tpa6130a2_client == NULL); + data = i2c_get_clientdata(tpa6130a2_client); + + if (invert) + val = mask - val; + + mutex_lock(&data->mutex); + + val_reg = tpa6130a2_read(reg); + if (((val_reg >> shift) & mask) == val) { + mutex_unlock(&data->mutex); + return 0; + } + + val_reg &= ~(mask << shift); + val_reg |= val << shift; + tpa6130a2_i2c_write(reg, val_reg); + + mutex_unlock(&data->mutex); + + return 1; +} + +/* + * TPA6130 volume. From -59.5 to 4 dB with increasing step size when going + * down in gain. + */ +static const unsigned int tpa6130_tlv[] = { + TLV_DB_RANGE_HEAD(10), + 0, 1, TLV_DB_SCALE_ITEM(-5950, 600, 0), + 2, 3, TLV_DB_SCALE_ITEM(-5000, 250, 0), + 4, 5, TLV_DB_SCALE_ITEM(-4550, 160, 0), + 6, 7, TLV_DB_SCALE_ITEM(-4140, 190, 0), + 8, 9, TLV_DB_SCALE_ITEM(-3650, 120, 0), + 10, 11, TLV_DB_SCALE_ITEM(-3330, 160, 0), + 12, 13, TLV_DB_SCALE_ITEM(-3040, 180, 0), + 14, 20, TLV_DB_SCALE_ITEM(-2710, 110, 0), + 21, 37, TLV_DB_SCALE_ITEM(-1960, 74, 0), + 38, 63, TLV_DB_SCALE_ITEM(-720, 45, 0), +}; + +static const struct snd_kcontrol_new tpa6130a2_controls[] = { + SOC_SINGLE_EXT_TLV("TPA6130A2 Headphone Playback Volume", + TPA6130A2_REG_VOL_MUTE, 0, 0x3f, 0, + tpa6130a2_get_reg, tpa6130a2_set_reg, + tpa6130_tlv), +}; + +/* + * Enable or disable channel (left or right) + * The bit number for mute and amplifier are the same per channel: + * bit 6: Right channel + * bit 7: Left channel + * in both registers. + */ +static void tpa6130a2_channel_enable(u8 channel, int enable) +{ + struct tpa6130a2_data *data; + u8 val; + + BUG_ON(tpa6130a2_client == NULL); + data = i2c_get_clientdata(tpa6130a2_client); + + if (enable) { + /* Enable channel */ + /* Enable amplifier */ + val = tpa6130a2_read(TPA6130A2_REG_CONTROL); + val |= channel; + tpa6130a2_i2c_write(TPA6130A2_REG_CONTROL, val); + + /* Unmute channel */ + val = tpa6130a2_read(TPA6130A2_REG_VOL_MUTE); + val &= ~channel; + tpa6130a2_i2c_write(TPA6130A2_REG_VOL_MUTE, val); + } else { + /* Disable channel */ + /* Mute channel */ + val = tpa6130a2_read(TPA6130A2_REG_VOL_MUTE); + val |= channel; + tpa6130a2_i2c_write(TPA6130A2_REG_VOL_MUTE, val); + + /* Disable amplifier */ + val = tpa6130a2_read(TPA6130A2_REG_CONTROL); + val &= ~channel; + tpa6130a2_i2c_write(TPA6130A2_REG_CONTROL, val); + } +} + +static int tpa6130a2_left_event(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *kcontrol, int event) +{ + switch (event) { + case SND_SOC_DAPM_POST_PMU: + tpa6130a2_channel_enable(TPA6130A2_HP_EN_L, 1); + break; + case SND_SOC_DAPM_POST_PMD: + tpa6130a2_channel_enable(TPA6130A2_HP_EN_L, 0); + break; + } + return 0; +} + +static int tpa6130a2_right_event(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *kcontrol, int event) +{ + switch (event) { + case SND_SOC_DAPM_POST_PMU: + tpa6130a2_channel_enable(TPA6130A2_HP_EN_R, 1); + break; + case SND_SOC_DAPM_POST_PMD: + tpa6130a2_channel_enable(TPA6130A2_HP_EN_R, 0); + break; + } + return 0; +} + +static int tpa6130a2_supply_event(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *kcontrol, int event) +{ + switch (event) { + case SND_SOC_DAPM_POST_PMU: + tpa6130a2_power(1); + break; + case SND_SOC_DAPM_POST_PMD: + tpa6130a2_power(0); + break; + } + return 0; +} + +static const struct snd_soc_dapm_widget tpa6130a2_dapm_widgets[] = { + SND_SOC_DAPM_PGA_E("TPA6130A2 Left", SND_SOC_NOPM, + 0, 0, NULL, 0, tpa6130a2_left_event, + SND_SOC_DAPM_POST_PMU|SND_SOC_DAPM_POST_PMD), + SND_SOC_DAPM_PGA_E("TPA6130A2 Right", SND_SOC_NOPM, + 0, 0, NULL, 0, tpa6130a2_right_event, + SND_SOC_DAPM_POST_PMU|SND_SOC_DAPM_POST_PMD), + SND_SOC_DAPM_SUPPLY("TPA6130A2 Enable", SND_SOC_NOPM, + 0, 0, tpa6130a2_supply_event, + SND_SOC_DAPM_POST_PMU|SND_SOC_DAPM_POST_PMD), + /* Outputs */ + SND_SOC_DAPM_HP("TPA6130A2 Headphone Left", NULL), + SND_SOC_DAPM_HP("TPA6130A2 Headphone Right", NULL), +}; + +static const struct snd_soc_dapm_route audio_map[] = { + {"TPA6130A2 Headphone Left", NULL, "TPA6130A2 Left"}, + {"TPA6130A2 Headphone Right", NULL, "TPA6130A2 Right"}, + + {"TPA6130A2 Headphone Left", NULL, "TPA6130A2 Enable"}, + {"TPA6130A2 Headphone Right", NULL, "TPA6130A2 Enable"}, +}; + +int tpa6130a2_add_controls(struct snd_soc_codec *codec) +{ + snd_soc_dapm_new_controls(codec, tpa6130a2_dapm_widgets, + ARRAY_SIZE(tpa6130a2_dapm_widgets)); + + snd_soc_dapm_add_routes(codec, audio_map, ARRAY_SIZE(audio_map)); + + return snd_soc_add_controls(codec, tpa6130a2_controls, + ARRAY_SIZE(tpa6130a2_controls)); + +} +EXPORT_SYMBOL_GPL(tpa6130a2_add_controls); + +static int tpa6130a2_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct device *dev; + struct tpa6130a2_data *data; + struct tpa6130a2_platform_data *pdata; + int ret; + + dev = &client->dev; + + if (client->dev.platform_data == NULL) { + dev_err(dev, "Platform data not set\n"); + dump_stack(); + return -ENODEV; + } + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (data == NULL) { + dev_err(dev, "Can not allocate memory\n"); + return -ENOMEM; + } + + tpa6130a2_client = client; + + i2c_set_clientdata(tpa6130a2_client, data); + + pdata = (struct tpa6130a2_platform_data *)client->dev.platform_data; + data->power_gpio = pdata->power_gpio; + + mutex_init(&data->mutex); + + /* Set default register values */ + data->regs[TPA6130A2_REG_CONTROL] = TPA6130A2_SWS; + data->regs[TPA6130A2_REG_VOL_MUTE] = TPA6130A2_MUTE_R | + TPA6130A2_MUTE_L; + + if (data->power_gpio >= 0) { + ret = gpio_request(data->power_gpio, "tpa6130a2 enable"); + if (ret < 0) { + dev_err(dev, "Failed to request power GPIO (%d)\n", + data->power_gpio); + goto fail; + } + gpio_direction_output(data->power_gpio, 0); + } else { + data->power_state = 1; + tpa6130a2_initialize(); + } + + tpa6130a2_power(1); + + /* Read version */ + ret = tpa6130a2_i2c_read(TPA6130A2_REG_VERSION) & + TPA6130A2_VERSION_MASK; + if ((ret != 1) && (ret != 2)) + dev_warn(dev, "UNTESTED version detected (%d)\n", ret); + + /* Disable the chip */ + tpa6130a2_power(0); + + return 0; +fail: + kfree(data); + i2c_set_clientdata(tpa6130a2_client, NULL); + tpa6130a2_client = 0; + + return ret; +} + +static int tpa6130a2_remove(struct i2c_client *client) +{ + struct tpa6130a2_data *data = i2c_get_clientdata(client); + + tpa6130a2_power(0); + + if (data->power_gpio >= 0) + gpio_free(data->power_gpio); + kfree(data); + tpa6130a2_client = 0; + + return 0; +} + +static const struct i2c_device_id tpa6130a2_id[] = { + { "tpa6130a2", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, tpa6130a2_id); + +static struct i2c_driver tpa6130a2_i2c_driver = { + .driver = { + .name = "tpa6130a2", + .owner = THIS_MODULE, + }, + .probe = tpa6130a2_probe, + .remove = __devexit_p(tpa6130a2_remove), + .id_table = tpa6130a2_id, +}; + +static int __init tpa6130a2_init(void) +{ + return i2c_add_driver(&tpa6130a2_i2c_driver); +} + +static void __exit tpa6130a2_exit(void) +{ + i2c_del_driver(&tpa6130a2_i2c_driver); +} + +MODULE_AUTHOR("Peter Ujfalusi"); +MODULE_DESCRIPTION("TPA6130A2 Headphone amplifier driver"); +MODULE_LICENSE("GPL"); + +module_init(tpa6130a2_init); +module_exit(tpa6130a2_exit); diff --git a/sound/soc/codecs/tpa6130a2.h b/sound/soc/codecs/tpa6130a2.h new file mode 100644 index 000000000000..6a794f16cee9 --- /dev/null +++ b/sound/soc/codecs/tpa6130a2.h @@ -0,0 +1,62 @@ +/* + * ALSA SoC TPA6130A2 amplifier driver + * + * Copyright (C) Nokia Corporation + * + * Author: Peter Ujfalusi + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + * + */ + +#ifndef __TPA6130A2_H__ +#define __TPA6130A2_H__ + +/* Register addresses */ +#define TPA6130A2_REG_CONTROL 0x01 +#define TPA6130A2_REG_VOL_MUTE 0x02 +#define TPA6130A2_REG_OUT_IMPEDANCE 0x03 +#define TPA6130A2_REG_VERSION 0x04 + +#define TPA6130A2_CACHEREGNUM (TPA6130A2_REG_VERSION + 1) + +/* Register bits */ +/* TPA6130A2_REG_CONTROL (0x01) */ +#define TPA6130A2_SWS (0x01 << 0) +#define TPA6130A2_TERMAL (0x01 << 1) +#define TPA6130A2_MODE(x) (x << 4) +#define TPA6130A2_MODE_STEREO (0x00) +#define TPA6130A2_MODE_DUAL_MONO (0x01) +#define TPA6130A2_MODE_BRIDGE (0x02) +#define TPA6130A2_MODE_MASK (0x03) +#define TPA6130A2_HP_EN_R (0x01 << 6) +#define TPA6130A2_HP_EN_L (0x01 << 7) + +/* TPA6130A2_REG_VOL_MUTE (0x02) */ +#define TPA6130A2_VOLUME(x) ((x & 0x3f) << 0) +#define TPA6130A2_MUTE_R (0x01 << 6) +#define TPA6130A2_MUTE_L (0x01 << 7) + +/* TPA6130A2_REG_OUT_IMPEDANCE (0x03) */ +#define TPA6130A2_HIZ_R (0x01 << 0) +#define TPA6130A2_HIZ_L (0x01 << 1) + +/* TPA6130A2_REG_VERSION (0x04) */ +#define TPA6130A2_VERSION_MASK (0x0f) + +extern int tpa6130a2_add_controls(struct snd_soc_codec *codec); +extern void tpa6130a2_power(int power); + +#endif /* __TPA6130A2_H__ */ -- cgit v1.3-8-gc7d7 From 89eda06837094ce9f34fae269b8773fcfd70f046 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sun, 4 Oct 2009 21:49:47 +0900 Subject: LSM: Add security_path_chmod() and security_path_chown(). This patch allows pathname based LSM modules to check chmod()/chown() operations. Since notify_change() does not receive "struct vfsmount *", we add security_path_chmod() and security_path_chown() to the caller of notify_change(). These hooks are used by TOMOYO. Signed-off-by: Tetsuo Handa Signed-off-by: James Morris --- fs/open.c | 24 ++++++++++++++++++++---- include/linux/security.h | 30 ++++++++++++++++++++++++++++++ security/capability.c | 13 +++++++++++++ security/security.c | 15 +++++++++++++++ 4 files changed, 78 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/fs/open.c b/fs/open.c index 4f01e06227c6..b5c294d35bd1 100644 --- a/fs/open.c +++ b/fs/open.c @@ -616,6 +616,9 @@ SYSCALL_DEFINE2(fchmod, unsigned int, fd, mode_t, mode) err = mnt_want_write_file(file); if (err) goto out_putf; + err = security_path_chmod(dentry, file->f_vfsmnt, mode); + if (err) + goto out_drop_write; mutex_lock(&inode->i_mutex); if (mode == (mode_t) -1) mode = inode->i_mode; @@ -623,6 +626,7 @@ SYSCALL_DEFINE2(fchmod, unsigned int, fd, mode_t, mode) newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; err = notify_change(dentry, &newattrs); mutex_unlock(&inode->i_mutex); +out_drop_write: mnt_drop_write(file->f_path.mnt); out_putf: fput(file); @@ -645,6 +649,9 @@ SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename, mode_t, mode) error = mnt_want_write(path.mnt); if (error) goto dput_and_out; + error = security_path_chmod(path.dentry, path.mnt, mode); + if (error) + goto out_drop_write; mutex_lock(&inode->i_mutex); if (mode == (mode_t) -1) mode = inode->i_mode; @@ -652,6 +659,7 @@ SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename, mode_t, mode) newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; error = notify_change(path.dentry, &newattrs); mutex_unlock(&inode->i_mutex); +out_drop_write: mnt_drop_write(path.mnt); dput_and_out: path_put(&path); @@ -700,7 +708,9 @@ SYSCALL_DEFINE3(chown, const char __user *, filename, uid_t, user, gid_t, group) error = mnt_want_write(path.mnt); if (error) goto out_release; - error = chown_common(path.dentry, user, group); + error = security_path_chown(&path, user, group); + if (!error) + error = chown_common(path.dentry, user, group); mnt_drop_write(path.mnt); out_release: path_put(&path); @@ -725,7 +735,9 @@ SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user, error = mnt_want_write(path.mnt); if (error) goto out_release; - error = chown_common(path.dentry, user, group); + error = security_path_chown(&path, user, group); + if (!error) + error = chown_common(path.dentry, user, group); mnt_drop_write(path.mnt); out_release: path_put(&path); @@ -744,7 +756,9 @@ SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group error = mnt_want_write(path.mnt); if (error) goto out_release; - error = chown_common(path.dentry, user, group); + error = security_path_chown(&path, user, group); + if (!error) + error = chown_common(path.dentry, user, group); mnt_drop_write(path.mnt); out_release: path_put(&path); @@ -767,7 +781,9 @@ SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group) goto out_fput; dentry = file->f_path.dentry; audit_inode(NULL, dentry); - error = chown_common(dentry, user, group); + error = security_path_chown(&file->f_path, user, group); + if (!error) + error = chown_common(dentry, user, group); mnt_drop_write(file->f_path.mnt); out_fput: fput(file); diff --git a/include/linux/security.h b/include/linux/security.h index 239e40d0450b..c8a584c26f7b 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -447,6 +447,18 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @new_dir contains the path structure for parent of the new link. * @new_dentry contains the dentry structure of the new link. * Return 0 if permission is granted. + * @path_chmod: + * Check for permission to change DAC's permission of a file or directory. + * @dentry contains the dentry structure. + * @mnt contains the vfsmnt structure. + * @mode contains DAC's mode. + * Return 0 if permission is granted. + * @path_chown: + * Check for permission to change owner/group of a file or directory. + * @path contains the path structure. + * @uid contains new owner's ID. + * @gid contains new group's ID. + * Return 0 if permission is granted. * @inode_readlink: * Check the permission to read the symbolic link. * @dentry contains the dentry structure for the file link. @@ -1488,6 +1500,9 @@ struct security_operations { struct dentry *new_dentry); int (*path_rename) (struct path *old_dir, struct dentry *old_dentry, struct path *new_dir, struct dentry *new_dentry); + int (*path_chmod) (struct dentry *dentry, struct vfsmount *mnt, + mode_t mode); + int (*path_chown) (struct path *path, uid_t uid, gid_t gid); #endif int (*inode_alloc_security) (struct inode *inode); @@ -2952,6 +2967,9 @@ int security_path_link(struct dentry *old_dentry, struct path *new_dir, struct dentry *new_dentry); int security_path_rename(struct path *old_dir, struct dentry *old_dentry, struct path *new_dir, struct dentry *new_dentry); +int security_path_chmod(struct dentry *dentry, struct vfsmount *mnt, + mode_t mode); +int security_path_chown(struct path *path, uid_t uid, gid_t gid); #else /* CONFIG_SECURITY_PATH */ static inline int security_path_unlink(struct path *dir, struct dentry *dentry) { @@ -3001,6 +3019,18 @@ static inline int security_path_rename(struct path *old_dir, { return 0; } + +static inline int security_path_chmod(struct dentry *dentry, + struct vfsmount *mnt, + mode_t mode) +{ + return 0; +} + +static inline int security_path_chown(struct path *path, uid_t uid, gid_t gid) +{ + return 0; +} #endif /* CONFIG_SECURITY_PATH */ #ifdef CONFIG_KEYS diff --git a/security/capability.c b/security/capability.c index fce07a7bc825..09279a8d4a14 100644 --- a/security/capability.c +++ b/security/capability.c @@ -308,6 +308,17 @@ static int cap_path_truncate(struct path *path, loff_t length, { return 0; } + +static int cap_path_chmod(struct dentry *dentry, struct vfsmount *mnt, + mode_t mode) +{ + return 0; +} + +static int cap_path_chown(struct path *path, uid_t uid, gid_t gid) +{ + return 0; +} #endif static int cap_file_permission(struct file *file, int mask) @@ -977,6 +988,8 @@ void security_fixup_ops(struct security_operations *ops) set_to_cap_if_null(ops, path_link); set_to_cap_if_null(ops, path_rename); set_to_cap_if_null(ops, path_truncate); + set_to_cap_if_null(ops, path_chmod); + set_to_cap_if_null(ops, path_chown); #endif set_to_cap_if_null(ops, file_permission); set_to_cap_if_null(ops, file_alloc_security); diff --git a/security/security.c b/security/security.c index c4c673240c1c..5259270e558f 100644 --- a/security/security.c +++ b/security/security.c @@ -434,6 +434,21 @@ int security_path_truncate(struct path *path, loff_t length, return 0; return security_ops->path_truncate(path, length, time_attrs); } + +int security_path_chmod(struct dentry *dentry, struct vfsmount *mnt, + mode_t mode) +{ + if (unlikely(IS_PRIVATE(dentry->d_inode))) + return 0; + return security_ops->path_chmod(dentry, mnt, mode); +} + +int security_path_chown(struct path *path, uid_t uid, gid_t gid) +{ + if (unlikely(IS_PRIVATE(path->dentry->d_inode))) + return 0; + return security_ops->path_chown(path, uid, gid); +} #endif int security_inode_create(struct inode *dir, struct dentry *dentry, int mode) -- cgit v1.3-8-gc7d7 From 8b8efb44033c7e86b3dc76f825c693ec92ae30e9 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sun, 4 Oct 2009 21:49:48 +0900 Subject: LSM: Add security_path_chroot(). This patch allows pathname based LSM modules to check chroot() operations. This hook is used by TOMOYO. Signed-off-by: Tetsuo Handa Signed-off-by: James Morris --- fs/open.c | 3 +++ include/linux/security.h | 11 +++++++++++ security/capability.c | 6 ++++++ security/security.c | 5 +++++ 4 files changed, 25 insertions(+) (limited to 'include') diff --git a/fs/open.c b/fs/open.c index b5c294d35bd1..201041dfca57 100644 --- a/fs/open.c +++ b/fs/open.c @@ -587,6 +587,9 @@ SYSCALL_DEFINE1(chroot, const char __user *, filename) error = -EPERM; if (!capable(CAP_SYS_CHROOT)) goto dput_and_out; + error = security_path_chroot(&path); + if (error) + goto dput_and_out; set_fs_root(current->fs, &path); error = 0; diff --git a/include/linux/security.h b/include/linux/security.h index c8a584c26f7b..ed0faea60b82 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -459,6 +459,10 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @uid contains new owner's ID. * @gid contains new group's ID. * Return 0 if permission is granted. + * @path_chroot: + * Check for permission to change root directory. + * @path contains the path structure. + * Return 0 if permission is granted. * @inode_readlink: * Check the permission to read the symbolic link. * @dentry contains the dentry structure for the file link. @@ -1503,6 +1507,7 @@ struct security_operations { int (*path_chmod) (struct dentry *dentry, struct vfsmount *mnt, mode_t mode); int (*path_chown) (struct path *path, uid_t uid, gid_t gid); + int (*path_chroot) (struct path *path); #endif int (*inode_alloc_security) (struct inode *inode); @@ -2970,6 +2975,7 @@ int security_path_rename(struct path *old_dir, struct dentry *old_dentry, int security_path_chmod(struct dentry *dentry, struct vfsmount *mnt, mode_t mode); int security_path_chown(struct path *path, uid_t uid, gid_t gid); +int security_path_chroot(struct path *path); #else /* CONFIG_SECURITY_PATH */ static inline int security_path_unlink(struct path *dir, struct dentry *dentry) { @@ -3031,6 +3037,11 @@ static inline int security_path_chown(struct path *path, uid_t uid, gid_t gid) { return 0; } + +static inline int security_path_chroot(struct path *path) +{ + return 0; +} #endif /* CONFIG_SECURITY_PATH */ #ifdef CONFIG_KEYS diff --git a/security/capability.c b/security/capability.c index 09279a8d4a14..4f3ab476937f 100644 --- a/security/capability.c +++ b/security/capability.c @@ -319,6 +319,11 @@ static int cap_path_chown(struct path *path, uid_t uid, gid_t gid) { return 0; } + +static int cap_path_chroot(struct path *root) +{ + return 0; +} #endif static int cap_file_permission(struct file *file, int mask) @@ -990,6 +995,7 @@ void security_fixup_ops(struct security_operations *ops) set_to_cap_if_null(ops, path_truncate); set_to_cap_if_null(ops, path_chmod); set_to_cap_if_null(ops, path_chown); + set_to_cap_if_null(ops, path_chroot); #endif set_to_cap_if_null(ops, file_permission); set_to_cap_if_null(ops, file_alloc_security); diff --git a/security/security.c b/security/security.c index 5259270e558f..279757314a05 100644 --- a/security/security.c +++ b/security/security.c @@ -449,6 +449,11 @@ int security_path_chown(struct path *path, uid_t uid, gid_t gid) return 0; return security_ops->path_chown(path, uid, gid); } + +int security_path_chroot(struct path *path) +{ + return security_ops->path_chroot(path); +} #endif int security_inode_create(struct inode *dir, struct dentry *dentry, int mode) -- cgit v1.3-8-gc7d7 From d5e63bded6e819ca77ee1a1d97c783a31f6caf30 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 12 Oct 2009 03:00:31 -0700 Subject: Revert "af_packet: add interframe drop cmsg (v6)" This reverts commit 977750076d98c7ff6cbda51858bb5a5894a9d9ab. Neil is reimplementing this generically, outside of AF_PACKET. Signed-off-by: David S. Miller --- include/linux/if_packet.h | 2 -- net/packet/af_packet.c | 33 --------------------------------- 2 files changed, 35 deletions(-) (limited to 'include') diff --git a/include/linux/if_packet.h b/include/linux/if_packet.h index e5d200f53fc3..dea7d6b7cf98 100644 --- a/include/linux/if_packet.h +++ b/include/linux/if_packet.h @@ -48,13 +48,11 @@ struct sockaddr_ll #define PACKET_RESERVE 12 #define PACKET_TX_RING 13 #define PACKET_LOSS 14 -#define PACKET_GAPDATA 15 struct tpacket_stats { unsigned int tp_packets; unsigned int tp_drops; - unsigned int tp_gap; }; struct tpacket_auxdata diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 70073a0dea5d..f87ed4803c11 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -524,31 +524,6 @@ static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk, return res; } -/* - * If we've lost frames since the last time we queued one to the - * sk_receive_queue, we need to record it here. - * This must be called under the protection of the socket lock - * to prevent racing with other softirqs and user space - */ -static inline void record_packet_gap(struct sk_buff *skb, - struct packet_sock *po) -{ - /* - * We overload the mark field here, since we're about - * to enqueue to a receive queue and no body else will - * use this field at this point - */ - skb->mark = po->stats.tp_gap; - po->stats.tp_gap = 0; - return; - -} - -static inline __u32 check_packet_gap(struct sk_buff *skb) -{ - return skb->mark; -} - /* This function makes lazy skb cloning in hope that most of packets are discarded by BPF. @@ -652,7 +627,6 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, spin_lock(&sk->sk_receive_queue.lock); po->stats.tp_packets++; - record_packet_gap(skb, po); __skb_queue_tail(&sk->sk_receive_queue, skb); spin_unlock(&sk->sk_receive_queue.lock); sk->sk_data_ready(sk, skb->len); @@ -661,7 +635,6 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, drop_n_acct: spin_lock(&sk->sk_receive_queue.lock); po->stats.tp_drops++; - po->stats.tp_gap++; spin_unlock(&sk->sk_receive_queue.lock); drop_n_restore: @@ -839,7 +812,6 @@ drop: ring_is_full: po->stats.tp_drops++; - po->stats.tp_gap++; spin_unlock(&sk->sk_receive_queue.lock); sk->sk_data_ready(sk, 0); @@ -1449,7 +1421,6 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, struct sk_buff *skb; int copied, err; struct sockaddr_ll *sll; - __u32 gap; err = -EINVAL; if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT)) @@ -1528,10 +1499,6 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux); } - gap = check_packet_gap(skb); - if (gap) - put_cmsg(msg, SOL_PACKET, PACKET_GAPDATA, sizeof(__u32), &gap); - /* * Free or return the buffer as appropriate. Again this * hides all the races and re-entrancy issues from us. -- cgit v1.3-8-gc7d7 From 3b885787ea4112eaa80945999ea0901bf742707f Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Mon, 12 Oct 2009 13:26:31 -0700 Subject: net: Generalize socket rx gap / receive queue overflow cmsg Create a new socket level option to report number of queue overflows Recently I augmented the AF_PACKET protocol to report the number of frames lost on the socket receive queue between any two enqueued frames. This value was exported via a SOL_PACKET level cmsg. AFter I completed that work it was requested that this feature be generalized so that any datagram oriented socket could make use of this option. As such I've created this patch, It creates a new SOL_SOCKET level option called SO_RXQ_OVFL, which when enabled exports a SOL_SOCKET level cmsg that reports the nubmer of times the sk_receive_queue overflowed between any two given frames. It also augments the AF_PACKET protocol to take advantage of this new feature (as it previously did not touch sk->sk_drops, which this patch uses to record the overflow count). Tested successfully by me. Notes: 1) Unlike my previous patch, this patch simply records the sk_drops value, which is not a number of drops between packets, but rather a total number of drops. Deltas must be computed in user space. 2) While this patch currently works with datagram oriented protocols, it will also be accepted by non-datagram oriented protocols. I'm not sure if thats agreeable to everyone, but my argument in favor of doing so is that, for those protocols which aren't applicable to this option, sk_drops will always be zero, and reporting no drops on a receive queue that isn't used for those non-participating protocols seems reasonable to me. This also saves us having to code in a per-protocol opt in mechanism. 3) This applies cleanly to net-next assuming that commit 977750076d98c7ff6cbda51858bb5a5894a9d9ab (my af packet cmsg patch) is reverted Signed-off-by: Neil Horman Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- arch/alpha/include/asm/socket.h | 2 ++ arch/arm/include/asm/socket.h | 2 ++ arch/avr32/include/asm/socket.h | 2 ++ arch/cris/include/asm/socket.h | 2 ++ arch/frv/include/asm/socket.h | 2 ++ arch/h8300/include/asm/socket.h | 2 ++ arch/ia64/include/asm/socket.h | 2 ++ arch/m32r/include/asm/socket.h | 2 ++ arch/m68k/include/asm/socket.h | 2 ++ arch/mips/include/asm/socket.h | 2 ++ arch/mn10300/include/asm/socket.h | 2 ++ arch/parisc/include/asm/socket.h | 2 ++ arch/powerpc/include/asm/socket.h | 2 ++ arch/s390/include/asm/socket.h | 2 ++ arch/sparc/include/asm/socket.h | 2 ++ arch/xtensa/include/asm/socket.h | 2 ++ include/asm-generic/socket.h | 1 + include/linux/skbuff.h | 6 ++++-- include/net/sock.h | 3 +++ net/atm/common.c | 2 +- net/bluetooth/af_bluetooth.c | 2 +- net/bluetooth/rfcomm/sock.c | 2 +- net/can/bcm.c | 2 +- net/can/raw.c | 2 +- net/core/sock.c | 17 ++++++++++++++++- net/ieee802154/dgram.c | 2 +- net/ieee802154/raw.c | 2 +- net/ipv4/raw.c | 2 +- net/ipv4/udp.c | 2 +- net/ipv6/raw.c | 2 +- net/ipv6/udp.c | 2 +- net/key/af_key.c | 2 +- net/packet/af_packet.c | 7 +++---- net/rxrpc/ar-recvmsg.c | 2 +- net/sctp/socket.c | 2 +- net/socket.c | 15 +++++++++++++++ 36 files changed, 88 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/arch/alpha/include/asm/socket.h b/arch/alpha/include/asm/socket.h index 26773e3246e2..06edfefc3373 100644 --- a/arch/alpha/include/asm/socket.h +++ b/arch/alpha/include/asm/socket.h @@ -67,6 +67,8 @@ #define SO_TIMESTAMPING 37 #define SCM_TIMESTAMPING SO_TIMESTAMPING +#define SO_RXQ_OVFL 40 + /* O_NONBLOCK clashes with the bits used for socket types. Therefore we * have to define SOCK_NONBLOCK to a different value here. */ diff --git a/arch/arm/include/asm/socket.h b/arch/arm/include/asm/socket.h index 92ac61d294fd..90ffd04b8e74 100644 --- a/arch/arm/include/asm/socket.h +++ b/arch/arm/include/asm/socket.h @@ -60,4 +60,6 @@ #define SO_PROTOCOL 38 #define SO_DOMAIN 39 +#define SO_RXQ_OVFL 40 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/avr32/include/asm/socket.h b/arch/avr32/include/asm/socket.h index fe863f9794d5..c8d1fae49476 100644 --- a/arch/avr32/include/asm/socket.h +++ b/arch/avr32/include/asm/socket.h @@ -60,4 +60,6 @@ #define SO_PROTOCOL 38 #define SO_DOMAIN 39 +#define SO_RXQ_OVFL 40 + #endif /* __ASM_AVR32_SOCKET_H */ diff --git a/arch/cris/include/asm/socket.h b/arch/cris/include/asm/socket.h index 45ec49bdb7b1..1a4a61909ca8 100644 --- a/arch/cris/include/asm/socket.h +++ b/arch/cris/include/asm/socket.h @@ -62,6 +62,8 @@ #define SO_PROTOCOL 38 #define SO_DOMAIN 39 +#define SO_RXQ_OVFL 40 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/frv/include/asm/socket.h b/arch/frv/include/asm/socket.h index 2dea726095c2..a6b26880c1ec 100644 --- a/arch/frv/include/asm/socket.h +++ b/arch/frv/include/asm/socket.h @@ -60,5 +60,7 @@ #define SO_PROTOCOL 38 #define SO_DOMAIN 39 +#define SO_RXQ_OVFL 40 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/h8300/include/asm/socket.h b/arch/h8300/include/asm/socket.h index 1547f01c8e22..04c0f4596eb5 100644 --- a/arch/h8300/include/asm/socket.h +++ b/arch/h8300/include/asm/socket.h @@ -60,4 +60,6 @@ #define SO_PROTOCOL 38 #define SO_DOMAIN 39 +#define SO_RXQ_OVFL 40 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/ia64/include/asm/socket.h b/arch/ia64/include/asm/socket.h index 0b0d5ff062e5..51427eaa51ba 100644 --- a/arch/ia64/include/asm/socket.h +++ b/arch/ia64/include/asm/socket.h @@ -69,4 +69,6 @@ #define SO_PROTOCOL 38 #define SO_DOMAIN 39 +#define SO_RXQ_OVFL 40 + #endif /* _ASM_IA64_SOCKET_H */ diff --git a/arch/m32r/include/asm/socket.h b/arch/m32r/include/asm/socket.h index 3390a864f224..469787c30098 100644 --- a/arch/m32r/include/asm/socket.h +++ b/arch/m32r/include/asm/socket.h @@ -60,4 +60,6 @@ #define SO_PROTOCOL 38 #define SO_DOMAIN 39 +#define SO_RXQ_OVFL 40 + #endif /* _ASM_M32R_SOCKET_H */ diff --git a/arch/m68k/include/asm/socket.h b/arch/m68k/include/asm/socket.h index eee01cce921b..9bf49c87d954 100644 --- a/arch/m68k/include/asm/socket.h +++ b/arch/m68k/include/asm/socket.h @@ -60,4 +60,6 @@ #define SO_PROTOCOL 38 #define SO_DOMAIN 39 +#define SO_RXQ_OVFL 40 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/mips/include/asm/socket.h b/arch/mips/include/asm/socket.h index ae05accd9fe4..9de5190f2487 100644 --- a/arch/mips/include/asm/socket.h +++ b/arch/mips/include/asm/socket.h @@ -80,6 +80,8 @@ To add: #define SO_REUSEPORT 0x0200 /* Allow local address and port reuse. */ #define SO_TIMESTAMPING 37 #define SCM_TIMESTAMPING SO_TIMESTAMPING +#define SO_RXQ_OVFL 40 + #ifdef __KERNEL__ /** sock_type - Socket types diff --git a/arch/mn10300/include/asm/socket.h b/arch/mn10300/include/asm/socket.h index 4df75af29d76..4e60c4281288 100644 --- a/arch/mn10300/include/asm/socket.h +++ b/arch/mn10300/include/asm/socket.h @@ -60,4 +60,6 @@ #define SO_PROTOCOL 38 #define SO_DOMAIN 39 +#define SO_RXQ_OVFL 40 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/parisc/include/asm/socket.h b/arch/parisc/include/asm/socket.h index 960b1e5d8e16..225b7d6a1a0a 100644 --- a/arch/parisc/include/asm/socket.h +++ b/arch/parisc/include/asm/socket.h @@ -59,6 +59,8 @@ #define SO_TIMESTAMPING 0x4020 #define SCM_TIMESTAMPING SO_TIMESTAMPING +#define SO_RXQ_OVFL 0x4021 + /* O_NONBLOCK clashes with the bits used for socket types. Therefore we * have to define SOCK_NONBLOCK to a different value here. */ diff --git a/arch/powerpc/include/asm/socket.h b/arch/powerpc/include/asm/socket.h index 3ab8b3e6feb0..866f7606da68 100644 --- a/arch/powerpc/include/asm/socket.h +++ b/arch/powerpc/include/asm/socket.h @@ -67,4 +67,6 @@ #define SO_PROTOCOL 38 #define SO_DOMAIN 39 +#define SO_RXQ_OVFL 40 + #endif /* _ASM_POWERPC_SOCKET_H */ diff --git a/arch/s390/include/asm/socket.h b/arch/s390/include/asm/socket.h index e42df89a0b85..fdff1e995c73 100644 --- a/arch/s390/include/asm/socket.h +++ b/arch/s390/include/asm/socket.h @@ -68,4 +68,6 @@ #define SO_PROTOCOL 38 #define SO_DOMAIN 39 +#define SO_RXQ_OVFL 40 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/sparc/include/asm/socket.h b/arch/sparc/include/asm/socket.h index 3a5ae3d12088..9d3fefcff2f5 100644 --- a/arch/sparc/include/asm/socket.h +++ b/arch/sparc/include/asm/socket.h @@ -56,6 +56,8 @@ #define SO_TIMESTAMPING 0x0023 #define SCM_TIMESTAMPING SO_TIMESTAMPING +#define SO_RXQ_OVFL 0x0024 + /* Security levels - as per NRL IPv6 - don't actually do anything */ #define SO_SECURITY_AUTHENTICATION 0x5001 #define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002 diff --git a/arch/xtensa/include/asm/socket.h b/arch/xtensa/include/asm/socket.h index beb3a6bdb61d..cbdf2ffaacff 100644 --- a/arch/xtensa/include/asm/socket.h +++ b/arch/xtensa/include/asm/socket.h @@ -71,4 +71,6 @@ #define SO_PROTOCOL 38 #define SO_DOMAIN 39 +#define SO_RXQ_OVFL 40 + #endif /* _XTENSA_SOCKET_H */ diff --git a/include/asm-generic/socket.h b/include/asm-generic/socket.h index 538991cef6f0..9a6115e7cf63 100644 --- a/include/asm-generic/socket.h +++ b/include/asm-generic/socket.h @@ -63,4 +63,5 @@ #define SO_PROTOCOL 38 #define SO_DOMAIN 39 +#define SO_RXQ_OVFL 40 #endif /* __ASM_GENERIC_SOCKET_H */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index df7b23ac66e6..8c866b5cb97b 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -389,8 +389,10 @@ struct sk_buff { #ifdef CONFIG_NETWORK_SECMARK __u32 secmark; #endif - - __u32 mark; + union { + __u32 mark; + __u32 dropcount; + }; __u16 vlan_tci; diff --git a/include/net/sock.h b/include/net/sock.h index 98398bdec57d..10669b01eeab 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -505,6 +505,7 @@ enum sock_flags { SOCK_TIMESTAMPING_RAW_HARDWARE, /* %SOF_TIMESTAMPING_RAW_HARDWARE */ SOCK_TIMESTAMPING_SYS_HARDWARE, /* %SOF_TIMESTAMPING_SYS_HARDWARE */ SOCK_FASYNC, /* fasync() active */ + SOCK_RXQ_OVFL, }; static inline void sock_copy_flags(struct sock *nsk, struct sock *osk) @@ -1493,6 +1494,8 @@ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) sk->sk_stamp = kt; } +extern void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb); + /** * sock_tx_timestamp - checks whether the outgoing packet is to be time stamped * @msg: outgoing packet diff --git a/net/atm/common.c b/net/atm/common.c index 950bd16d2383..d61e051e0a3f 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -496,7 +496,7 @@ int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, error = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); if (error) return error; - sock_recv_timestamp(msg, sk, skb); + sock_recv_ts_and_drops(msg, sk, skb); pr_debug("RcvM %d -= %d\n", atomic_read(&sk->sk_rmem_alloc), skb->truesize); atm_return(vcc, skb->truesize); skb_free_datagram(sk, skb); diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 1f6e49c1cde8..399e59c9c6cb 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -257,7 +257,7 @@ int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock, skb_reset_transport_header(skb); err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); if (err == 0) - sock_recv_timestamp(msg, sk, skb); + sock_recv_ts_and_drops(msg, sk, skb); skb_free_datagram(sk, skb); diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index c70786503850..d3bfc1b0afb1 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -703,7 +703,7 @@ static int rfcomm_sock_recvmsg(struct kiocb *iocb, struct socket *sock, copied += chunk; size -= chunk; - sock_recv_timestamp(msg, sk, skb); + sock_recv_ts_and_drops(msg, sk, skb); if (!(flags & MSG_PEEK)) { atomic_sub(chunk, &sk->sk_rmem_alloc); diff --git a/net/can/bcm.c b/net/can/bcm.c index 597da4f8f888..2f47039c79dd 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1534,7 +1534,7 @@ static int bcm_recvmsg(struct kiocb *iocb, struct socket *sock, return err; } - sock_recv_timestamp(msg, sk, skb); + sock_recv_ts_and_drops(msg, sk, skb); if (msg->msg_name) { msg->msg_namelen = sizeof(struct sockaddr_can); diff --git a/net/can/raw.c b/net/can/raw.c index b5e897922d32..962fc9f1d0c7 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -702,7 +702,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct socket *sock, return err; } - sock_recv_timestamp(msg, sk, skb); + sock_recv_ts_and_drops(msg, sk, skb); if (msg->msg_name) { msg->msg_namelen = sizeof(struct sockaddr_can); diff --git a/net/core/sock.c b/net/core/sock.c index 7626b6aacd68..43ca2c995393 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -276,6 +276,8 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { int err = 0; int skb_len; + unsigned long flags; + struct sk_buff_head *list = &sk->sk_receive_queue; /* Cast sk->rcvbuf to unsigned... It's pointless, but reduces number of warnings when compiling with -W --ANK @@ -305,7 +307,10 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) */ skb_len = skb->len; - skb_queue_tail(&sk->sk_receive_queue, skb); + spin_lock_irqsave(&list->lock, flags); + skb->dropcount = atomic_read(&sk->sk_drops); + __skb_queue_tail(list, skb); + spin_unlock_irqrestore(&list->lock, flags); if (!sock_flag(sk, SOCK_DEAD)) sk->sk_data_ready(sk, skb_len); @@ -702,6 +707,12 @@ set_rcvbuf: /* We implement the SO_SNDLOWAT etc to not be settable (1003.1g 5.3) */ + case SO_RXQ_OVFL: + if (valbool) + sock_set_flag(sk, SOCK_RXQ_OVFL); + else + sock_reset_flag(sk, SOCK_RXQ_OVFL); + break; default: ret = -ENOPROTOOPT; break; @@ -901,6 +912,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname, v.val = sk->sk_mark; break; + case SO_RXQ_OVFL: + v.val = !!sock_flag(sk, SOCK_RXQ_OVFL); + break; + default: return -ENOPROTOOPT; } diff --git a/net/ieee802154/dgram.c b/net/ieee802154/dgram.c index a413b1bf4465..25ad956a39d8 100644 --- a/net/ieee802154/dgram.c +++ b/net/ieee802154/dgram.c @@ -303,7 +303,7 @@ static int dgram_recvmsg(struct kiocb *iocb, struct sock *sk, if (err) goto done; - sock_recv_timestamp(msg, sk, skb); + sock_recv_ts_and_drops(msg, sk, skb); if (flags & MSG_TRUNC) copied = skb->len; diff --git a/net/ieee802154/raw.c b/net/ieee802154/raw.c index 30e74eee07d6..769c8d138fc3 100644 --- a/net/ieee802154/raw.c +++ b/net/ieee802154/raw.c @@ -191,7 +191,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (err) goto done; - sock_recv_timestamp(msg, sk, skb); + sock_recv_ts_and_drops(msg, sk, skb); if (flags & MSG_TRUNC) copied = skb->len; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 757c9171e7c2..f18172b07611 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -682,7 +682,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (err) goto done; - sock_recv_timestamp(msg, sk, skb); + sock_recv_ts_and_drops(msg, sk, skb); /* Copy the address. */ if (sin) { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 194bcdc6d9fc..71e5353b30c8 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -955,7 +955,7 @@ try_again: UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_INDATAGRAMS, is_udplite); - sock_recv_timestamp(msg, sk, skb); + sock_recv_ts_and_drops(msg, sk, skb); /* Copy the address. */ if (sin) { diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 4f24570b0869..d8375bc7f2d5 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -497,7 +497,7 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk, sin6->sin6_scope_id = IP6CB(skb)->iif; } - sock_recv_timestamp(msg, sk, skb); + sock_recv_ts_and_drops(msg, sk, skb); if (np->rxopt.all) datagram_recv_ctl(sk, msg, skb); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index ff778c172ef2..1f8e2afa4490 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -252,7 +252,7 @@ try_again: UDP_MIB_INDATAGRAMS, is_udplite); } - sock_recv_timestamp(msg, sk, skb); + sock_recv_ts_and_drops(msg, sk, skb); /* Copy the address. */ if (msg->msg_name) { diff --git a/net/key/af_key.c b/net/key/af_key.c index c078ae6e975b..472f6594184a 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -3606,7 +3606,7 @@ static int pfkey_recvmsg(struct kiocb *kiocb, if (err) goto out_free; - sock_recv_timestamp(msg, sk, skb); + sock_recv_ts_and_drops(msg, sk, skb); err = (flags & MSG_TRUNC) ? skb->len : copied; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index f87ed4803c11..bf3a2954cd4d 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -627,15 +627,14 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, spin_lock(&sk->sk_receive_queue.lock); po->stats.tp_packets++; + skb->dropcount = atomic_read(&sk->sk_drops); __skb_queue_tail(&sk->sk_receive_queue, skb); spin_unlock(&sk->sk_receive_queue.lock); sk->sk_data_ready(sk, skb->len); return 0; drop_n_acct: - spin_lock(&sk->sk_receive_queue.lock); - po->stats.tp_drops++; - spin_unlock(&sk->sk_receive_queue.lock); + po->stats.tp_drops = atomic_inc_return(&sk->sk_drops); drop_n_restore: if (skb_head != skb->data && skb_shared(skb)) { @@ -1478,7 +1477,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, if (err) goto out_free; - sock_recv_timestamp(msg, sk, skb); + sock_recv_ts_and_drops(msg, sk, skb); if (msg->msg_name) memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, diff --git a/net/rxrpc/ar-recvmsg.c b/net/rxrpc/ar-recvmsg.c index a39bf97f8830..60c2b94e6b54 100644 --- a/net/rxrpc/ar-recvmsg.c +++ b/net/rxrpc/ar-recvmsg.c @@ -146,7 +146,7 @@ int rxrpc_recvmsg(struct kiocb *iocb, struct socket *sock, memcpy(msg->msg_name, &call->conn->trans->peer->srx, sizeof(call->conn->trans->peer->srx)); - sock_recv_timestamp(msg, &rx->sk, skb); + sock_recv_ts_and_drops(msg, &rx->sk, skb); } /* receive the message */ diff --git a/net/sctp/socket.c b/net/sctp/socket.c index c8d05758661d..0970e92c6acd 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1958,7 +1958,7 @@ SCTP_STATIC int sctp_recvmsg(struct kiocb *iocb, struct sock *sk, if (err) goto out_free; - sock_recv_timestamp(msg, sk, skb); + sock_recv_ts_and_drops(msg, sk, skb); if (sctp_ulpevent_is_notification(event)) { msg->msg_flags |= MSG_NOTIFICATION; sp->pf->event_msgname(event, msg->msg_name, addr_len); diff --git a/net/socket.c b/net/socket.c index 954f3381cc8a..807935693846 100644 --- a/net/socket.c +++ b/net/socket.c @@ -668,6 +668,21 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, EXPORT_SYMBOL_GPL(__sock_recv_timestamp); +inline void sock_recv_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) +{ + if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && skb->dropcount) + put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL, + sizeof(__u32), &skb->dropcount); +} + +void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, + struct sk_buff *skb) +{ + sock_recv_timestamp(msg, sk, skb); + sock_recv_drops(msg, sk, skb); +} +EXPORT_SYMBOL_GPL(sock_recv_ts_and_drops); + static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size, int flags) { -- cgit v1.3-8-gc7d7 From a2e2725541fad72416326798c2d7fa4dafb7d337 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 12 Oct 2009 23:40:10 -0700 Subject: net: Introduce recvmmsg socket syscall MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Meaning receive multiple messages, reducing the number of syscalls and net stack entry/exit operations. Next patches will introduce mechanisms where protocols that want to optimize this operation will provide an unlocked_recvmsg operation. This takes into account comments made by: . Paul Moore: sock_recvmsg is called only for the first datagram, sock_recvmsg_nosec is used for the rest. . Caitlin Bestler: recvmmsg now has a struct timespec timeout, that works in the same fashion as the ppoll one. If the underlying protocol returns a datagram with MSG_OOB set, this will make recvmmsg return right away with as many datagrams (+ the OOB one) it has received so far. . Rémi Denis-Courmont & Steven Whitehouse: If we receive N < vlen datagrams and then recvmsg returns an error, recvmmsg will return the successfully received datagrams, store the error and return it in the next call. This paves the way for a subsequent optimization, sk_prot->unlocked_recvmsg, where we will be able to acquire the lock only at batch start and end, not at every underlying recvmsg call. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- arch/alpha/kernel/systbls.S | 1 + arch/arm/kernel/calls.S | 1 + arch/avr32/kernel/syscall_table.S | 1 + arch/blackfin/mach-common/entry.S | 1 + arch/ia64/kernel/entry.S | 1 + arch/microblaze/kernel/syscall_table.S | 1 + arch/mips/kernel/scall32-o32.S | 1 + arch/mips/kernel/scall64-64.S | 1 + arch/mips/kernel/scall64-n32.S | 1 + arch/mips/kernel/scall64-o32.S | 1 + arch/sh/kernel/syscalls_64.S | 1 + arch/sparc/kernel/systbls_32.S | 2 +- arch/sparc/kernel/systbls_64.S | 4 +- arch/x86/ia32/ia32entry.S | 1 + arch/x86/include/asm/unistd_32.h | 3 +- arch/x86/include/asm/unistd_64.h | 2 + arch/x86/kernel/syscall_table_32.S | 1 + arch/xtensa/include/asm/unistd.h | 4 +- include/linux/net.h | 1 + include/linux/socket.h | 10 ++ include/linux/syscalls.h | 4 + include/net/compat.h | 8 ++ kernel/sys_ni.c | 2 + net/compat.c | 33 ++++- net/socket.c | 225 +++++++++++++++++++++++++++------ 25 files changed, 261 insertions(+), 50 deletions(-) (limited to 'include') diff --git a/arch/alpha/kernel/systbls.S b/arch/alpha/kernel/systbls.S index 95c9aef1c106..cda6b8b3d573 100644 --- a/arch/alpha/kernel/systbls.S +++ b/arch/alpha/kernel/systbls.S @@ -497,6 +497,7 @@ sys_call_table: .quad sys_signalfd .quad sys_ni_syscall .quad sys_eventfd + .quad sys_recvmmsg .size sys_call_table, . - sys_call_table .type sys_call_table, @object diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S index fafce1b5c69f..f58c1156e779 100644 --- a/arch/arm/kernel/calls.S +++ b/arch/arm/kernel/calls.S @@ -374,6 +374,7 @@ CALL(sys_pwritev) CALL(sys_rt_tgsigqueueinfo) CALL(sys_perf_event_open) +/* 365 */ CALL(sys_recvmmsg) #ifndef syscalls_counted .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls #define syscalls_counted diff --git a/arch/avr32/kernel/syscall_table.S b/arch/avr32/kernel/syscall_table.S index 7ee0057613b3..e76bad16b0f0 100644 --- a/arch/avr32/kernel/syscall_table.S +++ b/arch/avr32/kernel/syscall_table.S @@ -295,4 +295,5 @@ sys_call_table: .long sys_signalfd .long sys_ni_syscall /* 280, was sys_timerfd */ .long sys_eventfd + .long sys_recvmmsg .long sys_ni_syscall /* r8 is saturated at nr_syscalls */ diff --git a/arch/blackfin/mach-common/entry.S b/arch/blackfin/mach-common/entry.S index 1e7cac23e25f..48692724b74c 100644 --- a/arch/blackfin/mach-common/entry.S +++ b/arch/blackfin/mach-common/entry.S @@ -1621,6 +1621,7 @@ ENTRY(_sys_call_table) .long _sys_pwritev .long _sys_rt_tgsigqueueinfo .long _sys_perf_event_open + .long _sys_recvmmsg /* 370 */ .rept NR_syscalls-(.-_sys_call_table)/4 .long _sys_ni_syscall diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index d0e7d37017b4..d75b872ca4dc 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -1806,6 +1806,7 @@ sys_call_table: data8 sys_preadv data8 sys_pwritev // 1320 data8 sys_rt_tgsigqueueinfo + data8 sys_recvmmsg .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls #endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */ diff --git a/arch/microblaze/kernel/syscall_table.S b/arch/microblaze/kernel/syscall_table.S index ecec19155135..c1ab1dc10898 100644 --- a/arch/microblaze/kernel/syscall_table.S +++ b/arch/microblaze/kernel/syscall_table.S @@ -371,3 +371,4 @@ ENTRY(sys_call_table) .long sys_ni_syscall .long sys_rt_tgsigqueueinfo /* 365 */ .long sys_perf_event_open + .long sys_recvmmsg diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S index fd2a9bb620d6..17202bbe843f 100644 --- a/arch/mips/kernel/scall32-o32.S +++ b/arch/mips/kernel/scall32-o32.S @@ -583,6 +583,7 @@ einval: li v0, -ENOSYS sys sys_rt_tgsigqueueinfo 4 sys sys_perf_event_open 5 sys sys_accept4 4 + sys sys_recvmmsg 5 .endm /* We pre-compute the number of _instruction_ bytes needed to diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S index 18bf7f32c5e4..a8a6c596eb04 100644 --- a/arch/mips/kernel/scall64-64.S +++ b/arch/mips/kernel/scall64-64.S @@ -420,4 +420,5 @@ sys_call_table: PTR sys_rt_tgsigqueueinfo PTR sys_perf_event_open PTR sys_accept4 + PTR sys_recvmmsg .size sys_call_table,.-sys_call_table diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index 6ebc07976694..5154e64f7cfe 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -418,4 +418,5 @@ EXPORT(sysn32_call_table) PTR compat_sys_rt_tgsigqueueinfo /* 5295 */ PTR sys_perf_event_open PTR sys_accept4 + PTR compat_sys_recvmmsg .size sysn32_call_table,.-sysn32_call_table diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index 9bbf9775e0bd..d0eff53d7cb9 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -538,4 +538,5 @@ sys_call_table: PTR compat_sys_rt_tgsigqueueinfo PTR sys_perf_event_open PTR sys_accept4 + PTR compat_sys_recvmmsg .size sys_call_table,.-sys_call_table diff --git a/arch/sh/kernel/syscalls_64.S b/arch/sh/kernel/syscalls_64.S index 5bfde6c77498..07d2aaea9ae8 100644 --- a/arch/sh/kernel/syscalls_64.S +++ b/arch/sh/kernel/syscalls_64.S @@ -391,3 +391,4 @@ sys_call_table: .long sys_pwritev .long sys_rt_tgsigqueueinfo .long sys_perf_event_open + .long sys_recvmmsg /* 365 */ diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S index 0f1658d37490..ceb1530f8aa6 100644 --- a/arch/sparc/kernel/systbls_32.S +++ b/arch/sparc/kernel/systbls_32.S @@ -82,5 +82,5 @@ sys_call_table: /*310*/ .long sys_utimensat, sys_signalfd, sys_timerfd_create, sys_eventfd, sys_fallocate /*315*/ .long sys_timerfd_settime, sys_timerfd_gettime, sys_signalfd4, sys_eventfd2, sys_epoll_create1 /*320*/ .long sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv -/*325*/ .long sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open +/*325*/ .long sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S index 009825f6e73c..f37bef747e60 100644 --- a/arch/sparc/kernel/systbls_64.S +++ b/arch/sparc/kernel/systbls_64.S @@ -83,7 +83,7 @@ sys_call_table32: /*310*/ .word compat_sys_utimensat, compat_sys_signalfd, sys_timerfd_create, sys_eventfd, compat_sys_fallocate .word compat_sys_timerfd_settime, compat_sys_timerfd_gettime, compat_sys_signalfd4, sys_eventfd2, sys_epoll_create1 /*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, compat_sys_preadv - .word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo, sys_perf_event_open + .word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo, sys_perf_event_open, compat_sys_recvmmsg #endif /* CONFIG_COMPAT */ @@ -158,4 +158,4 @@ sys_call_table: /*310*/ .word sys_utimensat, sys_signalfd, sys_timerfd_create, sys_eventfd, sys_fallocate .word sys_timerfd_settime, sys_timerfd_gettime, sys_signalfd4, sys_eventfd2, sys_epoll_create1 /*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv - .word sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open + .word sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 74619c4f9fda..11a6c79d5f46 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -832,4 +832,5 @@ ia32_sys_call_table: .quad compat_sys_pwritev .quad compat_sys_rt_tgsigqueueinfo /* 335 */ .quad sys_perf_event_open + .quad compat_sys_recvmmsg ia32_syscall_end: diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h index 6fb3c209a7e3..3baf379fa840 100644 --- a/arch/x86/include/asm/unistd_32.h +++ b/arch/x86/include/asm/unistd_32.h @@ -342,10 +342,11 @@ #define __NR_pwritev 334 #define __NR_rt_tgsigqueueinfo 335 #define __NR_perf_event_open 336 +#define __NR_recvmmsg 337 #ifdef __KERNEL__ -#define NR_syscalls 337 +#define NR_syscalls 338 #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h index 8d3ad0adbc68..4843f7ba754a 100644 --- a/arch/x86/include/asm/unistd_64.h +++ b/arch/x86/include/asm/unistd_64.h @@ -661,6 +661,8 @@ __SYSCALL(__NR_pwritev, sys_pwritev) __SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo) #define __NR_perf_event_open 298 __SYSCALL(__NR_perf_event_open, sys_perf_event_open) +#define __NR_recvmmsg 299 +__SYSCALL(__NR_recvmmsg, sys_recvmmsg) #ifndef __NO_STUBS #define __ARCH_WANT_OLD_READDIR diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index 0157cd26d7cc..70c2125d55b9 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S @@ -336,3 +336,4 @@ ENTRY(sys_call_table) .long sys_pwritev .long sys_rt_tgsigqueueinfo /* 335 */ .long sys_perf_event_open + .long sys_recvmmsg diff --git a/arch/xtensa/include/asm/unistd.h b/arch/xtensa/include/asm/unistd.h index c092c8fbb2cf..4e55dc763021 100644 --- a/arch/xtensa/include/asm/unistd.h +++ b/arch/xtensa/include/asm/unistd.h @@ -681,8 +681,10 @@ __SYSCALL(304, sys_signalfd, 3) __SYSCALL(305, sys_ni_syscall, 0) #define __NR_eventfd 306 __SYSCALL(306, sys_eventfd, 1) +#define __NR_recvmmsg 307 +__SYSCALL(307, sys_recvmmsg, 5) -#define __NR_syscall_count 307 +#define __NR_syscall_count 308 /* * sysxtensa syscall handler diff --git a/include/linux/net.h b/include/linux/net.h index 529a0931711d..b42bb60fe92f 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -41,6 +41,7 @@ #define SYS_SENDMSG 16 /* sys_sendmsg(2) */ #define SYS_RECVMSG 17 /* sys_recvmsg(2) */ #define SYS_ACCEPT4 18 /* sys_accept4(2) */ +#define SYS_RECVMMSG 19 /* sys_recvmmsg(2) */ typedef enum { SS_FREE = 0, /* not allocated */ diff --git a/include/linux/socket.h b/include/linux/socket.h index 3273a0c5043b..59966f12990c 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -65,6 +65,12 @@ struct msghdr { unsigned msg_flags; }; +/* For recvmmsg/sendmmsg */ +struct mmsghdr { + struct msghdr msg_hdr; + unsigned msg_len; +}; + /* * POSIX 1003.1g - ancillary data object information * Ancillary data consits of a sequence of pairs of @@ -312,6 +318,10 @@ extern int move_addr_to_user(struct sockaddr *kaddr, int klen, void __user *uadd extern int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr *kaddr); extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data); +struct timespec; + +extern int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, + unsigned int flags, struct timespec *timeout); #endif #endif /* not kernel and not glibc */ #endif /* _LINUX_SOCKET_H */ diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index a990ace1a838..714f063a3e6d 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -25,6 +25,7 @@ struct linux_dirent64; struct list_head; struct msgbuf; struct msghdr; +struct mmsghdr; struct msqid_ds; struct new_utsname; struct nfsctl_arg; @@ -677,6 +678,9 @@ asmlinkage long sys_recv(int, void __user *, size_t, unsigned); asmlinkage long sys_recvfrom(int, void __user *, size_t, unsigned, struct sockaddr __user *, int __user *); asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned flags); +asmlinkage long sys_recvmmsg(int fd, struct mmsghdr __user *msg, + unsigned int vlen, unsigned flags, + struct timespec __user *timeout); asmlinkage long sys_socket(int, int, int); asmlinkage long sys_socketpair(int, int, int, int __user *); asmlinkage long sys_socketcall(int call, unsigned long __user *args); diff --git a/include/net/compat.h b/include/net/compat.h index 7c3002832d05..9679f05e9896 100644 --- a/include/net/compat.h +++ b/include/net/compat.h @@ -18,6 +18,11 @@ struct compat_msghdr { compat_uint_t msg_flags; }; +struct compat_mmsghdr { + struct compat_msghdr msg_hdr; + compat_uint_t msg_len; +}; + struct compat_cmsghdr { compat_size_t cmsg_len; compat_int_t cmsg_level; @@ -35,6 +40,9 @@ extern int get_compat_msghdr(struct msghdr *, struct compat_msghdr __user *); extern int verify_compat_iovec(struct msghdr *, struct iovec *, struct sockaddr *, int); extern asmlinkage long compat_sys_sendmsg(int,struct compat_msghdr __user *,unsigned); extern asmlinkage long compat_sys_recvmsg(int,struct compat_msghdr __user *,unsigned); +extern asmlinkage long compat_sys_recvmmsg(int, struct compat_mmsghdr __user *, + unsigned, unsigned, + struct timespec __user *); extern asmlinkage long compat_sys_getsockopt(int, int, int, char __user *, int __user *); extern int put_cmsg_compat(struct msghdr*, int, int, int, void *); diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index e06d0b8d1951..f050ba85d420 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -48,8 +48,10 @@ cond_syscall(sys_shutdown); cond_syscall(sys_sendmsg); cond_syscall(compat_sys_sendmsg); cond_syscall(sys_recvmsg); +cond_syscall(sys_recvmmsg); cond_syscall(compat_sys_recvmsg); cond_syscall(compat_sys_recvfrom); +cond_syscall(compat_sys_recvmmsg); cond_syscall(sys_socketcall); cond_syscall(sys_futex); cond_syscall(compat_sys_futex); diff --git a/net/compat.c b/net/compat.c index a407c3addbae..e13f5256fd20 100644 --- a/net/compat.c +++ b/net/compat.c @@ -727,10 +727,10 @@ EXPORT_SYMBOL(compat_mc_getsockopt); /* Argument list sizes for compat_sys_socketcall */ #define AL(x) ((x) * sizeof(u32)) -static unsigned char nas[19]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), +static unsigned char nas[20]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), AL(6),AL(2),AL(5),AL(5),AL(3),AL(3), - AL(4)}; + AL(4),AL(5)}; #undef AL asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, unsigned flags) @@ -755,13 +755,36 @@ asmlinkage long compat_sys_recvfrom(int fd, void __user *buf, size_t len, return sys_recvfrom(fd, buf, len, flags | MSG_CMSG_COMPAT, addr, addrlen); } +asmlinkage long compat_sys_recvmmsg(int fd, struct compat_mmsghdr __user *mmsg, + unsigned vlen, unsigned int flags, + struct timespec __user *timeout) +{ + int datagrams; + struct timespec ktspec; + struct compat_timespec __user *utspec = + (struct compat_timespec __user *)timeout; + + if (get_user(ktspec.tv_sec, &utspec->tv_sec) || + get_user(ktspec.tv_nsec, &utspec->tv_nsec)) + return -EFAULT; + + datagrams = __sys_recvmmsg(fd, (struct mmsghdr __user *)mmsg, vlen, + flags | MSG_CMSG_COMPAT, &ktspec); + if (datagrams > 0 && + (put_user(ktspec.tv_sec, &utspec->tv_sec) || + put_user(ktspec.tv_nsec, &utspec->tv_nsec))) + datagrams = -EFAULT; + + return datagrams; +} + asmlinkage long compat_sys_socketcall(int call, u32 __user *args) { int ret; u32 a[6]; u32 a0, a1; - if (call < SYS_SOCKET || call > SYS_ACCEPT4) + if (call < SYS_SOCKET || call > SYS_RECVMMSG) return -EINVAL; if (copy_from_user(a, args, nas[call])) return -EFAULT; @@ -823,6 +846,10 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args) case SYS_RECVMSG: ret = compat_sys_recvmsg(a0, compat_ptr(a1), a[2]); break; + case SYS_RECVMMSG: + ret = compat_sys_recvmmsg(a0, compat_ptr(a1), a[2], a[3], + compat_ptr(a[4])); + break; case SYS_ACCEPT4: ret = sys_accept4(a0, compat_ptr(a1), compat_ptr(a[2]), a[3]); break; diff --git a/net/socket.c b/net/socket.c index 807935693846..9dff31c9b799 100644 --- a/net/socket.c +++ b/net/socket.c @@ -683,10 +683,9 @@ void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, } EXPORT_SYMBOL_GPL(sock_recv_ts_and_drops); -static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock, + struct msghdr *msg, size_t size, int flags) { - int err; struct sock_iocb *si = kiocb_to_siocb(iocb); si->sock = sock; @@ -695,13 +694,17 @@ static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, si->size = size; si->flags = flags; - err = security_socket_recvmsg(sock, msg, size, flags); - if (err) - return err; - return sock->ops->recvmsg(iocb, sock, msg, size, flags); } +static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, + struct msghdr *msg, size_t size, int flags) +{ + int err = security_socket_recvmsg(sock, msg, size, flags); + + return err ?: __sock_recvmsg_nosec(iocb, sock, msg, size, flags); +} + int sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { @@ -717,6 +720,21 @@ int sock_recvmsg(struct socket *sock, struct msghdr *msg, return ret; } +static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg, + size_t size, int flags) +{ + struct kiocb iocb; + struct sock_iocb siocb; + int ret; + + init_sync_kiocb(&iocb, NULL); + iocb.private = &siocb; + ret = __sock_recvmsg_nosec(&iocb, sock, msg, size, flags); + if (-EIOCBQUEUED == ret) + ret = wait_on_sync_kiocb(&iocb); + return ret; +} + int kernel_recvmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t size, int flags) { @@ -1983,22 +2001,15 @@ out: return err; } -/* - * BSD recvmsg interface - */ - -SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg, - unsigned int, flags) +static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg, + struct msghdr *msg_sys, unsigned flags, int nosec) { struct compat_msghdr __user *msg_compat = (struct compat_msghdr __user *)msg; - struct socket *sock; struct iovec iovstack[UIO_FASTIOV]; struct iovec *iov = iovstack; - struct msghdr msg_sys; unsigned long cmsg_ptr; int err, iov_size, total_len, len; - int fput_needed; /* kernel mode address */ struct sockaddr_storage addr; @@ -2008,27 +2019,23 @@ SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg, int __user *uaddr_len; if (MSG_CMSG_COMPAT & flags) { - if (get_compat_msghdr(&msg_sys, msg_compat)) + if (get_compat_msghdr(msg_sys, msg_compat)) return -EFAULT; } - else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) + else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr))) return -EFAULT; - sock = sockfd_lookup_light(fd, &err, &fput_needed); - if (!sock) - goto out; - err = -EMSGSIZE; - if (msg_sys.msg_iovlen > UIO_MAXIOV) - goto out_put; + if (msg_sys->msg_iovlen > UIO_MAXIOV) + goto out; /* Check whether to allocate the iovec area */ err = -ENOMEM; - iov_size = msg_sys.msg_iovlen * sizeof(struct iovec); - if (msg_sys.msg_iovlen > UIO_FASTIOV) { + iov_size = msg_sys->msg_iovlen * sizeof(struct iovec); + if (msg_sys->msg_iovlen > UIO_FASTIOV) { iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL); if (!iov) - goto out_put; + goto out; } /* @@ -2036,46 +2043,47 @@ SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg, * kernel msghdr to use the kernel address space) */ - uaddr = (__force void __user *)msg_sys.msg_name; + uaddr = (__force void __user *)msg_sys->msg_name; uaddr_len = COMPAT_NAMELEN(msg); if (MSG_CMSG_COMPAT & flags) { - err = verify_compat_iovec(&msg_sys, iov, + err = verify_compat_iovec(msg_sys, iov, (struct sockaddr *)&addr, VERIFY_WRITE); } else - err = verify_iovec(&msg_sys, iov, + err = verify_iovec(msg_sys, iov, (struct sockaddr *)&addr, VERIFY_WRITE); if (err < 0) goto out_freeiov; total_len = err; - cmsg_ptr = (unsigned long)msg_sys.msg_control; - msg_sys.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT); + cmsg_ptr = (unsigned long)msg_sys->msg_control; + msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT); if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; - err = sock_recvmsg(sock, &msg_sys, total_len, flags); + err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, + total_len, flags); if (err < 0) goto out_freeiov; len = err; if (uaddr != NULL) { err = move_addr_to_user((struct sockaddr *)&addr, - msg_sys.msg_namelen, uaddr, + msg_sys->msg_namelen, uaddr, uaddr_len); if (err < 0) goto out_freeiov; } - err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT), + err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT), COMPAT_FLAGS(msg)); if (err) goto out_freeiov; if (MSG_CMSG_COMPAT & flags) - err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr, + err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr, &msg_compat->msg_controllen); else - err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr, + err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr, &msg->msg_controllen); if (err) goto out_freeiov; @@ -2084,21 +2092,150 @@ SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg, out_freeiov: if (iov != iovstack) sock_kfree_s(sock->sk, iov, iov_size); -out_put: +out: + return err; +} + +/* + * BSD recvmsg interface + */ + +SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg, + unsigned int, flags) +{ + int fput_needed, err; + struct msghdr msg_sys; + struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed); + + if (!sock) + goto out; + + err = __sys_recvmsg(sock, msg, &msg_sys, flags, 0); + fput_light(sock->file, fput_needed); out: return err; } -#ifdef __ARCH_WANT_SYS_SOCKETCALL +/* + * Linux recvmmsg interface + */ + +int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, + unsigned int flags, struct timespec *timeout) +{ + int fput_needed, err, datagrams; + struct socket *sock; + struct mmsghdr __user *entry; + struct msghdr msg_sys; + struct timespec end_time; + + if (timeout && + poll_select_set_timeout(&end_time, timeout->tv_sec, + timeout->tv_nsec)) + return -EINVAL; + + datagrams = 0; + + sock = sockfd_lookup_light(fd, &err, &fput_needed); + if (!sock) + return err; + + err = sock_error(sock->sk); + if (err) + goto out_put; + + entry = mmsg; + + while (datagrams < vlen) { + /* + * No need to ask LSM for more than the first datagram. + */ + err = __sys_recvmsg(sock, (struct msghdr __user *)entry, + &msg_sys, flags, datagrams); + if (err < 0) + break; + err = put_user(err, &entry->msg_len); + if (err) + break; + ++entry; + ++datagrams; + + if (timeout) { + ktime_get_ts(timeout); + *timeout = timespec_sub(end_time, *timeout); + if (timeout->tv_sec < 0) { + timeout->tv_sec = timeout->tv_nsec = 0; + break; + } + + /* Timeout, return less than vlen datagrams */ + if (timeout->tv_nsec == 0 && timeout->tv_sec == 0) + break; + } + + /* Out of band data, return right away */ + if (msg_sys.msg_flags & MSG_OOB) + break; + } + +out_put: + fput_light(sock->file, fput_needed); + if (err == 0) + return datagrams; + + if (datagrams != 0) { + /* + * We may return less entries than requested (vlen) if the + * sock is non block and there aren't enough datagrams... + */ + if (err != -EAGAIN) { + /* + * ... or if recvmsg returns an error after we + * received some datagrams, where we record the + * error to return on the next call or if the + * app asks about it using getsockopt(SO_ERROR). + */ + sock->sk->sk_err = -err; + } + + return datagrams; + } + + return err; +} + +SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg, + unsigned int, vlen, unsigned int, flags, + struct timespec __user *, timeout) +{ + int datagrams; + struct timespec timeout_sys; + + if (!timeout) + return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL); + + if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys))) + return -EFAULT; + + datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys); + + if (datagrams > 0 && + copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys))) + datagrams = -EFAULT; + + return datagrams; +} + +#ifdef __ARCH_WANT_SYS_SOCKETCALL /* Argument list sizes for sys_socketcall */ #define AL(x) ((x) * sizeof(unsigned long)) -static const unsigned char nargs[19]={ +static const unsigned char nargs[20] = { AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), AL(6),AL(2),AL(5),AL(5),AL(3),AL(3), - AL(4) + AL(4),AL(5) }; #undef AL @@ -2118,7 +2255,7 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args) int err; unsigned int len; - if (call < 1 || call > SYS_ACCEPT4) + if (call < 1 || call > SYS_RECVMMSG) return -EINVAL; len = nargs[call]; @@ -2196,6 +2333,10 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args) case SYS_RECVMSG: err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]); break; + case SYS_RECVMMSG: + err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3], + (struct timespec __user *)a[4]); + break; case SYS_ACCEPT4: err = sys_accept4(a0, (struct sockaddr __user *)a1, (int __user *)a[2], a[3]); -- cgit v1.3-8-gc7d7 From 8968f9d3dc23d9a1821d97c6f11e72a59382e56c Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Tue, 13 Oct 2009 16:19:41 +0900 Subject: perf_event, x86, mce: Use TRACE_EVENT() for MCE logging This approach is the first baby step towards solving many of the structural problems the x86 MCE logging code is having today: - It has a private ring-buffer implementation that has a number of limitations and has been historically fragile and buggy. - It is using a quirky /dev/mcelog ioctl driven ABI that is MCE specific. /dev/mcelog is not part of any larger logging framework and hence has remained on the fringes for many years. - The MCE logging code is still very unclean partly due to its ABI limitations. Fields are being reused for multiple purposes, and the whole message structure is limited and x86 specific to begin with. All in one, the x86 tree would like to move away from this private implementation of an event logging facility to a broader framework. By using perf events we gain the following advantages: - Multiple user-space agents can access MCE events. We can have an mcelog daemon running but also a system-wide tracer capturing important events in flight-recorder mode. - Sampling support: the kernel and the user-space call-chain of MCE events can be stored and analyzed as well. This way actual patterns of bad behavior can be matched to precisely what kind of activity happened in the kernel (and/or in the app) around that moment in time. - Coupling with other hardware and software events: the PMU can track a number of other anomalies - monitoring software might chose to monitor those plus the MCE events as well - in one coherent stream of events. - Discovery of MCE sources - tracepoints are enumerated and tools can act upon the existence (or non-existence) of various channels of MCE information. - Filtering support: we just subscribe to and act upon the events we are interested in. Then even on a per event source basis there's in-kernel filter expressions available that can restrict the amount of data that hits the event channel. - Arbitrary deep per cpu buffering of events - we can buffer 32 entries or we can buffer as much as we want, as long as we have the RAM. - An NMI-safe ring-buffer implementation - mappable to user-space. - Built-in support for timestamping of events, PID markers, CPU markers, etc. - A rich ABI accessible over system call interface. Per cpu, per task and per workload monitoring of MCE events can be done this way. The ABI itself has a nice, meaningful structure. - Extensible ABI: new fields can be added without breaking tooling. New tracepoints can be added as the hardware side evolves. There's various parsers that can be used. - Lots of scheduling/buffering/batching modes of operandi for MCE events. poll() support. mmap() support. read() support. You name it. - Rich tooling support: even without any MCE specific extensions added the 'perf' tool today offers various views of MCE data: perf report, perf stat, perf trace can all be used to view logged MCE events and perhaps correlate them to certain user-space usage patterns. But it can be used directly as well, for user-space agents and policy action in mcelog, etc. With this we hope to achieve significant code cleanup and feature improvements in the MCE code, and we hope to be able to drop the /dev/mcelog facility in the end. This patch is just a plain dumb dump of mce_log() records to the tracepoints / perf events framework - a first proof of concept step. Signed-off-by: Hidetoshi Seto Cc: Huang Ying Cc: Andi Kleen LKML-Reference: <4AD42A0D.7050104@jp.fujitsu.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce.c | 6 ++++ include/trace/events/mce.h | 69 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+) create mode 100644 include/trace/events/mce.h (limited to 'include') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index b1598a9436d0..39caea3d8bc3 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -46,6 +46,9 @@ #include "mce-internal.h" +#define CREATE_TRACE_POINTS +#include + int mce_disabled __read_mostly; #define MISC_MCELOG_MINOR 227 @@ -141,6 +144,9 @@ void mce_log(struct mce *mce) { unsigned next, entry; + /* Emit the trace record: */ + trace_mce_record(mce); + mce->finished = 0; wmb(); for (;;) { diff --git a/include/trace/events/mce.h b/include/trace/events/mce.h new file mode 100644 index 000000000000..7eee77895cb3 --- /dev/null +++ b/include/trace/events/mce.h @@ -0,0 +1,69 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mce + +#if !defined(_TRACE_MCE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_MCE_H + +#include +#include +#include + +TRACE_EVENT(mce_record, + + TP_PROTO(struct mce *m), + + TP_ARGS(m), + + TP_STRUCT__entry( + __field( u64, mcgcap ) + __field( u64, mcgstatus ) + __field( u8, bank ) + __field( u64, status ) + __field( u64, addr ) + __field( u64, misc ) + __field( u64, ip ) + __field( u8, cs ) + __field( u64, tsc ) + __field( u64, walltime ) + __field( u32, cpu ) + __field( u32, cpuid ) + __field( u32, apicid ) + __field( u32, socketid ) + __field( u8, cpuvendor ) + ), + + TP_fast_assign( + __entry->mcgcap = m->mcgcap; + __entry->mcgstatus = m->mcgstatus; + __entry->bank = m->bank; + __entry->status = m->status; + __entry->addr = m->addr; + __entry->misc = m->misc; + __entry->ip = m->ip; + __entry->cs = m->cs; + __entry->tsc = m->tsc; + __entry->walltime = m->time; + __entry->cpu = m->extcpu; + __entry->cpuid = m->cpuid; + __entry->apicid = m->apicid; + __entry->socketid = m->socketid; + __entry->cpuvendor = m->cpuvendor; + ), + + TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016Lx, ADDR/MISC: %016Lx/%016Lx, RIP: %02x:<%016Lx>, TSC: %llx, PROCESSOR: %u:%x, TIME: %llu, SOCKET: %u, APIC: %x", + __entry->cpu, + __entry->mcgcap, __entry->mcgstatus, + __entry->bank, __entry->status, + __entry->addr, __entry->misc, + __entry->cs, __entry->ip, + __entry->tsc, + __entry->cpuvendor, __entry->cpuid, + __entry->walltime, + __entry->socketid, + __entry->apicid) +); + +#endif /* _TRACE_MCE_H */ + +/* This part must be outside protection */ +#include -- cgit v1.3-8-gc7d7 From f373b53b5fe67aa4a6f28f921a529cc90f88e79b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 9 Oct 2009 00:16:19 +0000 Subject: tcp: replace ehash_size by ehash_mask Storing the mask (size - 1) instead of the size allows fast path to be a bit faster. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_hashtables.h | 4 ++-- net/dccp/proto.c | 13 +++++++------ net/ipv4/inet_diag.c | 2 +- net/ipv4/inet_hashtables.c | 2 +- net/ipv4/inet_timewait_sock.c | 2 +- net/ipv4/tcp.c | 11 +++++------ net/ipv4/tcp_ipv4.c | 6 +++--- net/ipv6/inet6_hashtables.c | 2 +- 8 files changed, 21 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index d522dcf3031a..5f11c4a0daca 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -125,7 +125,7 @@ struct inet_hashinfo { */ struct inet_ehash_bucket *ehash; spinlock_t *ehash_locks; - unsigned int ehash_size; + unsigned int ehash_mask; unsigned int ehash_locks_mask; /* Ok, let's try this, I give up, we do need a local binding @@ -158,7 +158,7 @@ static inline struct inet_ehash_bucket *inet_ehash_bucket( struct inet_hashinfo *hashinfo, unsigned int hash) { - return &hashinfo->ehash[hash & (hashinfo->ehash_size - 1)]; + return &hashinfo->ehash[hash & hashinfo->ehash_mask]; } static inline spinlock_t *inet_ehash_lockp( diff --git a/net/dccp/proto.c b/net/dccp/proto.c index a156319fd0ac..ecb203fff501 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -1060,11 +1060,12 @@ static int __init dccp_init(void) for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++) ; do { - dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE / + unsigned long hash_size = (1UL << ehash_order) * PAGE_SIZE / sizeof(struct inet_ehash_bucket); - while (dccp_hashinfo.ehash_size & - (dccp_hashinfo.ehash_size - 1)) - dccp_hashinfo.ehash_size--; + + while (hash_size & (hash_size - 1)) + hash_size--; + dccp_hashinfo.ehash_mask = hash_size - 1; dccp_hashinfo.ehash = (struct inet_ehash_bucket *) __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, ehash_order); } while (!dccp_hashinfo.ehash && --ehash_order > 0); @@ -1074,7 +1075,7 @@ static int __init dccp_init(void) goto out_free_bind_bucket_cachep; } - for (i = 0; i < dccp_hashinfo.ehash_size; i++) { + for (i = 0; i <= dccp_hashinfo.ehash_mask; i++) { INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i); INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].twchain, i); } @@ -1153,7 +1154,7 @@ static void __exit dccp_fini(void) get_order(dccp_hashinfo.bhash_size * sizeof(struct inet_bind_hashbucket))); free_pages((unsigned long)dccp_hashinfo.ehash, - get_order(dccp_hashinfo.ehash_size * + get_order((dccp_hashinfo.ehash_mask + 1) * sizeof(struct inet_ehash_bucket))); inet_ehash_locks_free(&dccp_hashinfo); kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index a706a47f4dbb..cb73fdefba91 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -774,7 +774,7 @@ skip_listen_ht: if (!(r->idiag_states & ~(TCPF_LISTEN | TCPF_SYN_RECV))) goto unlock; - for (i = s_i; i < hashinfo->ehash_size; i++) { + for (i = s_i; i <= hashinfo->ehash_mask; i++) { struct inet_ehash_bucket *head = &hashinfo->ehash[i]; spinlock_t *lock = inet_ehash_lockp(hashinfo, i); struct sock *sk; diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 625cc5f64c94..a45aaf3d48b1 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -209,7 +209,7 @@ struct sock * __inet_lookup_established(struct net *net, * have wildcards anyways. */ unsigned int hash = inet_ehashfn(net, daddr, hnum, saddr, sport); - unsigned int slot = hash & (hashinfo->ehash_size - 1); + unsigned int slot = hash & hashinfo->ehash_mask; struct inet_ehash_bucket *head = &hashinfo->ehash[slot]; rcu_read_lock(); diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 13f0781f35cd..2fe571155b22 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -430,7 +430,7 @@ void inet_twsk_purge(struct net *net, struct inet_hashinfo *hashinfo, int h; local_bh_disable(); - for (h = 0; h < (hashinfo->ehash_size); h++) { + for (h = 0; h <= hashinfo->ehash_mask; h++) { struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, h); spinlock_t *lock = inet_ehash_lockp(hashinfo, h); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 64d0af675823..cf13726259cd 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2865,11 +2865,10 @@ void __init tcp_init(void) (totalram_pages >= 128 * 1024) ? 13 : 15, 0, - &tcp_hashinfo.ehash_size, NULL, + &tcp_hashinfo.ehash_mask, thash_entries ? 0 : 512 * 1024); - tcp_hashinfo.ehash_size = 1 << tcp_hashinfo.ehash_size; - for (i = 0; i < tcp_hashinfo.ehash_size; i++) { + for (i = 0; i <= tcp_hashinfo.ehash_mask; i++) { INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].chain, i); INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].twchain, i); } @@ -2878,7 +2877,7 @@ void __init tcp_init(void) tcp_hashinfo.bhash = alloc_large_system_hash("TCP bind", sizeof(struct inet_bind_hashbucket), - tcp_hashinfo.ehash_size, + tcp_hashinfo.ehash_mask + 1, (totalram_pages >= 128 * 1024) ? 13 : 15, 0, @@ -2933,8 +2932,8 @@ void __init tcp_init(void) sysctl_tcp_rmem[2] = max(87380, max_share); printk(KERN_INFO "TCP: Hash tables configured " - "(established %d bind %d)\n", - tcp_hashinfo.ehash_size, tcp_hashinfo.bhash_size); + "(established %u bind %u)\n", + tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size); tcp_register_congestion_control(&tcp_reno); } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 7cda24b53f61..99718703d040 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2000,7 +2000,7 @@ static void *established_get_first(struct seq_file *seq) struct net *net = seq_file_net(seq); void *rc = NULL; - for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) { + for (st->bucket = 0; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) { struct sock *sk; struct hlist_nulls_node *node; struct inet_timewait_sock *tw; @@ -2061,10 +2061,10 @@ get_tw: st->state = TCP_SEQ_STATE_ESTABLISHED; /* Look for next non empty bucket */ - while (++st->bucket < tcp_hashinfo.ehash_size && + while (++st->bucket <= tcp_hashinfo.ehash_mask && empty_bucket(st)) ; - if (st->bucket >= tcp_hashinfo.ehash_size) + if (st->bucket > tcp_hashinfo.ehash_mask) return NULL; spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 1bcc3431859e..874aed86e1a2 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -73,7 +73,7 @@ struct sock *__inet6_lookup_established(struct net *net, * have wildcards anyways. */ unsigned int hash = inet6_ehashfn(net, daddr, hnum, saddr, sport); - unsigned int slot = hash & (hashinfo->ehash_size - 1); + unsigned int slot = hash & hashinfo->ehash_mask; struct inet_ehash_bucket *head = &hashinfo->ehash[slot]; -- cgit v1.3-8-gc7d7 From 61321bbd6235ca9a40ba3bc249e8906cc66233c3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 7 Oct 2009 17:11:23 +0000 Subject: net: Add netdev_alloc_skb_ip_align() helper Instead of hardcoding NET_IP_ALIGN stuff in various network drivers, we can add a helper around netdev_alloc_skb() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 8c866b5cb97b..0c68fbd6faac 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1491,6 +1491,16 @@ static inline struct sk_buff *netdev_alloc_skb(struct net_device *dev, return __netdev_alloc_skb(dev, length, GFP_ATOMIC); } +static inline struct sk_buff *netdev_alloc_skb_ip_align(struct net_device *dev, + unsigned int length) +{ + struct sk_buff *skb = netdev_alloc_skb(dev, length + NET_IP_ALIGN); + + if (NET_IP_ALIGN && skb) + skb_reserve(skb, NET_IP_ALIGN); + return skb; +} + extern struct page *__netdev_alloc_page(struct net_device *dev, gfp_t gfp_mask); /** -- cgit v1.3-8-gc7d7 From a6e4bc5304033e434fabccabb230b8e9ff55d76f Mon Sep 17 00:00:00 2001 From: Wolfgang Grandegger Date: Thu, 8 Oct 2009 22:17:11 +0000 Subject: can: make the number of echo skb's configurable This patch allows the CAN controller driver to define the number of echo skb's used for the local loopback (echo), as suggested by Kurt Van Dijck, with the function: struct net_device *alloc_candev(int sizeof_priv, unsigned int echo_skb_max); The CAN drivers have been adapted accordingly. For the ems_usb driver, as suggested by Sebastian Haas, the number of echo skb's has been increased to 10, which improves the transmission performance a lot. Signed-off-by: Wolfgang Grandegger Signed-off-by: Kurt Van Dijck Signed-off-by: David S. Miller --- drivers/net/can/at91_can.c | 2 +- drivers/net/can/dev.c | 32 ++++++++++++++++++++++++++------ drivers/net/can/sja1000/sja1000.c | 3 ++- drivers/net/can/sja1000/sja1000.h | 2 ++ drivers/net/can/ti_hecc.c | 6 +----- drivers/net/can/usb/ems_usb.c | 4 ++-- include/linux/can/dev.h | 16 ++++++++-------- 7 files changed, 42 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/drivers/net/can/at91_can.c b/drivers/net/can/at91_can.c index f67ae285a35a..b13fd9114130 100644 --- a/drivers/net/can/at91_can.c +++ b/drivers/net/can/at91_can.c @@ -1087,7 +1087,7 @@ static int __init at91_can_probe(struct platform_device *pdev) goto exit_release; } - dev = alloc_candev(sizeof(struct at91_priv)); + dev = alloc_candev(sizeof(struct at91_priv), AT91_MB_TX_NUM); if (!dev) { err = -ENOMEM; goto exit_iounmap; diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index f0b9a1e1db46..39b99f57c265 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -245,7 +245,7 @@ static void can_flush_echo_skb(struct net_device *dev) struct net_device_stats *stats = &dev->stats; int i; - for (i = 0; i < CAN_ECHO_SKB_MAX; i++) { + for (i = 0; i < priv->echo_skb_max; i++) { if (priv->echo_skb[i]) { kfree_skb(priv->echo_skb[i]); priv->echo_skb[i] = NULL; @@ -262,10 +262,13 @@ static void can_flush_echo_skb(struct net_device *dev) * of the device driver. The driver must protect access to * priv->echo_skb, if necessary. */ -void can_put_echo_skb(struct sk_buff *skb, struct net_device *dev, int idx) +void can_put_echo_skb(struct sk_buff *skb, struct net_device *dev, + unsigned int idx) { struct can_priv *priv = netdev_priv(dev); + BUG_ON(idx >= priv->echo_skb_max); + /* check flag whether this packet has to be looped back */ if (!(dev->flags & IFF_ECHO) || skb->pkt_type != PACKET_LOOPBACK) { kfree_skb(skb); @@ -311,10 +314,12 @@ EXPORT_SYMBOL_GPL(can_put_echo_skb); * is handled in the device driver. The driver must protect * access to priv->echo_skb, if necessary. */ -void can_get_echo_skb(struct net_device *dev, int idx) +void can_get_echo_skb(struct net_device *dev, unsigned int idx) { struct can_priv *priv = netdev_priv(dev); + BUG_ON(idx >= priv->echo_skb_max); + if (priv->echo_skb[idx]) { netif_rx(priv->echo_skb[idx]); priv->echo_skb[idx] = NULL; @@ -327,10 +332,12 @@ EXPORT_SYMBOL_GPL(can_get_echo_skb); * * The function is typically called when TX failed. */ -void can_free_echo_skb(struct net_device *dev, int idx) +void can_free_echo_skb(struct net_device *dev, unsigned int idx) { struct can_priv *priv = netdev_priv(dev); + BUG_ON(idx >= priv->echo_skb_max); + if (priv->echo_skb[idx]) { kfree_skb(priv->echo_skb[idx]); priv->echo_skb[idx] = NULL; @@ -445,17 +452,30 @@ static void can_setup(struct net_device *dev) /* * Allocate and setup space for the CAN network device */ -struct net_device *alloc_candev(int sizeof_priv) +struct net_device *alloc_candev(int sizeof_priv, unsigned int echo_skb_max) { struct net_device *dev; struct can_priv *priv; + int size; - dev = alloc_netdev(sizeof_priv, "can%d", can_setup); + if (echo_skb_max) + size = ALIGN(sizeof_priv, sizeof(struct sk_buff *)) + + echo_skb_max * sizeof(struct sk_buff *); + else + size = sizeof_priv; + + dev = alloc_netdev(size, "can%d", can_setup); if (!dev) return NULL; priv = netdev_priv(dev); + if (echo_skb_max) { + priv->echo_skb_max = echo_skb_max; + priv->echo_skb = (void *)priv + + ALIGN(sizeof_priv, sizeof(struct sk_buff *)); + } + priv->state = CAN_STATE_STOPPED; init_timer(&priv->restart_timer); diff --git a/drivers/net/can/sja1000/sja1000.c b/drivers/net/can/sja1000/sja1000.c index 16d2ecd2a3b7..96d8be4253f8 100644 --- a/drivers/net/can/sja1000/sja1000.c +++ b/drivers/net/can/sja1000/sja1000.c @@ -565,7 +565,8 @@ struct net_device *alloc_sja1000dev(int sizeof_priv) struct net_device *dev; struct sja1000_priv *priv; - dev = alloc_candev(sizeof(struct sja1000_priv) + sizeof_priv); + dev = alloc_candev(sizeof(struct sja1000_priv) + sizeof_priv, + SJA1000_ECHO_SKB_MAX); if (!dev) return NULL; diff --git a/drivers/net/can/sja1000/sja1000.h b/drivers/net/can/sja1000/sja1000.h index 302d2c763ad7..97a622b9302f 100644 --- a/drivers/net/can/sja1000/sja1000.h +++ b/drivers/net/can/sja1000/sja1000.h @@ -50,6 +50,8 @@ #include #include +#define SJA1000_ECHO_SKB_MAX 1 /* the SJA1000 has one TX buffer object */ + #define SJA1000_MAX_IRQ 20 /* max. number of interrupts handled in ISR */ /* SJA1000 registers - manual section 6.4 (Pelican Mode) */ diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c index 814e6c5c6386..23a7128e4eb7 100644 --- a/drivers/net/can/ti_hecc.c +++ b/drivers/net/can/ti_hecc.c @@ -74,10 +74,6 @@ MODULE_VERSION(HECC_MODULE_VERSION); #define HECC_MB_TX_SHIFT 2 /* as per table above */ #define HECC_MAX_TX_MBOX BIT(HECC_MB_TX_SHIFT) -#if (HECC_MAX_TX_MBOX > CAN_ECHO_SKB_MAX) -#error "HECC: MAX TX mailboxes should be equal or less than CAN_ECHO_SKB_MAX" -#endif - #define HECC_TX_PRIO_SHIFT (HECC_MB_TX_SHIFT) #define HECC_TX_PRIO_MASK (MAX_TX_PRIO << HECC_MB_TX_SHIFT) #define HECC_TX_MB_MASK (HECC_MAX_TX_MBOX - 1) @@ -902,7 +898,7 @@ static int ti_hecc_probe(struct platform_device *pdev) goto probe_exit_free_region; } - ndev = alloc_candev(sizeof(struct ti_hecc_priv)); + ndev = alloc_candev(sizeof(struct ti_hecc_priv), HECC_MAX_TX_MBOX); if (!ndev) { dev_err(&pdev->dev, "alloc_candev failed\n"); err = -ENOMEM; diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c index 9012e0abc626..a65f56a9cd3d 100644 --- a/drivers/net/can/usb/ems_usb.c +++ b/drivers/net/can/usb/ems_usb.c @@ -232,7 +232,7 @@ MODULE_DEVICE_TABLE(usb, ems_usb_table); #define INTR_IN_BUFFER_SIZE 4 #define MAX_RX_URBS 10 -#define MAX_TX_URBS CAN_ECHO_SKB_MAX +#define MAX_TX_URBS 10 struct ems_usb; @@ -1012,7 +1012,7 @@ static int ems_usb_probe(struct usb_interface *intf, struct ems_usb *dev; int i, err = -ENOMEM; - netdev = alloc_candev(sizeof(struct ems_usb)); + netdev = alloc_candev(sizeof(struct ems_usb), MAX_TX_URBS); if (!netdev) { dev_err(netdev->dev.parent, "Couldn't alloc candev\n"); return -ENOMEM; diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 5824b20b5fcb..1d3f7f00e3af 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -29,8 +29,6 @@ enum can_mode { /* * CAN common private data */ -#define CAN_ECHO_SKB_MAX 4 - struct can_priv { struct can_device_stats can_stats; @@ -44,15 +42,16 @@ struct can_priv { int restart_ms; struct timer_list restart_timer; - struct sk_buff *echo_skb[CAN_ECHO_SKB_MAX]; - int (*do_set_bittiming)(struct net_device *dev); int (*do_set_mode)(struct net_device *dev, enum can_mode mode); int (*do_get_state)(const struct net_device *dev, enum can_state *state); + + unsigned int echo_skb_max; + struct sk_buff **echo_skb; }; -struct net_device *alloc_candev(int sizeof_priv); +struct net_device *alloc_candev(int sizeof_priv, unsigned int echo_skb_max); void free_candev(struct net_device *dev); int open_candev(struct net_device *dev); @@ -64,8 +63,9 @@ void unregister_candev(struct net_device *dev); int can_restart_now(struct net_device *dev); void can_bus_off(struct net_device *dev); -void can_put_echo_skb(struct sk_buff *skb, struct net_device *dev, int idx); -void can_get_echo_skb(struct net_device *dev, int idx); -void can_free_echo_skb(struct net_device *dev, int idx); +void can_put_echo_skb(struct sk_buff *skb, struct net_device *dev, + unsigned int idx); +void can_get_echo_skb(struct net_device *dev, unsigned int idx); +void can_free_echo_skb(struct net_device *dev, unsigned int idx); #endif /* CAN_DEV_H */ -- cgit v1.3-8-gc7d7 From aace495933a981274b6491d71b915165a61defdc Mon Sep 17 00:00:00 2001 From: Manuel Lauss Date: Tue, 13 Oct 2009 07:25:49 +0000 Subject: net: smsc911x: allow platform_data to specify mac address Extend the driver to accept a MAC address specified in platform_data. Signed-off-by: Manuel Lauss Signed-off-by: David S. Miller --- drivers/net/smsc911x.c | 3 +++ include/linux/smsc911x.h | 1 + 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/drivers/net/smsc911x.c b/drivers/net/smsc911x.c index ccdd196f5297..6a9f51d1d9f2 100644 --- a/drivers/net/smsc911x.c +++ b/drivers/net/smsc911x.c @@ -2071,6 +2071,9 @@ static int __devinit smsc911x_drv_probe(struct platform_device *pdev) if (is_valid_ether_addr(dev->dev_addr)) { smsc911x_set_hw_mac_address(pdata, dev->dev_addr); SMSC_TRACE(PROBE, "MAC Address is specified by configuration"); + } else if (is_valid_ether_addr(pdata->config.mac)) { + memcpy(dev->dev_addr, pdata->config.mac, 6); + SMSC_TRACE(PROBE, "MAC Address specified by platform data"); } else { /* Try reading mac address from device. if EEPROM is present * it will already have been set */ diff --git a/include/linux/smsc911x.h b/include/linux/smsc911x.h index 5241e4fb4eca..7144e8aa1e41 100644 --- a/include/linux/smsc911x.h +++ b/include/linux/smsc911x.h @@ -30,6 +30,7 @@ struct smsc911x_platform_config { unsigned int irq_type; unsigned int flags; phy_interface_t phy_interface; + unsigned char mac[6]; }; /* Constants for platform_device irq polarity configuration */ -- cgit v1.3-8-gc7d7 From c44fc770845163f8d9e573f37f92a7b7a7ade14e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 19 Sep 2009 06:50:42 +0200 Subject: tracing: Move syscalls metadata handling from arch to core Most of the syscalls metadata processing is done from arch. But these operations are mostly generic accross archs. Especially now that we have a common variable name that expresses the number of syscalls supported by an arch: NR_syscalls, the only remaining bits that need to reside in arch is the syscall nr to addr translation. v2: Compare syscalls symbols only after the "sys" prefix so that we avoid spurious mismatches with archs that have syscalls wrappers, in which case syscalls symbols have "SyS" prefixed aliases. (Reported by: Heiko Carstens) Signed-off-by: Frederic Weisbecker Acked-by: Heiko Carstens Cc: Ingo Molnar Cc: Steven Rostedt Cc: Li Zefan Cc: Masami Hiramatsu Cc: Jason Baron Cc: Lai Jiangshan Cc: Martin Schwidefsky Cc: Paul Mundt --- arch/s390/kernel/ftrace.c | 67 +-------------------------------- arch/x86/kernel/ftrace.c | 76 +------------------------------------- include/trace/syscall.h | 2 +- kernel/trace/trace_syscalls.c | 86 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 91 insertions(+), 140 deletions(-) (limited to 'include') diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 57bdcb1e3cdf..7c5752c3423d 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -206,73 +206,10 @@ out: #ifdef CONFIG_FTRACE_SYSCALLS -extern unsigned long __start_syscalls_metadata[]; -extern unsigned long __stop_syscalls_metadata[]; extern unsigned int sys_call_table[]; -static struct syscall_metadata **syscalls_metadata; - -struct syscall_metadata *syscall_nr_to_meta(int nr) -{ - if (!syscalls_metadata || nr >= NR_syscalls || nr < 0) - return NULL; - - return syscalls_metadata[nr]; -} - -int syscall_name_to_nr(char *name) -{ - int i; - - if (!syscalls_metadata) - return -1; - for (i = 0; i < NR_syscalls; i++) - if (syscalls_metadata[i]) - if (!strcmp(syscalls_metadata[i]->name, name)) - return i; - return -1; -} - -void set_syscall_enter_id(int num, int id) -{ - syscalls_metadata[num]->enter_id = id; -} - -void set_syscall_exit_id(int num, int id) +unsigned long __init arch_syscall_addr(int nr) { - syscalls_metadata[num]->exit_id = id; -} - -static struct syscall_metadata *find_syscall_meta(unsigned long syscall) -{ - struct syscall_metadata *start; - struct syscall_metadata *stop; - char str[KSYM_SYMBOL_LEN]; - - start = (struct syscall_metadata *)__start_syscalls_metadata; - stop = (struct syscall_metadata *)__stop_syscalls_metadata; - kallsyms_lookup(syscall, NULL, NULL, NULL, str); - - for ( ; start < stop; start++) { - if (start->name && !strcmp(start->name + 3, str + 3)) - return start; - } - return NULL; -} - -static int __init arch_init_ftrace_syscalls(void) -{ - struct syscall_metadata *meta; - int i; - syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) * NR_syscalls, - GFP_KERNEL); - if (!syscalls_metadata) - return -ENOMEM; - for (i = 0; i < NR_syscalls; i++) { - meta = find_syscall_meta((unsigned long)sys_call_table[i]); - syscalls_metadata[i] = meta; - } - return 0; + return (unsigned long)sys_call_table[nr]; } -arch_initcall(arch_init_ftrace_syscalls); #endif diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 25e6f5fc4b1e..5a1b9758fd62 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -470,82 +470,10 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, #ifdef CONFIG_FTRACE_SYSCALLS -extern unsigned long __start_syscalls_metadata[]; -extern unsigned long __stop_syscalls_metadata[]; extern unsigned long *sys_call_table; -static struct syscall_metadata **syscalls_metadata; - -static struct syscall_metadata *find_syscall_meta(unsigned long *syscall) -{ - struct syscall_metadata *start; - struct syscall_metadata *stop; - char str[KSYM_SYMBOL_LEN]; - - - start = (struct syscall_metadata *)__start_syscalls_metadata; - stop = (struct syscall_metadata *)__stop_syscalls_metadata; - kallsyms_lookup((unsigned long) syscall, NULL, NULL, NULL, str); - - for ( ; start < stop; start++) { - if (start->name && !strcmp(start->name, str)) - return start; - } - return NULL; -} - -struct syscall_metadata *syscall_nr_to_meta(int nr) -{ - if (!syscalls_metadata || nr >= NR_syscalls || nr < 0) - return NULL; - - return syscalls_metadata[nr]; -} - -int syscall_name_to_nr(char *name) -{ - int i; - - if (!syscalls_metadata) - return -1; - - for (i = 0; i < NR_syscalls; i++) { - if (syscalls_metadata[i]) { - if (!strcmp(syscalls_metadata[i]->name, name)) - return i; - } - } - return -1; -} - -void set_syscall_enter_id(int num, int id) -{ - syscalls_metadata[num]->enter_id = id; -} - -void set_syscall_exit_id(int num, int id) +unsigned long __init arch_syscall_addr(int nr) { - syscalls_metadata[num]->exit_id = id; -} - -static int __init arch_init_ftrace_syscalls(void) -{ - int i; - struct syscall_metadata *meta; - unsigned long **psys_syscall_table = &sys_call_table; - - syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) * - NR_syscalls, GFP_KERNEL); - if (!syscalls_metadata) { - WARN_ON(1); - return -ENOMEM; - } - - for (i = 0; i < NR_syscalls; i++) { - meta = find_syscall_meta(psys_syscall_table[i]); - syscalls_metadata[i] = meta; - } - return 0; + return (unsigned long)(&sys_call_table)[nr]; } -arch_initcall(arch_init_ftrace_syscalls); #endif diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 5dc283ba5ae0..e972f0a40f8d 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -33,7 +33,7 @@ struct syscall_metadata { }; #ifdef CONFIG_FTRACE_SYSCALLS -extern struct syscall_metadata *syscall_nr_to_meta(int nr); +extern unsigned long arch_syscall_addr(int nr); extern int syscall_name_to_nr(char *name); void set_syscall_enter_id(int num, int id); void set_syscall_exit_id(int num, int id); diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 9fbce6c9d2e1..8bda4bff2286 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -14,6 +14,69 @@ static int sys_refcount_exit; static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls); static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls); +extern unsigned long __start_syscalls_metadata[]; +extern unsigned long __stop_syscalls_metadata[]; + +static struct syscall_metadata **syscalls_metadata; + +static struct syscall_metadata *find_syscall_meta(unsigned long syscall) +{ + struct syscall_metadata *start; + struct syscall_metadata *stop; + char str[KSYM_SYMBOL_LEN]; + + + start = (struct syscall_metadata *)__start_syscalls_metadata; + stop = (struct syscall_metadata *)__stop_syscalls_metadata; + kallsyms_lookup(syscall, NULL, NULL, NULL, str); + + for ( ; start < stop; start++) { + /* + * Only compare after the "sys" prefix. Archs that use + * syscall wrappers may have syscalls symbols aliases prefixed + * with "SyS" instead of "sys", leading to an unwanted + * mismatch. + */ + if (start->name && !strcmp(start->name + 3, str + 3)) + return start; + } + return NULL; +} + +static struct syscall_metadata *syscall_nr_to_meta(int nr) +{ + if (!syscalls_metadata || nr >= NR_syscalls || nr < 0) + return NULL; + + return syscalls_metadata[nr]; +} + +int syscall_name_to_nr(char *name) +{ + int i; + + if (!syscalls_metadata) + return -1; + + for (i = 0; i < NR_syscalls; i++) { + if (syscalls_metadata[i]) { + if (!strcmp(syscalls_metadata[i]->name, name)) + return i; + } + } + return -1; +} + +void set_syscall_enter_id(int num, int id) +{ + syscalls_metadata[num]->enter_id = id; +} + +void set_syscall_exit_id(int num, int id) +{ + syscalls_metadata[num]->exit_id = id; +} + enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags) { @@ -375,6 +438,29 @@ struct trace_event event_syscall_exit = { .trace = print_syscall_exit, }; +int __init init_ftrace_syscalls(void) +{ + struct syscall_metadata *meta; + unsigned long addr; + int i; + + syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) * + NR_syscalls, GFP_KERNEL); + if (!syscalls_metadata) { + WARN_ON(1); + return -ENOMEM; + } + + for (i = 0; i < NR_syscalls; i++) { + addr = arch_syscall_addr(i); + meta = find_syscall_meta(addr); + syscalls_metadata[i] = meta; + } + + return 0; +} +core_initcall(init_ftrace_syscalls); + #ifdef CONFIG_EVENT_PROFILE static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls); -- cgit v1.3-8-gc7d7 From 8e85973efc87dfae8508f1a3440fd44612897458 Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Thu, 8 Oct 2009 00:41:59 +0200 Subject: firewire: optimize config ROM creation The config ROM image of the local node was created in CPU byte order, then a temporary big endian copy was created to compute the CRC, and finally the card driver created its own big endian copy. We now generate it in big endian byte order in the first place to avoid one byte order conversion and the temporary on-stack copy of the ROM image (1000 bytes stack usage in process context). Furthermore, two 1000 bytes memset()s are replaced by one 1000 bytes - ROM length sized memset. The trivial fw_memcpy_{from,to}_be32() helpers are now superfluous and removed. The newly added __compute_block_crc() function will be folded into fw_compute_block_crc() in a subsequent change. Signed-off-by: Stefan Richter --- drivers/firewire/core-card.c | 62 ++++++++++++++++++++++++++------------------ drivers/firewire/core.h | 7 ++--- drivers/firewire/ohci.c | 30 +++++++++++++-------- include/linux/firewire.h | 14 ---------- 4 files changed, 60 insertions(+), 53 deletions(-) (limited to 'include') diff --git a/drivers/firewire/core-card.c b/drivers/firewire/core-card.c index 33898b63cdf7..f73e3bdfc84c 100644 --- a/drivers/firewire/core-card.c +++ b/drivers/firewire/core-card.c @@ -38,6 +38,18 @@ #include "core.h" +static int __compute_block_crc(__be32 *block) +{ + int length; + u16 crc; + + length = (be32_to_cpu(block[0]) >> 16) & 0xff; + crc = crc_itu_t(0, (u8 *)&block[1], length * 4); + *block |= cpu_to_be32(crc); + + return length; +} + int fw_compute_block_crc(u32 *block) { __be32 be32_block[256]; @@ -72,11 +84,11 @@ static int descriptor_count; #define BIB_CMC ((1) << 30) #define BIB_IMC ((1) << 31) -static u32 *generate_config_rom(struct fw_card *card, size_t *config_rom_length) +static __be32 *generate_config_rom(struct fw_card *card, size_t *rom_length) { struct fw_descriptor *desc; - static u32 config_rom[256]; - int i, j, length; + static __be32 config_rom[256]; + int i, j, k, length; /* * Initialize contents of config rom buffer. On the OHCI @@ -87,40 +99,39 @@ static u32 *generate_config_rom(struct fw_card *card, size_t *config_rom_length) * the version stored in the OHCI registers. */ - memset(config_rom, 0, sizeof(config_rom)); - config_rom[0] = BIB_CRC_LENGTH(4) | BIB_INFO_LENGTH(4) | BIB_CRC(0); - config_rom[1] = 0x31333934; - - config_rom[2] = + config_rom[0] = cpu_to_be32( + BIB_CRC_LENGTH(4) | BIB_INFO_LENGTH(4) | BIB_CRC(0)); + config_rom[1] = cpu_to_be32(0x31333934); + config_rom[2] = cpu_to_be32( BIB_LINK_SPEED(card->link_speed) | BIB_GENERATION(card->config_rom_generation++ % 14 + 2) | BIB_MAX_ROM(2) | BIB_MAX_RECEIVE(card->max_receive) | - BIB_BMC | BIB_ISC | BIB_CMC | BIB_IMC; - config_rom[3] = card->guid >> 32; - config_rom[4] = card->guid; + BIB_BMC | BIB_ISC | BIB_CMC | BIB_IMC); + config_rom[3] = cpu_to_be32(card->guid >> 32); + config_rom[4] = cpu_to_be32(card->guid); /* Generate root directory. */ - i = 5; - config_rom[i++] = 0; - config_rom[i++] = 0x0c0083c0; /* node capabilities */ - j = i + descriptor_count; + config_rom[6] = cpu_to_be32(0x0c0083c0); /* node capabilities */ + i = 7; + j = 7 + descriptor_count; /* Generate root directory entries for descriptors. */ list_for_each_entry (desc, &descriptor_list, link) { if (desc->immediate > 0) - config_rom[i++] = desc->immediate; - config_rom[i] = desc->key | (j - i); + config_rom[i++] = cpu_to_be32(desc->immediate); + config_rom[i] = cpu_to_be32(desc->key | (j - i)); i++; j += desc->length; } /* Update root directory length. */ - config_rom[5] = (i - 5 - 1) << 16; + config_rom[5] = cpu_to_be32((i - 5 - 1) << 16); /* End of root directory, now copy in descriptors. */ list_for_each_entry (desc, &descriptor_list, link) { - memcpy(&config_rom[i], desc->data, desc->length * 4); + for (k = 0; k < desc->length; k++) + config_rom[i + k] = cpu_to_be32(desc->data[k]); i += desc->length; } @@ -129,9 +140,9 @@ static u32 *generate_config_rom(struct fw_card *card, size_t *config_rom_length) * the bus info block, which is always the case for this * implementation. */ for (i = 0; i < j; i += length + 1) - length = fw_compute_block_crc(config_rom + i); + length = __compute_block_crc(config_rom + i); - *config_rom_length = j; + *rom_length = j; return config_rom; } @@ -139,7 +150,7 @@ static u32 *generate_config_rom(struct fw_card *card, size_t *config_rom_length) static void update_config_roms(void) { struct fw_card *card; - u32 *config_rom; + __be32 *config_rom; size_t length; list_for_each_entry (card, &card_list, link) { @@ -432,7 +443,7 @@ EXPORT_SYMBOL(fw_card_initialize); int fw_card_add(struct fw_card *card, u32 max_receive, u32 link_speed, u64 guid) { - u32 *config_rom; + __be32 *config_rom; size_t length; int ret; @@ -462,7 +473,8 @@ EXPORT_SYMBOL(fw_card_add); * shutdown still need to be provided by the card driver. */ -static int dummy_enable(struct fw_card *card, u32 *config_rom, size_t length) +static int dummy_enable(struct fw_card *card, + const __be32 *config_rom, size_t length) { BUG(); return -1; @@ -475,7 +487,7 @@ static int dummy_update_phy_reg(struct fw_card *card, int address, } static int dummy_set_config_rom(struct fw_card *card, - u32 *config_rom, size_t length) + const __be32 *config_rom, size_t length) { /* * We take the card out of card_list before setting the dummy diff --git a/drivers/firewire/core.h b/drivers/firewire/core.h index 7ff6e7585152..7adca7cb9f55 100644 --- a/drivers/firewire/core.h +++ b/drivers/firewire/core.h @@ -40,7 +40,8 @@ struct fw_card_driver { * enable the PHY or set the link_on bit and initiate a bus * reset. */ - int (*enable)(struct fw_card *card, u32 *config_rom, size_t length); + int (*enable)(struct fw_card *card, + const __be32 *config_rom, size_t length); int (*update_phy_reg)(struct fw_card *card, int address, int clear_bits, int set_bits); @@ -48,10 +49,10 @@ struct fw_card_driver { /* * Update the config rom for an enabled card. This function * should change the config rom that is presented on the bus - * an initiate a bus reset. + * and initiate a bus reset. */ int (*set_config_rom)(struct fw_card *card, - u32 *config_rom, size_t length); + const __be32 *config_rom, size_t length); void (*send_request)(struct fw_card *card, struct fw_packet *packet); void (*send_response)(struct fw_card *card, struct fw_packet *packet); diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c index 5d524254499e..418415564791 100644 --- a/drivers/firewire/ohci.c +++ b/drivers/firewire/ohci.c @@ -205,7 +205,7 @@ struct fw_ohci { dma_addr_t config_rom_bus; __be32 *next_config_rom; dma_addr_t next_config_rom_bus; - u32 next_header; + __be32 next_header; struct ar_context ar_request_ctx; struct ar_context ar_response_ctx; @@ -1355,8 +1355,9 @@ static void bus_reset_tasklet(unsigned long data) */ reg_write(ohci, OHCI1394_BusOptions, be32_to_cpu(ohci->config_rom[2])); - ohci->config_rom[0] = cpu_to_be32(ohci->next_header); - reg_write(ohci, OHCI1394_ConfigROMhdr, ohci->next_header); + ohci->config_rom[0] = ohci->next_header; + reg_write(ohci, OHCI1394_ConfigROMhdr, + be32_to_cpu(ohci->next_header)); } #ifdef CONFIG_FIREWIRE_OHCI_REMOTE_DMA @@ -1464,7 +1465,17 @@ static int software_reset(struct fw_ohci *ohci) return -EBUSY; } -static int ohci_enable(struct fw_card *card, u32 *config_rom, size_t length) +static void copy_config_rom(__be32 *dest, const __be32 *src, size_t length) +{ + size_t size = length * 4; + + memcpy(dest, src, size); + if (size < CONFIG_ROM_SIZE) + memset(&dest[length], 0, CONFIG_ROM_SIZE - size); +} + +static int ohci_enable(struct fw_card *card, + const __be32 *config_rom, size_t length) { struct fw_ohci *ohci = fw_ohci(card); struct pci_dev *dev = to_pci_dev(card->device); @@ -1565,8 +1576,7 @@ static int ohci_enable(struct fw_card *card, u32 *config_rom, size_t length) if (ohci->next_config_rom == NULL) return -ENOMEM; - memset(ohci->next_config_rom, 0, CONFIG_ROM_SIZE); - fw_memcpy_to_be32(ohci->next_config_rom, config_rom, length * 4); + copy_config_rom(ohci->next_config_rom, config_rom, length); } else { /* * In the suspend case, config_rom is NULL, which @@ -1576,7 +1586,7 @@ static int ohci_enable(struct fw_card *card, u32 *config_rom, size_t length) ohci->next_config_rom_bus = ohci->config_rom_bus; } - ohci->next_header = be32_to_cpu(ohci->next_config_rom[0]); + ohci->next_header = ohci->next_config_rom[0]; ohci->next_config_rom[0] = 0; reg_write(ohci, OHCI1394_ConfigROMhdr, 0); reg_write(ohci, OHCI1394_BusOptions, @@ -1610,7 +1620,7 @@ static int ohci_enable(struct fw_card *card, u32 *config_rom, size_t length) } static int ohci_set_config_rom(struct fw_card *card, - u32 *config_rom, size_t length) + const __be32 *config_rom, size_t length) { struct fw_ohci *ohci; unsigned long flags; @@ -1659,9 +1669,7 @@ static int ohci_set_config_rom(struct fw_card *card, ohci->next_config_rom = next_config_rom; ohci->next_config_rom_bus = next_config_rom_bus; - memset(ohci->next_config_rom, 0, CONFIG_ROM_SIZE); - fw_memcpy_to_be32(ohci->next_config_rom, config_rom, - length * 4); + copy_config_rom(ohci->next_config_rom, config_rom, length); ohci->next_header = config_rom[0]; ohci->next_config_rom[0] = 0; diff --git a/include/linux/firewire.h b/include/linux/firewire.h index 7e1d4dec83e7..53b9217de86c 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -20,20 +20,6 @@ #define fw_notify(s, args...) printk(KERN_NOTICE KBUILD_MODNAME ": " s, ## args) #define fw_error(s, args...) printk(KERN_ERR KBUILD_MODNAME ": " s, ## args) -static inline void fw_memcpy_from_be32(void *_dst, void *_src, size_t size) -{ - u32 *dst = _dst; - __be32 *src = _src; - int i; - - for (i = 0; i < size / 4; i++) - dst[i] = be32_to_cpu(src[i]); -} - -static inline void fw_memcpy_to_be32(void *_dst, void *_src, size_t size) -{ - fw_memcpy_from_be32(_dst, _src, size); -} #define CSR_REGISTER_BASE 0xfffff0000000ULL /* register offsets are relative to CSR_REGISTER_BASE */ -- cgit v1.3-8-gc7d7 From cb7c96da3651111efbe088fa12f9bed61836ea93 Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Thu, 8 Oct 2009 00:42:53 +0200 Subject: firewire: core: optimize Topology Map creation The Topology Map of the local node was created in CPU byte order, then a temporary big endian copy was created to compute the CRC, and when a read request to the Topology Map arrived it had to be converted to big endian byte order again. We now generate it in big endian byte order in the first place. This also rids us of 1000 bytes stack usage in tasklet context. Signed-off-by: Stefan Richter --- drivers/firewire/core-card.c | 17 ++--------------- drivers/firewire/core-topology.c | 17 ++++++++++------- drivers/firewire/core-transaction.c | 9 ++------- drivers/firewire/core.h | 2 +- include/linux/firewire.h | 2 +- 5 files changed, 16 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/drivers/firewire/core-card.c b/drivers/firewire/core-card.c index f58130789990..7083bcc1b9c7 100644 --- a/drivers/firewire/core-card.c +++ b/drivers/firewire/core-card.c @@ -38,7 +38,7 @@ #include "core.h" -static int __compute_block_crc(__be32 *block) +int fw_compute_block_crc(__be32 *block) { int length; u16 crc; @@ -50,19 +50,6 @@ static int __compute_block_crc(__be32 *block) return length; } -int fw_compute_block_crc(u32 *block) -{ - __be32 be32_block[256]; - int i, length; - - length = (*block >> 16) & 0xff; - for (i = 0; i < length; i++) - be32_block[i] = cpu_to_be32(block[i + 1]); - *block |= crc_itu_t(0, (u8 *) be32_block, length * 4); - - return length; -} - static DEFINE_MUTEX(card_mutex); static LIST_HEAD(card_list); @@ -141,7 +128,7 @@ static size_t generate_config_rom(struct fw_card *card, __be32 *config_rom) * the bus info block, which is always the case for this * implementation. */ for (i = 0; i < j; i += length + 1) - length = __compute_block_crc(config_rom + i); + length = fw_compute_block_crc(config_rom + i); return j; } diff --git a/drivers/firewire/core-topology.c b/drivers/firewire/core-topology.c index fddf2b358936..9a5f38c80b0e 100644 --- a/drivers/firewire/core-topology.c +++ b/drivers/firewire/core-topology.c @@ -28,9 +28,9 @@ #include #include #include -#include #include +#include #include #include "core.h" @@ -510,13 +510,16 @@ static void update_tree(struct fw_card *card, struct fw_node *root) static void update_topology_map(struct fw_card *card, u32 *self_ids, int self_id_count) { - int node_count; + int node_count = (card->root_node->node_id & 0x3f) + 1; + __be32 *map = card->topology_map; + + *map++ = cpu_to_be32((self_id_count + 2) << 16); + *map++ = cpu_to_be32(be32_to_cpu(card->topology_map[1]) + 1); + *map++ = cpu_to_be32((node_count << 16) | self_id_count); + + while (self_id_count--) + *map++ = cpu_to_be32p(self_ids++); - card->topology_map[1]++; - node_count = (card->root_node->node_id & 0x3f) + 1; - card->topology_map[2] = (node_count << 16) | self_id_count; - card->topology_map[0] = (self_id_count + 2) << 16; - memcpy(&card->topology_map[3], self_ids, self_id_count * 4); fw_compute_block_crc(card->topology_map); } diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c index da628c72a462..203e6428bada 100644 --- a/drivers/firewire/core-transaction.c +++ b/drivers/firewire/core-transaction.c @@ -810,8 +810,7 @@ static void handle_topology_map(struct fw_card *card, struct fw_request *request int speed, unsigned long long offset, void *payload, size_t length, void *callback_data) { - int i, start, end; - __be32 *map; + int start; if (!TCODE_IS_READ_REQUEST(tcode)) { fw_send_response(card, request, RCODE_TYPE_ERROR); @@ -824,11 +823,7 @@ static void handle_topology_map(struct fw_card *card, struct fw_request *request } start = (offset - topology_map_region.start) / 4; - end = start + length / 4; - map = payload; - - for (i = 0; i < length / 4; i++) - map[i] = cpu_to_be32(card->topology_map[start + i]); + memcpy(payload, &card->topology_map[start], length); fw_send_response(card, request, RCODE_COMPLETE); } diff --git a/drivers/firewire/core.h b/drivers/firewire/core.h index 7adca7cb9f55..ed3b1a765c00 100644 --- a/drivers/firewire/core.h +++ b/drivers/firewire/core.h @@ -94,7 +94,7 @@ int fw_card_add(struct fw_card *card, u32 max_receive, u32 link_speed, u64 guid); void fw_core_remove_card(struct fw_card *card); int fw_core_initiate_bus_reset(struct fw_card *card, int short_reset); -int fw_compute_block_crc(u32 *block); +int fw_compute_block_crc(__be32 *block); void fw_schedule_bm_work(struct fw_card *card, unsigned long delay); static inline struct fw_card *fw_card_get(struct fw_card *card) diff --git a/include/linux/firewire.h b/include/linux/firewire.h index 53b9217de86c..211a5d7d87b3 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -117,7 +117,7 @@ struct fw_card { bool broadcast_channel_allocated; u32 broadcast_channel; - u32 topology_map[(CSR_TOPOLOGY_MAP_END - CSR_TOPOLOGY_MAP) / 4]; + __be32 topology_map[(CSR_TOPOLOGY_MAP_END - CSR_TOPOLOGY_MAP) / 4]; }; struct fw_attribute_group { -- cgit v1.3-8-gc7d7 From 205cb37b89ab37db553907e5ac17962eec561804 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 14 Oct 2009 23:22:17 +0200 Subject: kill-the-bkl/reiserfs: definitely drop the bkl from reiserfs_ioctl() The reiserfs ioctl path doesn't need the big kernel lock anymore , now that the filesystem synchronizes through its own lock. We can then turn reiserfs_ioctl() into an unlocked_ioctl callback. Signed-off-by: Frederic Weisbecker Cc: Jeff Mahoney Cc: Chris Mason Cc: Ingo Molnar Cc: Alexander Beregalov Cc: Laurent Riffard Cc: Thomas Gleixner --- fs/reiserfs/dir.c | 2 +- fs/reiserfs/file.c | 2 +- fs/reiserfs/ioctl.c | 11 +++-------- include/linux/reiserfs_fs.h | 3 +-- 4 files changed, 6 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c index 17f31ad379c8..c094f58c7448 100644 --- a/fs/reiserfs/dir.c +++ b/fs/reiserfs/dir.c @@ -20,7 +20,7 @@ const struct file_operations reiserfs_dir_operations = { .read = generic_read_dir, .readdir = reiserfs_readdir, .fsync = reiserfs_dir_fsync, - .ioctl = reiserfs_ioctl, + .unlocked_ioctl = reiserfs_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = reiserfs_compat_ioctl, #endif diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index 9f436668b7f8..da2dba082e2d 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c @@ -284,7 +284,7 @@ static ssize_t reiserfs_file_write(struct file *file, /* the file we are going t const struct file_operations reiserfs_file_operations = { .read = do_sync_read, .write = reiserfs_file_write, - .ioctl = reiserfs_ioctl, + .unlocked_ioctl = reiserfs_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = reiserfs_compat_ioctl, #endif diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c index e30e8be09179..ace77451ceb1 100644 --- a/fs/reiserfs/ioctl.c +++ b/fs/reiserfs/ioctl.c @@ -20,9 +20,9 @@ * 2) REISERFS_IOC_[GS]ETFLAGS, REISERFS_IOC_[GS]ETVERSION * 3) That's all for a while ... */ -int reiserfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, - unsigned long arg) +long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { + struct inode *inode = filp->f_path.dentry->d_inode; unsigned int flags; int err = 0; @@ -132,9 +132,6 @@ setversion_out: long reiserfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - struct inode *inode = file->f_path.dentry->d_inode; - int ret; - /* These are just misnamed, they actually get/put from/to user an int */ switch (cmd) { case REISERFS_IOC32_UNPACK: @@ -156,9 +153,7 @@ long reiserfs_compat_ioctl(struct file *file, unsigned int cmd, return -ENOIOCTLCMD; } - ret = reiserfs_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg)); - - return ret; + return reiserfs_ioctl(file, cmd, (unsigned long) compat_ptr(arg)); } #endif diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index a498d9266d8c..a05b4a20768d 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -2314,8 +2314,7 @@ __u32 r5_hash(const signed char *msg, int len); #define SPARE_SPACE 500 /* prototypes from ioctl.c */ -int reiserfs_ioctl(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg); +long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); long reiserfs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); int reiserfs_unpack(struct inode *inode, struct file *filp); -- cgit v1.3-8-gc7d7 From f14001fcd7bd03983c872810a3b11af4148bae72 Mon Sep 17 00:00:00 2001 From: Rémi Denis-Courmont Date: Wed, 14 Oct 2009 00:48:27 +0000 Subject: Phonet: deliver broadcast packets to broadcast sockets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- include/net/phonet/phonet.h | 1 + net/phonet/af_phonet.c | 6 ++++++ net/phonet/socket.c | 21 +++++++++++++++++++++ 3 files changed, 28 insertions(+) (limited to 'include') diff --git a/include/net/phonet/phonet.h b/include/net/phonet/phonet.h index d43f71b5ec00..fdb05fa0346f 100644 --- a/include/net/phonet/phonet.h +++ b/include/net/phonet/phonet.h @@ -47,6 +47,7 @@ static inline struct pn_sock *pn_sk(struct sock *sk) extern const struct proto_ops phonet_dgram_ops; struct sock *pn_find_sock_by_sa(struct net *net, const struct sockaddr_pn *sa); +void pn_deliver_sock_broadcast(struct net *net, struct sk_buff *skb); void phonet_get_local_port_range(int *min, int *max); void pn_sock_hash(struct sock *sk); void pn_sock_unhash(struct sock *sk); diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index c711d58b4bb5..b113fe00c154 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -369,6 +369,12 @@ static int phonet_rcv(struct sk_buff *skb, struct net_device *dev, pn_skb_get_dst_sockaddr(skb, &sa); + /* check if this is broadcasted */ + if (pn_sockaddr_get_addr(&sa) == PNADDR_BROADCAST) { + pn_deliver_sock_broadcast(net, skb); + goto out; + } + /* check if we are the destination */ if (phonet_address_lookup(net, pn_sockaddr_get_addr(&sa)) == 0) { /* Phonet packet input */ diff --git a/net/phonet/socket.c b/net/phonet/socket.c index aa5b5a972bff..8c84190f22de 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -94,7 +94,28 @@ struct sock *pn_find_sock_by_sa(struct net *net, const struct sockaddr_pn *spn) spin_unlock_bh(&pnsocks.lock); return rval; +} + +/* Deliver a broadcast packet (only in bottom-half) */ +void pn_deliver_sock_broadcast(struct net *net, struct sk_buff *skb) +{ + struct hlist_node *node; + struct sock *sknode; + + spin_lock(&pnsocks.lock); + sk_for_each(sknode, node, &pnsocks.hlist) { + struct sk_buff *clone; + + if (!net_eq(sock_net(sknode), net)) + continue; + if (!sock_flag(sknode, SOCK_BROADCAST)) + continue; + clone = skb_clone(skb, GFP_ATOMIC); + if (clone) + sk_receive_skb(sknode, clone, 0); + } + spin_unlock(&pnsocks.lock); } void pn_sock_hash(struct sock *sk) -- cgit v1.3-8-gc7d7 From 55748ac0468134a89bc55aed6a9691e320caa8a9 Mon Sep 17 00:00:00 2001 From: Rémi Denis-Courmont Date: Wed, 14 Oct 2009 00:48:28 +0000 Subject: Phonet: routing table backend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Phonet "universe" only has 64 addresses, so we keep a trivial flat routing table. Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- include/net/phonet/pn_dev.h | 5 +++ net/phonet/pn_dev.c | 100 +++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 99 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/phonet/pn_dev.h b/include/net/phonet/pn_dev.h index 44c923c9e21d..87b5d8112e18 100644 --- a/include/net/phonet/pn_dev.h +++ b/include/net/phonet/pn_dev.h @@ -47,6 +47,11 @@ u8 phonet_address_get(struct net_device *dev, u8 addr); int phonet_address_lookup(struct net *net, u8 addr); void phonet_address_notify(int event, struct net_device *dev, u8 addr); +int phonet_route_add(struct net_device *dev, u8 daddr); +int phonet_route_del(struct net_device *dev, u8 daddr); +struct net_device *phonet_route_get(struct net *net, u8 daddr); +struct net_device *phonet_route_output(struct net *net, u8 daddr); + #define PN_NO_ADDR 0xff extern const struct file_operations pn_sock_seq_fops; diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index 5f42f30dd168..71fffa587783 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -33,8 +33,14 @@ #include #include +struct phonet_routes { + spinlock_t lock; + struct net_device *table[64]; +}; + struct phonet_net { struct phonet_device_list pndevs; + struct phonet_routes routes; }; int phonet_net_id; @@ -154,10 +160,11 @@ int phonet_address_del(struct net_device *dev, u8 addr) } /* Gets a source address toward a destination, through a interface. */ -u8 phonet_address_get(struct net_device *dev, u8 addr) +u8 phonet_address_get(struct net_device *dev, u8 daddr) { struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev)); struct phonet_device *pnd; + u8 saddr; spin_lock_bh(&pndevs->lock); pnd = __phonet_get(dev); @@ -165,12 +172,26 @@ u8 phonet_address_get(struct net_device *dev, u8 addr) BUG_ON(bitmap_empty(pnd->addrs, 64)); /* Use same source address as destination, if possible */ - if (!test_bit(addr >> 2, pnd->addrs)) - addr = find_first_bit(pnd->addrs, 64) << 2; + if (test_bit(daddr >> 2, pnd->addrs)) + saddr = daddr; + else + saddr = find_first_bit(pnd->addrs, 64) << 2; } else - addr = PN_NO_ADDR; + saddr = PN_NO_ADDR; spin_unlock_bh(&pndevs->lock); - return addr; + + if (saddr == PN_NO_ADDR) { + /* Fallback to another device */ + struct net_device *def_dev; + + def_dev = phonet_device_get(dev_net(dev)); + if (def_dev) { + if (def_dev != dev) + saddr = phonet_address_get(def_dev, daddr); + dev_put(def_dev); + } + } + return saddr; } int phonet_address_lookup(struct net *net, u8 addr) @@ -246,7 +267,7 @@ static struct notifier_block phonet_device_notifier = { /* Per-namespace Phonet devices handling */ static int phonet_init_net(struct net *net) { - struct phonet_net *pnn = kmalloc(sizeof(*pnn), GFP_KERNEL); + struct phonet_net *pnn = kzalloc(sizeof(*pnn), GFP_KERNEL); if (!pnn) return -ENOMEM; @@ -257,6 +278,7 @@ static int phonet_init_net(struct net *net) INIT_LIST_HEAD(&pnn->pndevs.list); spin_lock_init(&pnn->pndevs.lock); + spin_lock_init(&pnn->routes.lock); net_assign_generic(net, phonet_net_id, pnn); return 0; } @@ -300,3 +322,69 @@ void phonet_device_exit(void) unregister_netdevice_notifier(&phonet_device_notifier); unregister_pernet_gen_device(phonet_net_id, &phonet_net_ops); } + +int phonet_route_add(struct net_device *dev, u8 daddr) +{ + struct phonet_net *pnn = net_generic(dev_net(dev), phonet_net_id); + struct phonet_routes *routes = &pnn->routes; + int err = -EEXIST; + + daddr = daddr >> 2; + spin_lock_bh(&routes->lock); + if (routes->table[daddr] == NULL) { + routes->table[daddr] = dev; + dev_hold(dev); + err = 0; + } + spin_unlock_bh(&routes->lock); + return err; +} + +int phonet_route_del(struct net_device *dev, u8 daddr) +{ + struct phonet_net *pnn = net_generic(dev_net(dev), phonet_net_id); + struct phonet_routes *routes = &pnn->routes; + int err = -ENOENT; + + daddr = daddr >> 2; + spin_lock_bh(&routes->lock); + if (dev == routes->table[daddr]) { + routes->table[daddr] = NULL; + dev_put(dev); + err = 0; + } + spin_unlock_bh(&routes->lock); + return err; +} + +struct net_device *phonet_route_get(struct net *net, u8 daddr) +{ + struct phonet_net *pnn = net_generic(net, phonet_net_id); + struct phonet_routes *routes = &pnn->routes; + struct net_device *dev; + + ASSERT_RTNL(); /* no need to hold the device */ + + daddr >>= 2; + spin_lock_bh(&routes->lock); + dev = routes->table[daddr]; + spin_unlock_bh(&routes->lock); + return dev; +} + +struct net_device *phonet_route_output(struct net *net, u8 daddr) +{ + struct phonet_net *pnn = net_generic(net, phonet_net_id); + struct phonet_routes *routes = &pnn->routes; + struct net_device *dev; + + spin_lock_bh(&routes->lock); + dev = routes->table[daddr >> 2]; + if (dev) + dev_hold(dev); + spin_unlock_bh(&routes->lock); + + if (!dev) + dev = phonet_device_get(net); /* Default route */ + return dev; +} -- cgit v1.3-8-gc7d7 From f062f41d06575744b9eaf725eef8a5d3b5f5b7ca Mon Sep 17 00:00:00 2001 From: Rémi Denis-Courmont Date: Wed, 14 Oct 2009 00:48:29 +0000 Subject: Phonet: routing table Netlink interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- include/net/phonet/pn_dev.h | 1 + net/phonet/pn_dev.c | 31 +++++++++++ net/phonet/pn_netlink.c | 130 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 162 insertions(+) (limited to 'include') diff --git a/include/net/phonet/pn_dev.h b/include/net/phonet/pn_dev.h index 87b5d8112e18..afa7defceb14 100644 --- a/include/net/phonet/pn_dev.h +++ b/include/net/phonet/pn_dev.h @@ -49,6 +49,7 @@ void phonet_address_notify(int event, struct net_device *dev, u8 addr); int phonet_route_add(struct net_device *dev, u8 daddr); int phonet_route_del(struct net_device *dev, u8 daddr); +void rtm_phonet_notify(int event, struct net_device *dev, u8 dst); struct net_device *phonet_route_get(struct net *net, u8 daddr); struct net_device *phonet_route_output(struct net *net, u8 daddr); diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index 71fffa587783..6d64fda1afc9 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -240,6 +240,27 @@ static int phonet_device_autoconf(struct net_device *dev) return 0; } +static void phonet_route_autodel(struct net_device *dev) +{ + struct phonet_net *pnn = net_generic(dev_net(dev), phonet_net_id); + unsigned i; + DECLARE_BITMAP(deleted, 64); + + /* Remove left-over Phonet routes */ + bitmap_zero(deleted, 64); + spin_lock_bh(&pnn->routes.lock); + for (i = 0; i < 64; i++) + if (dev == pnn->routes.table[i]) { + set_bit(i, deleted); + pnn->routes.table[i] = NULL; + dev_put(dev); + } + spin_unlock_bh(&pnn->routes.lock); + for (i = find_first_bit(deleted, 64); i < 64; + i = find_next_bit(deleted, 64, i + 1)) + rtm_phonet_notify(RTM_DELROUTE, dev, i); +} + /* notify Phonet of device events */ static int phonet_device_notify(struct notifier_block *me, unsigned long what, void *arg) @@ -253,6 +274,7 @@ static int phonet_device_notify(struct notifier_block *me, unsigned long what, break; case NETDEV_UNREGISTER: phonet_device_destroy(dev); + phonet_route_autodel(dev); break; } return 0; @@ -287,10 +309,19 @@ static void phonet_exit_net(struct net *net) { struct phonet_net *pnn = net_generic(net, phonet_net_id); struct net_device *dev; + unsigned i; rtnl_lock(); for_each_netdev(net, dev) phonet_device_destroy(dev); + + for (i = 0; i < 64; i++) { + dev = pnn->routes.table[i]; + if (dev) { + rtm_phonet_notify(RTM_DELROUTE, dev, i); + dev_put(dev); + } + } rtnl_unlock(); proc_net_remove(net, "phonet"); diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index d21fd3576610..d8f5d3fb9ee2 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -29,6 +29,8 @@ #include #include +/* Device address handling */ + static int fill_addr(struct sk_buff *skb, struct net_device *dev, u8 addr, u32 pid, u32 seq, int event); @@ -160,6 +162,131 @@ out: return skb->len; } +/* Routes handling */ + +static int fill_route(struct sk_buff *skb, struct net_device *dev, u8 dst, + u32 pid, u32 seq, int event) +{ + struct rtmsg *rtm; + struct nlmsghdr *nlh; + + nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), 0); + if (nlh == NULL) + return -EMSGSIZE; + + rtm = nlmsg_data(nlh); + rtm->rtm_family = AF_PHONET; + rtm->rtm_dst_len = 6; + rtm->rtm_src_len = 0; + rtm->rtm_tos = 0; + rtm->rtm_table = RT_TABLE_MAIN; + rtm->rtm_protocol = RTPROT_STATIC; + rtm->rtm_scope = RT_SCOPE_UNIVERSE; + rtm->rtm_type = RTN_UNICAST; + rtm->rtm_flags = 0; + NLA_PUT_U8(skb, RTA_DST, dst); + NLA_PUT_U32(skb, RTA_OIF, dev->ifindex); + return nlmsg_end(skb, nlh); + +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +void rtm_phonet_notify(int event, struct net_device *dev, u8 dst) +{ + struct sk_buff *skb; + int err = -ENOBUFS; + + skb = nlmsg_new(NLMSG_ALIGN(sizeof(struct ifaddrmsg)) + + nla_total_size(1) + nla_total_size(4), GFP_KERNEL); + if (skb == NULL) + goto errout; + err = fill_route(skb, dev, dst, 0, 0, event); + if (err < 0) { + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } + rtnl_notify(skb, dev_net(dev), 0, + RTNLGRP_PHONET_ROUTE, NULL, GFP_KERNEL); + return; +errout: + if (err < 0) + rtnl_set_sk_err(dev_net(dev), RTNLGRP_PHONET_ROUTE, err); +} + +static const struct nla_policy rtm_phonet_policy[RTA_MAX+1] = { + [RTA_DST] = { .type = NLA_U8 }, + [RTA_OIF] = { .type = NLA_U32 }, +}; + +static int route_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *attr) +{ + struct net *net = sock_net(skb->sk); + struct nlattr *tb[RTA_MAX+1]; + struct net_device *dev; + struct rtmsg *rtm; + int err; + u8 dst; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + ASSERT_RTNL(); + + err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_phonet_policy); + if (err < 0) + return err; + + rtm = nlmsg_data(nlh); + if (rtm->rtm_table != RT_TABLE_MAIN || rtm->rtm_type != RTN_UNICAST) + return -EINVAL; + if (tb[RTA_DST] == NULL || tb[RTA_OIF] == NULL) + return -EINVAL; + dst = nla_get_u8(tb[RTA_DST]); + if (dst & 3) /* Phonet addresses only have 6 high-order bits */ + return -EINVAL; + + dev = __dev_get_by_index(net, nla_get_u32(tb[RTA_OIF])); + if (dev == NULL) + return -ENODEV; + + if (nlh->nlmsg_type == RTM_NEWROUTE) + err = phonet_route_add(dev, dst); + else + err = phonet_route_del(dev, dst); + if (!err) + rtm_phonet_notify(nlh->nlmsg_type, dev, dst); + return err; +} + +static int route_dumpit(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + u8 addr, addr_idx = 0, addr_start_idx = cb->args[0]; + + for (addr = 0; addr < 64; addr++) { + struct net_device *dev; + + dev = phonet_route_get(net, addr << 2); + if (!dev) + continue; + + if (addr_idx++ < addr_start_idx) + continue; + if (fill_route(skb, dev, addr << 2, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, RTM_NEWROUTE)) + goto out; + } + +out: + cb->args[0] = addr_idx; + cb->args[1] = 0; + + return skb->len; +} + int __init phonet_netlink_register(void) { int err = __rtnl_register(PF_PHONET, RTM_NEWADDR, addr_doit, NULL); @@ -169,5 +296,8 @@ int __init phonet_netlink_register(void) /* Further __rtnl_register() cannot fail */ __rtnl_register(PF_PHONET, RTM_DELADDR, addr_doit, NULL); __rtnl_register(PF_PHONET, RTM_GETADDR, NULL, getaddr_dumpit); + __rtnl_register(PF_PHONET, RTM_NEWROUTE, route_doit, NULL); + __rtnl_register(PF_PHONET, RTM_DELROUTE, route_doit, NULL); + __rtnl_register(PF_PHONET, RTM_GETROUTE, NULL, route_dumpit); return 0; } -- cgit v1.3-8-gc7d7 From 6fb2915df7f0747d9044da9dbff5b46dc2e20830 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 15 Oct 2009 11:21:42 +0800 Subject: tracing/profile: Add filter support - Add an ioctl to allocate a filter for a perf event. - Free the filter when the associated perf event is to be freed. - Do the filtering in perf_swevent_match(). Signed-off-by: Li Zefan Acked-by: Peter Zijlstra Acked-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Tom Zanussi LKML-Reference: <4AD69546.8050401@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/ftrace_event.h | 11 ++- include/linux/perf_counter.h | 1 + include/linux/perf_event.h | 6 ++ kernel/perf_event.c | 80 ++++++++++++++++++++-- kernel/trace/trace.h | 3 +- kernel/trace/trace_events_filter.c | 133 +++++++++++++++++++++++++++++-------- 6 files changed, 199 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 4ec5e67e18cf..d11770472bc8 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -144,7 +144,7 @@ extern char *trace_profile_buf_nmi; #define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */ extern void destroy_preds(struct ftrace_event_call *call); -extern int filter_match_preds(struct ftrace_event_call *call, void *rec); +extern int filter_match_preds(struct event_filter *filter, void *rec); extern int filter_current_check_discard(struct ring_buffer *buffer, struct ftrace_event_call *call, void *rec, @@ -186,4 +186,13 @@ do { \ __trace_printk(ip, fmt, ##args); \ } while (0) +#ifdef CONFIG_EVENT_PROFILE +struct perf_event; +extern int ftrace_profile_enable(int event_id); +extern void ftrace_profile_disable(int event_id); +extern int ftrace_profile_set_filter(struct perf_event *event, int event_id, + char *filter_str); +extern void ftrace_profile_free_filter(struct perf_event *event); +#endif + #endif /* _LINUX_FTRACE_EVENT_H */ diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 7b7fbf433cff..91a2b4309e7a 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -225,6 +225,7 @@ struct perf_counter_attr { #define PERF_COUNTER_IOC_RESET _IO ('$', 3) #define PERF_COUNTER_IOC_PERIOD _IOW('$', 4, u64) #define PERF_COUNTER_IOC_SET_OUTPUT _IO ('$', 5) +#define PERF_COUNTER_IOC_SET_FILTER _IOW('$', 6, char *) enum perf_counter_ioc_flags { PERF_IOC_FLAG_GROUP = 1U << 0, diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 2e6d95f97419..df9d964c15fc 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -221,6 +221,7 @@ struct perf_event_attr { #define PERF_EVENT_IOC_RESET _IO ('$', 3) #define PERF_EVENT_IOC_PERIOD _IOW('$', 4, u64) #define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5) +#define PERF_EVENT_IOC_SET_FILTER _IOW('$', 6, char *) enum perf_event_ioc_flags { PERF_IOC_FLAG_GROUP = 1U << 0, @@ -633,7 +634,12 @@ struct perf_event { struct pid_namespace *ns; u64 id; + +#ifdef CONFIG_EVENT_PROFILE + struct event_filter *filter; #endif + +#endif /* CONFIG_PERF_EVENTS */ }; /** diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 9d0b5c665883..12b5ec39bf97 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -28,6 +28,7 @@ #include #include #include +#include #include @@ -1658,6 +1659,8 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu) return ERR_PTR(err); } +static void perf_event_free_filter(struct perf_event *event); + static void free_event_rcu(struct rcu_head *head) { struct perf_event *event; @@ -1665,6 +1668,7 @@ static void free_event_rcu(struct rcu_head *head) event = container_of(head, struct perf_event, rcu_head); if (event->ns) put_pid_ns(event->ns); + perf_event_free_filter(event); kfree(event); } @@ -1974,7 +1978,8 @@ unlock: return ret; } -int perf_event_set_output(struct perf_event *event, int output_fd); +static int perf_event_set_output(struct perf_event *event, int output_fd); +static int perf_event_set_filter(struct perf_event *event, void __user *arg); static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -2002,6 +2007,9 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case PERF_EVENT_IOC_SET_OUTPUT: return perf_event_set_output(event, arg); + case PERF_EVENT_IOC_SET_FILTER: + return perf_event_set_filter(event, (void __user *)arg); + default: return -ENOTTY; } @@ -3806,9 +3814,14 @@ static int perf_swevent_is_counting(struct perf_event *event) return 1; } +static int perf_tp_event_match(struct perf_event *event, + struct perf_sample_data *data); + static int perf_swevent_match(struct perf_event *event, enum perf_type_id type, - u32 event_id, struct pt_regs *regs) + u32 event_id, + struct perf_sample_data *data, + struct pt_regs *regs) { if (!perf_swevent_is_counting(event)) return 0; @@ -3826,6 +3839,10 @@ static int perf_swevent_match(struct perf_event *event, return 0; } + if (event->attr.type == PERF_TYPE_TRACEPOINT && + !perf_tp_event_match(event, data)) + return 0; + return 1; } @@ -3842,7 +3859,7 @@ static void perf_swevent_ctx_event(struct perf_event_context *ctx, rcu_read_lock(); list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { - if (perf_swevent_match(event, type, event_id, regs)) + if (perf_swevent_match(event, type, event_id, data, regs)) perf_swevent_add(event, nr, nmi, data, regs); } rcu_read_unlock(); @@ -4086,6 +4103,7 @@ static const struct pmu perf_ops_task_clock = { }; #ifdef CONFIG_EVENT_PROFILE + void perf_tp_event(int event_id, u64 addr, u64 count, void *record, int entry_size) { @@ -4109,8 +4127,15 @@ void perf_tp_event(int event_id, u64 addr, u64 count, void *record, } EXPORT_SYMBOL_GPL(perf_tp_event); -extern int ftrace_profile_enable(int); -extern void ftrace_profile_disable(int); +static int perf_tp_event_match(struct perf_event *event, + struct perf_sample_data *data) +{ + void *record = data->raw->data; + + if (likely(!event->filter) || filter_match_preds(event->filter, record)) + return 1; + return 0; +} static void tp_perf_event_destroy(struct perf_event *event) { @@ -4135,12 +4160,53 @@ static const struct pmu *tp_perf_event_init(struct perf_event *event) return &perf_ops_generic; } + +static int perf_event_set_filter(struct perf_event *event, void __user *arg) +{ + char *filter_str; + int ret; + + if (event->attr.type != PERF_TYPE_TRACEPOINT) + return -EINVAL; + + filter_str = strndup_user(arg, PAGE_SIZE); + if (IS_ERR(filter_str)) + return PTR_ERR(filter_str); + + ret = ftrace_profile_set_filter(event, event->attr.config, filter_str); + + kfree(filter_str); + return ret; +} + +static void perf_event_free_filter(struct perf_event *event) +{ + ftrace_profile_free_filter(event); +} + #else + +static int perf_tp_event_match(struct perf_event *event, + struct perf_sample_data *data) +{ + return 1; +} + static const struct pmu *tp_perf_event_init(struct perf_event *event) { return NULL; } -#endif + +static int perf_event_set_filter(struct perf_event *event, void __user *arg) +{ + return -ENOENT; +} + +static void perf_event_free_filter(struct perf_event *event) +{ +} + +#endif /* CONFIG_EVENT_PROFILE */ atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; @@ -4394,7 +4460,7 @@ err_size: goto out; } -int perf_event_set_output(struct perf_event *event, int output_fd) +static int perf_event_set_output(struct perf_event *event, int output_fd) { struct perf_event *output_event = NULL; struct file *output_file = NULL; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index ffe53ddbe67a..4959ada9e0bb 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -743,7 +743,8 @@ filter_check_discard(struct ftrace_event_call *call, void *rec, struct ring_buffer *buffer, struct ring_buffer_event *event) { - if (unlikely(call->filter_active) && !filter_match_preds(call, rec)) { + if (unlikely(call->filter_active) && + !filter_match_preds(call->filter, rec)) { ring_buffer_discard_commit(buffer, event); return 1; } diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 273845fce393..e27bb6acc2dd 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "trace.h" #include "trace_output.h" @@ -363,9 +364,8 @@ static void filter_build_regex(struct filter_pred *pred) } /* return 1 if event matches, 0 otherwise (discard) */ -int filter_match_preds(struct ftrace_event_call *call, void *rec) +int filter_match_preds(struct event_filter *filter, void *rec) { - struct event_filter *filter = call->filter; int match, top = 0, val1 = 0, val2 = 0; int stack[MAX_FILTER_PRED]; struct filter_pred *pred; @@ -538,9 +538,8 @@ static void filter_disable_preds(struct ftrace_event_call *call) filter->preds[i]->fn = filter_pred_none; } -void destroy_preds(struct ftrace_event_call *call) +static void __free_preds(struct event_filter *filter) { - struct event_filter *filter = call->filter; int i; if (!filter) @@ -553,21 +552,24 @@ void destroy_preds(struct ftrace_event_call *call) kfree(filter->preds); kfree(filter->filter_string); kfree(filter); +} + +void destroy_preds(struct ftrace_event_call *call) +{ + __free_preds(call->filter); call->filter = NULL; + call->filter_active = 0; } -static int init_preds(struct ftrace_event_call *call) +static struct event_filter *__alloc_preds(void) { struct event_filter *filter; struct filter_pred *pred; int i; - if (call->filter) - return 0; - - filter = call->filter = kzalloc(sizeof(*filter), GFP_KERNEL); - if (!call->filter) - return -ENOMEM; + filter = kzalloc(sizeof(*filter), GFP_KERNEL); + if (!filter) + return ERR_PTR(-ENOMEM); filter->n_preds = 0; @@ -583,12 +585,24 @@ static int init_preds(struct ftrace_event_call *call) filter->preds[i] = pred; } - return 0; + return filter; oom: - destroy_preds(call); + __free_preds(filter); + return ERR_PTR(-ENOMEM); +} + +static int init_preds(struct ftrace_event_call *call) +{ + if (call->filter) + return 0; + + call->filter_active = 0; + call->filter = __alloc_preds(); + if (IS_ERR(call->filter)) + return PTR_ERR(call->filter); - return -ENOMEM; + return 0; } static int init_subsystem_preds(struct event_subsystem *system) @@ -629,10 +643,10 @@ static void filter_free_subsystem_preds(struct event_subsystem *system) static int filter_add_pred_fn(struct filter_parse_state *ps, struct ftrace_event_call *call, + struct event_filter *filter, struct filter_pred *pred, filter_pred_fn_t fn) { - struct event_filter *filter = call->filter; int idx, err; if (filter->n_preds == MAX_FILTER_PRED) { @@ -647,7 +661,6 @@ static int filter_add_pred_fn(struct filter_parse_state *ps, return err; filter->n_preds++; - call->filter_active = 1; return 0; } @@ -726,6 +739,7 @@ static filter_pred_fn_t select_comparison_fn(int op, int field_size, static int filter_add_pred(struct filter_parse_state *ps, struct ftrace_event_call *call, + struct event_filter *filter, struct filter_pred *pred, bool dry_run) { @@ -795,7 +809,7 @@ static int filter_add_pred(struct filter_parse_state *ps, add_pred_fn: if (!dry_run) - return filter_add_pred_fn(ps, call, pred, fn); + return filter_add_pred_fn(ps, call, filter, pred, fn); return 0; } @@ -1154,6 +1168,7 @@ static int check_preds(struct filter_parse_state *ps) } static int replace_preds(struct ftrace_event_call *call, + struct event_filter *filter, struct filter_parse_state *ps, char *filter_string, bool dry_run) @@ -1200,7 +1215,7 @@ static int replace_preds(struct ftrace_event_call *call, add_pred: if (!pred) return -ENOMEM; - err = filter_add_pred(ps, call, pred, dry_run); + err = filter_add_pred(ps, call, filter, pred, dry_run); filter_free_pred(pred); if (err) return err; @@ -1216,6 +1231,7 @@ static int replace_system_preds(struct event_subsystem *system, char *filter_string) { struct ftrace_event_call *call; + struct event_filter *filter; int err; bool fail = true; @@ -1228,17 +1244,19 @@ static int replace_system_preds(struct event_subsystem *system, continue; /* try to see if the filter can be applied */ - err = replace_preds(call, ps, filter_string, true); + err = replace_preds(call, filter, ps, filter_string, true); if (err) continue; /* really apply the filter */ filter_disable_preds(call); - err = replace_preds(call, ps, filter_string, false); + err = replace_preds(call, filter, ps, filter_string, false); if (err) filter_disable_preds(call); - else - replace_filter_string(call->filter, filter_string); + else { + call->filter_active = 1; + replace_filter_string(filter, filter_string); + } fail = false; } @@ -1252,7 +1270,6 @@ static int replace_system_preds(struct event_subsystem *system, int apply_event_filter(struct ftrace_event_call *call, char *filter_string) { int err; - struct filter_parse_state *ps; mutex_lock(&event_mutex); @@ -1283,10 +1300,11 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string) goto out; } - err = replace_preds(call, ps, filter_string, false); + err = replace_preds(call, call->filter, ps, filter_string, false); if (err) append_filter_err(ps, call->filter); - + else + call->filter_active = 1; out: filter_opstack_clear(ps); postfix_clear(ps); @@ -1301,7 +1319,6 @@ int apply_subsystem_event_filter(struct event_subsystem *system, char *filter_string) { int err; - struct filter_parse_state *ps; mutex_lock(&event_mutex); @@ -1345,3 +1362,67 @@ out_unlock: return err; } +#ifdef CONFIG_EVENT_PROFILE + +void ftrace_profile_free_filter(struct perf_event *event) +{ + struct event_filter *filter = event->filter; + + event->filter = NULL; + __free_preds(filter); +} + +int ftrace_profile_set_filter(struct perf_event *event, int event_id, + char *filter_str) +{ + int err; + struct event_filter *filter; + struct filter_parse_state *ps; + struct ftrace_event_call *call = NULL; + + mutex_lock(&event_mutex); + + list_for_each_entry(call, &ftrace_events, list) { + if (call->id == event_id) + break; + } + if (!call) + return -EINVAL; + + if (event->filter) + return -EEXIST; + + filter = __alloc_preds(); + if (IS_ERR(filter)) + return PTR_ERR(filter); + + err = -ENOMEM; + ps = kzalloc(sizeof(*ps), GFP_KERNEL); + if (!ps) + goto free_preds; + + parse_init(ps, filter_ops, filter_str); + err = filter_parse(ps); + if (err) + goto free_ps; + + err = replace_preds(call, filter, ps, filter_str, false); + if (!err) + event->filter = filter; + +free_ps: + filter_opstack_clear(ps); + postfix_clear(ps); + kfree(ps); + +free_preds: + if (err) + __free_preds(filter); + + mutex_unlock(&event_mutex); + + return err; +} + +#endif /* CONFIG_EVENT_PROFILE */ + -- cgit v1.3-8-gc7d7 From 434a83c3fbb951908a3a52040f7f0e0b8ba00dd0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 15 Oct 2009 11:50:39 +0200 Subject: events: Harmonize event field names and print output names Now that we can filter based on fields via perf record, people will start using filter expressions and will expect them to be obvious. The primary way to see which fields are available is by looking at the trace output, such as: gcc-18676 [000] 343.011728: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.012727: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.032692: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.033690: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.034687: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.035686: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.036684: irq_handler_entry: irq=0 handler=timer While 'irq==0' filters work, the 'handler==' filter expression does not work: $ perf record -R -f -a -e irq:irq_handler_entry --filter handler=timer sleep 1 Error: failed to set filter with 22 (Invalid argument) The problem is that while an 'irq' field exists and is recognized as a filter field - 'handler' does not exist - its name is 'name' in the output. To solve this, we need to synchronize the printout and the field names, wherever possible. In cases where the printout prints a non-field, we enclose that information in square brackets, such as: perf-1380 [013] 724.903505: softirq_exit: vec=9 [action=RCU] perf-1380 [013] 724.904482: softirq_exit: vec=1 [action=TIMER] This way users can use filter expressions more intuitively: all fields that show up as 'primary' (non-bracketed) information is filterable. This patch harmonizes the field names for all irq, bkl, power, sched and timer events. We might in fact think about dropping the print format bit of generic tracepoints altogether, and just print the fields that are being recorded. Cc: Li Zefan Cc: Tom Zanussi Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- include/trace/events/bkl.h | 18 +++++----- include/trace/events/irq.h | 8 ++--- include/trace/events/power.h | 2 -- include/trace/events/sched.h | 44 ++++++++++++------------ include/trace/events/timer.h | 79 ++++++++++++++++++++++---------------------- 5 files changed, 74 insertions(+), 77 deletions(-) (limited to 'include') diff --git a/include/trace/events/bkl.h b/include/trace/events/bkl.h index 8abd620a490e..1af72dc24278 100644 --- a/include/trace/events/bkl.h +++ b/include/trace/events/bkl.h @@ -13,7 +13,7 @@ TRACE_EVENT(lock_kernel, TP_ARGS(func, file, line), TP_STRUCT__entry( - __field( int, lock_depth ) + __field( int, depth ) __field_ext( const char *, func, FILTER_PTR_STRING ) __field_ext( const char *, file, FILTER_PTR_STRING ) __field( int, line ) @@ -21,13 +21,13 @@ TRACE_EVENT(lock_kernel, TP_fast_assign( /* We want to record the lock_depth after lock is acquired */ - __entry->lock_depth = current->lock_depth + 1; + __entry->depth = current->lock_depth + 1; __entry->func = func; __entry->file = file; __entry->line = line; ), - TP_printk("depth: %d, %s:%d %s()", __entry->lock_depth, + TP_printk("depth=%d file:line=%s:%d func=%s()", __entry->depth, __entry->file, __entry->line, __entry->func) ); @@ -38,20 +38,20 @@ TRACE_EVENT(unlock_kernel, TP_ARGS(func, file, line), TP_STRUCT__entry( - __field(int, lock_depth) - __field(const char *, func) - __field(const char *, file) - __field(int, line) + __field(int, depth ) + __field(const char *, func ) + __field(const char *, file ) + __field(int, line ) ), TP_fast_assign( - __entry->lock_depth = current->lock_depth; + __entry->depth = current->lock_depth; __entry->func = func; __entry->file = file; __entry->line = line; ), - TP_printk("depth: %d, %s:%d %s()", __entry->lock_depth, + TP_printk("depth=%d file:line=%s:%d func=%s()", __entry->depth, __entry->file, __entry->line, __entry->func) ); diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h index b89f9db4a404..dcfcd4407623 100644 --- a/include/trace/events/irq.h +++ b/include/trace/events/irq.h @@ -48,7 +48,7 @@ TRACE_EVENT(irq_handler_entry, __assign_str(name, action->name); ), - TP_printk("irq=%d handler=%s", __entry->irq, __get_str(name)) + TP_printk("irq=%d name=%s", __entry->irq, __get_str(name)) ); /** @@ -78,7 +78,7 @@ TRACE_EVENT(irq_handler_exit, __entry->ret = ret; ), - TP_printk("irq=%d return=%s", + TP_printk("irq=%d ret=%s", __entry->irq, __entry->ret ? "handled" : "unhandled") ); @@ -107,7 +107,7 @@ TRACE_EVENT(softirq_entry, __entry->vec = (int)(h - vec); ), - TP_printk("softirq=%d action=%s", __entry->vec, + TP_printk("vec=%d [action=%s]", __entry->vec, show_softirq_name(__entry->vec)) ); @@ -136,7 +136,7 @@ TRACE_EVENT(softirq_exit, __entry->vec = (int)(h - vec); ), - TP_printk("softirq=%d action=%s", __entry->vec, + TP_printk("vec=%d [action=%s]", __entry->vec, show_softirq_name(__entry->vec)) ); diff --git a/include/trace/events/power.h b/include/trace/events/power.h index ea6d579261ad..9bb96e5a2848 100644 --- a/include/trace/events/power.h +++ b/include/trace/events/power.h @@ -16,8 +16,6 @@ enum { }; #endif - - TRACE_EVENT(power_start, TP_PROTO(unsigned int type, unsigned int state), diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 4069c43f4187..b50b9856c59f 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -26,7 +26,7 @@ TRACE_EVENT(sched_kthread_stop, __entry->pid = t->pid; ), - TP_printk("task %s:%d", __entry->comm, __entry->pid) + TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid) ); /* @@ -46,7 +46,7 @@ TRACE_EVENT(sched_kthread_stop_ret, __entry->ret = ret; ), - TP_printk("ret %d", __entry->ret) + TP_printk("ret=%d", __entry->ret) ); /* @@ -73,7 +73,7 @@ TRACE_EVENT(sched_wait_task, __entry->prio = p->prio; ), - TP_printk("task %s:%d [%d]", + TP_printk("comm=%s pid=%d prio=%d", __entry->comm, __entry->pid, __entry->prio) ); @@ -94,7 +94,7 @@ TRACE_EVENT(sched_wakeup, __field( pid_t, pid ) __field( int, prio ) __field( int, success ) - __field( int, cpu ) + __field( int, target_cpu ) ), TP_fast_assign( @@ -102,12 +102,12 @@ TRACE_EVENT(sched_wakeup, __entry->pid = p->pid; __entry->prio = p->prio; __entry->success = success; - __entry->cpu = task_cpu(p); + __entry->target_cpu = task_cpu(p); ), - TP_printk("task %s:%d [%d] success=%d [%03d]", + TP_printk("comm=%s pid=%d prio=%d success=%d target_cpu=%03d", __entry->comm, __entry->pid, __entry->prio, - __entry->success, __entry->cpu) + __entry->success, __entry->target_cpu) ); /* @@ -127,7 +127,7 @@ TRACE_EVENT(sched_wakeup_new, __field( pid_t, pid ) __field( int, prio ) __field( int, success ) - __field( int, cpu ) + __field( int, target_cpu ) ), TP_fast_assign( @@ -135,12 +135,12 @@ TRACE_EVENT(sched_wakeup_new, __entry->pid = p->pid; __entry->prio = p->prio; __entry->success = success; - __entry->cpu = task_cpu(p); + __entry->target_cpu = task_cpu(p); ), - TP_printk("task %s:%d [%d] success=%d [%03d]", + TP_printk("comm=%s pid=%d prio=%d success=%d target_cpu=%03d", __entry->comm, __entry->pid, __entry->prio, - __entry->success, __entry->cpu) + __entry->success, __entry->target_cpu) ); /* @@ -176,7 +176,7 @@ TRACE_EVENT(sched_switch, __entry->next_prio = next->prio; ), - TP_printk("task %s:%d [%d] (%s) ==> %s:%d [%d]", + TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s ==> next_comm=%s next_pid=%d next_prio=%d", __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, __entry->prev_state ? __print_flags(__entry->prev_state, "|", @@ -211,7 +211,7 @@ TRACE_EVENT(sched_migrate_task, __entry->dest_cpu = dest_cpu; ), - TP_printk("task %s:%d [%d] from: %d to: %d", + TP_printk("comm=%s pid=%d prio=%d orig_cpu=%d dest_cpu=%d", __entry->comm, __entry->pid, __entry->prio, __entry->orig_cpu, __entry->dest_cpu) ); @@ -237,7 +237,7 @@ TRACE_EVENT(sched_process_free, __entry->prio = p->prio; ), - TP_printk("task %s:%d [%d]", + TP_printk("comm=%s pid=%d prio=%d", __entry->comm, __entry->pid, __entry->prio) ); @@ -262,7 +262,7 @@ TRACE_EVENT(sched_process_exit, __entry->prio = p->prio; ), - TP_printk("task %s:%d [%d]", + TP_printk("comm=%s pid=%d prio=%d", __entry->comm, __entry->pid, __entry->prio) ); @@ -287,7 +287,7 @@ TRACE_EVENT(sched_process_wait, __entry->prio = current->prio; ), - TP_printk("task %s:%d [%d]", + TP_printk("comm=%s pid=%d prio=%d", __entry->comm, __entry->pid, __entry->prio) ); @@ -314,7 +314,7 @@ TRACE_EVENT(sched_process_fork, __entry->child_pid = child->pid; ), - TP_printk("parent %s:%d child %s:%d", + TP_printk("comm=%s pid=%d child_comm=%s child_pid=%d", __entry->parent_comm, __entry->parent_pid, __entry->child_comm, __entry->child_pid) ); @@ -340,7 +340,7 @@ TRACE_EVENT(sched_signal_send, __entry->sig = sig; ), - TP_printk("sig: %d task %s:%d", + TP_printk("sig=%d comm=%s pid=%d", __entry->sig, __entry->comm, __entry->pid) ); @@ -374,7 +374,7 @@ TRACE_EVENT(sched_stat_wait, __perf_count(delay); ), - TP_printk("task: %s:%d wait: %Lu [ns]", + TP_printk("comm=%s pid=%d delay=%Lu [ns]", __entry->comm, __entry->pid, (unsigned long long)__entry->delay) ); @@ -406,7 +406,7 @@ TRACE_EVENT(sched_stat_runtime, __perf_count(runtime); ), - TP_printk("task: %s:%d runtime: %Lu [ns], vruntime: %Lu [ns]", + TP_printk("comm=%s pid=%d runtime=%Lu [ns] vruntime=%Lu [ns]", __entry->comm, __entry->pid, (unsigned long long)__entry->runtime, (unsigned long long)__entry->vruntime) @@ -437,7 +437,7 @@ TRACE_EVENT(sched_stat_sleep, __perf_count(delay); ), - TP_printk("task: %s:%d sleep: %Lu [ns]", + TP_printk("comm=%s pid=%d delay=%Lu [ns]", __entry->comm, __entry->pid, (unsigned long long)__entry->delay) ); @@ -467,7 +467,7 @@ TRACE_EVENT(sched_stat_iowait, __perf_count(delay); ), - TP_printk("task: %s:%d iowait: %Lu [ns]", + TP_printk("comm=%s pid=%d delay=%Lu [ns]", __entry->comm, __entry->pid, (unsigned long long)__entry->delay) ); diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h index 1844c48d640e..e5ce87a0498d 100644 --- a/include/trace/events/timer.h +++ b/include/trace/events/timer.h @@ -26,7 +26,7 @@ TRACE_EVENT(timer_init, __entry->timer = timer; ), - TP_printk("timer %p", __entry->timer) + TP_printk("timer=%p", __entry->timer) ); /** @@ -54,7 +54,7 @@ TRACE_EVENT(timer_start, __entry->now = jiffies; ), - TP_printk("timer %p: func %pf, expires %lu, timeout %ld", + TP_printk("timer=%p function=%pf expires=%lu [timeout=%ld]", __entry->timer, __entry->function, __entry->expires, (long)__entry->expires - __entry->now) ); @@ -81,7 +81,7 @@ TRACE_EVENT(timer_expire_entry, __entry->now = jiffies; ), - TP_printk("timer %p: now %lu", __entry->timer, __entry->now) + TP_printk("timer=%p now=%lu", __entry->timer, __entry->now) ); /** @@ -108,7 +108,7 @@ TRACE_EVENT(timer_expire_exit, __entry->timer = timer; ), - TP_printk("timer %p", __entry->timer) + TP_printk("timer=%p", __entry->timer) ); /** @@ -129,7 +129,7 @@ TRACE_EVENT(timer_cancel, __entry->timer = timer; ), - TP_printk("timer %p", __entry->timer) + TP_printk("timer=%p", __entry->timer) ); /** @@ -140,24 +140,24 @@ TRACE_EVENT(timer_cancel, */ TRACE_EVENT(hrtimer_init, - TP_PROTO(struct hrtimer *timer, clockid_t clockid, + TP_PROTO(struct hrtimer *hrtimer, clockid_t clockid, enum hrtimer_mode mode), - TP_ARGS(timer, clockid, mode), + TP_ARGS(hrtimer, clockid, mode), TP_STRUCT__entry( - __field( void *, timer ) + __field( void *, hrtimer ) __field( clockid_t, clockid ) __field( enum hrtimer_mode, mode ) ), TP_fast_assign( - __entry->timer = timer; + __entry->hrtimer = hrtimer; __entry->clockid = clockid; __entry->mode = mode; ), - TP_printk("hrtimer %p, clockid %s, mode %s", __entry->timer, + TP_printk("hrtimer=%p clockid=%s mode=%s", __entry->hrtimer, __entry->clockid == CLOCK_REALTIME ? "CLOCK_REALTIME" : "CLOCK_MONOTONIC", __entry->mode == HRTIMER_MODE_ABS ? @@ -170,26 +170,26 @@ TRACE_EVENT(hrtimer_init, */ TRACE_EVENT(hrtimer_start, - TP_PROTO(struct hrtimer *timer), + TP_PROTO(struct hrtimer *hrtimer), - TP_ARGS(timer), + TP_ARGS(hrtimer), TP_STRUCT__entry( - __field( void *, timer ) + __field( void *, hrtimer ) __field( void *, function ) __field( s64, expires ) __field( s64, softexpires ) ), TP_fast_assign( - __entry->timer = timer; - __entry->function = timer->function; - __entry->expires = hrtimer_get_expires(timer).tv64; - __entry->softexpires = hrtimer_get_softexpires(timer).tv64; + __entry->hrtimer = hrtimer; + __entry->function = hrtimer->function; + __entry->expires = hrtimer_get_expires(hrtimer).tv64; + __entry->softexpires = hrtimer_get_softexpires(hrtimer).tv64; ), - TP_printk("hrtimer %p, func %pf, expires %llu, softexpires %llu", - __entry->timer, __entry->function, + TP_printk("hrtimer=%p function=%pf expires=%llu softexpires=%llu", + __entry->hrtimer, __entry->function, (unsigned long long)ktime_to_ns((ktime_t) { .tv64 = __entry->expires }), (unsigned long long)ktime_to_ns((ktime_t) { @@ -206,23 +206,22 @@ TRACE_EVENT(hrtimer_start, */ TRACE_EVENT(hrtimer_expire_entry, - TP_PROTO(struct hrtimer *timer, ktime_t *now), + TP_PROTO(struct hrtimer *hrtimer, ktime_t *now), - TP_ARGS(timer, now), + TP_ARGS(hrtimer, now), TP_STRUCT__entry( - __field( void *, timer ) + __field( void *, hrtimer ) __field( s64, now ) ), TP_fast_assign( - __entry->timer = timer; - __entry->now = now->tv64; + __entry->hrtimer = hrtimer; + __entry->now = now->tv64; ), - TP_printk("hrtimer %p, now %llu", __entry->timer, - (unsigned long long)ktime_to_ns((ktime_t) { - .tv64 = __entry->now })) + TP_printk("hrtimer=%p now=%llu", __entry->hrtimer, + (unsigned long long)ktime_to_ns((ktime_t) { .tv64 = __entry->now })) ); /** @@ -234,40 +233,40 @@ TRACE_EVENT(hrtimer_expire_entry, */ TRACE_EVENT(hrtimer_expire_exit, - TP_PROTO(struct hrtimer *timer), + TP_PROTO(struct hrtimer *hrtimer), - TP_ARGS(timer), + TP_ARGS(hrtimer), TP_STRUCT__entry( - __field( void *, timer ) + __field( void *, hrtimer ) ), TP_fast_assign( - __entry->timer = timer; + __entry->hrtimer = hrtimer; ), - TP_printk("hrtimer %p", __entry->timer) + TP_printk("hrtimer=%p", __entry->hrtimer) ); /** * hrtimer_cancel - called when the hrtimer is canceled - * @timer: pointer to struct hrtimer + * @hrtimer: pointer to struct hrtimer */ TRACE_EVENT(hrtimer_cancel, - TP_PROTO(struct hrtimer *timer), + TP_PROTO(struct hrtimer *hrtimer), - TP_ARGS(timer), + TP_ARGS(hrtimer), TP_STRUCT__entry( - __field( void *, timer ) + __field( void *, hrtimer ) ), TP_fast_assign( - __entry->timer = timer; + __entry->hrtimer = hrtimer; ), - TP_printk("hrtimer %p", __entry->timer) + TP_printk("hrtimer=%p", __entry->hrtimer) ); /** @@ -302,7 +301,7 @@ TRACE_EVENT(itimer_state, __entry->interval_usec = value->it_interval.tv_usec; ), - TP_printk("which %d, expires %lu, it_value %lu.%lu, it_interval %lu.%lu", + TP_printk("which=%d expires=%lu it_value=%lu.%lu it_interval=%lu.%lu", __entry->which, __entry->expires, __entry->value_sec, __entry->value_usec, __entry->interval_sec, __entry->interval_usec) @@ -332,7 +331,7 @@ TRACE_EVENT(itimer_expire, __entry->pid = pid_nr(pid); ), - TP_printk("which %d, pid %d, now %lu", __entry->which, + TP_printk("which=%d pid=%d now=%lu", __entry->which, (int) __entry->pid, __entry->now) ); -- cgit v1.3-8-gc7d7 From d2058b0cd039aad89b111d83b9c347e9d8f57a84 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 13 Oct 2009 17:39:56 +0100 Subject: ASoC: Remove snd_soc_suspend_device() The PM core will grow pm_link infrastructure in 2.6.33 which can be used to implement the intended functionality of the ASoC-specific device suspend and resume callbacks so drop them. Signed-off-by: Mark Brown --- include/sound/soc.h | 5 ----- sound/soc/codecs/cs4270.c | 20 -------------------- sound/soc/codecs/wm8350.c | 17 ----------------- sound/soc/codecs/wm8400.c | 17 ----------------- sound/soc/codecs/wm8523.c | 17 ----------------- sound/soc/codecs/wm8580.c | 17 ----------------- sound/soc/codecs/wm8711.c | 17 ----------------- sound/soc/codecs/wm8731.c | 34 ---------------------------------- sound/soc/codecs/wm8753.c | 35 ----------------------------------- sound/soc/codecs/wm8776.c | 34 ---------------------------------- sound/soc/codecs/wm8900.c | 17 ----------------- sound/soc/codecs/wm8903.c | 17 ----------------- sound/soc/codecs/wm8940.c | 17 ----------------- sound/soc/codecs/wm8960.c | 17 ----------------- sound/soc/codecs/wm8961.c | 17 ----------------- sound/soc/codecs/wm8988.c | 34 ---------------------------------- sound/soc/codecs/wm9081.c | 17 ----------------- sound/soc/soc-core.c | 39 --------------------------------------- 18 files changed, 388 deletions(-) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index 0b1f917a53ba..b1245e3acdfc 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -223,11 +223,6 @@ int snd_soc_codec_set_cache_io(struct snd_soc_codec *codec, int addr_bits, int data_bits, enum snd_soc_control_type control); -#ifdef CONFIG_PM -int snd_soc_suspend_device(struct device *dev); -int snd_soc_resume_device(struct device *dev); -#endif - /* pcm <-> DAI connect */ void snd_soc_free_pcms(struct snd_soc_device *socdev); int snd_soc_new_pcms(struct snd_soc_device *socdev, int idx, const char *xid); diff --git a/sound/soc/codecs/cs4270.c b/sound/soc/codecs/cs4270.c index ca1e24a8f12a..59bb16d033d6 100644 --- a/sound/soc/codecs/cs4270.c +++ b/sound/soc/codecs/cs4270.c @@ -802,22 +802,6 @@ MODULE_DEVICE_TABLE(i2c, cs4270_id); * and all registers are written back to the hardware when resuming. */ -static int cs4270_i2c_suspend(struct i2c_client *client, pm_message_t mesg) -{ - struct cs4270_private *cs4270 = i2c_get_clientdata(client); - struct snd_soc_codec *codec = &cs4270->codec; - - return snd_soc_suspend_device(codec->dev); -} - -static int cs4270_i2c_resume(struct i2c_client *client) -{ - struct cs4270_private *cs4270 = i2c_get_clientdata(client); - struct snd_soc_codec *codec = &cs4270->codec; - - return snd_soc_resume_device(codec->dev); -} - static int cs4270_soc_suspend(struct platform_device *pdev, pm_message_t mesg) { struct snd_soc_codec *codec = cs4270_codec; @@ -853,8 +837,6 @@ static int cs4270_soc_resume(struct platform_device *pdev) return snd_soc_write(codec, CS4270_PWRCTL, reg); } #else -#define cs4270_i2c_suspend NULL -#define cs4270_i2c_resume NULL #define cs4270_soc_suspend NULL #define cs4270_soc_resume NULL #endif /* CONFIG_PM */ @@ -873,8 +855,6 @@ static struct i2c_driver cs4270_i2c_driver = { .id_table = cs4270_id, .probe = cs4270_i2c_probe, .remove = cs4270_i2c_remove, - .suspend = cs4270_i2c_suspend, - .resume = cs4270_i2c_resume, }; /* diff --git a/sound/soc/codecs/wm8350.c b/sound/soc/codecs/wm8350.c index 72abc5a6d8d8..714114b50d18 100644 --- a/sound/soc/codecs/wm8350.c +++ b/sound/soc/codecs/wm8350.c @@ -1680,21 +1680,6 @@ static int __devexit wm8350_codec_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM -static int wm8350_codec_suspend(struct platform_device *pdev, pm_message_t m) -{ - return snd_soc_suspend_device(&pdev->dev); -} - -static int wm8350_codec_resume(struct platform_device *pdev) -{ - return snd_soc_resume_device(&pdev->dev); -} -#else -#define wm8350_codec_suspend NULL -#define wm8350_codec_resume NULL -#endif - static struct platform_driver wm8350_codec_driver = { .driver = { .name = "wm8350-codec", @@ -1702,8 +1687,6 @@ static struct platform_driver wm8350_codec_driver = { }, .probe = wm8350_codec_probe, .remove = __devexit_p(wm8350_codec_remove), - .suspend = wm8350_codec_suspend, - .resume = wm8350_codec_resume, }; static __init int wm8350_init(void) diff --git a/sound/soc/codecs/wm8400.c b/sound/soc/codecs/wm8400.c index 9cb8e50f0fbb..bd7eecba20fe 100644 --- a/sound/soc/codecs/wm8400.c +++ b/sound/soc/codecs/wm8400.c @@ -1559,21 +1559,6 @@ static int __exit wm8400_codec_remove(struct platform_device *dev) return 0; } -#ifdef CONFIG_PM -static int wm8400_pdev_suspend(struct platform_device *pdev, pm_message_t msg) -{ - return snd_soc_suspend_device(&pdev->dev); -} - -static int wm8400_pdev_resume(struct platform_device *pdev) -{ - return snd_soc_resume_device(&pdev->dev); -} -#else -#define wm8400_pdev_suspend NULL -#define wm8400_pdev_resume NULL -#endif - static struct platform_driver wm8400_codec_driver = { .driver = { .name = "wm8400-codec", @@ -1581,8 +1566,6 @@ static struct platform_driver wm8400_codec_driver = { }, .probe = wm8400_codec_probe, .remove = __exit_p(wm8400_codec_remove), - .suspend = wm8400_pdev_suspend, - .resume = wm8400_pdev_resume, }; static int __init wm8400_codec_init(void) diff --git a/sound/soc/codecs/wm8523.c b/sound/soc/codecs/wm8523.c index 25870a4652fb..268cab21c2cc 100644 --- a/sound/soc/codecs/wm8523.c +++ b/sound/soc/codecs/wm8523.c @@ -638,21 +638,6 @@ static __devexit int wm8523_i2c_remove(struct i2c_client *client) return 0; } -#ifdef CONFIG_PM -static int wm8523_i2c_suspend(struct i2c_client *i2c, pm_message_t msg) -{ - return snd_soc_suspend_device(&i2c->dev); -} - -static int wm8523_i2c_resume(struct i2c_client *i2c) -{ - return snd_soc_resume_device(&i2c->dev); -} -#else -#define wm8523_i2c_suspend NULL -#define wm8523_i2c_resume NULL -#endif - static const struct i2c_device_id wm8523_i2c_id[] = { { "wm8523", 0 }, { } @@ -666,8 +651,6 @@ static struct i2c_driver wm8523_i2c_driver = { }, .probe = wm8523_i2c_probe, .remove = __devexit_p(wm8523_i2c_remove), - .suspend = wm8523_i2c_suspend, - .resume = wm8523_i2c_resume, .id_table = wm8523_i2c_id, }; #endif diff --git a/sound/soc/codecs/wm8580.c b/sound/soc/codecs/wm8580.c index 3be5c0b2552c..a09b23e03664 100644 --- a/sound/soc/codecs/wm8580.c +++ b/sound/soc/codecs/wm8580.c @@ -961,21 +961,6 @@ static int wm8580_i2c_remove(struct i2c_client *client) return 0; } -#ifdef CONFIG_PM -static int wm8580_i2c_suspend(struct i2c_client *client, pm_message_t msg) -{ - return snd_soc_suspend_device(&client->dev); -} - -static int wm8580_i2c_resume(struct i2c_client *client) -{ - return snd_soc_resume_device(&client->dev); -} -#else -#define wm8580_i2c_suspend NULL -#define wm8580_i2c_resume NULL -#endif - static const struct i2c_device_id wm8580_i2c_id[] = { { "wm8580", 0 }, { } @@ -989,8 +974,6 @@ static struct i2c_driver wm8580_i2c_driver = { }, .probe = wm8580_i2c_probe, .remove = wm8580_i2c_remove, - .suspend = wm8580_i2c_suspend, - .resume = wm8580_i2c_resume, .id_table = wm8580_i2c_id, }; #endif diff --git a/sound/soc/codecs/wm8711.c b/sound/soc/codecs/wm8711.c index 90ec8c58e2f4..54189fbf9e93 100644 --- a/sound/soc/codecs/wm8711.c +++ b/sound/soc/codecs/wm8711.c @@ -548,21 +548,6 @@ static int __devexit wm8711_spi_remove(struct spi_device *spi) return 0; } -#ifdef CONFIG_PM -static int wm8711_spi_suspend(struct spi_device *spi, pm_message_t msg) -{ - return snd_soc_suspend_device(&spi->dev); -} - -static int wm8711_spi_resume(struct spi_device *spi) -{ - return snd_soc_resume_device(&spi->dev); -} -#else -#define wm8711_spi_suspend NULL -#define wm8711_spi_resume NULL -#endif - static struct spi_driver wm8711_spi_driver = { .driver = { .name = "wm8711", @@ -570,8 +555,6 @@ static struct spi_driver wm8711_spi_driver = { .owner = THIS_MODULE, }, .probe = wm8711_spi_probe, - .suspend = wm8711_spi_suspend, - .resume = wm8711_spi_resume, .remove = __devexit_p(wm8711_spi_remove), }; #endif /* CONFIG_SPI_MASTER */ diff --git a/sound/soc/codecs/wm8731.c b/sound/soc/codecs/wm8731.c index d3fd4f28d96e..0e59219a59f4 100644 --- a/sound/soc/codecs/wm8731.c +++ b/sound/soc/codecs/wm8731.c @@ -623,21 +623,6 @@ static int __devexit wm8731_spi_remove(struct spi_device *spi) return 0; } -#ifdef CONFIG_PM -static int wm8731_spi_suspend(struct spi_device *spi, pm_message_t msg) -{ - return snd_soc_suspend_device(&spi->dev); -} - -static int wm8731_spi_resume(struct spi_device *spi) -{ - return snd_soc_resume_device(&spi->dev); -} -#else -#define wm8731_spi_suspend NULL -#define wm8731_spi_resume NULL -#endif - static struct spi_driver wm8731_spi_driver = { .driver = { .name = "wm8731", @@ -645,8 +630,6 @@ static struct spi_driver wm8731_spi_driver = { .owner = THIS_MODULE, }, .probe = wm8731_spi_probe, - .suspend = wm8731_spi_suspend, - .resume = wm8731_spi_resume, .remove = __devexit_p(wm8731_spi_remove), }; #endif /* CONFIG_SPI_MASTER */ @@ -679,21 +662,6 @@ static __devexit int wm8731_i2c_remove(struct i2c_client *client) return 0; } -#ifdef CONFIG_PM -static int wm8731_i2c_suspend(struct i2c_client *i2c, pm_message_t msg) -{ - return snd_soc_suspend_device(&i2c->dev); -} - -static int wm8731_i2c_resume(struct i2c_client *i2c) -{ - return snd_soc_resume_device(&i2c->dev); -} -#else -#define wm8731_i2c_suspend NULL -#define wm8731_i2c_resume NULL -#endif - static const struct i2c_device_id wm8731_i2c_id[] = { { "wm8731", 0 }, { } @@ -707,8 +675,6 @@ static struct i2c_driver wm8731_i2c_driver = { }, .probe = wm8731_i2c_probe, .remove = __devexit_p(wm8731_i2c_remove), - .suspend = wm8731_i2c_suspend, - .resume = wm8731_i2c_resume, .id_table = wm8731_i2c_id, }; #endif diff --git a/sound/soc/codecs/wm8753.c b/sound/soc/codecs/wm8753.c index 9b27efb052fe..8f7305257d29 100644 --- a/sound/soc/codecs/wm8753.c +++ b/sound/soc/codecs/wm8753.c @@ -1767,21 +1767,6 @@ static int wm8753_i2c_remove(struct i2c_client *client) return 0; } -#ifdef CONFIG_PM -static int wm8753_i2c_suspend(struct i2c_client *client, pm_message_t msg) -{ - return snd_soc_suspend_device(&client->dev); -} - -static int wm8753_i2c_resume(struct i2c_client *client) -{ - return snd_soc_resume_device(&client->dev); -} -#else -#define wm8753_i2c_suspend NULL -#define wm8753_i2c_resume NULL -#endif - static const struct i2c_device_id wm8753_i2c_id[] = { { "wm8753", 0 }, { } @@ -1795,8 +1780,6 @@ static struct i2c_driver wm8753_i2c_driver = { }, .probe = wm8753_i2c_probe, .remove = wm8753_i2c_remove, - .suspend = wm8753_i2c_suspend, - .resume = wm8753_i2c_resume, .id_table = wm8753_i2c_id, }; #endif @@ -1852,22 +1835,6 @@ static int __devexit wm8753_spi_remove(struct spi_device *spi) return 0; } -#ifdef CONFIG_PM -static int wm8753_spi_suspend(struct spi_device *spi, pm_message_t msg) -{ - return snd_soc_suspend_device(&spi->dev); -} - -static int wm8753_spi_resume(struct spi_device *spi) -{ - return snd_soc_resume_device(&spi->dev); -} - -#else -#define wm8753_spi_suspend NULL -#define wm8753_spi_resume NULL -#endif - static struct spi_driver wm8753_spi_driver = { .driver = { .name = "wm8753", @@ -1876,8 +1843,6 @@ static struct spi_driver wm8753_spi_driver = { }, .probe = wm8753_spi_probe, .remove = __devexit_p(wm8753_spi_remove), - .suspend = wm8753_spi_suspend, - .resume = wm8753_spi_resume, }; #endif diff --git a/sound/soc/codecs/wm8776.c b/sound/soc/codecs/wm8776.c index a9829aa26e53..a0bbb28eed75 100644 --- a/sound/soc/codecs/wm8776.c +++ b/sound/soc/codecs/wm8776.c @@ -616,21 +616,6 @@ static int __devexit wm8776_spi_remove(struct spi_device *spi) return 0; } -#ifdef CONFIG_PM -static int wm8776_spi_suspend(struct spi_device *spi, pm_message_t msg) -{ - return snd_soc_suspend_device(&spi->dev); -} - -static int wm8776_spi_resume(struct spi_device *spi) -{ - return snd_soc_resume_device(&spi->dev); -} -#else -#define wm8776_spi_suspend NULL -#define wm8776_spi_resume NULL -#endif - static struct spi_driver wm8776_spi_driver = { .driver = { .name = "wm8776", @@ -638,8 +623,6 @@ static struct spi_driver wm8776_spi_driver = { .owner = THIS_MODULE, }, .probe = wm8776_spi_probe, - .suspend = wm8776_spi_suspend, - .resume = wm8776_spi_resume, .remove = __devexit_p(wm8776_spi_remove), }; #endif /* CONFIG_SPI_MASTER */ @@ -673,21 +656,6 @@ static __devexit int wm8776_i2c_remove(struct i2c_client *client) return 0; } -#ifdef CONFIG_PM -static int wm8776_i2c_suspend(struct i2c_client *i2c, pm_message_t msg) -{ - return snd_soc_suspend_device(&i2c->dev); -} - -static int wm8776_i2c_resume(struct i2c_client *i2c) -{ - return snd_soc_resume_device(&i2c->dev); -} -#else -#define wm8776_i2c_suspend NULL -#define wm8776_i2c_resume NULL -#endif - static const struct i2c_device_id wm8776_i2c_id[] = { { "wm8776", 0 }, { } @@ -701,8 +669,6 @@ static struct i2c_driver wm8776_i2c_driver = { }, .probe = wm8776_i2c_probe, .remove = __devexit_p(wm8776_i2c_remove), - .suspend = wm8776_i2c_suspend, - .resume = wm8776_i2c_resume, .id_table = wm8776_i2c_id, }; #endif diff --git a/sound/soc/codecs/wm8900.c b/sound/soc/codecs/wm8900.c index 882604ef768c..b48804b5cacd 100644 --- a/sound/soc/codecs/wm8900.c +++ b/sound/soc/codecs/wm8900.c @@ -1312,21 +1312,6 @@ static __devexit int wm8900_i2c_remove(struct i2c_client *client) return 0; } -#ifdef CONFIG_PM -static int wm8900_i2c_suspend(struct i2c_client *client, pm_message_t msg) -{ - return snd_soc_suspend_device(&client->dev); -} - -static int wm8900_i2c_resume(struct i2c_client *client) -{ - return snd_soc_resume_device(&client->dev); -} -#else -#define wm8900_i2c_suspend NULL -#define wm8900_i2c_resume NULL -#endif - static const struct i2c_device_id wm8900_i2c_id[] = { { "wm8900", 0 }, { } @@ -1340,8 +1325,6 @@ static struct i2c_driver wm8900_i2c_driver = { }, .probe = wm8900_i2c_probe, .remove = __devexit_p(wm8900_i2c_remove), - .suspend = wm8900_i2c_suspend, - .resume = wm8900_i2c_resume, .id_table = wm8900_i2c_id, }; diff --git a/sound/soc/codecs/wm8903.c b/sound/soc/codecs/wm8903.c index fe1307b500cf..94cdb8130415 100644 --- a/sound/soc/codecs/wm8903.c +++ b/sound/soc/codecs/wm8903.c @@ -1655,21 +1655,6 @@ static __devexit int wm8903_i2c_remove(struct i2c_client *client) return 0; } -#ifdef CONFIG_PM -static int wm8903_i2c_suspend(struct i2c_client *client, pm_message_t msg) -{ - return snd_soc_suspend_device(&client->dev); -} - -static int wm8903_i2c_resume(struct i2c_client *client) -{ - return snd_soc_resume_device(&client->dev); -} -#else -#define wm8903_i2c_suspend NULL -#define wm8903_i2c_resume NULL -#endif - /* i2c codec control layer */ static const struct i2c_device_id wm8903_i2c_id[] = { { "wm8903", 0 }, @@ -1684,8 +1669,6 @@ static struct i2c_driver wm8903_i2c_driver = { }, .probe = wm8903_i2c_probe, .remove = __devexit_p(wm8903_i2c_remove), - .suspend = wm8903_i2c_suspend, - .resume = wm8903_i2c_resume, .id_table = wm8903_i2c_id, }; diff --git a/sound/soc/codecs/wm8940.c b/sound/soc/codecs/wm8940.c index 1685cfb993c6..8d4fd3c08c09 100644 --- a/sound/soc/codecs/wm8940.c +++ b/sound/soc/codecs/wm8940.c @@ -877,21 +877,6 @@ static int __devexit wm8940_i2c_remove(struct i2c_client *client) return 0; } -#ifdef CONFIG_PM -static int wm8940_i2c_suspend(struct i2c_client *client, pm_message_t msg) -{ - return snd_soc_suspend_device(&client->dev); -} - -static int wm8940_i2c_resume(struct i2c_client *client) -{ - return snd_soc_resume_device(&client->dev); -} -#else -#define wm8940_i2c_suspend NULL -#define wm8940_i2c_resume NULL -#endif - static const struct i2c_device_id wm8940_i2c_id[] = { { "wm8940", 0 }, { } @@ -905,8 +890,6 @@ static struct i2c_driver wm8940_i2c_driver = { }, .probe = wm8940_i2c_probe, .remove = __devexit_p(wm8940_i2c_remove), - .suspend = wm8940_i2c_suspend, - .resume = wm8940_i2c_resume, .id_table = wm8940_i2c_id, }; diff --git a/sound/soc/codecs/wm8960.c b/sound/soc/codecs/wm8960.c index 416fb3c17018..b9b096a85396 100644 --- a/sound/soc/codecs/wm8960.c +++ b/sound/soc/codecs/wm8960.c @@ -883,21 +883,6 @@ static __devexit int wm8960_i2c_remove(struct i2c_client *client) return 0; } -#ifdef CONFIG_PM -static int wm8960_i2c_suspend(struct i2c_client *client, pm_message_t msg) -{ - return snd_soc_suspend_device(&client->dev); -} - -static int wm8960_i2c_resume(struct i2c_client *client) -{ - return snd_soc_resume_device(&client->dev); -} -#else -#define wm8960_i2c_suspend NULL -#define wm8960_i2c_resume NULL -#endif - static const struct i2c_device_id wm8960_i2c_id[] = { { "wm8960", 0 }, { } @@ -911,8 +896,6 @@ static struct i2c_driver wm8960_i2c_driver = { }, .probe = wm8960_i2c_probe, .remove = __devexit_p(wm8960_i2c_remove), - .suspend = wm8960_i2c_suspend, - .resume = wm8960_i2c_resume, .id_table = wm8960_i2c_id, }; diff --git a/sound/soc/codecs/wm8961.c b/sound/soc/codecs/wm8961.c index 503032085899..b5c6f2cd5ae2 100644 --- a/sound/soc/codecs/wm8961.c +++ b/sound/soc/codecs/wm8961.c @@ -1206,21 +1206,6 @@ static __devexit int wm8961_i2c_remove(struct i2c_client *client) return 0; } -#ifdef CONFIG_PM -static int wm8961_i2c_suspend(struct i2c_client *client, pm_message_t state) -{ - return snd_soc_suspend_device(&client->dev); -} - -static int wm8961_i2c_resume(struct i2c_client *client) -{ - return snd_soc_resume_device(&client->dev); -} -#else -#define wm8961_i2c_suspend NULL -#define wm8961_i2c_resume NULL -#endif - static const struct i2c_device_id wm8961_i2c_id[] = { { "wm8961", 0 }, { } @@ -1234,8 +1219,6 @@ static struct i2c_driver wm8961_i2c_driver = { }, .probe = wm8961_i2c_probe, .remove = __devexit_p(wm8961_i2c_remove), - .suspend = wm8961_i2c_suspend, - .resume = wm8961_i2c_resume, .id_table = wm8961_i2c_id, }; diff --git a/sound/soc/codecs/wm8988.c b/sound/soc/codecs/wm8988.c index 3f530f8a972a..d8d8f68b81ea 100644 --- a/sound/soc/codecs/wm8988.c +++ b/sound/soc/codecs/wm8988.c @@ -944,21 +944,6 @@ static int wm8988_i2c_remove(struct i2c_client *client) return 0; } -#ifdef CONFIG_PM -static int wm8988_i2c_suspend(struct i2c_client *client, pm_message_t msg) -{ - return snd_soc_suspend_device(&client->dev); -} - -static int wm8988_i2c_resume(struct i2c_client *client) -{ - return snd_soc_resume_device(&client->dev); -} -#else -#define wm8988_i2c_suspend NULL -#define wm8988_i2c_resume NULL -#endif - static const struct i2c_device_id wm8988_i2c_id[] = { { "wm8988", 0 }, { } @@ -972,8 +957,6 @@ static struct i2c_driver wm8988_i2c_driver = { }, .probe = wm8988_i2c_probe, .remove = wm8988_i2c_remove, - .suspend = wm8988_i2c_suspend, - .resume = wm8988_i2c_resume, .id_table = wm8988_i2c_id, }; #endif @@ -1006,21 +989,6 @@ static int __devexit wm8988_spi_remove(struct spi_device *spi) return 0; } -#ifdef CONFIG_PM -static int wm8988_spi_suspend(struct spi_device *spi, pm_message_t msg) -{ - return snd_soc_suspend_device(&spi->dev); -} - -static int wm8988_spi_resume(struct spi_device *spi) -{ - return snd_soc_resume_device(&spi->dev); -} -#else -#define wm8988_spi_suspend NULL -#define wm8988_spi_resume NULL -#endif - static struct spi_driver wm8988_spi_driver = { .driver = { .name = "wm8988", @@ -1029,8 +997,6 @@ static struct spi_driver wm8988_spi_driver = { }, .probe = wm8988_spi_probe, .remove = __devexit_p(wm8988_spi_remove), - .suspend = wm8988_spi_suspend, - .resume = wm8988_spi_resume, }; #endif diff --git a/sound/soc/codecs/wm9081.c b/sound/soc/codecs/wm9081.c index 686e5aa97206..4cb6b104b729 100644 --- a/sound/soc/codecs/wm9081.c +++ b/sound/soc/codecs/wm9081.c @@ -1452,21 +1452,6 @@ static __devexit int wm9081_i2c_remove(struct i2c_client *client) return 0; } -#ifdef CONFIG_PM -static int wm9081_i2c_suspend(struct i2c_client *client, pm_message_t msg) -{ - return snd_soc_suspend_device(&client->dev); -} - -static int wm9081_i2c_resume(struct i2c_client *client) -{ - return snd_soc_resume_device(&client->dev); -} -#else -#define wm9081_i2c_suspend NULL -#define wm9081_i2c_resume NULL -#endif - static const struct i2c_device_id wm9081_i2c_id[] = { { "wm9081", 0 }, { } @@ -1480,8 +1465,6 @@ static struct i2c_driver wm9081_i2c_driver = { }, .probe = wm9081_i2c_probe, .remove = __devexit_p(wm9081_i2c_remove), - .suspend = wm9081_i2c_suspend, - .resume = wm9081_i2c_resume, .id_table = wm9081_i2c_id, }; diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 1dec9d21c55e..fa0da3cac705 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -790,45 +790,6 @@ static int soc_resume(struct device *dev) return 0; } - -/** - * snd_soc_suspend_device: Notify core of device suspend - * - * @dev: Device being suspended. - * - * In order to ensure that the entire audio subsystem is suspended in a - * coordinated fashion ASoC devices should suspend themselves when - * called by ASoC. When the standard kernel suspend process asks the - * device to suspend it should call this function to initiate a suspend - * of the entire ASoC card. - * - * \note Currently this function is stubbed out. - */ -int snd_soc_suspend_device(struct device *dev) -{ - return 0; -} -EXPORT_SYMBOL_GPL(snd_soc_suspend_device); - -/** - * snd_soc_resume_device: Notify core of device resume - * - * @dev: Device being resumed. - * - * In order to ensure that the entire audio subsystem is resumed in a - * coordinated fashion ASoC devices should resume themselves when called - * by ASoC. When the standard kernel resume process asks the device - * to resume it should call this function. Once all the components of - * the card have notified that they are ready to be resumed the card - * will be resumed. - * - * \note Currently this function is stubbed out. - */ -int snd_soc_resume_device(struct device *dev) -{ - return 0; -} -EXPORT_SYMBOL_GPL(snd_soc_resume_device); #else #define soc_suspend NULL #define soc_resume NULL -- cgit v1.3-8-gc7d7 From c8bf93f0fe8c5a509a29e30f3bac823fa0f6d96e Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Thu, 15 Oct 2009 09:03:56 +0300 Subject: ASoC: Codec driver for Texas Instruments tlv320dac33 codec Driver for Texas Instruments TLV320DAC33 (SLAS546) low power stereo audio DAC. TLV320DAC33 is a stereo audio codec with integrated 24KB FIFO for low power audio playback. The digital interface can use I2S, DSP (A or B), Right and Left justified formats. DAC33 has stereo analog input, which can be bypassed to the analog outputs. Regarding to the internal 24KB FIFO the driver implements 'FIFO bypass' mode (default) and nSample mode (FIFO is in use). a) In 'FIFO bypass' mode the internal FIFO is not in use, the codec is working synchronously as a normal codec (it needs constant stream of data on the digital interface). b) The nSample mode implementation uses one interrupt line from DAC33 to the host: Alarm threshold is set to 10ms of audio data (limit by the driver implementation). DAC33 will signal an interrupt, when the FIFO level goes under the Alarm threshold. The host will write to nSample register a value (number of stereo samples), to tell DAC33 how many samples it should read in a burst from the host. When the DAC33 received the number of samples, it disables the clocks on the I2S bus. When the FIFO use again goes under the Alarm threshold, DAC33 signals the host with an interrupt, and the process is repeated. Signed-off-by: Peter Ujfalusi Signed-off-by: Mark Brown --- include/sound/tlv320dac33-plat.h | 20 + sound/soc/codecs/Kconfig | 4 + sound/soc/codecs/Makefile | 2 + sound/soc/codecs/tlv320dac33.c | 1237 ++++++++++++++++++++++++++++++++++++++ sound/soc/codecs/tlv320dac33.h | 267 ++++++++ 5 files changed, 1530 insertions(+) create mode 100644 include/sound/tlv320dac33-plat.h create mode 100644 sound/soc/codecs/tlv320dac33.c create mode 100644 sound/soc/codecs/tlv320dac33.h (limited to 'include') diff --git a/include/sound/tlv320dac33-plat.h b/include/sound/tlv320dac33-plat.h new file mode 100644 index 000000000000..5858d06a7ffa --- /dev/null +++ b/include/sound/tlv320dac33-plat.h @@ -0,0 +1,20 @@ +/* + * Platform header for Texas Instruments TLV320DAC33 codec driver + * + * Author: Peter Ujfalusi + * + * Copyright: (C) 2009 Nokia Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __TLV320DAC33_PLAT_H +#define __TLV320DAC33_PLAT_H + +struct tlv320dac33_platform_data { + int power_gpio; +}; + +#endif /* __TLV320DAC33_PLAT_H */ diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig index fab01c991828..d30fce71cfe8 100644 --- a/sound/soc/codecs/Kconfig +++ b/sound/soc/codecs/Kconfig @@ -30,6 +30,7 @@ config SND_SOC_ALL_CODECS select SND_SOC_TLV320AIC26 if SPI_MASTER select SND_SOC_TLV320AIC3X if I2C select SND_SOC_TPA6130A2 if I2C + select SND_SOC_TLV320DAC33 if I2C select SND_SOC_TWL4030 if TWL4030_CORE select SND_SOC_UDA134X select SND_SOC_UDA1380 if I2C @@ -142,6 +143,9 @@ config SND_SOC_TLV320AIC26 config SND_SOC_TLV320AIC3X tristate +config SND_SOC_TLV320DAC33 + tristate + config SND_SOC_TWL4030 tristate diff --git a/sound/soc/codecs/Makefile b/sound/soc/codecs/Makefile index 2f14391b96f9..8f519ee9600d 100644 --- a/sound/soc/codecs/Makefile +++ b/sound/soc/codecs/Makefile @@ -17,6 +17,7 @@ snd-soc-stac9766-objs := stac9766.o snd-soc-tlv320aic23-objs := tlv320aic23.o snd-soc-tlv320aic26-objs := tlv320aic26.o snd-soc-tlv320aic3x-objs := tlv320aic3x.o +snd-soc-tlv320dac33-objs := tlv320dac33.o snd-soc-twl4030-objs := twl4030.o snd-soc-uda134x-objs := uda134x.o snd-soc-uda1380-objs := uda1380.o @@ -70,6 +71,7 @@ obj-$(CONFIG_SND_SOC_STAC9766) += snd-soc-stac9766.o obj-$(CONFIG_SND_SOC_TLV320AIC23) += snd-soc-tlv320aic23.o obj-$(CONFIG_SND_SOC_TLV320AIC26) += snd-soc-tlv320aic26.o obj-$(CONFIG_SND_SOC_TLV320AIC3X) += snd-soc-tlv320aic3x.o +obj-$(CONFIG_SND_SOC_TLV320DAC33) += snd-soc-tlv320dac33.o obj-$(CONFIG_SND_SOC_TWL4030) += snd-soc-twl4030.o obj-$(CONFIG_SND_SOC_UDA134X) += snd-soc-uda134x.o obj-$(CONFIG_SND_SOC_UDA1380) += snd-soc-uda1380.o diff --git a/sound/soc/codecs/tlv320dac33.c b/sound/soc/codecs/tlv320dac33.c new file mode 100644 index 000000000000..3ca8934fc26c --- /dev/null +++ b/sound/soc/codecs/tlv320dac33.c @@ -0,0 +1,1237 @@ +/* + * ALSA SoC Texas Instruments TLV320DAC33 codec driver + * + * Author: Peter Ujfalusi + * + * Copyright: (C) 2009 Nokia Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "tlv320dac33.h" + +#define DAC33_BUFFER_SIZE_BYTES 24576 /* bytes, 12288 16 bit words, + * 6144 stereo */ +#define DAC33_BUFFER_SIZE_SAMPLES 6144 + +#define NSAMPLE_MAX 5700 + +#define LATENCY_TIME_MS 20 + +static struct snd_soc_codec *tlv320dac33_codec; + +enum dac33_state { + DAC33_IDLE = 0, + DAC33_PREFILL, + DAC33_PLAYBACK, + DAC33_FLUSH, +}; + +struct tlv320dac33_priv { + struct mutex mutex; + struct workqueue_struct *dac33_wq; + struct work_struct work; + struct snd_soc_codec codec; + int power_gpio; + int chip_power; + int irq; + unsigned int refclk; + + unsigned int alarm_threshold; /* set to be half of LATENCY_TIME_MS */ + unsigned int nsample_min; /* nsample should not be lower than + * this */ + unsigned int nsample_max; /* nsample should not be higher than + * this */ + unsigned int nsample_switch; /* Use FIFO or bypass FIFO switch */ + unsigned int nsample; /* burst read amount from host */ + + enum dac33_state state; +}; + +static const u8 dac33_reg[DAC33_CACHEREGNUM] = { +0x00, 0x00, 0x00, 0x00, /* 0x00 - 0x03 */ +0x00, 0x00, 0x00, 0x00, /* 0x04 - 0x07 */ +0x00, 0x00, 0x00, 0x00, /* 0x08 - 0x0b */ +0x00, 0x00, 0x00, 0x00, /* 0x0c - 0x0f */ +0x00, 0x00, 0x00, 0x00, /* 0x10 - 0x13 */ +0x00, 0x00, 0x00, 0x00, /* 0x14 - 0x17 */ +0x00, 0x00, 0x00, 0x00, /* 0x18 - 0x1b */ +0x00, 0x00, 0x00, 0x00, /* 0x1c - 0x1f */ +0x00, 0x00, 0x00, 0x00, /* 0x20 - 0x23 */ +0x00, 0x00, 0x00, 0x00, /* 0x24 - 0x27 */ +0x00, 0x00, 0x00, 0x00, /* 0x28 - 0x2b */ +0x00, 0x00, 0x00, 0x80, /* 0x2c - 0x2f */ +0x80, 0x00, 0x00, 0x00, /* 0x30 - 0x33 */ +0x00, 0x00, 0x00, 0x00, /* 0x34 - 0x37 */ +0x00, 0x00, /* 0x38 - 0x39 */ +/* Registers 0x3a - 0x3f are reserved */ + 0x00, 0x00, /* 0x3a - 0x3b */ +0x00, 0x00, 0x00, 0x00, /* 0x3c - 0x3f */ + +0x00, 0x00, 0x00, 0x00, /* 0x40 - 0x43 */ +0x00, 0x80, /* 0x44 - 0x45 */ +/* Registers 0x46 - 0x47 are reserved */ + 0x80, 0x80, /* 0x46 - 0x47 */ + +0x80, 0x00, 0x00, /* 0x48 - 0x4a */ +/* Registers 0x4b - 0x7c are reserved */ + 0x00, /* 0x4b */ +0x00, 0x00, 0x00, 0x00, /* 0x4c - 0x4f */ +0x00, 0x00, 0x00, 0x00, /* 0x50 - 0x53 */ +0x00, 0x00, 0x00, 0x00, /* 0x54 - 0x57 */ +0x00, 0x00, 0x00, 0x00, /* 0x58 - 0x5b */ +0x00, 0x00, 0x00, 0x00, /* 0x5c - 0x5f */ +0x00, 0x00, 0x00, 0x00, /* 0x60 - 0x63 */ +0x00, 0x00, 0x00, 0x00, /* 0x64 - 0x67 */ +0x00, 0x00, 0x00, 0x00, /* 0x68 - 0x6b */ +0x00, 0x00, 0x00, 0x00, /* 0x6c - 0x6f */ +0x00, 0x00, 0x00, 0x00, /* 0x70 - 0x73 */ +0x00, 0x00, 0x00, 0x00, /* 0x74 - 0x77 */ +0x00, 0x00, 0x00, 0x00, /* 0x78 - 0x7b */ +0x00, /* 0x7c */ + + 0xda, 0x33, 0x03, /* 0x7d - 0x7f */ +}; + +/* Register read and write */ +static inline unsigned int dac33_read_reg_cache(struct snd_soc_codec *codec, + unsigned reg) +{ + u8 *cache = codec->reg_cache; + if (reg >= DAC33_CACHEREGNUM) + return 0; + + return cache[reg]; +} + +static inline void dac33_write_reg_cache(struct snd_soc_codec *codec, + u8 reg, u8 value) +{ + u8 *cache = codec->reg_cache; + if (reg >= DAC33_CACHEREGNUM) + return; + + cache[reg] = value; +} + +static int dac33_read(struct snd_soc_codec *codec, unsigned int reg, + u8 *value) +{ + struct tlv320dac33_priv *dac33 = codec->private_data; + int val; + + *value = reg & 0xff; + + /* If powered off, return the cached value */ + if (dac33->chip_power) { + val = i2c_smbus_read_byte_data(codec->control_data, value[0]); + if (val < 0) { + dev_err(codec->dev, "Read failed (%d)\n", val); + value[0] = dac33_read_reg_cache(codec, reg); + } else { + value[0] = val; + dac33_write_reg_cache(codec, reg, val); + } + } else { + value[0] = dac33_read_reg_cache(codec, reg); + } + + return 0; +} + +static int dac33_write(struct snd_soc_codec *codec, unsigned int reg, + unsigned int value) +{ + struct tlv320dac33_priv *dac33 = codec->private_data; + u8 data[2]; + int ret = 0; + + /* + * data is + * D15..D8 dac33 register offset + * D7...D0 register data + */ + data[0] = reg & 0xff; + data[1] = value & 0xff; + + dac33_write_reg_cache(codec, data[0], data[1]); + if (dac33->chip_power) { + ret = codec->hw_write(codec->control_data, data, 2); + if (ret != 2) + dev_err(codec->dev, "Write failed (%d)\n", ret); + else + ret = 0; + } + + return ret; +} + +static int dac33_write_locked(struct snd_soc_codec *codec, unsigned int reg, + unsigned int value) +{ + struct tlv320dac33_priv *dac33 = codec->private_data; + int ret; + + mutex_lock(&dac33->mutex); + ret = dac33_write(codec, reg, value); + mutex_unlock(&dac33->mutex); + + return ret; +} + +#define DAC33_I2C_ADDR_AUTOINC 0x80 +static int dac33_write16(struct snd_soc_codec *codec, unsigned int reg, + unsigned int value) +{ + struct tlv320dac33_priv *dac33 = codec->private_data; + u8 data[3]; + int ret = 0; + + /* + * data is + * D23..D16 dac33 register offset + * D15..D8 register data MSB + * D7...D0 register data LSB + */ + data[0] = reg & 0xff; + data[1] = (value >> 8) & 0xff; + data[2] = value & 0xff; + + dac33_write_reg_cache(codec, data[0], data[1]); + dac33_write_reg_cache(codec, data[0] + 1, data[2]); + + if (dac33->chip_power) { + /* We need to set autoincrement mode for 16 bit writes */ + data[0] |= DAC33_I2C_ADDR_AUTOINC; + ret = codec->hw_write(codec->control_data, data, 3); + if (ret != 3) + dev_err(codec->dev, "Write failed (%d)\n", ret); + else + ret = 0; + } + + return ret; +} + +static void dac33_restore_regs(struct snd_soc_codec *codec) +{ + struct tlv320dac33_priv *dac33 = codec->private_data; + u8 *cache = codec->reg_cache; + u8 data[2]; + int i, ret; + + if (!dac33->chip_power) + return; + + for (i = DAC33_PWR_CTRL; i <= DAC33_INTP_CTRL_B; i++) { + data[0] = i; + data[1] = cache[i]; + /* Skip the read only registers */ + if ((i >= DAC33_INT_OSC_STATUS && + i <= DAC33_INT_OSC_FREQ_RAT_READ_B) || + (i >= DAC33_FIFO_WPTR_MSB && i <= DAC33_FIFO_IRQ_FLAG) || + i == DAC33_DAC_STATUS_FLAGS || + i == DAC33_SRC_EST_REF_CLK_RATIO_A || + i == DAC33_SRC_EST_REF_CLK_RATIO_B) + continue; + ret = codec->hw_write(codec->control_data, data, 2); + if (ret != 2) + dev_err(codec->dev, "Write failed (%d)\n", ret); + } + for (i = DAC33_LDAC_PWR_CTRL; i <= DAC33_LINEL_TO_LLO_VOL; i++) { + data[0] = i; + data[1] = cache[i]; + ret = codec->hw_write(codec->control_data, data, 2); + if (ret != 2) + dev_err(codec->dev, "Write failed (%d)\n", ret); + } + for (i = DAC33_LINER_TO_RLO_VOL; i <= DAC33_OSC_TRIM; i++) { + data[0] = i; + data[1] = cache[i]; + ret = codec->hw_write(codec->control_data, data, 2); + if (ret != 2) + dev_err(codec->dev, "Write failed (%d)\n", ret); + } +} + +static inline void dac33_soft_power(struct snd_soc_codec *codec, int power) +{ + u8 reg; + + reg = dac33_read_reg_cache(codec, DAC33_PWR_CTRL); + if (power) + reg |= DAC33_PDNALLB; + else + reg &= ~DAC33_PDNALLB; + dac33_write(codec, DAC33_PWR_CTRL, reg); +} + +static void dac33_hard_power(struct snd_soc_codec *codec, int power) +{ + struct tlv320dac33_priv *dac33 = codec->private_data; + + mutex_lock(&dac33->mutex); + if (power) { + if (dac33->power_gpio >= 0) { + gpio_set_value(dac33->power_gpio, 1); + dac33->chip_power = 1; + /* Restore registers */ + dac33_restore_regs(codec); + } + dac33_soft_power(codec, 1); + } else { + dac33_soft_power(codec, 0); + if (dac33->power_gpio >= 0) { + gpio_set_value(dac33->power_gpio, 0); + dac33->chip_power = 0; + } + } + mutex_unlock(&dac33->mutex); + +} + +static int dac33_get_nsample(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); + struct tlv320dac33_priv *dac33 = codec->private_data; + + ucontrol->value.integer.value[0] = dac33->nsample; + + return 0; +} + +static int dac33_set_nsample(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); + struct tlv320dac33_priv *dac33 = codec->private_data; + int ret = 0; + + if (dac33->nsample == ucontrol->value.integer.value[0]) + return 0; + + if (ucontrol->value.integer.value[0] < dac33->nsample_min || + ucontrol->value.integer.value[0] > dac33->nsample_max) + ret = -EINVAL; + else + dac33->nsample = ucontrol->value.integer.value[0]; + + return ret; +} + +static int dac33_get_nsample_switch(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); + struct tlv320dac33_priv *dac33 = codec->private_data; + + ucontrol->value.integer.value[0] = dac33->nsample_switch; + + return 0; +} + +static int dac33_set_nsample_switch(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); + struct tlv320dac33_priv *dac33 = codec->private_data; + int ret = 0; + + if (dac33->nsample_switch == ucontrol->value.integer.value[0]) + return 0; + /* Do not allow changes while stream is running*/ + if (codec->active) + return -EPERM; + + if (ucontrol->value.integer.value[0] < 0 || + ucontrol->value.integer.value[0] > 1) + ret = -EINVAL; + else + dac33->nsample_switch = ucontrol->value.integer.value[0]; + + return ret; +} + +/* + * DACL/R digital volume control: + * from 0 dB to -63.5 in 0.5 dB steps + * Need to be inverted later on: + * 0x00 == 0 dB + * 0x7f == -63.5 dB + */ +static DECLARE_TLV_DB_SCALE(dac_digivol_tlv, -6350, 50, 0); + +static const struct snd_kcontrol_new dac33_snd_controls[] = { + SOC_DOUBLE_R_TLV("DAC Digital Playback Volume", + DAC33_LDAC_DIG_VOL_CTRL, DAC33_RDAC_DIG_VOL_CTRL, + 0, 0x7f, 1, dac_digivol_tlv), + SOC_DOUBLE_R("DAC Digital Playback Switch", + DAC33_LDAC_DIG_VOL_CTRL, DAC33_RDAC_DIG_VOL_CTRL, 7, 1, 1), + SOC_DOUBLE_R("Line to Line Out Volume", + DAC33_LINEL_TO_LLO_VOL, DAC33_LINER_TO_RLO_VOL, 0, 127, 1), +}; + +static const struct snd_kcontrol_new dac33_nsample_snd_controls[] = { + SOC_SINGLE_EXT("nSample", 0, 0, 5900, 0, + dac33_get_nsample, dac33_set_nsample), + SOC_SINGLE_EXT("nSample Switch", 0, 0, 1, 0, + dac33_get_nsample_switch, dac33_set_nsample_switch), +}; + +/* Analog bypass */ +static const struct snd_kcontrol_new dac33_dapm_abypassl_control = + SOC_DAPM_SINGLE("Switch", DAC33_LINEL_TO_LLO_VOL, 7, 1, 1); + +static const struct snd_kcontrol_new dac33_dapm_abypassr_control = + SOC_DAPM_SINGLE("Switch", DAC33_LINER_TO_RLO_VOL, 7, 1, 1); + +static const struct snd_soc_dapm_widget dac33_dapm_widgets[] = { + SND_SOC_DAPM_OUTPUT("LEFT_LO"), + SND_SOC_DAPM_OUTPUT("RIGHT_LO"), + + SND_SOC_DAPM_INPUT("LINEL"), + SND_SOC_DAPM_INPUT("LINER"), + + SND_SOC_DAPM_DAC("DACL", "Left Playback", DAC33_LDAC_PWR_CTRL, 2, 0), + SND_SOC_DAPM_DAC("DACR", "Right Playback", DAC33_RDAC_PWR_CTRL, 2, 0), + + /* Analog bypass */ + SND_SOC_DAPM_SWITCH("Analog Left Bypass", SND_SOC_NOPM, 0, 0, + &dac33_dapm_abypassl_control), + SND_SOC_DAPM_SWITCH("Analog Right Bypass", SND_SOC_NOPM, 0, 0, + &dac33_dapm_abypassr_control), + + SND_SOC_DAPM_REG(snd_soc_dapm_mixer, "Output Left Amp Power", + DAC33_OUT_AMP_PWR_CTRL, 6, 3, 3, 0), + SND_SOC_DAPM_REG(snd_soc_dapm_mixer, "Output Right Amp Power", + DAC33_OUT_AMP_PWR_CTRL, 4, 3, 3, 0), +}; + +static const struct snd_soc_dapm_route audio_map[] = { + /* Analog bypass */ + {"Analog Left Bypass", "Switch", "LINEL"}, + {"Analog Right Bypass", "Switch", "LINER"}, + + {"Output Left Amp Power", NULL, "DACL"}, + {"Output Right Amp Power", NULL, "DACR"}, + + {"Output Left Amp Power", NULL, "Analog Left Bypass"}, + {"Output Right Amp Power", NULL, "Analog Right Bypass"}, + + /* output */ + {"LEFT_LO", NULL, "Output Left Amp Power"}, + {"RIGHT_LO", NULL, "Output Right Amp Power"}, +}; + +static int dac33_add_widgets(struct snd_soc_codec *codec) +{ + snd_soc_dapm_new_controls(codec, dac33_dapm_widgets, + ARRAY_SIZE(dac33_dapm_widgets)); + + /* set up audio path interconnects */ + snd_soc_dapm_add_routes(codec, audio_map, ARRAY_SIZE(audio_map)); + snd_soc_dapm_new_widgets(codec); + + return 0; +} + +static int dac33_set_bias_level(struct snd_soc_codec *codec, + enum snd_soc_bias_level level) +{ + switch (level) { + case SND_SOC_BIAS_ON: + dac33_soft_power(codec, 1); + break; + case SND_SOC_BIAS_PREPARE: + break; + case SND_SOC_BIAS_STANDBY: + if (codec->bias_level == SND_SOC_BIAS_OFF) + dac33_hard_power(codec, 1); + dac33_soft_power(codec, 0); + break; + case SND_SOC_BIAS_OFF: + dac33_hard_power(codec, 0); + break; + } + codec->bias_level = level; + + return 0; +} + +static void dac33_work(struct work_struct *work) +{ + struct snd_soc_codec *codec; + struct tlv320dac33_priv *dac33; + u8 reg; + + dac33 = container_of(work, struct tlv320dac33_priv, work); + codec = &dac33->codec; + + mutex_lock(&dac33->mutex); + switch (dac33->state) { + case DAC33_PREFILL: + dac33->state = DAC33_PLAYBACK; + dac33_write16(codec, DAC33_NSAMPLE_MSB, + DAC33_THRREG(dac33->nsample)); + dac33_write16(codec, DAC33_PREFILL_MSB, + DAC33_THRREG(dac33->alarm_threshold)); + break; + case DAC33_PLAYBACK: + dac33_write16(codec, DAC33_NSAMPLE_MSB, + DAC33_THRREG(dac33->nsample)); + break; + case DAC33_IDLE: + break; + case DAC33_FLUSH: + dac33->state = DAC33_IDLE; + /* Mask all interrupts from dac33 */ + dac33_write(codec, DAC33_FIFO_IRQ_MASK, 0); + + /* flush fifo */ + reg = dac33_read_reg_cache(codec, DAC33_FIFO_CTRL_A); + reg |= DAC33_FIFOFLUSH; + dac33_write(codec, DAC33_FIFO_CTRL_A, reg); + break; + } + mutex_unlock(&dac33->mutex); +} + +static irqreturn_t dac33_interrupt_handler(int irq, void *dev) +{ + struct snd_soc_codec *codec = dev; + struct tlv320dac33_priv *dac33 = codec->private_data; + + queue_work(dac33->dac33_wq, &dac33->work); + + return IRQ_HANDLED; +} + +static void dac33_shutdown(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) +{ + struct snd_soc_pcm_runtime *rtd = substream->private_data; + struct snd_soc_device *socdev = rtd->socdev; + struct snd_soc_codec *codec = socdev->card->codec; + struct tlv320dac33_priv *dac33 = codec->private_data; + unsigned int pwr_ctrl; + + /* Stop pending workqueue */ + if (dac33->nsample_switch) + cancel_work_sync(&dac33->work); + + mutex_lock(&dac33->mutex); + pwr_ctrl = dac33_read_reg_cache(codec, DAC33_PWR_CTRL); + pwr_ctrl &= ~(DAC33_OSCPDNB | DAC33_DACRPDNB | DAC33_DACLPDNB); + dac33_write(codec, DAC33_PWR_CTRL, pwr_ctrl); + mutex_unlock(&dac33->mutex); +} + +static void dac33_oscwait(struct snd_soc_codec *codec) +{ + int timeout = 20; + u8 reg; + + do { + msleep(1); + dac33_read(codec, DAC33_INT_OSC_STATUS, ®); + } while (((reg & 0x03) != DAC33_OSCSTATUS_NORMAL) && timeout--); + if ((reg & 0x03) != DAC33_OSCSTATUS_NORMAL) + dev_err(codec->dev, + "internal oscillator calibration failed\n"); +} + +static int dac33_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai) +{ + struct snd_soc_pcm_runtime *rtd = substream->private_data; + struct snd_soc_device *socdev = rtd->socdev; + struct snd_soc_codec *codec = socdev->card->codec; + + /* Check parameters for validity */ + switch (params_rate(params)) { + case 44100: + case 48000: + break; + default: + dev_err(codec->dev, "unsupported rate %d\n", + params_rate(params)); + return -EINVAL; + } + + switch (params_format(params)) { + case SNDRV_PCM_FORMAT_S16_LE: + break; + default: + dev_err(codec->dev, "unsupported format %d\n", + params_format(params)); + return -EINVAL; + } + + return 0; +} + +#define CALC_OSCSET(rate, refclk) ( \ + ((((rate * 10000) / refclk) * 4096) + 5000) / 10000) +#define CALC_RATIOSET(rate, refclk) ( \ + ((((refclk * 100000) / rate) * 16384) + 50000) / 100000) + +/* + * tlv320dac33 is strict on the sequence of the register writes, if the register + * writes happens in different order, than dac33 might end up in unknown state. + * Use the known, working sequence of register writes to initialize the dac33. + */ +static int dac33_prepare_chip(struct snd_pcm_substream *substream) +{ + struct snd_soc_pcm_runtime *rtd = substream->private_data; + struct snd_soc_device *socdev = rtd->socdev; + struct snd_soc_codec *codec = socdev->card->codec; + struct tlv320dac33_priv *dac33 = codec->private_data; + unsigned int oscset, ratioset, pwr_ctrl, reg_tmp; + u8 aictrl_a, fifoctrl_a; + + switch (substream->runtime->rate) { + case 44100: + case 48000: + oscset = CALC_OSCSET(substream->runtime->rate, dac33->refclk); + ratioset = CALC_RATIOSET(substream->runtime->rate, + dac33->refclk); + break; + default: + dev_err(codec->dev, "unsupported rate %d\n", + substream->runtime->rate); + return -EINVAL; + } + + + aictrl_a = dac33_read_reg_cache(codec, DAC33_SER_AUDIOIF_CTRL_A); + aictrl_a &= ~(DAC33_NCYCL_MASK | DAC33_WLEN_MASK); + fifoctrl_a = dac33_read_reg_cache(codec, DAC33_FIFO_CTRL_A); + fifoctrl_a &= ~DAC33_WIDTH; + switch (substream->runtime->format) { + case SNDRV_PCM_FORMAT_S16_LE: + aictrl_a |= (DAC33_NCYCL_16 | DAC33_WLEN_16); + fifoctrl_a |= DAC33_WIDTH; + break; + default: + dev_err(codec->dev, "unsupported format %d\n", + substream->runtime->format); + return -EINVAL; + } + + mutex_lock(&dac33->mutex); + dac33_soft_power(codec, 1); + + reg_tmp = dac33_read_reg_cache(codec, DAC33_INT_OSC_CTRL); + dac33_write(codec, DAC33_INT_OSC_CTRL, reg_tmp); + + /* Write registers 0x08 and 0x09 (MSB, LSB) */ + dac33_write16(codec, DAC33_INT_OSC_FREQ_RAT_A, oscset); + + /* calib time: 128 is a nice number ;) */ + dac33_write(codec, DAC33_CALIB_TIME, 128); + + /* adjustment treshold & step */ + dac33_write(codec, DAC33_INT_OSC_CTRL_B, DAC33_ADJTHRSHLD(2) | + DAC33_ADJSTEP(1)); + + /* div=4 / gain=1 / div */ + dac33_write(codec, DAC33_INT_OSC_CTRL_C, DAC33_REFDIV(4)); + + pwr_ctrl = dac33_read_reg_cache(codec, DAC33_PWR_CTRL); + pwr_ctrl |= DAC33_OSCPDNB | DAC33_DACRPDNB | DAC33_DACLPDNB; + dac33_write(codec, DAC33_PWR_CTRL, pwr_ctrl); + + dac33_oscwait(codec); + + if (dac33->nsample_switch) { + /* 50-51 : ASRC Control registers */ + dac33_write(codec, DAC33_ASRC_CTRL_A, (1 << 4)); /* div=2 */ + dac33_write(codec, DAC33_ASRC_CTRL_B, 1); /* ??? */ + + /* Write registers 0x34 and 0x35 (MSB, LSB) */ + dac33_write16(codec, DAC33_SRC_REF_CLK_RATIO_A, ratioset); + + /* Set interrupts to high active */ + dac33_write(codec, DAC33_INTP_CTRL_A, DAC33_INTPM_AHIGH); + + dac33_write(codec, DAC33_FIFO_IRQ_MODE_B, + DAC33_ATM(DAC33_FIFO_IRQ_MODE_LEVEL)); + dac33_write(codec, DAC33_FIFO_IRQ_MASK, DAC33_MAT); + } else { + /* 50-51 : ASRC Control registers */ + dac33_write(codec, DAC33_ASRC_CTRL_A, DAC33_SRCBYP); + dac33_write(codec, DAC33_ASRC_CTRL_B, 0); /* ??? */ + } + + if (dac33->nsample_switch) + fifoctrl_a &= ~DAC33_FBYPAS; + else + fifoctrl_a |= DAC33_FBYPAS; + dac33_write(codec, DAC33_FIFO_CTRL_A, fifoctrl_a); + + dac33_write(codec, DAC33_SER_AUDIOIF_CTRL_A, aictrl_a); + reg_tmp = dac33_read_reg_cache(codec, DAC33_SER_AUDIOIF_CTRL_B); + if (dac33->nsample_switch) + reg_tmp &= ~DAC33_BCLKON; + else + reg_tmp |= DAC33_BCLKON; + dac33_write(codec, DAC33_SER_AUDIOIF_CTRL_B, reg_tmp); + + if (dac33->nsample_switch) { + /* 20: BCLK divide ratio */ + dac33_write(codec, DAC33_SER_AUDIOIF_CTRL_C, 3); + + dac33_write16(codec, DAC33_ATHR_MSB, + DAC33_THRREG(dac33->alarm_threshold)); + } else { + dac33_write(codec, DAC33_SER_AUDIOIF_CTRL_C, 32); + } + + mutex_unlock(&dac33->mutex); + + return 0; +} + +static void dac33_calculate_times(struct snd_pcm_substream *substream) +{ + struct snd_soc_pcm_runtime *rtd = substream->private_data; + struct snd_soc_device *socdev = rtd->socdev; + struct snd_soc_codec *codec = socdev->card->codec; + struct tlv320dac33_priv *dac33 = codec->private_data; + unsigned int nsample_limit; + + /* Number of samples (16bit, stereo) in one period */ + dac33->nsample_min = snd_pcm_lib_period_bytes(substream) / 4; + + /* Number of samples (16bit, stereo) in ALSA buffer */ + dac33->nsample_max = snd_pcm_lib_buffer_bytes(substream) / 4; + /* Subtract one period from the total */ + dac33->nsample_max -= dac33->nsample_min; + + /* Number of samples for LATENCY_TIME_MS / 2 */ + dac33->alarm_threshold = substream->runtime->rate / + (1000 / (LATENCY_TIME_MS / 2)); + + /* Find and fix up the lowest nsmaple limit */ + nsample_limit = substream->runtime->rate / (1000 / LATENCY_TIME_MS); + + if (dac33->nsample_min < nsample_limit) + dac33->nsample_min = nsample_limit; + + if (dac33->nsample < dac33->nsample_min) + dac33->nsample = dac33->nsample_min; + + /* + * Find and fix up the highest nsmaple limit + * In order to not overflow the DAC33 buffer substract the + * alarm_threshold value from the size of the DAC33 buffer + */ + nsample_limit = DAC33_BUFFER_SIZE_SAMPLES - dac33->alarm_threshold; + + if (dac33->nsample_max > nsample_limit) + dac33->nsample_max = nsample_limit; + + if (dac33->nsample > dac33->nsample_max) + dac33->nsample = dac33->nsample_max; +} + +static int dac33_pcm_prepare(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) +{ + dac33_calculate_times(substream); + dac33_prepare_chip(substream); + + return 0; +} + +static int dac33_pcm_trigger(struct snd_pcm_substream *substream, int cmd, + struct snd_soc_dai *dai) +{ + struct snd_soc_pcm_runtime *rtd = substream->private_data; + struct snd_soc_device *socdev = rtd->socdev; + struct snd_soc_codec *codec = socdev->card->codec; + struct tlv320dac33_priv *dac33 = codec->private_data; + int ret = 0; + + switch (cmd) { + case SNDRV_PCM_TRIGGER_START: + case SNDRV_PCM_TRIGGER_RESUME: + case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: + if (dac33->nsample_switch) { + dac33->state = DAC33_PREFILL; + queue_work(dac33->dac33_wq, &dac33->work); + } + break; + case SNDRV_PCM_TRIGGER_STOP: + case SNDRV_PCM_TRIGGER_SUSPEND: + case SNDRV_PCM_TRIGGER_PAUSE_PUSH: + if (dac33->nsample_switch) { + dac33->state = DAC33_FLUSH; + queue_work(dac33->dac33_wq, &dac33->work); + } + break; + default: + ret = -EINVAL; + } + + return ret; +} + +static int dac33_set_dai_sysclk(struct snd_soc_dai *codec_dai, + int clk_id, unsigned int freq, int dir) +{ + struct snd_soc_codec *codec = codec_dai->codec; + struct tlv320dac33_priv *dac33 = codec->private_data; + u8 ioc_reg, asrcb_reg; + + ioc_reg = dac33_read_reg_cache(codec, DAC33_INT_OSC_CTRL); + asrcb_reg = dac33_read_reg_cache(codec, DAC33_ASRC_CTRL_B); + switch (clk_id) { + case TLV320DAC33_MCLK: + ioc_reg |= DAC33_REFSEL; + asrcb_reg |= DAC33_SRCREFSEL; + break; + case TLV320DAC33_SLEEPCLK: + ioc_reg &= ~DAC33_REFSEL; + asrcb_reg &= ~DAC33_SRCREFSEL; + break; + default: + dev_err(codec->dev, "Invalid clock ID (%d)\n", clk_id); + break; + } + dac33->refclk = freq; + + dac33_write_reg_cache(codec, DAC33_INT_OSC_CTRL, ioc_reg); + dac33_write_reg_cache(codec, DAC33_ASRC_CTRL_B, asrcb_reg); + + return 0; +} + +static int dac33_set_dai_fmt(struct snd_soc_dai *codec_dai, + unsigned int fmt) +{ + struct snd_soc_codec *codec = codec_dai->codec; + u8 aictrl_a, aictrl_b; + + aictrl_a = dac33_read_reg_cache(codec, DAC33_SER_AUDIOIF_CTRL_A); + aictrl_b = dac33_read_reg_cache(codec, DAC33_SER_AUDIOIF_CTRL_B); + /* set master/slave audio interface */ + switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBM_CFM: + /* Codec Master */ + aictrl_a |= (DAC33_MSBCLK | DAC33_MSWCLK); + break; + case SND_SOC_DAIFMT_CBS_CFS: + /* Codec Slave */ + aictrl_a &= ~(DAC33_MSBCLK | DAC33_MSWCLK); + break; + default: + return -EINVAL; + } + + aictrl_a &= ~DAC33_AFMT_MASK; + switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) { + case SND_SOC_DAIFMT_I2S: + aictrl_a |= DAC33_AFMT_I2S; + break; + case SND_SOC_DAIFMT_DSP_A: + aictrl_a |= DAC33_AFMT_DSP; + aictrl_b &= ~DAC33_DATA_DELAY_MASK; + aictrl_b |= DAC33_DATA_DELAY(1); /* 1 bit delay */ + break; + case SND_SOC_DAIFMT_DSP_B: + aictrl_a |= DAC33_AFMT_DSP; + aictrl_b &= ~DAC33_DATA_DELAY_MASK; /* No delay */ + break; + case SND_SOC_DAIFMT_RIGHT_J: + aictrl_a |= DAC33_AFMT_RIGHT_J; + break; + case SND_SOC_DAIFMT_LEFT_J: + aictrl_a |= DAC33_AFMT_LEFT_J; + break; + default: + dev_err(codec->dev, "Unsupported format (%u)\n", + fmt & SND_SOC_DAIFMT_FORMAT_MASK); + return -EINVAL; + } + + dac33_write_reg_cache(codec, DAC33_SER_AUDIOIF_CTRL_A, aictrl_a); + dac33_write_reg_cache(codec, DAC33_SER_AUDIOIF_CTRL_B, aictrl_b); + + return 0; +} + +static void dac33_init_chip(struct snd_soc_codec *codec) +{ + /* 44-46: DAC Control Registers */ + /* A : DAC sample rate Fsref/1.5 */ + dac33_write(codec, DAC33_DAC_CTRL_A, DAC33_DACRATE(1)); + /* B : DAC src=normal, not muted */ + dac33_write(codec, DAC33_DAC_CTRL_B, DAC33_DACSRCR_RIGHT | + DAC33_DACSRCL_LEFT); + /* C : (defaults) */ + dac33_write(codec, DAC33_DAC_CTRL_C, 0x00); + + /* 64-65 : L&R DAC power control + Line In -> OUT 1V/V Gain, DAC -> OUT 4V/V Gain*/ + dac33_write(codec, DAC33_LDAC_PWR_CTRL, DAC33_LROUT_GAIN(2)); + dac33_write(codec, DAC33_RDAC_PWR_CTRL, DAC33_LROUT_GAIN(2)); + + /* 73 : volume soft stepping control, + clock source = internal osc (?) */ + dac33_write(codec, DAC33_ANA_VOL_SOFT_STEP_CTRL, DAC33_VOLCLKEN); + + /* 66 : LOP/LOM Modes */ + dac33_write(codec, DAC33_OUT_AMP_CM_CTRL, 0xff); + + /* 68 : LOM inverted from LOP */ + dac33_write(codec, DAC33_OUT_AMP_CTRL, (3<<2)); + + dac33_write(codec, DAC33_PWR_CTRL, DAC33_PDNALLB); +} + +static int dac33_soc_probe(struct platform_device *pdev) +{ + struct snd_soc_device *socdev = platform_get_drvdata(pdev); + struct snd_soc_codec *codec; + struct tlv320dac33_priv *dac33; + int ret = 0; + + BUG_ON(!tlv320dac33_codec); + + codec = tlv320dac33_codec; + socdev->card->codec = codec; + dac33 = codec->private_data; + + /* Power up the codec */ + dac33_hard_power(codec, 1); + /* Set default configuration */ + dac33_init_chip(codec); + + /* register pcms */ + ret = snd_soc_new_pcms(socdev, SNDRV_DEFAULT_IDX1, SNDRV_DEFAULT_STR1); + if (ret < 0) { + dev_err(codec->dev, "failed to create pcms\n"); + goto pcm_err; + } + + snd_soc_add_controls(codec, dac33_snd_controls, + ARRAY_SIZE(dac33_snd_controls)); + /* Only add the nSample controls, if we have valid IRQ number */ + if (dac33->irq >= 0) + snd_soc_add_controls(codec, dac33_nsample_snd_controls, + ARRAY_SIZE(dac33_nsample_snd_controls)); + + dac33_add_widgets(codec); + + /* power on device */ + dac33_set_bias_level(codec, SND_SOC_BIAS_STANDBY); + + ret = snd_soc_init_card(socdev); + if (ret < 0) { + dev_err(codec->dev, "failed to register card\n"); + goto card_err; + } + + return 0; +card_err: + snd_soc_free_pcms(socdev); + snd_soc_dapm_free(socdev); +pcm_err: + dac33_hard_power(codec, 0); + return ret; +} + +static int dac33_soc_remove(struct platform_device *pdev) +{ + struct snd_soc_device *socdev = platform_get_drvdata(pdev); + struct snd_soc_codec *codec = socdev->card->codec; + + dac33_set_bias_level(codec, SND_SOC_BIAS_OFF); + + snd_soc_free_pcms(socdev); + snd_soc_dapm_free(socdev); + + return 0; +} + +static int dac33_soc_suspend(struct platform_device *pdev, pm_message_t state) +{ + struct snd_soc_device *socdev = platform_get_drvdata(pdev); + struct snd_soc_codec *codec = socdev->card->codec; + + dac33_set_bias_level(codec, SND_SOC_BIAS_OFF); + + return 0; +} + +static int dac33_soc_resume(struct platform_device *pdev) +{ + struct snd_soc_device *socdev = platform_get_drvdata(pdev); + struct snd_soc_codec *codec = socdev->card->codec; + + dac33_set_bias_level(codec, SND_SOC_BIAS_STANDBY); + dac33_set_bias_level(codec, codec->suspend_bias_level); + + return 0; +} + +struct snd_soc_codec_device soc_codec_dev_tlv320dac33 = { + .probe = dac33_soc_probe, + .remove = dac33_soc_remove, + .suspend = dac33_soc_suspend, + .resume = dac33_soc_resume, +}; +EXPORT_SYMBOL_GPL(soc_codec_dev_tlv320dac33); + +#define DAC33_RATES (SNDRV_PCM_RATE_44100 | \ + SNDRV_PCM_RATE_48000) +#define DAC33_FORMATS SNDRV_PCM_FMTBIT_S16_LE + +static struct snd_soc_dai_ops dac33_dai_ops = { + .shutdown = dac33_shutdown, + .hw_params = dac33_hw_params, + .prepare = dac33_pcm_prepare, + .trigger = dac33_pcm_trigger, + .set_sysclk = dac33_set_dai_sysclk, + .set_fmt = dac33_set_dai_fmt, +}; + +struct snd_soc_dai dac33_dai = { + .name = "tlv320dac33", + .playback = { + .stream_name = "Playback", + .channels_min = 2, + .channels_max = 2, + .rates = DAC33_RATES, + .formats = DAC33_FORMATS,}, + .ops = &dac33_dai_ops, +}; +EXPORT_SYMBOL_GPL(dac33_dai); + +static int dac33_i2c_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct tlv320dac33_platform_data *pdata; + struct tlv320dac33_priv *dac33; + struct snd_soc_codec *codec; + int ret = 0; + + if (client->dev.platform_data == NULL) { + dev_err(&client->dev, "Platform data not set\n"); + return -ENODEV; + } + pdata = client->dev.platform_data; + + dac33 = kzalloc(sizeof(struct tlv320dac33_priv), GFP_KERNEL); + if (dac33 == NULL) + return -ENOMEM; + + codec = &dac33->codec; + codec->private_data = dac33; + codec->control_data = client; + + mutex_init(&codec->mutex); + mutex_init(&dac33->mutex); + INIT_LIST_HEAD(&codec->dapm_widgets); + INIT_LIST_HEAD(&codec->dapm_paths); + + codec->name = "tlv320dac33"; + codec->owner = THIS_MODULE; + codec->read = dac33_read_reg_cache; + codec->write = dac33_write_locked; + codec->hw_write = (hw_write_t) i2c_master_send; + codec->bias_level = SND_SOC_BIAS_OFF; + codec->set_bias_level = dac33_set_bias_level; + codec->dai = &dac33_dai; + codec->num_dai = 1; + codec->reg_cache_size = ARRAY_SIZE(dac33_reg); + codec->reg_cache = kmemdup(dac33_reg, ARRAY_SIZE(dac33_reg), + GFP_KERNEL); + if (codec->reg_cache == NULL) { + ret = -ENOMEM; + goto error_reg; + } + + i2c_set_clientdata(client, dac33); + + dac33->power_gpio = pdata->power_gpio; + dac33->irq = client->irq; + dac33->nsample = NSAMPLE_MAX; + /* Disable FIFO use by default */ + dac33->nsample_switch = 0; + + tlv320dac33_codec = codec; + + codec->dev = &client->dev; + dac33_dai.dev = codec->dev; + + /* Check if the reset GPIO number is valid and request it */ + if (dac33->power_gpio >= 0) { + ret = gpio_request(dac33->power_gpio, "tlv320dac33 reset"); + if (ret < 0) { + dev_err(codec->dev, + "Failed to request reset GPIO (%d)\n", + dac33->power_gpio); + snd_soc_unregister_dai(&dac33_dai); + snd_soc_unregister_codec(codec); + goto error_gpio; + } + gpio_direction_output(dac33->power_gpio, 0); + } else { + dac33->chip_power = 1; + } + + /* Check if the IRQ number is valid and request it */ + if (dac33->irq >= 0) { + ret = request_irq(dac33->irq, dac33_interrupt_handler, + IRQF_TRIGGER_RISING | IRQF_DISABLED, + codec->name, codec); + if (ret < 0) { + dev_err(codec->dev, "Could not request IRQ%d (%d)\n", + dac33->irq, ret); + dac33->irq = -1; + } + if (dac33->irq != -1) { + /* Setup work queue */ + dac33->dac33_wq = create_rt_workqueue("tlv320dac33"); + if (dac33->dac33_wq == NULL) { + free_irq(dac33->irq, &dac33->codec); + ret = -ENOMEM; + goto error_wq; + } + + INIT_WORK(&dac33->work, dac33_work); + } + } + + ret = snd_soc_register_codec(codec); + if (ret != 0) { + dev_err(codec->dev, "Failed to register codec: %d\n", ret); + goto error_codec; + } + + ret = snd_soc_register_dai(&dac33_dai); + if (ret != 0) { + dev_err(codec->dev, "Failed to register DAI: %d\n", ret); + snd_soc_unregister_codec(codec); + goto error_codec; + } + + /* Shut down the codec for now */ + dac33_hard_power(codec, 0); + + return ret; + +error_codec: + if (dac33->irq >= 0) { + free_irq(dac33->irq, &dac33->codec); + destroy_workqueue(dac33->dac33_wq); + } +error_wq: + if (dac33->power_gpio >= 0) + gpio_free(dac33->power_gpio); +error_gpio: + kfree(codec->reg_cache); +error_reg: + tlv320dac33_codec = NULL; + kfree(dac33); + + return ret; +} + +static int dac33_i2c_remove(struct i2c_client *client) +{ + struct tlv320dac33_priv *dac33; + + dac33 = i2c_get_clientdata(client); + dac33_hard_power(&dac33->codec, 0); + + if (dac33->power_gpio >= 0) + gpio_free(dac33->power_gpio); + if (dac33->irq >= 0) + free_irq(dac33->irq, &dac33->codec); + + destroy_workqueue(dac33->dac33_wq); + snd_soc_unregister_dai(&dac33_dai); + snd_soc_unregister_codec(&dac33->codec); + kfree(dac33->codec.reg_cache); + kfree(dac33); + tlv320dac33_codec = NULL; + + return 0; +} + +static const struct i2c_device_id tlv320dac33_i2c_id[] = { + { + .name = "tlv320dac33", + .driver_data = 0, + }, + { }, +}; + +static struct i2c_driver tlv320dac33_i2c_driver = { + .driver = { + .name = "tlv320dac33", + .owner = THIS_MODULE, + }, + .probe = dac33_i2c_probe, + .remove = __devexit_p(dac33_i2c_remove), + .id_table = tlv320dac33_i2c_id, +}; + +static int __init dac33_module_init(void) +{ + int r; + r = i2c_add_driver(&tlv320dac33_i2c_driver); + if (r < 0) { + printk(KERN_ERR "DAC33: driver registration failed\n"); + return r; + } + return 0; +} +module_init(dac33_module_init); + +static void __exit dac33_module_exit(void) +{ + i2c_del_driver(&tlv320dac33_i2c_driver); +} +module_exit(dac33_module_exit); + + +MODULE_DESCRIPTION("ASoC TLV320DAC33 codec driver"); +MODULE_AUTHOR("Peter Ujfalusi "); +MODULE_LICENSE("GPL"); diff --git a/sound/soc/codecs/tlv320dac33.h b/sound/soc/codecs/tlv320dac33.h new file mode 100644 index 000000000000..0fedd709028e --- /dev/null +++ b/sound/soc/codecs/tlv320dac33.h @@ -0,0 +1,267 @@ +/* + * ALSA SoC Texas Instruments TLV320DAC33 codec driver + * + * Author: Peter Ujfalusi + * + * Copyright: (C) 2009 Nokia Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + * + */ + +#ifndef __TLV320DAC33_H +#define __TLV320DAC33_H + +#define DAC33_PAGE_SELECT 0x00 +#define DAC33_PWR_CTRL 0x01 +#define DAC33_PLL_CTRL_A 0x02 +#define DAC33_PLL_CTRL_B 0x03 +#define DAC33_PLL_CTRL_C 0x04 +#define DAC33_PLL_CTRL_D 0x05 +#define DAC33_PLL_CTRL_E 0x06 +#define DAC33_INT_OSC_CTRL 0x07 +#define DAC33_INT_OSC_FREQ_RAT_A 0x08 +#define DAC33_INT_OSC_FREQ_RAT_B 0x09 +#define DAC33_INT_OSC_DAC_RATIO_SET 0x0A +#define DAC33_CALIB_TIME 0x0B +#define DAC33_INT_OSC_CTRL_B 0x0C +#define DAC33_INT_OSC_CTRL_C 0x0D +#define DAC33_INT_OSC_STATUS 0x0E +#define DAC33_INT_OSC_DAC_RATIO_READ 0x0F +#define DAC33_INT_OSC_FREQ_RAT_READ_A 0x10 +#define DAC33_INT_OSC_FREQ_RAT_READ_B 0x11 +#define DAC33_SER_AUDIOIF_CTRL_A 0x12 +#define DAC33_SER_AUDIOIF_CTRL_B 0x13 +#define DAC33_SER_AUDIOIF_CTRL_C 0x14 +#define DAC33_FIFO_CTRL_A 0x15 +#define DAC33_UTHR_MSB 0x16 +#define DAC33_UTHR_LSB 0x17 +#define DAC33_ATHR_MSB 0x18 +#define DAC33_ATHR_LSB 0x19 +#define DAC33_LTHR_MSB 0x1A +#define DAC33_LTHR_LSB 0x1B +#define DAC33_PREFILL_MSB 0x1C +#define DAC33_PREFILL_LSB 0x1D +#define DAC33_NSAMPLE_MSB 0x1E +#define DAC33_NSAMPLE_LSB 0x1F +#define DAC33_FIFO_WPTR_MSB 0x20 +#define DAC33_FIFO_WPTR_LSB 0x21 +#define DAC33_FIFO_RPTR_MSB 0x22 +#define DAC33_FIFO_RPTR_LSB 0x23 +#define DAC33_FIFO_DEPTH_MSB 0x24 +#define DAC33_FIFO_DEPTH_LSB 0x25 +#define DAC33_SAMPLES_REMAINING_MSB 0x26 +#define DAC33_SAMPLES_REMAINING_LSB 0x27 +#define DAC33_FIFO_IRQ_FLAG 0x28 +#define DAC33_FIFO_IRQ_MASK 0x29 +#define DAC33_FIFO_IRQ_MODE_A 0x2A +#define DAC33_FIFO_IRQ_MODE_B 0x2B +#define DAC33_DAC_CTRL_A 0x2C +#define DAC33_DAC_CTRL_B 0x2D +#define DAC33_DAC_CTRL_C 0x2E +#define DAC33_LDAC_DIG_VOL_CTRL 0x2F +#define DAC33_RDAC_DIG_VOL_CTRL 0x30 +#define DAC33_DAC_STATUS_FLAGS 0x31 +#define DAC33_ASRC_CTRL_A 0x32 +#define DAC33_ASRC_CTRL_B 0x33 +#define DAC33_SRC_REF_CLK_RATIO_A 0x34 +#define DAC33_SRC_REF_CLK_RATIO_B 0x35 +#define DAC33_SRC_EST_REF_CLK_RATIO_A 0x36 +#define DAC33_SRC_EST_REF_CLK_RATIO_B 0x37 +#define DAC33_INTP_CTRL_A 0x38 +#define DAC33_INTP_CTRL_B 0x39 +/* Registers 0x3A - 0x3F Reserved */ +#define DAC33_LDAC_PWR_CTRL 0x40 +#define DAC33_RDAC_PWR_CTRL 0x41 +#define DAC33_OUT_AMP_CM_CTRL 0x42 +#define DAC33_OUT_AMP_PWR_CTRL 0x43 +#define DAC33_OUT_AMP_CTRL 0x44 +#define DAC33_LINEL_TO_LLO_VOL 0x45 +/* Registers 0x45 - 0x47 Reserved */ +#define DAC33_LINER_TO_RLO_VOL 0x48 +#define DAC33_ANA_VOL_SOFT_STEP_CTRL 0x49 +#define DAC33_OSC_TRIM 0x4A +/* Registers 0x4B - 0x7C Reserved */ +#define DAC33_DEVICE_ID_MSB 0x7D +#define DAC33_DEVICE_ID_LSB 0x7E +#define DAC33_DEVICE_REV_ID 0x7F + +#define DAC33_CACHEREGNUM 128 + +/* Bit definitions */ + +/* DAC33_PWR_CTRL (0x01) */ +#define DAC33_DACRPDNB (0x01 << 0) +#define DAC33_DACLPDNB (0x01 << 1) +#define DAC33_OSCPDNB (0x01 << 2) +#define DAC33_PLLPDNB (0x01 << 3) +#define DAC33_PDNALLB (0x01 << 4) +#define DAC33_SOFT_RESET (0x01 << 7) + +/* DAC33_INT_OSC_CTRL (0x07) */ +#define DAC33_REFSEL (0x01 << 1) + +/* DAC33_INT_OSC_CTRL_B (0x0C) */ +#define DAC33_ADJSTEP(x) (x << 0) +#define DAC33_ADJTHRSHLD(x) (x << 4) + +/* DAC33_INT_OSC_CTRL_C (0x0D) */ +#define DAC33_REFDIV(x) (x << 4) + +/* DAC33_INT_OSC_STATUS (0x0E) */ +#define DAC33_OSCSTATUS_IDLE_CALIB (0x00) +#define DAC33_OSCSTATUS_NORMAL (0x01) +#define DAC33_OSCSTATUS_ADJUSTMENT (0x03) +#define DAC33_OSCSTATUS_NOT_USED (0x02) + +/* DAC33_SER_AUDIOIF_CTRL_A (0x12) */ +#define DAC33_MSWCLK (0x01 << 0) +#define DAC33_MSBCLK (0x01 << 1) +#define DAC33_AFMT_MASK (0x03 << 2) +#define DAC33_AFMT_I2S (0x00 << 2) +#define DAC33_AFMT_DSP (0x01 << 2) +#define DAC33_AFMT_RIGHT_J (0x02 << 2) +#define DAC33_AFMT_LEFT_J (0x03 << 2) +#define DAC33_WLEN_MASK (0x03 << 4) +#define DAC33_WLEN_16 (0x00 << 4) +#define DAC33_WLEN_20 (0x01 << 4) +#define DAC33_WLEN_24 (0x02 << 4) +#define DAC33_WLEN_32 (0x03 << 4) +#define DAC33_NCYCL_MASK (0x03 << 6) +#define DAC33_NCYCL_16 (0x00 << 6) +#define DAC33_NCYCL_20 (0x01 << 6) +#define DAC33_NCYCL_24 (0x02 << 6) +#define DAC33_NCYCL_32 (0x03 << 6) + +/* DAC33_SER_AUDIOIF_CTRL_B (0x13) */ +#define DAC33_DATA_DELAY_MASK (0x03 << 2) +#define DAC33_DATA_DELAY(x) (x << 2) +#define DAC33_BCLKON (0x01 << 5) + +/* DAC33_FIFO_CTRL_A (0x15) */ +#define DAC33_WIDTH (0x01 << 0) +#define DAC33_FBYPAS (0x01 << 1) +#define DAC33_FAUTO (0x01 << 2) +#define DAC33_FIFOFLUSH (0x01 << 3) + +/* + * UTHR, ATHR, LTHR, PREFILL, NSAMPLE (0x16 - 0x1F) + * 13-bit values +*/ +#define DAC33_THRREG(x) (((x) & 0x1FFF) << 3) + +/* DAC33_FIFO_IRQ_MASK (0x29) */ +#define DAC33_MNS (0x01 << 0) +#define DAC33_MPS (0x01 << 1) +#define DAC33_MAT (0x01 << 2) +#define DAC33_MLT (0x01 << 3) +#define DAC33_MUT (0x01 << 4) +#define DAC33_MUF (0x01 << 5) +#define DAC33_MOF (0x01 << 6) + +#define DAC33_FIFO_IRQ_MODE_MASK (0x03) +#define DAC33_FIFO_IRQ_MODE_RISING (0x00) +#define DAC33_FIFO_IRQ_MODE_FALLING (0x01) +#define DAC33_FIFO_IRQ_MODE_LEVEL (0x02) +#define DAC33_FIFO_IRQ_MODE_EDGE (0x03) + +/* DAC33_FIFO_IRQ_MODE_A (0x2A) */ +#define DAC33_UTM(x) (x << 0) +#define DAC33_UFM(x) (x << 2) +#define DAC33_OFM(x) (x << 4) + +/* DAC33_FIFO_IRQ_MODE_B (0x2B) */ +#define DAC33_NSM(x) (x << 0) +#define DAC33_PSM(x) (x << 2) +#define DAC33_ATM(x) (x << 4) +#define DAC33_LTM(x) (x << 4) + +/* DAC33_DAC_CTRL_A (0x2C) */ +#define DAC33_DACRATE(x) (x << 0) +#define DAC33_DACDUAL (0x01 << 4) +#define DAC33_DACLKSEL_MASK (0x03 << 5) +#define DAC33_DACLKSEL_INTSOC (0x00 << 5) +#define DAC33_DACLKSEL_PLL (0x01 << 5) +#define DAC33_DACLKSEL_MCLK (0x02 << 5) +#define DAC33_DACLKSEL_BCLK (0x03 << 5) + +/* DAC33_DAC_CTRL_B (0x2D) */ +#define DAC33_DACSRCR_MASK (0x03 << 0) +#define DAC33_DACSRCR_MUTE (0x00 << 0) +#define DAC33_DACSRCR_RIGHT (0x01 << 0) +#define DAC33_DACSRCR_LEFT (0x02 << 0) +#define DAC33_DACSRCR_MONOMIX (0x03 << 0) +#define DAC33_DACSRCL_MASK (0x03 << 2) +#define DAC33_DACSRCL_MUTE (0x00 << 2) +#define DAC33_DACSRCL_LEFT (0x01 << 2) +#define DAC33_DACSRCL_RIGHT (0x02 << 2) +#define DAC33_DACSRCL_MONOMIX (0x03 << 2) +#define DAC33_DVOLSTEP_MASK (0x03 << 4) +#define DAC33_DVOLSTEP_SS_PERFS (0x00 << 4) +#define DAC33_DVOLSTEP_SS_PER2FS (0x01 << 4) +#define DAC33_DVOLSTEP_SS_DISABLED (0x02 << 4) +#define DAC33_DVOLCTRL_MASK (0x03 << 6) +#define DAC33_DVOLCTRL_LR_INDEPENDENT1 (0x00 << 6) +#define DAC33_DVOLCTRL_LR_RIGHT_CONTROL (0x01 << 6) +#define DAC33_DVOLCTRL_LR_LEFT_CONTROL (0x02 << 6) +#define DAC33_DVOLCTRL_LR_INDEPENDENT2 (0x03 << 6) + +/* DAC33_DAC_CTRL_C (0x2E) */ +#define DAC33_DEEMENR (0x01 << 0) +#define DAC33_EFFENR (0x01 << 1) +#define DAC33_DEEMENL (0x01 << 2) +#define DAC33_EFFENL (0x01 << 3) +#define DAC33_EN3D (0x01 << 4) +#define DAC33_RESYNMUTE (0x01 << 5) +#define DAC33_RESYNEN (0x01 << 6) + +/* DAC33_ASRC_CTRL_A (0x32) */ +#define DAC33_SRCBYP (0x01 << 0) +#define DAC33_SRCLKSEL_MASK (0x03 << 1) +#define DAC33_SRCLKSEL_INTSOC (0x00 << 1) +#define DAC33_SRCLKSEL_PLL (0x01 << 1) +#define DAC33_SRCLKSEL_MCLK (0x02 << 1) +#define DAC33_SRCLKSEL_BCLK (0x03 << 1) +#define DAC33_SRCLKDIV(x) (x << 3) + +/* DAC33_ASRC_CTRL_B (0x33) */ +#define DAC33_SRCSETUP(x) (x << 0) +#define DAC33_SRCREFSEL (0x01 << 4) +#define DAC33_SRCREFDIV(x) (x << 5) + +/* DAC33_INTP_CTRL_A (0x38) */ +#define DAC33_INTPSEL (0x01 << 0) +#define DAC33_INTPM_MASK (0x03 << 1) +#define DAC33_INTPM_ALOW_OPENDRAIN (0x00 << 1) +#define DAC33_INTPM_ALOW (0x01 << 1) +#define DAC33_INTPM_AHIGH (0x02 << 1) + +/* DAC33_LDAC_PWR_CTRL (0x40) */ +/* DAC33_RDAC_PWR_CTRL (0x41) */ +#define DAC33_DACLRNUM (0x01 << 2) +#define DAC33_LROUT_GAIN(x) (x << 0) + +/* DAC33_ANA_VOL_SOFT_STEP_CTRL (0x49) */ +#define DAC33_VOLCLKSEL (0x01 << 0) +#define DAC33_VOLCLKEN (0x01 << 1) +#define DAC33_VOLBYPASS (0x01 << 2) + +#define TLV320DAC33_MCLK 0 +#define TLV320DAC33_SLEEPCLK 1 + +extern struct snd_soc_dai dac33_dai; +extern struct snd_soc_codec_device soc_codec_dev_tlv320dac33; + +#endif /* __TLV320DAC33_H */ -- cgit v1.3-8-gc7d7 From 731581e6a653f6a68a4d7ba9df6b886a85c7d080 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Thu, 15 Oct 2009 10:57:46 -0600 Subject: of: merge phandle, ihandle and struct property Merge of common code duplicated between Sparc, PowerPC and Microblaze Signed-off-by: Grant Likely Acked-by: David S. Miller Acked-by: Wolfram Sang Acked-by: Michal Simek Acked-by: Stephen Neuendorffer Acked-by: Stephen Rothwell --- arch/microblaze/include/asm/prom.h | 10 ---------- arch/powerpc/include/asm/prom.h | 12 ------------ arch/sparc/include/asm/prom.h | 12 ------------ include/linux/of.h | 12 ++++++++++++ 4 files changed, 12 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/arch/microblaze/include/asm/prom.h b/arch/microblaze/include/asm/prom.h index 66368896f922..11cb48419c7d 100644 --- a/arch/microblaze/include/asm/prom.h +++ b/arch/microblaze/include/asm/prom.h @@ -73,16 +73,6 @@ struct boot_param_header { u32 dt_struct_size; /* size of the DT structure block */ }; -typedef u32 phandle; -typedef u32 ihandle; - -struct property { - char *name; - int length; - void *value; - struct property *next; -}; - struct device_node { const char *name; const char *type; diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h index b0a84ea5f8ed..c236326177a8 100644 --- a/arch/powerpc/include/asm/prom.h +++ b/arch/powerpc/include/asm/prom.h @@ -71,18 +71,6 @@ struct boot_param_header u32 dt_struct_size; /* size of the DT structure block */ }; - - -typedef u32 phandle; -typedef u32 ihandle; - -struct property { - char *name; - int length; - void *value; - struct property *next; -}; - struct device_node { const char *name; const char *type; diff --git a/arch/sparc/include/asm/prom.h b/arch/sparc/include/asm/prom.h index 0733170e02ca..b34f988a2aad 100644 --- a/arch/sparc/include/asm/prom.h +++ b/arch/sparc/include/asm/prom.h @@ -29,18 +29,6 @@ #define of_prop_cmp(s1, s2) strcasecmp((s1), (s2)) #define of_node_cmp(s1, s2) strcmp((s1), (s2)) -typedef u32 phandle; -typedef u32 ihandle; - -struct property { - char *name; - int length; - void *value; - struct property *next; - unsigned long _flags; - unsigned int unique_id; -}; - struct of_irq_controller; struct device_node { const char *name; diff --git a/include/linux/of.h b/include/linux/of.h index 7be2d1043c16..4668b298479a 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -19,6 +19,18 @@ #include #include +typedef u32 phandle; +typedef u32 ihandle; + +struct property { + char *name; + int length; + void *value; + struct property *next; + unsigned long _flags; + unsigned int unique_id; +}; + #include /* flag descriptions */ -- cgit v1.3-8-gc7d7 From 6f1924928377bd035a9f64466f91a487c69271d2 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Thu, 15 Oct 2009 10:57:49 -0600 Subject: of: merge struct device_node Merge of common code duplicated between Sparc, PowerPC and Microblaze Signed-off-by: Grant Likely Acked-by: David S. Miller Acked-by: Wolfram Sang Acked-by: Michal Simek Acked-by: Stephen Neuendorffer Acked-by: Stephen Rothwell --- arch/microblaze/include/asm/prom.h | 20 -------------------- arch/powerpc/include/asm/prom.h | 20 -------------------- arch/sparc/include/asm/prom.h | 24 ------------------------ include/linux/of.h | 32 ++++++++++++++++++++++++++++++++ 4 files changed, 32 insertions(+), 64 deletions(-) (limited to 'include') diff --git a/arch/microblaze/include/asm/prom.h b/arch/microblaze/include/asm/prom.h index 11cb48419c7d..64e8b3a8c3cf 100644 --- a/arch/microblaze/include/asm/prom.h +++ b/arch/microblaze/include/asm/prom.h @@ -73,26 +73,6 @@ struct boot_param_header { u32 dt_struct_size; /* size of the DT structure block */ }; -struct device_node { - const char *name; - const char *type; - phandle node; - phandle linux_phandle; - char *full_name; - - struct property *properties; - struct property *deadprops; /* removed properties */ - struct device_node *parent; - struct device_node *child; - struct device_node *sibling; - struct device_node *next; /* next device of same type */ - struct device_node *allnext; /* next in list of all nodes */ - struct proc_dir_entry *pde; /* this node's proc directory */ - struct kref kref; - unsigned long _flags; - void *data; -}; - extern struct device_node *of_chosen; static inline int of_node_check_flag(struct device_node *n, unsigned long flag) diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h index c236326177a8..c918db535f08 100644 --- a/arch/powerpc/include/asm/prom.h +++ b/arch/powerpc/include/asm/prom.h @@ -71,26 +71,6 @@ struct boot_param_header u32 dt_struct_size; /* size of the DT structure block */ }; -struct device_node { - const char *name; - const char *type; - phandle node; - phandle linux_phandle; - char *full_name; - - struct property *properties; - struct property *deadprops; /* removed properties */ - struct device_node *parent; - struct device_node *child; - struct device_node *sibling; - struct device_node *next; /* next device of same type */ - struct device_node *allnext; /* next in list of all nodes */ - struct proc_dir_entry *pde; /* this node's proc directory */ - struct kref kref; - unsigned long _flags; - void *data; -}; - extern struct device_node *of_chosen; static inline int of_node_check_flag(struct device_node *n, unsigned long flag) diff --git a/arch/sparc/include/asm/prom.h b/arch/sparc/include/asm/prom.h index b34f988a2aad..e5f4a1d8fc46 100644 --- a/arch/sparc/include/asm/prom.h +++ b/arch/sparc/include/asm/prom.h @@ -29,30 +29,6 @@ #define of_prop_cmp(s1, s2) strcasecmp((s1), (s2)) #define of_node_cmp(s1, s2) strcmp((s1), (s2)) -struct of_irq_controller; -struct device_node { - const char *name; - const char *type; - phandle node; - char *path_component_name; - char *full_name; - - struct property *properties; - struct property *deadprops; /* removed properties */ - struct device_node *parent; - struct device_node *child; - struct device_node *sibling; - struct device_node *next; /* next device of same type */ - struct device_node *allnext; /* next in list of all nodes */ - struct proc_dir_entry *pde; /* this node's proc directory */ - struct kref kref; - unsigned long _flags; - void *data; - unsigned int unique_id; - - struct of_irq_controller *irq_trans; -}; - struct of_irq_controller { unsigned int (*irq_build)(struct device_node *, unsigned int, void *); void *data; diff --git a/include/linux/of.h b/include/linux/of.h index 4668b298479a..65a158dc7257 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -17,6 +17,7 @@ */ #include #include +#include #include typedef u32 phandle; @@ -31,6 +32,37 @@ struct property { unsigned int unique_id; }; +#if defined(CONFIG_SPARC) +struct of_irq_controller; +#endif + +struct device_node { + const char *name; + const char *type; + phandle node; +#if !defined(CONFIG_SPARC) + phandle linux_phandle; +#endif + char *full_name; + + struct property *properties; + struct property *deadprops; /* removed properties */ + struct device_node *parent; + struct device_node *child; + struct device_node *sibling; + struct device_node *next; /* next device of same type */ + struct device_node *allnext; /* next in list of all nodes */ + struct proc_dir_entry *pde; /* this node's proc directory */ + struct kref kref; + unsigned long _flags; + void *data; +#if defined(CONFIG_SPARC) + char *path_component_name; + unsigned int unique_id; + struct of_irq_controller *irq_trans; +#endif +}; + #include /* flag descriptions */ -- cgit v1.3-8-gc7d7 From 61e955db539e748cff2b8ea3bf7705259ebe9fb6 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Thu, 15 Oct 2009 10:57:51 -0600 Subject: of: Move OF_IS_DYNAMIC and OF_MARK_DYNAMIC macros to of.h Merge of common code duplicated between Sparc, PowerPC and Microblaze Signed-off-by: Grant Likely Acked-by: David S. Miller Acked-by: Wolfram Sang Acked-by: Michal Simek Acked-by: Stephen Neuendorffer Acked-by: Stephen Rothwell --- arch/sparc/include/asm/prom.h | 3 --- include/linux/of.h | 3 +++ 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/sparc/include/asm/prom.h b/arch/sparc/include/asm/prom.h index e5f4a1d8fc46..ddbd870b5720 100644 --- a/arch/sparc/include/asm/prom.h +++ b/arch/sparc/include/asm/prom.h @@ -34,9 +34,6 @@ struct of_irq_controller { void *data; }; -#define OF_IS_DYNAMIC(x) test_bit(OF_DYNAMIC, &x->_flags) -#define OF_MARK_DYNAMIC(x) set_bit(OF_DYNAMIC, &x->_flags) - extern struct device_node *of_find_node_by_cpuid(int cpuid); extern int of_set_property(struct device_node *node, const char *name, void *val, int len); extern struct mutex of_set_property_mutex; diff --git a/include/linux/of.h b/include/linux/of.h index 65a158dc7257..a66c1eb31693 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -69,6 +69,9 @@ struct device_node { #define OF_DYNAMIC 1 /* node and properties were allocated via kmalloc */ #define OF_DETACHED 2 /* node has been detached from the device tree */ +#define OF_IS_DYNAMIC(x) test_bit(OF_DYNAMIC, &x->_flags) +#define OF_MARK_DYNAMIC(x) set_bit(OF_DYNAMIC, &x->_flags) + #define OF_BAD_ADDR ((u64)-1) extern struct device_node *of_find_node_by_name(struct device_node *from, -- cgit v1.3-8-gc7d7 From d8678b58708d7e6bf947ebd03eaf44baf2adfad8 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Thu, 15 Oct 2009 10:57:53 -0600 Subject: of: add common header for flattened device tree representation Add a common header file for working with the flattened device tree data structure and merge the shared data tags used by Microblaze and PowerPC Signed-off-by: Grant Likely Acked-by: David S. Miller Acked-by: Wolfram Sang Acked-by: Michal Simek Acked-by: Stephen Neuendorffer Acked-by: Stephen Rothwell --- arch/microblaze/include/asm/prom.h | 12 +----------- arch/microblaze/kernel/head.S | 2 +- arch/powerpc/include/asm/prom.h | 12 +----------- include/linux/of_fdt.h | 26 ++++++++++++++++++++++++++ 4 files changed, 29 insertions(+), 23 deletions(-) create mode 100644 include/linux/of_fdt.h (limited to 'include') diff --git a/arch/microblaze/include/asm/prom.h b/arch/microblaze/include/asm/prom.h index 64e8b3a8c3cf..5f461f08db11 100644 --- a/arch/microblaze/include/asm/prom.h +++ b/arch/microblaze/include/asm/prom.h @@ -17,20 +17,10 @@ #ifndef _ASM_MICROBLAZE_PROM_H #define _ASM_MICROBLAZE_PROM_H #ifdef __KERNEL__ - -/* Definitions used by the flattened device tree */ -#define OF_DT_HEADER 0xd00dfeed /* marker */ -#define OF_DT_BEGIN_NODE 0x1 /* Start of node, full name */ -#define OF_DT_END_NODE 0x2 /* End node */ -#define OF_DT_PROP 0x3 /* Property: name off, size, content */ -#define OF_DT_NOP 0x4 /* nop */ -#define OF_DT_END 0x9 - -#define OF_DT_VERSION 0x10 - #ifndef __ASSEMBLY__ #include +#include #include #include #include diff --git a/arch/microblaze/kernel/head.S b/arch/microblaze/kernel/head.S index 697ce3007f30..30916193fcc7 100644 --- a/arch/microblaze/kernel/head.S +++ b/arch/microblaze/kernel/head.S @@ -31,7 +31,7 @@ #include #include #include -#include /* for OF_DT_HEADER */ +#include /* for OF_DT_HEADER */ #ifdef CONFIG_MMU #include /* COMMAND_LINE_SIZE */ diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h index c918db535f08..7181f8ac40f9 100644 --- a/arch/powerpc/include/asm/prom.h +++ b/arch/powerpc/include/asm/prom.h @@ -17,6 +17,7 @@ * 2 of the License, or (at your option) any later version. */ #include +#include #include #include #include @@ -29,17 +30,6 @@ #define of_prop_cmp(s1, s2) strcmp((s1), (s2)) #define of_node_cmp(s1, s2) strcasecmp((s1), (s2)) -/* Definitions used by the flattened device tree */ -#define OF_DT_HEADER 0xd00dfeed /* marker */ -#define OF_DT_BEGIN_NODE 0x1 /* Start of node, full name */ -#define OF_DT_END_NODE 0x2 /* End node */ -#define OF_DT_PROP 0x3 /* Property: name off, size, - * content */ -#define OF_DT_NOP 0x4 /* nop */ -#define OF_DT_END 0x9 - -#define OF_DT_VERSION 0x10 - /* * This is what gets passed to the kernel by prom_init or kexec * diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h new file mode 100644 index 000000000000..8b5ecc1cb6aa --- /dev/null +++ b/include/linux/of_fdt.h @@ -0,0 +1,26 @@ +/* + * Definitions for working with the Flattened Device Tree data format + * + * Copyright 2009 Benjamin Herrenschmidt, IBM Corp + * benh@kernel.crashing.org + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + */ + +#ifndef _LINUX_OF_FDT_H +#define _LINUX_OF_FDT_H + +/* Definitions used by the flattened device tree */ +#define OF_DT_HEADER 0xd00dfeed /* marker */ +#define OF_DT_BEGIN_NODE 0x1 /* Start of node, full name */ +#define OF_DT_END_NODE 0x2 /* End node */ +#define OF_DT_PROP 0x3 /* Property: name off, size, + * content */ +#define OF_DT_NOP 0x4 /* nop */ +#define OF_DT_END 0x9 + +#define OF_DT_VERSION 0x10 + +#endif /* _LINUX_OF_FDT_H */ -- cgit v1.3-8-gc7d7 From d45d94f672e3c79b0db1e6d76e1638ee521d56c0 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Thu, 15 Oct 2009 10:57:55 -0600 Subject: of: merge struct boot_param_header from Microblaze and PowerPC Merge common code for working with Flattened Device Tree data structure Signed-off-by: Grant Likely Acked-by: David S. Miller Acked-by: Wolfram Sang Acked-by: Michal Simek Acked-by: Stephen Neuendorffer Acked-by: Stephen Rothwell --- arch/microblaze/include/asm/prom.h | 30 ------------------------------ arch/powerpc/include/asm/prom.h | 31 ------------------------------- include/linux/of_fdt.h | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 32 insertions(+), 61 deletions(-) (limited to 'include') diff --git a/arch/microblaze/include/asm/prom.h b/arch/microblaze/include/asm/prom.h index 5f461f08db11..dfc4afcdbd2b 100644 --- a/arch/microblaze/include/asm/prom.h +++ b/arch/microblaze/include/asm/prom.h @@ -33,36 +33,6 @@ #define of_prop_cmp(s1, s2) strcmp((s1), (s2)) #define of_node_cmp(s1, s2) strcasecmp((s1), (s2)) -/* - * This is what gets passed to the kernel by prom_init or kexec - * - * The dt struct contains the device tree structure, full pathes and - * property contents. The dt strings contain a separate block with just - * the strings for the property names, and is fully page aligned and - * self contained in a page, so that it can be kept around by the kernel, - * each property name appears only once in this page (cheap compression) - * - * the mem_rsvmap contains a map of reserved ranges of physical memory, - * passing it here instead of in the device-tree itself greatly simplifies - * the job of everybody. It's just a list of u64 pairs (base/size) that - * ends when size is 0 - */ -struct boot_param_header { - u32 magic; /* magic word OF_DT_HEADER */ - u32 totalsize; /* total size of DT block */ - u32 off_dt_struct; /* offset to structure */ - u32 off_dt_strings; /* offset to strings */ - u32 off_mem_rsvmap; /* offset to memory reserve map */ - u32 version; /* format version */ - u32 last_comp_version; /* last compatible version */ - /* version 2 fields below */ - u32 boot_cpuid_phys; /* Physical CPU id we're booting on */ - /* version 3 fields below */ - u32 dt_strings_size; /* size of the DT strings block */ - /* version 17 fields below */ - u32 dt_struct_size; /* size of the DT structure block */ -}; - extern struct device_node *of_chosen; static inline int of_node_check_flag(struct device_node *n, unsigned long flag) diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h index 7181f8ac40f9..ef20e6c23235 100644 --- a/arch/powerpc/include/asm/prom.h +++ b/arch/powerpc/include/asm/prom.h @@ -30,37 +30,6 @@ #define of_prop_cmp(s1, s2) strcmp((s1), (s2)) #define of_node_cmp(s1, s2) strcasecmp((s1), (s2)) -/* - * This is what gets passed to the kernel by prom_init or kexec - * - * The dt struct contains the device tree structure, full pathes and - * property contents. The dt strings contain a separate block with just - * the strings for the property names, and is fully page aligned and - * self contained in a page, so that it can be kept around by the kernel, - * each property name appears only once in this page (cheap compression) - * - * the mem_rsvmap contains a map of reserved ranges of physical memory, - * passing it here instead of in the device-tree itself greatly simplifies - * the job of everybody. It's just a list of u64 pairs (base/size) that - * ends when size is 0 - */ -struct boot_param_header -{ - u32 magic; /* magic word OF_DT_HEADER */ - u32 totalsize; /* total size of DT block */ - u32 off_dt_struct; /* offset to structure */ - u32 off_dt_strings; /* offset to strings */ - u32 off_mem_rsvmap; /* offset to memory reserve map */ - u32 version; /* format version */ - u32 last_comp_version; /* last compatible version */ - /* version 2 fields below */ - u32 boot_cpuid_phys; /* Physical CPU id we're booting on */ - /* version 3 fields below */ - u32 dt_strings_size; /* size of the DT strings block */ - /* version 17 fields below */ - u32 dt_struct_size; /* size of the DT structure block */ -}; - extern struct device_node *of_chosen; static inline int of_node_check_flag(struct device_node *n, unsigned long flag) diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index 8b5ecc1cb6aa..b37ad3a973b9 100644 --- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h @@ -23,4 +23,36 @@ #define OF_DT_VERSION 0x10 +#ifndef __ASSEMBLY__ +/* + * This is what gets passed to the kernel by prom_init or kexec + * + * The dt struct contains the device tree structure, full pathes and + * property contents. The dt strings contain a separate block with just + * the strings for the property names, and is fully page aligned and + * self contained in a page, so that it can be kept around by the kernel, + * each property name appears only once in this page (cheap compression) + * + * the mem_rsvmap contains a map of reserved ranges of physical memory, + * passing it here instead of in the device-tree itself greatly simplifies + * the job of everybody. It's just a list of u64 pairs (base/size) that + * ends when size is 0 + */ +struct boot_param_header { + u32 magic; /* magic word OF_DT_HEADER */ + u32 totalsize; /* total size of DT block */ + u32 off_dt_struct; /* offset to structure */ + u32 off_dt_strings; /* offset to strings */ + u32 off_mem_rsvmap; /* offset to memory reserve map */ + u32 version; /* format version */ + u32 last_comp_version; /* last compatible version */ + /* version 2 fields below */ + u32 boot_cpuid_phys; /* Physical CPU id we're booting on */ + /* version 3 fields below */ + u32 dt_strings_size; /* size of the DT strings block */ + /* version 17 fields below */ + u32 dt_struct_size; /* size of the DT structure block */ +}; + +#endif /* __ASSEMBLY__ */ #endif /* _LINUX_OF_FDT_H */ -- cgit v1.3-8-gc7d7 From 50436312f47f1fd2bf82c983638fe27ca7e03238 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Thu, 15 Oct 2009 10:57:58 -0600 Subject: of: merge of_node_*_flag() and set_node_proc_entry() Merge common code between PowerPC and Microblaze Signed-off-by: Grant Likely Acked-by: David S. Miller Acked-by: Wolfram Sang Acked-by: Michal Simek Acked-by: Stephen Neuendorffer Acked-by: Stephen Rothwell --- arch/microblaze/include/asm/prom.h | 16 ---------------- arch/powerpc/include/asm/prom.h | 17 ----------------- include/linux/of.h | 16 ++++++++++++++++ 3 files changed, 16 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/arch/microblaze/include/asm/prom.h b/arch/microblaze/include/asm/prom.h index dfc4afcdbd2b..180d84481306 100644 --- a/arch/microblaze/include/asm/prom.h +++ b/arch/microblaze/include/asm/prom.h @@ -35,24 +35,8 @@ extern struct device_node *of_chosen; -static inline int of_node_check_flag(struct device_node *n, unsigned long flag) -{ - return test_bit(flag, &n->_flags); -} - -static inline void of_node_set_flag(struct device_node *n, unsigned long flag) -{ - set_bit(flag, &n->_flags); -} - #define HAVE_ARCH_DEVTREE_FIXUPS -static inline void set_node_proc_entry(struct device_node *dn, - struct proc_dir_entry *de) -{ - dn->pde = de; -} - extern struct device_node *allnodes; /* temporary while merging */ extern rwlock_t devtree_lock; /* temporary while merging */ diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h index ef20e6c23235..2cfd43288a3e 100644 --- a/arch/powerpc/include/asm/prom.h +++ b/arch/powerpc/include/asm/prom.h @@ -32,25 +32,8 @@ extern struct device_node *of_chosen; -static inline int of_node_check_flag(struct device_node *n, unsigned long flag) -{ - return test_bit(flag, &n->_flags); -} - -static inline void of_node_set_flag(struct device_node *n, unsigned long flag) -{ - set_bit(flag, &n->_flags); -} - - #define HAVE_ARCH_DEVTREE_FIXUPS -static inline void set_node_proc_entry(struct device_node *dn, struct proc_dir_entry *de) -{ - dn->pde = de; -} - - extern struct device_node *of_find_all_nodes(struct device_node *prev); extern struct device_node *of_node_get(struct device_node *node); extern void of_node_put(struct device_node *node); diff --git a/include/linux/of.h b/include/linux/of.h index a66c1eb31693..d5f666290f6b 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -63,6 +63,22 @@ struct device_node { #endif }; +static inline int of_node_check_flag(struct device_node *n, unsigned long flag) +{ + return test_bit(flag, &n->_flags); +} + +static inline void of_node_set_flag(struct device_node *n, unsigned long flag) +{ + set_bit(flag, &n->_flags); +} + +static inline void +set_node_proc_entry(struct device_node *dn, struct proc_dir_entry *de) +{ + dn->pde = de; +} + #include /* flag descriptions */ -- cgit v1.3-8-gc7d7 From b6caf2ad7ce30648b89c1cf40d8f7cf6f4b58033 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Thu, 15 Oct 2009 10:58:00 -0600 Subject: of: merge of_read_number() an of_read_ulong() Merge common code between Microblaze and PowerPC Signed-off-by: Grant Likely Acked-by: David S. Miller Acked-by: Wolfram Sang Acked-by: Michal Simek Acked-by: Stephen Neuendorffer Acked-by: Stephen Rothwell --- arch/microblaze/include/asm/prom.h | 12 ------------ arch/powerpc/include/asm/prom.h | 20 -------------------- include/linux/of.h | 23 +++++++++++++++++++++++ 3 files changed, 23 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/arch/microblaze/include/asm/prom.h b/arch/microblaze/include/asm/prom.h index 180d84481306..d4f57ffdae3f 100644 --- a/arch/microblaze/include/asm/prom.h +++ b/arch/microblaze/include/asm/prom.h @@ -82,18 +82,6 @@ extern int release_OF_resource(struct device_node *node, int index); * OF address retreival & translation */ -/* Helper to read a big number; size is in cells (not bytes) */ -static inline u64 of_read_number(const u32 *cell, int size) -{ - u64 r = 0; - while (size--) - r = (r << 32) | *(cell++); - return r; -} - -/* Like of_read_number, but we want an unsigned long result */ -#define of_read_ulong(cell, size) of_read_number(cell, size) - /* Translate an OF address block into a CPU physical address */ extern u64 of_translate_address(struct device_node *np, const u32 *addr); diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h index 2cfd43288a3e..d8c0525c3139 100644 --- a/arch/powerpc/include/asm/prom.h +++ b/arch/powerpc/include/asm/prom.h @@ -89,26 +89,6 @@ extern int release_OF_resource(struct device_node* node, int index); * OF address retreival & translation */ - -/* Helper to read a big number; size is in cells (not bytes) */ -static inline u64 of_read_number(const u32 *cell, int size) -{ - u64 r = 0; - while (size--) - r = (r << 32) | *(cell++); - return r; -} - -/* Like of_read_number, but we want an unsigned long result */ -#ifdef CONFIG_PPC32 -static inline unsigned long of_read_ulong(const u32 *cell, int size) -{ - return cell[size-1]; -} -#else -#define of_read_ulong(cell, size) of_read_number(cell, size) -#endif - /* Translate an OF address block into a CPU physical address */ extern u64 of_translate_address(struct device_node *np, const u32 *addr); diff --git a/include/linux/of.h b/include/linux/of.h index d5f666290f6b..18e4379b8b7f 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -79,6 +79,29 @@ set_node_proc_entry(struct device_node *dn, struct proc_dir_entry *de) dn->pde = de; } +/* + * OF address retreival & translation + */ + +/* Helper to read a big number; size is in cells (not bytes) */ +static inline u64 of_read_number(const u32 *cell, int size) +{ + u64 r = 0; + while (size--) + r = (r << 32) | *(cell++); + return r; +} + +/* Like of_read_number, but we want an unsigned long result */ +#ifdef CONFIG_PPC32 +static inline unsigned long of_read_ulong(const u32 *cell, int size) +{ + return cell[size-1]; +} +#else +#define of_read_ulong(cell, size) of_read_number(cell, size) +#endif + #include /* flag descriptions */ -- cgit v1.3-8-gc7d7 From 526b5b3ed97bac22ed0c9feed97adcdc3a25244c Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Thu, 15 Oct 2009 10:58:02 -0600 Subject: of: merge of_node_get(), of_node_put() and of_find_all_nodes() Merge common code between Sparc, PowerPC and Microblaze. Sparc differs in the implementation at this point, so this patch uses a #ifdef to handle sparc differently for now. The merging of implementations will occur in a later patch Signed-off-by: Grant Likely Acked-by: David S. Miller Acked-by: Wolfram Sang Acked-by: Michal Simek Acked-by: Stephen Neuendorffer Acked-by: Stephen Rothwell --- arch/microblaze/include/asm/prom.h | 4 ---- arch/powerpc/include/asm/prom.h | 4 ---- arch/sparc/include/asm/prom.h | 9 --------- include/linux/of.h | 16 ++++++++++++++++ 4 files changed, 16 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/arch/microblaze/include/asm/prom.h b/arch/microblaze/include/asm/prom.h index d4f57ffdae3f..c92b4a9e4397 100644 --- a/arch/microblaze/include/asm/prom.h +++ b/arch/microblaze/include/asm/prom.h @@ -40,10 +40,6 @@ extern struct device_node *of_chosen; extern struct device_node *allnodes; /* temporary while merging */ extern rwlock_t devtree_lock; /* temporary while merging */ -extern struct device_node *of_find_all_nodes(struct device_node *prev); -extern struct device_node *of_node_get(struct device_node *node); -extern void of_node_put(struct device_node *node); - /* For scanning the flat device-tree at boot time */ extern int __init of_scan_flat_dt(int (*it)(unsigned long node, const char *uname, int depth, diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h index d8c0525c3139..622769cd1d62 100644 --- a/arch/powerpc/include/asm/prom.h +++ b/arch/powerpc/include/asm/prom.h @@ -34,10 +34,6 @@ extern struct device_node *of_chosen; #define HAVE_ARCH_DEVTREE_FIXUPS -extern struct device_node *of_find_all_nodes(struct device_node *prev); -extern struct device_node *of_node_get(struct device_node *node); -extern void of_node_put(struct device_node *node); - /* For scanning the flat device-tree at boot time */ extern int __init of_scan_flat_dt(int (*it)(unsigned long node, const char *uname, int depth, diff --git a/arch/sparc/include/asm/prom.h b/arch/sparc/include/asm/prom.h index ddbd870b5720..f845828ca4c6 100644 --- a/arch/sparc/include/asm/prom.h +++ b/arch/sparc/include/asm/prom.h @@ -51,15 +51,6 @@ extern void prom_build_devicetree(void); extern void of_populate_present_mask(void); extern void of_fill_in_cpu_data(void); -/* Dummy ref counting routines - to be implemented later */ -static inline struct device_node *of_node_get(struct device_node *node) -{ - return node; -} -static inline void of_node_put(struct device_node *node) -{ -} - /* These routines are here to provide compatibility with how powerpc * handles IRQ mapping for OF device nodes. We precompute and permanently * register them in the of_device objects, whereas powerpc computes them diff --git a/include/linux/of.h b/include/linux/of.h index 18e4379b8b7f..4636bba93afa 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -79,6 +79,22 @@ set_node_proc_entry(struct device_node *dn, struct proc_dir_entry *de) dn->pde = de; } +#if defined(CONFIG_SPARC) +/* Dummy ref counting routines - to be implemented later */ +static inline struct device_node *of_node_get(struct device_node *node) +{ + return node; +} +static inline void of_node_put(struct device_node *node) +{ +} + +#else +extern struct device_node *of_find_all_nodes(struct device_node *prev); +extern struct device_node *of_node_get(struct device_node *node); +extern void of_node_put(struct device_node *node); +#endif + /* * OF address retreival & translation */ -- cgit v1.3-8-gc7d7 From 8482f56803b9498af84bc09e7bc769a5924f6443 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Thu, 15 Oct 2009 10:58:04 -0600 Subject: of: merge of_*_flat_dt*() functions Merge common flattened device tree code between Microblaze and PowerPC Signed-off-by: Grant Likely Acked-by: David S. Miller Acked-by: Wolfram Sang Acked-by: Michal Simek Acked-by: Stephen Neuendorffer Acked-by: Stephen Rothwell --- arch/microblaze/include/asm/prom.h | 11 ----------- arch/powerpc/include/asm/prom.h | 10 ---------- include/linux/of_fdt.h | 14 ++++++++++++++ 3 files changed, 14 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/arch/microblaze/include/asm/prom.h b/arch/microblaze/include/asm/prom.h index c92b4a9e4397..be4db2a22245 100644 --- a/arch/microblaze/include/asm/prom.h +++ b/arch/microblaze/include/asm/prom.h @@ -40,17 +40,6 @@ extern struct device_node *of_chosen; extern struct device_node *allnodes; /* temporary while merging */ extern rwlock_t devtree_lock; /* temporary while merging */ -/* For scanning the flat device-tree at boot time */ -extern int __init of_scan_flat_dt(int (*it)(unsigned long node, - const char *uname, int depth, - void *data), - void *data); -extern void *__init of_get_flat_dt_prop(unsigned long node, const char *name, - unsigned long *size); -extern int __init - of_flat_dt_is_compatible(unsigned long node, const char *name); -extern unsigned long __init of_get_flat_dt_root(void); - /* For updating the device tree at runtime */ extern void of_attach_node(struct device_node *); extern void of_detach_node(struct device_node *); diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h index 622769cd1d62..c8b59330b04a 100644 --- a/arch/powerpc/include/asm/prom.h +++ b/arch/powerpc/include/asm/prom.h @@ -34,16 +34,6 @@ extern struct device_node *of_chosen; #define HAVE_ARCH_DEVTREE_FIXUPS -/* For scanning the flat device-tree at boot time */ -extern int __init of_scan_flat_dt(int (*it)(unsigned long node, - const char *uname, int depth, - void *data), - void *data); -extern void* __init of_get_flat_dt_prop(unsigned long node, const char *name, - unsigned long *size); -extern int __init of_flat_dt_is_compatible(unsigned long node, const char *name); -extern unsigned long __init of_get_flat_dt_root(void); - /* For updating the device tree at runtime */ extern void of_attach_node(struct device_node *); extern void of_detach_node(struct device_node *); diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index b37ad3a973b9..b363eadea819 100644 --- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h @@ -12,6 +12,9 @@ #ifndef _LINUX_OF_FDT_H #define _LINUX_OF_FDT_H +#include +#include + /* Definitions used by the flattened device tree */ #define OF_DT_HEADER 0xd00dfeed /* marker */ #define OF_DT_BEGIN_NODE 0x1 /* Start of node, full name */ @@ -54,5 +57,16 @@ struct boot_param_header { u32 dt_struct_size; /* size of the DT structure block */ }; +/* For scanning the flat device-tree at boot time */ +extern int __init of_scan_flat_dt(int (*it)(unsigned long node, + const char *uname, int depth, + void *data), + void *data); +extern void __init *of_get_flat_dt_prop(unsigned long node, const char *name, + unsigned long *size); +extern int __init of_flat_dt_is_compatible(unsigned long node, + const char *name); +extern unsigned long __init of_get_flat_dt_root(void); + #endif /* __ASSEMBLY__ */ #endif /* _LINUX_OF_FDT_H */ -- cgit v1.3-8-gc7d7 From 82b2928c95d824afd9af3bb41660f3c3fa1f234e Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Thu, 15 Oct 2009 10:58:07 -0600 Subject: of: merge other miscellaneous prototypes Merge common prototypes used by Microblaze and PowerPC Signed-off-by: Grant Likely Acked-by: David S. Miller Acked-by: Wolfram Sang Acked-by: Michal Simek Acked-by: Stephen Neuendorffer Acked-by: Stephen Rothwell --- arch/microblaze/include/asm/prom.h | 12 ------------ arch/powerpc/include/asm/prom.h | 14 -------------- include/linux/of_fdt.h | 14 ++++++++++++++ 3 files changed, 14 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/arch/microblaze/include/asm/prom.h b/arch/microblaze/include/asm/prom.h index be4db2a22245..ef3ec1d6ceb3 100644 --- a/arch/microblaze/include/asm/prom.h +++ b/arch/microblaze/include/asm/prom.h @@ -45,19 +45,7 @@ extern void of_attach_node(struct device_node *); extern void of_detach_node(struct device_node *); /* Other Prototypes */ -extern void finish_device_tree(void); -extern void unflatten_device_tree(void); extern int early_uartlite_console(void); -extern void early_init_devtree(void *); -extern int machine_is_compatible(const char *compat); -extern void print_properties(struct device_node *node); -extern int prom_n_intr_cells(struct device_node *np); -extern void prom_get_irq_senses(unsigned char *senses, int off, int max); -extern int prom_add_property(struct device_node *np, struct property *prop); -extern int prom_remove_property(struct device_node *np, struct property *prop); -extern int prom_update_property(struct device_node *np, - struct property *newprop, - struct property *oldprop); extern struct resource *request_OF_resource(struct device_node *node, int index, const char *name_postfix); diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h index c8b59330b04a..2ab9cbd98826 100644 --- a/arch/powerpc/include/asm/prom.h +++ b/arch/powerpc/include/asm/prom.h @@ -38,20 +38,6 @@ extern struct device_node *of_chosen; extern void of_attach_node(struct device_node *); extern void of_detach_node(struct device_node *); -/* Other Prototypes */ -extern void finish_device_tree(void); -extern void unflatten_device_tree(void); -extern void early_init_devtree(void *); -extern int machine_is_compatible(const char *compat); -extern void print_properties(struct device_node *node); -extern int prom_n_intr_cells(struct device_node* np); -extern void prom_get_irq_senses(unsigned char *senses, int off, int max); -extern int prom_add_property(struct device_node* np, struct property* prop); -extern int prom_remove_property(struct device_node *np, struct property *prop); -extern int prom_update_property(struct device_node *np, - struct property *newprop, - struct property *oldprop); - #ifdef CONFIG_PPC32 /* * PCI <-> OF matching functions diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index b363eadea819..41d432b13553 100644 --- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h @@ -68,5 +68,19 @@ extern int __init of_flat_dt_is_compatible(unsigned long node, const char *name); extern unsigned long __init of_get_flat_dt_root(void); +/* Other Prototypes */ +extern void finish_device_tree(void); +extern void unflatten_device_tree(void); +extern void early_init_devtree(void *); +extern int machine_is_compatible(const char *compat); +extern void print_properties(struct device_node *node); +extern int prom_n_intr_cells(struct device_node* np); +extern void prom_get_irq_senses(unsigned char *senses, int off, int max); +extern int prom_add_property(struct device_node* np, struct property* prop); +extern int prom_remove_property(struct device_node *np, struct property *prop); +extern int prom_update_property(struct device_node *np, + struct property *newprop, + struct property *oldprop); + #endif /* __ASSEMBLY__ */ #endif /* _LINUX_OF_FDT_H */ -- cgit v1.3-8-gc7d7 From e91edcf5a2940bb7f1f316c871dfe9e2aaf9d6d9 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Thu, 15 Oct 2009 10:58:09 -0600 Subject: of: merge of_find_all_nodes() implementations Merge common code between Microblaze and PowerPC, and make it available to Sparc Signed-off-by: Grant Likely Acked-by: David S. Miller Acked-by: Wolfram Sang Acked-by: Michal Simek Acked-by: Stephen Neuendorffer Acked-by: Stephen Rothwell --- arch/microblaze/kernel/prom.c | 23 ----------------------- arch/powerpc/kernel/prom.c | 23 ----------------------- drivers/of/base.c | 26 +++++++++++++++++++++++++- include/linux/of.h | 3 ++- 4 files changed, 27 insertions(+), 48 deletions(-) (limited to 'include') diff --git a/arch/microblaze/kernel/prom.c b/arch/microblaze/kernel/prom.c index c005cc6f1aaf..b817df172aa9 100644 --- a/arch/microblaze/kernel/prom.c +++ b/arch/microblaze/kernel/prom.c @@ -859,29 +859,6 @@ struct device_node *of_find_node_by_phandle(phandle handle) } EXPORT_SYMBOL(of_find_node_by_phandle); -/** - * of_find_all_nodes - Get next node in global list - * @prev: Previous node or NULL to start iteration - * of_node_put() will be called on it - * - * Returns a node pointer with refcount incremented, use - * of_node_put() on it when done. - */ -struct device_node *of_find_all_nodes(struct device_node *prev) -{ - struct device_node *np; - - read_lock(&devtree_lock); - np = prev ? prev->allnext : allnodes; - for (; np != NULL; np = np->allnext) - if (of_node_get(np)) - break; - of_node_put(prev); - read_unlock(&devtree_lock); - return np; -} -EXPORT_SYMBOL(of_find_all_nodes); - /** * of_node_get - Increment refcount of a node * @node: Node to inc refcount, NULL is supported to diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index d4405b95bfaa..4ec300862466 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -1316,29 +1316,6 @@ struct device_node *of_find_next_cache_node(struct device_node *np) return NULL; } -/** - * of_find_all_nodes - Get next node in global list - * @prev: Previous node or NULL to start iteration - * of_node_put() will be called on it - * - * Returns a node pointer with refcount incremented, use - * of_node_put() on it when done. - */ -struct device_node *of_find_all_nodes(struct device_node *prev) -{ - struct device_node *np; - - read_lock(&devtree_lock); - np = prev ? prev->allnext : allnodes; - for (; np != 0; np = np->allnext) - if (of_node_get(np)) - break; - of_node_put(prev); - read_unlock(&devtree_lock); - return np; -} -EXPORT_SYMBOL(of_find_all_nodes); - /** * of_node_get - Increment refcount of a node * @node: Node to inc refcount, NULL is supported to diff --git a/drivers/of/base.c b/drivers/of/base.c index ddf224d456b2..e6627b2320f1 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -9,7 +9,8 @@ * * Adapted for sparc and sparc64 by David S. Miller davem@davemloft.net * - * Reconsolidated from arch/x/kernel/prom.c by Stephen Rothwell. + * Reconsolidated from arch/x/kernel/prom.c by Stephen Rothwell and + * Grant Likely. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -82,6 +83,29 @@ struct property *of_find_property(const struct device_node *np, } EXPORT_SYMBOL(of_find_property); +/** + * of_find_all_nodes - Get next node in global list + * @prev: Previous node or NULL to start iteration + * of_node_put() will be called on it + * + * Returns a node pointer with refcount incremented, use + * of_node_put() on it when done. + */ +struct device_node *of_find_all_nodes(struct device_node *prev) +{ + struct device_node *np; + + read_lock(&devtree_lock); + np = prev ? prev->allnext : allnodes; + for (; np != NULL; np = np->allnext) + if (of_node_get(np)) + break; + of_node_put(prev); + read_unlock(&devtree_lock); + return np; +} +EXPORT_SYMBOL(of_find_all_nodes); + /* * Find a property with a given name for a given node * and return the value. diff --git a/include/linux/of.h b/include/linux/of.h index 4636bba93afa..e7facd8fbce8 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -79,6 +79,8 @@ set_node_proc_entry(struct device_node *dn, struct proc_dir_entry *de) dn->pde = de; } +extern struct device_node *of_find_all_nodes(struct device_node *prev); + #if defined(CONFIG_SPARC) /* Dummy ref counting routines - to be implemented later */ static inline struct device_node *of_node_get(struct device_node *node) @@ -90,7 +92,6 @@ static inline void of_node_put(struct device_node *node) } #else -extern struct device_node *of_find_all_nodes(struct device_node *prev); extern struct device_node *of_node_get(struct device_node *node); extern void of_node_put(struct device_node *node); #endif -- cgit v1.3-8-gc7d7 From b0aba1e66c38d64be2c7dbf4b08c71857031ab67 Mon Sep 17 00:00:00 2001 From: Samu Onkalo Date: Sun, 18 Oct 2009 00:38:57 -0700 Subject: Input: add open and close methods for polled devices Optional open and close methods for preparing and closing the device. Signed-off-by: Samu Onkalo Signed-off-by: Dmitry Torokhov --- drivers/input/input-polldev.c | 7 +++++-- drivers/input/misc/wistron_btns.c | 2 +- include/linux/input-polldev.h | 11 +++++++---- 3 files changed, 13 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/input/input-polldev.c b/drivers/input/input-polldev.c index 0d3ce7a50fb1..910220c127cb 100644 --- a/drivers/input/input-polldev.c +++ b/drivers/input/input-polldev.c @@ -80,8 +80,8 @@ static int input_open_polled_device(struct input_dev *input) if (error) return error; - if (dev->flush) - dev->flush(dev); + if (dev->open) + dev->open(dev); queue_delayed_work(polldev_wq, &dev->work, msecs_to_jiffies(dev->poll_interval)); @@ -95,6 +95,9 @@ static void input_close_polled_device(struct input_dev *input) cancel_delayed_work_sync(&dev->work); input_polldev_stop_workqueue(); + + if (dev->close) + dev->close(dev); } /** diff --git a/drivers/input/misc/wistron_btns.c b/drivers/input/misc/wistron_btns.c index a932179c4c9e..00eb9d651d97 100644 --- a/drivers/input/misc/wistron_btns.c +++ b/drivers/input/misc/wistron_btns.c @@ -1263,7 +1263,7 @@ static int __devinit setup_input_dev(void) if (!wistron_idev) return -ENOMEM; - wistron_idev->flush = wistron_flush; + wistron_idev->open = wistron_flush; wistron_idev->poll = wistron_poll; wistron_idev->poll_interval = POLL_INTERVAL_DEFAULT; diff --git a/include/linux/input-polldev.h b/include/linux/input-polldev.h index 597a0077b3c5..5c0ec68a965e 100644 --- a/include/linux/input-polldev.h +++ b/include/linux/input-polldev.h @@ -14,9 +14,11 @@ /** * struct input_polled_dev - simple polled input device - * @private: private driver data - * @flush: driver-supplied method that flushes device's state upon - * opening (optional) + * @private: private driver data. + * @open: driver-supplied method that prepares device for polling + * (enabled the device and maybe flushes device state). + * @close: driver-supplied method that is called when device is no + * longer being polled. Used to put device into low power mode. * @poll: driver-supplied method that polls the device and posts * input events (mandatory). * @poll_interval: specifies how often the poll() method shoudl be called. @@ -30,7 +32,8 @@ struct input_polled_dev { void *private; - void (*flush)(struct input_polled_dev *dev); + void (*open)(struct input_polled_dev *dev); + void (*close)(struct input_polled_dev *dev); void (*poll)(struct input_polled_dev *dev); unsigned int poll_interval; /* msec */ -- cgit v1.3-8-gc7d7 From c720c7e8383aff1cb219bddf474ed89d850336e3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 15 Oct 2009 06:30:45 +0000 Subject: inet: rename some inet_sock fields In order to have better cache layouts of struct sock (separate zones for rx/tx paths), we need this preliminary patch. Goal is to transfert fields used at lookup time in the first read-mostly cache line (inside struct sock_common) and move sk_refcnt to a separate cache line (only written by rx path) This patch adds inet_ prefix to daddr, rcv_saddr, dport, num, saddr, sport and id fields. This allows a future patch to define these fields as macros, like sk_refcnt, without name clashes. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/pppol2tp.c | 22 ++++---- fs/ocfs2/cluster/netdebug.c | 8 +-- include/linux/ipv6.h | 2 +- include/net/inet6_hashtables.h | 4 +- include/net/inet_hashtables.h | 12 ++--- include/net/inet_sock.h | 36 ++++++------- include/net/inet_timewait_sock.h | 2 +- include/net/ip.h | 12 ++--- net/dccp/ipv4.c | 40 ++++++++------- net/dccp/ipv6.c | 27 +++++----- net/dccp/output.c | 4 +- net/dccp/probe.c | 13 ++--- net/dccp/proto.c | 4 +- net/ipv4/af_inet.c | 62 +++++++++++------------ net/ipv4/datagram.c | 18 +++---- net/ipv4/inet_connection_sock.c | 20 ++++---- net/ipv4/inet_diag.c | 26 +++++----- net/ipv4/inet_hashtables.c | 34 +++++++------ net/ipv4/inet_timewait_sock.c | 12 ++--- net/ipv4/ip_input.c | 2 +- net/ipv4/ip_output.c | 15 +++--- net/ipv4/ip_sockglue.c | 8 +-- net/ipv4/ipmr.c | 2 +- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 8 +-- net/ipv4/raw.c | 30 +++++------ net/ipv4/tcp.c | 4 +- net/ipv4/tcp_ipv4.c | 70 +++++++++++++------------- net/ipv4/tcp_output.c | 4 +- net/ipv4/tcp_probe.c | 11 ++-- net/ipv4/tcp_timer.c | 8 +-- net/ipv4/udp.c | 51 ++++++++++--------- net/ipv6/af_inet6.c | 28 +++++------ net/ipv6/datagram.c | 15 +++--- net/ipv6/inet6_connection_sock.c | 6 +-- net/ipv6/inet6_hashtables.c | 12 ++--- net/ipv6/ip6mr.c | 2 +- net/ipv6/ipv6_sockglue.c | 6 +-- net/ipv6/raw.c | 30 +++++------ net/ipv6/syncookies.c | 2 +- net/ipv6/tcp_ipv6.c | 32 ++++++------ net/ipv6/udp.c | 24 ++++----- net/netfilter/xt_socket.c | 2 +- net/rds/tcp_listen.c | 8 +-- net/sctp/protocol.c | 8 +-- net/sctp/socket.c | 30 +++++------ net/sunrpc/svcsock.c | 8 +-- security/lsm_audit.c | 12 ++--- 47 files changed, 408 insertions(+), 388 deletions(-) (limited to 'include') diff --git a/drivers/net/pppol2tp.c b/drivers/net/pppol2tp.c index 5910df60c93e..849cc9c62c2a 100644 --- a/drivers/net/pppol2tp.c +++ b/drivers/net/pppol2tp.c @@ -516,7 +516,7 @@ static inline int pppol2tp_verify_udp_checksum(struct sock *sk, return 0; inet = inet_sk(sk); - psum = csum_tcpudp_nofold(inet->saddr, inet->daddr, ulen, + psum = csum_tcpudp_nofold(inet->inet_saddr, inet->inet_daddr, ulen, IPPROTO_UDP, 0); if ((skb->ip_summed == CHECKSUM_COMPLETE) && @@ -949,8 +949,8 @@ static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msgh inet = inet_sk(sk_tun); udp_len = hdr_len + sizeof(ppph) + total_len; uh = (struct udphdr *) skb->data; - uh->source = inet->sport; - uh->dest = inet->dport; + uh->source = inet->inet_sport; + uh->dest = inet->inet_dport; uh->len = htons(udp_len); uh->check = 0; skb_put(skb, sizeof(struct udphdr)); @@ -978,7 +978,8 @@ static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msgh else if (!(skb_dst(skb)->dev->features & NETIF_F_V4_CSUM)) { skb->ip_summed = CHECKSUM_COMPLETE; csum = skb_checksum(skb, 0, udp_len, 0); - uh->check = csum_tcpudp_magic(inet->saddr, inet->daddr, + uh->check = csum_tcpudp_magic(inet->inet_saddr, + inet->inet_daddr, udp_len, IPPROTO_UDP, csum); if (uh->check == 0) uh->check = CSUM_MANGLED_0; @@ -986,7 +987,8 @@ static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msgh skb->ip_summed = CHECKSUM_PARTIAL; skb->csum_start = skb_transport_header(skb) - skb->head; skb->csum_offset = offsetof(struct udphdr, check); - uh->check = ~csum_tcpudp_magic(inet->saddr, inet->daddr, + uh->check = ~csum_tcpudp_magic(inet->inet_saddr, + inet->inet_daddr, udp_len, IPPROTO_UDP, 0); } @@ -1136,8 +1138,8 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) __skb_push(skb, sizeof(*uh)); skb_reset_transport_header(skb); uh = udp_hdr(skb); - uh->source = inet->sport; - uh->dest = inet->dport; + uh->source = inet->inet_sport; + uh->dest = inet->inet_dport; uh->len = htons(udp_len); uh->check = 0; @@ -1181,7 +1183,8 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) else if (!(skb_dst(skb)->dev->features & NETIF_F_V4_CSUM)) { skb->ip_summed = CHECKSUM_COMPLETE; csum = skb_checksum(skb, 0, udp_len, 0); - uh->check = csum_tcpudp_magic(inet->saddr, inet->daddr, + uh->check = csum_tcpudp_magic(inet->inet_saddr, + inet->inet_daddr, udp_len, IPPROTO_UDP, csum); if (uh->check == 0) uh->check = CSUM_MANGLED_0; @@ -1189,7 +1192,8 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) skb->ip_summed = CHECKSUM_PARTIAL; skb->csum_start = skb_transport_header(skb) - skb->head; skb->csum_offset = offsetof(struct udphdr, check); - uh->check = ~csum_tcpudp_magic(inet->saddr, inet->daddr, + uh->check = ~csum_tcpudp_magic(inet->inet_saddr, + inet->inet_daddr, udp_len, IPPROTO_UDP, 0); } diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c index da794bc07a6c..a3f150e52b02 100644 --- a/fs/ocfs2/cluster/netdebug.c +++ b/fs/ocfs2/cluster/netdebug.c @@ -294,10 +294,10 @@ static int sc_seq_show(struct seq_file *seq, void *v) if (sc->sc_sock) { inet = inet_sk(sc->sc_sock->sk); /* the stack's structs aren't sparse endian clean */ - saddr = (__force __be32)inet->saddr; - daddr = (__force __be32)inet->daddr; - sport = (__force __be16)inet->sport; - dport = (__force __be16)inet->dport; + saddr = (__force __be32)inet->inet_saddr; + daddr = (__force __be32)inet->inet_daddr; + sport = (__force __be16)inet->inet_sport; + dport = (__force __be16)inet->inet_dport; } /* XXX sigh, inet-> doesn't have sparse annotation so any diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 56404251248c..e0cc9a7db2b5 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -505,7 +505,7 @@ static inline struct raw6_sock *raw6_sk(const struct sock *sk) #define INET6_MATCH(__sk, __net, __hash, __saddr, __daddr, __ports, __dif)\ (((__sk)->sk_hash == (__hash)) && sock_net((__sk)) == (__net) && \ - ((*((__portpair *)&(inet_sk(__sk)->dport))) == (__ports)) && \ + ((*((__portpair *)&(inet_sk(__sk)->inet_dport))) == (__ports)) && \ ((__sk)->sk_family == AF_INET6) && \ ipv6_addr_equal(&inet6_sk(__sk)->daddr, (__saddr)) && \ ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr)) && \ diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index 22c73a77cd99..92838d3a1ab7 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -46,8 +46,8 @@ static inline int inet6_sk_ehashfn(const struct sock *sk) const struct ipv6_pinfo *np = inet6_sk(sk); const struct in6_addr *laddr = &np->rcv_saddr; const struct in6_addr *faddr = &np->daddr; - const __u16 lport = inet->num; - const __be16 fport = inet->dport; + const __u16 lport = inet->inet_num; + const __be16 fport = inet->inet_dport; struct net *net = sock_net(sk); return inet6_ehashfn(net, laddr, lport, faddr, fport); diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 5f11c4a0daca..5b698b3b463d 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -241,7 +241,7 @@ static inline int inet_lhashfn(struct net *net, const unsigned short num) static inline int inet_sk_listen_hashfn(const struct sock *sk) { - return inet_lhashfn(sock_net(sk), inet_sk(sk)->num); + return inet_lhashfn(sock_net(sk), inet_sk(sk)->inet_num); } /* Caller must disable local BH processing. */ @@ -301,8 +301,8 @@ typedef __u64 __bitwise __addrpair; #endif /* __BIG_ENDIAN */ #define INET_MATCH(__sk, __net, __hash, __cookie, __saddr, __daddr, __ports, __dif)\ (((__sk)->sk_hash == (__hash)) && net_eq(sock_net(__sk), (__net)) && \ - ((*((__addrpair *)&(inet_sk(__sk)->daddr))) == (__cookie)) && \ - ((*((__portpair *)&(inet_sk(__sk)->dport))) == (__ports)) && \ + ((*((__addrpair *)&(inet_sk(__sk)->inet_daddr))) == (__cookie)) && \ + ((*((__portpair *)&(inet_sk(__sk)->inet_dport))) == (__ports)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) #define INET_TW_MATCH(__sk, __net, __hash, __cookie, __saddr, __daddr, __ports, __dif)\ (((__sk)->sk_hash == (__hash)) && net_eq(sock_net(__sk), (__net)) && \ @@ -313,9 +313,9 @@ typedef __u64 __bitwise __addrpair; #define INET_ADDR_COOKIE(__name, __saddr, __daddr) #define INET_MATCH(__sk, __net, __hash, __cookie, __saddr, __daddr, __ports, __dif) \ (((__sk)->sk_hash == (__hash)) && net_eq(sock_net(__sk), (__net)) && \ - (inet_sk(__sk)->daddr == (__saddr)) && \ - (inet_sk(__sk)->rcv_saddr == (__daddr)) && \ - ((*((__portpair *)&(inet_sk(__sk)->dport))) == (__ports)) && \ + (inet_sk(__sk)->inet_daddr == (__saddr)) && \ + (inet_sk(__sk)->inet_rcv_saddr == (__daddr)) && \ + ((*((__portpair *)&(inet_sk(__sk)->inet_dport))) == (__ports)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) #define INET_TW_MATCH(__sk, __net, __hash,__cookie, __saddr, __daddr, __ports, __dif) \ (((__sk)->sk_hash == (__hash)) && net_eq(sock_net(__sk), (__net)) && \ diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 47004f35cc7e..bd4c53f75ac0 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -93,14 +93,14 @@ struct rtable; * * @sk - ancestor class * @pinet6 - pointer to IPv6 control block - * @daddr - Foreign IPv4 addr - * @rcv_saddr - Bound local IPv4 addr - * @dport - Destination port - * @num - Local port - * @saddr - Sending source + * @inet_daddr - Foreign IPv4 addr + * @inet_rcv_saddr - Bound local IPv4 addr + * @inet_dport - Destination port + * @inet_num - Local port + * @inet_saddr - Sending source * @uc_ttl - Unicast TTL - * @sport - Source port - * @id - ID counter for DF pkts + * @inet_sport - Source port + * @inet_id - ID counter for DF pkts * @tos - TOS * @mc_ttl - Multicasting TTL * @is_icsk - is this an inet_connection_sock? @@ -115,16 +115,16 @@ struct inet_sock { struct ipv6_pinfo *pinet6; #endif /* Socket demultiplex comparisons on incoming packets. */ - __be32 daddr; - __be32 rcv_saddr; - __be16 dport; - __u16 num; - __be32 saddr; + __be32 inet_daddr; + __be32 inet_rcv_saddr; + __be16 inet_dport; + __u16 inet_num; + __be32 inet_saddr; __s16 uc_ttl; __u16 cmsg_flags; struct ip_options *opt; - __be16 sport; - __u16 id; + __be16 inet_sport; + __u16 inet_id; __u8 tos; __u8 mc_ttl; __u8 pmtudisc; @@ -190,10 +190,10 @@ static inline unsigned int inet_ehashfn(struct net *net, static inline int inet_sk_ehashfn(const struct sock *sk) { const struct inet_sock *inet = inet_sk(sk); - const __be32 laddr = inet->rcv_saddr; - const __u16 lport = inet->num; - const __be32 faddr = inet->daddr; - const __be16 fport = inet->dport; + const __be32 laddr = inet->inet_rcv_saddr; + const __u16 lport = inet->inet_num; + const __be32 faddr = inet->inet_daddr; + const __be16 fport = inet->inet_dport; struct net *net = sock_net(sk); return inet_ehashfn(net, laddr, lport, faddr, fport); diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index b63b80fac567..37f3aea074a5 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -194,7 +194,7 @@ static inline struct inet_timewait_sock *inet_twsk(const struct sock *sk) static inline __be32 inet_rcv_saddr(const struct sock *sk) { return likely(sk->sk_state != TCP_TIME_WAIT) ? - inet_sk(sk)->rcv_saddr : inet_twsk(sk)->tw_rcv_saddr; + inet_sk(sk)->inet_rcv_saddr : inet_twsk(sk)->tw_rcv_saddr; } extern void inet_twsk_put(struct inet_timewait_sock *tw); diff --git a/include/net/ip.h b/include/net/ip.h index 2f47e5482b55..376adf47764e 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -240,8 +240,8 @@ static inline void ip_select_ident(struct iphdr *iph, struct dst_entry *dst, str * does not change, they drop every other packet in * a TCP stream using header compression. */ - iph->id = (sk && inet_sk(sk)->daddr) ? - htons(inet_sk(sk)->id++) : 0; + iph->id = (sk && inet_sk(sk)->inet_daddr) ? + htons(inet_sk(sk)->inet_id++) : 0; } else __ip_select_ident(iph, dst, 0); } @@ -249,9 +249,9 @@ static inline void ip_select_ident(struct iphdr *iph, struct dst_entry *dst, str static inline void ip_select_ident_more(struct iphdr *iph, struct dst_entry *dst, struct sock *sk, int more) { if (iph->frag_off & htons(IP_DF)) { - if (sk && inet_sk(sk)->daddr) { - iph->id = htons(inet_sk(sk)->id); - inet_sk(sk)->id += 1 + more; + if (sk && inet_sk(sk)->inet_daddr) { + iph->id = htons(inet_sk(sk)->inet_id); + inet_sk(sk)->inet_id += 1 + more; } else iph->id = 0; } else @@ -317,7 +317,7 @@ static inline void ip_ib_mc_map(__be32 naddr, const unsigned char *broadcast, ch static __inline__ void inet_reset_saddr(struct sock *sk) { - inet_sk(sk)->rcv_saddr = inet_sk(sk)->saddr = 0; + inet_sk(sk)->inet_rcv_saddr = inet_sk(sk)->inet_saddr = 0; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) if (sk->sk_family == PF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 7302e1498d46..00028d4b09d9 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -62,10 +62,10 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) nexthop = inet->opt->faddr; } - tmp = ip_route_connect(&rt, nexthop, inet->saddr, + tmp = ip_route_connect(&rt, nexthop, inet->inet_saddr, RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, IPPROTO_DCCP, - inet->sport, usin->sin_port, sk, 1); + inet->inet_sport, usin->sin_port, sk, 1); if (tmp < 0) return tmp; @@ -77,12 +77,12 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (inet->opt == NULL || !inet->opt->srr) daddr = rt->rt_dst; - if (inet->saddr == 0) - inet->saddr = rt->rt_src; - inet->rcv_saddr = inet->saddr; + if (inet->inet_saddr == 0) + inet->inet_saddr = rt->rt_src; + inet->inet_rcv_saddr = inet->inet_saddr; - inet->dport = usin->sin_port; - inet->daddr = daddr; + inet->inet_dport = usin->sin_port; + inet->inet_daddr = daddr; inet_csk(sk)->icsk_ext_hdr_len = 0; if (inet->opt != NULL) @@ -98,17 +98,19 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (err != 0) goto failure; - err = ip_route_newports(&rt, IPPROTO_DCCP, inet->sport, inet->dport, - sk); + err = ip_route_newports(&rt, IPPROTO_DCCP, inet->inet_sport, + inet->inet_dport, sk); if (err != 0) goto failure; /* OK, now commit destination to socket. */ sk_setup_caps(sk, &rt->u.dst); - dp->dccps_iss = secure_dccp_sequence_number(inet->saddr, inet->daddr, - inet->sport, inet->dport); - inet->id = dp->dccps_iss ^ jiffies; + dp->dccps_iss = secure_dccp_sequence_number(inet->inet_saddr, + inet->inet_daddr, + inet->inet_sport, + inet->inet_dport); + inet->inet_id = dp->dccps_iss ^ jiffies; err = dccp_connect(sk); rt = NULL; @@ -123,7 +125,7 @@ failure: dccp_set_state(sk, DCCP_CLOSED); ip_rt_put(rt); sk->sk_route_caps = 0; - inet->dport = 0; + inet->inet_dport = 0; goto out; } @@ -352,7 +354,9 @@ void dccp_v4_send_check(struct sock *sk, int unused, struct sk_buff *skb) struct dccp_hdr *dh = dccp_hdr(skb); dccp_csum_outgoing(skb); - dh->dccph_checksum = dccp_v4_csum_finish(skb, inet->saddr, inet->daddr); + dh->dccph_checksum = dccp_v4_csum_finish(skb, + inet->inet_saddr, + inet->inet_daddr); } EXPORT_SYMBOL_GPL(dccp_v4_send_check); @@ -393,14 +397,14 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb, newinet = inet_sk(newsk); ireq = inet_rsk(req); - newinet->daddr = ireq->rmt_addr; - newinet->rcv_saddr = ireq->loc_addr; - newinet->saddr = ireq->loc_addr; + newinet->inet_daddr = ireq->rmt_addr; + newinet->inet_rcv_saddr = ireq->loc_addr; + newinet->inet_saddr = ireq->loc_addr; newinet->opt = ireq->opt; ireq->opt = NULL; newinet->mc_index = inet_iif(skb); newinet->mc_ttl = ip_hdr(skb)->ttl; - newinet->id = jiffies; + newinet->inet_id = jiffies; dccp_sync_mss(newsk, dst_mtu(dst)); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index a2afb553d8b3..6d89f9f7d5d8 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -158,8 +158,8 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, ipv6_addr_copy(&fl.fl6_dst, &np->daddr); ipv6_addr_copy(&fl.fl6_src, &np->saddr); fl.oif = sk->sk_bound_dev_if; - fl.fl_ip_dport = inet->dport; - fl.fl_ip_sport = inet->sport; + fl.fl_ip_dport = inet->inet_dport; + fl.fl_ip_sport = inet->inet_sport; security_sk_classify_flow(sk, &fl); err = ip6_dst_lookup(sk, &dst, &fl); @@ -510,9 +510,9 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, memcpy(newnp, np, sizeof(struct ipv6_pinfo)); - ipv6_addr_set_v4mapped(newinet->daddr, &newnp->daddr); + ipv6_addr_set_v4mapped(newinet->inet_daddr, &newnp->daddr); - ipv6_addr_set_v4mapped(newinet->saddr, &newnp->saddr); + ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr); ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr); @@ -640,7 +640,8 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, dccp_sync_mss(newsk, dst_mtu(dst)); - newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6; + newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6; + newinet->inet_rcv_saddr = LOOPBACK4_IPV6; __inet6_hash(newsk); __inet_inherit_port(sk, newsk); @@ -968,10 +969,9 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, icsk->icsk_af_ops = &dccp_ipv6_af_ops; sk->sk_backlog_rcv = dccp_v6_do_rcv; goto failure; - } else { - ipv6_addr_set_v4mapped(inet->saddr, &np->saddr); - ipv6_addr_set_v4mapped(inet->rcv_saddr, &np->rcv_saddr); } + ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr); + ipv6_addr_set_v4mapped(inet->inet_rcv_saddr, &np->rcv_saddr); return err; } @@ -984,7 +984,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, ipv6_addr_copy(&fl.fl6_src, saddr ? saddr : &np->saddr); fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = usin->sin6_port; - fl.fl_ip_sport = inet->sport; + fl.fl_ip_sport = inet->inet_sport; security_sk_classify_flow(sk, &fl); if (np->opt != NULL && np->opt->srcrt != NULL) { @@ -1017,7 +1017,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, /* set the source address */ ipv6_addr_copy(&np->saddr, saddr); - inet->rcv_saddr = LOOPBACK4_IPV6; + inet->inet_rcv_saddr = LOOPBACK4_IPV6; __ip6_dst_store(sk, dst, NULL, NULL); @@ -1026,7 +1026,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, icsk->icsk_ext_hdr_len = (np->opt->opt_flen + np->opt->opt_nflen); - inet->dport = usin->sin6_port; + inet->inet_dport = usin->sin6_port; dccp_set_state(sk, DCCP_REQUESTING); err = inet6_hash_connect(&dccp_death_row, sk); @@ -1035,7 +1035,8 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, dp->dccps_iss = secure_dccpv6_sequence_number(np->saddr.s6_addr32, np->daddr.s6_addr32, - inet->sport, inet->dport); + inet->inet_sport, + inet->inet_dport); err = dccp_connect(sk); if (err) goto late_failure; @@ -1046,7 +1047,7 @@ late_failure: dccp_set_state(sk, DCCP_CLOSED); __sk_dst_reset(sk); failure: - inet->dport = 0; + inet->inet_dport = 0; sk->sk_route_caps = 0; return err; } diff --git a/net/dccp/output.c b/net/dccp/output.c index c96119fda688..d6bb753bf6ad 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -99,8 +99,8 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) /* Build DCCP header and checksum it. */ dh = dccp_zeroed_hdr(skb, dccp_header_size); dh->dccph_type = dcb->dccpd_type; - dh->dccph_sport = inet->sport; - dh->dccph_dport = inet->dport; + dh->dccph_sport = inet->inet_sport; + dh->dccph_dport = inet->inet_dport; dh->dccph_doff = (dccp_header_size + dcb->dccpd_opt_len) / 4; dh->dccph_ccval = dcb->dccpd_ccval; dh->dccph_cscov = dp->dccps_pcslen; diff --git a/net/dccp/probe.c b/net/dccp/probe.c index 5e6ec8b9b7b6..dc328425fa20 100644 --- a/net/dccp/probe.c +++ b/net/dccp/probe.c @@ -80,19 +80,20 @@ static int jdccp_sendmsg(struct kiocb *iocb, struct sock *sk, if (ccid_get_current_tx_ccid(dccp_sk(sk)) == DCCPC_CCID3) hc = ccid3_hc_tx_sk(sk); - if (port == 0 || ntohs(inet->dport) == port || - ntohs(inet->sport) == port) { + if (port == 0 || ntohs(inet->inet_dport) == port || + ntohs(inet->inet_sport) == port) { if (hc) printl("%pI4:%u %pI4:%u %d %d %d %d %u %llu %llu %d\n", - &inet->saddr, ntohs(inet->sport), - &inet->daddr, ntohs(inet->dport), size, + &inet->inet_saddr, ntohs(inet->inet_sport), + &inet->inet_daddr, ntohs(inet->inet_dport), size, hc->tx_s, hc->tx_rtt, hc->tx_p, hc->tx_x_calc, hc->tx_x_recv >> 6, hc->tx_x >> 6, hc->tx_t_ipi); else printl("%pI4:%u %pI4:%u %d\n", - &inet->saddr, ntohs(inet->sport), - &inet->daddr, ntohs(inet->dport), size); + &inet->inet_saddr, ntohs(inet->inet_sport), + &inet->inet_daddr, ntohs(inet->inet_dport), + size); } jprobe_return(); diff --git a/net/dccp/proto.c b/net/dccp/proto.c index ecb203fff501..671cd1413d59 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -278,7 +278,7 @@ int dccp_disconnect(struct sock *sk, int flags) sk->sk_send_head = NULL; } - inet->dport = 0; + inet->inet_dport = 0; if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) inet_reset_saddr(sk); @@ -290,7 +290,7 @@ int dccp_disconnect(struct sock *sk, int flags) inet_csk_delack_init(sk); __sk_dst_reset(sk); - WARN_ON(inet->num && !icsk->icsk_bind_hash); + WARN_ON(inet->inet_num && !icsk->icsk_bind_hash); sk->sk_error_report(sk); return err; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 1deff48b122e..04a14b1600ac 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -174,12 +174,12 @@ static int inet_autobind(struct sock *sk) /* We may need to bind the socket. */ lock_sock(sk); inet = inet_sk(sk); - if (!inet->num) { + if (!inet->inet_num) { if (sk->sk_prot->get_port(sk, 0)) { release_sock(sk); return -EAGAIN; } - inet->sport = htons(inet->num); + inet->inet_sport = htons(inet->inet_num); } release_sock(sk); return 0; @@ -354,7 +354,7 @@ lookup_protocol: inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0; if (SOCK_RAW == sock->type) { - inet->num = protocol; + inet->inet_num = protocol; if (IPPROTO_RAW == protocol) inet->hdrincl = 1; } @@ -364,7 +364,7 @@ lookup_protocol: else inet->pmtudisc = IP_PMTUDISC_WANT; - inet->id = 0; + inet->inet_id = 0; sock_init_data(sock, sk); @@ -381,13 +381,13 @@ lookup_protocol: sk_refcnt_debug_inc(sk); - if (inet->num) { + if (inet->inet_num) { /* It assumes that any protocol which allows * the user to assign a number at socket * creation time automatically * shares. */ - inet->sport = htons(inet->num); + inet->inet_sport = htons(inet->inet_num); /* Add to protocol hash chains. */ sk->sk_prot->hash(sk); } @@ -494,27 +494,27 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) /* Check these errors (active socket, double bind). */ err = -EINVAL; - if (sk->sk_state != TCP_CLOSE || inet->num) + if (sk->sk_state != TCP_CLOSE || inet->inet_num) goto out_release_sock; - inet->rcv_saddr = inet->saddr = addr->sin_addr.s_addr; + inet->inet_rcv_saddr = inet->inet_saddr = addr->sin_addr.s_addr; if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST) - inet->saddr = 0; /* Use device */ + inet->inet_saddr = 0; /* Use device */ /* Make sure we are allowed to bind here. */ if (sk->sk_prot->get_port(sk, snum)) { - inet->saddr = inet->rcv_saddr = 0; + inet->inet_saddr = inet->inet_rcv_saddr = 0; err = -EADDRINUSE; goto out_release_sock; } - if (inet->rcv_saddr) + if (inet->inet_rcv_saddr) sk->sk_userlocks |= SOCK_BINDADDR_LOCK; if (snum) sk->sk_userlocks |= SOCK_BINDPORT_LOCK; - inet->sport = htons(inet->num); - inet->daddr = 0; - inet->dport = 0; + inet->inet_sport = htons(inet->inet_num); + inet->inet_daddr = 0; + inet->inet_dport = 0; sk_dst_reset(sk); err = 0; out_release_sock: @@ -532,7 +532,7 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr, if (uaddr->sa_family == AF_UNSPEC) return sk->sk_prot->disconnect(sk, flags); - if (!inet_sk(sk)->num && inet_autobind(sk)) + if (!inet_sk(sk)->inet_num && inet_autobind(sk)) return -EAGAIN; return sk->sk_prot->connect(sk, (struct sockaddr *)uaddr, addr_len); } @@ -689,17 +689,17 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr, sin->sin_family = AF_INET; if (peer) { - if (!inet->dport || + if (!inet->inet_dport || (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)) && peer == 1)) return -ENOTCONN; - sin->sin_port = inet->dport; - sin->sin_addr.s_addr = inet->daddr; + sin->sin_port = inet->inet_dport; + sin->sin_addr.s_addr = inet->inet_daddr; } else { - __be32 addr = inet->rcv_saddr; + __be32 addr = inet->inet_rcv_saddr; if (!addr) - addr = inet->saddr; - sin->sin_port = inet->sport; + addr = inet->inet_saddr; + sin->sin_port = inet->inet_sport; sin->sin_addr.s_addr = addr; } memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); @@ -714,7 +714,7 @@ int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, struct sock *sk = sock->sk; /* We may need to bind the socket. */ - if (!inet_sk(sk)->num && inet_autobind(sk)) + if (!inet_sk(sk)->inet_num && inet_autobind(sk)) return -EAGAIN; return sk->sk_prot->sendmsg(iocb, sk, msg, size); @@ -728,7 +728,7 @@ static ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, struct sock *sk = sock->sk; /* We may need to bind the socket. */ - if (!inet_sk(sk)->num && inet_autobind(sk)) + if (!inet_sk(sk)->inet_num && inet_autobind(sk)) return -EAGAIN; if (sk->sk_prot->sendpage) @@ -1059,9 +1059,9 @@ static int inet_sk_reselect_saddr(struct sock *sk) struct inet_sock *inet = inet_sk(sk); int err; struct rtable *rt; - __be32 old_saddr = inet->saddr; + __be32 old_saddr = inet->inet_saddr; __be32 new_saddr; - __be32 daddr = inet->daddr; + __be32 daddr = inet->inet_daddr; if (inet->opt && inet->opt->srr) daddr = inet->opt->faddr; @@ -1071,7 +1071,7 @@ static int inet_sk_reselect_saddr(struct sock *sk) RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, sk->sk_protocol, - inet->sport, inet->dport, sk, 0); + inet->inet_sport, inet->inet_dport, sk, 0); if (err) return err; @@ -1087,7 +1087,7 @@ static int inet_sk_reselect_saddr(struct sock *sk) __func__, &old_saddr, &new_saddr); } - inet->saddr = inet->rcv_saddr = new_saddr; + inet->inet_saddr = inet->inet_rcv_saddr = new_saddr; /* * XXX The only one ugly spot where we need to @@ -1113,7 +1113,7 @@ int inet_sk_rebuild_header(struct sock *sk) return 0; /* Reroute. */ - daddr = inet->daddr; + daddr = inet->inet_daddr; if (inet->opt && inet->opt->srr) daddr = inet->opt->faddr; { @@ -1123,7 +1123,7 @@ int inet_sk_rebuild_header(struct sock *sk) .nl_u = { .ip4_u = { .daddr = daddr, - .saddr = inet->saddr, + .saddr = inet->inet_saddr, .tos = RT_CONN_FLAGS(sk), }, }, @@ -1131,8 +1131,8 @@ int inet_sk_rebuild_header(struct sock *sk) .flags = inet_sk_flowi_flags(sk), .uli_u = { .ports = { - .sport = inet->sport, - .dport = inet->dport, + .sport = inet->inet_sport, + .dport = inet->inet_dport, }, }, }; diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index 5e6c5a0f3fde..fb2465811b48 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -39,7 +39,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) sk_dst_reset(sk); oif = sk->sk_bound_dev_if; - saddr = inet->saddr; + saddr = inet->inet_saddr; if (ipv4_is_multicast(usin->sin_addr.s_addr)) { if (!oif) oif = inet->mc_index; @@ -49,7 +49,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) err = ip_route_connect(&rt, usin->sin_addr.s_addr, saddr, RT_CONN_FLAGS(sk), oif, sk->sk_protocol, - inet->sport, usin->sin_port, sk, 1); + inet->inet_sport, usin->sin_port, sk, 1); if (err) { if (err == -ENETUNREACH) IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); @@ -60,14 +60,14 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) ip_rt_put(rt); return -EACCES; } - if (!inet->saddr) - inet->saddr = rt->rt_src; /* Update source address */ - if (!inet->rcv_saddr) - inet->rcv_saddr = rt->rt_src; - inet->daddr = rt->rt_dst; - inet->dport = usin->sin_port; + if (!inet->inet_saddr) + inet->inet_saddr = rt->rt_src; /* Update source address */ + if (!inet->inet_rcv_saddr) + inet->inet_rcv_saddr = rt->rt_src; + inet->inet_daddr = rt->rt_dst; + inet->inet_dport = usin->sin_port; sk->sk_state = TCP_ESTABLISHED; - inet->id = jiffies; + inet->inet_id = jiffies; sk_dst_set(sk, &rt->u.dst); return(0); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 9139e8f6fdb1..f6a0af759932 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -368,7 +368,7 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, .proto = sk->sk_protocol, .flags = inet_sk_flowi_flags(sk), .uli_u = { .ports = - { .sport = inet_sk(sk)->sport, + { .sport = inet_sk(sk)->inet_sport, .dport = ireq->rmt_port } } }; struct net *net = sock_net(sk); @@ -547,9 +547,9 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, newsk->sk_state = TCP_SYN_RECV; newicsk->icsk_bind_hash = NULL; - inet_sk(newsk)->dport = inet_rsk(req)->rmt_port; - inet_sk(newsk)->num = ntohs(inet_rsk(req)->loc_port); - inet_sk(newsk)->sport = inet_rsk(req)->loc_port; + inet_sk(newsk)->inet_dport = inet_rsk(req)->rmt_port; + inet_sk(newsk)->inet_num = ntohs(inet_rsk(req)->loc_port); + inet_sk(newsk)->inet_sport = inet_rsk(req)->loc_port; newsk->sk_write_space = sk_stream_write_space; newicsk->icsk_retransmits = 0; @@ -580,8 +580,8 @@ void inet_csk_destroy_sock(struct sock *sk) /* It cannot be in hash table! */ WARN_ON(!sk_unhashed(sk)); - /* If it has not 0 inet_sk(sk)->num, it must be bound */ - WARN_ON(inet_sk(sk)->num && !inet_csk(sk)->icsk_bind_hash); + /* If it has not 0 inet_sk(sk)->inet_num, it must be bound */ + WARN_ON(inet_sk(sk)->inet_num && !inet_csk(sk)->icsk_bind_hash); sk->sk_prot->destroy(sk); @@ -616,8 +616,8 @@ int inet_csk_listen_start(struct sock *sk, const int nr_table_entries) * after validation is complete. */ sk->sk_state = TCP_LISTEN; - if (!sk->sk_prot->get_port(sk, inet->num)) { - inet->sport = htons(inet->num); + if (!sk->sk_prot->get_port(sk, inet->inet_num)) { + inet->inet_sport = htons(inet->inet_num); sk_dst_reset(sk); sk->sk_prot->hash(sk); @@ -693,8 +693,8 @@ void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr) const struct inet_sock *inet = inet_sk(sk); sin->sin_family = AF_INET; - sin->sin_addr.s_addr = inet->daddr; - sin->sin_port = inet->dport; + sin->sin_addr.s_addr = inet->inet_daddr; + sin->sin_port = inet->inet_dport; } EXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index cb73fdefba91..bdb78dd180ce 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -116,10 +116,10 @@ static int inet_csk_diag_fill(struct sock *sk, r->id.idiag_cookie[0] = (u32)(unsigned long)sk; r->id.idiag_cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1); - r->id.idiag_sport = inet->sport; - r->id.idiag_dport = inet->dport; - r->id.idiag_src[0] = inet->rcv_saddr; - r->id.idiag_dst[0] = inet->daddr; + r->id.idiag_sport = inet->inet_sport; + r->id.idiag_dport = inet->inet_dport; + r->id.idiag_src[0] = inet->inet_rcv_saddr; + r->id.idiag_dst[0] = inet->inet_daddr; #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) if (r->idiag_family == AF_INET6) { @@ -504,11 +504,11 @@ static int inet_csk_diag_dump(struct sock *sk, } else #endif { - entry.saddr = &inet->rcv_saddr; - entry.daddr = &inet->daddr; + entry.saddr = &inet->inet_rcv_saddr; + entry.daddr = &inet->inet_daddr; } - entry.sport = inet->num; - entry.dport = ntohs(inet->dport); + entry.sport = inet->inet_num; + entry.dport = ntohs(inet->inet_dport); entry.userlocks = sk->sk_userlocks; if (!inet_diag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), &entry)) @@ -584,7 +584,7 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, if (tmo < 0) tmo = 0; - r->id.idiag_sport = inet->sport; + r->id.idiag_sport = inet->inet_sport; r->id.idiag_dport = ireq->rmt_port; r->id.idiag_src[0] = ireq->loc_addr; r->id.idiag_dst[0] = ireq->rmt_addr; @@ -639,7 +639,7 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) { bc = (struct rtattr *)(r + 1); - entry.sport = inet->num; + entry.sport = inet->inet_num; entry.userlocks = sk->sk_userlocks; } @@ -732,7 +732,7 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) continue; } - if (r->id.idiag_sport != inet->sport && + if (r->id.idiag_sport != inet->inet_sport && r->id.idiag_sport) goto next_listen; @@ -797,10 +797,10 @@ skip_listen_ht: goto next_normal; if (!(r->idiag_states & (1 << sk->sk_state))) goto next_normal; - if (r->id.idiag_sport != inet->sport && + if (r->id.idiag_sport != inet->inet_sport && r->id.idiag_sport) goto next_normal; - if (r->id.idiag_dport != inet->dport && + if (r->id.idiag_dport != inet->inet_dport && r->id.idiag_dport) goto next_normal; if (inet_csk_diag_dump(sk, skb, cb) < 0) { diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index a45aaf3d48b1..47ad7aab51e3 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -64,7 +64,7 @@ void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, atomic_inc(&hashinfo->bsockets); - inet_sk(sk)->num = snum; + inet_sk(sk)->inet_num = snum; sk_add_bind_node(sk, &tb->owners); tb->num_owners++; inet_csk(sk)->icsk_bind_hash = tb; @@ -76,7 +76,7 @@ void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, static void __inet_put_port(struct sock *sk) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; - const int bhash = inet_bhashfn(sock_net(sk), inet_sk(sk)->num, + const int bhash = inet_bhashfn(sock_net(sk), inet_sk(sk)->inet_num, hashinfo->bhash_size); struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash]; struct inet_bind_bucket *tb; @@ -88,7 +88,7 @@ static void __inet_put_port(struct sock *sk) __sk_del_bind_node(sk); tb->num_owners--; inet_csk(sk)->icsk_bind_hash = NULL; - inet_sk(sk)->num = 0; + inet_sk(sk)->inet_num = 0; inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); spin_unlock(&head->lock); } @@ -105,7 +105,7 @@ EXPORT_SYMBOL(inet_put_port); void __inet_inherit_port(struct sock *sk, struct sock *child) { struct inet_hashinfo *table = sk->sk_prot->h.hashinfo; - const int bhash = inet_bhashfn(sock_net(sk), inet_sk(child)->num, + const int bhash = inet_bhashfn(sock_net(sk), inet_sk(child)->inet_num, table->bhash_size); struct inet_bind_hashbucket *head = &table->bhash[bhash]; struct inet_bind_bucket *tb; @@ -126,9 +126,9 @@ static inline int compute_score(struct sock *sk, struct net *net, int score = -1; struct inet_sock *inet = inet_sk(sk); - if (net_eq(sock_net(sk), net) && inet->num == hnum && + if (net_eq(sock_net(sk), net) && inet->inet_num == hnum && !ipv6_only_sock(sk)) { - __be32 rcv_saddr = inet->rcv_saddr; + __be32 rcv_saddr = inet->inet_rcv_saddr; score = sk->sk_family == PF_INET ? 1 : 0; if (rcv_saddr) { if (rcv_saddr != daddr) @@ -273,13 +273,14 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, { struct inet_hashinfo *hinfo = death_row->hashinfo; struct inet_sock *inet = inet_sk(sk); - __be32 daddr = inet->rcv_saddr; - __be32 saddr = inet->daddr; + __be32 daddr = inet->inet_rcv_saddr; + __be32 saddr = inet->inet_daddr; int dif = sk->sk_bound_dev_if; INET_ADDR_COOKIE(acookie, saddr, daddr) - const __portpair ports = INET_COMBINED_PORTS(inet->dport, lport); + const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport); struct net *net = sock_net(sk); - unsigned int hash = inet_ehashfn(net, daddr, lport, saddr, inet->dport); + unsigned int hash = inet_ehashfn(net, daddr, lport, + saddr, inet->inet_dport); struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); spinlock_t *lock = inet_ehash_lockp(hinfo, hash); struct sock *sk2; @@ -312,8 +313,8 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, unique: /* Must record num and sport now. Otherwise we will see * in hash table socket with a funny identity. */ - inet->num = lport; - inet->sport = htons(lport); + inet->inet_num = lport; + inet->inet_sport = htons(lport); sk->sk_hash = hash; WARN_ON(!sk_unhashed(sk)); __sk_nulls_add_node_rcu(sk, &head->chain); @@ -341,8 +342,9 @@ not_unique: static inline u32 inet_sk_port_offset(const struct sock *sk) { const struct inet_sock *inet = inet_sk(sk); - return secure_ipv4_port_ephemeral(inet->rcv_saddr, inet->daddr, - inet->dport); + return secure_ipv4_port_ephemeral(inet->inet_rcv_saddr, + inet->inet_daddr, + inet->inet_dport); } void __inet_hash_nolisten(struct sock *sk) @@ -424,7 +426,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, void (*hash)(struct sock *sk)) { struct inet_hashinfo *hinfo = death_row->hashinfo; - const unsigned short snum = inet_sk(sk)->num; + const unsigned short snum = inet_sk(sk)->inet_num; struct inet_bind_hashbucket *head; struct inet_bind_bucket *tb; int ret; @@ -485,7 +487,7 @@ ok: /* Head lock still held and bh's disabled */ inet_bind_hash(sk, tb, port); if (sk_unhashed(sk)) { - inet_sk(sk)->sport = htons(port); + inet_sk(sk)->inet_sport = htons(port); hash(sk); } spin_unlock(&head->lock); diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 2fe571155b22..1f5d508bb18b 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -86,7 +86,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, Note, that any socket with inet->num != 0 MUST be bound in binding cache, even if it is closed. */ - bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->num, + bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->inet_num, hashinfo->bhash_size)]; spin_lock(&bhead->lock); tw->tw_tb = icsk->icsk_bind_hash; @@ -124,14 +124,14 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat kmemcheck_annotate_bitfield(tw, flags); /* Give us an identity. */ - tw->tw_daddr = inet->daddr; - tw->tw_rcv_saddr = inet->rcv_saddr; + tw->tw_daddr = inet->inet_daddr; + tw->tw_rcv_saddr = inet->inet_rcv_saddr; tw->tw_bound_dev_if = sk->sk_bound_dev_if; - tw->tw_num = inet->num; + tw->tw_num = inet->inet_num; tw->tw_state = TCP_TIME_WAIT; tw->tw_substate = state; - tw->tw_sport = inet->sport; - tw->tw_dport = inet->dport; + tw->tw_sport = inet->inet_sport; + tw->tw_dport = inet->inet_dport; tw->tw_family = sk->sk_family; tw->tw_reuse = sk->sk_reuse; tw->tw_hash = sk->sk_hash; diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 6c98b43badf4..fdf51badc8e5 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -161,7 +161,7 @@ int ip_call_ra_chain(struct sk_buff *skb) /* If socket is bound to an interface, only report * the packet if it came from that interface. */ - if (sk && inet_sk(sk)->num == protocol && + if (sk && inet_sk(sk)->inet_num == protocol && (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dev->ifindex) && sock_net(sk) == dev_net(dev)) { diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index f9895180f481..322b40864ac0 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -329,7 +329,7 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok) __be32 daddr; /* Use correct destination address if we have options. */ - daddr = inet->daddr; + daddr = inet->inet_daddr; if(opt && opt->srr) daddr = opt->faddr; @@ -338,13 +338,13 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok) .mark = sk->sk_mark, .nl_u = { .ip4_u = { .daddr = daddr, - .saddr = inet->saddr, + .saddr = inet->inet_saddr, .tos = RT_CONN_FLAGS(sk) } }, .proto = sk->sk_protocol, .flags = inet_sk_flowi_flags(sk), .uli_u = { .ports = - { .sport = inet->sport, - .dport = inet->dport } } }; + { .sport = inet->inet_sport, + .dport = inet->inet_dport } } }; /* If this fails, retransmit mechanism of transport layer will * keep trying until route appears or the connection times @@ -379,7 +379,7 @@ packet_routed: if (opt && opt->optlen) { iph->ihl += opt->optlen >> 2; - ip_options_build(skb, opt, inet->daddr, rt, 0); + ip_options_build(skb, opt, inet->inet_daddr, rt, 0); } ip_select_ident_more(iph, &rt->u.dst, sk, @@ -846,7 +846,8 @@ int ip_append_data(struct sock *sk, maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; if (inet->cork.length + length > 0xFFFF - fragheaderlen) { - ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu-exthdrlen); + ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport, + mtu-exthdrlen); return -EMSGSIZE; } @@ -1100,7 +1101,7 @@ ssize_t ip_append_page(struct sock *sk, struct page *page, maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; if (inet->cork.length + size > 0xFFFF - fragheaderlen) { - ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu); + ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport, mtu); return -EMSGSIZE; } diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 0c0b6e363a20..c602d985fe14 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -245,7 +245,7 @@ int ip_ra_control(struct sock *sk, unsigned char on, { struct ip_ra_chain *ra, *new_ra, **rap; - if (sk->sk_type != SOCK_RAW || inet_sk(sk)->num == IPPROTO_RAW) + if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num == IPPROTO_RAW) return -EINVAL; new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL; @@ -492,7 +492,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, if (sk->sk_family == PF_INET || (!((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) && - inet->daddr != LOOPBACK4_IPV6)) { + inet->inet_daddr != LOOPBACK4_IPV6)) { #endif if (inet->opt) icsk->icsk_ext_hdr_len -= inet->opt->optlen; @@ -1181,8 +1181,8 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, if (inet->cmsg_flags & IP_CMSG_PKTINFO) { struct in_pktinfo info; - info.ipi_addr.s_addr = inet->rcv_saddr; - info.ipi_spec_dst.s_addr = inet->rcv_saddr; + info.ipi_addr.s_addr = inet->inet_rcv_saddr; + info.ipi_spec_dst.s_addr = inet->inet_rcv_saddr; info.ipi_ifindex = inet->mc_index; put_cmsg(&msg, SOL_IP, IP_PKTINFO, sizeof(info), &info); } diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index c757f0b4b74c..694974502ea6 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -956,7 +956,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi switch (optname) { case MRT_INIT: if (sk->sk_type != SOCK_RAW || - inet_sk(sk)->num != IPPROTO_IGMP) + inet_sk(sk)->inet_num != IPPROTO_IGMP) return -EOPNOTSUPP; if (optlen != sizeof(int)) return -ENOPROTOOPT; diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index aa95bb82ee6c..9cd423ffafa8 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -255,10 +255,10 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len) struct nf_conntrack_tuple tuple; memset(&tuple, 0, sizeof(tuple)); - tuple.src.u3.ip = inet->rcv_saddr; - tuple.src.u.tcp.port = inet->sport; - tuple.dst.u3.ip = inet->daddr; - tuple.dst.u.tcp.port = inet->dport; + tuple.src.u3.ip = inet->inet_rcv_saddr; + tuple.src.u.tcp.port = inet->inet_sport; + tuple.dst.u3.ip = inet->inet_daddr; + tuple.dst.u.tcp.port = inet->inet_dport; tuple.src.l3num = PF_INET; tuple.dst.protonum = sk->sk_protocol; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 39e2a6b8752c..9ef8c0829a77 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -87,7 +87,7 @@ void raw_hash_sk(struct sock *sk) struct raw_hashinfo *h = sk->sk_prot->h.raw_hash; struct hlist_head *head; - head = &h->ht[inet_sk(sk)->num & (RAW_HTABLE_SIZE - 1)]; + head = &h->ht[inet_sk(sk)->inet_num & (RAW_HTABLE_SIZE - 1)]; write_lock_bh(&h->lock); sk_add_node(sk, head); @@ -115,9 +115,9 @@ static struct sock *__raw_v4_lookup(struct net *net, struct sock *sk, sk_for_each_from(sk, node) { struct inet_sock *inet = inet_sk(sk); - if (net_eq(sock_net(sk), net) && inet->num == num && - !(inet->daddr && inet->daddr != raddr) && - !(inet->rcv_saddr && inet->rcv_saddr != laddr) && + if (net_eq(sock_net(sk), net) && inet->inet_num == num && + !(inet->inet_daddr && inet->inet_daddr != raddr) && + !(inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr) && !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)) goto found; /* gotcha */ } @@ -326,7 +326,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, int err; if (length > rt->u.dst.dev->mtu) { - ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, + ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport, rt->u.dst.dev->mtu); return -EMSGSIZE; } @@ -489,10 +489,10 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, err = -EDESTADDRREQ; if (sk->sk_state != TCP_ESTABLISHED) goto out; - daddr = inet->daddr; + daddr = inet->inet_daddr; } - ipc.addr = inet->saddr; + ipc.addr = inet->inet_saddr; ipc.opt = NULL; ipc.shtx.flags = 0; ipc.oif = sk->sk_bound_dev_if; @@ -634,9 +634,9 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (addr->sin_addr.s_addr && chk_addr_ret != RTN_LOCAL && chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST) goto out; - inet->rcv_saddr = inet->saddr = addr->sin_addr.s_addr; + inet->inet_rcv_saddr = inet->inet_saddr = addr->sin_addr.s_addr; if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST) - inet->saddr = 0; /* Use device */ + inet->inet_saddr = 0; /* Use device */ sk_dst_reset(sk); ret = 0; out: return ret; @@ -706,7 +706,7 @@ static int raw_init(struct sock *sk) { struct raw_sock *rp = raw_sk(sk); - if (inet_sk(sk)->num == IPPROTO_ICMP) + if (inet_sk(sk)->inet_num == IPPROTO_ICMP) memset(&rp->filter, 0, sizeof(rp->filter)); return 0; } @@ -743,7 +743,7 @@ static int do_raw_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen) { if (optname == ICMP_FILTER) { - if (inet_sk(sk)->num != IPPROTO_ICMP) + if (inet_sk(sk)->inet_num != IPPROTO_ICMP) return -EOPNOTSUPP; else return raw_seticmpfilter(sk, optval, optlen); @@ -773,7 +773,7 @@ static int do_raw_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { if (optname == ICMP_FILTER) { - if (inet_sk(sk)->num != IPPROTO_ICMP) + if (inet_sk(sk)->inet_num != IPPROTO_ICMP) return -EOPNOTSUPP; else return raw_geticmpfilter(sk, optval, optlen); @@ -932,10 +932,10 @@ EXPORT_SYMBOL_GPL(raw_seq_stop); static void raw_sock_seq_show(struct seq_file *seq, struct sock *sp, int i) { struct inet_sock *inet = inet_sk(sp); - __be32 dest = inet->daddr, - src = inet->rcv_saddr; + __be32 dest = inet->inet_daddr, + src = inet->inet_rcv_saddr; __u16 destp = 0, - srcp = inet->num; + srcp = inet->inet_num; seq_printf(seq, "%4d: %08X:%04X %08X:%04X" " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n", diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index cf13726259cd..206a291dff03 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1998,7 +1998,7 @@ int tcp_disconnect(struct sock *sk, int flags) __skb_queue_purge(&sk->sk_async_wait_queue); #endif - inet->dport = 0; + inet->inet_dport = 0; if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) inet_reset_saddr(sk); @@ -2022,7 +2022,7 @@ int tcp_disconnect(struct sock *sk, int flags) memset(&tp->rx_opt, 0, sizeof(tp->rx_opt)); __sk_dst_reset(sk); - WARN_ON(inet->num && !icsk->icsk_bind_hash); + WARN_ON(inet->inet_num && !icsk->icsk_bind_hash); sk->sk_error_report(sk); return err; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 99718703d040..a4a3390a5287 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -165,10 +165,10 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) nexthop = inet->opt->faddr; } - tmp = ip_route_connect(&rt, nexthop, inet->saddr, + tmp = ip_route_connect(&rt, nexthop, inet->inet_saddr, RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, IPPROTO_TCP, - inet->sport, usin->sin_port, sk, 1); + inet->inet_sport, usin->sin_port, sk, 1); if (tmp < 0) { if (tmp == -ENETUNREACH) IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); @@ -183,11 +183,11 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (!inet->opt || !inet->opt->srr) daddr = rt->rt_dst; - if (!inet->saddr) - inet->saddr = rt->rt_src; - inet->rcv_saddr = inet->saddr; + if (!inet->inet_saddr) + inet->inet_saddr = rt->rt_src; + inet->inet_rcv_saddr = inet->inet_saddr; - if (tp->rx_opt.ts_recent_stamp && inet->daddr != daddr) { + if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) { /* Reset inherited state */ tp->rx_opt.ts_recent = 0; tp->rx_opt.ts_recent_stamp = 0; @@ -210,8 +210,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) } } - inet->dport = usin->sin_port; - inet->daddr = daddr; + inet->inet_dport = usin->sin_port; + inet->inet_daddr = daddr; inet_csk(sk)->icsk_ext_hdr_len = 0; if (inet->opt) @@ -230,7 +230,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) goto failure; err = ip_route_newports(&rt, IPPROTO_TCP, - inet->sport, inet->dport, sk); + inet->inet_sport, inet->inet_dport, sk); if (err) goto failure; @@ -239,12 +239,12 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) sk_setup_caps(sk, &rt->u.dst); if (!tp->write_seq) - tp->write_seq = secure_tcp_sequence_number(inet->saddr, - inet->daddr, - inet->sport, + tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr, + inet->inet_daddr, + inet->inet_sport, usin->sin_port); - inet->id = tp->write_seq ^ jiffies; + inet->inet_id = tp->write_seq ^ jiffies; err = tcp_connect(sk); rt = NULL; @@ -261,7 +261,7 @@ failure: tcp_set_state(sk, TCP_CLOSE); ip_rt_put(rt); sk->sk_route_caps = 0; - inet->dport = 0; + inet->inet_dport = 0; return err; } @@ -520,12 +520,13 @@ void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb) struct tcphdr *th = tcp_hdr(skb); if (skb->ip_summed == CHECKSUM_PARTIAL) { - th->check = ~tcp_v4_check(len, inet->saddr, - inet->daddr, 0); + th->check = ~tcp_v4_check(len, inet->inet_saddr, + inet->inet_daddr, 0); skb->csum_start = skb_transport_header(skb) - skb->head; skb->csum_offset = offsetof(struct tcphdr, check); } else { - th->check = tcp_v4_check(len, inet->saddr, inet->daddr, + th->check = tcp_v4_check(len, inet->inet_saddr, + inet->inet_daddr, csum_partial(th, th->doff << 2, skb->csum)); @@ -848,7 +849,7 @@ static struct tcp_md5sig_key * struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk, struct sock *addr_sk) { - return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->daddr); + return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->inet_daddr); } EXPORT_SYMBOL(tcp_v4_md5_lookup); @@ -923,7 +924,7 @@ EXPORT_SYMBOL(tcp_v4_md5_do_add); static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk, u8 *newkey, u8 newkeylen) { - return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->daddr, + return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->inet_daddr, newkey, newkeylen); } @@ -1089,8 +1090,8 @@ int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key, __be32 saddr, daddr; if (sk) { - saddr = inet_sk(sk)->saddr; - daddr = inet_sk(sk)->daddr; + saddr = inet_sk(sk)->inet_saddr; + daddr = inet_sk(sk)->inet_daddr; } else if (req) { saddr = inet_rsk(req)->loc_addr; daddr = inet_rsk(req)->rmt_addr; @@ -1380,9 +1381,9 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newtp = tcp_sk(newsk); newinet = inet_sk(newsk); ireq = inet_rsk(req); - newinet->daddr = ireq->rmt_addr; - newinet->rcv_saddr = ireq->loc_addr; - newinet->saddr = ireq->loc_addr; + newinet->inet_daddr = ireq->rmt_addr; + newinet->inet_rcv_saddr = ireq->loc_addr; + newinet->inet_saddr = ireq->loc_addr; newinet->opt = ireq->opt; ireq->opt = NULL; newinet->mc_index = inet_iif(skb); @@ -1390,7 +1391,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, inet_csk(newsk)->icsk_ext_hdr_len = 0; if (newinet->opt) inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen; - newinet->id = newtp->write_seq ^ jiffies; + newinet->inet_id = newtp->write_seq ^ jiffies; tcp_mtup_init(newsk); tcp_sync_mss(newsk, dst_mtu(dst)); @@ -1403,7 +1404,8 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, #ifdef CONFIG_TCP_MD5SIG /* Copy over the MD5 key from the original socket */ - if ((key = tcp_v4_md5_do_lookup(sk, newinet->daddr)) != NULL) { + key = tcp_v4_md5_do_lookup(sk, newinet->inet_daddr); + if (key != NULL) { /* * We're using one, so create a matching key * on the newsk structure. If we fail to get @@ -1412,7 +1414,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, */ char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC); if (newkey != NULL) - tcp_v4_md5_do_add(newsk, newinet->daddr, + tcp_v4_md5_do_add(newsk, newinet->inet_daddr, newkey, key->keylen); newsk->sk_route_caps &= ~NETIF_F_GSO_MASK; } @@ -1711,8 +1713,8 @@ int tcp_v4_remember_stamp(struct sock *sk) struct inet_peer *peer = NULL; int release_it = 0; - if (!rt || rt->rt_dst != inet->daddr) { - peer = inet_getpeer(inet->daddr, 1); + if (!rt || rt->rt_dst != inet->inet_daddr) { + peer = inet_getpeer(inet->inet_daddr, 1); release_it = 1; } else { if (!rt->peer) @@ -2225,7 +2227,7 @@ static void get_openreq4(struct sock *sk, struct request_sock *req, " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p%n", i, ireq->loc_addr, - ntohs(inet_sk(sk)->sport), + ntohs(inet_sk(sk)->inet_sport), ireq->rmt_addr, ntohs(ireq->rmt_port), TCP_SYN_RECV, @@ -2248,10 +2250,10 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len) struct tcp_sock *tp = tcp_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); struct inet_sock *inet = inet_sk(sk); - __be32 dest = inet->daddr; - __be32 src = inet->rcv_saddr; - __u16 destp = ntohs(inet->dport); - __u16 srcp = ntohs(inet->sport); + __be32 dest = inet->inet_daddr; + __be32 src = inet->inet_rcv_saddr; + __u16 destp = ntohs(inet->inet_dport); + __u16 srcp = ntohs(inet->inet_sport); if (icsk->icsk_pending == ICSK_TIME_RETRANS) { timer_active = 1; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index fcd278a7080e..2e2eb74ac4cc 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -661,8 +661,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, /* Build TCP header and checksum it. */ th = tcp_hdr(skb); - th->source = inet->sport; - th->dest = inet->dport; + th->source = inet->inet_sport; + th->dest = inet->inet_dport; th->seq = htonl(tcb->seq); th->ack_seq = htonl(tp->rcv_nxt); *(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) | diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index 59f5b5e7c566..7a3cc2ffad84 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -94,7 +94,8 @@ static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, const struct inet_sock *inet = inet_sk(sk); /* Only update if port matches */ - if ((port == 0 || ntohs(inet->dport) == port || ntohs(inet->sport) == port) + if ((port == 0 || ntohs(inet->inet_dport) == port || + ntohs(inet->inet_sport) == port) && (full || tp->snd_cwnd != tcp_probe.lastcwnd)) { spin_lock(&tcp_probe.lock); @@ -103,10 +104,10 @@ static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, struct tcp_log *p = tcp_probe.log + tcp_probe.head; p->tstamp = ktime_get(); - p->saddr = inet->saddr; - p->sport = inet->sport; - p->daddr = inet->daddr; - p->dport = inet->dport; + p->saddr = inet->inet_saddr; + p->sport = inet->inet_sport; + p->daddr = inet->inet_daddr; + p->dport = inet->inet_dport; p->length = skb->len; p->snd_nxt = tp->snd_nxt; p->snd_una = tp->snd_una; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index cdb2ca7684d4..6e8996cb79d0 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -303,15 +303,15 @@ void tcp_retransmit_timer(struct sock *sk) struct inet_sock *inet = inet_sk(sk); if (sk->sk_family == AF_INET) { LIMIT_NETDEBUG(KERN_DEBUG "TCP: Peer %pI4:%u/%u unexpectedly shrunk window %u:%u (repaired)\n", - &inet->daddr, ntohs(inet->dport), - inet->num, tp->snd_una, tp->snd_nxt); + &inet->inet_daddr, ntohs(inet->inet_dport), + inet->inet_num, tp->snd_una, tp->snd_nxt); } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) else if (sk->sk_family == AF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); LIMIT_NETDEBUG(KERN_DEBUG "TCP: Peer %pI6:%u/%u unexpectedly shrunk window %u:%u (repaired)\n", - &np->daddr, ntohs(inet->dport), - inet->num, tp->snd_una, tp->snd_nxt); + &np->daddr, ntohs(inet->inet_dport), + inet->inet_num, tp->snd_una, tp->snd_nxt); } #endif #endif diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 45a8a7e374d8..ec5c7a720c0c 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -214,7 +214,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, goto fail_unlock; } found: - inet_sk(sk)->num = snum; + inet_sk(sk)->inet_num = snum; sk->sk_hash = snum; if (sk_unhashed(sk)) { sk_nulls_add_node_rcu(sk, &hslot->head); @@ -233,8 +233,8 @@ static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); return (!ipv6_only_sock(sk2) && - (!inet1->rcv_saddr || !inet2->rcv_saddr || - inet1->rcv_saddr == inet2->rcv_saddr)); + (!inet1->inet_rcv_saddr || !inet2->inet_rcv_saddr || + inet1->inet_rcv_saddr == inet2->inet_rcv_saddr)); } int udp_v4_get_port(struct sock *sk, unsigned short snum) @@ -253,18 +253,18 @@ static inline int compute_score(struct sock *sk, struct net *net, __be32 saddr, struct inet_sock *inet = inet_sk(sk); score = (sk->sk_family == PF_INET ? 1 : 0); - if (inet->rcv_saddr) { - if (inet->rcv_saddr != daddr) + if (inet->inet_rcv_saddr) { + if (inet->inet_rcv_saddr != daddr) return -1; score += 2; } - if (inet->daddr) { - if (inet->daddr != saddr) + if (inet->inet_daddr) { + if (inet->inet_daddr != saddr) return -1; score += 2; } - if (inet->dport) { - if (inet->dport != sport) + if (inet->inet_dport) { + if (inet->inet_dport != sport) return -1; score += 2; } @@ -360,9 +360,10 @@ static inline struct sock *udp_v4_mcast_next(struct net *net, struct sock *sk, if (!net_eq(sock_net(s), net) || s->sk_hash != hnum || - (inet->daddr && inet->daddr != rmt_addr) || - (inet->dport != rmt_port && inet->dport) || - (inet->rcv_saddr && inet->rcv_saddr != loc_addr) || + (inet->inet_daddr && inet->inet_daddr != rmt_addr) || + (inet->inet_dport != rmt_port && inet->inet_dport) || + (inet->inet_rcv_saddr && + inet->inet_rcv_saddr != loc_addr) || ipv6_only_sock(s) || (s->sk_bound_dev_if && s->sk_bound_dev_if != dif)) continue; @@ -646,14 +647,14 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, } else { if (sk->sk_state != TCP_ESTABLISHED) return -EDESTADDRREQ; - daddr = inet->daddr; - dport = inet->dport; + daddr = inet->inet_daddr; + dport = inet->inet_dport; /* Open fast path for connected socket. Route will not be used, if at least one option is set. */ connected = 1; } - ipc.addr = inet->saddr; + ipc.addr = inet->inet_saddr; ipc.oif = sk->sk_bound_dev_if; err = sock_tx_timestamp(msg, sk, &ipc.shtx); @@ -708,7 +709,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, .proto = sk->sk_protocol, .flags = inet_sk_flowi_flags(sk), .uli_u = { .ports = - { .sport = inet->sport, + { .sport = inet->inet_sport, .dport = dport } } }; struct net *net = sock_net(sk); @@ -752,7 +753,7 @@ back_from_confirm: inet->cork.fl.fl4_dst = daddr; inet->cork.fl.fl_ip_dport = dport; inet->cork.fl.fl4_src = saddr; - inet->cork.fl.fl_ip_sport = inet->sport; + inet->cork.fl.fl_ip_sport = inet->inet_sport; up->pending = AF_INET; do_append_data: @@ -1029,15 +1030,15 @@ int udp_disconnect(struct sock *sk, int flags) */ sk->sk_state = TCP_CLOSE; - inet->daddr = 0; - inet->dport = 0; + inet->inet_daddr = 0; + inet->inet_dport = 0; sk->sk_bound_dev_if = 0; if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) inet_reset_saddr(sk); if (!(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) { sk->sk_prot->unhash(sk); - inet->sport = 0; + inet->inet_sport = 0; } sk_dst_reset(sk); return 0; @@ -1053,7 +1054,7 @@ void udp_lib_unhash(struct sock *sk) spin_lock_bh(&hslot->lock); if (sk_nulls_del_node_init_rcu(sk)) { - inet_sk(sk)->num = 0; + inet_sk(sk)->inet_num = 0; sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); } spin_unlock_bh(&hslot->lock); @@ -1752,10 +1753,10 @@ static void udp4_format_sock(struct sock *sp, struct seq_file *f, int bucket, int *len) { struct inet_sock *inet = inet_sk(sp); - __be32 dest = inet->daddr; - __be32 src = inet->rcv_saddr; - __u16 destp = ntohs(inet->dport); - __u16 srcp = ntohs(inet->sport); + __be32 dest = inet->inet_daddr; + __be32 src = inet->inet_rcv_saddr; + __u16 destp = ntohs(inet->inet_dport); + __u16 srcp = ntohs(inet->inet_sport); seq_printf(f, "%5d: %08X:%04X %08X:%04X" " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d%n", diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 94216519873c..b6d058818673 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -185,7 +185,7 @@ lookup_protocol: inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0; if (SOCK_RAW == sock->type) { - inet->num = protocol; + inet->inet_num = protocol; if (IPPROTO_RAW == protocol) inet->hdrincl = 1; } @@ -228,12 +228,12 @@ lookup_protocol: */ sk_refcnt_debug_inc(sk); - if (inet->num) { + if (inet->inet_num) { /* It assumes that any protocol which allows * the user to assign a number at socket * creation time automatically shares. */ - inet->sport = htons(inet->num); + inet->inet_sport = htons(inet->inet_num); sk->sk_prot->hash(sk); } if (sk->sk_prot->init) { @@ -281,7 +281,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) lock_sock(sk); /* Check these errors (active socket, double bind). */ - if (sk->sk_state != TCP_CLOSE || inet->num) { + if (sk->sk_state != TCP_CLOSE || inet->inet_num) { err = -EINVAL; goto out; } @@ -353,8 +353,8 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) } } - inet->rcv_saddr = v4addr; - inet->saddr = v4addr; + inet->inet_rcv_saddr = v4addr; + inet->inet_saddr = v4addr; ipv6_addr_copy(&np->rcv_saddr, &addr->sin6_addr); @@ -375,9 +375,9 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) } if (snum) sk->sk_userlocks |= SOCK_BINDPORT_LOCK; - inet->sport = htons(inet->num); - inet->dport = 0; - inet->daddr = 0; + inet->inet_sport = htons(inet->inet_num); + inet->inet_dport = 0; + inet->inet_daddr = 0; out: release_sock(sk); return err; @@ -441,12 +441,12 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr, sin->sin6_flowinfo = 0; sin->sin6_scope_id = 0; if (peer) { - if (!inet->dport) + if (!inet->inet_dport) return -ENOTCONN; if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)) && peer == 1) return -ENOTCONN; - sin->sin6_port = inet->dport; + sin->sin6_port = inet->inet_dport; ipv6_addr_copy(&sin->sin6_addr, &np->daddr); if (np->sndflow) sin->sin6_flowinfo = np->flow_label; @@ -456,7 +456,7 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr, else ipv6_addr_copy(&sin->sin6_addr, &np->rcv_saddr); - sin->sin6_port = inet->sport; + sin->sin6_port = inet->inet_sport; } if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL) sin->sin6_scope_id = sk->sk_bound_dev_if; @@ -655,8 +655,8 @@ int inet6_sk_rebuild_header(struct sock *sk) fl.fl6_flowlabel = np->flow_label; fl.oif = sk->sk_bound_dev_if; fl.mark = sk->sk_mark; - fl.fl_ip_dport = inet->dport; - fl.fl_ip_sport = inet->sport; + fl.fl_ip_dport = inet->inet_dport; + fl.fl_ip_sport = inet->inet_sport; security_sk_classify_flow(sk, &fl); if (np->opt && np->opt->srcrt) { diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index dbfec7147aa5..9f70452a69e7 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -98,13 +98,14 @@ ipv4_connected: if (err) goto out; - ipv6_addr_set_v4mapped(inet->daddr, &np->daddr); + ipv6_addr_set_v4mapped(inet->inet_daddr, &np->daddr); if (ipv6_addr_any(&np->saddr)) - ipv6_addr_set_v4mapped(inet->saddr, &np->saddr); + ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr); if (ipv6_addr_any(&np->rcv_saddr)) - ipv6_addr_set_v4mapped(inet->rcv_saddr, &np->rcv_saddr); + ipv6_addr_set_v4mapped(inet->inet_rcv_saddr, + &np->rcv_saddr); goto out; } @@ -133,7 +134,7 @@ ipv4_connected: ipv6_addr_copy(&np->daddr, daddr); np->flow_label = fl.fl6_flowlabel; - inet->dport = usin->sin6_port; + inet->inet_dport = usin->sin6_port; /* * Check for a route to destination an obtain the @@ -145,8 +146,8 @@ ipv4_connected: ipv6_addr_copy(&fl.fl6_src, &np->saddr); fl.oif = sk->sk_bound_dev_if; fl.mark = sk->sk_mark; - fl.fl_ip_dport = inet->dport; - fl.fl_ip_sport = inet->sport; + fl.fl_ip_dport = inet->inet_dport; + fl.fl_ip_sport = inet->inet_sport; if (!fl.oif && (addr_type&IPV6_ADDR_MULTICAST)) fl.oif = np->mcast_oif; @@ -188,7 +189,7 @@ ipv4_connected: if (ipv6_addr_any(&np->rcv_saddr)) { ipv6_addr_copy(&np->rcv_saddr, &fl.fl6_src); - inet->rcv_saddr = LOOPBACK4_IPV6; + inet->inet_rcv_saddr = LOOPBACK4_IPV6; } ip6_dst_store(sk, dst, diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index a9f4a21b31ea..19dceef4fcca 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -132,7 +132,7 @@ void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr) sin6->sin6_family = AF_INET6; ipv6_addr_copy(&sin6->sin6_addr, &np->daddr); - sin6->sin6_port = inet_sk(sk)->dport; + sin6->sin6_port = inet_sk(sk)->inet_dport; /* We do not store received flowlabel for TCP */ sin6->sin6_flowinfo = 0; sin6->sin6_scope_id = 0; @@ -195,8 +195,8 @@ int inet6_csk_xmit(struct sk_buff *skb, int ipfragok) IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel); fl.oif = sk->sk_bound_dev_if; fl.mark = sk->sk_mark; - fl.fl_ip_sport = inet->sport; - fl.fl_ip_dport = inet->dport; + fl.fl_ip_sport = inet->inet_sport; + fl.fl_ip_dport = inet->inet_dport; security_sk_classify_flow(sk, &fl); if (np->opt && np->opt->srcrt) { diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 874aed86e1a2..00c6a3e6cddf 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -125,7 +125,7 @@ static int inline compute_score(struct sock *sk, struct net *net, { int score = -1; - if (net_eq(sock_net(sk), net) && inet_sk(sk)->num == hnum && + if (net_eq(sock_net(sk), net) && inet_sk(sk)->inet_num == hnum && sk->sk_family == PF_INET6) { const struct ipv6_pinfo *np = inet6_sk(sk); @@ -214,10 +214,10 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, const struct in6_addr *daddr = &np->rcv_saddr; const struct in6_addr *saddr = &np->daddr; const int dif = sk->sk_bound_dev_if; - const __portpair ports = INET_COMBINED_PORTS(inet->dport, lport); + const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport); struct net *net = sock_net(sk); const unsigned int hash = inet6_ehashfn(net, daddr, lport, saddr, - inet->dport); + inet->inet_dport); struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); spinlock_t *lock = inet_ehash_lockp(hinfo, hash); struct sock *sk2; @@ -248,8 +248,8 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, unique: /* Must record num and sport now. Otherwise we will see * in hash table socket with a funny identity. */ - inet->num = lport; - inet->sport = htons(lport); + inet->inet_num = lport; + inet->inet_sport = htons(lport); WARN_ON(!sk_unhashed(sk)); __sk_nulls_add_node_rcu(sk, &head->chain); sk->sk_hash = hash; @@ -279,7 +279,7 @@ static inline u32 inet6_sk_port_offset(const struct sock *sk) const struct ipv6_pinfo *np = inet6_sk(sk); return secure_ipv6_port_ephemeral(np->rcv_saddr.s6_addr32, np->daddr.s6_addr32, - inet->dport); + inet->inet_dport); } int inet6_hash_connect(struct inet_timewait_death_row *death_row, diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 716153941fc4..85849b4f5a36 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1297,7 +1297,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns switch (optname) { case MRT6_INIT: if (sk->sk_type != SOCK_RAW || - inet_sk(sk)->num != IPPROTO_ICMPV6) + inet_sk(sk)->inet_num != IPPROTO_ICMPV6) return -EOPNOTSUPP; if (optlen < sizeof(int)) return -EINVAL; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index dc0f7366073d..39e10ac88019 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -64,7 +64,7 @@ int ip6_ra_control(struct sock *sk, int sel) struct ip6_ra_chain *ra, *new_ra, **rap; /* RA packet may be delivered ONLY to IPPROTO_RAW socket */ - if (sk->sk_type != SOCK_RAW || inet_sk(sk)->num != IPPROTO_RAW) + if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num != IPPROTO_RAW) return -ENOPROTOOPT; new_ra = (sel>=0) ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL; @@ -106,7 +106,7 @@ struct ipv6_txoptions *ipv6_update_options(struct sock *sk, if (inet_sk(sk)->is_icsk) { if (opt && !((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) && - inet_sk(sk)->daddr != LOOPBACK4_IPV6) { + inet_sk(sk)->inet_daddr != LOOPBACK4_IPV6) { struct inet_connection_sock *icsk = inet_csk(sk); icsk->icsk_ext_hdr_len = opt->opt_flen + opt->opt_nflen; icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie); @@ -234,7 +234,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, case IPV6_V6ONLY: if (optlen < sizeof(int) || - inet_sk(sk)->num) + inet_sk(sk)->inet_num) goto e_inval; np->ipv6only = valbool; retv = 0; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index fd737efed96c..52ed7d7f9dab 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -72,7 +72,7 @@ static struct sock *__raw_v6_lookup(struct net *net, struct sock *sk, int is_multicast = ipv6_addr_is_multicast(loc_addr); sk_for_each_from(sk, node) - if (inet_sk(sk)->num == num) { + if (inet_sk(sk)->inet_num == num) { struct ipv6_pinfo *np = inet6_sk(sk); if (!net_eq(sock_net(sk), net)) @@ -298,7 +298,7 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) dev_put(dev); } - inet->rcv_saddr = inet->saddr = v4addr; + inet->inet_rcv_saddr = inet->inet_saddr = v4addr; ipv6_addr_copy(&np->rcv_saddr, &addr->sin6_addr); if (!(addr_type & IPV6_ADDR_MULTICAST)) ipv6_addr_copy(&np->saddr, &addr->sin6_addr); @@ -415,14 +415,14 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb) skb_network_header_len(skb)); if (!csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, - skb->len, inet->num, skb->csum)) + skb->len, inet->inet_num, skb->csum)) skb->ip_summed = CHECKSUM_UNNECESSARY; } if (!skb_csum_unnecessary(skb)) skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->len, - inet->num, 0)); + inet->inet_num, 0)); if (inet->hdrincl) { if (skb_checksum_complete(skb)) { @@ -765,8 +765,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, proto = ntohs(sin6->sin6_port); if (!proto) - proto = inet->num; - else if (proto != inet->num) + proto = inet->inet_num; + else if (proto != inet->inet_num) return(-EINVAL); if (proto > 255) @@ -799,7 +799,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, if (sk->sk_state != TCP_ESTABLISHED) return -EDESTADDRREQ; - proto = inet->num; + proto = inet->inet_num; daddr = &np->daddr; fl.fl6_flowlabel = np->flow_label; } @@ -966,7 +966,7 @@ static int do_rawv6_setsockopt(struct sock *sk, int level, int optname, switch (optname) { case IPV6_CHECKSUM: - if (inet_sk(sk)->num == IPPROTO_ICMPV6 && + if (inet_sk(sk)->inet_num == IPPROTO_ICMPV6 && level == IPPROTO_IPV6) { /* * RFC3542 tells that IPV6_CHECKSUM socket @@ -1006,7 +1006,7 @@ static int rawv6_setsockopt(struct sock *sk, int level, int optname, break; case SOL_ICMPV6: - if (inet_sk(sk)->num != IPPROTO_ICMPV6) + if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6) return -EOPNOTSUPP; return rawv6_seticmpfilter(sk, level, optname, optval, optlen); @@ -1029,7 +1029,7 @@ static int compat_rawv6_setsockopt(struct sock *sk, int level, int optname, case SOL_RAW: break; case SOL_ICMPV6: - if (inet_sk(sk)->num != IPPROTO_ICMPV6) + if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6) return -EOPNOTSUPP; return rawv6_seticmpfilter(sk, level, optname, optval, optlen); case SOL_IPV6: @@ -1086,7 +1086,7 @@ static int rawv6_getsockopt(struct sock *sk, int level, int optname, break; case SOL_ICMPV6: - if (inet_sk(sk)->num != IPPROTO_ICMPV6) + if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6) return -EOPNOTSUPP; return rawv6_geticmpfilter(sk, level, optname, optval, optlen); @@ -1109,7 +1109,7 @@ static int compat_rawv6_getsockopt(struct sock *sk, int level, int optname, case SOL_RAW: break; case SOL_ICMPV6: - if (inet_sk(sk)->num != IPPROTO_ICMPV6) + if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6) return -EOPNOTSUPP; return rawv6_geticmpfilter(sk, level, optname, optval, optlen); case SOL_IPV6: @@ -1156,7 +1156,7 @@ static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg) static void rawv6_close(struct sock *sk, long timeout) { - if (inet_sk(sk)->num == IPPROTO_RAW) + if (inet_sk(sk)->inet_num == IPPROTO_RAW) ip6_ra_control(sk, -1); ip6mr_sk_done(sk); sk_common_release(sk); @@ -1175,7 +1175,7 @@ static int rawv6_init_sk(struct sock *sk) { struct raw6_sock *rp = raw6_sk(sk); - switch (inet_sk(sk)->num) { + switch (inet_sk(sk)->inet_num) { case IPPROTO_ICMPV6: rp->checksum = 1; rp->offset = 2; @@ -1225,7 +1225,7 @@ static void raw6_sock_seq_show(struct seq_file *seq, struct sock *sp, int i) dest = &np->daddr; src = &np->rcv_saddr; destp = 0; - srcp = inet_sk(sp)->num; + srcp = inet_sk(sp)->inet_num; seq_printf(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n", diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index cbe55e5d9f96..c46da533888a 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -254,7 +254,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) fl.oif = sk->sk_bound_dev_if; fl.mark = sk->sk_mark; fl.fl_ip_dport = inet_rsk(req)->rmt_port; - fl.fl_ip_sport = inet_sk(sk)->sport; + fl.fl_ip_sport = inet_sk(sk)->inet_sport; security_req_classify_flow(req, &fl); if (ip6_dst_lookup(sk, &dst, &fl)) goto out_free; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 451763059142..c54ec3615ded 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -226,8 +226,9 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, #endif goto failure; } else { - ipv6_addr_set_v4mapped(inet->saddr, &np->saddr); - ipv6_addr_set_v4mapped(inet->rcv_saddr, &np->rcv_saddr); + ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr); + ipv6_addr_set_v4mapped(inet->inet_rcv_saddr, + &np->rcv_saddr); } return err; @@ -243,7 +244,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl.oif = sk->sk_bound_dev_if; fl.mark = sk->sk_mark; fl.fl_ip_dport = usin->sin6_port; - fl.fl_ip_sport = inet->sport; + fl.fl_ip_sport = inet->inet_sport; if (np->opt && np->opt->srcrt) { struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt; @@ -275,7 +276,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, /* set the source address */ ipv6_addr_copy(&np->saddr, saddr); - inet->rcv_saddr = LOOPBACK4_IPV6; + inet->inet_rcv_saddr = LOOPBACK4_IPV6; sk->sk_gso_type = SKB_GSO_TCPV6; __ip6_dst_store(sk, dst, NULL, NULL); @@ -287,7 +288,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); - inet->dport = usin->sin6_port; + inet->inet_dport = usin->sin6_port; tcp_set_state(sk, TCP_SYN_SENT); err = inet6_hash_connect(&tcp_death_row, sk); @@ -297,8 +298,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (!tp->write_seq) tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32, np->daddr.s6_addr32, - inet->sport, - inet->dport); + inet->inet_sport, + inet->inet_dport); err = tcp_connect(sk); if (err) @@ -310,7 +311,7 @@ late_failure: tcp_set_state(sk, TCP_CLOSE); __sk_dst_reset(sk); failure: - inet->dport = 0; + inet->inet_dport = 0; sk->sk_route_caps = 0; return err; } @@ -383,8 +384,8 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, ipv6_addr_copy(&fl.fl6_src, &np->saddr); fl.oif = sk->sk_bound_dev_if; fl.mark = sk->sk_mark; - fl.fl_ip_dport = inet->dport; - fl.fl_ip_sport = inet->sport; + fl.fl_ip_dport = inet->inet_dport; + fl.fl_ip_sport = inet->inet_sport; security_skb_classify_flow(skb, &fl); if ((err = ip6_dst_lookup(sk, &dst, &fl))) { @@ -1291,9 +1292,9 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, memcpy(newnp, np, sizeof(struct ipv6_pinfo)); - ipv6_addr_set_v4mapped(newinet->daddr, &newnp->daddr); + ipv6_addr_set_v4mapped(newinet->inet_daddr, &newnp->daddr); - ipv6_addr_set_v4mapped(newinet->saddr, &newnp->saddr); + ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr); ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr); @@ -1431,7 +1432,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newtp->advmss = dst_metric(dst, RTAX_ADVMSS); tcp_initialize_rcv_mss(newsk); - newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6; + newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6; + newinet->inet_rcv_saddr = LOOPBACK4_IPV6; #ifdef CONFIG_TCP_MD5SIG /* Copy over the MD5 key from the original socket */ @@ -1931,8 +1933,8 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) dest = &np->daddr; src = &np->rcv_saddr; - destp = ntohs(inet->dport); - srcp = ntohs(inet->sport); + destp = ntohs(inet->inet_dport); + srcp = ntohs(inet->inet_sport); if (icsk->icsk_pending == ICSK_TIME_RETRANS) { timer_active = 1; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index b86425b7ea22..829d300a6f35 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -53,7 +53,7 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) { const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr; const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2); - __be32 sk_rcv_saddr = inet_sk(sk)->rcv_saddr; + __be32 sk1_rcv_saddr = inet_sk(sk)->inet_rcv_saddr; __be32 sk2_rcv_saddr = inet_rcv_saddr(sk2); int sk_ipv6only = ipv6_only_sock(sk); int sk2_ipv6only = inet_v6_ipv6only(sk2); @@ -63,8 +63,8 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) /* if both are mapped, treat as IPv4 */ if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) return (!sk2_ipv6only && - (!sk_rcv_saddr || !sk2_rcv_saddr || - sk_rcv_saddr == sk2_rcv_saddr)); + (!sk1_rcv_saddr || !sk2_rcv_saddr || + sk1_rcv_saddr == sk2_rcv_saddr)); if (addr_type2 == IPV6_ADDR_ANY && !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED)) @@ -100,8 +100,8 @@ static inline int compute_score(struct sock *sk, struct net *net, struct inet_sock *inet = inet_sk(sk); score = 0; - if (inet->dport) { - if (inet->dport != sport) + if (inet->inet_dport) { + if (inet->inet_dport != sport) return -1; score++; } @@ -417,8 +417,8 @@ static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk, if (s->sk_hash == num && s->sk_family == PF_INET6) { struct ipv6_pinfo *np = inet6_sk(s); - if (inet->dport) { - if (inet->dport != rmt_port) + if (inet->inet_dport) { + if (inet->inet_dport != rmt_port) continue; } if (!ipv6_addr_any(&np->daddr) && @@ -792,7 +792,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, if (ipv6_addr_v4mapped(daddr)) { struct sockaddr_in sin; sin.sin_family = AF_INET; - sin.sin_port = sin6 ? sin6->sin6_port : inet->dport; + sin.sin_port = sin6 ? sin6->sin6_port : inet->inet_dport; sin.sin_addr.s_addr = daddr->s6_addr32[3]; msg->msg_name = &sin; msg->msg_namelen = sizeof(sin); @@ -865,7 +865,7 @@ do_udp_sendmsg: if (sk->sk_state != TCP_ESTABLISHED) return -EDESTADDRREQ; - fl.fl_ip_dport = inet->dport; + fl.fl_ip_dport = inet->inet_dport; daddr = &np->daddr; fl.fl6_flowlabel = np->flow_label; connected = 1; @@ -911,7 +911,7 @@ do_udp_sendmsg: fl.fl6_dst.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */ if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr)) ipv6_addr_copy(&fl.fl6_src, &np->saddr); - fl.fl_ip_sport = inet->sport; + fl.fl_ip_sport = inet->inet_sport; /* merge ip6_build_xmit from ip6_output */ if (opt && opt->srcrt) { @@ -1192,8 +1192,8 @@ static void udp6_sock_seq_show(struct seq_file *seq, struct sock *sp, int bucket dest = &np->daddr; src = &np->rcv_saddr; - destp = ntohs(inet->dport); - srcp = ntohs(inet->sport); + destp = ntohs(inet->inet_dport); + srcp = ntohs(inet->inet_sport); seq_printf(seq, "%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n", diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index ebf00ad5b194..362afbd60a96 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -149,7 +149,7 @@ socket_match(const struct sk_buff *skb, const struct xt_match_param *par, /* Ignore sockets listening on INADDR_ANY */ wildcard = (sk->sk_state != TCP_TIME_WAIT && - inet_sk(sk)->rcv_saddr == 0); + inet_sk(sk)->inet_rcv_saddr == 0); /* Ignore non-transparent sockets, if XT_SOCKET_TRANSPARENT is used */ diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 24b743eb0b1b..45474a436862 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -67,11 +67,11 @@ static int rds_tcp_accept_one(struct socket *sock) inet = inet_sk(new_sock->sk); rdsdebug("accepted tcp %u.%u.%u.%u:%u -> %u.%u.%u.%u:%u\n", - NIPQUAD(inet->saddr), ntohs(inet->sport), - NIPQUAD(inet->daddr), ntohs(inet->dport)); + NIPQUAD(inet->inet_saddr), ntohs(inet->inet_sport), + NIPQUAD(inet->inet_daddr), ntohs(inet->inet_dport)); - conn = rds_conn_create(inet->saddr, inet->daddr, &rds_tcp_transport, - GFP_KERNEL); + conn = rds_conn_create(inet->inet_saddr, inet->inet_daddr, + &rds_tcp_transport, GFP_KERNEL); if (IS_ERR(conn)) { ret = PTR_ERR(conn); goto out; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 612dc878e05c..d9f4cc2c7869 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -296,19 +296,19 @@ static void sctp_v4_from_sk(union sctp_addr *addr, struct sock *sk) { addr->v4.sin_family = AF_INET; addr->v4.sin_port = 0; - addr->v4.sin_addr.s_addr = inet_sk(sk)->rcv_saddr; + addr->v4.sin_addr.s_addr = inet_sk(sk)->inet_rcv_saddr; } /* Initialize sk->sk_rcv_saddr from sctp_addr. */ static void sctp_v4_to_sk_saddr(union sctp_addr *addr, struct sock *sk) { - inet_sk(sk)->rcv_saddr = addr->v4.sin_addr.s_addr; + inet_sk(sk)->inet_rcv_saddr = addr->v4.sin_addr.s_addr; } /* Initialize sk->sk_daddr from sctp_addr. */ static void sctp_v4_to_sk_daddr(union sctp_addr *addr, struct sock *sk) { - inet_sk(sk)->daddr = addr->v4.sin_addr.s_addr; + inet_sk(sk)->inet_daddr = addr->v4.sin_addr.s_addr; } /* Initialize a sctp_addr from an address parameter. */ @@ -598,7 +598,7 @@ static struct sock *sctp_v4_create_accept_sk(struct sock *sk, newinet = inet_sk(newsk); - newinet->daddr = asoc->peer.primary_addr.v4.sin_addr.s_addr; + newinet->inet_daddr = asoc->peer.primary_addr.v4.sin_addr.s_addr; sk_refcnt_debug_inc(newsk); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 0970e92c6acd..4085db99033d 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -394,7 +394,7 @@ SCTP_STATIC int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len) /* Refresh ephemeral port. */ if (!bp->port) - bp->port = inet_sk(sk)->num; + bp->port = inet_sk(sk)->inet_num; /* Add the address to the bind address list. * Use GFP_ATOMIC since BHs will be disabled. @@ -403,7 +403,7 @@ SCTP_STATIC int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len) /* Copy back into socket for getsockname() use. */ if (!ret) { - inet_sk(sk)->sport = htons(inet_sk(sk)->num); + inet_sk(sk)->inet_sport = htons(inet_sk(sk)->inet_num); af->to_sk_saddr(addr, sk); } @@ -1115,7 +1115,7 @@ static int __sctp_connect(struct sock* sk, } /* Initialize sk's dport and daddr for getpeername() */ - inet_sk(sk)->dport = htons(asoc->peer.port); + inet_sk(sk)->inet_dport = htons(asoc->peer.port); af = sctp_get_af_specific(sa_addr->sa.sa_family); af->to_sk_daddr(sa_addr, sk); sk->sk_err = 0; @@ -5851,7 +5851,7 @@ pp_not_found: */ success: if (!sctp_sk(sk)->bind_hash) { - inet_sk(sk)->num = snum; + inet_sk(sk)->inet_num = snum; sk_add_bind_node(sk, &pp->owner); sctp_sk(sk)->bind_hash = pp; } @@ -5923,7 +5923,7 @@ SCTP_STATIC int sctp_listen_start(struct sock *sk, int backlog) if (sctp_autobind(sk)) return -EAGAIN; } else { - if (sctp_get_port(sk, inet_sk(sk)->num)) { + if (sctp_get_port(sk, inet_sk(sk)->inet_num)) { sk->sk_state = SCTP_SS_CLOSED; return -EADDRINUSE; } @@ -6094,14 +6094,14 @@ static void sctp_bucket_destroy(struct sctp_bind_bucket *pp) static inline void __sctp_put_port(struct sock *sk) { struct sctp_bind_hashbucket *head = - &sctp_port_hashtable[sctp_phashfn(inet_sk(sk)->num)]; + &sctp_port_hashtable[sctp_phashfn(inet_sk(sk)->inet_num)]; struct sctp_bind_bucket *pp; sctp_spin_lock(&head->lock); pp = sctp_sk(sk)->bind_hash; __sk_del_bind_node(sk); sctp_sk(sk)->bind_hash = NULL; - inet_sk(sk)->num = 0; + inet_sk(sk)->inet_num = 0; sctp_bucket_destroy(pp); sctp_spin_unlock(&head->lock); } @@ -6128,7 +6128,7 @@ static int sctp_autobind(struct sock *sk) /* Initialize a local sockaddr structure to INADDR_ANY. */ af = sctp_sk(sk)->pf->af; - port = htons(inet_sk(sk)->num); + port = htons(inet_sk(sk)->inet_num); af->inaddr_any(&autoaddr, port); return sctp_do_bind(sk, &autoaddr, af->sockaddr_len); @@ -6697,12 +6697,12 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk, /* Initialize sk's sport, dport, rcv_saddr and daddr for * getsockname() and getpeername() */ - newinet->sport = inet->sport; - newinet->saddr = inet->saddr; - newinet->rcv_saddr = inet->rcv_saddr; - newinet->dport = htons(asoc->peer.port); + newinet->inet_sport = inet->inet_sport; + newinet->inet_saddr = inet->inet_saddr; + newinet->inet_rcv_saddr = inet->inet_rcv_saddr; + newinet->inet_dport = htons(asoc->peer.port); newinet->pmtudisc = inet->pmtudisc; - newinet->id = asoc->next_tsn ^ jiffies; + newinet->inet_id = asoc->next_tsn ^ jiffies; newinet->uc_ttl = inet->uc_ttl; newinet->mc_loop = 1; @@ -6741,13 +6741,13 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, newsp->hmac = NULL; /* Hook this new socket in to the bind_hash list. */ - head = &sctp_port_hashtable[sctp_phashfn(inet_sk(oldsk)->num)]; + head = &sctp_port_hashtable[sctp_phashfn(inet_sk(oldsk)->inet_num)]; sctp_local_bh_disable(); sctp_spin_lock(&head->lock); pp = sctp_sk(oldsk)->bind_hash; sk_add_bind_node(newsk, &pp->owner); sctp_sk(newsk)->bind_hash = pp; - inet_sk(newsk)->num = inet_sk(oldsk)->num; + inet_sk(newsk)->inet_num = inet_sk(oldsk)->inet_num; sctp_spin_unlock(&head->lock); sctp_local_bh_enable(); diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index ccc5e83cae5d..c2a17876defd 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -272,14 +272,14 @@ static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining) case PF_INET: len = snprintf(buf, remaining, "ipv4 %s %pI4 %d\n", proto_name, - &inet_sk(sk)->rcv_saddr, - inet_sk(sk)->num); + &inet_sk(sk)->inet_rcv_saddr, + inet_sk(sk)->inet_num); break; case PF_INET6: len = snprintf(buf, remaining, "ipv6 %s %pI6 %d\n", proto_name, &inet6_sk(sk)->rcv_saddr, - inet_sk(sk)->num); + inet_sk(sk)->inet_num); break; default: len = snprintf(buf, remaining, "*unknown-%d*\n", @@ -1311,7 +1311,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, /* Register socket with portmapper */ if (*errp >= 0 && pmap_register) *errp = svc_register(serv, inet->sk_family, inet->sk_protocol, - ntohs(inet_sk(inet)->sport)); + ntohs(inet_sk(inet)->inet_sport)); if (*errp < 0) { kfree(svsk); diff --git a/security/lsm_audit.c b/security/lsm_audit.c index 3bb90b6f1dd3..e04566a2c4e5 100644 --- a/security/lsm_audit.c +++ b/security/lsm_audit.c @@ -273,11 +273,11 @@ static void dump_common_audit_data(struct audit_buffer *ab, case AF_INET: { struct inet_sock *inet = inet_sk(sk); - print_ipv4_addr(ab, inet->rcv_saddr, - inet->sport, + print_ipv4_addr(ab, inet->inet_rcv_saddr, + inet->inet_sport, "laddr", "lport"); - print_ipv4_addr(ab, inet->daddr, - inet->dport, + print_ipv4_addr(ab, inet->inet_daddr, + inet->inet_dport, "faddr", "fport"); break; } @@ -286,10 +286,10 @@ static void dump_common_audit_data(struct audit_buffer *ab, struct ipv6_pinfo *inet6 = inet6_sk(sk); print_ipv6_addr(ab, &inet6->rcv_saddr, - inet->sport, + inet->inet_sport, "laddr", "lport"); print_ipv6_addr(ab, &inet6->daddr, - inet->dport, + inet->inet_dport, "faddr", "fport"); break; } -- cgit v1.3-8-gc7d7 From 0e1227d356e9b2fe0500d6cc7084f752040a1e0e Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Mon, 19 Oct 2009 11:53:06 +0900 Subject: crypto: ghash - Add PCLMULQDQ accelerated implementation PCLMULQDQ is used to accelerate the most time-consuming part of GHASH, carry-less multiplication. More information about PCLMULQDQ can be found at: http://software.intel.com/en-us/articles/carry-less-multiplication-and-its-usage-for-computing-the-gcm-mode/ Because PCLMULQDQ changes XMM state, its usage must be enclosed with kernel_fpu_begin/end, which can be used only in process context, the acceleration is implemented as crypto_ahash. That is, request in soft IRQ context will be defered to the cryptd kernel thread. Signed-off-by: Huang Ying Signed-off-by: Herbert Xu --- arch/x86/crypto/Makefile | 3 + arch/x86/crypto/ghash-clmulni-intel_asm.S | 157 ++++++++++++++ arch/x86/crypto/ghash-clmulni-intel_glue.c | 333 +++++++++++++++++++++++++++++ arch/x86/include/asm/cpufeature.h | 1 + crypto/Kconfig | 8 + crypto/cryptd.c | 7 + include/crypto/cryptd.h | 1 + 7 files changed, 510 insertions(+) create mode 100644 arch/x86/crypto/ghash-clmulni-intel_asm.S create mode 100644 arch/x86/crypto/ghash-clmulni-intel_glue.c (limited to 'include') diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index cfb0010fa940..1a58ad89fdf7 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -12,6 +12,7 @@ obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o +obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o @@ -24,3 +25,5 @@ twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o + +ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S new file mode 100644 index 000000000000..b9e787a511da --- /dev/null +++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S @@ -0,0 +1,157 @@ +/* + * Accelerated GHASH implementation with Intel PCLMULQDQ-NI + * instructions. This file contains accelerated part of ghash + * implementation. More information about PCLMULQDQ can be found at: + * + * http://software.intel.com/en-us/articles/carry-less-multiplication-and-its-usage-for-computing-the-gcm-mode/ + * + * Copyright (c) 2009 Intel Corp. + * Author: Huang Ying + * Vinodh Gopal + * Erdinc Ozturk + * Deniz Karakoyunlu + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ + +#include + +.align 16 +.Lbswap_mask: + .octa 0x000102030405060708090a0b0c0d0e0f +.Lpoly: + .octa 0xc2000000000000000000000000000001 +.Ltwo_one: + .octa 0x00000001000000000000000000000001 + +#define DATA %xmm0 +#define SHASH %xmm1 +#define T1 %xmm2 +#define T2 %xmm3 +#define T3 %xmm4 +#define BSWAP %xmm5 +#define IN1 %xmm6 + +.text + +/* + * __clmul_gf128mul_ble: internal ABI + * input: + * DATA: operand1 + * SHASH: operand2, hash_key << 1 mod poly + * output: + * DATA: operand1 * operand2 mod poly + * changed: + * T1 + * T2 + * T3 + */ +__clmul_gf128mul_ble: + movaps DATA, T1 + pshufd $0b01001110, DATA, T2 + pshufd $0b01001110, SHASH, T3 + pxor DATA, T2 + pxor SHASH, T3 + + # pclmulqdq $0x00, SHASH, DATA # DATA = a0 * b0 + .byte 0x66, 0x0f, 0x3a, 0x44, 0xc1, 0x00 + # pclmulqdq $0x11, SHASH, T1 # T1 = a1 * b1 + .byte 0x66, 0x0f, 0x3a, 0x44, 0xd1, 0x11 + # pclmulqdq $0x00, T3, T2 # T2 = (a1 + a0) * (b1 + b0) + .byte 0x66, 0x0f, 0x3a, 0x44, 0xdc, 0x00 + pxor DATA, T2 + pxor T1, T2 # T2 = a0 * b1 + a1 * b0 + + movaps T2, T3 + pslldq $8, T3 + psrldq $8, T2 + pxor T3, DATA + pxor T2, T1 # is result of + # carry-less multiplication + + # first phase of the reduction + movaps DATA, T3 + psllq $1, T3 + pxor DATA, T3 + psllq $5, T3 + pxor DATA, T3 + psllq $57, T3 + movaps T3, T2 + pslldq $8, T2 + psrldq $8, T3 + pxor T2, DATA + pxor T3, T1 + + # second phase of the reduction + movaps DATA, T2 + psrlq $5, T2 + pxor DATA, T2 + psrlq $1, T2 + pxor DATA, T2 + psrlq $1, T2 + pxor T2, T1 + pxor T1, DATA + ret + +/* void clmul_ghash_mul(char *dst, const be128 *shash) */ +ENTRY(clmul_ghash_mul) + movups (%rdi), DATA + movups (%rsi), SHASH + movaps .Lbswap_mask, BSWAP + pshufb BSWAP, DATA + call __clmul_gf128mul_ble + pshufb BSWAP, DATA + movups DATA, (%rdi) + ret + +/* + * void clmul_ghash_update(char *dst, const char *src, unsigned int srclen, + * const be128 *shash); + */ +ENTRY(clmul_ghash_update) + cmp $16, %rdx + jb .Lupdate_just_ret # check length + movaps .Lbswap_mask, BSWAP + movups (%rdi), DATA + movups (%rcx), SHASH + pshufb BSWAP, DATA +.align 4 +.Lupdate_loop: + movups (%rsi), IN1 + pshufb BSWAP, IN1 + pxor IN1, DATA + call __clmul_gf128mul_ble + sub $16, %rdx + add $16, %rsi + cmp $16, %rdx + jge .Lupdate_loop + pshufb BSWAP, DATA + movups DATA, (%rdi) +.Lupdate_just_ret: + ret + +/* + * void clmul_ghash_setkey(be128 *shash, const u8 *key); + * + * Calculate hash_key << 1 mod poly + */ +ENTRY(clmul_ghash_setkey) + movaps .Lbswap_mask, BSWAP + movups (%rsi), %xmm0 + pshufb BSWAP, %xmm0 + movaps %xmm0, %xmm1 + psllq $1, %xmm0 + psrlq $63, %xmm1 + movaps %xmm1, %xmm2 + pslldq $8, %xmm1 + psrldq $8, %xmm2 + por %xmm1, %xmm0 + # reduction + pshufd $0b00100100, %xmm2, %xmm1 + pcmpeqd .Ltwo_one, %xmm1 + pand .Lpoly, %xmm1 + pxor %xmm1, %xmm0 + movups %xmm0, (%rdi) + ret diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c new file mode 100644 index 000000000000..65d409644d72 --- /dev/null +++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c @@ -0,0 +1,333 @@ +/* + * Accelerated GHASH implementation with Intel PCLMULQDQ-NI + * instructions. This file contains glue code. + * + * Copyright (c) 2009 Intel Corp. + * Author: Huang Ying + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define GHASH_BLOCK_SIZE 16 +#define GHASH_DIGEST_SIZE 16 + +void clmul_ghash_mul(char *dst, const be128 *shash); + +void clmul_ghash_update(char *dst, const char *src, unsigned int srclen, + const be128 *shash); + +void clmul_ghash_setkey(be128 *shash, const u8 *key); + +struct ghash_async_ctx { + struct cryptd_ahash *cryptd_tfm; +}; + +struct ghash_ctx { + be128 shash; +}; + +struct ghash_desc_ctx { + u8 buffer[GHASH_BLOCK_SIZE]; + u32 bytes; +}; + +static int ghash_init(struct shash_desc *desc) +{ + struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); + + memset(dctx, 0, sizeof(*dctx)); + + return 0; +} + +static int ghash_setkey(struct crypto_shash *tfm, + const u8 *key, unsigned int keylen) +{ + struct ghash_ctx *ctx = crypto_shash_ctx(tfm); + + if (keylen != GHASH_BLOCK_SIZE) { + crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + clmul_ghash_setkey(&ctx->shash, key); + + return 0; +} + +static int ghash_update(struct shash_desc *desc, + const u8 *src, unsigned int srclen) +{ + struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); + struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm); + u8 *dst = dctx->buffer; + + kernel_fpu_begin(); + if (dctx->bytes) { + int n = min(srclen, dctx->bytes); + u8 *pos = dst + (GHASH_BLOCK_SIZE - dctx->bytes); + + dctx->bytes -= n; + srclen -= n; + + while (n--) + *pos++ ^= *src++; + + if (!dctx->bytes) + clmul_ghash_mul(dst, &ctx->shash); + } + + clmul_ghash_update(dst, src, srclen, &ctx->shash); + kernel_fpu_end(); + + if (srclen & 0xf) { + src += srclen - (srclen & 0xf); + srclen &= 0xf; + dctx->bytes = GHASH_BLOCK_SIZE - srclen; + while (srclen--) + *dst++ ^= *src++; + } + + return 0; +} + +static void ghash_flush(struct ghash_ctx *ctx, struct ghash_desc_ctx *dctx) +{ + u8 *dst = dctx->buffer; + + if (dctx->bytes) { + u8 *tmp = dst + (GHASH_BLOCK_SIZE - dctx->bytes); + + while (dctx->bytes--) + *tmp++ ^= 0; + + kernel_fpu_begin(); + clmul_ghash_mul(dst, &ctx->shash); + kernel_fpu_end(); + } + + dctx->bytes = 0; +} + +static int ghash_final(struct shash_desc *desc, u8 *dst) +{ + struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); + struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm); + u8 *buf = dctx->buffer; + + ghash_flush(ctx, dctx); + memcpy(dst, buf, GHASH_BLOCK_SIZE); + + return 0; +} + +static struct shash_alg ghash_alg = { + .digestsize = GHASH_DIGEST_SIZE, + .init = ghash_init, + .update = ghash_update, + .final = ghash_final, + .setkey = ghash_setkey, + .descsize = sizeof(struct ghash_desc_ctx), + .base = { + .cra_name = "__ghash", + .cra_driver_name = "__ghash-pclmulqdqni", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = GHASH_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct ghash_ctx), + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(ghash_alg.base.cra_list), + }, +}; + +static int ghash_async_init(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); + struct ahash_request *cryptd_req = ahash_request_ctx(req); + struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; + + if (irq_fpu_usable()) { + memcpy(cryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); + return crypto_ahash_init(cryptd_req); + } else { + struct shash_desc *desc = cryptd_shash_desc(cryptd_req); + struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm); + + desc->tfm = child; + desc->flags = req->base.flags; + return crypto_shash_init(desc); + } +} + +static int ghash_async_update(struct ahash_request *req) +{ + struct ahash_request *cryptd_req = ahash_request_ctx(req); + + if (irq_fpu_usable()) { + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); + struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; + + memcpy(cryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); + return crypto_ahash_update(cryptd_req); + } else { + struct shash_desc *desc = cryptd_shash_desc(cryptd_req); + return shash_ahash_update(req, desc); + } +} + +static int ghash_async_final(struct ahash_request *req) +{ + struct ahash_request *cryptd_req = ahash_request_ctx(req); + + if (irq_fpu_usable()) { + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); + struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; + + memcpy(cryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); + return crypto_ahash_final(cryptd_req); + } else { + struct shash_desc *desc = cryptd_shash_desc(cryptd_req); + return crypto_shash_final(desc, req->result); + } +} + +static int ghash_async_digest(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); + struct ahash_request *cryptd_req = ahash_request_ctx(req); + struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; + + if (irq_fpu_usable()) { + memcpy(cryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); + return crypto_ahash_digest(cryptd_req); + } else { + struct shash_desc *desc = cryptd_shash_desc(cryptd_req); + struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm); + + desc->tfm = child; + desc->flags = req->base.flags; + return shash_ahash_digest(req, desc); + } +} + +static int ghash_async_setkey(struct crypto_ahash *tfm, const u8 *key, + unsigned int keylen) +{ + struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); + struct crypto_ahash *child = &ctx->cryptd_tfm->base; + int err; + + crypto_ahash_clear_flags(child, CRYPTO_TFM_REQ_MASK); + crypto_ahash_set_flags(child, crypto_ahash_get_flags(tfm) + & CRYPTO_TFM_REQ_MASK); + err = crypto_ahash_setkey(child, key, keylen); + crypto_ahash_set_flags(tfm, crypto_ahash_get_flags(child) + & CRYPTO_TFM_RES_MASK); + + return 0; +} + +static int ghash_async_init_tfm(struct crypto_tfm *tfm) +{ + struct cryptd_ahash *cryptd_tfm; + struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm); + + cryptd_tfm = cryptd_alloc_ahash("__ghash-pclmulqdqni", 0, 0); + if (IS_ERR(cryptd_tfm)) + return PTR_ERR(cryptd_tfm); + ctx->cryptd_tfm = cryptd_tfm; + crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), + sizeof(struct ahash_request) + + crypto_ahash_reqsize(&cryptd_tfm->base)); + + return 0; +} + +static void ghash_async_exit_tfm(struct crypto_tfm *tfm) +{ + struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm); + + cryptd_free_ahash(ctx->cryptd_tfm); +} + +static struct ahash_alg ghash_async_alg = { + .init = ghash_async_init, + .update = ghash_async_update, + .final = ghash_async_final, + .setkey = ghash_async_setkey, + .digest = ghash_async_digest, + .halg = { + .digestsize = GHASH_DIGEST_SIZE, + .base = { + .cra_name = "ghash", + .cra_driver_name = "ghash-clmulni", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC, + .cra_blocksize = GHASH_BLOCK_SIZE, + .cra_type = &crypto_ahash_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(ghash_async_alg.halg.base.cra_list), + .cra_init = ghash_async_init_tfm, + .cra_exit = ghash_async_exit_tfm, + }, + }, +}; + +static int __init ghash_pclmulqdqni_mod_init(void) +{ + int err; + + if (!cpu_has_pclmulqdq) { + printk(KERN_INFO "Intel PCLMULQDQ-NI instructions are not" + " detected.\n"); + return -ENODEV; + } + + err = crypto_register_shash(&ghash_alg); + if (err) + goto err_out; + err = crypto_register_ahash(&ghash_async_alg); + if (err) + goto err_shash; + + return 0; + +err_shash: + crypto_unregister_shash(&ghash_alg); +err_out: + return err; +} + +static void __exit ghash_pclmulqdqni_mod_exit(void) +{ + crypto_unregister_ahash(&ghash_async_alg); + crypto_unregister_shash(&ghash_alg); +} + +module_init(ghash_pclmulqdqni_mod_init); +module_exit(ghash_pclmulqdqni_mod_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("GHASH Message Digest Algorithm, " + "acclerated by PCLMULQDQ-NI"); +MODULE_ALIAS("ghash"); diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 9cfc88b97742..613700f27a4a 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -248,6 +248,7 @@ extern const char * const x86_power_flags[32]; #define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC) #define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) #define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR) +#define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ) #if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64) # define cpu_has_invlpg 1 diff --git a/crypto/Kconfig b/crypto/Kconfig index 26b5dd0cb564..fd6871102b60 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -440,6 +440,14 @@ config CRYPTO_WP512 See also: +config CRYPTO_GHASH_CLMUL_NI_INTEL + tristate "GHASH digest algorithm (CLMUL-NI accelerated)" + select CRYPTO_SHASH + select CRYPTO_CRYPTD + help + GHASH is message digest algorithm for GCM (Galois/Counter Mode). + The implementation is accelerated by CLMUL-NI of Intel. + comment "Ciphers" config CRYPTO_AES diff --git a/crypto/cryptd.c b/crypto/cryptd.c index 35335825a4ef..f8ae0d94a647 100644 --- a/crypto/cryptd.c +++ b/crypto/cryptd.c @@ -711,6 +711,13 @@ struct crypto_shash *cryptd_ahash_child(struct cryptd_ahash *tfm) } EXPORT_SYMBOL_GPL(cryptd_ahash_child); +struct shash_desc *cryptd_shash_desc(struct ahash_request *req) +{ + struct cryptd_hash_request_ctx *rctx = ahash_request_ctx(req); + return &rctx->desc; +} +EXPORT_SYMBOL_GPL(cryptd_shash_desc); + void cryptd_free_ahash(struct cryptd_ahash *tfm) { crypto_free_ahash(&tfm->base); diff --git a/include/crypto/cryptd.h b/include/crypto/cryptd.h index 2f65a6e8ea4d..1c96b255017c 100644 --- a/include/crypto/cryptd.h +++ b/include/crypto/cryptd.h @@ -39,6 +39,7 @@ static inline struct cryptd_ahash *__cryptd_ahash_cast( struct cryptd_ahash *cryptd_alloc_ahash(const char *alg_name, u32 type, u32 mask); struct crypto_shash *cryptd_ahash_child(struct cryptd_ahash *tfm); +struct shash_desc *cryptd_shash_desc(struct ahash_request *req); void cryptd_free_ahash(struct cryptd_ahash *tfm); #endif -- cgit v1.3-8-gc7d7 From 2141b6309b1fce535329c195cb5e5274a4c84ebc Mon Sep 17 00:00:00 2001 From: Benjamin Gilbert Date: Mon, 19 Oct 2009 12:53:37 +0900 Subject: crypto: hash - Remove legacy hash/digest code 6941c3a0 disabled compilation of the legacy digest code but didn't actually remove it. Rectify this. Also, remove the crypto_hash_type extern declaration from algapi.h now that the struct is gone. Signed-off-by: Benjamin Gilbert Signed-off-by: Herbert Xu --- crypto/digest.c | 240 ------------------------------------------------ crypto/hash.c | 183 ------------------------------------ include/crypto/algapi.h | 1 - 3 files changed, 424 deletions(-) delete mode 100644 crypto/digest.c delete mode 100644 crypto/hash.c (limited to 'include') diff --git a/crypto/digest.c b/crypto/digest.c deleted file mode 100644 index 5d3f1303da98..000000000000 --- a/crypto/digest.c +++ /dev/null @@ -1,240 +0,0 @@ -/* - * Cryptographic API. - * - * Digest operations. - * - * Copyright (c) 2002 James Morris - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "internal.h" - -static int init(struct hash_desc *desc) -{ - struct crypto_tfm *tfm = crypto_hash_tfm(desc->tfm); - - tfm->__crt_alg->cra_digest.dia_init(tfm); - return 0; -} - -static int update2(struct hash_desc *desc, - struct scatterlist *sg, unsigned int nbytes) -{ - struct crypto_tfm *tfm = crypto_hash_tfm(desc->tfm); - unsigned int alignmask = crypto_tfm_alg_alignmask(tfm); - - if (!nbytes) - return 0; - - for (;;) { - struct page *pg = sg_page(sg); - unsigned int offset = sg->offset; - unsigned int l = sg->length; - - if (unlikely(l > nbytes)) - l = nbytes; - nbytes -= l; - - do { - unsigned int bytes_from_page = min(l, ((unsigned int) - (PAGE_SIZE)) - - offset); - char *src = crypto_kmap(pg, 0); - char *p = src + offset; - - if (unlikely(offset & alignmask)) { - unsigned int bytes = - alignmask + 1 - (offset & alignmask); - bytes = min(bytes, bytes_from_page); - tfm->__crt_alg->cra_digest.dia_update(tfm, p, - bytes); - p += bytes; - bytes_from_page -= bytes; - l -= bytes; - } - tfm->__crt_alg->cra_digest.dia_update(tfm, p, - bytes_from_page); - crypto_kunmap(src, 0); - crypto_yield(desc->flags); - offset = 0; - pg++; - l -= bytes_from_page; - } while (l > 0); - - if (!nbytes) - break; - sg = scatterwalk_sg_next(sg); - } - - return 0; -} - -static int update(struct hash_desc *desc, - struct scatterlist *sg, unsigned int nbytes) -{ - if (WARN_ON_ONCE(in_irq())) - return -EDEADLK; - return update2(desc, sg, nbytes); -} - -static int final(struct hash_desc *desc, u8 *out) -{ - struct crypto_tfm *tfm = crypto_hash_tfm(desc->tfm); - unsigned long alignmask = crypto_tfm_alg_alignmask(tfm); - struct digest_alg *digest = &tfm->__crt_alg->cra_digest; - - if (unlikely((unsigned long)out & alignmask)) { - unsigned long align = alignmask + 1; - unsigned long addr = (unsigned long)crypto_tfm_ctx(tfm); - u8 *dst = (u8 *)ALIGN(addr, align) + - ALIGN(tfm->__crt_alg->cra_ctxsize, align); - - digest->dia_final(tfm, dst); - memcpy(out, dst, digest->dia_digestsize); - } else - digest->dia_final(tfm, out); - - return 0; -} - -static int nosetkey(struct crypto_hash *tfm, const u8 *key, unsigned int keylen) -{ - crypto_hash_clear_flags(tfm, CRYPTO_TFM_RES_MASK); - return -ENOSYS; -} - -static int setkey(struct crypto_hash *hash, const u8 *key, unsigned int keylen) -{ - struct crypto_tfm *tfm = crypto_hash_tfm(hash); - - crypto_hash_clear_flags(hash, CRYPTO_TFM_RES_MASK); - return tfm->__crt_alg->cra_digest.dia_setkey(tfm, key, keylen); -} - -static int digest(struct hash_desc *desc, - struct scatterlist *sg, unsigned int nbytes, u8 *out) -{ - if (WARN_ON_ONCE(in_irq())) - return -EDEADLK; - - init(desc); - update2(desc, sg, nbytes); - return final(desc, out); -} - -int crypto_init_digest_ops(struct crypto_tfm *tfm) -{ - struct hash_tfm *ops = &tfm->crt_hash; - struct digest_alg *dalg = &tfm->__crt_alg->cra_digest; - - if (dalg->dia_digestsize > PAGE_SIZE / 8) - return -EINVAL; - - ops->init = init; - ops->update = update; - ops->final = final; - ops->digest = digest; - ops->setkey = dalg->dia_setkey ? setkey : nosetkey; - ops->digestsize = dalg->dia_digestsize; - - return 0; -} - -void crypto_exit_digest_ops(struct crypto_tfm *tfm) -{ -} - -static int digest_async_nosetkey(struct crypto_ahash *tfm_async, const u8 *key, - unsigned int keylen) -{ - crypto_ahash_clear_flags(tfm_async, CRYPTO_TFM_RES_MASK); - return -ENOSYS; -} - -static int digest_async_setkey(struct crypto_ahash *tfm_async, const u8 *key, - unsigned int keylen) -{ - struct crypto_tfm *tfm = crypto_ahash_tfm(tfm_async); - struct digest_alg *dalg = &tfm->__crt_alg->cra_digest; - - crypto_ahash_clear_flags(tfm_async, CRYPTO_TFM_RES_MASK); - return dalg->dia_setkey(tfm, key, keylen); -} - -static int digest_async_init(struct ahash_request *req) -{ - struct crypto_tfm *tfm = req->base.tfm; - struct digest_alg *dalg = &tfm->__crt_alg->cra_digest; - - dalg->dia_init(tfm); - return 0; -} - -static int digest_async_update(struct ahash_request *req) -{ - struct crypto_tfm *tfm = req->base.tfm; - struct hash_desc desc = { - .tfm = __crypto_hash_cast(tfm), - .flags = req->base.flags, - }; - - update(&desc, req->src, req->nbytes); - return 0; -} - -static int digest_async_final(struct ahash_request *req) -{ - struct crypto_tfm *tfm = req->base.tfm; - struct hash_desc desc = { - .tfm = __crypto_hash_cast(tfm), - .flags = req->base.flags, - }; - - final(&desc, req->result); - return 0; -} - -static int digest_async_digest(struct ahash_request *req) -{ - struct crypto_tfm *tfm = req->base.tfm; - struct hash_desc desc = { - .tfm = __crypto_hash_cast(tfm), - .flags = req->base.flags, - }; - - return digest(&desc, req->src, req->nbytes, req->result); -} - -int crypto_init_digest_ops_async(struct crypto_tfm *tfm) -{ - struct ahash_tfm *crt = &tfm->crt_ahash; - struct digest_alg *dalg = &tfm->__crt_alg->cra_digest; - - if (dalg->dia_digestsize > PAGE_SIZE / 8) - return -EINVAL; - - crt->init = digest_async_init; - crt->update = digest_async_update; - crt->final = digest_async_final; - crt->digest = digest_async_digest; - crt->setkey = dalg->dia_setkey ? digest_async_setkey : - digest_async_nosetkey; - crt->digestsize = dalg->dia_digestsize; - - return 0; -} diff --git a/crypto/hash.c b/crypto/hash.c deleted file mode 100644 index cb86b19fd105..000000000000 --- a/crypto/hash.c +++ /dev/null @@ -1,183 +0,0 @@ -/* - * Cryptographic Hash operations. - * - * Copyright (c) 2006 Herbert Xu - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - */ - -#include -#include -#include -#include -#include -#include - -#include "internal.h" - -static unsigned int crypto_hash_ctxsize(struct crypto_alg *alg, u32 type, - u32 mask) -{ - return alg->cra_ctxsize; -} - -static int hash_setkey_unaligned(struct crypto_hash *crt, const u8 *key, - unsigned int keylen) -{ - struct crypto_tfm *tfm = crypto_hash_tfm(crt); - struct hash_alg *alg = &tfm->__crt_alg->cra_hash; - unsigned long alignmask = crypto_hash_alignmask(crt); - int ret; - u8 *buffer, *alignbuffer; - unsigned long absize; - - absize = keylen + alignmask; - buffer = kmalloc(absize, GFP_ATOMIC); - if (!buffer) - return -ENOMEM; - - alignbuffer = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1); - memcpy(alignbuffer, key, keylen); - ret = alg->setkey(crt, alignbuffer, keylen); - memset(alignbuffer, 0, keylen); - kfree(buffer); - return ret; -} - -static int hash_setkey(struct crypto_hash *crt, const u8 *key, - unsigned int keylen) -{ - struct crypto_tfm *tfm = crypto_hash_tfm(crt); - struct hash_alg *alg = &tfm->__crt_alg->cra_hash; - unsigned long alignmask = crypto_hash_alignmask(crt); - - if ((unsigned long)key & alignmask) - return hash_setkey_unaligned(crt, key, keylen); - - return alg->setkey(crt, key, keylen); -} - -static int hash_async_setkey(struct crypto_ahash *tfm_async, const u8 *key, - unsigned int keylen) -{ - struct crypto_tfm *tfm = crypto_ahash_tfm(tfm_async); - struct crypto_hash *tfm_hash = __crypto_hash_cast(tfm); - struct hash_alg *alg = &tfm->__crt_alg->cra_hash; - - return alg->setkey(tfm_hash, key, keylen); -} - -static int hash_async_init(struct ahash_request *req) -{ - struct crypto_tfm *tfm = req->base.tfm; - struct hash_alg *alg = &tfm->__crt_alg->cra_hash; - struct hash_desc desc = { - .tfm = __crypto_hash_cast(tfm), - .flags = req->base.flags, - }; - - return alg->init(&desc); -} - -static int hash_async_update(struct ahash_request *req) -{ - struct crypto_tfm *tfm = req->base.tfm; - struct hash_alg *alg = &tfm->__crt_alg->cra_hash; - struct hash_desc desc = { - .tfm = __crypto_hash_cast(tfm), - .flags = req->base.flags, - }; - - return alg->update(&desc, req->src, req->nbytes); -} - -static int hash_async_final(struct ahash_request *req) -{ - struct crypto_tfm *tfm = req->base.tfm; - struct hash_alg *alg = &tfm->__crt_alg->cra_hash; - struct hash_desc desc = { - .tfm = __crypto_hash_cast(tfm), - .flags = req->base.flags, - }; - - return alg->final(&desc, req->result); -} - -static int hash_async_digest(struct ahash_request *req) -{ - struct crypto_tfm *tfm = req->base.tfm; - struct hash_alg *alg = &tfm->__crt_alg->cra_hash; - struct hash_desc desc = { - .tfm = __crypto_hash_cast(tfm), - .flags = req->base.flags, - }; - - return alg->digest(&desc, req->src, req->nbytes, req->result); -} - -static int crypto_init_hash_ops_async(struct crypto_tfm *tfm) -{ - struct ahash_tfm *crt = &tfm->crt_ahash; - struct hash_alg *alg = &tfm->__crt_alg->cra_hash; - - crt->init = hash_async_init; - crt->update = hash_async_update; - crt->final = hash_async_final; - crt->digest = hash_async_digest; - crt->setkey = hash_async_setkey; - crt->digestsize = alg->digestsize; - - return 0; -} - -static int crypto_init_hash_ops_sync(struct crypto_tfm *tfm) -{ - struct hash_tfm *crt = &tfm->crt_hash; - struct hash_alg *alg = &tfm->__crt_alg->cra_hash; - - crt->init = alg->init; - crt->update = alg->update; - crt->final = alg->final; - crt->digest = alg->digest; - crt->setkey = hash_setkey; - crt->digestsize = alg->digestsize; - - return 0; -} - -static int crypto_init_hash_ops(struct crypto_tfm *tfm, u32 type, u32 mask) -{ - struct hash_alg *alg = &tfm->__crt_alg->cra_hash; - - if (alg->digestsize > PAGE_SIZE / 8) - return -EINVAL; - - if ((mask & CRYPTO_ALG_TYPE_HASH_MASK) != CRYPTO_ALG_TYPE_HASH_MASK) - return crypto_init_hash_ops_async(tfm); - else - return crypto_init_hash_ops_sync(tfm); -} - -static void crypto_hash_show(struct seq_file *m, struct crypto_alg *alg) - __attribute__ ((unused)); -static void crypto_hash_show(struct seq_file *m, struct crypto_alg *alg) -{ - seq_printf(m, "type : hash\n"); - seq_printf(m, "blocksize : %u\n", alg->cra_blocksize); - seq_printf(m, "digestsize : %u\n", alg->cra_hash.digestsize); -} - -const struct crypto_type crypto_hash_type = { - .ctxsize = crypto_hash_ctxsize, - .init = crypto_init_hash_ops, -#ifdef CONFIG_PROC_FS - .show = crypto_hash_show, -#endif -}; -EXPORT_SYMBOL_GPL(crypto_hash_type); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("Generic cryptographic hash type"); diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index 1ffb53f74d37..fc0d575c71e0 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -106,7 +106,6 @@ struct blkcipher_walk { extern const struct crypto_type crypto_ablkcipher_type; extern const struct crypto_type crypto_aead_type; extern const struct crypto_type crypto_blkcipher_type; -extern const struct crypto_type crypto_hash_type; void crypto_mod_put(struct crypto_alg *alg); -- cgit v1.3-8-gc7d7 From 8ffd1be6779c86ebc2a1013f43fdcee8bdbba2b7 Mon Sep 17 00:00:00 2001 From: Benjamin Gilbert Date: Mon, 19 Oct 2009 12:58:55 +0900 Subject: crypto: hash - Remove cra_u.{digest,hash} Remove unused digest_alg and hash_alg structs from crypto_alg union and kill their definitions. This also ensures that old-style digest/hash algorithms maintained out of tree will break at build time rather than oopsing at runtime. Signed-off-by: Benjamin Gilbert Signed-off-by: Herbert Xu --- include/linux/crypto.h | 27 --------------------------- 1 file changed, 27 deletions(-) (limited to 'include') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index fd929889e8dc..24d2e30f1b46 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -250,29 +250,6 @@ struct cipher_alg { void (*cia_decrypt)(struct crypto_tfm *tfm, u8 *dst, const u8 *src); }; -struct digest_alg { - unsigned int dia_digestsize; - void (*dia_init)(struct crypto_tfm *tfm); - void (*dia_update)(struct crypto_tfm *tfm, const u8 *data, - unsigned int len); - void (*dia_final)(struct crypto_tfm *tfm, u8 *out); - int (*dia_setkey)(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen); -}; - -struct hash_alg { - int (*init)(struct hash_desc *desc); - int (*update)(struct hash_desc *desc, struct scatterlist *sg, - unsigned int nbytes); - int (*final)(struct hash_desc *desc, u8 *out); - int (*digest)(struct hash_desc *desc, struct scatterlist *sg, - unsigned int nbytes, u8 *out); - int (*setkey)(struct crypto_hash *tfm, const u8 *key, - unsigned int keylen); - - unsigned int digestsize; -}; - struct compress_alg { int (*coa_compress)(struct crypto_tfm *tfm, const u8 *src, unsigned int slen, u8 *dst, unsigned int *dlen); @@ -293,8 +270,6 @@ struct rng_alg { #define cra_aead cra_u.aead #define cra_blkcipher cra_u.blkcipher #define cra_cipher cra_u.cipher -#define cra_digest cra_u.digest -#define cra_hash cra_u.hash #define cra_compress cra_u.compress #define cra_rng cra_u.rng @@ -320,8 +295,6 @@ struct crypto_alg { struct aead_alg aead; struct blkcipher_alg blkcipher; struct cipher_alg cipher; - struct digest_alg digest; - struct hash_alg hash; struct compress_alg compress; struct rng_alg rng; } cra_u; -- cgit v1.3-8-gc7d7 From 49cbf952488e7fa2f1160c80016e09e6c2854b24 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 7 Oct 2009 22:47:16 +0000 Subject: ah: Add struct crypto_ahash to ah_data To support for ahash algorithms, we add a pointer to a crypto_ahash to ah_data. Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- include/net/ah.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/ah.h b/include/net/ah.h index ae1c322f4242..7ac52214ba0f 100644 --- a/include/net/ah.h +++ b/include/net/ah.h @@ -14,6 +14,7 @@ struct ah_data int icv_trunc_len; struct crypto_hash *tfm; + struct crypto_ahash *ahash; }; static inline int ah_mac_digest(struct ah_data *ahp, struct sk_buff *skb, -- cgit v1.3-8-gc7d7 From 2ad9afbf5ca27482aa275de517261d56fd1e7ba0 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 7 Oct 2009 22:49:57 +0000 Subject: ah: Remove obsolete code ah4 and ah6 are converted to ahash now, so we can remove the code for the obsolete hash algorithm. Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- include/net/ah.h | 29 +++-------------------------- 1 file changed, 3 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/include/net/ah.h b/include/net/ah.h index 7ac52214ba0f..7573a7152a72 100644 --- a/include/net/ah.h +++ b/include/net/ah.h @@ -1,44 +1,21 @@ #ifndef _NET_AH_H #define _NET_AH_H -#include -#include +#include /* This is the maximum truncated ICV length that we know of. */ #define MAX_AH_AUTH_LEN 12 +struct crypto_ahash; + struct ah_data { - u8 *work_icv; int icv_full_len; int icv_trunc_len; - struct crypto_hash *tfm; struct crypto_ahash *ahash; }; -static inline int ah_mac_digest(struct ah_data *ahp, struct sk_buff *skb, - u8 *auth_data) -{ - struct hash_desc desc; - int err; - - desc.tfm = ahp->tfm; - desc.flags = 0; - - memset(auth_data, 0, ahp->icv_trunc_len); - err = crypto_hash_init(&desc); - if (unlikely(err)) - goto out; - err = skb_icv_walk(skb, &desc, 0, skb->len, crypto_hash_update); - if (unlikely(err)) - goto out; - err = crypto_hash_final(&desc, ahp->work_icv); - -out: - return err; -} - struct ip_auth_hdr; static inline struct ip_auth_hdr *ip_auth_hdr(const struct sk_buff *skb) -- cgit v1.3-8-gc7d7 From eb2ff967a587a4a784fd2390f38e324a5bec01ec Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 7 Oct 2009 22:50:40 +0000 Subject: xfrm: remove skb_icv_walk The last users of skb_icv_walk are converted to ahash now, so skb_icv_walk is unused and can be removed. Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- include/net/xfrm.h | 3 -- net/xfrm/xfrm_algo.c | 78 ---------------------------------------------------- 2 files changed, 81 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 223e90a44824..d9c6dbb92719 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1500,9 +1500,6 @@ struct scatterlist; typedef int (icv_update_fn_t)(struct hash_desc *, struct scatterlist *, unsigned int); -extern int skb_icv_walk(const struct sk_buff *skb, struct hash_desc *tfm, - int offset, int len, icv_update_fn_t icv_update); - static inline int xfrm_addr_cmp(xfrm_address_t *a, xfrm_address_t *b, int family) { diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index faf54c6bf96b..b39341072aa6 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -689,84 +689,6 @@ int xfrm_count_enc_supported(void) } EXPORT_SYMBOL_GPL(xfrm_count_enc_supported); -/* Move to common area: it is shared with AH. */ - -int skb_icv_walk(const struct sk_buff *skb, struct hash_desc *desc, - int offset, int len, icv_update_fn_t icv_update) -{ - int start = skb_headlen(skb); - int i, copy = start - offset; - struct sk_buff *frag_iter; - struct scatterlist sg; - int err; - - /* Checksum header. */ - if (copy > 0) { - if (copy > len) - copy = len; - - sg_init_one(&sg, skb->data + offset, copy); - - err = icv_update(desc, &sg, copy); - if (unlikely(err)) - return err; - - if ((len -= copy) == 0) - return 0; - offset += copy; - } - - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - int end; - - WARN_ON(start > offset + len); - - end = start + skb_shinfo(skb)->frags[i].size; - if ((copy = end - offset) > 0) { - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - - if (copy > len) - copy = len; - - sg_init_table(&sg, 1); - sg_set_page(&sg, frag->page, copy, - frag->page_offset + offset-start); - - err = icv_update(desc, &sg, copy); - if (unlikely(err)) - return err; - - if (!(len -= copy)) - return 0; - offset += copy; - } - start = end; - } - - skb_walk_frags(skb, frag_iter) { - int end; - - WARN_ON(start > offset + len); - - end = start + frag_iter->len; - if ((copy = end - offset) > 0) { - if (copy > len) - copy = len; - err = skb_icv_walk(frag_iter, desc, offset-start, - copy, icv_update); - if (unlikely(err)) - return err; - if ((len -= copy) == 0) - return 0; - offset += copy; - } - start = end; - } - BUG_ON(len); - return 0; -} -EXPORT_SYMBOL_GPL(skb_icv_walk); - #if defined(CONFIG_INET_ESP) || defined(CONFIG_INET_ESP_MODULE) || defined(CONFIG_INET6_ESP) || defined(CONFIG_INET6_ESP_MODULE) void *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len) -- cgit v1.3-8-gc7d7 From 4c2b1a11646bf74e2926ce8b13a21884adc1e05c Mon Sep 17 00:00:00 2001 From: Inaky Perez-Gonzalez Date: Wed, 2 Sep 2009 15:36:05 -0700 Subject: wimax: allow specifying debug levels as command line option Add "debug" module options to all the wimax modules (including drivers) so that the debug levels can be set upon kernel boot or module load time. This is needed as currently there was a limitation where the debug levels could only be set when a device was succesfully enumerated. This made it difficult to debug issues that made a device not probe properly. Signed-off-by: Inaky Perez-Gonzalez --- drivers/net/wimax/i2400m/driver.c | 10 ++++++ drivers/net/wimax/i2400m/sdio.c | 10 ++++++ drivers/net/wimax/i2400m/usb.c | 9 +++++ include/linux/wimax/debug.h | 72 +++++++++++++++++++++++++++++++++++++++ net/wimax/stack.c | 11 ++++++ 5 files changed, 112 insertions(+) (limited to 'include') diff --git a/drivers/net/wimax/i2400m/driver.c b/drivers/net/wimax/i2400m/driver.c index 7ba00de5dd9b..e3b2c246cad7 100644 --- a/drivers/net/wimax/i2400m/driver.c +++ b/drivers/net/wimax/i2400m/driver.c @@ -90,6 +90,14 @@ MODULE_PARM_DESC(power_save_disabled, "False by default (so the device is told to do power " "saving)."); +static char i2400m_debug_params[128]; +module_param_string(debug, i2400m_debug_params, sizeof(i2400m_debug_params), + 0644); +MODULE_PARM_DESC(debug, + "String of space-separated NAME:VALUE pairs, where NAMEs " + "are the different debug submodules and VALUE are the " + "initial debug value to set."); + /** * i2400m_queue_work - schedule work on a i2400m's queue * @@ -794,6 +802,8 @@ size_t D_LEVEL_SIZE = ARRAY_SIZE(D_LEVEL); static int __init i2400m_driver_init(void) { + d_parse_params(D_LEVEL, D_LEVEL_SIZE, i2400m_debug_params, + "i2400m.debug"); return 0; } module_init(i2400m_driver_init); diff --git a/drivers/net/wimax/i2400m/sdio.c b/drivers/net/wimax/i2400m/sdio.c index 7c1b843b63e9..2d2cc5ac6d86 100644 --- a/drivers/net/wimax/i2400m/sdio.c +++ b/drivers/net/wimax/i2400m/sdio.c @@ -71,6 +71,14 @@ static int ioe_timeout = 2; module_param(ioe_timeout, int, 0); +static char i2400ms_debug_params[128]; +module_param_string(debug, i2400ms_debug_params, sizeof(i2400ms_debug_params), + 0644); +MODULE_PARM_DESC(debug, + "String of space-separated NAME:VALUE pairs, where NAMEs " + "are the different debug submodules and VALUE are the " + "initial debug value to set."); + /* Our firmware file name list */ static const char *i2400ms_bus_fw_names[] = { #define I2400MS_FW_FILE_NAME "i2400m-fw-sdio-1.3.sbcf" @@ -559,6 +567,8 @@ struct sdio_driver i2400m_sdio_driver = { static int __init i2400ms_driver_init(void) { + d_parse_params(D_LEVEL, D_LEVEL_SIZE, i2400ms_debug_params, + "i2400m_sdio.debug"); return sdio_register_driver(&i2400m_sdio_driver); } module_init(i2400ms_driver_init); diff --git a/drivers/net/wimax/i2400m/usb.c b/drivers/net/wimax/i2400m/usb.c index a5879e21bbf3..063422290a43 100644 --- a/drivers/net/wimax/i2400m/usb.c +++ b/drivers/net/wimax/i2400m/usb.c @@ -71,6 +71,13 @@ #define D_SUBMODULE usb #include "usb-debug-levels.h" +static char i2400mu_debug_params[128]; +module_param_string(debug, i2400mu_debug_params, sizeof(i2400mu_debug_params), + 0644); +MODULE_PARM_DESC(debug, + "String of space-separated NAME:VALUE pairs, where NAMEs " + "are the different debug submodules and VALUE are the " + "initial debug value to set."); /* Our firmware file name */ static const char *i2400mu_bus_fw_names[] = { @@ -633,6 +640,8 @@ struct usb_driver i2400mu_driver = { static int __init i2400mu_driver_init(void) { + d_parse_params(D_LEVEL, D_LEVEL_SIZE, i2400mu_debug_params, + "i2400m_usb.debug"); return usb_register(&i2400mu_driver); } module_init(i2400mu_driver_init); diff --git a/include/linux/wimax/debug.h b/include/linux/wimax/debug.h index c703e0340423..db8096e88533 100644 --- a/include/linux/wimax/debug.h +++ b/include/linux/wimax/debug.h @@ -450,4 +450,76 @@ do { \ }) +static inline +void d_submodule_set(struct d_level *d_level, size_t d_level_size, + const char *submodule, u8 level, const char *tag) +{ + struct d_level *itr, *top; + int index = -1; + + for (itr = d_level, top = itr + d_level_size; itr < top; itr++) { + index++; + if (itr->name == NULL) { + printk(KERN_ERR "%s: itr->name NULL?? (%p, #%d)\n", + tag, itr, index); + continue; + } + if (!strcmp(itr->name, submodule)) { + itr->level = level; + return; + } + } + printk(KERN_ERR "%s: unknown submodule %s\n", tag, submodule); +} + + +/** + * d_parse_params - Parse a string with debug parameters from the + * command line + * + * @d_level: level structure (D_LEVEL) + * @d_level_size: number of items in the level structure + * (D_LEVEL_SIZE). + * @_params: string with the parameters; this is a space (not tab!) + * separated list of NAME:VALUE, where value is the debug level + * and NAME is the name of the submodule. + * @tag: string for error messages (example: MODULE.ARGNAME). + */ +static inline +void d_parse_params(struct d_level *d_level, size_t d_level_size, + const char *_params, const char *tag) +{ + char submodule[130], *params, *params_orig, *token, *colon; + unsigned level, tokens; + + if (_params == NULL) + return; + params_orig = kstrdup(_params, GFP_KERNEL); + params = params_orig; + while (1) { + token = strsep(¶ms, " "); + if (token == NULL) + break; + if (*token == '\0') /* eat joint spaces */ + continue; + /* kernel's sscanf %s eats until whitespace, so we + * replace : by \n so it doesn't get eaten later by + * strsep */ + colon = strchr(token, ':'); + if (colon != NULL) + *colon = '\n'; + tokens = sscanf(token, "%s\n%u", submodule, &level); + if (colon != NULL) + *colon = ':'; /* set back, for error messages */ + if (tokens == 2) + d_submodule_set(d_level, d_level_size, + submodule, level, tag); + else + printk(KERN_ERR "%s: can't parse '%s' as a " + "SUBMODULE:LEVEL (%d tokens)\n", + tag, token, tokens); + } + kfree(params_orig); +} + #endif /* #ifndef __debug__h__ */ diff --git a/net/wimax/stack.c b/net/wimax/stack.c index 79fb7d7c640f..c8866412f830 100644 --- a/net/wimax/stack.c +++ b/net/wimax/stack.c @@ -60,6 +60,14 @@ #define D_SUBMODULE stack #include "debug-levels.h" +static char wimax_debug_params[128]; +module_param_string(debug, wimax_debug_params, sizeof(wimax_debug_params), + 0644); +MODULE_PARM_DESC(debug, + "String of space-separated NAME:VALUE pairs, where NAMEs " + "are the different debug submodules and VALUE are the " + "initial debug value to set."); + /* * Authoritative source for the RE_STATE_CHANGE attribute policy * @@ -562,6 +570,9 @@ int __init wimax_subsys_init(void) int result, cnt; d_fnstart(4, NULL, "()\n"); + d_parse_params(D_LEVEL, D_LEVEL_SIZE, wimax_debug_params, + "wimax.debug"); + snprintf(wimax_gnl_family.name, sizeof(wimax_gnl_family.name), "WiMAX"); result = genl_register_family(&wimax_gnl_family); -- cgit v1.3-8-gc7d7 From 32742e6158657f19ad31653705bef56d983508e7 Mon Sep 17 00:00:00 2001 From: Inaky Perez-Gonzalez Date: Thu, 3 Sep 2009 15:56:40 -0700 Subject: wimax/i2400m: decide properly if using signed vs non-signed firmware loading The i2400m based devices can boot two main types of firmware images: signed and non-signed. Signed images have signature data included that must match that of a certificate stored in the device. Currently the code is making the decission on what type of firmware load (signed vs non-signed) is going to be loaded based on a hardcoded decission in __i2400m_ack_verify(), based on the barker the device sent upon boot. This is not flexible enough as future hardware will emit more barkers; thus the bit has to be set in a place where there is better knowledge of what is going on. This will be done in follow-up commits -- however this patch paves the way for it. So the querying of the mode is packed into i2400m_boot_is_signed(); the main changes are just using i2400m_boot_is_signed() to determine the method to follow and setting i2400m->sboot in i2400m_is_boot_barker(). The modifications in i2400m_dnload_init() and i2400m_dnload_finalize() are just reorganizing the order of the if blocks and thus look larger than they really are. Signed-off-by: Inaky Perez-Gonzalez --- drivers/net/wimax/i2400m/fw.c | 35 +++++++++++++++++++++-------------- include/linux/wimax/i2400m.h | 10 ---------- 2 files changed, 21 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/drivers/net/wimax/i2400m/fw.c b/drivers/net/wimax/i2400m/fw.c index 92d4d605dc2d..c962a8d8df7e 100644 --- a/drivers/net/wimax/i2400m/fw.c +++ b/drivers/net/wimax/i2400m/fw.c @@ -508,6 +508,17 @@ error_send: } +/* + * Indicate if the device emitted a reboot barker that indicates + * "signed boot" + */ +static +unsigned i2400m_boot_is_signed(struct i2400m *i2400m) +{ + return likely(i2400m->sboot); +} + + /* * Do the final steps of uploading firmware * @@ -529,7 +540,7 @@ int i2400m_dnload_finalize(struct i2400m *i2400m, d_fnstart(3, dev, "offset %zu\n", offset); cmd = (void *) bcf + offset; - if (i2400m->sboot == 0) { + if (i2400m_boot_is_signed(i2400m) == 0) { struct i2400m_bootrom_header jump_ack; d_printf(1, dev, "unsecure boot, jumping to 0x%08x\n", le32_to_cpu(cmd->target_addr)); @@ -846,28 +857,24 @@ int i2400m_dnload_init(struct i2400m *i2400m, const struct i2400m_bcf_hdr *bcf) { int result; struct device *dev = i2400m_dev(i2400m); - u32 module_id = le32_to_cpu(bcf->module_id); - if (i2400m->sboot == 0 - && (module_id & I2400M_BCF_MOD_ID_POKES) == 0) { - /* non-signed boot process without pokes */ - result = i2400m_dnload_init_nonsigned(i2400m); + if (i2400m_boot_is_signed(i2400m)) { + d_printf(1, dev, "signed boot\n"); + result = i2400m_dnload_init_signed(i2400m, bcf); if (result == -ERESTARTSYS) return result; if (result < 0) - dev_err(dev, "fw %s: non-signed download " + dev_err(dev, "firmware %s: signed boot download " "initialization failed: %d\n", i2400m->fw_name, result); - } else if (i2400m->sboot == 0 - && (module_id & I2400M_BCF_MOD_ID_POKES)) { - /* non-signed boot process with pokes, nothing to do */ - result = 0; - } else { /* signed boot process */ - result = i2400m_dnload_init_signed(i2400m, bcf); + } else { + /* non-signed boot process without pokes */ + d_printf(1, dev, "non-signed boot\n"); + result = i2400m_dnload_init_nonsigned(i2400m); if (result == -ERESTARTSYS) return result; if (result < 0) - dev_err(dev, "fw %s: signed boot download " + dev_err(dev, "firmware %s: non-signed download " "initialization failed: %d\n", i2400m->fw_name, result); } diff --git a/include/linux/wimax/i2400m.h b/include/linux/wimax/i2400m.h index 433693ef2bb0..d6e2a3595682 100644 --- a/include/linux/wimax/i2400m.h +++ b/include/linux/wimax/i2400m.h @@ -168,16 +168,6 @@ enum i2400m_brh { }; -/* Constants for bcf->module_id */ -enum i2400m_bcf_mod_id { - /* Firmware file carries its own pokes -- pokes are a set of - * magical values that have to be written in certain memory - * addresses to get the device up and ready for firmware - * download when it is in non-signed boot mode. */ - I2400M_BCF_MOD_ID_POKES = 0x000000001, -}; - - /** * i2400m_bootrom_header - Header for a boot-mode command * -- cgit v1.3-8-gc7d7 From 923d708fed9d47c7b4d67694500d766337663e29 Mon Sep 17 00:00:00 2001 From: Inaky Perez-Gonzalez Date: Fri, 4 Sep 2009 14:50:59 -0700 Subject: wimax/i2400m: fix reboot echo/ack barker deadlock The i2400m based devices can get in a sort of a deadlock some times; when they boot, they send a reboot "barker" (a magic number) and then the driver has to echo that same barker to ack reception (echo/ack). Then the device does a final ack by sending an ACK barker. The first time this happens, we don't know ahead of time with barker the device is going to send, as different device models and SKUs will send different barker depending on the EEPROM programming. If the device has sent the barker before the driver has been able to read it, the driver looses, as it doesn't know which barker it has to echo/ack back. With older devices, we tried a couple of combinations and that always worked; but now, with adding support for more, in which we have an unlimited number of new barkers, that is not an option. So we rework said case so that when the device gets stuck, we just cycle through all the known types until one forces the device to send an ack. Otherwise, the driver gives up and aborts. Signed-off-by: Inaky Perez-Gonzalez --- drivers/net/wimax/i2400m/fw.c | 63 ++++++++++++++++++++++++++++++++++--------- include/linux/wimax/i2400m.h | 2 +- 2 files changed, 52 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/drivers/net/wimax/i2400m/fw.c b/drivers/net/wimax/i2400m/fw.c index 55fe011a9633..eef236d85af3 100644 --- a/drivers/net/wimax/i2400m/fw.c +++ b/drivers/net/wimax/i2400m/fw.c @@ -812,7 +812,7 @@ int i2400m_dnload_finalize(struct i2400m *i2400m, * * @i2400m: device descriptor * @flags: - * I2400M_BRI_SOFT: a reboot notification has been seen + * I2400M_BRI_SOFT: a reboot barker has been seen * already, so don't wait for it. * * I2400M_BRI_NO_REBOOT: Don't send a reboot command, but wait @@ -829,8 +829,9 @@ int i2400m_dnload_finalize(struct i2400m *i2400m, * main phases to this: * * a. (1) send a reboot command and (2) get a reboot barker - * b. (1) ack the reboot sending a reboot barker and (2) getting an - * ack barker in return + * + * b. (1) echo/ack the reboot sending the reboot barker back and (2) + * getting an ack barker in return * * We want to skip (a) in some cases [soft]. The state machine is * horrible, but it is basically: on each phase, send what has to be @@ -838,6 +839,16 @@ int i2400m_dnload_finalize(struct i2400m *i2400m, * have to backtrack and retry, so we keep a max tries counter for * that. * + * It sucks because we don't know ahead of time which is going to be + * the reboot barker (the device might send different ones depending + * on its EEPROM config) and once the device reboots and waits for the + * echo/ack reboot barker being sent back, it doesn't understand + * anything else. So we can be left at the point where we don't know + * what to send to it -- cold reset and bus reset seem to have little + * effect. So the function iterates (in this case) through all the + * known barkers and tries them all until an ACK is + * received. Otherwise, it gives up. + * * If we get a timeout after sending a warm reset, we do it again. */ int i2400m_bootrom_init(struct i2400m *i2400m, enum i2400m_bri flags) @@ -848,6 +859,7 @@ int i2400m_bootrom_init(struct i2400m *i2400m, enum i2400m_bri flags) struct i2400m_bootrom_header ack; int count = i2400m->bus_bm_retries; int ack_timeout_cnt = 1; + unsigned i; BUILD_BUG_ON(sizeof(*cmd) != sizeof(i2400m_barker_db[0].data)); BUILD_BUG_ON(sizeof(ack) != sizeof(i2400m_ACK_BARKER)); @@ -858,6 +870,7 @@ int i2400m_bootrom_init(struct i2400m *i2400m, enum i2400m_bri flags) if (flags & I2400M_BRI_SOFT) goto do_reboot_ack; do_reboot: + ack_timeout_cnt = 1; if (--count < 0) goto error_timeout; d_printf(4, dev, "device reboot: reboot command [%d # left]\n", @@ -869,22 +882,47 @@ do_reboot: flags &= ~I2400M_BRI_NO_REBOOT; switch (result) { case -ERESTARTSYS: + /* + * at this point, i2400m_bm_cmd(), through + * __i2400m_bm_ack_process(), has updated + * i2400m->barker and we are good to go. + */ d_printf(4, dev, "device reboot: got reboot barker\n"); break; case -EISCONN: /* we don't know how it got here...but we follow it */ d_printf(4, dev, "device reboot: got ack barker - whatever\n"); goto do_reboot; - case -ETIMEDOUT: /* device has timed out, we might be in boot - * mode already and expecting an ack, let's try - * that */ - if (i2400m->barker == NULL) { - dev_info(dev, "warm reset timed out, unknown barker " - "type, rebooting\n"); - goto do_reboot; - } else { - dev_info(dev, "warm reset timed out, trying an ack\n"); + case -ETIMEDOUT: + /* + * Device has timed out, we might be in boot mode + * already and expecting an ack; if we don't know what + * the barker is, we just send them all. Cold reset + * and bus reset don't work. Beats me. + */ + if (i2400m->barker != NULL) { + dev_err(dev, "device boot: reboot barker timed out, " + "trying (set) %08x echo/ack\n", + le32_to_cpu(i2400m->barker->data[0])); goto do_reboot_ack; } + for (i = 0; i < i2400m_barker_db_used; i++) { + struct i2400m_barker_db *barker = &i2400m_barker_db[i]; + memcpy(cmd, barker->data, sizeof(barker->data)); + result = i2400m_bm_cmd(i2400m, cmd, sizeof(*cmd), + &ack, sizeof(ack), + I2400M_BM_CMD_RAW); + if (result == -EISCONN) { + dev_warn(dev, "device boot: got ack barker " + "after sending echo/ack barker " + "#%d/%08x; rebooting j.i.c.\n", + i, le32_to_cpu(barker->data[0])); + flags &= ~I2400M_BRI_NO_REBOOT; + goto do_reboot; + } + } + dev_err(dev, "device boot: tried all the echo/acks, could " + "not get device to respond; giving up"); + result = -ESHUTDOWN; case -EPROTO: case -ESHUTDOWN: /* dev is gone */ case -EINTR: /* user cancelled */ @@ -892,6 +930,7 @@ do_reboot: default: dev_err(dev, "device reboot: error %d while waiting " "for reboot barker - rebooting\n", result); + d_dump(1, dev, &ack, result); goto do_reboot; } /* At this point we ack back with 4 REBOOT barkers and expect diff --git a/include/linux/wimax/i2400m.h b/include/linux/wimax/i2400m.h index d6e2a3595682..fd5af05083cb 100644 --- a/include/linux/wimax/i2400m.h +++ b/include/linux/wimax/i2400m.h @@ -138,7 +138,7 @@ struct i2400m_bcf_hdr { __le32 module_id; __le32 module_vendor; __le32 date; /* BCD YYYMMDD */ - __le32 size; + __le32 size; /* in dwords */ __le32 key_size; /* in dwords */ __le32 modulus_size; /* in dwords */ __le32 exponent_size; /* in dwords */ -- cgit v1.3-8-gc7d7 From f8fc3295570115267ce1ce901f362d13d194aefc Mon Sep 17 00:00:00 2001 From: Cindy H Kao Date: Fri, 4 Sep 2009 17:38:46 -0700 Subject: wimax/iwmc3200: add new sdio device ID to support iwmc3200 2.5GHz sku Different sdio device IDs are designated to support different intel wimax silicon sku. The new macro SDIO_DEVICE_ID_IWMC3200_WIMAX_2G5(0x1407) is added to support iwmc3200 2.5GHz sku. The existing SDIO_DEVICE_ID_IWMC3200_WIMAX(0x1402) is for iwmc3200 general sku. Signed-off-by: Cindy H Kao Signed-off-by: Inaky Perez-Gonzalez --- drivers/net/wimax/i2400m/sdio.c | 2 ++ include/linux/mmc/sdio_ids.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include') diff --git a/drivers/net/wimax/i2400m/sdio.c b/drivers/net/wimax/i2400m/sdio.c index 2d2cc5ac6d86..de158ee0c32c 100644 --- a/drivers/net/wimax/i2400m/sdio.c +++ b/drivers/net/wimax/i2400m/sdio.c @@ -550,6 +550,8 @@ const struct sdio_device_id i2400ms_sdio_ids[] = { /* Intel: i2400m WiMAX (iwmc3200) over SDIO */ { SDIO_DEVICE(SDIO_VENDOR_ID_INTEL, SDIO_DEVICE_ID_INTEL_IWMC3200WIMAX) }, + { SDIO_DEVICE(SDIO_VENDOR_ID_INTEL, + SDIO_DEVICE_ID_INTEL_IWMC3200WIMAX_2G5) }, { /* end: all zeroes */ }, }; MODULE_DEVICE_TABLE(sdio, i2400ms_sdio_ids); diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h index 2dbfb5a05994..33b2ea09a4ad 100644 --- a/include/linux/mmc/sdio_ids.h +++ b/include/linux/mmc/sdio_ids.h @@ -28,6 +28,7 @@ #define SDIO_DEVICE_ID_INTEL_IWMC3200TOP 0x1404 #define SDIO_DEVICE_ID_INTEL_IWMC3200GPS 0x1405 #define SDIO_DEVICE_ID_INTEL_IWMC3200BT 0x1406 +#define SDIO_DEVICE_ID_INTEL_IWMC3200WIMAX_2G5 0x1407 #define SDIO_VENDOR_ID_MARVELL 0x02df #define SDIO_DEVICE_ID_MARVELL_LIBERTAS 0x9103 -- cgit v1.3-8-gc7d7 From 7329012e673231dee9a21567cfb9881f5ea462ba Mon Sep 17 00:00:00 2001 From: Dirk Brandewie Date: Wed, 12 Aug 2009 11:29:46 -0700 Subject: wimax/i6x50: add Intel WiFi/WiMAX Link 6050 Series support Add support for the WiMAX device in the Intel WiFi/WiMAX Link 6050 Series; this involves: - adding the device ID to bind to and an endpoint mapping for the driver to use. - at probe() time, some things are set depending on the device id: + the list of firmware names to try + mapping of endpoints Signed-off-by: Dirk Brandewie Signed-off-by: Inaky Perez-Gonzalez --- drivers/net/wimax/i2400m/fw.c | 3 +++ drivers/net/wimax/i2400m/i2400m-usb.h | 3 +++ drivers/net/wimax/i2400m/usb.c | 26 +++++++++++++++++++------- include/linux/wimax/i2400m.h | 1 + 4 files changed, 26 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/net/wimax/i2400m/fw.c b/drivers/net/wimax/i2400m/fw.c index 84a39c30c3d3..5719f4a4080f 100644 --- a/drivers/net/wimax/i2400m/fw.c +++ b/drivers/net/wimax/i2400m/fw.c @@ -277,6 +277,9 @@ int i2400m_barker_db_known_barkers(void) result = i2400m_barker_db_add(I2400M_SBOOT_BARKER); if (result < 0) goto error_add; + result = i2400m_barker_db_add(I2400M_SBOOT_BARKER_6050); + if (result < 0) + goto error_add; error_add: return result; } diff --git a/drivers/net/wimax/i2400m/i2400m-usb.h b/drivers/net/wimax/i2400m/i2400m-usb.h index f73a067e0668..5cc0f279417e 100644 --- a/drivers/net/wimax/i2400m/i2400m-usb.h +++ b/drivers/net/wimax/i2400m/i2400m-usb.h @@ -148,6 +148,9 @@ enum { I2400MU_MAX_NOTIFICATION_LEN = 256, I2400MU_BLK_SIZE = 16, I2400MU_PL_SIZE_MAX = 0x3EFF, + + /* Device IDs */ + USB_DEVICE_ID_I6050 = 0x0186, }; diff --git a/drivers/net/wimax/i2400m/usb.c b/drivers/net/wimax/i2400m/usb.c index 063422290a43..77d08d928274 100644 --- a/drivers/net/wimax/i2400m/usb.c +++ b/drivers/net/wimax/i2400m/usb.c @@ -80,11 +80,16 @@ MODULE_PARM_DESC(debug, "initial debug value to set."); /* Our firmware file name */ -static const char *i2400mu_bus_fw_names[] = { +static const char *i2400mu_bus_fw_names_5x50[] = { #define I2400MU_FW_FILE_NAME_v1_4 "i2400m-fw-usb-1.4.sbcf" I2400MU_FW_FILE_NAME_v1_4, -#define I2400MU_FW_FILE_NAME_v1_3 "i2400m-fw-usb-1.3.sbcf" - I2400MU_FW_FILE_NAME_v1_3, + NULL, +}; + + +static const char *i2400mu_bus_fw_names_6050[] = { +#define I6050U_FW_FILE_NAME_v1_5 "i6050-fw-usb-1.5.sbcf" + I6050U_FW_FILE_NAME_v1_5, NULL, }; @@ -418,10 +423,16 @@ int i2400mu_probe(struct usb_interface *iface, i2400m->bus_bm_retries = I2400M_USB_BOOT_RETRIES; i2400m->bus_bm_cmd_send = i2400mu_bus_bm_cmd_send; i2400m->bus_bm_wait_for_ack = i2400mu_bus_bm_wait_for_ack; - i2400m->bus_fw_names = i2400mu_bus_fw_names; i2400m->bus_bm_mac_addr_impaired = 0; - { + if (id->idProduct == USB_DEVICE_ID_I6050) { + i2400m->bus_fw_names = i2400mu_bus_fw_names_6050; + i2400mu->endpoint_cfg.bulk_out = 0; + i2400mu->endpoint_cfg.notification = 3; + i2400mu->endpoint_cfg.reset_cold = 2; + i2400mu->endpoint_cfg.bulk_in = 1; + } else { + i2400m->bus_fw_names = i2400mu_bus_fw_names_5x50; i2400mu->endpoint_cfg.bulk_out = 0; i2400mu->endpoint_cfg.notification = 1; i2400mu->endpoint_cfg.reset_cold = 2; @@ -614,6 +625,7 @@ out: static struct usb_device_id i2400mu_id_table[] = { + { USB_DEVICE(0x8086, USB_DEVICE_ID_I6050) }, { USB_DEVICE(0x8086, 0x0181) }, { USB_DEVICE(0x8086, 0x1403) }, { USB_DEVICE(0x8086, 0x1405) }, @@ -656,7 +668,7 @@ void __exit i2400mu_driver_exit(void) module_exit(i2400mu_driver_exit); MODULE_AUTHOR("Intel Corporation "); -MODULE_DESCRIPTION("Intel 2400M WiMAX networking for USB"); +MODULE_DESCRIPTION("Driver for USB based Intel Wireless WiMAX Connection 2400M " + "(5x50 & 6050)"); MODULE_LICENSE("GPL"); MODULE_FIRMWARE(I2400MU_FW_FILE_NAME_v1_4); -MODULE_FIRMWARE(I2400MU_FW_FILE_NAME_v1_3); diff --git a/include/linux/wimax/i2400m.h b/include/linux/wimax/i2400m.h index fd5af05083cb..62d356153565 100644 --- a/include/linux/wimax/i2400m.h +++ b/include/linux/wimax/i2400m.h @@ -266,6 +266,7 @@ enum { I2400M_WARM_RESET_BARKER = 0x50f750f7, I2400M_NBOOT_BARKER = 0xdeadbeef, I2400M_SBOOT_BARKER = 0x0ff1c1a1, + I2400M_SBOOT_BARKER_6050 = 0x80000001, I2400M_ACK_BARKER = 0xfeedbabe, I2400M_D2H_MSG_BARKER = 0xbeefbabe, }; -- cgit v1.3-8-gc7d7 From c2315b4ea9ac9c3f8caf03c3511d86fabe4a5fcd Mon Sep 17 00:00:00 2001 From: Inaky Perez-Gonzalez Date: Wed, 16 Sep 2009 17:10:55 -0700 Subject: wimax/i2400m: clarify and fix i2400m->{ready,updown} The i2400m driver uses two different bits to distinguish how much the driver is up. i2400m->ready is used to denote that the infrastructure to communicate with the device is up and running. i2400m->updown is used to indicate if 'ready' and the device is up and running, ready to take control and data traffic. However, all this was pretty dirty and not clear, with many open spots where race conditions were present. This commit cleans up the situation by: - documenting the usage of both bits - setting them only in specific, well controlled places (i2400m_dev_start, i2400m_dev_stop) - ensuring the i2400m workqueue can't get in the middle of the setting by flushing it when i2400m->ready is set to zero. This allows the report hook not having to check again for the bit to be set [rx.c:i2400m_report_hook_work()]. - using i2400m->updown to determine if the device is up and running instead of the wimax state in i2400m_dev_reset_handle(). - not loosing missed messages sent by the hardware before i2400m->ready is set. In rx.c, whatever the device sends can be sent to user space over the message pipes as soon as the wimax device is registered, so don't wait for i2400m->ready to be set. Signed-off-by: Inaky Perez-Gonzalez --- drivers/net/wimax/i2400m/driver.c | 44 ++++++++++++++++++++++++++------------- drivers/net/wimax/i2400m/i2400m.h | 23 +++++++++++++++++++- drivers/net/wimax/i2400m/rx.c | 16 +++++++------- drivers/net/wimax/i2400m/usb.c | 7 ++++++- include/net/wimax.h | 6 ++++++ 5 files changed, 70 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/drivers/net/wimax/i2400m/driver.c b/drivers/net/wimax/i2400m/driver.c index a0ae19966c0c..6280646d7d7f 100644 --- a/drivers/net/wimax/i2400m/driver.c +++ b/drivers/net/wimax/i2400m/driver.c @@ -455,6 +455,8 @@ retry: result = i2400m->bus_dev_start(i2400m); if (result < 0) goto error_bus_dev_start; + i2400m->ready = 1; + wmb(); /* see i2400m->ready's documentation */ result = i2400m_firmware_check(i2400m); /* fw versions ok? */ if (result < 0) goto error_fw_check; @@ -462,7 +464,6 @@ retry: result = i2400m_check_mac_addr(i2400m); if (result < 0) goto error_check_mac_addr; - i2400m->ready = 1; wimax_state_change(wimax_dev, WIMAX_ST_UNINITIALIZED); result = i2400m_dev_initialize(i2400m); if (result < 0) @@ -475,6 +476,9 @@ retry: error_dev_initialize: error_check_mac_addr: + i2400m->ready = 0; + wmb(); /* see i2400m->ready's documentation */ + flush_workqueue(i2400m->work_queue); error_fw_check: i2400m->bus_dev_stop(i2400m); error_bus_dev_start: @@ -498,11 +502,15 @@ error_bootstrap: static int i2400m_dev_start(struct i2400m *i2400m, enum i2400m_bri bm_flags) { - int result; + int result = 0; mutex_lock(&i2400m->init_mutex); /* Well, start the device */ - result = __i2400m_dev_start(i2400m, bm_flags); - if (result >= 0) - i2400m->updown = 1; + if (i2400m->updown == 0) { + result = __i2400m_dev_start(i2400m, bm_flags); + if (result >= 0) { + i2400m->updown = 1; + wmb(); /* see i2400m->updown's documentation */ + } + } mutex_unlock(&i2400m->init_mutex); return result; } @@ -529,7 +537,14 @@ void __i2400m_dev_stop(struct i2400m *i2400m) wimax_state_change(wimax_dev, __WIMAX_ST_QUIESCING); i2400m_net_wake_stop(i2400m); i2400m_dev_shutdown(i2400m); - i2400m->ready = 0; + /* + * Make sure no report hooks are running *before* we stop the + * communication infrastructure with the device. + */ + i2400m->ready = 0; /* nobody can queue work anymore */ + wmb(); /* see i2400m->ready's documentation */ + flush_workqueue(i2400m->work_queue); + i2400m->bus_dev_stop(i2400m); destroy_workqueue(i2400m->work_queue); i2400m_rx_release(i2400m); @@ -551,6 +566,7 @@ void i2400m_dev_stop(struct i2400m *i2400m) if (i2400m->updown) { __i2400m_dev_stop(i2400m); i2400m->updown = 0; + wmb(); /* see i2400m->updown's documentation */ } mutex_unlock(&i2400m->init_mutex); } @@ -632,7 +648,6 @@ void __i2400m_dev_reset_handle(struct work_struct *ws) const char *reason; struct i2400m *i2400m = iw->i2400m; struct device *dev = i2400m_dev(i2400m); - enum wimax_st wimax_state; struct i2400m_reset_ctx *ctx = i2400m->reset_ctx; if (WARN_ON(iw->pl_size != sizeof(reason))) @@ -647,29 +662,28 @@ void __i2400m_dev_reset_handle(struct work_struct *ws) /* We are still in i2400m_dev_start() [let it fail] or * i2400m_dev_stop() [we are shutting down anyway, so * ignore it] or we are resetting somewhere else. */ - dev_err(dev, "device rebooted\n"); + dev_err(dev, "device rebooted somewhere else?\n"); i2400m_msg_to_dev_cancel_wait(i2400m, -EL3RST); complete(&i2400m->msg_completion); goto out; } - wimax_state = wimax_state_get(&i2400m->wimax_dev); - if (wimax_state < WIMAX_ST_UNINITIALIZED) { - dev_info(dev, "%s: it is down, ignoring\n", reason); - goto out_unlock; /* ifconfig up/down wasn't called */ + if (i2400m->updown == 0) { + dev_info(dev, "%s: device is down, doing nothing\n", reason); + goto out_unlock; } dev_err(dev, "%s: reinitializing driver\n", reason); __i2400m_dev_stop(i2400m); - i2400m->updown = 0; result = __i2400m_dev_start(i2400m, I2400M_BRI_SOFT | I2400M_BRI_MAC_REINIT); if (result < 0) { + i2400m->updown = 0; + wmb(); /* see i2400m->updown's documentation */ dev_err(dev, "%s: cannot start the device: %d\n", reason, result); result = i2400m->bus_reset(i2400m, I2400M_RT_BUS); if (result >= 0) result = -ENODEV; - } else - i2400m->updown = 1; + } out_unlock: if (i2400m->reset_ctx) { ctx->result = result; diff --git a/drivers/net/wimax/i2400m/i2400m.h b/drivers/net/wimax/i2400m/i2400m.h index 2b9c400810b5..fbc156db5bfd 100644 --- a/drivers/net/wimax/i2400m/i2400m.h +++ b/drivers/net/wimax/i2400m/i2400m.h @@ -307,6 +307,27 @@ struct i2400m_barker_db; * force this to be the first field so that we can get from * netdev_priv() the right pointer. * + * @updown: the device is up and ready for transmitting control and + * data packets. This implies @ready (communication infrastructure + * with the device is ready) and the device's firmware has been + * loaded and the device initialized. + * + * Write to it only inside a i2400m->init_mutex protected area + * followed with a wmb(); rmb() before accesing (unless locked + * inside i2400m->init_mutex). Read access can be loose like that + * [just using rmb()] because the paths that use this also do + * other error checks later on. + * + * @ready: Communication infrastructure with the device is ready, data + * frames can start to be passed around (this is lighter than + * using the WiMAX state for certain hot paths). + * + * Write to it only inside a i2400m->init_mutex protected area + * followed with a wmb(); rmb() before accesing (unless locked + * inside i2400m->init_mutex). Read access can be loose like that + * [just using rmb()] because the paths that use this also do + * other error checks later on. + * * @rx_reorder: 1 if RX reordering is enabled; this can only be * set at probe time. * @@ -458,7 +479,7 @@ struct i2400m { unsigned updown:1; /* Network device is up or down */ unsigned boot_mode:1; /* is the device in boot mode? */ unsigned sboot:1; /* signed or unsigned fw boot */ - unsigned ready:1; /* all probing steps done */ + unsigned ready:1; /* Device comm infrastructure ready */ unsigned rx_reorder:1; /* RX reorder is enabled */ u8 trace_msg_from_user; /* echo rx msgs to 'trace' pipe */ /* typed u8 so /sys/kernel/debug/u8 can tweak */ diff --git a/drivers/net/wimax/i2400m/rx.c b/drivers/net/wimax/i2400m/rx.c index bcd411f1a854..82c200ad9fdc 100644 --- a/drivers/net/wimax/i2400m/rx.c +++ b/drivers/net/wimax/i2400m/rx.c @@ -177,8 +177,7 @@ void i2400m_report_hook_work(struct work_struct *ws) struct i2400m_work *iw = container_of(ws, struct i2400m_work, ws); struct i2400m_report_hook_args *args = (void *) iw->pl; - if (iw->i2400m->ready) - i2400m_report_hook(iw->i2400m, args->l3l4_hdr, args->size); + i2400m_report_hook(iw->i2400m, args->l3l4_hdr, args->size); kfree_skb(args->skb_rx); i2400m_put(iw->i2400m); kfree(iw); @@ -305,11 +304,12 @@ void i2400m_rx_ctl(struct i2400m *i2400m, struct sk_buff *skb_rx, .l3l4_hdr = l3l4_hdr, .size = size }; - if (unlikely(i2400m->ready == 0)) /* only send if up */ - return; - skb_get(skb_rx); - i2400m_queue_work(i2400m, i2400m_report_hook_work, - GFP_KERNEL, &args, sizeof(args)); + rmb(); /* see i2400m->ready's documentation */ + if (likely(i2400m->ready)) { /* only send if up */ + skb_get(skb_rx); + i2400m_queue_work(i2400m, i2400m_report_hook_work, + GFP_KERNEL, &args, sizeof(args)); + } if (unlikely(i2400m->trace_msg_from_user)) wimax_msg(&i2400m->wimax_dev, "echo", l3l4_hdr, size, GFP_KERNEL); @@ -363,8 +363,6 @@ void i2400m_rx_trace(struct i2400m *i2400m, msg_type & I2400M_MT_REPORT_MASK ? "REPORT" : "CMD/SET/GET", msg_type, size); d_dump(2, dev, l3l4_hdr, size); - if (unlikely(i2400m->ready == 0)) /* only send if up */ - return; result = wimax_msg(wimax_dev, "trace", l3l4_hdr, size, GFP_KERNEL); if (result < 0) dev_err(dev, "error sending trace to userspace: %d\n", diff --git a/drivers/net/wimax/i2400m/usb.c b/drivers/net/wimax/i2400m/usb.c index 07653ded6c59..f4dfb60bb628 100644 --- a/drivers/net/wimax/i2400m/usb.c +++ b/drivers/net/wimax/i2400m/usb.c @@ -516,7 +516,10 @@ void i2400mu_disconnect(struct usb_interface *iface) * So at the end, the three cases require common handling. * * If at the time of this call the device's firmware is not loaded, - * nothing has to be done. + * nothing has to be done. Note we can be "loose" about not reading + * i2400m->updown under i2400m->init_mutex. If it happens to change + * inmediately, other parts of the call flow will fail and effectively + * catch it. * * If the firmware is loaded, we need to: * @@ -555,6 +558,7 @@ int i2400mu_suspend(struct usb_interface *iface, pm_message_t pm_msg) #endif d_fnstart(3, dev, "(iface %p pm_msg %u)\n", iface, pm_msg.event); + rmb(); /* see i2400m->updown's documentation */ if (i2400m->updown == 0) goto no_firmware; if (i2400m->state == I2400M_SS_DATA_PATH_CONNECTED && is_autosuspend) { @@ -608,6 +612,7 @@ int i2400mu_resume(struct usb_interface *iface) struct i2400m *i2400m = &i2400mu->i2400m; d_fnstart(3, dev, "(iface %p)\n", iface); + rmb(); /* see i2400m->updown's documentation */ if (i2400m->updown == 0) { d_printf(1, dev, "fw was down, no resume neeed\n"); goto out; diff --git a/include/net/wimax.h b/include/net/wimax.h index 2af7bf839f23..d69c4a7a1267 100644 --- a/include/net/wimax.h +++ b/include/net/wimax.h @@ -195,6 +195,12 @@ * defining the `struct nla_policy` for each message, it has to have * an array size of WIMAX_GNL_ATTR_MAX+1. * + * The op_*() function pointers will not be called if the wimax_dev is + * in a state <= %WIMAX_ST_UNINITIALIZED. The exception is: + * + * - op_reset: can be called at any time after wimax_dev_add() has + * been called. + * * THE PIPE INTERFACE: * * This interface is kept intentionally simple. The driver can send -- cgit v1.3-8-gc7d7 From 7e75f93eda027d9f9e0203ee6ffd210ea92e98f3 Mon Sep 17 00:00:00 2001 From: jamal Date: Mon, 19 Oct 2009 02:17:56 +0000 Subject: pkt_sched: ingress socket filter by mark Allow bpf to set a filter to drop packets that dont match a specific mark Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/linux/filter.h | 3 ++- net/core/filter.c | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index 1354aaf6abbe..909193e25395 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -123,7 +123,8 @@ struct sock_fprog /* Required for SO_ATTACH_FILTER. */ #define SKF_AD_IFINDEX 8 #define SKF_AD_NLATTR 12 #define SKF_AD_NLATTR_NEST 16 -#define SKF_AD_MAX 20 +#define SKF_AD_MARK 20 +#define SKF_AD_MAX 24 #define SKF_NET_OFF (-0x100000) #define SKF_LL_OFF (-0x200000) diff --git a/net/core/filter.c b/net/core/filter.c index d1d779ca096d..e3987e1d4839 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -303,6 +303,9 @@ load_b: case SKF_AD_IFINDEX: A = skb->dev->ifindex; continue; + case SKF_AD_MARK: + A = skb->mark; + continue; case SKF_AD_NLATTR: { struct nlattr *nla; -- cgit v1.3-8-gc7d7 From 7b6856a0296a8f187bb88ba31fa83a08abba7966 Mon Sep 17 00:00:00 2001 From: Wolfgang Grandegger Date: Tue, 20 Oct 2009 00:08:01 -0700 Subject: can: provide library functions for skb allocation This patch makes the private functions alloc_can_skb() and alloc_can_err_skb() of the at91_can driver public and adapts all drivers to use these. While making the patch I realized, that the skb's are *not* setup consistently. It's now done as shown below: skb->protocol = htons(ETH_P_CAN); skb->pkt_type = PACKET_BROADCAST; skb->ip_summed = CHECKSUM_UNNECESSARY; *cf = (struct can_frame *)skb_put(skb, sizeof(struct can_frame)); memset(*cf, 0, sizeof(struct can_frame)); The frame is zeroed out to avoid uninitialized data to be passed to user space. Some drivers or library code did not set "pkt_type" or "ip_summed". Also, "__constant_htons()" should not be used for runtime invocations, as pointed out by David Miller. Signed-off-by: Wolfgang Grandegger Signed-off-by: David S. Miller --- drivers/net/can/at91_can.c | 32 ----------------------------- drivers/net/can/dev.c | 42 ++++++++++++++++++++++++++++++++------- drivers/net/can/sja1000/sja1000.c | 12 ++--------- drivers/net/can/ti_hecc.c | 17 ++++------------ drivers/net/can/usb/ems_usb.c | 16 ++------------- include/linux/can/dev.h | 4 ++++ 6 files changed, 47 insertions(+), 76 deletions(-) (limited to 'include') diff --git a/drivers/net/can/at91_can.c b/drivers/net/can/at91_can.c index b13fd9114130..cbe3fce53e3b 100644 --- a/drivers/net/can/at91_can.c +++ b/drivers/net/can/at91_can.c @@ -221,38 +221,6 @@ static inline void set_mb_mode(const struct at91_priv *priv, unsigned int mb, set_mb_mode_prio(priv, mb, mode, 0); } -static struct sk_buff *alloc_can_skb(struct net_device *dev, - struct can_frame **cf) -{ - struct sk_buff *skb; - - skb = netdev_alloc_skb(dev, sizeof(struct can_frame)); - if (unlikely(!skb)) - return NULL; - - skb->protocol = htons(ETH_P_CAN); - skb->ip_summed = CHECKSUM_UNNECESSARY; - *cf = (struct can_frame *)skb_put(skb, sizeof(struct can_frame)); - - return skb; -} - -static struct sk_buff *alloc_can_err_skb(struct net_device *dev, - struct can_frame **cf) -{ - struct sk_buff *skb; - - skb = alloc_can_skb(dev, cf); - if (unlikely(!skb)) - return NULL; - - memset(*cf, 0, sizeof(struct can_frame)); - (*cf)->can_id = CAN_ERR_FLAG; - (*cf)->can_dlc = CAN_ERR_DLC; - - return skb; -} - /* * Swtich transceiver on or off */ diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index 39b99f57c265..c3db111d2ff5 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -366,17 +366,12 @@ void can_restart(unsigned long data) can_flush_echo_skb(dev); /* send restart message upstream */ - skb = dev_alloc_skb(sizeof(struct can_frame)); + skb = alloc_can_err_skb(dev, &cf); if (skb == NULL) { err = -ENOMEM; goto restart; } - skb->dev = dev; - skb->protocol = htons(ETH_P_CAN); - cf = (struct can_frame *)skb_put(skb, sizeof(struct can_frame)); - memset(cf, 0, sizeof(struct can_frame)); - cf->can_id = CAN_ERR_FLAG | CAN_ERR_RESTARTED; - cf->can_dlc = CAN_ERR_DLC; + cf->can_id |= CAN_ERR_RESTARTED; netif_rx(skb); @@ -449,6 +444,39 @@ static void can_setup(struct net_device *dev) dev->features = NETIF_F_NO_CSUM; } +struct sk_buff *alloc_can_skb(struct net_device *dev, struct can_frame **cf) +{ + struct sk_buff *skb; + + skb = netdev_alloc_skb(dev, sizeof(struct can_frame)); + if (unlikely(!skb)) + return NULL; + + skb->protocol = htons(ETH_P_CAN); + skb->pkt_type = PACKET_BROADCAST; + skb->ip_summed = CHECKSUM_UNNECESSARY; + *cf = (struct can_frame *)skb_put(skb, sizeof(struct can_frame)); + memset(*cf, 0, sizeof(struct can_frame)); + + return skb; +} +EXPORT_SYMBOL_GPL(alloc_can_skb); + +struct sk_buff *alloc_can_err_skb(struct net_device *dev, struct can_frame **cf) +{ + struct sk_buff *skb; + + skb = alloc_can_skb(dev, cf); + if (unlikely(!skb)) + return NULL; + + (*cf)->can_id = CAN_ERR_FLAG; + (*cf)->can_dlc = CAN_ERR_DLC; + + return skb; +} +EXPORT_SYMBOL_GPL(alloc_can_err_skb); + /* * Allocate and setup space for the CAN network device */ diff --git a/drivers/net/can/sja1000/sja1000.c b/drivers/net/can/sja1000/sja1000.c index 96d8be4253f8..1a9c5958bbd7 100644 --- a/drivers/net/can/sja1000/sja1000.c +++ b/drivers/net/can/sja1000/sja1000.c @@ -296,11 +296,9 @@ static void sja1000_rx(struct net_device *dev) uint8_t dlc; int i; - skb = dev_alloc_skb(sizeof(struct can_frame)); + skb = alloc_can_skb(dev, &cf); if (skb == NULL) return; - skb->dev = dev; - skb->protocol = htons(ETH_P_CAN); fi = priv->read_reg(priv, REG_FI); dlc = fi & 0x0F; @@ -351,15 +349,9 @@ static int sja1000_err(struct net_device *dev, uint8_t isrc, uint8_t status) enum can_state state = priv->can.state; uint8_t ecc, alc; - skb = dev_alloc_skb(sizeof(struct can_frame)); + skb = alloc_can_err_skb(dev, &cf); if (skb == NULL) return -ENOMEM; - skb->dev = dev; - skb->protocol = htons(ETH_P_CAN); - cf = (struct can_frame *)skb_put(skb, sizeof(struct can_frame)); - memset(cf, 0, sizeof(struct can_frame)); - cf->can_id = CAN_ERR_FLAG; - cf->can_dlc = CAN_ERR_DLC; if (isrc & IRQ_DOI) { /* data overrun interrupt */ diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c index 23a7128e4eb7..07e8016b17ec 100644 --- a/drivers/net/can/ti_hecc.c +++ b/drivers/net/can/ti_hecc.c @@ -535,18 +535,15 @@ static int ti_hecc_rx_pkt(struct ti_hecc_priv *priv, int mbxno) u32 data, mbx_mask; unsigned long flags; - skb = netdev_alloc_skb(priv->ndev, sizeof(struct can_frame)); + skb = alloc_can_skb(priv->ndev, &cf); if (!skb) { if (printk_ratelimit()) dev_err(priv->ndev->dev.parent, - "ti_hecc_rx_pkt: netdev_alloc_skb() failed\n"); + "ti_hecc_rx_pkt: alloc_can_skb() failed\n"); return -ENOMEM; } - skb->protocol = __constant_htons(ETH_P_CAN); - skb->ip_summed = CHECKSUM_UNNECESSARY; mbx_mask = BIT(mbxno); - cf = (struct can_frame *)skb_put(skb, sizeof(struct can_frame)); data = hecc_read_mbx(priv, mbxno, HECC_CANMID); if (data & HECC_CANMID_IDE) cf->can_id = (data & CAN_EFF_MASK) | CAN_EFF_FLAG; @@ -656,19 +653,13 @@ static int ti_hecc_error(struct net_device *ndev, int int_status, struct sk_buff *skb; /* propogate the error condition to the can stack */ - skb = netdev_alloc_skb(ndev, sizeof(struct can_frame)); + skb = alloc_can_err_skb(ndev, &cf); if (!skb) { if (printk_ratelimit()) dev_err(priv->ndev->dev.parent, - "ti_hecc_error: netdev_alloc_skb() failed\n"); + "ti_hecc_error: alloc_can_err_skb() failed\n"); return -ENOMEM; } - skb->protocol = __constant_htons(ETH_P_CAN); - skb->ip_summed = CHECKSUM_UNNECESSARY; - cf = (struct can_frame *)skb_put(skb, sizeof(struct can_frame)); - memset(cf, 0, sizeof(struct can_frame)); - cf->can_id = CAN_ERR_FLAG; - cf->can_dlc = CAN_ERR_DLC; if (int_status & HECC_CANGIF_WLIF) { /* warning level int */ if ((int_status & HECC_CANGIF_BOIF) == 0) { diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c index a65f56a9cd3d..3685f3e42d12 100644 --- a/drivers/net/can/usb/ems_usb.c +++ b/drivers/net/can/usb/ems_usb.c @@ -311,14 +311,10 @@ static void ems_usb_rx_can_msg(struct ems_usb *dev, struct ems_cpc_msg *msg) int i; struct net_device_stats *stats = &dev->netdev->stats; - skb = netdev_alloc_skb(dev->netdev, sizeof(struct can_frame)); + skb = alloc_can_skb(dev->netdev, &cf); if (skb == NULL) return; - skb->protocol = htons(ETH_P_CAN); - - cf = (struct can_frame *)skb_put(skb, sizeof(struct can_frame)); - cf->can_id = msg->msg.can_msg.id; cf->can_dlc = min_t(u8, msg->msg.can_msg.length, 8); @@ -346,18 +342,10 @@ static void ems_usb_rx_err(struct ems_usb *dev, struct ems_cpc_msg *msg) struct sk_buff *skb; struct net_device_stats *stats = &dev->netdev->stats; - skb = netdev_alloc_skb(dev->netdev, sizeof(struct can_frame)); + skb = alloc_can_err_skb(dev->netdev, &cf); if (skb == NULL) return; - skb->protocol = htons(ETH_P_CAN); - - cf = (struct can_frame *)skb_put(skb, sizeof(struct can_frame)); - memset(cf, 0, sizeof(struct can_frame)); - - cf->can_id = CAN_ERR_FLAG; - cf->can_dlc = CAN_ERR_DLC; - if (msg->type == CPC_MSG_TYPE_CAN_STATE) { u8 state = msg->msg.can_state; diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 1d3f7f00e3af..1ed2a5cc03f5 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -68,4 +68,8 @@ void can_put_echo_skb(struct sk_buff *skb, struct net_device *dev, void can_get_echo_skb(struct net_device *dev, unsigned int idx); void can_free_echo_skb(struct net_device *dev, unsigned int idx); +struct sk_buff *alloc_can_skb(struct net_device *dev, struct can_frame **cf); +struct sk_buff *alloc_can_err_skb(struct net_device *dev, + struct can_frame **cf); + #endif /* CAN_DEV_H */ -- cgit v1.3-8-gc7d7 From d19742fb1c68e6db83b76e06dea5a374c99e104f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 20 Oct 2009 01:06:22 -0700 Subject: filter: Add SKF_AD_QUEUE instruction It can help being able to filter packets on their queue_mapping. If filter performance is not good, we could add a "numqueue" field in struct packet_type, so that netif_nit_deliver() and other functions can directly ignore packets with not expected queue number. Lets experiment this simple filter extension first. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/filter.h | 3 ++- net/core/filter.c | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index 909193e25395..bb3b4352978a 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -124,7 +124,8 @@ struct sock_fprog /* Required for SO_ATTACH_FILTER. */ #define SKF_AD_NLATTR 12 #define SKF_AD_NLATTR_NEST 16 #define SKF_AD_MARK 20 -#define SKF_AD_MAX 24 +#define SKF_AD_QUEUE 24 +#define SKF_AD_MAX 28 #define SKF_NET_OFF (-0x100000) #define SKF_LL_OFF (-0x200000) diff --git a/net/core/filter.c b/net/core/filter.c index e3987e1d4839..08db7b9143a3 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -306,6 +306,9 @@ load_b: case SKF_AD_MARK: A = skb->mark; continue; + case SKF_AD_QUEUE: + A = skb->queue_mapping; + continue; case SKF_AD_NLATTR: { struct nlattr *nla; -- cgit v1.3-8-gc7d7 From 748879776e3b738d53e64df6dbec7394b829462a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 20 Oct 2009 01:09:17 -0700 Subject: net: Avoid compiler warning for mmsghdr when CONFIG_COMPAT is not selected Reported-by: Stephen Rothwell Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/compat.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/compat.h b/include/net/compat.h index 9679f05e9896..3c7d4e38fa1d 100644 --- a/include/net/compat.h +++ b/include/net/compat.h @@ -33,7 +33,11 @@ extern int compat_sock_get_timestamp(struct sock *, struct timeval __user *); extern int compat_sock_get_timestampns(struct sock *, struct timespec __user *); #else /* defined(CONFIG_COMPAT) */ -#define compat_msghdr msghdr /* to avoid compiler warnings */ +/* + * To avoid compiler warnings: + */ +#define compat_msghdr msghdr +#define compat_mmsghdr mmsghdr #endif /* defined(CONFIG_COMPAT) */ extern int get_compat_msghdr(struct msghdr *, struct compat_msghdr __user *); -- cgit v1.3-8-gc7d7 From e022f0b4a03f4fff9323b509df023b8af635716e Mon Sep 17 00:00:00 2001 From: Krishna Kumar Date: Mon, 19 Oct 2009 23:46:20 +0000 Subject: net: Introduce sk_tx_queue_mapping Introduce sk_tx_queue_mapping; and functions that set, test and get this value. Reset sk_tx_queue_mapping to -1 whenever the dst cache is set/reset, and in socket alloc. Setting txq to -1 and using valid txq=<0 to n-1> allows the tx path to use the value of sk_tx_queue_mapping directly instead of subtracting 1 on every tx. Signed-off-by: Krishna Kumar Signed-off-by: David S. Miller --- include/net/sock.h | 26 ++++++++++++++++++++++++++ net/core/sock.c | 5 ++++- 2 files changed, 30 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 1364428f53f2..55de3bd719a5 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -107,6 +107,7 @@ struct net; * @skc_node: main hash linkage for various protocol lookup tables * @skc_nulls_node: main hash linkage for UDP/UDP-Lite protocol * @skc_refcnt: reference count + * @skc_tx_queue_mapping: tx queue number for this connection * @skc_hash: hash value used with various protocol lookup tables * @skc_family: network address family * @skc_state: Connection state @@ -128,6 +129,7 @@ struct sock_common { struct hlist_nulls_node skc_nulls_node; }; atomic_t skc_refcnt; + int skc_tx_queue_mapping; unsigned int skc_hash; unsigned short skc_family; @@ -215,6 +217,7 @@ struct sock { #define sk_node __sk_common.skc_node #define sk_nulls_node __sk_common.skc_nulls_node #define sk_refcnt __sk_common.skc_refcnt +#define sk_tx_queue_mapping __sk_common.skc_tx_queue_mapping #define sk_copy_start __sk_common.skc_hash #define sk_hash __sk_common.skc_hash @@ -1094,8 +1097,29 @@ static inline void sock_put(struct sock *sk) extern int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested); +static inline void sk_tx_queue_set(struct sock *sk, int tx_queue) +{ + sk->sk_tx_queue_mapping = tx_queue; +} + +static inline void sk_tx_queue_clear(struct sock *sk) +{ + sk->sk_tx_queue_mapping = -1; +} + +static inline int sk_tx_queue_get(const struct sock *sk) +{ + return sk->sk_tx_queue_mapping; +} + +static inline bool sk_tx_queue_recorded(const struct sock *sk) +{ + return (sk && sk->sk_tx_queue_mapping >= 0); +} + static inline void sk_set_socket(struct sock *sk, struct socket *sock) { + sk_tx_queue_clear(sk); sk->sk_socket = sock; } @@ -1152,6 +1176,7 @@ __sk_dst_set(struct sock *sk, struct dst_entry *dst) { struct dst_entry *old_dst; + sk_tx_queue_clear(sk); old_dst = sk->sk_dst_cache; sk->sk_dst_cache = dst; dst_release(old_dst); @@ -1170,6 +1195,7 @@ __sk_dst_reset(struct sock *sk) { struct dst_entry *old_dst; + sk_tx_queue_clear(sk); old_dst = sk->sk_dst_cache; sk->sk_dst_cache = NULL; dst_release(old_dst); diff --git a/net/core/sock.c b/net/core/sock.c index 38713aa3faf2..934d9673f084 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -357,6 +357,7 @@ struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie) struct dst_entry *dst = sk->sk_dst_cache; if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) { + sk_tx_queue_clear(sk); sk->sk_dst_cache = NULL; dst_release(dst); return NULL; @@ -953,7 +954,8 @@ static void sock_copy(struct sock *nsk, const struct sock *osk) void *sptr = nsk->sk_security; #endif BUILD_BUG_ON(offsetof(struct sock, sk_copy_start) != - sizeof(osk->sk_node) + sizeof(osk->sk_refcnt)); + sizeof(osk->sk_node) + sizeof(osk->sk_refcnt) + + sizeof(osk->sk_tx_queue_mapping)); memcpy(&nsk->sk_copy_start, &osk->sk_copy_start, osk->sk_prot->obj_size - offsetof(struct sock, sk_copy_start)); #ifdef CONFIG_SECURITY_NETWORK @@ -997,6 +999,7 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority, if (!try_module_get(prot->owner)) goto out_free_sec; + sk_tx_queue_clear(sk); } return sk; -- cgit v1.3-8-gc7d7 From ea94ff3b55188df157a8740bdf3976a87563d705 Mon Sep 17 00:00:00 2001 From: Krishna Kumar Date: Mon, 19 Oct 2009 23:46:45 +0000 Subject: net: Fix for dst_negative_advice dst_negative_advice() should check for changed dst and reset sk_tx_queue_mapping accordingly. Pass sock to the callers of dst_negative_advice. (sk_reset_txq is defined just for use by dst_negative_advice. The only way I could find to get around this is to move dst_negative_() from dst.h to dst.c, include sock.h in dst.c, etc) Signed-off-by: Krishna Kumar Signed-off-by: David S. Miller --- include/net/dst.h | 12 ++++++++++-- net/core/sock.c | 6 ++++++ net/dccp/timer.c | 4 ++-- net/decnet/af_decnet.c | 2 +- net/ipv4/tcp_timer.c | 4 ++-- 5 files changed, 21 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/dst.h b/include/net/dst.h index 5a900ddcf10d..720d90653a8e 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -222,11 +222,19 @@ static inline void dst_confirm(struct dst_entry *dst) neigh_confirm(dst->neighbour); } -static inline void dst_negative_advice(struct dst_entry **dst_p) +static inline void dst_negative_advice(struct dst_entry **dst_p, + struct sock *sk) { struct dst_entry * dst = *dst_p; - if (dst && dst->ops->negative_advice) + if (dst && dst->ops->negative_advice) { *dst_p = dst->ops->negative_advice(dst); + + if (dst != *dst_p) { + extern void sk_reset_txq(struct sock *sk); + + sk_reset_txq(sk); + } + } } static inline void dst_link_failure(struct sk_buff *skb) diff --git a/net/core/sock.c b/net/core/sock.c index 934d9673f084..5a51512f638a 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -352,6 +352,12 @@ discard_and_relse: } EXPORT_SYMBOL(sk_receive_skb); +void sk_reset_txq(struct sock *sk) +{ + sk_tx_queue_clear(sk); +} +EXPORT_SYMBOL(sk_reset_txq); + struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie) { struct dst_entry *dst = sk->sk_dst_cache; diff --git a/net/dccp/timer.c b/net/dccp/timer.c index 162d1e683c39..bbfeb5eae46a 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -38,7 +38,7 @@ static int dccp_write_timeout(struct sock *sk) if (sk->sk_state == DCCP_REQUESTING || sk->sk_state == DCCP_PARTOPEN) { if (icsk->icsk_retransmits != 0) - dst_negative_advice(&sk->sk_dst_cache); + dst_negative_advice(&sk->sk_dst_cache, sk); retry_until = icsk->icsk_syn_retries ? : sysctl_dccp_request_retries; } else { @@ -63,7 +63,7 @@ static int dccp_write_timeout(struct sock *sk) Golden words :-). */ - dst_negative_advice(&sk->sk_dst_cache); + dst_negative_advice(&sk->sk_dst_cache, sk); } retry_until = sysctl_dccp_retries2; diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 4d3060660a14..664965c87e16 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -1955,7 +1955,7 @@ static int dn_sendmsg(struct kiocb *iocb, struct socket *sock, } if ((flags & MSG_TRYHARD) && sk->sk_dst_cache) - dst_negative_advice(&sk->sk_dst_cache); + dst_negative_advice(&sk->sk_dst_cache, sk); mss = scp->segsize_rem; fctype = scp->services_rem & NSP_FC_MASK; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 6e8996cb79d0..8353a538cd4c 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -141,14 +141,14 @@ static int tcp_write_timeout(struct sock *sk) if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { if (icsk->icsk_retransmits) - dst_negative_advice(&sk->sk_dst_cache); + dst_negative_advice(&sk->sk_dst_cache, sk); retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; } else { if (retransmits_timed_out(sk, sysctl_tcp_retries1)) { /* Black hole detection */ tcp_mtu_probing(icsk, sk); - dst_negative_advice(&sk->sk_dst_cache); + dst_negative_advice(&sk->sk_dst_cache, sk); } retry_until = sysctl_tcp_retries2; -- cgit v1.3-8-gc7d7 From 1c55d62e77fa16cdace417834fc7b8a421a1877f Mon Sep 17 00:00:00 2001 From: jamal Date: Thu, 15 Oct 2009 03:09:18 +0000 Subject: pkt_sched: skbedit add support for setting mark This adds support for setting the skb mark. Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/linux/tc_act/tc_skbedit.h | 2 ++ include/net/tc_act/tc_skbedit.h | 2 ++ net/sched/act_skbedit.c | 17 ++++++++++++++++- 3 files changed, 20 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/tc_act/tc_skbedit.h b/include/linux/tc_act/tc_skbedit.h index a14e461a7af7..7a2e910a5f08 100644 --- a/include/linux/tc_act/tc_skbedit.h +++ b/include/linux/tc_act/tc_skbedit.h @@ -26,6 +26,7 @@ #define SKBEDIT_F_PRIORITY 0x1 #define SKBEDIT_F_QUEUE_MAPPING 0x2 +#define SKBEDIT_F_MARK 0x4 struct tc_skbedit { tc_gen; @@ -37,6 +38,7 @@ enum { TCA_SKBEDIT_PARMS, TCA_SKBEDIT_PRIORITY, TCA_SKBEDIT_QUEUE_MAPPING, + TCA_SKBEDIT_MARK, __TCA_SKBEDIT_MAX }; #define TCA_SKBEDIT_MAX (__TCA_SKBEDIT_MAX - 1) diff --git a/include/net/tc_act/tc_skbedit.h b/include/net/tc_act/tc_skbedit.h index 6abb3ed3ebf7..e103fe02f375 100644 --- a/include/net/tc_act/tc_skbedit.h +++ b/include/net/tc_act/tc_skbedit.h @@ -26,7 +26,9 @@ struct tcf_skbedit { struct tcf_common common; u32 flags; u32 priority; + u32 mark; u16 queue_mapping; + /* XXX: 16-bit pad here? */ }; #define to_skbedit(pc) \ container_of(pc, struct tcf_skbedit, common) diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index 4ab916b8074b..e9607fe55b58 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -54,6 +54,8 @@ static int tcf_skbedit(struct sk_buff *skb, struct tc_action *a, if (d->flags & SKBEDIT_F_QUEUE_MAPPING && skb->dev->real_num_tx_queues > d->queue_mapping) skb_set_queue_mapping(skb, d->queue_mapping); + if (d->flags & SKBEDIT_F_MARK) + skb->mark = d->mark; spin_unlock(&d->tcf_lock); return d->tcf_action; @@ -63,6 +65,7 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = { [TCA_SKBEDIT_PARMS] = { .len = sizeof(struct tc_skbedit) }, [TCA_SKBEDIT_PRIORITY] = { .len = sizeof(u32) }, [TCA_SKBEDIT_QUEUE_MAPPING] = { .len = sizeof(u16) }, + [TCA_SKBEDIT_MARK] = { .len = sizeof(u32) }, }; static int tcf_skbedit_init(struct nlattr *nla, struct nlattr *est, @@ -72,7 +75,7 @@ static int tcf_skbedit_init(struct nlattr *nla, struct nlattr *est, struct tc_skbedit *parm; struct tcf_skbedit *d; struct tcf_common *pc; - u32 flags = 0, *priority = NULL; + u32 flags = 0, *priority = NULL, *mark = NULL; u16 *queue_mapping = NULL; int ret = 0, err; @@ -95,6 +98,12 @@ static int tcf_skbedit_init(struct nlattr *nla, struct nlattr *est, flags |= SKBEDIT_F_QUEUE_MAPPING; queue_mapping = nla_data(tb[TCA_SKBEDIT_QUEUE_MAPPING]); } + + if (tb[TCA_SKBEDIT_MARK] != NULL) { + flags |= SKBEDIT_F_MARK; + mark = nla_data(tb[TCA_SKBEDIT_MARK]); + } + if (!flags) return -EINVAL; @@ -124,6 +133,9 @@ static int tcf_skbedit_init(struct nlattr *nla, struct nlattr *est, d->priority = *priority; if (flags & SKBEDIT_F_QUEUE_MAPPING) d->queue_mapping = *queue_mapping; + if (flags & SKBEDIT_F_MARK) + d->mark = *mark; + d->tcf_action = parm->action; spin_unlock_bh(&d->tcf_lock); @@ -161,6 +173,9 @@ static inline int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a, if (d->flags & SKBEDIT_F_QUEUE_MAPPING) NLA_PUT(skb, TCA_SKBEDIT_QUEUE_MAPPING, sizeof(d->queue_mapping), &d->queue_mapping); + if (d->flags & SKBEDIT_F_MARK) + NLA_PUT(skb, TCA_SKBEDIT_MARK, sizeof(d->mark), + &d->mark); t.install = jiffies_to_clock_t(jiffies - d->tcf_tm.install); t.lastuse = jiffies_to_clock_t(jiffies - d->tcf_tm.lastuse); t.expires = jiffies_to_clock_t(d->tcf_tm.expires); -- cgit v1.3-8-gc7d7 From 40b1f4e5113eafc5e84f2ba86822df66087fcb25 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Thu, 22 Oct 2009 14:39:28 +1100 Subject: irq: trivial: Fix typo in comment for #endif The comment suggests this #endif is CONFIG_X86 but it's really CONFIG_TRACE_IRQFLAGS_SUPPORT Signed-off-by: Michael Neuling Cc: michael@ellerman.id.au LKML-Reference: <18191.1256182768@neuling.org> Signed-off-by: Ingo Molnar --- include/linux/irqflags.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index b02a3f1d46a0..006bf45eae30 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -124,6 +124,6 @@ typecheck(unsigned long, flags); \ raw_irqs_disabled_flags(flags); \ }) -#endif /* CONFIG_X86 */ +#endif /* CONFIG_TRACE_IRQFLAGS_SUPPORT */ #endif -- cgit v1.3-8-gc7d7 From 5c828713358cb9df8aa174371edcbbb62203a490 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 23 Oct 2009 14:58:11 +0200 Subject: ratelimit: Make suppressed output messages more useful Today I got: [39648.224782] Registered led device: iwl-phy0::TX [40676.545099] __ratelimit: 246 callbacks suppressed [40676.545103] abcdef[23675]: segfault at 0 ... as you can see the ratelimit message contains a function prefix. Since this is always __ratelimit, this wont help much. This patch changes __ratelimit and printk_ratelimit to print the function name that calls ratelimit. This will pinpoint the responsible function, as long as not several different places call ratelimit with the same ratelimit state at the same time. In that case we catch only one random function that calls ratelimit after the wait period. Signed-off-by: Christian Borntraeger Cc: Dave Young Cc: Linus Torvalds CC: Andrew Morton LKML-Reference: <200910231458.11832.borntraeger@de.ibm.com> Signed-off-by: Ingo Molnar --- include/linux/kernel.h | 3 ++- include/linux/ratelimit.h | 3 ++- kernel/printk.c | 6 +++--- lib/ratelimit.c | 6 +++--- 4 files changed, 10 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 3305f33201be..21d0d822c716 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -240,7 +240,8 @@ asmlinkage int vprintk(const char *fmt, va_list args) asmlinkage int printk(const char * fmt, ...) __attribute__ ((format (printf, 1, 2))) __cold; -extern int printk_ratelimit(void); +extern int __printk_ratelimit(const char *func); +#define printk_ratelimit() __printk_ratelimit(__func__) extern bool printk_timed_ratelimit(unsigned long *caller_jiffies, unsigned int interval_msec); diff --git a/include/linux/ratelimit.h b/include/linux/ratelimit.h index 187bc16c1f15..668cf1bef030 100644 --- a/include/linux/ratelimit.h +++ b/include/linux/ratelimit.h @@ -25,6 +25,7 @@ struct ratelimit_state { .burst = burst_init, \ } -extern int __ratelimit(struct ratelimit_state *rs); +extern int ___ratelimit(struct ratelimit_state *rs, const char *func); +#define __ratelimit(state) ___ratelimit(state, __func__) #endif /* _LINUX_RATELIMIT_H */ diff --git a/kernel/printk.c b/kernel/printk.c index b997c893cdcf..8283dbe15af4 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -1364,11 +1364,11 @@ late_initcall(disable_boot_consoles); */ DEFINE_RATELIMIT_STATE(printk_ratelimit_state, 5 * HZ, 10); -int printk_ratelimit(void) +int __printk_ratelimit(const char *func) { - return __ratelimit(&printk_ratelimit_state); + return ___ratelimit(&printk_ratelimit_state, func); } -EXPORT_SYMBOL(printk_ratelimit); +EXPORT_SYMBOL(__printk_ratelimit); /** * printk_timed_ratelimit - caller-controlled printk ratelimiting diff --git a/lib/ratelimit.c b/lib/ratelimit.c index 5551731ae1d4..09f5ce1810dc 100644 --- a/lib/ratelimit.c +++ b/lib/ratelimit.c @@ -20,7 +20,7 @@ * This enforces a rate limit: not more than @rs->ratelimit_burst callbacks * in every @rs->ratelimit_jiffies */ -int __ratelimit(struct ratelimit_state *rs) +int ___ratelimit(struct ratelimit_state *rs, const char *func) { unsigned long flags; int ret; @@ -43,7 +43,7 @@ int __ratelimit(struct ratelimit_state *rs) if (time_is_before_jiffies(rs->begin + rs->interval)) { if (rs->missed) printk(KERN_WARNING "%s: %d callbacks suppressed\n", - __func__, rs->missed); + func, rs->missed); rs->begin = 0; rs->printed = 0; rs->missed = 0; @@ -59,4 +59,4 @@ int __ratelimit(struct ratelimit_state *rs) return ret; } -EXPORT_SYMBOL(__ratelimit); +EXPORT_SYMBOL(___ratelimit); -- cgit v1.3-8-gc7d7 From ef9a9d1183b36fbf3de327f44596533b770c3005 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 23 Oct 2009 17:51:26 +0000 Subject: ipv6 sit: RCU conversion phase I SIT tunnels use one rwlock to protect their prl entries. This first patch adds RCU locking for prl management, with standard call_rcu() calls. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ipip.h | 1 + net/ipv6/sit.c | 73 +++++++++++++++++++++++++++++++++++++----------------- 2 files changed, 51 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/net/ipip.h b/include/net/ipip.h index 86f1c8bd040c..b3db2fd6e61c 100644 --- a/include/net/ipip.h +++ b/include/net/ipip.h @@ -45,6 +45,7 @@ struct ip_tunnel_prl_entry struct ip_tunnel_prl_entry *next; __be32 addr; u16 flags; + struct rcu_head rcu_head; }; #define IPTUNNEL_XMIT() do { \ diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 510d31f3cb96..8cdcc2ad048c 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -240,15 +240,22 @@ failed: return NULL; } +static DEFINE_SPINLOCK(ipip6_prl_lock); + +#define for_each_prl_rcu(start) \ + for (prl = rcu_dereference(start); \ + prl; \ + prl = rcu_dereference(prl->next)) + static struct ip_tunnel_prl_entry * __ipip6_tunnel_locate_prl(struct ip_tunnel *t, __be32 addr) { - struct ip_tunnel_prl_entry *p = (struct ip_tunnel_prl_entry *)NULL; + struct ip_tunnel_prl_entry *prl; - for (p = t->prl; p; p = p->next) - if (p->addr == addr) + for_each_prl_rcu(t->prl) + if (prl->addr == addr) break; - return p; + return prl; } @@ -273,7 +280,7 @@ static int ipip6_tunnel_get_prl(struct ip_tunnel *t, kcalloc(cmax, sizeof(*kp), GFP_KERNEL) : NULL; - read_lock(&ipip6_lock); + rcu_read_lock(); ca = t->prl_count < cmax ? t->prl_count : cmax; @@ -291,7 +298,7 @@ static int ipip6_tunnel_get_prl(struct ip_tunnel *t, } c = 0; - for (prl = t->prl; prl; prl = prl->next) { + for_each_prl_rcu(t->prl) { if (c >= cmax) break; if (kprl.addr != htonl(INADDR_ANY) && prl->addr != kprl.addr) @@ -303,7 +310,7 @@ static int ipip6_tunnel_get_prl(struct ip_tunnel *t, break; } out: - read_unlock(&ipip6_lock); + rcu_read_unlock(); len = sizeof(*kp) * c; ret = 0; @@ -324,12 +331,14 @@ ipip6_tunnel_add_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a, int chg) if (a->addr == htonl(INADDR_ANY)) return -EINVAL; - write_lock(&ipip6_lock); + spin_lock(&ipip6_prl_lock); for (p = t->prl; p; p = p->next) { if (p->addr == a->addr) { - if (chg) - goto update; + if (chg) { + p->flags = a->flags; + goto out; + } err = -EEXIST; goto out; } @@ -346,46 +355,63 @@ ipip6_tunnel_add_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a, int chg) goto out; } + INIT_RCU_HEAD(&p->rcu_head); p->next = t->prl; - t->prl = p; - t->prl_count++; -update: p->addr = a->addr; p->flags = a->flags; + t->prl_count++; + rcu_assign_pointer(t->prl, p); out: - write_unlock(&ipip6_lock); + spin_unlock(&ipip6_prl_lock); return err; } +static void prl_entry_destroy_rcu(struct rcu_head *head) +{ + kfree(container_of(head, struct ip_tunnel_prl_entry, rcu_head)); +} + +static void prl_list_destroy_rcu(struct rcu_head *head) +{ + struct ip_tunnel_prl_entry *p, *n; + + p = container_of(head, struct ip_tunnel_prl_entry, rcu_head); + do { + n = p->next; + kfree(p); + p = n; + } while (p); +} + static int ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a) { struct ip_tunnel_prl_entry *x, **p; int err = 0; - write_lock(&ipip6_lock); + spin_lock(&ipip6_prl_lock); if (a && a->addr != htonl(INADDR_ANY)) { for (p = &t->prl; *p; p = &(*p)->next) { if ((*p)->addr == a->addr) { x = *p; *p = x->next; - kfree(x); + call_rcu(&x->rcu_head, prl_entry_destroy_rcu); t->prl_count--; goto out; } } err = -ENXIO; } else { - while (t->prl) { + if (t->prl) { + t->prl_count = 0; x = t->prl; - t->prl = t->prl->next; - kfree(x); - t->prl_count--; + call_rcu(&x->rcu_head, prl_list_destroy_rcu); + t->prl = NULL; } } out: - write_unlock(&ipip6_lock); + spin_unlock(&ipip6_prl_lock); return err; } @@ -395,7 +421,7 @@ isatap_chksrc(struct sk_buff *skb, struct iphdr *iph, struct ip_tunnel *t) struct ip_tunnel_prl_entry *p; int ok = 1; - read_lock(&ipip6_lock); + rcu_read_lock(); p = __ipip6_tunnel_locate_prl(t, iph->saddr); if (p) { if (p->flags & PRL_DEFAULT) @@ -411,7 +437,7 @@ isatap_chksrc(struct sk_buff *skb, struct iphdr *iph, struct ip_tunnel *t) else ok = 0; } - read_unlock(&ipip6_lock); + rcu_read_unlock(); return ok; } @@ -1192,6 +1218,7 @@ static void __exit sit_cleanup(void) xfrm4_tunnel_deregister(&sit_handler, AF_INET6); unregister_pernet_gen_device(sit_net_id, &sit_net_ops); + rcu_barrier(); /* Wait for completion of call_rcu()'s */ } static int __init sit_init(void) -- cgit v1.3-8-gc7d7 From 7c28bd0b8ec4d128bd7660671d1b626b0abc471f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 24 Oct 2009 06:13:17 -0700 Subject: rtnetlink: speedup rtnl_dump_ifinfo() When handling large number of netdevice, rtnl_dump_ifinfo() is very slow because it has O(N^2) complexity. Instead of scanning one single list, we can use the 256 sub lists of the dev_index hash table. This considerably speedups "ip link" operations Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/net_namespace.h | 4 ++++ net/core/dev.c | 7 ++----- net/core/rtnetlink.c | 37 ++++++++++++++++++++++++------------- 3 files changed, 30 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 699410142bfa..0addd45038ac 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -28,6 +28,10 @@ struct ctl_table_header; struct net_generic; struct sock; + +#define NETDEV_HASHBITS 8 +#define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS) + struct net { atomic_t count; /* To decided when the network * namespace should be freed. diff --git a/net/core/dev.c b/net/core/dev.c index fa88dcd476d6..e7bada1d5ed9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -193,18 +193,15 @@ static struct list_head ptype_all __read_mostly; /* Taps */ DEFINE_RWLOCK(dev_base_lock); EXPORT_SYMBOL(dev_base_lock); -#define NETDEV_HASHBITS 8 -#define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS) - static inline struct hlist_head *dev_name_hash(struct net *net, const char *name) { unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ)); - return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)]; + return &net->dev_name_head[hash & (NETDEV_HASHENTRIES - 1)]; } static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex) { - return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)]; + return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)]; } /* Device list insertion */ diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index ba13b0974a7b..52ea418d5302 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -682,22 +682,33 @@ nla_put_failure: static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); - int idx; - int s_idx = cb->args[0]; + int h, s_h; + int idx = 0, s_idx; struct net_device *dev; - - idx = 0; - for_each_netdev(net, dev) { - if (idx < s_idx) - goto cont; - if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, - NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, 0, NLM_F_MULTI) <= 0) - break; + struct hlist_head *head; + struct hlist_node *node; + + s_h = cb->args[0]; + s_idx = cb->args[1]; + + for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { + idx = 0; + head = &net->dev_index_head[h]; + hlist_for_each_entry(dev, node, head, index_hlist) { + if (idx < s_idx) + goto cont; + if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, 0, + NLM_F_MULTI) <= 0) + goto out; cont: - idx++; + idx++; + } } - cb->args[0] = idx; +out: + cb->args[1] = idx; + cb->args[0] = h; return skb->len; } -- cgit v1.3-8-gc7d7 From bb015f0c85362aa767f8f00f50a40d85e489414f Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 19 Oct 2009 11:43:32 +0200 Subject: pcmcia: drop already defined PCI_IDs Out of 10 PCI_IDs found in the PCMCIA subsystem, only two were not defined in pci_ids.h. Move them and drop the duplicates. Successfully build-tested. Signed-off-by: Wolfram Sang Cc: Jesse Barnes Signed-off-by: Dominik Brodowski --- drivers/pcmcia/cirrus.h | 10 ---------- drivers/pcmcia/o2micro.h | 22 ---------------------- include/linux/pci_ids.h | 2 ++ 3 files changed, 2 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/drivers/pcmcia/cirrus.h b/drivers/pcmcia/cirrus.h index ecd4fc7f666f..446a4576e73e 100644 --- a/drivers/pcmcia/cirrus.h +++ b/drivers/pcmcia/cirrus.h @@ -30,16 +30,6 @@ #ifndef _LINUX_CIRRUS_H #define _LINUX_CIRRUS_H -#ifndef PCI_VENDOR_ID_CIRRUS -#define PCI_VENDOR_ID_CIRRUS 0x1013 -#endif -#ifndef PCI_DEVICE_ID_CIRRUS_6729 -#define PCI_DEVICE_ID_CIRRUS_6729 0x1100 -#endif -#ifndef PCI_DEVICE_ID_CIRRUS_6832 -#define PCI_DEVICE_ID_CIRRUS_6832 0x1110 -#endif - #define PD67_MISC_CTL_1 0x16 /* Misc control 1 */ #define PD67_FIFO_CTL 0x17 /* FIFO control */ #define PD67_MISC_CTL_2 0x1E /* Misc control 2 */ diff --git a/drivers/pcmcia/o2micro.h b/drivers/pcmcia/o2micro.h index 72188c462c9c..624442fc0d35 100644 --- a/drivers/pcmcia/o2micro.h +++ b/drivers/pcmcia/o2micro.h @@ -30,28 +30,6 @@ #ifndef _LINUX_O2MICRO_H #define _LINUX_O2MICRO_H -#ifndef PCI_VENDOR_ID_O2 -#define PCI_VENDOR_ID_O2 0x1217 -#endif -#ifndef PCI_DEVICE_ID_O2_6729 -#define PCI_DEVICE_ID_O2_6729 0x6729 -#endif -#ifndef PCI_DEVICE_ID_O2_6730 -#define PCI_DEVICE_ID_O2_6730 0x673a -#endif -#ifndef PCI_DEVICE_ID_O2_6832 -#define PCI_DEVICE_ID_O2_6832 0x6832 -#endif -#ifndef PCI_DEVICE_ID_O2_6836 -#define PCI_DEVICE_ID_O2_6836 0x6836 -#endif -#ifndef PCI_DEVICE_ID_O2_6812 -#define PCI_DEVICE_ID_O2_6812 0x6872 -#endif -#ifndef PCI_DEVICE_ID_O2_6933 -#define PCI_DEVICE_ID_O2_6933 0x6933 -#endif - /* Additional PCI configuration registers */ #define O2_MUX_CONTROL 0x90 /* 32 bit */ diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index f490e7a7307a..857cc349bf71 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1632,6 +1632,8 @@ #define PCI_DEVICE_ID_O2_6730 0x673a #define PCI_DEVICE_ID_O2_6832 0x6832 #define PCI_DEVICE_ID_O2_6836 0x6836 +#define PCI_DEVICE_ID_O2_6812 0x6872 +#define PCI_DEVICE_ID_O2_6933 0x6933 #define PCI_VENDOR_ID_3DFX 0x121a #define PCI_DEVICE_ID_3DFX_VOODOO 0x0001 -- cgit v1.3-8-gc7d7 From ce0e7b28fb75cb003cfc8d0238613aaf1c55e797 Mon Sep 17 00:00:00 2001 From: Ryota Ozaki Date: Sat, 24 Oct 2009 01:20:10 +0900 Subject: sched, cpuacct: Fix niced guest time accounting CPU time of a guest is always accounted in 'user' time without concern for the nice value of its counterpart process although the guest is scheduled under the nice value. This patch fixes the defect and accounts cpu time of a niced guest in 'nice' time as same as a niced process. And also the patch adds 'guest_nice' to cpuacct. The value provides niced guest cpu time which is like 'nice' to 'user'. The original discussions can be found here: http://www.mail-archive.com/kvm@vger.kernel.org/msg23982.html http://www.mail-archive.com/kvm@vger.kernel.org/msg23860.html Signed-off-by: Ryota Ozaki Acked-by: Avi Kivity Cc: Peter Zijlstra LKML-Reference: <1256314810-7897-1-git-send-email-ozaki.ryota@gmail.com> Signed-off-by: Ingo Molnar --- Documentation/filesystems/proc.txt | 3 ++- fs/proc/stat.c | 19 +++++++++++++------ include/linux/kernel_stat.h | 1 + kernel/sched.c | 9 +++++++-- 4 files changed, 23 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 2c48f945546b..4af0018533f2 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -1072,7 +1072,8 @@ second). The meanings of the columns are as follows, from left to right: - irq: servicing interrupts - softirq: servicing softirqs - steal: involuntary wait -- guest: running a guest +- guest: running a normal guest +- guest_nice: running a niced guest The "intr" line gives counts of interrupts serviced since boot time, for each of the possible system interrupts. The first column is the total of all diff --git a/fs/proc/stat.c b/fs/proc/stat.c index 7cc726c6d70a..b9b7aad2003d 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -27,7 +27,7 @@ static int show_stat(struct seq_file *p, void *v) int i, j; unsigned long jif; cputime64_t user, nice, system, idle, iowait, irq, softirq, steal; - cputime64_t guest; + cputime64_t guest, guest_nice; u64 sum = 0; u64 sum_softirq = 0; unsigned int per_softirq_sums[NR_SOFTIRQS] = {0}; @@ -36,7 +36,7 @@ static int show_stat(struct seq_file *p, void *v) user = nice = system = idle = iowait = irq = softirq = steal = cputime64_zero; - guest = cputime64_zero; + guest = guest_nice = cputime64_zero; getboottime(&boottime); jif = boottime.tv_sec; @@ -51,6 +51,8 @@ static int show_stat(struct seq_file *p, void *v) softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq); steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal); guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest); + guest_nice = cputime64_add(guest_nice, + kstat_cpu(i).cpustat.guest_nice); for_each_irq_nr(j) { sum += kstat_irqs_cpu(j, i); } @@ -65,7 +67,8 @@ static int show_stat(struct seq_file *p, void *v) } sum += arch_irq_stat(); - seq_printf(p, "cpu %llu %llu %llu %llu %llu %llu %llu %llu %llu\n", + seq_printf(p, "cpu %llu %llu %llu %llu %llu %llu %llu %llu %llu " + "%llu\n", (unsigned long long)cputime64_to_clock_t(user), (unsigned long long)cputime64_to_clock_t(nice), (unsigned long long)cputime64_to_clock_t(system), @@ -74,7 +77,8 @@ static int show_stat(struct seq_file *p, void *v) (unsigned long long)cputime64_to_clock_t(irq), (unsigned long long)cputime64_to_clock_t(softirq), (unsigned long long)cputime64_to_clock_t(steal), - (unsigned long long)cputime64_to_clock_t(guest)); + (unsigned long long)cputime64_to_clock_t(guest), + (unsigned long long)cputime64_to_clock_t(guest_nice)); for_each_online_cpu(i) { /* Copy values here to work around gcc-2.95.3, gcc-2.96 */ @@ -88,8 +92,10 @@ static int show_stat(struct seq_file *p, void *v) softirq = kstat_cpu(i).cpustat.softirq; steal = kstat_cpu(i).cpustat.steal; guest = kstat_cpu(i).cpustat.guest; + guest_nice = kstat_cpu(i).cpustat.guest_nice; seq_printf(p, - "cpu%d %llu %llu %llu %llu %llu %llu %llu %llu %llu\n", + "cpu%d %llu %llu %llu %llu %llu %llu %llu %llu %llu " + "%llu\n", i, (unsigned long long)cputime64_to_clock_t(user), (unsigned long long)cputime64_to_clock_t(nice), @@ -99,7 +105,8 @@ static int show_stat(struct seq_file *p, void *v) (unsigned long long)cputime64_to_clock_t(irq), (unsigned long long)cputime64_to_clock_t(softirq), (unsigned long long)cputime64_to_clock_t(steal), - (unsigned long long)cputime64_to_clock_t(guest)); + (unsigned long long)cputime64_to_clock_t(guest), + (unsigned long long)cputime64_to_clock_t(guest_nice)); } seq_printf(p, "intr %llu", (unsigned long long)sum); diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 348fa8874b52..c059044bc6dc 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -25,6 +25,7 @@ struct cpu_usage_stat { cputime64_t iowait; cputime64_t steal; cputime64_t guest; + cputime64_t guest_nice; }; struct kernel_stat { diff --git a/kernel/sched.c b/kernel/sched.c index e5205811c19e..67be4d0dddaa 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5017,8 +5017,13 @@ static void account_guest_time(struct task_struct *p, cputime_t cputime, p->gtime = cputime_add(p->gtime, cputime); /* Add guest time to cpustat. */ - cpustat->user = cputime64_add(cpustat->user, tmp); - cpustat->guest = cputime64_add(cpustat->guest, tmp); + if (TASK_NICE(p) > 0) { + cpustat->nice = cputime64_add(cpustat->nice, tmp); + cpustat->guest_nice = cputime64_add(cpustat->guest_nice, tmp); + } else { + cpustat->user = cputime64_add(cpustat->user, tmp); + cpustat->guest = cputime64_add(cpustat->guest, tmp); + } } /* -- cgit v1.3-8-gc7d7 From 0b83ddebc6e884dc0221358cf68c461520fbdd8e Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Thu, 22 Oct 2009 13:26:45 +0300 Subject: MFD: twl4030: add twl4030_codec MFD as a new child to the core New MFD child to twl4030 MFD device. Reason for the twl4030_codec MFD: the vibra control is actually in the codec part of the twl4030. If both the vibra and the audio functionality is needed from the twl4030 at the same time, than they need to control the codec power and APLL at the same time without breaking the other driver. Also these two has to be able to work without the need for the other driver. This MFD device will be used by the drivers, which needs resources from the twl4030 codec like audio and vibra. The platform specific configuration data is passed along to the child drivers (audio, vibra). Signed-off-by: Peter Ujfalusi Acked-by: Samuel Ortiz Signed-off-by: Mark Brown --- drivers/mfd/Kconfig | 6 + drivers/mfd/Makefile | 1 + drivers/mfd/twl4030-codec.c | 241 +++++++++++++++++++++++++++++++++ drivers/mfd/twl4030-core.c | 14 ++ include/linux/i2c/twl4030.h | 18 +++ include/linux/mfd/twl4030-codec.h | 271 ++++++++++++++++++++++++++++++++++++++ 6 files changed, 551 insertions(+) create mode 100644 drivers/mfd/twl4030-codec.c create mode 100644 include/linux/mfd/twl4030-codec.h (limited to 'include') diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 570be139f9df..08f2d07bf56a 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -121,6 +121,12 @@ config TWL4030_POWER and load scripts controling which resources are switched off/on or reset when a sleep, wakeup or warm reset event occurs. +config TWL4030_CODEC + bool + depends on TWL4030_CORE + select MFD_CORE + default n + config MFD_TMIO bool default n diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index f3b277b90d40..af0fc903cec8 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -26,6 +26,7 @@ obj-$(CONFIG_MENELAUS) += menelaus.o obj-$(CONFIG_TWL4030_CORE) += twl4030-core.o twl4030-irq.o obj-$(CONFIG_TWL4030_POWER) += twl4030-power.o +obj-$(CONFIG_TWL4030_CODEC) += twl4030-codec.o obj-$(CONFIG_MFD_MC13783) += mc13783-core.o diff --git a/drivers/mfd/twl4030-codec.c b/drivers/mfd/twl4030-codec.c new file mode 100644 index 000000000000..97103078dc2a --- /dev/null +++ b/drivers/mfd/twl4030-codec.c @@ -0,0 +1,241 @@ +/* + * MFD driver for twl4030 codec submodule + * + * Author: Peter Ujfalusi + * + * Copyright: (C) 2009 Nokia Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define TWL4030_CODEC_CELLS 2 + +static struct platform_device *twl4030_codec_dev; + +struct twl4030_codec_resource { + int request_count; + u8 reg; + u8 mask; +}; + +struct twl4030_codec { + struct mutex mutex; + struct twl4030_codec_resource resource[TWL4030_CODEC_RES_MAX]; + struct mfd_cell cells[TWL4030_CODEC_CELLS]; +}; + +/* + * Modify the resource, the function returns the content of the register + * after the modification. + */ +static int twl4030_codec_set_resource(enum twl4030_codec_res id, int enable) +{ + struct twl4030_codec *codec = platform_get_drvdata(twl4030_codec_dev); + u8 val; + + twl4030_i2c_read_u8(TWL4030_MODULE_AUDIO_VOICE, &val, + codec->resource[id].reg); + + if (enable) + val |= codec->resource[id].mask; + else + val &= ~codec->resource[id].mask; + + twl4030_i2c_write_u8(TWL4030_MODULE_AUDIO_VOICE, + val, codec->resource[id].reg); + + return val; +} + +static inline int twl4030_codec_get_resource(enum twl4030_codec_res id) +{ + struct twl4030_codec *codec = platform_get_drvdata(twl4030_codec_dev); + u8 val; + + twl4030_i2c_read_u8(TWL4030_MODULE_AUDIO_VOICE, &val, + codec->resource[id].reg); + + return val; +} + +/* + * Enable the resource. + * The function returns with error or the content of the register + */ +int twl4030_codec_enable_resource(enum twl4030_codec_res id) +{ + struct twl4030_codec *codec = platform_get_drvdata(twl4030_codec_dev); + int val; + + if (id >= TWL4030_CODEC_RES_MAX) { + dev_err(&twl4030_codec_dev->dev, + "Invalid resource ID (%u)\n", id); + return -EINVAL; + } + + mutex_lock(&codec->mutex); + if (!codec->resource[id].request_count) + /* Resource was disabled, enable it */ + val = twl4030_codec_set_resource(id, 1); + else + val = twl4030_codec_get_resource(id); + + codec->resource[id].request_count++; + mutex_unlock(&codec->mutex); + + return val; +} +EXPORT_SYMBOL_GPL(twl4030_codec_enable_resource); + +/* + * Disable the resource. + * The function returns with error or the content of the register + */ +int twl4030_codec_disable_resource(unsigned id) +{ + struct twl4030_codec *codec = platform_get_drvdata(twl4030_codec_dev); + int val; + + if (id >= TWL4030_CODEC_RES_MAX) { + dev_err(&twl4030_codec_dev->dev, + "Invalid resource ID (%u)\n", id); + return -EINVAL; + } + + mutex_lock(&codec->mutex); + if (!codec->resource[id].request_count) { + dev_err(&twl4030_codec_dev->dev, + "Resource has been disabled already (%u)\n", id); + mutex_unlock(&codec->mutex); + return -EPERM; + } + codec->resource[id].request_count--; + + if (!codec->resource[id].request_count) + /* Resource can be disabled now */ + val = twl4030_codec_set_resource(id, 0); + else + val = twl4030_codec_get_resource(id); + + mutex_unlock(&codec->mutex); + + return val; +} +EXPORT_SYMBOL_GPL(twl4030_codec_disable_resource); + +static int __devinit twl4030_codec_probe(struct platform_device *pdev) +{ + struct twl4030_codec *codec; + struct twl4030_codec_data *pdata = pdev->dev.platform_data; + struct mfd_cell *cell = NULL; + int ret, childs = 0; + + codec = kzalloc(sizeof(struct twl4030_codec), GFP_KERNEL); + if (!codec) + return -ENOMEM; + + platform_set_drvdata(pdev, codec); + + twl4030_codec_dev = pdev; + mutex_init(&codec->mutex); + + /* Codec power */ + codec->resource[TWL4030_CODEC_RES_POWER].reg = TWL4030_REG_CODEC_MODE; + codec->resource[TWL4030_CODEC_RES_POWER].mask = TWL4030_CODECPDZ; + + /* PLL */ + codec->resource[TWL4030_CODEC_RES_APLL].reg = TWL4030_REG_APLL_CTL; + codec->resource[TWL4030_CODEC_RES_APLL].mask = TWL4030_APLL_EN; + + if (pdata->audio) { + cell = &codec->cells[childs]; + cell->name = "twl4030_codec_audio"; + cell->platform_data = pdata->audio; + cell->data_size = sizeof(*pdata->audio); + childs++; + } + if (pdata->vibra) { + cell = &codec->cells[childs]; + cell->name = "twl4030_codec_vibra"; + cell->platform_data = pdata->vibra; + cell->data_size = sizeof(*pdata->vibra); + childs++; + } + + if (childs) + ret = mfd_add_devices(&pdev->dev, pdev->id, codec->cells, + childs, NULL, 0); + else { + dev_err(&pdev->dev, "No platform data found for childs\n"); + ret = -ENODEV; + } + + if (!ret) + return 0; + + platform_set_drvdata(pdev, NULL); + kfree(codec); + twl4030_codec_dev = NULL; + return ret; +} + +static int __devexit twl4030_codec_remove(struct platform_device *pdev) +{ + struct twl4030_codec *codec = platform_get_drvdata(pdev); + + mfd_remove_devices(&pdev->dev); + platform_set_drvdata(pdev, NULL); + kfree(codec); + twl4030_codec_dev = NULL; + + return 0; +} + +MODULE_ALIAS("platform:twl4030_codec"); + +static struct platform_driver twl4030_codec_driver = { + .probe = twl4030_codec_probe, + .remove = __devexit_p(twl4030_codec_remove), + .driver = { + .owner = THIS_MODULE, + .name = "twl4030_codec", + }, +}; + +static int __devinit twl4030_codec_init(void) +{ + return platform_driver_register(&twl4030_codec_driver); +} +module_init(twl4030_codec_init); + +static void __devexit twl4030_codec_exit(void) +{ + platform_driver_unregister(&twl4030_codec_driver); +} +module_exit(twl4030_codec_exit); + +MODULE_AUTHOR("Peter Ujfalusi "); +MODULE_LICENSE("GPL"); + diff --git a/drivers/mfd/twl4030-core.c b/drivers/mfd/twl4030-core.c index e424cf6d8e9e..0ee81e46bfbd 100644 --- a/drivers/mfd/twl4030-core.c +++ b/drivers/mfd/twl4030-core.c @@ -114,6 +114,12 @@ #define twl_has_watchdog() false #endif +#if defined(CONFIG_TWL4030_CODEC) || defined(CONFIG_TWL4030_CODEC_MODULE) +#define twl_has_codec() true +#else +#define twl_has_codec() false +#endif + /* Triton Core internal information (BEGIN) */ /* Last - for index max*/ @@ -557,6 +563,14 @@ add_children(struct twl4030_platform_data *pdata, unsigned long features) return PTR_ERR(child); } + if (twl_has_codec() && pdata->codec) { + child = add_child(1, "twl4030_codec", + pdata->codec, sizeof(*pdata->codec), + false, 0, 0); + if (IS_ERR(child)) + return PTR_ERR(child); + } + if (twl_has_regulator()) { /* child = add_regulator(TWL4030_REG_VPLL1, pdata->vpll1); diff --git a/include/linux/i2c/twl4030.h b/include/linux/i2c/twl4030.h index 2d02dfd7076c..42d6c722bd82 100644 --- a/include/linux/i2c/twl4030.h +++ b/include/linux/i2c/twl4030.h @@ -401,6 +401,23 @@ struct twl4030_power_data { extern void twl4030_power_init(struct twl4030_power_data *triton2_scripts); +struct twl4030_codec_audio_data { + unsigned int audio_mclk; + unsigned int ramp_delay_value; + unsigned int hs_extmute:1; + void (*set_hs_extmute)(int mute); +}; + +struct twl4030_codec_vibra_data { + unsigned int audio_mclk; + unsigned int coexist; +}; + +struct twl4030_codec_data { + struct twl4030_codec_audio_data *audio; + struct twl4030_codec_vibra_data *vibra; +}; + struct twl4030_platform_data { unsigned irq_base, irq_end; struct twl4030_bci_platform_data *bci; @@ -409,6 +426,7 @@ struct twl4030_platform_data { struct twl4030_keypad_data *keypad; struct twl4030_usb_data *usb; struct twl4030_power_data *power; + struct twl4030_codec_data *codec; /* LDO regulators */ struct regulator_init_data *vdac; diff --git a/include/linux/mfd/twl4030-codec.h b/include/linux/mfd/twl4030-codec.h new file mode 100644 index 000000000000..ef0a3041d759 --- /dev/null +++ b/include/linux/mfd/twl4030-codec.h @@ -0,0 +1,271 @@ +/* + * MFD driver for twl4030 codec submodule + * + * Author: Peter Ujfalusi + * + * Copyright: (C) 2009 Nokia Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + * + */ + +#ifndef __TWL4030_CODEC_H__ +#define __TWL4030_CODEC_H__ + +/* Codec registers */ +#define TWL4030_REG_CODEC_MODE 0x01 +#define TWL4030_REG_OPTION 0x02 +#define TWL4030_REG_UNKNOWN 0x03 +#define TWL4030_REG_MICBIAS_CTL 0x04 +#define TWL4030_REG_ANAMICL 0x05 +#define TWL4030_REG_ANAMICR 0x06 +#define TWL4030_REG_AVADC_CTL 0x07 +#define TWL4030_REG_ADCMICSEL 0x08 +#define TWL4030_REG_DIGMIXING 0x09 +#define TWL4030_REG_ATXL1PGA 0x0A +#define TWL4030_REG_ATXR1PGA 0x0B +#define TWL4030_REG_AVTXL2PGA 0x0C +#define TWL4030_REG_AVTXR2PGA 0x0D +#define TWL4030_REG_AUDIO_IF 0x0E +#define TWL4030_REG_VOICE_IF 0x0F +#define TWL4030_REG_ARXR1PGA 0x10 +#define TWL4030_REG_ARXL1PGA 0x11 +#define TWL4030_REG_ARXR2PGA 0x12 +#define TWL4030_REG_ARXL2PGA 0x13 +#define TWL4030_REG_VRXPGA 0x14 +#define TWL4030_REG_VSTPGA 0x15 +#define TWL4030_REG_VRX2ARXPGA 0x16 +#define TWL4030_REG_AVDAC_CTL 0x17 +#define TWL4030_REG_ARX2VTXPGA 0x18 +#define TWL4030_REG_ARXL1_APGA_CTL 0x19 +#define TWL4030_REG_ARXR1_APGA_CTL 0x1A +#define TWL4030_REG_ARXL2_APGA_CTL 0x1B +#define TWL4030_REG_ARXR2_APGA_CTL 0x1C +#define TWL4030_REG_ATX2ARXPGA 0x1D +#define TWL4030_REG_BT_IF 0x1E +#define TWL4030_REG_BTPGA 0x1F +#define TWL4030_REG_BTSTPGA 0x20 +#define TWL4030_REG_EAR_CTL 0x21 +#define TWL4030_REG_HS_SEL 0x22 +#define TWL4030_REG_HS_GAIN_SET 0x23 +#define TWL4030_REG_HS_POPN_SET 0x24 +#define TWL4030_REG_PREDL_CTL 0x25 +#define TWL4030_REG_PREDR_CTL 0x26 +#define TWL4030_REG_PRECKL_CTL 0x27 +#define TWL4030_REG_PRECKR_CTL 0x28 +#define TWL4030_REG_HFL_CTL 0x29 +#define TWL4030_REG_HFR_CTL 0x2A +#define TWL4030_REG_ALC_CTL 0x2B +#define TWL4030_REG_ALC_SET1 0x2C +#define TWL4030_REG_ALC_SET2 0x2D +#define TWL4030_REG_BOOST_CTL 0x2E +#define TWL4030_REG_SOFTVOL_CTL 0x2F +#define TWL4030_REG_DTMF_FREQSEL 0x30 +#define TWL4030_REG_DTMF_TONEXT1H 0x31 +#define TWL4030_REG_DTMF_TONEXT1L 0x32 +#define TWL4030_REG_DTMF_TONEXT2H 0x33 +#define TWL4030_REG_DTMF_TONEXT2L 0x34 +#define TWL4030_REG_DTMF_TONOFF 0x35 +#define TWL4030_REG_DTMF_WANONOFF 0x36 +#define TWL4030_REG_I2S_RX_SCRAMBLE_H 0x37 +#define TWL4030_REG_I2S_RX_SCRAMBLE_M 0x38 +#define TWL4030_REG_I2S_RX_SCRAMBLE_L 0x39 +#define TWL4030_REG_APLL_CTL 0x3A +#define TWL4030_REG_DTMF_CTL 0x3B +#define TWL4030_REG_DTMF_PGA_CTL2 0x3C +#define TWL4030_REG_DTMF_PGA_CTL1 0x3D +#define TWL4030_REG_MISC_SET_1 0x3E +#define TWL4030_REG_PCMBTMUX 0x3F +#define TWL4030_REG_RX_PATH_SEL 0x43 +#define TWL4030_REG_VDL_APGA_CTL 0x44 +#define TWL4030_REG_VIBRA_CTL 0x45 +#define TWL4030_REG_VIBRA_SET 0x46 +#define TWL4030_REG_VIBRA_PWM_SET 0x47 +#define TWL4030_REG_ANAMIC_GAIN 0x48 +#define TWL4030_REG_MISC_SET_2 0x49 + +/* Bitfield Definitions */ + +/* TWL4030_CODEC_MODE (0x01) Fields */ +#define TWL4030_APLL_RATE 0xF0 +#define TWL4030_APLL_RATE_8000 0x00 +#define TWL4030_APLL_RATE_11025 0x10 +#define TWL4030_APLL_RATE_12000 0x20 +#define TWL4030_APLL_RATE_16000 0x40 +#define TWL4030_APLL_RATE_22050 0x50 +#define TWL4030_APLL_RATE_24000 0x60 +#define TWL4030_APLL_RATE_32000 0x80 +#define TWL4030_APLL_RATE_44100 0x90 +#define TWL4030_APLL_RATE_48000 0xA0 +#define TWL4030_APLL_RATE_96000 0xE0 +#define TWL4030_SEL_16K 0x08 +#define TWL4030_CODECPDZ 0x02 +#define TWL4030_OPT_MODE 0x01 +#define TWL4030_OPTION_1 (1 << 0) +#define TWL4030_OPTION_2 (0 << 0) + +/* TWL4030_OPTION (0x02) Fields */ +#define TWL4030_ATXL1_EN (1 << 0) +#define TWL4030_ATXR1_EN (1 << 1) +#define TWL4030_ATXL2_VTXL_EN (1 << 2) +#define TWL4030_ATXR2_VTXR_EN (1 << 3) +#define TWL4030_ARXL1_VRX_EN (1 << 4) +#define TWL4030_ARXR1_EN (1 << 5) +#define TWL4030_ARXL2_EN (1 << 6) +#define TWL4030_ARXR2_EN (1 << 7) + +/* TWL4030_REG_MICBIAS_CTL (0x04) Fields */ +#define TWL4030_MICBIAS2_CTL 0x40 +#define TWL4030_MICBIAS1_CTL 0x20 +#define TWL4030_HSMICBIAS_EN 0x04 +#define TWL4030_MICBIAS2_EN 0x02 +#define TWL4030_MICBIAS1_EN 0x01 + +/* ANAMICL (0x05) Fields */ +#define TWL4030_CNCL_OFFSET_START 0x80 +#define TWL4030_OFFSET_CNCL_SEL 0x60 +#define TWL4030_OFFSET_CNCL_SEL_ARX1 0x00 +#define TWL4030_OFFSET_CNCL_SEL_ARX2 0x20 +#define TWL4030_OFFSET_CNCL_SEL_VRX 0x40 +#define TWL4030_OFFSET_CNCL_SEL_ALL 0x60 +#define TWL4030_MICAMPL_EN 0x10 +#define TWL4030_CKMIC_EN 0x08 +#define TWL4030_AUXL_EN 0x04 +#define TWL4030_HSMIC_EN 0x02 +#define TWL4030_MAINMIC_EN 0x01 + +/* ANAMICR (0x06) Fields */ +#define TWL4030_MICAMPR_EN 0x10 +#define TWL4030_AUXR_EN 0x04 +#define TWL4030_SUBMIC_EN 0x01 + +/* AVADC_CTL (0x07) Fields */ +#define TWL4030_ADCL_EN 0x08 +#define TWL4030_AVADC_CLK_PRIORITY 0x04 +#define TWL4030_ADCR_EN 0x02 + +/* TWL4030_REG_ADCMICSEL (0x08) Fields */ +#define TWL4030_DIGMIC1_EN 0x08 +#define TWL4030_TX2IN_SEL 0x04 +#define TWL4030_DIGMIC0_EN 0x02 +#define TWL4030_TX1IN_SEL 0x01 + +/* AUDIO_IF (0x0E) Fields */ +#define TWL4030_AIF_SLAVE_EN 0x80 +#define TWL4030_DATA_WIDTH 0x60 +#define TWL4030_DATA_WIDTH_16S_16W 0x00 +#define TWL4030_DATA_WIDTH_32S_16W 0x40 +#define TWL4030_DATA_WIDTH_32S_24W 0x60 +#define TWL4030_AIF_FORMAT 0x18 +#define TWL4030_AIF_FORMAT_CODEC 0x00 +#define TWL4030_AIF_FORMAT_LEFT 0x08 +#define TWL4030_AIF_FORMAT_RIGHT 0x10 +#define TWL4030_AIF_FORMAT_TDM 0x18 +#define TWL4030_AIF_TRI_EN 0x04 +#define TWL4030_CLK256FS_EN 0x02 +#define TWL4030_AIF_EN 0x01 + +/* VOICE_IF (0x0F) Fields */ +#define TWL4030_VIF_SLAVE_EN 0x80 +#define TWL4030_VIF_DIN_EN 0x40 +#define TWL4030_VIF_DOUT_EN 0x20 +#define TWL4030_VIF_SWAP 0x10 +#define TWL4030_VIF_FORMAT 0x08 +#define TWL4030_VIF_TRI_EN 0x04 +#define TWL4030_VIF_SUB_EN 0x02 +#define TWL4030_VIF_EN 0x01 + +/* EAR_CTL (0x21) */ +#define TWL4030_EAR_GAIN 0x30 + +/* HS_GAIN_SET (0x23) Fields */ +#define TWL4030_HSR_GAIN 0x0C +#define TWL4030_HSR_GAIN_PWR_DOWN 0x00 +#define TWL4030_HSR_GAIN_PLUS_6DB 0x04 +#define TWL4030_HSR_GAIN_0DB 0x08 +#define TWL4030_HSR_GAIN_MINUS_6DB 0x0C +#define TWL4030_HSL_GAIN 0x03 +#define TWL4030_HSL_GAIN_PWR_DOWN 0x00 +#define TWL4030_HSL_GAIN_PLUS_6DB 0x01 +#define TWL4030_HSL_GAIN_0DB 0x02 +#define TWL4030_HSL_GAIN_MINUS_6DB 0x03 + +/* HS_POPN_SET (0x24) Fields */ +#define TWL4030_VMID_EN 0x40 +#define TWL4030_EXTMUTE 0x20 +#define TWL4030_RAMP_DELAY 0x1C +#define TWL4030_RAMP_DELAY_20MS 0x00 +#define TWL4030_RAMP_DELAY_40MS 0x04 +#define TWL4030_RAMP_DELAY_81MS 0x08 +#define TWL4030_RAMP_DELAY_161MS 0x0C +#define TWL4030_RAMP_DELAY_323MS 0x10 +#define TWL4030_RAMP_DELAY_645MS 0x14 +#define TWL4030_RAMP_DELAY_1291MS 0x18 +#define TWL4030_RAMP_DELAY_2581MS 0x1C +#define TWL4030_RAMP_EN 0x02 + +/* PREDL_CTL (0x25) */ +#define TWL4030_PREDL_GAIN 0x30 + +/* PREDR_CTL (0x26) */ +#define TWL4030_PREDR_GAIN 0x30 + +/* PRECKL_CTL (0x27) */ +#define TWL4030_PRECKL_GAIN 0x30 + +/* PRECKR_CTL (0x28) */ +#define TWL4030_PRECKR_GAIN 0x30 + +/* HFL_CTL (0x29, 0x2A) Fields */ +#define TWL4030_HF_CTL_HB_EN 0x04 +#define TWL4030_HF_CTL_LOOP_EN 0x08 +#define TWL4030_HF_CTL_RAMP_EN 0x10 +#define TWL4030_HF_CTL_REF_EN 0x20 + +/* APLL_CTL (0x3A) Fields */ +#define TWL4030_APLL_EN 0x10 +#define TWL4030_APLL_INFREQ 0x0F +#define TWL4030_APLL_INFREQ_19200KHZ 0x05 +#define TWL4030_APLL_INFREQ_26000KHZ 0x06 +#define TWL4030_APLL_INFREQ_38400KHZ 0x0F + +/* REG_MISC_SET_1 (0x3E) Fields */ +#define TWL4030_CLK64_EN 0x80 +#define TWL4030_SCRAMBLE_EN 0x40 +#define TWL4030_FMLOOP_EN 0x20 +#define TWL4030_SMOOTH_ANAVOL_EN 0x02 +#define TWL4030_DIGMIC_LR_SWAP_EN 0x01 + +/* VIBRA_CTL (0x45) */ +#define TWL4030_VIBRA_EN 0x01 +#define TWL4030_VIBRA_DIR 0x02 +#define TWL4030_VIBRA_AUDIO_SEL_L1 (0x00 << 2) +#define TWL4030_VIBRA_AUDIO_SEL_R1 (0x01 << 2) +#define TWL4030_VIBRA_AUDIO_SEL_L2 (0x02 << 2) +#define TWL4030_VIBRA_AUDIO_SEL_R2 (0x03 << 2) +#define TWL4030_VIBRA_SEL 0x10 +#define TWL4030_VIBRA_DIR_SEL 0x20 + +/* TWL4030 codec resource IDs */ +enum twl4030_codec_res { + TWL4030_CODEC_RES_POWER = 0, + TWL4030_CODEC_RES_APLL, + TWL4030_CODEC_RES_MAX, +}; + +int twl4030_codec_disable_resource(enum twl4030_codec_res id); +int twl4030_codec_enable_resource(enum twl4030_codec_res id); + +#endif /* End of __TWL4030_CODEC_H__ */ -- cgit v1.3-8-gc7d7 From 9b1d82fa1611706fa7ee1505f290160a18caf95d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 25 Oct 2009 19:03:50 -0700 Subject: rcu: "Tiny RCU", The Bloatwatch Edition This patch is a version of RCU designed for !SMP provided for a small-footprint RCU implementation. In particular, the implementation of synchronize_rcu() is extremely lightweight and high performance. It passes rcutorture testing in each of the four relevant configurations (combinations of NO_HZ and PREEMPT) on x86. This saves about 1K bytes compared to old Classic RCU (which is no longer in mainline), and more than three kilobytes compared to Hierarchical RCU (updated to 2.6.30): CONFIG_TREE_RCU: text data bss dec filename 183 4 0 187 kernel/rcupdate.o 2783 520 36 3339 kernel/rcutree.o 3526 Total (vs 4565 for v7) CONFIG_TREE_PREEMPT_RCU: text data bss dec filename 263 4 0 267 kernel/rcupdate.o 4594 776 52 5422 kernel/rcutree.o 5689 Total (6155 for v7) CONFIG_TINY_RCU: text data bss dec filename 96 4 0 100 kernel/rcupdate.o 734 24 0 758 kernel/rcutiny.o 858 Total (vs 848 for v7) The above is for x86. Your mileage may vary on other platforms. Further compression is possible, but is being procrastinated. Changes from v7 (http://lkml.org/lkml/2009/10/9/388) o Apply Lai Jiangshan's review comments (aside from might_sleep() in synchronize_sched(), which is covered by SMP builds). o Fix up expedited primitives. Changes from v6 (http://lkml.org/lkml/2009/9/23/293). o Forward ported to put it into the 2.6.33 stream. o Added lockdep support. o Make lightweight rcu_barrier. Changes from v5 (http://lkml.org/lkml/2009/6/23/12). o Ported to latest pre-2.6.32 merge window kernel. - Renamed rcu_qsctr_inc() to rcu_sched_qs(). - Renamed rcu_bh_qsctr_inc() to rcu_bh_qs(). - Provided trivial rcu_cpu_notify(). - Provided trivial exit_rcu(). - Provided trivial rcu_needs_cpu(). - Fixed up the rcu_*_enter/exit() functions in linux/hardirq.h. o Removed the dependence on EMBEDDED, with a view to making TINY_RCU default for !SMP at some time in the future. o Added (trivial) support for expedited grace periods. Changes from v4 (http://lkml.org/lkml/2009/5/2/91) include: o Squeeze the size down a bit further by removing the ->completed field from struct rcu_ctrlblk. o This permits synchronize_rcu() to become the empty function. Previous concerns about rcutorture were unfounded, as rcutorture correctly handles a constant value from rcu_batches_completed() and rcu_batches_completed_bh(). Changes from v3 (http://lkml.org/lkml/2009/3/29/221) include: o Changed rcu_batches_completed(), rcu_batches_completed_bh() rcu_enter_nohz(), rcu_exit_nohz(), rcu_nmi_enter(), and rcu_nmi_exit(), to be static inlines, as suggested by David Howells. Doing this saves about 100 bytes from rcutiny.o. (The numbers between v3 and this v4 of the patch are not directly comparable, since they are against different versions of Linux.) Changes from v2 (http://lkml.org/lkml/2009/2/3/333) include: o Fix whitespace issues. o Change short-circuit "||" operator to instead be "+" in order to fix performance bug noted by "kraai" on LWN. (http://lwn.net/Articles/324348/) Changes from v1 (http://lkml.org/lkml/2009/1/13/440) include: o This version depends on EMBEDDED as well as !SMP, as suggested by Ingo. o Updated rcu_needs_cpu() to unconditionally return zero, permitting the CPU to enter dynticks-idle mode at any time. This works because callbacks can be invoked upon entry to dynticks-idle mode. o Paul is now OK with this being included, based on a poll at the Kernel Miniconf at linux.conf.au, where about ten people said that they cared about saving 900 bytes on single-CPU systems. o Applies to both mainline and tip/core/rcu. Signed-off-by: Paul E. McKenney Acked-by: David Howells Acked-by: Josh Triplett Reviewed-by: Lai Jiangshan Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: avi@redhat.com Cc: mtosatti@redhat.com LKML-Reference: <12565226351355-git-send-email-> Signed-off-by: Ingo Molnar --- include/linux/hardirq.h | 24 ++++ include/linux/rcupdate.h | 6 + include/linux/rcutiny.h | 97 ++++++++++++++++ init/Kconfig | 9 ++ kernel/Makefile | 1 + kernel/rcupdate.c | 4 + kernel/rcutiny.c | 294 +++++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 435 insertions(+) create mode 100644 include/linux/rcutiny.h create mode 100644 kernel/rcutiny.c (limited to 'include') diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 6d527ee82b2b..d5b387669dab 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -139,10 +139,34 @@ static inline void account_system_vtime(struct task_struct *tsk) #endif #if defined(CONFIG_NO_HZ) +#if defined(CONFIG_TINY_RCU) +extern void rcu_enter_nohz(void); +extern void rcu_exit_nohz(void); + +static inline void rcu_irq_enter(void) +{ + rcu_exit_nohz(); +} + +static inline void rcu_irq_exit(void) +{ + rcu_enter_nohz(); +} + +static inline void rcu_nmi_enter(void) +{ +} + +static inline void rcu_nmi_exit(void) +{ +} + +#else extern void rcu_irq_enter(void); extern void rcu_irq_exit(void); extern void rcu_nmi_enter(void); extern void rcu_nmi_exit(void); +#endif #else # define rcu_irq_enter() do { } while (0) # define rcu_irq_exit() do { } while (0) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 3ebd0b7bcb08..6dd71fa48429 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -68,11 +68,17 @@ extern int sched_expedited_torture_stats(char *page); /* Internal to kernel */ extern void rcu_init(void); extern void rcu_scheduler_starting(void); +#ifndef CONFIG_TINY_RCU extern int rcu_needs_cpu(int cpu); +#else +static inline int rcu_needs_cpu(int cpu) { return 0; } +#endif extern int rcu_scheduler_active; #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) #include +#elif CONFIG_TINY_RCU +#include #else #error "Unknown RCU implementation specified to kernel configuration" #endif diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h new file mode 100644 index 000000000000..891073c264dc --- /dev/null +++ b/include/linux/rcutiny.h @@ -0,0 +1,97 @@ +/* + * Read-Copy Update mechanism for mutual exclusion, the Bloatwatch edition. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright IBM Corporation, 2008 + * + * Author: Paul E. McKenney + * + * For detailed explanation of Read-Copy Update mechanism see - + * Documentation/RCU + */ + +#ifndef __LINUX_TINY_H +#define __LINUX_TINY_H + +#include + +void rcu_sched_qs(int cpu); +void rcu_bh_qs(int cpu); + +#define __rcu_read_lock() preempt_disable() +#define __rcu_read_unlock() preempt_enable() +#define __rcu_read_lock_bh() local_bh_disable() +#define __rcu_read_unlock_bh() local_bh_enable() +#define call_rcu_sched call_rcu + +#define rcu_init_sched() do { } while (0) +extern void rcu_check_callbacks(int cpu, int user); +extern void __rcu_init(void); + +/* + * Return the number of grace periods. + */ +static inline long rcu_batches_completed(void) +{ + return 0; +} + +/* + * Return the number of bottom-half grace periods. + */ +static inline long rcu_batches_completed_bh(void) +{ + return 0; +} + +extern int rcu_expedited_torture_stats(char *page); + +static inline void synchronize_rcu_expedited(void) +{ + synchronize_sched(); +} + +static inline void synchronize_rcu_bh_expedited(void) +{ + synchronize_sched(); +} + +struct notifier_block; +extern int rcu_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu); + +#ifdef CONFIG_NO_HZ + +extern void rcu_enter_nohz(void); +extern void rcu_exit_nohz(void); + +#else /* #ifdef CONFIG_NO_HZ */ + +static inline void rcu_enter_nohz(void) +{ +} + +static inline void rcu_exit_nohz(void) +{ +} + +#endif /* #else #ifdef CONFIG_NO_HZ */ + +static inline void exit_rcu(void) +{ +} + +#endif /* __LINUX_RCUTINY_H */ diff --git a/init/Kconfig b/init/Kconfig index 09c5c6431f42..d367bf6bcc34 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -334,6 +334,15 @@ config TREE_PREEMPT_RCU is also required. It also scales down nicely to smaller systems. +config TINY_RCU + bool "UP-only small-memory-footprint RCU" + depends on !SMP + help + This option selects the RCU implementation that is + designed for UP systems from which real-time response + is not required. This option greatly reduces the + memory footprint of RCU. + endchoice config RCU_TRACE diff --git a/kernel/Makefile b/kernel/Makefile index b8d4cd8ac0b9..e9a0e6ed25cc 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -82,6 +82,7 @@ obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o obj-$(CONFIG_TREE_RCU) += rcutree.o obj-$(CONFIG_TREE_PREEMPT_RCU) += rcutree.o obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o +obj-$(CONFIG_TINY_RCU) += rcutiny.o obj-$(CONFIG_RELAY) += relay.o obj-$(CONFIG_SYSCTL) += utsname_sysctl.o obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 400183346ad2..7625f207f65e 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -67,6 +67,8 @@ void wakeme_after_rcu(struct rcu_head *head) complete(&rcu->completion); } +#ifndef CONFIG_TINY_RCU + #ifdef CONFIG_TREE_PREEMPT_RCU /** @@ -157,6 +159,8 @@ void synchronize_rcu_bh(void) } EXPORT_SYMBOL_GPL(synchronize_rcu_bh); +#endif /* #ifndef CONFIG_TINY_RCU */ + static int __cpuinit rcu_barrier_cpu_hotplug(struct notifier_block *self, unsigned long action, void *hcpu) { diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c new file mode 100644 index 000000000000..0b54efde66ac --- /dev/null +++ b/kernel/rcutiny.c @@ -0,0 +1,294 @@ +/* + * Read-Copy Update mechanism for mutual exclusion, the Bloatwatch edition. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright IBM Corporation, 2008 + * + * Author: Paul E. McKenney + * + * For detailed explanation of Read-Copy Update mechanism see - + * Documentation/RCU + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Global control variables for rcupdate callback mechanism. */ +struct rcu_ctrlblk { + struct rcu_head *rcucblist; /* List of pending callbacks (CBs). */ + struct rcu_head **donetail; /* ->next pointer of last "done" CB. */ + struct rcu_head **curtail; /* ->next pointer of last CB. */ +}; + +/* Definition for rcupdate control block. */ +static struct rcu_ctrlblk rcu_ctrlblk = { + .rcucblist = NULL, + .donetail = &rcu_ctrlblk.rcucblist, + .curtail = &rcu_ctrlblk.rcucblist, +}; +static struct rcu_ctrlblk rcu_bh_ctrlblk = { + .rcucblist = NULL, + .donetail = &rcu_bh_ctrlblk.rcucblist, + .curtail = &rcu_bh_ctrlblk.rcucblist, +}; + +#ifdef CONFIG_NO_HZ + +static long rcu_dynticks_nesting = 1; + +/* + * Enter dynticks-idle mode, which is an extended quiescent state + * if we have fully entered that mode (i.e., if the new value of + * dynticks_nesting is zero). + */ +void rcu_enter_nohz(void) +{ + if (--rcu_dynticks_nesting == 0) + rcu_sched_qs(0); /* implies rcu_bh_qsctr_inc(0) */ +} + +/* + * Exit dynticks-idle mode, so that we are no longer in an extended + * quiescent state. + */ +void rcu_exit_nohz(void) +{ + rcu_dynticks_nesting++; +} + +#endif /* #ifdef CONFIG_NO_HZ */ + +/* + * Helper function for rcu_qsctr_inc() and rcu_bh_qsctr_inc(). + * Also disable irqs to avoid confusion due to interrupt handlers invoking + * call_rcu(). + */ +static int rcu_qsctr_help(struct rcu_ctrlblk *rcp) +{ + unsigned long flags; + + local_irq_save(flags); + if (rcp->rcucblist != NULL && + rcp->donetail != rcp->curtail) { + rcp->donetail = rcp->curtail; + local_irq_restore(flags); + return 1; + } + local_irq_restore(flags); + return 0; +} + +/* + * Record an rcu quiescent state. And an rcu_bh quiescent state while we + * are at it, given that any rcu quiescent state is also an rcu_bh + * quiescent state. Use "+" instead of "||" to defeat short circuiting. + */ +void rcu_sched_qs(int cpu) +{ + if (rcu_qsctr_help(&rcu_ctrlblk) + rcu_qsctr_help(&rcu_bh_ctrlblk)) + raise_softirq(RCU_SOFTIRQ); +} + +/* + * Record an rcu_bh quiescent state. + */ +void rcu_bh_qs(int cpu) +{ + if (rcu_qsctr_help(&rcu_bh_ctrlblk)) + raise_softirq(RCU_SOFTIRQ); +} + +/* + * Check to see if the scheduling-clock interrupt came from an extended + * quiescent state, and, if so, tell RCU about it. + */ +void rcu_check_callbacks(int cpu, int user) +{ + if (user || + (idle_cpu(cpu) && + !in_softirq() && + hardirq_count() <= (1 << HARDIRQ_SHIFT))) + rcu_sched_qs(cpu); + else if (!in_softirq()) + rcu_bh_qs(cpu); +} + +/* + * Helper function for rcu_process_callbacks() that operates on the + * specified rcu_ctrlkblk structure. + */ +static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) +{ + unsigned long flags; + struct rcu_head *next, *list; + + /* If no RCU callbacks ready to invoke, just return. */ + if (&rcp->rcucblist == rcp->donetail) + return; + + /* Move the ready-to-invoke callbacks to a local list. */ + local_irq_save(flags); + list = rcp->rcucblist; + rcp->rcucblist = *rcp->donetail; + *rcp->donetail = NULL; + if (rcp->curtail == rcp->donetail) + rcp->curtail = &rcp->rcucblist; + rcp->donetail = &rcp->rcucblist; + local_irq_restore(flags); + + /* Invoke the callbacks on the local list. */ + while (list) { + next = list->next; + prefetch(next); + list->func(list); + list = next; + } +} + +/* + * Invoke any callbacks whose grace period has completed. + */ +static void rcu_process_callbacks(struct softirq_action *unused) +{ + __rcu_process_callbacks(&rcu_ctrlblk); + __rcu_process_callbacks(&rcu_bh_ctrlblk); +} + +/* + * Null function to handle CPU being onlined. Longer term, we want to + * make TINY_RCU avoid using rcupdate.c, but later... + */ +int rcu_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + return NOTIFY_OK; +} + +/* + * Wait for a grace period to elapse. But it is illegal to invoke + * synchronize_sched() from within an RCU read-side critical section. + * Therefore, any legal call to synchronize_sched() is a quiescent + * state, and so on a UP system, synchronize_sched() need do nothing. + * Ditto for synchronize_rcu_bh(). (But Lai Jiangshan points out the + * benefits of doing might_sleep() to reduce latency.) + * + * Cool, huh? (Due to Josh Triplett.) + * + * But we want to make this a static inline later. + */ +void synchronize_sched(void) +{ + cond_resched(); +} +EXPORT_SYMBOL_GPL(synchronize_sched); + +void synchronize_rcu_bh(void) +{ + synchronize_sched(); +} +EXPORT_SYMBOL_GPL(synchronize_rcu_bh); + +/* + * Helper function for call_rcu() and call_rcu_bh(). + */ +static void __call_rcu(struct rcu_head *head, + void (*func)(struct rcu_head *rcu), + struct rcu_ctrlblk *rcp) +{ + unsigned long flags; + + head->func = func; + head->next = NULL; + local_irq_save(flags); + *rcp->curtail = head; + rcp->curtail = &head->next; + local_irq_restore(flags); +} + +/* + * Post an RCU callback to be invoked after the end of an RCU grace + * period. But since we have but one CPU, that would be after any + * quiescent state. + */ +void call_rcu(struct rcu_head *head, + void (*func)(struct rcu_head *rcu)) +{ + __call_rcu(head, func, &rcu_ctrlblk); +} +EXPORT_SYMBOL_GPL(call_rcu); + +/* + * Post an RCU bottom-half callback to be invoked after any subsequent + * quiescent state. + */ +void call_rcu_bh(struct rcu_head *head, + void (*func)(struct rcu_head *rcu)) +{ + __call_rcu(head, func, &rcu_bh_ctrlblk); +} +EXPORT_SYMBOL_GPL(call_rcu_bh); + +void rcu_barrier(void) +{ + struct rcu_synchronize rcu; + + init_completion(&rcu.completion); + /* Will wake me after RCU finished. */ + call_rcu(&rcu.head, wakeme_after_rcu); + /* Wait for it. */ + wait_for_completion(&rcu.completion); +} +EXPORT_SYMBOL_GPL(rcu_barrier); + +void rcu_barrier_bh(void) +{ + struct rcu_synchronize rcu; + + init_completion(&rcu.completion); + /* Will wake me after RCU finished. */ + call_rcu_bh(&rcu.head, wakeme_after_rcu); + /* Wait for it. */ + wait_for_completion(&rcu.completion); +} +EXPORT_SYMBOL_GPL(rcu_barrier_bh); + +void rcu_barrier_sched(void) +{ + struct rcu_synchronize rcu; + + init_completion(&rcu.completion); + /* Will wake me after RCU finished. */ + call_rcu_sched(&rcu.head, wakeme_after_rcu); + /* Wait for it. */ + wait_for_completion(&rcu.completion); +} +EXPORT_SYMBOL_GPL(rcu_barrier_sched); + +void __rcu_init(void) +{ + open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); +} -- cgit v1.3-8-gc7d7 From 0cd397d33608ae6c97d2ee6c8c43462b419b7e26 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 25 Oct 2009 19:03:51 -0700 Subject: rcu: Add synchronize_srcu_expedited() This patch creates a synchronize_srcu_expedited() that uses synchronize_sched_expedited() where synchronize_srcu() uses synchronize_sched(). The synchronize_srcu() and synchronize_srcu_expedited() functions become one-liners that pass synchronize_sched() or synchronize_sched_expedited(), repectively, to a new __synchronize_srcu() function. While in the file, move the EXPORT_SYMBOL_GPL()s to immediately follow the corresponding functions. Requested-by: Avi Kivity Tested-by: Marcelo Tosatti Signed-off-by: Paul E. McKenney Acked-by: Josh Triplett Reviewed-by: Lai Jiangshan Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com Cc: avi@redhat.com LKML-Reference: <12565226354038-git-send-email-> Signed-off-by: Ingo Molnar --- include/linux/srcu.h | 1 + kernel/srcu.c | 74 ++++++++++++++++++++++++++++++++++++---------------- 2 files changed, 52 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index aca0eee53930..4765d97dcafb 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -48,6 +48,7 @@ void cleanup_srcu_struct(struct srcu_struct *sp); int srcu_read_lock(struct srcu_struct *sp) __acquires(sp); void srcu_read_unlock(struct srcu_struct *sp, int idx) __releases(sp); void synchronize_srcu(struct srcu_struct *sp); +void synchronize_srcu_expedited(struct srcu_struct *sp); long srcu_batches_completed(struct srcu_struct *sp); #endif diff --git a/kernel/srcu.c b/kernel/srcu.c index b0aeeaf22ce4..818d7d9aa03c 100644 --- a/kernel/srcu.c +++ b/kernel/srcu.c @@ -49,6 +49,7 @@ int init_srcu_struct(struct srcu_struct *sp) sp->per_cpu_ref = alloc_percpu(struct srcu_struct_array); return (sp->per_cpu_ref ? 0 : -ENOMEM); } +EXPORT_SYMBOL_GPL(init_srcu_struct); /* * srcu_readers_active_idx -- returns approximate number of readers @@ -97,6 +98,7 @@ void cleanup_srcu_struct(struct srcu_struct *sp) free_percpu(sp->per_cpu_ref); sp->per_cpu_ref = NULL; } +EXPORT_SYMBOL_GPL(cleanup_srcu_struct); /** * srcu_read_lock - register a new reader for an SRCU-protected structure. @@ -118,6 +120,7 @@ int srcu_read_lock(struct srcu_struct *sp) preempt_enable(); return idx; } +EXPORT_SYMBOL_GPL(srcu_read_lock); /** * srcu_read_unlock - unregister a old reader from an SRCU-protected structure. @@ -136,22 +139,12 @@ void srcu_read_unlock(struct srcu_struct *sp, int idx) per_cpu_ptr(sp->per_cpu_ref, smp_processor_id())->c[idx]--; preempt_enable(); } +EXPORT_SYMBOL_GPL(srcu_read_unlock); -/** - * synchronize_srcu - wait for prior SRCU read-side critical-section completion - * @sp: srcu_struct with which to synchronize. - * - * Flip the completed counter, and wait for the old count to drain to zero. - * As with classic RCU, the updater must use some separate means of - * synchronizing concurrent updates. Can block; must be called from - * process context. - * - * Note that it is illegal to call synchornize_srcu() from the corresponding - * SRCU read-side critical section; doing so will result in deadlock. - * However, it is perfectly legal to call synchronize_srcu() on one - * srcu_struct from some other srcu_struct's read-side critical section. +/* + * Helper function for synchronize_srcu() and synchronize_srcu_expedited(). */ -void synchronize_srcu(struct srcu_struct *sp) +void __synchronize_srcu(struct srcu_struct *sp, void (*sync_func)(void)) { int idx; @@ -173,7 +166,7 @@ void synchronize_srcu(struct srcu_struct *sp) return; } - synchronize_sched(); /* Force memory barrier on all CPUs. */ + sync_func(); /* Force memory barrier on all CPUs. */ /* * The preceding synchronize_sched() ensures that any CPU that @@ -190,7 +183,7 @@ void synchronize_srcu(struct srcu_struct *sp) idx = sp->completed & 0x1; sp->completed++; - synchronize_sched(); /* Force memory barrier on all CPUs. */ + sync_func(); /* Force memory barrier on all CPUs. */ /* * At this point, because of the preceding synchronize_sched(), @@ -203,7 +196,7 @@ void synchronize_srcu(struct srcu_struct *sp) while (srcu_readers_active_idx(sp, idx)) schedule_timeout_interruptible(1); - synchronize_sched(); /* Force memory barrier on all CPUs. */ + sync_func(); /* Force memory barrier on all CPUs. */ /* * The preceding synchronize_sched() forces all srcu_read_unlock() @@ -236,6 +229,47 @@ void synchronize_srcu(struct srcu_struct *sp) mutex_unlock(&sp->mutex); } +/** + * synchronize_srcu - wait for prior SRCU read-side critical-section completion + * @sp: srcu_struct with which to synchronize. + * + * Flip the completed counter, and wait for the old count to drain to zero. + * As with classic RCU, the updater must use some separate means of + * synchronizing concurrent updates. Can block; must be called from + * process context. + * + * Note that it is illegal to call synchronize_srcu() from the corresponding + * SRCU read-side critical section; doing so will result in deadlock. + * However, it is perfectly legal to call synchronize_srcu() on one + * srcu_struct from some other srcu_struct's read-side critical section. + */ +void synchronize_srcu(struct srcu_struct *sp) +{ + __synchronize_srcu(sp, synchronize_sched); +} +EXPORT_SYMBOL_GPL(synchronize_srcu); + +/** + * synchronize_srcu_expedited - like synchronize_srcu, but less patient + * @sp: srcu_struct with which to synchronize. + * + * Flip the completed counter, and wait for the old count to drain to zero. + * As with classic RCU, the updater must use some separate means of + * synchronizing concurrent updates. Can block; must be called from + * process context. + * + * Note that it is illegal to call synchronize_srcu_expedited() + * from the corresponding SRCU read-side critical section; doing so + * will result in deadlock. However, it is perfectly legal to call + * synchronize_srcu_expedited() on one srcu_struct from some other + * srcu_struct's read-side critical section. + */ +void synchronize_srcu_expedited(struct srcu_struct *sp) +{ + __synchronize_srcu(sp, synchronize_sched_expedited); +} +EXPORT_SYMBOL_GPL(synchronize_srcu_expedited); + /** * srcu_batches_completed - return batches completed. * @sp: srcu_struct on which to report batch completion. @@ -248,10 +282,4 @@ long srcu_batches_completed(struct srcu_struct *sp) { return sp->completed; } - -EXPORT_SYMBOL_GPL(init_srcu_struct); -EXPORT_SYMBOL_GPL(cleanup_srcu_struct); -EXPORT_SYMBOL_GPL(srcu_read_lock); -EXPORT_SYMBOL_GPL(srcu_read_unlock); -EXPORT_SYMBOL_GPL(synchronize_srcu); EXPORT_SYMBOL_GPL(srcu_batches_completed); -- cgit v1.3-8-gc7d7 From 4ce5b90340879ce93d169b7b523c2cbbe7c45843 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 26 Oct 2009 07:55:55 +0100 Subject: rcu: Do tiny cleanups in rcutiny No change in functionality - just straighten out a few small stylistic details. Cc: Paul E. McKenney Cc: David Howells Cc: Josh Triplett Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: avi@redhat.com Cc: mtosatti@redhat.com LKML-Reference: <12565226351355-git-send-email-> Signed-off-by: Ingo Molnar --- include/linux/rcutiny.h | 6 ++---- kernel/rcutiny.c | 49 +++++++++++++++++++++++-------------------------- 2 files changed, 25 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 891073c264dc..2c1fe8373e71 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -20,9 +20,8 @@ * Author: Paul E. McKenney * * For detailed explanation of Read-Copy Update mechanism see - - * Documentation/RCU + * Documentation/RCU */ - #ifndef __LINUX_TINY_H #define __LINUX_TINY_H @@ -70,8 +69,7 @@ static inline void synchronize_rcu_bh_expedited(void) } struct notifier_block; -extern int rcu_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu); +extern int rcu_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu); #ifdef CONFIG_NO_HZ diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index 0b54efde66ac..b33ec3aa377a 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c @@ -20,22 +20,21 @@ * Author: Paul E. McKenney * * For detailed explanation of Read-Copy Update mechanism see - - * Documentation/RCU + * Documentation/RCU */ - -#include -#include -#include -#include -#include -#include -#include -#include #include +#include +#include #include -#include +#include +#include +#include #include +#include +#include +#include #include +#include /* Global control variables for rcupdate callback mechanism. */ struct rcu_ctrlblk { @@ -46,14 +45,13 @@ struct rcu_ctrlblk { /* Definition for rcupdate control block. */ static struct rcu_ctrlblk rcu_ctrlblk = { - .rcucblist = NULL, - .donetail = &rcu_ctrlblk.rcucblist, - .curtail = &rcu_ctrlblk.rcucblist, + .donetail = &rcu_ctrlblk.rcucblist, + .curtail = &rcu_ctrlblk.rcucblist, }; + static struct rcu_ctrlblk rcu_bh_ctrlblk = { - .rcucblist = NULL, - .donetail = &rcu_bh_ctrlblk.rcucblist, - .curtail = &rcu_bh_ctrlblk.rcucblist, + .donetail = &rcu_bh_ctrlblk.rcucblist, + .curtail = &rcu_bh_ctrlblk.rcucblist, }; #ifdef CONFIG_NO_HZ @@ -84,8 +82,8 @@ void rcu_exit_nohz(void) /* * Helper function for rcu_qsctr_inc() and rcu_bh_qsctr_inc(). - * Also disable irqs to avoid confusion due to interrupt handlers invoking - * call_rcu(). + * Also disable irqs to avoid confusion due to interrupt handlers + * invoking call_rcu(). */ static int rcu_qsctr_help(struct rcu_ctrlblk *rcp) { @@ -99,6 +97,7 @@ static int rcu_qsctr_help(struct rcu_ctrlblk *rcp) return 1; } local_irq_restore(flags); + return 0; } @@ -143,8 +142,8 @@ void rcu_check_callbacks(int cpu, int user) */ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) { - unsigned long flags; struct rcu_head *next, *list; + unsigned long flags; /* If no RCU callbacks ready to invoke, just return. */ if (&rcp->rcucblist == rcp->donetail) @@ -182,8 +181,7 @@ static void rcu_process_callbacks(struct softirq_action *unused) * Null function to handle CPU being onlined. Longer term, we want to * make TINY_RCU avoid using rcupdate.c, but later... */ -int rcu_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) +int rcu_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { return NOTIFY_OK; } @@ -223,6 +221,7 @@ static void __call_rcu(struct rcu_head *head, head->func = func; head->next = NULL; + local_irq_save(flags); *rcp->curtail = head; rcp->curtail = &head->next; @@ -234,8 +233,7 @@ static void __call_rcu(struct rcu_head *head, * period. But since we have but one CPU, that would be after any * quiescent state. */ -void call_rcu(struct rcu_head *head, - void (*func)(struct rcu_head *rcu)) +void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) { __call_rcu(head, func, &rcu_ctrlblk); } @@ -245,8 +243,7 @@ EXPORT_SYMBOL_GPL(call_rcu); * Post an RCU bottom-half callback to be invoked after any subsequent * quiescent state. */ -void call_rcu_bh(struct rcu_head *head, - void (*func)(struct rcu_head *rcu)) +void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) { __call_rcu(head, func, &rcu_bh_ctrlblk); } -- cgit v1.3-8-gc7d7 From b2c18e1e08a5a9663094d57bb4be2f02226ee61c Mon Sep 17 00:00:00 2001 From: Jeff Moyer Date: Fri, 23 Oct 2009 17:14:49 -0400 Subject: cfq: calculate the seek_mean per cfq_queue not per cfq_io_context async cfq_queue's are already shared between processes within the same priority, and forthcoming patches will change the mapping of cic to sync cfq_queue from 1:1 to 1:N. So, calculate the seekiness of a process based on the cfq_queue instead of the cfq_io_context. Signed-off-by: Jeff Moyer Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 68 +++++++++++++++++++++++------------------------ include/linux/iocontext.h | 5 ---- 2 files changed, 33 insertions(+), 40 deletions(-) (limited to 'include') diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 069a61017c02..78cc8ee5da41 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -112,6 +112,11 @@ struct cfq_queue { unsigned short ioprio, org_ioprio; unsigned short ioprio_class, org_ioprio_class; + unsigned int seek_samples; + u64 seek_total; + sector_t seek_mean; + sector_t last_request_pos; + pid_t pid; }; @@ -962,16 +967,16 @@ static inline sector_t cfq_dist_from_last(struct cfq_data *cfqd, return cfqd->last_position - blk_rq_pos(rq); } -#define CIC_SEEK_THR 8 * 1024 -#define CIC_SEEKY(cic) ((cic)->seek_mean > CIC_SEEK_THR) +#define CFQQ_SEEK_THR 8 * 1024 +#define CFQQ_SEEKY(cfqq) ((cfqq)->seek_mean > CFQQ_SEEK_THR) -static inline int cfq_rq_close(struct cfq_data *cfqd, struct request *rq) +static inline int cfq_rq_close(struct cfq_data *cfqd, struct cfq_queue *cfqq, + struct request *rq) { - struct cfq_io_context *cic = cfqd->active_cic; - sector_t sdist = cic->seek_mean; + sector_t sdist = cfqq->seek_mean; - if (!sample_valid(cic->seek_samples)) - sdist = CIC_SEEK_THR; + if (!sample_valid(cfqq->seek_samples)) + sdist = CFQQ_SEEK_THR; return cfq_dist_from_last(cfqd, rq) <= sdist; } @@ -1000,7 +1005,7 @@ static struct cfq_queue *cfqq_close(struct cfq_data *cfqd, * will contain the closest sector. */ __cfqq = rb_entry(parent, struct cfq_queue, p_node); - if (cfq_rq_close(cfqd, __cfqq->next_rq)) + if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq)) return __cfqq; if (blk_rq_pos(__cfqq->next_rq) < sector) @@ -1011,7 +1016,7 @@ static struct cfq_queue *cfqq_close(struct cfq_data *cfqd, return NULL; __cfqq = rb_entry(node, struct cfq_queue, p_node); - if (cfq_rq_close(cfqd, __cfqq->next_rq)) + if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq)) return __cfqq; return NULL; @@ -1033,13 +1038,6 @@ static struct cfq_queue *cfq_close_cooperator(struct cfq_data *cfqd, { struct cfq_queue *cfqq; - /* - * A valid cfq_io_context is necessary to compare requests against - * the seek_mean of the current cfqq. - */ - if (!cfqd->active_cic) - return NULL; - /* * We should notice if some of the queues are cooperating, eg * working closely on the same area of the disk. In that case, @@ -1110,7 +1108,7 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd) * seeks. so allow a little bit of time for him to submit a new rq */ sl = cfqd->cfq_slice_idle; - if (sample_valid(cic->seek_samples) && CIC_SEEKY(cic)) + if (sample_valid(cfqq->seek_samples) && CFQQ_SEEKY(cfqq)) sl = min(sl, msecs_to_jiffies(CFQ_MIN_TT)); mod_timer(&cfqd->idle_slice_timer, jiffies + sl); @@ -1947,33 +1945,33 @@ cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_io_context *cic) } static void -cfq_update_io_seektime(struct cfq_data *cfqd, struct cfq_io_context *cic, +cfq_update_io_seektime(struct cfq_data *cfqd, struct cfq_queue *cfqq, struct request *rq) { sector_t sdist; u64 total; - if (!cic->last_request_pos) + if (!cfqq->last_request_pos) sdist = 0; - else if (cic->last_request_pos < blk_rq_pos(rq)) - sdist = blk_rq_pos(rq) - cic->last_request_pos; + else if (cfqq->last_request_pos < blk_rq_pos(rq)) + sdist = blk_rq_pos(rq) - cfqq->last_request_pos; else - sdist = cic->last_request_pos - blk_rq_pos(rq); + sdist = cfqq->last_request_pos - blk_rq_pos(rq); /* * Don't allow the seek distance to get too large from the * odd fragment, pagein, etc */ - if (cic->seek_samples <= 60) /* second&third seek */ - sdist = min(sdist, (cic->seek_mean * 4) + 2*1024*1024); + if (cfqq->seek_samples <= 60) /* second&third seek */ + sdist = min(sdist, (cfqq->seek_mean * 4) + 2*1024*1024); else - sdist = min(sdist, (cic->seek_mean * 4) + 2*1024*64); + sdist = min(sdist, (cfqq->seek_mean * 4) + 2*1024*64); - cic->seek_samples = (7*cic->seek_samples + 256) / 8; - cic->seek_total = (7*cic->seek_total + (u64)256*sdist) / 8; - total = cic->seek_total + (cic->seek_samples/2); - do_div(total, cic->seek_samples); - cic->seek_mean = (sector_t)total; + cfqq->seek_samples = (7*cfqq->seek_samples + 256) / 8; + cfqq->seek_total = (7*cfqq->seek_total + (u64)256*sdist) / 8; + total = cfqq->seek_total + (cfqq->seek_samples/2); + do_div(total, cfqq->seek_samples); + cfqq->seek_mean = (sector_t)total; } /* @@ -1995,11 +1993,11 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq, enable_idle = old_idle = cfq_cfqq_idle_window(cfqq); if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle || - (!cfqd->cfq_latency && cfqd->hw_tag && CIC_SEEKY(cic))) + (!cfqd->cfq_latency && cfqd->hw_tag && CFQQ_SEEKY(cfqq))) enable_idle = 0; else if (sample_valid(cic->ttime_samples)) { unsigned int slice_idle = cfqd->cfq_slice_idle; - if (sample_valid(cic->seek_samples) && CIC_SEEKY(cic)) + if (sample_valid(cfqq->seek_samples) && CFQQ_SEEKY(cfqq)) slice_idle = msecs_to_jiffies(CFQ_MIN_TT); if (cic->ttime_mean > slice_idle) enable_idle = 0; @@ -2066,7 +2064,7 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq, * if this request is as-good as one we would expect from the * current cfqq, let it preempt */ - if (cfq_rq_close(cfqd, rq)) + if (cfq_rq_close(cfqd, cfqq, rq)) return true; return false; @@ -2108,10 +2106,10 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, cfqq->meta_pending++; cfq_update_io_thinktime(cfqd, cic); - cfq_update_io_seektime(cfqd, cic, rq); + cfq_update_io_seektime(cfqd, cfqq, rq); cfq_update_idle_window(cfqd, cfqq, cic); - cic->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq); + cfqq->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq); if (cfqq == cfqd->active_queue) { /* diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index 4da4a75c3f1e..eb73632440f1 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -40,16 +40,11 @@ struct cfq_io_context { struct io_context *ioc; unsigned long last_end_request; - sector_t last_request_pos; unsigned long ttime_total; unsigned long ttime_samples; unsigned long ttime_mean; - unsigned int seek_samples; - u64 seek_total; - sector_t seek_mean; - struct list_head queue_list; struct hlist_node cic_list; -- cgit v1.3-8-gc7d7 From a5f523bc0cdee2a163a034344ebf1163799b3c5d Mon Sep 17 00:00:00 2001 From: Tias Guns Date: Sun, 25 Oct 2009 12:13:58 -0700 Subject: Input: add driver for Dynapro serial touchscreen This is a driver for Dynapro serial touchscreen, which used to be supported in Xorg. The driver needs updated inputattach utility to initialize serial port and create proper serio device before the driver will be bound to it. Signed-off-by: Tias Guns Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/Kconfig | 12 +++ drivers/input/touchscreen/Makefile | 1 + drivers/input/touchscreen/dynapro.c | 206 ++++++++++++++++++++++++++++++++++++ include/linux/serio.h | 1 + 4 files changed, 220 insertions(+) create mode 100644 drivers/input/touchscreen/dynapro.c (limited to 'include') diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig index 8cc453c85ea7..1cd9e8c8efb3 100644 --- a/drivers/input/touchscreen/Kconfig +++ b/drivers/input/touchscreen/Kconfig @@ -111,6 +111,18 @@ config TOUCHSCREEN_DA9034 Say Y here to enable the support for the touchscreen found on Dialog Semiconductor DA9034 PMIC. +config TOUCHSCREEN_DYNAPRO + tristate "Dynapro serial touchscreen" + select SERIO + help + Say Y here if you have a Dynapro serial touchscreen connected to + your system. + + If unsure, say N. + + To compile this driver as a module, choose M here: the + module will be called dynapro. + config TOUCHSCREEN_EETI tristate "EETI touchscreen panel support" depends on I2C diff --git a/drivers/input/touchscreen/Makefile b/drivers/input/touchscreen/Makefile index 15fa62cffc77..1f5cccd3a16a 100644 --- a/drivers/input/touchscreen/Makefile +++ b/drivers/input/touchscreen/Makefile @@ -12,6 +12,7 @@ obj-$(CONFIG_TOUCHSCREEN_ADS7846) += ads7846.o obj-$(CONFIG_TOUCHSCREEN_ATMEL_TSADCC) += atmel_tsadcc.o obj-$(CONFIG_TOUCHSCREEN_BITSY) += h3600_ts_input.o obj-$(CONFIG_TOUCHSCREEN_CORGI) += corgi_ts.o +obj-$(CONFIG_TOUCHSCREEN_DYNAPRO) += dynapro.o obj-$(CONFIG_TOUCHSCREEN_GUNZE) += gunze.o obj-$(CONFIG_TOUCHSCREEN_EETI) += eeti_ts.o obj-$(CONFIG_TOUCHSCREEN_ELO) += elo.o diff --git a/drivers/input/touchscreen/dynapro.c b/drivers/input/touchscreen/dynapro.c new file mode 100644 index 000000000000..455353908bdf --- /dev/null +++ b/drivers/input/touchscreen/dynapro.c @@ -0,0 +1,206 @@ +/* + * Dynapro serial touchscreen driver + * + * Copyright (c) 2009 Tias Guns + * Based on the inexio driver (c) Vojtech Pavlik and Dan Streetman and + * Richard Lemon + * + */ + +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + */ + +/* + * 2009/09/19 Tias Guns + * Copied inexio.c and edited for Dynapro protocol (from retired Xorg module) + */ + +#include +#include +#include +#include +#include +#include +#include + +#define DRIVER_DESC "Dynapro serial touchscreen driver" + +MODULE_AUTHOR("Tias Guns "); +MODULE_DESCRIPTION(DRIVER_DESC); +MODULE_LICENSE("GPL"); + +/* + * Definitions & global arrays. + */ + +#define DYNAPRO_FORMAT_TOUCH_BIT 0x40 +#define DYNAPRO_FORMAT_LENGTH 3 +#define DYNAPRO_RESPONSE_BEGIN_BYTE 0x80 + +#define DYNAPRO_MIN_XC 0 +#define DYNAPRO_MAX_XC 0x3ff +#define DYNAPRO_MIN_YC 0 +#define DYNAPRO_MAX_YC 0x3ff + +#define DYNAPRO_GET_XC(data) (data[1] | ((data[0] & 0x38) << 4)) +#define DYNAPRO_GET_YC(data) (data[2] | ((data[0] & 0x07) << 7)) +#define DYNAPRO_GET_TOUCHED(data) (DYNAPRO_FORMAT_TOUCH_BIT & data[0]) + +/* + * Per-touchscreen data. + */ + +struct dynapro { + struct input_dev *dev; + struct serio *serio; + int idx; + unsigned char data[DYNAPRO_FORMAT_LENGTH]; + char phys[32]; +}; + +static void dynapro_process_data(struct dynapro *pdynapro) +{ + struct input_dev *dev = pdynapro->dev; + + if (DYNAPRO_FORMAT_LENGTH == ++pdynapro->idx) { + input_report_abs(dev, ABS_X, DYNAPRO_GET_XC(pdynapro->data)); + input_report_abs(dev, ABS_Y, DYNAPRO_GET_YC(pdynapro->data)); + input_report_key(dev, BTN_TOUCH, + DYNAPRO_GET_TOUCHED(pdynapro->data)); + input_sync(dev); + + pdynapro->idx = 0; + } +} + +static irqreturn_t dynapro_interrupt(struct serio *serio, + unsigned char data, unsigned int flags) +{ + struct dynapro *pdynapro = serio_get_drvdata(serio); + + pdynapro->data[pdynapro->idx] = data; + + if (DYNAPRO_RESPONSE_BEGIN_BYTE & pdynapro->data[0]) + dynapro_process_data(pdynapro); + else + dev_dbg(&serio->dev, "unknown/unsynchronized data: %x\n", + pdynapro->data[0]); + + return IRQ_HANDLED; +} + +static void dynapro_disconnect(struct serio *serio) +{ + struct dynapro *pdynapro = serio_get_drvdata(serio); + + input_get_device(pdynapro->dev); + input_unregister_device(pdynapro->dev); + serio_close(serio); + serio_set_drvdata(serio, NULL); + input_put_device(pdynapro->dev); + kfree(pdynapro); +} + +/* + * dynapro_connect() is the routine that is called when someone adds a + * new serio device that supports dynapro protocol and registers it as + * an input device. This is usually accomplished using inputattach. + */ + +static int dynapro_connect(struct serio *serio, struct serio_driver *drv) +{ + struct dynapro *pdynapro; + struct input_dev *input_dev; + int err; + + pdynapro = kzalloc(sizeof(struct dynapro), GFP_KERNEL); + input_dev = input_allocate_device(); + if (!pdynapro || !input_dev) { + err = -ENOMEM; + goto fail1; + } + + pdynapro->serio = serio; + pdynapro->dev = input_dev; + snprintf(pdynapro->phys, sizeof(pdynapro->phys), + "%s/input0", serio->phys); + + input_dev->name = "Dynapro Serial TouchScreen"; + input_dev->phys = pdynapro->phys; + input_dev->id.bustype = BUS_RS232; + input_dev->id.vendor = SERIO_DYNAPRO; + input_dev->id.product = 0; + input_dev->id.version = 0x0001; + input_dev->dev.parent = &serio->dev; + input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS); + input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH); + input_set_abs_params(pdynapro->dev, ABS_X, + DYNAPRO_MIN_XC, DYNAPRO_MAX_XC, 0, 0); + input_set_abs_params(pdynapro->dev, ABS_Y, + DYNAPRO_MIN_YC, DYNAPRO_MAX_YC, 0, 0); + + serio_set_drvdata(serio, pdynapro); + + err = serio_open(serio, drv); + if (err) + goto fail2; + + err = input_register_device(pdynapro->dev); + if (err) + goto fail3; + + return 0; + + fail3: serio_close(serio); + fail2: serio_set_drvdata(serio, NULL); + fail1: input_free_device(input_dev); + kfree(pdynapro); + return err; +} + +/* + * The serio driver structure. + */ + +static struct serio_device_id dynapro_serio_ids[] = { + { + .type = SERIO_RS232, + .proto = SERIO_DYNAPRO, + .id = SERIO_ANY, + .extra = SERIO_ANY, + }, + { 0 } +}; + +MODULE_DEVICE_TABLE(serio, dynapro_serio_ids); + +static struct serio_driver dynapro_drv = { + .driver = { + .name = "dynapro", + }, + .description = DRIVER_DESC, + .id_table = dynapro_serio_ids, + .interrupt = dynapro_interrupt, + .connect = dynapro_connect, + .disconnect = dynapro_disconnect, +}; + +/* + * The functions for inserting/removing us as a module. + */ + +static int __init dynapro_init(void) +{ + return serio_register_driver(&dynapro_drv); +} + +static void __exit dynapro_exit(void) +{ + serio_unregister_driver(&dynapro_drv); +} + +module_init(dynapro_init); +module_exit(dynapro_exit); diff --git a/include/linux/serio.h b/include/linux/serio.h index a640bc2afe76..e2f3044d4a4a 100644 --- a/include/linux/serio.h +++ b/include/linux/serio.h @@ -215,5 +215,6 @@ static inline void serio_unpin_driver(struct serio *serio) #define SERIO_INEXIO 0x37 #define SERIO_TOUCHIT213 0x38 #define SERIO_W8001 0x39 +#define SERIO_DYNAPRO 0x3a #endif -- cgit v1.3-8-gc7d7 From 05423b241311c9380b7280179295bac7794281b6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 26 Oct 2009 18:40:35 -0700 Subject: vlan: allow null VLAN ID to be used We currently use a 16 bit field (vlan_tci) to store VLAN ID/PRIO on a skb. Null value is used as a special value, meaning vlan tagging not enabled. This forbids use of null vlan ID. As pointed by David, some drivers use the 3 high order bits (PRIO) As VLAN ID is 12 bits, we can use the remaining bit (CFI) as a flag, and allow null VLAN ID. In case future code really wants to use VLAN_CFI_MASK, we'll have to use a bit outside of vlan_tci. #define VLAN_PRIO_MASK 0xe000 /* Priority Code Point */ #define VLAN_PRIO_SHIFT 13 #define VLAN_CFI_MASK 0x1000 /* Canonical Format Indicator */ #define VLAN_TAG_PRESENT VLAN_CFI_MASK #define VLAN_VID_MASK 0x0fff /* VLAN Identifier */ Reported-by: Gertjan Hofman Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 14 +++++++++----- net/8021q/vlan.h | 2 +- net/8021q/vlan_dev.c | 2 +- net/core/dev.c | 2 +- net/packet/af_packet.c | 5 +++-- 5 files changed, 15 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 7ff9af1d0f05..8898cbebcf34 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -63,7 +63,11 @@ static inline struct vlan_ethhdr *vlan_eth_hdr(const struct sk_buff *skb) return (struct vlan_ethhdr *)skb_mac_header(skb); } -#define VLAN_VID_MASK 0xfff +#define VLAN_PRIO_MASK 0xe000 /* Priority Code Point */ +#define VLAN_PRIO_SHIFT 13 +#define VLAN_CFI_MASK 0x1000 /* Canonical Format Indicator */ +#define VLAN_TAG_PRESENT VLAN_CFI_MASK +#define VLAN_VID_MASK 0x0fff /* VLAN Identifier */ /* found in socket.c */ extern void vlan_ioctl_set(int (*hook)(struct net *, void __user *)); @@ -105,8 +109,8 @@ static inline void vlan_group_set_device(struct vlan_group *vg, array[vlan_id % VLAN_GROUP_ARRAY_PART_LEN] = dev; } -#define vlan_tx_tag_present(__skb) ((__skb)->vlan_tci) -#define vlan_tx_tag_get(__skb) ((__skb)->vlan_tci) +#define vlan_tx_tag_present(__skb) ((__skb)->vlan_tci & VLAN_TAG_PRESENT) +#define vlan_tx_tag_get(__skb) ((__skb)->vlan_tci & ~VLAN_TAG_PRESENT) #if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) extern struct net_device *vlan_dev_real_dev(const struct net_device *dev); @@ -231,7 +235,7 @@ static inline struct sk_buff *__vlan_put_tag(struct sk_buff *skb, u16 vlan_tci) static inline struct sk_buff *__vlan_hwaccel_put_tag(struct sk_buff *skb, u16 vlan_tci) { - skb->vlan_tci = vlan_tci; + skb->vlan_tci = VLAN_TAG_PRESENT | vlan_tci; return skb; } @@ -284,7 +288,7 @@ static inline int __vlan_hwaccel_get_tag(const struct sk_buff *skb, u16 *vlan_tci) { if (vlan_tx_tag_present(skb)) { - *vlan_tci = skb->vlan_tci; + *vlan_tci = vlan_tx_tag_get(skb); return 0; } else { *vlan_tci = 0; diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index 82570bc2a180..4ade5edf1033 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -89,7 +89,7 @@ static inline u32 vlan_get_ingress_priority(struct net_device *dev, { struct vlan_dev_info *vip = vlan_dev_info(dev); - return vip->ingress_priority_map[(vlan_tci >> 13) & 0x7]; + return vip->ingress_priority_map[(vlan_tci >> VLAN_PRIO_SHIFT) & 0x7]; } #ifdef CONFIG_VLAN_8021Q_GVRP diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 4198ec5c8abc..e3701977f58d 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -393,7 +393,7 @@ int vlan_dev_set_egress_priority(const struct net_device *dev, struct vlan_dev_info *vlan = vlan_dev_info(dev); struct vlan_priority_tci_mapping *mp = NULL; struct vlan_priority_tci_mapping *np; - u32 vlan_qos = (vlan_prio << 13) & 0xE000; + u32 vlan_qos = (vlan_prio << VLAN_PRIO_SHIFT) & VLAN_PRIO_MASK; /* See if a priority mapping exists.. */ mp = vlan->egress_priority_map[skb_prio & 0xF]; diff --git a/net/core/dev.c b/net/core/dev.c index e7bada1d5ed9..950c13fa60d2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2300,7 +2300,7 @@ int netif_receive_skb(struct sk_buff *skb) if (!skb->tstamp.tv64) net_timestamp(skb); - if (skb->vlan_tci && vlan_hwaccel_do_receive(skb)) + if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb)) return NET_RX_SUCCESS; /* if we've gotten here through NAPI, check netpoll */ diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index ff752c606413..33e68f20ec61 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -79,6 +79,7 @@ #include #include #include +#include #ifdef CONFIG_INET #include @@ -766,7 +767,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, getnstimeofday(&ts); h.h2->tp_sec = ts.tv_sec; h.h2->tp_nsec = ts.tv_nsec; - h.h2->tp_vlan_tci = skb->vlan_tci; + h.h2->tp_vlan_tci = vlan_tx_tag_get(skb); hdrlen = sizeof(*h.h2); break; default: @@ -1493,7 +1494,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, aux.tp_snaplen = skb->len; aux.tp_mac = 0; aux.tp_net = skb_network_offset(skb); - aux.tp_vlan_tci = skb->vlan_tci; + aux.tp_vlan_tci = vlan_tx_tag_get(skb); put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux); } -- cgit v1.3-8-gc7d7 From 2c28e2451dba2260e9f88811b29a7787db7e7616 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 26 Oct 2009 13:57:44 -0700 Subject: rcu: Fix TINY_RCU #elif condition Some compilers are happy with "#elif CONFIG_RCU_TINY", while others strongly prefer "#elif defined(CONFIG_RCU_TINY)". Change to the latter to make more compilers happy. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <12565906642768-git-send-email-> Signed-off-by: Ingo Molnar --- include/linux/rcupdate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 6dd71fa48429..2f1bc42a3b82 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -77,7 +77,7 @@ extern int rcu_scheduler_active; #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) #include -#elif CONFIG_TINY_RCU +#elif defined(CONFIG_TINY_RCU) #include #else #error "Unknown RCU implementation specified to kernel configuration" -- cgit v1.3-8-gc7d7 From 8b45499ccb8a93cd68b1a8766786c2f8ea991ae2 Mon Sep 17 00:00:00 2001 From: Michael Buesch Date: Fri, 9 Oct 2009 20:32:10 +0200 Subject: ssb: Put host pointers into a union This slightly shrinks the structure. Signed-off-by: Michael Buesch Signed-off-by: John W. Linville --- drivers/ssb/driver_pcicore.c | 4 ++-- include/linux/ssb/ssb.h | 20 ++++++++++++-------- 2 files changed, 14 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/ssb/driver_pcicore.c b/drivers/ssb/driver_pcicore.c index 538c570df337..f1dcd7969a5c 100644 --- a/drivers/ssb/driver_pcicore.c +++ b/drivers/ssb/driver_pcicore.c @@ -551,13 +551,13 @@ int ssb_pcicore_dev_irqvecs_enable(struct ssb_pcicore *pc, might_sleep_if(pdev->id.coreid != SSB_DEV_PCI); /* Enable interrupts for this device. */ - if (bus->host_pci && - ((pdev->id.revision >= 6) || (pdev->id.coreid == SSB_DEV_PCIE))) { + if ((pdev->id.revision >= 6) || (pdev->id.coreid == SSB_DEV_PCIE)) { u32 coremask; /* Calculate the "coremask" for the device. */ coremask = (1 << dev->core_index); + SSB_WARN_ON(bus->bustype != SSB_BUSTYPE_PCI); err = pci_read_config_dword(bus->host_pci, SSB_PCI_IRQMASK, &tmp); if (err) goto out; diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h index 3d0a9ff24f01..24f988547361 100644 --- a/include/linux/ssb/ssb.h +++ b/include/linux/ssb/ssb.h @@ -269,7 +269,8 @@ struct ssb_bus { const struct ssb_bus_ops *ops; - /* The core in the basic address register window. (PCI bus only) */ + /* The core currently mapped into the MMIO window. + * Not valid on all host-buses. So don't use outside of SSB. */ struct ssb_device *mapped_device; union { /* Currently mapped PCMCIA segment. (bustype == SSB_BUSTYPE_PCMCIA only) */ @@ -281,14 +282,17 @@ struct ssb_bus { * On PCMCIA-host busses this is used to protect the whole MMIO access. */ spinlock_t bar_lock; - /* The bus this backplane is running on. */ + /* The host-bus this backplane is running on. */ enum ssb_bustype bustype; - /* Pointer to the PCI bus (only valid if bustype == SSB_BUSTYPE_PCI). */ - struct pci_dev *host_pci; - /* Pointer to the PCMCIA device (only if bustype == SSB_BUSTYPE_PCMCIA). */ - struct pcmcia_device *host_pcmcia; - /* Pointer to the SDIO device (only if bustype == SSB_BUSTYPE_SDIO). */ - struct sdio_func *host_sdio; + /* Pointers to the host-bus. Check bustype before using any of these pointers. */ + union { + /* Pointer to the PCI bus (only valid if bustype == SSB_BUSTYPE_PCI). */ + struct pci_dev *host_pci; + /* Pointer to the PCMCIA device (only if bustype == SSB_BUSTYPE_PCMCIA). */ + struct pcmcia_device *host_pcmcia; + /* Pointer to the SDIO device (only if bustype == SSB_BUSTYPE_SDIO). */ + struct sdio_func *host_sdio; + }; /* See enum ssb_quirks */ unsigned int quirks; -- cgit v1.3-8-gc7d7 From ce470613cdfde70f25419cc52a4816315825f5d9 Mon Sep 17 00:00:00 2001 From: Holger Schurig Date: Tue, 13 Oct 2009 13:28:13 +0200 Subject: cfg80211: no cookies in cfg80211_send_XXX() Get rid of cookies in cfg80211_send_XXX() functions. Signed-off-by: Holger Schurig Signed-off-by: John W. Linville --- include/net/cfg80211.h | 31 +++++++++++++++++++++++-------- net/mac80211/mlme.c | 18 +++++++++++------- net/wireless/mlme.c | 39 ++++++++++++--------------------------- 3 files changed, 46 insertions(+), 42 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 6f4862b3ec2c..ff67865de231 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1809,30 +1809,45 @@ void cfg80211_send_assoc_timeout(struct net_device *dev, const u8 *addr); * @dev: network device * @buf: deauthentication frame (header + body) * @len: length of the frame data - * @cookie: cookie from ->deauth if called within that callback, - * %NULL otherwise * * This function is called whenever deauthentication has been processed in * station mode. This includes both received deauthentication frames and * locally generated ones. This function may sleep. */ -void cfg80211_send_deauth(struct net_device *dev, const u8 *buf, size_t len, - void *cookie); +void cfg80211_send_deauth(struct net_device *dev, const u8 *buf, size_t len); + +/** + * __cfg80211_send_deauth - notification of processed deauthentication + * @dev: network device + * @buf: deauthentication frame (header + body) + * @len: length of the frame data + * + * Like cfg80211_send_deauth(), but doesn't take the wdev lock. + */ +void __cfg80211_send_deauth(struct net_device *dev, const u8 *buf, size_t len); /** * cfg80211_send_disassoc - notification of processed disassociation * @dev: network device * @buf: disassociation response frame (header + body) * @len: length of the frame data - * @cookie: cookie from ->disassoc if called within that callback, - * %NULL otherwise * * This function is called whenever disassociation has been processed in * station mode. This includes both received disassociation frames and locally * generated ones. This function may sleep. */ -void cfg80211_send_disassoc(struct net_device *dev, const u8 *buf, size_t len, - void *cookie); +void cfg80211_send_disassoc(struct net_device *dev, const u8 *buf, size_t len); + +/** + * __cfg80211_send_disassoc - notification of processed disassociation + * @dev: network device + * @buf: disassociation response frame (header + body) + * @len: length of the frame data + * + * Like cfg80211_send_disassoc(), but doesn't take the wdev lock. + */ +void __cfg80211_send_disassoc(struct net_device *dev, const u8 *buf, + size_t len); /** * cfg80211_michael_mic_failure - notification of Michael MIC failure (TKIP) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 8d26e9bf8964..33a696f5f305 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -458,9 +458,15 @@ static void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata, mgmt->u.deauth.reason_code = cpu_to_le16(reason); if (stype == IEEE80211_STYPE_DEAUTH) - cfg80211_send_deauth(sdata->dev, (u8 *)mgmt, skb->len, cookie); + if (cookie) + __cfg80211_send_deauth(sdata->dev, (u8 *)mgmt, skb->len); + else + cfg80211_send_deauth(sdata->dev, (u8 *)mgmt, skb->len); else - cfg80211_send_disassoc(sdata->dev, (u8 *)mgmt, skb->len, cookie); + if (cookie) + __cfg80211_send_disassoc(sdata->dev, (u8 *)mgmt, skb->len); + else + cfg80211_send_disassoc(sdata->dev, (u8 *)mgmt, skb->len); ieee80211_tx_skb(sdata, skb, ifmgd->flags & IEEE80211_STA_MFP_ENABLED); } @@ -1959,12 +1965,10 @@ static void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, /* no action */ break; case RX_MGMT_CFG80211_DEAUTH: - cfg80211_send_deauth(sdata->dev, (u8 *)mgmt, skb->len, - NULL); + cfg80211_send_deauth(sdata->dev, (u8 *)mgmt, skb->len); break; case RX_MGMT_CFG80211_DISASSOC: - cfg80211_send_disassoc(sdata->dev, (u8 *)mgmt, skb->len, - NULL); + cfg80211_send_disassoc(sdata->dev, (u8 *)mgmt, skb->len); break; default: WARN(1, "unexpected: %d", rma); @@ -2019,7 +2023,7 @@ static void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, cfg80211_send_rx_assoc(sdata->dev, (u8 *) mgmt, skb->len); break; case RX_MGMT_CFG80211_DEAUTH: - cfg80211_send_deauth(sdata->dev, (u8 *)mgmt, skb->len, NULL); + cfg80211_send_deauth(sdata->dev, (u8 *)mgmt, skb->len); break; default: WARN(1, "unexpected: %d", rma); diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index ceb2c14c8f47..a13a71205240 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -121,7 +121,7 @@ void cfg80211_send_rx_assoc(struct net_device *dev, const u8 *buf, size_t len) } EXPORT_SYMBOL(cfg80211_send_rx_assoc); -static void __cfg80211_send_deauth(struct net_device *dev, +void __cfg80211_send_deauth(struct net_device *dev, const u8 *buf, size_t len) { struct wireless_dev *wdev = dev->ieee80211_ptr; @@ -177,27 +177,19 @@ static void __cfg80211_send_deauth(struct net_device *dev, false, NULL); } } +EXPORT_SYMBOL(__cfg80211_send_deauth); - -void cfg80211_send_deauth(struct net_device *dev, const u8 *buf, size_t len, - void *cookie) +void cfg80211_send_deauth(struct net_device *dev, const u8 *buf, size_t len) { struct wireless_dev *wdev = dev->ieee80211_ptr; - BUG_ON(cookie && wdev != cookie); - - if (cookie) { - /* called within callback */ - __cfg80211_send_deauth(dev, buf, len); - } else { - wdev_lock(wdev); - __cfg80211_send_deauth(dev, buf, len); - wdev_unlock(wdev); - } + wdev_lock(wdev); + __cfg80211_send_deauth(dev, buf, len); + wdev_unlock(wdev); } EXPORT_SYMBOL(cfg80211_send_deauth); -static void __cfg80211_send_disassoc(struct net_device *dev, +void __cfg80211_send_disassoc(struct net_device *dev, const u8 *buf, size_t len) { struct wireless_dev *wdev = dev->ieee80211_ptr; @@ -238,22 +230,15 @@ static void __cfg80211_send_disassoc(struct net_device *dev, from_ap = memcmp(mgmt->sa, dev->dev_addr, ETH_ALEN) != 0; __cfg80211_disconnected(dev, NULL, 0, reason_code, from_ap); } +EXPORT_SYMBOL(__cfg80211_send_disassoc); -void cfg80211_send_disassoc(struct net_device *dev, const u8 *buf, size_t len, - void *cookie) +void cfg80211_send_disassoc(struct net_device *dev, const u8 *buf, size_t len) { struct wireless_dev *wdev = dev->ieee80211_ptr; - BUG_ON(cookie && wdev != cookie); - - if (cookie) { - /* called within callback */ - __cfg80211_send_disassoc(dev, buf, len); - } else { - wdev_lock(wdev); - __cfg80211_send_disassoc(dev, buf, len); - wdev_unlock(wdev); - } + wdev_lock(wdev); + __cfg80211_send_disassoc(dev, buf, len); + wdev_unlock(wdev); } EXPORT_SYMBOL(cfg80211_send_disassoc); -- cgit v1.3-8-gc7d7 From e36e49f7338f0f73cd7f5ba4f5b646a479ab60a8 Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Tue, 13 Oct 2009 20:33:13 +0300 Subject: mac80211: add ieee80211_rx_ni() ieee80211_rx() must be called with bottom halves disabled. To simplify driver development implement ieee80211_rx_ni() which disables BH. This function must be used when in process context. Signed-off-by: Kalle Valo Signed-off-by: John W. Linville --- include/net/mac80211.h | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index c75b960c8ac8..c42c4a820b89 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1665,11 +1665,11 @@ void ieee80211_restart_hw(struct ieee80211_hw *hw); * header if %RX_FLAG_RADIOTAP is set in the @status flags. * * This function may not be called in IRQ context. Calls to this function - * for a single hardware must be synchronized against each other. Calls - * to this function and ieee80211_rx_irqsafe() may not be mixed for a - * single hardware. + * for a single hardware must be synchronized against each other. Calls to + * this function, ieee80211_rx_ni() and ieee80211_rx_irqsafe() may not be + * mixed for a single hardware. * - * Note that right now, this function must be called with softirqs disabled. + * In process context use instead ieee80211_rx_ni(). * * @hw: the hardware this frame came in on * @skb: the buffer to receive, owned by mac80211 after this call @@ -1682,14 +1682,34 @@ void ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb); * Like ieee80211_rx() but can be called in IRQ context * (internally defers to a tasklet.) * - * Calls to this function and ieee80211_rx() may not be mixed for a - * single hardware. + * Calls to this function, ieee80211_rx() or ieee80211_rx_ni() may not + * be mixed for a single hardware. * * @hw: the hardware this frame came in on * @skb: the buffer to receive, owned by mac80211 after this call */ void ieee80211_rx_irqsafe(struct ieee80211_hw *hw, struct sk_buff *skb); +/** + * ieee80211_rx_ni - receive frame (in process context) + * + * Like ieee80211_rx() but can be called in process context + * (internally disables bottom halves). + * + * Calls to this function, ieee80211_rx() and ieee80211_rx_irqsafe() may + * not be mixed for a single hardware. + * + * @hw: the hardware this frame came in on + * @skb: the buffer to receive, owned by mac80211 after this call + */ +static inline void ieee80211_rx_ni(struct ieee80211_hw *hw, + struct sk_buff *skb) +{ + local_bh_disable(); + ieee80211_rx(hw, skb); + local_bh_enable(); +} + /** * ieee80211_tx_status - transmit status callback * -- cgit v1.3-8-gc7d7 From d6ba452128178091dab7a04d54f7e66fdc32fb39 Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Mon, 26 Oct 2009 09:26:18 -0400 Subject: tpm add default function definitions Add default tpm_pcr_read/extend function definitions required by IMA/Kconfig changes. Signed-off-by: Mimi Zohar Reviewed-by: Eric Paris Signed-off-by: James Morris --- include/linux/tpm.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/tpm.h b/include/linux/tpm.h index 3338b3f5c21a..8eaa8f83effb 100644 --- a/include/linux/tpm.h +++ b/include/linux/tpm.h @@ -31,5 +31,12 @@ extern int tpm_pcr_read(u32 chip_num, int pcr_idx, u8 *res_buf); extern int tpm_pcr_extend(u32 chip_num, int pcr_idx, const u8 *hash); +#else +static inline int tpm_pcr_read(u32 chip_num, int pcr_idx, u8 *res_buf) { + return -ENODEV; +} +static inline int tpm_pcr_extend(u32 chip_num, int pcr_idx, const u8 *hash) { + return -ENODEV; +} #endif #endif -- cgit v1.3-8-gc7d7 From f7d7986060b2890fc26db6ab5203efbd33aa2497 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Sun, 18 Oct 2009 01:09:29 +0000 Subject: perf_event: Add alignment-faults and emulation-faults software events Add two more software events that are common to many cpus. Alignment faults: When a load or store is not aligned properly. Emulation faults: When an instruction is emulated in software. Both cause a very significant slowdown (100x or worse), so identifying and fixing them is very important. Signed-off-by: Anton Blanchard Signed-off-by: Paul Mackerras --- include/linux/perf_counter.h | 2 ++ include/linux/perf_event.h | 2 ++ kernel/perf_event.c | 2 ++ tools/perf/design.txt | 2 ++ tools/perf/util/parse-events.c | 4 ++++ 5 files changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 7b7fbf433cff..d6b95d1e79f0 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -106,6 +106,8 @@ enum perf_sw_ids { PERF_COUNT_SW_CPU_MIGRATIONS = 4, PERF_COUNT_SW_PAGE_FAULTS_MIN = 5, PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6, + PERF_COUNT_SW_ALIGNMENT_FAULTS = 7, + PERF_COUNT_SW_EMULATION_FAULTS = 8, PERF_COUNT_SW_MAX, /* non-ABI */ }; diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 2e6d95f97419..a33707a3a788 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -102,6 +102,8 @@ enum perf_sw_ids { PERF_COUNT_SW_CPU_MIGRATIONS = 4, PERF_COUNT_SW_PAGE_FAULTS_MIN = 5, PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6, + PERF_COUNT_SW_ALIGNMENT_FAULTS = 7, + PERF_COUNT_SW_EMULATION_FAULTS = 8, PERF_COUNT_SW_MAX, /* non-ABI */ }; diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 9d0b5c665883..0683b33cbb28 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -4186,6 +4186,8 @@ static const struct pmu *sw_perf_event_init(struct perf_event *event) case PERF_COUNT_SW_PAGE_FAULTS_MAJ: case PERF_COUNT_SW_CONTEXT_SWITCHES: case PERF_COUNT_SW_CPU_MIGRATIONS: + case PERF_COUNT_SW_ALIGNMENT_FAULTS: + case PERF_COUNT_SW_EMULATION_FAULTS: if (!event->parent) { atomic_inc(&perf_swevent_enabled[event_id]); event->destroy = sw_perf_event_destroy; diff --git a/tools/perf/design.txt b/tools/perf/design.txt index fdd42a824c98..f000c30877ac 100644 --- a/tools/perf/design.txt +++ b/tools/perf/design.txt @@ -137,6 +137,8 @@ enum sw_event_ids { PERF_COUNT_SW_CPU_MIGRATIONS = 4, PERF_COUNT_SW_PAGE_FAULTS_MIN = 5, PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6, + PERF_COUNT_SW_ALIGNMENT_FAULTS = 7, + PERF_COUNT_SW_EMULATION_FAULTS = 8, }; Counters of the type PERF_TYPE_TRACEPOINT are available when the ftrace event diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 8cfb48cbbea0..34bd84423933 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -46,6 +46,8 @@ static struct event_symbol event_symbols[] = { { CSW(PAGE_FAULTS_MAJ), "major-faults", "" }, { CSW(CONTEXT_SWITCHES), "context-switches", "cs" }, { CSW(CPU_MIGRATIONS), "cpu-migrations", "migrations" }, + { CSW(ALIGNMENT_FAULTS), "alignment-faults", "" }, + { CSW(EMULATION_FAULTS), "emulation-faults", "" }, }; #define __PERF_EVENT_FIELD(config, name) \ @@ -74,6 +76,8 @@ static const char *sw_event_names[] = { "CPU-migrations", "minor-faults", "major-faults", + "alignment-faults", + "emulation-faults", }; #define MAX_ALIASES 8 -- cgit v1.3-8-gc7d7 From 1af60fbd759d31f565552fea315c2033947cfbe6 Mon Sep 17 00:00:00 2001 From: Jeff Moyer Date: Fri, 2 Oct 2009 18:56:53 -0400 Subject: block: get rid of the WRITE_ODIRECT flag Hi, The WRITE_ODIRECT flag is only used in one place, and that code path happens to also call blk_run_address_space. The introduction of this flag, then, could result in the device being unplugged twice for every I/O. Further, with the batching changes in the next patch, we don't want an O_DIRECT write to imply a queue unplug. Signed-off-by: Jeff Moyer Signed-off-by: Jens Axboe --- fs/direct-io.c | 2 +- include/linux/fs.h | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/fs/direct-io.c b/fs/direct-io.c index 8b10b87dc01a..c86d35f142de 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -1124,7 +1124,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, int acquire_i_mutex = 0; if (rw & WRITE) - rw = WRITE_ODIRECT; + rw = WRITE_SYNC_PLUG; if (bdev) bdev_blkbits = blksize_bits(bdev_logical_block_size(bdev)); diff --git a/include/linux/fs.h b/include/linux/fs.h index 2620a8c63571..2f5fca4147c2 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -129,7 +129,6 @@ struct inodes_stat_t { * WRITE_SYNC Like WRITE_SYNC_PLUG, but also unplugs the device * immediately after submission. The write equivalent * of READ_SYNC. - * WRITE_ODIRECT Special case write for O_DIRECT only. * SWRITE_SYNC * SWRITE_SYNC_PLUG Like WRITE_SYNC/WRITE_SYNC_PLUG, but locks the buffer. * See SWRITE. @@ -151,7 +150,6 @@ struct inodes_stat_t { #define READ_META (READ | (1 << BIO_RW_META)) #define WRITE_SYNC_PLUG (WRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE)) #define WRITE_SYNC (WRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG)) -#define WRITE_ODIRECT (WRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG)) #define SWRITE_SYNC_PLUG \ (SWRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE)) #define SWRITE_SYNC (SWRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG)) -- cgit v1.3-8-gc7d7 From 44a0873d52282f24b1894c58c0f157e0f626ddc9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 27 Oct 2009 07:03:04 +0000 Subject: net: Introduce unregister_netdevice_queue() This patchs adds an unreg_list anchor to struct net_device, and introduces an unregister_netdevice_queue() function, able to queue a net_device to a list instead of immediately unregister it. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 9 ++++++++- net/core/dev.c | 20 +++++++++++++------- 2 files changed, 21 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 83800091a31a..0ded0a4768a0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -683,6 +683,7 @@ struct net_device struct list_head dev_list; struct list_head napi_list; + struct list_head unreg_list; /* Net device features */ unsigned long features; @@ -1116,7 +1117,13 @@ extern int dev_close(struct net_device *dev); extern void dev_disable_lro(struct net_device *dev); extern int dev_queue_xmit(struct sk_buff *skb); extern int register_netdevice(struct net_device *dev); -extern void unregister_netdevice(struct net_device *dev); +extern void unregister_netdevice_queue(struct net_device *dev, + struct list_head *head); +static inline void unregister_netdevice(struct net_device *dev) +{ + unregister_netdevice_queue(dev, NULL); +} + extern void free_netdev(struct net_device *dev); extern void synchronize_net(void); extern int register_netdevice_notifier(struct notifier_block *nb); diff --git a/net/core/dev.c b/net/core/dev.c index 950c13fa60d2..ff94e2b8df7f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5245,25 +5245,31 @@ void synchronize_net(void) EXPORT_SYMBOL(synchronize_net); /** - * unregister_netdevice - remove device from the kernel + * unregister_netdevice_queue - remove device from the kernel * @dev: device - * + * @head: list + * This function shuts down a device interface and removes it * from the kernel tables. + * If head not NULL, device is queued to be unregistered later. * * Callers must hold the rtnl semaphore. You may want * unregister_netdev() instead of this. */ -void unregister_netdevice(struct net_device *dev) +void unregister_netdevice_queue(struct net_device *dev, struct list_head *head) { ASSERT_RTNL(); - rollback_registered(dev); - /* Finish processing unregister after unlock */ - net_set_todo(dev); + if (head) { + list_add_tail(&dev->unreg_list, head); + } else { + rollback_registered(dev); + /* Finish processing unregister after unlock */ + net_set_todo(dev); + } } -EXPORT_SYMBOL(unregister_netdevice); +EXPORT_SYMBOL(unregister_netdevice_queue); /** * unregister_netdev - remove device from the kernel -- cgit v1.3-8-gc7d7 From 9b5e383c11b08784eb0087617f880077982ef769 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 27 Oct 2009 07:04:19 +0000 Subject: net: Introduce unregister_netdevice_many() Introduce rollback_registered_many() and unregister_netdevice_many() rollback_registered_many() is able to perform necessary steps at device dismantle time, factorizing two expensive synchronize_net() calls. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + net/core/dev.c | 97 +++++++++++++++++++++++++++++++---------------- 2 files changed, 66 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0ded0a4768a0..e7c227d7cb98 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1119,6 +1119,7 @@ extern int dev_queue_xmit(struct sk_buff *skb); extern int register_netdevice(struct net_device *dev); extern void unregister_netdevice_queue(struct net_device *dev, struct list_head *head); +extern void unregister_netdevice_many(struct list_head *head); static inline void unregister_netdevice(struct net_device *dev) { unregister_netdevice_queue(dev, NULL); diff --git a/net/core/dev.c b/net/core/dev.c index ff94e2b8df7f..04d3e3014020 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4637,59 +4637,76 @@ static void net_set_todo(struct net_device *dev) list_add_tail(&dev->todo_list, &net_todo_list); } -static void rollback_registered(struct net_device *dev) +static void rollback_registered_many(struct list_head *head) { + struct net_device *dev; + BUG_ON(dev_boot_phase); ASSERT_RTNL(); - /* Some devices call without registering for initialization unwind. */ - if (dev->reg_state == NETREG_UNINITIALIZED) { - printk(KERN_DEBUG "unregister_netdevice: device %s/%p never " - "was registered\n", dev->name, dev); + list_for_each_entry(dev, head, unreg_list) { + /* Some devices call without registering + * for initialization unwind. + */ + if (dev->reg_state == NETREG_UNINITIALIZED) { + pr_debug("unregister_netdevice: device %s/%p never " + "was registered\n", dev->name, dev); - WARN_ON(1); - return; - } + WARN_ON(1); + return; + } - BUG_ON(dev->reg_state != NETREG_REGISTERED); + BUG_ON(dev->reg_state != NETREG_REGISTERED); - /* If device is running, close it first. */ - dev_close(dev); + /* If device is running, close it first. */ + dev_close(dev); - /* And unlink it from device chain. */ - unlist_netdevice(dev); + /* And unlink it from device chain. */ + unlist_netdevice(dev); - dev->reg_state = NETREG_UNREGISTERING; + dev->reg_state = NETREG_UNREGISTERING; + } synchronize_net(); - /* Shutdown queueing discipline. */ - dev_shutdown(dev); + list_for_each_entry(dev, head, unreg_list) { + /* Shutdown queueing discipline. */ + dev_shutdown(dev); - /* Notify protocols, that we are about to destroy - this device. They should clean all the things. - */ - call_netdevice_notifiers(NETDEV_UNREGISTER, dev); + /* Notify protocols, that we are about to destroy + this device. They should clean all the things. + */ + call_netdevice_notifiers(NETDEV_UNREGISTER, dev); - /* - * Flush the unicast and multicast chains - */ - dev_unicast_flush(dev); - dev_addr_discard(dev); + /* + * Flush the unicast and multicast chains + */ + dev_unicast_flush(dev); + dev_addr_discard(dev); - if (dev->netdev_ops->ndo_uninit) - dev->netdev_ops->ndo_uninit(dev); + if (dev->netdev_ops->ndo_uninit) + dev->netdev_ops->ndo_uninit(dev); - /* Notifier chain MUST detach us from master device. */ - WARN_ON(dev->master); + /* Notifier chain MUST detach us from master device. */ + WARN_ON(dev->master); - /* Remove entries from kobject tree */ - netdev_unregister_kobject(dev); + /* Remove entries from kobject tree */ + netdev_unregister_kobject(dev); + } synchronize_net(); - dev_put(dev); + list_for_each_entry(dev, head, unreg_list) + dev_put(dev); +} + +static void rollback_registered(struct net_device *dev) +{ + LIST_HEAD(single); + + list_add(&dev->unreg_list, &single); + rollback_registered_many(&single); } static void __netdev_init_queue_locks_one(struct net_device *dev, @@ -5271,6 +5288,22 @@ void unregister_netdevice_queue(struct net_device *dev, struct list_head *head) } EXPORT_SYMBOL(unregister_netdevice_queue); +/** + * unregister_netdevice_many - unregister many devices + * @head: list of devices + * + */ +void unregister_netdevice_many(struct list_head *head) +{ + struct net_device *dev; + + if (!list_empty(head)) { + rollback_registered_many(head); + list_for_each_entry(dev, head, unreg_list) + net_set_todo(dev); + } +} + /** * unregister_netdev - remove device from the kernel * @dev: device -- cgit v1.3-8-gc7d7 From 23289a37e2b127dfc4de1313fba15bb4c9f0cd5b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 27 Oct 2009 07:06:36 +0000 Subject: net: add a list_head parameter to dellink() method Adding a list_head parameter to rtnl_link_ops->dellink() methods allow us to queue devices on a list, in order to dismantle them all at once. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/macvlan.c | 6 +++--- drivers/net/veth.c | 2 +- include/net/rtnetlink.h | 3 ++- net/8021q/vlan.c | 8 ++++---- net/8021q/vlan.h | 2 +- net/core/dev.c | 2 +- net/core/rtnetlink.c | 14 +++++++------- 7 files changed, 19 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 3aabfd9dd212..20b7707f38ef 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -555,13 +555,13 @@ static int macvlan_newlink(struct net_device *dev, return 0; } -static void macvlan_dellink(struct net_device *dev) +static void macvlan_dellink(struct net_device *dev, struct list_head *head) { struct macvlan_dev *vlan = netdev_priv(dev); struct macvlan_port *port = vlan->port; list_del(&vlan->list); - unregister_netdevice(dev); + unregister_netdevice_queue(dev, head); if (list_empty(&port->vlans)) macvlan_port_destroy(port->dev); @@ -601,7 +601,7 @@ static int macvlan_device_event(struct notifier_block *unused, break; case NETDEV_UNREGISTER: list_for_each_entry_safe(vlan, next, &port->vlans, list) - macvlan_dellink(vlan->dev); + macvlan_dellink(vlan->dev, NULL); break; } return NOTIFY_DONE; diff --git a/drivers/net/veth.c b/drivers/net/veth.c index ade5b344f75d..ffb502daa916 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -442,7 +442,7 @@ err_register_peer: return err; } -static void veth_dellink(struct net_device *dev) +static void veth_dellink(struct net_device *dev, struct list_head *head) { struct veth_priv *priv; struct net_device *peer; diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index c3aa044d3fc3..cd5af1f508f2 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -61,7 +61,8 @@ struct rtnl_link_ops { int (*changelink)(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]); - void (*dellink)(struct net_device *dev); + void (*dellink)(struct net_device *dev, + struct list_head *head); size_t (*get_size)(const struct net_device *dev); int (*fill_info)(struct sk_buff *skb, diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 8836575f9d79..6b5c9dddaa72 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -140,7 +140,7 @@ static void vlan_rcu_free(struct rcu_head *rcu) vlan_group_free(container_of(rcu, struct vlan_group, rcu)); } -void unregister_vlan_dev(struct net_device *dev) +void unregister_vlan_dev(struct net_device *dev, struct list_head *head) { struct vlan_dev_info *vlan = vlan_dev_info(dev); struct net_device *real_dev = vlan->real_dev; @@ -164,7 +164,7 @@ void unregister_vlan_dev(struct net_device *dev) synchronize_net(); - unregister_netdevice(dev); + unregister_netdevice_queue(dev, head); /* If the group is now empty, kill off the group. */ if (grp->nr_vlans == 0) { @@ -535,7 +535,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, if (grp->nr_vlans == 1) i = VLAN_GROUP_ARRAY_LEN; - unregister_vlan_dev(vlandev); + unregister_vlan_dev(vlandev, NULL); } break; } @@ -642,7 +642,7 @@ static int vlan_ioctl_handler(struct net *net, void __user *arg) err = -EPERM; if (!capable(CAP_NET_ADMIN)) break; - unregister_vlan_dev(dev); + unregister_vlan_dev(dev, NULL); err = 0; break; diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index 4ade5edf1033..68f9290e6837 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -82,7 +82,7 @@ void vlan_dev_get_realdev_name(const struct net_device *dev, char *result); int vlan_check_real_dev(struct net_device *real_dev, u16 vlan_id); void vlan_setup(struct net_device *dev); int register_vlan_dev(struct net_device *dev); -void unregister_vlan_dev(struct net_device *dev); +void unregister_vlan_dev(struct net_device *dev, struct list_head *head); static inline u32 vlan_get_ingress_priority(struct net_device *dev, u16 vlan_tci) diff --git a/net/core/dev.c b/net/core/dev.c index 04d3e3014020..4513dfd5718e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5629,7 +5629,7 @@ restart: /* Delete virtual devices */ if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink) { - dev->rtnl_link_ops->dellink(dev); + dev->rtnl_link_ops->dellink(dev, NULL); goto restart; } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 52ea418d5302..391a62cd9df6 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -248,7 +248,7 @@ static LIST_HEAD(link_ops); int __rtnl_link_register(struct rtnl_link_ops *ops) { if (!ops->dellink) - ops->dellink = unregister_netdevice; + ops->dellink = unregister_netdevice_queue; list_add_tail(&ops->list, &link_ops); return 0; @@ -277,13 +277,13 @@ EXPORT_SYMBOL_GPL(rtnl_link_register); static void __rtnl_kill_links(struct net *net, struct rtnl_link_ops *ops) { struct net_device *dev; -restart: + LIST_HEAD(list_kill); + for_each_netdev(net, dev) { - if (dev->rtnl_link_ops == ops) { - ops->dellink(dev); - goto restart; - } + if (dev->rtnl_link_ops == ops) + ops->dellink(dev, &list_kill); } + unregister_netdevice_many(&list_kill); } void rtnl_kill_links(struct net *net, struct rtnl_link_ops *ops) @@ -972,7 +972,7 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (!ops) return -EOPNOTSUPP; - ops->dellink(dev); + ops->dellink(dev, NULL); return 0; } -- cgit v1.3-8-gc7d7 From 63c8099d90096db56ee1c66c31f05d4fcfbc1c69 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 27 Oct 2009 07:06:49 +0000 Subject: vlan: Optimize multiple unregistration Use unregister_netdevice_many() to speedup master device unregister. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 1 + net/8021q/vlan.c | 49 +++++++++++++++++++++++++++++++++---------------- net/core/dev.c | 1 + 3 files changed, 35 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 8898cbebcf34..71a4870c09a9 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -85,6 +85,7 @@ struct vlan_group { * the vlan is attached to. */ unsigned int nr_vlans; + int killall; struct hlist_node hlist; /* linked list */ struct net_device **vlan_devices_arrays[VLAN_GROUP_ARRAY_SPLIT_PARTS]; struct rcu_head rcu; diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 6b5c9dddaa72..511afe72af31 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -159,11 +159,12 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head) if (real_dev->features & NETIF_F_HW_VLAN_FILTER) ops->ndo_vlan_rx_kill_vid(real_dev, vlan_id); - vlan_group_set_device(grp, vlan_id, NULL); grp->nr_vlans--; - synchronize_net(); - + if (!grp->killall) { + vlan_group_set_device(grp, vlan_id, NULL); + synchronize_net(); + } unregister_netdevice_queue(dev, head); /* If the group is now empty, kill off the group. */ @@ -183,6 +184,34 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head) dev_put(real_dev); } +void unregister_vlan_dev_alls(struct vlan_group *grp) +{ + LIST_HEAD(list); + int i; + struct net_device *vlandev; + struct vlan_group save; + + memcpy(&save, grp, sizeof(save)); + memset(&grp->vlan_devices_arrays, 0, sizeof(grp->vlan_devices_arrays)); + grp->killall = 1; + + synchronize_net(); + + /* Delete all VLANs for this dev. */ + for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) { + vlandev = vlan_group_get_device(&save, i); + if (!vlandev) + continue; + + unregister_vlan_dev(vlandev, &list); + if (grp->nr_vlans == 0) + break; + } + unregister_netdevice_many(&list); + for (i = 0; i < VLAN_GROUP_ARRAY_SPLIT_PARTS; i++) + kfree(save.vlan_devices_arrays[i]); +} + static void vlan_transfer_operstate(const struct net_device *dev, struct net_device *vlandev) { @@ -524,19 +553,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, break; case NETDEV_UNREGISTER: - /* Delete all VLANs for this dev. */ - for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) { - vlandev = vlan_group_get_device(grp, i); - if (!vlandev) - continue; - - /* unregistration of last vlan destroys group, abort - * afterwards */ - if (grp->nr_vlans == 1) - i = VLAN_GROUP_ARRAY_LEN; - - unregister_vlan_dev(vlandev, NULL); - } + unregister_vlan_dev_alls(grp); break; } diff --git a/net/core/dev.c b/net/core/dev.c index 4513dfd5718e..09551cc143a9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5303,6 +5303,7 @@ void unregister_netdevice_many(struct list_head *head) net_set_todo(dev); } } +EXPORT_SYMBOL(unregister_netdevice_many); /** * unregister_netdev - remove device from the kernel -- cgit v1.3-8-gc7d7 From ff76ec18cabb12a6c8f3c65bd1d23f1a770fe908 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 28 Oct 2009 12:26:39 -0700 Subject: tpm: fix header for modular build Fix build for TCG_TPM=m. Header file doesn't handle this and incorrectly builds stubs. drivers/char/tpm/tpm.c:720: error: redefinition of 'tpm_pcr_read' include/linux/tpm.h:35: error:previous definition of 'tpm_pcr_read' was here drivers/char/tpm/tpm.c:752: error: redefinition of 'tpm_pcr_extend' include/linux/tpm.h:38: error:previous definition of 'tpm_pcr_extend' was here Repairs linux-next's commit d6ba452128178091dab7a04d54f7e66fdc32fb39 Author: Mimi Zohar Date: Mon Oct 26 09:26:18 2009 -0400 tpm add default function definitions Signed-off-by: Randy Dunlap Cc: Rajiv Andrade Cc: Mimi Zohar Cc: James Morris Cc: Eric Paris Signed-off-by: Andrew Morton Signed-off-by: James Morris --- include/linux/tpm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/tpm.h b/include/linux/tpm.h index 8eaa8f83effb..ac5d1c1285d9 100644 --- a/include/linux/tpm.h +++ b/include/linux/tpm.h @@ -27,7 +27,7 @@ */ #define TPM_ANY_NUM 0xFFFF -#if defined(CONFIG_TCG_TPM) +#if defined(CONFIG_TCG_TPM) || defined(CONFIG_TCG_TPM_MODULE) extern int tpm_pcr_read(u32 chip_num, int pcr_idx, u8 *res_buf); extern int tpm_pcr_extend(u32 chip_num, int pcr_idx, const u8 *hash); -- cgit v1.3-8-gc7d7 From df5c79452f26f2a3d0883a213102515cfeb7aae9 Mon Sep 17 00:00:00 2001 From: Yi Zou Date: Wed, 28 Oct 2009 18:24:35 +0000 Subject: net: Add ndo_fcoe_get_wwn to net_device_ops Add ndo_fcoe_get_wwn so Fiber Channel over Ethernet (FCoE) can make use of the provided World Wide Port Name (WWPN) and World Wide Node Name (WWNN) from the underlying network interface driver. Signed-off-by: Yi Zou Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e7c227d7cb98..656110a46e96 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -635,6 +635,10 @@ struct net_device_ops { unsigned int sgc); int (*ndo_fcoe_ddp_done)(struct net_device *dev, u16 xid); +#define NETDEV_FCOE_WWNN 0 +#define NETDEV_FCOE_WWPN 1 + int (*ndo_fcoe_get_wwn)(struct net_device *dev, + u64 *wwn, int type); #endif }; -- cgit v1.3-8-gc7d7 From 022c3f7d82f0f1c68018696f2f027b87b9bb45c2 Mon Sep 17 00:00:00 2001 From: Gilad Ben-Yossef Date: Wed, 28 Oct 2009 04:15:22 +0000 Subject: Allow tcp_parse_options to consult dst entry We need tcp_parse_options to be aware of dst_entry to take into account per dst_entry TCP options settings Signed-off-by: Gilad Ben-Yossef Sigend-off-by: Ori Finkelman Sigend-off-by: Yony Amit Signed-off-by: David S. Miller --- include/net/tcp.h | 3 ++- net/ipv4/syncookies.c | 27 ++++++++++++++------------- net/ipv4/tcp_input.c | 9 ++++++--- net/ipv4/tcp_ipv4.c | 21 ++++++++++++--------- net/ipv4/tcp_minisocks.c | 7 +++++-- net/ipv6/syncookies.c | 28 +++++++++++++++------------- net/ipv6/tcp_ipv6.c | 3 ++- 7 files changed, 56 insertions(+), 42 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 03a49c703377..740d09be8e2d 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -409,7 +409,8 @@ extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, extern void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, - int estab); + int estab, + struct dst_entry *dst); extern u8 *tcp_parse_md5sig_option(struct tcphdr *th); diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 5ec678ad70ef..3146cc401748 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -276,13 +276,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV); - /* check for timestamp cookie support */ - memset(&tcp_opt, 0, sizeof(tcp_opt)); - tcp_parse_options(skb, &tcp_opt, 0); - - if (tcp_opt.saw_tstamp) - cookie_check_timestamp(&tcp_opt); - ret = NULL; req = inet_reqsk_alloc(&tcp_request_sock_ops); /* for safety */ if (!req) @@ -298,12 +291,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, ireq->loc_addr = ip_hdr(skb)->daddr; ireq->rmt_addr = ip_hdr(skb)->saddr; ireq->ecn_ok = 0; - ireq->snd_wscale = tcp_opt.snd_wscale; - ireq->rcv_wscale = tcp_opt.rcv_wscale; - ireq->sack_ok = tcp_opt.sack_ok; - ireq->wscale_ok = tcp_opt.wscale_ok; - ireq->tstamp_ok = tcp_opt.saw_tstamp; - req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; /* We throwed the options of the initial SYN away, so we hope * the ACK carries the same options again (see RFC1122 4.2.3.8) @@ -352,6 +339,20 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, } } + /* check for timestamp cookie support */ + memset(&tcp_opt, 0, sizeof(tcp_opt)); + tcp_parse_options(skb, &tcp_opt, 0, &rt->u.dst); + + if (tcp_opt.saw_tstamp) + cookie_check_timestamp(&tcp_opt); + + ireq->snd_wscale = tcp_opt.snd_wscale; + ireq->rcv_wscale = tcp_opt.rcv_wscale; + ireq->sack_ok = tcp_opt.sack_ok; + ireq->wscale_ok = tcp_opt.wscale_ok; + ireq->tstamp_ok = tcp_opt.saw_tstamp; + req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; + /* Try to redo what tcp_v4_send_synack did. */ req->window_clamp = tp->window_clamp ? :dst_metric(&rt->u.dst, RTAX_WINDOW); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a0c3700bae3a..c7625005486d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3698,12 +3698,14 @@ old_ack: * the fast version below fails. */ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, - int estab) + int estab, struct dst_entry *dst) { unsigned char *ptr; struct tcphdr *th = tcp_hdr(skb); int length = (th->doff * 4) - sizeof(struct tcphdr); + BUG_ON(!estab && !dst); + ptr = (unsigned char *)(th + 1); opt_rx->saw_tstamp = 0; @@ -3820,7 +3822,7 @@ static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, if (tcp_parse_aligned_timestamp(tp, th)) return 1; } - tcp_parse_options(skb, &tp->rx_opt, 1); + tcp_parse_options(skb, &tp->rx_opt, 1, NULL); return 1; } @@ -5364,8 +5366,9 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); int saved_clamp = tp->rx_opt.mss_clamp; + struct dst_entry *dst = __sk_dst_get(sk); - tcp_parse_options(skb, &tp->rx_opt, 0); + tcp_parse_options(skb, &tp->rx_opt, 0, dst); if (th->ack) { /* rfc793: diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a4a3390a5287..657ae334f125 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1257,11 +1257,21 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops; #endif + ireq = inet_rsk(req); + ireq->loc_addr = daddr; + ireq->rmt_addr = saddr; + ireq->no_srccheck = inet_sk(sk)->transparent; + ireq->opt = tcp_v4_save_options(sk, skb); + + dst = inet_csk_route_req(sk, req); + if(!dst) + goto drop_and_free; + tcp_clear_options(&tmp_opt); tmp_opt.mss_clamp = 536; tmp_opt.user_mss = tcp_sk(sk)->rx_opt.user_mss; - tcp_parse_options(skb, &tmp_opt, 0); + tcp_parse_options(skb, &tmp_opt, 0, dst); if (want_cookie && !tmp_opt.saw_tstamp) tcp_clear_options(&tmp_opt); @@ -1270,14 +1280,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) tcp_openreq_init(req, &tmp_opt, skb); - ireq = inet_rsk(req); - ireq->loc_addr = daddr; - ireq->rmt_addr = saddr; - ireq->no_srccheck = inet_sk(sk)->transparent; - ireq->opt = tcp_v4_save_options(sk, skb); - if (security_inet_conn_request(sk, skb, req)) - goto drop_and_free; + goto drop_and_release; if (!want_cookie) TCP_ECN_create_request(req, tcp_hdr(skb)); @@ -1302,7 +1306,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) */ if (tmp_opt.saw_tstamp && tcp_death_row.sysctl_tw_recycle && - (dst = inet_csk_route_req(sk, req)) != NULL && (peer = rt_get_peer((struct rtable *)dst)) != NULL && peer->v4daddr == saddr) { if (get_seconds() < peer->tcp_ts_stamp + TCP_PAWS_MSL && diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 015e7c67dc88..463d51b53d37 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -102,7 +102,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) { tmp_opt.tstamp_ok = 1; - tcp_parse_options(skb, &tmp_opt, 1); + tcp_parse_options(skb, &tmp_opt, 1, NULL); if (tmp_opt.saw_tstamp) { tmp_opt.ts_recent = tcptw->tw_ts_recent; @@ -500,10 +500,11 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, int paws_reject = 0; struct tcp_options_received tmp_opt; struct sock *child; + struct dst_entry *dst = inet_csk_route_req(sk, req); tmp_opt.saw_tstamp = 0; if (th->doff > (sizeof(struct tcphdr)>>2)) { - tcp_parse_options(skb, &tmp_opt, 0); + tcp_parse_options(skb, &tmp_opt, 0, dst); if (tmp_opt.saw_tstamp) { tmp_opt.ts_recent = req->ts_recent; @@ -516,6 +517,8 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, } } + dst_release(dst); + /* Check for pure retransmitted SYN. */ if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn && flg == TCP_FLAG_SYN && diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index c46da533888a..612fc53e0bb9 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -184,13 +184,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV); - /* check for timestamp cookie support */ - memset(&tcp_opt, 0, sizeof(tcp_opt)); - tcp_parse_options(skb, &tcp_opt, 0); - - if (tcp_opt.saw_tstamp) - cookie_check_timestamp(&tcp_opt); - ret = NULL; req = inet6_reqsk_alloc(&tcp6_request_sock_ops); if (!req) @@ -224,12 +217,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) req->expires = 0UL; req->retrans = 0; ireq->ecn_ok = 0; - ireq->snd_wscale = tcp_opt.snd_wscale; - ireq->rcv_wscale = tcp_opt.rcv_wscale; - ireq->sack_ok = tcp_opt.sack_ok; - ireq->wscale_ok = tcp_opt.wscale_ok; - ireq->tstamp_ok = tcp_opt.saw_tstamp; - req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; treq->rcv_isn = ntohl(th->seq) - 1; treq->snt_isn = cookie; @@ -265,6 +252,21 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) goto out_free; } + /* check for timestamp cookie support */ + memset(&tcp_opt, 0, sizeof(tcp_opt)); + tcp_parse_options(skb, &tcp_opt, 0, dst); + + if (tcp_opt.saw_tstamp) + cookie_check_timestamp(&tcp_opt); + + req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; + + ireq->snd_wscale = tcp_opt.snd_wscale; + ireq->rcv_wscale = tcp_opt.rcv_wscale; + ireq->sack_ok = tcp_opt.sack_ok; + ireq->wscale_ok = tcp_opt.wscale_ok; + ireq->tstamp_ok = tcp_opt.saw_tstamp; + req->window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW); tcp_select_initial_window(tcp_full_space(sk), req->mss, &req->rcv_wnd, &req->window_clamp, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index c54ec3615ded..34925f089e07 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1167,6 +1167,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) struct tcp_sock *tp = tcp_sk(sk); struct request_sock *req = NULL; __u32 isn = TCP_SKB_CB(skb)->when; + struct dst_entry *dst = __sk_dst_get(sk); #ifdef CONFIG_SYN_COOKIES int want_cookie = 0; #else @@ -1205,7 +1206,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); tmp_opt.user_mss = tp->rx_opt.user_mss; - tcp_parse_options(skb, &tmp_opt, 0); + tcp_parse_options(skb, &tmp_opt, 0, dst); if (want_cookie && !tmp_opt.saw_tstamp) tcp_clear_options(&tmp_opt); -- cgit v1.3-8-gc7d7 From 0c3adfb8ec9f85a63556b70f11e0fcf280545951 Mon Sep 17 00:00:00 2001 From: Gilad Ben-Yossef Date: Wed, 28 Oct 2009 04:15:23 +0000 Subject: Add dst_feature to query route entry features Adding an accessor to existing dst_entry feautres field and refactor the only supported feature (allfrag) to use it. Signed-off-by: Gilad Ben-Yossef Sigend-off-by: Ori Finkelman Sigend-off-by: Yony Amit Signed-off-by: David S. Miller --- include/net/dst.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/dst.h b/include/net/dst.h index 720d90653a8e..6377ab2feba9 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -111,6 +111,12 @@ dst_metric(const struct dst_entry *dst, int metric) return dst->metrics[metric-1]; } +static inline u32 +dst_feature(const struct dst_entry *dst, u32 feature) +{ + return dst_metric(dst, RTAX_FEATURES) & feature; +} + static inline u32 dst_mtu(const struct dst_entry *dst) { u32 mtu = dst_metric(dst, RTAX_MTU); @@ -136,7 +142,7 @@ static inline void set_dst_metric_rtt(struct dst_entry *dst, int metric, static inline u32 dst_allfrag(const struct dst_entry *dst) { - int ret = dst_metric(dst, RTAX_FEATURES) & RTAX_FEATURE_ALLFRAG; + int ret = dst_feature(dst, RTAX_FEATURE_ALLFRAG); /* Yes, _exactly_. This is paranoia. */ barrier(); return ret; -- cgit v1.3-8-gc7d7 From 1aba721eba1d84a2defce45b950272cee1e6c72a Mon Sep 17 00:00:00 2001 From: Gilad Ben-Yossef Date: Wed, 28 Oct 2009 04:15:24 +0000 Subject: Add the no SACK route option feature Implement querying and acting upon the no sack bit in the features field. Signed-off-by: Gilad Ben-Yossef Sigend-off-by: Ori Finkelman Sigend-off-by: Yony Amit Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 2 +- net/ipv4/tcp_input.c | 3 ++- net/ipv4/tcp_output.c | 4 +++- 3 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index adf2068d12b5..9c802a6b04d3 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -377,7 +377,7 @@ enum #define RTAX_MAX (__RTAX_MAX - 1) #define RTAX_FEATURE_ECN 0x00000001 -#define RTAX_FEATURE_SACK 0x00000002 +#define RTAX_FEATURE_NO_SACK 0x00000002 #define RTAX_FEATURE_TIMESTAMP 0x00000004 #define RTAX_FEATURE_ALLFRAG 0x00000008 diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c7625005486d..5fb25f977451 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3763,7 +3763,8 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, break; case TCPOPT_SACK_PERM: if (opsize == TCPOLEN_SACK_PERM && th->syn && - !estab && sysctl_tcp_sack) { + !estab && sysctl_tcp_sack && + !dst_feature(dst, RTAX_FEATURE_NO_SACK)) { opt_rx->sack_ok = 1; tcp_sack_reset(opt_rx); } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 2e2eb74ac4cc..b35802af3c4c 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -464,6 +464,7 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb, struct tcp_md5sig_key **md5) { struct tcp_sock *tp = tcp_sk(sk); unsigned size = 0; + struct dst_entry *dst = __sk_dst_get(sk); #ifdef CONFIG_TCP_MD5SIG *md5 = tp->af_specific->md5_lookup(sk, sk); @@ -498,7 +499,8 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb, opts->options |= OPTION_WSCALE; size += TCPOLEN_WSCALE_ALIGNED; } - if (likely(sysctl_tcp_sack)) { + if (likely(sysctl_tcp_sack && + !dst_feature(dst, RTAX_FEATURE_NO_SACK))) { opts->options |= OPTION_SACK_ADVERTISE; if (unlikely(!(OPTION_TS & opts->options))) size += TCPOLEN_SACKPERM_ALIGNED; -- cgit v1.3-8-gc7d7 From cda42ebd67ee5fdf09d7057b5a4584d36fe8a335 Mon Sep 17 00:00:00 2001 From: Gilad Ben-Yossef Date: Wed, 28 Oct 2009 04:15:25 +0000 Subject: Allow disabling TCP timestamp options per route Implement querying and acting upon the no timestamp bit in the feature field. Signed-off-by: Gilad Ben-Yossef Sigend-off-by: Ori Finkelman Sigend-off-by: Yony Amit Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 2 +- net/ipv4/tcp_input.c | 3 ++- net/ipv4/tcp_output.c | 8 ++++++-- 3 files changed, 9 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 9c802a6b04d3..2ab8c758b46c 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -378,7 +378,7 @@ enum #define RTAX_FEATURE_ECN 0x00000001 #define RTAX_FEATURE_NO_SACK 0x00000002 -#define RTAX_FEATURE_TIMESTAMP 0x00000004 +#define RTAX_FEATURE_NO_TSTAMP 0x00000004 #define RTAX_FEATURE_ALLFRAG 0x00000008 struct rta_session diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 5fb25f977451..6097491aa9fc 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3755,7 +3755,8 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, case TCPOPT_TIMESTAMP: if ((opsize == TCPOLEN_TIMESTAMP) && ((estab && opt_rx->tstamp_ok) || - (!estab && sysctl_tcp_timestamps))) { + (!estab && sysctl_tcp_timestamps && + !dst_feature(dst, RTAX_FEATURE_NO_TSTAMP)))) { opt_rx->saw_tstamp = 1; opt_rx->rcv_tsval = get_unaligned_be32(ptr); opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index b35802af3c4c..8819eba8ebb8 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -488,7 +488,9 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb, opts->mss = tcp_advertise_mss(sk); size += TCPOLEN_MSS_ALIGNED; - if (likely(sysctl_tcp_timestamps && *md5 == NULL)) { + if (likely(sysctl_tcp_timestamps && + !dst_feature(dst, RTAX_FEATURE_NO_TSTAMP) && + *md5 == NULL)) { opts->options |= OPTION_TS; opts->tsval = TCP_SKB_CB(skb)->when; opts->tsecr = tp->rx_opt.ts_recent; @@ -2317,7 +2319,9 @@ static void tcp_connect_init(struct sock *sk) * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT. */ tp->tcp_header_len = sizeof(struct tcphdr) + - (sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0); + (sysctl_tcp_timestamps && + (!dst_feature(dst, RTAX_FEATURE_NO_TSTAMP) ? + TCPOLEN_TSTAMP_ALIGNED : 0)); #ifdef CONFIG_TCP_MD5SIG if (tp->af_specific->md5_lookup(sk, sk) != NULL) -- cgit v1.3-8-gc7d7 From 345cda2fd695534be5a4494f1b59da9daed33663 Mon Sep 17 00:00:00 2001 From: Gilad Ben-Yossef Date: Wed, 28 Oct 2009 04:15:26 +0000 Subject: Allow to turn off TCP window scale opt per route Add and use no window scale bit in the features field. Note that this is not the same as setting a window scale of 0 as would happen with window limit on route. Signed-off-by: Gilad Ben-Yossef Sigend-off-by: Ori Finkelman Sigend-off-by: Yony Amit Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 1 + net/ipv4/tcp_input.c | 3 ++- net/ipv4/tcp_output.c | 6 ++++-- 3 files changed, 7 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 2ab8c758b46c..6784b342cbbf 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -380,6 +380,7 @@ enum #define RTAX_FEATURE_NO_SACK 0x00000002 #define RTAX_FEATURE_NO_TSTAMP 0x00000004 #define RTAX_FEATURE_ALLFRAG 0x00000008 +#define RTAX_FEATURE_NO_WSCALE 0x00000010 struct rta_session { diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 6097491aa9fc..393c56921dcb 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3739,7 +3739,8 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, break; case TCPOPT_WINDOW: if (opsize == TCPOLEN_WINDOW && th->syn && - !estab && sysctl_tcp_window_scaling) { + !estab && sysctl_tcp_window_scaling && + !dst_feature(dst, RTAX_FEATURE_NO_WSCALE)) { __u8 snd_wscale = *(__u8 *)ptr; opt_rx->wscale_ok = 1; if (snd_wscale > 14) { diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 8819eba8ebb8..616c686ca253 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -496,7 +496,8 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb, opts->tsecr = tp->rx_opt.ts_recent; size += TCPOLEN_TSTAMP_ALIGNED; } - if (likely(sysctl_tcp_window_scaling)) { + if (likely(sysctl_tcp_window_scaling && + !dst_feature(dst, RTAX_FEATURE_NO_WSCALE))) { opts->ws = tp->rx_opt.rcv_wscale; opts->options |= OPTION_WSCALE; size += TCPOLEN_WSCALE_ALIGNED; @@ -2347,7 +2348,8 @@ static void tcp_connect_init(struct sock *sk) tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0), &tp->rcv_wnd, &tp->window_clamp, - sysctl_tcp_window_scaling, + (sysctl_tcp_window_scaling && + !dst_feature(dst, RTAX_FEATURE_NO_WSCALE)), &rcv_wscale); tp->rx_opt.rcv_wscale = rcv_wscale; -- cgit v1.3-8-gc7d7 From dc343475ed062e13fc260acccaab91d7d80fd5b2 Mon Sep 17 00:00:00 2001 From: Gilad Ben-Yossef Date: Wed, 28 Oct 2009 04:15:27 +0000 Subject: Allow disabling of DSACK TCP option per route Add and use no DSCAK bit in the features field. Signed-off-by: Gilad Ben-Yossef Sigend-off-by: Ori Finkelman Sigend-off-by: Yony Amit Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 1 + net/ipv4/tcp_input.c | 8 ++++++-- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 6784b342cbbf..e78b60cd65a4 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -381,6 +381,7 @@ enum #define RTAX_FEATURE_NO_TSTAMP 0x00000004 #define RTAX_FEATURE_ALLFRAG 0x00000008 #define RTAX_FEATURE_NO_WSCALE 0x00000010 +#define RTAX_FEATURE_NO_DSACK 0x00000020 struct rta_session { diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 393c56921dcb..ba0eab65fe80 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4080,8 +4080,10 @@ static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq) { struct tcp_sock *tp = tcp_sk(sk); + struct dst_entry *dst = __sk_dst_get(sk); - if (tcp_is_sack(tp) && sysctl_tcp_dsack) { + if (tcp_is_sack(tp) && sysctl_tcp_dsack && + !dst_feature(dst, RTAX_FEATURE_NO_DSACK)) { int mib_idx; if (before(seq, tp->rcv_nxt)) @@ -4110,13 +4112,15 @@ static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq) static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); + struct dst_entry *dst = __sk_dst_get(sk); if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST); tcp_enter_quickack_mode(sk); - if (tcp_is_sack(tp) && sysctl_tcp_dsack) { + if (tcp_is_sack(tp) && sysctl_tcp_dsack && + !dst_feature(dst, RTAX_FEATURE_NO_DSACK)) { u32 end_seq = TCP_SKB_CB(skb)->end_seq; if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) -- cgit v1.3-8-gc7d7 From fb699dfd426a189fe33b91586c15176a75c8aed0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 19 Oct 2009 19:18:49 +0000 Subject: net: Introduce dev_get_by_index_rcu() Some workloads hit dev_base_lock rwlock pretty hard. We can use RCU lookups to avoid touching this rwlock. netdevices are already freed after a RCU grace period, so this patch adds no penalty at device dismantle time. dev_ifname() converted to dev_get_by_index_rcu() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + net/core/dev.c | 48 +++++++++++++++++++++++++++++++++++++---------- 2 files changed, 39 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 656110a46e96..ffc3106cc037 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1139,6 +1139,7 @@ extern void netdev_resync_ops(struct net_device *dev); extern int call_netdevice_notifiers(unsigned long val, struct net_device *dev); extern struct net_device *dev_get_by_index(struct net *net, int ifindex); extern struct net_device *__dev_get_by_index(struct net *net, int ifindex); +extern struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex); extern int dev_restart(struct net_device *dev); #ifdef CONFIG_NETPOLL_TRAP extern int netpoll_trap(void); diff --git a/net/core/dev.c b/net/core/dev.c index 09551cc143a9..68a1bb68b5a8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -214,12 +214,15 @@ static int list_netdevice(struct net_device *dev) write_lock_bh(&dev_base_lock); list_add_tail(&dev->dev_list, &net->dev_base_head); hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name)); - hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex)); + hlist_add_head_rcu(&dev->index_hlist, + dev_index_hash(net, dev->ifindex)); write_unlock_bh(&dev_base_lock); return 0; } -/* Device list removal */ +/* Device list removal + * caller must respect a RCU grace period before freeing/reusing dev + */ static void unlist_netdevice(struct net_device *dev) { ASSERT_RTNL(); @@ -228,7 +231,7 @@ static void unlist_netdevice(struct net_device *dev) write_lock_bh(&dev_base_lock); list_del(&dev->dev_list); hlist_del(&dev->name_hlist); - hlist_del(&dev->index_hlist); + hlist_del_rcu(&dev->index_hlist); write_unlock_bh(&dev_base_lock); } @@ -646,6 +649,31 @@ struct net_device *__dev_get_by_index(struct net *net, int ifindex) } EXPORT_SYMBOL(__dev_get_by_index); +/** + * dev_get_by_index_rcu - find a device by its ifindex + * @net: the applicable net namespace + * @ifindex: index of device + * + * Search for an interface by index. Returns %NULL if the device + * is not found or a pointer to the device. The device has not + * had its reference counter increased so the caller must be careful + * about locking. The caller must hold RCU lock. + */ + +struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex) +{ + struct hlist_node *p; + struct net_device *dev; + struct hlist_head *head = dev_index_hash(net, ifindex); + + hlist_for_each_entry_rcu(dev, p, head, index_hlist) + if (dev->ifindex == ifindex) + return dev; + + return NULL; +} +EXPORT_SYMBOL(dev_get_by_index_rcu); + /** * dev_get_by_index - find a device by its ifindex @@ -662,11 +690,11 @@ struct net_device *dev_get_by_index(struct net *net, int ifindex) { struct net_device *dev; - read_lock(&dev_base_lock); - dev = __dev_get_by_index(net, ifindex); + rcu_read_lock(); + dev = dev_get_by_index_rcu(net, ifindex); if (dev) dev_hold(dev); - read_unlock(&dev_base_lock); + rcu_read_unlock(); return dev; } EXPORT_SYMBOL(dev_get_by_index); @@ -2939,15 +2967,15 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg) if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) return -EFAULT; - read_lock(&dev_base_lock); - dev = __dev_get_by_index(net, ifr.ifr_ifindex); + rcu_read_lock(); + dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex); if (!dev) { - read_unlock(&dev_base_lock); + rcu_read_unlock(); return -ENODEV; } strcpy(ifr.ifr_name, dev->name); - read_unlock(&dev_base_lock); + rcu_read_unlock(); if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) return -EFAULT; -- cgit v1.3-8-gc7d7 From 38bfd8f5bec496e8e0db8849e01c99a33479418a Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 29 Oct 2009 02:59:18 -0700 Subject: net,socket: introduce DECLARE_SOCKADDR helper to catch overflow at build time proto_ops->getname implies copying protocol specific data into storage unit (particulary to __kernel_sockaddr_storage). So when we implement new protocol support we should keep such a detail in mind (which is easy to forget about). Lets introduce DECLARE_SOCKADDR helper which check if storage unit is not overfowed at build time. Eventually inet_getname is switched to use DECLARE_SOCKADDR (to show example of usage). Signed-off-by: Cyrill Gorcunov Signed-off-by: David S. Miller --- include/linux/net.h | 3 +++ include/linux/socket.h | 3 +++ net/ipv4/af_inet.c | 2 +- 3 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/net.h b/include/linux/net.h index b42bb60fe92f..4da9d571b053 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -199,6 +199,9 @@ struct proto_ops { struct pipe_inode_info *pipe, size_t len, unsigned int flags); }; +#define DECLARE_SOCKADDR(type, dst, src) \ + type dst = ({ __sockaddr_check_size(sizeof(*dst)); (type) src; }) + struct net_proto_family { int family; int (*create)(struct net *net, struct socket *sock, int protocol); diff --git a/include/linux/socket.h b/include/linux/socket.h index 59966f12990c..7b3aae2052a6 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -24,6 +24,9 @@ struct __kernel_sockaddr_storage { #include /* pid_t */ #include /* __user */ +#define __sockaddr_check_size(size) \ + BUILD_BUG_ON(((size) > sizeof(struct __kernel_sockaddr_storage))) + #ifdef __KERNEL__ # ifdef CONFIG_PROC_FS struct seq_file; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 04a14b1600ac..538e84d0bcba 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -685,7 +685,7 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr, { struct sock *sk = sock->sk; struct inet_sock *inet = inet_sk(sk); - struct sockaddr_in *sin = (struct sockaddr_in *)uaddr; + DECLARE_SOCKADDR(struct sockaddr_in *, sin, uaddr); sin->sin_family = AF_INET; if (peer) { -- cgit v1.3-8-gc7d7 From b9d128f1088ea5245109dfc9bbceb128b6371a77 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 29 Oct 2009 13:59:26 +0100 Subject: block: move bdi/address_space unplug functions to backing-dev.h There's nothing block related about them, the backing device is used by things like NFS etc as well. This gets rid of the need to protect such calls by CONFIG_BLOCK. Signed-off-by: Jens Axboe --- fs/aio.c | 1 + include/linux/backing-dev.h | 13 +++++++++++++ include/linux/blkdev.h | 13 ------------- 3 files changed, 14 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/fs/aio.c b/fs/aio.c index cf0bef428f88..c30dfc006108 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #define DEBUG 0 diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index b449e738533a..fcbc26af00e4 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -331,4 +331,17 @@ static inline int bdi_sched_wait(void *word) return 0; } +static inline void blk_run_backing_dev(struct backing_dev_info *bdi, + struct page *page) +{ + if (bdi && bdi->unplug_io_fn) + bdi->unplug_io_fn(bdi, page); +} + +static inline void blk_run_address_space(struct address_space *mapping) +{ + if (mapping) + blk_run_backing_dev(mapping->backing_dev_info, NULL); +} + #endif /* _LINUX_BACKING_DEV_H */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 221cecd86bd3..39c601f783a0 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -823,19 +823,6 @@ static inline struct request_queue *bdev_get_queue(struct block_device *bdev) return bdev->bd_disk->queue; } -static inline void blk_run_backing_dev(struct backing_dev_info *bdi, - struct page *page) -{ - if (bdi && bdi->unplug_io_fn) - bdi->unplug_io_fn(bdi, page); -} - -static inline void blk_run_address_space(struct address_space *mapping) -{ - if (mapping) - blk_run_backing_dev(mapping->backing_dev_info, NULL); -} - /* * blk_rq_pos() : the current sector * blk_rq_bytes() : bytes left in the entire request -- cgit v1.3-8-gc7d7 From 5975c725dfd6f7d36f493ab1453fbdbd35c1f0e3 Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Thu, 29 Oct 2009 11:40:17 -0500 Subject: define convenient securebits masks for prctl users (v2) Hi James, would you mind taking the following into security-testing? The securebits are used by passing them to prctl with the PR_{S,G}ET_SECUREBITS commands. But the defines must be shifted to be used in prctl, which begs to be confused and misused by userspace. So define some more convenient values for userspace to specify. This way userspace does prctl(PR_SET_SECUREBITS, SECBIT_NOROOT); instead of prctl(PR_SET_SECUREBITS, 1 << SECURE_NOROOT); (Thanks to Michael for the idea) This patch also adds include/linux/securebits to the installed headers. Then perhaps it can be included by glibc's sys/prctl.h. Changelog: Oct 29: Stephen Rothwell points out that issecure can be under __KERNEL__. Oct 14: (Suggestions by Michael Kerrisk): 1. spell out SETUID in SECBIT_NO_SETUID* 2. SECBIT_X_LOCKED does not imply SECBIT_X 3. add definitions for keepcaps Oct 14: As suggested by Michael Kerrisk, don't use SB_* as that convention is already in use. Use SECBIT_ prefix instead. Signed-off-by: Serge E. Hallyn Acked-by: Andrew G. Morgan Acked-by: Michael Kerrisk Cc: Ulrich Drepper Cc: James Morris Signed-off-by: James Morris --- include/linux/Kbuild | 1 + include/linux/securebits.h | 24 ++++++++++++++++++------ 2 files changed, 19 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index cff4a101f266..ffcdb9b509db 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -329,6 +329,7 @@ unifdef-y += scc.h unifdef-y += sched.h unifdef-y += screen_info.h unifdef-y += sdla.h +unifdef-y += securebits.h unifdef-y += selinux_netlink.h unifdef-y += sem.h unifdef-y += serial_core.h diff --git a/include/linux/securebits.h b/include/linux/securebits.h index d2c5ed845bcc..33406174cbe8 100644 --- a/include/linux/securebits.h +++ b/include/linux/securebits.h @@ -1,6 +1,15 @@ #ifndef _LINUX_SECUREBITS_H #define _LINUX_SECUREBITS_H 1 +/* Each securesetting is implemented using two bits. One bit specifies + whether the setting is on or off. The other bit specify whether the + setting is locked or not. A setting which is locked cannot be + changed from user-level. */ +#define issecure_mask(X) (1 << (X)) +#ifdef __KERNEL__ +#define issecure(X) (issecure_mask(X) & current_cred_xxx(securebits)) +#endif + #define SECUREBITS_DEFAULT 0x00000000 /* When set UID 0 has no special privileges. When unset, we support @@ -12,6 +21,9 @@ #define SECURE_NOROOT 0 #define SECURE_NOROOT_LOCKED 1 /* make bit-0 immutable */ +#define SECBIT_NOROOT (issecure_mask(SECURE_NOROOT)) +#define SECBIT_NOROOT_LOCKED (issecure_mask(SECURE_NOROOT_LOCKED)) + /* When set, setuid to/from uid 0 does not trigger capability-"fixup". When unset, to provide compatiblility with old programs relying on set*uid to gain/lose privilege, transitions to/from uid 0 cause @@ -19,6 +31,10 @@ #define SECURE_NO_SETUID_FIXUP 2 #define SECURE_NO_SETUID_FIXUP_LOCKED 3 /* make bit-2 immutable */ +#define SECBIT_NO_SETUID_FIXUP (issecure_mask(SECURE_NO_SETUID_FIXUP)) +#define SECBIT_NO_SETUID_FIXUP_LOCKED \ + (issecure_mask(SECURE_NO_SETUID_FIXUP_LOCKED)) + /* When set, a process can retain its capabilities even after transitioning to a non-root user (the set-uid fixup suppressed by bit 2). Bit-4 is cleared when a process calls exec(); setting both @@ -27,12 +43,8 @@ #define SECURE_KEEP_CAPS 4 #define SECURE_KEEP_CAPS_LOCKED 5 /* make bit-4 immutable */ -/* Each securesetting is implemented using two bits. One bit specifies - whether the setting is on or off. The other bit specify whether the - setting is locked or not. A setting which is locked cannot be - changed from user-level. */ -#define issecure_mask(X) (1 << (X)) -#define issecure(X) (issecure_mask(X) & current_cred_xxx(securebits)) +#define SECBIT_KEEP_CAPS (issecure_mask(SECURE_KEEP_CAPS)) +#define SECBIT_KEEP_CAPS_LOCKED (issecure_mask(SECURE_KEEP_CAPS_LOCKED)) #define SECURE_ALL_BITS (issecure_mask(SECURE_NOROOT) | \ issecure_mask(SECURE_NO_SETUID_FIXUP) | \ -- cgit v1.3-8-gc7d7 From 5b252f0c2f98df21fadf0f6cf189b87a0b938228 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 29 Oct 2009 07:17:09 +0000 Subject: gro: Name the GRO result enumeration type This clarifies which return and parameter types are GRO result codes and not RX result codes. Signed-off-by: Ben Hutchings Acked-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/netdevice.h | 10 ++++++---- net/8021q/vlan_core.c | 5 +++-- net/core/dev.c | 19 ++++++++++++++----- 3 files changed, 23 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ffc3106cc037..6e777efe149e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -348,13 +348,14 @@ enum NAPI_STATE_NPSVC, /* Netpoll - don't dequeue from poll_list */ }; -enum { +enum gro_result { GRO_MERGED, GRO_MERGED_FREE, GRO_HELD, GRO_NORMAL, GRO_DROP, }; +typedef enum gro_result gro_result_t; extern void __napi_schedule(struct napi_struct *n); @@ -1480,16 +1481,17 @@ extern int netif_rx_ni(struct sk_buff *skb); #define HAVE_NETIF_RECEIVE_SKB 1 extern int netif_receive_skb(struct sk_buff *skb); extern void napi_gro_flush(struct napi_struct *napi); -extern int dev_gro_receive(struct napi_struct *napi, +extern gro_result_t dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb); -extern int napi_skb_finish(int ret, struct sk_buff *skb); +extern int napi_skb_finish(gro_result_t ret, struct sk_buff *skb); extern int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb); extern void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb); extern struct sk_buff * napi_get_frags(struct napi_struct *napi); extern int napi_frags_finish(struct napi_struct *napi, - struct sk_buff *skb, int ret); + struct sk_buff *skb, + gro_result_t ret); extern struct sk_buff * napi_frags_skb(struct napi_struct *napi); extern int napi_gro_frags(struct napi_struct *napi); diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 7f7de1a04de6..47a80d65c3b7 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -74,8 +74,9 @@ u16 vlan_dev_vlan_id(const struct net_device *dev) } EXPORT_SYMBOL(vlan_dev_vlan_id); -static int vlan_gro_common(struct napi_struct *napi, struct vlan_group *grp, - unsigned int vlan_tci, struct sk_buff *skb) +static gro_result_t +vlan_gro_common(struct napi_struct *napi, struct vlan_group *grp, + unsigned int vlan_tci, struct sk_buff *skb) { struct sk_buff *p; diff --git a/net/core/dev.c b/net/core/dev.c index 68a1bb68b5a8..1dc13744684c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2476,7 +2476,7 @@ void napi_gro_flush(struct napi_struct *napi) } EXPORT_SYMBOL(napi_gro_flush); -int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) +enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { struct sk_buff **pp = NULL; struct packet_type *ptype; @@ -2484,7 +2484,7 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK]; int same_flow; int mac_len; - int ret; + enum gro_result ret; if (!(skb->dev->features & NETIF_F_GRO)) goto normal; @@ -2568,7 +2568,8 @@ normal: } EXPORT_SYMBOL(dev_gro_receive); -static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) +static gro_result_t +__napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { struct sk_buff *p; @@ -2585,7 +2586,7 @@ static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) return dev_gro_receive(napi, skb); } -int napi_skb_finish(int ret, struct sk_buff *skb) +int napi_skb_finish(gro_result_t ret, struct sk_buff *skb) { int err = NET_RX_SUCCESS; @@ -2600,6 +2601,10 @@ int napi_skb_finish(int ret, struct sk_buff *skb) case GRO_MERGED_FREE: kfree_skb(skb); break; + + case GRO_HELD: + case GRO_MERGED: + break; } return err; @@ -2652,7 +2657,8 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi) } EXPORT_SYMBOL(napi_get_frags); -int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret) +int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, + gro_result_t ret) { int err = NET_RX_SUCCESS; @@ -2674,6 +2680,9 @@ int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret) case GRO_MERGED_FREE: napi_reuse_skb(napi, skb); break; + + case GRO_MERGED: + break; } return err; -- cgit v1.3-8-gc7d7 From c7c4b3b6e976b95facbb723951bdcd554a3530a4 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 29 Oct 2009 21:36:53 -0700 Subject: gro: Change all receive functions to return GRO result codes This will allow drivers to adjust their receive path dynamically based on whether GRO is being applied successfully. Currently all in-tree callers ignore the return values of these functions and do not need to be changed. Signed-off-by: Ben Hutchings Acked-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 25 ++++++++++++++----------- include/linux/netdevice.h | 8 ++++---- net/8021q/vlan_core.c | 16 +++++++++------- net/core/dev.c | 38 +++++++++++++++----------------------- 4 files changed, 42 insertions(+), 45 deletions(-) (limited to 'include') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 71a4870c09a9..153f6b9e722c 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -120,10 +120,12 @@ extern u16 vlan_dev_vlan_id(const struct net_device *dev); extern int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, u16 vlan_tci, int polling); extern int vlan_hwaccel_do_receive(struct sk_buff *skb); -extern int vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp, - unsigned int vlan_tci, struct sk_buff *skb); -extern int vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp, - unsigned int vlan_tci); +extern gro_result_t +vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp, + unsigned int vlan_tci, struct sk_buff *skb); +extern gro_result_t +vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp, + unsigned int vlan_tci); #else static inline struct net_device *vlan_dev_real_dev(const struct net_device *dev) @@ -150,17 +152,18 @@ static inline int vlan_hwaccel_do_receive(struct sk_buff *skb) return 0; } -static inline int vlan_gro_receive(struct napi_struct *napi, - struct vlan_group *grp, - unsigned int vlan_tci, struct sk_buff *skb) +static inline gro_result_t +vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp, + unsigned int vlan_tci, struct sk_buff *skb) { - return NET_RX_DROP; + return GRO_DROP; } -static inline int vlan_gro_frags(struct napi_struct *napi, - struct vlan_group *grp, unsigned int vlan_tci) +static inline gro_result_t +vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp, + unsigned int vlan_tci) { - return NET_RX_DROP; + return GRO_DROP; } #endif diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 6e777efe149e..193b637889f9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1483,17 +1483,17 @@ extern int netif_receive_skb(struct sk_buff *skb); extern void napi_gro_flush(struct napi_struct *napi); extern gro_result_t dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb); -extern int napi_skb_finish(gro_result_t ret, struct sk_buff *skb); -extern int napi_gro_receive(struct napi_struct *napi, +extern gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb); +extern gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb); extern void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb); extern struct sk_buff * napi_get_frags(struct napi_struct *napi); -extern int napi_frags_finish(struct napi_struct *napi, +extern gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, gro_result_t ret); extern struct sk_buff * napi_frags_skb(struct napi_struct *napi); -extern int napi_gro_frags(struct napi_struct *napi); +extern gro_result_t napi_gro_frags(struct napi_struct *napi); static inline void napi_free_frags(struct napi_struct *napi) { diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 47a80d65c3b7..8d5ca2ac4f8d 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -102,11 +102,12 @@ drop: return GRO_DROP; } -int vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp, - unsigned int vlan_tci, struct sk_buff *skb) +gro_result_t vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp, + unsigned int vlan_tci, struct sk_buff *skb) { if (netpoll_rx_on(skb)) - return vlan_hwaccel_receive_skb(skb, grp, vlan_tci); + return vlan_hwaccel_receive_skb(skb, grp, vlan_tci) + ? GRO_DROP : GRO_NORMAL; skb_gro_reset_offset(skb); @@ -114,17 +115,18 @@ int vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp, } EXPORT_SYMBOL(vlan_gro_receive); -int vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp, - unsigned int vlan_tci) +gro_result_t vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp, + unsigned int vlan_tci) { struct sk_buff *skb = napi_frags_skb(napi); if (!skb) - return NET_RX_DROP; + return GRO_DROP; if (netpoll_rx_on(skb)) { skb->protocol = eth_type_trans(skb, skb->dev); - return vlan_hwaccel_receive_skb(skb, grp, vlan_tci); + return vlan_hwaccel_receive_skb(skb, grp, vlan_tci) + ? GRO_DROP : GRO_NORMAL; } return napi_frags_finish(napi, skb, diff --git a/net/core/dev.c b/net/core/dev.c index 1dc13744684c..631cc40da197 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2586,18 +2586,15 @@ __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) return dev_gro_receive(napi, skb); } -int napi_skb_finish(gro_result_t ret, struct sk_buff *skb) +gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) { - int err = NET_RX_SUCCESS; - switch (ret) { case GRO_NORMAL: - return netif_receive_skb(skb); + if (netif_receive_skb(skb)) + ret = GRO_DROP; + break; case GRO_DROP: - err = NET_RX_DROP; - /* fall through */ - case GRO_MERGED_FREE: kfree_skb(skb); break; @@ -2607,7 +2604,7 @@ int napi_skb_finish(gro_result_t ret, struct sk_buff *skb) break; } - return err; + return ret; } EXPORT_SYMBOL(napi_skb_finish); @@ -2627,7 +2624,7 @@ void skb_gro_reset_offset(struct sk_buff *skb) } EXPORT_SYMBOL(skb_gro_reset_offset); -int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) +gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { skb_gro_reset_offset(skb); @@ -2657,26 +2654,21 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi) } EXPORT_SYMBOL(napi_get_frags); -int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, - gro_result_t ret) +gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, + gro_result_t ret) { - int err = NET_RX_SUCCESS; - switch (ret) { case GRO_NORMAL: case GRO_HELD: skb->protocol = eth_type_trans(skb, napi->dev); - if (ret == GRO_NORMAL) - return netif_receive_skb(skb); - - skb_gro_pull(skb, -ETH_HLEN); + if (ret == GRO_HELD) + skb_gro_pull(skb, -ETH_HLEN); + else if (netif_receive_skb(skb)) + ret = GRO_DROP; break; case GRO_DROP: - err = NET_RX_DROP; - /* fall through */ - case GRO_MERGED_FREE: napi_reuse_skb(napi, skb); break; @@ -2685,7 +2677,7 @@ int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, break; } - return err; + return ret; } EXPORT_SYMBOL(napi_frags_finish); @@ -2726,12 +2718,12 @@ out: } EXPORT_SYMBOL(napi_frags_skb); -int napi_gro_frags(struct napi_struct *napi) +gro_result_t napi_gro_frags(struct napi_struct *napi) { struct sk_buff *skb = napi_frags_skb(napi); if (!skb) - return NET_RX_DROP; + return GRO_DROP; return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb)); } -- cgit v1.3-8-gc7d7 From 0c509a6c9393b27a8c5a01acd4a72616206cfc24 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 29 Oct 2009 14:18:21 +0000 Subject: net: Allow devices to specify a device specific sysfs group. This isn't beautifully abstracted, but it is simple, simplifies uses and so far is only needed for the bonding driver. Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ++-- net/core/net-sysfs.c | 5 ++++- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 193b637889f9..e5ece8dceaad 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -900,8 +900,8 @@ struct net_device /* class/net/name entry */ struct device dev; - /* space for optional statistics and wireless sysfs groups */ - const struct attribute_group *sysfs_groups[3]; + /* space for optional device, statistics, and wireless sysfs groups */ + const struct attribute_group *sysfs_groups[4]; /* rtnetlink link ops */ const struct rtnl_link_ops *rtnl_link_ops; diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 89de182353b0..157645c0da73 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -544,8 +544,11 @@ int netdev_register_kobject(struct net_device *net) dev_set_name(dev, "%s", net->name); #ifdef CONFIG_SYSFS - *groups++ = &netstat_group; + /* Allow for a device specific group */ + if (*groups) + groups++; + *groups++ = &netstat_group; #ifdef CONFIG_WIRELESS_EXT_SYSFS if (net->ieee80211_ptr) *groups++ = &wireless_group; -- cgit v1.3-8-gc7d7 From 6a86b9c78ebd0397eb953493c68ea9e194e7023c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 28 Oct 2009 09:58:52 +0100 Subject: mac80211: fix radiotap header generation In commit 601ae7f25aea58f208a7f640f6174aac0652403a Author: Bruno Randolf Date: Thu May 8 19:22:43 2008 +0200 mac80211: make rx radiotap header more flexible code was added that tried to align the radiotap header position in memory based on the radiotap header length. Quite obviously, that is completely useless. Instead of trying to do that, use unaligned accesses to generate the radiotap header. To properly do that, we also need to mark struct ieee80211_radiotap_header packed, but that is fine since it's already packed (and it should be marked packed anyway since its a wire format). Cc: Bruno Randolf Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/ieee80211_radiotap.h | 2 +- net/mac80211/rx.c | 26 ++++++++++++-------------- 2 files changed, 13 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h index 23c3f3d97779..9d3d86aaccbb 100644 --- a/include/net/ieee80211_radiotap.h +++ b/include/net/ieee80211_radiotap.h @@ -80,7 +80,7 @@ struct ieee80211_radiotap_header { * Additional extensions are made * by setting bit 31. */ -}; +} __packed; /* Name Data type Units * ---- --------- ----- diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 5c385e3c1d1f..01df328530ad 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -95,10 +95,6 @@ ieee80211_rx_radiotap_len(struct ieee80211_local *local, if (len & 1) /* padding for RX_FLAGS if necessary */ len++; - /* make sure radiotap starts at a naturally aligned address */ - if (len % 8) - len = roundup(len, 8); - return len; } @@ -116,6 +112,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); struct ieee80211_radiotap_header *rthdr; unsigned char *pos; + u16 rx_flags = 0; rthdr = (struct ieee80211_radiotap_header *)skb_push(skb, rtap_len); memset(rthdr, 0, rtap_len); @@ -134,7 +131,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, /* IEEE80211_RADIOTAP_TSFT */ if (status->flag & RX_FLAG_TSFT) { - *(__le64 *)pos = cpu_to_le64(status->mactime); + put_unaligned_le64(status->mactime, pos); rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_TSFT); pos += 8; @@ -166,17 +163,17 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, pos++; /* IEEE80211_RADIOTAP_CHANNEL */ - *(__le16 *)pos = cpu_to_le16(status->freq); + put_unaligned_le16(status->freq, pos); pos += 2; if (status->band == IEEE80211_BAND_5GHZ) - *(__le16 *)pos = cpu_to_le16(IEEE80211_CHAN_OFDM | - IEEE80211_CHAN_5GHZ); + put_unaligned_le16(IEEE80211_CHAN_OFDM | IEEE80211_CHAN_5GHZ, + pos); else if (rate->flags & IEEE80211_RATE_ERP_G) - *(__le16 *)pos = cpu_to_le16(IEEE80211_CHAN_OFDM | - IEEE80211_CHAN_2GHZ); + put_unaligned_le16(IEEE80211_CHAN_OFDM | IEEE80211_CHAN_2GHZ, + pos); else - *(__le16 *)pos = cpu_to_le16(IEEE80211_CHAN_CCK | - IEEE80211_CHAN_2GHZ); + put_unaligned_le16(IEEE80211_CHAN_CCK | IEEE80211_CHAN_2GHZ, + pos); pos += 2; /* IEEE80211_RADIOTAP_DBM_ANTSIGNAL */ @@ -205,10 +202,11 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, /* IEEE80211_RADIOTAP_RX_FLAGS */ /* ensure 2 byte alignment for the 2 byte field as required */ - if ((pos - (unsigned char *)rthdr) & 1) + if ((pos - (u8 *)rthdr) & 1) pos++; if (status->flag & RX_FLAG_FAILED_PLCP_CRC) - *(__le16 *)pos |= cpu_to_le16(IEEE80211_RADIOTAP_F_RX_BADPLCP); + rx_flags |= IEEE80211_RADIOTAP_F_RX_BADPLCP; + put_unaligned_le16(rx_flags, pos); pos += 2; } -- cgit v1.3-8-gc7d7 From 0869aea0eb711982cd2b8bebf41b3c0191c89cde Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 28 Oct 2009 10:03:35 +0100 Subject: mac80211: remove RX_FLAG_RADIOTAP While there may be a case for a driver adding its own bits of radiotap information, none currently does. Also, drivers would have to copy the code to generate the radiotap bits that now mac80211 generates. If some driver in the future needs to add some driver-specific information I'd expect that to be in a radiotap vendor namespace and we can add a different way of passing such data up and having mac80211 include it. Additionally, rename IEEE80211_CONF_RADIOTAP to IEEE80211_CONF_MONITOR since it's still used by b43(legacy) to obtain per-frame timestamps. The purpose of this patch is to simplify the RX code in mac80211 to make it easier to add paged skb support. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/b43/main.c | 2 +- drivers/net/wireless/b43legacy/main.c | 2 +- include/net/mac80211.h | 15 +++++++-------- net/mac80211/iface.c | 8 ++++---- net/mac80211/rx.c | 33 ++++++++++++--------------------- 5 files changed, 25 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c index ed6e96a34743..c806924c7b5c 100644 --- a/drivers/net/wireless/b43/main.c +++ b/drivers/net/wireless/b43/main.c @@ -3573,7 +3573,7 @@ static int b43_op_config(struct ieee80211_hw *hw, u32 changed) if (conf->channel->hw_value != phy->channel) b43_switch_channel(dev, conf->channel->hw_value); - dev->wl->radiotap_enabled = !!(conf->flags & IEEE80211_CONF_RADIOTAP); + dev->wl->radiotap_enabled = !!(conf->flags & IEEE80211_CONF_MONITOR); /* Adjust the desired TX power level. */ if (conf->power_level != 0) { diff --git a/drivers/net/wireless/b43legacy/main.c b/drivers/net/wireless/b43legacy/main.c index 0983406f4630..d579bb9035c4 100644 --- a/drivers/net/wireless/b43legacy/main.c +++ b/drivers/net/wireless/b43legacy/main.c @@ -2676,7 +2676,7 @@ static int b43legacy_op_dev_config(struct ieee80211_hw *hw, if (conf->channel->hw_value != phy->channel) b43legacy_radio_selectchannel(dev, conf->channel->hw_value, 0); - dev->wl->radiotap_enabled = !!(conf->flags & IEEE80211_CONF_RADIOTAP); + dev->wl->radiotap_enabled = !!(conf->flags & IEEE80211_CONF_MONITOR); /* Adjust the desired TX power level. */ if (conf->power_level != 0) { diff --git a/include/net/mac80211.h b/include/net/mac80211.h index c42c4a820b89..2c9d3c719d8a 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -494,7 +494,6 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) * @RX_FLAG_MMIC_ERROR: Michael MIC error was reported on this frame. * Use together with %RX_FLAG_MMIC_STRIPPED. * @RX_FLAG_DECRYPTED: This frame was decrypted in hardware. - * @RX_FLAG_RADIOTAP: This frame starts with a radiotap header. * @RX_FLAG_MMIC_STRIPPED: the Michael MIC is stripped off this frame, * verification has been done by the hardware. * @RX_FLAG_IV_STRIPPED: The IV/ICV are stripped from this frame. @@ -515,7 +514,6 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) enum mac80211_rx_flags { RX_FLAG_MMIC_ERROR = 1<<0, RX_FLAG_DECRYPTED = 1<<1, - RX_FLAG_RADIOTAP = 1<<2, RX_FLAG_MMIC_STRIPPED = 1<<3, RX_FLAG_IV_STRIPPED = 1<<4, RX_FLAG_FAILED_FCS_CRC = 1<<5, @@ -565,7 +563,9 @@ struct ieee80211_rx_status { * * Flags to define PHY configuration options * - * @IEEE80211_CONF_RADIOTAP: add radiotap header at receive time (if supported) + * @IEEE80211_CONF_MONITOR: there's a monitor interface present -- use this + * to determine for example whether to calculate timestamps for packets + * or not, do not use instead of filter flags! * @IEEE80211_CONF_PS: Enable 802.11 power save mode (managed mode only) * @IEEE80211_CONF_IDLE: The device is running, but idle; if the flag is set * the driver should be prepared to handle configuration requests but @@ -574,7 +574,7 @@ struct ieee80211_rx_status { * it can also be unset in that case when monitor interfaces are active. */ enum ieee80211_conf_flags { - IEEE80211_CONF_RADIOTAP = (1<<0), + IEEE80211_CONF_MONITOR = (1<<0), IEEE80211_CONF_PS = (1<<1), IEEE80211_CONF_IDLE = (1<<2), }; @@ -584,7 +584,7 @@ enum ieee80211_conf_flags { * enum ieee80211_conf_changed - denotes which configuration changed * * @IEEE80211_CONF_CHANGE_LISTEN_INTERVAL: the listen interval changed - * @IEEE80211_CONF_CHANGE_RADIOTAP: the radiotap flag changed + * @IEEE80211_CONF_CHANGE_MONITOR: the monitor flag changed * @IEEE80211_CONF_CHANGE_PS: the PS flag or dynamic PS timeout changed * @IEEE80211_CONF_CHANGE_POWER: the TX power changed * @IEEE80211_CONF_CHANGE_CHANNEL: the channel/channel_type changed @@ -593,7 +593,7 @@ enum ieee80211_conf_flags { */ enum ieee80211_conf_changed { IEEE80211_CONF_CHANGE_LISTEN_INTERVAL = BIT(2), - IEEE80211_CONF_CHANGE_RADIOTAP = BIT(3), + IEEE80211_CONF_CHANGE_MONITOR = BIT(3), IEEE80211_CONF_CHANGE_PS = BIT(4), IEEE80211_CONF_CHANGE_POWER = BIT(5), IEEE80211_CONF_CHANGE_CHANNEL = BIT(6), @@ -1661,8 +1661,7 @@ void ieee80211_restart_hw(struct ieee80211_hw *hw); * ieee80211_rx - receive frame * * Use this function to hand received frames to mac80211. The receive - * buffer in @skb must start with an IEEE 802.11 header or a radiotap - * header if %RX_FLAG_RADIOTAP is set in the @status flags. + * buffer in @skb must start with an IEEE 802.11 header. * * This function may not be called in IRQ context. Calls to this function * for a single hardware must be synchronized against each other. Calls to diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 14f10eb91c5c..8495161b99b8 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -214,8 +214,8 @@ static int ieee80211_open(struct net_device *dev) /* must be before the call to ieee80211_configure_filter */ local->monitors++; if (local->monitors == 1) { - local->hw.conf.flags |= IEEE80211_CONF_RADIOTAP; - hw_reconf_flags |= IEEE80211_CONF_CHANGE_RADIOTAP; + local->hw.conf.flags |= IEEE80211_CONF_MONITOR; + hw_reconf_flags |= IEEE80211_CONF_CHANGE_MONITOR; } if (sdata->u.mntr_flags & MONITOR_FLAG_FCSFAIL) @@ -435,8 +435,8 @@ static int ieee80211_stop(struct net_device *dev) local->monitors--; if (local->monitors == 0) { - local->hw.conf.flags &= ~IEEE80211_CONF_RADIOTAP; - hw_reconf_flags |= IEEE80211_CONF_CHANGE_RADIOTAP; + local->hw.conf.flags &= ~IEEE80211_CONF_MONITOR; + hw_reconf_flags |= IEEE80211_CONF_CHANGE_MONITOR; } if (sdata->u.mntr_flags & MONITOR_FLAG_FCSFAIL) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 01df328530ad..798fa82b6ae3 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -39,11 +39,8 @@ static u8 ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw, * only useful for monitoring. */ static struct sk_buff *remove_monitor_info(struct ieee80211_local *local, - struct sk_buff *skb, - int rtap_len) + struct sk_buff *skb) { - skb_pull(skb, rtap_len); - if (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS) { if (likely(skb->len > FCS_LEN)) skb_trim(skb, skb->len - FCS_LEN); @@ -59,15 +56,14 @@ static struct sk_buff *remove_monitor_info(struct ieee80211_local *local, } static inline int should_drop_frame(struct sk_buff *skb, - int present_fcs_len, - int radiotap_len) + int present_fcs_len) { struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; if (status->flag & (RX_FLAG_FAILED_FCS_CRC | RX_FLAG_FAILED_PLCP_CRC)) return 1; - if (unlikely(skb->len < 16 + present_fcs_len + radiotap_len)) + if (unlikely(skb->len < 16 + present_fcs_len)) return 1; if (ieee80211_is_ctl(hdr->frame_control) && !ieee80211_is_pspoll(hdr->frame_control) && @@ -225,7 +221,6 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, struct sk_buff *skb, *skb2; struct net_device *prev_dev = NULL; int present_fcs_len = 0; - int rtap_len = 0; /* * First, we may need to make a copy of the skb because @@ -235,25 +230,23 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, * We don't need to, of course, if we aren't going to return * the SKB because it has a bad FCS/PLCP checksum. */ - if (status->flag & RX_FLAG_RADIOTAP) - rtap_len = ieee80211_get_radiotap_len(origskb->data); - else - /* room for the radiotap header based on driver features */ - needed_headroom = ieee80211_rx_radiotap_len(local, status); + + /* room for the radiotap header based on driver features */ + needed_headroom = ieee80211_rx_radiotap_len(local, status); if (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS) present_fcs_len = FCS_LEN; if (!local->monitors) { - if (should_drop_frame(origskb, present_fcs_len, rtap_len)) { + if (should_drop_frame(origskb, present_fcs_len)) { dev_kfree_skb(origskb); return NULL; } - return remove_monitor_info(local, origskb, rtap_len); + return remove_monitor_info(local, origskb); } - if (should_drop_frame(origskb, present_fcs_len, rtap_len)) { + if (should_drop_frame(origskb, present_fcs_len)) { /* only need to expand headroom if necessary */ skb = origskb; origskb = NULL; @@ -277,16 +270,14 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, */ skb = skb_copy_expand(origskb, needed_headroom, 0, GFP_ATOMIC); - origskb = remove_monitor_info(local, origskb, rtap_len); + origskb = remove_monitor_info(local, origskb); if (!skb) return origskb; } - /* if necessary, prepend radiotap information */ - if (!(status->flag & RX_FLAG_RADIOTAP)) - ieee80211_add_rx_radiotap_header(local, skb, rate, - needed_headroom); + /* prepend radiotap information */ + ieee80211_add_rx_radiotap_header(local, skb, rate, needed_headroom); skb_reset_mac_header(skb); skb->ip_summed = CHECKSUM_UNNECESSARY; -- cgit v1.3-8-gc7d7 From eddcbb94f75c3e8944503e9f13c1d29acd0d7052 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 29 Oct 2009 08:30:35 +0100 Subject: mac80211: introduce ieee80211_beacon_get_tim() Compared to ieee80211_beacon_get(), the new function ieee80211_beacon_get_tim() returns information on the location and length of the TIM IE, which some drivers need in order to generate the TIM on the device. The old function, ieee80211_beacon_get(), becomes a small static inline wrapper around the new one to not break all drivers. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 42 ++++++++++++++++++++++++++++++++++-------- net/mac80211/tx.c | 17 ++++++++++++++--- 2 files changed, 48 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 2c9d3c719d8a..bd0bbc37a1ae 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1743,19 +1743,45 @@ void ieee80211_tx_status_irqsafe(struct ieee80211_hw *hw, struct sk_buff *skb); /** - * ieee80211_beacon_get - beacon generation function + * ieee80211_beacon_get_tim - beacon generation function * @hw: pointer obtained from ieee80211_alloc_hw(). * @vif: &struct ieee80211_vif pointer from &struct ieee80211_if_init_conf. + * @tim_offset: pointer to variable that will receive the TIM IE offset. + * Set to 0 if invalid (in non-AP modes). + * @tim_length: pointer to variable that will receive the TIM IE length, + * (including the ID and length bytes!). + * Set to 0 if invalid (in non-AP modes). + * + * If the driver implements beaconing modes, it must use this function to + * obtain the beacon frame/template. * * If the beacon frames are generated by the host system (i.e., not in - * hardware/firmware), the low-level driver uses this function to receive - * the next beacon frame from the 802.11 code. The low-level is responsible - * for calling this function before beacon data is needed (e.g., based on - * hardware interrupt). Returned skb is used only once and low-level driver - * is responsible for freeing it. + * hardware/firmware), the driver uses this function to get each beacon + * frame from mac80211 -- it is responsible for calling this function + * before the beacon is needed (e.g. based on hardware interrupt). + * + * If the beacon frames are generated by the device, then the driver + * must use the returned beacon as the template and change the TIM IE + * according to the current DTIM parameters/TIM bitmap. + * + * The driver is responsible for freeing the returned skb. + */ +struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + u16 *tim_offset, u16 *tim_length); + +/** + * ieee80211_beacon_get - beacon generation function + * @hw: pointer obtained from ieee80211_alloc_hw(). + * @vif: &struct ieee80211_vif pointer from &struct ieee80211_if_init_conf. + * + * See ieee80211_beacon_get_tim(). */ -struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, - struct ieee80211_vif *vif); +static inline struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, + struct ieee80211_vif *vif) +{ + return ieee80211_beacon_get_tim(hw, vif, NULL, NULL); +} /** * ieee80211_rts_get - RTS frame generation function diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index cb06d8e56496..8595d14c774c 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2012,8 +2012,9 @@ static void ieee80211_beacon_add_tim(struct ieee80211_if_ap *bss, } } -struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, - struct ieee80211_vif *vif) +struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + u16 *tim_offset, u16 *tim_length) { struct ieee80211_local *local = hw_to_local(hw); struct sk_buff *skb = NULL; @@ -2030,6 +2031,11 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, sdata = vif_to_sdata(vif); + if (tim_offset) + *tim_offset = 0; + if (tim_length) + *tim_length = 0; + if (sdata->vif.type == NL80211_IFTYPE_AP) { ap = &sdata->u.ap; beacon = rcu_dereference(ap->beacon); @@ -2065,6 +2071,11 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, spin_unlock_irqrestore(&local->sta_lock, flags); } + if (tim_offset) + *tim_offset = beacon->head_len; + if (tim_length) + *tim_length = skb->len - beacon->head_len; + if (beacon->tail) memcpy(skb_put(skb, beacon->tail_len), beacon->tail, beacon->tail_len); @@ -2141,7 +2152,7 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, rcu_read_unlock(); return skb; } -EXPORT_SYMBOL(ieee80211_beacon_get); +EXPORT_SYMBOL(ieee80211_beacon_get_tim); void ieee80211_rts_get(struct ieee80211_hw *hw, struct ieee80211_vif *vif, const void *frame, size_t frame_len, -- cgit v1.3-8-gc7d7 From c27f2fded51948edf40007f4f31350e9e0c6ba23 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 29 Oct 2009 08:41:25 +0100 Subject: mac80211: deprecate qual value This value is unused by mac80211, because it was only be used by wireless extensions, and turned out to not be useful there because the quality value needs to be comparable between scan results and the current value which is impossible when the qual value is calculated taking into account noise, for example. Since it is unused anyway, this patch deprecates it in the hope that drivers will remove their sometimes quite expensive calculations of the value. I'm open to actual uses of the value, but the best way of using it seems to be what the Intel drivers do which should probably be generalised if we have noise values from the hardware. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 2 +- net/mac80211/debugfs_sta.c | 2 -- net/mac80211/rx.c | 1 - net/mac80211/sta_info.h | 2 -- 4 files changed, 1 insertion(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index bd0bbc37a1ae..e12293e60ac1 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -552,7 +552,7 @@ struct ieee80211_rx_status { int freq; int signal; int noise; - int qual; + int __deprecated qual; int antenna; int rate_idx; int flag; diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 8721679773da..4425b613552c 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -57,7 +57,6 @@ STA_FILE(tx_filtered, tx_filtered_count, LU); STA_FILE(tx_retry_failed, tx_retry_failed, LU); STA_FILE(tx_retry_count, tx_retry_count, LU); STA_FILE(last_signal, last_signal, D); -STA_FILE(last_qual, last_qual, D); STA_FILE(last_noise, last_noise, D); STA_FILE(wep_weak_iv_count, wep_weak_iv_count, LU); @@ -205,7 +204,6 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta) DEBUGFS_ADD(tx_retry_failed); DEBUGFS_ADD(tx_retry_count); DEBUGFS_ADD(last_signal); - DEBUGFS_ADD(last_qual); DEBUGFS_ADD(last_noise); DEBUGFS_ADD(wep_weak_iv_count); } diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index c832d408187e..f862399f7ce1 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -859,7 +859,6 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) sta->rx_fragments++; sta->rx_bytes += rx->skb->len; sta->last_signal = rx->status->signal; - sta->last_qual = rx->status->qual; sta->last_noise = rx->status->noise; /* diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index b3686c870b5e..703f5492ee65 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -193,7 +193,6 @@ struct sta_ampdu_mlme { * @rx_fragments: number of received MPDUs * @rx_dropped: number of dropped MPDUs from this STA * @last_signal: signal of last received frame from this STA - * @last_qual: qual of last received frame from this STA * @last_noise: noise of last received frame from this STA * @last_seq_ctrl: last received seq/frag number from this STA (per RX queue) * @tx_filtered_count: number of frames the hardware filtered for this STA @@ -259,7 +258,6 @@ struct sta_info { unsigned long rx_fragments; unsigned long rx_dropped; int last_signal; - int last_qual; int last_noise; __le16 last_seq_ctrl[NUM_RX_DATA_QUEUES]; -- cgit v1.3-8-gc7d7 From 22403def134e2c1017cb04ae9129a38e841b2d8c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 30 Oct 2009 12:55:03 +0100 Subject: mac80211: also drop qos-nullfunc frames silently We drop nullfunc frames, but not qos-nullfunc frames, even though those could be used for PS state control as well. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 12 +++++++++++- net/mac80211/rx.c | 15 ++++++++++----- 2 files changed, 21 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 52e15e079c61..0aa831467493 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -472,7 +472,7 @@ static inline int ieee80211_is_cfendack(__le16 fc) } /** - * ieee80211_is_nullfunc - check if FTYPE=IEEE80211_FTYPE_DATA and STYPE=IEEE80211_STYPE_NULLFUNC + * ieee80211_is_nullfunc - check if frame is a regular (non-QoS) nullfunc frame * @fc: frame control bytes in little-endian byteorder */ static inline int ieee80211_is_nullfunc(__le16 fc) @@ -481,6 +481,16 @@ static inline int ieee80211_is_nullfunc(__le16 fc) cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_NULLFUNC); } +/** + * ieee80211_is_qos_nullfunc - check if frame is a QoS nullfunc frame + * @fc: frame control bytes in little-endian byteorder + */ +static inline int ieee80211_is_qos_nullfunc(__le16 fc) +{ + return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == + cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_QOS_NULLFUNC); +} + struct ieee80211s_hdr { u8 flags; u8 ttl; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index f862399f7ce1..51cb8bc3af81 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -886,12 +886,17 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) } } - /* Drop data::nullfunc frames silently, since they are used only to - * control station power saving mode. */ - if (ieee80211_is_nullfunc(hdr->frame_control)) { + /* + * Drop (qos-)data::nullfunc frames silently, since they + * are used only to control station power saving mode. + */ + if (ieee80211_is_nullfunc(hdr->frame_control) || + ieee80211_is_qos_nullfunc(hdr->frame_control)) { I802_DEBUG_INC(rx->local->rx_handlers_drop_nullfunc); - /* Update counter and free packet here to avoid counting this - * as a dropped packed. */ + /* + * Update counter and free packet here to avoid + * counting this as a dropped packed. + */ sta->rx_packets++; dev_kfree_skb(rx->skb); return RX_QUEUED; -- cgit v1.3-8-gc7d7 From 244546f0d3101c5441f5b14cfe8a79d62679eaea Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Fri, 30 Oct 2009 08:54:53 +0000 Subject: RDS: Add GET_MR_FOR_DEST sockopt RDS currently supports a GET_MR sockopt to establish a memory region (MR) for a chunk of memory. However, the fastreg method ties a MR to a particular destination. The GET_MR_FOR_DEST sockopt allows the remote machine to be specified, and thus support for fastreg (aka FRWRs). Note that this patch does *not* do all of this - it simply implements the new sockopt in terms of the old one, so applications can begin to use the new sockopt in preparation for cutover to FRWRs. Signed-off-by: Andy Grover Signed-off-by: David S. Miller --- include/linux/rds.h | 8 ++++++++ net/rds/af_rds.c | 3 +++ net/rds/rdma.c | 24 ++++++++++++++++++++++++ net/rds/rdma.h | 1 + 4 files changed, 36 insertions(+) (limited to 'include') diff --git a/include/linux/rds.h b/include/linux/rds.h index 89d46e1afbb1..cab4994c2f63 100644 --- a/include/linux/rds.h +++ b/include/linux/rds.h @@ -56,6 +56,7 @@ /* deprecated: RDS_BARRIER 4 */ #define RDS_RECVERR 5 #define RDS_CONG_MONITOR 6 +#define RDS_GET_MR_FOR_DEST 7 /* * Control message types for SOL_RDS. @@ -224,6 +225,13 @@ struct rds_get_mr_args { uint64_t flags; }; +struct rds_get_mr_for_dest_args { + struct sockaddr_storage dest_addr; + struct rds_iovec vec; + u_int64_t cookie_addr; + uint64_t flags; +}; + struct rds_free_mr_args { rds_rdma_cookie_t cookie; u_int64_t flags; diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index a202e5b36079..2b978dc6e75d 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -265,6 +265,9 @@ static int rds_setsockopt(struct socket *sock, int level, int optname, case RDS_GET_MR: ret = rds_get_mr(rs, optval, optlen); break; + case RDS_GET_MR_FOR_DEST: + ret = rds_get_mr_for_dest(rs, optval, optlen); + break; case RDS_FREE_MR: ret = rds_free_mr(rs, optval, optlen); break; diff --git a/net/rds/rdma.c b/net/rds/rdma.c index 8dc83d2caa58..971b5a668458 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -317,6 +317,30 @@ int rds_get_mr(struct rds_sock *rs, char __user *optval, int optlen) return __rds_rdma_map(rs, &args, NULL, NULL); } +int rds_get_mr_for_dest(struct rds_sock *rs, char __user *optval, int optlen) +{ + struct rds_get_mr_for_dest_args args; + struct rds_get_mr_args new_args; + + if (optlen != sizeof(struct rds_get_mr_for_dest_args)) + return -EINVAL; + + if (copy_from_user(&args, (struct rds_get_mr_for_dest_args __user *)optval, + sizeof(struct rds_get_mr_for_dest_args))) + return -EFAULT; + + /* + * Initially, just behave like get_mr(). + * TODO: Implement get_mr as wrapper around this + * and deprecate it. + */ + new_args.vec = args.vec; + new_args.cookie_addr = args.cookie_addr; + new_args.flags = args.flags; + + return __rds_rdma_map(rs, &new_args, NULL, NULL); +} + /* * Free the MR indicated by the given R_Key */ diff --git a/net/rds/rdma.h b/net/rds/rdma.h index 425512098b0b..909c39835a5d 100644 --- a/net/rds/rdma.h +++ b/net/rds/rdma.h @@ -61,6 +61,7 @@ static inline u32 rds_rdma_cookie_offset(rds_rdma_cookie_t cookie) } int rds_get_mr(struct rds_sock *rs, char __user *optval, int optlen); +int rds_get_mr_for_dest(struct rds_sock *rs, char __user *optval, int optlen); int rds_free_mr(struct rds_sock *rs, char __user *optval, int optlen); void rds_rdma_drop_keys(struct rds_sock *rs); int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm, -- cgit v1.3-8-gc7d7 From 19593ffdb6daa6ba691d247a2400cece12687c52 Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Wed, 14 Oct 2009 20:40:10 +0200 Subject: firewire: ohci: 0 may be a valid DMA address I was told that there are obscure architectures with non-coherent DMA which may DMA-map to bus address 0. We shall not use 0 as a magic number of uninitialized bus address variables. The packet->payload_length > 0 test cannot be used either (except in at_context_queue_packet) because local requests are not DMA-mapped regardless of payload_length. Hence add a state flag to struct fw_packet. Signed-off-by: Stefan Richter --- drivers/firewire/core-transaction.c | 4 ++-- drivers/firewire/ohci.c | 9 +++++---- include/linux/firewire.h | 1 + 3 files changed, 8 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c index 66789c3cc561..842739df23e2 100644 --- a/drivers/firewire/core-transaction.c +++ b/drivers/firewire/core-transaction.c @@ -226,7 +226,7 @@ static void fw_fill_request(struct fw_packet *packet, int tcode, int tlabel, packet->speed = speed; packet->generation = generation; packet->ack = 0; - packet->payload_bus = 0; + packet->payload_mapped = false; } /** @@ -601,7 +601,7 @@ void fw_fill_response(struct fw_packet *response, u32 *request_header, WARN(1, KERN_ERR "wrong tcode %d", tcode); } - response->payload_bus = 0; + response->payload_mapped = false; } EXPORT_SYMBOL(fw_fill_response); diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c index 418415564791..a71477541dc7 100644 --- a/drivers/firewire/ohci.c +++ b/drivers/firewire/ohci.c @@ -995,7 +995,8 @@ static int at_context_queue_packet(struct context *ctx, packet->ack = RCODE_SEND_ERROR; return -1; } - packet->payload_bus = payload_bus; + packet->payload_bus = payload_bus; + packet->payload_mapped = true; d[2].req_count = cpu_to_le16(packet->payload_length); d[2].data_address = cpu_to_le32(payload_bus); @@ -1023,7 +1024,7 @@ static int at_context_queue_packet(struct context *ctx, */ if (ohci->generation != packet->generation || reg_read(ohci, OHCI1394_IntEventSet) & OHCI1394_busReset) { - if (packet->payload_length > 0) + if (packet->payload_mapped) dma_unmap_single(ohci->card.device, payload_bus, packet->payload_length, DMA_TO_DEVICE); packet->ack = RCODE_GENERATION; @@ -1059,7 +1060,7 @@ static int handle_at_packet(struct context *context, /* This packet was cancelled, just continue. */ return 1; - if (packet->payload_bus) + if (packet->payload_mapped) dma_unmap_single(ohci->card.device, packet->payload_bus, packet->payload_length, DMA_TO_DEVICE); @@ -1723,7 +1724,7 @@ static int ohci_cancel_packet(struct fw_card *card, struct fw_packet *packet) if (packet->ack != 0) goto out; - if (packet->payload_bus) + if (packet->payload_mapped) dma_unmap_single(ohci->card.device, packet->payload_bus, packet->payload_length, DMA_TO_DEVICE); diff --git a/include/linux/firewire.h b/include/linux/firewire.h index 211a5d7d87b3..9416a461b696 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -267,6 +267,7 @@ struct fw_packet { void *payload; size_t payload_length; dma_addr_t payload_bus; + bool payload_mapped; u32 timestamp; /* -- cgit v1.3-8-gc7d7 From 72c9528bab94cc052d00ce241b8e85f5d71e45f0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 30 Oct 2009 07:11:27 +0000 Subject: net: Introduce dev_get_by_name_rcu() Some workloads hit dev_base_lock rwlock pretty hard. We can use RCU lookups to avoid touching this rwlock (and avoid touching netdevice refcount) netdevices are already freed after a RCU grace period, so this patch adds no penalty at device dismantle time. However, it adds a synchronize_rcu() call in dev_change_name() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + net/core/dev.c | 49 ++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 41 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e5ece8dceaad..bcf1083857fc 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1115,6 +1115,7 @@ extern void __dev_remove_pack(struct packet_type *pt); extern struct net_device *dev_get_by_flags(struct net *net, unsigned short flags, unsigned short mask); extern struct net_device *dev_get_by_name(struct net *net, const char *name); +extern struct net_device *dev_get_by_name_rcu(struct net *net, const char *name); extern struct net_device *__dev_get_by_name(struct net *net, const char *name); extern int dev_alloc_name(struct net_device *dev, const char *name); extern int dev_open(struct net_device *dev); diff --git a/net/core/dev.c b/net/core/dev.c index 94f42a15fff1..f54d8b8a434b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -213,7 +213,7 @@ static int list_netdevice(struct net_device *dev) write_lock_bh(&dev_base_lock); list_add_tail(&dev->dev_list, &net->dev_base_head); - hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name)); + hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name)); hlist_add_head_rcu(&dev->index_hlist, dev_index_hash(net, dev->ifindex)); write_unlock_bh(&dev_base_lock); @@ -230,7 +230,7 @@ static void unlist_netdevice(struct net_device *dev) /* Unlink dev from the device chain */ write_lock_bh(&dev_base_lock); list_del(&dev->dev_list); - hlist_del(&dev->name_hlist); + hlist_del_rcu(&dev->name_hlist); hlist_del_rcu(&dev->index_hlist); write_unlock_bh(&dev_base_lock); } @@ -598,6 +598,32 @@ struct net_device *__dev_get_by_name(struct net *net, const char *name) } EXPORT_SYMBOL(__dev_get_by_name); +/** + * dev_get_by_name_rcu - find a device by its name + * @net: the applicable net namespace + * @name: name to find + * + * Find an interface by name. + * If the name is found a pointer to the device is returned. + * If the name is not found then %NULL is returned. + * The reference counters are not incremented so the caller must be + * careful with locks. The caller must hold RCU lock. + */ + +struct net_device *dev_get_by_name_rcu(struct net *net, const char *name) +{ + struct hlist_node *p; + struct net_device *dev; + struct hlist_head *head = dev_name_hash(net, name); + + hlist_for_each_entry_rcu(dev, p, head, name_hlist) + if (!strncmp(dev->name, name, IFNAMSIZ)) + return dev; + + return NULL; +} +EXPORT_SYMBOL(dev_get_by_name_rcu); + /** * dev_get_by_name - find a device by its name * @net: the applicable net namespace @@ -614,11 +640,11 @@ struct net_device *dev_get_by_name(struct net *net, const char *name) { struct net_device *dev; - read_lock(&dev_base_lock); - dev = __dev_get_by_name(net, name); + rcu_read_lock(); + dev = dev_get_by_name_rcu(net, name); if (dev) dev_hold(dev); - read_unlock(&dev_base_lock); + rcu_read_unlock(); return dev; } EXPORT_SYMBOL(dev_get_by_name); @@ -960,7 +986,12 @@ rollback: write_lock_bh(&dev_base_lock); hlist_del(&dev->name_hlist); - hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name)); + write_unlock_bh(&dev_base_lock); + + synchronize_rcu(); + + write_lock_bh(&dev_base_lock); + hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name)); write_unlock_bh(&dev_base_lock); ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev); @@ -1062,9 +1093,9 @@ void dev_load(struct net *net, const char *name) { struct net_device *dev; - read_lock(&dev_base_lock); - dev = __dev_get_by_name(net, name); - read_unlock(&dev_base_lock); + rcu_read_lock(); + dev = dev_get_by_name_rcu(net, name); + rcu_read_unlock(); if (!dev && capable(CAP_NET_ADMIN)) request_module("%s", name); -- cgit v1.3-8-gc7d7 From 4f570f995f68ef77aae7e5a441222f59232f2d0e Mon Sep 17 00:00:00 2001 From: Alberto Bertogli Date: Mon, 2 Nov 2009 11:40:16 +0100 Subject: Do not __always_inline bvec_kmap_irq() and bvec_kunmap_irq() So remove both the comment and the inline requirement, going back to the inline hint. Signed-off-by: Alberto Bertogli Signed-off-by: Jens Axboe --- include/linux/bio.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index 5be93f18d842..474792b825d0 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -450,11 +450,8 @@ extern struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly; /* * remember never ever reenable interrupts between a bvec_kmap_irq and * bvec_kunmap_irq! - * - * This function MUST be inlined - it plays with the CPU interrupt flags. */ -static __always_inline char *bvec_kmap_irq(struct bio_vec *bvec, - unsigned long *flags) +static inline char *bvec_kmap_irq(struct bio_vec *bvec, unsigned long *flags) { unsigned long addr; @@ -470,8 +467,7 @@ static __always_inline char *bvec_kmap_irq(struct bio_vec *bvec, return (char *) addr + bvec->bv_offset; } -static __always_inline void bvec_kunmap_irq(char *buffer, - unsigned long *flags) +static inline void bvec_kunmap_irq(char *buffer, unsigned long *flags) { unsigned long ptr = (unsigned long) buffer & PAGE_MASK; -- cgit v1.3-8-gc7d7 From 7b2a35132ad0a70902dcd2844c27ed64cda0ce9b Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 2 Nov 2009 08:50:52 +0800 Subject: compiler: Introduce __always_unused I wrote some code which is used as compile-time checker, and the code should be elided after compile. So I need to annotate the code as "always unused", compared to "maybe unused". Signed-off-by: Li Zefan Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Linus Torvalds LKML-Reference: <4AEE2CEC.8040206@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/compiler-gcc.h | 1 + include/linux/compiler.h | 4 ++++ 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index a3ed7cb8ca34..73dcf804bc94 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -79,6 +79,7 @@ #define noinline __attribute__((noinline)) #define __attribute_const__ __attribute__((__const__)) #define __maybe_unused __attribute__((unused)) +#define __always_unused __attribute__((unused)) #define __gcc_header(x) #x #define _gcc_header(x) __gcc_header(linux/compiler-gcc##x.h) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 04fb5135b4e1..7947f4f6fa51 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -213,6 +213,10 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); # define __maybe_unused /* unimplemented */ #endif +#ifndef __always_unused +# define __always_unused /* unimplemented */ +#endif + #ifndef noinline #define noinline #endif -- cgit v1.3-8-gc7d7 From e74c2e81fc9e1e674f2747c85fe8cfeaaafa55f6 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 2 Nov 2009 21:57:39 -0800 Subject: Input: mark custom_data in ff_periodic_effect as __user The custom_data pointer in ff_periodict_effect structure is a userspace pointer and should be marked as such. Signed-off-by: Dmitry Torokhov --- include/linux/input.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/input.h b/include/linux/input.h index 0ccfc30cd40f..9ee67b4b2b48 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -890,7 +890,7 @@ struct ff_periodic_effect { struct ff_envelope envelope; __u32 custom_len; - __s16 *custom_data; + __s16 __user *custom_data; }; /** -- cgit v1.3-8-gc7d7 From 8649f13d2d810406da444a6101906041b796fbde Mon Sep 17 00:00:00 2001 From: Matt Carlson Date: Mon, 2 Nov 2009 14:30:00 +0000 Subject: broadcom: Consolidate dev_flags definitions This patch moves all the dev_flags enumerations outside the broadcom.c file to include/linux/brcmphy.h. The existing flags were not used yet and have been re-enumerated to avoid conflicts. Signed-off-by: Matt Carlson Reviewed-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/phy/broadcom.c | 11 +---------- include/linux/brcmphy.h | 17 +++++++++++------ 2 files changed, 12 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index 1a2b2f2a273a..ace0ccc87a00 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -16,6 +16,7 @@ #include #include +#include #define PHY_ID_BCM50610 0x0143bd60 #define PHY_ID_BCM50610M 0x0143bd70 @@ -138,16 +139,6 @@ #define BCM5482_SSD_SGMII_SLAVE_EN 0x0002 /* Slave mode enable */ #define BCM5482_SSD_SGMII_SLAVE_AD 0x0001 /* Slave auto-detection */ -/* - * Device flags for PHYs that can be configured for different operating - * modes. - */ -#define PHY_BCM_FLAGS_VALID 0x80000000 -#define PHY_BCM_FLAGS_INTF_XAUI 0x00000020 -#define PHY_BCM_FLAGS_INTF_SGMII 0x00000010 -#define PHY_BCM_FLAGS_MODE_1000BX 0x00000002 -#define PHY_BCM_FLAGS_MODE_COPPER 0x00000001 - /*****************************************************************************/ /* Fast Ethernet Transceiver definitions. */ diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 9b64b6d67873..daa1480fcf49 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -1,6 +1,11 @@ -#define PHY_BRCM_WIRESPEED_ENABLE 0x00000001 -#define PHY_BRCM_AUTO_PWRDWN_ENABLE 0x00000002 -#define PHY_BRCM_APD_CLK125_ENABLE 0x00000004 -#define PHY_BRCM_STD_IBND_DISABLE 0x00000008 -#define PHY_BRCM_EXT_IBND_RX_ENABLE 0x00000010 -#define PHY_BRCM_EXT_IBND_TX_ENABLE 0x00000020 +#define PHY_BCM_FLAGS_MODE_COPPER 0x00000001 +#define PHY_BCM_FLAGS_MODE_1000BX 0x00000002 +#define PHY_BCM_FLAGS_INTF_SGMII 0x00000010 +#define PHY_BCM_FLAGS_INTF_XAUI 0x00000020 +#define PHY_BRCM_WIRESPEED_ENABLE 0x00000100 +#define PHY_BRCM_AUTO_PWRDWN_ENABLE 0x00000200 +#define PHY_BRCM_APD_CLK125_ENABLE 0x00000400 +#define PHY_BRCM_STD_IBND_DISABLE 0x00000800 +#define PHY_BRCM_EXT_IBND_RX_ENABLE 0x00001000 +#define PHY_BRCM_EXT_IBND_TX_ENABLE 0x00002000 +#define PHY_BCM_FLAGS_VALID 0x80000000 -- cgit v1.3-8-gc7d7 From 63a14ce449dd6d647de2725809159eb072b2c44f Mon Sep 17 00:00:00 2001 From: Matt Carlson Date: Mon, 2 Nov 2009 14:30:40 +0000 Subject: tg3 / broadcom: Add PHY_BRCM_CLEAR_RGMII_MODE flag Broadcom 50610M parts changed the default definitions of the RGMII mode shadow register. The 5785 needs the RGMII mode selection bits [4:3] cleared. The default value of the remaining bits in this register are zero. Rather than unnecessarily burn an extra bit in the dev_flags member in an attempt to enumerate all possible combinations, this patch take a more course grained approach and labels the option as "clear all bits". Signed-off-by: Matt Carlson Reviewed-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/phy/broadcom.c | 6 ++++++ drivers/net/tg3.c | 1 + include/linux/brcmphy.h | 1 + 3 files changed, 8 insertions(+) (limited to 'include') diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index ace0ccc87a00..5d2a2e90aba8 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -105,6 +105,7 @@ #define BCM5482_SHD_LEDS1_LED3(src) ((src & 0xf) << 4) /* LED1 / ~LINKSPD[1] selector */ #define BCM5482_SHD_LEDS1_LED1(src) ((src & 0xf) << 0) +#define BCM54XX_SHD_RGMII_MODE 0x0b /* 01011: RGMII Mode Selector */ #define BCM5482_SHD_SSD 0x14 /* 10100: Secondary SerDes control */ #define BCM5482_SHD_SSD_LEDM 0x0008 /* SSD LED Mode enable */ #define BCM5482_SHD_SSD_EN 0x0001 /* SSD enable */ @@ -330,6 +331,11 @@ static int bcm54xx_config_init(struct phy_device *phydev) if (err < 0) return err; + if ((BRCM_PHY_MODEL(phydev) == PHY_ID_BCM50610 || + BRCM_PHY_MODEL(phydev) == PHY_ID_BCM50610M) && + (phydev->dev_flags & PHY_BRCM_CLEAR_RGMII_MODE)) + bcm54xx_shadow_write(phydev, BCM54XX_SHD_RGMII_MODE, 0); + bcm54xx_phydsp_config(phydev); return 0; diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index 0aecd07d3f6b..e7128f6ae2d8 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -1100,6 +1100,7 @@ static int tg3_mdio_init(struct tg3 *tp) break; case TG3_PHY_ID_BCM50610: case TG3_PHY_ID_BCM50610M: + phydev->dev_flags |= PHY_BRCM_CLEAR_RGMII_MODE; if (tp->tg3_flags3 & TG3_FLG3_RGMII_STD_IBND_DISABLE) phydev->dev_flags |= PHY_BRCM_STD_IBND_DISABLE; if (tp->tg3_flags3 & TG3_FLG3_RGMII_EXT_IBND_RX_EN) diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index daa1480fcf49..6e7ffcee9c80 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -8,4 +8,5 @@ #define PHY_BRCM_STD_IBND_DISABLE 0x00000800 #define PHY_BRCM_EXT_IBND_RX_ENABLE 0x00001000 #define PHY_BRCM_EXT_IBND_TX_ENABLE 0x00002000 +#define PHY_BRCM_CLEAR_RGMII_MODE 0x00004000 #define PHY_BCM_FLAGS_VALID 0x80000000 -- cgit v1.3-8-gc7d7 From 32e5a8d651c0dbb02bf82ca954206282e44c4b11 Mon Sep 17 00:00:00 2001 From: Matt Carlson Date: Mon, 2 Nov 2009 14:31:39 +0000 Subject: tg3 / broadcom: Add code to disable rxc refclk The 5785 does not use the RXC reference clock. Turning it off is desirable as it saves power. By default, the 50610 enables the RXC reference clock and the 50610M disables it. Presumably this is one of the reasons why the hardware architect chose one over the other. Adding a "rx reference clock disable" flag is not the ideal way to describe the option, as it would force the MAC using a 50610M to set the flag. Ideally we want the flags to represent opt-in behavior that deviates from hardware defaults. Furthermore, the lack of a "disable" flag implies that the requester wants the rx reference clock enabled, which doesn't necessarily follow. By presenting the option as a passive statement (rx reference clock unused) rather than a command, I hope to convey an opt-in option to disable the rx reference clock that falls back to hardware defaults if not set. A secondary benefit of this is that it keeps the intelligence about phy defaults in the broadcom module where it belongs and allows the broadcom module more latitude should a bug arise. Signed-off-by: Matt Carlson Reviewed-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/phy/broadcom.c | 43 +++++++++++++++++++++++++++++++++++++++++++ drivers/net/tg3.c | 3 ++- include/linux/brcmphy.h | 2 +- 3 files changed, 46 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index bddf4a42ae68..74914335f72c 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -25,6 +25,9 @@ #define BRCM_PHY_MODEL(phydev) \ ((phydev)->drv->phy_id & (phydev)->drv->phy_id_mask) +#define BRCM_PHY_REV(phydev) \ + ((phydev)->drv->phy_id & ~((phydev)->drv->phy_id_mask)) + #define MII_BCM54XX_ECR 0x10 /* BCM54xx extended control register */ #define MII_BCM54XX_ECR_IM 0x1000 /* Interrupt mask */ @@ -95,11 +98,16 @@ #define BCM_LED_SRC_OFF 0xe /* Tied high */ #define BCM_LED_SRC_ON 0xf /* Tied low */ + /* * BCM5482: Shadow registers * Shadow values go into bits [14:10] of register 0x1c to select a shadow * register to access. */ +/* 00101: Spare Control Register 3 */ +#define BCM54XX_SHD_SCR3 0x05 +#define BCM54XX_SHD_SCR3_DEF_CLK125 0x0001 + #define BCM5482_SHD_LEDS1 0x0d /* 01101: LED Selector 1 */ /* LED3 / ~LINKSPD[2] selector */ #define BCM5482_SHD_LEDS1_LED3(src) ((src & 0xf) << 4) @@ -112,6 +120,7 @@ #define BCM5482_SHD_MODE 0x1f /* 11111: Mode Control Register */ #define BCM5482_SHD_MODE_1000BX 0x0001 /* Enable 1000BASE-X registers */ + /* * EXPANSION SHADOW ACCESS REGISTERS. (PHY REG 0x15, 0x16, and 0x17) */ @@ -309,6 +318,37 @@ error: return err ? err : err2; } +static void bcm54xx_adjust_rxrefclk(struct phy_device *phydev) +{ + u32 val, orig; + + /* Abort if we are using an untested phy. */ + if (BRCM_PHY_MODEL(phydev) != PHY_ID_BCM50610 || + BRCM_PHY_MODEL(phydev) != PHY_ID_BCM50610M) + return; + + val = bcm54xx_shadow_read(phydev, BCM54XX_SHD_SCR3); + if (val < 0) + return; + + orig = val; + + if (phydev->dev_flags & PHY_BRCM_RX_REFCLK_UNUSED) { + if ((BRCM_PHY_MODEL(phydev) == PHY_ID_BCM50610 || + BRCM_PHY_MODEL(phydev) == PHY_ID_BCM50610M) && + BRCM_PHY_REV(phydev) >= 0x3) { + /* Here, bit 0 _disables_ CLK125 when set */ + val |= BCM54XX_SHD_SCR3_DEF_CLK125; + } else { + /* Here, bit 0 _enables_ CLK125 when set */ + val &= ~BCM54XX_SHD_SCR3_DEF_CLK125; + } + } + + if (orig != val) + bcm54xx_shadow_write(phydev, BCM54XX_SHD_SCR3, val); +} + static int bcm54xx_config_init(struct phy_device *phydev) { int reg, err; @@ -336,6 +376,9 @@ static int bcm54xx_config_init(struct phy_device *phydev) (phydev->dev_flags & PHY_BRCM_CLEAR_RGMII_MODE)) bcm54xx_shadow_write(phydev, BCM54XX_SHD_RGMII_MODE, 0); + if (phydev->dev_flags & PHY_BRCM_RX_REFCLK_UNUSED) + bcm54xx_adjust_rxrefclk(phydev); + bcm54xx_phydsp_config(phydev); return 0; diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index 592b5bf09e40..369ddba95821 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -1100,7 +1100,8 @@ static int tg3_mdio_init(struct tg3 *tp) break; case TG3_PHY_ID_BCM50610: case TG3_PHY_ID_BCM50610M: - phydev->dev_flags |= PHY_BRCM_CLEAR_RGMII_MODE; + phydev->dev_flags |= PHY_BRCM_CLEAR_RGMII_MODE | + PHY_BRCM_RX_REFCLK_UNUSED; if (tp->tg3_flags3 & TG3_FLG3_RGMII_STD_IBND_DISABLE) phydev->dev_flags |= PHY_BRCM_STD_IBND_DISABLE; if (tp->tg3_flags3 & TG3_FLG3_RGMII_EXT_IBND_RX_EN) diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 6e7ffcee9c80..59432278ded2 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -4,7 +4,7 @@ #define PHY_BCM_FLAGS_INTF_XAUI 0x00000020 #define PHY_BRCM_WIRESPEED_ENABLE 0x00000100 #define PHY_BRCM_AUTO_PWRDWN_ENABLE 0x00000200 -#define PHY_BRCM_APD_CLK125_ENABLE 0x00000400 +#define PHY_BRCM_RX_REFCLK_UNUSED 0x00000400 #define PHY_BRCM_STD_IBND_DISABLE 0x00000800 #define PHY_BRCM_EXT_IBND_RX_ENABLE 0x00001000 #define PHY_BRCM_EXT_IBND_TX_ENABLE 0x00002000 -- cgit v1.3-8-gc7d7 From 52fae0837153e86e4dabaf5df517a0b8b7a20bd7 Mon Sep 17 00:00:00 2001 From: Matt Carlson Date: Mon, 2 Nov 2009 14:32:38 +0000 Subject: tg3 / broadcom: Optionally disable TXC if no link This patch adds code to disable the TXC and RXC reference clocks if link is not available. Signed-off-by: Matt Carlson Reviewed-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/phy/broadcom.c | 5 +++++ drivers/net/tg3.c | 1 + include/linux/brcmphy.h | 1 + 3 files changed, 7 insertions(+) (limited to 'include') diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index 7b10495fe8fb..f63c96a4ecb4 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -108,6 +108,7 @@ #define BCM54XX_SHD_SCR3 0x05 #define BCM54XX_SHD_SCR3_DEF_CLK125 0x0001 #define BCM54XX_SHD_SCR3_DLLAPD_DIS 0x0002 +#define BCM54XX_SHD_SCR3_TRDDAPD 0x0004 /* 01010: Auto Power-Down */ #define BCM54XX_SHD_APD 0x0a @@ -362,6 +363,9 @@ static void bcm54xx_adjust_rxrefclk(struct phy_device *phydev) else val |= BCM54XX_SHD_SCR3_DLLAPD_DIS; + if (phydev->dev_flags & PHY_BRCM_DIS_TXCRXC_NOENRGY) + val |= BCM54XX_SHD_SCR3_TRDDAPD; + if (orig != val) bcm54xx_shadow_write(phydev, BCM54XX_SHD_SCR3, val); @@ -409,6 +413,7 @@ static int bcm54xx_config_init(struct phy_device *phydev) bcm54xx_shadow_write(phydev, BCM54XX_SHD_RGMII_MODE, 0); if ((phydev->dev_flags & PHY_BRCM_RX_REFCLK_UNUSED) || + (phydev->dev_flags & PHY_BRCM_DIS_TXCRXC_NOENRGY) || (phydev->dev_flags & PHY_BRCM_AUTO_PWRDWN_ENABLE)) bcm54xx_adjust_rxrefclk(phydev); diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index f74bf91e78cc..50bb53de5ebd 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -1103,6 +1103,7 @@ static int tg3_mdio_init(struct tg3 *tp) case TG3_PHY_ID_BCM50610M: phydev->dev_flags |= PHY_BRCM_CLEAR_RGMII_MODE | PHY_BRCM_RX_REFCLK_UNUSED | + PHY_BRCM_DIS_TXCRXC_NOENRGY | PHY_BRCM_AUTO_PWRDWN_ENABLE; if (tp->tg3_flags3 & TG3_FLG3_RGMII_STD_IBND_DISABLE) phydev->dev_flags |= PHY_BRCM_STD_IBND_DISABLE; diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 59432278ded2..2b31b91f5871 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -9,4 +9,5 @@ #define PHY_BRCM_EXT_IBND_RX_ENABLE 0x00001000 #define PHY_BRCM_EXT_IBND_TX_ENABLE 0x00002000 #define PHY_BRCM_CLEAR_RGMII_MODE 0x00004000 +#define PHY_BRCM_DIS_TXCRXC_NOENRGY 0x00008000 #define PHY_BCM_FLAGS_VALID 0x80000000 -- cgit v1.3-8-gc7d7 From 7a8b3372e29ff58ebdf94def26703afabd287f11 Mon Sep 17 00:00:00 2001 From: Sandeep Gopalpet Date: Mon, 2 Nov 2009 07:03:40 +0000 Subject: gianfar: Basic Support for programming hash rules This patch provides basic hash rules programming via the ethtool interface. Signed-off-by: Sandeep Gopalpet Signed-off-by: David S. Miller --- drivers/net/gianfar.c | 73 +++++++++++++ drivers/net/gianfar.h | 93 +++++++++++++++++ drivers/net/gianfar_ethtool.c | 236 ++++++++++++++++++++++++++++++++++++++++++ include/linux/ethtool.h | 2 + 4 files changed, 404 insertions(+) (limited to 'include') diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c index dc9fba09b17c..086d40dd526d 100644 --- a/drivers/net/gianfar.c +++ b/drivers/net/gianfar.c @@ -431,6 +431,9 @@ static const struct net_device_ops gfar_netdev_ops = { #endif }; +unsigned int ftp_rqfpr[MAX_FILER_IDX + 1]; +unsigned int ftp_rqfcr[MAX_FILER_IDX + 1]; + void lock_rx_qs(struct gfar_private *priv) { int i = 0x0; @@ -766,6 +769,73 @@ static unsigned int reverse_bitmap(unsigned int bit_map, unsigned int max_qs) } return new_bit_map; } + +u32 cluster_entry_per_class(struct gfar_private *priv, u32 rqfar, u32 class) +{ + u32 rqfpr = FPR_FILER_MASK; + u32 rqfcr = 0x0; + + rqfar--; + rqfcr = RQFCR_CLE | RQFCR_PID_MASK | RQFCR_CMP_EXACT; + ftp_rqfpr[rqfar] = rqfpr; + ftp_rqfcr[rqfar] = rqfcr; + gfar_write_filer(priv, rqfar, rqfcr, rqfpr); + + rqfar--; + rqfcr = RQFCR_CMP_NOMATCH; + ftp_rqfpr[rqfar] = rqfpr; + ftp_rqfcr[rqfar] = rqfcr; + gfar_write_filer(priv, rqfar, rqfcr, rqfpr); + + rqfar--; + rqfcr = RQFCR_CMP_EXACT | RQFCR_PID_PARSE | RQFCR_CLE | RQFCR_AND; + rqfpr = class; + ftp_rqfcr[rqfar] = rqfcr; + ftp_rqfpr[rqfar] = rqfpr; + gfar_write_filer(priv, rqfar, rqfcr, rqfpr); + + rqfar--; + rqfcr = RQFCR_CMP_EXACT | RQFCR_PID_MASK | RQFCR_AND; + rqfpr = class; + ftp_rqfcr[rqfar] = rqfcr; + ftp_rqfpr[rqfar] = rqfpr; + gfar_write_filer(priv, rqfar, rqfcr, rqfpr); + + return rqfar; +} + +static void gfar_init_filer_table(struct gfar_private *priv) +{ + int i = 0x0; + u32 rqfar = MAX_FILER_IDX; + u32 rqfcr = 0x0; + u32 rqfpr = FPR_FILER_MASK; + + /* Default rule */ + rqfcr = RQFCR_CMP_MATCH; + ftp_rqfcr[rqfar] = rqfcr; + ftp_rqfpr[rqfar] = rqfpr; + gfar_write_filer(priv, rqfar, rqfcr, rqfpr); + + rqfar = cluster_entry_per_class(priv, rqfar, RQFPR_IPV6); + rqfar = cluster_entry_per_class(priv, rqfar, RQFPR_IPV6 | RQFPR_UDP); + rqfar = cluster_entry_per_class(priv, rqfar, RQFPR_IPV6 | RQFPR_TCP); + rqfar = cluster_entry_per_class(priv, rqfar, RQFPR_IPV4); + rqfar = cluster_entry_per_class(priv, rqfar, RQFPR_IPV4 | RQFPR_UDP); + rqfar = cluster_entry_per_class(priv, rqfar, RQFPR_IPV4 | RQFPR_TCP); + + /* cur_filer_idx indicated the fisrt non-masked rule */ + priv->cur_filer_idx = rqfar; + + /* Rest are masked rules */ + rqfcr = RQFCR_CMP_NOMATCH; + for (i = 0; i < rqfar; i++) { + ftp_rqfcr[i] = rqfcr; + ftp_rqfpr[i] = rqfpr; + gfar_write_filer(priv, i, rqfcr, rqfpr); + } +} + /* Set up the ethernet device structure, private data, * and anything else we need before we start */ static int gfar_probe(struct of_device *ofdev, @@ -1005,6 +1075,9 @@ static int gfar_probe(struct of_device *ofdev, priv->gfargrp[i].int_name_tx[len_devname] = '\0'; } + /* Initialize the filer table */ + gfar_init_filer_table(priv); + /* Create all the sysfs files */ gfar_init_sysfs(dev); diff --git a/drivers/net/gianfar.h b/drivers/net/gianfar.h index a2c1f963cdd6..44b63daa7ff3 100644 --- a/drivers/net/gianfar.h +++ b/drivers/net/gianfar.h @@ -381,6 +381,84 @@ extern const char gfar_driver_version[]; #define BD_LFLAG(flags) ((flags) << 16) #define BD_LENGTH_MASK 0x0000ffff +#define CLASS_CODE_UNRECOG 0x00 +#define CLASS_CODE_DUMMY1 0x01 +#define CLASS_CODE_ETHERTYPE1 0x02 +#define CLASS_CODE_ETHERTYPE2 0x03 +#define CLASS_CODE_USER_PROG1 0x04 +#define CLASS_CODE_USER_PROG2 0x05 +#define CLASS_CODE_USER_PROG3 0x06 +#define CLASS_CODE_USER_PROG4 0x07 +#define CLASS_CODE_TCP_IPV4 0x08 +#define CLASS_CODE_UDP_IPV4 0x09 +#define CLASS_CODE_AH_ESP_IPV4 0x0a +#define CLASS_CODE_SCTP_IPV4 0x0b +#define CLASS_CODE_TCP_IPV6 0x0c +#define CLASS_CODE_UDP_IPV6 0x0d +#define CLASS_CODE_AH_ESP_IPV6 0x0e +#define CLASS_CODE_SCTP_IPV6 0x0f + +#define FPR_FILER_MASK 0xFFFFFFFF +#define MAX_FILER_IDX 0xFF + +/* RQFCR register bits */ +#define RQFCR_GPI 0x80000000 +#define RQFCR_HASHTBL_Q 0x00000000 +#define RQFCR_HASHTBL_0 0x00020000 +#define RQFCR_HASHTBL_1 0x00040000 +#define RQFCR_HASHTBL_2 0x00060000 +#define RQFCR_HASHTBL_3 0x00080000 +#define RQFCR_HASH 0x00010000 +#define RQFCR_CLE 0x00000200 +#define RQFCR_RJE 0x00000100 +#define RQFCR_AND 0x00000080 +#define RQFCR_CMP_EXACT 0x00000000 +#define RQFCR_CMP_MATCH 0x00000020 +#define RQFCR_CMP_NOEXACT 0x00000040 +#define RQFCR_CMP_NOMATCH 0x00000060 + +/* RQFCR PID values */ +#define RQFCR_PID_MASK 0x00000000 +#define RQFCR_PID_PARSE 0x00000001 +#define RQFCR_PID_ARB 0x00000002 +#define RQFCR_PID_DAH 0x00000003 +#define RQFCR_PID_DAL 0x00000004 +#define RQFCR_PID_SAH 0x00000005 +#define RQFCR_PID_SAL 0x00000006 +#define RQFCR_PID_ETY 0x00000007 +#define RQFCR_PID_VID 0x00000008 +#define RQFCR_PID_PRI 0x00000009 +#define RQFCR_PID_TOS 0x0000000A +#define RQFCR_PID_L4P 0x0000000B +#define RQFCR_PID_DIA 0x0000000C +#define RQFCR_PID_SIA 0x0000000D +#define RQFCR_PID_DPT 0x0000000E +#define RQFCR_PID_SPT 0x0000000F + +/* RQFPR when PID is 0x0001 */ +#define RQFPR_HDR_GE_512 0x00200000 +#define RQFPR_LERR 0x00100000 +#define RQFPR_RAR 0x00080000 +#define RQFPR_RARQ 0x00040000 +#define RQFPR_AR 0x00020000 +#define RQFPR_ARQ 0x00010000 +#define RQFPR_EBC 0x00008000 +#define RQFPR_VLN 0x00004000 +#define RQFPR_CFI 0x00002000 +#define RQFPR_JUM 0x00001000 +#define RQFPR_IPF 0x00000800 +#define RQFPR_FIF 0x00000400 +#define RQFPR_IPV4 0x00000200 +#define RQFPR_IPV6 0x00000100 +#define RQFPR_ICC 0x00000080 +#define RQFPR_ICV 0x00000040 +#define RQFPR_TCP 0x00000020 +#define RQFPR_UDP 0x00000010 +#define RQFPR_TUC 0x00000008 +#define RQFPR_TUV 0x00000004 +#define RQFPR_PER 0x00000002 +#define RQFPR_EER 0x00000001 + /* TxBD status field bits */ #define TXBD_READY 0x8000 #define TXBD_PADCRC 0x4000 @@ -959,6 +1037,8 @@ struct gfar_private { unsigned int rx_stash_size; unsigned int rx_stash_index; + u32 cur_filer_idx; + struct sk_buff_head rx_recycle; struct vlan_group *vlgrp; @@ -1002,6 +1082,9 @@ struct gfar_private { struct gfar_extra_stats extra_stats; }; +extern unsigned int ftp_rqfpr[MAX_FILER_IDX + 1]; +extern unsigned int ftp_rqfcr[MAX_FILER_IDX + 1]; + static inline u32 gfar_read(volatile unsigned __iomem *addr) { u32 val; @@ -1014,6 +1097,16 @@ static inline void gfar_write(volatile unsigned __iomem *addr, u32 val) out_be32(addr, val); } +static inline void gfar_write_filer(struct gfar_private *priv, + unsigned int far, unsigned int fcr, unsigned int fpr) +{ + struct gfar __iomem *regs = priv->gfargrp[0].regs; + + gfar_write(®s->rqfar, far); + gfar_write(®s->rqfcr, fcr); + gfar_write(®s->rqfpr, fpr); +} + extern void lock_rx_qs(struct gfar_private *priv); extern void lock_tx_qs(struct gfar_private *priv); extern void unlock_rx_qs(struct gfar_private *priv); diff --git a/drivers/net/gianfar_ethtool.c b/drivers/net/gianfar_ethtool.c index 562f6c20f591..1010367695e4 100644 --- a/drivers/net/gianfar_ethtool.c +++ b/drivers/net/gianfar_ethtool.c @@ -645,6 +645,241 @@ static int gfar_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) } #endif +static int gfar_ethflow_to_class(int flow_type, u64 *class) +{ + switch (flow_type) { + case TCP_V4_FLOW: + *class = CLASS_CODE_TCP_IPV4; + break; + case UDP_V4_FLOW: + *class = CLASS_CODE_UDP_IPV4; + break; + case AH_V4_FLOW: + case ESP_V4_FLOW: + *class = CLASS_CODE_AH_ESP_IPV4; + break; + case SCTP_V4_FLOW: + *class = CLASS_CODE_SCTP_IPV4; + break; + case TCP_V6_FLOW: + *class = CLASS_CODE_TCP_IPV6; + break; + case UDP_V6_FLOW: + *class = CLASS_CODE_UDP_IPV6; + break; + case AH_V6_FLOW: + case ESP_V6_FLOW: + *class = CLASS_CODE_AH_ESP_IPV6; + break; + case SCTP_V6_FLOW: + *class = CLASS_CODE_SCTP_IPV6; + break; + default: + return 0; + } + + return 1; +} + +static void ethflow_to_filer_rules (struct gfar_private *priv, u64 ethflow) +{ + u32 fcr = 0x0, fpr = FPR_FILER_MASK; + + if (ethflow & RXH_L2DA) { + fcr = RQFCR_PID_DAH |RQFCR_CMP_NOMATCH | + RQFCR_HASH | RQFCR_AND | RQFCR_HASHTBL_0; + ftp_rqfpr[priv->cur_filer_idx] = fpr; + ftp_rqfcr[priv->cur_filer_idx] = fcr; + gfar_write_filer(priv, priv->cur_filer_idx, fcr, fpr); + priv->cur_filer_idx = priv->cur_filer_idx - 1; + + fcr = RQFCR_PID_DAL | RQFCR_AND | RQFCR_CMP_NOMATCH | + RQFCR_HASH | RQFCR_AND | RQFCR_HASHTBL_0; + ftp_rqfpr[priv->cur_filer_idx] = fpr; + ftp_rqfcr[priv->cur_filer_idx] = fcr; + gfar_write_filer(priv, priv->cur_filer_idx, fcr, fpr); + priv->cur_filer_idx = priv->cur_filer_idx - 1; + } + + if (ethflow & RXH_VLAN) { + fcr = RQFCR_PID_VID | RQFCR_CMP_NOMATCH | RQFCR_HASH | + RQFCR_AND | RQFCR_HASHTBL_0; + gfar_write_filer(priv, priv->cur_filer_idx, fcr, fpr); + ftp_rqfpr[priv->cur_filer_idx] = fpr; + ftp_rqfcr[priv->cur_filer_idx] = fcr; + priv->cur_filer_idx = priv->cur_filer_idx - 1; + } + + if (ethflow & RXH_IP_SRC) { + fcr = RQFCR_PID_SIA | RQFCR_CMP_NOMATCH | RQFCR_HASH | + RQFCR_AND | RQFCR_HASHTBL_0; + ftp_rqfpr[priv->cur_filer_idx] = fpr; + ftp_rqfcr[priv->cur_filer_idx] = fcr; + gfar_write_filer(priv, priv->cur_filer_idx, fcr, fpr); + priv->cur_filer_idx = priv->cur_filer_idx - 1; + } + + if (ethflow & (RXH_IP_DST)) { + fcr = RQFCR_PID_DIA | RQFCR_CMP_NOMATCH | RQFCR_HASH | + RQFCR_AND | RQFCR_HASHTBL_0; + ftp_rqfpr[priv->cur_filer_idx] = fpr; + ftp_rqfcr[priv->cur_filer_idx] = fcr; + gfar_write_filer(priv, priv->cur_filer_idx, fcr, fpr); + priv->cur_filer_idx = priv->cur_filer_idx - 1; + } + + if (ethflow & RXH_L3_PROTO) { + fcr = RQFCR_PID_L4P | RQFCR_CMP_NOMATCH | RQFCR_HASH | + RQFCR_AND | RQFCR_HASHTBL_0; + ftp_rqfpr[priv->cur_filer_idx] = fpr; + ftp_rqfcr[priv->cur_filer_idx] = fcr; + gfar_write_filer(priv, priv->cur_filer_idx, fcr, fpr); + priv->cur_filer_idx = priv->cur_filer_idx - 1; + } + + if (ethflow & RXH_L4_B_0_1) { + fcr = RQFCR_PID_SPT | RQFCR_CMP_NOMATCH | RQFCR_HASH | + RQFCR_AND | RQFCR_HASHTBL_0; + ftp_rqfpr[priv->cur_filer_idx] = fpr; + ftp_rqfcr[priv->cur_filer_idx] = fcr; + gfar_write_filer(priv, priv->cur_filer_idx, fcr, fpr); + priv->cur_filer_idx = priv->cur_filer_idx - 1; + } + + if (ethflow & RXH_L4_B_2_3) { + fcr = RQFCR_PID_DPT | RQFCR_CMP_NOMATCH | RQFCR_HASH | + RQFCR_AND | RQFCR_HASHTBL_0; + ftp_rqfpr[priv->cur_filer_idx] = fpr; + ftp_rqfcr[priv->cur_filer_idx] = fcr; + gfar_write_filer(priv, priv->cur_filer_idx, fcr, fpr); + priv->cur_filer_idx = priv->cur_filer_idx - 1; + } +} + +static int gfar_ethflow_to_filer_table(struct gfar_private *priv, u64 ethflow, u64 class) +{ + unsigned int last_rule_idx = priv->cur_filer_idx; + unsigned int cmp_rqfpr; + unsigned int local_rqfpr[MAX_FILER_IDX + 1]; + unsigned int local_rqfcr[MAX_FILER_IDX + 1]; + int i = 0x0, k = 0x0; + int j = MAX_FILER_IDX, l = 0x0; + + switch (class) { + case TCP_V4_FLOW: + cmp_rqfpr = RQFPR_IPV4 |RQFPR_TCP; + break; + case UDP_V4_FLOW: + cmp_rqfpr = RQFPR_IPV4 |RQFPR_UDP; + break; + case TCP_V6_FLOW: + cmp_rqfpr = RQFPR_IPV6 |RQFPR_TCP; + break; + case UDP_V6_FLOW: + cmp_rqfpr = RQFPR_IPV6 |RQFPR_UDP; + break; + case IPV4_FLOW: + cmp_rqfpr = RQFPR_IPV4; + case IPV6_FLOW: + cmp_rqfpr = RQFPR_IPV6; + break; + default: + printk(KERN_ERR "Right now this class is not supported\n"); + return 0; + } + + for (i = 0; i < MAX_FILER_IDX + 1; i++) { + local_rqfpr[j] = ftp_rqfpr[i]; + local_rqfcr[j] = ftp_rqfcr[i]; + j--; + if ((ftp_rqfcr[i] == (RQFCR_PID_PARSE | + RQFCR_CLE |RQFCR_AND)) && + (ftp_rqfpr[i] == cmp_rqfpr)) + break; + } + + if (i == MAX_FILER_IDX + 1) { + printk(KERN_ERR "No parse rule found, "); + printk(KERN_ERR "can't create hash rules\n"); + return 0; + } + + /* If a match was found, then it begins the starting of a cluster rule + * if it was already programmed, we need to overwrite these rules + */ + for (l = i+1; l < MAX_FILER_IDX; l++) { + if ((ftp_rqfcr[l] & RQFCR_CLE) && + !(ftp_rqfcr[l] & RQFCR_AND)) { + ftp_rqfcr[l] = RQFCR_CLE | RQFCR_CMP_EXACT | + RQFCR_HASHTBL_0 | RQFCR_PID_MASK; + ftp_rqfpr[l] = FPR_FILER_MASK; + gfar_write_filer(priv, l, ftp_rqfcr[l], ftp_rqfpr[l]); + break; + } + + if (!(ftp_rqfcr[l] & RQFCR_CLE) && (ftp_rqfcr[l] & RQFCR_AND)) + continue; + else { + local_rqfpr[j] = ftp_rqfpr[l]; + local_rqfcr[j] = ftp_rqfcr[l]; + j--; + } + } + + priv->cur_filer_idx = l - 1; + last_rule_idx = l; + + /* hash rules */ + ethflow_to_filer_rules(priv, ethflow); + + /* Write back the popped out rules again */ + for (k = j+1; k < MAX_FILER_IDX; k++) { + ftp_rqfpr[priv->cur_filer_idx] = local_rqfpr[k]; + ftp_rqfcr[priv->cur_filer_idx] = local_rqfcr[k]; + gfar_write_filer(priv, priv->cur_filer_idx, + local_rqfcr[k], local_rqfpr[k]); + if (!priv->cur_filer_idx) + break; + priv->cur_filer_idx = priv->cur_filer_idx - 1; + } + + return 1; +} + +static int gfar_set_hash_opts(struct gfar_private *priv, struct ethtool_rxnfc *cmd) +{ + u64 class; + + if (!gfar_ethflow_to_class(cmd->flow_type, &class)) + return -EINVAL; + + if (class < CLASS_CODE_USER_PROG1 || + class > CLASS_CODE_SCTP_IPV6) + return -EINVAL; + + /* write the filer rules here */ + if (!gfar_ethflow_to_filer_table(priv, cmd->data, cmd->flow_type)) + return -1; + + return 0; +} + +static int gfar_set_nfc(struct net_device *dev, struct ethtool_rxnfc *cmd) +{ + struct gfar_private *priv = netdev_priv(dev); + int ret = 0; + + switch(cmd->cmd) { + case ETHTOOL_SRXFH: + ret = gfar_set_hash_opts(priv, cmd); + break; + default: + ret = -EINVAL; + } + + return ret; +} + const struct ethtool_ops gfar_ethtool_ops = { .get_settings = gfar_gsettings, .set_settings = gfar_ssettings, @@ -670,4 +905,5 @@ const struct ethtool_ops gfar_ethtool_ops = { .get_wol = gfar_get_wol, .set_wol = gfar_set_wol, #endif + .set_rxnfc = gfar_set_nfc, }; diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index eb1a48da2d43..edd03b79e8f8 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -674,6 +674,8 @@ struct ethtool_ops { #define AH_V6_FLOW 0x0b #define ESP_V6_FLOW 0x0c #define IP_USER_FLOW 0x0d +#define IPV4_FLOW 0x10 +#define IPV6_FLOW 0x11 /* L3-L4 network traffic flow hash options */ #define RXH_L2DA (1 << 1) -- cgit v1.3-8-gc7d7 From fb0459d75c1d0a4ba3cafdd2c754e7486968a676 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Fri, 25 Sep 2009 12:25:56 +0200 Subject: perf/core: Provide a kernel-internal interface to get to performance counters There are reasons for kernel code to ask for, and use, performance counters. For example, in CPU freq governors this tends to be a good idea, but there are other examples possible as well of course. This patch adds the needed bits to do enable this functionality; they have been tested in an experimental cpufreq driver that I'm working on, and the changes are all that I needed to access counters properly. [fweisbec@gmail.com: added pid to perf_event_create_kernel_counter so that we can profile a particular task too TODO: Have a better error reporting, don't just return NULL in fail case.] v2: Remove the wrong comment about the fact perf_event_create_kernel_counter must be called from a kernel thread. Signed-off-by: Arjan van de Ven Acked-by: Peter Zijlstra Cc: "K.Prasad" Cc: Alan Stern Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt Cc: Ingo Molnar Cc: Jan Kiszka Cc: Jiri Slaby Cc: Li Zefan Cc: Avi Kivity Cc: Paul Mackerras Cc: Mike Galbraith Cc: Masami Hiramatsu Cc: Paul Mundt Cc: Jan Kiszka Cc: Avi Kivity LKML-Reference: <20090925122556.2f8bd939@infradead.org> Signed-off-by: Frederic Weisbecker --- include/linux/perf_event.h | 6 ++++ kernel/perf_event.c | 75 +++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 80 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index df9d964c15fc..fa151d49a2ee 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -744,6 +744,12 @@ extern int hw_perf_group_sched_in(struct perf_event *group_leader, struct perf_cpu_context *cpuctx, struct perf_event_context *ctx, int cpu); extern void perf_event_update_userpage(struct perf_event *event); +extern int perf_event_release_kernel(struct perf_event *event); +extern struct perf_event * +perf_event_create_kernel_counter(struct perf_event_attr *attr, + int cpu, + pid_t pid); +extern u64 perf_event_read_value(struct perf_event *event); struct perf_sample_data { u64 type; diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 12b5ec39bf97..02d4ff041b01 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -1725,6 +1725,26 @@ static int perf_release(struct inode *inode, struct file *file) return 0; } +int perf_event_release_kernel(struct perf_event *event) +{ + struct perf_event_context *ctx = event->ctx; + + WARN_ON_ONCE(ctx->parent_ctx); + mutex_lock(&ctx->mutex); + perf_event_remove_from_context(event); + mutex_unlock(&ctx->mutex); + + mutex_lock(&event->owner->perf_event_mutex); + list_del_init(&event->owner_entry); + mutex_unlock(&event->owner->perf_event_mutex); + put_task_struct(event->owner); + + free_event(event); + + return 0; +} +EXPORT_SYMBOL_GPL(perf_event_release_kernel); + static int perf_event_read_size(struct perf_event *event) { int entry = sizeof(u64); /* value */ @@ -1750,7 +1770,7 @@ static int perf_event_read_size(struct perf_event *event) return size; } -static u64 perf_event_read_value(struct perf_event *event) +u64 perf_event_read_value(struct perf_event *event) { struct perf_event *child; u64 total = 0; @@ -1761,6 +1781,7 @@ static u64 perf_event_read_value(struct perf_event *event) return total; } +EXPORT_SYMBOL_GPL(perf_event_read_value); static int perf_event_read_entry(struct perf_event *event, u64 read_format, char __user *buf) @@ -4638,6 +4659,58 @@ err_put_context: return err; } +/** + * perf_event_create_kernel_counter + * + * @attr: attributes of the counter to create + * @cpu: cpu in which the counter is bound + * @pid: task to profile + */ +struct perf_event * +perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, + pid_t pid) +{ + struct perf_event *event; + struct perf_event_context *ctx; + int err; + + /* + * Get the target context (task or percpu): + */ + + ctx = find_get_context(pid, cpu); + if (IS_ERR(ctx)) + return NULL ; + + event = perf_event_alloc(attr, cpu, ctx, NULL, + NULL, GFP_KERNEL); + err = PTR_ERR(event); + if (IS_ERR(event)) + goto err_put_context; + + event->filp = NULL; + WARN_ON_ONCE(ctx->parent_ctx); + mutex_lock(&ctx->mutex); + perf_install_in_context(ctx, event, cpu); + ++ctx->generation; + mutex_unlock(&ctx->mutex); + + event->owner = current; + get_task_struct(current); + mutex_lock(¤t->perf_event_mutex); + list_add_tail(&event->owner_entry, ¤t->perf_event_list); + mutex_unlock(¤t->perf_event_mutex); + + return event; + +err_put_context: + if (err < 0) + put_ctx(ctx); + + return NULL; +} +EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter); + /* * inherit a event from parent task to child task: */ -- cgit v1.3-8-gc7d7 From 97eaf5300b9d0cd99c310bf8c4a0f2f3296d88a3 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 18 Oct 2009 15:33:50 +0200 Subject: perf/core: Add a callback to perf events A simple callback in a perf event can be used for multiple purposes. For example it is useful for triggered based events like hardware breakpoints that need a callback to dispatch a triggered breakpoint event. v2: Simplify a bit the callback attribution as suggested by Paul Mackerras Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: "K.Prasad" Cc: Alan Stern Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt Cc: Ingo Molnar Cc: Paul Mackerras Cc: Mike Galbraith Cc: Paul Mundt --- include/linux/perf_event.h | 7 ++++++- kernel/perf_event.c | 14 ++++++++++---- 2 files changed, 16 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index fa151d49a2ee..8d54e6d25eeb 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -544,6 +544,8 @@ struct perf_pending_entry { void (*func)(struct perf_pending_entry *); }; +typedef void (*perf_callback_t)(struct perf_event *, void *); + /** * struct perf_event - performance event kernel representation: */ @@ -639,6 +641,8 @@ struct perf_event { struct event_filter *filter; #endif + perf_callback_t callback; + #endif /* CONFIG_PERF_EVENTS */ }; @@ -748,7 +752,8 @@ extern int perf_event_release_kernel(struct perf_event *event); extern struct perf_event * perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, - pid_t pid); + pid_t pid, + perf_callback_t callback); extern u64 perf_event_read_value(struct perf_event *event); struct perf_sample_data { diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 02d4ff041b01..5087125e2a00 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -4293,6 +4293,7 @@ perf_event_alloc(struct perf_event_attr *attr, struct perf_event_context *ctx, struct perf_event *group_leader, struct perf_event *parent_event, + perf_callback_t callback, gfp_t gfpflags) { const struct pmu *pmu; @@ -4335,6 +4336,11 @@ perf_event_alloc(struct perf_event_attr *attr, event->state = PERF_EVENT_STATE_INACTIVE; + if (!callback && parent_event) + callback = parent_event->callback; + + event->callback = callback; + if (attr->disabled) event->state = PERF_EVENT_STATE_OFF; @@ -4611,7 +4617,7 @@ SYSCALL_DEFINE5(perf_event_open, } event = perf_event_alloc(&attr, cpu, ctx, group_leader, - NULL, GFP_KERNEL); + NULL, NULL, GFP_KERNEL); err = PTR_ERR(event); if (IS_ERR(event)) goto err_put_context; @@ -4668,7 +4674,7 @@ err_put_context: */ struct perf_event * perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, - pid_t pid) + pid_t pid, perf_callback_t callback) { struct perf_event *event; struct perf_event_context *ctx; @@ -4683,7 +4689,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, return NULL ; event = perf_event_alloc(attr, cpu, ctx, NULL, - NULL, GFP_KERNEL); + NULL, callback, GFP_KERNEL); err = PTR_ERR(event); if (IS_ERR(event)) goto err_put_context; @@ -4736,7 +4742,7 @@ inherit_event(struct perf_event *parent_event, child_event = perf_event_alloc(&parent_event->attr, parent_event->cpu, child_ctx, group_leader, parent_event, - GFP_KERNEL); + NULL, GFP_KERNEL); if (IS_ERR(child_event)) return child_event; get_ctx(child_ctx); -- cgit v1.3-8-gc7d7 From fe3e78e073d25308756f38019956061153267769 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 3 Nov 2009 22:13:13 +0000 Subject: ASoC: Factor out snd_soc_init_card() snd_soc_init_card() is always called as the last part of the CODEC probe function so we can factor it out into the core card setup rather than have each CODEC replicate the code to do the initialiastation. This will be required to support multiple CODECs per card. Signed-off-by: Mark Brown --- include/sound/soc.h | 1 - sound/soc/codecs/ac97.c | 3 - sound/soc/codecs/ad1836.c | 6 -- sound/soc/codecs/ad1938.c | 6 -- sound/soc/codecs/ad1980.c | 5 -- sound/soc/codecs/ad73311.c | 8 --- sound/soc/codecs/ak4104.c | 8 --- sound/soc/codecs/ak4535.c | 8 --- sound/soc/codecs/ak4642.c | 9 --- sound/soc/codecs/ak4671.c | 9 --- sound/soc/codecs/cs4270.c | 7 -- sound/soc/codecs/cx20442.c | 6 -- sound/soc/codecs/pcm3008.c | 9 --- sound/soc/codecs/ssm2602.c | 8 --- sound/soc/codecs/stac9766.c | 3 - sound/soc/codecs/tlv320aic23.c | 8 --- sound/soc/codecs/tlv320aic26.c | 11 ---- sound/soc/codecs/tlv320aic3x.c | 10 --- sound/soc/codecs/tlv320dac33.c | 10 +-- sound/soc/codecs/twl4030.c | 12 ---- sound/soc/codecs/uda134x.c | 9 --- sound/soc/codecs/uda1380.c | 8 --- sound/soc/codecs/wm8350.c | 11 ---- sound/soc/codecs/wm8400.c | 6 -- sound/soc/codecs/wm8510.c | 9 +-- sound/soc/codecs/wm8523.c | 8 --- sound/soc/codecs/wm8580.c | 8 --- sound/soc/codecs/wm8711.c | 8 --- sound/soc/codecs/wm8727.c | 8 --- sound/soc/codecs/wm8728.c | 8 --- sound/soc/codecs/wm8731.c | 8 --- sound/soc/codecs/wm8750.c | 8 --- sound/soc/codecs/wm8753.c | 9 --- sound/soc/codecs/wm8776.c | 9 --- sound/soc/codecs/wm8900.c | 6 -- sound/soc/codecs/wm8903.c | 9 --- sound/soc/codecs/wm8940.c | 6 -- sound/soc/codecs/wm8960.c | 8 --- sound/soc/codecs/wm8961.c | 9 --- sound/soc/codecs/wm8971.c | 9 +-- sound/soc/codecs/wm8974.c | 8 --- sound/soc/codecs/wm8988.c | 9 --- sound/soc/codecs/wm8990.c | 9 +-- sound/soc/codecs/wm8993.c | 9 --- sound/soc/codecs/wm9081.c | 9 --- sound/soc/codecs/wm9705.c | 8 --- sound/soc/codecs/wm9712.c | 8 --- sound/soc/codecs/wm9713.c | 7 +- sound/soc/soc-core.c | 141 +++++++++++++++++++---------------------- 49 files changed, 69 insertions(+), 450 deletions(-) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index b1245e3acdfc..7f3a4c5028da 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -226,7 +226,6 @@ int snd_soc_codec_set_cache_io(struct snd_soc_codec *codec, /* pcm <-> DAI connect */ void snd_soc_free_pcms(struct snd_soc_device *socdev); int snd_soc_new_pcms(struct snd_soc_device *socdev, int idx, const char *xid); -int snd_soc_init_card(struct snd_soc_device *socdev); /* set runtime hw params */ int snd_soc_set_runtime_hwparams(struct snd_pcm_substream *substream, diff --git a/sound/soc/codecs/ac97.c b/sound/soc/codecs/ac97.c index 932299bb5d1e..69bd0acc81c8 100644 --- a/sound/soc/codecs/ac97.c +++ b/sound/soc/codecs/ac97.c @@ -117,9 +117,6 @@ static int ac97_soc_probe(struct platform_device *pdev) if (ret < 0) goto bus_err; - ret = snd_soc_init_card(socdev); - if (ret < 0) - goto bus_err; return 0; bus_err: diff --git a/sound/soc/codecs/ad1836.c b/sound/soc/codecs/ad1836.c index c48485f2c55d..2e360c243075 100644 --- a/sound/soc/codecs/ad1836.c +++ b/sound/soc/codecs/ad1836.c @@ -387,12 +387,6 @@ static int ad1836_probe(struct platform_device *pdev) snd_soc_dapm_add_routes(codec, audio_paths, ARRAY_SIZE(audio_paths)); snd_soc_dapm_new_widgets(codec); - ret = snd_soc_init_card(socdev); - if (ret < 0) { - dev_err(codec->dev, "failed to register card: %d\n", ret); - goto card_err; - } - return ret; card_err: diff --git a/sound/soc/codecs/ad1938.c b/sound/soc/codecs/ad1938.c index 34b30efc3cb0..09c008ad1476 100644 --- a/sound/soc/codecs/ad1938.c +++ b/sound/soc/codecs/ad1938.c @@ -596,12 +596,6 @@ static int ad1938_probe(struct platform_device *pdev) ad1938_set_bias_level(codec, SND_SOC_BIAS_STANDBY); - ret = snd_soc_init_card(socdev); - if (ret < 0) { - dev_err(codec->dev, "failed to register card: %d\n", ret); - goto card_err; - } - return ret; card_err: diff --git a/sound/soc/codecs/ad1980.c b/sound/soc/codecs/ad1980.c index d7440a982d22..39c0f7584e65 100644 --- a/sound/soc/codecs/ad1980.c +++ b/sound/soc/codecs/ad1980.c @@ -257,11 +257,6 @@ static int ad1980_soc_probe(struct platform_device *pdev) snd_soc_add_controls(codec, ad1980_snd_ac97_controls, ARRAY_SIZE(ad1980_snd_ac97_controls)); - ret = snd_soc_init_card(socdev); - if (ret < 0) { - printk(KERN_ERR "ad1980: failed to register card\n"); - goto reset_err; - } return 0; diff --git a/sound/soc/codecs/ad73311.c b/sound/soc/codecs/ad73311.c index e61dac5e7b8f..d2fcc601722c 100644 --- a/sound/soc/codecs/ad73311.c +++ b/sound/soc/codecs/ad73311.c @@ -64,16 +64,8 @@ static int ad73311_soc_probe(struct platform_device *pdev) goto pcm_err; } - ret = snd_soc_init_card(socdev); - if (ret < 0) { - printk(KERN_ERR "ad73311: failed to register card\n"); - goto register_err; - } - return ret; -register_err: - snd_soc_free_pcms(socdev); pcm_err: kfree(socdev->card->codec); socdev->card->codec = NULL; diff --git a/sound/soc/codecs/ak4104.c b/sound/soc/codecs/ak4104.c index 4d47bc4f7428..3a14c6fc4f5e 100644 --- a/sound/soc/codecs/ak4104.c +++ b/sound/soc/codecs/ak4104.c @@ -313,14 +313,6 @@ static int ak4104_probe(struct platform_device *pdev) return ret; } - /* Register the socdev */ - ret = snd_soc_init_card(socdev); - if (ret < 0) { - dev_err(codec->dev, "failed to register card\n"); - snd_soc_free_pcms(socdev); - return ret; - } - return 0; } diff --git a/sound/soc/codecs/ak4535.c b/sound/soc/codecs/ak4535.c index 0abec0d29a96..57a6846a9a1f 100644 --- a/sound/soc/codecs/ak4535.c +++ b/sound/soc/codecs/ak4535.c @@ -485,17 +485,9 @@ static int ak4535_init(struct snd_soc_device *socdev) snd_soc_add_controls(codec, ak4535_snd_controls, ARRAY_SIZE(ak4535_snd_controls)); ak4535_add_widgets(codec); - ret = snd_soc_init_card(socdev); - if (ret < 0) { - printk(KERN_ERR "ak4535: failed to register card\n"); - goto card_err; - } return ret; -card_err: - snd_soc_free_pcms(socdev); - snd_soc_dapm_free(socdev); pcm_err: kfree(codec->reg_cache); diff --git a/sound/soc/codecs/ak4642.c b/sound/soc/codecs/ak4642.c index e057c7b578df..b69861d52161 100644 --- a/sound/soc/codecs/ak4642.c +++ b/sound/soc/codecs/ak4642.c @@ -442,18 +442,9 @@ static int ak4642_probe(struct platform_device *pdev) goto pcm_err; } - ret = snd_soc_init_card(socdev); - if (ret < 0) { - printk(KERN_ERR "ak4642: failed to register card\n"); - goto card_err; - } - dev_info(&pdev->dev, "AK4642 Audio Codec %s", AK4642_VERSION); return ret; -card_err: - snd_soc_free_pcms(socdev); - snd_soc_dapm_free(socdev); pcm_err: return ret; diff --git a/sound/soc/codecs/ak4671.c b/sound/soc/codecs/ak4671.c index b61214d1c5de..364832ccd748 100644 --- a/sound/soc/codecs/ak4671.c +++ b/sound/soc/codecs/ak4671.c @@ -662,19 +662,10 @@ static int ak4671_probe(struct platform_device *pdev) ARRAY_SIZE(ak4671_snd_controls)); ak4671_add_widgets(codec); - ret = snd_soc_init_card(socdev); - if (ret < 0) { - dev_err(codec->dev, "failed to register card: %d\n", ret); - goto card_err; - } - ak4671_set_bias_level(codec, SND_SOC_BIAS_STANDBY); return ret; -card_err: - snd_soc_free_pcms(socdev); - snd_soc_dapm_free(socdev); pcm_err: return ret; } diff --git a/sound/soc/codecs/cs4270.c b/sound/soc/codecs/cs4270.c index 565842dcfc65..ffe122d1cd76 100644 --- a/sound/soc/codecs/cs4270.c +++ b/sound/soc/codecs/cs4270.c @@ -599,13 +599,6 @@ static int cs4270_probe(struct platform_device *pdev) goto error_free_pcms; } - /* And finally, register the socdev */ - ret = snd_soc_init_card(socdev); - if (ret < 0) { - dev_err(codec->dev, "failed to register card\n"); - goto error_free_pcms; - } - return 0; error_free_pcms: diff --git a/sound/soc/codecs/cx20442.c b/sound/soc/codecs/cx20442.c index 38eac9c866e1..d7f9bf18b72e 100644 --- a/sound/soc/codecs/cx20442.c +++ b/sound/soc/codecs/cx20442.c @@ -355,12 +355,6 @@ static int cx20442_codec_probe(struct platform_device *pdev) cx20442_add_widgets(codec); - ret = snd_soc_init_card(socdev); - if (ret < 0) { - dev_err(&pdev->dev, "failed to register card\n"); - goto card_err; - } - return ret; card_err: diff --git a/sound/soc/codecs/pcm3008.c b/sound/soc/codecs/pcm3008.c index 5cda9e6b5a74..2afcd0a8669d 100644 --- a/sound/soc/codecs/pcm3008.c +++ b/sound/soc/codecs/pcm3008.c @@ -90,13 +90,6 @@ static int pcm3008_soc_probe(struct platform_device *pdev) goto pcm_err; } - /* Register Card. */ - ret = snd_soc_init_card(socdev); - if (ret < 0) { - printk(KERN_ERR "pcm3008: failed to register card\n"); - goto card_err; - } - /* DEM1 DEM0 DE-EMPHASIS_MODE * Low Low De-emphasis 44.1 kHz ON * Low High De-emphasis OFF @@ -136,8 +129,6 @@ static int pcm3008_soc_probe(struct platform_device *pdev) gpio_err: pcm3008_gpio_free(setup); -card_err: - snd_soc_free_pcms(socdev); pcm_err: kfree(socdev->card->codec); diff --git a/sound/soc/codecs/ssm2602.c b/sound/soc/codecs/ssm2602.c index c550750c79c0..b3130339d29a 100644 --- a/sound/soc/codecs/ssm2602.c +++ b/sound/soc/codecs/ssm2602.c @@ -613,17 +613,9 @@ static int ssm2602_init(struct snd_soc_device *socdev) snd_soc_add_controls(codec, ssm2602_snd_controls, ARRAY_SIZE(ssm2602_snd_controls)); ssm2602_add_widgets(codec); - ret = snd_soc_init_card(socdev); - if (ret < 0) { - pr_err("ssm2602: failed to register card\n"); - goto card_err; - } return ret; -card_err: - snd_soc_free_pcms(socdev); - snd_soc_dapm_free(socdev); pcm_err: kfree(codec->reg_cache); return ret; diff --git a/sound/soc/codecs/stac9766.c b/sound/soc/codecs/stac9766.c index befc6488c39a..bbc72c2ddfca 100644 --- a/sound/soc/codecs/stac9766.c +++ b/sound/soc/codecs/stac9766.c @@ -418,9 +418,6 @@ static int stac9766_codec_probe(struct platform_device *pdev) snd_soc_add_controls(codec, stac9766_snd_ac97_controls, ARRAY_SIZE(stac9766_snd_ac97_controls)); - ret = snd_soc_init_card(socdev); - if (ret < 0) - goto reset_err; return 0; reset_err: diff --git a/sound/soc/codecs/tlv320aic23.c b/sound/soc/codecs/tlv320aic23.c index 0b8dcb5cd729..ee8cb2c08b87 100644 --- a/sound/soc/codecs/tlv320aic23.c +++ b/sound/soc/codecs/tlv320aic23.c @@ -707,17 +707,9 @@ static int tlv320aic23_init(struct snd_soc_device *socdev) snd_soc_add_controls(codec, tlv320aic23_snd_controls, ARRAY_SIZE(tlv320aic23_snd_controls)); tlv320aic23_add_widgets(codec); - ret = snd_soc_init_card(socdev); - if (ret < 0) { - printk(KERN_ERR "tlv320aic23: failed to register card\n"); - goto card_err; - } return ret; -card_err: - snd_soc_free_pcms(socdev); - snd_soc_dapm_free(socdev); pcm_err: kfree(codec->reg_cache); return ret; diff --git a/sound/soc/codecs/tlv320aic26.c b/sound/soc/codecs/tlv320aic26.c index 3387d9e736ea..357b609196e3 100644 --- a/sound/soc/codecs/tlv320aic26.c +++ b/sound/soc/codecs/tlv320aic26.c @@ -356,18 +356,7 @@ static int aic26_probe(struct platform_device *pdev) ARRAY_SIZE(aic26_snd_controls)); WARN_ON(err < 0); - /* CODEC is setup, we can register the card now */ - dev_dbg(&pdev->dev, "Registering card\n"); - ret = snd_soc_init_card(socdev); - if (ret < 0) { - dev_err(&pdev->dev, "aic26: failed to register card\n"); - goto card_err; - } return 0; - - card_err: - snd_soc_free_pcms(socdev); - return ret; } static int aic26_remove(struct platform_device *pdev) diff --git a/sound/soc/codecs/tlv320aic3x.c b/sound/soc/codecs/tlv320aic3x.c index 3395cf945d56..03cad250f58d 100644 --- a/sound/soc/codecs/tlv320aic3x.c +++ b/sound/soc/codecs/tlv320aic3x.c @@ -1405,18 +1405,8 @@ static int aic3x_probe(struct platform_device *pdev) aic3x_add_widgets(codec); - ret = snd_soc_init_card(socdev); - if (ret < 0) { - printk(KERN_ERR "aic3x: failed to register card\n"); - goto card_err; - } - return ret; -card_err: - snd_soc_free_pcms(socdev); - snd_soc_dapm_free(socdev); - pcm_err: kfree(codec->reg_cache); return ret; diff --git a/sound/soc/codecs/tlv320dac33.c b/sound/soc/codecs/tlv320dac33.c index 3ca8934fc26c..bff476d65d05 100644 --- a/sound/soc/codecs/tlv320dac33.c +++ b/sound/soc/codecs/tlv320dac33.c @@ -960,16 +960,8 @@ static int dac33_soc_probe(struct platform_device *pdev) /* power on device */ dac33_set_bias_level(codec, SND_SOC_BIAS_STANDBY); - ret = snd_soc_init_card(socdev); - if (ret < 0) { - dev_err(codec->dev, "failed to register card\n"); - goto card_err; - } - return 0; -card_err: - snd_soc_free_pcms(socdev); - snd_soc_dapm_free(socdev); + pcm_err: dac33_hard_power(codec, 0); return ret; diff --git a/sound/soc/codecs/twl4030.c b/sound/soc/codecs/twl4030.c index c0b47dfc3328..928257b25111 100644 --- a/sound/soc/codecs/twl4030.c +++ b/sound/soc/codecs/twl4030.c @@ -2155,19 +2155,7 @@ static int twl4030_soc_probe(struct platform_device *pdev) ARRAY_SIZE(twl4030_snd_controls)); twl4030_add_widgets(codec); - ret = snd_soc_init_card(socdev); - if (ret < 0) { - dev_err(&pdev->dev, "failed to register card\n"); - goto card_err; - } - return 0; - -card_err: - snd_soc_free_pcms(socdev); - snd_soc_dapm_free(socdev); - - return ret; } static int twl4030_soc_remove(struct platform_device *pdev) diff --git a/sound/soc/codecs/uda134x.c b/sound/soc/codecs/uda134x.c index c33b92edbded..aa40d985138f 100644 --- a/sound/soc/codecs/uda134x.c +++ b/sound/soc/codecs/uda134x.c @@ -562,17 +562,8 @@ static int uda134x_soc_probe(struct platform_device *pdev) goto pcm_err; } - ret = snd_soc_init_card(socdev); - if (ret < 0) { - printk(KERN_ERR "UDA134X: failed to register card\n"); - goto card_err; - } - return 0; -card_err: - snd_soc_free_pcms(socdev); - snd_soc_dapm_free(socdev); pcm_err: kfree(codec->reg_cache); reg_err: diff --git a/sound/soc/codecs/uda1380.c b/sound/soc/codecs/uda1380.c index 92ec03442154..a42e47d94630 100644 --- a/sound/soc/codecs/uda1380.c +++ b/sound/soc/codecs/uda1380.c @@ -713,17 +713,9 @@ static int uda1380_probe(struct platform_device *pdev) snd_soc_add_controls(codec, uda1380_snd_controls, ARRAY_SIZE(uda1380_snd_controls)); uda1380_add_widgets(codec); - ret = snd_soc_init_card(socdev); - if (ret < 0) { - dev_err(codec->dev, "failed to register card: %d\n", ret); - goto card_err; - } return ret; -card_err: - snd_soc_free_pcms(socdev); - snd_soc_dapm_free(socdev); pcm_err: return ret; } diff --git a/sound/soc/codecs/wm8350.c b/sound/soc/codecs/wm8350.c index 714114b50d18..2e35a354b166 100644 --- a/sound/soc/codecs/wm8350.c +++ b/sound/soc/codecs/wm8350.c @@ -1501,18 +1501,7 @@ static int wm8350_probe(struct platform_device *pdev) wm8350_set_bias_level(codec, SND_SOC_BIAS_STANDBY); - ret = snd_soc_init_card(socdev); - if (ret < 0) { - dev_err(&pdev->dev, "failed to register card\n"); - goto card_err; - } - return 0; - -card_err: - snd_soc_free_pcms(socdev); - snd_soc_dapm_free(socdev); - return ret; } static int wm8350_remove(struct platform_device *pdev) diff --git a/sound/soc/codecs/wm8400.c b/sound/soc/codecs/wm8400.c index bd7eecba20fe..0e30997c8db0 100644 --- a/sound/soc/codecs/wm8400.c +++ b/sound/soc/codecs/wm8400.c @@ -1400,12 +1400,6 @@ static int wm8400_probe(struct platform_device *pdev) wm8400_add_controls(codec); wm8400_add_widgets(codec); - ret = snd_soc_init_card(socdev); - if (ret < 0) { - dev_err(&pdev->dev, "failed to register card\n"); - goto card_err; - } - return ret; card_err: diff --git a/sound/soc/codecs/wm8510.c b/sound/soc/codecs/wm8510.c index 5702435af81b..e3c21ebcc08e 100644 --- a/sound/soc/codecs/wm8510.c +++ b/sound/soc/codecs/wm8510.c @@ -604,16 +604,9 @@ static int wm8510_init(struct snd_soc_device *socdev, snd_soc_add_controls(codec, wm8510_snd_controls, ARRAY_SIZE(wm8510_snd_controls)); wm8510_add_widgets(codec); - ret = snd_soc_init_card(socdev); - if (ret < 0) { - printk(KERN_ERR "wm8510: failed to register card\n"); - goto card_err; - } + return ret; -card_err: - snd_soc_free_pcms(socdev); - snd_soc_dapm_free(socdev); err: kfree(codec->reg_cache); return ret; diff --git a/sound/soc/codecs/wm8523.c b/sound/soc/codecs/wm8523.c index 268cab21c2cc..2e2b01d6c82b 100644 --- a/sound/soc/codecs/wm8523.c +++ b/sound/soc/codecs/wm8523.c @@ -448,17 +448,9 @@ static int wm8523_probe(struct platform_device *pdev) snd_soc_add_controls(codec, wm8523_snd_controls, ARRAY_SIZE(wm8523_snd_controls)); wm8523_add_widgets(codec); - ret = snd_soc_init_card(socdev); - if (ret < 0) { - dev_err(codec->dev, "failed to register card: %d\n", ret); - goto card_err; - } return ret; -card_err: - snd_soc_free_pcms(socdev); - snd_soc_dapm_free(socdev); pcm_err: return ret; } diff --git a/sound/soc/codecs/wm8580.c b/sound/soc/codecs/wm8580.c index a09b23e03664..dde50d118181 100644 --- a/sound/soc/codecs/wm8580.c +++ b/sound/soc/codecs/wm8580.c @@ -800,17 +800,9 @@ static int wm8580_probe(struct platform_device *pdev) snd_soc_add_controls(codec, wm8580_snd_controls, ARRAY_SIZE(wm8580_snd_controls)); wm8580_add_widgets(codec); - ret = snd_soc_init_card(socdev); - if (ret < 0) { - dev_err(codec->dev, "failed to register card: %d\n", ret); - goto card_err; - } return ret; -card_err: - snd_soc_free_pcms(socdev); - snd_soc_dapm_free(socdev); pcm_err: return ret; } diff --git a/sound/soc/codecs/wm8711.c b/sound/soc/codecs/wm8711.c index 54189fbf9e93..70e0675b5d4a 100644 --- a/sound/soc/codecs/wm8711.c +++ b/sound/soc/codecs/wm8711.c @@ -404,17 +404,9 @@ static int wm8711_probe(struct platform_device *pdev) snd_soc_add_controls(codec, wm8711_snd_controls, ARRAY_SIZE(wm8711_snd_controls)); wm8711_add_widgets(codec); - ret = snd_soc_init_card(socdev); - if (ret < 0) { - dev_err(codec->dev, "failed to register card: %d\n", ret); - goto card_err; - } return ret; -card_err: - snd_soc_free_pcms(socdev); - snd_soc_dapm_free(socdev); pcm_err: return ret; } diff --git a/sound/soc/codecs/wm8727.c b/sound/soc/codecs/wm8727.c index 7df5a17eb733..d8ffbd641d71 100644 --- a/sound/soc/codecs/wm8727.c +++ b/sound/soc/codecs/wm8727.c @@ -68,17 +68,9 @@ static int wm8727_soc_probe(struct platform_device *pdev) printk(KERN_ERR "wm8727: failed to create pcms\n"); goto pcm_err; } - /* register card */ - ret = snd_soc_init_card(socdev); - if (ret < 0) { - printk(KERN_ERR "wm8727: failed to register card\n"); - goto register_err; - } return ret; -register_err: - snd_soc_free_pcms(socdev); pcm_err: kfree(socdev->card->codec); socdev->card->codec = NULL; diff --git a/sound/soc/codecs/wm8728.c b/sound/soc/codecs/wm8728.c index 16e969a762c3..1252a8a486a6 100644 --- a/sound/soc/codecs/wm8728.c +++ b/sound/soc/codecs/wm8728.c @@ -287,17 +287,9 @@ static int wm8728_init(struct snd_soc_device *socdev, snd_soc_add_controls(codec, wm8728_snd_controls, ARRAY_SIZE(wm8728_snd_controls)); wm8728_add_widgets(codec); - ret = snd_soc_init_card(socdev); - if (ret < 0) { - printk(KERN_ERR "wm8728: failed to register card\n"); - goto card_err; - } return ret; -card_err: - snd_soc_free_pcms(socdev); - snd_soc_dapm_free(socdev); err: kfree(codec->reg_cache); return ret; diff --git a/sound/soc/codecs/wm8731.c b/sound/soc/codecs/wm8731.c index bb95af950971..e3675e7a9813 100644 --- a/sound/soc/codecs/wm8731.c +++ b/sound/soc/codecs/wm8731.c @@ -495,17 +495,9 @@ static int wm8731_probe(struct platform_device *pdev) snd_soc_add_controls(codec, wm8731_snd_controls, ARRAY_SIZE(wm8731_snd_controls)); wm8731_add_widgets(codec); - ret = snd_soc_init_card(socdev); - if (ret < 0) { - dev_err(codec->dev, "failed to register card: %d\n", ret); - goto card_err; - } return ret; -card_err: - snd_soc_free_pcms(socdev); - snd_soc_dapm_free(socdev); pcm_err: return ret; } diff --git a/sound/soc/codecs/wm8750.c b/sound/soc/codecs/wm8750.c index 4ba1e7e93fb4..50a3d6590588 100644 --- a/sound/soc/codecs/wm8750.c +++ b/sound/soc/codecs/wm8750.c @@ -772,16 +772,8 @@ static int wm8750_init(struct snd_soc_device *socdev, snd_soc_add_controls(codec, wm8750_snd_controls, ARRAY_SIZE(wm8750_snd_controls)); wm8750_add_widgets(codec); - ret = snd_soc_init_card(socdev); - if (ret < 0) { - printk(KERN_ERR "wm8750: failed to register card\n"); - goto card_err; - } return ret; -card_err: - snd_soc_free_pcms(socdev); - snd_soc_dapm_free(socdev); err: kfree(codec->reg_cache); return ret; diff --git a/sound/soc/codecs/wm8753.c b/sound/soc/codecs/wm8753.c index 8f7305257d29..c652bc04cc81 100644 --- a/sound/soc/codecs/wm8753.c +++ b/sound/soc/codecs/wm8753.c @@ -1583,18 +1583,9 @@ static int wm8753_probe(struct platform_device *pdev) snd_soc_add_controls(codec, wm8753_snd_controls, ARRAY_SIZE(wm8753_snd_controls)); wm8753_add_widgets(codec); - ret = snd_soc_init_card(socdev); - if (ret < 0) { - printk(KERN_ERR "wm8753: failed to register card\n"); - goto card_err; - } return 0; -card_err: - snd_soc_free_pcms(socdev); - snd_soc_dapm_free(socdev); - pcm_err: return ret; } diff --git a/sound/soc/codecs/wm8776.c b/sound/soc/codecs/wm8776.c index a0bbb28eed75..ab2c0da18091 100644 --- a/sound/soc/codecs/wm8776.c +++ b/sound/soc/codecs/wm8776.c @@ -447,17 +447,8 @@ static int wm8776_probe(struct platform_device *pdev) ARRAY_SIZE(wm8776_dapm_widgets)); snd_soc_dapm_add_routes(codec, routes, ARRAY_SIZE(routes)); - ret = snd_soc_init_card(socdev); - if (ret < 0) { - dev_err(codec->dev, "failed to register card: %d\n", ret); - goto card_err; - } - return ret; -card_err: - snd_soc_free_pcms(socdev); - snd_soc_dapm_free(socdev); pcm_err: return ret; } diff --git a/sound/soc/codecs/wm8900.c b/sound/soc/codecs/wm8900.c index b48804b5cacd..0d185cb6418d 100644 --- a/sound/soc/codecs/wm8900.c +++ b/sound/soc/codecs/wm8900.c @@ -1353,12 +1353,6 @@ static int wm8900_probe(struct platform_device *pdev) ARRAY_SIZE(wm8900_snd_controls)); wm8900_add_widgets(codec); - ret = snd_soc_init_card(socdev); - if (ret < 0) { - dev_err(&pdev->dev, "Failed to register card\n"); - goto card_err; - } - return ret; card_err: diff --git a/sound/soc/codecs/wm8903.c b/sound/soc/codecs/wm8903.c index 94cdb8130415..bfeff4ee5de9 100644 --- a/sound/soc/codecs/wm8903.c +++ b/sound/soc/codecs/wm8903.c @@ -1695,17 +1695,8 @@ static int wm8903_probe(struct platform_device *pdev) ARRAY_SIZE(wm8903_snd_controls)); wm8903_add_widgets(socdev->card->codec); - ret = snd_soc_init_card(socdev); - if (ret < 0) { - dev_err(&pdev->dev, "wm8903: failed to register card\n"); - goto card_err; - } - return ret; -card_err: - snd_soc_free_pcms(socdev); - snd_soc_dapm_free(socdev); err: return ret; } diff --git a/sound/soc/codecs/wm8940.c b/sound/soc/codecs/wm8940.c index 8d4fd3c08c09..fc80aa6c913c 100644 --- a/sound/soc/codecs/wm8940.c +++ b/sound/soc/codecs/wm8940.c @@ -731,12 +731,6 @@ static int wm8940_probe(struct platform_device *pdev) if (ret) goto error_free_pcms; - ret = snd_soc_init_card(socdev); - if (ret < 0) { - dev_err(codec->dev, "failed to register card: %d\n", ret); - goto error_free_pcms; - } - return ret; error_free_pcms: diff --git a/sound/soc/codecs/wm8960.c b/sound/soc/codecs/wm8960.c index b9b096a85396..40390afa75f3 100644 --- a/sound/soc/codecs/wm8960.c +++ b/sound/soc/codecs/wm8960.c @@ -713,17 +713,9 @@ static int wm8960_probe(struct platform_device *pdev) snd_soc_add_controls(codec, wm8960_snd_controls, ARRAY_SIZE(wm8960_snd_controls)); wm8960_add_widgets(codec); - ret = snd_soc_init_card(socdev); - if (ret < 0) { - dev_err(codec->dev, "failed to register card: %d\n", ret); - goto card_err; - } return ret; -card_err: - snd_soc_free_pcms(socdev); - snd_soc_dapm_free(socdev); pcm_err: return ret; } diff --git a/sound/soc/codecs/wm8961.c b/sound/soc/codecs/wm8961.c index b5c6f2cd5ae2..07e389574db1 100644 --- a/sound/soc/codecs/wm8961.c +++ b/sound/soc/codecs/wm8961.c @@ -988,17 +988,8 @@ static int wm8961_probe(struct platform_device *pdev) snd_soc_dapm_add_routes(codec, audio_paths, ARRAY_SIZE(audio_paths)); snd_soc_dapm_new_widgets(codec); - ret = snd_soc_init_card(socdev); - if (ret < 0) { - dev_err(codec->dev, "failed to register card: %d\n", ret); - goto card_err; - } - return ret; -card_err: - snd_soc_free_pcms(socdev); - snd_soc_dapm_free(socdev); pcm_err: return ret; } diff --git a/sound/soc/codecs/wm8971.c b/sound/soc/codecs/wm8971.c index d66efb0546ea..56a66e89ab91 100644 --- a/sound/soc/codecs/wm8971.c +++ b/sound/soc/codecs/wm8971.c @@ -703,16 +703,9 @@ static int wm8971_init(struct snd_soc_device *socdev, snd_soc_add_controls(codec, wm8971_snd_controls, ARRAY_SIZE(wm8971_snd_controls)); wm8971_add_widgets(codec); - ret = snd_soc_init_card(socdev); - if (ret < 0) { - printk(KERN_ERR "wm8971: failed to register card\n"); - goto card_err; - } + return ret; -card_err: - snd_soc_free_pcms(socdev); - snd_soc_dapm_free(socdev); err: kfree(codec->reg_cache); return ret; diff --git a/sound/soc/codecs/wm8974.c b/sound/soc/codecs/wm8974.c index eff29331235b..c245f0ee0ec2 100644 --- a/sound/soc/codecs/wm8974.c +++ b/sound/soc/codecs/wm8974.c @@ -641,17 +641,9 @@ static int wm8974_probe(struct platform_device *pdev) snd_soc_add_controls(codec, wm8974_snd_controls, ARRAY_SIZE(wm8974_snd_controls)); wm8974_add_widgets(codec); - ret = snd_soc_init_card(socdev); - if (ret < 0) { - dev_err(codec->dev, "failed to register card: %d\n", ret); - goto card_err; - } return ret; -card_err: - snd_soc_free_pcms(socdev); - snd_soc_dapm_free(socdev); pcm_err: return ret; } diff --git a/sound/soc/codecs/wm8988.c b/sound/soc/codecs/wm8988.c index d8d8f68b81ea..bee292e37d1b 100644 --- a/sound/soc/codecs/wm8988.c +++ b/sound/soc/codecs/wm8988.c @@ -792,17 +792,8 @@ static int wm8988_probe(struct platform_device *pdev) snd_soc_dapm_add_routes(codec, audio_map, ARRAY_SIZE(audio_map)); snd_soc_dapm_new_widgets(codec); - ret = snd_soc_init_card(socdev); - if (ret < 0) { - dev_err(codec->dev, "failed to register card: %d\n", ret); - goto card_err; - } - return ret; -card_err: - snd_soc_free_pcms(socdev); - snd_soc_dapm_free(socdev); pcm_err: return ret; } diff --git a/sound/soc/codecs/wm8990.c b/sound/soc/codecs/wm8990.c index f657e9a5fe26..e43cb2c8b915 100644 --- a/sound/soc/codecs/wm8990.c +++ b/sound/soc/codecs/wm8990.c @@ -1409,16 +1409,9 @@ static int wm8990_init(struct snd_soc_device *socdev) snd_soc_add_controls(codec, wm8990_snd_controls, ARRAY_SIZE(wm8990_snd_controls)); wm8990_add_widgets(codec); - ret = snd_soc_init_card(socdev); - if (ret < 0) { - printk(KERN_ERR "wm8990: failed to register card\n"); - goto card_err; - } + return ret; -card_err: - snd_soc_free_pcms(socdev); - snd_soc_dapm_free(socdev); pcm_err: kfree(codec->reg_cache); return ret; diff --git a/sound/soc/codecs/wm8993.c b/sound/soc/codecs/wm8993.c index dac397712147..0d4d2be92b64 100644 --- a/sound/soc/codecs/wm8993.c +++ b/sound/soc/codecs/wm8993.c @@ -1466,17 +1466,8 @@ static int wm8993_probe(struct platform_device *pdev) snd_soc_dapm_new_widgets(codec); - ret = snd_soc_init_card(socdev); - if (ret < 0) { - dev_err(codec->dev, "failed to register card\n"); - goto card_err; - } - return ret; -card_err: - snd_soc_free_pcms(socdev); - snd_soc_dapm_free(socdev); err: return ret; } diff --git a/sound/soc/codecs/wm9081.c b/sound/soc/codecs/wm9081.c index 4cb6b104b729..3f1f84421312 100644 --- a/sound/soc/codecs/wm9081.c +++ b/sound/soc/codecs/wm9081.c @@ -1264,17 +1264,8 @@ static int wm9081_probe(struct platform_device *pdev) snd_soc_dapm_add_routes(codec, audio_paths, ARRAY_SIZE(audio_paths)); snd_soc_dapm_new_widgets(codec); - ret = snd_soc_init_card(socdev); - if (ret < 0) { - dev_err(codec->dev, "failed to register card: %d\n", ret); - goto card_err; - } - return ret; -card_err: - snd_soc_free_pcms(socdev); - snd_soc_dapm_free(socdev); pcm_err: return ret; } diff --git a/sound/soc/codecs/wm9705.c b/sound/soc/codecs/wm9705.c index e7d2840d9e59..0e817b8705cd 100644 --- a/sound/soc/codecs/wm9705.c +++ b/sound/soc/codecs/wm9705.c @@ -403,16 +403,8 @@ static int wm9705_soc_probe(struct platform_device *pdev) ARRAY_SIZE(wm9705_snd_ac97_controls)); wm9705_add_widgets(codec); - ret = snd_soc_init_card(socdev); - if (ret < 0) { - printk(KERN_ERR "wm9705: failed to register card\n"); - goto reset_err; - } - return 0; -reset_err: - snd_soc_free_pcms(socdev); pcm_err: snd_soc_free_ac97_codec(codec); codec_err: diff --git a/sound/soc/codecs/wm9712.c b/sound/soc/codecs/wm9712.c index 1fd4e88f50cf..155cacf124ea 100644 --- a/sound/soc/codecs/wm9712.c +++ b/sound/soc/codecs/wm9712.c @@ -695,17 +695,9 @@ static int wm9712_soc_probe(struct platform_device *pdev) snd_soc_add_controls(codec, wm9712_snd_ac97_controls, ARRAY_SIZE(wm9712_snd_ac97_controls)); wm9712_add_widgets(codec); - ret = snd_soc_init_card(socdev); - if (ret < 0) { - printk(KERN_ERR "wm9712: failed to register card\n"); - goto reset_err; - } return 0; -reset_err: - snd_soc_free_pcms(socdev); - pcm_err: snd_soc_free_ac97_codec(codec); diff --git a/sound/soc/codecs/wm9713.c b/sound/soc/codecs/wm9713.c index ca3d449ed89e..5f81ecd20a81 100644 --- a/sound/soc/codecs/wm9713.c +++ b/sound/soc/codecs/wm9713.c @@ -1247,13 +1247,8 @@ static int wm9713_soc_probe(struct platform_device *pdev) snd_soc_add_controls(codec, wm9713_snd_ac97_controls, ARRAY_SIZE(wm9713_snd_ac97_controls)); wm9713_add_widgets(codec); - ret = snd_soc_init_card(socdev); - if (ret < 0) - goto reset_err; - return 0; -reset_err: - snd_soc_free_pcms(socdev); + return 0; pcm_err: snd_soc_free_ac97_codec(codec); diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index d81a16187769..e2b6d75f16e3 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -970,6 +970,7 @@ static void snd_soc_instantiate_card(struct snd_soc_card *card) struct platform_device, dev); struct snd_soc_codec_device *codec_dev = card->socdev->codec_dev; + struct snd_soc_codec *codec; struct snd_soc_platform *platform; struct snd_soc_dai *dai; int i, found, ret, ac97; @@ -1058,6 +1059,7 @@ static void snd_soc_instantiate_card(struct snd_soc_card *card) if (ret < 0) goto cpu_dai_err; } + codec = card->codec; if (platform->probe) { ret = platform->probe(pdev); @@ -1072,10 +1074,72 @@ static void snd_soc_instantiate_card(struct snd_soc_card *card) INIT_WORK(&card->deferred_resume_work, soc_resume_deferred); #endif + for (i = 0; i < card->num_links; i++) { + if (card->dai_link[i].init) { + ret = card->dai_link[i].init(codec); + if (ret < 0) { + printk(KERN_ERR "asoc: failed to init %s\n", + card->dai_link[i].stream_name); + continue; + } + } + if (card->dai_link[i].codec_dai->ac97_control) { + ac97 = 1; + snd_ac97_dev_add_pdata(codec->ac97, + card->dai_link[i].cpu_dai->ac97_pdata); + } + } + + snprintf(codec->card->shortname, sizeof(codec->card->shortname), + "%s", card->name); + snprintf(codec->card->longname, sizeof(codec->card->longname), + "%s (%s)", card->name, codec->name); + + /* Make sure all DAPM widgets are instantiated */ + snd_soc_dapm_new_widgets(codec); + + ret = snd_card_register(codec->card); + if (ret < 0) { + printk(KERN_ERR "asoc: failed to register soundcard for %s\n", + codec->name); + goto card_err; + } + + mutex_lock(&codec->mutex); +#ifdef CONFIG_SND_SOC_AC97_BUS + /* Only instantiate AC97 if not already done by the adaptor + * for the generic AC97 subsystem. + */ + if (ac97 && strcmp(codec->name, "AC97") != 0) { + ret = soc_ac97_dev_register(codec); + if (ret < 0) { + printk(KERN_ERR "asoc: AC97 device register failed\n"); + snd_card_free(codec->card); + mutex_unlock(&codec->mutex); + goto card_err; + } + } +#endif + + ret = snd_soc_dapm_sys_add(card->socdev->dev); + if (ret < 0) + printk(KERN_WARNING "asoc: failed to add dapm sysfs entries\n"); + + ret = device_create_file(card->socdev->dev, &dev_attr_codec_reg); + if (ret < 0) + printk(KERN_WARNING "asoc: failed to add codec sysfs files\n"); + + soc_init_codec_debugfs(codec); + mutex_unlock(&codec->mutex); + card->instantiated = 1; return; +card_err: + if (platform->remove) + platform->remove(pdev); + platform_err: if (codec_dev->remove) codec_dev->remove(pdev); @@ -1453,83 +1517,6 @@ int snd_soc_new_pcms(struct snd_soc_device *socdev, int idx, const char *xid) } EXPORT_SYMBOL_GPL(snd_soc_new_pcms); -/** - * snd_soc_init_card - register sound card - * @socdev: the SoC audio device - * - * Register a SoC sound card. Also registers an AC97 device if the - * codec is AC97 for ad hoc devices. - * - * Returns 0 for success, else error. - */ -int snd_soc_init_card(struct snd_soc_device *socdev) -{ - struct snd_soc_card *card = socdev->card; - struct snd_soc_codec *codec = card->codec; - int ret = 0, i, ac97 = 0, err = 0; - - for (i = 0; i < card->num_links; i++) { - if (card->dai_link[i].init) { - err = card->dai_link[i].init(codec); - if (err < 0) { - printk(KERN_ERR "asoc: failed to init %s\n", - card->dai_link[i].stream_name); - continue; - } - } - if (card->dai_link[i].codec_dai->ac97_control) { - ac97 = 1; - snd_ac97_dev_add_pdata(codec->ac97, - card->dai_link[i].cpu_dai->ac97_pdata); - } - } - snprintf(codec->card->shortname, sizeof(codec->card->shortname), - "%s", card->name); - snprintf(codec->card->longname, sizeof(codec->card->longname), - "%s (%s)", card->name, codec->name); - - /* Make sure all DAPM widgets are instantiated */ - snd_soc_dapm_new_widgets(codec); - - ret = snd_card_register(codec->card); - if (ret < 0) { - printk(KERN_ERR "asoc: failed to register soundcard for %s\n", - codec->name); - goto out; - } - - mutex_lock(&codec->mutex); -#ifdef CONFIG_SND_SOC_AC97_BUS - /* Only instantiate AC97 if not already done by the adaptor - * for the generic AC97 subsystem. - */ - if (ac97 && strcmp(codec->name, "AC97") != 0) { - ret = soc_ac97_dev_register(codec); - if (ret < 0) { - printk(KERN_ERR "asoc: AC97 device register failed\n"); - snd_card_free(codec->card); - mutex_unlock(&codec->mutex); - goto out; - } - } -#endif - - err = snd_soc_dapm_sys_add(socdev->dev); - if (err < 0) - printk(KERN_WARNING "asoc: failed to add dapm sysfs entries\n"); - - err = device_create_file(socdev->dev, &dev_attr_codec_reg); - if (err < 0) - printk(KERN_WARNING "asoc: failed to add codec sysfs files\n"); - - soc_init_codec_debugfs(codec); - mutex_unlock(&codec->mutex); - -out: - return ret; -} -EXPORT_SYMBOL_GPL(snd_soc_init_card); - /** * snd_soc_free_pcms - free sound card and pcms * @socdev: the SoC audio device -- cgit v1.3-8-gc7d7 From 9dcaa7b25f2c8f6a0485854cd3641f585a154072 Mon Sep 17 00:00:00 2001 From: Rafael Ignacio Zurita Date: Tue, 3 Nov 2009 17:16:27 -0300 Subject: ALSA: sh: add SuperH DAC audio driver for ALSA V4 This is a port of the sound/oss/sh_dac_audio.c driver. The driver uses an on-chip 8-bit D/A converter, which has a speaker connected to one of its channels, found in several ancient HP machines. For interrupts it uses a high-resolution timer (hrtimer). Tested on SH7709 based hp6xx (HP Jornada 680/690 and HP Palmtop 620lx/660lx). Also, since OSS Emulation works, the old OSS sound/oss/sh_dac_audio.c driver would be obsolete soon, and it could be removed. Signed-off-by: Rafael Ignacio Zurita Acked-by: Paul Mundt Signed-off-by: Takashi Iwai --- arch/sh/boards/mach-hp6xx/setup.c | 55 ++++ arch/sh/include/mach-common/mach/hp6xx.h | 4 + include/sound/sh_dac_audio.h | 21 ++ sound/sh/Kconfig | 8 + sound/sh/Makefile | 2 + sound/sh/sh_dac_audio.c | 453 +++++++++++++++++++++++++++++++ 6 files changed, 543 insertions(+) create mode 100644 include/sound/sh_dac_audio.h create mode 100644 sound/sh/sh_dac_audio.c (limited to 'include') diff --git a/arch/sh/boards/mach-hp6xx/setup.c b/arch/sh/boards/mach-hp6xx/setup.c index 8f305b36358b..e6dd5e96321e 100644 --- a/arch/sh/boards/mach-hp6xx/setup.c +++ b/arch/sh/boards/mach-hp6xx/setup.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -51,9 +52,63 @@ static struct platform_device jornadakbd_device = { .id = -1, }; +static void dac_audio_start(struct dac_audio_pdata *pdata) +{ + u16 v; + u8 v8; + + /* HP Jornada 680/690 speaker on */ + v = inw(HD64461_GPADR); + v &= ~HD64461_GPADR_SPEAKER; + outw(v, HD64461_GPADR); + + /* HP Palmtop 620lx/660lx speaker on */ + v8 = inb(PKDR); + v8 &= ~PKDR_SPEAKER; + outb(v8, PKDR); + + sh_dac_enable(pdata->channel); +} + +static void dac_audio_stop(struct dac_audio_pdata *pdata) +{ + u16 v; + u8 v8; + + /* HP Jornada 680/690 speaker off */ + v = inw(HD64461_GPADR); + v |= HD64461_GPADR_SPEAKER; + outw(v, HD64461_GPADR); + + /* HP Palmtop 620lx/660lx speaker off */ + v8 = inb(PKDR); + v8 |= PKDR_SPEAKER; + outb(v8, PKDR); + + sh_dac_output(0, pdata->channel); + sh_dac_disable(pdata->channel); +} + +static struct dac_audio_pdata dac_audio_platform_data = { + .buffer_size = 64000, + .channel = 1, + .start = dac_audio_start, + .stop = dac_audio_stop, +}; + +static struct platform_device dac_audio_device = { + .name = "dac_audio", + .id = -1, + .dev = { + .platform_data = &dac_audio_platform_data, + } + +}; + static struct platform_device *hp6xx_devices[] __initdata = { &cf_ide_device, &jornadakbd_device, + &dac_audio_device, }; static void __init hp6xx_init_irq(void) diff --git a/arch/sh/include/mach-common/mach/hp6xx.h b/arch/sh/include/mach-common/mach/hp6xx.h index 0d4165a32dcd..bcc301ac12f4 100644 --- a/arch/sh/include/mach-common/mach/hp6xx.h +++ b/arch/sh/include/mach-common/mach/hp6xx.h @@ -29,6 +29,9 @@ #define PKDR_LED_GREEN 0x10 +/* HP Palmtop 620lx/660lx speaker on/off */ +#define PKDR_SPEAKER 0x20 + #define SCPDR_TS_SCAN_ENABLE 0x20 #define SCPDR_TS_SCAN_Y 0x02 #define SCPDR_TS_SCAN_X 0x01 @@ -42,6 +45,7 @@ #define ADC_CHANNEL_BACKUP 4 #define ADC_CHANNEL_CHARGE 5 +/* HP Jornada 680/690 speaker on/off */ #define HD64461_GPADR_SPEAKER 0x01 #define HD64461_GPADR_PCMCIA0 (0x02|0x08) diff --git a/include/sound/sh_dac_audio.h b/include/sound/sh_dac_audio.h new file mode 100644 index 000000000000..f5deaf1ddb9f --- /dev/null +++ b/include/sound/sh_dac_audio.h @@ -0,0 +1,21 @@ +/* + * SH_DAC specific configuration, for the dac_audio platform_device + * + * Copyright (C) 2009 Rafael Ignacio Zurita + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ + +#ifndef __INCLUDE_SH_DAC_AUDIO_H +#define __INCLUDE_SH_DAC_AUDIO_H + +struct dac_audio_pdata { + int buffer_size; + int channel; + void (*start)(struct dac_audio_pdata *pd); + void (*stop)(struct dac_audio_pdata *pd); +}; + +#endif /* __INCLUDE_SH_DAC_AUDIO_H */ diff --git a/sound/sh/Kconfig b/sound/sh/Kconfig index aed0f90c3919..61139f3c1614 100644 --- a/sound/sh/Kconfig +++ b/sound/sh/Kconfig @@ -19,5 +19,13 @@ config SND_AICA help ALSA Sound driver for the SEGA Dreamcast console. +config SND_SH_DAC_AUDIO + tristate "SuperH DAC audio support" + depends on SND + depends on CPU_SH3 && HIGH_RES_TIMERS + select SND_PCM + help + Say Y here to include support for the on-chip DAC. + endif # SND_SUPERH diff --git a/sound/sh/Makefile b/sound/sh/Makefile index 8fdcb6e26f00..7d09b5188cf7 100644 --- a/sound/sh/Makefile +++ b/sound/sh/Makefile @@ -3,6 +3,8 @@ # snd-aica-objs := aica.o +snd-sh_dac_audio-objs := sh_dac_audio.o # Toplevel Module Dependency obj-$(CONFIG_SND_AICA) += snd-aica.o +obj-$(CONFIG_SND_SH_DAC_AUDIO) += snd-sh_dac_audio.o diff --git a/sound/sh/sh_dac_audio.c b/sound/sh/sh_dac_audio.c new file mode 100644 index 000000000000..76d9ad27d91c --- /dev/null +++ b/sound/sh/sh_dac_audio.c @@ -0,0 +1,453 @@ +/* + * sh_dac_audio.c - SuperH DAC audio driver for ALSA + * + * Copyright (c) 2009 by Rafael Ignacio Zurita + * + * + * Based on sh_dac_audio.c (Copyright (C) 2004, 2005 by Andriy Skulysh) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +MODULE_AUTHOR("Rafael Ignacio Zurita "); +MODULE_DESCRIPTION("SuperH DAC audio driver"); +MODULE_LICENSE("GPL"); +MODULE_SUPPORTED_DEVICE("{{SuperH DAC audio support}}"); + +/* Module Parameters */ +static int index = SNDRV_DEFAULT_IDX1; +static char *id = SNDRV_DEFAULT_STR1; +module_param(index, int, 0444); +MODULE_PARM_DESC(index, "Index value for SuperH DAC audio."); +module_param(id, charp, 0444); +MODULE_PARM_DESC(id, "ID string for SuperH DAC audio."); + +/* main struct */ +struct snd_sh_dac { + struct snd_card *card; + struct snd_pcm_substream *substream; + struct hrtimer hrtimer; + ktime_t wakeups_per_second; + + int rate; + int empty; + char *data_buffer, *buffer_begin, *buffer_end; + int processed; /* bytes proccesed, to compare with period_size */ + int buffer_size; + struct dac_audio_pdata *pdata; +}; + + +static void dac_audio_start_timer(struct snd_sh_dac *chip) +{ + hrtimer_start(&chip->hrtimer, chip->wakeups_per_second, + HRTIMER_MODE_REL); +} + +static void dac_audio_stop_timer(struct snd_sh_dac *chip) +{ + hrtimer_cancel(&chip->hrtimer); +} + +static void dac_audio_reset(struct snd_sh_dac *chip) +{ + dac_audio_stop_timer(chip); + chip->buffer_begin = chip->buffer_end = chip->data_buffer; + chip->processed = 0; + chip->empty = 1; +} + +static void dac_audio_set_rate(struct snd_sh_dac *chip) +{ + chip->wakeups_per_second = ktime_set(0, 1000000000 / chip->rate); +} + + +/* PCM INTERFACE */ + +static struct snd_pcm_hardware snd_sh_dac_pcm_hw = { + .info = (SNDRV_PCM_INFO_MMAP | + SNDRV_PCM_INFO_MMAP_VALID | + SNDRV_PCM_INFO_INTERLEAVED | + SNDRV_PCM_INFO_HALF_DUPLEX), + .formats = SNDRV_PCM_FMTBIT_U8, + .rates = SNDRV_PCM_RATE_8000, + .rate_min = 8000, + .rate_max = 8000, + .channels_min = 1, + .channels_max = 1, + .buffer_bytes_max = (48*1024), + .period_bytes_min = 1, + .period_bytes_max = (48*1024), + .periods_min = 1, + .periods_max = 1024, +}; + +static int snd_sh_dac_pcm_open(struct snd_pcm_substream *substream) +{ + struct snd_sh_dac *chip = snd_pcm_substream_chip(substream); + struct snd_pcm_runtime *runtime = substream->runtime; + + runtime->hw = snd_sh_dac_pcm_hw; + + chip->substream = substream; + chip->buffer_begin = chip->buffer_end = chip->data_buffer; + chip->processed = 0; + chip->empty = 1; + + chip->pdata->start(chip->pdata); + + return 0; +} + +static int snd_sh_dac_pcm_close(struct snd_pcm_substream *substream) +{ + struct snd_sh_dac *chip = snd_pcm_substream_chip(substream); + + chip->substream = NULL; + + dac_audio_stop_timer(chip); + chip->pdata->stop(chip->pdata); + + return 0; +} + +static int snd_sh_dac_pcm_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *hw_params) +{ + return snd_pcm_lib_malloc_pages(substream, + params_buffer_bytes(hw_params)); +} + +static int snd_sh_dac_pcm_hw_free(struct snd_pcm_substream *substream) +{ + return snd_pcm_lib_free_pages(substream); +} + +static int snd_sh_dac_pcm_prepare(struct snd_pcm_substream *substream) +{ + struct snd_sh_dac *chip = snd_pcm_substream_chip(substream); + struct snd_pcm_runtime *runtime = chip->substream->runtime; + + chip->buffer_size = runtime->buffer_size; + memset(chip->data_buffer, 0, chip->pdata->buffer_size); + + return 0; +} + +static int snd_sh_dac_pcm_trigger(struct snd_pcm_substream *substream, int cmd) +{ + struct snd_sh_dac *chip = snd_pcm_substream_chip(substream); + + switch (cmd) { + case SNDRV_PCM_TRIGGER_START: + dac_audio_start_timer(chip); + break; + case SNDRV_PCM_TRIGGER_STOP: + chip->buffer_begin = chip->buffer_end = chip->data_buffer; + chip->processed = 0; + chip->empty = 1; + dac_audio_stop_timer(chip); + break; + default: + return -EINVAL; + } + + return 0; +} + +static int snd_sh_dac_pcm_copy(struct snd_pcm_substream *substream, int channel, + snd_pcm_uframes_t pos, void __user *src, snd_pcm_uframes_t count) +{ + /* channel is not used (interleaved data) */ + struct snd_sh_dac *chip = snd_pcm_substream_chip(substream); + struct snd_pcm_runtime *runtime = substream->runtime; + ssize_t b_count = frames_to_bytes(runtime , count); + ssize_t b_pos = frames_to_bytes(runtime , pos); + + if (count < 0) + return -EINVAL; + + if (!count) + return 0; + + memcpy_toio(chip->data_buffer + b_pos, src, b_count); + chip->buffer_end = chip->data_buffer + b_pos + b_count; + + if (chip->empty) { + chip->empty = 0; + dac_audio_start_timer(chip); + } + + return 0; +} + +static int snd_sh_dac_pcm_silence(struct snd_pcm_substream *substream, + int channel, snd_pcm_uframes_t pos, + snd_pcm_uframes_t count) +{ + /* channel is not used (interleaved data) */ + struct snd_sh_dac *chip = snd_pcm_substream_chip(substream); + struct snd_pcm_runtime *runtime = substream->runtime; + ssize_t b_count = frames_to_bytes(runtime , count); + ssize_t b_pos = frames_to_bytes(runtime , pos); + + if (count < 0) + return -EINVAL; + + if (!count) + return 0; + + memset_io(chip->data_buffer + b_pos, 0, b_count); + chip->buffer_end = chip->data_buffer + b_pos + b_count; + + if (chip->empty) { + chip->empty = 0; + dac_audio_start_timer(chip); + } + + return 0; +} + +static +snd_pcm_uframes_t snd_sh_dac_pcm_pointer(struct snd_pcm_substream *substream) +{ + struct snd_sh_dac *chip = snd_pcm_substream_chip(substream); + int pointer = chip->buffer_begin - chip->data_buffer; + + return pointer; +} + +/* pcm ops */ +static struct snd_pcm_ops snd_sh_dac_pcm_ops = { + .open = snd_sh_dac_pcm_open, + .close = snd_sh_dac_pcm_close, + .ioctl = snd_pcm_lib_ioctl, + .hw_params = snd_sh_dac_pcm_hw_params, + .hw_free = snd_sh_dac_pcm_hw_free, + .prepare = snd_sh_dac_pcm_prepare, + .trigger = snd_sh_dac_pcm_trigger, + .pointer = snd_sh_dac_pcm_pointer, + .copy = snd_sh_dac_pcm_copy, + .silence = snd_sh_dac_pcm_silence, + .mmap = snd_pcm_lib_mmap_iomem, +}; + +static int __devinit snd_sh_dac_pcm(struct snd_sh_dac *chip, int device) +{ + int err; + struct snd_pcm *pcm; + + /* device should be always 0 for us */ + err = snd_pcm_new(chip->card, "SH_DAC PCM", device, 1, 0, &pcm); + if (err < 0) + return err; + + pcm->private_data = chip; + strcpy(pcm->name, "SH_DAC PCM"); + snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK, &snd_sh_dac_pcm_ops); + + /* buffer size=48K */ + snd_pcm_lib_preallocate_pages_for_all(pcm, SNDRV_DMA_TYPE_CONTINUOUS, + snd_dma_continuous_data(GFP_KERNEL), + 48 * 1024, + 48 * 1024); + + return 0; +} +/* END OF PCM INTERFACE */ + + +/* driver .remove -- destructor */ +static int snd_sh_dac_remove(struct platform_device *devptr) +{ + snd_card_free(platform_get_drvdata(devptr)); + platform_set_drvdata(devptr, NULL); + + return 0; +} + +/* free -- it has been defined by create */ +static int snd_sh_dac_free(struct snd_sh_dac *chip) +{ + /* release the data */ + kfree(chip->data_buffer); + kfree(chip); + + return 0; +} + +static int snd_sh_dac_dev_free(struct snd_device *device) +{ + struct snd_sh_dac *chip = device->device_data; + + return snd_sh_dac_free(chip); +} + +static enum hrtimer_restart sh_dac_audio_timer(struct hrtimer *handle) +{ + struct snd_sh_dac *chip = container_of(handle, struct snd_sh_dac, + hrtimer); + struct snd_pcm_runtime *runtime = chip->substream->runtime; + ssize_t b_ps = frames_to_bytes(runtime, runtime->period_size); + + if (!chip->empty) { + sh_dac_output(*chip->buffer_begin, chip->pdata->channel); + chip->buffer_begin++; + + chip->processed++; + if (chip->processed >= b_ps) { + chip->processed -= b_ps; + snd_pcm_period_elapsed(chip->substream); + } + + if (chip->buffer_begin == (chip->data_buffer + + chip->buffer_size - 1)) + chip->buffer_begin = chip->data_buffer; + + if (chip->buffer_begin == chip->buffer_end) + chip->empty = 1; + + } + + if (!chip->empty) + hrtimer_start(&chip->hrtimer, chip->wakeups_per_second, + HRTIMER_MODE_REL); + + return HRTIMER_NORESTART; +} + +/* create -- chip-specific constructor for the cards components */ +static int __devinit snd_sh_dac_create(struct snd_card *card, + struct platform_device *devptr, + struct snd_sh_dac **rchip) +{ + struct snd_sh_dac *chip; + int err; + + static struct snd_device_ops ops = { + .dev_free = snd_sh_dac_dev_free, + }; + + *rchip = NULL; + + chip = kzalloc(sizeof(*chip), GFP_KERNEL); + if (chip == NULL) + return -ENOMEM; + + chip->card = card; + + hrtimer_init(&chip->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + chip->hrtimer.function = sh_dac_audio_timer; + + dac_audio_reset(chip); + chip->rate = 8000; + dac_audio_set_rate(chip); + + chip->pdata = devptr->dev.platform_data; + + chip->data_buffer = kmalloc(chip->pdata->buffer_size, GFP_KERNEL); + if (chip->data_buffer == NULL) { + kfree(chip); + return -ENOMEM; + } + + err = snd_device_new(card, SNDRV_DEV_LOWLEVEL, chip, &ops); + if (err < 0) { + snd_sh_dac_free(chip); + return err; + } + + *rchip = chip; + + return 0; +} + +/* driver .probe -- constructor */ +static int __devinit snd_sh_dac_probe(struct platform_device *devptr) +{ + struct snd_sh_dac *chip; + struct snd_card *card; + int err; + + err = snd_card_create(index, id, THIS_MODULE, 0, &card); + if (err < 0) { + snd_printk(KERN_ERR "cannot allocate the card\n"); + return err; + } + + err = snd_sh_dac_create(card, devptr, &chip); + if (err < 0) + goto probe_error; + + err = snd_sh_dac_pcm(chip, 0); + if (err < 0) + goto probe_error; + + strcpy(card->driver, "snd_sh_dac"); + strcpy(card->shortname, "SuperH DAC audio driver"); + printk(KERN_INFO "%s %s", card->longname, card->shortname); + + err = snd_card_register(card); + if (err < 0) + goto probe_error; + + snd_printk("ALSA driver for SuperH DAC audio"); + + platform_set_drvdata(devptr, card); + return 0; + +probe_error: + snd_card_free(card); + return err; +} + +/* + * "driver" definition + */ +static struct platform_driver driver = { + .probe = snd_sh_dac_probe, + .remove = snd_sh_dac_remove, + .driver = { + .name = "dac_audio", + }, +}; + +static int __init sh_dac_init(void) +{ + return platform_driver_register(&driver); +} + +static void __exit sh_dac_exit(void) +{ + platform_driver_unregister(&driver); +} + +module_init(sh_dac_init); +module_exit(sh_dac_exit); -- cgit v1.3-8-gc7d7 From 1477b6a7edd9ffa7bba4f9779ce9a76ce92761ed Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Wed, 4 Nov 2009 16:14:16 +0900 Subject: sched: Remove unused __schedule() declaration __schedule() had been removed. Signed-off-by: Hiroshi Shimamoto Cc: Peter Zijlstra LKML-Reference: <4AF129C8.3030008@ct.jp.nec.com> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 - kernel/kgdb.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 75e6e60bf583..f18102c4d0b8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -349,7 +349,6 @@ extern signed long schedule_timeout(signed long timeout); extern signed long schedule_timeout_interruptible(signed long timeout); extern signed long schedule_timeout_killable(signed long timeout); extern signed long schedule_timeout_uninterruptible(signed long timeout); -asmlinkage void __schedule(void); asmlinkage void schedule(void); extern int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner); diff --git a/kernel/kgdb.c b/kernel/kgdb.c index 9147a3190c9d..7d7014634022 100644 --- a/kernel/kgdb.c +++ b/kernel/kgdb.c @@ -870,7 +870,7 @@ static void gdb_cmd_getregs(struct kgdb_state *ks) /* * All threads that don't have debuggerinfo should be - * in __schedule() sleeping, since all other CPUs + * in schedule() sleeping, since all other CPUs * are in kgdb_wait, and thus have debuggerinfo. */ if (local_debuggerinfo) { -- cgit v1.3-8-gc7d7 From 2a2bb3142d326bb28b03875cabfc49baaac9a14a Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Wed, 4 Nov 2009 16:16:10 +0900 Subject: sched: Remove unused time_sync_thresh declaration time_sync_thresh had been removed. Signed-off-by: Hiroshi Shimamoto Cc: Peter Zijlstra LKML-Reference: <4AF12A3A.5050200@ct.jp.nec.com> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index f18102c4d0b8..754b3deed02b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -171,8 +171,6 @@ print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) } #endif -extern unsigned long long time_sync_thresh; - /* * Task state bitmask. NOTE! These bits are also * encoded in fs/proc/array.c: get_task_state(). -- cgit v1.3-8-gc7d7 From 9824a2b728b63e7ff586b9fd9293c819be79f0f3 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Wed, 4 Nov 2009 16:16:54 +0900 Subject: sched: Remove unused cpu_nr_migrations() cpu_nr_migrations() is not used, remove it. Signed-off-by: Hiroshi Shimamoto Cc: Peter Zijlstra LKML-Reference: <4AF12A66.6020609@ct.jp.nec.com> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 - kernel/sched.c | 11 ----------- 2 files changed, 12 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 754b3deed02b..dfc21fb76bf1 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -145,7 +145,6 @@ extern unsigned long this_cpu_load(void); extern void calc_global_load(void); -extern u64 cpu_nr_migrations(int cpu); extern unsigned long get_parent_ip(unsigned long addr); diff --git a/kernel/sched.c b/kernel/sched.c index 67be4d0dddaa..30fd0ba5f603 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -541,7 +541,6 @@ struct rq { struct load_weight load; unsigned long nr_load_updates; u64 nr_switches; - u64 nr_migrations_in; struct cfs_rq cfs; struct rt_rq rt; @@ -2049,7 +2048,6 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) #endif if (old_cpu != new_cpu) { p->se.nr_migrations++; - new_rq->nr_migrations_in++; #ifdef CONFIG_SCHEDSTATS if (task_hot(p, old_rq->clock, NULL)) schedstat_inc(p, se.nr_forced2_migrations); @@ -2988,15 +2986,6 @@ static void calc_load_account_active(struct rq *this_rq) } } -/* - * Externally visible per-cpu scheduler statistics: - * cpu_nr_migrations(cpu) - number of migrations into that cpu - */ -u64 cpu_nr_migrations(int cpu) -{ - return cpu_rq(cpu)->nr_migrations_in; -} - /* * Update rq->cpu_load[] statistics. This function is usually called every * scheduler tick (TICK_NSEC). -- cgit v1.3-8-gc7d7 From acc3f5d7cabbfd6cec71f0c1f9900621fa2d6ae7 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 3 Nov 2009 14:53:40 +1030 Subject: cpumask: Partition_sched_domains takes array of cpumask_var_t Currently partition_sched_domains() takes a 'struct cpumask *doms_new' which is a kmalloc'ed array of cpumask_t. You can't have such an array if 'struct cpumask' is undefined, as we plan for CONFIG_CPUMASK_OFFSTACK=y. So, we make this an array of cpumask_var_t instead: this is the same for the CONFIG_CPUMASK_OFFSTACK=n case, but requires multiple allocations for the CONFIG_CPUMASK_OFFSTACK=y case. Hence we add alloc_sched_domains() and free_sched_domains() functions. Signed-off-by: Rusty Russell Cc: Peter Zijlstra LKML-Reference: <200911031453.40668.rusty@rustcorp.com.au> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 8 ++++-- kernel/cpuset.c | 19 +++++++------- kernel/sched.c | 68 ++++++++++++++++++++++++++++++++++----------------- 3 files changed, 61 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index dfc21fb76bf1..78ba664474f3 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1009,9 +1009,13 @@ static inline struct cpumask *sched_domain_span(struct sched_domain *sd) return to_cpumask(sd->span); } -extern void partition_sched_domains(int ndoms_new, struct cpumask *doms_new, +extern void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], struct sched_domain_attr *dattr_new); +/* Allocate an array of sched domains, for partition_sched_domains(). */ +cpumask_var_t *alloc_sched_domains(unsigned int ndoms); +void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms); + /* Test a flag in parent sched domain */ static inline int test_sd_parent(struct sched_domain *sd, int flag) { @@ -1029,7 +1033,7 @@ unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu); struct sched_domain_attr; static inline void -partition_sched_domains(int ndoms_new, struct cpumask *doms_new, +partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], struct sched_domain_attr *dattr_new) { } diff --git a/kernel/cpuset.c b/kernel/cpuset.c index d247381e7371..3cf2183b472d 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -537,8 +537,7 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c) * element of the partition (one sched domain) to be passed to * partition_sched_domains(). */ -/* FIXME: see the FIXME in partition_sched_domains() */ -static int generate_sched_domains(struct cpumask **domains, +static int generate_sched_domains(cpumask_var_t **domains, struct sched_domain_attr **attributes) { LIST_HEAD(q); /* queue of cpusets to be scanned */ @@ -546,7 +545,7 @@ static int generate_sched_domains(struct cpumask **domains, struct cpuset **csa; /* array of all cpuset ptrs */ int csn; /* how many cpuset ptrs in csa so far */ int i, j, k; /* indices for partition finding loops */ - struct cpumask *doms; /* resulting partition; i.e. sched domains */ + cpumask_var_t *doms; /* resulting partition; i.e. sched domains */ struct sched_domain_attr *dattr; /* attributes for custom domains */ int ndoms = 0; /* number of sched domains in result */ int nslot; /* next empty doms[] struct cpumask slot */ @@ -557,7 +556,8 @@ static int generate_sched_domains(struct cpumask **domains, /* Special case for the 99% of systems with one, full, sched domain */ if (is_sched_load_balance(&top_cpuset)) { - doms = kmalloc(cpumask_size(), GFP_KERNEL); + ndoms = 1; + doms = alloc_sched_domains(ndoms); if (!doms) goto done; @@ -566,9 +566,8 @@ static int generate_sched_domains(struct cpumask **domains, *dattr = SD_ATTR_INIT; update_domain_attr_tree(dattr, &top_cpuset); } - cpumask_copy(doms, top_cpuset.cpus_allowed); + cpumask_copy(doms[0], top_cpuset.cpus_allowed); - ndoms = 1; goto done; } @@ -636,7 +635,7 @@ restart: * Now we know how many domains to create. * Convert to and populate cpu masks. */ - doms = kmalloc(ndoms * cpumask_size(), GFP_KERNEL); + doms = alloc_sched_domains(ndoms); if (!doms) goto done; @@ -656,7 +655,7 @@ restart: continue; } - dp = doms + nslot; + dp = doms[nslot]; if (nslot == ndoms) { static int warnings = 10; @@ -718,7 +717,7 @@ done: static void do_rebuild_sched_domains(struct work_struct *unused) { struct sched_domain_attr *attr; - struct cpumask *doms; + cpumask_var_t *doms; int ndoms; get_online_cpus(); @@ -2052,7 +2051,7 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb, unsigned long phase, void *unused_cpu) { struct sched_domain_attr *attr; - struct cpumask *doms; + cpumask_var_t *doms; int ndoms; switch (phase) { diff --git a/kernel/sched.c b/kernel/sched.c index 30fd0ba5f603..ae026aad145b 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -8846,7 +8846,7 @@ static int build_sched_domains(const struct cpumask *cpu_map) return __build_sched_domains(cpu_map, NULL); } -static struct cpumask *doms_cur; /* current sched domains */ +static cpumask_var_t *doms_cur; /* current sched domains */ static int ndoms_cur; /* number of sched domains in 'doms_cur' */ static struct sched_domain_attr *dattr_cur; /* attribues of custom domains in 'doms_cur' */ @@ -8868,6 +8868,31 @@ int __attribute__((weak)) arch_update_cpu_topology(void) return 0; } +cpumask_var_t *alloc_sched_domains(unsigned int ndoms) +{ + int i; + cpumask_var_t *doms; + + doms = kmalloc(sizeof(*doms) * ndoms, GFP_KERNEL); + if (!doms) + return NULL; + for (i = 0; i < ndoms; i++) { + if (!alloc_cpumask_var(&doms[i], GFP_KERNEL)) { + free_sched_domains(doms, i); + return NULL; + } + } + return doms; +} + +void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms) +{ + unsigned int i; + for (i = 0; i < ndoms; i++) + free_cpumask_var(doms[i]); + kfree(doms); +} + /* * Set up scheduler domains and groups. Callers must hold the hotplug lock. * For now this just excludes isolated cpus, but could be used to @@ -8879,12 +8904,12 @@ static int arch_init_sched_domains(const struct cpumask *cpu_map) arch_update_cpu_topology(); ndoms_cur = 1; - doms_cur = kmalloc(cpumask_size(), GFP_KERNEL); + doms_cur = alloc_sched_domains(ndoms_cur); if (!doms_cur) - doms_cur = fallback_doms; - cpumask_andnot(doms_cur, cpu_map, cpu_isolated_map); + doms_cur = &fallback_doms; + cpumask_andnot(doms_cur[0], cpu_map, cpu_isolated_map); dattr_cur = NULL; - err = build_sched_domains(doms_cur); + err = build_sched_domains(doms_cur[0]); register_sched_domain_sysctl(); return err; @@ -8934,19 +8959,19 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur, * doms_new[] to the current sched domain partitioning, doms_cur[]. * It destroys each deleted domain and builds each new domain. * - * 'doms_new' is an array of cpumask's of length 'ndoms_new'. + * 'doms_new' is an array of cpumask_var_t's of length 'ndoms_new'. * The masks don't intersect (don't overlap.) We should setup one * sched domain for each mask. CPUs not in any of the cpumasks will * not be load balanced. If the same cpumask appears both in the * current 'doms_cur' domains and in the new 'doms_new', we can leave * it as it is. * - * The passed in 'doms_new' should be kmalloc'd. This routine takes - * ownership of it and will kfree it when done with it. If the caller - * failed the kmalloc call, then it can pass in doms_new == NULL && - * ndoms_new == 1, and partition_sched_domains() will fallback to - * the single partition 'fallback_doms', it also forces the domains - * to be rebuilt. + * The passed in 'doms_new' should be allocated using + * alloc_sched_domains. This routine takes ownership of it and will + * free_sched_domains it when done with it. If the caller failed the + * alloc call, then it can pass in doms_new == NULL && ndoms_new == 1, + * and partition_sched_domains() will fallback to the single partition + * 'fallback_doms', it also forces the domains to be rebuilt. * * If doms_new == NULL it will be replaced with cpu_online_mask. * ndoms_new == 0 is a special case for destroying existing domains, @@ -8954,8 +8979,7 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur, * * Call with hotplug lock held */ -/* FIXME: Change to struct cpumask *doms_new[] */ -void partition_sched_domains(int ndoms_new, struct cpumask *doms_new, +void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], struct sched_domain_attr *dattr_new) { int i, j, n; @@ -8974,40 +8998,40 @@ void partition_sched_domains(int ndoms_new, struct cpumask *doms_new, /* Destroy deleted domains */ for (i = 0; i < ndoms_cur; i++) { for (j = 0; j < n && !new_topology; j++) { - if (cpumask_equal(&doms_cur[i], &doms_new[j]) + if (cpumask_equal(doms_cur[i], doms_new[j]) && dattrs_equal(dattr_cur, i, dattr_new, j)) goto match1; } /* no match - a current sched domain not in new doms_new[] */ - detach_destroy_domains(doms_cur + i); + detach_destroy_domains(doms_cur[i]); match1: ; } if (doms_new == NULL) { ndoms_cur = 0; - doms_new = fallback_doms; - cpumask_andnot(&doms_new[0], cpu_online_mask, cpu_isolated_map); + doms_new = &fallback_doms; + cpumask_andnot(doms_new[0], cpu_online_mask, cpu_isolated_map); WARN_ON_ONCE(dattr_new); } /* Build new domains */ for (i = 0; i < ndoms_new; i++) { for (j = 0; j < ndoms_cur && !new_topology; j++) { - if (cpumask_equal(&doms_new[i], &doms_cur[j]) + if (cpumask_equal(doms_new[i], doms_cur[j]) && dattrs_equal(dattr_new, i, dattr_cur, j)) goto match2; } /* no match - add a new doms_new */ - __build_sched_domains(doms_new + i, + __build_sched_domains(doms_new[i], dattr_new ? dattr_new + i : NULL); match2: ; } /* Remember the new sched domains */ - if (doms_cur != fallback_doms) - kfree(doms_cur); + if (doms_cur != &fallback_doms) + free_sched_domains(doms_cur, ndoms_cur); kfree(dattr_cur); /* kfree(NULL) is safe */ doms_cur = doms_new; dattr_cur = dattr_new; -- cgit v1.3-8-gc7d7 From fd2c3ef761fbc5e6c27fa7d40b30cda06bfcd7d8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 3 Nov 2009 03:26:03 +0000 Subject: net: cleanup include/net This cleanup patch puts struct/union/enum opening braces, in first line to ease grep games. struct something { becomes : struct something { Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ah.h | 3 +-- include/net/dn_dev.h | 12 ++++------- include/net/dn_fib.h | 3 +-- include/net/dn_nsp.h | 24 ++++++++-------------- include/net/dst.h | 3 +-- include/net/fib_rules.h | 9 +++------ include/net/gen_stats.h | 3 +-- include/net/genetlink.h | 12 ++++------- include/net/if_inet6.h | 24 ++++++++-------------- include/net/inetpeer.h | 3 +-- include/net/ip.h | 15 +++++--------- include/net/ip6_fib.h | 15 +++++--------- include/net/ip6_route.h | 3 +-- include/net/ip_vs.h | 6 ++---- include/net/ipip.h | 9 +++------ include/net/ipv6.h | 12 ++++------- include/net/iw_handler.h | 12 ++++------- include/net/neighbour.h | 18 ++++++----------- include/net/netfilter/nf_conntrack_ecache.h | 3 +-- include/net/netfilter/nf_conntrack_expect.h | 6 ++---- include/net/netfilter/nf_conntrack_extend.h | 6 ++---- include/net/netfilter/nf_conntrack_helper.h | 3 +-- include/net/netfilter/nf_conntrack_l3proto.h | 3 +-- include/net/netfilter/nf_conntrack_l4proto.h | 3 +-- include/net/netfilter/nf_conntrack_tuple.h | 12 ++++------- include/net/netfilter/nf_nat.h | 15 +++++--------- include/net/netfilter/nf_nat_protocol.h | 3 +-- include/net/pkt_cls.h | 24 ++++++++-------------- include/net/pkt_sched.h | 3 +-- include/net/protocol.h | 3 +-- include/net/red.h | 6 ++---- include/net/route.h | 12 ++++------- include/net/sch_generic.h | 30 ++++++++++------------------ include/net/scm.h | 9 +++------ include/net/sctp/sctp.h | 3 +-- include/net/tcp.h | 3 +-- include/net/xfrm.h | 27 +++++++++---------------- 37 files changed, 120 insertions(+), 240 deletions(-) (limited to 'include') diff --git a/include/net/ah.h b/include/net/ah.h index 7573a7152a72..f0129f79a31a 100644 --- a/include/net/ah.h +++ b/include/net/ah.h @@ -8,8 +8,7 @@ struct crypto_ahash; -struct ah_data -{ +struct ah_data { int icv_full_len; int icv_trunc_len; diff --git a/include/net/dn_dev.h b/include/net/dn_dev.h index cee46821dc53..28966cae3fd7 100644 --- a/include/net/dn_dev.h +++ b/include/net/dn_dev.h @@ -97,16 +97,14 @@ struct dn_dev { unsigned long uptime; /* Time device went up in jiffies */ }; -struct dn_short_packet -{ +struct dn_short_packet { __u8 msgflg; __le16 dstnode; __le16 srcnode; __u8 forward; } __attribute__((packed)); -struct dn_long_packet -{ +struct dn_long_packet { __u8 msgflg; __u8 d_area; __u8 d_subarea; @@ -122,8 +120,7 @@ struct dn_long_packet /*------------------------- DRP - Routing messages ---------------------*/ -struct endnode_hello_message -{ +struct endnode_hello_message { __u8 msgflg; __u8 tiver[3]; __u8 id[6]; @@ -138,8 +135,7 @@ struct endnode_hello_message __u8 data[2]; } __attribute__((packed)); -struct rtnode_hello_message -{ +struct rtnode_hello_message { __u8 msgflg; __u8 tiver[3]; __u8 id[6]; diff --git a/include/net/dn_fib.h b/include/net/dn_fib.h index c378be7bf960..52da6c3dd50d 100644 --- a/include/net/dn_fib.h +++ b/include/net/dn_fib.h @@ -4,8 +4,7 @@ /* WARNING: The ordering of these elements must match ordering * of RTA_* rtnetlink attribute numbers. */ -struct dn_kern_rta -{ +struct dn_kern_rta { void *rta_dst; void *rta_src; int *rta_iif; diff --git a/include/net/dn_nsp.h b/include/net/dn_nsp.h index 96e816b6974d..17d43d2db5ec 100644 --- a/include/net/dn_nsp.h +++ b/include/net/dn_nsp.h @@ -70,30 +70,26 @@ extern struct sk_buff *dn_alloc_send_skb(struct sock *sk, size_t *size, int nobl /* Data Messages (data segment/interrupt/link service) */ -struct nsp_data_seg_msg -{ +struct nsp_data_seg_msg { __u8 msgflg; __le16 dstaddr; __le16 srcaddr; } __attribute__((packed)); -struct nsp_data_opt_msg -{ +struct nsp_data_opt_msg { __le16 acknum; __le16 segnum; __le16 lsflgs; } __attribute__((packed)); -struct nsp_data_opt_msg1 -{ +struct nsp_data_opt_msg1 { __le16 acknum; __le16 segnum; } __attribute__((packed)); /* Acknowledgment Message (data/other data) */ -struct nsp_data_ack_msg -{ +struct nsp_data_ack_msg { __u8 msgflg; __le16 dstaddr; __le16 srcaddr; @@ -101,16 +97,14 @@ struct nsp_data_ack_msg } __attribute__((packed)); /* Connect Acknowledgment Message */ -struct nsp_conn_ack_msg -{ +struct nsp_conn_ack_msg { __u8 msgflg; __le16 dstaddr; } __attribute__((packed)); /* Connect Initiate/Retransmit Initiate/Connect Confirm */ -struct nsp_conn_init_msg -{ +struct nsp_conn_init_msg { __u8 msgflg; #define NSP_CI 0x18 /* Connect Initiate */ #define NSP_RCI 0x68 /* Retrans. Conn Init */ @@ -126,8 +120,7 @@ struct nsp_conn_init_msg } __attribute__((packed)); /* Disconnect Initiate/Disconnect Confirm */ -struct nsp_disconn_init_msg -{ +struct nsp_disconn_init_msg { __u8 msgflg; __le16 dstaddr; __le16 srcaddr; @@ -136,8 +129,7 @@ struct nsp_disconn_init_msg -struct srcobj_fmt -{ +struct srcobj_fmt { __u8 format; __u8 task; __le16 grpcode; diff --git a/include/net/dst.h b/include/net/dst.h index 6377ab2feba9..39c4a5963e12 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -36,8 +36,7 @@ struct sk_buff; -struct dst_entry -{ +struct dst_entry { struct rcu_head rcu_head; struct dst_entry *child; struct net_device *dev; diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index ca4b2e840078..2cd707b15d59 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -7,8 +7,7 @@ #include #include -struct fib_rule -{ +struct fib_rule { struct list_head list; atomic_t refcnt; int ifindex; @@ -25,15 +24,13 @@ struct fib_rule struct net * fr_net; }; -struct fib_lookup_arg -{ +struct fib_lookup_arg { void *lookup_ptr; void *result; struct fib_rule *rule; }; -struct fib_rules_ops -{ +struct fib_rules_ops { int family; struct list_head list; int rule_size; diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h index eb87a1447ae1..fa157712e982 100644 --- a/include/net/gen_stats.h +++ b/include/net/gen_stats.h @@ -6,8 +6,7 @@ #include #include -struct gnet_dump -{ +struct gnet_dump { spinlock_t * lock; struct sk_buff * skb; struct nlattr * tail; diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 2a1c06874c42..eb551baafc04 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -13,8 +13,7 @@ * @list: list entry for linking * @family: pointer to family, need not be set before registering */ -struct genl_multicast_group -{ +struct genl_multicast_group { struct genl_family *family; /* private */ struct list_head list; /* private */ char name[GENL_NAMSIZ]; @@ -35,8 +34,7 @@ struct genl_multicast_group * @family_list: family list * @mcast_groups: multicast groups list */ -struct genl_family -{ +struct genl_family { unsigned int id; unsigned int hdrsize; char name[GENL_NAMSIZ]; @@ -58,8 +56,7 @@ struct genl_family * @userhdr: user specific header * @attrs: netlink attributes */ -struct genl_info -{ +struct genl_info { u32 snd_seq; u32 snd_pid; struct nlmsghdr * nlhdr; @@ -102,8 +99,7 @@ static inline void genl_info_net_set(struct genl_info *info, struct net *net) * @done: completion callback for dumps * @ops_list: operations list */ -struct genl_ops -{ +struct genl_ops { u8 cmd; unsigned int flags; const struct nla_policy *policy; diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index 38b78132019b..e9d69d198495 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -32,8 +32,7 @@ #ifdef __KERNEL__ -struct inet6_ifaddr -{ +struct inet6_ifaddr { struct in6_addr addr; __u32 prefix_len; @@ -67,8 +66,7 @@ struct inet6_ifaddr int dead; }; -struct ip6_sf_socklist -{ +struct ip6_sf_socklist { unsigned int sl_max; unsigned int sl_count; struct in6_addr sl_addr[0]; @@ -79,8 +77,7 @@ struct ip6_sf_socklist #define IP6_SFBLOCK 10 /* allocate this many at once */ -struct ipv6_mc_socklist -{ +struct ipv6_mc_socklist { struct in6_addr addr; int ifindex; struct ipv6_mc_socklist *next; @@ -89,8 +86,7 @@ struct ipv6_mc_socklist struct ip6_sf_socklist *sflist; }; -struct ip6_sf_list -{ +struct ip6_sf_list { struct ip6_sf_list *sf_next; struct in6_addr sf_addr; unsigned long sf_count[2]; /* include/exclude counts */ @@ -105,8 +101,7 @@ struct ip6_sf_list #define MAF_NOREPORT 0x08 #define MAF_GSQUERY 0x10 -struct ifmcaddr6 -{ +struct ifmcaddr6 { struct in6_addr mca_addr; struct inet6_dev *idev; struct ifmcaddr6 *next; @@ -126,15 +121,13 @@ struct ifmcaddr6 /* Anycast stuff */ -struct ipv6_ac_socklist -{ +struct ipv6_ac_socklist { struct in6_addr acl_addr; int acl_ifindex; struct ipv6_ac_socklist *acl_next; }; -struct ifacaddr6 -{ +struct ifacaddr6 { struct in6_addr aca_addr; struct inet6_dev *aca_idev; struct rt6_info *aca_rt; @@ -157,8 +150,7 @@ struct ipv6_devstat { DEFINE_SNMP_STAT(struct icmpv6msg_mib, icmpv6msg); }; -struct inet6_dev -{ +struct inet6_dev { struct net_device *dev; struct inet6_ifaddr *addr_list; diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index 15e1f8fe4c1f..35ad7b930467 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -13,8 +13,7 @@ #include #include -struct inet_peer -{ +struct inet_peer { /* group together avl_left,avl_right,v4daddr to speedup lookups */ struct inet_peer *avl_left, *avl_right; __be32 v4daddr; /* peer's address */ diff --git a/include/net/ip.h b/include/net/ip.h index 376adf47764e..e6b9d12d5f62 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -33,8 +33,7 @@ struct sock; -struct inet_skb_parm -{ +struct inet_skb_parm { struct ip_options opt; /* Compiled IP options */ unsigned char flags; @@ -50,8 +49,7 @@ static inline unsigned int ip_hdrlen(const struct sk_buff *skb) return ip_hdr(skb)->ihl * 4; } -struct ipcm_cookie -{ +struct ipcm_cookie { __be32 addr; int oif; struct ip_options *opt; @@ -60,8 +58,7 @@ struct ipcm_cookie #define IPCB(skb) ((struct inet_skb_parm*)((skb)->cb)) -struct ip_ra_chain -{ +struct ip_ra_chain { struct ip_ra_chain *next; struct sock *sk; void (*destructor)(struct sock *); @@ -159,8 +156,7 @@ static inline __u8 ip_reply_arg_flowi_flags(const struct ip_reply_arg *arg) void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg, unsigned int len); -struct ipv4_config -{ +struct ipv4_config { int log_martians; int no_pmtu_disc; }; @@ -336,8 +332,7 @@ extern int ip_call_ra_chain(struct sk_buff *skb); * Functions provided by ip_fragment.c */ -enum ip_defrag_users -{ +enum ip_defrag_users { IP_DEFRAG_LOCAL_DELIVER, IP_DEFRAG_CALL_RA_CHAIN, IP_DEFRAG_CONNTRACK_IN, diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 15b492a9aa79..257808188add 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -30,8 +30,7 @@ struct rt6_info; -struct fib6_config -{ +struct fib6_config { u32 fc_table; u32 fc_metric; int fc_dst_len; @@ -51,8 +50,7 @@ struct fib6_config struct nl_info fc_nlinfo; }; -struct fib6_node -{ +struct fib6_node { struct fib6_node *parent; struct fib6_node *left; struct fib6_node *right; @@ -78,16 +76,14 @@ struct fib6_node * */ -struct rt6key -{ +struct rt6key { struct in6_addr addr; int plen; }; struct fib6_table; -struct rt6_info -{ +struct rt6_info { union { struct dst_entry dst; } u; @@ -127,8 +123,7 @@ static inline struct inet6_dev *ip6_dst_idev(struct dst_entry *dst) return ((struct rt6_info *)dst)->rt6i_idev; } -struct fib6_walker_t -{ +struct fib6_walker_t { struct fib6_walker_t *prev, *next; struct fib6_node *root, *node; struct rt6_info *leaf; diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 0e1b8aebaff8..4a808de7c0f6 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -103,8 +103,7 @@ extern void rt6_pmtu_discovery(struct in6_addr *daddr, struct netlink_callback; -struct rt6_rtnl_dump_arg -{ +struct rt6_rtnl_dump_arg { struct sk_buff *skb; struct netlink_callback *cb; struct net *net; diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 98978e73f666..8dc3296b7bea 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -251,8 +251,7 @@ struct ip_vs_estimator { u32 outbps; }; -struct ip_vs_stats -{ +struct ip_vs_stats { struct ip_vs_stats_user ustats; /* statistics */ struct ip_vs_estimator est; /* estimator */ @@ -518,8 +517,7 @@ struct ip_vs_scheduler { /* * The application module object (a.k.a. app incarnation) */ -struct ip_vs_app -{ +struct ip_vs_app { struct list_head a_list; /* member in app list */ int type; /* IP_VS_APP_TYPE_xxx */ char *name; /* application module name */ diff --git a/include/net/ipip.h b/include/net/ipip.h index b3db2fd6e61c..11e8513d2d07 100644 --- a/include/net/ipip.h +++ b/include/net/ipip.h @@ -8,16 +8,14 @@ #define IPTUNNEL_ERR_TIMEO (30*HZ) /* 6rd prefix/relay information */ -struct ip_tunnel_6rd_parm -{ +struct ip_tunnel_6rd_parm { struct in6_addr prefix; __be32 relay_prefix; u16 prefixlen; u16 relay_prefixlen; }; -struct ip_tunnel -{ +struct ip_tunnel { struct ip_tunnel *next; struct net_device *dev; @@ -40,8 +38,7 @@ struct ip_tunnel unsigned int prl_count; /* # of entries in PRL */ }; -struct ip_tunnel_prl_entry -{ +struct ip_tunnel_prl_entry { struct ip_tunnel_prl_entry *next; __be32 addr; u16 flags; diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 8c31d8a0c1fe..92db8617d188 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -160,8 +160,7 @@ extern struct ctl_path net_ipv6_ctl_path[]; #define ICMP6MSGIN_INC_STATS_BH(net, idev, field) \ _DEVINC(net, icmpv6msg, _BH, idev, field) -struct ip6_ra_chain -{ +struct ip6_ra_chain { struct ip6_ra_chain *next; struct sock *sk; int sel; @@ -176,8 +175,7 @@ extern rwlock_t ip6_ra_lock; ancillary data and passed to IPv6. */ -struct ipv6_txoptions -{ +struct ipv6_txoptions { /* Length of this structure */ int tot_len; @@ -194,8 +192,7 @@ struct ipv6_txoptions /* Option buffer, as read by IPV6_PKTOPTIONS, starts here. */ }; -struct ip6_flowlabel -{ +struct ip6_flowlabel { struct ip6_flowlabel *next; __be32 label; atomic_t users; @@ -212,8 +209,7 @@ struct ip6_flowlabel #define IPV6_FLOWINFO_MASK cpu_to_be32(0x0FFFFFFF) #define IPV6_FLOWLABEL_MASK cpu_to_be32(0x000FFFFF) -struct ipv6_fl_socklist -{ +struct ipv6_fl_socklist { struct ipv6_fl_socklist *next; struct ip6_flowlabel *fl; }; diff --git a/include/net/iw_handler.h b/include/net/iw_handler.h index d5d337170a56..b2b98f3fa265 100644 --- a/include/net/iw_handler.h +++ b/include/net/iw_handler.h @@ -300,8 +300,7 @@ * This struct is also my long term insurance. I can add new fields here * without breaking the prototype of iw_handler... */ -struct iw_request_info -{ +struct iw_request_info { __u16 cmd; /* Wireless Extension command */ __u16 flags; /* More to come ;-) */ }; @@ -321,8 +320,7 @@ typedef int (*iw_handler)(struct net_device *dev, struct iw_request_info *info, * shared by all driver instances... Same for the members... * This will be linked from net_device in */ -struct iw_handler_def -{ +struct iw_handler_def { /* Array of handlers for standard ioctls * We will call dev->wireless_handlers->standard[ioctl - SIOCSIWCOMMIT] @@ -372,8 +370,7 @@ struct iw_handler_def /* * Describe how a standard IOCTL looks like. */ -struct iw_ioctl_description -{ +struct iw_ioctl_description { __u8 header_type; /* NULL, iw_point or other */ __u8 token_type; /* Future */ __u16 token_size; /* Granularity of payload */ @@ -395,8 +392,7 @@ struct iw_ioctl_description /* * Instance specific spy data, i.e. addresses spied and quality for them. */ -struct iw_spy_data -{ +struct iw_spy_data { /* --- Standard spy support --- */ int spy_number; u_char spy_address[IW_MAX_SPY][ETH_ALEN]; diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 3817fda82a80..db8e96dd114e 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -37,8 +37,7 @@ struct neighbour; -struct neigh_parms -{ +struct neigh_parms { #ifdef CONFIG_NET_NS struct net *net; #endif @@ -70,8 +69,7 @@ struct neigh_parms int locktime; }; -struct neigh_statistics -{ +struct neigh_statistics { unsigned long allocs; /* number of allocated neighs */ unsigned long destroys; /* number of destroyed neighs */ unsigned long hash_grows; /* number of hash resizes */ @@ -97,8 +95,7 @@ struct neigh_statistics preempt_enable(); \ } while (0) -struct neighbour -{ +struct neighbour { struct neighbour *next; struct neigh_table *tbl; struct neigh_parms *parms; @@ -122,8 +119,7 @@ struct neighbour u8 primary_key[0]; }; -struct neigh_ops -{ +struct neigh_ops { int family; void (*solicit)(struct neighbour *, struct sk_buff*); void (*error_report)(struct neighbour *, struct sk_buff*); @@ -133,8 +129,7 @@ struct neigh_ops int (*queue_xmit)(struct sk_buff*); }; -struct pneigh_entry -{ +struct pneigh_entry { struct pneigh_entry *next; #ifdef CONFIG_NET_NS struct net *net; @@ -149,8 +144,7 @@ struct pneigh_entry */ -struct neigh_table -{ +struct neigh_table { struct neigh_table *next; int family; int entry_size; diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index 4f20d58e2ab7..475facc3051a 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -13,8 +13,7 @@ #include /* Connection tracking event types */ -enum ip_conntrack_events -{ +enum ip_conntrack_events { IPCT_NEW = 0, /* new conntrack */ IPCT_RELATED = 1, /* related conntrack */ IPCT_DESTROY = 2, /* destroyed conntrack */ diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index a9652806d0df..9a2b9cb52271 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -9,8 +9,7 @@ extern unsigned int nf_ct_expect_hsize; extern unsigned int nf_ct_expect_max; -struct nf_conntrack_expect -{ +struct nf_conntrack_expect { /* Conntrack expectation list member */ struct hlist_node lnode; @@ -64,8 +63,7 @@ static inline struct net *nf_ct_exp_net(struct nf_conntrack_expect *exp) #endif } -struct nf_conntrack_expect_policy -{ +struct nf_conntrack_expect_policy { unsigned int max_expected; unsigned int timeout; }; diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h index 7f8fc5d123c5..e192dc17c583 100644 --- a/include/net/netfilter/nf_conntrack_extend.h +++ b/include/net/netfilter/nf_conntrack_extend.h @@ -3,8 +3,7 @@ #include -enum nf_ct_ext_id -{ +enum nf_ct_ext_id { NF_CT_EXT_HELPER, NF_CT_EXT_NAT, NF_CT_EXT_ACCT, @@ -65,8 +64,7 @@ __nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp); #define NF_CT_EXT_F_PREALLOC 0x0001 -struct nf_ct_ext_type -{ +struct nf_ct_ext_type { /* Destroys relationships (can be NULL). */ void (*destroy)(struct nf_conn *ct); /* Called when realloacted (can be NULL). diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index 1b7068000927..d015de92e03f 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -16,8 +16,7 @@ struct module; #define NF_CT_HELPER_NAME_LEN 16 -struct nf_conntrack_helper -{ +struct nf_conntrack_helper { struct hlist_node hnode; /* Internal use. */ const char *name; /* name of the module */ diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h index 9f99d36d5de9..a7547611e8f1 100644 --- a/include/net/netfilter/nf_conntrack_l3proto.h +++ b/include/net/netfilter/nf_conntrack_l3proto.h @@ -16,8 +16,7 @@ #include #include -struct nf_conntrack_l3proto -{ +struct nf_conntrack_l3proto { /* L3 Protocol Family number. ex) PF_INET */ u_int16_t l3proto; diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 3767fb41e541..ca6dcf3445ab 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -15,8 +15,7 @@ struct seq_file; -struct nf_conntrack_l4proto -{ +struct nf_conntrack_l4proto { /* L3 Protocol number. */ u_int16_t l3proto; diff --git a/include/net/netfilter/nf_conntrack_tuple.h b/include/net/netfilter/nf_conntrack_tuple.h index 2628c154d40e..4ee44c84a304 100644 --- a/include/net/netfilter/nf_conntrack_tuple.h +++ b/include/net/netfilter/nf_conntrack_tuple.h @@ -26,8 +26,7 @@ /* The protocol-specific manipulable parts of the tuple: always in network order! */ -union nf_conntrack_man_proto -{ +union nf_conntrack_man_proto { /* Add other protocols here. */ __be16 all; @@ -52,8 +51,7 @@ union nf_conntrack_man_proto }; /* The manipulable part of the tuple. */ -struct nf_conntrack_man -{ +struct nf_conntrack_man { union nf_inet_addr u3; union nf_conntrack_man_proto u; /* Layer 3 protocol */ @@ -61,8 +59,7 @@ struct nf_conntrack_man }; /* This contains the information to distinguish a connection. */ -struct nf_conntrack_tuple -{ +struct nf_conntrack_tuple { struct nf_conntrack_man src; /* These are the parts of the tuple which are fixed. */ @@ -100,8 +97,7 @@ struct nf_conntrack_tuple } dst; }; -struct nf_conntrack_tuple_mask -{ +struct nf_conntrack_tuple_mask { struct { union nf_inet_addr u3; union nf_conntrack_man_proto u; diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h index 8df0b7f7fc6e..f5f09f032a90 100644 --- a/include/net/netfilter/nf_nat.h +++ b/include/net/netfilter/nf_nat.h @@ -5,8 +5,7 @@ #define NF_NAT_MAPPING_TYPE_MAX_NAMELEN 16 -enum nf_nat_manip_type -{ +enum nf_nat_manip_type { IP_NAT_MANIP_SRC, IP_NAT_MANIP_DST }; @@ -30,8 +29,7 @@ struct nf_nat_seq { }; /* Single range specification. */ -struct nf_nat_range -{ +struct nf_nat_range { /* Set to OR of flags above. */ unsigned int flags; @@ -43,8 +41,7 @@ struct nf_nat_range }; /* For backwards compat: don't use in modern code. */ -struct nf_nat_multi_range_compat -{ +struct nf_nat_multi_range_compat { unsigned int rangesize; /* Must be 1. */ /* hangs off end. */ @@ -57,8 +54,7 @@ struct nf_nat_multi_range_compat #include /* per conntrack: nat application helper private data */ -union nf_conntrack_nat_help -{ +union nf_conntrack_nat_help { /* insert nat helper private data here */ struct nf_nat_pptp nat_pptp_info; }; @@ -66,8 +62,7 @@ union nf_conntrack_nat_help struct nf_conn; /* The structure embedded in the conntrack structure. */ -struct nf_conn_nat -{ +struct nf_conn_nat { struct hlist_node bysource; struct nf_nat_seq seq[IP_CT_DIR_MAX]; struct nf_conn *ct; diff --git a/include/net/netfilter/nf_nat_protocol.h b/include/net/netfilter/nf_nat_protocol.h index f3662c4394ef..c398017ccfa3 100644 --- a/include/net/netfilter/nf_nat_protocol.h +++ b/include/net/netfilter/nf_nat_protocol.h @@ -6,8 +6,7 @@ struct nf_nat_range; -struct nf_nat_protocol -{ +struct nf_nat_protocol { /* Protocol number. */ unsigned int protonum; diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index d1ca31444644..3dd210d073ca 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -7,8 +7,7 @@ /* Basic packet classifier frontend definitions. */ -struct tcf_walker -{ +struct tcf_walker { int stop; int skip; int count; @@ -61,8 +60,7 @@ tcf_unbind_filter(struct tcf_proto *tp, struct tcf_result *r) tp->q->ops->cl_ops->unbind_tcf(tp->q, cl); } -struct tcf_exts -{ +struct tcf_exts { #ifdef CONFIG_NET_CLS_ACT struct tc_action *action; #endif @@ -71,8 +69,7 @@ struct tcf_exts /* Map to export classifier specific extension TLV types to the * generic extensions API. Unsupported extensions must be set to 0. */ -struct tcf_ext_map -{ +struct tcf_ext_map { int action; int police; }; @@ -143,8 +140,7 @@ extern int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts, /** * struct tcf_pkt_info - packet information */ -struct tcf_pkt_info -{ +struct tcf_pkt_info { unsigned char * ptr; int nexthdr; }; @@ -162,8 +158,7 @@ struct tcf_ematch_ops; * @datalen: length of the ematch specific configuration data * @data: ematch specific data */ -struct tcf_ematch -{ +struct tcf_ematch { struct tcf_ematch_ops * ops; unsigned long data; unsigned int datalen; @@ -211,8 +206,7 @@ static inline int tcf_em_early_end(struct tcf_ematch *em, int result) * @hdr: ematch tree header supplied by userspace * @matches: array of ematches */ -struct tcf_ematch_tree -{ +struct tcf_ematch_tree { struct tcf_ematch_tree_hdr hdr; struct tcf_ematch * matches; @@ -230,8 +224,7 @@ struct tcf_ematch_tree * @owner: owner, must be set to THIS_MODULE * @link: link to previous/next ematch module (internal use) */ -struct tcf_ematch_ops -{ +struct tcf_ematch_ops { int kind; int datalen; int (*change)(struct tcf_proto *, void *, @@ -302,8 +295,7 @@ static inline int tcf_em_tree_match(struct sk_buff *skb, #else /* CONFIG_NET_EMATCH */ -struct tcf_ematch_tree -{ +struct tcf_ematch_tree { }; #define tcf_em_tree_validate(tp, tb, t) ((void)(t), 0) diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index f911ec7598ef..2d567265363e 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -5,8 +5,7 @@ #include #include -struct qdisc_walker -{ +struct qdisc_walker { int stop; int skip; int count; diff --git a/include/net/protocol.h b/include/net/protocol.h index 60249e51b669..89932d45da59 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -47,8 +47,7 @@ struct net_protocol { }; #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) -struct inet6_protocol -{ +struct inet6_protocol { int (*handler)(struct sk_buff *skb); void (*err_handler)(struct sk_buff *skb, diff --git a/include/net/red.h b/include/net/red.h index 3cf31d466a81..995108e54d9f 100644 --- a/include/net/red.h +++ b/include/net/red.h @@ -90,8 +90,7 @@ #define RED_STAB_SIZE 256 #define RED_STAB_MASK (RED_STAB_SIZE - 1) -struct red_stats -{ +struct red_stats { u32 prob_drop; /* Early probability drops */ u32 prob_mark; /* Early probability marks */ u32 forced_drop; /* Forced drops, qavg > max_thresh */ @@ -101,8 +100,7 @@ struct red_stats u32 backlog; }; -struct red_parms -{ +struct red_parms { /* Parameters */ u32 qth_min; /* Min avg length threshold: A scaled */ u32 qth_max; /* Max avg length threshold: A scaled */ diff --git a/include/net/route.h b/include/net/route.h index 40f6346ef496..cfb4c071a136 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -49,10 +49,8 @@ struct fib_nh; struct inet_peer; -struct rtable -{ - union - { +struct rtable { + union { struct dst_entry dst; } u; @@ -77,16 +75,14 @@ struct rtable struct inet_peer *peer; /* long-living peer info */ }; -struct ip_rt_acct -{ +struct ip_rt_acct { __u32 o_bytes; __u32 o_packets; __u32 i_bytes; __u32 i_packets; }; -struct rt_cache_stat -{ +struct rt_cache_stat { unsigned int in_hit; unsigned int in_slow_tot; unsigned int in_slow_mc; diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index c33180dd42b4..dad558bc06fa 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -15,16 +15,14 @@ struct qdisc_walker; struct tcf_walker; struct module; -struct qdisc_rate_table -{ +struct qdisc_rate_table { struct tc_ratespec rate; u32 data[256]; struct qdisc_rate_table *next; int refcnt; }; -enum qdisc_state_t -{ +enum qdisc_state_t { __QDISC_STATE_RUNNING, __QDISC_STATE_SCHED, __QDISC_STATE_DEACTIVATED, @@ -37,8 +35,7 @@ struct qdisc_size_table { u16 data[]; }; -struct Qdisc -{ +struct Qdisc { int (*enqueue)(struct sk_buff *skb, struct Qdisc *dev); struct sk_buff * (*dequeue)(struct Qdisc *dev); unsigned flags; @@ -78,8 +75,7 @@ struct Qdisc struct gnet_stats_queue qstats; }; -struct Qdisc_class_ops -{ +struct Qdisc_class_ops { /* Child qdisc manipulation */ struct netdev_queue * (*select_queue)(struct Qdisc *, struct tcmsg *); int (*graft)(struct Qdisc *, unsigned long cl, @@ -108,8 +104,7 @@ struct Qdisc_class_ops struct gnet_dump *); }; -struct Qdisc_ops -{ +struct Qdisc_ops { struct Qdisc_ops *next; const struct Qdisc_class_ops *cl_ops; char id[IFNAMSIZ]; @@ -133,14 +128,12 @@ struct Qdisc_ops }; -struct tcf_result -{ +struct tcf_result { unsigned long class; u32 classid; }; -struct tcf_proto_ops -{ +struct tcf_proto_ops { struct tcf_proto_ops *next; char kind[IFNAMSIZ]; @@ -164,8 +157,7 @@ struct tcf_proto_ops struct module *owner; }; -struct tcf_proto -{ +struct tcf_proto { /* Fast access part */ struct tcf_proto *next; void *root; @@ -261,14 +253,12 @@ extern struct Qdisc_ops noop_qdisc_ops; extern struct Qdisc_ops pfifo_fast_ops; extern struct Qdisc_ops mq_qdisc_ops; -struct Qdisc_class_common -{ +struct Qdisc_class_common { u32 classid; struct hlist_node hnode; }; -struct Qdisc_class_hash -{ +struct Qdisc_class_hash { struct hlist_head *hash; unsigned int hashsize; unsigned int hashmask; diff --git a/include/net/scm.h b/include/net/scm.h index cf48c800e926..8360e47aa7e3 100644 --- a/include/net/scm.h +++ b/include/net/scm.h @@ -12,15 +12,13 @@ */ #define SCM_MAX_FD 255 -struct scm_fp_list -{ +struct scm_fp_list { struct list_head list; int count; struct file *fp[SCM_MAX_FD]; }; -struct scm_cookie -{ +struct scm_cookie { struct ucred creds; /* Skb credentials */ struct scm_fp_list *fp; /* Passed files */ #ifdef CONFIG_SECURITY_NETWORK @@ -88,8 +86,7 @@ static inline void scm_passec(struct socket *sock, struct msghdr *msg, struct sc static __inline__ void scm_recv(struct socket *sock, struct msghdr *msg, struct scm_cookie *scm, int flags) { - if (!msg->msg_control) - { + if (!msg->msg_control) { if (test_bit(SOCK_PASSCRED, &sock->flags) || scm->fp) msg->msg_flags |= MSG_CTRUNC; scm_destroy(scm); diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 8a6d5297de16..78740ec57d5d 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -227,8 +227,7 @@ DECLARE_SNMP_STAT(struct sctp_mib, sctp_statistics); #endif /* !TEST_FRAME */ /* sctp mib definitions */ -enum -{ +enum { SCTP_MIB_NUM = 0, SCTP_MIB_CURRESTAB, /* CurrEstab */ SCTP_MIB_ACTIVEESTABS, /* ActiveEstabs */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 740d09be8e2d..bf20f88fd033 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -359,8 +359,7 @@ TCP_ECN_create_request(struct request_sock *req, struct tcphdr *th) inet_rsk(req)->ecn_ok = 1; } -enum tcp_tw_status -{ +enum tcp_tw_status { TCP_TW_SUCCESS = 0, TCP_TW_RST = 1, TCP_TW_ACK = 2, diff --git a/include/net/xfrm.h b/include/net/xfrm.h index d9c6dbb92719..7f38ef509957 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -121,8 +121,7 @@ struct xfrm_state_walk { }; /* Full description of state of transformer. */ -struct xfrm_state -{ +struct xfrm_state { #ifdef CONFIG_NET_NS struct net *xs_net; #endif @@ -237,8 +236,7 @@ enum { }; /* callback structure passed from either netlink or pfkey */ -struct km_event -{ +struct km_event { union { u32 hard; u32 proto; @@ -313,8 +311,7 @@ extern int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo); extern void xfrm_state_delete_tunnel(struct xfrm_state *x); -struct xfrm_type -{ +struct xfrm_type { char *description; struct module *owner; __u8 proto; @@ -420,8 +417,7 @@ static inline struct xfrm_mode *xfrm_ip2inner_mode(struct xfrm_state *x, int ipp return x->inner_mode_iaf; } -struct xfrm_tmpl -{ +struct xfrm_tmpl { /* id in template is interpreted as: * daddr - destination of tunnel, may be zero for transport mode. * spi - zero to acquire spi. Not zero if spi is static, then @@ -468,8 +464,7 @@ struct xfrm_policy_walk { u32 seq; }; -struct xfrm_policy -{ +struct xfrm_policy { #ifdef CONFIG_NET_NS struct net *xp_net; #endif @@ -538,8 +533,7 @@ struct xfrm_migrate { /* default seq threshold size */ #define XFRM_AE_SEQT_SIZE 2 -struct xfrm_mgr -{ +struct xfrm_mgr { struct list_head list; char *id; int (*notify)(struct xfrm_state *x, struct km_event *c); @@ -626,8 +620,7 @@ struct xfrm_spi_skb_cb { #define XFRM_SPI_SKB_CB(__skb) ((struct xfrm_spi_skb_cb *)&((__skb)->cb[0])) /* Audit Information */ -struct xfrm_audit -{ +struct xfrm_audit { u32 secid; uid_t loginuid; u32 sessionid; @@ -871,8 +864,7 @@ static inline int xfrm_sec_ctx_match(struct xfrm_sec_ctx *s1, struct xfrm_sec_ct * bundles differing by session id. All the bundles grow from a parent * policy rule. */ -struct xfrm_dst -{ +struct xfrm_dst { union { struct dst_entry dst; struct rtable rt; @@ -907,8 +899,7 @@ static inline void xfrm_dst_destroy(struct xfrm_dst *xdst) extern void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev); -struct sec_path -{ +struct sec_path { atomic_t refcnt; int len; struct xfrm_state *xvec[XFRM_MAX_DEPTH]; -- cgit v1.3-8-gc7d7 From 663e69592856df53ef52969482ef413a96bc4e06 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 4 Nov 2009 14:22:21 +0100 Subject: irq: Remove unused debug_poll_all_shared_irqs() commit 74296a8ed added this function for debug purposes, but it was never used for anything. Remove it. Signed-off-by: Thomas Gleixner --- include/linux/interrupt.h | 6 ------ kernel/irq/spurious.c | 14 +------------- 2 files changed, 1 insertion(+), 19 deletions(-) (limited to 'include') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 7ca72b74eec7..75f3f00ac1e5 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -603,12 +603,6 @@ static inline void init_irq_proc(void) } #endif -#if defined(CONFIG_GENERIC_HARDIRQS) && defined(CONFIG_DEBUG_SHIRQ) -extern void debug_poll_all_shared_irqs(void); -#else -static inline void debug_poll_all_shared_irqs(void) { } -#endif - struct seq_file; int show_interrupts(struct seq_file *p, void *v); diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c index 114e704760fe..8996b98f9eb2 100644 --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c @@ -104,7 +104,7 @@ static int misrouted_irq(int irq) return ok; } -static void poll_all_shared_irqs(void) +static void poll_spurious_irqs(unsigned long dummy) { struct irq_desc *desc; int i; @@ -123,23 +123,11 @@ static void poll_all_shared_irqs(void) try_one_irq(i, desc); } -} - -static void poll_spurious_irqs(unsigned long dummy) -{ - poll_all_shared_irqs(); mod_timer(&poll_spurious_irq_timer, jiffies + POLL_SPURIOUS_IRQ_INTERVAL); } -#ifdef CONFIG_DEBUG_SHIRQ -void debug_poll_all_shared_irqs(void) -{ - poll_all_shared_irqs(); -} -#endif - /* * If 99,900 of the previous 100,000 interrupts have not been handled * then assume that the IRQ is stuck in some manner. Drop a diagnostic -- cgit v1.3-8-gc7d7 From c6d14c84566d6b70ad9dc1618db0dec87cca9300 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 4 Nov 2009 05:43:23 -0800 Subject: net: Introduce for_each_netdev_rcu() iterator Adds RCU management to the list of netdevices. Convert some for_each_netdev() users to RCU version, if it can avoid read_lock-ing dev_base_lock Ie: read_lock(&dev_base_loack); for_each_netdev(net, dev) some_action(); read_unlock(&dev_base_lock); becomes : rcu_read_lock(); for_each_netdev_rcu(net, dev) some_action(); rcu_read_unlock(); Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 ++ net/core/dev.c | 30 ++++++++++++++++-------------- net/decnet/af_decnet.c | 6 +++--- net/decnet/dn_fib.c | 6 +++--- net/decnet/dn_route.c | 6 +++--- net/ipv4/devinet.c | 30 +++++++++++------------------- net/ipv6/addrconf.c | 20 +++++++------------- net/ipv6/anycast.c | 6 +++--- net/netrom/nr_route.c | 15 ++++++++------- net/rose/rose_route.c | 18 +++++++++--------- net/sctp/protocol.c | 6 +++--- 11 files changed, 68 insertions(+), 77 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index bcf1083857fc..5077de028317 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1081,6 +1081,8 @@ extern rwlock_t dev_base_lock; /* Device list lock */ #define for_each_netdev(net, d) \ list_for_each_entry(d, &(net)->dev_base_head, dev_list) +#define for_each_netdev_rcu(net, d) \ + list_for_each_entry_rcu(d, &(net)->dev_base_head, dev_list) #define for_each_netdev_safe(net, d, n) \ list_for_each_entry_safe(d, n, &(net)->dev_base_head, dev_list) #define for_each_netdev_continue(net, d) \ diff --git a/net/core/dev.c b/net/core/dev.c index 76a1502efe67..bf629ac08b87 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -175,7 +175,7 @@ static struct list_head ptype_all __read_mostly; /* Taps */ * The @dev_base_head list is protected by @dev_base_lock and the rtnl * semaphore. * - * Pure readers hold dev_base_lock for reading. + * Pure readers hold dev_base_lock for reading, or rcu_read_lock() * * Writers must hold the rtnl semaphore while they loop through the * dev_base_head list, and hold dev_base_lock for writing when they do the @@ -212,7 +212,7 @@ static int list_netdevice(struct net_device *dev) ASSERT_RTNL(); write_lock_bh(&dev_base_lock); - list_add_tail(&dev->dev_list, &net->dev_base_head); + list_add_tail_rcu(&dev->dev_list, &net->dev_base_head); hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name)); hlist_add_head_rcu(&dev->index_hlist, dev_index_hash(net, dev->ifindex)); @@ -229,7 +229,7 @@ static void unlist_netdevice(struct net_device *dev) /* Unlink dev from the device chain */ write_lock_bh(&dev_base_lock); - list_del(&dev->dev_list); + list_del_rcu(&dev->dev_list); hlist_del_rcu(&dev->name_hlist); hlist_del_rcu(&dev->index_hlist); write_unlock_bh(&dev_base_lock); @@ -799,15 +799,15 @@ struct net_device *dev_get_by_flags(struct net *net, unsigned short if_flags, struct net_device *dev, *ret; ret = NULL; - read_lock(&dev_base_lock); - for_each_netdev(net, dev) { + rcu_read_lock(); + for_each_netdev_rcu(net, dev) { if (((dev->flags ^ if_flags) & mask) == 0) { dev_hold(dev); ret = dev; break; } } - read_unlock(&dev_base_lock); + rcu_read_unlock(); return ret; } EXPORT_SYMBOL(dev_get_by_flags); @@ -3077,18 +3077,18 @@ static int dev_ifconf(struct net *net, char __user *arg) * in detail. */ void *dev_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(dev_base_lock) + __acquires(RCU) { struct net *net = seq_file_net(seq); loff_t off; struct net_device *dev; - read_lock(&dev_base_lock); + rcu_read_lock(); if (!*pos) return SEQ_START_TOKEN; off = 1; - for_each_netdev(net, dev) + for_each_netdev_rcu(net, dev) if (off++ == *pos) return dev; @@ -3097,16 +3097,18 @@ void *dev_seq_start(struct seq_file *seq, loff_t *pos) void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct net *net = seq_file_net(seq); + struct net_device *dev = (v == SEQ_START_TOKEN) ? + first_net_device(seq_file_net(seq)) : + next_net_device((struct net_device *)v); + ++*pos; - return v == SEQ_START_TOKEN ? - first_net_device(net) : next_net_device((struct net_device *)v); + return rcu_dereference(dev); } void dev_seq_stop(struct seq_file *seq, void *v) - __releases(dev_base_lock) + __releases(RCU) { - read_unlock(&dev_base_lock); + rcu_read_unlock(); } static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 664965c87e16..2e355841ca99 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -749,9 +749,9 @@ static int dn_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (!(saddr->sdn_flags & SDF_WILD)) { if (le16_to_cpu(saddr->sdn_nodeaddrl)) { - read_lock(&dev_base_lock); + rcu_read_lock(); ldev = NULL; - for_each_netdev(&init_net, dev) { + for_each_netdev_rcu(&init_net, dev) { if (!dev->dn_ptr) continue; if (dn_dev_islocal(dev, dn_saddr2dn(saddr))) { @@ -759,7 +759,7 @@ static int dn_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) break; } } - read_unlock(&dev_base_lock); + rcu_read_unlock(); if (ldev == NULL) return -EADDRNOTAVAIL; } diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index 27ea2e9b080a..fd641f65e092 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -607,8 +607,8 @@ static void dn_fib_del_ifaddr(struct dn_ifaddr *ifa) ASSERT_RTNL(); /* Scan device list */ - read_lock(&dev_base_lock); - for_each_netdev(&init_net, dev) { + rcu_read_lock(); + for_each_netdev_rcu(&init_net, dev) { dn_db = dev->dn_ptr; if (dn_db == NULL) continue; @@ -619,7 +619,7 @@ static void dn_fib_del_ifaddr(struct dn_ifaddr *ifa) } } } - read_unlock(&dev_base_lock); + rcu_read_unlock(); if (found_it == 0) { fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 16, ifa); diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 57662cabaf9b..860286a3921b 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -908,8 +908,8 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *old dev_put(dev_out); goto out; } - read_lock(&dev_base_lock); - for_each_netdev(&init_net, dev) { + rcu_read_lock(); + for_each_netdev_rcu(&init_net, dev) { if (!dev->dn_ptr) continue; if (!dn_dev_islocal(dev, oldflp->fld_src)) @@ -922,7 +922,7 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *old dev_out = dev; break; } - read_unlock(&dev_base_lock); + rcu_read_unlock(); if (dev_out == NULL) goto out; dev_hold(dev_out); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index ccccaae50b20..8aa7a134c1f1 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -876,19 +876,16 @@ __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope) if (!addr) addr = ifa->ifa_local; } endfor_ifa(in_dev); -no_in_dev: - rcu_read_unlock(); +no_in_dev: if (addr) - goto out; + goto out_unlock; /* Not loopback addresses on loopback should be preferred in this case. It is importnat that lo is the first interface in dev_base list. */ - read_lock(&dev_base_lock); - rcu_read_lock(); - for_each_netdev(net, dev) { + for_each_netdev_rcu(net, dev) { if ((in_dev = __in_dev_get_rcu(dev)) == NULL) continue; @@ -896,12 +893,11 @@ no_in_dev: if (ifa->ifa_scope != RT_SCOPE_LINK && ifa->ifa_scope <= scope) { addr = ifa->ifa_local; - goto out_unlock_both; + goto out_unlock; } } endfor_ifa(in_dev); } -out_unlock_both: - read_unlock(&dev_base_lock); +out_unlock: rcu_read_unlock(); out: return addr; @@ -962,9 +958,8 @@ __be32 inet_confirm_addr(struct in_device *in_dev, return confirm_addr_indev(in_dev, dst, local, scope); net = dev_net(in_dev->dev); - read_lock(&dev_base_lock); rcu_read_lock(); - for_each_netdev(net, dev) { + for_each_netdev_rcu(net, dev) { if ((in_dev = __in_dev_get_rcu(dev))) { addr = confirm_addr_indev(in_dev, dst, local, scope); if (addr) @@ -972,7 +967,6 @@ __be32 inet_confirm_addr(struct in_device *in_dev, } } rcu_read_unlock(); - read_unlock(&dev_base_lock); return addr; } @@ -1240,18 +1234,18 @@ static void devinet_copy_dflt_conf(struct net *net, int i) { struct net_device *dev; - read_lock(&dev_base_lock); - for_each_netdev(net, dev) { + rcu_read_lock(); + for_each_netdev_rcu(net, dev) { struct in_device *in_dev; - rcu_read_lock(); + in_dev = __in_dev_get_rcu(dev); if (in_dev && !test_bit(i, in_dev->cnf.state)) in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i]; - rcu_read_unlock(); } - read_unlock(&dev_base_lock); + rcu_read_unlock(); } +/* called with RTNL locked */ static void inet_forward_change(struct net *net) { struct net_device *dev; @@ -1260,7 +1254,6 @@ static void inet_forward_change(struct net *net) IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on; IPV4_DEVCONF_DFLT(net, FORWARDING) = on; - read_lock(&dev_base_lock); for_each_netdev(net, dev) { struct in_device *in_dev; if (on) @@ -1271,7 +1264,6 @@ static void inet_forward_change(struct net *net) IN_DEV_CONF_SET(in_dev, FORWARDING, on); rcu_read_unlock(); } - read_unlock(&dev_base_lock); } static int devinet_conf_proc(ctl_table *ctl, int write, diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 918648409612..024bba30de21 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -481,9 +481,8 @@ static void addrconf_forward_change(struct net *net, __s32 newf) struct net_device *dev; struct inet6_dev *idev; - read_lock(&dev_base_lock); - for_each_netdev(net, dev) { - rcu_read_lock(); + rcu_read_lock(); + for_each_netdev_rcu(net, dev) { idev = __in6_dev_get(dev); if (idev) { int changed = (!idev->cnf.forwarding) ^ (!newf); @@ -491,9 +490,8 @@ static void addrconf_forward_change(struct net *net, __s32 newf) if (changed) dev_forward_change(idev); } - rcu_read_unlock(); } - read_unlock(&dev_base_lock); + rcu_read_unlock(); } static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old) @@ -1137,10 +1135,9 @@ int ipv6_dev_get_saddr(struct net *net, struct net_device *dst_dev, hiscore->rule = -1; hiscore->ifa = NULL; - read_lock(&dev_base_lock); rcu_read_lock(); - for_each_netdev(net, dev) { + for_each_netdev_rcu(net, dev) { struct inet6_dev *idev; /* Candidate Source Address (section 4) @@ -1235,7 +1232,6 @@ try_nextdev: read_unlock_bh(&idev->lock); } rcu_read_unlock(); - read_unlock(&dev_base_lock); if (!hiscore->ifa) return -EADDRNOTAVAIL; @@ -4052,9 +4048,8 @@ static void addrconf_disable_change(struct net *net, __s32 newf) struct net_device *dev; struct inet6_dev *idev; - read_lock(&dev_base_lock); - for_each_netdev(net, dev) { - rcu_read_lock(); + rcu_read_lock(); + for_each_netdev_rcu(net, dev) { idev = __in6_dev_get(dev); if (idev) { int changed = (!idev->cnf.disable_ipv6) ^ (!newf); @@ -4062,9 +4057,8 @@ static void addrconf_disable_change(struct net *net, __s32 newf) if (changed) dev_disable_change(idev); } - rcu_read_unlock(); } - read_unlock(&dev_base_lock); + rcu_read_unlock(); } static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int old) diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 1ae58bec1de0..2f00ca83f049 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -404,13 +404,13 @@ int ipv6_chk_acast_addr(struct net *net, struct net_device *dev, if (dev) return ipv6_chk_acast_dev(dev, addr); - read_lock(&dev_base_lock); - for_each_netdev(net, dev) + rcu_read_lock(); + for_each_netdev_rcu(net, dev) if (ipv6_chk_acast_dev(dev, addr)) { found = 1; break; } - read_unlock(&dev_base_lock); + rcu_read_unlock(); return found; } diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index 4eb1ac9a7679..aacba76070fc 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -597,15 +597,15 @@ struct net_device *nr_dev_first(void) { struct net_device *dev, *first = NULL; - read_lock(&dev_base_lock); - for_each_netdev(&init_net, dev) { + rcu_read_lock(); + for_each_netdev_rcu(&init_net, dev) { if ((dev->flags & IFF_UP) && dev->type == ARPHRD_NETROM) if (first == NULL || strncmp(dev->name, first->name, 3) < 0) first = dev; } if (first) dev_hold(first); - read_unlock(&dev_base_lock); + rcu_read_unlock(); return first; } @@ -617,16 +617,17 @@ struct net_device *nr_dev_get(ax25_address *addr) { struct net_device *dev; - read_lock(&dev_base_lock); - for_each_netdev(&init_net, dev) { - if ((dev->flags & IFF_UP) && dev->type == ARPHRD_NETROM && ax25cmp(addr, (ax25_address *)dev->dev_addr) == 0) { + rcu_read_lock(); + for_each_netdev_rcu(&init_net, dev) { + if ((dev->flags & IFF_UP) && dev->type == ARPHRD_NETROM && + ax25cmp(addr, (ax25_address *)dev->dev_addr) == 0) { dev_hold(dev); goto out; } } dev = NULL; out: - read_unlock(&dev_base_lock); + rcu_read_unlock(); return dev; } diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index 9478d9b3d977..d936226916f2 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -600,13 +600,13 @@ struct net_device *rose_dev_first(void) { struct net_device *dev, *first = NULL; - read_lock(&dev_base_lock); - for_each_netdev(&init_net, dev) { + rcu_read_lock(); + for_each_netdev_rcu(&init_net, dev) { if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE) if (first == NULL || strncmp(dev->name, first->name, 3) < 0) first = dev; } - read_unlock(&dev_base_lock); + rcu_read_unlock(); return first; } @@ -618,8 +618,8 @@ struct net_device *rose_dev_get(rose_address *addr) { struct net_device *dev; - read_lock(&dev_base_lock); - for_each_netdev(&init_net, dev) { + rcu_read_lock(); + for_each_netdev_rcu(&init_net, dev) { if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE && rosecmp(addr, (rose_address *)dev->dev_addr) == 0) { dev_hold(dev); goto out; @@ -627,7 +627,7 @@ struct net_device *rose_dev_get(rose_address *addr) } dev = NULL; out: - read_unlock(&dev_base_lock); + rcu_read_unlock(); return dev; } @@ -635,14 +635,14 @@ static int rose_dev_exists(rose_address *addr) { struct net_device *dev; - read_lock(&dev_base_lock); - for_each_netdev(&init_net, dev) { + rcu_read_lock(); + for_each_netdev_rcu(&init_net, dev) { if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE && rosecmp(addr, (rose_address *)dev->dev_addr) == 0) goto out; } dev = NULL; out: - read_unlock(&dev_base_lock); + rcu_read_unlock(); return dev != NULL; } diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index d9f4cc2c7869..fe44c57101de 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -205,14 +205,14 @@ static void sctp_get_local_addr_list(void) struct list_head *pos; struct sctp_af *af; - read_lock(&dev_base_lock); - for_each_netdev(&init_net, dev) { + rcu_read_lock(); + for_each_netdev_rcu(&init_net, dev) { __list_for_each(pos, &sctp_address_families) { af = list_entry(pos, struct sctp_af, list); af->copy_addrlist(&sctp_local_addr_list, dev); } } - read_unlock(&dev_base_lock); + rcu_read_unlock(); } /* Free the existing local addresses. */ -- cgit v1.3-8-gc7d7 From 89e1838f5f2c2af80268a096b9a687643b0d0846 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 21 Sep 2009 10:46:22 +0200 Subject: change default: by default, use socket buffer auto tuning Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_limits.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 9d067ce46960..51f47a586ad8 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -70,11 +70,11 @@ /* I don't think that a tcp send buffer of more than 10M is usefull */ #define DRBD_SNDBUF_SIZE_MIN 0 #define DRBD_SNDBUF_SIZE_MAX (10<<20) -#define DRBD_SNDBUF_SIZE_DEF (2*65535) +#define DRBD_SNDBUF_SIZE_DEF 0 #define DRBD_RCVBUF_SIZE_MIN 0 #define DRBD_RCVBUF_SIZE_MAX (10<<20) -#define DRBD_RCVBUF_SIZE_DEF (2*65535) +#define DRBD_RCVBUF_SIZE_DEF 0 /* @4k PageSize -> 128kB - 512MB */ #define DRBD_MAX_BUFFERS_MIN 32 -- cgit v1.3-8-gc7d7 From ed814525f2e45188964c270fc3a5a0b644f7e4a9 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 27 Oct 2009 12:37:14 +0100 Subject: Now it is equal to DRBD release 8.3.5 without compat crap Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 233db5c18b86..18942ad115d9 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -53,7 +53,7 @@ extern const char *drbd_buildtag(void); -#define REL_VERSION "8.3.3rc2" +#define REL_VERSION "8.3.5" #define API_VERSION 88 #define PRO_VERSION_MIN 86 #define PRO_VERSION_MAX 91 -- cgit v1.3-8-gc7d7 From d4ac42a582e46d7f86f0acb4253a310423c72c4c Mon Sep 17 00:00:00 2001 From: Kristoffer Glembo Date: Wed, 4 Nov 2009 08:39:46 -0800 Subject: sparc: Support for GRLIB APBUART serial port This patch adds support for the APBUART serial port from Aeroflex Gaisler's IP library GRLIB. It is currently used in all LEON3 designs (SPARC V8) but can be used on other platforms as well (which support OF). Signed-off-by: Kristoffer Glembo Signed-off-by: David S. Miller --- drivers/serial/Kconfig | 13 + drivers/serial/Makefile | 1 + drivers/serial/apbuart.c | 710 ++++++++++++++++++++++++++++++++++++++++++++ drivers/serial/apbuart.h | 64 ++++ include/linux/serial_core.h | 3 + 5 files changed, 791 insertions(+) create mode 100644 drivers/serial/apbuart.c create mode 100644 drivers/serial/apbuart.h (limited to 'include') diff --git a/drivers/serial/Kconfig b/drivers/serial/Kconfig index e52257257279..50943ff78f4b 100644 --- a/drivers/serial/Kconfig +++ b/drivers/serial/Kconfig @@ -1477,4 +1477,17 @@ config SERIAL_BCM63XX_CONSOLE If you have enabled the serial port on the bcm63xx CPU you can make it the console by answering Y to this option. +config SERIAL_GRLIB_GAISLER_APBUART + tristate "GRLIB APBUART serial support" + depends on OF + ---help--- + Add support for the GRLIB APBUART serial port. + +config SERIAL_GRLIB_GAISLER_APBUART_CONSOLE + bool "Console on GRLIB APBUART serial port" + depends on SERIAL_GRLIB_GAISLER_APBUART=y + select SERIAL_CORE_CONSOLE + help + Support for running a console on the GRLIB APBUART + endmenu diff --git a/drivers/serial/Makefile b/drivers/serial/Makefile index d21d5dd5d048..5548fe7df61d 100644 --- a/drivers/serial/Makefile +++ b/drivers/serial/Makefile @@ -81,3 +81,4 @@ obj-$(CONFIG_SERIAL_KS8695) += serial_ks8695.o obj-$(CONFIG_KGDB_SERIAL_CONSOLE) += kgdboc.o obj-$(CONFIG_SERIAL_QE) += ucc_uart.o obj-$(CONFIG_SERIAL_TIMBERDALE) += timbuart.o +obj-$(CONFIG_SERIAL_GRLIB_GAISLER_APBUART) += apbuart.o diff --git a/drivers/serial/apbuart.c b/drivers/serial/apbuart.c new file mode 100644 index 000000000000..8a343045131f --- /dev/null +++ b/drivers/serial/apbuart.c @@ -0,0 +1,710 @@ +/* + * Driver for GRLIB serial ports (APBUART) + * + * Based on linux/drivers/serial/amba.c + * + * Copyright (C) 2000 Deep Blue Solutions Ltd. + * Copyright (C) 2003 Konrad Eisele + * Copyright (C) 2006 Daniel Hellstrom , Aeroflex Gaisler AB + * Copyright (C) 2008 Gilead Kutnick + * Copyright (C) 2009 Kristoffer Glembo , Aeroflex Gaisler AB + */ + +#if defined(CONFIG_SERIAL_GRLIB_GAISLER_APBUART_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ) +#define SUPPORT_SYSRQ +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "apbuart.h" + +#define SERIAL_APBUART_MAJOR TTY_MAJOR +#define SERIAL_APBUART_MINOR 64 +#define UART_DUMMY_RSR_RX 0x8000 /* for ignore all read */ + +static void apbuart_tx_chars(struct uart_port *port); + +static void apbuart_stop_tx(struct uart_port *port) +{ + unsigned int cr; + + cr = UART_GET_CTRL(port); + cr &= ~UART_CTRL_TI; + UART_PUT_CTRL(port, cr); +} + +static void apbuart_start_tx(struct uart_port *port) +{ + unsigned int cr; + + cr = UART_GET_CTRL(port); + cr |= UART_CTRL_TI; + UART_PUT_CTRL(port, cr); + + if (UART_GET_STATUS(port) & UART_STATUS_THE) + apbuart_tx_chars(port); +} + +static void apbuart_stop_rx(struct uart_port *port) +{ + unsigned int cr; + + cr = UART_GET_CTRL(port); + cr &= ~(UART_CTRL_RI); + UART_PUT_CTRL(port, cr); +} + +static void apbuart_enable_ms(struct uart_port *port) +{ + /* No modem status change interrupts for APBUART */ +} + +static void apbuart_rx_chars(struct uart_port *port) +{ + struct tty_struct *tty = port->state->port.tty; + unsigned int status, ch, rsr, flag; + unsigned int max_chars = port->fifosize; + + status = UART_GET_STATUS(port); + + while (UART_RX_DATA(status) && (max_chars--)) { + + ch = UART_GET_CHAR(port); + flag = TTY_NORMAL; + + port->icount.rx++; + + rsr = UART_GET_STATUS(port) | UART_DUMMY_RSR_RX; + UART_PUT_STATUS(port, 0); + if (rsr & UART_STATUS_ERR) { + + if (rsr & UART_STATUS_BR) { + rsr &= ~(UART_STATUS_FE | UART_STATUS_PE); + port->icount.brk++; + if (uart_handle_break(port)) + goto ignore_char; + } else if (rsr & UART_STATUS_PE) { + port->icount.parity++; + } else if (rsr & UART_STATUS_FE) { + port->icount.frame++; + } + if (rsr & UART_STATUS_OE) + port->icount.overrun++; + + rsr &= port->read_status_mask; + + if (rsr & UART_STATUS_PE) + flag = TTY_PARITY; + else if (rsr & UART_STATUS_FE) + flag = TTY_FRAME; + } + + if (uart_handle_sysrq_char(port, ch)) + goto ignore_char; + + uart_insert_char(port, rsr, UART_STATUS_OE, ch, flag); + + + ignore_char: + status = UART_GET_STATUS(port); + } + + tty_flip_buffer_push(tty); +} + +static void apbuart_tx_chars(struct uart_port *port) +{ + struct circ_buf *xmit = &port->state->xmit; + int count; + + if (port->x_char) { + UART_PUT_CHAR(port, port->x_char); + port->icount.tx++; + port->x_char = 0; + return; + } + + if (uart_circ_empty(xmit) || uart_tx_stopped(port)) { + apbuart_stop_tx(port); + return; + } + + /* amba: fill FIFO */ + count = port->fifosize >> 1; + do { + UART_PUT_CHAR(port, xmit->buf[xmit->tail]); + xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); + port->icount.tx++; + if (uart_circ_empty(xmit)) + break; + } while (--count > 0); + + if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) + uart_write_wakeup(port); + + if (uart_circ_empty(xmit)) + apbuart_stop_tx(port); +} + +static irqreturn_t apbuart_int(int irq, void *dev_id) +{ + struct uart_port *port = dev_id; + unsigned int status; + + spin_lock(&port->lock); + + status = UART_GET_STATUS(port); + if (status & UART_STATUS_DR) + apbuart_rx_chars(port); + if (status & UART_STATUS_THE) + apbuart_tx_chars(port); + + spin_unlock(&port->lock); + + return IRQ_HANDLED; +} + +static unsigned int apbuart_tx_empty(struct uart_port *port) +{ + unsigned int status = UART_GET_STATUS(port); + return status & UART_STATUS_THE ? TIOCSER_TEMT : 0; +} + +static unsigned int apbuart_get_mctrl(struct uart_port *port) +{ + /* The GRLIB APBUART handles flow control in hardware */ + return TIOCM_CAR | TIOCM_DSR | TIOCM_CTS; +} + +static void apbuart_set_mctrl(struct uart_port *port, unsigned int mctrl) +{ + /* The GRLIB APBUART handles flow control in hardware */ +} + +static void apbuart_break_ctl(struct uart_port *port, int break_state) +{ + /* We don't support sending break */ +} + +static int apbuart_startup(struct uart_port *port) +{ + int retval; + unsigned int cr; + + /* Allocate the IRQ */ + retval = request_irq(port->irq, apbuart_int, 0, "apbuart", port); + if (retval) + return retval; + + /* Finally, enable interrupts */ + cr = UART_GET_CTRL(port); + UART_PUT_CTRL(port, + cr | UART_CTRL_RE | UART_CTRL_TE | + UART_CTRL_RI | UART_CTRL_TI); + + return 0; +} + +static void apbuart_shutdown(struct uart_port *port) +{ + unsigned int cr; + + /* disable all interrupts, disable the port */ + cr = UART_GET_CTRL(port); + UART_PUT_CTRL(port, + cr & ~(UART_CTRL_RE | UART_CTRL_TE | + UART_CTRL_RI | UART_CTRL_TI)); + + /* Free the interrupt */ + free_irq(port->irq, port); +} + +static void apbuart_set_termios(struct uart_port *port, + struct ktermios *termios, struct ktermios *old) +{ + unsigned int cr; + unsigned long flags; + unsigned int baud, quot; + + /* Ask the core to calculate the divisor for us. */ + baud = uart_get_baud_rate(port, termios, old, 0, port->uartclk / 16); + if (baud == 0) + panic("invalid baudrate %i\n", port->uartclk / 16); + + /* uart_get_divisor calc a *16 uart freq, apbuart is *8 */ + quot = (uart_get_divisor(port, baud)) * 2; + cr = UART_GET_CTRL(port); + cr &= ~(UART_CTRL_PE | UART_CTRL_PS); + + if (termios->c_cflag & PARENB) { + cr |= UART_CTRL_PE; + if ((termios->c_cflag & PARODD)) + cr |= UART_CTRL_PS; + } + + /* Enable flow control. */ + if (termios->c_cflag & CRTSCTS) + cr |= UART_CTRL_FL; + + spin_lock_irqsave(&port->lock, flags); + + /* Update the per-port timeout. */ + uart_update_timeout(port, termios->c_cflag, baud); + + port->read_status_mask = UART_STATUS_OE; + if (termios->c_iflag & INPCK) + port->read_status_mask |= UART_STATUS_FE | UART_STATUS_PE; + + /* Characters to ignore */ + port->ignore_status_mask = 0; + if (termios->c_iflag & IGNPAR) + port->ignore_status_mask |= UART_STATUS_FE | UART_STATUS_PE; + + /* Ignore all characters if CREAD is not set. */ + if ((termios->c_cflag & CREAD) == 0) + port->ignore_status_mask |= UART_DUMMY_RSR_RX; + + /* Set baud rate */ + quot -= 1; + UART_PUT_SCAL(port, quot); + UART_PUT_CTRL(port, cr); + + spin_unlock_irqrestore(&port->lock, flags); +} + +static const char *apbuart_type(struct uart_port *port) +{ + return port->type == PORT_APBUART ? "GRLIB/APBUART" : NULL; +} + +static void apbuart_release_port(struct uart_port *port) +{ + release_mem_region(port->mapbase, 0x100); +} + +static int apbuart_request_port(struct uart_port *port) +{ + return request_mem_region(port->mapbase, 0x100, "grlib-apbuart") + != NULL ? 0 : -EBUSY; + return 0; +} + +/* Configure/autoconfigure the port */ +static void apbuart_config_port(struct uart_port *port, int flags) +{ + if (flags & UART_CONFIG_TYPE) { + port->type = PORT_APBUART; + apbuart_request_port(port); + } +} + +/* Verify the new serial_struct (for TIOCSSERIAL) */ +static int apbuart_verify_port(struct uart_port *port, + struct serial_struct *ser) +{ + int ret = 0; + if (ser->type != PORT_UNKNOWN && ser->type != PORT_APBUART) + ret = -EINVAL; + if (ser->irq < 0 || ser->irq >= NR_IRQS) + ret = -EINVAL; + if (ser->baud_base < 9600) + ret = -EINVAL; + return ret; +} + +static struct uart_ops grlib_apbuart_ops = { + .tx_empty = apbuart_tx_empty, + .set_mctrl = apbuart_set_mctrl, + .get_mctrl = apbuart_get_mctrl, + .stop_tx = apbuart_stop_tx, + .start_tx = apbuart_start_tx, + .stop_rx = apbuart_stop_rx, + .enable_ms = apbuart_enable_ms, + .break_ctl = apbuart_break_ctl, + .startup = apbuart_startup, + .shutdown = apbuart_shutdown, + .set_termios = apbuart_set_termios, + .type = apbuart_type, + .release_port = apbuart_release_port, + .request_port = apbuart_request_port, + .config_port = apbuart_config_port, + .verify_port = apbuart_verify_port, +}; + +static struct uart_port grlib_apbuart_ports[UART_NR]; +static struct device_node *grlib_apbuart_nodes[UART_NR]; + +static int apbuart_scan_fifo_size(struct uart_port *port, int portnumber) +{ + int ctrl, loop = 0; + int status; + int fifosize; + unsigned long flags; + + ctrl = UART_GET_CTRL(port); + + /* + * Enable the transceiver and wait for it to be ready to send data. + * Clear interrupts so that this process will not be externally + * interrupted in the middle (which can cause the transceiver to + * drain prematurely). + */ + + local_irq_save(flags); + + UART_PUT_CTRL(port, ctrl | UART_CTRL_TE); + + while (!UART_TX_READY(UART_GET_STATUS(port))) + loop++; + + /* + * Disable the transceiver so data isn't actually sent during the + * actual test. + */ + + UART_PUT_CTRL(port, ctrl & ~(UART_CTRL_TE)); + + fifosize = 1; + UART_PUT_CHAR(port, 0); + + /* + * So long as transmitting a character increments the tranceivier FIFO + * length the FIFO must be at least that big. These bytes will + * automatically drain off of the FIFO. + */ + + status = UART_GET_STATUS(port); + while (((status >> 20) & 0x3F) == fifosize) { + fifosize++; + UART_PUT_CHAR(port, 0); + status = UART_GET_STATUS(port); + } + + fifosize--; + + UART_PUT_CTRL(port, ctrl); + local_irq_restore(flags); + + if (fifosize == 0) + fifosize = 1; + + return fifosize; +} + +static void apbuart_flush_fifo(struct uart_port *port) +{ + int i; + + for (i = 0; i < port->fifosize; i++) + UART_GET_CHAR(port); +} + + +/* ======================================================================== */ +/* Console driver, if enabled */ +/* ======================================================================== */ + +#ifdef CONFIG_SERIAL_GRLIB_GAISLER_APBUART_CONSOLE + +static void apbuart_console_putchar(struct uart_port *port, int ch) +{ + unsigned int status; + do { + status = UART_GET_STATUS(port); + } while (!UART_TX_READY(status)); + UART_PUT_CHAR(port, ch); +} + +static void +apbuart_console_write(struct console *co, const char *s, unsigned int count) +{ + struct uart_port *port = &grlib_apbuart_ports[co->index]; + unsigned int status, old_cr, new_cr; + + /* First save the CR then disable the interrupts */ + old_cr = UART_GET_CTRL(port); + new_cr = old_cr & ~(UART_CTRL_RI | UART_CTRL_TI); + UART_PUT_CTRL(port, new_cr); + + uart_console_write(port, s, count, apbuart_console_putchar); + + /* + * Finally, wait for transmitter to become empty + * and restore the TCR + */ + do { + status = UART_GET_STATUS(port); + } while (!UART_TX_READY(status)); + UART_PUT_CTRL(port, old_cr); +} + +static void __init +apbuart_console_get_options(struct uart_port *port, int *baud, + int *parity, int *bits) +{ + if (UART_GET_CTRL(port) & (UART_CTRL_RE | UART_CTRL_TE)) { + + unsigned int quot, status; + status = UART_GET_STATUS(port); + + *parity = 'n'; + if (status & UART_CTRL_PE) { + if ((status & UART_CTRL_PS) == 0) + *parity = 'e'; + else + *parity = 'o'; + } + + *bits = 8; + quot = UART_GET_SCAL(port) / 8; + *baud = port->uartclk / (16 * (quot + 1)); + } +} + +static int __init apbuart_console_setup(struct console *co, char *options) +{ + struct uart_port *port; + int baud = 38400; + int bits = 8; + int parity = 'n'; + int flow = 'n'; + + pr_debug("apbuart_console_setup co=%p, co->index=%i, options=%s\n", + co, co->index, options); + + /* + * Check whether an invalid uart number has been specified, and + * if so, search for the first available port that does have + * console support. + */ + if (co->index >= grlib_apbuart_port_nr) + co->index = 0; + + port = &grlib_apbuart_ports[co->index]; + + spin_lock_init(&port->lock); + + if (options) + uart_parse_options(options, &baud, &parity, &bits, &flow); + else + apbuart_console_get_options(port, &baud, &parity, &bits); + + return uart_set_options(port, co, baud, parity, bits, flow); +} + +static struct uart_driver grlib_apbuart_driver; + +static struct console grlib_apbuart_console = { + .name = "ttyS", + .write = apbuart_console_write, + .device = uart_console_device, + .setup = apbuart_console_setup, + .flags = CON_PRINTBUFFER, + .index = -1, + .data = &grlib_apbuart_driver, +}; + + +static void grlib_apbuart_configure(void); + +static int __init apbuart_console_init(void) +{ + grlib_apbuart_configure(); + register_console(&grlib_apbuart_console); + return 0; +} + +console_initcall(apbuart_console_init); + +#define APBUART_CONSOLE (&grlib_apbuart_console) +#else +#define APBUART_CONSOLE NULL +#endif + +static struct uart_driver grlib_apbuart_driver = { + .owner = THIS_MODULE, + .driver_name = "serial", + .dev_name = "ttyS", + .major = SERIAL_APBUART_MAJOR, + .minor = SERIAL_APBUART_MINOR, + .nr = UART_NR, + .cons = APBUART_CONSOLE, +}; + + +/* ======================================================================== */ +/* OF Platform Driver */ +/* ======================================================================== */ + +static int __devinit apbuart_probe(struct of_device *op, + const struct of_device_id *match) +{ + int i = -1; + struct uart_port *port = NULL; + + i = 0; + for (i = 0; i < grlib_apbuart_port_nr; i++) { + if (op->node == grlib_apbuart_nodes[i]) + break; + } + + port = &grlib_apbuart_ports[i]; + port->dev = &op->dev; + + uart_add_one_port(&grlib_apbuart_driver, (struct uart_port *) port); + + apbuart_flush_fifo((struct uart_port *) port); + + printk(KERN_INFO "grlib-apbuart at 0x%x, irq %d\n", + port->mapbase, port->irq); + return 0; + +} + +static struct of_device_id __initdata apbuart_match[] = { + { + .name = "GAISLER_APBUART", + }, + {}, +}; + +static struct of_platform_driver grlib_apbuart_of_driver = { + .match_table = apbuart_match, + .probe = apbuart_probe, + .driver = { + .owner = THIS_MODULE, + .name = "grlib-apbuart", + }, +}; + + +static void grlib_apbuart_configure(void) +{ + static int enum_done; + struct device_node *np; + struct uart_port *port = NULL; + + int node; + int freq_khz; + int v = 0, d = 0; + unsigned int addr; + int irq, line; + struct amba_prom_registers *regs; + + if (enum_done) + return; + + /* Get bus frequency */ + node = prom_getchild(prom_root_node); + freq_khz = prom_getint(node, "clock-frequency"); + + line = 0; + for_each_matching_node(np, apbuart_match) { + + int *vendor = (int *) of_get_property(np, "vendor", NULL); + int *device = (int *) of_get_property(np, "device", NULL); + int *irqs = (int *) of_get_property(np, "interrupts", NULL); + regs = (struct amba_prom_registers *) + of_get_property(np, "reg", NULL); + + if (vendor) + v = *vendor; + if (device) + d = *device; + + if (!irqs || !regs) + return; + + grlib_apbuart_nodes[line] = np; + + addr = regs->phys_addr; + irq = *irqs; + + port = &grlib_apbuart_ports[line]; + + port->mapbase = addr; + port->membase = ioremap(addr, sizeof(struct grlib_apbuart_regs_map)); + port->irq = irq; + port->iotype = UPIO_MEM; + port->ops = &grlib_apbuart_ops; + port->flags = UPF_BOOT_AUTOCONF; + port->line = line; + port->uartclk = freq_khz * 1000; + port->fifosize = apbuart_scan_fifo_size((struct uart_port *) port, line); + line++; + + /* We support maximum UART_NR uarts ... */ + if (line == UART_NR) + break; + + } + + enum_done = 1; + + grlib_apbuart_driver.nr = grlib_apbuart_port_nr = line; +} + +static int __init grlib_apbuart_init(void) +{ + int ret; + + /* Find all APBUARTS in device the tree and initialize their ports */ + grlib_apbuart_configure(); + + printk(KERN_INFO "Serial: GRLIB APBUART driver\n"); + + ret = uart_register_driver(&grlib_apbuart_driver); + + if (ret) { + printk(KERN_ERR "%s: uart_register_driver failed (%i)\n", + __FILE__, ret); + return ret; + } + + ret = of_register_driver(&grlib_apbuart_of_driver, &of_platform_bus_type); + + if (ret) { + printk(KERN_ERR + "%s: of_register_platform_driver failed (%i)\n", + __FILE__, ret); + uart_unregister_driver(&grlib_apbuart_driver); + return ret; + } + + return ret; +} + +static void __exit grlib_apbuart_exit(void) +{ + int i; + + for (i = 0; i < grlib_apbuart_port_nr; i++) + uart_remove_one_port(&grlib_apbuart_driver, + &grlib_apbuart_ports[i]); + + uart_unregister_driver(&grlib_apbuart_driver); + +} + +module_init(grlib_apbuart_init); +module_exit(grlib_apbuart_exit); + +MODULE_AUTHOR("Aeroflex Gaisler AB"); +MODULE_DESCRIPTION("GRLIB APBUART serial driver"); +MODULE_VERSION("2.1"); +MODULE_LICENSE("GPL"); diff --git a/drivers/serial/apbuart.h b/drivers/serial/apbuart.h new file mode 100644 index 000000000000..5faf87c8d2bc --- /dev/null +++ b/drivers/serial/apbuart.h @@ -0,0 +1,64 @@ +#ifndef __GRLIB_APBUART_H__ +#define __GRLIB_APBUART_H__ + +#include + +#define UART_NR 8 +static int grlib_apbuart_port_nr; + +struct grlib_apbuart_regs_map { + u32 data; + u32 status; + u32 ctrl; + u32 scaler; +}; + +struct amba_prom_registers { + unsigned int phys_addr; + unsigned int reg_size; +}; + +/* + * The following defines the bits in the APBUART Status Registers. + */ +#define UART_STATUS_DR 0x00000001 /* Data Ready */ +#define UART_STATUS_TSE 0x00000002 /* TX Send Register Empty */ +#define UART_STATUS_THE 0x00000004 /* TX Hold Register Empty */ +#define UART_STATUS_BR 0x00000008 /* Break Error */ +#define UART_STATUS_OE 0x00000010 /* RX Overrun Error */ +#define UART_STATUS_PE 0x00000020 /* RX Parity Error */ +#define UART_STATUS_FE 0x00000040 /* RX Framing Error */ +#define UART_STATUS_ERR 0x00000078 /* Error Mask */ + +/* + * The following defines the bits in the APBUART Ctrl Registers. + */ +#define UART_CTRL_RE 0x00000001 /* Receiver enable */ +#define UART_CTRL_TE 0x00000002 /* Transmitter enable */ +#define UART_CTRL_RI 0x00000004 /* Receiver interrupt enable */ +#define UART_CTRL_TI 0x00000008 /* Transmitter irq */ +#define UART_CTRL_PS 0x00000010 /* Parity select */ +#define UART_CTRL_PE 0x00000020 /* Parity enable */ +#define UART_CTRL_FL 0x00000040 /* Flow control enable */ +#define UART_CTRL_LB 0x00000080 /* Loopback enable */ + +#define APBBASE(port) ((struct grlib_apbuart_regs_map *)((port)->membase)) + +#define APBBASE_DATA_P(port) (&(APBBASE(port)->data)) +#define APBBASE_STATUS_P(port) (&(APBBASE(port)->status)) +#define APBBASE_CTRL_P(port) (&(APBBASE(port)->ctrl)) +#define APBBASE_SCALAR_P(port) (&(APBBASE(port)->scaler)) + +#define UART_GET_CHAR(port) (__raw_readl(APBBASE_DATA_P(port))) +#define UART_PUT_CHAR(port, v) (__raw_writel(v, APBBASE_DATA_P(port))) +#define UART_GET_STATUS(port) (__raw_readl(APBBASE_STATUS_P(port))) +#define UART_PUT_STATUS(port, v)(__raw_writel(v, APBBASE_STATUS_P(port))) +#define UART_GET_CTRL(port) (__raw_readl(APBBASE_CTRL_P(port))) +#define UART_PUT_CTRL(port, v) (__raw_writel(v, APBBASE_CTRL_P(port))) +#define UART_GET_SCAL(port) (__raw_readl(APBBASE_SCALAR_P(port))) +#define UART_PUT_SCAL(port, v) (__raw_writel(v, APBBASE_SCALAR_P(port))) + +#define UART_RX_DATA(s) (((s) & UART_STATUS_DR) != 0) +#define UART_TX_READY(s) (((s) & UART_STATUS_THE) != 0) + +#endif /* __GRLIB_APBUART_H__ */ diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index db532ce288be..8c3dd36fe91a 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -179,6 +179,9 @@ /* BCM63xx family SoCs */ #define PORT_BCM63XX 89 +/* Aeroflex Gaisler GRLIB APBUART */ +#define PORT_APBUART 90 + #ifdef __KERNEL__ #include -- cgit v1.3-8-gc7d7 From 2a855dd01bc1539111adb7233f587c5c468732ac Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Sun, 25 Oct 2009 15:37:58 +0100 Subject: signal: Fix alternate signal stack check All architectures in the kernel increment/decrement the stack pointer before storing values on the stack. On architectures which have the stack grow down sas_ss_sp == sp is not on the alternate signal stack while sas_ss_sp + sas_ss_size == sp is on the alternate signal stack. On architectures which have the stack grow up sas_ss_sp == sp is on the alternate signal stack while sas_ss_sp + sas_ss_size == sp is not on the alternate signal stack. The current implementation fails for architectures which have the stack grow down on the corner case where sas_ss_sp == sp.This was reported as Debian bug #544905 on AMD64. Simplified test case: http://download.breakpoint.cc/tc-sig-stack.c The test case creates the following stack scenario: 0xn0300 stack top 0xn0200 alt stack pointer top (when switching to alt stack) 0xn01ff alt stack end 0xn0100 alt stack start == stack pointer If the signal is sent the stack pointer is pointing to the base address of the alt stack and the kernel erroneously decides that it has already switched to the alternate stack because of the current check for "sp - sas_ss_sp < sas_ss_size" On parisc (stack grows up) the scenario would be: 0xn0200 stack pointer 0xn01ff alt stack end 0xn0100 alt stack start = alt stack pointer base (when switching to alt stack) 0xn0000 stack base This is handled correctly by the current implementation. [ tglx: Modified for archs which have the stack grow up (parisc) which would fail with the correct implementation for stack grows down. Added a check for sp >= current->sas_ss_sp which is strictly not necessary but makes the code symetric for both variants ] Signed-off-by: Sebastian Andrzej Siewior Cc: Oleg Nesterov Cc: Roland McGrath Cc: Kyle McMartin Cc: stable@kernel.org LKML-Reference: <20091025143758.GA6653@Chamillionaire.breakpoint.cc> Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 75e6e60bf583..0f67914a43c9 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2086,11 +2086,18 @@ static inline int is_si_special(const struct siginfo *info) return info <= SEND_SIG_FORCED; } -/* True if we are on the alternate signal stack. */ - +/* + * True if we are on the alternate signal stack. + */ static inline int on_sig_stack(unsigned long sp) { - return (sp - current->sas_ss_sp < current->sas_ss_size); +#ifdef CONFIG_STACK_GROWSUP + return sp >= current->sas_ss_sp && + sp - current->sas_ss_sp < current->sas_ss_size; +#else + return sp > current->sas_ss_sp && + sp - current->sas_ss_sp <= current->sas_ss_size; +#endif } static inline int sas_ss_flags(unsigned long sp) -- cgit v1.3-8-gc7d7 From d94d9fee9fa4e66a0b91640a694b8b10177075b3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 4 Nov 2009 09:50:58 -0800 Subject: net: cleanup include/linux This cleanup patch puts struct/union/enum opening braces, in first line to ease grep games. struct something { becomes : struct something { Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/dn.h | 9 +- include/linux/errqueue.h | 6 +- include/linux/fib_rules.h | 9 +- include/linux/filter.h | 6 +- include/linux/gen_stats.h | 15 ++-- include/linux/if.h | 15 ++-- include/linux/if_addr.h | 9 +- include/linux/if_addrlabel.h | 6 +- include/linux/if_arcnet.h | 18 ++-- include/linux/if_arp.h | 3 +- include/linux/if_bonding.h | 3 +- include/linux/if_bridge.h | 9 +- include/linux/if_ec.h | 12 +-- include/linux/if_fddi.h | 22 ++--- include/linux/if_hippi.h | 15 ++-- include/linux/if_link.h | 27 ++---- include/linux/if_packet.h | 27 ++---- include/linux/if_plip.h | 3 +- include/linux/if_pppol2tp.h | 3 +- include/linux/if_tunnel.h | 6 +- include/linux/igmp.h | 15 ++-- include/linux/in.h | 18 ++-- include/linux/in6.h | 9 +- include/linux/inetdevice.h | 9 +- include/linux/ip_vs.h | 3 +- include/linux/mroute.h | 18 ++-- include/linux/mroute6.h | 15 ++-- include/linux/neighbour.h | 18 ++-- include/linux/netdevice.h | 30 +++---- include/linux/netfilter.h | 6 +- include/linux/netfilter/nf_conntrack_common.h | 6 +- include/linux/netfilter/nf_conntrack_ftp.h | 3 +- include/linux/netfilter/nf_conntrack_sctp.h | 3 +- include/linux/netfilter/nf_conntrack_tcp.h | 3 +- include/linux/netfilter/nfnetlink.h | 6 +- include/linux/netfilter/nfnetlink_compat.h | 3 +- include/linux/netfilter/x_tables.h | 45 ++++------ include/linux/netfilter/xt_connbytes.h | 3 +- include/linux/netfilter/xt_esp.h | 3 +- include/linux/netfilter/xt_multiport.h | 9 +- include/linux/netfilter/xt_policy.h | 18 ++-- include/linux/netfilter/xt_state.h | 3 +- include/linux/netfilter/xt_string.h | 3 +- include/linux/netfilter/xt_tcpudp.h | 6 +- include/linux/netfilter_arp/arp_tables.h | 21 ++--- include/linux/netfilter_bridge/ebt_802_3.h | 3 +- include/linux/netfilter_bridge/ebt_among.h | 9 +- include/linux/netfilter_bridge/ebt_arpreply.h | 3 +- include/linux/netfilter_bridge/ebt_ip.h | 3 +- include/linux/netfilter_bridge/ebt_ip6.h | 3 +- include/linux/netfilter_bridge/ebt_limit.h | 3 +- include/linux/netfilter_bridge/ebt_log.h | 3 +- include/linux/netfilter_bridge/ebt_mark_m.h | 3 +- include/linux/netfilter_bridge/ebt_mark_t.h | 3 +- include/linux/netfilter_bridge/ebt_nat.h | 3 +- include/linux/netfilter_bridge/ebt_pkttype.h | 3 +- include/linux/netfilter_bridge/ebt_redirect.h | 3 +- include/linux/netfilter_bridge/ebt_stp.h | 6 +- include/linux/netfilter_bridge/ebtables.h | 39 +++------ include/linux/netfilter_ipv4/ip_tables.h | 27 ++---- include/linux/netfilter_ipv4/ipt_SAME.h | 3 +- include/linux/netfilter_ipv4/ipt_ah.h | 3 +- include/linux/netfilter_ipv6/ip6_tables.h | 27 ++---- include/linux/netfilter_ipv6/ip6t_ah.h | 3 +- include/linux/netfilter_ipv6/ip6t_frag.h | 3 +- include/linux/netfilter_ipv6/ip6t_ipv6header.h | 3 +- include/linux/netfilter_ipv6/ip6t_mh.h | 3 +- include/linux/netfilter_ipv6/ip6t_opts.h | 3 +- include/linux/netfilter_ipv6/ip6t_rt.h | 3 +- include/linux/netlink.h | 24 ++---- include/linux/pkt_cls.h | 84 +++++++------------ include/linux/pkt_sched.h | 111 +++++++++---------------- include/linux/route.h | 3 +- include/linux/rtnetlink.h | 57 +++++-------- include/linux/skbuff.h | 3 +- include/linux/tc_act/tc_defact.h | 6 +- include/linux/tc_act/tc_gact.h | 9 +- include/linux/tc_act/tc_ipt.h | 3 +- include/linux/tc_act/tc_mirred.h | 6 +- include/linux/tc_act/tc_nat.h | 6 +- include/linux/tc_act/tc_pedit.h | 9 +- include/linux/tc_ematch/tc_em_cmp.h | 6 +- include/linux/tc_ematch/tc_em_meta.h | 15 ++-- include/linux/tc_ematch/tc_em_nbyte.h | 3 +- include/linux/tc_ematch/tc_em_text.h | 3 +- include/linux/tcp.h | 6 +- include/linux/xfrm.h | 27 ++---- 87 files changed, 351 insertions(+), 694 deletions(-) (limited to 'include') diff --git a/include/linux/dn.h b/include/linux/dn.h index fe9990823193..9c50445462d9 100644 --- a/include/linux/dn.h +++ b/include/linux/dn.h @@ -71,14 +71,12 @@ /* Structures */ -struct dn_naddr -{ +struct dn_naddr { __le16 a_len; __u8 a_addr[DN_MAXADDL]; /* Two bytes little endian */ }; -struct sockaddr_dn -{ +struct sockaddr_dn { __u16 sdn_family; __u8 sdn_flags; __u8 sdn_objnum; @@ -101,8 +99,7 @@ struct optdata_dn { __u8 opt_data[16]; /* User data */ }; -struct accessdata_dn -{ +struct accessdata_dn { __u8 acc_accl; __u8 acc_acc[DN_MAXACCL]; __u8 acc_passl; diff --git a/include/linux/errqueue.h b/include/linux/errqueue.h index ec12cc74366f..034072cea853 100644 --- a/include/linux/errqueue.h +++ b/include/linux/errqueue.h @@ -3,8 +3,7 @@ #include -struct sock_extended_err -{ +struct sock_extended_err { __u32 ee_errno; __u8 ee_origin; __u8 ee_type; @@ -31,8 +30,7 @@ struct sock_extended_err #define SKB_EXT_ERR(skb) ((struct sock_exterr_skb *) ((skb)->cb)) -struct sock_exterr_skb -{ +struct sock_exterr_skb { union { struct inet_skb_parm h4; #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) diff --git a/include/linux/fib_rules.h b/include/linux/fib_rules.h index 87b606b63f1e..c7e5b700bb91 100644 --- a/include/linux/fib_rules.h +++ b/include/linux/fib_rules.h @@ -13,8 +13,7 @@ /* try to find source address in routing lookups */ #define FIB_RULE_FIND_SADDR 0x00010000 -struct fib_rule_hdr -{ +struct fib_rule_hdr { __u8 family; __u8 dst_len; __u8 src_len; @@ -28,8 +27,7 @@ struct fib_rule_hdr __u32 flags; }; -enum -{ +enum { FRA_UNSPEC, FRA_DST, /* destination address */ FRA_SRC, /* source address */ @@ -52,8 +50,7 @@ enum #define FRA_MAX (__FRA_MAX - 1) -enum -{ +enum { FR_ACT_UNSPEC, FR_ACT_TO_TBL, /* Pass to fixed table */ FR_ACT_GOTO, /* Jump to another rule */ diff --git a/include/linux/filter.h b/include/linux/filter.h index bb3b4352978a..29a0e3db9f43 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -23,16 +23,14 @@ * the BPF code definitions which need to match so you can share filters */ -struct sock_filter /* Filter block */ -{ +struct sock_filter { /* Filter block */ __u16 code; /* Actual filter code */ __u8 jt; /* Jump true */ __u8 jf; /* Jump false */ __u32 k; /* Generic multiuse field */ }; -struct sock_fprog /* Required for SO_ATTACH_FILTER. */ -{ +struct sock_fprog { /* Required for SO_ATTACH_FILTER. */ unsigned short len; /* Number of filter blocks */ struct sock_filter __user *filter; }; diff --git a/include/linux/gen_stats.h b/include/linux/gen_stats.h index 710e901085d0..552c8a0a12d1 100644 --- a/include/linux/gen_stats.h +++ b/include/linux/gen_stats.h @@ -18,13 +18,11 @@ enum { * @bytes: number of seen bytes * @packets: number of seen packets */ -struct gnet_stats_basic -{ +struct gnet_stats_basic { __u64 bytes; __u32 packets; }; -struct gnet_stats_basic_packed -{ +struct gnet_stats_basic_packed { __u64 bytes; __u32 packets; } __attribute__ ((packed)); @@ -34,8 +32,7 @@ struct gnet_stats_basic_packed * @bps: current byte rate * @pps: current packet rate */ -struct gnet_stats_rate_est -{ +struct gnet_stats_rate_est { __u32 bps; __u32 pps; }; @@ -48,8 +45,7 @@ struct gnet_stats_rate_est * @requeues: number of requeues * @overlimits: number of enqueues over the limit */ -struct gnet_stats_queue -{ +struct gnet_stats_queue { __u32 qlen; __u32 backlog; __u32 drops; @@ -62,8 +58,7 @@ struct gnet_stats_queue * @interval: sampling period * @ewma_log: the log of measurement window weight */ -struct gnet_estimator -{ +struct gnet_estimator { signed char interval; unsigned char ewma_log; }; diff --git a/include/linux/if.h b/include/linux/if.h index b9a6229f3be7..3b2a46bf8f8d 100644 --- a/include/linux/if.h +++ b/include/linux/if.h @@ -125,8 +125,7 @@ enum { * being very small might be worth keeping for clean configuration. */ -struct ifmap -{ +struct ifmap { unsigned long mem_start; unsigned long mem_end; unsigned short base_addr; @@ -136,8 +135,7 @@ struct ifmap /* 3 bytes spare */ }; -struct if_settings -{ +struct if_settings { unsigned int type; /* Type of physical device or protocol */ unsigned int size; /* Size of the data allocated by the caller */ union { @@ -161,8 +159,7 @@ struct if_settings * remainder may be interface specific. */ -struct ifreq -{ +struct ifreq { #define IFHWADDRLEN 6 union { @@ -211,11 +208,9 @@ struct ifreq * must know all networks accessible). */ -struct ifconf -{ +struct ifconf { int ifc_len; /* size of buffer */ - union - { + union { char __user *ifcu_buf; struct ifreq __user *ifcu_req; } ifc_ifcu; diff --git a/include/linux/if_addr.h b/include/linux/if_addr.h index fd9740466757..23357ab81a77 100644 --- a/include/linux/if_addr.h +++ b/include/linux/if_addr.h @@ -4,8 +4,7 @@ #include #include -struct ifaddrmsg -{ +struct ifaddrmsg { __u8 ifa_family; __u8 ifa_prefixlen; /* The prefix length */ __u8 ifa_flags; /* Flags */ @@ -20,8 +19,7 @@ struct ifaddrmsg * but for point-to-point IFA_ADDRESS is DESTINATION address, * local address is supplied in IFA_LOCAL attribute. */ -enum -{ +enum { IFA_UNSPEC, IFA_ADDRESS, IFA_LOCAL, @@ -47,8 +45,7 @@ enum #define IFA_F_TENTATIVE 0x40 #define IFA_F_PERMANENT 0x80 -struct ifa_cacheinfo -{ +struct ifa_cacheinfo { __u32 ifa_prefered; __u32 ifa_valid; __u32 cstamp; /* created timestamp, hundredths of seconds */ diff --git a/include/linux/if_addrlabel.h b/include/linux/if_addrlabel.h index 89571f65d6de..54580c298187 100644 --- a/include/linux/if_addrlabel.h +++ b/include/linux/if_addrlabel.h @@ -12,8 +12,7 @@ #include -struct ifaddrlblmsg -{ +struct ifaddrlblmsg { __u8 ifal_family; /* Address family */ __u8 __ifal_reserved; /* Reserved */ __u8 ifal_prefixlen; /* Prefix length */ @@ -22,8 +21,7 @@ struct ifaddrlblmsg __u32 ifal_seq; /* sequence number */ }; -enum -{ +enum { IFAL_ADDRESS = 1, IFAL_LABEL = 2, __IFAL_MAX diff --git a/include/linux/if_arcnet.h b/include/linux/if_arcnet.h index 0835debab115..46e34bd0e783 100644 --- a/include/linux/if_arcnet.h +++ b/include/linux/if_arcnet.h @@ -56,8 +56,7 @@ /* * The RFC1201-specific components of an arcnet packet header. */ -struct arc_rfc1201 -{ +struct arc_rfc1201 { __u8 proto; /* protocol ID field - varies */ __u8 split_flag; /* for use with split packets */ __be16 sequence; /* sequence number */ @@ -69,8 +68,7 @@ struct arc_rfc1201 /* * The RFC1051-specific components. */ -struct arc_rfc1051 -{ +struct arc_rfc1051 { __u8 proto; /* ARC_P_RFC1051_ARP/RFC1051_IP */ __u8 payload[0]; /* 507 bytes */ }; @@ -81,8 +79,7 @@ struct arc_rfc1051 * The ethernet-encap-specific components. We have a real ethernet header * and some data. */ -struct arc_eth_encap -{ +struct arc_eth_encap { __u8 proto; /* Always ARC_P_ETHER */ struct ethhdr eth; /* standard ethernet header (yuck!) */ __u8 payload[0]; /* 493 bytes */ @@ -90,8 +87,7 @@ struct arc_eth_encap #define ETH_ENCAP_HDR_SIZE 14 -struct arc_cap -{ +struct arc_cap { __u8 proto; __u8 cookie[sizeof(int)]; /* Actually NOT sent over the network */ union { @@ -108,8 +104,7 @@ struct arc_cap * the _end_ of the 512-byte buffer. We hide this complexity inside the * driver. */ -struct arc_hardware -{ +struct arc_hardware { __u8 source, /* source ARCnet - filled in automagically */ dest, /* destination ARCnet - 0 for broadcast */ offset[2]; /* offset bytes (some weird semantics) */ @@ -120,8 +115,7 @@ struct arc_hardware * This is an ARCnet frame header, as seen by the kernel (and userspace, * when you do a raw packet capture). */ -struct archdr -{ +struct archdr { /* hardware requirements */ struct arc_hardware hard; diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h index 282eb37e2dec..e80b7f88f7c6 100644 --- a/include/linux/if_arp.h +++ b/include/linux/if_arp.h @@ -133,8 +133,7 @@ struct arpreq_old { * This structure defines an ethernet arp header. */ -struct arphdr -{ +struct arphdr { __be16 ar_hrd; /* format of hardware address */ __be16 ar_pro; /* format of protocol address */ unsigned char ar_hln; /* length of hardware address */ diff --git a/include/linux/if_bonding.h b/include/linux/if_bonding.h index 65c2d247068b..cd525fae3c98 100644 --- a/include/linux/if_bonding.h +++ b/include/linux/if_bonding.h @@ -94,8 +94,7 @@ typedef struct ifbond { __s32 miimon; } ifbond; -typedef struct ifslave -{ +typedef struct ifslave { __s32 slave_id; /* Used as an IN param to the BOND_SLAVE_INFO_QUERY ioctl */ char slave_name[IFNAMSIZ]; __s8 link; diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index 6badb3e2c4e4..938b7e81df95 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -49,8 +49,7 @@ #define BR_STATE_FORWARDING 3 #define BR_STATE_BLOCKING 4 -struct __bridge_info -{ +struct __bridge_info { __u64 designated_root; __u64 bridge_id; __u32 root_path_cost; @@ -72,8 +71,7 @@ struct __bridge_info __u32 gc_timer_value; }; -struct __port_info -{ +struct __port_info { __u64 designated_root; __u64 designated_bridge; __u16 port_id; @@ -89,8 +87,7 @@ struct __port_info __u32 hold_timer_value; }; -struct __fdb_entry -{ +struct __fdb_entry { __u8 mac_addr[6]; __u8 port_no; __u8 is_local; diff --git a/include/linux/if_ec.h b/include/linux/if_ec.h index e7499aa79783..d85f9f48129f 100644 --- a/include/linux/if_ec.h +++ b/include/linux/if_ec.h @@ -5,14 +5,12 @@ /* User visible stuff. Glibc provides its own but libc5 folk will use these */ -struct ec_addr -{ +struct ec_addr { unsigned char station; /* Station number. */ unsigned char net; /* Network number. */ }; -struct sockaddr_ec -{ +struct sockaddr_ec { unsigned short sec_family; unsigned char port; /* Port number. */ unsigned char cb; /* Control/flag byte. */ @@ -37,8 +35,7 @@ struct sockaddr_ec #define EC_HLEN 6 /* This is what an Econet frame looks like on the wire. */ -struct ec_framehdr -{ +struct ec_framehdr { unsigned char dst_stn; unsigned char dst_net; unsigned char src_stn; @@ -62,8 +59,7 @@ static inline struct econet_sock *ec_sk(const struct sock *sk) return (struct econet_sock *)sk; } -struct ec_device -{ +struct ec_device { unsigned char station, net; /* Econet protocol address */ }; diff --git a/include/linux/if_fddi.h b/include/linux/if_fddi.h index 45de1046dbbf..5459c5c09930 100644 --- a/include/linux/if_fddi.h +++ b/include/linux/if_fddi.h @@ -63,36 +63,32 @@ #define FDDI_UI_CMD 0x03 /* Define 802.2 Type 1 header */ -struct fddi_8022_1_hdr - { +struct fddi_8022_1_hdr { __u8 dsap; /* destination service access point */ __u8 ssap; /* source service access point */ __u8 ctrl; /* control byte #1 */ - } __attribute__ ((packed)); +} __attribute__ ((packed)); /* Define 802.2 Type 2 header */ -struct fddi_8022_2_hdr - { +struct fddi_8022_2_hdr { __u8 dsap; /* destination service access point */ __u8 ssap; /* source service access point */ __u8 ctrl_1; /* control byte #1 */ __u8 ctrl_2; /* control byte #2 */ - } __attribute__ ((packed)); +} __attribute__ ((packed)); /* Define 802.2 SNAP header */ #define FDDI_K_OUI_LEN 3 -struct fddi_snap_hdr - { +struct fddi_snap_hdr { __u8 dsap; /* always 0xAA */ __u8 ssap; /* always 0xAA */ __u8 ctrl; /* always 0x03 */ __u8 oui[FDDI_K_OUI_LEN]; /* organizational universal id */ __be16 ethertype; /* packet type ID field */ - } __attribute__ ((packed)); +} __attribute__ ((packed)); /* Define FDDI LLC frame header */ -struct fddihdr - { +struct fddihdr { __u8 fc; /* frame control */ __u8 daddr[FDDI_K_ALEN]; /* destination address */ __u8 saddr[FDDI_K_ALEN]; /* source address */ @@ -102,7 +98,7 @@ struct fddihdr struct fddi_8022_2_hdr llc_8022_2; struct fddi_snap_hdr llc_snap; } hdr; - } __attribute__ ((packed)); +} __attribute__ ((packed)); #ifdef __KERNEL__ #include @@ -197,7 +193,7 @@ struct fddi_statistics { __u32 port_pc_withhold[2]; __u32 port_ler_flag[2]; __u32 port_hardware_present[2]; - }; +}; #endif /* __KERNEL__ */ #endif /* _LINUX_IF_FDDI_H */ diff --git a/include/linux/if_hippi.h b/include/linux/if_hippi.h index 4a7c9940b080..8d038eb8db5c 100644 --- a/include/linux/if_hippi.h +++ b/include/linux/if_hippi.h @@ -51,8 +51,7 @@ * HIPPI statistics collection data. */ -struct hipnet_statistics -{ +struct hipnet_statistics { int rx_packets; /* total packets received */ int tx_packets; /* total packets transmitted */ int rx_errors; /* bad packets received */ @@ -77,8 +76,7 @@ struct hipnet_statistics }; -struct hippi_fp_hdr -{ +struct hippi_fp_hdr { #if 0 __u8 ulp; /* must contain 4 */ #if defined (__BIG_ENDIAN_BITFIELD) @@ -108,8 +106,7 @@ struct hippi_fp_hdr __be32 d2_size; } __attribute__ ((packed)); -struct hippi_le_hdr -{ +struct hippi_le_hdr { #if defined (__BIG_ENDIAN_BITFIELD) __u8 fc:3; __u8 double_wide:1; @@ -139,8 +136,7 @@ struct hippi_le_hdr * Looks like the dsap and ssap fields have been swapped by mistake in * RFC 2067 "IP over HIPPI". */ -struct hippi_snap_hdr -{ +struct hippi_snap_hdr { __u8 dsap; /* always 0xAA */ __u8 ssap; /* always 0xAA */ __u8 ctrl; /* always 0x03 */ @@ -148,8 +144,7 @@ struct hippi_snap_hdr __be16 ethertype; /* packet type ID field */ } __attribute__ ((packed)); -struct hippi_hdr -{ +struct hippi_hdr { struct hippi_fp_hdr fp; struct hippi_le_hdr le; struct hippi_snap_hdr snap; diff --git a/include/linux/if_link.h b/include/linux/if_link.h index 176c5182c515..1d3b2425f661 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -5,8 +5,7 @@ #include /* The struct should be in sync with struct net_device_stats */ -struct rtnl_link_stats -{ +struct rtnl_link_stats { __u32 rx_packets; /* total packets received */ __u32 tx_packets; /* total packets transmitted */ __u32 rx_bytes; /* total bytes received */ @@ -39,8 +38,7 @@ struct rtnl_link_stats }; /* The struct should be in sync with struct ifmap */ -struct rtnl_link_ifmap -{ +struct rtnl_link_ifmap { __u64 mem_start; __u64 mem_end; __u64 base_addr; @@ -49,8 +47,7 @@ struct rtnl_link_ifmap __u8 port; }; -enum -{ +enum { IFLA_UNSPEC, IFLA_ADDRESS, IFLA_BROADCAST, @@ -123,8 +120,7 @@ enum */ /* Subtype attributes for IFLA_PROTINFO */ -enum -{ +enum { IFLA_INET6_UNSPEC, IFLA_INET6_FLAGS, /* link flags */ IFLA_INET6_CONF, /* sysctl parameters */ @@ -137,16 +133,14 @@ enum #define IFLA_INET6_MAX (__IFLA_INET6_MAX - 1) -struct ifla_cacheinfo -{ +struct ifla_cacheinfo { __u32 max_reasm_len; __u32 tstamp; /* ipv6InterfaceTable updated timestamp */ __u32 reachable_time; __u32 retrans_time; }; -enum -{ +enum { IFLA_INFO_UNSPEC, IFLA_INFO_KIND, IFLA_INFO_DATA, @@ -158,8 +152,7 @@ enum /* VLAN section */ -enum -{ +enum { IFLA_VLAN_UNSPEC, IFLA_VLAN_ID, IFLA_VLAN_FLAGS, @@ -175,8 +168,7 @@ struct ifla_vlan_flags { __u32 mask; }; -enum -{ +enum { IFLA_VLAN_QOS_UNSPEC, IFLA_VLAN_QOS_MAPPING, __IFLA_VLAN_QOS_MAX @@ -184,8 +176,7 @@ enum #define IFLA_VLAN_QOS_MAX (__IFLA_VLAN_QOS_MAX - 1) -struct ifla_vlan_qos_mapping -{ +struct ifla_vlan_qos_mapping { __u32 from; __u32 to; }; diff --git a/include/linux/if_packet.h b/include/linux/if_packet.h index dea7d6b7cf98..4021d47cc437 100644 --- a/include/linux/if_packet.h +++ b/include/linux/if_packet.h @@ -3,15 +3,13 @@ #include -struct sockaddr_pkt -{ +struct sockaddr_pkt { unsigned short spkt_family; unsigned char spkt_device[14]; __be16 spkt_protocol; }; -struct sockaddr_ll -{ +struct sockaddr_ll { unsigned short sll_family; __be16 sll_protocol; int sll_ifindex; @@ -49,14 +47,12 @@ struct sockaddr_ll #define PACKET_TX_RING 13 #define PACKET_LOSS 14 -struct tpacket_stats -{ +struct tpacket_stats { unsigned int tp_packets; unsigned int tp_drops; }; -struct tpacket_auxdata -{ +struct tpacket_auxdata { __u32 tp_status; __u32 tp_len; __u32 tp_snaplen; @@ -78,8 +74,7 @@ struct tpacket_auxdata #define TP_STATUS_SENDING 0x2 #define TP_STATUS_WRONG_FORMAT 0x4 -struct tpacket_hdr -{ +struct tpacket_hdr { unsigned long tp_status; unsigned int tp_len; unsigned int tp_snaplen; @@ -93,8 +88,7 @@ struct tpacket_hdr #define TPACKET_ALIGN(x) (((x)+TPACKET_ALIGNMENT-1)&~(TPACKET_ALIGNMENT-1)) #define TPACKET_HDRLEN (TPACKET_ALIGN(sizeof(struct tpacket_hdr)) + sizeof(struct sockaddr_ll)) -struct tpacket2_hdr -{ +struct tpacket2_hdr { __u32 tp_status; __u32 tp_len; __u32 tp_snaplen; @@ -107,8 +101,7 @@ struct tpacket2_hdr #define TPACKET2_HDRLEN (TPACKET_ALIGN(sizeof(struct tpacket2_hdr)) + sizeof(struct sockaddr_ll)) -enum tpacket_versions -{ +enum tpacket_versions { TPACKET_V1, TPACKET_V2, }; @@ -126,16 +119,14 @@ enum tpacket_versions - Pad to align to TPACKET_ALIGNMENT=16 */ -struct tpacket_req -{ +struct tpacket_req { unsigned int tp_block_size; /* Minimal size of contiguous block */ unsigned int tp_block_nr; /* Number of blocks */ unsigned int tp_frame_size; /* Size of frame */ unsigned int tp_frame_nr; /* Total number of frames */ }; -struct packet_mreq -{ +struct packet_mreq { int mr_ifindex; unsigned short mr_type; unsigned short mr_alen; diff --git a/include/linux/if_plip.h b/include/linux/if_plip.h index 153a649915a2..6298c7e88b2b 100644 --- a/include/linux/if_plip.h +++ b/include/linux/if_plip.h @@ -15,8 +15,7 @@ #define SIOCDEVPLIP SIOCDEVPRIVATE -struct plipconf -{ +struct plipconf { unsigned short pcmd; unsigned long nibble; unsigned long trigger; diff --git a/include/linux/if_pppol2tp.h b/include/linux/if_pppol2tp.h index 3a14b088c8ec..c58baea4a25b 100644 --- a/include/linux/if_pppol2tp.h +++ b/include/linux/if_pppol2tp.h @@ -24,8 +24,7 @@ /* Structure used to connect() the socket to a particular tunnel UDP * socket. */ -struct pppol2tp_addr -{ +struct pppol2tp_addr { __kernel_pid_t pid; /* pid that owns the fd. * 0 => current */ int fd; /* FD of UDP socket to use */ diff --git a/include/linux/if_tunnel.h b/include/linux/if_tunnel.h index 8d76cb4c86fa..1822d635be6b 100644 --- a/include/linux/if_tunnel.h +++ b/include/linux/if_tunnel.h @@ -30,8 +30,7 @@ #define GRE_FLAGS __cpu_to_be16(0x00F8) #define GRE_VERSION __cpu_to_be16(0x0007) -struct ip_tunnel_parm -{ +struct ip_tunnel_parm { char name[IFNAMSIZ]; int link; __be16 i_flags; @@ -63,8 +62,7 @@ struct ip_tunnel_6rd { __u16 relay_prefixlen; }; -enum -{ +enum { IFLA_GRE_UNSPEC, IFLA_GRE_LINK, IFLA_GRE_IFLAGS, diff --git a/include/linux/igmp.h b/include/linux/igmp.h index fe158e0e20e6..724c27e5d173 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -27,8 +27,7 @@ * Header in on cable format */ -struct igmphdr -{ +struct igmphdr { __u8 type; __u8 code; /* For newer IGMP */ __sum16 csum; @@ -151,8 +150,7 @@ static inline struct igmpv3_query * extern int sysctl_igmp_max_memberships; extern int sysctl_igmp_max_msf; -struct ip_sf_socklist -{ +struct ip_sf_socklist { unsigned int sl_max; unsigned int sl_count; __be32 sl_addr[0]; @@ -167,16 +165,14 @@ struct ip_sf_socklist this list never used in fast path code */ -struct ip_mc_socklist -{ +struct ip_mc_socklist { struct ip_mc_socklist *next; struct ip_mreqn multi; unsigned int sfmode; /* MCAST_{INCLUDE,EXCLUDE} */ struct ip_sf_socklist *sflist; }; -struct ip_sf_list -{ +struct ip_sf_list { struct ip_sf_list *sf_next; __be32 sf_inaddr; unsigned long sf_count[2]; /* include/exclude counts */ @@ -185,8 +181,7 @@ struct ip_sf_list unsigned char sf_crcount; /* retrans. left to send */ }; -struct ip_mc_list -{ +struct ip_mc_list { struct in_device *interface; __be32 multiaddr; struct ip_sf_list *sources; diff --git a/include/linux/in.h b/include/linux/in.h index cf196da04ec9..b615649db129 100644 --- a/include/linux/in.h +++ b/include/linux/in.h @@ -118,14 +118,12 @@ struct in_addr { /* Request struct for multicast socket ops */ -struct ip_mreq -{ +struct ip_mreq { struct in_addr imr_multiaddr; /* IP multicast address of group */ struct in_addr imr_interface; /* local IP address of interface */ }; -struct ip_mreqn -{ +struct ip_mreqn { struct in_addr imr_multiaddr; /* IP multicast address of group */ struct in_addr imr_address; /* local IP address of interface */ int imr_ifindex; /* Interface index */ @@ -149,21 +147,18 @@ struct ip_msfilter { (sizeof(struct ip_msfilter) - sizeof(__u32) \ + (numsrc) * sizeof(__u32)) -struct group_req -{ +struct group_req { __u32 gr_interface; /* interface index */ struct __kernel_sockaddr_storage gr_group; /* group address */ }; -struct group_source_req -{ +struct group_source_req { __u32 gsr_interface; /* interface index */ struct __kernel_sockaddr_storage gsr_group; /* group address */ struct __kernel_sockaddr_storage gsr_source; /* source address */ }; -struct group_filter -{ +struct group_filter { __u32 gf_interface; /* interface index */ struct __kernel_sockaddr_storage gf_group; /* multicast address */ __u32 gf_fmode; /* filter mode */ @@ -175,8 +170,7 @@ struct group_filter (sizeof(struct group_filter) - sizeof(struct __kernel_sockaddr_storage) \ + (numsrc) * sizeof(struct __kernel_sockaddr_storage)) -struct in_pktinfo -{ +struct in_pktinfo { int ipi_ifindex; struct in_addr ipi_spec_dst; struct in_addr ipi_addr; diff --git a/include/linux/in6.h b/include/linux/in6.h index 718bf21c5754..dfa29168e6ab 100644 --- a/include/linux/in6.h +++ b/include/linux/in6.h @@ -27,10 +27,8 @@ * IPv6 address structure */ -struct in6_addr -{ - union - { +struct in6_addr { + union { __u8 u6_addr8[16]; __be16 u6_addr16[8]; __be32 u6_addr32[4]; @@ -75,8 +73,7 @@ struct ipv6_mreq { #define ipv6mr_acaddr ipv6mr_multiaddr -struct in6_flowlabel_req -{ +struct in6_flowlabel_req { struct in6_addr flr_dst; __be32 flr_label; __u8 flr_action; diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index ad27c7da8798..eecfa559bfb4 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -10,15 +10,13 @@ #include #include -struct ipv4_devconf -{ +struct ipv4_devconf { void *sysctl; int data[__NET_IPV4_CONF_MAX - 1]; DECLARE_BITMAP(state, __NET_IPV4_CONF_MAX - 1); }; -struct in_device -{ +struct in_device { struct net_device *dev; atomic_t refcnt; int dead; @@ -110,8 +108,7 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev) #define IN_DEV_ARP_IGNORE(in_dev) IN_DEV_MAXCONF((in_dev), ARP_IGNORE) #define IN_DEV_ARP_NOTIFY(in_dev) IN_DEV_MAXCONF((in_dev), ARP_NOTIFY) -struct in_ifaddr -{ +struct in_ifaddr { struct in_ifaddr *ifa_next; struct in_device *ifa_dev; struct rcu_head rcu_head; diff --git a/include/linux/ip_vs.h b/include/linux/ip_vs.h index 148265e63e8d..dfc170362842 100644 --- a/include/linux/ip_vs.h +++ b/include/linux/ip_vs.h @@ -127,8 +127,7 @@ struct ip_vs_dest_user { /* * IPVS statistics object (for user space) */ -struct ip_vs_stats_user -{ +struct ip_vs_stats_user { __u32 conns; /* connections scheduled */ __u32 inpkts; /* incoming packets */ __u32 outpkts; /* outgoing packets */ diff --git a/include/linux/mroute.h b/include/linux/mroute.h index d5f69151f692..c5f3d53548e2 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -76,8 +76,7 @@ struct vifctl { * Cache manipulation structures for mrouted and PIMd */ -struct mfcctl -{ +struct mfcctl { struct in_addr mfcc_origin; /* Origin of mcast */ struct in_addr mfcc_mcastgrp; /* Group in question */ vifi_t mfcc_parent; /* Where it arrived */ @@ -92,8 +91,7 @@ struct mfcctl * Group count retrieval for mrouted */ -struct sioc_sg_req -{ +struct sioc_sg_req { struct in_addr src; struct in_addr grp; unsigned long pktcnt; @@ -105,8 +103,7 @@ struct sioc_sg_req * To get vif packet counts */ -struct sioc_vif_req -{ +struct sioc_vif_req { vifi_t vifi; /* Which iface */ unsigned long icount; /* In packets */ unsigned long ocount; /* Out packets */ @@ -119,8 +116,7 @@ struct sioc_vif_req * data. Magically happens to be like an IP packet as per the original */ -struct igmpmsg -{ +struct igmpmsg { __u32 unused1,unused2; unsigned char im_msgtype; /* What is this */ unsigned char im_mbz; /* Must be zero */ @@ -181,8 +177,7 @@ static inline int ip_mr_init(void) } #endif -struct vif_device -{ +struct vif_device { struct net_device *dev; /* Device we are using */ unsigned long bytes_in,bytes_out; unsigned long pkt_in,pkt_out; /* Statistics */ @@ -195,8 +190,7 @@ struct vif_device #define VIFF_STATIC 0x8000 -struct mfc_cache -{ +struct mfc_cache { struct mfc_cache *next; /* Next entry on cache line */ #ifdef CONFIG_NET_NS struct net *mfc_net; diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h index b191865a6ca3..2caa1a8e525d 100644 --- a/include/linux/mroute6.h +++ b/include/linux/mroute6.h @@ -75,8 +75,7 @@ struct mif6ctl { * Cache manipulation structures for mrouted and PIMd */ -struct mf6cctl -{ +struct mf6cctl { struct sockaddr_in6 mf6cc_origin; /* Origin of mcast */ struct sockaddr_in6 mf6cc_mcastgrp; /* Group in question */ mifi_t mf6cc_parent; /* Where it arrived */ @@ -87,8 +86,7 @@ struct mf6cctl * Group count retrieval for pim6sd */ -struct sioc_sg_req6 -{ +struct sioc_sg_req6 { struct sockaddr_in6 src; struct sockaddr_in6 grp; unsigned long pktcnt; @@ -100,8 +98,7 @@ struct sioc_sg_req6 * To get vif packet counts */ -struct sioc_mif_req6 -{ +struct sioc_mif_req6 { mifi_t mifi; /* Which iface */ unsigned long icount; /* In packets */ unsigned long ocount; /* Out packets */ @@ -172,8 +169,7 @@ static inline void ip6_mr_cleanup(void) } #endif -struct mif_device -{ +struct mif_device { struct net_device *dev; /* Device we are using */ unsigned long bytes_in,bytes_out; unsigned long pkt_in,pkt_out; /* Statistics */ @@ -185,8 +181,7 @@ struct mif_device #define VIFF_STATIC 0x8000 -struct mfc6_cache -{ +struct mfc6_cache { struct mfc6_cache *next; /* Next entry on cache line */ #ifdef CONFIG_NET_NS struct net *mfc6_net; diff --git a/include/linux/neighbour.h b/include/linux/neighbour.h index 12c9de138451..a7003b7a695d 100644 --- a/include/linux/neighbour.h +++ b/include/linux/neighbour.h @@ -4,8 +4,7 @@ #include #include -struct ndmsg -{ +struct ndmsg { __u8 ndm_family; __u8 ndm_pad1; __u16 ndm_pad2; @@ -15,8 +14,7 @@ struct ndmsg __u8 ndm_type; }; -enum -{ +enum { NDA_UNSPEC, NDA_DST, NDA_LLADDR, @@ -56,8 +54,7 @@ enum NUD_PERMANENT is also cannot be deleted by garbage collectors. */ -struct nda_cacheinfo -{ +struct nda_cacheinfo { __u32 ndm_confirmed; __u32 ndm_used; __u32 ndm_updated; @@ -89,8 +86,7 @@ struct nda_cacheinfo * device. ****/ -struct ndt_stats -{ +struct ndt_stats { __u64 ndts_allocs; __u64 ndts_destroys; __u64 ndts_hash_grows; @@ -124,15 +120,13 @@ enum { }; #define NDTPA_MAX (__NDTPA_MAX - 1) -struct ndtmsg -{ +struct ndtmsg { __u8 ndtm_family; __u8 ndtm_pad1; __u16 ndtm_pad2; }; -struct ndt_config -{ +struct ndt_config { __u16 ndtc_key_len; __u16 ndtc_entry_size; __u32 ndtc_entries; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5077de028317..465add6c43e3 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -125,8 +125,7 @@ typedef enum netdev_tx netdev_tx_t; * with byte counters. */ -struct net_device_stats -{ +struct net_device_stats { unsigned long rx_packets; /* total packets received */ unsigned long tx_packets; /* total packets transmitted */ unsigned long rx_bytes; /* total bytes received */ @@ -179,8 +178,7 @@ struct neighbour; struct neigh_parms; struct sk_buff; -struct netif_rx_stats -{ +struct netif_rx_stats { unsigned total; unsigned dropped; unsigned time_squeeze; @@ -189,8 +187,7 @@ struct netif_rx_stats DECLARE_PER_CPU(struct netif_rx_stats, netdev_rx_stat); -struct dev_addr_list -{ +struct dev_addr_list { struct dev_addr_list *next; u8 da_addr[MAX_ADDR_LEN]; u8 da_addrlen; @@ -227,8 +224,7 @@ struct netdev_hw_addr_list { int count; }; -struct hh_cache -{ +struct hh_cache { struct hh_cache *hh_next; /* Next entry */ atomic_t hh_refcnt; /* number of users */ /* @@ -291,8 +287,7 @@ struct header_ops { * code. */ -enum netdev_state_t -{ +enum netdev_state_t { __LINK_STATE_START, __LINK_STATE_PRESENT, __LINK_STATE_NOCARRIER, @@ -341,8 +336,7 @@ struct napi_struct { struct sk_buff *skb; }; -enum -{ +enum { NAPI_STATE_SCHED, /* Poll is scheduled */ NAPI_STATE_DISABLE, /* Disable pending */ NAPI_STATE_NPSVC, /* Netpoll - don't dequeue from poll_list */ @@ -458,8 +452,7 @@ static inline void napi_synchronize(const struct napi_struct *n) # define napi_synchronize(n) barrier() #endif -enum netdev_queue_state_t -{ +enum netdev_queue_state_t { __QUEUE_STATE_XOFF, __QUEUE_STATE_FROZEN, }; @@ -653,8 +646,7 @@ struct net_device_ops { * moves out. */ -struct net_device -{ +struct net_device { /* * This is the first field of the "visible" part of this structure @@ -1229,8 +1221,7 @@ static inline int unregister_gifconf(unsigned int family) * Incoming packets are placed on per-cpu queues so that * no locking is needed. */ -struct softnet_data -{ +struct softnet_data { struct Qdisc *output_queue; struct sk_buff_head input_pkt_queue; struct list_head poll_list; @@ -1627,7 +1618,8 @@ static inline int netif_dormant(const struct net_device *dev) * * Check if carrier is operational */ -static inline int netif_oper_up(const struct net_device *dev) { +static inline int netif_oper_up(const struct net_device *dev) +{ return (dev->operstate == IF_OPER_UP || dev->operstate == IF_OPER_UNKNOWN /* backward compat */); } diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 6132b5e6d9d3..48c54960773c 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -93,8 +93,7 @@ typedef unsigned int nf_hookfn(unsigned int hooknum, const struct net_device *out, int (*okfn)(struct sk_buff *)); -struct nf_hook_ops -{ +struct nf_hook_ops { struct list_head list; /* User fills in from here down. */ @@ -106,8 +105,7 @@ struct nf_hook_ops int priority; }; -struct nf_sockopt_ops -{ +struct nf_sockopt_ops { struct list_head list; u_int8_t pf; diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h index a8248ee422b7..a374787ed9b0 100644 --- a/include/linux/netfilter/nf_conntrack_common.h +++ b/include/linux/netfilter/nf_conntrack_common.h @@ -3,8 +3,7 @@ /* Connection state tracking for netfilter. This is separated from, but required by, the NAT layer; it can also be used by an iptables extension. */ -enum ip_conntrack_info -{ +enum ip_conntrack_info { /* Part of an established connection (either direction). */ IP_CT_ESTABLISHED, @@ -76,8 +75,7 @@ enum ip_conntrack_status { }; #ifdef __KERNEL__ -struct ip_conntrack_stat -{ +struct ip_conntrack_stat { unsigned int searched; unsigned int found; unsigned int new; diff --git a/include/linux/netfilter/nf_conntrack_ftp.h b/include/linux/netfilter/nf_conntrack_ftp.h index 47727d7546ea..3e3aa08980c3 100644 --- a/include/linux/netfilter/nf_conntrack_ftp.h +++ b/include/linux/netfilter/nf_conntrack_ftp.h @@ -3,8 +3,7 @@ /* FTP tracking. */ /* This enum is exposed to userspace */ -enum nf_ct_ftp_type -{ +enum nf_ct_ftp_type { /* PORT command from client */ NF_CT_FTP_PORT, /* PASV response from server */ diff --git a/include/linux/netfilter/nf_conntrack_sctp.h b/include/linux/netfilter/nf_conntrack_sctp.h index 768f78c4ac53..ceeefe6681b5 100644 --- a/include/linux/netfilter/nf_conntrack_sctp.h +++ b/include/linux/netfilter/nf_conntrack_sctp.h @@ -16,8 +16,7 @@ enum sctp_conntrack { SCTP_CONNTRACK_MAX }; -struct ip_ct_sctp -{ +struct ip_ct_sctp { enum sctp_conntrack state; __be32 vtag[IP_CT_DIR_MAX]; diff --git a/include/linux/netfilter/nf_conntrack_tcp.h b/include/linux/netfilter/nf_conntrack_tcp.h index 4352feed2377..f6d97f64d7a0 100644 --- a/include/linux/netfilter/nf_conntrack_tcp.h +++ b/include/linux/netfilter/nf_conntrack_tcp.h @@ -55,8 +55,7 @@ struct ip_ct_tcp_state { u_int8_t flags; /* per direction options */ }; -struct ip_ct_tcp -{ +struct ip_ct_tcp { struct ip_ct_tcp_state seen[2]; /* connection parameters per direction */ u_int8_t state; /* state of the connection (enum tcp_conntrack) */ /* For detecting stale connections */ diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 9f00da287f2c..49d321f3ccd2 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -55,8 +55,7 @@ struct nfgenmsg { #include #include -struct nfnl_callback -{ +struct nfnl_callback { int (*call)(struct sock *nl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const cda[]); @@ -64,8 +63,7 @@ struct nfnl_callback const u_int16_t attr_count; /* number of nlattr's */ }; -struct nfnetlink_subsystem -{ +struct nfnetlink_subsystem { const char *name; __u8 subsys_id; /* nfnetlink subsystem ID */ __u8 cb_count; /* number of callbacks */ diff --git a/include/linux/netfilter/nfnetlink_compat.h b/include/linux/netfilter/nfnetlink_compat.h index eda55cabceec..ffb95036bbd4 100644 --- a/include/linux/netfilter/nfnetlink_compat.h +++ b/include/linux/netfilter/nfnetlink_compat.h @@ -21,8 +21,7 @@ * ! nfnetlink use the same attributes methods. - J. Schulist. */ -struct nfattr -{ +struct nfattr { __u16 nfa_len; __u16 nfa_type; /* we use 15 bits for the type, and the highest * bit to indicate whether the payload is nested */ diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 812cb153cabb..378f27ae7772 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -6,8 +6,7 @@ #define XT_FUNCTION_MAXNAMELEN 30 #define XT_TABLE_MAXNAMELEN 32 -struct xt_entry_match -{ +struct xt_entry_match { union { struct { __u16 match_size; @@ -31,8 +30,7 @@ struct xt_entry_match unsigned char data[0]; }; -struct xt_entry_target -{ +struct xt_entry_target { union { struct { __u16 target_size; @@ -64,16 +62,14 @@ struct xt_entry_target }, \ } -struct xt_standard_target -{ +struct xt_standard_target { struct xt_entry_target target; int verdict; }; /* The argument to IPT_SO_GET_REVISION_*. Returns highest revision * kernel supports, if >= revision. */ -struct xt_get_revision -{ +struct xt_get_revision { char name[XT_FUNCTION_MAXNAMELEN-1]; __u8 revision; @@ -90,8 +86,7 @@ struct xt_get_revision * ip6t_entry and arpt_entry. This sucks, and it is a hack. It will be my * personal pleasure to remove it -HW */ -struct _xt_align -{ +struct _xt_align { __u8 u8; __u16 u16; __u32 u32; @@ -109,14 +104,12 @@ struct _xt_align #define SET_COUNTER(c,b,p) do { (c).bcnt = (b); (c).pcnt = (p); } while(0) #define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0) -struct xt_counters -{ +struct xt_counters { __u64 pcnt, bcnt; /* Packet and byte counters */ }; /* The argument to IPT_SO_ADD_COUNTERS. */ -struct xt_counters_info -{ +struct xt_counters_info { /* Which table. */ char name[XT_TABLE_MAXNAMELEN]; @@ -269,8 +262,7 @@ struct xt_tgdtor_param { u_int8_t family; }; -struct xt_match -{ +struct xt_match { struct list_head list; const char name[XT_FUNCTION_MAXNAMELEN-1]; @@ -310,8 +302,7 @@ struct xt_match }; /* Registration hooks for targets. */ -struct xt_target -{ +struct xt_target { struct list_head list; const char name[XT_FUNCTION_MAXNAMELEN-1]; @@ -349,8 +340,7 @@ struct xt_target }; /* Furniture shopping... */ -struct xt_table -{ +struct xt_table { struct list_head list; /* What hooks you will enter on */ @@ -371,8 +361,7 @@ struct xt_table #include /* The table itself */ -struct xt_table_info -{ +struct xt_table_info { /* Size per table */ unsigned int size; /* Number of entries: FIXME. --RR */ @@ -528,8 +517,7 @@ static inline unsigned long ifname_compare_aligned(const char *_a, #ifdef CONFIG_COMPAT #include -struct compat_xt_entry_match -{ +struct compat_xt_entry_match { union { struct { u_int16_t match_size; @@ -545,8 +533,7 @@ struct compat_xt_entry_match unsigned char data[0]; }; -struct compat_xt_entry_target -{ +struct compat_xt_entry_target { union { struct { u_int16_t target_size; @@ -566,8 +553,7 @@ struct compat_xt_entry_target * need to change whole approach in order to calculate align as function of * current task alignment */ -struct compat_xt_counters -{ +struct compat_xt_counters { #if defined(CONFIG_X86_64) || defined(CONFIG_IA64) u_int32_t cnt[4]; #else @@ -575,8 +561,7 @@ struct compat_xt_counters #endif }; -struct compat_xt_counters_info -{ +struct compat_xt_counters_info { char name[XT_TABLE_MAXNAMELEN]; compat_uint_t num_counters; struct compat_xt_counters counters[0]; diff --git a/include/linux/netfilter/xt_connbytes.h b/include/linux/netfilter/xt_connbytes.h index 52bd6153b996..92fcbb0d193e 100644 --- a/include/linux/netfilter/xt_connbytes.h +++ b/include/linux/netfilter/xt_connbytes.h @@ -15,8 +15,7 @@ enum xt_connbytes_direction { XT_CONNBYTES_DIR_BOTH, }; -struct xt_connbytes_info -{ +struct xt_connbytes_info { struct { aligned_u64 from; /* count to be matched */ aligned_u64 to; /* count to be matched */ diff --git a/include/linux/netfilter/xt_esp.h b/include/linux/netfilter/xt_esp.h index ef6fa4747d0a..ee6882408000 100644 --- a/include/linux/netfilter/xt_esp.h +++ b/include/linux/netfilter/xt_esp.h @@ -3,8 +3,7 @@ #include -struct xt_esp -{ +struct xt_esp { __u32 spis[2]; /* Security Parameter Index */ __u8 invflags; /* Inverse flags */ }; diff --git a/include/linux/netfilter/xt_multiport.h b/include/linux/netfilter/xt_multiport.h index 185db499fcbc..5b7e72dfffc5 100644 --- a/include/linux/netfilter/xt_multiport.h +++ b/include/linux/netfilter/xt_multiport.h @@ -3,8 +3,7 @@ #include -enum xt_multiport_flags -{ +enum xt_multiport_flags { XT_MULTIPORT_SOURCE, XT_MULTIPORT_DESTINATION, XT_MULTIPORT_EITHER @@ -13,15 +12,13 @@ enum xt_multiport_flags #define XT_MULTI_PORTS 15 /* Must fit inside union xt_matchinfo: 16 bytes */ -struct xt_multiport -{ +struct xt_multiport { __u8 flags; /* Type of comparison */ __u8 count; /* Number of ports */ __u16 ports[XT_MULTI_PORTS]; /* Ports */ }; -struct xt_multiport_v1 -{ +struct xt_multiport_v1 { __u8 flags; /* Type of comparison */ __u8 count; /* Number of ports */ __u16 ports[XT_MULTI_PORTS]; /* Ports */ diff --git a/include/linux/netfilter/xt_policy.h b/include/linux/netfilter/xt_policy.h index 7bb64e7c853d..be8ead05c316 100644 --- a/include/linux/netfilter/xt_policy.h +++ b/include/linux/netfilter/xt_policy.h @@ -5,22 +5,19 @@ #define XT_POLICY_MAX_ELEM 4 -enum xt_policy_flags -{ +enum xt_policy_flags { XT_POLICY_MATCH_IN = 0x1, XT_POLICY_MATCH_OUT = 0x2, XT_POLICY_MATCH_NONE = 0x4, XT_POLICY_MATCH_STRICT = 0x8, }; -enum xt_policy_modes -{ +enum xt_policy_modes { XT_POLICY_MODE_TRANSPORT, XT_POLICY_MODE_TUNNEL }; -struct xt_policy_spec -{ +struct xt_policy_spec { __u8 saddr:1, daddr:1, proto:1, @@ -30,15 +27,13 @@ struct xt_policy_spec }; #ifndef __KERNEL__ -union xt_policy_addr -{ +union xt_policy_addr { struct in_addr a4; struct in6_addr a6; }; #endif -struct xt_policy_elem -{ +struct xt_policy_elem { union { #ifdef __KERNEL__ struct { @@ -65,8 +60,7 @@ struct xt_policy_elem struct xt_policy_spec invert; }; -struct xt_policy_info -{ +struct xt_policy_info { struct xt_policy_elem pol[XT_POLICY_MAX_ELEM]; __u16 flags; __u16 len; diff --git a/include/linux/netfilter/xt_state.h b/include/linux/netfilter/xt_state.h index c06f32edee07..7b32de886613 100644 --- a/include/linux/netfilter/xt_state.h +++ b/include/linux/netfilter/xt_state.h @@ -6,8 +6,7 @@ #define XT_STATE_UNTRACKED (1 << (IP_CT_NUMBER + 1)) -struct xt_state_info -{ +struct xt_state_info { unsigned int statemask; }; #endif /*_XT_STATE_H*/ diff --git a/include/linux/netfilter/xt_string.h b/include/linux/netfilter/xt_string.h index ecbb95fc89ed..235347c02eab 100644 --- a/include/linux/netfilter/xt_string.h +++ b/include/linux/netfilter/xt_string.h @@ -11,8 +11,7 @@ enum { XT_STRING_FLAG_IGNORECASE = 0x02 }; -struct xt_string_info -{ +struct xt_string_info { __u16 from_offset; __u16 to_offset; char algo[XT_STRING_MAX_ALGO_NAME_SIZE]; diff --git a/include/linux/netfilter/xt_tcpudp.h b/include/linux/netfilter/xt_tcpudp.h index a490a0bc1d29..38aa7b399021 100644 --- a/include/linux/netfilter/xt_tcpudp.h +++ b/include/linux/netfilter/xt_tcpudp.h @@ -4,8 +4,7 @@ #include /* TCP matching stuff */ -struct xt_tcp -{ +struct xt_tcp { __u16 spts[2]; /* Source port range. */ __u16 dpts[2]; /* Destination port range. */ __u8 option; /* TCP Option iff non-zero*/ @@ -22,8 +21,7 @@ struct xt_tcp #define XT_TCP_INV_MASK 0x0F /* All possible flags. */ /* UDP matching stuff */ -struct xt_udp -{ +struct xt_udp { __u16 spts[2]; /* Source port range. */ __u16 dpts[2]; /* Destination port range. */ __u8 invflags; /* Inverse flags */ diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h index 6fe3e6aa10db..f2336523a9df 100644 --- a/include/linux/netfilter_arp/arp_tables.h +++ b/include/linux/netfilter_arp/arp_tables.h @@ -132,8 +132,7 @@ struct arpt_entry #define ARPT_RETURN XT_RETURN /* The argument to ARPT_SO_GET_INFO */ -struct arpt_getinfo -{ +struct arpt_getinfo { /* Which table: caller fills this in. */ char name[ARPT_TABLE_MAXNAMELEN]; @@ -155,8 +154,7 @@ struct arpt_getinfo }; /* The argument to ARPT_SO_SET_REPLACE. */ -struct arpt_replace -{ +struct arpt_replace { /* Which table. */ char name[ARPT_TABLE_MAXNAMELEN]; @@ -191,8 +189,7 @@ struct arpt_replace #define arpt_counters xt_counters /* The argument to ARPT_SO_GET_ENTRIES. */ -struct arpt_get_entries -{ +struct arpt_get_entries { /* Which table: user fills this in. */ char name[ARPT_TABLE_MAXNAMELEN]; @@ -224,20 +221,17 @@ static __inline__ struct arpt_entry_target *arpt_get_target(struct arpt_entry *e #ifdef __KERNEL__ /* Standard entry. */ -struct arpt_standard -{ +struct arpt_standard { struct arpt_entry entry; struct arpt_standard_target target; }; -struct arpt_error_target -{ +struct arpt_error_target { struct arpt_entry_target target; char errorname[ARPT_FUNCTION_MAXNAMELEN]; }; -struct arpt_error -{ +struct arpt_error { struct arpt_entry entry; struct arpt_error_target target; }; @@ -279,8 +273,7 @@ extern unsigned int arpt_do_table(struct sk_buff *skb, #ifdef CONFIG_COMPAT #include -struct compat_arpt_entry -{ +struct compat_arpt_entry { struct arpt_arp arp; u_int16_t target_offset; u_int16_t next_offset; diff --git a/include/linux/netfilter_bridge/ebt_802_3.h b/include/linux/netfilter_bridge/ebt_802_3.h index a11b0c2017fd..c73ef0b18bdc 100644 --- a/include/linux/netfilter_bridge/ebt_802_3.h +++ b/include/linux/netfilter_bridge/ebt_802_3.h @@ -58,8 +58,7 @@ static inline struct ebt_802_3_hdr *ebt_802_3_hdr(const struct sk_buff *skb) } #endif -struct ebt_802_3_info -{ +struct ebt_802_3_info { uint8_t sap; __be16 type; uint8_t bitmask; diff --git a/include/linux/netfilter_bridge/ebt_among.h b/include/linux/netfilter_bridge/ebt_among.h index 7654069233ca..0009558609a7 100644 --- a/include/linux/netfilter_bridge/ebt_among.h +++ b/include/linux/netfilter_bridge/ebt_among.h @@ -29,14 +29,12 @@ * Yes, it is a memory overhead, but in 2003 AD, who cares? */ -struct ebt_mac_wormhash_tuple -{ +struct ebt_mac_wormhash_tuple { uint32_t cmp[2]; __be32 ip; }; -struct ebt_mac_wormhash -{ +struct ebt_mac_wormhash { int table[257]; int poolsize; struct ebt_mac_wormhash_tuple pool[0]; @@ -45,8 +43,7 @@ struct ebt_mac_wormhash #define ebt_mac_wormhash_size(x) ((x) ? sizeof(struct ebt_mac_wormhash) \ + (x)->poolsize * sizeof(struct ebt_mac_wormhash_tuple) : 0) -struct ebt_among_info -{ +struct ebt_among_info { int wh_dst_ofs; int wh_src_ofs; int bitmask; diff --git a/include/linux/netfilter_bridge/ebt_arpreply.h b/include/linux/netfilter_bridge/ebt_arpreply.h index 96a8339960e0..7e77896e1fbf 100644 --- a/include/linux/netfilter_bridge/ebt_arpreply.h +++ b/include/linux/netfilter_bridge/ebt_arpreply.h @@ -1,8 +1,7 @@ #ifndef __LINUX_BRIDGE_EBT_ARPREPLY_H #define __LINUX_BRIDGE_EBT_ARPREPLY_H -struct ebt_arpreply_info -{ +struct ebt_arpreply_info { unsigned char mac[ETH_ALEN]; int target; }; diff --git a/include/linux/netfilter_bridge/ebt_ip.h b/include/linux/netfilter_bridge/ebt_ip.h index d6847475bf2e..6a708fb92241 100644 --- a/include/linux/netfilter_bridge/ebt_ip.h +++ b/include/linux/netfilter_bridge/ebt_ip.h @@ -26,8 +26,7 @@ #define EBT_IP_MATCH "ip" /* the same values are used for the invflags */ -struct ebt_ip_info -{ +struct ebt_ip_info { __be32 saddr; __be32 daddr; __be32 smsk; diff --git a/include/linux/netfilter_bridge/ebt_ip6.h b/include/linux/netfilter_bridge/ebt_ip6.h index 2273c3ae33ca..e5de98701519 100644 --- a/include/linux/netfilter_bridge/ebt_ip6.h +++ b/include/linux/netfilter_bridge/ebt_ip6.h @@ -23,8 +23,7 @@ #define EBT_IP6_MATCH "ip6" /* the same values are used for the invflags */ -struct ebt_ip6_info -{ +struct ebt_ip6_info { struct in6_addr saddr; struct in6_addr daddr; struct in6_addr smsk; diff --git a/include/linux/netfilter_bridge/ebt_limit.h b/include/linux/netfilter_bridge/ebt_limit.h index d8b65000afe4..4bf76b751676 100644 --- a/include/linux/netfilter_bridge/ebt_limit.h +++ b/include/linux/netfilter_bridge/ebt_limit.h @@ -9,8 +9,7 @@ /* 1/10,000 sec period => max of 10,000/sec. Min rate is then 429490 seconds, or one every 59 hours. */ -struct ebt_limit_info -{ +struct ebt_limit_info { u_int32_t avg; /* Average secs between packets * scale */ u_int32_t burst; /* Period multiplier for upper limit. */ diff --git a/include/linux/netfilter_bridge/ebt_log.h b/include/linux/netfilter_bridge/ebt_log.h index b76e653157e5..cc2cdfb764bc 100644 --- a/include/linux/netfilter_bridge/ebt_log.h +++ b/include/linux/netfilter_bridge/ebt_log.h @@ -9,8 +9,7 @@ #define EBT_LOG_PREFIX_SIZE 30 #define EBT_LOG_WATCHER "log" -struct ebt_log_info -{ +struct ebt_log_info { uint8_t loglevel; uint8_t prefix[EBT_LOG_PREFIX_SIZE]; uint32_t bitmask; diff --git a/include/linux/netfilter_bridge/ebt_mark_m.h b/include/linux/netfilter_bridge/ebt_mark_m.h index 301524ff1065..9ceb10ec0ed6 100644 --- a/include/linux/netfilter_bridge/ebt_mark_m.h +++ b/include/linux/netfilter_bridge/ebt_mark_m.h @@ -4,8 +4,7 @@ #define EBT_MARK_AND 0x01 #define EBT_MARK_OR 0x02 #define EBT_MARK_MASK (EBT_MARK_AND | EBT_MARK_OR) -struct ebt_mark_m_info -{ +struct ebt_mark_m_info { unsigned long mark, mask; uint8_t invert; uint8_t bitmask; diff --git a/include/linux/netfilter_bridge/ebt_mark_t.h b/include/linux/netfilter_bridge/ebt_mark_t.h index 6270f6f33693..7d5a268a4311 100644 --- a/include/linux/netfilter_bridge/ebt_mark_t.h +++ b/include/linux/netfilter_bridge/ebt_mark_t.h @@ -13,8 +13,7 @@ #define MARK_AND_VALUE (0xffffffd0) #define MARK_XOR_VALUE (0xffffffc0) -struct ebt_mark_t_info -{ +struct ebt_mark_t_info { unsigned long mark; /* EBT_ACCEPT, EBT_DROP, EBT_CONTINUE or EBT_RETURN */ int target; diff --git a/include/linux/netfilter_bridge/ebt_nat.h b/include/linux/netfilter_bridge/ebt_nat.h index 435b886a51aa..5e74e3b03bd6 100644 --- a/include/linux/netfilter_bridge/ebt_nat.h +++ b/include/linux/netfilter_bridge/ebt_nat.h @@ -2,8 +2,7 @@ #define __LINUX_BRIDGE_EBT_NAT_H #define NAT_ARP_BIT (0x00000010) -struct ebt_nat_info -{ +struct ebt_nat_info { unsigned char mac[ETH_ALEN]; /* EBT_ACCEPT, EBT_DROP, EBT_CONTINUE or EBT_RETURN */ int target; diff --git a/include/linux/netfilter_bridge/ebt_pkttype.h b/include/linux/netfilter_bridge/ebt_pkttype.h index 0d64bbb29c66..51a799840931 100644 --- a/include/linux/netfilter_bridge/ebt_pkttype.h +++ b/include/linux/netfilter_bridge/ebt_pkttype.h @@ -1,8 +1,7 @@ #ifndef __LINUX_BRIDGE_EBT_PKTTYPE_H #define __LINUX_BRIDGE_EBT_PKTTYPE_H -struct ebt_pkttype_info -{ +struct ebt_pkttype_info { uint8_t pkt_type; uint8_t invert; }; diff --git a/include/linux/netfilter_bridge/ebt_redirect.h b/include/linux/netfilter_bridge/ebt_redirect.h index 5c67990fce39..dd9622ce8488 100644 --- a/include/linux/netfilter_bridge/ebt_redirect.h +++ b/include/linux/netfilter_bridge/ebt_redirect.h @@ -1,8 +1,7 @@ #ifndef __LINUX_BRIDGE_EBT_REDIRECT_H #define __LINUX_BRIDGE_EBT_REDIRECT_H -struct ebt_redirect_info -{ +struct ebt_redirect_info { /* EBT_ACCEPT, EBT_DROP, EBT_CONTINUE or EBT_RETURN */ int target; }; diff --git a/include/linux/netfilter_bridge/ebt_stp.h b/include/linux/netfilter_bridge/ebt_stp.h index e5fd67850f4d..e503a0aa2728 100644 --- a/include/linux/netfilter_bridge/ebt_stp.h +++ b/include/linux/netfilter_bridge/ebt_stp.h @@ -20,8 +20,7 @@ #define EBT_STP_MATCH "stp" -struct ebt_stp_config_info -{ +struct ebt_stp_config_info { uint8_t flags; uint16_t root_priol, root_priou; char root_addr[6], root_addrmsk[6]; @@ -35,8 +34,7 @@ struct ebt_stp_config_info uint16_t forward_delayl, forward_delayu; }; -struct ebt_stp_info -{ +struct ebt_stp_info { uint8_t type; struct ebt_stp_config_info config; uint16_t bitmask; diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index ea281e6a2048..3cc40c131cc3 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -34,14 +34,12 @@ struct xt_match; struct xt_target; -struct ebt_counter -{ +struct ebt_counter { uint64_t pcnt; uint64_t bcnt; }; -struct ebt_replace -{ +struct ebt_replace { char name[EBT_TABLE_MAXNAMELEN]; unsigned int valid_hooks; /* nr of rules in the table */ @@ -57,8 +55,7 @@ struct ebt_replace char __user *entries; }; -struct ebt_replace_kernel -{ +struct ebt_replace_kernel { char name[EBT_TABLE_MAXNAMELEN]; unsigned int valid_hooks; /* nr of rules in the table */ @@ -120,8 +117,7 @@ struct ebt_entries { #define EBT_INV_MASK (EBT_IPROTO | EBT_IIN | EBT_IOUT | EBT_ILOGICALIN \ | EBT_ILOGICALOUT | EBT_ISOURCE | EBT_IDEST) -struct ebt_entry_match -{ +struct ebt_entry_match { union { char name[EBT_FUNCTION_MAXNAMELEN]; struct xt_match *match; @@ -131,8 +127,7 @@ struct ebt_entry_match unsigned char data[0] __attribute__ ((aligned (__alignof__(struct ebt_replace)))); }; -struct ebt_entry_watcher -{ +struct ebt_entry_watcher { union { char name[EBT_FUNCTION_MAXNAMELEN]; struct xt_target *watcher; @@ -142,8 +137,7 @@ struct ebt_entry_watcher unsigned char data[0] __attribute__ ((aligned (__alignof__(struct ebt_replace)))); }; -struct ebt_entry_target -{ +struct ebt_entry_target { union { char name[EBT_FUNCTION_MAXNAMELEN]; struct xt_target *target; @@ -154,8 +148,7 @@ struct ebt_entry_target }; #define EBT_STANDARD_TARGET "standard" -struct ebt_standard_target -{ +struct ebt_standard_target { struct ebt_entry_target target; int verdict; }; @@ -206,8 +199,7 @@ struct ebt_entry { #define EBT_MATCH 0 #define EBT_NOMATCH 1 -struct ebt_match -{ +struct ebt_match { struct list_head list; const char name[EBT_FUNCTION_MAXNAMELEN]; bool (*match)(const struct sk_buff *skb, const struct net_device *in, @@ -224,8 +216,7 @@ struct ebt_match struct module *me; }; -struct ebt_watcher -{ +struct ebt_watcher { struct list_head list; const char name[EBT_FUNCTION_MAXNAMELEN]; unsigned int (*target)(struct sk_buff *skb, @@ -242,8 +233,7 @@ struct ebt_watcher struct module *me; }; -struct ebt_target -{ +struct ebt_target { struct list_head list; const char name[EBT_FUNCTION_MAXNAMELEN]; /* returns one of the standard EBT_* verdicts */ @@ -262,15 +252,13 @@ struct ebt_target }; /* used for jumping from and into user defined chains (udc) */ -struct ebt_chainstack -{ +struct ebt_chainstack { struct ebt_entries *chaininfo; /* pointer to chain data */ struct ebt_entry *e; /* pointer to entry data */ unsigned int n; /* n'th entry */ }; -struct ebt_table_info -{ +struct ebt_table_info { /* total size of the entries */ unsigned int entries_size; unsigned int nentries; @@ -282,8 +270,7 @@ struct ebt_table_info struct ebt_counter counters[0] ____cacheline_aligned; }; -struct ebt_table -{ +struct ebt_table { struct list_head list; char name[EBT_TABLE_MAXNAMELEN]; struct ebt_replace_kernel *table; diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h index 61fafc868a7b..27b3f5807305 100644 --- a/include/linux/netfilter_ipv4/ip_tables.h +++ b/include/linux/netfilter_ipv4/ip_tables.h @@ -76,8 +76,7 @@ struct ipt_ip { /* This structure defines each of the firewall rules. Consists of 3 parts which are 1) general IP header stuff 2) match specific stuff 3) the target to perform if the rule matches */ -struct ipt_entry -{ +struct ipt_entry { struct ipt_ip ip; /* Mark with fields that we care about. */ @@ -135,8 +134,7 @@ struct ipt_entry #define IPT_UDP_INV_MASK XT_UDP_INV_MASK /* ICMP matching stuff */ -struct ipt_icmp -{ +struct ipt_icmp { u_int8_t type; /* type to match */ u_int8_t code[2]; /* range of code */ u_int8_t invflags; /* Inverse flags */ @@ -146,8 +144,7 @@ struct ipt_icmp #define IPT_ICMP_INV 0x01 /* Invert the sense of type/code test */ /* The argument to IPT_SO_GET_INFO */ -struct ipt_getinfo -{ +struct ipt_getinfo { /* Which table: caller fills this in. */ char name[IPT_TABLE_MAXNAMELEN]; @@ -169,8 +166,7 @@ struct ipt_getinfo }; /* The argument to IPT_SO_SET_REPLACE. */ -struct ipt_replace -{ +struct ipt_replace { /* Which table. */ char name[IPT_TABLE_MAXNAMELEN]; @@ -204,8 +200,7 @@ struct ipt_replace #define ipt_counters_info xt_counters_info /* The argument to IPT_SO_GET_ENTRIES. */ -struct ipt_get_entries -{ +struct ipt_get_entries { /* Which table: user fills this in. */ char name[IPT_TABLE_MAXNAMELEN]; @@ -250,20 +245,17 @@ extern struct xt_table *ipt_register_table(struct net *net, extern void ipt_unregister_table(struct xt_table *table); /* Standard entry. */ -struct ipt_standard -{ +struct ipt_standard { struct ipt_entry entry; struct ipt_standard_target target; }; -struct ipt_error_target -{ +struct ipt_error_target { struct ipt_entry_target target; char errorname[IPT_FUNCTION_MAXNAMELEN]; }; -struct ipt_error -{ +struct ipt_error { struct ipt_entry entry; struct ipt_error_target target; }; @@ -301,8 +293,7 @@ extern unsigned int ipt_do_table(struct sk_buff *skb, #ifdef CONFIG_COMPAT #include -struct compat_ipt_entry -{ +struct compat_ipt_entry { struct ipt_ip ip; compat_uint_t nfcache; u_int16_t target_offset; diff --git a/include/linux/netfilter_ipv4/ipt_SAME.h b/include/linux/netfilter_ipv4/ipt_SAME.h index be6e682a85ec..2529660c5b38 100644 --- a/include/linux/netfilter_ipv4/ipt_SAME.h +++ b/include/linux/netfilter_ipv4/ipt_SAME.h @@ -5,8 +5,7 @@ #define IPT_SAME_NODST 0x01 -struct ipt_same_info -{ +struct ipt_same_info { unsigned char info; u_int32_t rangesize; u_int32_t ipnum; diff --git a/include/linux/netfilter_ipv4/ipt_ah.h b/include/linux/netfilter_ipv4/ipt_ah.h index 7b9a2ac7adb9..2e555b4d05e3 100644 --- a/include/linux/netfilter_ipv4/ipt_ah.h +++ b/include/linux/netfilter_ipv4/ipt_ah.h @@ -1,8 +1,7 @@ #ifndef _IPT_AH_H #define _IPT_AH_H -struct ipt_ah -{ +struct ipt_ah { u_int32_t spis[2]; /* Security Parameter Index */ u_int8_t invflags; /* Inverse flags */ }; diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index a64e1451ac38..b31050d20ae4 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -88,8 +88,7 @@ struct ip6t_ip6 { /* This structure defines each of the firewall rules. Consists of 3 parts which are 1) general IP header stuff 2) match specific stuff 3) the target to perform if the rule matches */ -struct ip6t_entry -{ +struct ip6t_entry { struct ip6t_ip6 ipv6; /* Mark with fields that we care about. */ @@ -111,20 +110,17 @@ struct ip6t_entry }; /* Standard entry */ -struct ip6t_standard -{ +struct ip6t_standard { struct ip6t_entry entry; struct ip6t_standard_target target; }; -struct ip6t_error_target -{ +struct ip6t_error_target { struct ip6t_entry_target target; char errorname[IP6T_FUNCTION_MAXNAMELEN]; }; -struct ip6t_error -{ +struct ip6t_error { struct ip6t_entry entry; struct ip6t_error_target target; }; @@ -195,8 +191,7 @@ struct ip6t_error #define IP6T_UDP_INV_MASK XT_UDP_INV_MASK /* ICMP matching stuff */ -struct ip6t_icmp -{ +struct ip6t_icmp { u_int8_t type; /* type to match */ u_int8_t code[2]; /* range of code */ u_int8_t invflags; /* Inverse flags */ @@ -206,8 +201,7 @@ struct ip6t_icmp #define IP6T_ICMP_INV 0x01 /* Invert the sense of type/code test */ /* The argument to IP6T_SO_GET_INFO */ -struct ip6t_getinfo -{ +struct ip6t_getinfo { /* Which table: caller fills this in. */ char name[IP6T_TABLE_MAXNAMELEN]; @@ -229,8 +223,7 @@ struct ip6t_getinfo }; /* The argument to IP6T_SO_SET_REPLACE. */ -struct ip6t_replace -{ +struct ip6t_replace { /* Which table. */ char name[IP6T_TABLE_MAXNAMELEN]; @@ -264,8 +257,7 @@ struct ip6t_replace #define ip6t_counters_info xt_counters_info /* The argument to IP6T_SO_GET_ENTRIES. */ -struct ip6t_get_entries -{ +struct ip6t_get_entries { /* Which table: user fills this in. */ char name[IP6T_TABLE_MAXNAMELEN]; @@ -330,8 +322,7 @@ extern int ip6_masked_addrcmp(const struct in6_addr *addr1, #ifdef CONFIG_COMPAT #include -struct compat_ip6t_entry -{ +struct compat_ip6t_entry { struct ip6t_ip6 ipv6; compat_uint_t nfcache; u_int16_t target_offset; diff --git a/include/linux/netfilter_ipv6/ip6t_ah.h b/include/linux/netfilter_ipv6/ip6t_ah.h index 8531879eb464..17a745cfb2c7 100644 --- a/include/linux/netfilter_ipv6/ip6t_ah.h +++ b/include/linux/netfilter_ipv6/ip6t_ah.h @@ -1,8 +1,7 @@ #ifndef _IP6T_AH_H #define _IP6T_AH_H -struct ip6t_ah -{ +struct ip6t_ah { u_int32_t spis[2]; /* Security Parameter Index */ u_int32_t hdrlen; /* Header Length */ u_int8_t hdrres; /* Test of the Reserved Filed */ diff --git a/include/linux/netfilter_ipv6/ip6t_frag.h b/include/linux/netfilter_ipv6/ip6t_frag.h index 66070a0d6dfc..3724d0850920 100644 --- a/include/linux/netfilter_ipv6/ip6t_frag.h +++ b/include/linux/netfilter_ipv6/ip6t_frag.h @@ -1,8 +1,7 @@ #ifndef _IP6T_FRAG_H #define _IP6T_FRAG_H -struct ip6t_frag -{ +struct ip6t_frag { u_int32_t ids[2]; /* Security Parameter Index */ u_int32_t hdrlen; /* Header Length */ u_int8_t flags; /* */ diff --git a/include/linux/netfilter_ipv6/ip6t_ipv6header.h b/include/linux/netfilter_ipv6/ip6t_ipv6header.h index 51c53fc9c44a..01dfd445596a 100644 --- a/include/linux/netfilter_ipv6/ip6t_ipv6header.h +++ b/include/linux/netfilter_ipv6/ip6t_ipv6header.h @@ -8,8 +8,7 @@ on whether they contain certain headers */ #ifndef __IPV6HEADER_H #define __IPV6HEADER_H -struct ip6t_ipv6header_info -{ +struct ip6t_ipv6header_info { u_int8_t matchflags; u_int8_t invflags; u_int8_t modeflag; diff --git a/include/linux/netfilter_ipv6/ip6t_mh.h b/include/linux/netfilter_ipv6/ip6t_mh.h index b9ca9a5f74d0..18549bca2d1f 100644 --- a/include/linux/netfilter_ipv6/ip6t_mh.h +++ b/include/linux/netfilter_ipv6/ip6t_mh.h @@ -2,8 +2,7 @@ #define _IP6T_MH_H /* MH matching stuff */ -struct ip6t_mh -{ +struct ip6t_mh { u_int8_t types[2]; /* MH type range */ u_int8_t invflags; /* Inverse flags */ }; diff --git a/include/linux/netfilter_ipv6/ip6t_opts.h b/include/linux/netfilter_ipv6/ip6t_opts.h index a07e36380ae8..62d89bcd9f9c 100644 --- a/include/linux/netfilter_ipv6/ip6t_opts.h +++ b/include/linux/netfilter_ipv6/ip6t_opts.h @@ -3,8 +3,7 @@ #define IP6T_OPTS_OPTSNR 16 -struct ip6t_opts -{ +struct ip6t_opts { u_int32_t hdrlen; /* Header Length */ u_int8_t flags; /* */ u_int8_t invflags; /* Inverse flags */ diff --git a/include/linux/netfilter_ipv6/ip6t_rt.h b/include/linux/netfilter_ipv6/ip6t_rt.h index 52156023e8db..ab91bfd2cd00 100644 --- a/include/linux/netfilter_ipv6/ip6t_rt.h +++ b/include/linux/netfilter_ipv6/ip6t_rt.h @@ -5,8 +5,7 @@ #define IP6T_RT_HOPS 16 -struct ip6t_rt -{ +struct ip6t_rt { u_int32_t rt_type; /* Routing Type */ u_int32_t segsleft[2]; /* Segments Left */ u_int32_t hdrlen; /* Header Length */ diff --git a/include/linux/netlink.h b/include/linux/netlink.h index ab5d3126831f..fde27c017326 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -29,16 +29,14 @@ struct net; -struct sockaddr_nl -{ +struct sockaddr_nl { sa_family_t nl_family; /* AF_NETLINK */ unsigned short nl_pad; /* zero */ __u32 nl_pid; /* port ID */ __u32 nl_groups; /* multicast groups mask */ }; -struct nlmsghdr -{ +struct nlmsghdr { __u32 nlmsg_len; /* Length of message including header */ __u16 nlmsg_type; /* Message content */ __u16 nlmsg_flags; /* Additional flags */ @@ -94,8 +92,7 @@ struct nlmsghdr #define NLMSG_MIN_TYPE 0x10 /* < 0x10: reserved control messages */ -struct nlmsgerr -{ +struct nlmsgerr { int error; struct nlmsghdr msg; }; @@ -106,8 +103,7 @@ struct nlmsgerr #define NETLINK_BROADCAST_ERROR 4 #define NETLINK_NO_ENOBUFS 5 -struct nl_pktinfo -{ +struct nl_pktinfo { __u32 group; }; @@ -127,8 +123,7 @@ enum { * <-------------- nlattr->nla_len --------------> */ -struct nlattr -{ +struct nlattr { __u16 nla_len; __u16 nla_type; }; @@ -161,8 +156,7 @@ static inline struct nlmsghdr *nlmsg_hdr(const struct sk_buff *skb) return (struct nlmsghdr *)skb->data; } -struct netlink_skb_parms -{ +struct netlink_skb_parms { struct ucred creds; /* Skb credentials */ __u32 pid; __u32 dst_group; @@ -220,8 +214,7 @@ int netlink_sendskb(struct sock *sk, struct sk_buff *skb); #define NLMSG_DEFAULT_SIZE (NLMSG_GOODSIZE - NLMSG_HDRLEN) -struct netlink_callback -{ +struct netlink_callback { struct sk_buff *skb; const struct nlmsghdr *nlh; int (*dump)(struct sk_buff * skb, @@ -231,8 +224,7 @@ struct netlink_callback long args[6]; }; -struct netlink_notify -{ +struct netlink_notify { struct net *net; int pid; int protocol; diff --git a/include/linux/pkt_cls.h b/include/linux/pkt_cls.h index 3c842edff388..7f6ba8658abe 100644 --- a/include/linux/pkt_cls.h +++ b/include/linux/pkt_cls.h @@ -75,8 +75,7 @@ bits 9,10,11: redirect counter - redirect TTL. Loop avoidance #define SET_TC_AT(v,n) ((V_TC_AT(n)) | (v & ~M_TC_AT)) /* Action attributes */ -enum -{ +enum { TCA_ACT_UNSPEC, TCA_ACT_KIND, TCA_ACT_OPTIONS, @@ -108,8 +107,7 @@ enum #define TC_ACT_JUMP 0x10000000 /* Action type identifiers*/ -enum -{ +enum { TCA_ID_UNSPEC=0, TCA_ID_POLICE=1, /* other actions go here */ @@ -118,8 +116,7 @@ enum #define TCA_ID_MAX __TCA_ID_MAX -struct tc_police -{ +struct tc_police { __u32 index; int action; #define TC_POLICE_UNSPEC TC_ACT_UNSPEC @@ -138,15 +135,13 @@ struct tc_police __u32 capab; }; -struct tcf_t -{ +struct tcf_t { __u64 install; __u64 lastuse; __u64 expires; }; -struct tc_cnt -{ +struct tc_cnt { int refcnt; int bindcnt; }; @@ -158,8 +153,7 @@ struct tc_cnt int refcnt; \ int bindcnt -enum -{ +enum { TCA_POLICE_UNSPEC, TCA_POLICE_TBF, TCA_POLICE_RATE, @@ -182,8 +176,7 @@ enum #define TC_U32_UNSPEC 0 #define TC_U32_ROOT (0xFFF00000) -enum -{ +enum { TCA_U32_UNSPEC, TCA_U32_CLASSID, TCA_U32_HASH, @@ -200,16 +193,14 @@ enum #define TCA_U32_MAX (__TCA_U32_MAX - 1) -struct tc_u32_key -{ +struct tc_u32_key { __be32 mask; __be32 val; int off; int offmask; }; -struct tc_u32_sel -{ +struct tc_u32_sel { unsigned char flags; unsigned char offshift; unsigned char nkeys; @@ -223,15 +214,13 @@ struct tc_u32_sel struct tc_u32_key keys[0]; }; -struct tc_u32_mark -{ +struct tc_u32_mark { __u32 val; __u32 mask; __u32 success; }; -struct tc_u32_pcnt -{ +struct tc_u32_pcnt { __u64 rcnt; __u64 rhit; __u64 kcnts[0]; @@ -249,8 +238,7 @@ struct tc_u32_pcnt /* RSVP filter */ -enum -{ +enum { TCA_RSVP_UNSPEC, TCA_RSVP_CLASSID, TCA_RSVP_DST, @@ -263,15 +251,13 @@ enum #define TCA_RSVP_MAX (__TCA_RSVP_MAX - 1 ) -struct tc_rsvp_gpi -{ +struct tc_rsvp_gpi { __u32 key; __u32 mask; int offset; }; -struct tc_rsvp_pinfo -{ +struct tc_rsvp_pinfo { struct tc_rsvp_gpi dpi; struct tc_rsvp_gpi spi; __u8 protocol; @@ -282,8 +268,7 @@ struct tc_rsvp_pinfo /* ROUTE filter */ -enum -{ +enum { TCA_ROUTE4_UNSPEC, TCA_ROUTE4_CLASSID, TCA_ROUTE4_TO, @@ -299,8 +284,7 @@ enum /* FW filter */ -enum -{ +enum { TCA_FW_UNSPEC, TCA_FW_CLASSID, TCA_FW_POLICE, @@ -314,8 +298,7 @@ enum /* TC index filter */ -enum -{ +enum { TCA_TCINDEX_UNSPEC, TCA_TCINDEX_HASH, TCA_TCINDEX_MASK, @@ -331,8 +314,7 @@ enum /* Flow filter */ -enum -{ +enum { FLOW_KEY_SRC, FLOW_KEY_DST, FLOW_KEY_PROTO, @@ -355,14 +337,12 @@ enum #define FLOW_KEY_MAX (__FLOW_KEY_MAX - 1) -enum -{ +enum { FLOW_MODE_MAP, FLOW_MODE_HASH, }; -enum -{ +enum { TCA_FLOW_UNSPEC, TCA_FLOW_KEYS, TCA_FLOW_MODE, @@ -383,8 +363,7 @@ enum /* Basic filter */ -enum -{ +enum { TCA_BASIC_UNSPEC, TCA_BASIC_CLASSID, TCA_BASIC_EMATCHES, @@ -398,8 +377,7 @@ enum /* Cgroup classifier */ -enum -{ +enum { TCA_CGROUP_UNSPEC, TCA_CGROUP_ACT, TCA_CGROUP_POLICE, @@ -411,14 +389,12 @@ enum /* Extended Matches */ -struct tcf_ematch_tree_hdr -{ +struct tcf_ematch_tree_hdr { __u16 nmatches; __u16 progid; }; -enum -{ +enum { TCA_EMATCH_TREE_UNSPEC, TCA_EMATCH_TREE_HDR, TCA_EMATCH_TREE_LIST, @@ -426,8 +402,7 @@ enum }; #define TCA_EMATCH_TREE_MAX (__TCA_EMATCH_TREE_MAX - 1) -struct tcf_ematch_hdr -{ +struct tcf_ematch_hdr { __u16 matchid; __u16 kind; __u16 flags; @@ -457,8 +432,7 @@ struct tcf_ematch_hdr #define TCF_EM_REL_MASK 3 #define TCF_EM_REL_VALID(v) (((v) & TCF_EM_REL_MASK) != TCF_EM_REL_MASK) -enum -{ +enum { TCF_LAYER_LINK, TCF_LAYER_NETWORK, TCF_LAYER_TRANSPORT, @@ -479,13 +453,11 @@ enum #define TCF_EM_VLAN 6 #define TCF_EM_MAX 6 -enum -{ +enum { TCF_EM_PROG_TC }; -enum -{ +enum { TCF_EM_OPND_EQ, TCF_EM_OPND_GT, TCF_EM_OPND_LT diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h index d51a2b3e221e..2cfa4bc8dea6 100644 --- a/include/linux/pkt_sched.h +++ b/include/linux/pkt_sched.h @@ -29,8 +29,7 @@ Particular schedulers may have also their private records. */ -struct tc_stats -{ +struct tc_stats { __u64 bytes; /* NUmber of enqueues bytes */ __u32 packets; /* Number of enqueued packets */ __u32 drops; /* Packets dropped because of lack of resources */ @@ -42,8 +41,7 @@ struct tc_stats __u32 backlog; }; -struct tc_estimator -{ +struct tc_estimator { signed char interval; unsigned char ewma_log; }; @@ -75,8 +73,7 @@ struct tc_estimator #define TC_H_ROOT (0xFFFFFFFFU) #define TC_H_INGRESS (0xFFFFFFF1U) -struct tc_ratespec -{ +struct tc_ratespec { unsigned char cell_log; unsigned char __reserved; unsigned short overhead; @@ -109,8 +106,7 @@ enum { /* FIFO section */ -struct tc_fifo_qopt -{ +struct tc_fifo_qopt { __u32 limit; /* Queue length: bytes for bfifo, packets for pfifo */ }; @@ -119,8 +115,7 @@ struct tc_fifo_qopt #define TCQ_PRIO_BANDS 16 #define TCQ_MIN_PRIO_BANDS 2 -struct tc_prio_qopt -{ +struct tc_prio_qopt { int bands; /* Number of bands */ __u8 priomap[TC_PRIO_MAX+1]; /* Map: logical priority -> PRIO band */ }; @@ -134,8 +129,7 @@ struct tc_multiq_qopt { /* TBF section */ -struct tc_tbf_qopt -{ +struct tc_tbf_qopt { struct tc_ratespec rate; struct tc_ratespec peakrate; __u32 limit; @@ -143,8 +137,7 @@ struct tc_tbf_qopt __u32 mtu; }; -enum -{ +enum { TCA_TBF_UNSPEC, TCA_TBF_PARMS, TCA_TBF_RTAB, @@ -161,8 +154,7 @@ enum /* SFQ section */ -struct tc_sfq_qopt -{ +struct tc_sfq_qopt { unsigned quantum; /* Bytes per round allocated to flow */ int perturb_period; /* Period of hash perturbation */ __u32 limit; /* Maximal packets in queue */ @@ -170,8 +162,7 @@ struct tc_sfq_qopt unsigned flows; /* Maximal number of flows */ }; -struct tc_sfq_xstats -{ +struct tc_sfq_xstats { __s32 allot; }; @@ -186,8 +177,7 @@ struct tc_sfq_xstats /* RED section */ -enum -{ +enum { TCA_RED_UNSPEC, TCA_RED_PARMS, TCA_RED_STAB, @@ -196,8 +186,7 @@ enum #define TCA_RED_MAX (__TCA_RED_MAX - 1) -struct tc_red_qopt -{ +struct tc_red_qopt { __u32 limit; /* HARD maximal queue length (bytes) */ __u32 qth_min; /* Min average length threshold (bytes) */ __u32 qth_max; /* Max average length threshold (bytes) */ @@ -209,8 +198,7 @@ struct tc_red_qopt #define TC_RED_HARDDROP 2 }; -struct tc_red_xstats -{ +struct tc_red_xstats { __u32 early; /* Early drops */ __u32 pdrop; /* Drops due to queue limits */ __u32 other; /* Drops due to drop() calls */ @@ -221,8 +209,7 @@ struct tc_red_xstats #define MAX_DPs 16 -enum -{ +enum { TCA_GRED_UNSPEC, TCA_GRED_PARMS, TCA_GRED_STAB, @@ -232,8 +219,7 @@ enum #define TCA_GRED_MAX (__TCA_GRED_MAX - 1) -struct tc_gred_qopt -{ +struct tc_gred_qopt { __u32 limit; /* HARD maximal queue length (bytes) */ __u32 qth_min; /* Min average length threshold (bytes) */ __u32 qth_max; /* Max average length threshold (bytes) */ @@ -253,8 +239,7 @@ struct tc_gred_qopt }; /* gred setup */ -struct tc_gred_sopt -{ +struct tc_gred_sopt { __u32 DPs; __u32 def_DP; __u8 grio; @@ -267,8 +252,7 @@ struct tc_gred_sopt #define TC_HTB_MAXDEPTH 8 #define TC_HTB_PROTOVER 3 /* the same as HTB and TC's major */ -struct tc_htb_opt -{ +struct tc_htb_opt { struct tc_ratespec rate; struct tc_ratespec ceil; __u32 buffer; @@ -277,8 +261,7 @@ struct tc_htb_opt __u32 level; /* out only */ __u32 prio; }; -struct tc_htb_glob -{ +struct tc_htb_glob { __u32 version; /* to match HTB/TC */ __u32 rate2quantum; /* bps->quantum divisor */ __u32 defcls; /* default class number */ @@ -287,8 +270,7 @@ struct tc_htb_glob /* stats */ __u32 direct_pkts; /* count of non shapped packets */ }; -enum -{ +enum { TCA_HTB_UNSPEC, TCA_HTB_PARMS, TCA_HTB_INIT, @@ -299,8 +281,7 @@ enum #define TCA_HTB_MAX (__TCA_HTB_MAX - 1) -struct tc_htb_xstats -{ +struct tc_htb_xstats { __u32 lends; __u32 borrows; __u32 giants; /* too big packets (rate will not be accurate) */ @@ -310,28 +291,24 @@ struct tc_htb_xstats /* HFSC section */ -struct tc_hfsc_qopt -{ +struct tc_hfsc_qopt { __u16 defcls; /* default class */ }; -struct tc_service_curve -{ +struct tc_service_curve { __u32 m1; /* slope of the first segment in bps */ __u32 d; /* x-projection of the first segment in us */ __u32 m2; /* slope of the second segment in bps */ }; -struct tc_hfsc_stats -{ +struct tc_hfsc_stats { __u64 work; /* total work done */ __u64 rtwork; /* work done by real-time criteria */ __u32 period; /* current period */ __u32 level; /* class level in hierarchy */ }; -enum -{ +enum { TCA_HFSC_UNSPEC, TCA_HFSC_RSC, TCA_HFSC_FSC, @@ -348,8 +325,7 @@ enum #define TC_CBQ_MAXLEVEL 8 #define TC_CBQ_DEF_EWMA 5 -struct tc_cbq_lssopt -{ +struct tc_cbq_lssopt { unsigned char change; unsigned char flags; #define TCF_CBQ_LSS_BOUNDED 1 @@ -368,8 +344,7 @@ struct tc_cbq_lssopt __u32 avpkt; }; -struct tc_cbq_wrropt -{ +struct tc_cbq_wrropt { unsigned char flags; unsigned char priority; unsigned char cpriority; @@ -378,8 +353,7 @@ struct tc_cbq_wrropt __u32 weight; }; -struct tc_cbq_ovl -{ +struct tc_cbq_ovl { unsigned char strategy; #define TC_CBQ_OVL_CLASSIC 0 #define TC_CBQ_OVL_DELAY 1 @@ -391,30 +365,26 @@ struct tc_cbq_ovl __u32 penalty; }; -struct tc_cbq_police -{ +struct tc_cbq_police { unsigned char police; unsigned char __res1; unsigned short __res2; }; -struct tc_cbq_fopt -{ +struct tc_cbq_fopt { __u32 split; __u32 defmap; __u32 defchange; }; -struct tc_cbq_xstats -{ +struct tc_cbq_xstats { __u32 borrows; __u32 overactions; __s32 avgidle; __s32 undertime; }; -enum -{ +enum { TCA_CBQ_UNSPEC, TCA_CBQ_LSSOPT, TCA_CBQ_WRROPT, @@ -459,8 +429,7 @@ enum { /* Network emulator */ -enum -{ +enum { TCA_NETEM_UNSPEC, TCA_NETEM_CORR, TCA_NETEM_DELAY_DIST, @@ -471,8 +440,7 @@ enum #define TCA_NETEM_MAX (__TCA_NETEM_MAX - 1) -struct tc_netem_qopt -{ +struct tc_netem_qopt { __u32 latency; /* added delay (us) */ __u32 limit; /* fifo limit (packets) */ __u32 loss; /* random packet loss (0=none ~0=100%) */ @@ -481,21 +449,18 @@ struct tc_netem_qopt __u32 jitter; /* random jitter in latency (us) */ }; -struct tc_netem_corr -{ +struct tc_netem_corr { __u32 delay_corr; /* delay correlation */ __u32 loss_corr; /* packet loss correlation */ __u32 dup_corr; /* duplicate correlation */ }; -struct tc_netem_reorder -{ +struct tc_netem_reorder { __u32 probability; __u32 correlation; }; -struct tc_netem_corrupt -{ +struct tc_netem_corrupt { __u32 probability; __u32 correlation; }; @@ -504,8 +469,7 @@ struct tc_netem_corrupt /* DRR */ -enum -{ +enum { TCA_DRR_UNSPEC, TCA_DRR_QUANTUM, __TCA_DRR_MAX @@ -513,8 +477,7 @@ enum #define TCA_DRR_MAX (__TCA_DRR_MAX - 1) -struct tc_drr_stats -{ +struct tc_drr_stats { __u32 deficit; }; diff --git a/include/linux/route.h b/include/linux/route.h index f7ed35d5e653..6600708311c8 100644 --- a/include/linux/route.h +++ b/include/linux/route.h @@ -27,8 +27,7 @@ #include /* This structure gets passed by the SIOCADDRT and SIOCDELRT calls. */ -struct rtentry -{ +struct rtentry { unsigned long rt_pad1; struct sockaddr rt_dst; /* target address */ struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */ diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index e78b60cd65a4..14fc906ed602 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -127,8 +127,7 @@ enum { with attribute type. */ -struct rtattr -{ +struct rtattr { unsigned short rta_len; unsigned short rta_type; }; @@ -154,8 +153,7 @@ struct rtattr * Definitions used in routing table administration. ****/ -struct rtmsg -{ +struct rtmsg { unsigned char rtm_family; unsigned char rtm_dst_len; unsigned char rtm_src_len; @@ -171,8 +169,7 @@ struct rtmsg /* rtm_type */ -enum -{ +enum { RTN_UNSPEC, RTN_UNICAST, /* Gateway or direct route */ RTN_LOCAL, /* Accept locally */ @@ -230,8 +227,7 @@ enum could be assigned a value between UNIVERSE and LINK. */ -enum rt_scope_t -{ +enum rt_scope_t { RT_SCOPE_UNIVERSE=0, /* User defined values */ RT_SCOPE_SITE=200, @@ -249,8 +245,7 @@ enum rt_scope_t /* Reserved table identifiers */ -enum rt_class_t -{ +enum rt_class_t { RT_TABLE_UNSPEC=0, /* User defined values */ RT_TABLE_COMPAT=252, @@ -263,8 +258,7 @@ enum rt_class_t /* Routing message attributes */ -enum rtattr_type_t -{ +enum rtattr_type_t { RTA_UNSPEC, RTA_DST, RTA_SRC, @@ -298,8 +292,7 @@ enum rtattr_type_t * and rtt for different paths from multipath. */ -struct rtnexthop -{ +struct rtnexthop { unsigned short rtnh_len; unsigned char rtnh_flags; unsigned char rtnh_hops; @@ -325,8 +318,7 @@ struct rtnexthop /* RTM_CACHEINFO */ -struct rta_cacheinfo -{ +struct rta_cacheinfo { __u32 rta_clntref; __u32 rta_lastuse; __s32 rta_expires; @@ -341,8 +333,7 @@ struct rta_cacheinfo /* RTM_METRICS --- array of struct rtattr with types of RTAX_* */ -enum -{ +enum { RTAX_UNSPEC, #define RTAX_UNSPEC RTAX_UNSPEC RTAX_LOCK, @@ -383,8 +374,7 @@ enum #define RTAX_FEATURE_NO_WSCALE 0x00000010 #define RTAX_FEATURE_NO_DSACK 0x00000020 -struct rta_session -{ +struct rta_session { __u8 proto; __u8 pad1; __u16 pad2; @@ -409,8 +399,7 @@ struct rta_session * General form of address family dependent message. ****/ -struct rtgenmsg -{ +struct rtgenmsg { unsigned char rtgen_family; }; @@ -423,8 +412,7 @@ struct rtgenmsg * on network protocol. */ -struct ifinfomsg -{ +struct ifinfomsg { unsigned char ifi_family; unsigned char __ifi_pad; unsigned short ifi_type; /* ARPHRD_* */ @@ -437,8 +425,7 @@ struct ifinfomsg * prefix information ****/ -struct prefixmsg -{ +struct prefixmsg { unsigned char prefix_family; unsigned char prefix_pad1; unsigned short prefix_pad2; @@ -459,8 +446,7 @@ enum #define PREFIX_MAX (__PREFIX_MAX - 1) -struct prefix_cacheinfo -{ +struct prefix_cacheinfo { __u32 preferred_time; __u32 valid_time; }; @@ -470,8 +456,7 @@ struct prefix_cacheinfo * Traffic control messages. ****/ -struct tcmsg -{ +struct tcmsg { unsigned char tcm_family; unsigned char tcm__pad1; unsigned short tcm__pad2; @@ -481,8 +466,7 @@ struct tcmsg __u32 tcm_info; }; -enum -{ +enum { TCA_UNSPEC, TCA_KIND, TCA_OPTIONS, @@ -504,8 +488,7 @@ enum * Neighbor Discovery userland options ****/ -struct nduseroptmsg -{ +struct nduseroptmsg { unsigned char nduseropt_family; unsigned char nduseropt_pad1; unsigned short nduseropt_opts_len; /* Total length of options */ @@ -517,8 +500,7 @@ struct nduseroptmsg /* Followed by one or more ND options */ }; -enum -{ +enum { NDUSEROPT_UNSPEC, NDUSEROPT_SRCADDR, __NDUSEROPT_MAX @@ -600,8 +582,7 @@ enum rtnetlink_groups { #define RTNLGRP_MAX (__RTNLGRP_MAX - 1) /* TC action piece */ -struct tcamsg -{ +struct tcamsg { unsigned char tca_family; unsigned char tca__pad1; unsigned short tca__pad2; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 0c68fbd6faac..d0448c5d1ffc 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -491,8 +491,7 @@ extern int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, int len,int odd, struct sk_buff *skb), void *from, int length); -struct skb_seq_state -{ +struct skb_seq_state { __u32 lower_offset; __u32 upper_offset; __u32 frag_idx; diff --git a/include/linux/tc_act/tc_defact.h b/include/linux/tc_act/tc_defact.h index 964f473af0f0..6f65d07c7ce2 100644 --- a/include/linux/tc_act/tc_defact.h +++ b/include/linux/tc_act/tc_defact.h @@ -3,13 +3,11 @@ #include -struct tc_defact -{ +struct tc_defact { tc_gen; }; -enum -{ +enum { TCA_DEF_UNSPEC, TCA_DEF_TM, TCA_DEF_PARMS, diff --git a/include/linux/tc_act/tc_gact.h b/include/linux/tc_act/tc_gact.h index e895c0a39629..f7bf94eed510 100644 --- a/include/linux/tc_act/tc_gact.h +++ b/include/linux/tc_act/tc_gact.h @@ -5,14 +5,12 @@ #include #define TCA_ACT_GACT 5 -struct tc_gact -{ +struct tc_gact { tc_gen; }; -struct tc_gact_p -{ +struct tc_gact_p { #define PGACT_NONE 0 #define PGACT_NETRAND 1 #define PGACT_DETERM 2 @@ -22,8 +20,7 @@ struct tc_gact_p int paction; }; -enum -{ +enum { TCA_GACT_UNSPEC, TCA_GACT_TM, TCA_GACT_PARMS, diff --git a/include/linux/tc_act/tc_ipt.h b/include/linux/tc_act/tc_ipt.h index 4b6f7b6c7a79..a2335563d21f 100644 --- a/include/linux/tc_act/tc_ipt.h +++ b/include/linux/tc_act/tc_ipt.h @@ -5,8 +5,7 @@ #define TCA_ACT_IPT 6 -enum -{ +enum { TCA_IPT_UNSPEC, TCA_IPT_TABLE, TCA_IPT_HOOK, diff --git a/include/linux/tc_act/tc_mirred.h b/include/linux/tc_act/tc_mirred.h index 0a99ab60d610..7561750e8fd6 100644 --- a/include/linux/tc_act/tc_mirred.h +++ b/include/linux/tc_act/tc_mirred.h @@ -10,15 +10,13 @@ #define TCA_INGRESS_REDIR 3 /* packet redirect to INGRESS*/ #define TCA_INGRESS_MIRROR 4 /* mirror packet to INGRESS */ -struct tc_mirred -{ +struct tc_mirred { tc_gen; int eaction; /* one of IN/EGRESS_MIRROR/REDIR */ __u32 ifindex; /* ifindex of egress port */ }; -enum -{ +enum { TCA_MIRRED_UNSPEC, TCA_MIRRED_TM, TCA_MIRRED_PARMS, diff --git a/include/linux/tc_act/tc_nat.h b/include/linux/tc_act/tc_nat.h index e7cf31e8ba79..6663aeba0b9a 100644 --- a/include/linux/tc_act/tc_nat.h +++ b/include/linux/tc_act/tc_nat.h @@ -6,8 +6,7 @@ #define TCA_ACT_NAT 9 -enum -{ +enum { TCA_NAT_UNSPEC, TCA_NAT_PARMS, TCA_NAT_TM, @@ -17,8 +16,7 @@ enum #define TCA_NAT_FLAG_EGRESS 1 -struct tc_nat -{ +struct tc_nat { tc_gen; __be32 old_addr; __be32 new_addr; diff --git a/include/linux/tc_act/tc_pedit.h b/include/linux/tc_act/tc_pedit.h index 54ce9064115a..716cfabcd5b2 100644 --- a/include/linux/tc_act/tc_pedit.h +++ b/include/linux/tc_act/tc_pedit.h @@ -6,8 +6,7 @@ #define TCA_ACT_PEDIT 7 -enum -{ +enum { TCA_PEDIT_UNSPEC, TCA_PEDIT_TM, TCA_PEDIT_PARMS, @@ -15,8 +14,7 @@ enum }; #define TCA_PEDIT_MAX (__TCA_PEDIT_MAX - 1) -struct tc_pedit_key -{ +struct tc_pedit_key { __u32 mask; /* AND */ __u32 val; /*XOR */ __u32 off; /*offset */ @@ -25,8 +23,7 @@ struct tc_pedit_key __u32 shift; }; -struct tc_pedit_sel -{ +struct tc_pedit_sel { tc_gen; unsigned char nkeys; unsigned char flags; diff --git a/include/linux/tc_ematch/tc_em_cmp.h b/include/linux/tc_ematch/tc_em_cmp.h index 38e7f7b25ec2..f34bb1bae083 100644 --- a/include/linux/tc_ematch/tc_em_cmp.h +++ b/include/linux/tc_ematch/tc_em_cmp.h @@ -4,8 +4,7 @@ #include #include -struct tcf_em_cmp -{ +struct tcf_em_cmp { __u32 val; __u32 mask; __u16 off; @@ -15,8 +14,7 @@ struct tcf_em_cmp __u8 opnd:4; }; -enum -{ +enum { TCF_EM_ALIGN_U8 = 1, TCF_EM_ALIGN_U16 = 2, TCF_EM_ALIGN_U32 = 4 diff --git a/include/linux/tc_ematch/tc_em_meta.h b/include/linux/tc_ematch/tc_em_meta.h index dcfb733fa1f6..0864206ec1a3 100644 --- a/include/linux/tc_ematch/tc_em_meta.h +++ b/include/linux/tc_ematch/tc_em_meta.h @@ -4,8 +4,7 @@ #include #include -enum -{ +enum { TCA_EM_META_UNSPEC, TCA_EM_META_HDR, TCA_EM_META_LVALUE, @@ -14,8 +13,7 @@ enum }; #define TCA_EM_META_MAX (__TCA_EM_META_MAX - 1) -struct tcf_meta_val -{ +struct tcf_meta_val { __u16 kind; __u8 shift; __u8 op; @@ -26,16 +24,14 @@ struct tcf_meta_val #define TCF_META_ID_MASK 0x7ff #define TCF_META_ID(kind) ((kind) & TCF_META_ID_MASK) -enum -{ +enum { TCF_META_TYPE_VAR, TCF_META_TYPE_INT, __TCF_META_TYPE_MAX }; #define TCF_META_TYPE_MAX (__TCF_META_TYPE_MAX - 1) -enum -{ +enum { TCF_META_ID_VALUE, TCF_META_ID_RANDOM, TCF_META_ID_LOADAVG_0, @@ -87,8 +83,7 @@ enum }; #define TCF_META_ID_MAX (__TCF_META_ID_MAX - 1) -struct tcf_meta_hdr -{ +struct tcf_meta_hdr { struct tcf_meta_val left; struct tcf_meta_val right; }; diff --git a/include/linux/tc_ematch/tc_em_nbyte.h b/include/linux/tc_ematch/tc_em_nbyte.h index 9ed8c2e58488..7172cfb999c1 100644 --- a/include/linux/tc_ematch/tc_em_nbyte.h +++ b/include/linux/tc_ematch/tc_em_nbyte.h @@ -4,8 +4,7 @@ #include #include -struct tcf_em_nbyte -{ +struct tcf_em_nbyte { __u16 off; __u16 len:12; __u8 layer:4; diff --git a/include/linux/tc_ematch/tc_em_text.h b/include/linux/tc_ematch/tc_em_text.h index d12a73a225fc..5aac4045ba88 100644 --- a/include/linux/tc_ematch/tc_em_text.h +++ b/include/linux/tc_ematch/tc_em_text.h @@ -6,8 +6,7 @@ #define TC_EM_TEXT_ALGOSIZ 16 -struct tcf_em_text -{ +struct tcf_em_text { char algo[TC_EM_TEXT_ALGOSIZ]; __u16 from_offset; __u16 to_offset; diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 61723a7c21fe..eeecb8547a2a 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -102,8 +102,7 @@ enum { #define TCPI_OPT_WSCALE 4 #define TCPI_OPT_ECN 8 -enum tcp_ca_state -{ +enum tcp_ca_state { TCP_CA_Open = 0, #define TCPF_CA_Open (1< Date: Wed, 4 Nov 2009 14:42:28 +0100 Subject: mac80211: make ieee80211_find_sta per virtual interface Since we have a TODO item to make all station management dependent on virtual interfaces, I figured I'd start with pushing such a change to drivers before more drivers start using the ieee80211_find_sta() API with a hw pointer and cause us grief later on. For now continue exporting the old API in form of ieee80211_find_sta_by_hw(), but discourage its use strongly. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ath9k/recv.c | 3 ++- drivers/net/wireless/ath/ath9k/xmit.c | 3 ++- drivers/net/wireless/iwlwifi/iwl-3945-rs.c | 3 ++- drivers/net/wireless/iwlwifi/iwl-core.c | 2 +- drivers/net/wireless/iwlwifi/iwl-sta.c | 2 +- include/net/mac80211.h | 25 +++++++++++++++++++++++-- net/mac80211/sta_info.c | 18 ++++++++++++++++-- 7 files changed, 47 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/ath/ath9k/recv.c b/drivers/net/wireless/ath/ath9k/recv.c index c880a55939bf..355dd1834e1d 100644 --- a/drivers/net/wireless/ath/ath9k/recv.c +++ b/drivers/net/wireless/ath/ath9k/recv.c @@ -202,7 +202,8 @@ static int ath_rx_prepare(struct sk_buff *skb, struct ath_desc *ds, } rcu_read_lock(); - sta = ieee80211_find_sta(sc->hw, hdr->addr2); + /* XXX: use ieee80211_find_sta! */ + sta = ieee80211_find_sta_by_hw(sc->hw, hdr->addr2); if (sta) { an = (struct ath_node *) sta->drv_priv; if (ds->ds_rxstat.rs_rssi != ATH9K_RSSI_BAD && diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c index 2a4efcbced60..8e052f406c35 100644 --- a/drivers/net/wireless/ath/ath9k/xmit.c +++ b/drivers/net/wireless/ath/ath9k/xmit.c @@ -282,7 +282,8 @@ static void ath_tx_complete_aggr(struct ath_softc *sc, struct ath_txq *txq, rcu_read_lock(); - sta = ieee80211_find_sta(sc->hw, hdr->addr1); + /* XXX: use ieee80211_find_sta! */ + sta = ieee80211_find_sta_by_hw(sc->hw, hdr->addr1); if (!sta) { rcu_read_unlock(); return; diff --git a/drivers/net/wireless/iwlwifi/iwl-3945-rs.c b/drivers/net/wireless/iwlwifi/iwl-3945-rs.c index 819a81bbb1b7..dc81e19674f7 100644 --- a/drivers/net/wireless/iwlwifi/iwl-3945-rs.c +++ b/drivers/net/wireless/iwlwifi/iwl-3945-rs.c @@ -913,7 +913,8 @@ void iwl3945_rate_scale_init(struct ieee80211_hw *hw, s32 sta_id) rcu_read_lock(); - sta = ieee80211_find_sta(hw, priv->stations[sta_id].sta.sta.addr); + sta = ieee80211_find_sta(priv->vif, + priv->stations[sta_id].sta.sta.addr); if (!sta) { rcu_read_unlock(); return; diff --git a/drivers/net/wireless/iwlwifi/iwl-core.c b/drivers/net/wireless/iwlwifi/iwl-core.c index 7373b2f50aca..e0b5b4aef41d 100644 --- a/drivers/net/wireless/iwlwifi/iwl-core.c +++ b/drivers/net/wireless/iwlwifi/iwl-core.c @@ -2300,7 +2300,7 @@ static void iwl_ht_conf(struct iwl_priv *priv, switch (priv->iw_mode) { case NL80211_IFTYPE_STATION: rcu_read_lock(); - sta = ieee80211_find_sta(priv->hw, priv->bssid); + sta = ieee80211_find_sta(priv->vif, priv->bssid); if (sta) { struct ieee80211_sta_ht_cap *ht_cap = &sta->ht_cap; int maxstreams; diff --git a/drivers/net/wireless/iwlwifi/iwl-sta.c b/drivers/net/wireless/iwlwifi/iwl-sta.c index 74cc8dbe9359..eba36f737388 100644 --- a/drivers/net/wireless/iwlwifi/iwl-sta.c +++ b/drivers/net/wireless/iwlwifi/iwl-sta.c @@ -1017,7 +1017,7 @@ int iwl_rxon_add_station(struct iwl_priv *priv, const u8 *addr, bool is_ap) */ if (priv->current_ht_config.is_ht) { rcu_read_lock(); - sta = ieee80211_find_sta(priv->hw, addr); + sta = ieee80211_find_sta(priv->vif, addr); if (sta) { memcpy(&ht_config, &sta->ht_cap, sizeof(ht_config)); cur_ht_config = &ht_config; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index e12293e60ac1..7f035d779db9 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2106,15 +2106,36 @@ void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_hw *hw, const u8 *ra, /** * ieee80211_find_sta - find a station * - * @hw: pointer as obtained from ieee80211_alloc_hw() + * @vif: virtual interface to look for station on * @addr: station's address * * This function must be called under RCU lock and the * resulting pointer is only valid under RCU lock as well. */ -struct ieee80211_sta *ieee80211_find_sta(struct ieee80211_hw *hw, +struct ieee80211_sta *ieee80211_find_sta(struct ieee80211_vif *vif, const u8 *addr); +/** + * ieee80211_find_sta_by_hw - find a station on hardware + * + * @hw: pointer as obtained from ieee80211_alloc_hw() + * @addr: station's address + * + * This function must be called under RCU lock and the + * resulting pointer is only valid under RCU lock as well. + * + * NOTE: This function should not be used! When mac80211 is converted + * internally to properly keep track of stations on multiple + * virtual interfaces, it will not always know which station to + * return here since a single address might be used by multiple + * logical stations (e.g. consider a station connecting to another + * BSSID on the same AP hardware without disconnecting first). + * + * DO NOT USE THIS FUNCTION. + */ +struct ieee80211_sta *ieee80211_find_sta_by_hw(struct ieee80211_hw *hw, + const u8 *addr); + /** * ieee80211_beacon_loss - inform hardware does not receive beacons * diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 594f2318c3d8..cde2da7a74df 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -801,8 +801,8 @@ void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, sta_info_destroy(sta); } -struct ieee80211_sta *ieee80211_find_sta(struct ieee80211_hw *hw, - const u8 *addr) +struct ieee80211_sta *ieee80211_find_sta_by_hw(struct ieee80211_hw *hw, + const u8 *addr) { struct sta_info *sta = sta_info_get(hw_to_local(hw), addr); @@ -810,4 +810,18 @@ struct ieee80211_sta *ieee80211_find_sta(struct ieee80211_hw *hw, return NULL; return &sta->sta; } +EXPORT_SYMBOL_GPL(ieee80211_find_sta_by_hw); + +struct ieee80211_sta *ieee80211_find_sta(struct ieee80211_vif *vif, + const u8 *addr) +{ + struct ieee80211_sub_if_data *sdata; + + if (!vif) + return NULL; + + sdata = vif_to_sdata(vif); + + return ieee80211_find_sta_by_hw(&sdata->local->hw, addr); +} EXPORT_SYMBOL(ieee80211_find_sta); -- cgit v1.3-8-gc7d7 From 3c5d92a0cfb5103c0d5ab74d4ae6373d3af38148 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 29 Sep 2009 14:25:16 +0200 Subject: nohz: Introduce arch_needs_cpu Allow the architecture to request a normal jiffy tick when the system goes idle and tick_nohz_stop_sched_tick is called . On s390 the hook is used to prevent the system going fully idle if there has been an interrupt other than a clock comparator interrupt since the last wakeup. On s390 the HiperSockets response time for 1 connection ping-pong goes down from 42 to 34 microseconds. The CPU cost decreases by 27%. Signed-off-by: Martin Schwidefsky LKML-Reference: <20090929122533.402715150@de.ibm.com> Signed-off-by: Thomas Gleixner --- arch/s390/include/asm/cputime.h | 8 ++++++++ arch/s390/kernel/s390_ext.c | 2 ++ arch/s390/kernel/vtime.c | 2 ++ drivers/s390/cio/cio.c | 1 + include/linux/tick.h | 3 +++ kernel/time/tick-sched.c | 13 ++++++++----- 6 files changed, 24 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h index 24b1244aadb9..95f3561517c7 100644 --- a/arch/s390/include/asm/cputime.h +++ b/arch/s390/include/asm/cputime.h @@ -183,6 +183,7 @@ struct s390_idle_data { unsigned long long idle_count; unsigned long long idle_enter; unsigned long long idle_time; + int nohz_delay; }; DECLARE_PER_CPU(struct s390_idle_data, s390_idle); @@ -198,4 +199,11 @@ static inline void s390_idle_check(void) vtime_start_cpu(); } +static inline int s390_nohz_delay(int cpu) +{ + return per_cpu(s390_idle, cpu).nohz_delay != 0; +} + +#define arch_needs_cpu(cpu) s390_nohz_delay(cpu) + #endif /* _S390_CPUTIME_H */ diff --git a/arch/s390/kernel/s390_ext.c b/arch/s390/kernel/s390_ext.c index 0de305b598ce..59618bcd99b7 100644 --- a/arch/s390/kernel/s390_ext.c +++ b/arch/s390/kernel/s390_ext.c @@ -126,6 +126,8 @@ void __irq_entry do_extint(struct pt_regs *regs, unsigned short code) /* Serve timer interrupts first. */ clock_comparator_work(); kstat_cpu(smp_processor_id()).irqs[EXTERNAL_INTERRUPT]++; + if (code != 0x1004) + __get_cpu_var(s390_idle).nohz_delay = 1; index = ext_hash(code); for (p = ext_int_hash[index]; p; p = p->next) { if (likely(p->code == code)) diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index c41bb0d416e1..b59a812a010e 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -167,6 +167,8 @@ void vtime_stop_cpu(void) /* Wait for external, I/O or machine check interrupt. */ psw.mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_IO | PSW_MASK_EXT; + idle->nohz_delay = 0; + /* Check if the CPU timer needs to be reprogrammed. */ if (vq->do_spt) { __u64 vmax = VTIMER_MAX_SLICE; diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c index 138124fcfcad..126f240715a4 100644 --- a/drivers/s390/cio/cio.c +++ b/drivers/s390/cio/cio.c @@ -618,6 +618,7 @@ void __irq_entry do_IRQ(struct pt_regs *regs) old_regs = set_irq_regs(regs); s390_idle_check(); irq_enter(); + __get_cpu_var(s390_idle).nohz_delay = 1; if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator) /* Serve timer interrupts first. */ clock_comparator_work(); diff --git a/include/linux/tick.h b/include/linux/tick.h index 0482229c07db..8dc082194b22 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -98,6 +98,9 @@ extern int tick_check_oneshot_change(int allow_nohz); extern struct tick_sched *tick_get_tick_sched(int cpu); extern void tick_check_idle(int cpu); extern int tick_oneshot_mode_active(void); +# ifndef arch_needs_cpu +# define arch_needs_cpu(cpu) (0) +# endif # else static inline void tick_clock_notify(void) { } static inline int tick_check_oneshot_change(int allow_nohz) { return 0; } diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 7378e2c71ca6..3840f6dff7eb 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -264,12 +264,15 @@ void tick_nohz_stop_sched_tick(int inidle) last_jiffies = jiffies; } while (read_seqretry(&xtime_lock, seq)); - /* Get the next timer wheel timer */ - next_jiffies = get_next_timer_interrupt(last_jiffies); - delta_jiffies = next_jiffies - last_jiffies; - - if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu)) + if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) || + arch_needs_cpu(cpu)) { + next_jiffies = last_jiffies + 1; delta_jiffies = 1; + } else { + /* Get the next timer wheel timer */ + next_jiffies = get_next_timer_interrupt(last_jiffies); + delta_jiffies = next_jiffies - last_jiffies; + } /* * Do not stop the tick, if we are only one off * or if the cpu is required for rcu -- cgit v1.3-8-gc7d7 From 6a2a2d6bf8581216e08be15fcb563cfd6c430e1e Mon Sep 17 00:00:00 2001 From: Gilad Ben-Yossef Date: Wed, 4 Nov 2009 23:23:10 -0800 Subject: tcp: Use defaults when no route options are available Trying to parse the option of a SYN packet that we have no route entry for should just use global wide defaults for route entry options. Signed-off-by: Gilad Ben-Yossef Tested-by: Valdis.Kletnieks@vt.edu Signed-off-by: David S. Miller --- include/net/dst.h | 2 +- net/ipv4/tcp_input.c | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/dst.h b/include/net/dst.h index 39c4a5963e12..387cb3cfde7e 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -113,7 +113,7 @@ dst_metric(const struct dst_entry *dst, int metric) static inline u32 dst_feature(const struct dst_entry *dst, u32 feature) { - return dst_metric(dst, RTAX_FEATURES) & feature; + return (dst ? dst_metric(dst, RTAX_FEATURES) & feature : 0); } static inline u32 dst_mtu(const struct dst_entry *dst) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index ba0eab65fe80..be0c5bf7bfca 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3704,8 +3704,6 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, struct tcphdr *th = tcp_hdr(skb); int length = (th->doff * 4) - sizeof(struct tcphdr); - BUG_ON(!estab && !dst); - ptr = (unsigned char *)(th + 1); opt_rx->saw_tstamp = 0; -- cgit v1.3-8-gc7d7 From 5b915d9e6dc3d22fedde91dfef1cb1a8fa9a1870 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Thu, 5 Nov 2009 14:08:03 +0100 Subject: HID: fixup quirk for NCR devices NCR devices are terminally broken by design -- they claim themselves to contain proper input applications in their HID report descriptor, but behave very badly if treated in standard way. According to NCR developers, the devices get confused when queried for reports in a standard way, rendering them unusable. NCR is shipping application called "RPSL" that can be used to drive these devices through hiddev, under the assumption that in-kernel driver doesn't perform initial report query. If it does, neither in-kernel nor hiddev-based driver can operate with these devices any more. Introduce a quirk that skips the report query for all NCR devices. The previous NOGET quirk was wrong and had been introduced because I misunderstood the nature of brokenness of these devices. Signed-off-by: Jiri Kosina --- drivers/hid/usbhid/hid-core.c | 3 ++- drivers/hid/usbhid/hid-quirks.c | 2 +- include/linux/hid.h | 1 + 3 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c index 3f56e9c02e65..0258289f3b3e 100644 --- a/drivers/hid/usbhid/hid-core.c +++ b/drivers/hid/usbhid/hid-core.c @@ -996,7 +996,8 @@ static int usbhid_start(struct hid_device *hid) usbhid->urbctrl->transfer_dma = usbhid->ctrlbuf_dma; usbhid->urbctrl->transfer_flags |= (URB_NO_TRANSFER_DMA_MAP | URB_NO_SETUP_DMA_MAP); - usbhid_init_reports(hid); + if (!(hid->quirks & HID_QUIRK_NO_INIT_REPORTS)) + usbhid_init_reports(hid); set_bit(HID_STARTED, &usbhid->iofl); diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c index 2d445b270215..c3b02f59792b 100644 --- a/drivers/hid/usbhid/hid-quirks.c +++ b/drivers/hid/usbhid/hid-quirks.c @@ -281,7 +281,7 @@ u32 usbhid_lookup_quirk(const u16 idVendor, const u16 idProduct) if (idVendor == USB_VENDOR_ID_NCR && idProduct >= USB_DEVICE_ID_NCR_FIRST && idProduct <= USB_DEVICE_ID_NCR_LAST) - return HID_QUIRK_NOGET; + return HID_QUIRK_NO_INIT_REPORTS; down_read(&dquirks_rwsem); bl_entry = usbhid_exists_dquirk(idVendor, idProduct); diff --git a/include/linux/hid.h b/include/linux/hid.h index 10f628416740..87093652dda8 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -312,6 +312,7 @@ struct hid_item { #define HID_QUIRK_MULTI_INPUT 0x00000040 #define HID_QUIRK_SKIP_OUTPUT_REPORTS 0x00010000 #define HID_QUIRK_FULLSPEED_INTERVAL 0x10000000 +#define HID_QUIRK_NO_INIT_REPORTS 0x20000000 /* * This is the global environment of the parser. This information is -- cgit v1.3-8-gc7d7 From d114cd84a1c5ce42bb10cd3a2da57b2bbcef909b Mon Sep 17 00:00:00 2001 From: Krzysztof Helt Date: Thu, 5 Nov 2009 18:32:41 +0100 Subject: ALSA: cs4236: detect chip in one pass The cs4236 was two step detection with call to the snd_wss_free() between two steps. The snd_wss_free() did not free a sound device created in the snd_wss_create(). This caused an OOPS during module removal as the same sound device was released twice. The same OOPS happened if the cs4236 module loading failed. Fix this by adapting the snd_cs4236_create() to correctly work with chips less capable then cs4236. The snd_cs4236_create() behaves the same as the snd_wss_create() if the chip is less capable than the cs4236. Signed-off-by: Krzysztof Helt Signed-off-by: Takashi Iwai --- include/sound/wss.h | 1 - sound/isa/cs423x/cs4236.c | 13 +++-------- sound/isa/cs423x/cs4236_lib.c | 50 +++++++++++++++++++++++++++---------------- sound/isa/wss/wss_lib.c | 3 +-- 4 files changed, 35 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/include/sound/wss.h b/include/sound/wss.h index 6d65f322f1d5..fd01f22825cd 100644 --- a/include/sound/wss.h +++ b/include/sound/wss.h @@ -154,7 +154,6 @@ int snd_wss_create(struct snd_card *card, unsigned short hardware, unsigned short hwshare, struct snd_wss **rchip); -int snd_wss_free(struct snd_wss *chip); int snd_wss_pcm(struct snd_wss *chip, int device, struct snd_pcm **rpcm); int snd_wss_timer(struct snd_wss *chip, int device, struct snd_timer **rtimer); int snd_wss_mixer(struct snd_wss *chip); diff --git a/sound/isa/cs423x/cs4236.c b/sound/isa/cs423x/cs4236.c index a076a6ce8071..93fa6720d197 100644 --- a/sound/isa/cs423x/cs4236.c +++ b/sound/isa/cs423x/cs4236.c @@ -394,21 +394,15 @@ static int __devinit snd_cs423x_probe(struct snd_card *card, int dev) return -EBUSY; } - err = snd_wss_create(card, port[dev], cport[dev], + err = snd_cs4236_create(card, port[dev], cport[dev], irq[dev], dma1[dev], dma2[dev], WSS_HW_DETECT3, 0, &chip); if (err < 0) return err; + + acard->chip = chip; if (chip->hardware & WSS_HW_CS4236B_MASK) { - snd_wss_free(chip); - err = snd_cs4236_create(card, - port[dev], cport[dev], - irq[dev], dma1[dev], dma2[dev], - WSS_HW_DETECT, 0, &chip); - if (err < 0) - return err; - acard->chip = chip; err = snd_cs4236_pcm(chip, 0, &pcm); if (err < 0) @@ -418,7 +412,6 @@ static int __devinit snd_cs423x_probe(struct snd_card *card, int dev) if (err < 0) return err; } else { - acard->chip = chip; err = snd_wss_pcm(chip, 0, &pcm); if (err < 0) return err; diff --git a/sound/isa/cs423x/cs4236_lib.c b/sound/isa/cs423x/cs4236_lib.c index 38835f31298b..1b1ad1cad328 100644 --- a/sound/isa/cs423x/cs4236_lib.c +++ b/sound/isa/cs423x/cs4236_lib.c @@ -87,6 +87,7 @@ #include #include #include +#include /* * @@ -264,7 +265,10 @@ static void snd_cs4236_resume(struct snd_wss *chip) } #endif /* CONFIG_PM */ - +/* + * This function does no fail if the chip is not CS4236B or compatible. + * It just an equivalent to the snd_wss_create() then. + */ int snd_cs4236_create(struct snd_card *card, unsigned long port, unsigned long cport, @@ -281,21 +285,17 @@ int snd_cs4236_create(struct snd_card *card, *rchip = NULL; if (hardware == WSS_HW_DETECT) hardware = WSS_HW_DETECT3; - if (cport < 0x100) { - snd_printk(KERN_ERR "please, specify control port " - "for CS4236+ chips\n"); - return -ENODEV; - } + err = snd_wss_create(card, port, cport, irq, dma1, dma2, hardware, hwshare, &chip); if (err < 0) return err; - if (!(chip->hardware & WSS_HW_CS4236B_MASK)) { - snd_printk(KERN_ERR "CS4236+: MODE3 and extended registers " - "not available, hardware=0x%x\n", chip->hardware); - snd_device_free(card, chip); - return -ENODEV; + if ((chip->hardware & WSS_HW_CS4236B_MASK) == 0) { + snd_printd("chip is not CS4236+, hardware=0x%x\n", + chip->hardware); + *rchip = chip; + return 0; } #if 0 { @@ -308,9 +308,16 @@ int snd_cs4236_create(struct snd_card *card, idx, snd_cs4236_ctrl_in(chip, idx)); } #endif + if (cport < 0x100 || cport == SNDRV_AUTO_PORT) { + snd_printk(KERN_ERR "please, specify control port " + "for CS4236+ chips\n"); + snd_device_free(card, chip); + return -ENODEV; + } ver1 = snd_cs4236_ctrl_in(chip, 1); ver2 = snd_cs4236_ext_in(chip, CS4236_VERSION); - snd_printdd("CS4236: [0x%lx] C1 (version) = 0x%x, ext = 0x%x\n", cport, ver1, ver2); + snd_printdd("CS4236: [0x%lx] C1 (version) = 0x%x, ext = 0x%x\n", + cport, ver1, ver2); if (ver1 != ver2) { snd_printk(KERN_ERR "CS4236+ chip detected, but " "control port 0x%lx is not valid\n", cport); @@ -321,13 +328,17 @@ int snd_cs4236_create(struct snd_card *card, snd_cs4236_ctrl_out(chip, 2, 0xff); snd_cs4236_ctrl_out(chip, 3, 0x00); snd_cs4236_ctrl_out(chip, 4, 0x80); - snd_cs4236_ctrl_out(chip, 5, ((IEC958_AES1_CON_PCM_CODER & 3) << 6) | IEC958_AES0_CON_EMPHASIS_NONE); + reg = ((IEC958_AES1_CON_PCM_CODER & 3) << 6) | + IEC958_AES0_CON_EMPHASIS_NONE; + snd_cs4236_ctrl_out(chip, 5, reg); snd_cs4236_ctrl_out(chip, 6, IEC958_AES1_CON_PCM_CODER >> 2); snd_cs4236_ctrl_out(chip, 7, 0x00); - /* 0x8c for C8 is valid for Turtle Beach Malibu - the IEC-958 output */ - /* is working with this setup, other hardware should have */ - /* different signal paths and this value should be selectable */ - /* in the future */ + /* + * 0x8c for C8 is valid for Turtle Beach Malibu - the IEC-958 + * output is working with this setup, other hardware should + * have different signal paths and this value should be + * selectable in the future + */ snd_cs4236_ctrl_out(chip, 8, 0x8c); chip->rate_constraint = snd_cs4236_xrate; chip->set_playback_format = snd_cs4236_playback_format; @@ -339,9 +350,10 @@ int snd_cs4236_create(struct snd_card *card, /* initialize extended registers */ for (reg = 0; reg < sizeof(snd_cs4236_ext_map); reg++) - snd_cs4236_ext_out(chip, CS4236_I23VAL(reg), snd_cs4236_ext_map[reg]); + snd_cs4236_ext_out(chip, CS4236_I23VAL(reg), + snd_cs4236_ext_map[reg]); - /* initialize compatible but more featured registers */ + /* initialize compatible but more featured registers */ snd_wss_out(chip, CS4231_LEFT_INPUT, 0x40); snd_wss_out(chip, CS4231_RIGHT_INPUT, 0x40); snd_wss_out(chip, CS4231_AUX1_LEFT_INPUT, 0xff); diff --git a/sound/isa/wss/wss_lib.c b/sound/isa/wss/wss_lib.c index 2ba18978b419..705db0924375 100644 --- a/sound/isa/wss/wss_lib.c +++ b/sound/isa/wss/wss_lib.c @@ -1682,7 +1682,7 @@ static void snd_wss_resume(struct snd_wss *chip) } #endif /* CONFIG_PM */ -int snd_wss_free(struct snd_wss *chip) +static int snd_wss_free(struct snd_wss *chip) { release_and_free_resource(chip->res_port); release_and_free_resource(chip->res_cport); @@ -1705,7 +1705,6 @@ int snd_wss_free(struct snd_wss *chip) kfree(chip); return 0; } -EXPORT_SYMBOL(snd_wss_free); static int snd_wss_dev_free(struct snd_device *device) { -- cgit v1.3-8-gc7d7 From 2da3e160cb3d226d87b907fab26850d838ed8d7c Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 5 Nov 2009 23:06:50 +0100 Subject: hw-breakpoint: Move asm-generic/hw_breakpoint.h to linux/hw_breakpoint.h We plan to make the breakpoints parameters generic among architectures. For that it's better to move the asm-generic header to a generic linux header. Signed-off-by: Frederic Weisbecker --- arch/x86/include/asm/hw_breakpoint.h | 2 +- include/asm-generic/hw_breakpoint.h | 139 ----------------------------------- include/linux/hw_breakpoint.h | 136 ++++++++++++++++++++++++++++++++++ 3 files changed, 137 insertions(+), 140 deletions(-) delete mode 100644 include/asm-generic/hw_breakpoint.h create mode 100644 include/linux/hw_breakpoint.h (limited to 'include') diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h index 1acb4d45de70..3cfca8e2b5f6 100644 --- a/arch/x86/include/asm/hw_breakpoint.h +++ b/arch/x86/include/asm/hw_breakpoint.h @@ -12,7 +12,7 @@ struct arch_hw_breakpoint { }; #include -#include +#include /* Available HW breakpoint length encodings */ #define HW_BREAKPOINT_LEN_1 0x40 diff --git a/include/asm-generic/hw_breakpoint.h b/include/asm-generic/hw_breakpoint.h deleted file mode 100644 index 9bf2d12eb74a..000000000000 --- a/include/asm-generic/hw_breakpoint.h +++ /dev/null @@ -1,139 +0,0 @@ -#ifndef _ASM_GENERIC_HW_BREAKPOINT_H -#define _ASM_GENERIC_HW_BREAKPOINT_H - -#ifndef __ARCH_HW_BREAKPOINT_H -#error "Please don't include this file directly" -#endif - -#ifdef __KERNEL__ -#include -#include -#include - -/** - * struct hw_breakpoint - unified kernel/user-space hardware breakpoint - * @triggered: callback invoked after target address access - * @info: arch-specific breakpoint info (address, length, and type) - * - * %hw_breakpoint structures are the kernel's way of representing - * hardware breakpoints. These are data breakpoints - * (also known as "watchpoints", triggered on data access), and the breakpoint's - * target address can be located in either kernel space or user space. - * - * The breakpoint's address, length, and type are highly - * architecture-specific. The values are encoded in the @info field; you - * specify them when registering the breakpoint. To examine the encoded - * values use hw_breakpoint_get_{kaddress,uaddress,len,type}(), declared - * below. - * - * The address is specified as a regular kernel pointer (for kernel-space - * breakponts) or as an %__user pointer (for user-space breakpoints). - * With register_user_hw_breakpoint(), the address must refer to a - * location in user space. The breakpoint will be active only while the - * requested task is running. Conversely with - * register_kernel_hw_breakpoint(), the address must refer to a location - * in kernel space, and the breakpoint will be active on all CPUs - * regardless of the current task. - * - * The length is the breakpoint's extent in bytes, which is subject to - * certain limitations. include/asm/hw_breakpoint.h contains macros - * defining the available lengths for a specific architecture. Note that - * the address's alignment must match the length. The breakpoint will - * catch accesses to any byte in the range from address to address + - * (length - 1). - * - * The breakpoint's type indicates the sort of access that will cause it - * to trigger. Possible values may include: - * - * %HW_BREAKPOINT_RW (triggered on read or write access), - * %HW_BREAKPOINT_WRITE (triggered on write access), and - * %HW_BREAKPOINT_READ (triggered on read access). - * - * Appropriate macros are defined in include/asm/hw_breakpoint.h; not all - * possibilities are available on all architectures. Execute breakpoints - * must have length equal to the special value %HW_BREAKPOINT_LEN_EXECUTE. - * - * When a breakpoint gets hit, the @triggered callback is - * invoked in_interrupt with a pointer to the %hw_breakpoint structure and the - * processor registers. - * Data breakpoints occur after the memory access has taken place. - * Breakpoints are disabled during execution @triggered, to avoid - * recursive traps and allow unhindered access to breakpointed memory. - * - * This sample code sets a breakpoint on pid_max and registers a callback - * function for writes to that variable. Note that it is not portable - * as written, because not all architectures support HW_BREAKPOINT_LEN_4. - * - * ---------------------------------------------------------------------- - * - * #include - * - * struct hw_breakpoint my_bp; - * - * static void my_triggered(struct hw_breakpoint *bp, struct pt_regs *regs) - * { - * printk(KERN_DEBUG "Inside triggered routine of breakpoint exception\n"); - * dump_stack(); - * ............... - * } - * - * static struct hw_breakpoint my_bp; - * - * static int init_module(void) - * { - * ...................... - * my_bp.info.type = HW_BREAKPOINT_WRITE; - * my_bp.info.len = HW_BREAKPOINT_LEN_4; - * - * my_bp.installed = (void *)my_bp_installed; - * - * rc = register_kernel_hw_breakpoint(&my_bp); - * ...................... - * } - * - * static void cleanup_module(void) - * { - * ...................... - * unregister_kernel_hw_breakpoint(&my_bp); - * ...................... - * } - * - * ---------------------------------------------------------------------- - */ -struct hw_breakpoint { - void (*triggered)(struct hw_breakpoint *, struct pt_regs *); - struct arch_hw_breakpoint info; -}; - -/* - * len and type values are defined in include/asm/hw_breakpoint.h. - * Available values vary according to the architecture. On i386 the - * possibilities are: - * - * HW_BREAKPOINT_LEN_1 - * HW_BREAKPOINT_LEN_2 - * HW_BREAKPOINT_LEN_4 - * HW_BREAKPOINT_RW - * HW_BREAKPOINT_READ - * - * On other architectures HW_BREAKPOINT_LEN_8 may be available, and the - * 1-, 2-, and 4-byte lengths may be unavailable. There also may be - * HW_BREAKPOINT_WRITE. You can use #ifdef to check at compile time. - */ - -extern int register_user_hw_breakpoint(struct task_struct *tsk, - struct hw_breakpoint *bp); -extern int modify_user_hw_breakpoint(struct task_struct *tsk, - struct hw_breakpoint *bp); -extern void unregister_user_hw_breakpoint(struct task_struct *tsk, - struct hw_breakpoint *bp); -/* - * Kernel breakpoints are not associated with any particular thread. - */ -extern int register_kernel_hw_breakpoint(struct hw_breakpoint *bp); -extern void unregister_kernel_hw_breakpoint(struct hw_breakpoint *bp); - -extern unsigned int hbp_kernel_pos; - -#endif /* __KERNEL__ */ -#endif /* _ASM_GENERIC_HW_BREAKPOINT_H */ diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h new file mode 100644 index 000000000000..61ccc8f17eac --- /dev/null +++ b/include/linux/hw_breakpoint.h @@ -0,0 +1,136 @@ +#ifndef _LINUX_HW_BREAKPOINT_H +#define _LINUX_HW_BREAKPOINT_H + + +#ifdef __KERNEL__ +#include +#include +#include + +/** + * struct hw_breakpoint - unified kernel/user-space hardware breakpoint + * @triggered: callback invoked after target address access + * @info: arch-specific breakpoint info (address, length, and type) + * + * %hw_breakpoint structures are the kernel's way of representing + * hardware breakpoints. These are data breakpoints + * (also known as "watchpoints", triggered on data access), and the breakpoint's + * target address can be located in either kernel space or user space. + * + * The breakpoint's address, length, and type are highly + * architecture-specific. The values are encoded in the @info field; you + * specify them when registering the breakpoint. To examine the encoded + * values use hw_breakpoint_get_{kaddress,uaddress,len,type}(), declared + * below. + * + * The address is specified as a regular kernel pointer (for kernel-space + * breakponts) or as an %__user pointer (for user-space breakpoints). + * With register_user_hw_breakpoint(), the address must refer to a + * location in user space. The breakpoint will be active only while the + * requested task is running. Conversely with + * register_kernel_hw_breakpoint(), the address must refer to a location + * in kernel space, and the breakpoint will be active on all CPUs + * regardless of the current task. + * + * The length is the breakpoint's extent in bytes, which is subject to + * certain limitations. include/asm/hw_breakpoint.h contains macros + * defining the available lengths for a specific architecture. Note that + * the address's alignment must match the length. The breakpoint will + * catch accesses to any byte in the range from address to address + + * (length - 1). + * + * The breakpoint's type indicates the sort of access that will cause it + * to trigger. Possible values may include: + * + * %HW_BREAKPOINT_RW (triggered on read or write access), + * %HW_BREAKPOINT_WRITE (triggered on write access), and + * %HW_BREAKPOINT_READ (triggered on read access). + * + * Appropriate macros are defined in include/asm/hw_breakpoint.h; not all + * possibilities are available on all architectures. Execute breakpoints + * must have length equal to the special value %HW_BREAKPOINT_LEN_EXECUTE. + * + * When a breakpoint gets hit, the @triggered callback is + * invoked in_interrupt with a pointer to the %hw_breakpoint structure and the + * processor registers. + * Data breakpoints occur after the memory access has taken place. + * Breakpoints are disabled during execution @triggered, to avoid + * recursive traps and allow unhindered access to breakpointed memory. + * + * This sample code sets a breakpoint on pid_max and registers a callback + * function for writes to that variable. Note that it is not portable + * as written, because not all architectures support HW_BREAKPOINT_LEN_4. + * + * ---------------------------------------------------------------------- + * + * #include + * + * struct hw_breakpoint my_bp; + * + * static void my_triggered(struct hw_breakpoint *bp, struct pt_regs *regs) + * { + * printk(KERN_DEBUG "Inside triggered routine of breakpoint exception\n"); + * dump_stack(); + * ............... + * } + * + * static struct hw_breakpoint my_bp; + * + * static int init_module(void) + * { + * ...................... + * my_bp.info.type = HW_BREAKPOINT_WRITE; + * my_bp.info.len = HW_BREAKPOINT_LEN_4; + * + * my_bp.installed = (void *)my_bp_installed; + * + * rc = register_kernel_hw_breakpoint(&my_bp); + * ...................... + * } + * + * static void cleanup_module(void) + * { + * ...................... + * unregister_kernel_hw_breakpoint(&my_bp); + * ...................... + * } + * + * ---------------------------------------------------------------------- + */ +struct hw_breakpoint { + void (*triggered)(struct hw_breakpoint *, struct pt_regs *); + struct arch_hw_breakpoint info; +}; + +/* + * len and type values are defined in include/asm/hw_breakpoint.h. + * Available values vary according to the architecture. On i386 the + * possibilities are: + * + * HW_BREAKPOINT_LEN_1 + * HW_BREAKPOINT_LEN_2 + * HW_BREAKPOINT_LEN_4 + * HW_BREAKPOINT_RW + * HW_BREAKPOINT_READ + * + * On other architectures HW_BREAKPOINT_LEN_8 may be available, and the + * 1-, 2-, and 4-byte lengths may be unavailable. There also may be + * HW_BREAKPOINT_WRITE. You can use #ifdef to check at compile time. + */ + +extern int register_user_hw_breakpoint(struct task_struct *tsk, + struct hw_breakpoint *bp); +extern int modify_user_hw_breakpoint(struct task_struct *tsk, + struct hw_breakpoint *bp); +extern void unregister_user_hw_breakpoint(struct task_struct *tsk, + struct hw_breakpoint *bp); +/* + * Kernel breakpoints are not associated with any particular thread. + */ +extern int register_kernel_hw_breakpoint(struct hw_breakpoint *bp); +extern void unregister_kernel_hw_breakpoint(struct hw_breakpoint *bp); + +extern unsigned int hbp_kernel_pos; + +#endif /* __KERNEL__ */ +#endif /* _LINUX_HW_BREAKPOINT_H */ -- cgit v1.3-8-gc7d7 From 13f18aa05f5abe135f47b6417537ae2b2fedc18c Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 5 Nov 2009 20:44:37 -0800 Subject: net: drop capability from protocol definitions struct can_proto had a capability field which wasn't ever used. It is dropped entirely. struct inet_protosw had a capability field which can be more clearly expressed in the code by just checking if sock->type = SOCK_RAW. Signed-off-by: Eric Paris Acked-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/can/core.h | 2 -- include/net/protocol.h | 4 ---- net/can/af_can.c | 5 ----- net/can/bcm.c | 1 - net/can/raw.c | 1 - net/dccp/ipv4.c | 1 - net/dccp/ipv6.c | 1 - net/ipv4/af_inet.c | 5 +---- net/ipv4/udplite.c | 1 - net/ipv6/af_inet6.c | 2 +- net/ipv6/raw.c | 1 - net/ipv6/tcp_ipv6.c | 1 - net/ipv6/udp.c | 1 - net/ipv6/udplite.c | 1 - net/sctp/ipv6.c | 2 -- net/sctp/protocol.c | 2 -- 16 files changed, 2 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/include/linux/can/core.h b/include/linux/can/core.h index 25085cbadcfc..6c507bea275f 100644 --- a/include/linux/can/core.h +++ b/include/linux/can/core.h @@ -32,14 +32,12 @@ * struct can_proto - CAN protocol structure * @type: type argument in socket() syscall, e.g. SOCK_DGRAM. * @protocol: protocol number in socket() syscall. - * @capability: capability needed to open the socket, or -1 for no restriction. * @ops: pointer to struct proto_ops for sock->ops. * @prot: pointer to struct proto structure. */ struct can_proto { int type; int protocol; - int capability; struct proto_ops *ops; struct proto *prot; }; diff --git a/include/net/protocol.h b/include/net/protocol.h index 89932d45da59..f1effdd3c265 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -82,10 +82,6 @@ struct inet_protosw { struct proto *prot; const struct proto_ops *ops; - int capability; /* Which (if any) capability do - * we need to use this socket - * interface? - */ char no_check; /* checksum on rcv/xmit/none? */ unsigned char flags; /* See INET_PROTOSW_* below. */ }; diff --git a/net/can/af_can.c b/net/can/af_can.c index 3f2eb27e1ffb..9c0426dc3184 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -160,11 +160,6 @@ static int can_create(struct net *net, struct socket *sock, int protocol) goto errout; } - if (cp->capability >= 0 && !capable(cp->capability)) { - err = -EPERM; - goto errout; - } - sock->ops = cp->ops; sk = sk_alloc(net, PF_CAN, GFP_KERNEL, cp->prot); diff --git a/net/can/bcm.c b/net/can/bcm.c index 2f47039c79dd..67b5433db13b 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1576,7 +1576,6 @@ static struct proto bcm_proto __read_mostly = { static struct can_proto bcm_can_proto __read_mostly = { .type = SOCK_DGRAM, .protocol = CAN_BCM, - .capability = -1, .ops = &bcm_ops, .prot = &bcm_proto, }; diff --git a/net/can/raw.c b/net/can/raw.c index 6e77db58b9e6..abca920440b5 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -742,7 +742,6 @@ static struct proto raw_proto __read_mostly = { static struct can_proto raw_can_proto __read_mostly = { .type = SOCK_RAW, .protocol = CAN_RAW, - .capability = -1, .ops = &raw_ops, .prot = &raw_proto, }; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 00028d4b09d9..2423a0866733 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -991,7 +991,6 @@ static struct inet_protosw dccp_v4_protosw = { .protocol = IPPROTO_DCCP, .prot = &dccp_v4_prot, .ops = &inet_dccp_ops, - .capability = -1, .no_check = 0, .flags = INET_PROTOSW_ICSK, }; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 6d89f9f7d5d8..50ea91a77705 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -1185,7 +1185,6 @@ static struct inet_protosw dccp_v6_protosw = { .protocol = IPPROTO_DCCP, .prot = &dccp_v6_prot, .ops = &inet6_dccp_ops, - .capability = -1, .flags = INET_PROTOSW_ICSK, }; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 538e84d0bcba..180ec4c94919 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -325,7 +325,7 @@ lookup_protocol: } err = -EPERM; - if (answer->capability > 0 && !capable(answer->capability)) + if (sock->type == SOCK_RAW && !capable(CAP_NET_RAW)) goto out_rcu_unlock; err = -EAFNOSUPPORT; @@ -947,7 +947,6 @@ static struct inet_protosw inetsw_array[] = .protocol = IPPROTO_TCP, .prot = &tcp_prot, .ops = &inet_stream_ops, - .capability = -1, .no_check = 0, .flags = INET_PROTOSW_PERMANENT | INET_PROTOSW_ICSK, @@ -958,7 +957,6 @@ static struct inet_protosw inetsw_array[] = .protocol = IPPROTO_UDP, .prot = &udp_prot, .ops = &inet_dgram_ops, - .capability = -1, .no_check = UDP_CSUM_DEFAULT, .flags = INET_PROTOSW_PERMANENT, }, @@ -969,7 +967,6 @@ static struct inet_protosw inetsw_array[] = .protocol = IPPROTO_IP, /* wild card */ .prot = &raw_prot, .ops = &inet_sockraw_ops, - .capability = CAP_NET_RAW, .no_check = UDP_CSUM_DEFAULT, .flags = INET_PROTOSW_REUSE, } diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index 470c504b9554..66f79513f4a5 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -64,7 +64,6 @@ static struct inet_protosw udplite4_protosw = { .protocol = IPPROTO_UDPLITE, .prot = &udplite_prot, .ops = &inet_dgram_ops, - .capability = -1, .no_check = 0, /* must checksum (RFC 3828) */ .flags = INET_PROTOSW_PERMANENT, }; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 9105b25defe5..1b3889356599 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -158,7 +158,7 @@ lookup_protocol: } err = -EPERM; - if (answer->capability > 0 && !capable(answer->capability)) + if (sock->type == SOCK_RAW && !capable(CAP_NET_RAW)) goto out_rcu_unlock; sock->ops = answer->ops; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index cb834ab7f071..818ef21ba76d 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1336,7 +1336,6 @@ static struct inet_protosw rawv6_protosw = { .protocol = IPPROTO_IP, /* wild card */ .prot = &rawv6_prot, .ops = &inet6_sockraw_ops, - .capability = CAP_NET_RAW, .no_check = UDP_CSUM_DEFAULT, .flags = INET_PROTOSW_REUSE, }; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 34925f089e07..696a22f034e8 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -2112,7 +2112,6 @@ static struct inet_protosw tcpv6_protosw = { .protocol = IPPROTO_TCP, .prot = &tcpv6_prot, .ops = &inet6_stream_ops, - .capability = -1, .no_check = 0, .flags = INET_PROTOSW_PERMANENT | INET_PROTOSW_ICSK, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index d3b59d73f507..bbe2f3e445fc 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1286,7 +1286,6 @@ static struct inet_protosw udpv6_protosw = { .protocol = IPPROTO_UDP, .prot = &udpv6_prot, .ops = &inet6_dgram_ops, - .capability =-1, .no_check = UDP_CSUM_DEFAULT, .flags = INET_PROTOSW_PERMANENT, }; diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index d737a27ee010..6ea6938919e6 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -62,7 +62,6 @@ static struct inet_protosw udplite6_protosw = { .protocol = IPPROTO_UDPLITE, .prot = &udplitev6_prot, .ops = &inet6_dgram_ops, - .capability = -1, .no_check = 0, .flags = INET_PROTOSW_PERMANENT, }; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index bb280e60e00a..bacd6a7318be 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -930,7 +930,6 @@ static struct inet_protosw sctpv6_seqpacket_protosw = { .protocol = IPPROTO_SCTP, .prot = &sctpv6_prot, .ops = &inet6_seqpacket_ops, - .capability = -1, .no_check = 0, .flags = SCTP_PROTOSW_FLAG }; @@ -939,7 +938,6 @@ static struct inet_protosw sctpv6_stream_protosw = { .protocol = IPPROTO_SCTP, .prot = &sctpv6_prot, .ops = &inet6_seqpacket_ops, - .capability = -1, .no_check = 0, .flags = SCTP_PROTOSW_FLAG, }; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index fe44c57101de..08ef203d36ac 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -909,7 +909,6 @@ static struct inet_protosw sctp_seqpacket_protosw = { .protocol = IPPROTO_SCTP, .prot = &sctp_prot, .ops = &inet_seqpacket_ops, - .capability = -1, .no_check = 0, .flags = SCTP_PROTOSW_FLAG }; @@ -918,7 +917,6 @@ static struct inet_protosw sctp_stream_protosw = { .protocol = IPPROTO_SCTP, .prot = &sctp_prot, .ops = &inet_seqpacket_ops, - .capability = -1, .no_check = 0, .flags = SCTP_PROTOSW_FLAG }; -- cgit v1.3-8-gc7d7 From 3f378b684453f2a028eda463ce383370545d9cc9 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 5 Nov 2009 22:18:14 -0800 Subject: net: pass kern to net_proto_family create function The generic __sock_create function has a kern argument which allows the security system to make decisions based on if a socket is being created by the kernel or by userspace. This patch passes that flag to the net_proto_family specific create function, so it can do the same thing. Signed-off-by: Eric Paris Acked-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- drivers/isdn/mISDN/socket.c | 2 +- drivers/net/pppox.c | 3 ++- include/linux/net.h | 3 ++- net/appletalk/ddp.c | 3 ++- net/atm/pvc.c | 3 ++- net/atm/svc.c | 7 ++++--- net/ax25/af_ax25.c | 3 ++- net/bluetooth/af_bluetooth.c | 5 +++-- net/bluetooth/bnep/sock.c | 3 ++- net/bluetooth/cmtp/sock.c | 3 ++- net/bluetooth/hci_sock.c | 3 ++- net/bluetooth/hidp/sock.c | 3 ++- net/bluetooth/l2cap.c | 3 ++- net/bluetooth/rfcomm/sock.c | 3 ++- net/bluetooth/sco.c | 3 ++- net/can/af_can.c | 3 ++- net/decnet/af_decnet.c | 3 ++- net/econet/af_econet.c | 3 ++- net/ieee802154/af_ieee802154.c | 2 +- net/ipv4/af_inet.c | 3 ++- net/ipv6/af_inet6.c | 3 ++- net/ipx/af_ipx.c | 3 ++- net/irda/af_irda.c | 7 ++++--- net/iucv/af_iucv.c | 3 ++- net/key/af_key.c | 3 ++- net/llc/af_llc.c | 5 ++++- net/netlink/af_netlink.c | 3 ++- net/netrom/af_netrom.c | 3 ++- net/packet/af_packet.c | 3 ++- net/phonet/af_phonet.c | 3 ++- net/rds/af_rds.c | 3 ++- net/rose/af_rose.c | 3 ++- net/rxrpc/af_rxrpc.c | 3 ++- net/socket.c | 2 +- net/tipc/socket.c | 6 ++++-- net/unix/af_unix.c | 3 ++- net/x25/af_x25.c | 3 ++- 37 files changed, 80 insertions(+), 43 deletions(-) (limited to 'include') diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c index 28182ed8dea1..fcfe17a19a61 100644 --- a/drivers/isdn/mISDN/socket.c +++ b/drivers/isdn/mISDN/socket.c @@ -779,7 +779,7 @@ base_sock_create(struct net *net, struct socket *sock, int protocol) } static int -mISDN_sock_create(struct net *net, struct socket *sock, int proto) +mISDN_sock_create(struct net *net, struct socket *sock, int proto, int kern) { int err = -EPROTONOSUPPORT; diff --git a/drivers/net/pppox.c b/drivers/net/pppox.c index c14ee24c05a8..ac806b27c658 100644 --- a/drivers/net/pppox.c +++ b/drivers/net/pppox.c @@ -104,7 +104,8 @@ int pppox_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) EXPORT_SYMBOL(pppox_ioctl); -static int pppox_create(struct net *net, struct socket *sock, int protocol) +static int pppox_create(struct net *net, struct socket *sock, int protocol, + int kern) { int rc = -EPROTOTYPE; diff --git a/include/linux/net.h b/include/linux/net.h index 4da9d571b053..70ee3c310f15 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -204,7 +204,8 @@ struct proto_ops { struct net_proto_family { int family; - int (*create)(struct net *net, struct socket *sock, int protocol); + int (*create)(struct net *net, struct socket *sock, + int protocol, int kern); struct module *owner; }; diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index abe38014b7fd..4b0ce2e2b46e 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1021,7 +1021,8 @@ static struct proto ddp_proto = { * Create a socket. Initialise the socket, blank the addresses * set the state. */ -static int atalk_create(struct net *net, struct socket *sock, int protocol) +static int atalk_create(struct net *net, struct socket *sock, int protocol, + int kern) { struct sock *sk; int rc = -ESOCKTNOSUPPORT; diff --git a/net/atm/pvc.c b/net/atm/pvc.c index a6e1fdbae87f..8d74e62b0d79 100644 --- a/net/atm/pvc.c +++ b/net/atm/pvc.c @@ -127,7 +127,8 @@ static const struct proto_ops pvc_proto_ops = { }; -static int pvc_create(struct net *net, struct socket *sock,int protocol) +static int pvc_create(struct net *net, struct socket *sock, int protocol, + int kern) { if (net != &init_net) return -EAFNOSUPPORT; diff --git a/net/atm/svc.c b/net/atm/svc.c index 819354233318..c7395070ee78 100644 --- a/net/atm/svc.c +++ b/net/atm/svc.c @@ -25,7 +25,7 @@ #include "signaling.h" #include "addr.h" -static int svc_create(struct net *net, struct socket *sock,int protocol); +static int svc_create(struct net *net, struct socket *sock, int protocol, int kern); /* * Note: since all this is still nicely synchronized with the signaling demon, @@ -330,7 +330,7 @@ static int svc_accept(struct socket *sock,struct socket *newsock,int flags) lock_sock(sk); - error = svc_create(sock_net(sk), newsock,0); + error = svc_create(sock_net(sk), newsock, 0, 0); if (error) goto out; @@ -650,7 +650,8 @@ static const struct proto_ops svc_proto_ops = { }; -static int svc_create(struct net *net, struct socket *sock,int protocol) +static int svc_create(struct net *net, struct socket *sock, int protocol, + int kern) { int error; diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index f1e998b2796e..d6ddfa4c4471 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -799,7 +799,8 @@ static struct proto ax25_proto = { .obj_size = sizeof(struct sock), }; -static int ax25_create(struct net *net, struct socket *sock, int protocol) +static int ax25_create(struct net *net, struct socket *sock, int protocol, + int kern) { struct sock *sk; ax25_cb *ax25; diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 399e59c9c6cb..087cc51f5927 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -126,7 +126,8 @@ int bt_sock_unregister(int proto) } EXPORT_SYMBOL(bt_sock_unregister); -static int bt_sock_create(struct net *net, struct socket *sock, int proto) +static int bt_sock_create(struct net *net, struct socket *sock, int proto, + int kern) { int err; @@ -144,7 +145,7 @@ static int bt_sock_create(struct net *net, struct socket *sock, int proto) read_lock(&bt_proto_lock); if (bt_proto[proto] && try_module_get(bt_proto[proto]->owner)) { - err = bt_proto[proto]->create(net, sock, proto); + err = bt_proto[proto]->create(net, sock, proto, kern); bt_sock_reclassify_lock(sock, proto); module_put(bt_proto[proto]->owner); } diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c index 0a2c5460bb48..2ff6ac7b2ed4 100644 --- a/net/bluetooth/bnep/sock.c +++ b/net/bluetooth/bnep/sock.c @@ -195,7 +195,8 @@ static struct proto bnep_proto = { .obj_size = sizeof(struct bt_sock) }; -static int bnep_sock_create(struct net *net, struct socket *sock, int protocol) +static int bnep_sock_create(struct net *net, struct socket *sock, int protocol, + int kern) { struct sock *sk; diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c index de7c8040bc56..978cc3a718ad 100644 --- a/net/bluetooth/cmtp/sock.c +++ b/net/bluetooth/cmtp/sock.c @@ -190,7 +190,8 @@ static struct proto cmtp_proto = { .obj_size = sizeof(struct bt_sock) }; -static int cmtp_sock_create(struct net *net, struct socket *sock, int protocol) +static int cmtp_sock_create(struct net *net, struct socket *sock, int protocol, + int kern) { struct sock *sk; diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index e7395f231989..1ca5c7ca9bd4 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -621,7 +621,8 @@ static struct proto hci_sk_proto = { .obj_size = sizeof(struct hci_pinfo) }; -static int hci_sock_create(struct net *net, struct socket *sock, int protocol) +static int hci_sock_create(struct net *net, struct socket *sock, int protocol, + int kern) { struct sock *sk; diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c index 4beb6a7a2953..9cfef68b9fec 100644 --- a/net/bluetooth/hidp/sock.c +++ b/net/bluetooth/hidp/sock.c @@ -241,7 +241,8 @@ static struct proto hidp_proto = { .obj_size = sizeof(struct bt_sock) }; -static int hidp_sock_create(struct net *net, struct socket *sock, int protocol) +static int hidp_sock_create(struct net *net, struct socket *sock, int protocol, + int kern) { struct sock *sk; diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index d65101d92ee5..365ae161d702 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -819,7 +819,8 @@ static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int p return sk; } -static int l2cap_sock_create(struct net *net, struct socket *sock, int protocol) +static int l2cap_sock_create(struct net *net, struct socket *sock, int protocol, + int kern) { struct sock *sk; diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index d3bfc1b0afb1..4b5968dda673 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -323,7 +323,8 @@ static struct sock *rfcomm_sock_alloc(struct net *net, struct socket *sock, int return sk; } -static int rfcomm_sock_create(struct net *net, struct socket *sock, int protocol) +static int rfcomm_sock_create(struct net *net, struct socket *sock, + int protocol, int kern) { struct sock *sk; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 694a65541b73..dd8f6ec57dce 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -430,7 +430,8 @@ static struct sock *sco_sock_alloc(struct net *net, struct socket *sock, int pro return sk; } -static int sco_sock_create(struct net *net, struct socket *sock, int protocol) +static int sco_sock_create(struct net *net, struct socket *sock, int protocol, + int kern) { struct sock *sk; diff --git a/net/can/af_can.c b/net/can/af_can.c index 9c0426dc3184..833bd838edc6 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -114,7 +114,8 @@ static void can_sock_destruct(struct sock *sk) skb_queue_purge(&sk->sk_receive_queue); } -static int can_create(struct net *net, struct socket *sock, int protocol) +static int can_create(struct net *net, struct socket *sock, int protocol, + int kern) { struct sock *sk; struct can_proto *cp; diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 2e355841ca99..9ade3a6de954 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -675,7 +675,8 @@ char *dn_addr2asc(__u16 addr, char *buf) -static int dn_create(struct net *net, struct socket *sock, int protocol) +static int dn_create(struct net *net, struct socket *sock, int protocol, + int kern) { struct sock *sk; diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index 5e9426a11c3e..596679803de5 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -605,7 +605,8 @@ static struct proto econet_proto = { * Create an Econet socket */ -static int econet_create(struct net *net, struct socket *sock, int protocol) +static int econet_create(struct net *net, struct socket *sock, int protocol, + int kern) { struct sock *sk; struct econet_sock *eo; diff --git a/net/ieee802154/af_ieee802154.c b/net/ieee802154/af_ieee802154.c index 309348fba72b..de6e34d2a7f8 100644 --- a/net/ieee802154/af_ieee802154.c +++ b/net/ieee802154/af_ieee802154.c @@ -234,7 +234,7 @@ static const struct proto_ops ieee802154_dgram_ops = { * set the state. */ static int ieee802154_create(struct net *net, struct socket *sock, - int protocol) + int protocol, int kern) { struct sock *sk; int rc; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 180ec4c94919..5c7e42c02afb 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -262,7 +262,8 @@ static inline int inet_netns_ok(struct net *net, int protocol) * Create an inet socket. */ -static int inet_create(struct net *net, struct socket *sock, int protocol) +static int inet_create(struct net *net, struct socket *sock, int protocol, + int kern) { struct sock *sk; struct inet_protosw *answer; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 1b3889356599..45ed5e05ab32 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -95,7 +95,8 @@ static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk) return (struct ipv6_pinfo *)(((u8 *)sk) + offset); } -static int inet6_create(struct net *net, struct socket *sock, int protocol) +static int inet6_create(struct net *net, struct socket *sock, int protocol, + int kern) { struct inet_sock *inet; struct ipv6_pinfo *np; diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 6481ee4bdf72..96d193a24415 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -1352,7 +1352,8 @@ static struct proto ipx_proto = { .obj_size = sizeof(struct ipx_sock), }; -static int ipx_create(struct net *net, struct socket *sock, int protocol) +static int ipx_create(struct net *net, struct socket *sock, int protocol, + int kern) { int rc = -ESOCKTNOSUPPORT; struct sock *sk; diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 9429e4002bca..e73a0016c0aa 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -61,7 +61,7 @@ #include -static int irda_create(struct net *net, struct socket *sock, int protocol); +static int irda_create(struct net *net, struct socket *sock, int protocol, int kern); static const struct proto_ops irda_stream_ops; static const struct proto_ops irda_seqpacket_ops; @@ -839,7 +839,7 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags) IRDA_DEBUG(2, "%s()\n", __func__); - err = irda_create(sock_net(sk), newsock, sk->sk_protocol); + err = irda_create(sock_net(sk), newsock, sk->sk_protocol, 0); if (err) return err; @@ -1062,7 +1062,8 @@ static struct proto irda_proto = { * Create IrDA socket * */ -static int irda_create(struct net *net, struct socket *sock, int protocol) +static int irda_create(struct net *net, struct socket *sock, int protocol, + int kern) { struct sock *sk; struct irda_sock *self; diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 3aebabb158a8..1e428863574f 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -481,7 +481,8 @@ static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio) } /* Create an IUCV socket */ -static int iucv_sock_create(struct net *net, struct socket *sock, int protocol) +static int iucv_sock_create(struct net *net, struct socket *sock, int protocol, + int kern) { struct sock *sk; diff --git a/net/key/af_key.c b/net/key/af_key.c index 472f6594184a..86b2c22d0918 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -177,7 +177,8 @@ static struct proto key_proto = { .obj_size = sizeof(struct pfkey_sock), }; -static int pfkey_create(struct net *net, struct socket *sock, int protocol) +static int pfkey_create(struct net *net, struct socket *sock, int protocol, + int kern) { struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); struct sock *sk; diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 4866b4fb0c27..5266c286b260 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -140,14 +140,17 @@ static struct proto llc_proto = { /** * llc_ui_create - alloc and init a new llc_ui socket + * @net: network namespace (must be default network) * @sock: Socket to initialize and attach allocated sk to. * @protocol: Unused. + * @kern: on behalf of kernel or userspace * * Allocate and initialize a new llc_ui socket, validate the user wants a * socket type we have available. * Returns 0 upon success, negative upon failure. */ -static int llc_ui_create(struct net *net, struct socket *sock, int protocol) +static int llc_ui_create(struct net *net, struct socket *sock, int protocol, + int kern) { struct sock *sk; int rc = -ESOCKTNOSUPPORT; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 0cd2d8829313..aea805c98da3 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -428,7 +428,8 @@ static int __netlink_create(struct net *net, struct socket *sock, return 0; } -static int netlink_create(struct net *net, struct socket *sock, int protocol) +static int netlink_create(struct net *net, struct socket *sock, int protocol, + int kern) { struct module *module = NULL; struct mutex *cb_mutex; diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 281fa597cae5..4bdd5697f63b 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -425,7 +425,8 @@ static struct proto nr_proto = { .obj_size = sizeof(struct nr_sock), }; -static int nr_create(struct net *net, struct socket *sock, int protocol) +static int nr_create(struct net *net, struct socket *sock, int protocol, + int kern) { struct sock *sk; struct nr_sock *nr; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 91d246d34780..3304caa65347 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1344,7 +1344,8 @@ static struct proto packet_proto = { * Create a packet of type SOCK_PACKET. */ -static int packet_create(struct net *net, struct socket *sock, int protocol) +static int packet_create(struct net *net, struct socket *sock, int protocol, + int kern) { struct sock *sk; struct packet_sock *po; diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index 66737aa995ea..3bd1be6b26f0 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -60,7 +60,8 @@ static inline void phonet_proto_put(struct phonet_protocol *pp) /* protocol family functions */ -static int pn_socket_create(struct net *net, struct socket *sock, int protocol) +static int pn_socket_create(struct net *net, struct socket *sock, int protocol, + int kern) { struct sock *sk; struct pn_sock *pn; diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index 2b978dc6e75d..e25d8d5ce8df 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -410,7 +410,8 @@ static int __rds_create(struct socket *sock, struct sock *sk, int protocol) return 0; } -static int rds_create(struct net *net, struct socket *sock, int protocol) +static int rds_create(struct net *net, struct socket *sock, int protocol, + int kern) { struct sock *sk; diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index c17734c2ce89..4de4287fec37 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -512,7 +512,8 @@ static struct proto rose_proto = { .obj_size = sizeof(struct rose_sock), }; -static int rose_create(struct net *net, struct socket *sock, int protocol) +static int rose_create(struct net *net, struct socket *sock, int protocol, + int kern) { struct sock *sk; struct rose_sock *rose; diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 6817c9781ef3..f978d02a248a 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -608,7 +608,8 @@ static unsigned int rxrpc_poll(struct file *file, struct socket *sock, /* * create an RxRPC socket */ -static int rxrpc_create(struct net *net, struct socket *sock, int protocol) +static int rxrpc_create(struct net *net, struct socket *sock, int protocol, + int kern) { struct rxrpc_sock *rx; struct sock *sk; diff --git a/net/socket.c b/net/socket.c index 9dff31c9b799..4f3e0f0c156b 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1252,7 +1252,7 @@ static int __sock_create(struct net *net, int family, int type, int protocol, /* Now protected by module ref count */ rcu_read_unlock(); - err = pf->create(net, sock, protocol); + err = pf->create(net, sock, protocol, kern); if (err < 0) goto out_module_put; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index e6d9abf7440e..d00c2119faf3 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -177,6 +177,7 @@ static void reject_rx_queue(struct sock *sk) * @net: network namespace (must be default network) * @sock: pre-allocated socket structure * @protocol: protocol indicator (must be 0) + * @kern: caused by kernel or by userspace? * * This routine creates additional data structures used by the TIPC socket, * initializes them, and links them together. @@ -184,7 +185,8 @@ static void reject_rx_queue(struct sock *sk) * Returns 0 on success, errno otherwise */ -static int tipc_create(struct net *net, struct socket *sock, int protocol) +static int tipc_create(struct net *net, struct socket *sock, int protocol, + int kern) { const struct proto_ops *ops; socket_state state; @@ -1528,7 +1530,7 @@ static int accept(struct socket *sock, struct socket *new_sock, int flags) buf = skb_peek(&sk->sk_receive_queue); - res = tipc_create(sock_net(sock->sk), new_sock, 0); + res = tipc_create(sock_net(sock->sk), new_sock, 0, 0); if (!res) { struct sock *new_sk = new_sock->sk; struct tipc_sock *new_tsock = tipc_sk(new_sk); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 3291902f0b88..178d3af2a605 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -621,7 +621,8 @@ out: return sk; } -static int unix_create(struct net *net, struct socket *sock, int protocol) +static int unix_create(struct net *net, struct socket *sock, int protocol, + int kern) { if (protocol && protocol != PF_UNIX) return -EPROTONOSUPPORT; diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index e19d811788a5..38e235f61e27 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -501,7 +501,8 @@ out: return sk; } -static int x25_create(struct net *net, struct socket *sock, int protocol) +static int x25_create(struct net *net, struct socket *sock, int protocol, + int kern) { struct sock *sk; struct x25_sock *x25; -- cgit v1.3-8-gc7d7 From 765af10de6d93820def9978c53ed828e4d3bd4f4 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 5 Nov 2009 22:59:46 -0800 Subject: Input: add new keycodes useful in mobile devices Add new codes for camera focus key, and camera lens cover, keypad slide, front proximity switches. Signed-off-by: Jani Nikula Signed-off-by: Dmitry Torokhov --- include/linux/input.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/input.h b/include/linux/input.h index 9ee67b4b2b48..9a04e26daab2 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -595,6 +595,8 @@ struct input_absinfo { #define KEY_NUMERIC_STAR 0x20a #define KEY_NUMERIC_POUND 0x20b +#define KEY_CAMERA_FOCUS 0x210 + /* We avoid low common keys in module aliases so they don't get huge. */ #define KEY_MIN_INTERESTING KEY_MUTE #define KEY_MAX 0x2ff @@ -677,6 +679,9 @@ struct input_absinfo { #define SW_LINEOUT_INSERT 0x06 /* set = inserted */ #define SW_JACK_PHYSICAL_INSERT 0x07 /* set = mechanical switch set */ #define SW_VIDEOOUT_INSERT 0x08 /* set = inserted */ +#define SW_CAMERA_LENS_COVER 0x09 /* set = lens covered */ +#define SW_KEYPAD_SLIDE 0x0a /* set = keypad slide out */ +#define SW_FRONT_PROXIMITY 0x0b /* set = front proximity sensor active */ #define SW_MAX 0x0f #define SW_CNT (SW_MAX+1) -- cgit v1.3-8-gc7d7 From 69d9ab96f983720b7794e91437bd9c5f83bae9f7 Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Mon, 14 Sep 2009 18:11:03 +0400 Subject: wpan-phy: add a helper to put the wpan_phy device Signed-off-by: Dmitry Eremin-Solenikov --- include/net/wpan-phy.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/wpan-phy.h b/include/net/wpan-phy.h index 547b1e271ac9..5e803a0e4e72 100644 --- a/include/net/wpan-phy.h +++ b/include/net/wpan-phy.h @@ -56,6 +56,12 @@ static inline void *wpan_phy_priv(struct wpan_phy *phy) } struct wpan_phy *wpan_phy_find(const char *str); + +static inline void wpan_phy_put(struct wpan_phy *phy) +{ + put_device(&phy->dev); +} + static inline const char *wpan_phy_name(struct wpan_phy *phy) { return dev_name(&phy->dev); -- cgit v1.3-8-gc7d7 From 1c889f4db6b2f8f8429e62011ba622642faba019 Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Tue, 15 Sep 2009 16:57:04 +0400 Subject: wpan-phy: add wpan-phy iteration functions Add API to iterate over the wpan-phy instances. Signed-off-by: Dmitry Eremin-Solenikov --- include/net/wpan-phy.h | 2 ++ net/ieee802154/wpan-class.c | 25 +++++++++++++++++++++++++ 2 files changed, 27 insertions(+) (limited to 'include') diff --git a/include/net/wpan-phy.h b/include/net/wpan-phy.h index 5e803a0e4e72..3367dd95cf85 100644 --- a/include/net/wpan-phy.h +++ b/include/net/wpan-phy.h @@ -48,6 +48,8 @@ struct wpan_phy *wpan_phy_alloc(size_t priv_size); int wpan_phy_register(struct device *parent, struct wpan_phy *phy); void wpan_phy_unregister(struct wpan_phy *phy); void wpan_phy_free(struct wpan_phy *phy); +/* Same semantics as for class_for_each_device */ +int wpan_phy_for_each(int (*fn)(struct wpan_phy *phy, void *data), void *data); static inline void *wpan_phy_priv(struct wpan_phy *phy) { diff --git a/net/ieee802154/wpan-class.c b/net/ieee802154/wpan-class.c index f306604da67a..0cec13842916 100644 --- a/net/ieee802154/wpan-class.c +++ b/net/ieee802154/wpan-class.c @@ -91,6 +91,31 @@ struct wpan_phy *wpan_phy_find(const char *str) } EXPORT_SYMBOL(wpan_phy_find); +struct wpan_phy_iter_data { + int (*fn)(struct wpan_phy *phy, void *data); + void *data; +}; + +static int wpan_phy_iter(struct device *dev, void *_data) +{ + struct wpan_phy_iter_data *wpid = _data; + struct wpan_phy *phy = container_of(dev, struct wpan_phy, dev); + return wpid->fn(phy, wpid->data); +} + +int wpan_phy_for_each(int (*fn)(struct wpan_phy *phy, void *data), + void *data) +{ + struct wpan_phy_iter_data wpid = { + .fn = fn, + .data = data, + }; + + return class_for_each_device(&wpan_phy_class, NULL, + &wpid, wpan_phy_iter); +} +EXPORT_SYMBOL(wpan_phy_for_each); + static int wpan_phy_idx_valid(int idx) { return idx >= 0; -- cgit v1.3-8-gc7d7 From a0b4a738e0e03f5e0d6ca366560f9a48e5adf83a Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Tue, 22 Sep 2009 15:26:48 +0400 Subject: wpan-phy: allow specifying a per-page channel mask IEEE 802.15.4-2006 defines channel pages that hold channels (max 32 pages, 27 channels per page). Allow the driver to specify supported channels on pages, other than the first one. Signed-off-by: Dmitry Eremin-Solenikov --- drivers/ieee802154/fakehard.c | 2 +- include/net/wpan-phy.h | 2 +- net/ieee802154/wpan-class.c | 20 +++++++++++++++++++- 3 files changed, 21 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/ieee802154/fakehard.c b/drivers/ieee802154/fakehard.c index 96a2959ce877..f6f2afefaa17 100644 --- a/drivers/ieee802154/fakehard.c +++ b/drivers/ieee802154/fakehard.c @@ -356,7 +356,7 @@ static int __devinit ieee802154fake_probe(struct platform_device *pdev) dev->addr_len); memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len); - phy->channels_supported = (1 << 27) - 1; + phy->channels_supported[0] = (1 << 27) - 1; phy->transmit_power = 0xbf; dev->netdev_ops = &fake_ops; diff --git a/include/net/wpan-phy.h b/include/net/wpan-phy.h index 3367dd95cf85..7b7fc581e673 100644 --- a/include/net/wpan-phy.h +++ b/include/net/wpan-phy.h @@ -34,7 +34,7 @@ struct wpan_phy { */ u8 current_channel; u8 current_page; - u32 channels_supported; + u32 channels_supported[32]; u8 transmit_power; u8 cca_mode; diff --git a/net/ieee802154/wpan-class.c b/net/ieee802154/wpan-class.c index 68ccde63155b..0c51f85aa591 100644 --- a/net/ieee802154/wpan-class.c +++ b/net/ieee802154/wpan-class.c @@ -40,12 +40,30 @@ static ssize_t name ## _show(struct device *dev, \ MASTER_SHOW(current_channel, "%d"); MASTER_SHOW(current_page, "%d"); -MASTER_SHOW(channels_supported, "%#x"); MASTER_SHOW_COMPLEX(transmit_power, "%d +- %d dB", ((signed char) (phy->transmit_power << 2)) >> 2, (phy->transmit_power >> 6) ? (phy->transmit_power >> 6) * 3 : 1 ); MASTER_SHOW(cca_mode, "%d"); +static ssize_t channels_supported_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct wpan_phy *phy = container_of(dev, struct wpan_phy, dev); + int ret; + int i, len = 0; + + mutex_lock(&phy->pib_lock); + for (i = 0; i < 32; i++) { + ret = snprintf(buf + len, PAGE_SIZE - len, + "%#09x\n", phy->channels_supported[i]); + if (ret < 0) + break; + len += ret; + } + mutex_unlock(&phy->pib_lock); + return len; +} + static struct device_attribute pmib_attrs[] = { __ATTR_RO(current_channel), __ATTR_RO(current_page), -- cgit v1.3-8-gc7d7 From e9cf356c0c6b975fda84b15a5abdd1db88d74f84 Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Mon, 28 Sep 2009 19:01:20 +0400 Subject: wpan-phy: follow usual patter of devices registration Follow the usual pattern of devices registration by adding new function (wpan_phy_set_dev) that sets child->parent relationship and removing parent argument from wpan_phy_register call. Signed-off-by: Dmitry Eremin-Solenikov --- drivers/ieee802154/fakehard.c | 3 ++- include/net/wpan-phy.h | 6 +++++- net/ieee802154/wpan-class.c | 4 +--- 3 files changed, 8 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/ieee802154/fakehard.c b/drivers/ieee802154/fakehard.c index f6f2afefaa17..4ea93cc7afcc 100644 --- a/drivers/ieee802154/fakehard.c +++ b/drivers/ieee802154/fakehard.c @@ -372,11 +372,12 @@ static int __devinit ieee802154fake_probe(struct platform_device *pdev) goto out; } + wpan_phy_set_dev(phy, &pdev->dev); SET_NETDEV_DEV(dev, &phy->dev); platform_set_drvdata(pdev, dev); - err = wpan_phy_register(&pdev->dev, phy); + err = wpan_phy_register(phy); if (err) goto out; diff --git a/include/net/wpan-phy.h b/include/net/wpan-phy.h index 7b7fc581e673..f63537c17363 100644 --- a/include/net/wpan-phy.h +++ b/include/net/wpan-phy.h @@ -45,7 +45,11 @@ struct wpan_phy { }; struct wpan_phy *wpan_phy_alloc(size_t priv_size); -int wpan_phy_register(struct device *parent, struct wpan_phy *phy); +static inline void wpan_phy_set_dev(struct wpan_phy *phy, struct device *dev) +{ + phy->dev.parent = dev; +} +int wpan_phy_register(struct wpan_phy *phy); void wpan_phy_unregister(struct wpan_phy *phy); void wpan_phy_free(struct wpan_phy *phy); /* Same semantics as for class_for_each_device */ diff --git a/net/ieee802154/wpan-class.c b/net/ieee802154/wpan-class.c index 0c51f85aa591..cd42e88b8397 100644 --- a/net/ieee802154/wpan-class.c +++ b/net/ieee802154/wpan-class.c @@ -168,10 +168,8 @@ struct wpan_phy *wpan_phy_alloc(size_t priv_size) } EXPORT_SYMBOL(wpan_phy_alloc); -int wpan_phy_register(struct device *parent, struct wpan_phy *phy) +int wpan_phy_register(struct wpan_phy *phy) { - phy->dev.parent = parent; - return device_add(&phy->dev); } EXPORT_SYMBOL(wpan_phy_register); -- cgit v1.3-8-gc7d7 From a9966b580a3e9d7cf820b5360b574f439d813ef4 Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Fri, 2 Oct 2009 19:05:00 +0400 Subject: ieee802154: constify struct net_device in some operations Some of ieee802154 operations really shouldn't change passed net_device. Constify passed argument. Signed-off-by: Dmitry Eremin-Solenikov --- drivers/ieee802154/fakehard.c | 8 ++++---- include/net/ieee802154_netdev.h | 10 +++++----- 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/drivers/ieee802154/fakehard.c b/drivers/ieee802154/fakehard.c index 4ea93cc7afcc..77fbb51157af 100644 --- a/drivers/ieee802154/fakehard.c +++ b/drivers/ieee802154/fakehard.c @@ -43,7 +43,7 @@ struct wpan_phy *net_to_phy(struct net_device *dev) * * Return the ID of the PAN from the PIB. */ -static u16 fake_get_pan_id(struct net_device *dev) +static u16 fake_get_pan_id(const struct net_device *dev) { BUG_ON(dev->type != ARPHRD_IEEE802154); @@ -58,7 +58,7 @@ static u16 fake_get_pan_id(struct net_device *dev) * device. If the device has not yet had a short address assigned * then this should return 0xFFFF to indicate a lack of association. */ -static u16 fake_get_short_addr(struct net_device *dev) +static u16 fake_get_short_addr(const struct net_device *dev) { BUG_ON(dev->type != ARPHRD_IEEE802154); @@ -78,7 +78,7 @@ static u16 fake_get_short_addr(struct net_device *dev) * Note: This is in section 7.2.1.2 of the IEEE 802.15.4-2006 * document. */ -static u8 fake_get_dsn(struct net_device *dev) +static u8 fake_get_dsn(const struct net_device *dev) { BUG_ON(dev->type != ARPHRD_IEEE802154); @@ -98,7 +98,7 @@ static u8 fake_get_dsn(struct net_device *dev) * Note: This is in section 7.2.1.2 of the IEEE 802.15.4-2006 * document. */ -static u8 fake_get_bsn(struct net_device *dev) +static u8 fake_get_bsn(const struct net_device *dev) { BUG_ON(dev->type != ARPHRD_IEEE802154); diff --git a/include/net/ieee802154_netdev.h b/include/net/ieee802154_netdev.h index 5dc6a61952de..d23fb5a94c96 100644 --- a/include/net/ieee802154_netdev.h +++ b/include/net/ieee802154_netdev.h @@ -98,14 +98,14 @@ struct ieee802154_mlme_ops { * FIXME: these should become the part of PIB/MIB interface. * However we still don't have IB interface of any kind */ - u16 (*get_pan_id)(struct net_device *dev); - u16 (*get_short_addr)(struct net_device *dev); - u8 (*get_dsn)(struct net_device *dev); - u8 (*get_bsn)(struct net_device *dev); + u16 (*get_pan_id)(const struct net_device *dev); + u16 (*get_short_addr)(const struct net_device *dev); + u8 (*get_dsn)(const struct net_device *dev); + u8 (*get_bsn)(const struct net_device *dev); }; static inline struct ieee802154_mlme_ops *ieee802154_mlme_ops( - struct net_device *dev) + const struct net_device *dev) { return dev->ml_priv; } -- cgit v1.3-8-gc7d7 From 1eaa9d03d3ee9156c8c405b006ce892ae28290ad Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Tue, 15 Sep 2009 17:04:44 +0400 Subject: ieee802154: add LIST_PHY command support Add nl802154 command to get information about PHY's present in the system. Signed-off-by: Dmitry Eremin-Solenikov --- include/linux/nl802154.h | 4 + net/ieee802154/Makefile | 2 +- net/ieee802154/netlink.c | 4 + net/ieee802154/nl-phy.c | 188 +++++++++++++++++++++++++++++++++++++++++++++ net/ieee802154/nl_policy.c | 2 + 5 files changed, 199 insertions(+), 1 deletion(-) create mode 100644 net/ieee802154/nl-phy.c (limited to 'include') diff --git a/include/linux/nl802154.h b/include/linux/nl802154.h index b7d9435d5a9f..275fd94f4727 100644 --- a/include/linux/nl802154.h +++ b/include/linux/nl802154.h @@ -65,6 +65,9 @@ enum { IEEE802154_ATTR_SEC, IEEE802154_ATTR_PAGE, + IEEE802154_ATTR_CHANNEL_PAGE_LIST, + + IEEE802154_ATTR_PHY_NAME, __IEEE802154_ATTR_MAX, }; @@ -114,6 +117,7 @@ enum { IEEE802154_RX_ENABLE_CONF, /* Not supported yet */ IEEE802154_LIST_IFACE, + IEEE802154_LIST_PHY, __IEEE802154_CMD_MAX, }; diff --git a/net/ieee802154/Makefile b/net/ieee802154/Makefile index 69af9f6a404b..ce2d33582859 100644 --- a/net/ieee802154/Makefile +++ b/net/ieee802154/Makefile @@ -1,5 +1,5 @@ obj-$(CONFIG_IEEE802154) += ieee802154.o af_802154.o -ieee802154-y := netlink.o nl-mac.o nl_policy.o wpan-class.o +ieee802154-y := netlink.o nl-mac.o nl-phy.o nl_policy.o wpan-class.o af_802154-y := af_ieee802154.o raw.o dgram.o ccflags-y += -Wall -DDEBUG diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c index 5b738ec7d9f1..8a221737f75c 100644 --- a/net/ieee802154/netlink.c +++ b/net/ieee802154/netlink.c @@ -87,6 +87,10 @@ int __init ieee802154_nl_init(void) if (rc) goto fail; + rc = nl802154_phy_register(); + if (rc) + goto fail; + return 0; fail: diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c new file mode 100644 index 000000000000..b7af722eb784 --- /dev/null +++ b/net/ieee802154/nl-phy.c @@ -0,0 +1,188 @@ +/* + * Netlink inteface for IEEE 802.15.4 stack + * + * Copyright 2007, 2008 Siemens AG + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Written by: + * Sergey Lapin + * Dmitry Eremin-Solenikov + * Maxim Osipov + */ + +#include +#include +#include +#include +#include + +#include "ieee802154.h" + +static int ieee802154_nl_fill_phy(struct sk_buff *msg, u32 pid, + u32 seq, int flags, struct wpan_phy *phy) +{ + void *hdr; + int i, pages = 0; + uint32_t *buf = kzalloc(32 * sizeof(uint32_t), GFP_KERNEL); + + pr_debug("%s\n", __func__); + + if (!buf) + goto out; + + hdr = genlmsg_put(msg, 0, seq, &nl802154_family, flags, + IEEE802154_LIST_PHY); + if (!hdr) + goto out; + + mutex_lock(&phy->pib_lock); + NLA_PUT_STRING(msg, IEEE802154_ATTR_PHY_NAME, wpan_phy_name(phy)); + + NLA_PUT_U8(msg, IEEE802154_ATTR_PAGE, phy->current_page); + NLA_PUT_U8(msg, IEEE802154_ATTR_CHANNEL, phy->current_channel); + for (i = 0; i < 32; i++) { + if (phy->channels_supported[i]) + buf[pages++] = phy->channels_supported[i] | (i << 27); + } + if (pages) + NLA_PUT(msg, IEEE802154_ATTR_CHANNEL_PAGE_LIST, + pages * sizeof(uint32_t), buf); + + mutex_unlock(&phy->pib_lock); + return genlmsg_end(msg, hdr); + +nla_put_failure: + mutex_unlock(&phy->pib_lock); + genlmsg_cancel(msg, hdr); +out: + kfree(buf); + return -EMSGSIZE; +} + +static int ieee802154_list_phy(struct sk_buff *skb, + struct genl_info *info) +{ + /* Request for interface name, index, type, IEEE address, + PAN Id, short address */ + struct sk_buff *msg; + struct wpan_phy *phy; + const char *name; + int rc = -ENOBUFS; + + pr_debug("%s\n", __func__); + + if (!info->attrs[IEEE802154_ATTR_PHY_NAME]) + return -EINVAL; + + name = nla_data(info->attrs[IEEE802154_ATTR_PHY_NAME]); + if (name[nla_len(info->attrs[IEEE802154_ATTR_PHY_NAME]) - 1] != '\0') + return -EINVAL; /* phy name should be null-terminated */ + + + phy = wpan_phy_find(name); + if (!phy) + return -ENODEV; + + msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!msg) + goto out_dev; + + rc = ieee802154_nl_fill_phy(msg, info->snd_pid, info->snd_seq, + 0, phy); + if (rc < 0) + goto out_free; + + wpan_phy_put(phy); + + return genlmsg_reply(msg, info); +out_free: + nlmsg_free(msg); +out_dev: + wpan_phy_put(phy); + return rc; + +} + +struct dump_phy_data { + struct sk_buff *skb; + struct netlink_callback *cb; + int idx, s_idx; +}; + +static int ieee802154_dump_phy_iter(struct wpan_phy *phy, void *_data) +{ + int rc; + struct dump_phy_data *data = _data; + + pr_debug("%s\n", __func__); + + if (data->idx++ < data->s_idx) + return 0; + + rc = ieee802154_nl_fill_phy(data->skb, + NETLINK_CB(data->cb->skb).pid, + data->cb->nlh->nlmsg_seq, + NLM_F_MULTI, + phy); + + if (rc < 0) { + data->idx--; + return rc; + } + + return 0; +} + +static int ieee802154_dump_phy(struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct dump_phy_data data = { + .cb = cb, + .skb = skb, + .s_idx = cb->args[0], + .idx = 0, + }; + + pr_debug("%s\n", __func__); + + wpan_phy_for_each(ieee802154_dump_phy_iter, &data); + + cb->args[0] = data.idx; + + return skb->len; +} + +static struct genl_ops ieee802154_phy_ops[] = { + IEEE802154_DUMP(IEEE802154_LIST_PHY, ieee802154_list_phy, + ieee802154_dump_phy), +}; + +/* + * No need to unregister as family unregistration will do it. + */ +int nl802154_phy_register(void) +{ + int i; + int rc; + + for (i = 0; i < ARRAY_SIZE(ieee802154_phy_ops); i++) { + rc = genl_register_ops(&nl802154_family, + &ieee802154_phy_ops[i]); + if (rc) + return rc; + } + + return 0; +} diff --git a/net/ieee802154/nl_policy.c b/net/ieee802154/nl_policy.c index 2363ebee02e7..6adda4d46f95 100644 --- a/net/ieee802154/nl_policy.c +++ b/net/ieee802154/nl_policy.c @@ -27,6 +27,7 @@ const struct nla_policy ieee802154_policy[IEEE802154_ATTR_MAX + 1] = { [IEEE802154_ATTR_DEV_NAME] = { .type = NLA_STRING, }, [IEEE802154_ATTR_DEV_INDEX] = { .type = NLA_U32, }, + [IEEE802154_ATTR_PHY_NAME] = { .type = NLA_STRING, }, [IEEE802154_ATTR_STATUS] = { .type = NLA_U8, }, [IEEE802154_ATTR_SHORT_ADDR] = { .type = NLA_U16, }, @@ -50,5 +51,6 @@ const struct nla_policy ieee802154_policy[IEEE802154_ATTR_MAX + 1] = { [IEEE802154_ATTR_CHANNELS] = { .type = NLA_U32, }, [IEEE802154_ATTR_DURATION] = { .type = NLA_U8, }, [IEEE802154_ATTR_ED_LIST] = { .len = 27 }, + [IEEE802154_ATTR_CHANNEL_PAGE_LIST] = { .len = 32 * 4, }, }; -- cgit v1.3-8-gc7d7 From 42723448b8bacf60c96812196d7725d8d605d0c4 Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Wed, 4 Nov 2009 17:53:23 +0300 Subject: ieee802154: add an mlme_ops call to retrieve PHY object ops->get_phy should increment reference to wpan-phy. As we return the external structure, we should do refcounting correctly. Signed-off-by: Dmitry Eremin-Solenikov --- include/net/ieee802154_netdev.h | 6 ++++++ include/net/wpan-phy.h | 2 ++ 2 files changed, 8 insertions(+) (limited to 'include') diff --git a/include/net/ieee802154_netdev.h b/include/net/ieee802154_netdev.h index d23fb5a94c96..57430555487a 100644 --- a/include/net/ieee802154_netdev.h +++ b/include/net/ieee802154_netdev.h @@ -74,8 +74,12 @@ static inline int mac_cb_type(struct sk_buff *skb) #define IEEE802154_MAC_SCAN_PASSIVE 2 #define IEEE802154_MAC_SCAN_ORPHAN 3 +struct wpan_phy; /* * This should be located at net_device->ml_priv + * + * get_phy should increment the reference counting on returned phy. + * Use wpan_wpy_put to put that reference. */ struct ieee802154_mlme_ops { int (*assoc_req)(struct net_device *dev, @@ -94,6 +98,8 @@ struct ieee802154_mlme_ops { int (*scan_req)(struct net_device *dev, u8 type, u32 channels, u8 page, u8 duration); + struct wpan_phy *(*get_phy)(const struct net_device *dev); + /* * FIXME: these should become the part of PIB/MIB interface. * However we still don't have IB interface of any kind diff --git a/include/net/wpan-phy.h b/include/net/wpan-phy.h index f63537c17363..a65e98509fbc 100644 --- a/include/net/wpan-phy.h +++ b/include/net/wpan-phy.h @@ -44,6 +44,8 @@ struct wpan_phy { char priv[0] __attribute__((__aligned__(NETDEV_ALIGN))); }; +#define to_phy(_dev) container_of(_dev, struct wpan_phy, dev) + struct wpan_phy *wpan_phy_alloc(size_t priv_size); static inline void wpan_phy_set_dev(struct wpan_phy *phy, struct device *dev) { -- cgit v1.3-8-gc7d7 From bb1cafb8fc414d6dbe933f888df6540c2ef02101 Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Thu, 5 Nov 2009 16:56:23 +0300 Subject: ieee802154: add support for creation/removal of logic interfaces Add support for two more NL802154 commands: ADD_IFACE and DEL_IFACE, thus allowing creation and removal of logic WPAN interfaces on the top of wpan-phy. Signed-off-by: Dmitry Eremin-Solenikov --- include/linux/nl802154.h | 2 + include/net/wpan-phy.h | 4 ++ net/ieee802154/nl-phy.c | 156 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 162 insertions(+) (limited to 'include') diff --git a/include/linux/nl802154.h b/include/linux/nl802154.h index 275fd94f4727..33d9f5175109 100644 --- a/include/linux/nl802154.h +++ b/include/linux/nl802154.h @@ -118,6 +118,8 @@ enum { IEEE802154_LIST_IFACE, IEEE802154_LIST_PHY, + IEEE802154_ADD_IFACE, + IEEE802154_DEL_IFACE, __IEEE802154_CMD_MAX, }; diff --git a/include/net/wpan-phy.h b/include/net/wpan-phy.h index a65e98509fbc..85926231c07a 100644 --- a/include/net/wpan-phy.h +++ b/include/net/wpan-phy.h @@ -41,6 +41,10 @@ struct wpan_phy { struct device dev; int idx; + struct net_device *(*add_iface)(struct wpan_phy *phy, + const char *name); + void (*del_iface)(struct wpan_phy *phy, struct net_device *dev); + char priv[0] __attribute__((__aligned__(NETDEV_ALIGN))); }; diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c index b7af722eb784..199a2d9d12f9 100644 --- a/net/ieee802154/nl-phy.c +++ b/net/ieee802154/nl-phy.c @@ -26,6 +26,9 @@ #include #include #include +#include +#include +#include /* for rtnl_{un,}lock */ #include #include "ieee802154.h" @@ -164,9 +167,162 @@ static int ieee802154_dump_phy(struct sk_buff *skb, return skb->len; } +static int ieee802154_add_iface(struct sk_buff *skb, + struct genl_info *info) +{ + struct sk_buff *msg; + struct wpan_phy *phy; + const char *name; + const char *devname; + int rc = -ENOBUFS; + struct net_device *dev; + + pr_debug("%s\n", __func__); + + if (!info->attrs[IEEE802154_ATTR_PHY_NAME]) + return -EINVAL; + + name = nla_data(info->attrs[IEEE802154_ATTR_PHY_NAME]); + if (name[nla_len(info->attrs[IEEE802154_ATTR_PHY_NAME]) - 1] != '\0') + return -EINVAL; /* phy name should be null-terminated */ + + if (info->attrs[IEEE802154_ATTR_DEV_NAME]) { + devname = nla_data(info->attrs[IEEE802154_ATTR_DEV_NAME]); + if (devname[nla_len(info->attrs[IEEE802154_ATTR_DEV_NAME]) - 1] + != '\0') + return -EINVAL; /* phy name should be null-terminated */ + } else { + devname = "wpan%d"; + } + + if (strlen(devname) >= IFNAMSIZ) + return -ENAMETOOLONG; + + phy = wpan_phy_find(name); + if (!phy) + return -ENODEV; + + msg = ieee802154_nl_new_reply(info, 0, IEEE802154_ADD_IFACE); + if (!msg) + goto out_dev; + + if (!phy->add_iface) { + rc = -EINVAL; + goto nla_put_failure; + } + + dev = phy->add_iface(phy, devname); + if (IS_ERR(dev)) { + rc = PTR_ERR(dev); + goto nla_put_failure; + } + + NLA_PUT_STRING(msg, IEEE802154_ATTR_PHY_NAME, wpan_phy_name(phy)); + NLA_PUT_STRING(msg, IEEE802154_ATTR_DEV_NAME, dev->name); + + dev_put(dev); + + wpan_phy_put(phy); + + return ieee802154_nl_reply(msg, info); + +nla_put_failure: + nlmsg_free(msg); +out_dev: + wpan_phy_put(phy); + return rc; +} + +static int ieee802154_del_iface(struct sk_buff *skb, + struct genl_info *info) +{ + struct sk_buff *msg; + struct wpan_phy *phy; + const char *name; + int rc; + struct net_device *dev; + + pr_debug("%s\n", __func__); + + if (!info->attrs[IEEE802154_ATTR_DEV_NAME]) + return -EINVAL; + + name = nla_data(info->attrs[IEEE802154_ATTR_DEV_NAME]); + if (name[nla_len(info->attrs[IEEE802154_ATTR_DEV_NAME]) - 1] != '\0') + return -EINVAL; /* name should be null-terminated */ + + dev = dev_get_by_name(genl_info_net(info), name); + if (!dev) + return -ENODEV; + + phy = ieee802154_mlme_ops(dev)->get_phy(dev); + BUG_ON(!phy); + + rc = -EINVAL; + /* phy name is optional, but should be checked if it's given */ + if (info->attrs[IEEE802154_ATTR_PHY_NAME]) { + struct wpan_phy *phy2; + + const char *pname = + nla_data(info->attrs[IEEE802154_ATTR_PHY_NAME]); + if (pname[nla_len(info->attrs[IEEE802154_ATTR_PHY_NAME]) - 1] + != '\0') + /* name should be null-terminated */ + goto out_dev; + + phy2 = wpan_phy_find(pname); + if (!phy2) + goto out_dev; + + if (phy != phy2) { + wpan_phy_put(phy2); + goto out_dev; + } + } + + rc = -ENOBUFS; + + msg = ieee802154_nl_new_reply(info, 0, IEEE802154_DEL_IFACE); + if (!msg) + goto out_dev; + + if (!phy->del_iface) { + rc = -EINVAL; + goto nla_put_failure; + } + + rtnl_lock(); + phy->del_iface(phy, dev); + + /* We don't have device anymore */ + dev_put(dev); + dev = NULL; + + rtnl_unlock(); + + + NLA_PUT_STRING(msg, IEEE802154_ATTR_PHY_NAME, wpan_phy_name(phy)); + NLA_PUT_STRING(msg, IEEE802154_ATTR_DEV_NAME, name); + + wpan_phy_put(phy); + + return ieee802154_nl_reply(msg, info); + +nla_put_failure: + nlmsg_free(msg); +out_dev: + wpan_phy_put(phy); + if (dev) + dev_put(dev); + + return rc; +} + static struct genl_ops ieee802154_phy_ops[] = { IEEE802154_DUMP(IEEE802154_LIST_PHY, ieee802154_list_phy, ieee802154_dump_phy), + IEEE802154_OP(IEEE802154_ADD_IFACE, ieee802154_add_iface), + IEEE802154_OP(IEEE802154_DEL_IFACE, ieee802154_del_iface), }; /* -- cgit v1.3-8-gc7d7 From 642c6d946b5cdc27d0146c41dc20b7c4d4c3ccd8 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 3 Apr 2009 01:08:48 -0700 Subject: sysctl: Make do_sysctl static Now that all of the architectures use compat_sys_sysctl do_sysctl can become static. Signed-off-by: Eric W. Biederman --- include/linux/sysctl.h | 4 ---- kernel/sysctl_binary.c | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 1e4743ee6831..82c32b89932d 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -996,10 +996,6 @@ extern int proc_doulongvec_minmax(struct ctl_table *, int, extern int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int, void __user *, size_t *, loff_t *); -extern int do_sysctl (int __user *name, int nlen, - void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen); - extern ctl_handler sysctl_data; extern ctl_handler sysctl_string; extern ctl_handler sysctl_intvec; diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c index 775cc49da622..642019894299 100644 --- a/kernel/sysctl_binary.c +++ b/kernel/sysctl_binary.c @@ -135,7 +135,7 @@ static void deprecated_sysctl_warning(const int *name, int nlen) return; } -int do_sysctl(int __user *args_name, int nlen, +static int do_sysctl(int __user *args_name, int nlen, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { -- cgit v1.3-8-gc7d7 From 31cef7076ed9409a33f19ea372d6dc5fdefe27ae Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Mon, 2 Nov 2009 09:34:16 +0100 Subject: control: remove snd_konctrol_volatile::owner_pid field We do not need to save the ID of the process that locked a control because that information is already available in the owner's file data. Signed-off-by: Clemens Ladisch Signed-off-by: Takashi Iwai --- include/sound/control.h | 1 - sound/core/control.c | 4 +--- 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'include') diff --git a/include/sound/control.h b/include/sound/control.h index ef96f07aa03b..3517745d0a2d 100644 --- a/include/sound/control.h +++ b/include/sound/control.h @@ -56,7 +56,6 @@ struct snd_kcontrol_new { struct snd_kcontrol_volatile { struct snd_ctl_file *owner; /* locked */ - pid_t owner_pid; unsigned int access; /* access rights */ }; diff --git a/sound/core/control.c b/sound/core/control.c index a8b7fabe645e..814d2cf1a34c 100644 --- a/sound/core/control.c +++ b/sound/core/control.c @@ -672,7 +672,7 @@ static int snd_ctl_elem_info(struct snd_ctl_file *ctl, info->access |= SNDRV_CTL_ELEM_ACCESS_LOCK; if (vd->owner == ctl) info->access |= SNDRV_CTL_ELEM_ACCESS_OWNER; - info->owner = vd->owner_pid; + info->owner = vd->owner->pid; } else { info->owner = -1; } @@ -827,7 +827,6 @@ static int snd_ctl_elem_lock(struct snd_ctl_file *file, result = -EBUSY; else { vd->owner = file; - vd->owner_pid = current->pid; result = 0; } } @@ -858,7 +857,6 @@ static int snd_ctl_elem_unlock(struct snd_ctl_file *file, result = -EPERM; else { vd->owner = NULL; - vd->owner_pid = 0; result = 0; } } -- cgit v1.3-8-gc7d7 From 25d27eded1f4fc728e64f443adc339b5229be5d7 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Mon, 2 Nov 2009 09:35:44 +0100 Subject: control: use reference-counted pid Instead of storing the PID number, take a reference to the task's pid structure. This protects against duplicates due to PID overflows, and using pid_vnr() ensures that the PID returned by snd_ctl_elem_info() is correct as seen from the current namespace. Signed-off-by: Clemens Ladisch Signed-off-by: Takashi Iwai --- include/sound/control.h | 4 +++- sound/core/control.c | 5 +++-- sound/core/pcm.c | 2 +- sound/core/rawmidi.c | 2 +- 4 files changed, 8 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/sound/control.h b/include/sound/control.h index 3517745d0a2d..112374dc0c58 100644 --- a/include/sound/control.h +++ b/include/sound/control.h @@ -86,10 +86,12 @@ struct snd_kctl_event { #define snd_kctl_event(n) list_entry(n, struct snd_kctl_event, list) +struct pid; + struct snd_ctl_file { struct list_head list; /* list of all control files */ struct snd_card *card; - pid_t pid; + struct pid *pid; int prefer_pcm_subdevice; int prefer_rawmidi_subdevice; wait_queue_head_t change_sleep; diff --git a/sound/core/control.c b/sound/core/control.c index 814d2cf1a34c..73dc10ac33f6 100644 --- a/sound/core/control.c +++ b/sound/core/control.c @@ -75,7 +75,7 @@ static int snd_ctl_open(struct inode *inode, struct file *file) ctl->card = card; ctl->prefer_pcm_subdevice = -1; ctl->prefer_rawmidi_subdevice = -1; - ctl->pid = current->pid; + ctl->pid = get_pid(task_pid(current)); file->private_data = ctl; write_lock_irqsave(&card->ctl_files_rwlock, flags); list_add_tail(&ctl->list, &card->ctl_files); @@ -125,6 +125,7 @@ static int snd_ctl_release(struct inode *inode, struct file *file) control->vd[idx].owner = NULL; up_write(&card->controls_rwsem); snd_ctl_empty_read_queue(ctl); + put_pid(ctl->pid); kfree(ctl); module_put(card->module); snd_card_file_remove(card, file); @@ -672,7 +673,7 @@ static int snd_ctl_elem_info(struct snd_ctl_file *ctl, info->access |= SNDRV_CTL_ELEM_ACCESS_LOCK; if (vd->owner == ctl) info->access |= SNDRV_CTL_ELEM_ACCESS_OWNER; - info->owner = vd->owner->pid; + info->owner = pid_vnr(vd->owner->pid); } else { info->owner = -1; } diff --git a/sound/core/pcm.c b/sound/core/pcm.c index c69c60b2a48a..8e2c7833614c 100644 --- a/sound/core/pcm.c +++ b/sound/core/pcm.c @@ -809,7 +809,7 @@ int snd_pcm_attach_substream(struct snd_pcm *pcm, int stream, card = pcm->card; read_lock(&card->ctl_files_rwlock); list_for_each_entry(kctl, &card->ctl_files, list) { - if (kctl->pid == current->pid) { + if (kctl->pid == task_pid(current)) { prefer_subdevice = kctl->prefer_pcm_subdevice; if (prefer_subdevice != -1) break; diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c index c0adc14c91f0..8a81bdafce6e 100644 --- a/sound/core/rawmidi.c +++ b/sound/core/rawmidi.c @@ -415,7 +415,7 @@ static int snd_rawmidi_open(struct inode *inode, struct file *file) subdevice = -1; read_lock(&card->ctl_files_rwlock); list_for_each_entry(kctl, &card->ctl_files, list) { - if (kctl->pid == current->pid) { + if (kctl->pid == task_pid(current)) { subdevice = kctl->prefer_rawmidi_subdevice; if (subdevice != -1) break; -- cgit v1.3-8-gc7d7 From af81858172cc0f3da81946aab919c26e4b364efc Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 6 Nov 2009 11:35:50 +0100 Subject: mac80211: async station powersave handling Some devices require that all frames to a station are flushed when that station goes into powersave mode before being able to send frames to that station again when it wakes up or polls -- all in order to avoid reordering and too many or too few frames being sent to the station when it polls. Normally, this is the case unless the station goes to sleep and wakes up very quickly again. But in that case, frames for it may be pending on the hardware queues, and thus races could happen in the case of multiple hardware queues used for QoS/WMM. Normally this isn't a problem, but with the iwlwifi mechanism we need to make sure the race doesn't happen. This makes mac80211 able to cope with the race with driver help by a new WLAN_STA_PS_DRIVER per-station flag that can be controlled by the driver and tells mac80211 whether it can transmit frames or not. This flag must be set according to very specific rules outlined in the documentation for the function that controls it. When we buffer new frames for the station, we normally set the TIM bit right away, but while the driver has blocked transmission to that sta we need to avoid that as well since we cannot respond to the station if it wakes up due to the TIM bit. Once the driver unblocks, we can set the TIM bit. Similarly, when the station just wakes up, we need to wait until all other frames are flushed before we can transmit frames to that station, so the same applies here, we need to wait for the driver to give the OK. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 32 ++++++++++++ net/mac80211/debugfs_sta.c | 5 +- net/mac80211/main.c | 8 +-- net/mac80211/rx.c | 86 ++++++--------------------------- net/mac80211/sta_info.c | 118 ++++++++++++++++++++++++++++++++++++++++++++- net/mac80211/sta_info.h | 23 +++++++-- net/mac80211/tx.c | 13 +++-- 7 files changed, 200 insertions(+), 85 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 7f035d779db9..2c10eac637d8 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2136,6 +2136,38 @@ struct ieee80211_sta *ieee80211_find_sta(struct ieee80211_vif *vif, struct ieee80211_sta *ieee80211_find_sta_by_hw(struct ieee80211_hw *hw, const u8 *addr); +/** + * ieee80211_sta_block_awake - block station from waking up + * @hw: the hardware + * @pubsta: the station + * @block: whether to block or unblock + * + * Some devices require that all frames that are on the queues + * for a specific station that went to sleep are flushed before + * a poll response or frames after the station woke up can be + * delivered to that it. Note that such frames must be rejected + * by the driver as filtered, with the appropriate status flag. + * + * This function allows implementing this mode in a race-free + * manner. + * + * To do this, a driver must keep track of the number of frames + * still enqueued for a specific station. If this number is not + * zero when the station goes to sleep, the driver must call + * this function to force mac80211 to consider the station to + * be asleep regardless of the station's actual state. Once the + * number of outstanding frames reaches zero, the driver must + * call this function again to unblock the station. That will + * cause mac80211 to be able to send ps-poll responses, and if + * the station queried in the meantime then frames will also + * be sent out as a result of this. Additionally, the driver + * will be notified that the station woke up some time after + * it is unblocked, regardless of whether the station actually + * woke up while blocked or not. + */ +void ieee80211_sta_block_awake(struct ieee80211_hw *hw, + struct ieee80211_sta *pubsta, bool block); + /** * ieee80211_beacon_loss - inform hardware does not receive beacons * diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 4425b613552c..f043c29070d7 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -66,10 +66,11 @@ static ssize_t sta_flags_read(struct file *file, char __user *userbuf, char buf[100]; struct sta_info *sta = file->private_data; u32 staflags = get_sta_flags(sta); - int res = scnprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s", + int res = scnprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s", staflags & WLAN_STA_AUTH ? "AUTH\n" : "", staflags & WLAN_STA_ASSOC ? "ASSOC\n" : "", - staflags & WLAN_STA_PS ? "PS\n" : "", + staflags & WLAN_STA_PS_STA ? "PS (sta)\n" : "", + staflags & WLAN_STA_PS_DRIVER ? "PS (driver)\n" : "", staflags & WLAN_STA_AUTHORIZED ? "AUTHORIZED\n" : "", staflags & WLAN_STA_SHORT_PREAMBLE ? "SHORT PREAMBLE\n" : "", staflags & WLAN_STA_WME ? "WME\n" : "", diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 9e6703ff7fbb..beb8718d905e 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -385,13 +385,13 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, * can be unknown, for example with different interrupt status * bits. */ - if (test_sta_flags(sta, WLAN_STA_PS) && + if (test_sta_flags(sta, WLAN_STA_PS_STA) && skb_queue_len(&sta->tx_filtered) < STA_MAX_TX_BUFFER) { skb_queue_tail(&sta->tx_filtered, skb); return; } - if (!test_sta_flags(sta, WLAN_STA_PS) && + if (!test_sta_flags(sta, WLAN_STA_PS_STA) && !(info->flags & IEEE80211_TX_INTFL_RETRIED)) { /* Software retry the packet once */ info->flags |= IEEE80211_TX_INTFL_RETRIED; @@ -406,7 +406,7 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, "queue_len=%d PS=%d @%lu\n", wiphy_name(local->hw.wiphy), skb_queue_len(&sta->tx_filtered), - !!test_sta_flags(sta, WLAN_STA_PS), jiffies); + !!test_sta_flags(sta, WLAN_STA_PS_STA), jiffies); #endif dev_kfree_skb(skb); } @@ -446,7 +446,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) if (sta) { if (!(info->flags & IEEE80211_TX_STAT_ACK) && - test_sta_flags(sta, WLAN_STA_PS)) { + test_sta_flags(sta, WLAN_STA_PS_STA)) { /* * The STA is in power save mode, so assume * that this TX packet failed because of that. diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index c06496f0b76d..28316b2a585f 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -781,7 +781,7 @@ static void ap_sta_ps_start(struct sta_info *sta) struct ieee80211_local *local = sdata->local; atomic_inc(&sdata->bss->num_sta_ps); - set_sta_flags(sta, WLAN_STA_PS); + set_sta_flags(sta, WLAN_STA_PS_STA); drv_sta_notify(local, &sdata->vif, STA_NOTIFY_SLEEP, &sta->sta); #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG printk(KERN_DEBUG "%s: STA %pM aid %d enters power save mode\n", @@ -792,33 +792,25 @@ static void ap_sta_ps_start(struct sta_info *sta) static void ap_sta_ps_end(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; - struct ieee80211_local *local = sdata->local; - int sent, buffered; atomic_dec(&sdata->bss->num_sta_ps); - clear_sta_flags(sta, WLAN_STA_PS); - drv_sta_notify(local, &sdata->vif, STA_NOTIFY_AWAKE, &sta->sta); - - if (!skb_queue_empty(&sta->ps_tx_buf)) - sta_info_clear_tim_bit(sta); + clear_sta_flags(sta, WLAN_STA_PS_STA); #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG printk(KERN_DEBUG "%s: STA %pM aid %d exits power save mode\n", sdata->dev->name, sta->sta.addr, sta->sta.aid); #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ - /* Send all buffered frames to the station */ - sent = ieee80211_add_pending_skbs(local, &sta->tx_filtered); - buffered = ieee80211_add_pending_skbs(local, &sta->ps_tx_buf); - sent += buffered; - local->total_ps_buffered -= buffered; - + if (test_sta_flags(sta, WLAN_STA_PS_DRIVER)) { #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - printk(KERN_DEBUG "%s: STA %pM aid %d sending %d filtered/%d PS frames " - "since STA not sleeping anymore\n", sdata->dev->name, - sta->sta.addr, sta->sta.aid, sent - buffered, buffered); + printk(KERN_DEBUG "%s: STA %pM aid %d driver-ps-blocked\n", + sdata->dev->name, sta->sta.addr, sta->sta.aid); #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ + return; + } + + ieee80211_sta_ps_deliver_wakeup(sta); } static ieee80211_rx_result debug_noinline @@ -866,7 +858,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) if (!ieee80211_has_morefrags(hdr->frame_control) && (rx->sdata->vif.type == NL80211_IFTYPE_AP || rx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN)) { - if (test_sta_flags(sta, WLAN_STA_PS)) { + if (test_sta_flags(sta, WLAN_STA_PS_STA)) { /* * Ignore doze->wake transitions that are * indicated by non-data frames, the standard @@ -1094,9 +1086,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) static ieee80211_rx_result debug_noinline ieee80211_rx_h_ps_poll(struct ieee80211_rx_data *rx) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(rx->dev); - struct sk_buff *skb; - int no_pending_pkts; + struct ieee80211_sub_if_data *sdata = rx->sdata; __le16 fc = ((struct ieee80211_hdr *)rx->skb->data)->frame_control; if (likely(!rx->sta || !ieee80211_is_pspoll(fc) || @@ -1107,56 +1097,10 @@ ieee80211_rx_h_ps_poll(struct ieee80211_rx_data *rx) (sdata->vif.type != NL80211_IFTYPE_AP_VLAN)) return RX_DROP_UNUSABLE; - skb = skb_dequeue(&rx->sta->tx_filtered); - if (!skb) { - skb = skb_dequeue(&rx->sta->ps_tx_buf); - if (skb) - rx->local->total_ps_buffered--; - } - no_pending_pkts = skb_queue_empty(&rx->sta->tx_filtered) && - skb_queue_empty(&rx->sta->ps_tx_buf); - - if (skb) { - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - struct ieee80211_hdr *hdr = - (struct ieee80211_hdr *) skb->data; - - /* - * Tell TX path to send this frame even though the STA may - * still remain is PS mode after this frame exchange. - */ - info->flags |= IEEE80211_TX_CTL_PSPOLL_RESPONSE; - -#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - printk(KERN_DEBUG "STA %pM aid %d: PS Poll (entries after %d)\n", - rx->sta->sta.addr, rx->sta->sta.aid, - skb_queue_len(&rx->sta->ps_tx_buf)); -#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ - - /* Use MoreData flag to indicate whether there are more - * buffered frames for this STA */ - if (no_pending_pkts) - hdr->frame_control &= cpu_to_le16(~IEEE80211_FCTL_MOREDATA); - else - hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_MOREDATA); - - ieee80211_add_pending_skb(rx->local, skb); - - if (no_pending_pkts) - sta_info_clear_tim_bit(rx->sta); -#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - } else { - /* - * FIXME: This can be the result of a race condition between - * us expiring a frame and the station polling for it. - * Should we send it a null-func frame indicating we - * have nothing buffered for it? - */ - printk(KERN_DEBUG "%s: STA %pM sent PS Poll even " - "though there are no buffered frames for it\n", - rx->dev->name, rx->sta->sta.addr); -#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ - } + if (!test_sta_flags(rx->sta, WLAN_STA_PS_DRIVER)) + ieee80211_sta_ps_deliver_poll_response(rx->sta); + else + set_sta_flags(rx->sta, WLAN_STA_PSPOLL); /* Free PS Poll skb here instead of returning RX_DROP that would * count as an dropped frame. */ diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index cde2da7a74df..be59456e8a42 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -171,6 +171,8 @@ void sta_info_destroy(struct sta_info *sta) local = sta->local; + cancel_work_sync(&sta->drv_unblock_wk); + rate_control_remove_sta_debugfs(sta); ieee80211_sta_debugfs_remove(sta); @@ -259,6 +261,21 @@ static void sta_info_hash_add(struct ieee80211_local *local, rcu_assign_pointer(local->sta_hash[STA_HASH(sta->sta.addr)], sta); } +static void sta_unblock(struct work_struct *wk) +{ + struct sta_info *sta; + + sta = container_of(wk, struct sta_info, drv_unblock_wk); + + if (sta->dead) + return; + + if (!test_sta_flags(sta, WLAN_STA_PS_STA)) + ieee80211_sta_ps_deliver_wakeup(sta); + else if (test_and_clear_sta_flags(sta, WLAN_STA_PSPOLL)) + ieee80211_sta_ps_deliver_poll_response(sta); +} + struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, u8 *addr, gfp_t gfp) { @@ -272,6 +289,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, spin_lock_init(&sta->lock); spin_lock_init(&sta->flaglock); + INIT_WORK(&sta->drv_unblock_wk, sta_unblock); memcpy(sta->sta.addr, addr, ETH_ALEN); sta->local = local; @@ -478,8 +496,10 @@ static void __sta_info_unlink(struct sta_info **sta) } list_del(&(*sta)->list); + (*sta)->dead = true; - if (test_and_clear_sta_flags(*sta, WLAN_STA_PS)) { + if (test_and_clear_sta_flags(*sta, + WLAN_STA_PS_STA | WLAN_STA_PS_DRIVER)) { BUG_ON(!sdata->bss); atomic_dec(&sdata->bss->num_sta_ps); @@ -825,3 +845,99 @@ struct ieee80211_sta *ieee80211_find_sta(struct ieee80211_vif *vif, return ieee80211_find_sta_by_hw(&sdata->local->hw, addr); } EXPORT_SYMBOL(ieee80211_find_sta); + +/* powersave support code */ +void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) +{ + struct ieee80211_sub_if_data *sdata = sta->sdata; + struct ieee80211_local *local = sdata->local; + int sent, buffered; + + drv_sta_notify(local, &sdata->vif, STA_NOTIFY_AWAKE, &sta->sta); + + if (!skb_queue_empty(&sta->ps_tx_buf)) + sta_info_clear_tim_bit(sta); + + /* Send all buffered frames to the station */ + sent = ieee80211_add_pending_skbs(local, &sta->tx_filtered); + buffered = ieee80211_add_pending_skbs(local, &sta->ps_tx_buf); + sent += buffered; + local->total_ps_buffered -= buffered; + +#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG + printk(KERN_DEBUG "%s: STA %pM aid %d sending %d filtered/%d PS frames " + "since STA not sleeping anymore\n", sdata->dev->name, + sta->sta.addr, sta->sta.aid, sent - buffered, buffered); +#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ +} + +void ieee80211_sta_ps_deliver_poll_response(struct sta_info *sta) +{ + struct ieee80211_sub_if_data *sdata = sta->sdata; + struct ieee80211_local *local = sdata->local; + struct sk_buff *skb; + int no_pending_pkts; + + skb = skb_dequeue(&sta->tx_filtered); + if (!skb) { + skb = skb_dequeue(&sta->ps_tx_buf); + if (skb) + local->total_ps_buffered--; + } + no_pending_pkts = skb_queue_empty(&sta->tx_filtered) && + skb_queue_empty(&sta->ps_tx_buf); + + if (skb) { + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_hdr *hdr = + (struct ieee80211_hdr *) skb->data; + + /* + * Tell TX path to send this frame even though the STA may + * still remain is PS mode after this frame exchange. + */ + info->flags |= IEEE80211_TX_CTL_PSPOLL_RESPONSE; + +#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG + printk(KERN_DEBUG "STA %pM aid %d: PS Poll (entries after %d)\n", + sta->sta.addr, sta->sta.aid, + skb_queue_len(&sta->ps_tx_buf)); +#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ + + /* Use MoreData flag to indicate whether there are more + * buffered frames for this STA */ + if (no_pending_pkts) + hdr->frame_control &= cpu_to_le16(~IEEE80211_FCTL_MOREDATA); + else + hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_MOREDATA); + + ieee80211_add_pending_skb(local, skb); + + if (no_pending_pkts) + sta_info_clear_tim_bit(sta); +#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG + } else { + /* + * FIXME: This can be the result of a race condition between + * us expiring a frame and the station polling for it. + * Should we send it a null-func frame indicating we + * have nothing buffered for it? + */ + printk(KERN_DEBUG "%s: STA %pM sent PS Poll even " + "though there are no buffered frames for it\n", + sdata->dev->name, sta->sta.addr); +#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ + } +} + +void ieee80211_sta_block_awake(struct ieee80211_hw *hw, + struct ieee80211_sta *pubsta, bool block) +{ + struct sta_info *sta = container_of(pubsta, struct sta_info, sta); + + if (block) + set_sta_flags(sta, WLAN_STA_PS_DRIVER); + else + ieee80211_queue_work(hw, &sta->drv_unblock_wk); +} +EXPORT_SYMBOL(ieee80211_sta_block_awake); diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 703f5492ee65..4673454176ed 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -12,6 +12,7 @@ #include #include #include +#include #include "key.h" /** @@ -21,7 +22,7 @@ * * @WLAN_STA_AUTH: Station is authenticated. * @WLAN_STA_ASSOC: Station is associated. - * @WLAN_STA_PS: Station is in power-save mode + * @WLAN_STA_PS_STA: Station is in power-save mode * @WLAN_STA_AUTHORIZED: Station is authorized to send/receive traffic. * This bit is always checked so needs to be enabled for all stations * when virtual port control is not in use. @@ -36,11 +37,16 @@ * @WLAN_STA_MFP: Management frame protection is used with this STA. * @WLAN_STA_SUSPEND: Set/cleared during a suspend/resume cycle. * Used to deny ADDBA requests (both TX and RX). + * @WLAN_STA_PS_DRIVER: driver requires keeping this station in + * power-save mode logically to flush frames that might still + * be in the queues + * @WLAN_STA_PSPOLL: Station sent PS-poll while driver was keeping + * station in power-save mode, reply when the driver unblocks. */ enum ieee80211_sta_info_flags { WLAN_STA_AUTH = 1<<0, WLAN_STA_ASSOC = 1<<1, - WLAN_STA_PS = 1<<2, + WLAN_STA_PS_STA = 1<<2, WLAN_STA_AUTHORIZED = 1<<3, WLAN_STA_SHORT_PREAMBLE = 1<<4, WLAN_STA_ASSOC_AP = 1<<5, @@ -48,7 +54,9 @@ enum ieee80211_sta_info_flags { WLAN_STA_WDS = 1<<7, WLAN_STA_CLEAR_PS_FILT = 1<<9, WLAN_STA_MFP = 1<<10, - WLAN_STA_SUSPEND = 1<<11 + WLAN_STA_SUSPEND = 1<<11, + WLAN_STA_PS_DRIVER = 1<<12, + WLAN_STA_PSPOLL = 1<<13, }; #define STA_TID_NUM 16 @@ -216,6 +224,8 @@ struct sta_ampdu_mlme { * @plink_timer_was_running: used by suspend/resume to restore timers * @debugfs: debug filesystem info * @sta: station information we share with the driver + * @dead: set to true when sta is unlinked + * @drv_unblock_wk used for driver PS unblocking */ struct sta_info { /* General information, mostly static */ @@ -229,8 +239,12 @@ struct sta_info { spinlock_t lock; spinlock_t flaglock; + struct work_struct drv_unblock_wk; + u16 listen_interval; + bool dead; + /* * for use by the internal lifetime management, * see __sta_info_unlink @@ -430,4 +444,7 @@ int sta_info_flush(struct ieee80211_local *local, void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, unsigned long exp_time); +void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta); +void ieee80211_sta_ps_deliver_poll_response(struct sta_info *sta); + #endif /* STA_INFO_H */ diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index c7dc8ccff5b2..bfaa43e096d2 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -374,7 +374,7 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) staflags = get_sta_flags(sta); - if (unlikely((staflags & WLAN_STA_PS) && + if (unlikely((staflags & (WLAN_STA_PS_STA | WLAN_STA_PS_DRIVER)) && !(info->flags & IEEE80211_TX_CTL_PSPOLL_RESPONSE))) { #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG printk(KERN_DEBUG "STA %pM aid %d: PS buffer (entries " @@ -397,8 +397,13 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) } else tx->local->total_ps_buffered++; - /* Queue frame to be sent after STA sends an PS Poll frame */ - if (skb_queue_empty(&sta->ps_tx_buf)) + /* + * Queue frame to be sent after STA wakes up/polls, + * but don't set the TIM bit if the driver is blocking + * wakeup or poll response transmissions anyway. + */ + if (skb_queue_empty(&sta->ps_tx_buf) && + !(staflags & WLAN_STA_PS_DRIVER)) sta_info_set_tim_bit(sta); info->control.jiffies = jiffies; @@ -408,7 +413,7 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) return TX_QUEUED; } #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - else if (unlikely(test_sta_flags(sta, WLAN_STA_PS))) { + else if (unlikely(staflags & WLAN_STA_PS_STA)) { printk(KERN_DEBUG "%s: STA %pM in PS mode, but pspoll " "set -> send frame\n", tx->dev->name, sta->sta.addr); -- cgit v1.3-8-gc7d7 From 2dceba14ef0e62738d58777a1bd4018130d47a74 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 6 Nov 2009 08:09:03 +0000 Subject: compat: add struct compat_ifreq etc to compat.h In order to move socket ioctl conversion code into multiple places in the socket code, we need a common defintion of the data structures it uses. Also change the name from ifreq32 to compat_ifreq to follow the naming convention for compat.h Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- include/linux/compat.h | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) (limited to 'include') diff --git a/include/linux/compat.h b/include/linux/compat.h index af931ee43dd8..8311d2e29632 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -10,6 +10,8 @@ #include #include /* for HZ */ #include +#include +#include #include #include @@ -154,6 +156,43 @@ typedef struct compat_sigevent { } _sigev_un; } compat_sigevent_t; +struct compat_ifmap { + compat_ulong_t mem_start; + compat_ulong_t mem_end; + unsigned short base_addr; + unsigned char irq; + unsigned char dma; + unsigned char port; +}; + +struct compat_ifreq { +#define IFHWADDRLEN 6 +#define IFNAMSIZ 16 + union { + char ifrn_name[IFNAMSIZ]; /* if name, e.g. "en0" */ + } ifr_ifrn; + union { + struct sockaddr ifru_addr; + struct sockaddr ifru_dstaddr; + struct sockaddr ifru_broadaddr; + struct sockaddr ifru_netmask; + struct sockaddr ifru_hwaddr; + short ifru_flags; + compat_int_t ifru_ivalue; + compat_int_t ifru_mtu; + struct compat_ifmap ifru_map; + char ifru_slave[IFNAMSIZ]; /* Just fits the size */ + char ifru_newname[IFNAMSIZ]; + compat_caddr_t ifru_data; + /* XXXX? ifru_settings should be here */ + } ifr_ifru; +}; + +struct compat_ifconf { + compat_int_t ifc_len; /* size of buffer */ + compat_caddr_t ifcbuf; +}; + struct compat_robust_list { compat_uptr_t next; }; -- cgit v1.3-8-gc7d7 From b622d97a63ad4ce890b625c62acd1bb894592e63 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 6 Nov 2009 20:46:52 -0800 Subject: net: compat: No need to define IFHWADDRLEN and IFNAMSIZ twice. It's defined colloqually in linux/if.h and linux/compat.h includes that. Signed-off-by: David S. Miller --- include/linux/compat.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/compat.h b/include/linux/compat.h index 8311d2e29632..224c7a896172 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -166,8 +166,6 @@ struct compat_ifmap { }; struct compat_ifreq { -#define IFHWADDRLEN 6 -#define IFNAMSIZ 16 union { char ifrn_name[IFNAMSIZ]; /* if name, e.g. "en0" */ } ifr_ifrn; -- cgit v1.3-8-gc7d7 From b215c57dcc847b15693899d26aa0ee4669dacefb Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 5 Nov 2009 04:37:30 +0000 Subject: net: kill proto_ops wrapper All users of wrapped proto_ops are now gone, so we can safely remove the wrappers as well. Cc: David S. Miller Cc: netdev@vger.kernel.org Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- include/linux/net.h | 83 ----------------------------------------------------- 1 file changed, 83 deletions(-) (limited to 'include') diff --git a/include/linux/net.h b/include/linux/net.h index 70ee3c310f15..6ce87663551c 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -268,89 +268,6 @@ extern int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg); extern int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how); -#ifndef CONFIG_SMP -#define SOCKOPS_WRAPPED(name) name -#define SOCKOPS_WRAP(name, fam) -#else - -#define SOCKOPS_WRAPPED(name) __unlocked_##name - -#define SOCKCALL_WRAP(name, call, parms, args) \ -static int __lock_##name##_##call parms \ -{ \ - int ret; \ - lock_kernel(); \ - ret = __unlocked_##name##_ops.call args ;\ - unlock_kernel(); \ - return ret; \ -} - -#define SOCKCALL_UWRAP(name, call, parms, args) \ -static unsigned int __lock_##name##_##call parms \ -{ \ - int ret; \ - lock_kernel(); \ - ret = __unlocked_##name##_ops.call args ;\ - unlock_kernel(); \ - return ret; \ -} - - -#define SOCKOPS_WRAP(name, fam) \ -SOCKCALL_WRAP(name, release, (struct socket *sock), (sock)) \ -SOCKCALL_WRAP(name, bind, (struct socket *sock, struct sockaddr *uaddr, int addr_len), \ - (sock, uaddr, addr_len)) \ -SOCKCALL_WRAP(name, connect, (struct socket *sock, struct sockaddr * uaddr, \ - int addr_len, int flags), \ - (sock, uaddr, addr_len, flags)) \ -SOCKCALL_WRAP(name, socketpair, (struct socket *sock1, struct socket *sock2), \ - (sock1, sock2)) \ -SOCKCALL_WRAP(name, accept, (struct socket *sock, struct socket *newsock, \ - int flags), (sock, newsock, flags)) \ -SOCKCALL_WRAP(name, getname, (struct socket *sock, struct sockaddr *uaddr, \ - int *addr_len, int peer), (sock, uaddr, addr_len, peer)) \ -SOCKCALL_UWRAP(name, poll, (struct file *file, struct socket *sock, struct poll_table_struct *wait), \ - (file, sock, wait)) \ -SOCKCALL_WRAP(name, ioctl, (struct socket *sock, unsigned int cmd, \ - unsigned long arg), (sock, cmd, arg)) \ -SOCKCALL_WRAP(name, compat_ioctl, (struct socket *sock, unsigned int cmd, \ - unsigned long arg), (sock, cmd, arg)) \ -SOCKCALL_WRAP(name, listen, (struct socket *sock, int len), (sock, len)) \ -SOCKCALL_WRAP(name, shutdown, (struct socket *sock, int flags), (sock, flags)) \ -SOCKCALL_WRAP(name, setsockopt, (struct socket *sock, int level, int optname, \ - char __user *optval, unsigned int optlen), (sock, level, optname, optval, optlen)) \ -SOCKCALL_WRAP(name, getsockopt, (struct socket *sock, int level, int optname, \ - char __user *optval, int __user *optlen), (sock, level, optname, optval, optlen)) \ -SOCKCALL_WRAP(name, sendmsg, (struct kiocb *iocb, struct socket *sock, struct msghdr *m, size_t len), \ - (iocb, sock, m, len)) \ -SOCKCALL_WRAP(name, recvmsg, (struct kiocb *iocb, struct socket *sock, struct msghdr *m, size_t len, int flags), \ - (iocb, sock, m, len, flags)) \ -SOCKCALL_WRAP(name, mmap, (struct file *file, struct socket *sock, struct vm_area_struct *vma), \ - (file, sock, vma)) \ - \ -static const struct proto_ops name##_ops = { \ - .family = fam, \ - .owner = THIS_MODULE, \ - .release = __lock_##name##_release, \ - .bind = __lock_##name##_bind, \ - .connect = __lock_##name##_connect, \ - .socketpair = __lock_##name##_socketpair, \ - .accept = __lock_##name##_accept, \ - .getname = __lock_##name##_getname, \ - .poll = __lock_##name##_poll, \ - .ioctl = __lock_##name##_ioctl, \ - .compat_ioctl = __lock_##name##_compat_ioctl, \ - .listen = __lock_##name##_listen, \ - .shutdown = __lock_##name##_shutdown, \ - .setsockopt = __lock_##name##_setsockopt, \ - .getsockopt = __lock_##name##_getsockopt, \ - .sendmsg = __lock_##name##_sendmsg, \ - .recvmsg = __lock_##name##_recvmsg, \ - .mmap = __lock_##name##_mmap, \ -}; - -#endif - #define MODULE_ALIAS_NETPROTO(proto) \ MODULE_ALIAS("net-pf-" __stringify(proto)) -- cgit v1.3-8-gc7d7 From 81adee47dfb608df3ad0b91d230fb3cef75f0060 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 8 Nov 2009 00:53:51 -0800 Subject: net: Support specifying the network namespace upon device creation. There is no good reason to not support userspace specifying the network namespace during device creation, and it makes it easier to create a network device and pass it to a child network namespace with a well known name. We have to be careful to ensure that the target network namespace for the new device exists through the life of the call. To keep that logic clear I have factored out the network namespace grabbing logic into rtnl_link_get_net. In addtion we need to continue to pass the source network namespace to the rtnl_link_ops.newlink method so that we can find the base device source network namespace. Signed-off-by: Eric W. Biederman Acked-by: Eric Dumazet --- drivers/net/can/dev.c | 2 +- drivers/net/macvlan.c | 4 ++-- drivers/net/veth.c | 15 ++++++++++++--- include/net/rtnetlink.h | 8 +++++--- net/8021q/vlan_netlink.c | 4 ++-- net/core/rtnetlink.c | 38 +++++++++++++++++++++++++++----------- net/ipv4/ip_gre.c | 2 +- 7 files changed, 50 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index c3db111d2ff5..5fe34d64ca2a 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -674,7 +674,7 @@ nla_put_failure: return -EMSGSIZE; } -static int can_newlink(struct net_device *dev, +static int can_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { return -EOPNOTSUPP; diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 20b7707f38ef..d7dba3f6f763 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -504,7 +504,7 @@ static int macvlan_get_tx_queues(struct net *net, return 0; } -static int macvlan_newlink(struct net_device *dev, +static int macvlan_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { struct macvlan_dev *vlan = netdev_priv(dev); @@ -515,7 +515,7 @@ static int macvlan_newlink(struct net_device *dev, if (!tb[IFLA_LINK]) return -EINVAL; - lowerdev = __dev_get_by_index(dev_net(dev), nla_get_u32(tb[IFLA_LINK])); + lowerdev = __dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK])); if (lowerdev == NULL) return -ENODEV; diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 9bed694cd215..2d657f2314cb 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -340,7 +340,7 @@ static int veth_validate(struct nlattr *tb[], struct nlattr *data[]) static struct rtnl_link_ops veth_link_ops; -static int veth_newlink(struct net_device *dev, +static int veth_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { int err; @@ -348,6 +348,7 @@ static int veth_newlink(struct net_device *dev, struct veth_priv *priv; char ifname[IFNAMSIZ]; struct nlattr *peer_tb[IFLA_MAX + 1], **tbp; + struct net *net; /* * create and register peer first @@ -380,14 +381,22 @@ static int veth_newlink(struct net_device *dev, else snprintf(ifname, IFNAMSIZ, DRV_NAME "%%d"); - peer = rtnl_create_link(dev_net(dev), ifname, &veth_link_ops, tbp); - if (IS_ERR(peer)) + net = rtnl_link_get_net(src_net, tbp); + if (IS_ERR(net)) + return PTR_ERR(net); + + peer = rtnl_create_link(src_net, net, ifname, &veth_link_ops, tbp); + if (IS_ERR(peer)) { + put_net(net); return PTR_ERR(peer); + } if (tbp[IFLA_ADDRESS] == NULL) random_ether_addr(peer->dev_addr); err = register_netdevice(peer); + put_net(net); + net = NULL; if (err < 0) goto err_register_peer; diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index cd5af1f508f2..48d3efcb0880 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -55,7 +55,8 @@ struct rtnl_link_ops { int (*validate)(struct nlattr *tb[], struct nlattr *data[]); - int (*newlink)(struct net_device *dev, + int (*newlink)(struct net *src_net, + struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]); int (*changelink)(struct net_device *dev, @@ -83,8 +84,9 @@ extern void rtnl_kill_links(struct net *net, struct rtnl_link_ops *ops); extern int rtnl_link_register(struct rtnl_link_ops *ops); extern void rtnl_link_unregister(struct rtnl_link_ops *ops); -extern struct net_device *rtnl_create_link(struct net *net, char *ifname, - const struct rtnl_link_ops *ops, struct nlattr *tb[]); +extern struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[]); +extern struct net_device *rtnl_create_link(struct net *src_net, struct net *net, + char *ifname, const struct rtnl_link_ops *ops, struct nlattr *tb[]); extern const struct nla_policy ifla_policy[IFLA_MAX+1]; #define MODULE_ALIAS_RTNL_LINK(kind) MODULE_ALIAS("rtnl-link-" kind) diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index a91504850195..3c9cf6a8e7fb 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -119,7 +119,7 @@ static int vlan_get_tx_queues(struct net *net, return 0; } -static int vlan_newlink(struct net_device *dev, +static int vlan_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { struct vlan_dev_info *vlan = vlan_dev_info(dev); @@ -131,7 +131,7 @@ static int vlan_newlink(struct net_device *dev, if (!tb[IFLA_LINK]) return -EINVAL; - real_dev = __dev_get_by_index(dev_net(dev), nla_get_u32(tb[IFLA_LINK])); + real_dev = __dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK])); if (!real_dev) return -ENODEV; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index e2f3317f290f..33148a568199 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -733,6 +733,20 @@ static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { [IFLA_INFO_DATA] = { .type = NLA_NESTED }, }; +struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[]) +{ + struct net *net; + /* Examine the link attributes and figure out which + * network namespace we are talking about. + */ + if (tb[IFLA_NET_NS_PID]) + net = get_net_ns_by_pid(nla_get_u32(tb[IFLA_NET_NS_PID])); + else + net = get_net(src_net); + return net; +} +EXPORT_SYMBOL(rtnl_link_get_net); + static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[]) { if (dev) { @@ -756,8 +770,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, int err; if (tb[IFLA_NET_NS_PID]) { - struct net *net; - net = get_net_ns_by_pid(nla_get_u32(tb[IFLA_NET_NS_PID])); + struct net *net = rtnl_link_get_net(dev_net(dev), tb); if (IS_ERR(net)) { err = PTR_ERR(net); goto errout; @@ -976,8 +989,8 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) return 0; } -struct net_device *rtnl_create_link(struct net *net, char *ifname, - const struct rtnl_link_ops *ops, struct nlattr *tb[]) +struct net_device *rtnl_create_link(struct net *src_net, struct net *net, + char *ifname, const struct rtnl_link_ops *ops, struct nlattr *tb[]) { int err; struct net_device *dev; @@ -985,7 +998,7 @@ struct net_device *rtnl_create_link(struct net *net, char *ifname, unsigned int real_num_queues = 1; if (ops->get_tx_queues) { - err = ops->get_tx_queues(net, tb, &num_queues, + err = ops->get_tx_queues(src_net, tb, &num_queues, &real_num_queues); if (err) goto err; @@ -995,16 +1008,16 @@ struct net_device *rtnl_create_link(struct net *net, char *ifname, if (!dev) goto err; + dev_net_set(dev, net); + dev->rtnl_link_ops = ops; dev->real_num_tx_queues = real_num_queues; + if (strchr(dev->name, '%')) { err = dev_alloc_name(dev, dev->name); if (err < 0) goto err_free; } - dev_net_set(dev, net); - dev->rtnl_link_ops = ops; - if (tb[IFLA_MTU]) dev->mtu = nla_get_u32(tb[IFLA_MTU]); if (tb[IFLA_ADDRESS]) @@ -1083,6 +1096,7 @@ replay: if (1) { struct nlattr *attr[ops ? ops->maxtype + 1 : 0], **data = NULL; + struct net *dest_net; if (ops) { if (ops->maxtype && linkinfo[IFLA_INFO_DATA]) { @@ -1147,17 +1161,19 @@ replay: if (!ifname[0]) snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind); - dev = rtnl_create_link(net, ifname, ops, tb); + dest_net = rtnl_link_get_net(net, tb); + dev = rtnl_create_link(net, dest_net, ifname, ops, tb); if (IS_ERR(dev)) err = PTR_ERR(dev); else if (ops->newlink) - err = ops->newlink(dev, tb, data); + err = ops->newlink(net, dev, tb, data); else err = register_netdevice(dev); - if (err < 0 && !IS_ERR(dev)) free_netdev(dev); + + put_net(dest_net); return err; } } diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 71a3242fb7d0..a7de9e3a8f18 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1483,7 +1483,7 @@ static void ipgre_tap_setup(struct net_device *dev) dev->features |= NETIF_F_NETNS_LOCAL; } -static int ipgre_newlink(struct net_device *dev, struct nlattr *tb[], +static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { struct ip_tunnel *nt; -- cgit v1.3-8-gc7d7 From 444a2a3bcd6d5bed5c823136f68fcc93c0fe283f Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 6 Nov 2009 04:13:05 +0100 Subject: tracing, perf_events: Protect the buffer from recursion in perf While tracing using events with perf, if one enables the lockdep:lock_acquire event, it will infect every other perf trace events. Basically, you can enable whatever set of trace events through perf but if this event is part of the set, the only result we can get is a long list of lock_acquire events of rcu read lock, and only that. This is because of a recursion inside perf. 1) When a trace event is triggered, it will fill a per cpu buffer and submit it to perf. 2) Perf will commit this event but will also protect some data using rcu_read_lock 3) A recursion appears: rcu_read_lock triggers a lock_acquire event that will fill the per cpu event and then submit the buffer to perf. 4) Perf detects a recursion and ignores it 5) Perf continues its work on the previous event, but its buffer has been overwritten by the lock_acquire event, it has then been turned into a lock_acquire event of rcu read lock Such scenario also happens with lock_release with rcu_read_unlock(). We could turn the rcu_read_lock() into __rcu_read_lock() to drop the lock debugging from perf fast path, but that would make us lose the rcu debugging and that doesn't prevent from other possible kind of recursion from perf in the future. This patch adds a recursion protection based on a counter on the perf trace per cpu buffers to solve the problem. -v2: Fixed lost whitespace, added reviewed-by tag Signed-off-by: Frederic Weisbecker Reviewed-by: Masami Hiramatsu Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith Cc: Paul Mackerras Cc: Steven Rostedt Cc: Li Zefan Cc: Jason Baron LKML-Reference: <1257477185-7838-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- include/linux/ftrace_event.h | 9 +++++-- include/trace/ftrace.h | 39 ++++++++++++++++++++++------- kernel/trace/trace_event_profile.c | 41 ++++++++++++++----------------- kernel/trace/trace_kprobe.c | 50 ++++++++++++++++++++++++++++++++------ kernel/trace/trace_syscalls.c | 44 +++++++++++++++++++++++++++------ 5 files changed, 133 insertions(+), 50 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index f7b47c336703..43360c1d8f70 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -137,8 +137,13 @@ struct ftrace_event_call { #define FTRACE_MAX_PROFILE_SIZE 2048 -extern char *trace_profile_buf; -extern char *trace_profile_buf_nmi; +struct perf_trace_buf { + char buf[FTRACE_MAX_PROFILE_SIZE]; + int recursion; +}; + +extern struct perf_trace_buf *perf_trace_buf; +extern struct perf_trace_buf *perf_trace_buf_nmi; #define MAX_FILTER_PRED 32 #define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */ diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index a7f946094128..4945d1c99864 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -649,6 +649,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ * struct ftrace_event_call *event_call = &event_; * extern void perf_tp_event(int, u64, u64, void *, int); * struct ftrace_raw_##call *entry; + * struct perf_trace_buf *trace_buf; * u64 __addr = 0, __count = 1; * unsigned long irq_flags; * struct trace_entry *ent; @@ -673,14 +674,25 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ * __cpu = smp_processor_id(); * * if (in_nmi()) - * raw_data = rcu_dereference(trace_profile_buf_nmi); + * trace_buf = rcu_dereference(perf_trace_buf_nmi); * else - * raw_data = rcu_dereference(trace_profile_buf); + * trace_buf = rcu_dereference(perf_trace_buf); * - * if (!raw_data) + * if (!trace_buf) * goto end; * - * raw_data = per_cpu_ptr(raw_data, __cpu); + * trace_buf = per_cpu_ptr(trace_buf, __cpu); + * + * // Avoid recursion from perf that could mess up the buffer + * if (trace_buf->recursion++) + * goto end_recursion; + * + * raw_data = trace_buf->buf; + * + * // Make recursion update visible before entering perf_tp_event + * // so that we protect from perf recursions. + * + * barrier(); * * //zero dead bytes from alignment to avoid stack leak to userspace: * *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; @@ -713,8 +725,9 @@ static void ftrace_profile_##call(proto) \ { \ struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ struct ftrace_event_call *event_call = &event_##call; \ - extern void perf_tp_event(int, u64, u64, void *, int); \ + extern void perf_tp_event(int, u64, u64, void *, int); \ struct ftrace_raw_##call *entry; \ + struct perf_trace_buf *trace_buf; \ u64 __addr = 0, __count = 1; \ unsigned long irq_flags; \ struct trace_entry *ent; \ @@ -739,14 +752,20 @@ static void ftrace_profile_##call(proto) \ __cpu = smp_processor_id(); \ \ if (in_nmi()) \ - raw_data = rcu_dereference(trace_profile_buf_nmi); \ + trace_buf = rcu_dereference(perf_trace_buf_nmi); \ else \ - raw_data = rcu_dereference(trace_profile_buf); \ + trace_buf = rcu_dereference(perf_trace_buf); \ \ - if (!raw_data) \ + if (!trace_buf) \ goto end; \ \ - raw_data = per_cpu_ptr(raw_data, __cpu); \ + trace_buf = per_cpu_ptr(trace_buf, __cpu); \ + if (trace_buf->recursion++) \ + goto end_recursion; \ + \ + barrier(); \ + \ + raw_data = trace_buf->buf; \ \ *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; \ entry = (struct ftrace_raw_##call *)raw_data; \ @@ -761,6 +780,8 @@ static void ftrace_profile_##call(proto) \ perf_tp_event(event_call->id, __addr, __count, entry, \ __entry_size); \ \ +end_recursion: \ + trace_buf->recursion--; \ end: \ local_irq_restore(irq_flags); \ \ diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c index c9f687ab0d4f..e0d351b01f5a 100644 --- a/kernel/trace/trace_event_profile.c +++ b/kernel/trace/trace_event_profile.c @@ -8,41 +8,36 @@ #include #include "trace.h" -/* - * We can't use a size but a type in alloc_percpu() - * So let's create a dummy type that matches the desired size - */ -typedef struct {char buf[FTRACE_MAX_PROFILE_SIZE];} profile_buf_t; -char *trace_profile_buf; -EXPORT_SYMBOL_GPL(trace_profile_buf); +struct perf_trace_buf *perf_trace_buf; +EXPORT_SYMBOL_GPL(perf_trace_buf); -char *trace_profile_buf_nmi; -EXPORT_SYMBOL_GPL(trace_profile_buf_nmi); +struct perf_trace_buf *perf_trace_buf_nmi; +EXPORT_SYMBOL_GPL(perf_trace_buf_nmi); /* Count the events in use (per event id, not per instance) */ static int total_profile_count; static int ftrace_profile_enable_event(struct ftrace_event_call *event) { - char *buf; + struct perf_trace_buf *buf; int ret = -ENOMEM; if (atomic_inc_return(&event->profile_count)) return 0; if (!total_profile_count) { - buf = (char *)alloc_percpu(profile_buf_t); + buf = alloc_percpu(struct perf_trace_buf); if (!buf) goto fail_buf; - rcu_assign_pointer(trace_profile_buf, buf); + rcu_assign_pointer(perf_trace_buf, buf); - buf = (char *)alloc_percpu(profile_buf_t); + buf = alloc_percpu(struct perf_trace_buf); if (!buf) goto fail_buf_nmi; - rcu_assign_pointer(trace_profile_buf_nmi, buf); + rcu_assign_pointer(perf_trace_buf_nmi, buf); } ret = event->profile_enable(event); @@ -53,10 +48,10 @@ static int ftrace_profile_enable_event(struct ftrace_event_call *event) fail_buf_nmi: if (!total_profile_count) { - free_percpu(trace_profile_buf_nmi); - free_percpu(trace_profile_buf); - trace_profile_buf_nmi = NULL; - trace_profile_buf = NULL; + free_percpu(perf_trace_buf_nmi); + free_percpu(perf_trace_buf); + perf_trace_buf_nmi = NULL; + perf_trace_buf = NULL; } fail_buf: atomic_dec(&event->profile_count); @@ -84,7 +79,7 @@ int ftrace_profile_enable(int event_id) static void ftrace_profile_disable_event(struct ftrace_event_call *event) { - char *buf, *nmi_buf; + struct perf_trace_buf *buf, *nmi_buf; if (!atomic_add_negative(-1, &event->profile_count)) return; @@ -92,11 +87,11 @@ static void ftrace_profile_disable_event(struct ftrace_event_call *event) event->profile_disable(event); if (!--total_profile_count) { - buf = trace_profile_buf; - rcu_assign_pointer(trace_profile_buf, NULL); + buf = perf_trace_buf; + rcu_assign_pointer(perf_trace_buf, NULL); - nmi_buf = trace_profile_buf_nmi; - rcu_assign_pointer(trace_profile_buf_nmi, NULL); + nmi_buf = perf_trace_buf_nmi; + rcu_assign_pointer(perf_trace_buf_nmi, NULL); /* * Ensure every events in profiling have finished before diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index cf17a6694f32..3696476f307d 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1208,6 +1208,7 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); struct ftrace_event_call *call = &tp->call; struct kprobe_trace_entry *entry; + struct perf_trace_buf *trace_buf; struct trace_entry *ent; int size, __size, i, pc, __cpu; unsigned long irq_flags; @@ -1229,14 +1230,26 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, __cpu = smp_processor_id(); if (in_nmi()) - raw_data = rcu_dereference(trace_profile_buf_nmi); + trace_buf = rcu_dereference(perf_trace_buf_nmi); else - raw_data = rcu_dereference(trace_profile_buf); + trace_buf = rcu_dereference(perf_trace_buf); - if (!raw_data) + if (!trace_buf) goto end; - raw_data = per_cpu_ptr(raw_data, __cpu); + trace_buf = per_cpu_ptr(trace_buf, __cpu); + + if (trace_buf->recursion++) + goto end_recursion; + + /* + * Make recursion update visible before entering perf_tp_event + * so that we protect from perf recursions. + */ + barrier(); + + raw_data = trace_buf->buf; + /* Zero dead bytes from alignment to avoid buffer leak to userspace */ *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; entry = (struct kprobe_trace_entry *)raw_data; @@ -1249,8 +1262,12 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, for (i = 0; i < tp->nr_args; i++) entry->args[i] = call_fetch(&tp->args[i].fetch, regs); perf_tp_event(call->id, entry->ip, 1, entry, size); + +end_recursion: + trace_buf->recursion--; end: local_irq_restore(irq_flags); + return 0; } @@ -1261,6 +1278,7 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); struct ftrace_event_call *call = &tp->call; struct kretprobe_trace_entry *entry; + struct perf_trace_buf *trace_buf; struct trace_entry *ent; int size, __size, i, pc, __cpu; unsigned long irq_flags; @@ -1282,14 +1300,26 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, __cpu = smp_processor_id(); if (in_nmi()) - raw_data = rcu_dereference(trace_profile_buf_nmi); + trace_buf = rcu_dereference(perf_trace_buf_nmi); else - raw_data = rcu_dereference(trace_profile_buf); + trace_buf = rcu_dereference(perf_trace_buf); - if (!raw_data) + if (!trace_buf) goto end; - raw_data = per_cpu_ptr(raw_data, __cpu); + trace_buf = per_cpu_ptr(trace_buf, __cpu); + + if (trace_buf->recursion++) + goto end_recursion; + + /* + * Make recursion update visible before entering perf_tp_event + * so that we protect from perf recursions. + */ + barrier(); + + raw_data = trace_buf->buf; + /* Zero dead bytes from alignment to avoid buffer leak to userspace */ *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; entry = (struct kretprobe_trace_entry *)raw_data; @@ -1303,8 +1333,12 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, for (i = 0; i < tp->nr_args; i++) entry->args[i] = call_fetch(&tp->args[i].fetch, regs); perf_tp_event(call->id, entry->ret_ip, 1, entry, size); + +end_recursion: + trace_buf->recursion--; end: local_irq_restore(irq_flags); + return 0; } diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 58b8e5370767..51213b0aa81b 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -477,6 +477,7 @@ static int sys_prof_refcount_exit; static void prof_syscall_enter(struct pt_regs *regs, long id) { struct syscall_metadata *sys_data; + struct perf_trace_buf *trace_buf; struct syscall_trace_enter *rec; unsigned long flags; char *raw_data; @@ -507,14 +508,25 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) cpu = smp_processor_id(); if (in_nmi()) - raw_data = rcu_dereference(trace_profile_buf_nmi); + trace_buf = rcu_dereference(perf_trace_buf_nmi); else - raw_data = rcu_dereference(trace_profile_buf); + trace_buf = rcu_dereference(perf_trace_buf); - if (!raw_data) + if (!trace_buf) goto end; - raw_data = per_cpu_ptr(raw_data, cpu); + trace_buf = per_cpu_ptr(trace_buf, cpu); + + if (trace_buf->recursion++) + goto end_recursion; + + /* + * Make recursion update visible before entering perf_tp_event + * so that we protect from perf recursions. + */ + barrier(); + + raw_data = trace_buf->buf; /* zero the dead bytes from align to not leak stack to user */ *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; @@ -527,6 +539,8 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) (unsigned long *)&rec->args); perf_tp_event(sys_data->enter_id, 0, 1, rec, size); +end_recursion: + trace_buf->recursion--; end: local_irq_restore(flags); } @@ -574,6 +588,7 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) { struct syscall_metadata *sys_data; struct syscall_trace_exit *rec; + struct perf_trace_buf *trace_buf; unsigned long flags; int syscall_nr; char *raw_data; @@ -605,14 +620,25 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) cpu = smp_processor_id(); if (in_nmi()) - raw_data = rcu_dereference(trace_profile_buf_nmi); + trace_buf = rcu_dereference(perf_trace_buf_nmi); else - raw_data = rcu_dereference(trace_profile_buf); + trace_buf = rcu_dereference(perf_trace_buf); - if (!raw_data) + if (!trace_buf) goto end; - raw_data = per_cpu_ptr(raw_data, cpu); + trace_buf = per_cpu_ptr(trace_buf, cpu); + + if (trace_buf->recursion++) + goto end_recursion; + + /* + * Make recursion update visible before entering perf_tp_event + * so that we protect from perf recursions. + */ + barrier(); + + raw_data = trace_buf->buf; /* zero the dead bytes from align to not leak stack to user */ *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; @@ -626,6 +652,8 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) perf_tp_event(sys_data->exit_id, 0, 1, rec, size); +end_recursion: + trace_buf->recursion--; end: local_irq_restore(flags); } -- cgit v1.3-8-gc7d7 From e0000163e30eeb112b41486ea113fd54f64e1f17 Mon Sep 17 00:00:00 2001 From: Christian Pellegrin Date: Mon, 2 Nov 2009 23:07:00 +0000 Subject: can: Driver for the Microchip MCP251x SPI CAN controllers Signed-off-by: Christian Pellegrin Signed-off-by: Wolfgang Grandegger Signed-off-by: David S. Miller --- drivers/net/can/Kconfig | 6 + drivers/net/can/Makefile | 1 + drivers/net/can/mcp251x.c | 1164 ++++++++++++++++++++++++++++++++++ include/linux/can/platform/mcp251x.h | 36 ++ 4 files changed, 1207 insertions(+) create mode 100644 drivers/net/can/mcp251x.c create mode 100644 include/linux/can/platform/mcp251x.h (limited to 'include') diff --git a/drivers/net/can/Kconfig b/drivers/net/can/Kconfig index 26d77cc0ded7..b819cc2a429e 100644 --- a/drivers/net/can/Kconfig +++ b/drivers/net/can/Kconfig @@ -102,6 +102,12 @@ config CAN_TI_HECC Driver for TI HECC (High End CAN Controller) module found on many TI devices. The device specifications are available from www.ti.com +config CAN_MCP251X + tristate "Microchip MCP251x SPI CAN controllers" + depends on CAN_DEV && SPI + ---help--- + Driver for the Microchip MCP251x SPI CAN controllers. + config CAN_DEBUG_DEVICES bool "CAN devices debugging messages" depends on CAN diff --git a/drivers/net/can/Makefile b/drivers/net/can/Makefile index 31f4ab5df28b..14891817ea5b 100644 --- a/drivers/net/can/Makefile +++ b/drivers/net/can/Makefile @@ -12,5 +12,6 @@ obj-y += usb/ obj-$(CONFIG_CAN_SJA1000) += sja1000/ obj-$(CONFIG_CAN_AT91) += at91_can.o obj-$(CONFIG_CAN_TI_HECC) += ti_hecc.o +obj-$(CONFIG_CAN_MCP251X) += mcp251x.o ccflags-$(CONFIG_CAN_DEBUG_DEVICES) := -DDEBUG diff --git a/drivers/net/can/mcp251x.c b/drivers/net/can/mcp251x.c new file mode 100644 index 000000000000..8f48f4b50b7c --- /dev/null +++ b/drivers/net/can/mcp251x.c @@ -0,0 +1,1164 @@ +/* + * CAN bus driver for Microchip 251x CAN Controller with SPI Interface + * + * MCP2510 support and bug fixes by Christian Pellegrin + * + * + * Copyright 2009 Christian Pellegrin EVOL S.r.l. + * + * Copyright 2007 Raymarine UK, Ltd. All Rights Reserved. + * Written under contract by: + * Chris Elston, Katalix Systems, Ltd. + * + * Based on Microchip MCP251x CAN controller driver written by + * David Vrabel, Copyright 2006 Arcom Control Systems Ltd. + * + * Based on CAN bus driver for the CCAN controller written by + * - Sascha Hauer, Marc Kleine-Budde, Pengutronix + * - Simon Kallweit, intefo AG + * Copyright 2007 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the version 2 of the GNU General Public License + * as published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * + * + * Your platform definition file should specify something like: + * + * static struct mcp251x_platform_data mcp251x_info = { + * .oscillator_frequency = 8000000, + * .board_specific_setup = &mcp251x_setup, + * .model = CAN_MCP251X_MCP2510, + * .power_enable = mcp251x_power_enable, + * .transceiver_enable = NULL, + * }; + * + * static struct spi_board_info spi_board_info[] = { + * { + * .modalias = "mcp251x", + * .platform_data = &mcp251x_info, + * .irq = IRQ_EINT13, + * .max_speed_hz = 2*1000*1000, + * .chip_select = 2, + * }, + * }; + * + * Please see mcp251x.h for a description of the fields in + * struct mcp251x_platform_data. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* SPI interface instruction set */ +#define INSTRUCTION_WRITE 0x02 +#define INSTRUCTION_READ 0x03 +#define INSTRUCTION_BIT_MODIFY 0x05 +#define INSTRUCTION_LOAD_TXB(n) (0x40 + 2 * (n)) +#define INSTRUCTION_READ_RXB(n) (((n) == 0) ? 0x90 : 0x94) +#define INSTRUCTION_RESET 0xC0 + +/* MPC251x registers */ +#define CANSTAT 0x0e +#define CANCTRL 0x0f +# define CANCTRL_REQOP_MASK 0xe0 +# define CANCTRL_REQOP_CONF 0x80 +# define CANCTRL_REQOP_LISTEN_ONLY 0x60 +# define CANCTRL_REQOP_LOOPBACK 0x40 +# define CANCTRL_REQOP_SLEEP 0x20 +# define CANCTRL_REQOP_NORMAL 0x00 +# define CANCTRL_OSM 0x08 +# define CANCTRL_ABAT 0x10 +#define TEC 0x1c +#define REC 0x1d +#define CNF1 0x2a +# define CNF1_SJW_SHIFT 6 +#define CNF2 0x29 +# define CNF2_BTLMODE 0x80 +# define CNF2_SAM 0x40 +# define CNF2_PS1_SHIFT 3 +#define CNF3 0x28 +# define CNF3_SOF 0x08 +# define CNF3_WAKFIL 0x04 +# define CNF3_PHSEG2_MASK 0x07 +#define CANINTE 0x2b +# define CANINTE_MERRE 0x80 +# define CANINTE_WAKIE 0x40 +# define CANINTE_ERRIE 0x20 +# define CANINTE_TX2IE 0x10 +# define CANINTE_TX1IE 0x08 +# define CANINTE_TX0IE 0x04 +# define CANINTE_RX1IE 0x02 +# define CANINTE_RX0IE 0x01 +#define CANINTF 0x2c +# define CANINTF_MERRF 0x80 +# define CANINTF_WAKIF 0x40 +# define CANINTF_ERRIF 0x20 +# define CANINTF_TX2IF 0x10 +# define CANINTF_TX1IF 0x08 +# define CANINTF_TX0IF 0x04 +# define CANINTF_RX1IF 0x02 +# define CANINTF_RX0IF 0x01 +#define EFLG 0x2d +# define EFLG_EWARN 0x01 +# define EFLG_RXWAR 0x02 +# define EFLG_TXWAR 0x04 +# define EFLG_RXEP 0x08 +# define EFLG_TXEP 0x10 +# define EFLG_TXBO 0x20 +# define EFLG_RX0OVR 0x40 +# define EFLG_RX1OVR 0x80 +#define TXBCTRL(n) (((n) * 0x10) + 0x30 + TXBCTRL_OFF) +# define TXBCTRL_ABTF 0x40 +# define TXBCTRL_MLOA 0x20 +# define TXBCTRL_TXERR 0x10 +# define TXBCTRL_TXREQ 0x08 +#define TXBSIDH(n) (((n) * 0x10) + 0x30 + TXBSIDH_OFF) +# define SIDH_SHIFT 3 +#define TXBSIDL(n) (((n) * 0x10) + 0x30 + TXBSIDL_OFF) +# define SIDL_SID_MASK 7 +# define SIDL_SID_SHIFT 5 +# define SIDL_EXIDE_SHIFT 3 +# define SIDL_EID_SHIFT 16 +# define SIDL_EID_MASK 3 +#define TXBEID8(n) (((n) * 0x10) + 0x30 + TXBEID8_OFF) +#define TXBEID0(n) (((n) * 0x10) + 0x30 + TXBEID0_OFF) +#define TXBDLC(n) (((n) * 0x10) + 0x30 + TXBDLC_OFF) +# define DLC_RTR_SHIFT 6 +#define TXBCTRL_OFF 0 +#define TXBSIDH_OFF 1 +#define TXBSIDL_OFF 2 +#define TXBEID8_OFF 3 +#define TXBEID0_OFF 4 +#define TXBDLC_OFF 5 +#define TXBDAT_OFF 6 +#define RXBCTRL(n) (((n) * 0x10) + 0x60 + RXBCTRL_OFF) +# define RXBCTRL_BUKT 0x04 +# define RXBCTRL_RXM0 0x20 +# define RXBCTRL_RXM1 0x40 +#define RXBSIDH(n) (((n) * 0x10) + 0x60 + RXBSIDH_OFF) +# define RXBSIDH_SHIFT 3 +#define RXBSIDL(n) (((n) * 0x10) + 0x60 + RXBSIDL_OFF) +# define RXBSIDL_IDE 0x08 +# define RXBSIDL_EID 3 +# define RXBSIDL_SHIFT 5 +#define RXBEID8(n) (((n) * 0x10) + 0x60 + RXBEID8_OFF) +#define RXBEID0(n) (((n) * 0x10) + 0x60 + RXBEID0_OFF) +#define RXBDLC(n) (((n) * 0x10) + 0x60 + RXBDLC_OFF) +# define RXBDLC_LEN_MASK 0x0f +# define RXBDLC_RTR 0x40 +#define RXBCTRL_OFF 0 +#define RXBSIDH_OFF 1 +#define RXBSIDL_OFF 2 +#define RXBEID8_OFF 3 +#define RXBEID0_OFF 4 +#define RXBDLC_OFF 5 +#define RXBDAT_OFF 6 + +#define GET_BYTE(val, byte) \ + (((val) >> ((byte) * 8)) & 0xff) +#define SET_BYTE(val, byte) \ + (((val) & 0xff) << ((byte) * 8)) + +/* + * Buffer size required for the largest SPI transfer (i.e., reading a + * frame) + */ +#define CAN_FRAME_MAX_DATA_LEN 8 +#define SPI_TRANSFER_BUF_LEN (6 + CAN_FRAME_MAX_DATA_LEN) +#define CAN_FRAME_MAX_BITS 128 + +#define TX_ECHO_SKB_MAX 1 + +#define DEVICE_NAME "mcp251x" + +static int mcp251x_enable_dma; /* Enable SPI DMA. Default: 0 (Off) */ +module_param(mcp251x_enable_dma, int, S_IRUGO); +MODULE_PARM_DESC(mcp251x_enable_dma, "Enable SPI DMA. Default: 0 (Off)"); + +static struct can_bittiming_const mcp251x_bittiming_const = { + .name = DEVICE_NAME, + .tseg1_min = 3, + .tseg1_max = 16, + .tseg2_min = 2, + .tseg2_max = 8, + .sjw_max = 4, + .brp_min = 1, + .brp_max = 64, + .brp_inc = 1, +}; + +struct mcp251x_priv { + struct can_priv can; + struct net_device *net; + struct spi_device *spi; + + struct mutex spi_lock; /* SPI buffer lock */ + u8 *spi_tx_buf; + u8 *spi_rx_buf; + dma_addr_t spi_tx_dma; + dma_addr_t spi_rx_dma; + + struct sk_buff *tx_skb; + int tx_len; + struct workqueue_struct *wq; + struct work_struct tx_work; + struct work_struct irq_work; + struct completion awake; + int wake; + int force_quit; + int after_suspend; +#define AFTER_SUSPEND_UP 1 +#define AFTER_SUSPEND_DOWN 2 +#define AFTER_SUSPEND_POWER 4 +#define AFTER_SUSPEND_RESTART 8 + int restart_tx; +}; + +static void mcp251x_clean(struct net_device *net) +{ + struct mcp251x_priv *priv = netdev_priv(net); + + net->stats.tx_errors++; + if (priv->tx_skb) + dev_kfree_skb(priv->tx_skb); + if (priv->tx_len) + can_free_echo_skb(priv->net, 0); + priv->tx_skb = NULL; + priv->tx_len = 0; +} + +/* + * Note about handling of error return of mcp251x_spi_trans: accessing + * registers via SPI is not really different conceptually than using + * normal I/O assembler instructions, although it's much more + * complicated from a practical POV. So it's not advisable to always + * check the return value of this function. Imagine that every + * read{b,l}, write{b,l} and friends would be bracketed in "if ( < 0) + * error();", it would be a great mess (well there are some situation + * when exception handling C++ like could be useful after all). So we + * just check that transfers are OK at the beginning of our + * conversation with the chip and to avoid doing really nasty things + * (like injecting bogus packets in the network stack). + */ +static int mcp251x_spi_trans(struct spi_device *spi, int len) +{ + struct mcp251x_priv *priv = dev_get_drvdata(&spi->dev); + struct spi_transfer t = { + .tx_buf = priv->spi_tx_buf, + .rx_buf = priv->spi_rx_buf, + .len = len, + .cs_change = 0, + }; + struct spi_message m; + int ret; + + spi_message_init(&m); + + if (mcp251x_enable_dma) { + t.tx_dma = priv->spi_tx_dma; + t.rx_dma = priv->spi_rx_dma; + m.is_dma_mapped = 1; + } + + spi_message_add_tail(&t, &m); + + ret = spi_sync(spi, &m); + if (ret) + dev_err(&spi->dev, "spi transfer failed: ret = %d\n", ret); + return ret; +} + +static u8 mcp251x_read_reg(struct spi_device *spi, uint8_t reg) +{ + struct mcp251x_priv *priv = dev_get_drvdata(&spi->dev); + u8 val = 0; + + mutex_lock(&priv->spi_lock); + + priv->spi_tx_buf[0] = INSTRUCTION_READ; + priv->spi_tx_buf[1] = reg; + + mcp251x_spi_trans(spi, 3); + val = priv->spi_rx_buf[2]; + + mutex_unlock(&priv->spi_lock); + + return val; +} + +static void mcp251x_write_reg(struct spi_device *spi, u8 reg, uint8_t val) +{ + struct mcp251x_priv *priv = dev_get_drvdata(&spi->dev); + + mutex_lock(&priv->spi_lock); + + priv->spi_tx_buf[0] = INSTRUCTION_WRITE; + priv->spi_tx_buf[1] = reg; + priv->spi_tx_buf[2] = val; + + mcp251x_spi_trans(spi, 3); + + mutex_unlock(&priv->spi_lock); +} + +static void mcp251x_write_bits(struct spi_device *spi, u8 reg, + u8 mask, uint8_t val) +{ + struct mcp251x_priv *priv = dev_get_drvdata(&spi->dev); + + mutex_lock(&priv->spi_lock); + + priv->spi_tx_buf[0] = INSTRUCTION_BIT_MODIFY; + priv->spi_tx_buf[1] = reg; + priv->spi_tx_buf[2] = mask; + priv->spi_tx_buf[3] = val; + + mcp251x_spi_trans(spi, 4); + + mutex_unlock(&priv->spi_lock); +} + +static void mcp251x_hw_tx_frame(struct spi_device *spi, u8 *buf, + int len, int tx_buf_idx) +{ + struct mcp251x_platform_data *pdata = spi->dev.platform_data; + struct mcp251x_priv *priv = dev_get_drvdata(&spi->dev); + + if (pdata->model == CAN_MCP251X_MCP2510) { + int i; + + for (i = 1; i < TXBDAT_OFF + len; i++) + mcp251x_write_reg(spi, TXBCTRL(tx_buf_idx) + i, + buf[i]); + } else { + mutex_lock(&priv->spi_lock); + memcpy(priv->spi_tx_buf, buf, TXBDAT_OFF + len); + mcp251x_spi_trans(spi, TXBDAT_OFF + len); + mutex_unlock(&priv->spi_lock); + } +} + +static void mcp251x_hw_tx(struct spi_device *spi, struct can_frame *frame, + int tx_buf_idx) +{ + u32 sid, eid, exide, rtr; + u8 buf[SPI_TRANSFER_BUF_LEN]; + + exide = (frame->can_id & CAN_EFF_FLAG) ? 1 : 0; /* Extended ID Enable */ + if (exide) + sid = (frame->can_id & CAN_EFF_MASK) >> 18; + else + sid = frame->can_id & CAN_SFF_MASK; /* Standard ID */ + eid = frame->can_id & CAN_EFF_MASK; /* Extended ID */ + rtr = (frame->can_id & CAN_RTR_FLAG) ? 1 : 0; /* Remote transmission */ + + buf[TXBCTRL_OFF] = INSTRUCTION_LOAD_TXB(tx_buf_idx); + buf[TXBSIDH_OFF] = sid >> SIDH_SHIFT; + buf[TXBSIDL_OFF] = ((sid & SIDL_SID_MASK) << SIDL_SID_SHIFT) | + (exide << SIDL_EXIDE_SHIFT) | + ((eid >> SIDL_EID_SHIFT) & SIDL_EID_MASK); + buf[TXBEID8_OFF] = GET_BYTE(eid, 1); + buf[TXBEID0_OFF] = GET_BYTE(eid, 0); + buf[TXBDLC_OFF] = (rtr << DLC_RTR_SHIFT) | frame->can_dlc; + memcpy(buf + TXBDAT_OFF, frame->data, frame->can_dlc); + mcp251x_hw_tx_frame(spi, buf, frame->can_dlc, tx_buf_idx); + mcp251x_write_reg(spi, TXBCTRL(tx_buf_idx), TXBCTRL_TXREQ); +} + +static void mcp251x_hw_rx_frame(struct spi_device *spi, u8 *buf, + int buf_idx) +{ + struct mcp251x_priv *priv = dev_get_drvdata(&spi->dev); + struct mcp251x_platform_data *pdata = spi->dev.platform_data; + + if (pdata->model == CAN_MCP251X_MCP2510) { + int i, len; + + for (i = 1; i < RXBDAT_OFF; i++) + buf[i] = mcp251x_read_reg(spi, RXBCTRL(buf_idx) + i); + len = buf[RXBDLC_OFF] & RXBDLC_LEN_MASK; + if (len > 8) + len = 8; + for (; i < (RXBDAT_OFF + len); i++) + buf[i] = mcp251x_read_reg(spi, RXBCTRL(buf_idx) + i); + } else { + mutex_lock(&priv->spi_lock); + + priv->spi_tx_buf[RXBCTRL_OFF] = INSTRUCTION_READ_RXB(buf_idx); + mcp251x_spi_trans(spi, SPI_TRANSFER_BUF_LEN); + memcpy(buf, priv->spi_rx_buf, SPI_TRANSFER_BUF_LEN); + + mutex_unlock(&priv->spi_lock); + } +} + +static void mcp251x_hw_rx(struct spi_device *spi, int buf_idx) +{ + struct mcp251x_priv *priv = dev_get_drvdata(&spi->dev); + struct sk_buff *skb; + struct can_frame *frame; + u8 buf[SPI_TRANSFER_BUF_LEN]; + + skb = alloc_can_skb(priv->net, &frame); + if (!skb) { + dev_err(&spi->dev, "cannot allocate RX skb\n"); + priv->net->stats.rx_dropped++; + return; + } + + mcp251x_hw_rx_frame(spi, buf, buf_idx); + if (buf[RXBSIDL_OFF] & RXBSIDL_IDE) { + /* Extended ID format */ + frame->can_id = CAN_EFF_FLAG; + frame->can_id |= + /* Extended ID part */ + SET_BYTE(buf[RXBSIDL_OFF] & RXBSIDL_EID, 2) | + SET_BYTE(buf[RXBEID8_OFF], 1) | + SET_BYTE(buf[RXBEID0_OFF], 0) | + /* Standard ID part */ + (((buf[RXBSIDH_OFF] << RXBSIDH_SHIFT) | + (buf[RXBSIDL_OFF] >> RXBSIDL_SHIFT)) << 18); + /* Remote transmission request */ + if (buf[RXBDLC_OFF] & RXBDLC_RTR) + frame->can_id |= CAN_RTR_FLAG; + } else { + /* Standard ID format */ + frame->can_id = + (buf[RXBSIDH_OFF] << RXBSIDH_SHIFT) | + (buf[RXBSIDL_OFF] >> RXBSIDL_SHIFT); + } + /* Data length */ + frame->can_dlc = buf[RXBDLC_OFF] & RXBDLC_LEN_MASK; + if (frame->can_dlc > 8) { + dev_warn(&spi->dev, "invalid frame recevied\n"); + priv->net->stats.rx_errors++; + dev_kfree_skb(skb); + return; + } + memcpy(frame->data, buf + RXBDAT_OFF, frame->can_dlc); + + priv->net->stats.rx_packets++; + priv->net->stats.rx_bytes += frame->can_dlc; + netif_rx(skb); +} + +static void mcp251x_hw_sleep(struct spi_device *spi) +{ + mcp251x_write_reg(spi, CANCTRL, CANCTRL_REQOP_SLEEP); +} + +static void mcp251x_hw_wakeup(struct spi_device *spi) +{ + struct mcp251x_priv *priv = dev_get_drvdata(&spi->dev); + + priv->wake = 1; + + /* Can only wake up by generating a wake-up interrupt. */ + mcp251x_write_bits(spi, CANINTE, CANINTE_WAKIE, CANINTE_WAKIE); + mcp251x_write_bits(spi, CANINTF, CANINTF_WAKIF, CANINTF_WAKIF); + + /* Wait until the device is awake */ + if (!wait_for_completion_timeout(&priv->awake, HZ)) + dev_err(&spi->dev, "MCP251x didn't wake-up\n"); +} + +static netdev_tx_t mcp251x_hard_start_xmit(struct sk_buff *skb, + struct net_device *net) +{ + struct mcp251x_priv *priv = netdev_priv(net); + struct spi_device *spi = priv->spi; + + if (priv->tx_skb || priv->tx_len) { + dev_warn(&spi->dev, "hard_xmit called while tx busy\n"); + netif_stop_queue(net); + return NETDEV_TX_BUSY; + } + + if (skb->len != sizeof(struct can_frame)) { + dev_err(&spi->dev, "dropping packet - bad length\n"); + dev_kfree_skb(skb); + net->stats.tx_dropped++; + return NETDEV_TX_OK; + } + + netif_stop_queue(net); + priv->tx_skb = skb; + net->trans_start = jiffies; + queue_work(priv->wq, &priv->tx_work); + + return NETDEV_TX_OK; +} + +static int mcp251x_do_set_mode(struct net_device *net, enum can_mode mode) +{ + struct mcp251x_priv *priv = netdev_priv(net); + + switch (mode) { + case CAN_MODE_START: + /* We have to delay work since SPI I/O may sleep */ + priv->can.state = CAN_STATE_ERROR_ACTIVE; + priv->restart_tx = 1; + if (priv->can.restart_ms == 0) + priv->after_suspend = AFTER_SUSPEND_RESTART; + queue_work(priv->wq, &priv->irq_work); + break; + default: + return -EOPNOTSUPP; + } + + return 0; +} + +static void mcp251x_set_normal_mode(struct spi_device *spi) +{ + struct mcp251x_priv *priv = dev_get_drvdata(&spi->dev); + unsigned long timeout; + + /* Enable interrupts */ + mcp251x_write_reg(spi, CANINTE, + CANINTE_ERRIE | CANINTE_TX2IE | CANINTE_TX1IE | + CANINTE_TX0IE | CANINTE_RX1IE | CANINTE_RX0IE | + CANINTF_MERRF); + + if (priv->can.ctrlmode & CAN_CTRLMODE_LOOPBACK) { + /* Put device into loopback mode */ + mcp251x_write_reg(spi, CANCTRL, CANCTRL_REQOP_LOOPBACK); + } else { + /* Put device into normal mode */ + mcp251x_write_reg(spi, CANCTRL, CANCTRL_REQOP_NORMAL); + + /* Wait for the device to enter normal mode */ + timeout = jiffies + HZ; + while (mcp251x_read_reg(spi, CANSTAT) & CANCTRL_REQOP_MASK) { + schedule(); + if (time_after(jiffies, timeout)) { + dev_err(&spi->dev, "MCP251x didn't" + " enter in normal mode\n"); + return; + } + } + } + priv->can.state = CAN_STATE_ERROR_ACTIVE; +} + +static int mcp251x_do_set_bittiming(struct net_device *net) +{ + struct mcp251x_priv *priv = netdev_priv(net); + struct can_bittiming *bt = &priv->can.bittiming; + struct spi_device *spi = priv->spi; + + mcp251x_write_reg(spi, CNF1, ((bt->sjw - 1) << CNF1_SJW_SHIFT) | + (bt->brp - 1)); + mcp251x_write_reg(spi, CNF2, CNF2_BTLMODE | + (priv->can.ctrlmode & CAN_CTRLMODE_3_SAMPLES ? + CNF2_SAM : 0) | + ((bt->phase_seg1 - 1) << CNF2_PS1_SHIFT) | + (bt->prop_seg - 1)); + mcp251x_write_bits(spi, CNF3, CNF3_PHSEG2_MASK, + (bt->phase_seg2 - 1)); + dev_info(&spi->dev, "CNF: 0x%02x 0x%02x 0x%02x\n", + mcp251x_read_reg(spi, CNF1), + mcp251x_read_reg(spi, CNF2), + mcp251x_read_reg(spi, CNF3)); + + return 0; +} + +static int mcp251x_setup(struct net_device *net, struct mcp251x_priv *priv, + struct spi_device *spi) +{ + int ret; + + ret = open_candev(net); + if (ret) { + dev_err(&spi->dev, "unable to set initial baudrate!\n"); + return ret; + } + + /* Enable RX0->RX1 buffer roll over and disable filters */ + mcp251x_write_bits(spi, RXBCTRL(0), + RXBCTRL_BUKT | RXBCTRL_RXM0 | RXBCTRL_RXM1, + RXBCTRL_BUKT | RXBCTRL_RXM0 | RXBCTRL_RXM1); + mcp251x_write_bits(spi, RXBCTRL(1), + RXBCTRL_RXM0 | RXBCTRL_RXM1, + RXBCTRL_RXM0 | RXBCTRL_RXM1); + return 0; +} + +static void mcp251x_hw_reset(struct spi_device *spi) +{ + struct mcp251x_priv *priv = dev_get_drvdata(&spi->dev); + int ret; + + mutex_lock(&priv->spi_lock); + + priv->spi_tx_buf[0] = INSTRUCTION_RESET; + + ret = spi_write(spi, priv->spi_tx_buf, 1); + + mutex_unlock(&priv->spi_lock); + + if (ret) + dev_err(&spi->dev, "reset failed: ret = %d\n", ret); + /* Wait for reset to finish */ + mdelay(10); +} + +static int mcp251x_hw_probe(struct spi_device *spi) +{ + int st1, st2; + + mcp251x_hw_reset(spi); + + /* + * Please note that these are "magic values" based on after + * reset defaults taken from data sheet which allows us to see + * if we really have a chip on the bus (we avoid common all + * zeroes or all ones situations) + */ + st1 = mcp251x_read_reg(spi, CANSTAT) & 0xEE; + st2 = mcp251x_read_reg(spi, CANCTRL) & 0x17; + + dev_dbg(&spi->dev, "CANSTAT 0x%02x CANCTRL 0x%02x\n", st1, st2); + + /* Check for power up default values */ + return (st1 == 0x80 && st2 == 0x07) ? 1 : 0; +} + +static irqreturn_t mcp251x_can_isr(int irq, void *dev_id) +{ + struct net_device *net = (struct net_device *)dev_id; + struct mcp251x_priv *priv = netdev_priv(net); + + /* Schedule bottom half */ + if (!work_pending(&priv->irq_work)) + queue_work(priv->wq, &priv->irq_work); + + return IRQ_HANDLED; +} + +static int mcp251x_open(struct net_device *net) +{ + struct mcp251x_priv *priv = netdev_priv(net); + struct spi_device *spi = priv->spi; + struct mcp251x_platform_data *pdata = spi->dev.platform_data; + int ret; + + if (pdata->transceiver_enable) + pdata->transceiver_enable(1); + + priv->force_quit = 0; + priv->tx_skb = NULL; + priv->tx_len = 0; + + ret = request_irq(spi->irq, mcp251x_can_isr, + IRQF_TRIGGER_FALLING, DEVICE_NAME, net); + if (ret) { + dev_err(&spi->dev, "failed to acquire irq %d\n", spi->irq); + if (pdata->transceiver_enable) + pdata->transceiver_enable(0); + return ret; + } + + mcp251x_hw_wakeup(spi); + mcp251x_hw_reset(spi); + ret = mcp251x_setup(net, priv, spi); + if (ret) { + free_irq(spi->irq, net); + if (pdata->transceiver_enable) + pdata->transceiver_enable(0); + return ret; + } + mcp251x_set_normal_mode(spi); + netif_wake_queue(net); + + return 0; +} + +static int mcp251x_stop(struct net_device *net) +{ + struct mcp251x_priv *priv = netdev_priv(net); + struct spi_device *spi = priv->spi; + struct mcp251x_platform_data *pdata = spi->dev.platform_data; + + close_candev(net); + + /* Disable and clear pending interrupts */ + mcp251x_write_reg(spi, CANINTE, 0x00); + mcp251x_write_reg(spi, CANINTF, 0x00); + + priv->force_quit = 1; + free_irq(spi->irq, net); + flush_workqueue(priv->wq); + + mcp251x_write_reg(spi, TXBCTRL(0), 0); + if (priv->tx_skb || priv->tx_len) + mcp251x_clean(net); + + mcp251x_hw_sleep(spi); + + if (pdata->transceiver_enable) + pdata->transceiver_enable(0); + + priv->can.state = CAN_STATE_STOPPED; + + return 0; +} + +static void mcp251x_tx_work_handler(struct work_struct *ws) +{ + struct mcp251x_priv *priv = container_of(ws, struct mcp251x_priv, + tx_work); + struct spi_device *spi = priv->spi; + struct net_device *net = priv->net; + struct can_frame *frame; + + if (priv->tx_skb) { + frame = (struct can_frame *)priv->tx_skb->data; + + if (priv->can.state == CAN_STATE_BUS_OFF) { + mcp251x_clean(net); + netif_wake_queue(net); + return; + } + if (frame->can_dlc > CAN_FRAME_MAX_DATA_LEN) + frame->can_dlc = CAN_FRAME_MAX_DATA_LEN; + mcp251x_hw_tx(spi, frame, 0); + priv->tx_len = 1 + frame->can_dlc; + can_put_echo_skb(priv->tx_skb, net, 0); + priv->tx_skb = NULL; + } +} + +static void mcp251x_irq_work_handler(struct work_struct *ws) +{ + struct mcp251x_priv *priv = container_of(ws, struct mcp251x_priv, + irq_work); + struct spi_device *spi = priv->spi; + struct net_device *net = priv->net; + u8 txbnctrl; + u8 intf; + enum can_state new_state; + + if (priv->after_suspend) { + mdelay(10); + mcp251x_hw_reset(spi); + mcp251x_setup(net, priv, spi); + if (priv->after_suspend & AFTER_SUSPEND_RESTART) { + mcp251x_set_normal_mode(spi); + } else if (priv->after_suspend & AFTER_SUSPEND_UP) { + netif_device_attach(net); + /* Clean since we lost tx buffer */ + if (priv->tx_skb || priv->tx_len) { + mcp251x_clean(net); + netif_wake_queue(net); + } + mcp251x_set_normal_mode(spi); + } else { + mcp251x_hw_sleep(spi); + } + priv->after_suspend = 0; + } + + if (priv->can.restart_ms == 0 && priv->can.state == CAN_STATE_BUS_OFF) + return; + + while (!priv->force_quit && !freezing(current)) { + u8 eflag = mcp251x_read_reg(spi, EFLG); + int can_id = 0, data1 = 0; + + mcp251x_write_reg(spi, EFLG, 0x00); + + if (priv->restart_tx) { + priv->restart_tx = 0; + mcp251x_write_reg(spi, TXBCTRL(0), 0); + if (priv->tx_skb || priv->tx_len) + mcp251x_clean(net); + netif_wake_queue(net); + can_id |= CAN_ERR_RESTARTED; + } + + if (priv->wake) { + /* Wait whilst the device wakes up */ + mdelay(10); + priv->wake = 0; + } + + intf = mcp251x_read_reg(spi, CANINTF); + mcp251x_write_bits(spi, CANINTF, intf, 0x00); + + /* Update can state */ + if (eflag & EFLG_TXBO) { + new_state = CAN_STATE_BUS_OFF; + can_id |= CAN_ERR_BUSOFF; + } else if (eflag & EFLG_TXEP) { + new_state = CAN_STATE_ERROR_PASSIVE; + can_id |= CAN_ERR_CRTL; + data1 |= CAN_ERR_CRTL_TX_PASSIVE; + } else if (eflag & EFLG_RXEP) { + new_state = CAN_STATE_ERROR_PASSIVE; + can_id |= CAN_ERR_CRTL; + data1 |= CAN_ERR_CRTL_RX_PASSIVE; + } else if (eflag & EFLG_TXWAR) { + new_state = CAN_STATE_ERROR_WARNING; + can_id |= CAN_ERR_CRTL; + data1 |= CAN_ERR_CRTL_TX_WARNING; + } else if (eflag & EFLG_RXWAR) { + new_state = CAN_STATE_ERROR_WARNING; + can_id |= CAN_ERR_CRTL; + data1 |= CAN_ERR_CRTL_RX_WARNING; + } else { + new_state = CAN_STATE_ERROR_ACTIVE; + } + + /* Update can state statistics */ + switch (priv->can.state) { + case CAN_STATE_ERROR_ACTIVE: + if (new_state >= CAN_STATE_ERROR_WARNING && + new_state <= CAN_STATE_BUS_OFF) + priv->can.can_stats.error_warning++; + case CAN_STATE_ERROR_WARNING: /* fallthrough */ + if (new_state >= CAN_STATE_ERROR_PASSIVE && + new_state <= CAN_STATE_BUS_OFF) + priv->can.can_stats.error_passive++; + break; + default: + break; + } + priv->can.state = new_state; + + if ((intf & CANINTF_ERRIF) || (can_id & CAN_ERR_RESTARTED)) { + struct sk_buff *skb; + struct can_frame *frame; + + /* Create error frame */ + skb = alloc_can_err_skb(net, &frame); + if (skb) { + /* Set error frame flags based on bus state */ + frame->can_id = can_id; + frame->data[1] = data1; + + /* Update net stats for overflows */ + if (eflag & (EFLG_RX0OVR | EFLG_RX1OVR)) { + if (eflag & EFLG_RX0OVR) + net->stats.rx_over_errors++; + if (eflag & EFLG_RX1OVR) + net->stats.rx_over_errors++; + frame->can_id |= CAN_ERR_CRTL; + frame->data[1] |= + CAN_ERR_CRTL_RX_OVERFLOW; + } + + netif_rx(skb); + } else { + dev_info(&spi->dev, + "cannot allocate error skb\n"); + } + } + + if (priv->can.state == CAN_STATE_BUS_OFF) { + if (priv->can.restart_ms == 0) { + can_bus_off(net); + mcp251x_hw_sleep(spi); + return; + } + } + + if (intf == 0) + break; + + if (intf & CANINTF_WAKIF) + complete(&priv->awake); + + if (intf & CANINTF_MERRF) { + /* If there are pending Tx buffers, restart queue */ + txbnctrl = mcp251x_read_reg(spi, TXBCTRL(0)); + if (!(txbnctrl & TXBCTRL_TXREQ)) { + if (priv->tx_skb || priv->tx_len) + mcp251x_clean(net); + netif_wake_queue(net); + } + } + + if (intf & (CANINTF_TX2IF | CANINTF_TX1IF | CANINTF_TX0IF)) { + net->stats.tx_packets++; + net->stats.tx_bytes += priv->tx_len - 1; + if (priv->tx_len) { + can_get_echo_skb(net, 0); + priv->tx_len = 0; + } + netif_wake_queue(net); + } + + if (intf & CANINTF_RX0IF) + mcp251x_hw_rx(spi, 0); + + if (intf & CANINTF_RX1IF) + mcp251x_hw_rx(spi, 1); + } +} + +static const struct net_device_ops mcp251x_netdev_ops = { + .ndo_open = mcp251x_open, + .ndo_stop = mcp251x_stop, + .ndo_start_xmit = mcp251x_hard_start_xmit, +}; + +static int __devinit mcp251x_can_probe(struct spi_device *spi) +{ + struct net_device *net; + struct mcp251x_priv *priv; + struct mcp251x_platform_data *pdata = spi->dev.platform_data; + int ret = -ENODEV; + + if (!pdata) + /* Platform data is required for osc freq */ + goto error_out; + + /* Allocate can/net device */ + net = alloc_candev(sizeof(struct mcp251x_priv), TX_ECHO_SKB_MAX); + if (!net) { + ret = -ENOMEM; + goto error_alloc; + } + + net->netdev_ops = &mcp251x_netdev_ops; + net->flags |= IFF_ECHO; + + priv = netdev_priv(net); + priv->can.bittiming_const = &mcp251x_bittiming_const; + priv->can.do_set_mode = mcp251x_do_set_mode; + priv->can.clock.freq = pdata->oscillator_frequency / 2; + priv->can.do_set_bittiming = mcp251x_do_set_bittiming; + priv->net = net; + dev_set_drvdata(&spi->dev, priv); + + priv->spi = spi; + mutex_init(&priv->spi_lock); + + /* If requested, allocate DMA buffers */ + if (mcp251x_enable_dma) { + spi->dev.coherent_dma_mask = ~0; + + /* + * Minimum coherent DMA allocation is PAGE_SIZE, so allocate + * that much and share it between Tx and Rx DMA buffers. + */ + priv->spi_tx_buf = dma_alloc_coherent(&spi->dev, + PAGE_SIZE, + &priv->spi_tx_dma, + GFP_DMA); + + if (priv->spi_tx_buf) { + priv->spi_rx_buf = (u8 *)(priv->spi_tx_buf + + (PAGE_SIZE / 2)); + priv->spi_rx_dma = (dma_addr_t)(priv->spi_tx_dma + + (PAGE_SIZE / 2)); + } else { + /* Fall back to non-DMA */ + mcp251x_enable_dma = 0; + } + } + + /* Allocate non-DMA buffers */ + if (!mcp251x_enable_dma) { + priv->spi_tx_buf = kmalloc(SPI_TRANSFER_BUF_LEN, GFP_KERNEL); + if (!priv->spi_tx_buf) { + ret = -ENOMEM; + goto error_tx_buf; + } + priv->spi_rx_buf = kmalloc(SPI_TRANSFER_BUF_LEN, GFP_KERNEL); + if (!priv->spi_tx_buf) { + ret = -ENOMEM; + goto error_rx_buf; + } + } + + if (pdata->power_enable) + pdata->power_enable(1); + + /* Call out to platform specific setup */ + if (pdata->board_specific_setup) + pdata->board_specific_setup(spi); + + SET_NETDEV_DEV(net, &spi->dev); + + priv->wq = create_freezeable_workqueue("mcp251x_wq"); + + INIT_WORK(&priv->tx_work, mcp251x_tx_work_handler); + INIT_WORK(&priv->irq_work, mcp251x_irq_work_handler); + + init_completion(&priv->awake); + + /* Configure the SPI bus */ + spi->mode = SPI_MODE_0; + spi->bits_per_word = 8; + spi_setup(spi); + + if (!mcp251x_hw_probe(spi)) { + dev_info(&spi->dev, "Probe failed\n"); + goto error_probe; + } + mcp251x_hw_sleep(spi); + + if (pdata->transceiver_enable) + pdata->transceiver_enable(0); + + ret = register_candev(net); + if (!ret) { + dev_info(&spi->dev, "probed\n"); + return ret; + } +error_probe: + if (!mcp251x_enable_dma) + kfree(priv->spi_rx_buf); +error_rx_buf: + if (!mcp251x_enable_dma) + kfree(priv->spi_tx_buf); +error_tx_buf: + free_candev(net); + if (mcp251x_enable_dma) + dma_free_coherent(&spi->dev, PAGE_SIZE, + priv->spi_tx_buf, priv->spi_tx_dma); +error_alloc: + if (pdata->power_enable) + pdata->power_enable(0); + dev_err(&spi->dev, "probe failed\n"); +error_out: + return ret; +} + +static int __devexit mcp251x_can_remove(struct spi_device *spi) +{ + struct mcp251x_platform_data *pdata = spi->dev.platform_data; + struct mcp251x_priv *priv = dev_get_drvdata(&spi->dev); + struct net_device *net = priv->net; + + unregister_candev(net); + free_candev(net); + + priv->force_quit = 1; + flush_workqueue(priv->wq); + destroy_workqueue(priv->wq); + + if (mcp251x_enable_dma) { + dma_free_coherent(&spi->dev, PAGE_SIZE, + priv->spi_tx_buf, priv->spi_tx_dma); + } else { + kfree(priv->spi_tx_buf); + kfree(priv->spi_rx_buf); + } + + if (pdata->power_enable) + pdata->power_enable(0); + + return 0; +} + +#ifdef CONFIG_PM +static int mcp251x_can_suspend(struct spi_device *spi, pm_message_t state) +{ + struct mcp251x_platform_data *pdata = spi->dev.platform_data; + struct mcp251x_priv *priv = dev_get_drvdata(&spi->dev); + struct net_device *net = priv->net; + + if (netif_running(net)) { + netif_device_detach(net); + + mcp251x_hw_sleep(spi); + if (pdata->transceiver_enable) + pdata->transceiver_enable(0); + priv->after_suspend = AFTER_SUSPEND_UP; + } else { + priv->after_suspend = AFTER_SUSPEND_DOWN; + } + + if (pdata->power_enable) { + pdata->power_enable(0); + priv->after_suspend |= AFTER_SUSPEND_POWER; + } + + return 0; +} + +static int mcp251x_can_resume(struct spi_device *spi) +{ + struct mcp251x_platform_data *pdata = spi->dev.platform_data; + struct mcp251x_priv *priv = dev_get_drvdata(&spi->dev); + + if (priv->after_suspend & AFTER_SUSPEND_POWER) { + pdata->power_enable(1); + queue_work(priv->wq, &priv->irq_work); + } else { + if (priv->after_suspend & AFTER_SUSPEND_UP) { + if (pdata->transceiver_enable) + pdata->transceiver_enable(1); + queue_work(priv->wq, &priv->irq_work); + } else { + priv->after_suspend = 0; + } + } + return 0; +} +#else +#define mcp251x_can_suspend NULL +#define mcp251x_can_resume NULL +#endif + +static struct spi_driver mcp251x_can_driver = { + .driver = { + .name = DEVICE_NAME, + .bus = &spi_bus_type, + .owner = THIS_MODULE, + }, + + .probe = mcp251x_can_probe, + .remove = __devexit_p(mcp251x_can_remove), + .suspend = mcp251x_can_suspend, + .resume = mcp251x_can_resume, +}; + +static int __init mcp251x_can_init(void) +{ + return spi_register_driver(&mcp251x_can_driver); +} + +static void __exit mcp251x_can_exit(void) +{ + spi_unregister_driver(&mcp251x_can_driver); +} + +module_init(mcp251x_can_init); +module_exit(mcp251x_can_exit); + +MODULE_AUTHOR("Chris Elston , " + "Christian Pellegrin "); +MODULE_DESCRIPTION("Microchip 251x CAN driver"); +MODULE_LICENSE("GPL v2"); diff --git a/include/linux/can/platform/mcp251x.h b/include/linux/can/platform/mcp251x.h new file mode 100644 index 000000000000..1448177d86d5 --- /dev/null +++ b/include/linux/can/platform/mcp251x.h @@ -0,0 +1,36 @@ +#ifndef __CAN_PLATFORM_MCP251X_H__ +#define __CAN_PLATFORM_MCP251X_H__ + +/* + * + * CAN bus driver for Microchip 251x CAN Controller with SPI Interface + * + */ + +#include + +/** + * struct mcp251x_platform_data - MCP251X SPI CAN controller platform data + * @oscillator_frequency: - oscillator frequency in Hz + * @model: - actual type of chip + * @board_specific_setup: - called before probing the chip (power,reset) + * @transceiver_enable: - called to power on/off the transceiver + * @power_enable: - called to power on/off the mcp *and* the + * transceiver + * + * Please note that you should define power_enable or transceiver_enable or + * none of them. Defining both of them is no use. + * + */ + +struct mcp251x_platform_data { + unsigned long oscillator_frequency; + int model; +#define CAN_MCP251X_MCP2510 0 +#define CAN_MCP251X_MCP2515 1 + int (*board_specific_setup)(struct spi_device *spi); + int (*transceiver_enable)(int enable); + int (*power_enable) (int enable); +}; + +#endif /* __CAN_PLATFORM_MCP251X_H__ */ -- cgit v1.3-8-gc7d7 From 24f1e32c60c45c89a997c73395b69c8af6f0a84e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 9 Sep 2009 19:22:48 +0200 Subject: hw-breakpoints: Rewrite the hw-breakpoints layer on top of perf events This patch rebase the implementation of the breakpoints API on top of perf events instances. Each breakpoints are now perf events that handle the register scheduling, thread/cpu attachment, etc.. The new layering is now made as follows: ptrace kgdb ftrace perf syscall \ | / / \ | / / / Core breakpoint API / / | / | / Breakpoints perf events | | Breakpoints PMU ---- Debug Register constraints handling (Part of core breakpoint API) | | Hardware debug registers Reasons of this rewrite: - Use the centralized/optimized pmu registers scheduling, implying an easier arch integration - More powerful register handling: perf attributes (pinned/flexible events, exclusive/non-exclusive, tunable period, etc...) Impact: - New perf ABI: the hardware breakpoints counters - Ptrace breakpoints setting remains tricky and still needs some per thread breakpoints references. Todo (in the order): - Support breakpoints perf counter events for perf tools (ie: implement perf_bpcounter_event()) - Support from perf tools Changes in v2: - Follow the perf "event " rename - The ptrace regression have been fixed (ptrace breakpoint perf events weren't released when a task ended) - Drop the struct hw_breakpoint and store generic fields in perf_event_attr. - Separate core and arch specific headers, drop asm-generic/hw_breakpoint.h and create linux/hw_breakpoint.h - Use new generic len/type for breakpoint - Handle off case: when breakpoints api is not supported by an arch Changes in v3: - Fix broken CONFIG_KVM, we need to propagate the breakpoint api changes to kvm when we exit the guest and restore the bp registers to the host. Changes in v4: - Drop the hw_breakpoint_restore() stub as it is only used by KVM - EXPORT_SYMBOL_GPL hw_breakpoint_restore() as KVM can be built as a module - Restore the breakpoints unconditionally on kvm guest exit: TIF_DEBUG_THREAD doesn't anymore cover every cases of running breakpoints and vcpu->arch.switch_db_regs might not always be set when the guest used debug registers. (Waiting for a reliable optimization) Changes in v5: - Split-up the asm-generic/hw-breakpoint.h moving to linux/hw_breakpoint.h into a separate patch - Optimize the breakpoints restoring while switching from kvm guest to host. We only want to restore the state if we have active breakpoints to the host, otherwise we don't care about messed-up address registers. - Add asm/hw_breakpoint.h to Kbuild - Fix bad breakpoint type in trace_selftest.c Changes in v6: - Fix wrong header inclusion in trace.h (triggered a build error with CONFIG_FTRACE_SELFTEST Signed-off-by: Frederic Weisbecker Cc: Prasad Cc: Alan Stern Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt Cc: Ingo Molnar Cc: Jan Kiszka Cc: Jiri Slaby Cc: Li Zefan Cc: Avi Kivity Cc: Paul Mackerras Cc: Mike Galbraith Cc: Masami Hiramatsu Cc: Paul Mundt --- arch/Kconfig | 3 + arch/x86/include/asm/Kbuild | 1 + arch/x86/include/asm/debugreg.h | 11 +- arch/x86/include/asm/hw_breakpoint.h | 58 +++-- arch/x86/include/asm/processor.h | 12 +- arch/x86/kernel/hw_breakpoint.c | 391 +++++++++++++++++++++----------- arch/x86/kernel/process.c | 7 +- arch/x86/kernel/process_32.c | 26 +-- arch/x86/kernel/process_64.c | 26 +-- arch/x86/kernel/ptrace.c | 182 ++++++++++----- arch/x86/kernel/smpboot.c | 3 - arch/x86/kvm/x86.c | 18 +- arch/x86/power/cpu.c | 6 - include/linux/hw_breakpoint.h | 243 ++++++++++---------- include/linux/perf_event.h | 26 ++- kernel/exit.c | 5 + kernel/hw_breakpoint.c | 424 ++++++++++++++--------------------- kernel/perf_event.c | 53 ++++- kernel/trace/trace.h | 5 +- kernel/trace/trace_entries.h | 6 +- kernel/trace/trace_ksym.c | 126 +++++------ kernel/trace/trace_selftest.c | 3 +- 22 files changed, 885 insertions(+), 750 deletions(-) (limited to 'include') diff --git a/arch/Kconfig b/arch/Kconfig index acb664397945..eef3bbb97075 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -128,6 +128,9 @@ config HAVE_DEFAULT_NO_SPIN_MUTEXES config HAVE_HW_BREAKPOINT bool + depends on HAVE_PERF_EVENTS + select ANON_INODES + select PERF_EVENTS source "kernel/gcov/Kconfig" diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index 4a8e80cdcfa5..9f828f87ca35 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -10,6 +10,7 @@ header-y += ptrace-abi.h header-y += sigcontext32.h header-y += ucontext.h header-y += processor-flags.h +header-y += hw_breakpoint.h unifdef-y += e820.h unifdef-y += ist.h diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index 23439fbb1d0e..9a3333c91f9a 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -75,13 +75,8 @@ */ #ifdef __KERNEL__ -/* For process management */ -extern void flush_thread_hw_breakpoint(struct task_struct *tsk); -extern int copy_thread_hw_breakpoint(struct task_struct *tsk, - struct task_struct *child, unsigned long clone_flags); +DECLARE_PER_CPU(unsigned long, dr7); -/* For CPU management */ -extern void load_debug_registers(void); static inline void hw_breakpoint_disable(void) { /* Zero the control register for HW Breakpoint */ @@ -94,6 +89,10 @@ static inline void hw_breakpoint_disable(void) set_debugreg(0UL, 3); } +#ifdef CONFIG_KVM +extern void hw_breakpoint_restore(void); +#endif + #endif /* __KERNEL__ */ #endif /* _ASM_X86_DEBUGREG_H */ diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h index 3cfca8e2b5f6..0675a7c4c20e 100644 --- a/arch/x86/include/asm/hw_breakpoint.h +++ b/arch/x86/include/asm/hw_breakpoint.h @@ -4,6 +4,11 @@ #ifdef __KERNEL__ #define __ARCH_HW_BREAKPOINT_H +/* + * The name should probably be something dealt in + * a higher level. While dealing with the user + * (display/resolving) + */ struct arch_hw_breakpoint { char *name; /* Contains name of the symbol to set bkpt */ unsigned long address; @@ -12,44 +17,57 @@ struct arch_hw_breakpoint { }; #include -#include +#include +#include /* Available HW breakpoint length encodings */ -#define HW_BREAKPOINT_LEN_1 0x40 -#define HW_BREAKPOINT_LEN_2 0x44 -#define HW_BREAKPOINT_LEN_4 0x4c -#define HW_BREAKPOINT_LEN_EXECUTE 0x40 +#define X86_BREAKPOINT_LEN_1 0x40 +#define X86_BREAKPOINT_LEN_2 0x44 +#define X86_BREAKPOINT_LEN_4 0x4c +#define X86_BREAKPOINT_LEN_EXECUTE 0x40 #ifdef CONFIG_X86_64 -#define HW_BREAKPOINT_LEN_8 0x48 +#define X86_BREAKPOINT_LEN_8 0x48 #endif /* Available HW breakpoint type encodings */ /* trigger on instruction execute */ -#define HW_BREAKPOINT_EXECUTE 0x80 +#define X86_BREAKPOINT_EXECUTE 0x80 /* trigger on memory write */ -#define HW_BREAKPOINT_WRITE 0x81 +#define X86_BREAKPOINT_WRITE 0x81 /* trigger on memory read or write */ -#define HW_BREAKPOINT_RW 0x83 +#define X86_BREAKPOINT_RW 0x83 /* Total number of available HW breakpoint registers */ #define HBP_NUM 4 -extern struct hw_breakpoint *hbp_kernel[HBP_NUM]; -DECLARE_PER_CPU(struct hw_breakpoint*, this_hbp_kernel[HBP_NUM]); -extern unsigned int hbp_user_refcount[HBP_NUM]; +struct perf_event; +struct pmu; -extern void arch_install_thread_hw_breakpoint(struct task_struct *tsk); -extern void arch_uninstall_thread_hw_breakpoint(void); extern int arch_check_va_in_userspace(unsigned long va, u8 hbp_len); -extern int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp, - struct task_struct *tsk); -extern void arch_update_user_hw_breakpoint(int pos, struct task_struct *tsk); -extern void arch_flush_thread_hw_breakpoint(struct task_struct *tsk); -extern void arch_update_kernel_hw_breakpoint(void *); +extern int arch_validate_hwbkpt_settings(struct perf_event *bp, + struct task_struct *tsk); extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused, - unsigned long val, void *data); + unsigned long val, void *data); + + +int arch_install_hw_breakpoint(struct perf_event *bp); +void arch_uninstall_hw_breakpoint(struct perf_event *bp); +void hw_breakpoint_pmu_read(struct perf_event *bp); +void hw_breakpoint_pmu_unthrottle(struct perf_event *bp); + +extern void +arch_fill_perf_breakpoint(struct perf_event *bp); + +unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type); +int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type); + +extern int arch_bp_generic_fields(int x86_len, int x86_type, + int *gen_len, int *gen_type); + +extern struct pmu perf_ops_bp; + #endif /* __KERNEL__ */ #endif /* _I386_HW_BREAKPOINT_H */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 61aafb71c7ef..820f3000f736 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -423,6 +423,8 @@ extern unsigned int xstate_size; extern void free_thread_xstate(struct task_struct *); extern struct kmem_cache *task_xstate_cachep; +struct perf_event; + struct thread_struct { /* Cached TLS descriptors: */ struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES]; @@ -444,12 +446,10 @@ struct thread_struct { unsigned long fs; #endif unsigned long gs; - /* Hardware debugging registers: */ - unsigned long debugreg[HBP_NUM]; - unsigned long debugreg6; - unsigned long debugreg7; - /* Hardware breakpoint info */ - struct hw_breakpoint *hbp[HBP_NUM]; + /* Save middle states of ptrace breakpoints */ + struct perf_event *ptrace_bps[HBP_NUM]; + /* Debug status used for traps, single steps, etc... */ + unsigned long debugreg6; /* Fault info: */ unsigned long cr2; unsigned long trap_no; diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index 9316a9de4de3..e622620790bd 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -15,6 +15,7 @@ * * Copyright (C) 2007 Alan Stern * Copyright (C) 2009 IBM Corporation + * Copyright (C) 2009 Frederic Weisbecker */ /* @@ -22,6 +23,8 @@ * using the CPU's debug registers. */ +#include +#include #include #include #include @@ -38,26 +41,24 @@ #include #include -/* Unmasked kernel DR7 value */ -static unsigned long kdr7; +/* Per cpu debug control register value */ +DEFINE_PER_CPU(unsigned long, dr7); + +/* Per cpu debug address registers values */ +static DEFINE_PER_CPU(unsigned long, cpu_debugreg[HBP_NUM]); /* - * Masks for the bits corresponding to registers DR0 - DR3 in DR7 register. - * Used to clear and verify the status of bits corresponding to DR0 - DR3 + * Stores the breakpoints currently in use on each breakpoint address + * register for each cpus */ -static const unsigned long dr7_masks[HBP_NUM] = { - 0x000f0003, /* LEN0, R/W0, G0, L0 */ - 0x00f0000c, /* LEN1, R/W1, G1, L1 */ - 0x0f000030, /* LEN2, R/W2, G2, L2 */ - 0xf00000c0 /* LEN3, R/W3, G3, L3 */ -}; +static DEFINE_PER_CPU(struct perf_event *, bp_per_reg[HBP_NUM]); /* * Encode the length, type, Exact, and Enable bits for a particular breakpoint * as stored in debug register 7. */ -static unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type) +unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type) { unsigned long bp_info; @@ -68,64 +69,89 @@ static unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type) return bp_info; } -void arch_update_kernel_hw_breakpoint(void *unused) +/* + * Decode the length and type bits for a particular breakpoint as + * stored in debug register 7. Return the "enabled" status. + */ +int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type) { - struct hw_breakpoint *bp; - int i, cpu = get_cpu(); - unsigned long temp_kdr7 = 0; - - /* Don't allow debug exceptions while we update the registers */ - set_debugreg(0UL, 7); + int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE); - for (i = hbp_kernel_pos; i < HBP_NUM; i++) { - per_cpu(this_hbp_kernel[i], cpu) = bp = hbp_kernel[i]; - if (bp) { - temp_kdr7 |= encode_dr7(i, bp->info.len, bp->info.type); - set_debugreg(bp->info.address, i); - } - } + *len = (bp_info & 0xc) | 0x40; + *type = (bp_info & 0x3) | 0x80; - /* No need to set DR6. Update the debug registers with kernel-space - * breakpoint values from kdr7 and user-space requests from the - * current process - */ - kdr7 = temp_kdr7; - set_debugreg(kdr7 | current->thread.debugreg7, 7); - put_cpu(); + return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3; } /* - * Install the thread breakpoints in their debug registers. + * Install a perf counter breakpoint. + * + * We seek a free debug address register and use it for this + * breakpoint. Eventually we enable it in the debug control register. + * + * Atomic: we hold the counter->ctx->lock and we only handle variables + * and registers local to this cpu. */ -void arch_install_thread_hw_breakpoint(struct task_struct *tsk) +int arch_install_hw_breakpoint(struct perf_event *bp) { - struct thread_struct *thread = &(tsk->thread); - - switch (hbp_kernel_pos) { - case 4: - set_debugreg(thread->debugreg[3], 3); - case 3: - set_debugreg(thread->debugreg[2], 2); - case 2: - set_debugreg(thread->debugreg[1], 1); - case 1: - set_debugreg(thread->debugreg[0], 0); - default: - break; + struct arch_hw_breakpoint *info = counter_arch_bp(bp); + unsigned long *dr7; + int i; + + for (i = 0; i < HBP_NUM; i++) { + struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]); + + if (!*slot) { + *slot = bp; + break; + } } - /* No need to set DR6 */ - set_debugreg((kdr7 | thread->debugreg7), 7); + if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot")) + return -EBUSY; + + set_debugreg(info->address, i); + __get_cpu_var(cpu_debugreg[i]) = info->address; + + dr7 = &__get_cpu_var(dr7); + *dr7 |= encode_dr7(i, info->len, info->type); + + set_debugreg(*dr7, 7); + + return 0; } /* - * Install the debug register values for just the kernel, no thread. + * Uninstall the breakpoint contained in the given counter. + * + * First we search the debug address register it uses and then we disable + * it. + * + * Atomic: we hold the counter->ctx->lock and we only handle variables + * and registers local to this cpu. */ -void arch_uninstall_thread_hw_breakpoint(void) +void arch_uninstall_hw_breakpoint(struct perf_event *bp) { - /* Clear the user-space portion of debugreg7 by setting only kdr7 */ - set_debugreg(kdr7, 7); + struct arch_hw_breakpoint *info = counter_arch_bp(bp); + unsigned long *dr7; + int i; + + for (i = 0; i < HBP_NUM; i++) { + struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]); + + if (*slot == bp) { + *slot = NULL; + break; + } + } + + if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot")) + return; + dr7 = &__get_cpu_var(dr7); + *dr7 &= ~encode_dr7(i, info->len, info->type); + + set_debugreg(*dr7, 7); } static int get_hbp_len(u8 hbp_len) @@ -133,17 +159,17 @@ static int get_hbp_len(u8 hbp_len) unsigned int len_in_bytes = 0; switch (hbp_len) { - case HW_BREAKPOINT_LEN_1: + case X86_BREAKPOINT_LEN_1: len_in_bytes = 1; break; - case HW_BREAKPOINT_LEN_2: + case X86_BREAKPOINT_LEN_2: len_in_bytes = 2; break; - case HW_BREAKPOINT_LEN_4: + case X86_BREAKPOINT_LEN_4: len_in_bytes = 4; break; #ifdef CONFIG_X86_64 - case HW_BREAKPOINT_LEN_8: + case X86_BREAKPOINT_LEN_8: len_in_bytes = 8; break; #endif @@ -178,67 +204,146 @@ static int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len) /* * Store a breakpoint's encoded address, length, and type. */ -static int arch_store_info(struct hw_breakpoint *bp, struct task_struct *tsk) +static int arch_store_info(struct perf_event *bp) { - /* - * User-space requests will always have the address field populated - * Symbol names from user-space are rejected - */ - if (tsk && bp->info.name) - return -EINVAL; + struct arch_hw_breakpoint *info = counter_arch_bp(bp); /* * For kernel-addresses, either the address or symbol name can be * specified. */ - if (bp->info.name) - bp->info.address = (unsigned long) - kallsyms_lookup_name(bp->info.name); - if (bp->info.address) + if (info->name) + info->address = (unsigned long) + kallsyms_lookup_name(info->name); + if (info->address) return 0; + return -EINVAL; } -/* - * Validate the arch-specific HW Breakpoint register settings - */ -int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp, - struct task_struct *tsk) +int arch_bp_generic_fields(int x86_len, int x86_type, + int *gen_len, int *gen_type) { - unsigned int align; - int ret = -EINVAL; + /* Len */ + switch (x86_len) { + case X86_BREAKPOINT_LEN_1: + *gen_len = HW_BREAKPOINT_LEN_1; + break; + case X86_BREAKPOINT_LEN_2: + *gen_len = HW_BREAKPOINT_LEN_2; + break; + case X86_BREAKPOINT_LEN_4: + *gen_len = HW_BREAKPOINT_LEN_4; + break; +#ifdef CONFIG_X86_64 + case X86_BREAKPOINT_LEN_8: + *gen_len = HW_BREAKPOINT_LEN_8; + break; +#endif + default: + return -EINVAL; + } - switch (bp->info.type) { - /* - * Ptrace-refactoring code - * For now, we'll allow instruction breakpoint only for user-space - * addresses - */ - case HW_BREAKPOINT_EXECUTE: - if ((!arch_check_va_in_userspace(bp->info.address, - bp->info.len)) && - bp->info.len != HW_BREAKPOINT_LEN_EXECUTE) - return ret; + /* Type */ + switch (x86_type) { + case X86_BREAKPOINT_EXECUTE: + *gen_type = HW_BREAKPOINT_X; break; - case HW_BREAKPOINT_WRITE: + case X86_BREAKPOINT_WRITE: + *gen_type = HW_BREAKPOINT_W; break; - case HW_BREAKPOINT_RW: + case X86_BREAKPOINT_RW: + *gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R; break; default: - return ret; + return -EINVAL; } - switch (bp->info.len) { + return 0; +} + + +static int arch_build_bp_info(struct perf_event *bp) +{ + struct arch_hw_breakpoint *info = counter_arch_bp(bp); + + info->address = bp->attr.bp_addr; + + /* Len */ + switch (bp->attr.bp_len) { case HW_BREAKPOINT_LEN_1: - align = 0; + info->len = X86_BREAKPOINT_LEN_1; break; case HW_BREAKPOINT_LEN_2: - align = 1; + info->len = X86_BREAKPOINT_LEN_2; break; case HW_BREAKPOINT_LEN_4: - align = 3; + info->len = X86_BREAKPOINT_LEN_4; break; #ifdef CONFIG_X86_64 case HW_BREAKPOINT_LEN_8: + info->len = X86_BREAKPOINT_LEN_8; + break; +#endif + default: + return -EINVAL; + } + + /* Type */ + switch (bp->attr.bp_type) { + case HW_BREAKPOINT_W: + info->type = X86_BREAKPOINT_WRITE; + break; + case HW_BREAKPOINT_W | HW_BREAKPOINT_R: + info->type = X86_BREAKPOINT_RW; + break; + case HW_BREAKPOINT_X: + info->type = X86_BREAKPOINT_EXECUTE; + break; + default: + return -EINVAL; + } + + return 0; +} +/* + * Validate the arch-specific HW Breakpoint register settings + */ +int arch_validate_hwbkpt_settings(struct perf_event *bp, + struct task_struct *tsk) +{ + struct arch_hw_breakpoint *info = counter_arch_bp(bp); + unsigned int align; + int ret; + + + ret = arch_build_bp_info(bp); + if (ret) + return ret; + + ret = -EINVAL; + + if (info->type == X86_BREAKPOINT_EXECUTE) + /* + * Ptrace-refactoring code + * For now, we'll allow instruction breakpoint only for user-space + * addresses + */ + if ((!arch_check_va_in_userspace(info->address, info->len)) && + info->len != X86_BREAKPOINT_EXECUTE) + return ret; + + switch (info->len) { + case X86_BREAKPOINT_LEN_1: + align = 0; + break; + case X86_BREAKPOINT_LEN_2: + align = 1; + break; + case X86_BREAKPOINT_LEN_4: + align = 3; + break; +#ifdef CONFIG_X86_64 + case X86_BREAKPOINT_LEN_8: align = 7; break; #endif @@ -246,8 +351,8 @@ int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp, return ret; } - if (bp->triggered) - ret = arch_store_info(bp, tsk); + if (bp->callback) + ret = arch_store_info(bp); if (ret < 0) return ret; @@ -255,44 +360,47 @@ int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp, * Check that the low-order bits of the address are appropriate * for the alignment implied by len. */ - if (bp->info.address & align) + if (info->address & align) return -EINVAL; /* Check that the virtual address is in the proper range */ if (tsk) { - if (!arch_check_va_in_userspace(bp->info.address, bp->info.len)) + if (!arch_check_va_in_userspace(info->address, info->len)) return -EFAULT; } else { - if (!arch_check_va_in_kernelspace(bp->info.address, - bp->info.len)) + if (!arch_check_va_in_kernelspace(info->address, info->len)) return -EFAULT; } + return 0; } -void arch_update_user_hw_breakpoint(int pos, struct task_struct *tsk) +/* + * Release the user breakpoints used by ptrace + */ +void flush_ptrace_hw_breakpoint(struct task_struct *tsk) { - struct thread_struct *thread = &(tsk->thread); - struct hw_breakpoint *bp = thread->hbp[pos]; - - thread->debugreg7 &= ~dr7_masks[pos]; - if (bp) { - thread->debugreg[pos] = bp->info.address; - thread->debugreg7 |= encode_dr7(pos, bp->info.len, - bp->info.type); - } else - thread->debugreg[pos] = 0; + int i; + struct thread_struct *t = &tsk->thread; + + for (i = 0; i < HBP_NUM; i++) { + unregister_hw_breakpoint(t->ptrace_bps[i]); + t->ptrace_bps[i] = NULL; + } } -void arch_flush_thread_hw_breakpoint(struct task_struct *tsk) +#ifdef CONFIG_KVM +void hw_breakpoint_restore(void) { - int i; - struct thread_struct *thread = &(tsk->thread); - - thread->debugreg7 = 0; - for (i = 0; i < HBP_NUM; i++) - thread->debugreg[i] = 0; + set_debugreg(__get_cpu_var(cpu_debugreg[0]), 0); + set_debugreg(__get_cpu_var(cpu_debugreg[1]), 1); + set_debugreg(__get_cpu_var(cpu_debugreg[2]), 2); + set_debugreg(__get_cpu_var(cpu_debugreg[3]), 3); + set_debugreg(current->thread.debugreg6, 6); + set_debugreg(__get_cpu_var(dr7), 7); } +EXPORT_SYMBOL_GPL(hw_breakpoint_restore); +#endif /* * Handle debug exception notifications. @@ -313,7 +421,7 @@ void arch_flush_thread_hw_breakpoint(struct task_struct *tsk) static int __kprobes hw_breakpoint_handler(struct die_args *args) { int i, cpu, rc = NOTIFY_STOP; - struct hw_breakpoint *bp; + struct perf_event *bp; unsigned long dr7, dr6; unsigned long *dr6_p; @@ -325,10 +433,6 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args) if ((dr6 & DR_TRAP_BITS) == 0) return NOTIFY_DONE; - /* Lazy debug register switching */ - if (!test_tsk_thread_flag(current, TIF_DEBUG)) - arch_uninstall_thread_hw_breakpoint(); - get_debugreg(dr7, 7); /* Disable breakpoints during exception handling */ set_debugreg(0UL, 7); @@ -344,17 +448,18 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args) for (i = 0; i < HBP_NUM; ++i) { if (likely(!(dr6 & (DR_TRAP0 << i)))) continue; + /* - * Find the corresponding hw_breakpoint structure and - * invoke its triggered callback. + * The counter may be concurrently released but that can only + * occur from a call_rcu() path. We can then safely fetch + * the breakpoint, use its callback, touch its counter + * while we are in an rcu_read_lock() path. */ - if (i >= hbp_kernel_pos) - bp = per_cpu(this_hbp_kernel[i], cpu); - else { - bp = current->thread.hbp[i]; - if (bp) - rc = NOTIFY_DONE; - } + rcu_read_lock(); + + bp = per_cpu(bp_per_reg[i], cpu); + if (bp) + rc = NOTIFY_DONE; /* * Reset the 'i'th TRAP bit in dr6 to denote completion of * exception handling @@ -362,19 +467,23 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args) (*dr6_p) &= ~(DR_TRAP0 << i); /* * bp can be NULL due to lazy debug register switching - * or due to the delay between updates of hbp_kernel_pos - * and this_hbp_kernel. + * or due to concurrent perf counter removing. */ - if (!bp) - continue; + if (!bp) { + rcu_read_unlock(); + break; + } + + (bp->callback)(bp, args->regs); - (bp->triggered)(bp, args->regs); + rcu_read_unlock(); } if (dr6 & (~DR_TRAP_BITS)) rc = NOTIFY_DONE; set_debugreg(dr7, 7); put_cpu(); + return rc; } @@ -389,3 +498,13 @@ int __kprobes hw_breakpoint_exceptions_notify( return hw_breakpoint_handler(data); } + +void hw_breakpoint_pmu_read(struct perf_event *bp) +{ + /* TODO */ +} + +void hw_breakpoint_pmu_unthrottle(struct perf_event *bp) +{ + /* TODO */ +} diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index cf8ee0016307..744508e7cfdd 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -18,7 +19,6 @@ #include #include #include -#include unsigned long idle_halt; EXPORT_SYMBOL(idle_halt); @@ -47,8 +47,6 @@ void free_thread_xstate(struct task_struct *tsk) kmem_cache_free(task_xstate_cachep, tsk->thread.xstate); tsk->thread.xstate = NULL; } - if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG))) - flush_thread_hw_breakpoint(tsk); WARN(tsk->thread.ds_ctx, "leaking DS context\n"); } @@ -107,8 +105,7 @@ void flush_thread(void) } #endif - if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG))) - flush_thread_hw_breakpoint(tsk); + flush_ptrace_hw_breakpoint(tsk); memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); /* * Forget coprocessor state.. diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 209e74801763..d5bd3132ee70 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -59,7 +59,6 @@ #include #include #include -#include asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); @@ -264,9 +263,8 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, p->thread.io_bitmap_ptr = NULL; tsk = current; err = -ENOMEM; - if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG))) - if (copy_thread_hw_breakpoint(tsk, p, clone_flags)) - goto out; + + memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, @@ -287,13 +285,10 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, err = do_set_thread_area(p, -1, (struct user_desc __user *)childregs->si, 0); -out: if (err && p->thread.io_bitmap_ptr) { kfree(p->thread.io_bitmap_ptr); p->thread.io_bitmap_max = 0; } - if (err) - flush_thread_hw_breakpoint(p); clear_tsk_thread_flag(p, TIF_DS_AREA_MSR); p->thread.ds_ctx = NULL; @@ -437,23 +432,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) lazy_load_gs(next->gs); percpu_write(current_task, next_p); - /* - * There's a problem with moving the arch_install_thread_hw_breakpoint() - * call before current is updated. Suppose a kernel breakpoint is - * triggered in between the two, the hw-breakpoint handler will see that - * the 'current' task does not have TIF_DEBUG flag set and will think it - * is leftover from an old task (lazy switching) and will erase it. Then - * until the next context switch, no user-breakpoints will be installed. - * - * The real problem is that it's impossible to update both current and - * physical debug registers at the same instant, so there will always be - * a window in which they disagree and a breakpoint might get triggered. - * Since we use lazy switching, we are forced to assume that a - * disagreement means that current is correct and the exception is due - * to lazy debug register switching. - */ - if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG))) - arch_install_thread_hw_breakpoint(next_p); return prev_p; } diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 72edac026a78..5bafdec34441 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -53,7 +53,6 @@ #include #include #include -#include asmlinkage extern void ret_from_fork(void); @@ -244,8 +243,6 @@ void release_thread(struct task_struct *dead_task) BUG(); } } - if (unlikely(dead_task->thread.debugreg7)) - flush_thread_hw_breakpoint(dead_task); } static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr) @@ -309,9 +306,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, savesegment(ds, p->thread.ds); err = -ENOMEM; - if (unlikely(test_tsk_thread_flag(me, TIF_DEBUG))) - if (copy_thread_hw_breakpoint(me, p, clone_flags)) - goto out; + memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); @@ -351,8 +346,6 @@ out: kfree(p->thread.io_bitmap_ptr); p->thread.io_bitmap_max = 0; } - if (err) - flush_thread_hw_breakpoint(p); return err; } @@ -508,23 +501,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) */ if (preload_fpu) __math_state_restore(); - /* - * There's a problem with moving the arch_install_thread_hw_breakpoint() - * call before current is updated. Suppose a kernel breakpoint is - * triggered in between the two, the hw-breakpoint handler will see that - * the 'current' task does not have TIF_DEBUG flag set and will think it - * is leftover from an old task (lazy switching) and will erase it. Then - * until the next context switch, no user-breakpoints will be installed. - * - * The real problem is that it's impossible to update both current and - * physical debug registers at the same instant, so there will always be - * a window in which they disagree and a breakpoint might get triggered. - * Since we use lazy switching, we are forced to assume that a - * disagreement means that current is correct and the exception is due - * to lazy debug register switching. - */ - if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG))) - arch_install_thread_hw_breakpoint(next_p); return prev_p; } diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 267cb85b479c..e79610d95971 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -22,6 +22,8 @@ #include #include #include +#include +#include #include #include @@ -441,54 +443,59 @@ static int genregs_set(struct task_struct *target, return ret; } -/* - * Decode the length and type bits for a particular breakpoint as - * stored in debug register 7. Return the "enabled" status. - */ -static int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, - unsigned *type) -{ - int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE); - - *len = (bp_info & 0xc) | 0x40; - *type = (bp_info & 0x3) | 0x80; - return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3; -} - -static void ptrace_triggered(struct hw_breakpoint *bp, struct pt_regs *regs) +static void ptrace_triggered(struct perf_event *bp, void *data) { - struct thread_struct *thread = &(current->thread); int i; + struct thread_struct *thread = &(current->thread); /* * Store in the virtual DR6 register the fact that the breakpoint * was hit so the thread's debugger will see it. */ - for (i = 0; i < hbp_kernel_pos; i++) - /* - * We will check bp->info.address against the address stored in - * thread's hbp structure and not debugreg[i]. This is to ensure - * that the corresponding bit for 'i' in DR7 register is enabled - */ - if (bp->info.address == thread->hbp[i]->info.address) + for (i = 0; i < HBP_NUM; i++) { + if (thread->ptrace_bps[i] == bp) break; + } thread->debugreg6 |= (DR_TRAP0 << i); } +/* + * Walk through every ptrace breakpoints for this thread and + * build the dr7 value on top of their attributes. + * + */ +static unsigned long ptrace_get_dr7(struct perf_event *bp[]) +{ + int i; + int dr7 = 0; + struct arch_hw_breakpoint *info; + + for (i = 0; i < HBP_NUM; i++) { + if (bp[i] && !bp[i]->attr.disabled) { + info = counter_arch_bp(bp[i]); + dr7 |= encode_dr7(i, info->len, info->type); + } + } + + return dr7; +} + /* * Handle ptrace writes to debug register 7. */ static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data) { struct thread_struct *thread = &(tsk->thread); - unsigned long old_dr7 = thread->debugreg7; + unsigned long old_dr7; int i, orig_ret = 0, rc = 0; int enabled, second_pass = 0; unsigned len, type; - struct hw_breakpoint *bp; + int gen_len, gen_type; + struct perf_event *bp; data &= ~DR_CONTROL_RESERVED; + old_dr7 = ptrace_get_dr7(thread->ptrace_bps); restore: /* * Loop through all the hardware breakpoints, making the @@ -496,11 +503,12 @@ restore: */ for (i = 0; i < HBP_NUM; i++) { enabled = decode_dr7(data, i, &len, &type); - bp = thread->hbp[i]; + bp = thread->ptrace_bps[i]; if (!enabled) { if (bp) { - /* Don't unregister the breakpoints right-away, + /* + * Don't unregister the breakpoints right-away, * unless all register_user_hw_breakpoint() * requests have succeeded. This prevents * any window of opportunity for debug @@ -508,27 +516,45 @@ restore: */ if (!second_pass) continue; - unregister_user_hw_breakpoint(tsk, bp); - kfree(bp); + thread->ptrace_bps[i] = NULL; + unregister_hw_breakpoint(bp); } continue; } + + /* + * We shoud have at least an inactive breakpoint at this + * slot. It means the user is writing dr7 without having + * written the address register first + */ if (!bp) { - rc = -ENOMEM; - bp = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL); - if (bp) { - bp->info.address = thread->debugreg[i]; - bp->triggered = ptrace_triggered; - bp->info.len = len; - bp->info.type = type; - rc = register_user_hw_breakpoint(tsk, bp); - if (rc) - kfree(bp); - } - } else - rc = modify_user_hw_breakpoint(tsk, bp); + rc = -EINVAL; + break; + } + + rc = arch_bp_generic_fields(len, type, &gen_len, &gen_type); if (rc) break; + + /* + * This is a temporary thing as bp is unregistered/registered + * to simulate modification + */ + bp = modify_user_hw_breakpoint(bp, bp->attr.bp_addr, gen_len, + gen_type, bp->callback, + tsk, true); + thread->ptrace_bps[i] = NULL; + + if (!bp) { /* incorrect bp, or we have a bug in bp API */ + rc = -EINVAL; + break; + } + if (IS_ERR(bp)) { + rc = PTR_ERR(bp); + bp = NULL; + break; + } + thread->ptrace_bps[i] = bp; } /* * Make a second pass to free the remaining unused breakpoints @@ -553,15 +579,63 @@ static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n) struct thread_struct *thread = &(tsk->thread); unsigned long val = 0; - if (n < HBP_NUM) - val = thread->debugreg[n]; - else if (n == 6) + if (n < HBP_NUM) { + struct perf_event *bp; + bp = thread->ptrace_bps[n]; + if (!bp) + return 0; + val = bp->hw.info.address; + } else if (n == 6) { val = thread->debugreg6; - else if (n == 7) - val = thread->debugreg7; + } else if (n == 7) { + val = ptrace_get_dr7(thread->ptrace_bps); + } return val; } +static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr, + unsigned long addr) +{ + struct perf_event *bp; + struct thread_struct *t = &tsk->thread; + + if (!t->ptrace_bps[nr]) { + /* + * Put stub len and type to register (reserve) an inactive but + * correct bp + */ + bp = register_user_hw_breakpoint(addr, HW_BREAKPOINT_LEN_1, + HW_BREAKPOINT_W, + ptrace_triggered, tsk, + false); + } else { + bp = t->ptrace_bps[nr]; + t->ptrace_bps[nr] = NULL; + bp = modify_user_hw_breakpoint(bp, addr, bp->attr.bp_len, + bp->attr.bp_type, + bp->callback, + tsk, + bp->attr.disabled); + } + + if (!bp) + return -EIO; + /* + * CHECKME: the previous code returned -EIO if the addr wasn't a + * valid task virtual addr. The new one will return -EINVAL in this + * case. + * -EINVAL may be what we want for in-kernel breakpoints users, but + * -EIO looks better for ptrace, since we refuse a register writing + * for the user. And anyway this is the previous behaviour. + */ + if (IS_ERR(bp)) + return PTR_ERR(bp); + + t->ptrace_bps[nr] = bp; + + return 0; +} + /* * Handle PTRACE_POKEUSR calls for the debug register area. */ @@ -575,19 +649,13 @@ int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val) return -EIO; if (n == 6) { - tsk->thread.debugreg6 = val; + thread->debugreg6 = val; goto ret_path; } if (n < HBP_NUM) { - if (thread->hbp[n]) { - if (arch_check_va_in_userspace(val, - thread->hbp[n]->info.len) == 0) { - rc = -EIO; - goto ret_path; - } - thread->hbp[n]->info.address = val; - } - thread->debugreg[n] = val; + rc = ptrace_set_breakpoint_addr(tsk, n, val); + if (rc) + return rc; } /* All that's left is DR7 */ if (n == 7) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 213a7a3e4562..565ebc65920e 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -64,7 +64,6 @@ #include #include #include -#include #include #include @@ -328,7 +327,6 @@ notrace static void __cpuinit start_secondary(void *unused) x86_cpuinit.setup_percpu_clockev(); wmb(); - load_debug_registers(); cpu_idle(); } @@ -1269,7 +1267,6 @@ void cpu_disable_common(void) remove_cpu_from_maps(cpu); unlock_vector_lock(); fixup_irqs(); - hw_breakpoint_disable(); } int native_cpu_disable(void) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index fc2974adf9b6..22dee7aa7813 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -42,6 +42,7 @@ #define CREATE_TRACE_POINTS #include "trace.h" +#include #include #include #include @@ -3643,14 +3644,15 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) trace_kvm_entry(vcpu->vcpu_id); kvm_x86_ops->run(vcpu, kvm_run); - if (unlikely(vcpu->arch.switch_db_regs || test_thread_flag(TIF_DEBUG))) { - set_debugreg(current->thread.debugreg[0], 0); - set_debugreg(current->thread.debugreg[1], 1); - set_debugreg(current->thread.debugreg[2], 2); - set_debugreg(current->thread.debugreg[3], 3); - set_debugreg(current->thread.debugreg6, 6); - set_debugreg(current->thread.debugreg7, 7); - } + /* + * If the guest has used debug registers, at least dr7 + * will be disabled while returning to the host. + * If we don't have active breakpoints in the host, we don't + * care about the messed up debug address registers. But if + * we have some of them active, restore the old state. + */ + if (__get_cpu_var(dr7) & DR_GLOBAL_ENABLE_MASK) + hw_breakpoint_restore(); set_bit(KVM_REQ_KICK, &vcpu->requests); local_irq_enable(); diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index e09a44fc4664..0a979f3e5b8a 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -105,7 +105,6 @@ static void __save_processor_state(struct saved_context *ctxt) ctxt->cr4 = read_cr4(); ctxt->cr8 = read_cr8(); #endif - hw_breakpoint_disable(); } /* Needed by apm.c */ @@ -144,11 +143,6 @@ static void fix_processor_context(void) #endif load_TR_desc(); /* This does ltr */ load_LDT(¤t->active_mm->context); /* This does lldt */ - - /* - * Now maybe reload the debug registers - */ - load_debug_registers(); } /** diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h index 61ccc8f17eac..7eba9b92e5f3 100644 --- a/include/linux/hw_breakpoint.h +++ b/include/linux/hw_breakpoint.h @@ -1,136 +1,131 @@ #ifndef _LINUX_HW_BREAKPOINT_H #define _LINUX_HW_BREAKPOINT_H +#include -#ifdef __KERNEL__ -#include -#include -#include - -/** - * struct hw_breakpoint - unified kernel/user-space hardware breakpoint - * @triggered: callback invoked after target address access - * @info: arch-specific breakpoint info (address, length, and type) - * - * %hw_breakpoint structures are the kernel's way of representing - * hardware breakpoints. These are data breakpoints - * (also known as "watchpoints", triggered on data access), and the breakpoint's - * target address can be located in either kernel space or user space. - * - * The breakpoint's address, length, and type are highly - * architecture-specific. The values are encoded in the @info field; you - * specify them when registering the breakpoint. To examine the encoded - * values use hw_breakpoint_get_{kaddress,uaddress,len,type}(), declared - * below. - * - * The address is specified as a regular kernel pointer (for kernel-space - * breakponts) or as an %__user pointer (for user-space breakpoints). - * With register_user_hw_breakpoint(), the address must refer to a - * location in user space. The breakpoint will be active only while the - * requested task is running. Conversely with - * register_kernel_hw_breakpoint(), the address must refer to a location - * in kernel space, and the breakpoint will be active on all CPUs - * regardless of the current task. - * - * The length is the breakpoint's extent in bytes, which is subject to - * certain limitations. include/asm/hw_breakpoint.h contains macros - * defining the available lengths for a specific architecture. Note that - * the address's alignment must match the length. The breakpoint will - * catch accesses to any byte in the range from address to address + - * (length - 1). - * - * The breakpoint's type indicates the sort of access that will cause it - * to trigger. Possible values may include: - * - * %HW_BREAKPOINT_RW (triggered on read or write access), - * %HW_BREAKPOINT_WRITE (triggered on write access), and - * %HW_BREAKPOINT_READ (triggered on read access). - * - * Appropriate macros are defined in include/asm/hw_breakpoint.h; not all - * possibilities are available on all architectures. Execute breakpoints - * must have length equal to the special value %HW_BREAKPOINT_LEN_EXECUTE. - * - * When a breakpoint gets hit, the @triggered callback is - * invoked in_interrupt with a pointer to the %hw_breakpoint structure and the - * processor registers. - * Data breakpoints occur after the memory access has taken place. - * Breakpoints are disabled during execution @triggered, to avoid - * recursive traps and allow unhindered access to breakpointed memory. - * - * This sample code sets a breakpoint on pid_max and registers a callback - * function for writes to that variable. Note that it is not portable - * as written, because not all architectures support HW_BREAKPOINT_LEN_4. - * - * ---------------------------------------------------------------------- - * - * #include - * - * struct hw_breakpoint my_bp; - * - * static void my_triggered(struct hw_breakpoint *bp, struct pt_regs *regs) - * { - * printk(KERN_DEBUG "Inside triggered routine of breakpoint exception\n"); - * dump_stack(); - * ............... - * } - * - * static struct hw_breakpoint my_bp; - * - * static int init_module(void) - * { - * ...................... - * my_bp.info.type = HW_BREAKPOINT_WRITE; - * my_bp.info.len = HW_BREAKPOINT_LEN_4; - * - * my_bp.installed = (void *)my_bp_installed; - * - * rc = register_kernel_hw_breakpoint(&my_bp); - * ...................... - * } - * - * static void cleanup_module(void) - * { - * ...................... - * unregister_kernel_hw_breakpoint(&my_bp); - * ...................... - * } - * - * ---------------------------------------------------------------------- - */ -struct hw_breakpoint { - void (*triggered)(struct hw_breakpoint *, struct pt_regs *); - struct arch_hw_breakpoint info; +enum { + HW_BREAKPOINT_LEN_1 = 1, + HW_BREAKPOINT_LEN_2 = 2, + HW_BREAKPOINT_LEN_4 = 4, + HW_BREAKPOINT_LEN_8 = 8, }; -/* - * len and type values are defined in include/asm/hw_breakpoint.h. - * Available values vary according to the architecture. On i386 the - * possibilities are: - * - * HW_BREAKPOINT_LEN_1 - * HW_BREAKPOINT_LEN_2 - * HW_BREAKPOINT_LEN_4 - * HW_BREAKPOINT_RW - * HW_BREAKPOINT_READ - * - * On other architectures HW_BREAKPOINT_LEN_8 may be available, and the - * 1-, 2-, and 4-byte lengths may be unavailable. There also may be - * HW_BREAKPOINT_WRITE. You can use #ifdef to check at compile time. - */ +enum { + HW_BREAKPOINT_R = 1, + HW_BREAKPOINT_W = 2, + HW_BREAKPOINT_X = 4, +}; + +static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp) +{ + return &bp->hw.info; +} + +static inline unsigned long hw_breakpoint_addr(struct perf_event *bp) +{ + return bp->attr.bp_addr; +} + +static inline int hw_breakpoint_type(struct perf_event *bp) +{ + return bp->attr.bp_type; +} + +static inline int hw_breakpoint_len(struct perf_event *bp) +{ + return bp->attr.bp_len; +} + +#ifdef CONFIG_HAVE_HW_BREAKPOINT +extern struct perf_event * +register_user_hw_breakpoint(unsigned long addr, + int len, + int type, + perf_callback_t triggered, + struct task_struct *tsk, + bool active); + +/* FIXME: only change from the attr, and don't unregister */ +extern struct perf_event * +modify_user_hw_breakpoint(struct perf_event *bp, + unsigned long addr, + int len, + int type, + perf_callback_t triggered, + struct task_struct *tsk, + bool active); -extern int register_user_hw_breakpoint(struct task_struct *tsk, - struct hw_breakpoint *bp); -extern int modify_user_hw_breakpoint(struct task_struct *tsk, - struct hw_breakpoint *bp); -extern void unregister_user_hw_breakpoint(struct task_struct *tsk, - struct hw_breakpoint *bp); /* * Kernel breakpoints are not associated with any particular thread. */ -extern int register_kernel_hw_breakpoint(struct hw_breakpoint *bp); -extern void unregister_kernel_hw_breakpoint(struct hw_breakpoint *bp); +extern struct perf_event * +register_wide_hw_breakpoint_cpu(unsigned long addr, + int len, + int type, + perf_callback_t triggered, + int cpu, + bool active); + +extern struct perf_event ** +register_wide_hw_breakpoint(unsigned long addr, + int len, + int type, + perf_callback_t triggered, + bool active); + +extern int register_perf_hw_breakpoint(struct perf_event *bp); +extern int __register_perf_hw_breakpoint(struct perf_event *bp); +extern void unregister_hw_breakpoint(struct perf_event *bp); +extern void unregister_wide_hw_breakpoint(struct perf_event **cpu_events); + +extern int reserve_bp_slot(struct perf_event *bp); +extern void release_bp_slot(struct perf_event *bp); + +extern void flush_ptrace_hw_breakpoint(struct task_struct *tsk); + +#else /* !CONFIG_HAVE_HW_BREAKPOINT */ + +static inline struct perf_event * +register_user_hw_breakpoint(unsigned long addr, + int len, + int type, + perf_callback_t triggered, + struct task_struct *tsk, + bool active) { return NULL; } +static inline struct perf_event * +modify_user_hw_breakpoint(struct perf_event *bp, + unsigned long addr, + int len, + int type, + perf_callback_t triggered, + struct task_struct *tsk, + bool active) { return NULL; } +static inline struct perf_event * +register_wide_hw_breakpoint_cpu(unsigned long addr, + int len, + int type, + perf_callback_t triggered, + int cpu, + bool active) { return NULL; } +static inline struct perf_event ** +register_wide_hw_breakpoint(unsigned long addr, + int len, + int type, + perf_callback_t triggered, + bool active) { return NULL; } +static inline int +register_perf_hw_breakpoint(struct perf_event *bp) { return -ENOSYS; } +static inline int +__register_perf_hw_breakpoint(struct perf_event *bp) { return -ENOSYS; } +static inline void unregister_hw_breakpoint(struct perf_event *bp) { } +static inline void +unregister_wide_hw_breakpoint(struct perf_event **cpu_events) { } +static inline int +reserve_bp_slot(struct perf_event *bp) {return -ENOSYS; } +static inline void release_bp_slot(struct perf_event *bp) { } + +static inline void flush_ptrace_hw_breakpoint(struct task_struct *tsk) { } -extern unsigned int hbp_kernel_pos; +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ -#endif /* __KERNEL__ */ -#endif /* _LINUX_HW_BREAKPOINT_H */ +#endif /* _LINUX_HW_BREAKPOINT_H */ diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 8d54e6d25eeb..cead64ea6c15 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -18,6 +18,10 @@ #include #include +#ifdef CONFIG_HAVE_HW_BREAKPOINT +#include +#endif + /* * User-space ABI bits: */ @@ -31,6 +35,7 @@ enum perf_type_id { PERF_TYPE_TRACEPOINT = 2, PERF_TYPE_HW_CACHE = 3, PERF_TYPE_RAW = 4, + PERF_TYPE_BREAKPOINT = 5, PERF_TYPE_MAX, /* non-ABI */ }; @@ -207,6 +212,15 @@ struct perf_event_attr { __u32 wakeup_events; /* wakeup every n events */ __u32 wakeup_watermark; /* bytes before wakeup */ }; + + union { + struct { /* Hardware breakpoint info */ + __u64 bp_addr; + __u32 bp_type; + __u32 bp_len; + }; + }; + __u32 __reserved_2; __u64 __reserved_3; @@ -476,6 +490,11 @@ struct hw_perf_event { atomic64_t count; struct hrtimer hrtimer; }; +#ifdef CONFIG_HAVE_HW_BREAKPOINT + union { /* breakpoint */ + struct arch_hw_breakpoint info; + }; +#endif }; atomic64_t prev_count; u64 sample_period; @@ -588,7 +607,7 @@ struct perf_event { u64 tstamp_running; u64 tstamp_stopped; - struct perf_event_attr attr; + struct perf_event_attr attr; struct hw_perf_event hw; struct perf_event_context *ctx; @@ -643,6 +662,8 @@ struct perf_event { perf_callback_t callback; + perf_callback_t event_callback; + #endif /* CONFIG_PERF_EVENTS */ }; @@ -831,6 +852,7 @@ extern int sysctl_perf_event_sample_rate; extern void perf_event_init(void); extern void perf_tp_event(int event_id, u64 addr, u64 count, void *record, int entry_size); +extern void perf_bp_event(struct perf_event *event, void *data); #ifndef perf_misc_flags #define perf_misc_flags(regs) (user_mode(regs) ? PERF_RECORD_MISC_USER : \ @@ -865,6 +887,8 @@ static inline int perf_event_task_enable(void) { return -EINVAL; } static inline void perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr) { } +static inline void +perf_bp_event(struct perf_event *event, void *data) { } static inline void perf_event_mmap(struct vm_area_struct *vma) { } static inline void perf_event_comm(struct task_struct *tsk) { } diff --git a/kernel/exit.c b/kernel/exit.c index e61891f80123..266f8920628a 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include @@ -979,6 +980,10 @@ NORET_TYPE void do_exit(long code) proc_exit_connector(tsk); + /* + * FIXME: do that only when needed, using sched_exit tracepoint + */ + flush_ptrace_hw_breakpoint(tsk); /* * Flush inherited counters to the parent - before the parent * gets woken up by child-exit notifications. diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index c1f64e65a9f3..08f6d0163201 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -15,6 +15,7 @@ * * Copyright (C) 2007 Alan Stern * Copyright (C) IBM Corporation, 2009 + * Copyright (C) 2009, Frederic Weisbecker */ /* @@ -35,334 +36,242 @@ #include #include -#include +#include + #include #ifdef CONFIG_X86 #include #endif -/* - * Spinlock that protects all (un)register operations over kernel/user-space - * breakpoint requests - */ -static DEFINE_SPINLOCK(hw_breakpoint_lock); - -/* Array of kernel-space breakpoint structures */ -struct hw_breakpoint *hbp_kernel[HBP_NUM]; - -/* - * Per-processor copy of hbp_kernel[]. Used only when hbp_kernel is being - * modified but we need the older copy to handle any hbp exceptions. It will - * sync with hbp_kernel[] value after updation is done through IPIs. - */ -DEFINE_PER_CPU(struct hw_breakpoint*, this_hbp_kernel[HBP_NUM]); - -/* - * Kernel breakpoints grow downwards, starting from HBP_NUM - * 'hbp_kernel_pos' denotes lowest numbered breakpoint register occupied for - * kernel-space request. We will initialise it here and not in an __init - * routine because load_debug_registers(), which uses this variable can be - * called very early during CPU initialisation. - */ -unsigned int hbp_kernel_pos = HBP_NUM; -/* - * An array containing refcount of threads using a given bkpt register - * Accesses are synchronised by acquiring hw_breakpoint_lock - */ -unsigned int hbp_user_refcount[HBP_NUM]; +static atomic_t bp_slot; -/* - * Load the debug registers during startup of a CPU. - */ -void load_debug_registers(void) +int reserve_bp_slot(struct perf_event *bp) { - unsigned long flags; - struct task_struct *tsk = current; - - spin_lock_bh(&hw_breakpoint_lock); - - /* Prevent IPIs for new kernel breakpoint updates */ - local_irq_save(flags); - arch_update_kernel_hw_breakpoint(NULL); - local_irq_restore(flags); - - if (test_tsk_thread_flag(tsk, TIF_DEBUG)) - arch_install_thread_hw_breakpoint(tsk); - - spin_unlock_bh(&hw_breakpoint_lock); -} + if (atomic_inc_return(&bp_slot) == HBP_NUM) { + atomic_dec(&bp_slot); -/* - * Erase all the hardware breakpoint info associated with a thread. - * - * If tsk != current then tsk must not be usable (for example, a - * child being cleaned up from a failed fork). - */ -void flush_thread_hw_breakpoint(struct task_struct *tsk) -{ - int i; - struct thread_struct *thread = &(tsk->thread); - - spin_lock_bh(&hw_breakpoint_lock); - - /* The thread no longer has any breakpoints associated with it */ - clear_tsk_thread_flag(tsk, TIF_DEBUG); - for (i = 0; i < HBP_NUM; i++) { - if (thread->hbp[i]) { - hbp_user_refcount[i]--; - kfree(thread->hbp[i]); - thread->hbp[i] = NULL; - } + return -ENOSPC; } - arch_flush_thread_hw_breakpoint(tsk); - - /* Actually uninstall the breakpoints if necessary */ - if (tsk == current) - arch_uninstall_thread_hw_breakpoint(); - spin_unlock_bh(&hw_breakpoint_lock); + return 0; } -/* - * Copy the hardware breakpoint info from a thread to its cloned child. - */ -int copy_thread_hw_breakpoint(struct task_struct *tsk, - struct task_struct *child, unsigned long clone_flags) +void release_bp_slot(struct perf_event *bp) { - /* - * We will assume that breakpoint settings are not inherited - * and the child starts out with no debug registers set. - * But what about CLONE_PTRACE? - */ - clear_tsk_thread_flag(child, TIF_DEBUG); - - /* We will call flush routine since the debugregs are not inherited */ - arch_flush_thread_hw_breakpoint(child); - - return 0; + atomic_dec(&bp_slot); } -static int __register_user_hw_breakpoint(int pos, struct task_struct *tsk, - struct hw_breakpoint *bp) +int __register_perf_hw_breakpoint(struct perf_event *bp) { - struct thread_struct *thread = &(tsk->thread); - int rc; + int ret; - /* Do not overcommit. Fail if kernel has used the hbp registers */ - if (pos >= hbp_kernel_pos) - return -ENOSPC; + ret = reserve_bp_slot(bp); + if (ret) + return ret; - rc = arch_validate_hwbkpt_settings(bp, tsk); - if (rc) - return rc; + if (!bp->attr.disabled) + ret = arch_validate_hwbkpt_settings(bp, bp->ctx->task); - thread->hbp[pos] = bp; - hbp_user_refcount[pos]++; + return ret; +} - arch_update_user_hw_breakpoint(pos, tsk); - /* - * Does it need to be installed right now? - * Otherwise it will get installed the next time tsk runs - */ - if (tsk == current) - arch_install_thread_hw_breakpoint(tsk); +int register_perf_hw_breakpoint(struct perf_event *bp) +{ + bp->callback = perf_bp_event; - return rc; + return __register_perf_hw_breakpoint(bp); } /* - * Modify the address of a hbp register already in use by the task - * Do not invoke this in-lieu of a __unregister_user_hw_breakpoint() + * Register a breakpoint bound to a task and a given cpu. + * If cpu is -1, the breakpoint is active for the task in every cpu + * If the task is -1, the breakpoint is active for every tasks in the given + * cpu. */ -static int __modify_user_hw_breakpoint(int pos, struct task_struct *tsk, - struct hw_breakpoint *bp) +static struct perf_event * +register_user_hw_breakpoint_cpu(unsigned long addr, + int len, + int type, + perf_callback_t triggered, + pid_t pid, + int cpu, + bool active) { - struct thread_struct *thread = &(tsk->thread); - - if ((pos >= hbp_kernel_pos) || (arch_validate_hwbkpt_settings(bp, tsk))) - return -EINVAL; - - if (thread->hbp[pos] == NULL) - return -EINVAL; - - thread->hbp[pos] = bp; + struct perf_event_attr *attr; + struct perf_event *bp; + + attr = kzalloc(sizeof(*attr), GFP_KERNEL); + if (!attr) + return ERR_PTR(-ENOMEM); + + attr->type = PERF_TYPE_BREAKPOINT; + attr->size = sizeof(*attr); + attr->bp_addr = addr; + attr->bp_len = len; + attr->bp_type = type; /* - * 'pos' must be that of a hbp register already used by 'tsk' - * Otherwise arch_modify_user_hw_breakpoint() will fail + * Such breakpoints are used by debuggers to trigger signals when + * we hit the excepted memory op. We can't miss such events, they + * must be pinned. */ - arch_update_user_hw_breakpoint(pos, tsk); + attr->pinned = 1; - if (tsk == current) - arch_install_thread_hw_breakpoint(tsk); + if (!active) + attr->disabled = 1; - return 0; -} - -static void __unregister_user_hw_breakpoint(int pos, struct task_struct *tsk) -{ - hbp_user_refcount[pos]--; - tsk->thread.hbp[pos] = NULL; + bp = perf_event_create_kernel_counter(attr, cpu, pid, triggered); + kfree(attr); - arch_update_user_hw_breakpoint(pos, tsk); - - if (tsk == current) - arch_install_thread_hw_breakpoint(tsk); + return bp; } /** * register_user_hw_breakpoint - register a hardware breakpoint for user space + * @addr: is the memory address that triggers the breakpoint + * @len: the length of the access to the memory (1 byte, 2 bytes etc...) + * @type: the type of the access to the memory (read/write/exec) + * @triggered: callback to trigger when we hit the breakpoint * @tsk: pointer to 'task_struct' of the process to which the address belongs - * @bp: the breakpoint structure to register - * - * @bp.info->name or @bp.info->address, @bp.info->len, @bp.info->type and - * @bp->triggered must be set properly before invocation + * @active: should we activate it while registering it * */ -int register_user_hw_breakpoint(struct task_struct *tsk, - struct hw_breakpoint *bp) +struct perf_event * +register_user_hw_breakpoint(unsigned long addr, + int len, + int type, + perf_callback_t triggered, + struct task_struct *tsk, + bool active) { - struct thread_struct *thread = &(tsk->thread); - int i, rc = -ENOSPC; - - spin_lock_bh(&hw_breakpoint_lock); - - for (i = 0; i < hbp_kernel_pos; i++) { - if (!thread->hbp[i]) { - rc = __register_user_hw_breakpoint(i, tsk, bp); - break; - } - } - if (!rc) - set_tsk_thread_flag(tsk, TIF_DEBUG); - - spin_unlock_bh(&hw_breakpoint_lock); - return rc; + return register_user_hw_breakpoint_cpu(addr, len, type, triggered, + tsk->pid, -1, active); } EXPORT_SYMBOL_GPL(register_user_hw_breakpoint); /** * modify_user_hw_breakpoint - modify a user-space hardware breakpoint + * @bp: the breakpoint structure to modify + * @addr: is the memory address that triggers the breakpoint + * @len: the length of the access to the memory (1 byte, 2 bytes etc...) + * @type: the type of the access to the memory (read/write/exec) + * @triggered: callback to trigger when we hit the breakpoint * @tsk: pointer to 'task_struct' of the process to which the address belongs - * @bp: the breakpoint structure to unregister - * + * @active: should we activate it while registering it */ -int modify_user_hw_breakpoint(struct task_struct *tsk, struct hw_breakpoint *bp) +struct perf_event * +modify_user_hw_breakpoint(struct perf_event *bp, + unsigned long addr, + int len, + int type, + perf_callback_t triggered, + struct task_struct *tsk, + bool active) { - struct thread_struct *thread = &(tsk->thread); - int i, ret = -ENOENT; + /* + * FIXME: do it without unregistering + * - We don't want to lose our slot + * - If the new bp is incorrect, don't lose the older one + */ + unregister_hw_breakpoint(bp); - spin_lock_bh(&hw_breakpoint_lock); - for (i = 0; i < hbp_kernel_pos; i++) { - if (bp == thread->hbp[i]) { - ret = __modify_user_hw_breakpoint(i, tsk, bp); - break; - } - } - spin_unlock_bh(&hw_breakpoint_lock); - return ret; + return register_user_hw_breakpoint(addr, len, type, triggered, + tsk, active); } EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint); /** - * unregister_user_hw_breakpoint - unregister a user-space hardware breakpoint - * @tsk: pointer to 'task_struct' of the process to which the address belongs + * unregister_hw_breakpoint - unregister a user-space hardware breakpoint * @bp: the breakpoint structure to unregister - * */ -void unregister_user_hw_breakpoint(struct task_struct *tsk, - struct hw_breakpoint *bp) +void unregister_hw_breakpoint(struct perf_event *bp) { - struct thread_struct *thread = &(tsk->thread); - int i, pos = -1, hbp_counter = 0; - - spin_lock_bh(&hw_breakpoint_lock); - for (i = 0; i < hbp_kernel_pos; i++) { - if (thread->hbp[i]) - hbp_counter++; - if (bp == thread->hbp[i]) - pos = i; - } - if (pos >= 0) { - __unregister_user_hw_breakpoint(pos, tsk); - hbp_counter--; - } - if (!hbp_counter) - clear_tsk_thread_flag(tsk, TIF_DEBUG); - - spin_unlock_bh(&hw_breakpoint_lock); + if (!bp) + return; + perf_event_release_kernel(bp); +} +EXPORT_SYMBOL_GPL(unregister_hw_breakpoint); + +static struct perf_event * +register_kernel_hw_breakpoint_cpu(unsigned long addr, + int len, + int type, + perf_callback_t triggered, + int cpu, + bool active) +{ + return register_user_hw_breakpoint_cpu(addr, len, type, triggered, + -1, cpu, active); } -EXPORT_SYMBOL_GPL(unregister_user_hw_breakpoint); /** - * register_kernel_hw_breakpoint - register a hardware breakpoint for kernel space - * @bp: the breakpoint structure to register - * - * @bp.info->name or @bp.info->address, @bp.info->len, @bp.info->type and - * @bp->triggered must be set properly before invocation + * register_wide_hw_breakpoint - register a wide breakpoint in the kernel + * @addr: is the memory address that triggers the breakpoint + * @len: the length of the access to the memory (1 byte, 2 bytes etc...) + * @type: the type of the access to the memory (read/write/exec) + * @triggered: callback to trigger when we hit the breakpoint + * @active: should we activate it while registering it * + * @return a set of per_cpu pointers to perf events */ -int register_kernel_hw_breakpoint(struct hw_breakpoint *bp) +struct perf_event ** +register_wide_hw_breakpoint(unsigned long addr, + int len, + int type, + perf_callback_t triggered, + bool active) { - int rc; + struct perf_event **cpu_events, **pevent, *bp; + long err; + int cpu; + + cpu_events = alloc_percpu(typeof(*cpu_events)); + if (!cpu_events) + return ERR_PTR(-ENOMEM); - rc = arch_validate_hwbkpt_settings(bp, NULL); - if (rc) - return rc; + for_each_possible_cpu(cpu) { + pevent = per_cpu_ptr(cpu_events, cpu); + bp = register_kernel_hw_breakpoint_cpu(addr, len, type, + triggered, cpu, active); - spin_lock_bh(&hw_breakpoint_lock); + *pevent = bp; - rc = -ENOSPC; - /* Check if we are over-committing */ - if ((hbp_kernel_pos > 0) && (!hbp_user_refcount[hbp_kernel_pos-1])) { - hbp_kernel_pos--; - hbp_kernel[hbp_kernel_pos] = bp; - on_each_cpu(arch_update_kernel_hw_breakpoint, NULL, 1); - rc = 0; + if (IS_ERR(bp) || !bp) { + err = PTR_ERR(bp); + goto fail; + } } - spin_unlock_bh(&hw_breakpoint_lock); - return rc; + return cpu_events; + +fail: + for_each_possible_cpu(cpu) { + pevent = per_cpu_ptr(cpu_events, cpu); + if (IS_ERR(*pevent) || !*pevent) + break; + unregister_hw_breakpoint(*pevent); + } + free_percpu(cpu_events); + /* return the error if any */ + return ERR_PTR(err); } -EXPORT_SYMBOL_GPL(register_kernel_hw_breakpoint); /** - * unregister_kernel_hw_breakpoint - unregister a HW breakpoint for kernel space - * @bp: the breakpoint structure to unregister - * - * Uninstalls and unregisters @bp. + * unregister_wide_hw_breakpoint - unregister a wide breakpoint in the kernel + * @cpu_events: the per cpu set of events to unregister */ -void unregister_kernel_hw_breakpoint(struct hw_breakpoint *bp) +void unregister_wide_hw_breakpoint(struct perf_event **cpu_events) { - int i, j; - - spin_lock_bh(&hw_breakpoint_lock); - - /* Find the 'bp' in our list of breakpoints for kernel */ - for (i = hbp_kernel_pos; i < HBP_NUM; i++) - if (bp == hbp_kernel[i]) - break; + int cpu; + struct perf_event **pevent; - /* Check if we did not find a match for 'bp'. If so return early */ - if (i == HBP_NUM) { - spin_unlock_bh(&hw_breakpoint_lock); - return; + for_each_possible_cpu(cpu) { + pevent = per_cpu_ptr(cpu_events, cpu); + unregister_hw_breakpoint(*pevent); } - - /* - * We'll shift the breakpoints one-level above to compact if - * unregistration creates a hole - */ - for (j = i; j > hbp_kernel_pos; j--) - hbp_kernel[j] = hbp_kernel[j-1]; - - hbp_kernel[hbp_kernel_pos] = NULL; - on_each_cpu(arch_update_kernel_hw_breakpoint, NULL, 1); - hbp_kernel_pos++; - - spin_unlock_bh(&hw_breakpoint_lock); + free_percpu(cpu_events); } -EXPORT_SYMBOL_GPL(unregister_kernel_hw_breakpoint); + static struct notifier_block hw_breakpoint_exceptions_nb = { .notifier_call = hw_breakpoint_exceptions_notify, @@ -374,5 +283,12 @@ static int __init init_hw_breakpoint(void) { return register_die_notifier(&hw_breakpoint_exceptions_nb); } - core_initcall(init_hw_breakpoint); + + +struct pmu perf_ops_bp = { + .enable = arch_install_hw_breakpoint, + .disable = arch_uninstall_hw_breakpoint, + .read = hw_breakpoint_pmu_read, + .unthrottle = hw_breakpoint_pmu_unthrottle +}; diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 5087125e2a00..98dc56b2ebe4 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -29,6 +29,7 @@ #include #include #include +#include #include @@ -4229,6 +4230,51 @@ static void perf_event_free_filter(struct perf_event *event) #endif /* CONFIG_EVENT_PROFILE */ +#ifdef CONFIG_HAVE_HW_BREAKPOINT +static void bp_perf_event_destroy(struct perf_event *event) +{ + release_bp_slot(event); +} + +static const struct pmu *bp_perf_event_init(struct perf_event *bp) +{ + int err; + /* + * The breakpoint is already filled if we haven't created the counter + * through perf syscall + * FIXME: manage to get trigerred to NULL if it comes from syscalls + */ + if (!bp->callback) + err = register_perf_hw_breakpoint(bp); + else + err = __register_perf_hw_breakpoint(bp); + if (err) + return ERR_PTR(err); + + bp->destroy = bp_perf_event_destroy; + + return &perf_ops_bp; +} + +void perf_bp_event(struct perf_event *bp, void *regs) +{ + /* TODO */ +} +#else +static void bp_perf_event_destroy(struct perf_event *event) +{ +} + +static const struct pmu *bp_perf_event_init(struct perf_event *bp) +{ + return NULL; +} + +void perf_bp_event(struct perf_event *bp, void *regs) +{ +} +#endif + atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; static void sw_perf_event_destroy(struct perf_event *event) @@ -4375,6 +4421,11 @@ perf_event_alloc(struct perf_event_attr *attr, pmu = tp_perf_event_init(event); break; + case PERF_TYPE_BREAKPOINT: + pmu = bp_perf_event_init(event); + break; + + default: break; } @@ -4686,7 +4737,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, ctx = find_get_context(pid, cpu); if (IS_ERR(ctx)) - return NULL ; + return NULL; event = perf_event_alloc(attr, cpu, ctx, NULL, NULL, callback, GFP_KERNEL); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 91c3d0e9a5a1..d72f06ff263f 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -11,14 +11,11 @@ #include #include #include +#include #include #include -#ifdef CONFIG_KSYM_TRACER -#include -#endif - enum trace_type { __TRACE_FIRST_TYPE = 0, diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h index e19747d4f860..c16a08f399df 100644 --- a/kernel/trace/trace_entries.h +++ b/kernel/trace/trace_entries.h @@ -372,11 +372,11 @@ FTRACE_ENTRY(ksym_trace, ksym_trace_entry, F_STRUCT( __field( unsigned long, ip ) __field( unsigned char, type ) - __array( char , ksym_name, KSYM_NAME_LEN ) __array( char , cmd, TASK_COMM_LEN ) + __field( unsigned long, addr ) ), - F_printk("ip: %pF type: %d ksym_name: %s cmd: %s", + F_printk("ip: %pF type: %d ksym_name: %pS cmd: %s", (void *)__entry->ip, (unsigned int)__entry->type, - __entry->ksym_name, __entry->cmd) + (void *)__entry->addr, __entry->cmd) ); diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c index 6d5609c67378..fea83eeeef09 100644 --- a/kernel/trace/trace_ksym.c +++ b/kernel/trace/trace_ksym.c @@ -29,7 +29,11 @@ #include "trace_stat.h" #include "trace.h" -/* For now, let us restrict the no. of symbols traced simultaneously to number +#include +#include + +/* + * For now, let us restrict the no. of symbols traced simultaneously to number * of available hardware breakpoint registers. */ #define KSYM_TRACER_MAX HBP_NUM @@ -37,8 +41,10 @@ #define KSYM_TRACER_OP_LEN 3 /* rw- */ struct trace_ksym { - struct hw_breakpoint *ksym_hbp; + struct perf_event **ksym_hbp; unsigned long ksym_addr; + int type; + int len; #ifdef CONFIG_PROFILE_KSYM_TRACER unsigned long counter; #endif @@ -75,10 +81,11 @@ void ksym_collect_stats(unsigned long hbp_hit_addr) } #endif /* CONFIG_PROFILE_KSYM_TRACER */ -void ksym_hbp_handler(struct hw_breakpoint *hbp, struct pt_regs *regs) +void ksym_hbp_handler(struct perf_event *hbp, void *data) { struct ring_buffer_event *event; struct ksym_trace_entry *entry; + struct pt_regs *regs = data; struct ring_buffer *buffer; int pc; @@ -96,12 +103,12 @@ void ksym_hbp_handler(struct hw_breakpoint *hbp, struct pt_regs *regs) entry = ring_buffer_event_data(event); entry->ip = instruction_pointer(regs); - entry->type = hbp->info.type; - strlcpy(entry->ksym_name, hbp->info.name, KSYM_SYMBOL_LEN); + entry->type = hw_breakpoint_type(hbp); + entry->addr = hw_breakpoint_addr(hbp); strlcpy(entry->cmd, current->comm, TASK_COMM_LEN); #ifdef CONFIG_PROFILE_KSYM_TRACER - ksym_collect_stats(hbp->info.address); + ksym_collect_stats(hw_breakpoint_addr(hbp)); #endif /* CONFIG_PROFILE_KSYM_TRACER */ trace_buffer_unlock_commit(buffer, event, 0, pc); @@ -120,31 +127,21 @@ static int ksym_trace_get_access_type(char *str) int access = 0; if (str[0] == 'r') - access += 4; - else if (str[0] != '-') - return -EINVAL; + access |= HW_BREAKPOINT_R; if (str[1] == 'w') - access += 2; - else if (str[1] != '-') - return -EINVAL; + access |= HW_BREAKPOINT_W; - if (str[2] != '-') - return -EINVAL; + if (str[2] == 'x') + access |= HW_BREAKPOINT_X; switch (access) { - case 6: - access = HW_BREAKPOINT_RW; - break; - case 4: - access = -EINVAL; - break; - case 2: - access = HW_BREAKPOINT_WRITE; - break; + case HW_BREAKPOINT_W: + case HW_BREAKPOINT_W | HW_BREAKPOINT_R: + return access; + default: + return -EINVAL; } - - return access; } /* @@ -194,36 +191,33 @@ int process_new_ksym_entry(char *ksymname, int op, unsigned long addr) if (!entry) return -ENOMEM; - entry->ksym_hbp = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL); - if (!entry->ksym_hbp) - goto err; - - entry->ksym_hbp->info.name = kstrdup(ksymname, GFP_KERNEL); - if (!entry->ksym_hbp->info.name) - goto err; - - entry->ksym_hbp->info.type = op; - entry->ksym_addr = entry->ksym_hbp->info.address = addr; -#ifdef CONFIG_X86 - entry->ksym_hbp->info.len = HW_BREAKPOINT_LEN_4; -#endif - entry->ksym_hbp->triggered = (void *)ksym_hbp_handler; + entry->type = op; + entry->ksym_addr = addr; + entry->len = HW_BREAKPOINT_LEN_4; + + ret = -EAGAIN; + entry->ksym_hbp = register_wide_hw_breakpoint(entry->ksym_addr, + entry->len, entry->type, + ksym_hbp_handler, true); + if (IS_ERR(entry->ksym_hbp)) { + entry->ksym_hbp = NULL; + ret = PTR_ERR(entry->ksym_hbp); + } - ret = register_kernel_hw_breakpoint(entry->ksym_hbp); - if (ret < 0) { + if (!entry->ksym_hbp) { printk(KERN_INFO "ksym_tracer request failed. Try again" " later!!\n"); - ret = -EAGAIN; goto err; } + hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head); ksym_filter_entry_count++; + return 0; + err: - if (entry->ksym_hbp) - kfree(entry->ksym_hbp->info.name); - kfree(entry->ksym_hbp); kfree(entry); + return ret; } @@ -244,10 +238,10 @@ static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf, mutex_lock(&ksym_tracer_mutex); hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) { - ret = trace_seq_printf(s, "%s:", entry->ksym_hbp->info.name); - if (entry->ksym_hbp->info.type == HW_BREAKPOINT_WRITE) + ret = trace_seq_printf(s, "%pS:", (void *)entry->ksym_addr); + if (entry->type == HW_BREAKPOINT_W) ret = trace_seq_puts(s, "-w-\n"); - else if (entry->ksym_hbp->info.type == HW_BREAKPOINT_RW) + else if (entry->type == (HW_BREAKPOINT_W | HW_BREAKPOINT_R)) ret = trace_seq_puts(s, "rw-\n"); WARN_ON_ONCE(!ret); } @@ -269,12 +263,10 @@ static void __ksym_trace_reset(void) mutex_lock(&ksym_tracer_mutex); hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head, ksym_hlist) { - unregister_kernel_hw_breakpoint(entry->ksym_hbp); + unregister_wide_hw_breakpoint(entry->ksym_hbp); ksym_filter_entry_count--; hlist_del_rcu(&(entry->ksym_hlist)); synchronize_rcu(); - kfree(entry->ksym_hbp->info.name); - kfree(entry->ksym_hbp); kfree(entry); } mutex_unlock(&ksym_tracer_mutex); @@ -327,7 +319,7 @@ static ssize_t ksym_trace_filter_write(struct file *file, hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) { if (entry->ksym_addr == ksym_addr) { /* Check for malformed request: (6) */ - if (entry->ksym_hbp->info.type != op) + if (entry->type != op) changed = 1; else goto out; @@ -335,18 +327,21 @@ static ssize_t ksym_trace_filter_write(struct file *file, } } if (changed) { - unregister_kernel_hw_breakpoint(entry->ksym_hbp); - entry->ksym_hbp->info.type = op; + unregister_wide_hw_breakpoint(entry->ksym_hbp); + entry->type = op; if (op > 0) { - ret = register_kernel_hw_breakpoint(entry->ksym_hbp); - if (ret == 0) + entry->ksym_hbp = + register_wide_hw_breakpoint(entry->ksym_addr, + entry->len, entry->type, + ksym_hbp_handler, true); + if (IS_ERR(entry->ksym_hbp)) + entry->ksym_hbp = NULL; + if (!entry->ksym_hbp) goto out; } ksym_filter_entry_count--; hlist_del_rcu(&(entry->ksym_hlist)); synchronize_rcu(); - kfree(entry->ksym_hbp->info.name); - kfree(entry->ksym_hbp); kfree(entry); ret = 0; goto out; @@ -413,16 +408,16 @@ static enum print_line_t ksym_trace_output(struct trace_iterator *iter) trace_assign_type(field, entry); - ret = trace_seq_printf(s, "%11s-%-5d [%03d] %-30s ", field->cmd, - entry->pid, iter->cpu, field->ksym_name); + ret = trace_seq_printf(s, "%11s-%-5d [%03d] %pS", field->cmd, + entry->pid, iter->cpu, (char *)field->addr); if (!ret) return TRACE_TYPE_PARTIAL_LINE; switch (field->type) { - case HW_BREAKPOINT_WRITE: + case HW_BREAKPOINT_W: ret = trace_seq_printf(s, " W "); break; - case HW_BREAKPOINT_RW: + case HW_BREAKPOINT_R | HW_BREAKPOINT_W: ret = trace_seq_printf(s, " RW "); break; default: @@ -490,14 +485,13 @@ static int ksym_tracer_stat_show(struct seq_file *m, void *v) entry = hlist_entry(stat, struct trace_ksym, ksym_hlist); - if (entry->ksym_hbp) - access_type = entry->ksym_hbp->info.type; + access_type = entry->type; switch (access_type) { - case HW_BREAKPOINT_WRITE: + case HW_BREAKPOINT_W: seq_puts(m, " W "); break; - case HW_BREAKPOINT_RW: + case HW_BREAKPOINT_R | HW_BREAKPOINT_W: seq_puts(m, " RW "); break; default: diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 7179c12e4f0f..27c5072c2e6b 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -828,7 +828,8 @@ trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr) ksym_selftest_dummy = 0; /* Register the read-write tracing request */ - ret = process_new_ksym_entry(KSYM_SELFTEST_ENTRY, HW_BREAKPOINT_RW, + ret = process_new_ksym_entry(KSYM_SELFTEST_ENTRY, + HW_BREAKPOINT_R | HW_BREAKPOINT_W, (unsigned long)(&ksym_selftest_dummy)); if (ret < 0) { -- cgit v1.3-8-gc7d7 From 91284224da5b15ec6c2b45e10fa5eccd1c92a204 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 18 Oct 2009 23:32:33 +0200 Subject: pcmcia: add new CIS access helpers As a replacement to pcmcia_get_{first,next}_tuple() and pcmcia_get_tuple_data(), three new -- and easier to use -- functions are added: - pcmcia_get_tuple() to get the very first CIS entry of one type. - pcmcia_loop_tuple() to loop over all CIS entries of one type. - pcmcia_get_mac_from_cis() to read out the hardware MAC address from CISTPL_FUNCE. Only a handful of drivers need these functions anyway, as most CIS access is already handled by pcmcia_loop_config(), which now shares the same backed (pccard_loop_tuple()) with pcmcia_loop_tuple(). A pcmcia_get_mac_from_cis() bug noted by Komuro has been fixed in this revision. Signed-off-by: Dominik Brodowski --- Documentation/pcmcia/driver-changes.txt | 7 ++ drivers/pcmcia/cistpl.c | 61 +++++++++ drivers/pcmcia/cs_internal.h | 7 ++ drivers/pcmcia/pcmcia_resource.c | 217 +++++++++++++++++++++++++++----- drivers/pcmcia/rsrc_mgr.c | 1 + include/pcmcia/ds.h | 57 ++++++--- 6 files changed, 304 insertions(+), 46 deletions(-) (limited to 'include') diff --git a/Documentation/pcmcia/driver-changes.txt b/Documentation/pcmcia/driver-changes.txt index 059934363caf..adfb83e58675 100644 --- a/Documentation/pcmcia/driver-changes.txt +++ b/Documentation/pcmcia/driver-changes.txt @@ -1,5 +1,12 @@ This file details changes in 2.6 which affect PCMCIA card driver authors: +* New CIS tuple access (as of 2.6.33) + Instead of pcmcia_get_{first,next}_tuple(), pcmcia_get_tuple_data() and + pcmcia_parse_tuple(), a driver shall use "pcmcia_get_tuple()" if it is + only interested in one (raw) tuple, or "pcmcia_loop_tuple()" if it is + interested in all tuples of one type. To decode the MAC from CISTPL_FUNCE, + a new helper "pcmcia_get_mac_from_cis()" was added. + * New configuration loop helper (as of 2.6.28) By calling pcmcia_loop_config(), a driver can iterate over all available configuration options. During a driver's probe() phase, one doesn't need diff --git a/drivers/pcmcia/cistpl.c b/drivers/pcmcia/cistpl.c index 6c4a4fc83630..24dd3c1eac0b 100644 --- a/drivers/pcmcia/cistpl.c +++ b/drivers/pcmcia/cistpl.c @@ -1482,6 +1482,67 @@ done: } EXPORT_SYMBOL(pccard_read_tuple); + +/** + * pccard_loop_tuple() - loop over tuples in the CIS + * @s: the struct pcmcia_socket where the card is inserted + * @function: the device function we loop for + * @code: which CIS code shall we look for? + * @parse: buffer where the tuple shall be parsed (or NULL, if no parse) + * @priv_data: private data to be passed to the loop_tuple function. + * @loop_tuple: function to call for each CIS entry of type @function. IT + * gets passed the raw tuple, the paresed tuple (if @parse is + * set) and @priv_data. + * + * pccard_loop_tuple() loops over all CIS entries of type @function, and + * calls the @loop_tuple function for each entry. If the call to @loop_tuple + * returns 0, the loop exits. Returns 0 on success or errorcode otherwise. + */ +int pccard_loop_tuple(struct pcmcia_socket *s, unsigned int function, + cisdata_t code, cisparse_t *parse, void *priv_data, + int (*loop_tuple) (tuple_t *tuple, + cisparse_t *parse, + void *priv_data)) +{ + tuple_t tuple; + cisdata_t *buf; + int ret; + + buf = kzalloc(256, GFP_KERNEL); + if (buf == NULL) { + dev_printk(KERN_WARNING, &s->dev, "no memory to read tuple\n"); + return -ENOMEM; + } + + tuple.TupleData = buf; + tuple.TupleDataMax = 255; + tuple.TupleOffset = 0; + tuple.DesiredTuple = code; + tuple.Attributes = 0; + + ret = pccard_get_first_tuple(s, function, &tuple); + while (!ret) { + if (pccard_get_tuple_data(s, &tuple)) + goto next_entry; + + if (parse) + if (pcmcia_parse_tuple(&tuple, parse)) + goto next_entry; + + ret = loop_tuple(&tuple, parse, priv_data); + if (!ret) + break; + +next_entry: + ret = pccard_get_next_tuple(s, function, &tuple); + } + + kfree(buf); + return ret; +} +EXPORT_SYMBOL(pccard_loop_tuple); + + /*====================================================================== This tries to determine if a card has a sensible CIS. It returns diff --git a/drivers/pcmcia/cs_internal.h b/drivers/pcmcia/cs_internal.h index 1f4098f1354d..06a14c951e92 100644 --- a/drivers/pcmcia/cs_internal.h +++ b/drivers/pcmcia/cs_internal.h @@ -199,6 +199,13 @@ int pcmcia_replace_cis(struct pcmcia_socket *s, const u8 *data, const size_t len); int pccard_validate_cis(struct pcmcia_socket *s, unsigned int *count); +/* loop over CIS entries */ +int pccard_loop_tuple(struct pcmcia_socket *s, unsigned int function, + cisdata_t code, cisparse_t *parse, void *priv_data, + int (*loop_tuple) (tuple_t *tuple, + cisparse_t *parse, + void *priv_data)); + /* rsrc_mgr.c */ int pcmcia_validate_mem(struct pcmcia_socket *s); struct resource *pcmcia_find_io_region(unsigned long base, diff --git a/drivers/pcmcia/pcmcia_resource.c b/drivers/pcmcia/pcmcia_resource.c index d919e96c0afd..0bfb05aa8f85 100644 --- a/drivers/pcmcia/pcmcia_resource.c +++ b/drivers/pcmcia/pcmcia_resource.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -885,12 +886,39 @@ EXPORT_SYMBOL(pcmcia_disable_device); struct pcmcia_cfg_mem { - tuple_t tuple; + struct pcmcia_device *p_dev; + void *priv_data; + int (*conf_check) (struct pcmcia_device *p_dev, + cistpl_cftable_entry_t *cfg, + cistpl_cftable_entry_t *dflt, + unsigned int vcc, + void *priv_data); cisparse_t parse; - u8 buf[256]; cistpl_cftable_entry_t dflt; }; +/** + * pcmcia_do_loop_config() - internal helper for pcmcia_loop_config() + * + * pcmcia_do_loop_config() is the internal callback for the call from + * pcmcia_loop_config() to pccard_loop_tuple(). Data is transferred + * by a struct pcmcia_cfg_mem. + */ +static int pcmcia_do_loop_config(tuple_t *tuple, cisparse_t *parse, void *priv) +{ + cistpl_cftable_entry_t *cfg = &parse->cftable_entry; + struct pcmcia_cfg_mem *cfg_mem = priv; + + /* default values */ + cfg_mem->p_dev->conf.ConfigIndex = cfg->index; + if (cfg->flags & CISTPL_CFTABLE_DEFAULT) + cfg_mem->dflt = *cfg; + + return cfg_mem->conf_check(cfg_mem->p_dev, cfg, &cfg_mem->dflt, + cfg_mem->p_dev->socket->socket.Vcc, + cfg_mem->priv_data); +} + /** * pcmcia_loop_config() - loop over configuration options * @p_dev: the struct pcmcia_device which we need to loop for. @@ -913,48 +941,173 @@ int pcmcia_loop_config(struct pcmcia_device *p_dev, void *priv_data) { struct pcmcia_cfg_mem *cfg_mem; - - tuple_t *tuple; int ret; - unsigned int vcc; cfg_mem = kzalloc(sizeof(struct pcmcia_cfg_mem), GFP_KERNEL); if (cfg_mem == NULL) return -ENOMEM; - /* get the current Vcc setting */ - vcc = p_dev->socket->socket.Vcc; + cfg_mem->p_dev = p_dev; + cfg_mem->conf_check = conf_check; + cfg_mem->priv_data = priv_data; - tuple = &cfg_mem->tuple; - tuple->TupleData = cfg_mem->buf; - tuple->TupleDataMax = 255; - tuple->TupleOffset = 0; - tuple->DesiredTuple = CISTPL_CFTABLE_ENTRY; - tuple->Attributes = 0; + ret = pccard_loop_tuple(p_dev->socket, p_dev->func, + CISTPL_CFTABLE_ENTRY, &cfg_mem->parse, + cfg_mem, pcmcia_do_loop_config); - ret = pcmcia_get_first_tuple(p_dev, tuple); - while (!ret) { - cistpl_cftable_entry_t *cfg = &cfg_mem->parse.cftable_entry; + kfree(cfg_mem); + return ret; +} +EXPORT_SYMBOL(pcmcia_loop_config); - if (pcmcia_get_tuple_data(p_dev, tuple)) - goto next_entry; - if (pcmcia_parse_tuple(tuple, &cfg_mem->parse)) - goto next_entry; +struct pcmcia_loop_mem { + struct pcmcia_device *p_dev; + void *priv_data; + int (*loop_tuple) (struct pcmcia_device *p_dev, + tuple_t *tuple, + void *priv_data); +}; - /* default values */ - p_dev->conf.ConfigIndex = cfg->index; - if (cfg->flags & CISTPL_CFTABLE_DEFAULT) - cfg_mem->dflt = *cfg; +/** + * pcmcia_do_loop_tuple() - internal helper for pcmcia_loop_config() + * + * pcmcia_do_loop_tuple() is the internal callback for the call from + * pcmcia_loop_tuple() to pccard_loop_tuple(). Data is transferred + * by a struct pcmcia_cfg_mem. + */ +static int pcmcia_do_loop_tuple(tuple_t *tuple, cisparse_t *parse, void *priv) +{ + struct pcmcia_loop_mem *loop = priv; - ret = conf_check(p_dev, cfg, &cfg_mem->dflt, vcc, priv_data); - if (!ret) - break; + return loop->loop_tuple(loop->p_dev, tuple, loop->priv_data); +}; + +/** + * pcmcia_loop_tuple() - loop over tuples in the CIS + * @p_dev: the struct pcmcia_device which we need to loop for. + * @code: which CIS code shall we look for? + * @priv_data: private data to be passed to the loop_tuple function. + * @loop_tuple: function to call for each CIS entry of type @function. IT + * gets passed the raw tuple and @priv_data. + * + * pcmcia_loop_tuple() loops over all CIS entries of type @function, and + * calls the @loop_tuple function for each entry. If the call to @loop_tuple + * returns 0, the loop exits. Returns 0 on success or errorcode otherwise. + */ +int pcmcia_loop_tuple(struct pcmcia_device *p_dev, cisdata_t code, + int (*loop_tuple) (struct pcmcia_device *p_dev, + tuple_t *tuple, + void *priv_data), + void *priv_data) +{ + struct pcmcia_loop_mem loop = { + .p_dev = p_dev, + .loop_tuple = loop_tuple, + .priv_data = priv_data}; + + return pccard_loop_tuple(p_dev->socket, p_dev->func, code, NULL, + &loop, pcmcia_do_loop_tuple); +}; +EXPORT_SYMBOL(pcmcia_loop_tuple); -next_entry: - ret = pcmcia_get_next_tuple(p_dev, tuple); + +struct pcmcia_loop_get { + size_t len; + cisdata_t **buf; +}; + +/** + * pcmcia_do_get_tuple() - internal helper for pcmcia_get_tuple() + * + * pcmcia_do_get_tuple() is the internal callback for the call from + * pcmcia_get_tuple() to pcmcia_loop_tuple(). As we're only interested in + * the first tuple, return 0 unconditionally. Create a memory buffer large + * enough to hold the content of the tuple, and fill it with the tuple data. + * The caller is responsible to free the buffer. + */ +static int pcmcia_do_get_tuple(struct pcmcia_device *p_dev, tuple_t *tuple, + void *priv) +{ + struct pcmcia_loop_get *get = priv; + + *get->buf = kzalloc(tuple->TupleDataLen, GFP_KERNEL); + if (*get->buf) { + get->len = tuple->TupleDataLen; + memcpy(*get->buf, tuple->TupleData, tuple->TupleDataLen); } + return 0; +}; + +/** + * pcmcia_get_tuple() - get first tuple from CIS + * @p_dev: the struct pcmcia_device which we need to loop for. + * @code: which CIS code shall we look for? + * @buf: pointer to store the buffer to. + * + * pcmcia_get_tuple() gets the content of the first CIS entry of type @code. + * It returns the buffer length (or zero). The caller is responsible to free + * the buffer passed in @buf. + */ +size_t pcmcia_get_tuple(struct pcmcia_device *p_dev, cisdata_t code, + unsigned char **buf) +{ + struct pcmcia_loop_get get = { + .len = 0, + .buf = buf, + }; + + *get.buf = NULL; + pcmcia_loop_tuple(p_dev, code, pcmcia_do_get_tuple, &get); + + return get.len; +}; +EXPORT_SYMBOL(pcmcia_get_tuple); + + +/** + * pcmcia_do_get_mac() - internal helper for pcmcia_get_mac_from_cis() + * + * pcmcia_do_get_mac() is the internal callback for the call from + * pcmcia_get_mac_from_cis() to pcmcia_loop_tuple(). We check whether the + * tuple contains a proper LAN_NODE_ID of length 6, and copy the data + * to struct net_device->dev_addr[i]. + */ +static int pcmcia_do_get_mac(struct pcmcia_device *p_dev, tuple_t *tuple, + void *priv) +{ + struct net_device *dev = priv; + int i; + + if (tuple->TupleData[0] != CISTPL_FUNCE_LAN_NODE_ID) + return -EINVAL; + if (tuple->TupleDataLen < ETH_ALEN + 2) { + dev_warn(&p_dev->dev, "Invalid CIS tuple length for " + "LAN_NODE_ID\n"); + return -EINVAL; + } + + if (tuple->TupleData[1] != ETH_ALEN) { + dev_warn(&p_dev->dev, "Invalid header for LAN_NODE_ID\n"); + return -EINVAL; + } + for (i = 0; i < 6; i++) + dev->dev_addr[i] = tuple->TupleData[i+2]; + return 0; +}; + +/** + * pcmcia_get_mac_from_cis() - read out MAC address from CISTPL_FUNCE + * @p_dev: the struct pcmcia_device for which we want the address. + * @dev: a properly prepared struct net_device to store the info to. + * + * pcmcia_get_mac_from_cis() reads out the hardware MAC address from + * CISTPL_FUNCE and stores it into struct net_device *dev->dev_addr which + * must be set up properly by the driver (see examples!). + */ +int pcmcia_get_mac_from_cis(struct pcmcia_device *p_dev, struct net_device *dev) +{ + return pcmcia_loop_tuple(p_dev, CISTPL_FUNCE, pcmcia_do_get_mac, dev); +}; +EXPORT_SYMBOL(pcmcia_get_mac_from_cis); - return ret; -} -EXPORT_SYMBOL(pcmcia_loop_config); diff --git a/drivers/pcmcia/rsrc_mgr.c b/drivers/pcmcia/rsrc_mgr.c index e592e0e0d7ed..de0e770ce6a3 100644 --- a/drivers/pcmcia/rsrc_mgr.c +++ b/drivers/pcmcia/rsrc_mgr.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "cs_internal.h" diff --git a/include/pcmcia/ds.h b/include/pcmcia/ds.h index a2be80b9a095..2eb6e24d1a6b 100644 --- a/include/pcmcia/ds.h +++ b/include/pcmcia/ds.h @@ -34,6 +34,7 @@ struct pcmcia_socket; struct pcmcia_device; struct config_t; +struct net_device; /* dynamic device IDs for PCMCIA device drivers. See * Documentation/pcmcia/driver.txt for details. @@ -176,26 +177,39 @@ const char *pcmcia_error_ret(int ret); pcmcia_error_ret(ret)); \ } -/* CIS access. - * Use the pcmcia_* versions in PCMCIA drivers + +/* + * CIS access. + * + * Please use the following functions to access CIS tuples: + * - pcmcia_get_tuple() + * - pcmcia_loop_tuple() + * - pcmcia_get_mac_from_cis() + * + * To parse a tuple_t, pcmcia_parse_tuple() exists. Its interface + * might change in future. */ -int pcmcia_parse_tuple(tuple_t *tuple, cisparse_t *parse); -int pccard_get_first_tuple(struct pcmcia_socket *s, unsigned int function, - tuple_t *tuple); -#define pcmcia_get_first_tuple(p_dev, tuple) \ - pccard_get_first_tuple(p_dev->socket, p_dev->func, tuple) +/* get the very first CIS entry of type @code. Note that buf is pointer + * to u8 *buf; and that you need to kfree(buf) afterwards. */ +size_t pcmcia_get_tuple(struct pcmcia_device *p_dev, cisdata_t code, + u8 **buf); -int pccard_get_next_tuple(struct pcmcia_socket *s, unsigned int function, - tuple_t *tuple); -#define pcmcia_get_next_tuple(p_dev, tuple) \ - pccard_get_next_tuple(p_dev->socket, p_dev->func, tuple) +/* loop over CIS entries */ +int pcmcia_loop_tuple(struct pcmcia_device *p_dev, cisdata_t code, + int (*loop_tuple) (struct pcmcia_device *p_dev, + tuple_t *tuple, + void *priv_data), + void *priv_data); -int pccard_get_tuple_data(struct pcmcia_socket *s, tuple_t *tuple); -#define pcmcia_get_tuple_data(p_dev, tuple) \ - pccard_get_tuple_data(p_dev->socket, tuple) +/* get the MAC address from CISTPL_FUNCE */ +int pcmcia_get_mac_from_cis(struct pcmcia_device *p_dev, + struct net_device *dev); +/* parse a tuple_t */ +int pcmcia_parse_tuple(tuple_t *tuple, cisparse_t *parse); + /* loop CIS entries for valid configuration */ int pcmcia_loop_config(struct pcmcia_device *p_dev, int (*conf_check) (struct pcmcia_device *p_dev, @@ -215,6 +229,21 @@ int pcmcia_reset_card(struct pcmcia_socket *skt); int pcmcia_access_configuration_register(struct pcmcia_device *p_dev, conf_reg_t *reg); +/* deprecated -- do not use in drivers. */ +int pccard_get_first_tuple(struct pcmcia_socket *s, unsigned int function, + tuple_t *tuple); +#define pcmcia_get_first_tuple(p_dev, tuple) \ + pccard_get_first_tuple(p_dev->socket, p_dev->func, tuple) + +int pccard_get_next_tuple(struct pcmcia_socket *s, unsigned int function, + tuple_t *tuple); +#define pcmcia_get_next_tuple(p_dev, tuple) \ + pccard_get_next_tuple(p_dev->socket, p_dev->func, tuple) + +int pccard_get_tuple_data(struct pcmcia_socket *s, tuple_t *tuple); +#define pcmcia_get_tuple_data(p_dev, tuple) \ + pccard_get_tuple_data(p_dev->socket, tuple) + /* device configuration */ int pcmcia_request_io(struct pcmcia_device *p_dev, io_req_t *req); int pcmcia_request_irq(struct pcmcia_device *p_dev, irq_req_t *req); -- cgit v1.3-8-gc7d7 From 18a7a19b37838789452e0bd2855a51475628b971 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Mon, 19 Oct 2009 00:07:39 +0200 Subject: pcmcia: remove pcmcia_get_{first,next}_tuple() Remove the pcmcia_get_{first,next}_tuple() calls no longer needed by (current) pcmcia device drivers. Signed-off-by: Dominik Brodowski --- drivers/pcmcia/cs_internal.h | 9 +++++++++ include/pcmcia/ds.h | 15 --------------- 2 files changed, 9 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/drivers/pcmcia/cs_internal.h b/drivers/pcmcia/cs_internal.h index 06a14c951e92..70432cae76eb 100644 --- a/drivers/pcmcia/cs_internal.h +++ b/drivers/pcmcia/cs_internal.h @@ -206,6 +206,15 @@ int pccard_loop_tuple(struct pcmcia_socket *s, unsigned int function, cisparse_t *parse, void *priv_data)); +int pccard_get_first_tuple(struct pcmcia_socket *s, unsigned int function, + tuple_t *tuple); + +int pccard_get_next_tuple(struct pcmcia_socket *s, unsigned int function, + tuple_t *tuple); + +int pccard_get_tuple_data(struct pcmcia_socket *s, tuple_t *tuple); + + /* rsrc_mgr.c */ int pcmcia_validate_mem(struct pcmcia_socket *s); struct resource *pcmcia_find_io_region(unsigned long base, diff --git a/include/pcmcia/ds.h b/include/pcmcia/ds.h index 2eb6e24d1a6b..6c37d4ed7832 100644 --- a/include/pcmcia/ds.h +++ b/include/pcmcia/ds.h @@ -229,21 +229,6 @@ int pcmcia_reset_card(struct pcmcia_socket *skt); int pcmcia_access_configuration_register(struct pcmcia_device *p_dev, conf_reg_t *reg); -/* deprecated -- do not use in drivers. */ -int pccard_get_first_tuple(struct pcmcia_socket *s, unsigned int function, - tuple_t *tuple); -#define pcmcia_get_first_tuple(p_dev, tuple) \ - pccard_get_first_tuple(p_dev->socket, p_dev->func, tuple) - -int pccard_get_next_tuple(struct pcmcia_socket *s, unsigned int function, - tuple_t *tuple); -#define pcmcia_get_next_tuple(p_dev, tuple) \ - pccard_get_next_tuple(p_dev->socket, p_dev->func, tuple) - -int pccard_get_tuple_data(struct pcmcia_socket *s, tuple_t *tuple); -#define pcmcia_get_tuple_data(p_dev, tuple) \ - pccard_get_tuple_data(p_dev->socket, tuple) - /* device configuration */ int pcmcia_request_io(struct pcmcia_device *p_dev, io_req_t *req); int pcmcia_request_irq(struct pcmcia_device *p_dev, irq_req_t *req); -- cgit v1.3-8-gc7d7 From fdcc8aa953a1123a289791dd192090651036d593 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 8 Nov 2009 10:17:05 +0000 Subject: udp: add a counter into udp_hslot Adds a counter in udp_hslot to keep an accurate count of sockets present in chain. This will permit to upcoming UDP lookup algo to chose the shortest chain when secondary hash is added. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/udp.h | 8 ++++++++ net/ipv4/udp.c | 3 +++ 2 files changed, 11 insertions(+) (limited to 'include') diff --git a/include/net/udp.h b/include/net/udp.h index 22aa2e7eb1d7..9167281e47dc 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -50,8 +50,16 @@ struct udp_skb_cb { }; #define UDP_SKB_CB(__skb) ((struct udp_skb_cb *)((__skb)->cb)) +/** + * struct udp_hslot - UDP hash slot + * + * @head: head of list of sockets + * @count: number of sockets in 'head' list + * @lock: spinlock protecting changes to head/count + */ struct udp_hslot { struct hlist_nulls_head head; + int count; spinlock_t lock; } __attribute__((aligned(2 * sizeof(long)))); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index d5e75e976513..ffc837643a04 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -218,6 +218,7 @@ found: sk->sk_hash = snum; if (sk_unhashed(sk)) { sk_nulls_add_node_rcu(sk, &hslot->head); + hslot->count++; sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); } error = 0; @@ -1053,6 +1054,7 @@ void udp_lib_unhash(struct sock *sk) spin_lock_bh(&hslot->lock); if (sk_nulls_del_node_init_rcu(sk)) { + hslot->count--; inet_sk(sk)->inet_num = 0; sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); } @@ -1862,6 +1864,7 @@ void __init udp_table_init(struct udp_table *table, const char *name) } for (i = 0; i <= table->mask; i++) { INIT_HLIST_NULLS_HEAD(&table->hash[i].head, i); + table->hash[i].count = 0; spin_lock_init(&table->hash[i].lock); } } -- cgit v1.3-8-gc7d7 From d4cada4ae1c012815f95fa507eb86a0ae9d607d7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 8 Nov 2009 10:17:30 +0000 Subject: udp: split sk_hash into two u16 hashes Union sk_hash with two u16 hashes for udp (no extra memory taken) One 16 bits hash on (local port) value (the previous udp 'hash') One 16 bits hash on (local address, local port) values, initialized but not yet used. This second hash is using jenkin hash for better distribution. Because the 'port' is xored later, a partial hash is performed on local address + net_hash_mix(net) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/udp.h | 2 ++ include/net/sock.h | 6 +++++- net/ipv4/udp.c | 25 +++++++++++++++++++------ net/ipv6/udp.c | 27 +++++++++++++++++++++++++-- 4 files changed, 51 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/udp.h b/include/linux/udp.h index 832361e3e596..5b4b5274e683 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -55,6 +55,8 @@ static inline int udp_hashfn(struct net *net, unsigned num, unsigned mask) struct udp_sock { /* inet_sock has to be the first member */ struct inet_sock inet; +#define udp_port_hash inet.sk.__sk_common.skc_u16hashes[0] +#define udp_portaddr_hash inet.sk.__sk_common.skc_u16hashes[1] int pending; /* Any pending frames ? */ unsigned int corkflag; /* Cork is required */ __u16 encap_type; /* Is this an Encapsulation socket? */ diff --git a/include/net/sock.h b/include/net/sock.h index 55de3bd719a5..827366b62680 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -109,6 +109,7 @@ struct net; * @skc_refcnt: reference count * @skc_tx_queue_mapping: tx queue number for this connection * @skc_hash: hash value used with various protocol lookup tables + * @skc_u16hashes: two u16 hash values used by UDP lookup tables * @skc_family: network address family * @skc_state: Connection state * @skc_reuse: %SO_REUSEADDR setting @@ -131,7 +132,10 @@ struct sock_common { atomic_t skc_refcnt; int skc_tx_queue_mapping; - unsigned int skc_hash; + union { + unsigned int skc_hash; + __u16 skc_u16hashes[2]; + }; unsigned short skc_family; volatile unsigned char skc_state; unsigned char skc_reuse; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index ffc837643a04..af72de1c8690 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -138,13 +138,14 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num, sk_nulls_for_each(sk2, node, &hslot->head) if (net_eq(sock_net(sk2), net) && sk2 != sk && - (bitmap || sk2->sk_hash == num) && + (bitmap || udp_sk(sk2)->udp_port_hash == num) && (!sk2->sk_reuse || !sk->sk_reuse) && (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && (*saddr_comp)(sk, sk2)) { if (bitmap) - __set_bit(sk2->sk_hash >> log, bitmap); + __set_bit(udp_sk(sk2)->udp_port_hash >> log, + bitmap); else return 1; } @@ -215,7 +216,8 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, } found: inet_sk(sk)->inet_num = snum; - sk->sk_hash = snum; + udp_sk(sk)->udp_port_hash = snum; + udp_sk(sk)->udp_portaddr_hash ^= snum; if (sk_unhashed(sk)) { sk_nulls_add_node_rcu(sk, &hslot->head); hslot->count++; @@ -238,8 +240,19 @@ static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) inet1->inet_rcv_saddr == inet2->inet_rcv_saddr)); } +static unsigned int udp4_portaddr_hash(struct net *net, __be32 saddr, + unsigned int port) +{ + return jhash_1word(saddr, net_hash_mix(net)) ^ port; +} + int udp_v4_get_port(struct sock *sk, unsigned short snum) { + /* precompute partial secondary hash */ + udp_sk(sk)->udp_portaddr_hash = + udp4_portaddr_hash(sock_net(sk), + inet_sk(sk)->inet_rcv_saddr, + 0); return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal); } @@ -249,7 +262,7 @@ static inline int compute_score(struct sock *sk, struct net *net, __be32 saddr, { int score = -1; - if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum && + if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum && !ipv6_only_sock(sk)) { struct inet_sock *inet = inet_sk(sk); @@ -360,7 +373,7 @@ static inline struct sock *udp_v4_mcast_next(struct net *net, struct sock *sk, struct inet_sock *inet = inet_sk(s); if (!net_eq(sock_net(s), net) || - s->sk_hash != hnum || + udp_sk(s)->udp_port_hash != hnum || (inet->inet_daddr && inet->inet_daddr != rmt_addr) || (inet->inet_dport != rmt_port && inet->inet_dport) || (inet->inet_rcv_saddr && @@ -1050,7 +1063,7 @@ void udp_lib_unhash(struct sock *sk) if (sk_hashed(sk)) { struct udp_table *udptable = sk->sk_prot->h.udp_table; struct udp_hslot *hslot = udp_hashslot(udptable, sock_net(sk), - sk->sk_hash); + udp_sk(sk)->udp_port_hash); spin_lock_bh(&hslot->lock); if (sk_nulls_del_node_init_rcu(sk)) { diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 5bc7cdbf030a..1e5fadd997b7 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -81,8 +81,30 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) return 0; } +static unsigned int udp6_portaddr_hash(struct net *net, + const struct in6_addr *addr6, + unsigned int port) +{ + unsigned int hash, mix = net_hash_mix(net); + + if (ipv6_addr_any(addr6)) + hash = jhash_1word(0, mix); + else if (ipv6_addr_type(addr6) == IPV6_ADDR_MAPPED) + hash = jhash_1word(addr6->s6_addr32[3], mix); + else + hash = jhash2(addr6->s6_addr32, 4, mix); + + return hash ^ port; +} + + int udp_v6_get_port(struct sock *sk, unsigned short snum) { + /* precompute partial secondary hash */ + udp_sk(sk)->udp_portaddr_hash = + udp6_portaddr_hash(sock_net(sk), + &inet6_sk(sk)->rcv_saddr, + 0); return udp_lib_get_port(sk, snum, ipv6_rcv_saddr_equal); } @@ -94,7 +116,7 @@ static inline int compute_score(struct sock *sk, struct net *net, { int score = -1; - if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum && + if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum && sk->sk_family == PF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); struct inet_sock *inet = inet_sk(sk); @@ -415,7 +437,8 @@ static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk, if (!net_eq(sock_net(s), net)) continue; - if (s->sk_hash == num && s->sk_family == PF_INET6) { + if (udp_sk(s)->udp_port_hash == num && + s->sk_family == PF_INET6) { struct ipv6_pinfo *np = inet6_sk(s); if (inet->inet_dport) { if (inet->inet_dport != rmt_port) -- cgit v1.3-8-gc7d7 From 512615b6b843ff3ff5ad583f34c39b3f302f5f26 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 8 Nov 2009 10:17:58 +0000 Subject: udp: secondary hash on (local port, local address) Extends udp_table to contain a secondary hash table. socket anchor for this second hash is free, because UDP doesnt use skc_bind_node : We define an union to hold both skc_bind_node & a new hlist_nulls_node udp_portaddr_node udp_lib_get_port() inserts sockets into second hash chain (additional cost of one atomic op) udp_lib_unhash() deletes socket from second hash chain (additional cost of one atomic op) Note : No spinlock lockdep annotation is needed, because lock for the secondary hash chain is always get after lock for primary hash chain. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/udp.h | 1 + include/net/sock.h | 8 ++++++-- include/net/udp.h | 22 ++++++++++++++++++++-- net/ipv4/udp.c | 31 ++++++++++++++++++++++++++----- 4 files changed, 53 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/udp.h b/include/linux/udp.h index 5b4b5274e683..59f0ddf2d284 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -57,6 +57,7 @@ struct udp_sock { struct inet_sock inet; #define udp_port_hash inet.sk.__sk_common.skc_u16hashes[0] #define udp_portaddr_hash inet.sk.__sk_common.skc_u16hashes[1] +#define udp_portaddr_node inet.sk.__sk_common.skc_portaddr_node int pending; /* Any pending frames ? */ unsigned int corkflag; /* Cork is required */ __u16 encap_type; /* Is this an Encapsulation socket? */ diff --git a/include/net/sock.h b/include/net/sock.h index 827366b62680..3f1a4804bb3f 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -105,7 +105,7 @@ struct net; /** * struct sock_common - minimal network layer representation of sockets * @skc_node: main hash linkage for various protocol lookup tables - * @skc_nulls_node: main hash linkage for UDP/UDP-Lite protocol + * @skc_nulls_node: main hash linkage for TCP/UDP/UDP-Lite protocol * @skc_refcnt: reference count * @skc_tx_queue_mapping: tx queue number for this connection * @skc_hash: hash value used with various protocol lookup tables @@ -115,6 +115,7 @@ struct net; * @skc_reuse: %SO_REUSEADDR setting * @skc_bound_dev_if: bound device index if != 0 * @skc_bind_node: bind hash linkage for various protocol lookup tables + * @skc_portaddr_node: second hash linkage for UDP/UDP-Lite protocol * @skc_prot: protocol handlers inside a network family * @skc_net: reference to the network namespace of this socket * @@ -140,7 +141,10 @@ struct sock_common { volatile unsigned char skc_state; unsigned char skc_reuse; int skc_bound_dev_if; - struct hlist_node skc_bind_node; + union { + struct hlist_node skc_bind_node; + struct hlist_nulls_node skc_portaddr_node; + }; struct proto *skc_prot; #ifdef CONFIG_NET_NS struct net *skc_net; diff --git a/include/net/udp.h b/include/net/udp.h index 9167281e47dc..af41850f742a 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -63,10 +63,19 @@ struct udp_hslot { spinlock_t lock; } __attribute__((aligned(2 * sizeof(long)))); +/** + * struct udp_table - UDP table + * + * @hash: hash table, sockets are hashed on (local port) + * @hash2: hash table, sockets are hashed on (local port, local address) + * @mask: number of slots in hash tables, minus 1 + * @log: log2(number of slots in hash table) + */ struct udp_table { struct udp_hslot *hash; - unsigned int mask; - unsigned int log; + struct udp_hslot *hash2; + unsigned int mask; + unsigned int log; }; extern struct udp_table udp_table; extern void udp_table_init(struct udp_table *, const char *); @@ -75,6 +84,15 @@ static inline struct udp_hslot *udp_hashslot(struct udp_table *table, { return &table->hash[udp_hashfn(net, num, table->mask)]; } +/* + * For secondary hash, net_hash_mix() is performed before calling + * udp_hashslot2(), this explains difference with udp_hashslot() + */ +static inline struct udp_hslot *udp_hashslot2(struct udp_table *table, + unsigned int hash) +{ + return &table->hash2[hash & table->mask]; +} /* Note: this must match 'valbool' in sock_setsockopt */ #define UDP_CSUM_NOXMIT 1 diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index af72de1c8690..5f04216f35ce 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -163,7 +163,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, int (*saddr_comp)(const struct sock *sk1, const struct sock *sk2)) { - struct udp_hslot *hslot; + struct udp_hslot *hslot, *hslot2; struct udp_table *udptable = sk->sk_prot->h.udp_table; int error = 1; struct net *net = sock_net(sk); @@ -222,6 +222,13 @@ found: sk_nulls_add_node_rcu(sk, &hslot->head); hslot->count++; sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); + + hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash); + spin_lock(&hslot2->lock); + hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_portaddr_node, + &hslot2->head); + hslot2->count++; + spin_unlock(&hslot2->lock); } error = 0; fail_unlock: @@ -1062,14 +1069,22 @@ void udp_lib_unhash(struct sock *sk) { if (sk_hashed(sk)) { struct udp_table *udptable = sk->sk_prot->h.udp_table; - struct udp_hslot *hslot = udp_hashslot(udptable, sock_net(sk), - udp_sk(sk)->udp_port_hash); + struct udp_hslot *hslot, *hslot2; + + hslot = udp_hashslot(udptable, sock_net(sk), + udp_sk(sk)->udp_port_hash); + hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash); spin_lock_bh(&hslot->lock); if (sk_nulls_del_node_init_rcu(sk)) { hslot->count--; inet_sk(sk)->inet_num = 0; sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + + spin_lock(&hslot2->lock); + hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_portaddr_node); + hslot2->count--; + spin_unlock(&hslot2->lock); } spin_unlock_bh(&hslot->lock); } @@ -1857,7 +1872,7 @@ void __init udp_table_init(struct udp_table *table, const char *name) if (!CONFIG_BASE_SMALL) table->hash = alloc_large_system_hash(name, - sizeof(struct udp_hslot), + 2 * sizeof(struct udp_hslot), uhash_entries, 21, /* one slot per 2 MB */ 0, @@ -1869,17 +1884,23 @@ void __init udp_table_init(struct udp_table *table, const char *name) */ if (CONFIG_BASE_SMALL || table->mask < UDP_HTABLE_SIZE_MIN - 1) { table->hash = kmalloc(UDP_HTABLE_SIZE_MIN * - sizeof(struct udp_hslot), GFP_KERNEL); + 2 * sizeof(struct udp_hslot), GFP_KERNEL); if (!table->hash) panic(name); table->log = ilog2(UDP_HTABLE_SIZE_MIN); table->mask = UDP_HTABLE_SIZE_MIN - 1; } + table->hash2 = table->hash + (table->mask + 1); for (i = 0; i <= table->mask; i++) { INIT_HLIST_NULLS_HEAD(&table->hash[i].head, i); table->hash[i].count = 0; spin_lock_init(&table->hash[i].lock); } + for (i = 0; i <= table->mask; i++) { + INIT_HLIST_NULLS_HEAD(&table->hash2[i].head, i); + table->hash2[i].count = 0; + spin_lock_init(&table->hash2[i].lock); + } } void __init udp_init(void) -- cgit v1.3-8-gc7d7 From 7a50a240c495478179f01c9df4bd75e39cff79c7 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 8 Nov 2009 20:57:03 -0800 Subject: net/compat_ioctl: support SIOCWANDEV This adds compat_ioctl support for SIOCWANDEV, which has always been missing. The definition of struct compat_ifreq was missing an ifru_settings fields that is needed to support SIOCWANDEV, so add that and clean up the whitespace damage in the struct definition. Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- include/linux/compat.h | 41 ++++++++++++++++++++++++----------------- net/socket.c | 23 +++++++++++++++++++++++ 2 files changed, 47 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/compat.h b/include/linux/compat.h index 224c7a896172..ef68119a4fd2 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -165,25 +165,32 @@ struct compat_ifmap { unsigned char port; }; +struct compat_if_settings +{ + unsigned int type; /* Type of physical device or protocol */ + unsigned int size; /* Size of the data allocated by the caller */ + compat_uptr_t ifs_ifsu; /* union of pointers */ +}; + struct compat_ifreq { - union { - char ifrn_name[IFNAMSIZ]; /* if name, e.g. "en0" */ - } ifr_ifrn; - union { - struct sockaddr ifru_addr; - struct sockaddr ifru_dstaddr; - struct sockaddr ifru_broadaddr; - struct sockaddr ifru_netmask; - struct sockaddr ifru_hwaddr; - short ifru_flags; - compat_int_t ifru_ivalue; - compat_int_t ifru_mtu; - struct compat_ifmap ifru_map; - char ifru_slave[IFNAMSIZ]; /* Just fits the size */ + union { + char ifrn_name[IFNAMSIZ]; /* if name, e.g. "en0" */ + } ifr_ifrn; + union { + struct sockaddr ifru_addr; + struct sockaddr ifru_dstaddr; + struct sockaddr ifru_broadaddr; + struct sockaddr ifru_netmask; + struct sockaddr ifru_hwaddr; + short ifru_flags; + compat_int_t ifru_ivalue; + compat_int_t ifru_mtu; + struct compat_ifmap ifru_map; + char ifru_slave[IFNAMSIZ]; /* Just fits the size */ char ifru_newname[IFNAMSIZ]; - compat_caddr_t ifru_data; - /* XXXX? ifru_settings should be here */ - } ifr_ifru; + compat_caddr_t ifru_data; + struct compat_if_settings ifru_settings; + } ifr_ifru; }; struct compat_ifconf { diff --git a/net/socket.c b/net/socket.c index 224e7f73fdf0..befd9f5b1620 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2627,6 +2627,27 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) return dev_ioctl(net, SIOCETHTOOL, ifr); } +static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32) +{ + void __user *uptr; + compat_uptr_t uptr32; + struct ifreq __user *uifr; + + uifr = compat_alloc_user_space(sizeof (*uifr)); + if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq))) + return -EFAULT; + + if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu)) + return -EFAULT; + + uptr = compat_ptr(uptr32); + + if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc)) + return -EFAULT; + + return dev_ioctl(net, SIOCWANDEV, uifr); +} + static int bond_ioctl(struct net *net, unsigned int cmd, struct compat_ifreq __user *ifr32) { @@ -3058,6 +3079,8 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, return dev_ifconf(net, argp); case SIOCETHTOOL: return ethtool_ioctl(net, argp); + case SIOCWANDEV: + return compat_siocwandev(net, argp); case SIOCBONDENSLAVE: case SIOCBONDRELEASE: case SIOCBONDSETHWADDR: -- cgit v1.3-8-gc7d7 From 9e0d57fd6dad37d72a3ca6db00ca8c76f2215454 Mon Sep 17 00:00:00 2001 From: Yury Polyanskiy Date: Sun, 8 Nov 2009 20:58:41 -0800 Subject: xfrm: SAD entries do not expire correctly after suspend-resume This fixes the following bug in the current implementation of net/xfrm: SAD entries timeouts do not count the time spent by the machine in the suspended state. This leads to the connectivity problems because after resuming local machine thinks that the SAD entry is still valid, while it has already been expired on the remote server. The cause of this is very simple: the timeouts in the net/xfrm are bound to the old mod_timer() timers. This patch reassigns them to the CLOCK_REALTIME hrtimer. I have been using this version of the patch for a few months on my machines without any problems. Also run a few stress tests w/o any issues. This version of the patch uses tasklet_hrtimer by Peter Zijlstra (commit 9ba5f0). This patch is against 2.6.31.4. Please CC me. Signed-off-by: Yury Polyanskiy Signed-off-by: David S. Miller --- include/net/xfrm.h | 5 ++++- net/xfrm/xfrm_state.c | 30 +++++++++++++++++------------- 2 files changed, 21 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 7f38ef509957..93d184b91a8c 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -19,6 +19,9 @@ #include #include #include + +#include + #ifdef CONFIG_XFRM_STATISTICS #include #endif @@ -198,7 +201,7 @@ struct xfrm_state { struct xfrm_stats stats; struct xfrm_lifetime_cur curlft; - struct timer_list timer; + struct tasklet_hrtimer mtimer; /* Last used time */ unsigned long lastused; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index f2f7c638083e..e9ac0cec0877 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -21,6 +21,9 @@ #include #include #include +#include +#include +#include #include "xfrm_hash.h" @@ -352,7 +355,7 @@ static void xfrm_put_mode(struct xfrm_mode *mode) static void xfrm_state_gc_destroy(struct xfrm_state *x) { - del_timer_sync(&x->timer); + tasklet_hrtimer_cancel(&x->mtimer); del_timer_sync(&x->rtimer); kfree(x->aalg); kfree(x->ealg); @@ -398,9 +401,10 @@ static inline unsigned long make_jiffies(long secs) return secs*HZ; } -static void xfrm_timer_handler(unsigned long data) +static enum hrtimer_restart xfrm_timer_handler(struct hrtimer * me) { - struct xfrm_state *x = (struct xfrm_state*)data; + struct tasklet_hrtimer *thr = container_of(me, struct tasklet_hrtimer, timer); + struct xfrm_state *x = container_of(thr, struct xfrm_state, mtimer); struct net *net = xs_net(x); unsigned long now = get_seconds(); long next = LONG_MAX; @@ -451,8 +455,9 @@ static void xfrm_timer_handler(unsigned long data) if (warn) km_state_expired(x, 0, 0); resched: - if (next != LONG_MAX) - mod_timer(&x->timer, jiffies + make_jiffies(next)); + if (next != LONG_MAX){ + tasklet_hrtimer_start(&x->mtimer, ktime_set(next, 0), HRTIMER_MODE_REL); + } goto out; @@ -474,6 +479,7 @@ expired: out: spin_unlock(&x->lock); + return HRTIMER_NORESTART; } static void xfrm_replay_timer_handler(unsigned long data); @@ -492,7 +498,7 @@ struct xfrm_state *xfrm_state_alloc(struct net *net) INIT_HLIST_NODE(&x->bydst); INIT_HLIST_NODE(&x->bysrc); INIT_HLIST_NODE(&x->byspi); - setup_timer(&x->timer, xfrm_timer_handler, (unsigned long)x); + tasklet_hrtimer_init(&x->mtimer, xfrm_timer_handler, CLOCK_REALTIME, HRTIMER_MODE_ABS); setup_timer(&x->rtimer, xfrm_replay_timer_handler, (unsigned long)x); x->curlft.add_time = get_seconds(); @@ -843,8 +849,7 @@ found: hlist_add_head(&x->byspi, net->xfrm.state_byspi+h); } x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires; - x->timer.expires = jiffies + net->xfrm.sysctl_acq_expires*HZ; - add_timer(&x->timer); + tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL); net->xfrm.state_num++; xfrm_hash_grow_check(net, x->bydst.next != NULL); } else { @@ -921,7 +926,7 @@ static void __xfrm_state_insert(struct xfrm_state *x) hlist_add_head(&x->byspi, net->xfrm.state_byspi+h); } - mod_timer(&x->timer, jiffies + HZ); + tasklet_hrtimer_start(&x->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL); if (x->replay_maxage) mod_timer(&x->rtimer, jiffies + x->replay_maxage); @@ -1019,8 +1024,7 @@ static struct xfrm_state *__find_acq_core(struct net *net, unsigned short family x->props.reqid = reqid; x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires; xfrm_state_hold(x); - x->timer.expires = jiffies + net->xfrm.sysctl_acq_expires*HZ; - add_timer(&x->timer); + tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL); list_add(&x->km.all, &net->xfrm.state_all); hlist_add_head(&x->bydst, net->xfrm.state_bydst+h); h = xfrm_src_hash(net, daddr, saddr, family); @@ -1300,7 +1304,7 @@ out: memcpy(&x1->lft, &x->lft, sizeof(x1->lft)); x1->km.dying = 0; - mod_timer(&x1->timer, jiffies + HZ); + tasklet_hrtimer_start(&x1->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL); if (x1->curlft.use_time) xfrm_state_check_expire(x1); @@ -1325,7 +1329,7 @@ int xfrm_state_check_expire(struct xfrm_state *x) if (x->curlft.bytes >= x->lft.hard_byte_limit || x->curlft.packets >= x->lft.hard_packet_limit) { x->km.state = XFRM_STATE_EXPIRED; - mod_timer(&x->timer, jiffies); + tasklet_hrtimer_start(&x->mtimer, ktime_set(0,0), HRTIMER_MODE_REL); return -EINVAL; } -- cgit v1.3-8-gc7d7 From 9cb495bb41f07a3ebfc60d3b9d26017a1fd7050c Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sat, 24 Oct 2009 15:57:22 +0200 Subject: pcmcia: remove now-defunct cs_error, pcmcia_error_{func,ret} As all in-tree drivers have been converted to not use cs_error() any more, drop these functions and definitions, and update the Documentation. Signed-off-by: Dominik Brodowski --- Documentation/pcmcia/driver-changes.txt | 5 ++ drivers/pcmcia/ds.c | 101 -------------------------------- include/pcmcia/ds.h | 36 ------------ 3 files changed, 5 insertions(+), 137 deletions(-) (limited to 'include') diff --git a/Documentation/pcmcia/driver-changes.txt b/Documentation/pcmcia/driver-changes.txt index adfb83e58675..446f43b309df 100644 --- a/Documentation/pcmcia/driver-changes.txt +++ b/Documentation/pcmcia/driver-changes.txt @@ -1,5 +1,10 @@ This file details changes in 2.6 which affect PCMCIA card driver authors: +* no cs_error / CS_CHECK / CONFIG_PCMCIA_DEBUG (as of 2.6.33) + Instead of the cs_error() callback or the CS_CHECK() macro, please use + Linux-style checking of return values, and -- if necessary -- debug + messages using "dev_dbg()" or "pr_debug()". + * New CIS tuple access (as of 2.6.33) Instead of pcmcia_get_{first,next}_tuple(), pcmcia_get_tuple_data() and pcmcia_parse_tuple(), a driver shall use "pcmcia_get_tuple()" if it is diff --git a/drivers/pcmcia/ds.c b/drivers/pcmcia/ds.c index 5b069aeaf17a..05893d41dd41 100644 --- a/drivers/pcmcia/ds.c +++ b/drivers/pcmcia/ds.c @@ -46,107 +46,6 @@ spinlock_t pcmcia_dev_list_lock; /*====================================================================*/ -/* code which was in cs.c before */ - -/* String tables for error messages */ - -typedef struct lookup_t { - const int key; - const char *msg; -} lookup_t; - -static const lookup_t error_table[] = { - { 0, "Operation succeeded" }, - { -EIO, "Input/Output error" }, - { -ENODEV, "No card present" }, - { -EINVAL, "Bad parameter" }, - { -EACCES, "Configuration locked" }, - { -EBUSY, "Resource in use" }, - { -ENOSPC, "No more items" }, - { -ENOMEM, "Out of resource" }, -}; - - -static const lookup_t service_table[] = { - { AccessConfigurationRegister, "AccessConfigurationRegister" }, - { AddSocketServices, "AddSocketServices" }, - { AdjustResourceInfo, "AdjustResourceInfo" }, - { CheckEraseQueue, "CheckEraseQueue" }, - { CloseMemory, "CloseMemory" }, - { DeregisterClient, "DeregisterClient" }, - { DeregisterEraseQueue, "DeregisterEraseQueue" }, - { GetCardServicesInfo, "GetCardServicesInfo" }, - { GetClientInfo, "GetClientInfo" }, - { GetConfigurationInfo, "GetConfigurationInfo" }, - { GetEventMask, "GetEventMask" }, - { GetFirstClient, "GetFirstClient" }, - { GetFirstRegion, "GetFirstRegion" }, - { GetFirstTuple, "GetFirstTuple" }, - { GetNextClient, "GetNextClient" }, - { GetNextRegion, "GetNextRegion" }, - { GetNextTuple, "GetNextTuple" }, - { GetStatus, "GetStatus" }, - { GetTupleData, "GetTupleData" }, - { MapMemPage, "MapMemPage" }, - { ModifyConfiguration, "ModifyConfiguration" }, - { ModifyWindow, "ModifyWindow" }, - { OpenMemory, "OpenMemory" }, - { ParseTuple, "ParseTuple" }, - { ReadMemory, "ReadMemory" }, - { RegisterClient, "RegisterClient" }, - { RegisterEraseQueue, "RegisterEraseQueue" }, - { RegisterMTD, "RegisterMTD" }, - { ReleaseConfiguration, "ReleaseConfiguration" }, - { ReleaseIO, "ReleaseIO" }, - { ReleaseIRQ, "ReleaseIRQ" }, - { ReleaseWindow, "ReleaseWindow" }, - { RequestConfiguration, "RequestConfiguration" }, - { RequestIO, "RequestIO" }, - { RequestIRQ, "RequestIRQ" }, - { RequestSocketMask, "RequestSocketMask" }, - { RequestWindow, "RequestWindow" }, - { ResetCard, "ResetCard" }, - { SetEventMask, "SetEventMask" }, - { ValidateCIS, "ValidateCIS" }, - { WriteMemory, "WriteMemory" }, - { BindDevice, "BindDevice" }, - { BindMTD, "BindMTD" }, - { ReportError, "ReportError" }, - { SuspendCard, "SuspendCard" }, - { ResumeCard, "ResumeCard" }, - { EjectCard, "EjectCard" }, - { InsertCard, "InsertCard" }, - { ReplaceCIS, "ReplaceCIS" } -}; - -const char *pcmcia_error_func(int func) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(service_table); i++) - if (service_table[i].key == func) - return service_table[i].msg; - - return "Unknown service number"; -} -EXPORT_SYMBOL(pcmcia_error_func); - -const char *pcmcia_error_ret(int ret) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(error_table); i++) - if (error_table[i].key == ret) - return error_table[i].msg; - - return "unknown"; -} -EXPORT_SYMBOL(pcmcia_error_ret); - -/*======================================================================*/ - - - static void pcmcia_check_driver(struct pcmcia_driver *p_drv) { struct pcmcia_device_id *did = p_drv->id_table; diff --git a/include/pcmcia/ds.h b/include/pcmcia/ds.h index 6c37d4ed7832..d82392de4e92 100644 --- a/include/pcmcia/ds.h +++ b/include/pcmcia/ds.h @@ -142,42 +142,6 @@ struct pcmcia_device { #define handle_to_dev(handle) (handle->dev) -/* (deprecated) error reporting by PCMCIA devices. Use dev_printk() - * or dev_dbg() directly in the driver, without referring to pcmcia_error_func() - * and/or pcmcia_error_ret() for those functions will go away soon. - */ -enum service { - AccessConfigurationRegister, AddSocketServices, - AdjustResourceInfo, CheckEraseQueue, CloseMemory, CopyMemory, - DeregisterClient, DeregisterEraseQueue, GetCardServicesInfo, - GetClientInfo, GetConfigurationInfo, GetEventMask, - GetFirstClient, GetFirstPartion, GetFirstRegion, GetFirstTuple, - GetNextClient, GetNextPartition, GetNextRegion, GetNextTuple, - GetStatus, GetTupleData, MapLogSocket, MapLogWindow, MapMemPage, - MapPhySocket, MapPhyWindow, ModifyConfiguration, ModifyWindow, - OpenMemory, ParseTuple, ReadMemory, RegisterClient, - RegisterEraseQueue, RegisterMTD, RegisterTimer, - ReleaseConfiguration, ReleaseExclusive, ReleaseIO, ReleaseIRQ, - ReleaseSocketMask, ReleaseWindow, ReplaceSocketServices, - RequestConfiguration, RequestExclusive, RequestIO, RequestIRQ, - RequestSocketMask, RequestWindow, ResetCard, ReturnSSEntry, - SetEventMask, SetRegion, ValidateCIS, VendorSpecific, - WriteMemory, BindDevice, BindMTD, ReportError, - SuspendCard, ResumeCard, EjectCard, InsertCard, ReplaceCIS, - GetFirstWindow, GetNextWindow, GetMemPage -}; -const char *pcmcia_error_func(int func); -const char *pcmcia_error_ret(int ret); - -#define cs_error(p_dev, func, ret) \ - { \ - dev_printk(KERN_NOTICE, &p_dev->dev, \ - "%s : %s\n", \ - pcmcia_error_func(func), \ - pcmcia_error_ret(ret)); \ - } - - /* * CIS access. * -- cgit v1.3-8-gc7d7 From 1689164a272a962572a1f31af715dfe462cf7910 Mon Sep 17 00:00:00 2001 From: Russell King - ARM Linux Date: Sun, 29 Mar 2009 22:43:43 +0100 Subject: PCMCIA: ss: allow PCI IRQs > 255 Signed-off-by: Russell King Signed-off-by: Dominik Brodowski --- include/pcmcia/ss.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/pcmcia/ss.h b/include/pcmcia/ss.h index d696a692d94a..753da9b087d3 100644 --- a/include/pcmcia/ss.h +++ b/include/pcmcia/ss.h @@ -172,7 +172,7 @@ struct pcmcia_socket { u_int irq_mask; u_int map_size; u_int io_offset; - u_char pci_irq; + u_int pci_irq; struct pci_dev * cb_dev; -- cgit v1.3-8-gc7d7 From b71a8eb0fa64ec6d00175f479e3ef851703568af Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Tue, 6 Oct 2009 12:42:51 +0200 Subject: tree-wide: fix typos "selct" + "slect" -> "select" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch was generated by git grep -E -i -l 's(le|el)ct' | xargs -r perl -p -i -e 's/([Ss])(le|el)ct/$1elect/ with only skipping net/netfilter/xt_SECMARK.c and include/linux/netfilter/xt_SECMARK.h which have a struct member called selctx. Signed-off-by: Uwe Kleine-König Signed-off-by: Jiri Kosina --- drivers/media/video/ov772x.c | 2 +- drivers/scsi/aic7xxx/aic79xx.seq | 2 +- drivers/scsi/aic7xxx/aic79xx_osm.c | 2 +- drivers/scsi/aic7xxx/aic7xxx.seq | 2 +- drivers/scsi/aic7xxx/aic7xxx_osm.c | 2 +- drivers/scsi/dc395x.c | 2 +- include/linux/spi/spi_bitbang.h | 2 +- kernel/time/clocksource.c | 2 +- sound/pci/hda/patch_cirrus.c | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/drivers/media/video/ov772x.c b/drivers/media/video/ov772x.c index eccb40ab7fec..205229333466 100644 --- a/drivers/media/video/ov772x.c +++ b/drivers/media/video/ov772x.c @@ -247,7 +247,7 @@ /* COM5 */ #define AFR_ON_OFF 0x80 /* Auto frame rate control ON/OFF selection */ -#define AFR_SPPED 0x40 /* Auto frame rate control speed slection */ +#define AFR_SPPED 0x40 /* Auto frame rate control speed selection */ /* Auto frame rate max rate control */ #define AFR_NO_RATE 0x00 /* No reduction of frame rate */ #define AFR_1p2 0x10 /* Max reduction to 1/2 frame rate */ diff --git a/drivers/scsi/aic7xxx/aic79xx.seq b/drivers/scsi/aic7xxx/aic79xx.seq index 58bc17591b54..3b66b5ae3d9f 100644 --- a/drivers/scsi/aic7xxx/aic79xx.seq +++ b/drivers/scsi/aic7xxx/aic79xx.seq @@ -1281,7 +1281,7 @@ END_CRITICAL; * Is it a disconnect message? Set a flag in the SCB to remind us * and await the bus going free. If this is an untagged transaction * store the SCB id for it in our untagged target table for lookup on - * a reselction. + * a reselection. */ mesgin_disconnect: /* diff --git a/drivers/scsi/aic7xxx/aic79xx_osm.c b/drivers/scsi/aic7xxx/aic79xx_osm.c index 75b23317bd26..1222a7ac698a 100644 --- a/drivers/scsi/aic7xxx/aic79xx_osm.c +++ b/drivers/scsi/aic7xxx/aic79xx_osm.c @@ -2335,7 +2335,7 @@ ahd_linux_queue_abort_cmd(struct scsi_cmnd *cmd) /* * The sequencer will never re-reference the * in-core SCB. To make sure we are notified - * during reslection, set the MK_MESSAGE flag in + * during reselection, set the MK_MESSAGE flag in * the card's copy of the SCB. */ ahd_outb(ahd, SCB_CONTROL, diff --git a/drivers/scsi/aic7xxx/aic7xxx.seq b/drivers/scsi/aic7xxx/aic7xxx.seq index 15196390e28d..5a4cfc954a9f 100644 --- a/drivers/scsi/aic7xxx/aic7xxx.seq +++ b/drivers/scsi/aic7xxx/aic7xxx.seq @@ -1693,7 +1693,7 @@ if ((ahc->flags & AHC_INITIATORROLE) != 0) { * Is it a disconnect message? Set a flag in the SCB to remind us * and await the bus going free. If this is an untagged transaction * store the SCB id for it in our untagged target table for lookup on - * a reselction. + * a reselection. */ mesgin_disconnect: /* diff --git a/drivers/scsi/aic7xxx/aic7xxx_osm.c b/drivers/scsi/aic7xxx/aic7xxx_osm.c index fd2b9785ff4f..8cb05dc8e6a1 100644 --- a/drivers/scsi/aic7xxx/aic7xxx_osm.c +++ b/drivers/scsi/aic7xxx/aic7xxx_osm.c @@ -2290,7 +2290,7 @@ ahc_linux_queue_recovery_cmd(struct scsi_cmnd *cmd, scb_flag flag) * In the non-paging case, the sequencer will * never re-reference the in-core SCB. * To make sure we are notified during - * reslection, set the MK_MESSAGE flag in + * reselection, set the MK_MESSAGE flag in * the card's copy of the SCB. */ if ((ahc->flags & AHC_PAGESCBS) == 0) { diff --git a/drivers/scsi/dc395x.c b/drivers/scsi/dc395x.c index 075e2397273c..6c59c02c1ed9 100644 --- a/drivers/scsi/dc395x.c +++ b/drivers/scsi/dc395x.c @@ -1509,7 +1509,7 @@ static u8 start_scsi(struct AdapterCtlBlk* acb, struct DeviceCtlBlk* dcb, * Try anyway? * * We could, BUT: Sometimes the TRM_S1040 misses to produce a Selection - * Timeout, a Disconnect or a Reselction IRQ, so we would be screwed! + * Timeout, a Disconnect or a Reselection IRQ, so we would be screwed! * (This is likely to be a bug in the hardware. Obviously, most people * only have one initiator per SCSI bus.) * Instead let this fail and have the timer make sure the command is diff --git a/include/linux/spi/spi_bitbang.h b/include/linux/spi/spi_bitbang.h index eed4254bd503..3274c507b8a9 100644 --- a/include/linux/spi/spi_bitbang.h +++ b/include/linux/spi/spi_bitbang.h @@ -15,7 +15,7 @@ * Some hardware works well with requests at spi_transfer scope: * * - Drivers leveraging smarter hardware, with fifos or DMA; or for half - * duplex (MicroWire) controllers. Provide chipslect() and txrx_bufs(), + * duplex (MicroWire) controllers. Provide chipselect() and txrx_bufs(), * and custom setup()/cleanup() methods. */ diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 5e18c6ab2c6a..c403567f78c0 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -580,7 +580,7 @@ sysfs_show_current_clocksources(struct sys_device *dev, * @count: length of buffer * * Takes input from sysfs interface for manually overriding the default - * clocksource selction. + * clocksource selection. */ static ssize_t sysfs_override_clocksource(struct sys_device *dev, struct sysdev_attribute *attr, diff --git a/sound/pci/hda/patch_cirrus.c b/sound/pci/hda/patch_cirrus.c index 8ba306856d38..7b0446fa6009 100644 --- a/sound/pci/hda/patch_cirrus.c +++ b/sound/pci/hda/patch_cirrus.c @@ -947,7 +947,7 @@ static void init_input(struct hda_codec *codec) coef |= 0x0500; /* DMIC2 enable 2 channels, disable GPIO1 */ if (is_active_pin(codec, CS_DMIC1_PIN_NID)) coef |= 0x1800; /* DMIC1 enable 2 channels, disable GPIO0 - * No effect if SPDIF_OUT2 is slected in + * No effect if SPDIF_OUT2 is selected in * IDX_SPDIF_CTL. */ cs_vendor_coef_set(codec, IDX_ADC_CFG, coef); -- cgit v1.3-8-gc7d7 From 21ae2956ce289f61f11863cc67080f9a28101ae0 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Wed, 7 Oct 2009 15:21:09 +0200 Subject: tree-wide: fix typos "aquire" -> "acquire", "cumsumed" -> "consumed" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch was generated by git grep -E -i -l '[Aa]quire' | xargs -r perl -p -i -e 's/([Aa])quire/$1cquire/' and the cumsumed was found by checking the diff for aquire. Signed-off-by: Uwe Kleine-König Signed-off-by: Jiri Kosina --- drivers/cpuidle/governor.c | 4 ++-- drivers/infiniband/hw/cxgb3/cxio_hal.c | 2 +- drivers/media/dvb/frontends/drx397xD.c | 2 +- drivers/net/ps3_gelic_wireless.h | 2 +- drivers/net/qla3xxx.c | 2 +- drivers/net/wireless/b43/main.c | 2 +- drivers/net/wireless/b43legacy/main.c | 2 +- include/linux/dm-log-userspace.h | 2 +- mm/kmemleak.c | 4 ++-- mm/memcontrol.c | 2 +- 10 files changed, 12 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/drivers/cpuidle/governor.c b/drivers/cpuidle/governor.c index 70b59642a708..724c164d31c9 100644 --- a/drivers/cpuidle/governor.c +++ b/drivers/cpuidle/governor.c @@ -21,7 +21,7 @@ struct cpuidle_governor *cpuidle_curr_governor; * __cpuidle_find_governor - finds a governor of the specified name * @str: the name * - * Must be called with cpuidle_lock aquired. + * Must be called with cpuidle_lock acquired. */ static struct cpuidle_governor * __cpuidle_find_governor(const char *str) { @@ -39,7 +39,7 @@ static struct cpuidle_governor * __cpuidle_find_governor(const char *str) * @gov: the new target governor * * NOTE: "gov" can be NULL to specify disabled - * Must be called with cpuidle_lock aquired. + * Must be called with cpuidle_lock acquired. */ int cpuidle_switch_governor(struct cpuidle_governor *gov) { diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c index 72ed3396b721..0677fc7dfd51 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.c +++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c @@ -589,7 +589,7 @@ static int cxio_hal_destroy_ctrl_qp(struct cxio_rdev *rdev_p) /* write len bytes of data into addr (32B aligned address) * If data is NULL, clear len byte of memory to zero. - * caller aquires the ctrl_qp lock before the call + * caller acquires the ctrl_qp lock before the call */ static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr, u32 len, void *data) diff --git a/drivers/media/dvb/frontends/drx397xD.c b/drivers/media/dvb/frontends/drx397xD.c index 010075535221..868b78bfae75 100644 --- a/drivers/media/dvb/frontends/drx397xD.c +++ b/drivers/media/dvb/frontends/drx397xD.c @@ -81,7 +81,7 @@ static struct { #include "drx397xD_fw.h" }; -/* use only with writer lock aquired */ +/* use only with writer lock acquired */ static void _drx_release_fw(struct drx397xD_state *s, enum fw_ix ix) { memset(&fw[ix].data[0], 0, sizeof(fw[0].data)); diff --git a/drivers/net/ps3_gelic_wireless.h b/drivers/net/ps3_gelic_wireless.h index 5b631c6c9775..0a88b535197a 100644 --- a/drivers/net/ps3_gelic_wireless.h +++ b/drivers/net/ps3_gelic_wireless.h @@ -199,7 +199,7 @@ struct gelic_eurus_rssi_info { /* for 'stat' member of gelic_wl_info */ enum gelic_wl_info_status_bit { GELIC_WL_STAT_CONFIGURED, - GELIC_WL_STAT_CH_INFO, /* ch info aquired */ + GELIC_WL_STAT_CH_INFO, /* ch info acquired */ GELIC_WL_STAT_ESSID_SET, /* ESSID specified by userspace */ GELIC_WL_STAT_BSSID_SET, /* BSSID specified by userspace */ GELIC_WL_STAT_WPA_PSK_SET, /* PMK specified by userspace */ diff --git a/drivers/net/qla3xxx.c b/drivers/net/qla3xxx.c index 4c610511eb40..f72643313bab 100644 --- a/drivers/net/qla3xxx.c +++ b/drivers/net/qla3xxx.c @@ -3651,7 +3651,7 @@ static int ql_adapter_up(struct ql3_adapter *qdev) ql_sem_unlock(qdev, QL_DRVR_SEM_MASK); } else { printk(KERN_ERR PFX - "%s: Could not aquire driver lock.\n", + "%s: Could not acquire driver lock.\n", ndev->name); goto err_lock; } diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c index 86f35827f008..f66efea61667 100644 --- a/drivers/net/wireless/b43/main.c +++ b/drivers/net/wireless/b43/main.c @@ -2955,7 +2955,7 @@ static void do_periodic_work(struct b43_wldev *dev) /* Periodic work locking policy: * The whole periodic work handler is protected by * wl->mutex. If another lock is needed somewhere in the - * pwork callchain, it's aquired in-place, where it's needed. + * pwork callchain, it's acquired in-place, where it's needed. */ static void b43_periodic_work_handler(struct work_struct *work) { diff --git a/drivers/net/wireless/b43legacy/main.c b/drivers/net/wireless/b43legacy/main.c index 4b60148a5e61..881784b18b0b 100644 --- a/drivers/net/wireless/b43legacy/main.c +++ b/drivers/net/wireless/b43legacy/main.c @@ -2277,7 +2277,7 @@ static void do_periodic_work(struct b43legacy_wldev *dev) /* Periodic work locking policy: * The whole periodic work handler is protected by * wl->mutex. If another lock is needed somewhere in the - * pwork callchain, it's aquired in-place, where it's needed. + * pwork callchain, it's acquired in-place, where it's needed. */ static void b43legacy_periodic_work_handler(struct work_struct *work) { diff --git a/include/linux/dm-log-userspace.h b/include/linux/dm-log-userspace.h index 8a1f972c0fe9..0c3c3a2110c4 100644 --- a/include/linux/dm-log-userspace.h +++ b/include/linux/dm-log-userspace.h @@ -363,7 +363,7 @@ * various request types above. The remaining 24-bits are currently * set to zero and are reserved for future use and compatibility concerns. * - * User-space should always use DM_ULOG_REQUEST_TYPE to aquire the + * User-space should always use DM_ULOG_REQUEST_TYPE to acquire the * request type from the 'request_type' field to maintain forward compatibility. */ #define DM_ULOG_REQUEST_MASK 0xFF diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 8bf765c4f58d..13f33b3081ec 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -1050,8 +1050,8 @@ static void scan_object(struct kmemleak_object *object) unsigned long flags; /* - * Once the object->lock is aquired, the corresponding memory block - * cannot be freed (the same lock is aquired in delete_object). + * Once the object->lock is acquired, the corresponding memory block + * cannot be freed (the same lock is acquired in delete_object). */ spin_lock_irqsave(&object->lock, flags); if (object->flags & OBJECT_NO_SCAN) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index f99f5991d6bb..7226e60e52af 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1720,7 +1720,7 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, /* * While swap-in, try_charge -> commit or cancel, the page is locked. * And when try_charge() successfully returns, one refcnt to memcg without - * struct page_cgroup is aquired. This refcnt will be cumsumed by + * struct page_cgroup is acquired. This refcnt will be consumed by * "commit()" or removed by "cancel()" */ int mem_cgroup_try_charge_swapin(struct mm_struct *mm, -- cgit v1.3-8-gc7d7 From dd8dbf2e6880e30c00b18600c962d0cb5a03c555 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Tue, 3 Nov 2009 16:35:32 +1100 Subject: security: report the module name to security_module_request For SELinux to do better filtering in userspace we send the name of the module along with the AVC denial when a program is denied module_request. Example output: type=SYSCALL msg=audit(11/03/2009 10:59:43.510:9) : arch=x86_64 syscall=write success=yes exit=2 a0=3 a1=7fc28c0d56c0 a2=2 a3=7fffca0d7440 items=0 ppid=1727 pid=1729 auid=unset uid=root gid=root euid=root suid=root fsuid=root egid=root sgid=root fsgid=root tty=(none) ses=unset comm=rpc.nfsd exe=/usr/sbin/rpc.nfsd subj=system_u:system_r:nfsd_t:s0 key=(null) type=AVC msg=audit(11/03/2009 10:59:43.510:9) : avc: denied { module_request } for pid=1729 comm=rpc.nfsd kmod="net-pf-10" scontext=system_u:system_r:nfsd_t:s0 tcontext=system_u:system_r:kernel_t:s0 tclass=system Signed-off-by: Eric Paris Signed-off-by: James Morris --- include/linux/lsm_audit.h | 18 ++++++++++-------- include/linux/security.h | 7 ++++--- kernel/kmod.c | 8 ++++---- security/capability.c | 2 +- security/lsm_audit.c | 4 ++++ security/security.c | 4 ++-- security/selinux/hooks.c | 13 +++++++++++-- 7 files changed, 36 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h index 190c37854870..f78f83d7663f 100644 --- a/include/linux/lsm_audit.h +++ b/include/linux/lsm_audit.h @@ -26,14 +26,15 @@ /* Auxiliary data to use in generating the audit record. */ struct common_audit_data { - char type; -#define LSM_AUDIT_DATA_FS 1 -#define LSM_AUDIT_DATA_NET 2 -#define LSM_AUDIT_DATA_CAP 3 -#define LSM_AUDIT_DATA_IPC 4 -#define LSM_AUDIT_DATA_TASK 5 -#define LSM_AUDIT_DATA_KEY 6 -#define LSM_AUDIT_NO_AUDIT 7 + char type; +#define LSM_AUDIT_DATA_FS 1 +#define LSM_AUDIT_DATA_NET 2 +#define LSM_AUDIT_DATA_CAP 3 +#define LSM_AUDIT_DATA_IPC 4 +#define LSM_AUDIT_DATA_TASK 5 +#define LSM_AUDIT_DATA_KEY 6 +#define LSM_AUDIT_NO_AUDIT 7 +#define LSM_AUDIT_DATA_KMOD 8 struct task_struct *tsk; union { struct { @@ -66,6 +67,7 @@ struct common_audit_data { char *key_desc; } key_struct; #endif + char *kmod_name; } u; /* this union contains LSM specific data */ union { diff --git a/include/linux/security.h b/include/linux/security.h index ed0faea60b82..466cbadbd1ef 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -706,6 +706,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @kernel_module_request: * Ability to trigger the kernel to automatically upcall to userspace for * userspace to load a kernel module with the given name. + * @kmod_name name of the module requested by the kernel * Return 0 if successful. * @task_setuid: * Check permission before setting one or more of the user identity @@ -1577,7 +1578,7 @@ struct security_operations { void (*cred_transfer)(struct cred *new, const struct cred *old); int (*kernel_act_as)(struct cred *new, u32 secid); int (*kernel_create_files_as)(struct cred *new, struct inode *inode); - int (*kernel_module_request)(void); + int (*kernel_module_request)(char *kmod_name); int (*task_setuid) (uid_t id0, uid_t id1, uid_t id2, int flags); int (*task_fix_setuid) (struct cred *new, const struct cred *old, int flags); @@ -1842,7 +1843,7 @@ void security_commit_creds(struct cred *new, const struct cred *old); void security_transfer_creds(struct cred *new, const struct cred *old); int security_kernel_act_as(struct cred *new, u32 secid); int security_kernel_create_files_as(struct cred *new, struct inode *inode); -int security_kernel_module_request(void); +int security_kernel_module_request(char *kmod_name); int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags); int security_task_fix_setuid(struct cred *new, const struct cred *old, int flags); @@ -2407,7 +2408,7 @@ static inline int security_kernel_create_files_as(struct cred *cred, return 0; } -static inline int security_kernel_module_request(void) +static inline int security_kernel_module_request(char *kmod_name) { return 0; } diff --git a/kernel/kmod.c b/kernel/kmod.c index 9fcb53a11f87..25b103190364 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -80,16 +80,16 @@ int __request_module(bool wait, const char *fmt, ...) #define MAX_KMOD_CONCURRENT 50 /* Completely arbitrary value - KAO */ static int kmod_loop_msg; - ret = security_kernel_module_request(); - if (ret) - return ret; - va_start(args, fmt); ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args); va_end(args); if (ret >= MODULE_NAME_LEN) return -ENAMETOOLONG; + ret = security_kernel_module_request(module_name); + if (ret) + return ret; + /* If modprobe needs a service that is in a module, we get a recursive * loop. Limit the number of running kmod threads to max_threads/2 or * MAX_KMOD_CONCURRENT, whichever is the smaller. A cleaner method diff --git a/security/capability.c b/security/capability.c index 4f3ab476937f..5c700e1a4fd3 100644 --- a/security/capability.c +++ b/security/capability.c @@ -421,7 +421,7 @@ static int cap_kernel_create_files_as(struct cred *new, struct inode *inode) return 0; } -static int cap_kernel_module_request(void) +static int cap_kernel_module_request(char *kmod_name) { return 0; } diff --git a/security/lsm_audit.c b/security/lsm_audit.c index 3bb90b6f1dd3..51bd0fd9c9f0 100644 --- a/security/lsm_audit.c +++ b/security/lsm_audit.c @@ -354,6 +354,10 @@ static void dump_common_audit_data(struct audit_buffer *ab, } break; #endif + case LSM_AUDIT_DATA_KMOD: + audit_log_format(ab, " kmod="); + audit_log_untrustedstring(ab, a->u.kmod_name); + break; } /* switch (a->type) */ } diff --git a/security/security.c b/security/security.c index aad71b2ca195..24e060be9fa5 100644 --- a/security/security.c +++ b/security/security.c @@ -764,9 +764,9 @@ int security_kernel_create_files_as(struct cred *new, struct inode *inode) return security_ops->kernel_create_files_as(new, inode); } -int security_kernel_module_request(void) +int security_kernel_module_request(char *kmod_name) { - return security_ops->kernel_module_request(); + return security_ops->kernel_module_request(kmod_name); } int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index a29d6612a328..c96d63ec4753 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3337,9 +3337,18 @@ static int selinux_kernel_create_files_as(struct cred *new, struct inode *inode) return 0; } -static int selinux_kernel_module_request(void) +static int selinux_kernel_module_request(char *kmod_name) { - return task_has_system(current, SYSTEM__MODULE_REQUEST); + u32 sid; + struct common_audit_data ad; + + sid = task_sid(current); + + COMMON_AUDIT_DATA_INIT(&ad, KMOD); + ad.u.kmod_name = kmod_name; + + return avc_has_perm(sid, SECINITSID_KERNEL, SECCLASS_SYSTEM, + SYSTEM__MODULE_REQUEST, &ad); } static int selinux_task_setpgid(struct task_struct *p, pid_t pgid) -- cgit v1.3-8-gc7d7 From 86b37281411cf1e9bc0a6b5406c45edb7bd9ea5d Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Tue, 10 Nov 2009 11:50:21 +0100 Subject: block: Expose discard granularity While SSDs track block usage on a per-sector basis, RAID arrays often have allocation blocks that are bigger. Allow the discard granularity and alignment to be set and teach the topology stacking logic how to handle them. Signed-off-by: Martin K. Petersen Signed-off-by: Jens Axboe --- block/blk-settings.c | 46 ++++++++++++++++++++++++++++++++++++---------- block/blk-sysfs.c | 22 ++++++++++++++++++++++ block/genhd.c | 12 ++++++++++++ fs/partitions/check.c | 12 ++++++++++++ include/linux/blkdev.h | 18 ++++++++++++++++++ include/linux/genhd.h | 1 + 6 files changed, 101 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/block/blk-settings.c b/block/blk-settings.c index 66d4aa8799b7..7f986cafacd5 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -96,7 +96,10 @@ void blk_set_default_limits(struct queue_limits *lim) lim->max_segment_size = MAX_SEGMENT_SIZE; lim->max_sectors = BLK_DEF_MAX_SECTORS; lim->max_hw_sectors = INT_MAX; - lim->max_discard_sectors = SAFE_MAX_SECTORS; + lim->max_discard_sectors = 0; + lim->discard_granularity = 0; + lim->discard_alignment = 0; + lim->discard_misaligned = 0; lim->logical_block_size = lim->physical_block_size = lim->io_min = 512; lim->bounce_pfn = (unsigned long)(BLK_BOUNCE_ANY >> PAGE_SHIFT); lim->alignment_offset = 0; @@ -488,6 +491,16 @@ void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b) } EXPORT_SYMBOL(blk_queue_stack_limits); +static unsigned int lcm(unsigned int a, unsigned int b) +{ + if (a && b) + return (a * b) / gcd(a, b); + else if (b) + return b; + + return a; +} + /** * blk_stack_limits - adjust queue_limits for stacked devices * @t: the stacking driver limits (top) @@ -502,6 +515,10 @@ EXPORT_SYMBOL(blk_queue_stack_limits); int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, sector_t offset) { + int ret; + + ret = 0; + t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors); t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors); t->bounce_pfn = min_not_zero(t->bounce_pfn, b->bounce_pfn); @@ -531,7 +548,13 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, if (offset && (offset & (b->physical_block_size - 1)) != b->alignment_offset) { t->misaligned = 1; - return -1; + ret = -1; + } + + if (offset && + (offset & (b->discard_granularity - 1)) != b->discard_alignment) { + t->discard_misaligned = 1; + ret = -1; } /* If top has no alignment offset, inherit from bottom */ @@ -539,23 +562,26 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, t->alignment_offset = b->alignment_offset & (b->physical_block_size - 1); + if (!t->discard_alignment) + t->discard_alignment = + b->discard_alignment & (b->discard_granularity - 1); + /* Top device aligned on logical block boundary? */ if (t->alignment_offset & (t->logical_block_size - 1)) { t->misaligned = 1; - return -1; + ret = -1; } - /* Find lcm() of optimal I/O size */ - if (t->io_opt && b->io_opt) - t->io_opt = (t->io_opt * b->io_opt) / gcd(t->io_opt, b->io_opt); - else if (b->io_opt) - t->io_opt = b->io_opt; + /* Find lcm() of optimal I/O size and granularity */ + t->io_opt = lcm(t->io_opt, b->io_opt); + t->discard_granularity = lcm(t->discard_granularity, + b->discard_granularity); /* Verify that optimal I/O size is a multiple of io_min */ if (t->io_min && t->io_opt % t->io_min) - return -1; + ret = -1; - return 0; + return ret; } EXPORT_SYMBOL(blk_stack_limits); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 8a6d81afb284..3147145edc15 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -126,6 +126,16 @@ static ssize_t queue_io_opt_show(struct request_queue *q, char *page) return queue_var_show(queue_io_opt(q), page); } +static ssize_t queue_discard_granularity_show(struct request_queue *q, char *page) +{ + return queue_var_show(q->limits.discard_granularity, page); +} + +static ssize_t queue_discard_max_show(struct request_queue *q, char *page) +{ + return queue_var_show(q->limits.max_discard_sectors << 9, page); +} + static ssize_t queue_max_sectors_store(struct request_queue *q, const char *page, size_t count) { @@ -293,6 +303,16 @@ static struct queue_sysfs_entry queue_io_opt_entry = { .show = queue_io_opt_show, }; +static struct queue_sysfs_entry queue_discard_granularity_entry = { + .attr = {.name = "discard_granularity", .mode = S_IRUGO }, + .show = queue_discard_granularity_show, +}; + +static struct queue_sysfs_entry queue_discard_max_entry = { + .attr = {.name = "discard_max_bytes", .mode = S_IRUGO }, + .show = queue_discard_max_show, +}; + static struct queue_sysfs_entry queue_nonrot_entry = { .attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR }, .show = queue_nonrot_show, @@ -328,6 +348,8 @@ static struct attribute *default_attrs[] = { &queue_physical_block_size_entry.attr, &queue_io_min_entry.attr, &queue_io_opt_entry.attr, + &queue_discard_granularity_entry.attr, + &queue_discard_max_entry.attr, &queue_nonrot_entry.attr, &queue_nomerges_entry.attr, &queue_rq_affinity_entry.attr, diff --git a/block/genhd.c b/block/genhd.c index 517e4332cb37..b11a4ad7d571 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -861,12 +861,23 @@ static ssize_t disk_alignment_offset_show(struct device *dev, return sprintf(buf, "%d\n", queue_alignment_offset(disk->queue)); } +static ssize_t disk_discard_alignment_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct gendisk *disk = dev_to_disk(dev); + + return sprintf(buf, "%u\n", queue_discard_alignment(disk->queue)); +} + static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL); static DEVICE_ATTR(ext_range, S_IRUGO, disk_ext_range_show, NULL); static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL); static DEVICE_ATTR(ro, S_IRUGO, disk_ro_show, NULL); static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL); static DEVICE_ATTR(alignment_offset, S_IRUGO, disk_alignment_offset_show, NULL); +static DEVICE_ATTR(discard_alignment, S_IRUGO, disk_discard_alignment_show, + NULL); static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL); static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL); static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL); @@ -887,6 +898,7 @@ static struct attribute *disk_attrs[] = { &dev_attr_ro.attr, &dev_attr_size.attr, &dev_attr_alignment_offset.attr, + &dev_attr_discard_alignment.attr, &dev_attr_capability.attr, &dev_attr_stat.attr, &dev_attr_inflight.attr, diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 7b685e10cbad..64bc8998ac9a 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -226,6 +226,13 @@ ssize_t part_alignment_offset_show(struct device *dev, return sprintf(buf, "%llu\n", (unsigned long long)p->alignment_offset); } +ssize_t part_discard_alignment_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct hd_struct *p = dev_to_part(dev); + return sprintf(buf, "%u\n", p->discard_alignment); +} + ssize_t part_stat_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -288,6 +295,8 @@ static DEVICE_ATTR(partition, S_IRUGO, part_partition_show, NULL); static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL); static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL); static DEVICE_ATTR(alignment_offset, S_IRUGO, part_alignment_offset_show, NULL); +static DEVICE_ATTR(discard_alignment, S_IRUGO, part_discard_alignment_show, + NULL); static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL); static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL); #ifdef CONFIG_FAIL_MAKE_REQUEST @@ -300,6 +309,7 @@ static struct attribute *part_attrs[] = { &dev_attr_start.attr, &dev_attr_size.attr, &dev_attr_alignment_offset.attr, + &dev_attr_discard_alignment.attr, &dev_attr_stat.attr, &dev_attr_inflight.attr, #ifdef CONFIG_FAIL_MAKE_REQUEST @@ -403,6 +413,8 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno, p->start_sect = start; p->alignment_offset = queue_sector_alignment_offset(disk->queue, start); + p->discard_alignment = queue_sector_discard_alignment(disk->queue, + start); p->nr_sects = len; p->partno = partno; p->policy = get_disk_ro(disk); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 39c601f783a0..1cc02972fbe2 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -312,12 +312,15 @@ struct queue_limits { unsigned int io_min; unsigned int io_opt; unsigned int max_discard_sectors; + unsigned int discard_granularity; + unsigned int discard_alignment; unsigned short logical_block_size; unsigned short max_hw_segments; unsigned short max_phys_segments; unsigned char misaligned; + unsigned char discard_misaligned; unsigned char no_cluster; }; @@ -1121,6 +1124,21 @@ static inline int bdev_alignment_offset(struct block_device *bdev) return q->limits.alignment_offset; } +static inline int queue_discard_alignment(struct request_queue *q) +{ + if (q->limits.discard_misaligned) + return -1; + + return q->limits.discard_alignment; +} + +static inline int queue_sector_discard_alignment(struct request_queue *q, + sector_t sector) +{ + return ((sector << 9) - q->limits.discard_alignment) + & (q->limits.discard_granularity - 1); +} + static inline int queue_dma_alignment(struct request_queue *q) { return q ? q->dma_alignment : 511; diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 297df45ffd0a..c6c0c41af35f 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -91,6 +91,7 @@ struct hd_struct { sector_t start_sect; sector_t nr_sects; sector_t alignment_offset; + unsigned int discard_alignment; struct device __dev; struct kobject *holder_dir; int policy, partno; -- cgit v1.3-8-gc7d7 From 9d5ce73a64be2be8112147a3e0b551ad9cd1247b Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 10 Nov 2009 19:46:16 +0900 Subject: x86: intel-iommu: Convert detect_intel_iommu to use iommu_init hook This changes detect_intel_iommu() to set intel_iommu_init() to iommu_init hook if detect_intel_iommu() finds the IOMMU. Signed-off-by: FUJITA Tomonori Cc: chrisw@sous-sol.org Cc: dwmw2@infradead.org Cc: joerg.roedel@amd.com Cc: muli@il.ibm.com LKML-Reference: <1257849980-22640-6-git-send-email-fujita.tomonori@lab.ntt.co.jp> [ -v2: build fix for the !CONFIG_DMAR case ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-dma.c | 2 -- drivers/pci/dmar.c | 4 ++++ include/linux/dmar.h | 15 ++++----------- 3 files changed, 8 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 5ca44a9301a0..bed05e2e5890 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -294,8 +294,6 @@ static int __init pci_iommu_init(void) x86_init.iommu.iommu_init(); - intel_iommu_init(); - no_iommu_init(); return 0; } diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 22b02c6df854..bce9cd7c755a 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -616,6 +616,10 @@ void __init detect_intel_iommu(void) if (ret && !no_iommu && !iommu_detected && !swiotlb && !dmar_disabled) iommu_detected = 1; +#endif +#ifdef CONFIG_X86 + if (ret) + x86_init.iommu.iommu_init = intel_iommu_init; #endif } early_acpi_os_unmap_memory(dmar_tbl, dmar_tbl_size); diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 4a2b162c256a..5de4c9e5856d 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -208,16 +208,9 @@ struct dmar_atsr_unit { u8 include_all:1; /* include all ports */ }; -/* Intel DMAR initialization functions */ extern int intel_iommu_init(void); -#else -static inline int intel_iommu_init(void) -{ -#ifdef CONFIG_INTR_REMAP - return dmar_dev_scope_init(); -#else - return -ENODEV; -#endif -} -#endif /* !CONFIG_DMAR */ +#else /* !CONFIG_DMAR: */ +static inline int intel_iommu_init(void) { return -ENODEV; } +#endif /* CONFIG_DMAR */ + #endif /* __DMAR_H__ */ -- cgit v1.3-8-gc7d7 From 9f993ac3f708b661207ed7de521f245586217a68 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 10 Nov 2009 19:46:17 +0900 Subject: bootmem: Add free_bootmem_late() Add a new function for freeing bootmem after the bootmem allocator has been released and the unreserved pages given to the page allocator. This allows us to reserve bootmem and then release it if we later discover it was not needed. ( This new API will be used by the swiotlb code to recover a significant amount of RAM (64MB). ) Signed-off-by: FUJITA Tomonori Acked-by: Pekka Enberg Cc: chrisw@sous-sol.org Cc: dwmw2@infradead.org Cc: joerg.roedel@amd.com Cc: muli@il.ibm.com Cc: hannes@cmpxchg.org Cc: tj@kernel.org Cc: akpm@linux-foundation.org Cc: Linus Torvalds LKML-Reference: <1257849980-22640-7-git-send-email-fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar --- include/linux/bootmem.h | 1 + mm/bootmem.c | 24 ++++++++++++++++++++++++ 2 files changed, 25 insertions(+) (limited to 'include') diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index dd97fb8408a8..b10ec49ee2dd 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -53,6 +53,7 @@ extern void free_bootmem_node(pg_data_t *pgdat, unsigned long addr, unsigned long size); extern void free_bootmem(unsigned long addr, unsigned long size); +extern void free_bootmem_late(unsigned long addr, unsigned long size); /* * Flags for reserve_bootmem (also if CONFIG_HAVE_ARCH_BOOTMEM_NODE, diff --git a/mm/bootmem.c b/mm/bootmem.c index 555d5d2731c6..d1dc23cc7f10 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -143,6 +143,30 @@ unsigned long __init init_bootmem(unsigned long start, unsigned long pages) return init_bootmem_core(NODE_DATA(0)->bdata, start, 0, pages); } +/* + * free_bootmem_late - free bootmem pages directly to page allocator + * @addr: starting address of the range + * @size: size of the range in bytes + * + * This is only useful when the bootmem allocator has already been torn + * down, but we are still initializing the system. Pages are given directly + * to the page allocator, no bootmem metadata is updated because it is gone. + */ +void __init free_bootmem_late(unsigned long addr, unsigned long size) +{ + unsigned long cursor, end; + + kmemleak_free_part(__va(addr), size); + + cursor = PFN_UP(addr); + end = PFN_DOWN(addr + size); + + for (; cursor < end; cursor++) { + __free_pages_bootmem(pfn_to_page(cursor), 0); + totalram_pages++; + } +} + static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) { int aligned; -- cgit v1.3-8-gc7d7 From 5740afdb68abadc473fd5392df733558a58c1254 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 10 Nov 2009 19:46:18 +0900 Subject: swiotlb: Add swiotlb_free() function swiotlb_free() function frees all allocated memory for swiotlb. We need to initialize swiotlb before IOMMU initialization (x86 and powerpc needs to allocate memory from bootmem allocator). If IOMMU initialization is successful, we need to free swiotlb resource (don't want to waste 64MB). Signed-off-by: FUJITA Tomonori Cc: chrisw@sous-sol.org Cc: dwmw2@infradead.org Cc: joerg.roedel@amd.com Cc: muli@il.ibm.com LKML-Reference: <1257849980-22640-8-git-send-email-fujita.tomonori@lab.ntt.co.jp> [ -v2: build fix for the !CONFIG_SWIOTLB case ] Signed-off-by: Ingo Molnar --- include/linux/swiotlb.h | 6 ++++++ lib/swiotlb.c | 30 ++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) (limited to 'include') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 73b1f1cec423..59bafa690290 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -88,4 +88,10 @@ swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr); extern int swiotlb_dma_supported(struct device *hwdev, u64 mask); +#ifdef CONFIG_SWIOTLB +extern void __init swiotlb_free(void); +#else +static inline void swiotlb_free(void) { } +#endif + #endif /* __LINUX_SWIOTLB_H */ diff --git a/lib/swiotlb.c b/lib/swiotlb.c index ac25cd28e807..eee512b63f17 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -97,6 +97,8 @@ static phys_addr_t *io_tlb_orig_addr; */ static DEFINE_SPINLOCK(io_tlb_lock); +static int late_alloc; + static int __init setup_io_tlb_npages(char *str) { @@ -262,6 +264,8 @@ swiotlb_late_init_with_default_size(size_t default_size) swiotlb_print_info(bytes); + late_alloc = 1; + return 0; cleanup4: @@ -281,6 +285,32 @@ cleanup1: return -ENOMEM; } +void __init swiotlb_free(void) +{ + if (!io_tlb_overflow_buffer) + return; + + if (late_alloc) { + free_pages((unsigned long)io_tlb_overflow_buffer, + get_order(io_tlb_overflow)); + free_pages((unsigned long)io_tlb_orig_addr, + get_order(io_tlb_nslabs * sizeof(phys_addr_t))); + free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs * + sizeof(int))); + free_pages((unsigned long)io_tlb_start, + get_order(io_tlb_nslabs << IO_TLB_SHIFT)); + } else { + free_bootmem_late(__pa(io_tlb_overflow_buffer), + io_tlb_overflow); + free_bootmem_late(__pa(io_tlb_orig_addr), + io_tlb_nslabs * sizeof(phys_addr_t)); + free_bootmem_late(__pa(io_tlb_list), + io_tlb_nslabs * sizeof(int)); + free_bootmem_late(__pa(io_tlb_start), + io_tlb_nslabs << IO_TLB_SHIFT); + } +} + static int is_swiotlb_buffer(phys_addr_t paddr) { return paddr >= virt_to_phys(io_tlb_start) && -- cgit v1.3-8-gc7d7 From ad32e8cb86e7894aac51c8963eaa9f36bb8a4e14 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 10 Nov 2009 19:46:19 +0900 Subject: swiotlb: Defer swiotlb init printing, export swiotlb_print_info() This enables us to avoid printing swiotlb memory info when we initialize swiotlb. After swiotlb initialization, we could find that we don't need swiotlb. This patch removes the code to print swiotlb memory info in swiotlb_init() and exports the function to do that. Signed-off-by: FUJITA Tomonori Cc: chrisw@sous-sol.org Cc: dwmw2@infradead.org Cc: joerg.roedel@amd.com Cc: muli@il.ibm.com Cc: tony.luck@intel.com Cc: benh@kernel.crashing.org LKML-Reference: <1257849980-22640-9-git-send-email-fujita.tomonori@lab.ntt.co.jp> [ -v2: merge up conflict ] Signed-off-by: Ingo Molnar --- arch/ia64/kernel/pci-swiotlb.c | 4 ++-- arch/powerpc/kernel/setup_32.c | 2 +- arch/powerpc/kernel/setup_64.c | 2 +- arch/x86/kernel/pci-swiotlb.c | 3 +-- include/linux/swiotlb.h | 4 ++-- lib/swiotlb.c | 15 ++++++++------- 6 files changed, 15 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/arch/ia64/kernel/pci-swiotlb.c b/arch/ia64/kernel/pci-swiotlb.c index 285aae8431c6..53292abf846c 100644 --- a/arch/ia64/kernel/pci-swiotlb.c +++ b/arch/ia64/kernel/pci-swiotlb.c @@ -41,7 +41,7 @@ struct dma_map_ops swiotlb_dma_ops = { void __init swiotlb_dma_init(void) { dma_ops = &swiotlb_dma_ops; - swiotlb_init(); + swiotlb_init(1); } void __init pci_swiotlb_init(void) @@ -51,7 +51,7 @@ void __init pci_swiotlb_init(void) swiotlb = 1; printk(KERN_INFO "PCI-DMA: Re-initialize machine vector.\n"); machvec_init("dig"); - swiotlb_init(); + swiotlb_init(1); dma_ops = &swiotlb_dma_ops; #else panic("Unable to find Intel IOMMU"); diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 53bcf3d792db..b152de3e64d4 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -345,7 +345,7 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_SWIOTLB if (ppc_swiotlb_enable) - swiotlb_init(); + swiotlb_init(1); #endif paging_init(); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 04f638d82fb3..df2c9e932b37 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -550,7 +550,7 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_SWIOTLB if (ppc_swiotlb_enable) - swiotlb_init(); + swiotlb_init(1); #endif paging_init(); diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index aaa6b7839f1e..ea20ef7ca523 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -52,8 +52,7 @@ void __init pci_swiotlb_init(void) if (swiotlb_force) swiotlb = 1; if (swiotlb) { - printk(KERN_INFO "PCI-DMA: Using software bounce buffering for IO (SWIOTLB)\n"); - swiotlb_init(); + swiotlb_init(0); dma_ops = &swiotlb_dma_ops; } } diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 59bafa690290..eb9bdb4d4854 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -20,8 +20,7 @@ struct scatterlist; */ #define IO_TLB_SHIFT 11 -extern void -swiotlb_init(void); +extern void swiotlb_init(int verbose); extern void *swiotlb_alloc_coherent(struct device *hwdev, size_t size, @@ -94,4 +93,5 @@ extern void __init swiotlb_free(void); static inline void swiotlb_free(void) { } #endif +extern void swiotlb_print_info(void); #endif /* __LINUX_SWIOTLB_H */ diff --git a/lib/swiotlb.c b/lib/swiotlb.c index eee512b63f17..0c12d7cce300 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -123,8 +123,9 @@ static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev, return phys_to_dma(hwdev, virt_to_phys(address)); } -static void swiotlb_print_info(unsigned long bytes) +void swiotlb_print_info(void) { + unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT; phys_addr_t pstart, pend; pstart = virt_to_phys(io_tlb_start); @@ -142,7 +143,7 @@ static void swiotlb_print_info(unsigned long bytes) * structures for the software IO TLB used to implement the DMA API. */ void __init -swiotlb_init_with_default_size(size_t default_size) +swiotlb_init_with_default_size(size_t default_size, int verbose) { unsigned long i, bytes; @@ -178,14 +179,14 @@ swiotlb_init_with_default_size(size_t default_size) io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow); if (!io_tlb_overflow_buffer) panic("Cannot allocate SWIOTLB overflow buffer!\n"); - - swiotlb_print_info(bytes); + if (verbose) + swiotlb_print_info(); } void __init -swiotlb_init(void) +swiotlb_init(int verbose) { - swiotlb_init_with_default_size(64 * (1<<20)); /* default to 64MB */ + swiotlb_init_with_default_size(64 * (1<<20), verbose); /* default to 64MB */ } /* @@ -262,7 +263,7 @@ swiotlb_late_init_with_default_size(size_t default_size) if (!io_tlb_overflow_buffer) goto cleanup4; - swiotlb_print_info(bytes); + swiotlb_print_info(); late_alloc = 1; -- cgit v1.3-8-gc7d7 From cfd5324e699a2e74a44642d43dcf03d581f2a7db Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Wed, 4 Nov 2009 09:58:17 +0200 Subject: MFD: TWL4030: Add audio_mclk to the codec platform data Add audio_mclk to the platform data struct for the twl4030-codec MFD driver. Signed-off-by: Peter Ujfalusi Acked-by: Samuel Ortiz Signed-off-by: Mark Brown --- include/linux/i2c/twl4030.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/i2c/twl4030.h b/include/linux/i2c/twl4030.h index 42d6c722bd82..c188961efbce 100644 --- a/include/linux/i2c/twl4030.h +++ b/include/linux/i2c/twl4030.h @@ -414,6 +414,7 @@ struct twl4030_codec_vibra_data { }; struct twl4030_codec_data { + unsigned int audio_mclk; struct twl4030_codec_audio_data *audio; struct twl4030_codec_vibra_data *vibra; }; -- cgit v1.3-8-gc7d7 From f9b4639e045c750e2bad37462476403995508350 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Wed, 4 Nov 2009 09:58:19 +0200 Subject: MFD: twl4030-codec: APLL_INFREQ handling in the MFD driver Configure the APLL_INFREQ field in the APLL_CTL register based on the platform data. Provide also a function for childs to query the audio_mclk frequency. Signed-off-by: Peter Ujfalusi Acked-by: Samuel Ortiz Signed-off-by: Mark Brown --- drivers/mfd/twl4030-codec.c | 35 +++++++++++++++++++++++++++++++++++ include/linux/mfd/twl4030-codec.h | 1 + 2 files changed, 36 insertions(+) (limited to 'include') diff --git a/drivers/mfd/twl4030-codec.c b/drivers/mfd/twl4030-codec.c index 97103078dc2a..77b914907d7c 100644 --- a/drivers/mfd/twl4030-codec.c +++ b/drivers/mfd/twl4030-codec.c @@ -41,6 +41,7 @@ struct twl4030_codec_resource { }; struct twl4030_codec { + unsigned int audio_mclk; struct mutex mutex; struct twl4030_codec_resource resource[TWL4030_CODEC_RES_MAX]; struct mfd_cell cells[TWL4030_CODEC_CELLS]; @@ -145,12 +146,45 @@ int twl4030_codec_disable_resource(unsigned id) } EXPORT_SYMBOL_GPL(twl4030_codec_disable_resource); +unsigned int twl4030_codec_get_mclk(void) +{ + struct twl4030_codec *codec = platform_get_drvdata(twl4030_codec_dev); + + return codec->audio_mclk; +} +EXPORT_SYMBOL_GPL(twl4030_codec_get_mclk); + static int __devinit twl4030_codec_probe(struct platform_device *pdev) { struct twl4030_codec *codec; struct twl4030_codec_data *pdata = pdev->dev.platform_data; struct mfd_cell *cell = NULL; int ret, childs = 0; + u8 val; + + if (!pdata) { + dev_err(&pdev->dev, "Platform data is missing\n"); + return -EINVAL; + } + + /* Configure APLL_INFREQ and disable APLL if enabled */ + val = 0; + switch (pdata->audio_mclk) { + case 19200000: + val |= TWL4030_APLL_INFREQ_19200KHZ; + break; + case 26000000: + val |= TWL4030_APLL_INFREQ_26000KHZ; + break; + case 38400000: + val |= TWL4030_APLL_INFREQ_38400KHZ; + break; + default: + dev_err(&pdev->dev, "Invalid audio_mclk\n"); + return -EINVAL; + } + twl4030_i2c_write_u8(TWL4030_MODULE_AUDIO_VOICE, + val, TWL4030_REG_APLL_CTL); codec = kzalloc(sizeof(struct twl4030_codec), GFP_KERNEL); if (!codec) @@ -160,6 +194,7 @@ static int __devinit twl4030_codec_probe(struct platform_device *pdev) twl4030_codec_dev = pdev; mutex_init(&codec->mutex); + codec->audio_mclk = pdata->audio_mclk; /* Codec power */ codec->resource[TWL4030_CODEC_RES_POWER].reg = TWL4030_REG_CODEC_MODE; diff --git a/include/linux/mfd/twl4030-codec.h b/include/linux/mfd/twl4030-codec.h index ef0a3041d759..2ec317c68e59 100644 --- a/include/linux/mfd/twl4030-codec.h +++ b/include/linux/mfd/twl4030-codec.h @@ -267,5 +267,6 @@ enum twl4030_codec_res { int twl4030_codec_disable_resource(enum twl4030_codec_res id); int twl4030_codec_enable_resource(enum twl4030_codec_res id); +unsigned int twl4030_codec_get_mclk(void); #endif /* End of __TWL4030_CODEC_H__ */ -- cgit v1.3-8-gc7d7 From e7373b702f6eab35f315e016a4159860a7a4d686 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Tue, 10 Nov 2009 10:13:30 +0100 Subject: sound: pcm: record a substream's owner process Record the pid of the task that opened a PCM substream. For sound cards with hardware mixing, this allows determining which process is associated with a specific substream's volume control. Signed-off-by: Clemens Ladisch Signed-off-by: Takashi Iwai --- include/sound/pcm.h | 3 +++ sound/core/pcm.c | 4 ++++ 2 files changed, 7 insertions(+) (limited to 'include') diff --git a/include/sound/pcm.h b/include/sound/pcm.h index de6d981de5d6..c83a4a79f16b 100644 --- a/include/sound/pcm.h +++ b/include/sound/pcm.h @@ -348,6 +348,8 @@ struct snd_pcm_group { /* keep linked substreams */ int count; }; +struct pid; + struct snd_pcm_substream { struct snd_pcm *pcm; struct snd_pcm_str *pstr; @@ -379,6 +381,7 @@ struct snd_pcm_substream { atomic_t mmap_count; unsigned int f_flags; void (*pcm_release)(struct snd_pcm_substream *); + struct pid *pid; #if defined(CONFIG_SND_PCM_OSS) || defined(CONFIG_SND_PCM_OSS_MODULE) /* -- OSS things -- */ struct snd_pcm_oss_substream oss; diff --git a/sound/core/pcm.c b/sound/core/pcm.c index 8e2c7833614c..6884ae031f6f 100644 --- a/sound/core/pcm.c +++ b/sound/core/pcm.c @@ -435,6 +435,7 @@ static void snd_pcm_substream_proc_status_read(struct snd_info_entry *entry, return; } snd_iprintf(buffer, "state: %s\n", snd_pcm_state_name(status.state)); + snd_iprintf(buffer, "owner_pid : %d\n", pid_vnr(substream->pid)); snd_iprintf(buffer, "trigger_time: %ld.%09ld\n", status.trigger_tstamp.tv_sec, status.trigger_tstamp.tv_nsec); snd_iprintf(buffer, "tstamp : %ld.%09ld\n", @@ -900,6 +901,7 @@ int snd_pcm_attach_substream(struct snd_pcm *pcm, int stream, substream->private_data = pcm->private_data; substream->ref_count = 1; substream->f_flags = file->f_flags; + substream->pid = get_pid(task_pid(current)); pstr->substream_opened++; *rsubstream = substream; return 0; @@ -921,6 +923,8 @@ void snd_pcm_detach_substream(struct snd_pcm_substream *substream) kfree(runtime->hw_constraints.rules); kfree(runtime); substream->runtime = NULL; + put_pid(substream->pid); + substream->pid = NULL; substream->pstr->substream_opened--; } -- cgit v1.3-8-gc7d7 From 7584af10cf46e0f4aa1696f1be79fa0f19a945ba Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Tue, 10 Nov 2009 10:14:04 +0100 Subject: sound: rawmidi: record a substream's owner process Record the pid of the task that opened a RawMIDI substream. Signed-off-by: Clemens Ladisch Signed-off-by: Takashi Iwai --- include/sound/rawmidi.h | 2 ++ sound/core/rawmidi.c | 9 +++++++++ 2 files changed, 11 insertions(+) (limited to 'include') diff --git a/include/sound/rawmidi.h b/include/sound/rawmidi.h index c23c26585700..2480e7d10dcf 100644 --- a/include/sound/rawmidi.h +++ b/include/sound/rawmidi.h @@ -46,6 +46,7 @@ struct snd_rawmidi; struct snd_rawmidi_substream; struct snd_seq_port_info; +struct pid; struct snd_rawmidi_ops { int (*open) (struct snd_rawmidi_substream * substream); @@ -97,6 +98,7 @@ struct snd_rawmidi_substream { struct snd_rawmidi_str *pstr; char name[32]; struct snd_rawmidi_runtime *runtime; + struct pid *pid; /* hardware layer */ struct snd_rawmidi_ops *ops; }; diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c index 818b1299ed91..2f766123b158 100644 --- a/sound/core/rawmidi.c +++ b/sound/core/rawmidi.c @@ -278,6 +278,7 @@ static int open_substream(struct snd_rawmidi *rmidi, substream->active_sensing = 0; if (mode & SNDRV_RAWMIDI_LFLG_APPEND) substream->append = 1; + substream->pid = get_pid(task_pid(current)); rmidi->streams[substream->stream].substream_opened++; } substream->use_count++; @@ -488,6 +489,8 @@ static void close_substream(struct snd_rawmidi *rmidi, snd_rawmidi_runtime_free(substream); substream->opened = 0; substream->append = 0; + put_pid(substream->pid); + substream->pid = NULL; rmidi->streams[substream->stream].substream_opened--; } @@ -1336,6 +1339,9 @@ static void snd_rawmidi_proc_info_read(struct snd_info_entry *entry, substream->number, (unsigned long) substream->bytes); if (substream->opened) { + snd_iprintf(buffer, + " Owner PID : %d\n", + pid_vnr(substream->pid)); runtime = substream->runtime; snd_iprintf(buffer, " Mode : %s\n" @@ -1357,6 +1363,9 @@ static void snd_rawmidi_proc_info_read(struct snd_info_entry *entry, substream->number, (unsigned long) substream->bytes); if (substream->opened) { + snd_iprintf(buffer, + " Owner PID : %d\n", + pid_vnr(substream->pid)); runtime = substream->runtime; snd_iprintf(buffer, " Buffer size : %lu\n" -- cgit v1.3-8-gc7d7 From 6b0d07ba152893b40f1014a9db8da5aa564aa00e Mon Sep 17 00:00:00 2001 From: Rémi Denis-Courmont Date: Mon, 9 Nov 2009 02:17:01 +0000 Subject: Phonet: put sockets in a hash table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- include/net/phonet/phonet.h | 1 + net/phonet/af_phonet.c | 1 + net/phonet/socket.c | 79 +++++++++++++++++++++++++++++++-------------- 3 files changed, 56 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/include/net/phonet/phonet.h b/include/net/phonet/phonet.h index fdb05fa0346f..7b114079a51b 100644 --- a/include/net/phonet/phonet.h +++ b/include/net/phonet/phonet.h @@ -46,6 +46,7 @@ static inline struct pn_sock *pn_sk(struct sock *sk) extern const struct proto_ops phonet_dgram_ops; +void pn_sock_init(void); struct sock *pn_find_sock_by_sa(struct net *net, const struct sockaddr_pn *sa); void pn_deliver_sock_broadcast(struct net *net, struct sk_buff *skb); void phonet_get_local_port_range(int *min, int *max); diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index 3bd1be6b26f0..8d3a55b4a30c 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -481,6 +481,7 @@ static int __init phonet_init(void) if (err) return err; + pn_sock_init(); err = sock_register(&phonet_proto_family); if (err) { printk(KERN_ALERT diff --git a/net/phonet/socket.c b/net/phonet/socket.c index 0412beb59a05..4112b6e1c48a 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -45,13 +45,28 @@ static int pn_socket_release(struct socket *sock) return 0; } +#define PN_HASHSIZE 16 +#define PN_HASHMASK (PN_HASHSIZE-1) + + static struct { - struct hlist_head hlist; + struct hlist_head hlist[PN_HASHSIZE]; spinlock_t lock; -} pnsocks = { - .hlist = HLIST_HEAD_INIT, - .lock = __SPIN_LOCK_UNLOCKED(pnsocks.lock), -}; +} pnsocks; + +void __init pn_sock_init(void) +{ + unsigned i; + + for (i = 0; i < PN_HASHSIZE; i++) + INIT_HLIST_HEAD(pnsocks.hlist + i); + spin_lock_init(&pnsocks.lock); +} + +static struct hlist_head *pn_hash_list(u16 obj) +{ + return pnsocks.hlist + (obj & PN_HASHMASK); +} /* * Find address based on socket address, match only certain fields. @@ -64,10 +79,11 @@ struct sock *pn_find_sock_by_sa(struct net *net, const struct sockaddr_pn *spn) struct sock *rval = NULL; u16 obj = pn_sockaddr_get_object(spn); u8 res = spn->spn_resource; + struct hlist_head *hlist = pn_hash_list(obj); spin_lock_bh(&pnsocks.lock); - sk_for_each(sknode, node, &pnsocks.hlist) { + sk_for_each(sknode, node, hlist) { struct pn_sock *pn = pn_sk(sknode); BUG_ON(!pn->sobject); /* unbound socket */ @@ -99,31 +115,39 @@ struct sock *pn_find_sock_by_sa(struct net *net, const struct sockaddr_pn *spn) /* Deliver a broadcast packet (only in bottom-half) */ void pn_deliver_sock_broadcast(struct net *net, struct sk_buff *skb) { - struct hlist_node *node; - struct sock *sknode; + struct hlist_head *hlist = pnsocks.hlist; + unsigned h; spin_lock(&pnsocks.lock); - sk_for_each(sknode, node, &pnsocks.hlist) { - struct sk_buff *clone; + for (h = 0; h < PN_HASHSIZE; h++) { + struct hlist_node *node; + struct sock *sknode; - if (!net_eq(sock_net(sknode), net)) - continue; - if (!sock_flag(sknode, SOCK_BROADCAST)) - continue; + sk_for_each(sknode, node, hlist) { + struct sk_buff *clone; - clone = skb_clone(skb, GFP_ATOMIC); - if (clone) { - sock_hold(sknode); - sk_receive_skb(sknode, clone, 0); + if (!net_eq(sock_net(sknode), net)) + continue; + if (!sock_flag(sknode, SOCK_BROADCAST)) + continue; + + clone = skb_clone(skb, GFP_ATOMIC); + if (clone) { + sock_hold(sknode); + sk_receive_skb(sknode, clone, 0); + } } + hlist++; } spin_unlock(&pnsocks.lock); } void pn_sock_hash(struct sock *sk) { + struct hlist_head *hlist = pn_hash_list(pn_sk(sk)->sobject); + spin_lock_bh(&pnsocks.lock); - sk_add_node(sk, &pnsocks.hlist); + sk_add_node(sk, hlist); spin_unlock_bh(&pnsocks.lock); } EXPORT_SYMBOL(pn_sock_hash); @@ -439,15 +463,20 @@ EXPORT_SYMBOL(pn_sock_get_port); static struct sock *pn_sock_get_idx(struct seq_file *seq, loff_t pos) { struct net *net = seq_file_net(seq); + struct hlist_head *hlist = pnsocks.hlist; struct hlist_node *node; struct sock *sknode; + unsigned h; - sk_for_each(sknode, node, &pnsocks.hlist) { - if (!net_eq(net, sock_net(sknode))) - continue; - if (!pos) - return sknode; - pos--; + for (h = 0; h < PN_HASHSIZE; h++) { + sk_for_each(sknode, node, hlist) { + if (!net_eq(net, sock_net(sknode))) + continue; + if (!pos) + return sknode; + pos--; + } + hlist++; } return NULL; } -- cgit v1.3-8-gc7d7 From 30fff9231fad757c061285e347b33c5149c2c2e4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 9 Nov 2009 05:26:33 +0000 Subject: udp: bind() optimisation UDP bind() can be O(N^2) in some pathological cases. Thanks to secondary hash tables, we can make it O(N) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/udp.h | 6 +++++ include/net/udp.h | 3 ++- net/ipv4/udp.c | 73 +++++++++++++++++++++++++++++++++++++++++++++++------ net/ipv6/udp.c | 14 +++++----- 4 files changed, 80 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/udp.h b/include/linux/udp.h index 59f0ddf2d284..03f72a2ba028 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -88,6 +88,12 @@ static inline struct udp_sock *udp_sk(const struct sock *sk) return (struct udp_sock *)sk; } +#define udp_portaddr_for_each_entry(__sk, node, list) \ + hlist_nulls_for_each_entry(__sk, node, list, __sk_common.skc_portaddr_node) + +#define udp_portaddr_for_each_entry_rcu(__sk, node, list) \ + hlist_nulls_for_each_entry_rcu(__sk, node, list, __sk_common.skc_portaddr_node) + #define IS_UDPLITE(__sk) (udp_sk(__sk)->pcflag) #endif diff --git a/include/net/udp.h b/include/net/udp.h index af41850f742a..5348d80b25bb 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -158,7 +158,8 @@ static inline void udp_lib_close(struct sock *sk, long timeout) } extern int udp_lib_get_port(struct sock *sk, unsigned short snum, - int (*)(const struct sock*,const struct sock*)); + int (*)(const struct sock *,const struct sock *), + unsigned int hash2_nulladdr); /* net/ipv4/udp.c */ extern int udp_get_port(struct sock *sk, unsigned short snum, diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index d73e9170536b..1eaf57567ebf 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -152,16 +152,49 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num, return 0; } +/* + * Note: we still hold spinlock of primary hash chain, so no other writer + * can insert/delete a socket with local_port == num + */ +static int udp_lib_lport_inuse2(struct net *net, __u16 num, + struct udp_hslot *hslot2, + struct sock *sk, + int (*saddr_comp)(const struct sock *sk1, + const struct sock *sk2)) +{ + struct sock *sk2; + struct hlist_nulls_node *node; + int res = 0; + + spin_lock(&hslot2->lock); + udp_portaddr_for_each_entry(sk2, node, &hslot2->head) + if (net_eq(sock_net(sk2), net) && + sk2 != sk && + (udp_sk(sk2)->udp_port_hash == num) && + (!sk2->sk_reuse || !sk->sk_reuse) && + (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if + || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && + (*saddr_comp)(sk, sk2)) { + res = 1; + break; + } + spin_unlock(&hslot2->lock); + return res; +} + /** * udp_lib_get_port - UDP/-Lite port lookup for IPv4 and IPv6 * * @sk: socket struct in question * @snum: port number to look up * @saddr_comp: AF-dependent comparison of bound local IP addresses + * @hash2_nulladdr: AF-dependant hash value in secondary hash chains, + * with NULL address */ int udp_lib_get_port(struct sock *sk, unsigned short snum, int (*saddr_comp)(const struct sock *sk1, - const struct sock *sk2)) + const struct sock *sk2), + unsigned int hash2_nulladdr) { struct udp_hslot *hslot, *hslot2; struct udp_table *udptable = sk->sk_prot->h.udp_table; @@ -210,6 +243,30 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, } else { hslot = udp_hashslot(udptable, net, snum); spin_lock_bh(&hslot->lock); + if (hslot->count > 10) { + int exist; + unsigned int slot2 = udp_sk(sk)->udp_portaddr_hash ^ snum; + + slot2 &= udptable->mask; + hash2_nulladdr &= udptable->mask; + + hslot2 = udp_hashslot2(udptable, slot2); + if (hslot->count < hslot2->count) + goto scan_primary_hash; + + exist = udp_lib_lport_inuse2(net, snum, hslot2, + sk, saddr_comp); + if (!exist && (hash2_nulladdr != slot2)) { + hslot2 = udp_hashslot2(udptable, hash2_nulladdr); + exist = udp_lib_lport_inuse2(net, snum, hslot2, + sk, saddr_comp); + } + if (exist) + goto fail_unlock; + else + goto found; + } +scan_primary_hash: if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk, saddr_comp, 0)) goto fail_unlock; @@ -255,12 +312,14 @@ static unsigned int udp4_portaddr_hash(struct net *net, __be32 saddr, int udp_v4_get_port(struct sock *sk, unsigned short snum) { + unsigned int hash2_nulladdr = + udp4_portaddr_hash(sock_net(sk), INADDR_ANY, snum); + unsigned int hash2_partial = + udp4_portaddr_hash(sock_net(sk), inet_sk(sk)->inet_rcv_saddr, 0); + /* precompute partial secondary hash */ - udp_sk(sk)->udp_portaddr_hash = - udp4_portaddr_hash(sock_net(sk), - inet_sk(sk)->inet_rcv_saddr, - 0); - return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal); + udp_sk(sk)->udp_portaddr_hash = hash2_partial; + return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal, hash2_nulladdr); } static inline int compute_score(struct sock *sk, struct net *net, __be32 saddr, @@ -336,8 +395,6 @@ static inline int compute_score2(struct sock *sk, struct net *net, return score; } -#define udp_portaddr_for_each_entry_rcu(__sk, node, list) \ - hlist_nulls_for_each_entry_rcu(__sk, node, list, __sk_common.skc_portaddr_node) /* called with read_rcu_lock() */ static struct sock *udp4_lib_lookup2(struct net *net, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 2915e1dad726..f4c85b200051 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -100,12 +100,14 @@ static unsigned int udp6_portaddr_hash(struct net *net, int udp_v6_get_port(struct sock *sk, unsigned short snum) { + unsigned int hash2_nulladdr = + udp6_portaddr_hash(sock_net(sk), &in6addr_any, snum); + unsigned int hash2_partial = + udp6_portaddr_hash(sock_net(sk), &inet6_sk(sk)->rcv_saddr, 0); + /* precompute partial secondary hash */ - udp_sk(sk)->udp_portaddr_hash = - udp6_portaddr_hash(sock_net(sk), - &inet6_sk(sk)->rcv_saddr, - 0); - return udp_lib_get_port(sk, snum, ipv6_rcv_saddr_equal); + udp_sk(sk)->udp_portaddr_hash = hash2_partial; + return udp_lib_get_port(sk, snum, ipv6_rcv_saddr_equal, hash2_nulladdr); } static inline int compute_score(struct sock *sk, struct net *net, @@ -181,8 +183,6 @@ static inline int compute_score2(struct sock *sk, struct net *net, return score; } -#define udp_portaddr_for_each_entry_rcu(__sk, node, list) \ - hlist_nulls_for_each_entry_rcu(__sk, node, list, __sk_common.skc_portaddr_node) /* called with read_rcu_lock() */ static struct sock *udp6_lib_lookup2(struct net *net, -- cgit v1.3-8-gc7d7 From 37e8273cd30592d3a82bcb70cbb1bdc4eaeb6b71 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 4 Nov 2009 15:29:52 +0000 Subject: usbnet: Set link down initially for drivers that update link state Some usbnet drivers update link state while others do not due to hardware limitations. Add a flag to distinguish those that do, and set the link down initially for their devices. This is intended to fix this bug: http://bugs.debian.org/444043 Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- drivers/net/usb/asix.c | 12 ++++++------ drivers/net/usb/cdc_ether.c | 2 +- drivers/net/usb/dm9601.c | 2 +- drivers/net/usb/usbnet.c | 4 +++- include/linux/usb/usbnet.h | 1 + 5 files changed, 12 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/drivers/net/usb/asix.c b/drivers/net/usb/asix.c index 6ce7f775bb74..1bef39a60a62 100644 --- a/drivers/net/usb/asix.c +++ b/drivers/net/usb/asix.c @@ -1327,7 +1327,7 @@ static const struct driver_info ax8817x_info = { .status = asix_status, .link_reset = ax88172_link_reset, .reset = ax88172_link_reset, - .flags = FLAG_ETHER, + .flags = FLAG_ETHER | FLAG_LINK_INTR, .data = 0x00130103, }; @@ -1337,7 +1337,7 @@ static const struct driver_info dlink_dub_e100_info = { .status = asix_status, .link_reset = ax88172_link_reset, .reset = ax88172_link_reset, - .flags = FLAG_ETHER, + .flags = FLAG_ETHER | FLAG_LINK_INTR, .data = 0x009f9d9f, }; @@ -1347,7 +1347,7 @@ static const struct driver_info netgear_fa120_info = { .status = asix_status, .link_reset = ax88172_link_reset, .reset = ax88172_link_reset, - .flags = FLAG_ETHER, + .flags = FLAG_ETHER | FLAG_LINK_INTR, .data = 0x00130103, }; @@ -1357,7 +1357,7 @@ static const struct driver_info hawking_uf200_info = { .status = asix_status, .link_reset = ax88172_link_reset, .reset = ax88172_link_reset, - .flags = FLAG_ETHER, + .flags = FLAG_ETHER | FLAG_LINK_INTR, .data = 0x001f1d1f, }; @@ -1367,7 +1367,7 @@ static const struct driver_info ax88772_info = { .status = asix_status, .link_reset = ax88772_link_reset, .reset = ax88772_link_reset, - .flags = FLAG_ETHER | FLAG_FRAMING_AX, + .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_LINK_INTR, .rx_fixup = asix_rx_fixup, .tx_fixup = asix_tx_fixup, }; @@ -1378,7 +1378,7 @@ static const struct driver_info ax88178_info = { .status = asix_status, .link_reset = ax88178_link_reset, .reset = ax88178_link_reset, - .flags = FLAG_ETHER | FLAG_FRAMING_AX, + .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_LINK_INTR, .rx_fixup = asix_rx_fixup, .tx_fixup = asix_tx_fixup, }; diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index 71d7ff3de99f..7ec24c9b2535 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -413,7 +413,7 @@ static int cdc_bind(struct usbnet *dev, struct usb_interface *intf) static const struct driver_info cdc_info = { .description = "CDC Ethernet Device", - .flags = FLAG_ETHER, + .flags = FLAG_ETHER | FLAG_LINK_INTR, // .check_connect = cdc_check_connect, .bind = cdc_bind, .unbind = usbnet_cdc_unbind, diff --git a/drivers/net/usb/dm9601.c b/drivers/net/usb/dm9601.c index a2b30a10064f..3d406f9b2f29 100644 --- a/drivers/net/usb/dm9601.c +++ b/drivers/net/usb/dm9601.c @@ -611,7 +611,7 @@ static int dm9601_link_reset(struct usbnet *dev) static const struct driver_info dm9601_info = { .description = "Davicom DM9601 USB Ethernet", - .flags = FLAG_ETHER, + .flags = FLAG_ETHER | FLAG_LINK_INTR, .bind = dm9601_bind, .rx_fixup = dm9601_rx_fixup, .tx_fixup = dm9601_tx_fixup, diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 378da8c938fe..04f3f289e87c 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -1352,9 +1352,11 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod) // ok, it's ready to go. usb_set_intfdata (udev, dev); - // start as if the link is up netif_device_attach (net); + if (dev->driver_info->flags & FLAG_LINK_INTR) + netif_carrier_off(net); + return 0; out3: diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 86c31b753266..8c84881f8478 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -92,6 +92,7 @@ struct driver_info { #define FLAG_SEND_ZLP 0x0200 /* hw requires ZLPs are sent */ #define FLAG_WWAN 0x0400 /* use "wwan%d" names */ +#define FLAG_LINK_INTR 0x0800 /* updates link (carrier) status */ /* init device ... can sleep, or cause probe() failure */ int (*bind)(struct usbnet *, struct usb_interface *); -- cgit v1.3-8-gc7d7 From 254245d23396aca1f9100d500163d7bd6019ab6f Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 10 Nov 2009 07:54:47 +0000 Subject: netdev: add netdev_continue_rcu This adds an RCU macro for continuing search, useful for some network devices like vlan. Signed-off-by: Stephen Hemminger Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 ++ include/linux/rculist.h | 14 ++++++++++++++ 2 files changed, 16 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 465add6c43e3..083b5989cecb 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1079,6 +1079,8 @@ extern rwlock_t dev_base_lock; /* Device list lock */ list_for_each_entry_safe(d, n, &(net)->dev_base_head, dev_list) #define for_each_netdev_continue(net, d) \ list_for_each_entry_continue(d, &(net)->dev_base_head, dev_list) +#define for_each_netdev_continue_rcu(net, d) \ + list_for_each_entry_continue_rcu(d, &(net)->dev_base_head, dev_list) #define net_device_entry(lh) list_entry(lh, struct net_device, dev_list) static inline struct net_device *next_net_device(struct net_device *dev) diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 5710f43bbc9e..1bf0f708c4fc 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -261,6 +261,20 @@ static inline void list_splice_init_rcu(struct list_head *list, prefetch((pos)->next), (pos) != (head); \ (pos) = rcu_dereference((pos)->next)) +/** + * list_for_each_entry_continue_rcu - continue iteration over list of given type + * @pos: the type * to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the list_struct within the struct. + * + * Continue to iterate over list of given type, continuing after + * the current position. + */ +#define list_for_each_entry_continue_rcu(pos, head, member) \ + for (pos = list_entry_rcu(pos->member.next, typeof(*pos), member); \ + prefetch(pos->member.next), &pos->member != (head); \ + pos = list_entry_rcu(pos->member.next, typeof(*pos), member)) + /** * hlist_del_rcu - deletes entry from hash list without re-initialization * @n: the element to delete from the hash list. -- cgit v1.3-8-gc7d7 From 2315ffa0a9f789c588c7139effa7404a387d8685 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 3 Apr 2009 03:18:02 -0700 Subject: sysctl: Don't look at ctl_name and strategy in the generic code The ctl_name and strategy fields are unused, now that sys_sysctl is a compatibility wrapper around /proc/sys. No longer looking at them in the generic code is effectively what we are doing now and provides the guarantee that during further cleanups we can just remove references to those fields and everything will work ok. Signed-off-by: Eric W. Biederman --- fs/proc/proc_sysctl.c | 4 ++-- include/linux/sysctl.h | 17 ++--------------- kernel/sysctl.c | 29 +++++++---------------------- 3 files changed, 11 insertions(+), 39 deletions(-) (limited to 'include') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index f667e8aeabdf..6ff9981f0a18 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -48,7 +48,7 @@ out: static struct ctl_table *find_in_table(struct ctl_table *p, struct qstr *name) { int len; - for ( ; p->ctl_name || p->procname; p++) { + for ( ; p->procname; p++) { if (!p->procname) continue; @@ -218,7 +218,7 @@ static int scan(struct ctl_table_header *head, ctl_table *table, void *dirent, filldir_t filldir) { - for (; table->ctl_name || table->procname; table++, (*pos)++) { + for (; table->procname; table++, (*pos)++) { int res; /* Can't do anything without a proc name */ diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 82c32b89932d..7c4aabc04673 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -1005,8 +1005,8 @@ extern ctl_handler sysctl_ms_jiffies; /* * Register a set of sysctl names by calling register_sysctl_table - * with an initialised array of struct ctl_table's. An entry with zero - * ctl_name and NULL procname terminates the table. table->de will be + * with an initialised array of struct ctl_table's. An entry with + * NULL procname terminates the table. table->de will be * set up by the registration and need not be initialised in advance. * * sysctl names can be mirrored automatically under /proc/sys. The @@ -1019,24 +1019,11 @@ extern ctl_handler sysctl_ms_jiffies; * under /proc; non-leaf nodes will be represented by directories. A * null procname disables /proc mirroring at this node. * - * sysctl entries with a zero ctl_name will not be available through - * the binary sysctl interface. - * * sysctl(2) can automatically manage read and write requests through * the sysctl table. The data and maxlen fields of the ctl_table * struct enable minimal validation of the values being written to be * performed, and the mode field allows minimal authentication. * - * More sophisticated management can be enabled by the provision of a - * strategy routine with the table entry. This will be called before - * any automatic read or write of the data is performed. - * - * The strategy routine may return: - * <0: Error occurred (error is passed to user process) - * 0: OK - proceed with automatic read or write. - * >0: OK - read or write has been done by the strategy routine, so - * return immediately. - * * There must be a proc_handler routine for any terminal nodes * mirrored under /proc/sys (non-terminals are handled by a built-in * directory handler). Several default handlers are available to diff --git a/kernel/sysctl.c b/kernel/sysctl.c index f6dacc383c16..b6b6eb1abebb 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1618,7 +1618,7 @@ int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op) static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table) { - for (; table->ctl_name || table->procname; table++) { + for (; table->procname; table++) { table->parent = parent; if (table->child) sysctl_set_parent(table, table->child); @@ -1650,11 +1650,11 @@ static struct ctl_table *is_branch_in(struct ctl_table *branch, return NULL; /* ... and nothing else */ - if (branch[1].procname || branch[1].ctl_name) + if (branch[1].procname) return NULL; /* table should contain subdirectory with the same name */ - for (p = table; p->procname || p->ctl_name; p++) { + for (p = table; p->procname; p++) { if (!p->child) continue; if (p->procname && strcmp(p->procname, s) == 0) @@ -1699,8 +1699,7 @@ static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q) * * The members of the &struct ctl_table structure are used as follows: * - * ctl_name - This is the numeric sysctl value used by sysctl(2). The number - * must be unique within that level of sysctl + * ctl_name - Dead * * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not * enter a sysctl file @@ -1716,7 +1715,7 @@ static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q) * * proc_handler - the text handler routine (described below) * - * strategy - the strategy routine (described below) + * strategy - Dead * * de - for internal use by the sysctl routines * @@ -1730,19 +1729,6 @@ static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q) * struct enable minimal validation of the values being written to be * performed, and the mode field allows minimal authentication. * - * More sophisticated management can be enabled by the provision of a - * strategy routine with the table entry. This will be called before - * any automatic read or write of the data is performed. - * - * The strategy routine may return - * - * < 0 - Error occurred (error is passed to user process) - * - * 0 - OK - proceed with automatic read or write. - * - * > 0 - OK - read or write has been done by the strategy routine, so - * return immediately. - * * There must be a proc_handler routine for any terminal nodes * mirrored under /proc/sys (non-terminals are handled by a built-in * directory handler). Several default handlers are available to @@ -1769,13 +1755,13 @@ struct ctl_table_header *__register_sysctl_paths( struct ctl_table_set *set; /* Count the path components */ - for (npath = 0; path[npath].ctl_name || path[npath].procname; ++npath) + for (npath = 0; path[npath].procname; ++npath) ; /* * For each path component, allocate a 2-element ctl_table array. * The first array element will be filled with the sysctl entry - * for this, the second will be the sentinel (ctl_name == 0). + * for this, the second will be the sentinel (procname == 0). * * We allocate everything in one go so that we don't have to * worry about freeing additional memory in unregister_sysctl_table. @@ -1792,7 +1778,6 @@ struct ctl_table_header *__register_sysctl_paths( for (n = 0; n < npath; ++n, ++path) { /* Copy the procname */ new->procname = path->procname; - new->ctl_name = path->ctl_name; new->mode = 0555; *prevp = new; -- cgit v1.3-8-gc7d7 From 3491707a070c1183c709516b2f876f798c7a9a84 Mon Sep 17 00:00:00 2001 From: Rui Paulo Date: Mon, 9 Nov 2009 23:46:39 +0000 Subject: mac80211: update meshconf IE This updates the Mesh Configuration IE according to the latest draft (3.03). Notable changes include the simplified protocol IDs. Signed-off-by: Rui Paulo Signed-off-by: Javier Cardona Reviewed-by: Andrey Yurovsky Tested-by: Brian Cavagnolo Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 2 +- net/mac80211/ieee80211_i.h | 10 +++---- net/mac80211/mesh.c | 69 +++++++++++++++++++--------------------------- 3 files changed, 35 insertions(+), 46 deletions(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 0aa831467493..50c684db33c7 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -115,7 +115,7 @@ #define IEEE80211_MAX_SSID_LEN 32 #define IEEE80211_MAX_MESH_ID_LEN 32 -#define IEEE80211_MESH_CONFIG_LEN 24 +#define IEEE80211_MESH_CONFIG_LEN 7 #define IEEE80211_QOS_CTL_LEN 2 #define IEEE80211_QOS_CTL_TID_MASK 0x000F diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 1ef767366b77..1f4f88a8f80c 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -361,15 +361,15 @@ struct ieee80211_if_mesh { u8 mesh_id[IEEE80211_MAX_MESH_ID_LEN]; size_t mesh_id_len; /* Active Path Selection Protocol Identifier */ - u8 mesh_pp_id[4]; + u8 mesh_pp_id; /* Active Path Selection Metric Identifier */ - u8 mesh_pm_id[4]; + u8 mesh_pm_id; /* Congestion Control Mode Identifier */ - u8 mesh_cc_id[4]; + u8 mesh_cc_id; /* Synchronization Protocol Identifier */ - u8 mesh_sp_id[4]; + u8 mesh_sp_id; /* Authentication Protocol Identifier */ - u8 mesh_auth_id[4]; + u8 mesh_auth_id; /* Local mesh Destination Sequence Number */ u32 dsn; /* Last used PREQ ID */ diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 9a733890eb47..a49a3374acb1 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -15,14 +15,14 @@ #define IEEE80211_MESH_PEER_INACTIVITY_LIMIT (1800 * HZ) #define IEEE80211_MESH_HOUSEKEEPING_INTERVAL (60 * HZ) -#define PP_OFFSET 1 /* Path Selection Protocol */ -#define PM_OFFSET 5 /* Path Selection Metric */ -#define CC_OFFSET 9 /* Congestion Control Mode */ -#define SP_OFFSET 13 /* Synchronization Protocol */ -#define AUTH_OFFSET 17 /* Authentication Protocol */ -#define CAPAB_OFFSET 22 -#define CAPAB_ACCEPT_PLINKS 0x80 -#define CAPAB_FORWARDING 0x10 +#define MESHCONF_PP_OFFSET 0 /* Path Selection Protocol */ +#define MESHCONF_PM_OFFSET 1 /* Path Selection Metric */ +#define MESHCONF_CC_OFFSET 2 /* Congestion Control Mode */ +#define MESHCONF_SP_OFFSET 3 /* Synchronization Protocol */ +#define MESHCONF_AUTH_OFFSET 4 /* Authentication Protocol */ +#define MESHCONF_CAPAB_OFFSET 6 +#define MESHCONF_CAPAB_ACCEPT_PLINKS 0x01 +#define MESHCONF_CAPAB_FORWARDING 0x08 #define TMR_RUNNING_HK 0 #define TMR_RUNNING_MP 1 @@ -85,11 +85,12 @@ bool mesh_matches_local(struct ieee802_11_elems *ie, struct ieee80211_sub_if_dat */ if (ifmsh->mesh_id_len == ie->mesh_id_len && memcmp(ifmsh->mesh_id, ie->mesh_id, ie->mesh_id_len) == 0 && - memcmp(ifmsh->mesh_pp_id, ie->mesh_config + PP_OFFSET, 4) == 0 && - memcmp(ifmsh->mesh_pm_id, ie->mesh_config + PM_OFFSET, 4) == 0 && - memcmp(ifmsh->mesh_cc_id, ie->mesh_config + CC_OFFSET, 4) == 0 && - memcmp(ifmsh->mesh_sp_id, ie->mesh_config + SP_OFFSET, 4) == 0 && - memcmp(ifmsh->mesh_auth_id, ie->mesh_config + AUTH_OFFSET, 4) == 0) + (ifmsh->mesh_pp_id == *(ie->mesh_config + MESHCONF_PP_OFFSET))&& + (ifmsh->mesh_pm_id == *(ie->mesh_config + MESHCONF_PM_OFFSET))&& + (ifmsh->mesh_cc_id == *(ie->mesh_config + MESHCONF_CC_OFFSET))&& + (ifmsh->mesh_sp_id == *(ie->mesh_config + MESHCONF_SP_OFFSET))&& + (ifmsh->mesh_auth_id == *(ie->mesh_config + + MESHCONF_AUTH_OFFSET))) return true; return false; @@ -102,7 +103,8 @@ bool mesh_matches_local(struct ieee802_11_elems *ie, struct ieee80211_sub_if_dat */ bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie) { - return (*(ie->mesh_config + CAPAB_OFFSET) & CAPAB_ACCEPT_PLINKS) != 0; + return (*(ie->mesh_config + MESHCONF_CAPAB_OFFSET) & + MESHCONF_CAPAB_ACCEPT_PLINKS) != 0; } /** @@ -128,18 +130,11 @@ void mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata) void mesh_ids_set_default(struct ieee80211_if_mesh *sta) { - u8 oui[3] = {0x00, 0x0F, 0xAC}; - - memcpy(sta->mesh_pp_id, oui, sizeof(oui)); - memcpy(sta->mesh_pm_id, oui, sizeof(oui)); - memcpy(sta->mesh_cc_id, oui, sizeof(oui)); - memcpy(sta->mesh_sp_id, oui, sizeof(oui)); - memcpy(sta->mesh_auth_id, oui, sizeof(oui)); - sta->mesh_pp_id[sizeof(oui)] = 0; - sta->mesh_pm_id[sizeof(oui)] = 0; - sta->mesh_cc_id[sizeof(oui)] = 0xff; - sta->mesh_sp_id[sizeof(oui)] = 0xff; - sta->mesh_auth_id[sizeof(oui)] = 0x0; + sta->mesh_pp_id = 0; /* HWMP */ + sta->mesh_pm_id = 0; /* Airtime */ + sta->mesh_cc_id = 0; /* Disabled */ + sta->mesh_sp_id = 0; /* Neighbor Offset */ + sta->mesh_auth_id = 0; /* Disabled */ } int mesh_rmc_init(struct ieee80211_sub_if_data *sdata) @@ -260,28 +255,21 @@ void mesh_mgmt_ies_add(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata) pos = skb_put(skb, 2 + IEEE80211_MESH_CONFIG_LEN); *pos++ = WLAN_EID_MESH_CONFIG; *pos++ = IEEE80211_MESH_CONFIG_LEN; - /* Version */ - *pos++ = 1; /* Active path selection protocol ID */ - memcpy(pos, sdata->u.mesh.mesh_pp_id, 4); - pos += 4; + *pos++ = sdata->u.mesh.mesh_pp_id; /* Active path selection metric ID */ - memcpy(pos, sdata->u.mesh.mesh_pm_id, 4); - pos += 4; + *pos++ = sdata->u.mesh.mesh_pm_id; /* Congestion control mode identifier */ - memcpy(pos, sdata->u.mesh.mesh_cc_id, 4); - pos += 4; + *pos++ = sdata->u.mesh.mesh_cc_id; /* Synchronization protocol identifier */ - memcpy(pos, sdata->u.mesh.mesh_sp_id, 4); - pos += 4; + *pos++ = sdata->u.mesh.mesh_sp_id; /* Authentication Protocol identifier */ - memcpy(pos, sdata->u.mesh.mesh_auth_id, 4); - pos += 4; + *pos++ = sdata->u.mesh.mesh_auth_id; /* Mesh Formation Info */ memset(pos, 0x00, 1); @@ -289,8 +277,9 @@ void mesh_mgmt_ies_add(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata) /* Mesh capability */ sdata->u.mesh.accepting_plinks = mesh_plink_availables(sdata); - *pos = CAPAB_FORWARDING; - *pos++ |= sdata->u.mesh.accepting_plinks ? CAPAB_ACCEPT_PLINKS : 0x00; + *pos = MESHCONF_CAPAB_FORWARDING; + *pos++ |= sdata->u.mesh.accepting_plinks ? + MESHCONF_CAPAB_ACCEPT_PLINKS : 0x00; *pos++ = 0x00; return; -- cgit v1.3-8-gc7d7 From 8b787643ca0a5130c647109d77fe512f89cfa611 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 10 Nov 2009 18:53:10 +0100 Subject: nl80211: add a parameter for using 4-address frames on virtual interfaces Signed-off-by: Felix Fietkau Signed-off-by: John W. Linville --- include/linux/nl80211.h | 4 ++++ include/net/cfg80211.h | 2 ++ net/wireless/nl80211.c | 11 +++++++++++ 3 files changed, 17 insertions(+) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 50afca3dcff1..203adef972cf 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -584,6 +584,8 @@ enum nl80211_commands { * changed then the list changed and the dump should be repeated * completely from scratch. * + * @NL80211_ATTR_4ADDR: Use 4-address frames on a virtual interface + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -714,6 +716,8 @@ enum nl80211_attrs { NL80211_ATTR_PID, + NL80211_ATTR_4ADDR, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index ff67865de231..1ee41e4a92e2 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -206,10 +206,12 @@ struct ieee80211_supported_band { * struct vif_params - describes virtual interface parameters * @mesh_id: mesh ID to use * @mesh_id_len: length of the mesh ID + * @use_4addr: use 4-address frames */ struct vif_params { u8 *mesh_id; int mesh_id_len; + int use_4addr; }; /** diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 8ed62b6c172b..8c8e4eae6a17 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -138,6 +138,7 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { [NL80211_ATTR_CIPHER_SUITE_GROUP] = { .type = NLA_U32 }, [NL80211_ATTR_WPA_VERSIONS] = { .type = NLA_U32 }, [NL80211_ATTR_PID] = { .type = NLA_U32 }, + [NL80211_ATTR_4ADDR] = { .type = NLA_U8 }, }; /* policy for the attributes */ @@ -987,6 +988,13 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) change = true; } + if (info->attrs[NL80211_ATTR_4ADDR]) { + params.use_4addr = !!nla_get_u8(info->attrs[NL80211_ATTR_4ADDR]); + change = true; + } else { + params.use_4addr = -1; + } + if (info->attrs[NL80211_ATTR_MNTR_FLAGS]) { if (ntype != NL80211_IFTYPE_MONITOR) { err = -EINVAL; @@ -1053,6 +1061,9 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) params.mesh_id_len = nla_len(info->attrs[NL80211_ATTR_MESH_ID]); } + if (info->attrs[NL80211_ATTR_4ADDR]) + params.use_4addr = !!nla_get_u8(info->attrs[NL80211_ATTR_4ADDR]); + err = parse_monitor_flags(type == NL80211_IFTYPE_MONITOR ? info->attrs[NL80211_ATTR_MNTR_FLAGS] : NULL, &flags); -- cgit v1.3-8-gc7d7 From f8572d8f2a2ba75408b97dc24ef47c83671795d7 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 5 Nov 2009 13:32:03 -0800 Subject: sysctl net: Remove unused binary sysctl code Now that sys_sysctl is a compatiblity wrapper around /proc/sys all sysctl strategy routines, and all ctl_name and strategy entries in the sysctl tables are unused, and can be revmoed. In addition neigh_sysctl_register has been modified to no longer take a strategy argument and it's callers have been modified not to pass one. Cc: "David Miller" Cc: Hideaki YOSHIFUJI Cc: netdev@vger.kernel.org Signed-off-by: Eric W. Biederman --- include/net/dn_dev.h | 1 - include/net/neighbour.h | 3 +- net/802/tr.c | 7 +- net/appletalk/sysctl_net_atalk.c | 13 +- net/ax25/sysctl_net_ax25.c | 38 +----- net/bridge/br_netfilter.c | 6 +- net/core/neighbour.c | 47 +------ net/core/sysctl_net_core.c | 21 +--- net/dccp/sysctl.c | 8 +- net/decnet/dn_dev.c | 64 +--------- net/decnet/sysctl_net_decnet.c | 124 +------------------ net/ipv4/arp.c | 2 +- net/ipv4/devinet.c | 111 +++-------------- net/ipv4/ip_fragment.c | 6 - net/ipv4/netfilter.c | 6 +- net/ipv4/netfilter/ip_queue.c | 3 +- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 10 +- net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 8 +- net/ipv4/route.c | 73 ++--------- net/ipv4/sysctl_net_ipv4.c | 164 +------------------------ net/ipv4/xfrm4_policy.c | 1 - net/ipv6/addrconf.c | 90 ++------------ net/ipv6/icmp.c | 4 +- net/ipv6/ndisc.c | 39 +----- net/ipv6/netfilter/ip6_queue.c | 4 +- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 4 +- net/ipv6/netfilter/nf_conntrack_reasm.c | 4 +- net/ipv6/reassembly.c | 6 - net/ipv6/route.c | 18 +-- net/ipv6/sysctl_net_ipv6.c | 12 +- net/ipv6/xfrm6_policy.c | 1 - net/ipx/sysctl_net_ipx.c | 7 +- net/irda/irsysctl.c | 31 +---- net/llc/sysctl_net_llc.c | 25 +--- net/netfilter/core.c | 4 +- net/netfilter/ipvs/ip_vs_ctl.c | 6 +- net/netfilter/ipvs/ip_vs_lblc.c | 2 +- net/netfilter/ipvs/ip_vs_lblcr.c | 2 +- net/netfilter/nf_conntrack_acct.c | 1 - net/netfilter/nf_conntrack_ecache.c | 2 - net/netfilter/nf_conntrack_proto_dccp.c | 12 +- net/netfilter/nf_conntrack_proto_generic.c | 8 +- net/netfilter/nf_conntrack_proto_sctp.c | 8 +- net/netfilter/nf_conntrack_proto_tcp.c | 14 +-- net/netfilter/nf_conntrack_proto_udp.c | 8 +- net/netfilter/nf_conntrack_proto_udplite.c | 6 +- net/netfilter/nf_conntrack_standalone.c | 14 +-- net/netfilter/nf_log.c | 7 +- net/netrom/sysctl_net_netrom.c | 30 +---- net/phonet/sysctl.c | 8 +- net/rds/ib_sysctl.c | 14 +-- net/rds/iw_sysctl.c | 14 +-- net/rds/sysctl.c | 11 +- net/rose/sysctl_net_rose.c | 26 +--- net/sctp/sysctl.c | 49 +------- net/sunrpc/sysctl.c | 5 +- net/sunrpc/xprtrdma/svc_rdma.c | 16 +-- net/sunrpc/xprtrdma/transport.c | 20 +-- net/sunrpc/xprtsock.c | 18 +-- net/unix/sysctl_net_unix.c | 7 +- net/x25/sysctl_net_x25.c | 15 +-- net/xfrm/xfrm_sysctl.c | 4 - 62 files changed, 162 insertions(+), 1130 deletions(-) (limited to 'include') diff --git a/include/net/dn_dev.h b/include/net/dn_dev.h index cee46821dc53..3f781a4cafbe 100644 --- a/include/net/dn_dev.h +++ b/include/net/dn_dev.h @@ -75,7 +75,6 @@ struct dn_dev_parms { unsigned long t3; /* Default value of t3 */ int priority; /* Priority to be a router */ char *name; /* Name for sysctl */ - int ctl_name; /* Index for sysctl */ int (*up)(struct net_device *); void (*down)(struct net_device *); void (*timer3)(struct net_device *, struct dn_ifaddr *ifa); diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 3817fda82a80..da99fdd63cf5 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -264,8 +264,7 @@ extern int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, int p_id, int pdev_id, char *p_name, - proc_handler *proc_handler, - ctl_handler *strategy); + proc_handler *proc_handler); extern void neigh_sysctl_unregister(struct neigh_parms *p); static inline void __neigh_parms_put(struct neigh_parms *parms) diff --git a/net/802/tr.c b/net/802/tr.c index e874447ad144..44acce47fcdc 100644 --- a/net/802/tr.c +++ b/net/802/tr.c @@ -635,19 +635,18 @@ struct net_device *alloc_trdev(int sizeof_priv) #ifdef CONFIG_SYSCTL static struct ctl_table tr_table[] = { { - .ctl_name = NET_TR_RIF_TIMEOUT, .procname = "rif_timeout", .data = &sysctl_tr_rif_timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec }, - { 0 }, + { }, }; static __initdata struct ctl_path tr_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "token-ring", .ctl_name = NET_TR, }, + { .procname = "net", }, + { .procname = "token-ring", }, { } }; #endif diff --git a/net/appletalk/sysctl_net_atalk.c b/net/appletalk/sysctl_net_atalk.c index 8d237b15183b..04e9c0da7aa9 100644 --- a/net/appletalk/sysctl_net_atalk.c +++ b/net/appletalk/sysctl_net_atalk.c @@ -12,25 +12,20 @@ static struct ctl_table atalk_table[] = { { - .ctl_name = NET_ATALK_AARP_EXPIRY_TIME, .procname = "aarp-expiry-time", .data = &sysctl_aarp_expiry_time, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies, }, { - .ctl_name = NET_ATALK_AARP_TICK_TIME, .procname = "aarp-tick-time", .data = &sysctl_aarp_tick_time, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies, }, { - .ctl_name = NET_ATALK_AARP_RETRANSMIT_LIMIT, .procname = "aarp-retransmit-limit", .data = &sysctl_aarp_retransmit_limit, .maxlen = sizeof(int), @@ -38,20 +33,18 @@ static struct ctl_table atalk_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_ATALK_AARP_RESOLVE_TIME, .procname = "aarp-resolve-time", .data = &sysctl_aarp_resolve_time, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies, }, - { 0 }, + { }, }; static struct ctl_path atalk_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "appletalk", .ctl_name = NET_ATALK, }, + { .procname = "net", }, + { .procname = "appletalk", }, { } }; diff --git a/net/ax25/sysctl_net_ax25.c b/net/ax25/sysctl_net_ax25.c index 62ee3fb34732..5159be6b2625 100644 --- a/net/ax25/sysctl_net_ax25.c +++ b/net/ax25/sysctl_net_ax25.c @@ -34,156 +34,128 @@ static ctl_table *ax25_table; static int ax25_table_size; static struct ctl_path ax25_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "ax25", .ctl_name = NET_AX25, }, + { .procname = "net", }, + { .procname = "ax25", }, { } }; static const ctl_table ax25_param_table[] = { { - .ctl_name = NET_AX25_IP_DEFAULT_MODE, .procname = "ip_default_mode", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_ipdefmode, .extra2 = &max_ipdefmode }, { - .ctl_name = NET_AX25_DEFAULT_MODE, .procname = "ax25_default_mode", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_axdefmode, .extra2 = &max_axdefmode }, { - .ctl_name = NET_AX25_BACKOFF_TYPE, .procname = "backoff_type", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_backoff, .extra2 = &max_backoff }, { - .ctl_name = NET_AX25_CONNECT_MODE, .procname = "connect_mode", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_conmode, .extra2 = &max_conmode }, { - .ctl_name = NET_AX25_STANDARD_WINDOW, .procname = "standard_window_size", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_window, .extra2 = &max_window }, { - .ctl_name = NET_AX25_EXTENDED_WINDOW, .procname = "extended_window_size", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_ewindow, .extra2 = &max_ewindow }, { - .ctl_name = NET_AX25_T1_TIMEOUT, .procname = "t1_timeout", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_t1, .extra2 = &max_t1 }, { - .ctl_name = NET_AX25_T2_TIMEOUT, .procname = "t2_timeout", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_t2, .extra2 = &max_t2 }, { - .ctl_name = NET_AX25_T3_TIMEOUT, .procname = "t3_timeout", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_t3, .extra2 = &max_t3 }, { - .ctl_name = NET_AX25_IDLE_TIMEOUT, .procname = "idle_timeout", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_idle, .extra2 = &max_idle }, { - .ctl_name = NET_AX25_N2, .procname = "maximum_retry_count", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_n2, .extra2 = &max_n2 }, { - .ctl_name = NET_AX25_PACLEN, .procname = "maximum_packet_length", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_paclen, .extra2 = &max_paclen }, { - .ctl_name = NET_AX25_PROTOCOL, .procname = "protocol", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_proto, .extra2 = &max_proto }, #ifdef CONFIG_AX25_DAMA_SLAVE { - .ctl_name = NET_AX25_DAMA_SLAVE_TIMEOUT, .procname = "dama_slave_timeout", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_ds_timeout, .extra2 = &max_ds_timeout }, #endif - { .ctl_name = 0 } /* that's all, folks! */ + { } /* that's all, folks! */ }; void ax25_register_sysctl(void) @@ -212,11 +184,9 @@ void ax25_register_sysctl(void) return; } ax25_table[n].child = ax25_dev->systable = child; - ax25_table[n].ctl_name = n + 1; ax25_table[n].procname = ax25_dev->dev->name; ax25_table[n].mode = 0555; - child[AX25_MAX_VALUES].ctl_name = 0; /* just in case... */ for (k = 0; k < AX25_MAX_VALUES; k++) child[k].data = &ax25_dev->values[k]; @@ -233,7 +203,7 @@ void ax25_unregister_sysctl(void) ctl_table *p; unregister_sysctl_table(ax25_table_header); - for (p = ax25_table; p->ctl_name; p++) + for (p = ax25_table; p->procname; p++) kfree(p->child); kfree(ax25_table); } diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index a16a2342f6bf..268e2e725888 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -1013,12 +1013,12 @@ static ctl_table brnf_table[] = { .mode = 0644, .proc_handler = brnf_sysctl_call_tables, }, - { .ctl_name = 0 } + { } }; static struct ctl_path brnf_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "bridge", .ctl_name = NET_BRIDGE, }, + { .procname = "net", }, + { .procname = "bridge", }, { } }; #endif diff --git a/net/core/neighbour.c b/net/core/neighbour.c index e587e6819698..2b54e6c6a7c8 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2566,21 +2566,18 @@ static struct neigh_sysctl_table { } neigh_sysctl_template __read_mostly = { .neigh_vars = { { - .ctl_name = NET_NEIGH_MCAST_SOLICIT, .procname = "mcast_solicit", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { - .ctl_name = NET_NEIGH_UCAST_SOLICIT, .procname = "ucast_solicit", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { - .ctl_name = NET_NEIGH_APP_SOLICIT, .procname = "app_solicit", .maxlen = sizeof(int), .mode = 0644, @@ -2593,38 +2590,30 @@ static struct neigh_sysctl_table { .proc_handler = proc_dointvec_userhz_jiffies, }, { - .ctl_name = NET_NEIGH_REACHABLE_TIME, .procname = "base_reachable_time", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies, }, { - .ctl_name = NET_NEIGH_DELAY_PROBE_TIME, .procname = "delay_first_probe_time", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies, }, { - .ctl_name = NET_NEIGH_GC_STALE_TIME, .procname = "gc_stale_time", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies, }, { - .ctl_name = NET_NEIGH_UNRES_QLEN, .procname = "unres_qlen", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { - .ctl_name = NET_NEIGH_PROXY_QLEN, .procname = "proxy_qlen", .maxlen = sizeof(int), .mode = 0644, @@ -2649,45 +2638,36 @@ static struct neigh_sysctl_table { .proc_handler = proc_dointvec_userhz_jiffies, }, { - .ctl_name = NET_NEIGH_RETRANS_TIME_MS, .procname = "retrans_time_ms", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_ms_jiffies, - .strategy = sysctl_ms_jiffies, }, { - .ctl_name = NET_NEIGH_REACHABLE_TIME_MS, .procname = "base_reachable_time_ms", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_ms_jiffies, - .strategy = sysctl_ms_jiffies, }, { - .ctl_name = NET_NEIGH_GC_INTERVAL, .procname = "gc_interval", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies, }, { - .ctl_name = NET_NEIGH_GC_THRESH1, .procname = "gc_thresh1", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { - .ctl_name = NET_NEIGH_GC_THRESH2, .procname = "gc_thresh2", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { - .ctl_name = NET_NEIGH_GC_THRESH3, .procname = "gc_thresh3", .maxlen = sizeof(int), .mode = 0644, @@ -2699,7 +2679,7 @@ static struct neigh_sysctl_table { int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, int p_id, int pdev_id, char *p_name, - proc_handler *handler, ctl_handler *strategy) + proc_handler *handler) { struct neigh_sysctl_table *t; const char *dev_name_source = NULL; @@ -2710,10 +2690,10 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, #define NEIGH_CTL_PATH_DEV 3 struct ctl_path neigh_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "proto", .ctl_name = 0, }, - { .procname = "neigh", .ctl_name = 0, }, - { .procname = "default", .ctl_name = NET_PROTO_CONF_DEFAULT, }, + { .procname = "net", }, + { .procname = "proto", }, + { .procname = "neigh", }, + { .procname = "default", }, { }, }; @@ -2738,7 +2718,6 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, if (dev) { dev_name_source = dev->name; - neigh_path[NEIGH_CTL_PATH_DEV].ctl_name = dev->ifindex; /* Terminate the table early */ memset(&t->neigh_vars[14], 0, sizeof(t->neigh_vars[14])); } else { @@ -2750,31 +2729,19 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, } - if (handler || strategy) { + if (handler) { /* RetransTime */ t->neigh_vars[3].proc_handler = handler; - t->neigh_vars[3].strategy = strategy; t->neigh_vars[3].extra1 = dev; - if (!strategy) - t->neigh_vars[3].ctl_name = CTL_UNNUMBERED; /* ReachableTime */ t->neigh_vars[4].proc_handler = handler; - t->neigh_vars[4].strategy = strategy; t->neigh_vars[4].extra1 = dev; - if (!strategy) - t->neigh_vars[4].ctl_name = CTL_UNNUMBERED; /* RetransTime (in milliseconds)*/ t->neigh_vars[12].proc_handler = handler; - t->neigh_vars[12].strategy = strategy; t->neigh_vars[12].extra1 = dev; - if (!strategy) - t->neigh_vars[12].ctl_name = CTL_UNNUMBERED; /* ReachableTime (in milliseconds) */ t->neigh_vars[13].proc_handler = handler; - t->neigh_vars[13].strategy = strategy; t->neigh_vars[13].extra1 = dev; - if (!strategy) - t->neigh_vars[13].ctl_name = CTL_UNNUMBERED; } t->dev_name = kstrdup(dev_name_source, GFP_KERNEL); @@ -2782,9 +2749,7 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, goto free; neigh_path[NEIGH_CTL_PATH_DEV].procname = t->dev_name; - neigh_path[NEIGH_CTL_PATH_NEIGH].ctl_name = pdev_id; neigh_path[NEIGH_CTL_PATH_PROTO].procname = p_name; - neigh_path[NEIGH_CTL_PATH_PROTO].ctl_name = p_id; t->sysctl_header = register_net_sysctl_table(neigh_parms_net(p), neigh_path, t->neigh_vars); diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 7db1de0497c6..1ce4e6e85125 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -17,7 +17,6 @@ static struct ctl_table net_core_table[] = { #ifdef CONFIG_NET { - .ctl_name = NET_CORE_WMEM_MAX, .procname = "wmem_max", .data = &sysctl_wmem_max, .maxlen = sizeof(int), @@ -25,7 +24,6 @@ static struct ctl_table net_core_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_CORE_RMEM_MAX, .procname = "rmem_max", .data = &sysctl_rmem_max, .maxlen = sizeof(int), @@ -33,7 +31,6 @@ static struct ctl_table net_core_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_CORE_WMEM_DEFAULT, .procname = "wmem_default", .data = &sysctl_wmem_default, .maxlen = sizeof(int), @@ -41,7 +38,6 @@ static struct ctl_table net_core_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_CORE_RMEM_DEFAULT, .procname = "rmem_default", .data = &sysctl_rmem_default, .maxlen = sizeof(int), @@ -49,7 +45,6 @@ static struct ctl_table net_core_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_CORE_DEV_WEIGHT, .procname = "dev_weight", .data = &weight_p, .maxlen = sizeof(int), @@ -57,7 +52,6 @@ static struct ctl_table net_core_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_CORE_MAX_BACKLOG, .procname = "netdev_max_backlog", .data = &netdev_max_backlog, .maxlen = sizeof(int), @@ -65,16 +59,13 @@ static struct ctl_table net_core_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_CORE_MSG_COST, .procname = "message_cost", .data = &net_ratelimit_state.interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies, }, { - .ctl_name = NET_CORE_MSG_BURST, .procname = "message_burst", .data = &net_ratelimit_state.burst, .maxlen = sizeof(int), @@ -82,7 +73,6 @@ static struct ctl_table net_core_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_CORE_OPTMEM_MAX, .procname = "optmem_max", .data = &sysctl_optmem_max, .maxlen = sizeof(int), @@ -91,7 +81,6 @@ static struct ctl_table net_core_table[] = { }, #endif /* CONFIG_NET */ { - .ctl_name = NET_CORE_BUDGET, .procname = "netdev_budget", .data = &netdev_budget, .maxlen = sizeof(int), @@ -99,31 +88,29 @@ static struct ctl_table net_core_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_CORE_WARNINGS, .procname = "warnings", .data = &net_msg_warn, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec }, - { .ctl_name = 0 } + { } }; static struct ctl_table netns_core_table[] = { { - .ctl_name = NET_CORE_SOMAXCONN, .procname = "somaxconn", .data = &init_net.core.sysctl_somaxconn, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec }, - { .ctl_name = 0 } + { } }; __net_initdata struct ctl_path net_core_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "core", .ctl_name = NET_CORE, }, + { .procname = "net", }, + { .procname = "core", }, { }, }; diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c index a5a1856234e7..563943822e58 100644 --- a/net/dccp/sysctl.c +++ b/net/dccp/sysctl.c @@ -93,13 +93,13 @@ static struct ctl_table dccp_default_table[] = { .proc_handler = proc_dointvec_ms_jiffies, }, - { .ctl_name = 0, } + { } }; static struct ctl_path dccp_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "dccp", .ctl_name = NET_DCCP, }, - { .procname = "default", .ctl_name = NET_DCCP_DEFAULT, }, + { .procname = "net", }, + { .procname = "dccp", }, + { .procname = "default", }, { } }; diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 6e1f085db06a..1b1daeb151f2 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -89,7 +89,6 @@ static struct dn_dev_parms dn_dev_list[] = { .t2 = 1, .t3 = 10, .name = "ethernet", - .ctl_name = NET_DECNET_CONF_ETHER, .up = dn_eth_up, .down = dn_eth_down, .timer3 = dn_send_brd_hello, @@ -101,7 +100,6 @@ static struct dn_dev_parms dn_dev_list[] = { .t2 = 1, .t3 = 10, .name = "ipgre", - .ctl_name = NET_DECNET_CONF_GRE, .timer3 = dn_send_brd_hello, }, #if 0 @@ -112,7 +110,6 @@ static struct dn_dev_parms dn_dev_list[] = { .t2 = 1, .t3 = 120, .name = "x25", - .ctl_name = NET_DECNET_CONF_X25, .timer3 = dn_send_ptp_hello, }, #endif @@ -124,7 +121,6 @@ static struct dn_dev_parms dn_dev_list[] = { .t2 = 1, .t3 = 10, .name = "ppp", - .ctl_name = NET_DECNET_CONF_PPP, .timer3 = dn_send_brd_hello, }, #endif @@ -135,7 +131,6 @@ static struct dn_dev_parms dn_dev_list[] = { .t2 = 1, .t3 = 120, .name = "ddcmp", - .ctl_name = NET_DECNET_CONF_DDCMP, .timer3 = dn_send_ptp_hello, }, { @@ -145,7 +140,6 @@ static struct dn_dev_parms dn_dev_list[] = { .t2 = 1, .t3 = 10, .name = "loopback", - .ctl_name = NET_DECNET_CONF_LOOPBACK, .timer3 = dn_send_brd_hello, } }; @@ -166,10 +160,6 @@ static int max_priority[] = { 127 }; /* From DECnet spec */ static int dn_forwarding_proc(ctl_table *, int, void __user *, size_t *, loff_t *); -static int dn_forwarding_sysctl(ctl_table *table, - void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen); - static struct dn_dev_sysctl_table { struct ctl_table_header *sysctl_header; ctl_table dn_dev_vars[5]; @@ -177,44 +167,36 @@ static struct dn_dev_sysctl_table { NULL, { { - .ctl_name = NET_DECNET_CONF_DEV_FORWARDING, .procname = "forwarding", .data = (void *)DN_DEV_PARMS_OFFSET(forwarding), .maxlen = sizeof(int), .mode = 0644, .proc_handler = dn_forwarding_proc, - .strategy = dn_forwarding_sysctl, }, { - .ctl_name = NET_DECNET_CONF_DEV_PRIORITY, .procname = "priority", .data = (void *)DN_DEV_PARMS_OFFSET(priority), .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_priority, .extra2 = &max_priority }, { - .ctl_name = NET_DECNET_CONF_DEV_T2, .procname = "t2", .data = (void *)DN_DEV_PARMS_OFFSET(t2), .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_t2, .extra2 = &max_t2 }, { - .ctl_name = NET_DECNET_CONF_DEV_T3, .procname = "t3", .data = (void *)DN_DEV_PARMS_OFFSET(t3), .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_t3, .extra2 = &max_t3 }, @@ -230,9 +212,9 @@ static void dn_dev_sysctl_register(struct net_device *dev, struct dn_dev_parms * #define DN_CTL_PATH_DEV 3 struct ctl_path dn_ctl_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "decnet", .ctl_name = NET_DECNET, }, - { .procname = "conf", .ctl_name = NET_DECNET_CONF, }, + { .procname = "net", }, + { .procname = "decnet", }, + { .procname = "conf", }, { /* to be set */ }, { }, }; @@ -248,10 +230,8 @@ static void dn_dev_sysctl_register(struct net_device *dev, struct dn_dev_parms * if (dev) { dn_ctl_path[DN_CTL_PATH_DEV].procname = dev->name; - dn_ctl_path[DN_CTL_PATH_DEV].ctl_name = dev->ifindex; } else { dn_ctl_path[DN_CTL_PATH_DEV].procname = parms->name; - dn_ctl_path[DN_CTL_PATH_DEV].ctl_name = parms->ctl_name; } t->dn_dev_vars[0].extra1 = (void *)dev; @@ -317,44 +297,6 @@ static int dn_forwarding_proc(ctl_table *table, int write, #endif } -static int dn_forwarding_sysctl(ctl_table *table, - void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen) -{ -#ifdef CONFIG_DECNET_ROUTER - struct net_device *dev = table->extra1; - struct dn_dev *dn_db; - int value; - - if (table->extra1 == NULL) - return -EINVAL; - - dn_db = dev->dn_ptr; - - if (newval && newlen) { - if (newlen != sizeof(int)) - return -EINVAL; - - if (get_user(value, (int __user *)newval)) - return -EFAULT; - if (value < 0) - return -EINVAL; - if (value > 2) - return -EINVAL; - - if (dn_db->parms.down) - dn_db->parms.down(dev); - dn_db->parms.forwarding = value; - if (dn_db->parms.up) - dn_db->parms.up(dev); - } - - return 0; -#else - return -EINVAL; -#endif -} - #else /* CONFIG_SYSCTL */ static void dn_dev_sysctl_unregister(struct dn_dev_parms *parms) { diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c index 26b0ab1e9f56..be3eb8e23288 100644 --- a/net/decnet/sysctl_net_decnet.c +++ b/net/decnet/sysctl_net_decnet.c @@ -131,39 +131,6 @@ static int parse_addr(__le16 *addr, char *str) return 0; } - -static int dn_node_address_strategy(ctl_table *table, - void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen) -{ - size_t len; - __le16 addr; - - if (oldval && oldlenp) { - if (get_user(len, oldlenp)) - return -EFAULT; - if (len) { - if (len != sizeof(unsigned short)) - return -EINVAL; - if (put_user(decnet_address, (__le16 __user *)oldval)) - return -EFAULT; - } - } - if (newval && newlen) { - if (newlen != sizeof(unsigned short)) - return -EINVAL; - if (get_user(addr, (__le16 __user *)newval)) - return -EFAULT; - - dn_dev_devices_off(); - - decnet_address = addr; - - dn_dev_devices_on(); - } - return 0; -} - static int dn_node_address_handler(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -215,65 +182,6 @@ static int dn_node_address_handler(ctl_table *table, int write, return 0; } - -static int dn_def_dev_strategy(ctl_table *table, - void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen) -{ - size_t len; - struct net_device *dev; - char devname[17]; - size_t namel; - int rv = 0; - - devname[0] = 0; - - if (oldval && oldlenp) { - if (get_user(len, oldlenp)) - return -EFAULT; - if (len) { - dev = dn_dev_get_default(); - if (dev) { - strcpy(devname, dev->name); - dev_put(dev); - } - - namel = strlen(devname) + 1; - if (len > namel) len = namel; - - if (copy_to_user(oldval, devname, len)) - return -EFAULT; - - if (put_user(len, oldlenp)) - return -EFAULT; - } - } - - if (newval && newlen) { - if (newlen > 16) - return -E2BIG; - - if (copy_from_user(devname, newval, newlen)) - return -EFAULT; - - devname[newlen] = 0; - - dev = dev_get_by_name(&init_net, devname); - if (dev == NULL) - return -ENODEV; - - rv = -ENODEV; - if (dev->dn_ptr != NULL) { - rv = dn_dev_set_default(dev, 1); - if (rv) - dev_put(dev); - } - } - - return rv; -} - - static int dn_def_dev_handler(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -339,138 +247,112 @@ static int dn_def_dev_handler(ctl_table *table, int write, static ctl_table dn_table[] = { { - .ctl_name = NET_DECNET_NODE_ADDRESS, .procname = "node_address", .maxlen = 7, .mode = 0644, .proc_handler = dn_node_address_handler, - .strategy = dn_node_address_strategy, }, { - .ctl_name = NET_DECNET_NODE_NAME, .procname = "node_name", .data = node_name, .maxlen = 7, .mode = 0644, .proc_handler = proc_dostring, - .strategy = sysctl_string, }, { - .ctl_name = NET_DECNET_DEFAULT_DEVICE, .procname = "default_device", .maxlen = 16, .mode = 0644, .proc_handler = dn_def_dev_handler, - .strategy = dn_def_dev_strategy, }, { - .ctl_name = NET_DECNET_TIME_WAIT, .procname = "time_wait", .data = &decnet_time_wait, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_decnet_time_wait, .extra2 = &max_decnet_time_wait }, { - .ctl_name = NET_DECNET_DN_COUNT, .procname = "dn_count", .data = &decnet_dn_count, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_state_count, .extra2 = &max_state_count }, { - .ctl_name = NET_DECNET_DI_COUNT, .procname = "di_count", .data = &decnet_di_count, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_state_count, .extra2 = &max_state_count }, { - .ctl_name = NET_DECNET_DR_COUNT, .procname = "dr_count", .data = &decnet_dr_count, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_state_count, .extra2 = &max_state_count }, { - .ctl_name = NET_DECNET_DST_GC_INTERVAL, .procname = "dst_gc_interval", .data = &decnet_dst_gc_interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_decnet_dst_gc_interval, .extra2 = &max_decnet_dst_gc_interval }, { - .ctl_name = NET_DECNET_NO_FC_MAX_CWND, .procname = "no_fc_max_cwnd", .data = &decnet_no_fc_max_cwnd, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_decnet_no_fc_max_cwnd, .extra2 = &max_decnet_no_fc_max_cwnd }, { - .ctl_name = NET_DECNET_MEM, .procname = "decnet_mem", .data = &sysctl_decnet_mem, .maxlen = sizeof(sysctl_decnet_mem), .mode = 0644, .proc_handler = proc_dointvec, - .strategy = sysctl_intvec, }, { - .ctl_name = NET_DECNET_RMEM, .procname = "decnet_rmem", .data = &sysctl_decnet_rmem, .maxlen = sizeof(sysctl_decnet_rmem), .mode = 0644, .proc_handler = proc_dointvec, - .strategy = sysctl_intvec, }, { - .ctl_name = NET_DECNET_WMEM, .procname = "decnet_wmem", .data = &sysctl_decnet_wmem, .maxlen = sizeof(sysctl_decnet_wmem), .mode = 0644, .proc_handler = proc_dointvec, - .strategy = sysctl_intvec, }, { - .ctl_name = NET_DECNET_DEBUG_LEVEL, .procname = "debug", .data = &decnet_debug_level, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, - .strategy = sysctl_intvec, }, - {0} + { } }; static struct ctl_path dn_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "decnet", .ctl_name = NET_DECNET, }, + { .procname = "net", }, + { .procname = "decnet", }, { } }; diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 4e80f336c0cf..c95cd93acf29 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1240,7 +1240,7 @@ void __init arp_init(void) arp_proc_init(); #ifdef CONFIG_SYSCTL neigh_sysctl_register(NULL, &arp_tbl.parms, NET_IPV4, - NET_IPV4_NEIGH, "ipv4", NULL, NULL); + NET_IPV4_NEIGH, "ipv4", NULL); #endif register_netdevice_notifier(&arp_netdev_notifier); } diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 5df2f6a0b0f0..e049da8311b5 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1293,58 +1293,6 @@ static int devinet_conf_proc(ctl_table *ctl, int write, return ret; } -static int devinet_conf_sysctl(ctl_table *table, - void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen) -{ - struct ipv4_devconf *cnf; - struct net *net; - int *valp = table->data; - int new; - int i; - - if (!newval || !newlen) - return 0; - - if (newlen != sizeof(int)) - return -EINVAL; - - if (get_user(new, (int __user *)newval)) - return -EFAULT; - - if (new == *valp) - return 0; - - if (oldval && oldlenp) { - size_t len; - - if (get_user(len, oldlenp)) - return -EFAULT; - - if (len) { - if (len > table->maxlen) - len = table->maxlen; - if (copy_to_user(oldval, valp, len)) - return -EFAULT; - if (put_user(len, oldlenp)) - return -EFAULT; - } - } - - *valp = new; - - cnf = table->extra1; - net = table->extra2; - i = (int *)table->data - cnf->data; - - set_bit(i, cnf->state); - - if (cnf == net->ipv4.devconf_dflt) - devinet_copy_dflt_conf(net, i); - - return 1; -} - static int devinet_sysctl_forward(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -1390,47 +1338,28 @@ int ipv4_doint_and_flush(ctl_table *ctl, int write, return ret; } -int ipv4_doint_and_flush_strategy(ctl_table *table, - void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen) -{ - int ret = devinet_conf_sysctl(table, oldval, oldlenp, newval, newlen); - struct net *net = table->extra2; - - if (ret == 1) - rt_cache_flush(net, 0); - - return ret; -} - - -#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc, sysctl) \ +#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \ { \ - .ctl_name = NET_IPV4_CONF_ ## attr, \ .procname = name, \ .data = ipv4_devconf.data + \ NET_IPV4_CONF_ ## attr - 1, \ .maxlen = sizeof(int), \ .mode = mval, \ .proc_handler = proc, \ - .strategy = sysctl, \ .extra1 = &ipv4_devconf, \ } #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \ - DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc, \ - devinet_conf_sysctl) + DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc) #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \ - DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc, \ - devinet_conf_sysctl) + DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc) -#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc, sysctl) \ - DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc, sysctl) +#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \ + DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc) #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \ - DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush, \ - ipv4_doint_and_flush_strategy) + DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush) static struct devinet_sysctl_table { struct ctl_table_header *sysctl_header; @@ -1439,8 +1368,7 @@ static struct devinet_sysctl_table { } devinet_sysctl = { .devinet_vars = { DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding", - devinet_sysctl_forward, - devinet_conf_sysctl), + devinet_sysctl_forward), DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"), DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"), @@ -1471,7 +1399,7 @@ static struct devinet_sysctl_table { }; static int __devinet_sysctl_register(struct net *net, char *dev_name, - int ctl_name, struct ipv4_devconf *p) + struct ipv4_devconf *p) { int i; struct devinet_sysctl_table *t; @@ -1479,9 +1407,9 @@ static int __devinet_sysctl_register(struct net *net, char *dev_name, #define DEVINET_CTL_PATH_DEV 3 struct ctl_path devinet_ctl_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "ipv4", .ctl_name = NET_IPV4, }, - { .procname = "conf", .ctl_name = NET_IPV4_CONF, }, + { .procname = "net", }, + { .procname = "ipv4", }, + { .procname = "conf", }, { /* to be set */ }, { }, }; @@ -1506,7 +1434,6 @@ static int __devinet_sysctl_register(struct net *net, char *dev_name, goto free; devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name; - devinet_ctl_path[DEVINET_CTL_PATH_DEV].ctl_name = ctl_name; t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path, t->devinet_vars); @@ -1540,9 +1467,9 @@ static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf) static void devinet_sysctl_register(struct in_device *idev) { neigh_sysctl_register(idev->dev, idev->arp_parms, NET_IPV4, - NET_IPV4_NEIGH, "ipv4", NULL, NULL); + NET_IPV4_NEIGH, "ipv4", NULL); __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name, - idev->dev->ifindex, &idev->cnf); + &idev->cnf); } static void devinet_sysctl_unregister(struct in_device *idev) @@ -1553,14 +1480,12 @@ static void devinet_sysctl_unregister(struct in_device *idev) static struct ctl_table ctl_forward_entry[] = { { - .ctl_name = NET_IPV4_FORWARD, .procname = "ip_forward", .data = &ipv4_devconf.data[ NET_IPV4_CONF_FORWARDING - 1], .maxlen = sizeof(int), .mode = 0644, .proc_handler = devinet_sysctl_forward, - .strategy = devinet_conf_sysctl, .extra1 = &ipv4_devconf, .extra2 = &init_net, }, @@ -1568,8 +1493,8 @@ static struct ctl_table ctl_forward_entry[] = { }; static __net_initdata struct ctl_path net_ipv4_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "ipv4", .ctl_name = NET_IPV4, }, + { .procname = "net", }, + { .procname = "ipv4", }, { }, }; #endif @@ -1608,13 +1533,11 @@ static __net_init int devinet_init_net(struct net *net) } #ifdef CONFIG_SYSCTL - err = __devinet_sysctl_register(net, "all", - NET_PROTO_CONF_ALL, all); + err = __devinet_sysctl_register(net, "all", all); if (err < 0) goto err_reg_all; - err = __devinet_sysctl_register(net, "default", - NET_PROTO_CONF_DEFAULT, dflt); + err = __devinet_sysctl_register(net, "default", dflt); if (err < 0) goto err_reg_dflt; diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 575f9bd51ccd..ef24497436fd 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -603,7 +603,6 @@ static int zero; static struct ctl_table ip4_frags_ns_ctl_table[] = { { - .ctl_name = NET_IPV4_IPFRAG_HIGH_THRESH, .procname = "ipfrag_high_thresh", .data = &init_net.ipv4.frags.high_thresh, .maxlen = sizeof(int), @@ -611,7 +610,6 @@ static struct ctl_table ip4_frags_ns_ctl_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_IPV4_IPFRAG_LOW_THRESH, .procname = "ipfrag_low_thresh", .data = &init_net.ipv4.frags.low_thresh, .maxlen = sizeof(int), @@ -619,26 +617,22 @@ static struct ctl_table ip4_frags_ns_ctl_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_IPV4_IPFRAG_TIME, .procname = "ipfrag_time", .data = &init_net.ipv4.frags.timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies }, { } }; static struct ctl_table ip4_frags_ctl_table[] = { { - .ctl_name = NET_IPV4_IPFRAG_SECRET_INTERVAL, .procname = "ipfrag_secret_interval", .data = &ip4_frags.secret_interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies }, { .procname = "ipfrag_max_dist", diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 1725dc0ef688..db52c0cb0c11 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -248,9 +248,9 @@ module_exit(ipv4_netfilter_fini); #ifdef CONFIG_SYSCTL struct ctl_path nf_net_ipv4_netfilter_sysctl_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "ipv4", .ctl_name = NET_IPV4, }, - { .procname = "netfilter", .ctl_name = NET_IPV4_NETFILTER, }, + { .procname = "net", }, + { .procname = "ipv4", }, + { .procname = "netfilter", }, { } }; EXPORT_SYMBOL_GPL(nf_net_ipv4_netfilter_sysctl_path); diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index c156db215987..c9f90e8c5191 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -516,14 +516,13 @@ static struct ctl_table_header *ipq_sysctl_header; static ctl_table ipq_table[] = { { - .ctl_name = NET_IPQ_QMAX, .procname = NET_IPQ_QMAX_NAME, .data = &queue_maxlen, .maxlen = sizeof(queue_maxlen), .mode = 0644, .proc_handler = proc_dointvec }, - { .ctl_name = 0 } + { } }; #endif diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index aa95bb82ee6c..092d68f916e6 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -195,7 +195,6 @@ static int log_invalid_proto_max = 255; static ctl_table ip_ct_sysctl_table[] = { { - .ctl_name = NET_IPV4_NF_CONNTRACK_MAX, .procname = "ip_conntrack_max", .data = &nf_conntrack_max, .maxlen = sizeof(int), @@ -203,7 +202,6 @@ static ctl_table ip_ct_sysctl_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV4_NF_CONNTRACK_COUNT, .procname = "ip_conntrack_count", .data = &init_net.ct.count, .maxlen = sizeof(int), @@ -211,7 +209,6 @@ static ctl_table ip_ct_sysctl_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV4_NF_CONNTRACK_BUCKETS, .procname = "ip_conntrack_buckets", .data = &nf_conntrack_htable_size, .maxlen = sizeof(unsigned int), @@ -219,7 +216,6 @@ static ctl_table ip_ct_sysctl_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV4_NF_CONNTRACK_CHECKSUM, .procname = "ip_conntrack_checksum", .data = &init_net.ct.sysctl_checksum, .maxlen = sizeof(int), @@ -227,19 +223,15 @@ static ctl_table ip_ct_sysctl_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV4_NF_CONNTRACK_LOG_INVALID, .procname = "ip_conntrack_log_invalid", .data = &init_net.ct.sysctl_log_invalid, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &log_invalid_proto_min, .extra2 = &log_invalid_proto_max, }, - { - .ctl_name = 0 - } + { } }; #endif /* CONFIG_SYSCTL && CONFIG_NF_CONNTRACK_PROC_COMPAT */ diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index d71ba7677344..9072058778b8 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -270,9 +270,7 @@ static struct ctl_table icmp_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - { - .ctl_name = 0 - } + { } }; #ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT static struct ctl_table icmp_compat_sysctl_table[] = { @@ -283,9 +281,7 @@ static struct ctl_table icmp_compat_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - { - .ctl_name = 0 - } + { } }; #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 5b1050a5d874..0d9f584a3811 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -3056,23 +3056,6 @@ static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write, return -EINVAL; } -static int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table, - void __user *oldval, - size_t __user *oldlenp, - void __user *newval, - size_t newlen) -{ - int delay; - struct net *net; - if (newlen != sizeof(int)) - return -EINVAL; - if (get_user(delay, (int __user *)newval)) - return -EFAULT; - net = (struct net *)table->extra1; - rt_cache_flush(net, delay); - return 0; -} - static void rt_secret_reschedule(int old) { struct net *net; @@ -3117,23 +3100,8 @@ static int ipv4_sysctl_rt_secret_interval(ctl_table *ctl, int write, return ret; } -static int ipv4_sysctl_rt_secret_interval_strategy(ctl_table *table, - void __user *oldval, - size_t __user *oldlenp, - void __user *newval, - size_t newlen) -{ - int old = ip_rt_secret_interval; - int ret = sysctl_jiffies(table, oldval, oldlenp, newval, newlen); - - rt_secret_reschedule(old); - - return ret; -} - static ctl_table ipv4_route_table[] = { { - .ctl_name = NET_IPV4_ROUTE_GC_THRESH, .procname = "gc_thresh", .data = &ipv4_dst_ops.gc_thresh, .maxlen = sizeof(int), @@ -3141,7 +3109,6 @@ static ctl_table ipv4_route_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV4_ROUTE_MAX_SIZE, .procname = "max_size", .data = &ip_rt_max_size, .maxlen = sizeof(int), @@ -3151,43 +3118,34 @@ static ctl_table ipv4_route_table[] = { { /* Deprecated. Use gc_min_interval_ms */ - .ctl_name = NET_IPV4_ROUTE_GC_MIN_INTERVAL, .procname = "gc_min_interval", .data = &ip_rt_gc_min_interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies, }, { - .ctl_name = NET_IPV4_ROUTE_GC_MIN_INTERVAL_MS, .procname = "gc_min_interval_ms", .data = &ip_rt_gc_min_interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_ms_jiffies, - .strategy = sysctl_ms_jiffies, }, { - .ctl_name = NET_IPV4_ROUTE_GC_TIMEOUT, .procname = "gc_timeout", .data = &ip_rt_gc_timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies, }, { - .ctl_name = NET_IPV4_ROUTE_GC_INTERVAL, .procname = "gc_interval", .data = &ip_rt_gc_interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies, }, { - .ctl_name = NET_IPV4_ROUTE_REDIRECT_LOAD, .procname = "redirect_load", .data = &ip_rt_redirect_load, .maxlen = sizeof(int), @@ -3195,7 +3153,6 @@ static ctl_table ipv4_route_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV4_ROUTE_REDIRECT_NUMBER, .procname = "redirect_number", .data = &ip_rt_redirect_number, .maxlen = sizeof(int), @@ -3203,7 +3160,6 @@ static ctl_table ipv4_route_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV4_ROUTE_REDIRECT_SILENCE, .procname = "redirect_silence", .data = &ip_rt_redirect_silence, .maxlen = sizeof(int), @@ -3211,7 +3167,6 @@ static ctl_table ipv4_route_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV4_ROUTE_ERROR_COST, .procname = "error_cost", .data = &ip_rt_error_cost, .maxlen = sizeof(int), @@ -3219,7 +3174,6 @@ static ctl_table ipv4_route_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV4_ROUTE_ERROR_BURST, .procname = "error_burst", .data = &ip_rt_error_burst, .maxlen = sizeof(int), @@ -3227,7 +3181,6 @@ static ctl_table ipv4_route_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV4_ROUTE_GC_ELASTICITY, .procname = "gc_elasticity", .data = &ip_rt_gc_elasticity, .maxlen = sizeof(int), @@ -3235,16 +3188,13 @@ static ctl_table ipv4_route_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV4_ROUTE_MTU_EXPIRES, .procname = "mtu_expires", .data = &ip_rt_mtu_expires, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies, }, { - .ctl_name = NET_IPV4_ROUTE_MIN_PMTU, .procname = "min_pmtu", .data = &ip_rt_min_pmtu, .maxlen = sizeof(int), @@ -3252,7 +3202,6 @@ static ctl_table ipv4_route_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV4_ROUTE_MIN_ADVMSS, .procname = "min_adv_mss", .data = &ip_rt_min_advmss, .maxlen = sizeof(int), @@ -3260,50 +3209,46 @@ static ctl_table ipv4_route_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV4_ROUTE_SECRET_INTERVAL, .procname = "secret_interval", .data = &ip_rt_secret_interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = ipv4_sysctl_rt_secret_interval, - .strategy = ipv4_sysctl_rt_secret_interval_strategy, }, - { .ctl_name = 0 } + { } }; static struct ctl_table empty[1]; static struct ctl_table ipv4_skeleton[] = { - { .procname = "route", .ctl_name = NET_IPV4_ROUTE, + { .procname = "route", .mode = 0555, .child = ipv4_route_table}, - { .procname = "neigh", .ctl_name = NET_IPV4_NEIGH, + { .procname = "neigh", .mode = 0555, .child = empty}, { } }; static __net_initdata struct ctl_path ipv4_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "ipv4", .ctl_name = NET_IPV4, }, + { .procname = "net", }, + { .procname = "ipv4", }, { }, }; static struct ctl_table ipv4_route_flush_table[] = { { - .ctl_name = NET_IPV4_ROUTE_FLUSH, .procname = "flush", .maxlen = sizeof(int), .mode = 0200, .proc_handler = ipv4_sysctl_rtcache_flush, - .strategy = ipv4_sysctl_rtcache_flush_strategy, }, - { .ctl_name = 0 }, + { }, }; static __net_initdata struct ctl_path ipv4_route_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "ipv4", .ctl_name = NET_IPV4, }, - { .procname = "route", .ctl_name = NET_IPV4_ROUTE, }, + { .procname = "net", }, + { .procname = "ipv4", }, + { .procname = "route", }, { }, }; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 2dcf04d9b005..300056732953 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -63,34 +63,6 @@ static int ipv4_local_port_range(ctl_table *table, int write, return ret; } -/* Validate changes from sysctl interface. */ -static int ipv4_sysctl_local_port_range(ctl_table *table, - void __user *oldval, - size_t __user *oldlenp, - void __user *newval, size_t newlen) -{ - int ret; - int range[2]; - ctl_table tmp = { - .data = &range, - .maxlen = sizeof(range), - .mode = table->mode, - .extra1 = &ip_local_port_range_min, - .extra2 = &ip_local_port_range_max, - }; - - inet_get_local_port_range(range, range + 1); - ret = sysctl_intvec(&tmp, oldval, oldlenp, newval, newlen); - if (ret == 0 && newval && newlen) { - if (range[1] < range[0]) - ret = -EINVAL; - else - set_local_port_range(range); - } - return ret; -} - - static int proc_tcp_congestion_control(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -109,25 +81,6 @@ static int proc_tcp_congestion_control(ctl_table *ctl, int write, return ret; } -static int sysctl_tcp_congestion_control(ctl_table *table, - void __user *oldval, - size_t __user *oldlenp, - void __user *newval, size_t newlen) -{ - char val[TCP_CA_NAME_MAX]; - ctl_table tbl = { - .data = val, - .maxlen = TCP_CA_NAME_MAX, - }; - int ret; - - tcp_get_default_congestion_control(val); - ret = sysctl_string(&tbl, oldval, oldlenp, newval, newlen); - if (ret == 1 && newval && newlen) - ret = tcp_set_default_congestion_control(val); - return ret; -} - static int proc_tcp_available_congestion_control(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, @@ -165,32 +118,8 @@ static int proc_allowed_congestion_control(ctl_table *ctl, return ret; } -static int strategy_allowed_congestion_control(ctl_table *table, - void __user *oldval, - size_t __user *oldlenp, - void __user *newval, - size_t newlen) -{ - ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX }; - int ret; - - tbl.data = kmalloc(tbl.maxlen, GFP_USER); - if (!tbl.data) - return -ENOMEM; - - tcp_get_available_congestion_control(tbl.data, tbl.maxlen); - ret = sysctl_string(&tbl, oldval, oldlenp, newval, newlen); - if (ret == 1 && newval && newlen) - ret = tcp_set_allowed_congestion_control(tbl.data); - kfree(tbl.data); - - return ret; - -} - static struct ctl_table ipv4_table[] = { { - .ctl_name = NET_IPV4_TCP_TIMESTAMPS, .procname = "tcp_timestamps", .data = &sysctl_tcp_timestamps, .maxlen = sizeof(int), @@ -198,7 +127,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_IPV4_TCP_WINDOW_SCALING, .procname = "tcp_window_scaling", .data = &sysctl_tcp_window_scaling, .maxlen = sizeof(int), @@ -206,7 +134,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_IPV4_TCP_SACK, .procname = "tcp_sack", .data = &sysctl_tcp_sack, .maxlen = sizeof(int), @@ -214,7 +141,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_IPV4_TCP_RETRANS_COLLAPSE, .procname = "tcp_retrans_collapse", .data = &sysctl_tcp_retrans_collapse, .maxlen = sizeof(int), @@ -222,17 +148,14 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_IPV4_DEFAULT_TTL, .procname = "ip_default_ttl", .data = &sysctl_ip_default_ttl, .maxlen = sizeof(int), .mode = 0644, .proc_handler = ipv4_doint_and_flush, - .strategy = ipv4_doint_and_flush_strategy, .extra2 = &init_net, }, { - .ctl_name = NET_IPV4_NO_PMTU_DISC, .procname = "ip_no_pmtu_disc", .data = &ipv4_config.no_pmtu_disc, .maxlen = sizeof(int), @@ -240,7 +163,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_IPV4_NONLOCAL_BIND, .procname = "ip_nonlocal_bind", .data = &sysctl_ip_nonlocal_bind, .maxlen = sizeof(int), @@ -248,7 +170,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_IPV4_TCP_SYN_RETRIES, .procname = "tcp_syn_retries", .data = &sysctl_tcp_syn_retries, .maxlen = sizeof(int), @@ -256,7 +177,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_TCP_SYNACK_RETRIES, .procname = "tcp_synack_retries", .data = &sysctl_tcp_synack_retries, .maxlen = sizeof(int), @@ -264,7 +184,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_TCP_MAX_ORPHANS, .procname = "tcp_max_orphans", .data = &sysctl_tcp_max_orphans, .maxlen = sizeof(int), @@ -272,7 +191,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_TCP_MAX_TW_BUCKETS, .procname = "tcp_max_tw_buckets", .data = &tcp_death_row.sysctl_max_tw_buckets, .maxlen = sizeof(int), @@ -280,7 +198,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_IPV4_DYNADDR, .procname = "ip_dynaddr", .data = &sysctl_ip_dynaddr, .maxlen = sizeof(int), @@ -288,16 +205,13 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_IPV4_TCP_KEEPALIVE_TIME, .procname = "tcp_keepalive_time", .data = &sysctl_tcp_keepalive_time, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies }, { - .ctl_name = NET_IPV4_TCP_KEEPALIVE_PROBES, .procname = "tcp_keepalive_probes", .data = &sysctl_tcp_keepalive_probes, .maxlen = sizeof(int), @@ -305,26 +219,21 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_IPV4_TCP_KEEPALIVE_INTVL, .procname = "tcp_keepalive_intvl", .data = &sysctl_tcp_keepalive_intvl, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies }, { - .ctl_name = NET_IPV4_TCP_RETRIES1, .procname = "tcp_retries1", .data = &sysctl_tcp_retries1, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra2 = &tcp_retr1_max }, { - .ctl_name = NET_IPV4_TCP_RETRIES2, .procname = "tcp_retries2", .data = &sysctl_tcp_retries2, .maxlen = sizeof(int), @@ -332,17 +241,14 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_IPV4_TCP_FIN_TIMEOUT, .procname = "tcp_fin_timeout", .data = &sysctl_tcp_fin_timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies }, #ifdef CONFIG_SYN_COOKIES { - .ctl_name = NET_TCP_SYNCOOKIES, .procname = "tcp_syncookies", .data = &sysctl_tcp_syncookies, .maxlen = sizeof(int), @@ -351,7 +257,6 @@ static struct ctl_table ipv4_table[] = { }, #endif { - .ctl_name = NET_TCP_TW_RECYCLE, .procname = "tcp_tw_recycle", .data = &tcp_death_row.sysctl_tw_recycle, .maxlen = sizeof(int), @@ -359,7 +264,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_TCP_ABORT_ON_OVERFLOW, .procname = "tcp_abort_on_overflow", .data = &sysctl_tcp_abort_on_overflow, .maxlen = sizeof(int), @@ -367,7 +271,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_TCP_STDURG, .procname = "tcp_stdurg", .data = &sysctl_tcp_stdurg, .maxlen = sizeof(int), @@ -375,7 +278,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_TCP_RFC1337, .procname = "tcp_rfc1337", .data = &sysctl_tcp_rfc1337, .maxlen = sizeof(int), @@ -383,7 +285,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_TCP_MAX_SYN_BACKLOG, .procname = "tcp_max_syn_backlog", .data = &sysctl_max_syn_backlog, .maxlen = sizeof(int), @@ -391,17 +292,14 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_IPV4_LOCAL_PORT_RANGE, .procname = "ip_local_port_range", .data = &sysctl_local_ports.range, .maxlen = sizeof(sysctl_local_ports.range), .mode = 0644, .proc_handler = ipv4_local_port_range, - .strategy = ipv4_sysctl_local_port_range, }, #ifdef CONFIG_IP_MULTICAST { - .ctl_name = NET_IPV4_IGMP_MAX_MEMBERSHIPS, .procname = "igmp_max_memberships", .data = &sysctl_igmp_max_memberships, .maxlen = sizeof(int), @@ -411,7 +309,6 @@ static struct ctl_table ipv4_table[] = { #endif { - .ctl_name = NET_IPV4_IGMP_MAX_MSF, .procname = "igmp_max_msf", .data = &sysctl_igmp_max_msf, .maxlen = sizeof(int), @@ -419,7 +316,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_IPV4_INET_PEER_THRESHOLD, .procname = "inet_peer_threshold", .data = &inet_peer_threshold, .maxlen = sizeof(int), @@ -427,43 +323,34 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_IPV4_INET_PEER_MINTTL, .procname = "inet_peer_minttl", .data = &inet_peer_minttl, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies }, { - .ctl_name = NET_IPV4_INET_PEER_MAXTTL, .procname = "inet_peer_maxttl", .data = &inet_peer_maxttl, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies }, { - .ctl_name = NET_IPV4_INET_PEER_GC_MINTIME, .procname = "inet_peer_gc_mintime", .data = &inet_peer_gc_mintime, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies }, { - .ctl_name = NET_IPV4_INET_PEER_GC_MAXTIME, .procname = "inet_peer_gc_maxtime", .data = &inet_peer_gc_maxtime, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies }, { - .ctl_name = NET_TCP_ORPHAN_RETRIES, .procname = "tcp_orphan_retries", .data = &sysctl_tcp_orphan_retries, .maxlen = sizeof(int), @@ -471,7 +358,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_TCP_FACK, .procname = "tcp_fack", .data = &sysctl_tcp_fack, .maxlen = sizeof(int), @@ -479,7 +365,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_TCP_REORDERING, .procname = "tcp_reordering", .data = &sysctl_tcp_reordering, .maxlen = sizeof(int), @@ -487,7 +372,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_TCP_ECN, .procname = "tcp_ecn", .data = &sysctl_tcp_ecn, .maxlen = sizeof(int), @@ -495,7 +379,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_TCP_DSACK, .procname = "tcp_dsack", .data = &sysctl_tcp_dsack, .maxlen = sizeof(int), @@ -503,7 +386,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_TCP_MEM, .procname = "tcp_mem", .data = &sysctl_tcp_mem, .maxlen = sizeof(sysctl_tcp_mem), @@ -511,7 +393,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_TCP_WMEM, .procname = "tcp_wmem", .data = &sysctl_tcp_wmem, .maxlen = sizeof(sysctl_tcp_wmem), @@ -519,7 +400,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_TCP_RMEM, .procname = "tcp_rmem", .data = &sysctl_tcp_rmem, .maxlen = sizeof(sysctl_tcp_rmem), @@ -527,7 +407,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_TCP_APP_WIN, .procname = "tcp_app_win", .data = &sysctl_tcp_app_win, .maxlen = sizeof(int), @@ -535,7 +414,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_TCP_ADV_WIN_SCALE, .procname = "tcp_adv_win_scale", .data = &sysctl_tcp_adv_win_scale, .maxlen = sizeof(int), @@ -543,7 +421,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_TCP_TW_REUSE, .procname = "tcp_tw_reuse", .data = &sysctl_tcp_tw_reuse, .maxlen = sizeof(int), @@ -551,7 +428,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_TCP_FRTO, .procname = "tcp_frto", .data = &sysctl_tcp_frto, .maxlen = sizeof(int), @@ -559,7 +435,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_TCP_FRTO_RESPONSE, .procname = "tcp_frto_response", .data = &sysctl_tcp_frto_response, .maxlen = sizeof(int), @@ -567,7 +442,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_TCP_LOW_LATENCY, .procname = "tcp_low_latency", .data = &sysctl_tcp_low_latency, .maxlen = sizeof(int), @@ -575,7 +449,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_TCP_NO_METRICS_SAVE, .procname = "tcp_no_metrics_save", .data = &sysctl_tcp_nometrics_save, .maxlen = sizeof(int), @@ -583,7 +456,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_TCP_MODERATE_RCVBUF, .procname = "tcp_moderate_rcvbuf", .data = &sysctl_tcp_moderate_rcvbuf, .maxlen = sizeof(int), @@ -591,7 +463,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_TCP_TSO_WIN_DIVISOR, .procname = "tcp_tso_win_divisor", .data = &sysctl_tcp_tso_win_divisor, .maxlen = sizeof(int), @@ -599,15 +470,12 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_TCP_CONG_CONTROL, .procname = "tcp_congestion_control", .mode = 0644, .maxlen = TCP_CA_NAME_MAX, .proc_handler = proc_tcp_congestion_control, - .strategy = sysctl_tcp_congestion_control, }, { - .ctl_name = NET_TCP_ABC, .procname = "tcp_abc", .data = &sysctl_tcp_abc, .maxlen = sizeof(int), @@ -615,7 +483,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_TCP_MTU_PROBING, .procname = "tcp_mtu_probing", .data = &sysctl_tcp_mtu_probing, .maxlen = sizeof(int), @@ -623,7 +490,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_TCP_BASE_MSS, .procname = "tcp_base_mss", .data = &sysctl_tcp_base_mss, .maxlen = sizeof(int), @@ -631,7 +497,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV4_TCP_WORKAROUND_SIGNED_WINDOWS, .procname = "tcp_workaround_signed_windows", .data = &sysctl_tcp_workaround_signed_windows, .maxlen = sizeof(int), @@ -640,7 +505,6 @@ static struct ctl_table ipv4_table[] = { }, #ifdef CONFIG_NET_DMA { - .ctl_name = NET_TCP_DMA_COPYBREAK, .procname = "tcp_dma_copybreak", .data = &sysctl_tcp_dma_copybreak, .maxlen = sizeof(int), @@ -649,7 +513,6 @@ static struct ctl_table ipv4_table[] = { }, #endif { - .ctl_name = NET_TCP_SLOW_START_AFTER_IDLE, .procname = "tcp_slow_start_after_idle", .data = &sysctl_tcp_slow_start_after_idle, .maxlen = sizeof(int), @@ -658,7 +521,6 @@ static struct ctl_table ipv4_table[] = { }, #ifdef CONFIG_NETLABEL { - .ctl_name = NET_CIPSOV4_CACHE_ENABLE, .procname = "cipso_cache_enable", .data = &cipso_v4_cache_enabled, .maxlen = sizeof(int), @@ -666,7 +528,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_CIPSOV4_CACHE_BUCKET_SIZE, .procname = "cipso_cache_bucket_size", .data = &cipso_v4_cache_bucketsize, .maxlen = sizeof(int), @@ -674,7 +535,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_CIPSOV4_RBM_OPTFMT, .procname = "cipso_rbm_optfmt", .data = &cipso_v4_rbm_optfmt, .maxlen = sizeof(int), @@ -682,7 +542,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_CIPSOV4_RBM_STRICTVALID, .procname = "cipso_rbm_strictvalid", .data = &cipso_v4_rbm_strictvalid, .maxlen = sizeof(int), @@ -697,15 +556,12 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_tcp_available_congestion_control, }, { - .ctl_name = NET_TCP_ALLOWED_CONG_CONTROL, .procname = "tcp_allowed_congestion_control", .maxlen = TCP_CA_BUF_MAX, .mode = 0644, .proc_handler = proc_allowed_congestion_control, - .strategy = strategy_allowed_congestion_control, }, { - .ctl_name = NET_TCP_MAX_SSTHRESH, .procname = "tcp_max_ssthresh", .data = &sysctl_tcp_max_ssthresh, .maxlen = sizeof(int), @@ -713,41 +569,34 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "udp_mem", .data = &sysctl_udp_mem, .maxlen = sizeof(sysctl_udp_mem), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &zero }, { - .ctl_name = CTL_UNNUMBERED, .procname = "udp_rmem_min", .data = &sysctl_udp_rmem_min, .maxlen = sizeof(sysctl_udp_rmem_min), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &zero }, { - .ctl_name = CTL_UNNUMBERED, .procname = "udp_wmem_min", .data = &sysctl_udp_wmem_min, .maxlen = sizeof(sysctl_udp_wmem_min), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &zero }, - { .ctl_name = 0 } + { } }; static struct ctl_table ipv4_net_table[] = { { - .ctl_name = NET_IPV4_ICMP_ECHO_IGNORE_ALL, .procname = "icmp_echo_ignore_all", .data = &init_net.ipv4.sysctl_icmp_echo_ignore_all, .maxlen = sizeof(int), @@ -755,7 +604,6 @@ static struct ctl_table ipv4_net_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_IPV4_ICMP_ECHO_IGNORE_BROADCASTS, .procname = "icmp_echo_ignore_broadcasts", .data = &init_net.ipv4.sysctl_icmp_echo_ignore_broadcasts, .maxlen = sizeof(int), @@ -763,7 +611,6 @@ static struct ctl_table ipv4_net_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_IPV4_ICMP_IGNORE_BOGUS_ERROR_RESPONSES, .procname = "icmp_ignore_bogus_error_responses", .data = &init_net.ipv4.sysctl_icmp_ignore_bogus_error_responses, .maxlen = sizeof(int), @@ -771,7 +618,6 @@ static struct ctl_table ipv4_net_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR, .procname = "icmp_errors_use_inbound_ifaddr", .data = &init_net.ipv4.sysctl_icmp_errors_use_inbound_ifaddr, .maxlen = sizeof(int), @@ -779,16 +625,13 @@ static struct ctl_table ipv4_net_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_IPV4_ICMP_RATELIMIT, .procname = "icmp_ratelimit", .data = &init_net.ipv4.sysctl_icmp_ratelimit, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_ms_jiffies, - .strategy = sysctl_ms_jiffies }, { - .ctl_name = NET_IPV4_ICMP_RATEMASK, .procname = "icmp_ratemask", .data = &init_net.ipv4.sysctl_icmp_ratemask, .maxlen = sizeof(int), @@ -796,7 +639,6 @@ static struct ctl_table ipv4_net_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = CTL_UNNUMBERED, .procname = "rt_cache_rebuild_count", .data = &init_net.ipv4.sysctl_rt_cache_rebuild_count, .maxlen = sizeof(int), @@ -807,8 +649,8 @@ static struct ctl_table ipv4_net_table[] = { }; struct ctl_path net_ipv4_ctl_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "ipv4", .ctl_name = NET_IPV4, }, + { .procname = "net", }, + { .procname = "ipv4", }, { }, }; EXPORT_SYMBOL_GPL(net_ipv4_ctl_path); diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 74fb2eb833ec..8c08a28d8f83 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -267,7 +267,6 @@ static struct xfrm_policy_afinfo xfrm4_policy_afinfo = { #ifdef CONFIG_SYSCTL static struct ctl_table xfrm4_policy_table[] = { { - .ctl_name = CTL_UNNUMBERED, .procname = "xfrm4_gc_thresh", .data = &xfrm4_dst_ops.gc_thresh, .maxlen = sizeof(int), diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 1fd0a3d775d2..f918399c985c 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4000,41 +4000,6 @@ int addrconf_sysctl_forward(ctl_table *ctl, int write, return ret; } -static int addrconf_sysctl_forward_strategy(ctl_table *table, - void __user *oldval, - size_t __user *oldlenp, - void __user *newval, size_t newlen) -{ - int *valp = table->data; - int val = *valp; - int new; - - if (!newval || !newlen) - return 0; - if (newlen != sizeof(int)) - return -EINVAL; - if (get_user(new, (int __user *)newval)) - return -EFAULT; - if (new == *valp) - return 0; - if (oldval && oldlenp) { - size_t len; - if (get_user(len, oldlenp)) - return -EFAULT; - if (len) { - if (len > table->maxlen) - len = table->maxlen; - if (copy_to_user(oldval, valp, len)) - return -EFAULT; - if (put_user(len, oldlenp)) - return -EFAULT; - } - } - - *valp = new; - return addrconf_fixup_forwarding(table, valp, val); -} - static void dev_disable_change(struct inet6_dev *idev) { if (!idev || !idev->dev) @@ -4113,16 +4078,13 @@ static struct addrconf_sysctl_table .sysctl_header = NULL, .addrconf_vars = { { - .ctl_name = NET_IPV6_FORWARDING, .procname = "forwarding", .data = &ipv6_devconf.forwarding, .maxlen = sizeof(int), .mode = 0644, .proc_handler = addrconf_sysctl_forward, - .strategy = addrconf_sysctl_forward_strategy, }, { - .ctl_name = NET_IPV6_HOP_LIMIT, .procname = "hop_limit", .data = &ipv6_devconf.hop_limit, .maxlen = sizeof(int), @@ -4130,7 +4092,6 @@ static struct addrconf_sysctl_table .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV6_MTU, .procname = "mtu", .data = &ipv6_devconf.mtu6, .maxlen = sizeof(int), @@ -4138,7 +4099,6 @@ static struct addrconf_sysctl_table .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV6_ACCEPT_RA, .procname = "accept_ra", .data = &ipv6_devconf.accept_ra, .maxlen = sizeof(int), @@ -4146,7 +4106,6 @@ static struct addrconf_sysctl_table .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV6_ACCEPT_REDIRECTS, .procname = "accept_redirects", .data = &ipv6_devconf.accept_redirects, .maxlen = sizeof(int), @@ -4154,7 +4113,6 @@ static struct addrconf_sysctl_table .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV6_AUTOCONF, .procname = "autoconf", .data = &ipv6_devconf.autoconf, .maxlen = sizeof(int), @@ -4162,7 +4120,6 @@ static struct addrconf_sysctl_table .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV6_DAD_TRANSMITS, .procname = "dad_transmits", .data = &ipv6_devconf.dad_transmits, .maxlen = sizeof(int), @@ -4170,7 +4127,6 @@ static struct addrconf_sysctl_table .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV6_RTR_SOLICITS, .procname = "router_solicitations", .data = &ipv6_devconf.rtr_solicits, .maxlen = sizeof(int), @@ -4178,25 +4134,20 @@ static struct addrconf_sysctl_table .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV6_RTR_SOLICIT_INTERVAL, .procname = "router_solicitation_interval", .data = &ipv6_devconf.rtr_solicit_interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies, }, { - .ctl_name = NET_IPV6_RTR_SOLICIT_DELAY, .procname = "router_solicitation_delay", .data = &ipv6_devconf.rtr_solicit_delay, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies, }, { - .ctl_name = NET_IPV6_FORCE_MLD_VERSION, .procname = "force_mld_version", .data = &ipv6_devconf.force_mld_version, .maxlen = sizeof(int), @@ -4205,7 +4156,6 @@ static struct addrconf_sysctl_table }, #ifdef CONFIG_IPV6_PRIVACY { - .ctl_name = NET_IPV6_USE_TEMPADDR, .procname = "use_tempaddr", .data = &ipv6_devconf.use_tempaddr, .maxlen = sizeof(int), @@ -4213,7 +4163,6 @@ static struct addrconf_sysctl_table .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV6_TEMP_VALID_LFT, .procname = "temp_valid_lft", .data = &ipv6_devconf.temp_valid_lft, .maxlen = sizeof(int), @@ -4221,7 +4170,6 @@ static struct addrconf_sysctl_table .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV6_TEMP_PREFERED_LFT, .procname = "temp_prefered_lft", .data = &ipv6_devconf.temp_prefered_lft, .maxlen = sizeof(int), @@ -4229,7 +4177,6 @@ static struct addrconf_sysctl_table .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV6_REGEN_MAX_RETRY, .procname = "regen_max_retry", .data = &ipv6_devconf.regen_max_retry, .maxlen = sizeof(int), @@ -4237,7 +4184,6 @@ static struct addrconf_sysctl_table .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV6_MAX_DESYNC_FACTOR, .procname = "max_desync_factor", .data = &ipv6_devconf.max_desync_factor, .maxlen = sizeof(int), @@ -4246,7 +4192,6 @@ static struct addrconf_sysctl_table }, #endif { - .ctl_name = NET_IPV6_MAX_ADDRESSES, .procname = "max_addresses", .data = &ipv6_devconf.max_addresses, .maxlen = sizeof(int), @@ -4254,7 +4199,6 @@ static struct addrconf_sysctl_table .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV6_ACCEPT_RA_DEFRTR, .procname = "accept_ra_defrtr", .data = &ipv6_devconf.accept_ra_defrtr, .maxlen = sizeof(int), @@ -4262,7 +4206,6 @@ static struct addrconf_sysctl_table .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV6_ACCEPT_RA_PINFO, .procname = "accept_ra_pinfo", .data = &ipv6_devconf.accept_ra_pinfo, .maxlen = sizeof(int), @@ -4271,7 +4214,6 @@ static struct addrconf_sysctl_table }, #ifdef CONFIG_IPV6_ROUTER_PREF { - .ctl_name = NET_IPV6_ACCEPT_RA_RTR_PREF, .procname = "accept_ra_rtr_pref", .data = &ipv6_devconf.accept_ra_rtr_pref, .maxlen = sizeof(int), @@ -4279,17 +4221,14 @@ static struct addrconf_sysctl_table .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV6_RTR_PROBE_INTERVAL, .procname = "router_probe_interval", .data = &ipv6_devconf.rtr_probe_interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies, }, #ifdef CONFIG_IPV6_ROUTE_INFO { - .ctl_name = NET_IPV6_ACCEPT_RA_RT_INFO_MAX_PLEN, .procname = "accept_ra_rt_info_max_plen", .data = &ipv6_devconf.accept_ra_rt_info_max_plen, .maxlen = sizeof(int), @@ -4299,7 +4238,6 @@ static struct addrconf_sysctl_table #endif #endif { - .ctl_name = NET_IPV6_PROXY_NDP, .procname = "proxy_ndp", .data = &ipv6_devconf.proxy_ndp, .maxlen = sizeof(int), @@ -4307,7 +4245,6 @@ static struct addrconf_sysctl_table .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV6_ACCEPT_SOURCE_ROUTE, .procname = "accept_source_route", .data = &ipv6_devconf.accept_source_route, .maxlen = sizeof(int), @@ -4316,7 +4253,6 @@ static struct addrconf_sysctl_table }, #ifdef CONFIG_IPV6_OPTIMISTIC_DAD { - .ctl_name = CTL_UNNUMBERED, .procname = "optimistic_dad", .data = &ipv6_devconf.optimistic_dad, .maxlen = sizeof(int), @@ -4327,7 +4263,6 @@ static struct addrconf_sysctl_table #endif #ifdef CONFIG_IPV6_MROUTE { - .ctl_name = CTL_UNNUMBERED, .procname = "mc_forwarding", .data = &ipv6_devconf.mc_forwarding, .maxlen = sizeof(int), @@ -4336,16 +4271,13 @@ static struct addrconf_sysctl_table }, #endif { - .ctl_name = CTL_UNNUMBERED, .procname = "disable_ipv6", .data = &ipv6_devconf.disable_ipv6, .maxlen = sizeof(int), .mode = 0644, .proc_handler = addrconf_sysctl_disable, - .strategy = sysctl_intvec, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "accept_dad", .data = &ipv6_devconf.accept_dad, .maxlen = sizeof(int), @@ -4353,13 +4285,13 @@ static struct addrconf_sysctl_table .proc_handler = proc_dointvec, }, { - .ctl_name = 0, /* sentinel */ + /* sentinel */ } }, }; static int __addrconf_sysctl_register(struct net *net, char *dev_name, - int ctl_name, struct inet6_dev *idev, struct ipv6_devconf *p) + struct inet6_dev *idev, struct ipv6_devconf *p) { int i; struct addrconf_sysctl_table *t; @@ -4367,9 +4299,9 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name, #define ADDRCONF_CTL_PATH_DEV 3 struct ctl_path addrconf_ctl_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "ipv6", .ctl_name = NET_IPV6, }, - { .procname = "conf", .ctl_name = NET_IPV6_CONF, }, + { .procname = "net", }, + { .procname = "ipv6", }, + { .procname = "conf", }, { /* to be set */ }, { }, }; @@ -4395,7 +4327,6 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name, goto free; addrconf_ctl_path[ADDRCONF_CTL_PATH_DEV].procname = t->dev_name; - addrconf_ctl_path[ADDRCONF_CTL_PATH_DEV].ctl_name = ctl_name; t->sysctl_header = register_net_sysctl_table(net, addrconf_ctl_path, t->addrconf_vars); @@ -4431,10 +4362,9 @@ static void addrconf_sysctl_register(struct inet6_dev *idev) { neigh_sysctl_register(idev->dev, idev->nd_parms, NET_IPV6, NET_IPV6_NEIGH, "ipv6", - &ndisc_ifinfo_sysctl_change, - ndisc_ifinfo_sysctl_strategy); + &ndisc_ifinfo_sysctl_change); __addrconf_sysctl_register(dev_net(idev->dev), idev->dev->name, - idev->dev->ifindex, idev, &idev->cnf); + idev, &idev->cnf); } static void addrconf_sysctl_unregister(struct inet6_dev *idev) @@ -4473,13 +4403,11 @@ static int addrconf_init_net(struct net *net) net->ipv6.devconf_dflt = dflt; #ifdef CONFIG_SYSCTL - err = __addrconf_sysctl_register(net, "all", NET_PROTO_CONF_ALL, - NULL, all); + err = __addrconf_sysctl_register(net, "all", NULL, all); if (err < 0) goto err_reg_all; - err = __addrconf_sysctl_register(net, "default", NET_PROTO_CONF_DEFAULT, - NULL, dflt); + err = __addrconf_sysctl_register(net, "default", NULL, dflt); if (err < 0) goto err_reg_dflt; #endif diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index f23ebbec0631..4ae661bc3677 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -942,15 +942,13 @@ EXPORT_SYMBOL(icmpv6_err_convert); #ifdef CONFIG_SYSCTL ctl_table ipv6_icmp_table_template[] = { { - .ctl_name = NET_IPV6_ICMP_RATELIMIT, .procname = "ratelimit", .data = &init_net.ipv6.sysctl.icmpv6_time, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_ms_jiffies, - .strategy = sysctl_ms_jiffies }, - { .ctl_name = 0 }, + { }, }; struct ctl_table *ipv6_icmp_sysctl_init(struct net *net) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index f74e4e2cdd06..3d0520e455d8 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1768,42 +1768,6 @@ int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, void __user *bu return ret; } -int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, - void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen) -{ - struct net_device *dev = ctl->extra1; - struct inet6_dev *idev; - int ret; - - if (ctl->ctl_name == NET_NEIGH_RETRANS_TIME || - ctl->ctl_name == NET_NEIGH_REACHABLE_TIME) - ndisc_warn_deprecated_sysctl(ctl, "procfs", dev ? dev->name : "default"); - - switch (ctl->ctl_name) { - case NET_NEIGH_REACHABLE_TIME: - ret = sysctl_jiffies(ctl, oldval, oldlenp, newval, newlen); - break; - case NET_NEIGH_RETRANS_TIME_MS: - case NET_NEIGH_REACHABLE_TIME_MS: - ret = sysctl_ms_jiffies(ctl, oldval, oldlenp, newval, newlen); - break; - default: - ret = 0; - } - - if (newval && newlen && ret > 0 && - dev && (idev = in6_dev_get(dev)) != NULL) { - if (ctl->ctl_name == NET_NEIGH_REACHABLE_TIME || - ctl->ctl_name == NET_NEIGH_REACHABLE_TIME_MS) - idev->nd_parms->reachable_time = neigh_rand_reach_time(idev->nd_parms->base_reachable_time); - idev->tstamp = jiffies; - inet6_ifinfo_notify(RTM_NEWLINK, idev); - in6_dev_put(idev); - } - - return ret; -} #endif @@ -1857,8 +1821,7 @@ int __init ndisc_init(void) #ifdef CONFIG_SYSCTL err = neigh_sysctl_register(NULL, &nd_tbl.parms, NET_IPV6, NET_IPV6_NEIGH, "ipv6", - &ndisc_ifinfo_sysctl_change, - &ndisc_ifinfo_sysctl_strategy); + &ndisc_ifinfo_sysctl_change); if (err) goto out_unregister_pernet; #endif diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 1cf3f0c6a959..14e52aa624c2 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -36,7 +36,6 @@ #define IPQ_QMAX_DEFAULT 1024 #define IPQ_PROC_FS_NAME "ip6_queue" -#define NET_IPQ_QMAX 2088 #define NET_IPQ_QMAX_NAME "ip6_queue_maxlen" typedef int (*ipq_cmpfn)(struct nf_queue_entry *, unsigned long); @@ -518,14 +517,13 @@ static struct ctl_table_header *ipq_sysctl_header; static ctl_table ipq_table[] = { { - .ctl_name = NET_IPQ_QMAX, .procname = NET_IPQ_QMAX_NAME, .data = &queue_maxlen, .maxlen = sizeof(queue_maxlen), .mode = 0644, .proc_handler = proc_dointvec }, - { .ctl_name = 0 } + { } }; #endif diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 642dcb127bab..2acadc8c7883 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -277,9 +277,7 @@ static struct ctl_table icmpv6_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - { - .ctl_name = 0 - } + { } }; #endif /* CONFIG_SYSCTL */ diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index f3aba255ad9f..e0b9424fa1b2 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -83,7 +83,6 @@ struct ctl_table nf_ct_ipv6_sysctl_table[] = { .proc_handler = proc_dointvec_jiffies, }, { - .ctl_name = NET_NF_CONNTRACK_FRAG6_LOW_THRESH, .procname = "nf_conntrack_frag6_low_thresh", .data = &nf_init_frags.low_thresh, .maxlen = sizeof(unsigned int), @@ -91,14 +90,13 @@ struct ctl_table nf_ct_ipv6_sysctl_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_NF_CONNTRACK_FRAG6_HIGH_THRESH, .procname = "nf_conntrack_frag6_high_thresh", .data = &nf_init_frags.high_thresh, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec, }, - { .ctl_name = 0 } + { } }; #endif diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index da5bd0ed83df..2499e9712031 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -636,7 +636,6 @@ static const struct inet6_protocol frag_protocol = #ifdef CONFIG_SYSCTL static struct ctl_table ip6_frags_ns_ctl_table[] = { { - .ctl_name = NET_IPV6_IP6FRAG_HIGH_THRESH, .procname = "ip6frag_high_thresh", .data = &init_net.ipv6.frags.high_thresh, .maxlen = sizeof(int), @@ -644,7 +643,6 @@ static struct ctl_table ip6_frags_ns_ctl_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_IPV6_IP6FRAG_LOW_THRESH, .procname = "ip6frag_low_thresh", .data = &init_net.ipv6.frags.low_thresh, .maxlen = sizeof(int), @@ -652,26 +650,22 @@ static struct ctl_table ip6_frags_ns_ctl_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_IPV6_IP6FRAG_TIME, .procname = "ip6frag_time", .data = &init_net.ipv6.frags.timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies, }, { } }; static struct ctl_table ip6_frags_ctl_table[] = { { - .ctl_name = NET_IPV6_IP6FRAG_SECRET_INTERVAL, .procname = "ip6frag_secret_interval", .data = &ip6_frags.secret_interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies }, { } }; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d6fe7646a8ff..6aa202e26f97 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2546,7 +2546,6 @@ ctl_table ipv6_route_table_template[] = { .proc_handler = ipv6_sysctl_rtcache_flush }, { - .ctl_name = NET_IPV6_ROUTE_GC_THRESH, .procname = "gc_thresh", .data = &ip6_dst_ops_template.gc_thresh, .maxlen = sizeof(int), @@ -2554,7 +2553,6 @@ ctl_table ipv6_route_table_template[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV6_ROUTE_MAX_SIZE, .procname = "max_size", .data = &init_net.ipv6.sysctl.ip6_rt_max_size, .maxlen = sizeof(int), @@ -2562,69 +2560,55 @@ ctl_table ipv6_route_table_template[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL, .procname = "gc_min_interval", .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies, }, { - .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT, .procname = "gc_timeout", .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies, }, { - .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL, .procname = "gc_interval", .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies, }, { - .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY, .procname = "gc_elasticity", .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies, }, { - .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES, .procname = "mtu_expires", .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies, }, { - .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS, .procname = "min_adv_mss", .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies, }, { - .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS, .procname = "gc_min_interval_ms", .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_ms_jiffies, - .strategy = sysctl_ms_jiffies, }, - { .ctl_name = 0 } + { } }; struct ctl_table *ipv6_route_sysctl_init(struct net *net) diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 0dc6a4e5ed4a..c690736885b4 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -16,45 +16,41 @@ static ctl_table ipv6_table_template[] = { { - .ctl_name = NET_IPV6_ROUTE, .procname = "route", .maxlen = 0, .mode = 0555, .child = ipv6_route_table_template }, { - .ctl_name = NET_IPV6_ICMP, .procname = "icmp", .maxlen = 0, .mode = 0555, .child = ipv6_icmp_table_template }, { - .ctl_name = NET_IPV6_BINDV6ONLY, .procname = "bindv6only", .data = &init_net.ipv6.sysctl.bindv6only, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec }, - { .ctl_name = 0 } + { } }; static ctl_table ipv6_rotable[] = { { - .ctl_name = NET_IPV6_MLD_MAX_MSF, .procname = "mld_max_msf", .data = &sysctl_mld_max_msf, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec }, - { .ctl_name = 0 } + { } }; struct ctl_path net_ipv6_ctl_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "ipv6", .ctl_name = NET_IPV6, }, + { .procname = "net", }, + { .procname = "ipv6", }, { }, }; EXPORT_SYMBOL_GPL(net_ipv6_ctl_path); diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 8ec3d45cd1d9..7254e3f899a7 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -309,7 +309,6 @@ static void xfrm6_policy_fini(void) #ifdef CONFIG_SYSCTL static struct ctl_table xfrm6_policy_table[] = { { - .ctl_name = CTL_UNNUMBERED, .procname = "xfrm6_gc_thresh", .data = &xfrm6_dst_ops.gc_thresh, .maxlen = sizeof(int), diff --git a/net/ipx/sysctl_net_ipx.c b/net/ipx/sysctl_net_ipx.c index 633fcab35580..bd6dca00fb85 100644 --- a/net/ipx/sysctl_net_ipx.c +++ b/net/ipx/sysctl_net_ipx.c @@ -18,19 +18,18 @@ extern int sysctl_ipx_pprop_broadcasting; static struct ctl_table ipx_table[] = { { - .ctl_name = NET_IPX_PPROP_BROADCASTING, .procname = "ipx_pprop_broadcasting", .data = &sysctl_ipx_pprop_broadcasting, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, - { 0 }, + { }, }; static struct ctl_path ipx_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "ipx", .ctl_name = NET_IPX, }, + { .procname = "net", }, + { .procname = "ipx", }, { } }; diff --git a/net/irda/irsysctl.c b/net/irda/irsysctl.c index 5c86567e5a78..d0b70dadf73b 100644 --- a/net/irda/irsysctl.c +++ b/net/irda/irsysctl.c @@ -113,26 +113,21 @@ static int do_discovery(ctl_table *table, int write, /* One file */ static ctl_table irda_table[] = { { - .ctl_name = NET_IRDA_DISCOVERY, .procname = "discovery", .data = &sysctl_discovery, .maxlen = sizeof(int), .mode = 0644, .proc_handler = do_discovery, - .strategy = sysctl_intvec }, { - .ctl_name = NET_IRDA_DEVNAME, .procname = "devname", .data = sysctl_devname, .maxlen = 65, .mode = 0644, .proc_handler = do_devname, - .strategy = sysctl_string }, #ifdef CONFIG_IRDA_DEBUG { - .ctl_name = NET_IRDA_DEBUG, .procname = "debug", .data = &irda_debug, .maxlen = sizeof(int), @@ -142,7 +137,6 @@ static ctl_table irda_table[] = { #endif #ifdef CONFIG_IRDA_FAST_RR { - .ctl_name = NET_IRDA_FAST_POLL, .procname = "fast_poll_increase", .data = &sysctl_fast_poll_increase, .maxlen = sizeof(int), @@ -151,18 +145,15 @@ static ctl_table irda_table[] = { }, #endif { - .ctl_name = NET_IRDA_DISCOVERY_SLOTS, .procname = "discovery_slots", .data = &sysctl_discovery_slots, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_discovery_slots, .extra2 = &max_discovery_slots }, { - .ctl_name = NET_IRDA_DISCOVERY_TIMEOUT, .procname = "discovery_timeout", .data = &sysctl_discovery_timeout, .maxlen = sizeof(int), @@ -170,99 +161,83 @@ static ctl_table irda_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_IRDA_SLOT_TIMEOUT, .procname = "slot_timeout", .data = &sysctl_slot_timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_slot_timeout, .extra2 = &max_slot_timeout }, { - .ctl_name = NET_IRDA_MAX_BAUD_RATE, .procname = "max_baud_rate", .data = &sysctl_max_baud_rate, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_max_baud_rate, .extra2 = &max_max_baud_rate }, { - .ctl_name = NET_IRDA_MIN_TX_TURN_TIME, .procname = "min_tx_turn_time", .data = &sysctl_min_tx_turn_time, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_min_tx_turn_time, .extra2 = &max_min_tx_turn_time }, { - .ctl_name = NET_IRDA_MAX_TX_DATA_SIZE, .procname = "max_tx_data_size", .data = &sysctl_max_tx_data_size, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_max_tx_data_size, .extra2 = &max_max_tx_data_size }, { - .ctl_name = NET_IRDA_MAX_TX_WINDOW, .procname = "max_tx_window", .data = &sysctl_max_tx_window, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_max_tx_window, .extra2 = &max_max_tx_window }, { - .ctl_name = NET_IRDA_MAX_NOREPLY_TIME, .procname = "max_noreply_time", .data = &sysctl_max_noreply_time, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_max_noreply_time, .extra2 = &max_max_noreply_time }, { - .ctl_name = NET_IRDA_WARN_NOREPLY_TIME, .procname = "warn_noreply_time", .data = &sysctl_warn_noreply_time, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_warn_noreply_time, .extra2 = &max_warn_noreply_time }, { - .ctl_name = NET_IRDA_LAP_KEEPALIVE_TIME, .procname = "lap_keepalive_time", .data = &sysctl_lap_keepalive_time, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_lap_keepalive_time, .extra2 = &max_lap_keepalive_time }, - { .ctl_name = 0 } + { } }; static struct ctl_path irda_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "irda", .ctl_name = NET_IRDA, }, + { .procname = "net", }, + { .procname = "irda", }, { } }; diff --git a/net/llc/sysctl_net_llc.c b/net/llc/sysctl_net_llc.c index 57b9304d444c..e2ebe3586263 100644 --- a/net/llc/sysctl_net_llc.c +++ b/net/llc/sysctl_net_llc.c @@ -15,86 +15,73 @@ static struct ctl_table llc2_timeout_table[] = { { - .ctl_name = NET_LLC2_ACK_TIMEOUT, .procname = "ack", .data = &sysctl_llc2_ack_timeout, .maxlen = sizeof(long), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies, }, { - .ctl_name = NET_LLC2_BUSY_TIMEOUT, .procname = "busy", .data = &sysctl_llc2_busy_timeout, .maxlen = sizeof(long), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies, }, { - .ctl_name = NET_LLC2_P_TIMEOUT, .procname = "p", .data = &sysctl_llc2_p_timeout, .maxlen = sizeof(long), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies, }, { - .ctl_name = NET_LLC2_REJ_TIMEOUT, .procname = "rej", .data = &sysctl_llc2_rej_timeout, .maxlen = sizeof(long), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies, }, - { 0 }, + { }, }; static struct ctl_table llc_station_table[] = { { - .ctl_name = NET_LLC_STATION_ACK_TIMEOUT, .procname = "ack_timeout", .data = &sysctl_llc_station_ack_timeout, .maxlen = sizeof(long), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies, }, - { 0 }, + { }, }; static struct ctl_table llc2_dir_timeout_table[] = { { - .ctl_name = NET_LLC2, .procname = "timeout", .mode = 0555, .child = llc2_timeout_table, }, - { 0 }, + { }, }; static struct ctl_table llc_table[] = { { - .ctl_name = NET_LLC2, .procname = "llc2", .mode = 0555, .child = llc2_dir_timeout_table, }, { - .ctl_name = NET_LLC_STATION, .procname = "station", .mode = 0555, .child = llc_station_table, }, - { 0 }, + { }, }; static struct ctl_path llc_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "llc", .ctl_name = NET_LLC, }, + { .procname = "net", }, + { .procname = "llc", }, { } }; diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 5bb34737501f..60ec4e4badaa 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -273,8 +273,8 @@ void __init netfilter_init(void) #ifdef CONFIG_SYSCTL struct ctl_path nf_net_netfilter_sysctl_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "netfilter", .ctl_name = NET_NETFILTER, }, + { .procname = "net", }, + { .procname = "netfilter", }, { } }; EXPORT_SYMBOL_GPL(nf_net_netfilter_sysctl_path); diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 446e9bd4b4bc..e55a6861d26f 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1706,12 +1706,12 @@ static struct ctl_table vs_vars[] = { .mode = 0644, .proc_handler = proc_dointvec, }, - { .ctl_name = 0 } + { } }; const struct ctl_path net_vs_ctl_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "ipv4", .ctl_name = NET_IPV4, }, + { .procname = "net", }, + { .procname = "ipv4", }, { .procname = "vs", }, { } }; diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index c1757f3620cd..1b9370db2305 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -121,7 +121,7 @@ static ctl_table vs_vars_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - { .ctl_name = 0 } + { } }; static struct ctl_table_header * sysctl_header; diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index 715b57f9540d..f7476b95ab46 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -302,7 +302,7 @@ static ctl_table vs_vars_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - { .ctl_name = 0 } + { } }; static struct ctl_table_header * sysctl_header; diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c index 4a1d94aac20b..018f90db511c 100644 --- a/net/netfilter/nf_conntrack_acct.c +++ b/net/netfilter/nf_conntrack_acct.c @@ -30,7 +30,6 @@ MODULE_PARM_DESC(acct, "Enable connection tracking flow accounting."); #ifdef CONFIG_SYSCTL static struct ctl_table acct_sysctl_table[] = { { - .ctl_name = CTL_UNNUMBERED, .procname = "nf_conntrack_acct", .data = &init_net.ct.sysctl_acct, .maxlen = sizeof(unsigned int), diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index aee560b4768d..d5a9bcd7d61b 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -151,7 +151,6 @@ static int nf_ct_events_retry_timeout __read_mostly = 15*HZ; #ifdef CONFIG_SYSCTL static struct ctl_table event_sysctl_table[] = { { - .ctl_name = CTL_UNNUMBERED, .procname = "nf_conntrack_events", .data = &init_net.ct.sysctl_events, .maxlen = sizeof(unsigned int), @@ -159,7 +158,6 @@ static struct ctl_table event_sysctl_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "nf_conntrack_events_retry_timeout", .data = &init_net.ct.sysctl_events_retry_timeout, .maxlen = sizeof(unsigned int), diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 1b816a2ea813..7bf1395b4158 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -703,64 +703,54 @@ static int dccp_nlattr_size(void) /* template, data assigned later */ static struct ctl_table dccp_sysctl_table[] = { { - .ctl_name = CTL_UNNUMBERED, .procname = "nf_conntrack_dccp_timeout_request", .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "nf_conntrack_dccp_timeout_respond", .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "nf_conntrack_dccp_timeout_partopen", .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "nf_conntrack_dccp_timeout_open", .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "nf_conntrack_dccp_timeout_closereq", .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "nf_conntrack_dccp_timeout_closing", .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "nf_conntrack_dccp_timeout_timewait", .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "nf_conntrack_dccp_loose", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, - { - .ctl_name = 0, - } + { } }; #endif /* CONFIG_SYSCTL */ diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index 829374f426c4..e2091d0c7a2f 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -69,9 +69,7 @@ static struct ctl_table generic_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - { - .ctl_name = 0 - } + { } }; #ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT static struct ctl_table generic_compat_sysctl_table[] = { @@ -82,9 +80,7 @@ static struct ctl_table generic_compat_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - { - .ctl_name = 0 - } + { } }; #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index c10e6f36e31e..f9d930f80276 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -595,9 +595,7 @@ static struct ctl_table sctp_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - { - .ctl_name = 0 - } + { } }; #ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT @@ -651,9 +649,7 @@ static struct ctl_table sctp_compat_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - { - .ctl_name = 0 - } + { } }; #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 97a82ba75376..f86284662756 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1303,7 +1303,6 @@ static struct ctl_table tcp_sysctl_table[] = { .proc_handler = proc_dointvec_jiffies, }, { - .ctl_name = NET_NF_CONNTRACK_TCP_LOOSE, .procname = "nf_conntrack_tcp_loose", .data = &nf_ct_tcp_loose, .maxlen = sizeof(unsigned int), @@ -1311,7 +1310,6 @@ static struct ctl_table tcp_sysctl_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_NF_CONNTRACK_TCP_BE_LIBERAL, .procname = "nf_conntrack_tcp_be_liberal", .data = &nf_ct_tcp_be_liberal, .maxlen = sizeof(unsigned int), @@ -1319,16 +1317,13 @@ static struct ctl_table tcp_sysctl_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_NF_CONNTRACK_TCP_MAX_RETRANS, .procname = "nf_conntrack_tcp_max_retrans", .data = &nf_ct_tcp_max_retrans, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec, }, - { - .ctl_name = 0 - } + { } }; #ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT @@ -1404,7 +1399,6 @@ static struct ctl_table tcp_compat_sysctl_table[] = { .proc_handler = proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_LOOSE, .procname = "ip_conntrack_tcp_loose", .data = &nf_ct_tcp_loose, .maxlen = sizeof(unsigned int), @@ -1412,7 +1406,6 @@ static struct ctl_table tcp_compat_sysctl_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_BE_LIBERAL, .procname = "ip_conntrack_tcp_be_liberal", .data = &nf_ct_tcp_be_liberal, .maxlen = sizeof(unsigned int), @@ -1420,16 +1413,13 @@ static struct ctl_table tcp_compat_sysctl_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_MAX_RETRANS, .procname = "ip_conntrack_tcp_max_retrans", .data = &nf_ct_tcp_max_retrans, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec, }, - { - .ctl_name = 0 - } + { } }; #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 70809d117b91..5c5518bedb4b 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -154,9 +154,7 @@ static struct ctl_table udp_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - { - .ctl_name = 0 - } + { } }; #ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT static struct ctl_table udp_compat_sysctl_table[] = { @@ -174,9 +172,7 @@ static struct ctl_table udp_compat_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - { - .ctl_name = 0 - } + { } }; #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index 0badedc542d3..458655bb2106 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -146,7 +146,6 @@ static unsigned int udplite_sysctl_table_users; static struct ctl_table_header *udplite_sysctl_header; static struct ctl_table udplite_sysctl_table[] = { { - .ctl_name = CTL_UNNUMBERED, .procname = "nf_conntrack_udplite_timeout", .data = &nf_ct_udplite_timeout, .maxlen = sizeof(unsigned int), @@ -154,16 +153,13 @@ static struct ctl_table udplite_sysctl_table[] = { .proc_handler = proc_dointvec_jiffies, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "nf_conntrack_udplite_timeout_stream", .data = &nf_ct_udplite_timeout_stream, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - { - .ctl_name = 0 - } + { } }; #endif /* CONFIG_SYSCTL */ diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 193515381970..028aba667ef7 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -340,7 +340,6 @@ static struct ctl_table_header *nf_ct_netfilter_header; static ctl_table nf_ct_sysctl_table[] = { { - .ctl_name = NET_NF_CONNTRACK_MAX, .procname = "nf_conntrack_max", .data = &nf_conntrack_max, .maxlen = sizeof(int), @@ -348,7 +347,6 @@ static ctl_table nf_ct_sysctl_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_NF_CONNTRACK_COUNT, .procname = "nf_conntrack_count", .data = &init_net.ct.count, .maxlen = sizeof(int), @@ -356,7 +354,6 @@ static ctl_table nf_ct_sysctl_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_NF_CONNTRACK_BUCKETS, .procname = "nf_conntrack_buckets", .data = &nf_conntrack_htable_size, .maxlen = sizeof(unsigned int), @@ -364,7 +361,6 @@ static ctl_table nf_ct_sysctl_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_NF_CONNTRACK_CHECKSUM, .procname = "nf_conntrack_checksum", .data = &init_net.ct.sysctl_checksum, .maxlen = sizeof(unsigned int), @@ -372,43 +368,39 @@ static ctl_table nf_ct_sysctl_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_NF_CONNTRACK_LOG_INVALID, .procname = "nf_conntrack_log_invalid", .data = &init_net.ct.sysctl_log_invalid, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &log_invalid_proto_min, .extra2 = &log_invalid_proto_max, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "nf_conntrack_expect_max", .data = &nf_ct_expect_max, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, - { .ctl_name = 0 } + { } }; #define NET_NF_CONNTRACK_MAX 2089 static ctl_table nf_ct_netfilter_table[] = { { - .ctl_name = NET_NF_CONNTRACK_MAX, .procname = "nf_conntrack_max", .data = &nf_conntrack_max, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, - { .ctl_name = 0 } + { } }; static struct ctl_path nf_ct_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, + { .procname = "net", }, { } }; diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index c93494fef8ef..565e0a31d521 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -216,9 +216,9 @@ static const struct file_operations nflog_file_ops = { #ifdef CONFIG_SYSCTL static struct ctl_path nf_log_sysctl_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "netfilter", .ctl_name = NET_NETFILTER, }, - { .procname = "nf_log", .ctl_name = CTL_UNNUMBERED, }, + { .procname = "net", }, + { .procname = "netfilter", }, + { .procname = "nf_log", }, { } }; @@ -273,7 +273,6 @@ static __init int netfilter_log_sysctl_init(void) for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) { snprintf(nf_log_sysctl_fnames[i-NFPROTO_UNSPEC], 3, "%d", i); - nf_log_sysctl_table[i].ctl_name = CTL_UNNUMBERED; nf_log_sysctl_table[i].procname = nf_log_sysctl_fnames[i-NFPROTO_UNSPEC]; nf_log_sysctl_table[i].data = NULL; diff --git a/net/netrom/sysctl_net_netrom.c b/net/netrom/sysctl_net_netrom.c index 7b49591fe87c..1e0fa9e57aac 100644 --- a/net/netrom/sysctl_net_netrom.c +++ b/net/netrom/sysctl_net_netrom.c @@ -36,143 +36,119 @@ static struct ctl_table_header *nr_table_header; static ctl_table nr_table[] = { { - .ctl_name = NET_NETROM_DEFAULT_PATH_QUALITY, .procname = "default_path_quality", .data = &sysctl_netrom_default_path_quality, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_quality, .extra2 = &max_quality }, { - .ctl_name = NET_NETROM_OBSOLESCENCE_COUNT_INITIALISER, .procname = "obsolescence_count_initialiser", .data = &sysctl_netrom_obsolescence_count_initialiser, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_obs, .extra2 = &max_obs }, { - .ctl_name = NET_NETROM_NETWORK_TTL_INITIALISER, .procname = "network_ttl_initialiser", .data = &sysctl_netrom_network_ttl_initialiser, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_ttl, .extra2 = &max_ttl }, { - .ctl_name = NET_NETROM_TRANSPORT_TIMEOUT, .procname = "transport_timeout", .data = &sysctl_netrom_transport_timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_t1, .extra2 = &max_t1 }, { - .ctl_name = NET_NETROM_TRANSPORT_MAXIMUM_TRIES, .procname = "transport_maximum_tries", .data = &sysctl_netrom_transport_maximum_tries, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_n2, .extra2 = &max_n2 }, { - .ctl_name = NET_NETROM_TRANSPORT_ACKNOWLEDGE_DELAY, .procname = "transport_acknowledge_delay", .data = &sysctl_netrom_transport_acknowledge_delay, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_t2, .extra2 = &max_t2 }, { - .ctl_name = NET_NETROM_TRANSPORT_BUSY_DELAY, .procname = "transport_busy_delay", .data = &sysctl_netrom_transport_busy_delay, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_t4, .extra2 = &max_t4 }, { - .ctl_name = NET_NETROM_TRANSPORT_REQUESTED_WINDOW_SIZE, .procname = "transport_requested_window_size", .data = &sysctl_netrom_transport_requested_window_size, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_window, .extra2 = &max_window }, { - .ctl_name = NET_NETROM_TRANSPORT_NO_ACTIVITY_TIMEOUT, .procname = "transport_no_activity_timeout", .data = &sysctl_netrom_transport_no_activity_timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_idle, .extra2 = &max_idle }, { - .ctl_name = NET_NETROM_ROUTING_CONTROL, .procname = "routing_control", .data = &sysctl_netrom_routing_control, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_route, .extra2 = &max_route }, { - .ctl_name = NET_NETROM_LINK_FAILS_COUNT, .procname = "link_fails_count", .data = &sysctl_netrom_link_fails_count, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_fails, .extra2 = &max_fails }, { - .ctl_name = NET_NETROM_RESET, .procname = "reset", .data = &sysctl_netrom_reset_circuit, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_reset, .extra2 = &max_reset }, - { .ctl_name = 0 } + { } }; static struct ctl_path nr_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "netrom", .ctl_name = NET_NETROM, }, + { .procname = "net", }, + { .procname = "netrom", }, { } }; diff --git a/net/phonet/sysctl.c b/net/phonet/sysctl.c index 2220f3322326..cea1c7dbdae2 100644 --- a/net/phonet/sysctl.c +++ b/net/phonet/sysctl.c @@ -84,20 +84,18 @@ static int proc_local_port_range(ctl_table *table, int write, static struct ctl_table phonet_table[] = { { - .ctl_name = CTL_UNNUMBERED, .procname = "local_port_range", .data = &local_port_range, .maxlen = sizeof(local_port_range), .mode = 0644, .proc_handler = proc_local_port_range, - .strategy = NULL, }, - { .ctl_name = 0 } + { } }; static struct ctl_path phonet_ctl_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "phonet", .ctl_name = CTL_UNNUMBERED, }, + { .procname = "net", }, + { .procname = "phonet", }, { }, }; diff --git a/net/rds/ib_sysctl.c b/net/rds/ib_sysctl.c index 84b5ffcb280f..517c6c9987ba 100644 --- a/net/rds/ib_sysctl.c +++ b/net/rds/ib_sysctl.c @@ -67,7 +67,6 @@ unsigned int rds_ib_sysctl_flow_control = 0; ctl_table rds_ib_sysctl_table[] = { { - .ctl_name = CTL_UNNUMBERED, .procname = "max_send_wr", .data = &rds_ib_sysctl_max_send_wr, .maxlen = sizeof(unsigned long), @@ -77,7 +76,6 @@ ctl_table rds_ib_sysctl_table[] = { .extra2 = &rds_ib_sysctl_max_wr_max, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "max_recv_wr", .data = &rds_ib_sysctl_max_recv_wr, .maxlen = sizeof(unsigned long), @@ -87,7 +85,6 @@ ctl_table rds_ib_sysctl_table[] = { .extra2 = &rds_ib_sysctl_max_wr_max, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "max_unsignaled_wr", .data = &rds_ib_sysctl_max_unsig_wrs, .maxlen = sizeof(unsigned long), @@ -97,7 +94,6 @@ ctl_table rds_ib_sysctl_table[] = { .extra2 = &rds_ib_sysctl_max_unsig_wr_max, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "max_unsignaled_bytes", .data = &rds_ib_sysctl_max_unsig_bytes, .maxlen = sizeof(unsigned long), @@ -107,7 +103,6 @@ ctl_table rds_ib_sysctl_table[] = { .extra2 = &rds_ib_sysctl_max_unsig_bytes_max, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "max_recv_allocation", .data = &rds_ib_sysctl_max_recv_allocation, .maxlen = sizeof(unsigned long), @@ -115,20 +110,19 @@ ctl_table rds_ib_sysctl_table[] = { .proc_handler = &proc_doulongvec_minmax, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "flow_control", .data = &rds_ib_sysctl_flow_control, .maxlen = sizeof(rds_ib_sysctl_flow_control), .mode = 0644, .proc_handler = &proc_dointvec, }, - { .ctl_name = 0} + { } }; static struct ctl_path rds_ib_sysctl_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "rds", .ctl_name = CTL_UNNUMBERED, }, - { .procname = "ib", .ctl_name = CTL_UNNUMBERED, }, + { .procname = "net", }, + { .procname = "rds", }, + { .procname = "ib", }, { } }; diff --git a/net/rds/iw_sysctl.c b/net/rds/iw_sysctl.c index 9590678cd616..3e00b01559f2 100644 --- a/net/rds/iw_sysctl.c +++ b/net/rds/iw_sysctl.c @@ -57,7 +57,6 @@ unsigned int rds_iw_sysctl_flow_control = 1; ctl_table rds_iw_sysctl_table[] = { { - .ctl_name = CTL_UNNUMBERED, .procname = "max_send_wr", .data = &rds_iw_sysctl_max_send_wr, .maxlen = sizeof(unsigned long), @@ -67,7 +66,6 @@ ctl_table rds_iw_sysctl_table[] = { .extra2 = &rds_iw_sysctl_max_wr_max, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "max_recv_wr", .data = &rds_iw_sysctl_max_recv_wr, .maxlen = sizeof(unsigned long), @@ -77,7 +75,6 @@ ctl_table rds_iw_sysctl_table[] = { .extra2 = &rds_iw_sysctl_max_wr_max, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "max_unsignaled_wr", .data = &rds_iw_sysctl_max_unsig_wrs, .maxlen = sizeof(unsigned long), @@ -87,7 +84,6 @@ ctl_table rds_iw_sysctl_table[] = { .extra2 = &rds_iw_sysctl_max_unsig_wr_max, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "max_unsignaled_bytes", .data = &rds_iw_sysctl_max_unsig_bytes, .maxlen = sizeof(unsigned long), @@ -97,7 +93,6 @@ ctl_table rds_iw_sysctl_table[] = { .extra2 = &rds_iw_sysctl_max_unsig_bytes_max, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "max_recv_allocation", .data = &rds_iw_sysctl_max_recv_allocation, .maxlen = sizeof(unsigned long), @@ -105,20 +100,19 @@ ctl_table rds_iw_sysctl_table[] = { .proc_handler = &proc_doulongvec_minmax, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "flow_control", .data = &rds_iw_sysctl_flow_control, .maxlen = sizeof(rds_iw_sysctl_flow_control), .mode = 0644, .proc_handler = &proc_dointvec, }, - { .ctl_name = 0} + { } }; static struct ctl_path rds_iw_sysctl_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "rds", .ctl_name = CTL_UNNUMBERED, }, - { .procname = "iw", .ctl_name = CTL_UNNUMBERED, }, + { .procname = "net", }, + { .procname = "rds", }, + { .procname = "iw", }, { } }; diff --git a/net/rds/sysctl.c b/net/rds/sysctl.c index 307dc5c1be15..8fb499ee3687 100644 --- a/net/rds/sysctl.c +++ b/net/rds/sysctl.c @@ -51,7 +51,6 @@ unsigned int rds_sysctl_ping_enable = 1; static ctl_table rds_sysctl_rds_table[] = { { - .ctl_name = CTL_UNNUMBERED, .procname = "reconnect_min_delay_ms", .data = &rds_sysctl_reconnect_min_jiffies, .maxlen = sizeof(unsigned long), @@ -61,7 +60,6 @@ static ctl_table rds_sysctl_rds_table[] = { .extra2 = &rds_sysctl_reconnect_max_jiffies, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "reconnect_max_delay_ms", .data = &rds_sysctl_reconnect_max_jiffies, .maxlen = sizeof(unsigned long), @@ -71,7 +69,6 @@ static ctl_table rds_sysctl_rds_table[] = { .extra2 = &rds_sysctl_reconnect_max, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "max_unacked_packets", .data = &rds_sysctl_max_unacked_packets, .maxlen = sizeof(unsigned long), @@ -79,7 +76,6 @@ static ctl_table rds_sysctl_rds_table[] = { .proc_handler = &proc_dointvec, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "max_unacked_bytes", .data = &rds_sysctl_max_unacked_bytes, .maxlen = sizeof(unsigned long), @@ -87,19 +83,18 @@ static ctl_table rds_sysctl_rds_table[] = { .proc_handler = &proc_dointvec, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "ping_enable", .data = &rds_sysctl_ping_enable, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec, }, - { .ctl_name = 0} + { } }; static struct ctl_path rds_sysctl_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "rds", .ctl_name = CTL_UNNUMBERED, }, + { .procname = "net", }, + { .procname = "rds", }, { } }; diff --git a/net/rose/sysctl_net_rose.c b/net/rose/sysctl_net_rose.c index 3bfe504faf86..df6d9dac2186 100644 --- a/net/rose/sysctl_net_rose.c +++ b/net/rose/sysctl_net_rose.c @@ -26,121 +26,101 @@ static struct ctl_table_header *rose_table_header; static ctl_table rose_table[] = { { - .ctl_name = NET_ROSE_RESTART_REQUEST_TIMEOUT, .procname = "restart_request_timeout", .data = &sysctl_rose_restart_request_timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_timer, .extra2 = &max_timer }, { - .ctl_name = NET_ROSE_CALL_REQUEST_TIMEOUT, .procname = "call_request_timeout", .data = &sysctl_rose_call_request_timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_timer, .extra2 = &max_timer }, { - .ctl_name = NET_ROSE_RESET_REQUEST_TIMEOUT, .procname = "reset_request_timeout", .data = &sysctl_rose_reset_request_timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_timer, .extra2 = &max_timer }, { - .ctl_name = NET_ROSE_CLEAR_REQUEST_TIMEOUT, .procname = "clear_request_timeout", .data = &sysctl_rose_clear_request_timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_timer, .extra2 = &max_timer }, { - .ctl_name = NET_ROSE_NO_ACTIVITY_TIMEOUT, .procname = "no_activity_timeout", .data = &sysctl_rose_no_activity_timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_idle, .extra2 = &max_idle }, { - .ctl_name = NET_ROSE_ACK_HOLD_BACK_TIMEOUT, .procname = "acknowledge_hold_back_timeout", .data = &sysctl_rose_ack_hold_back_timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_timer, .extra2 = &max_timer }, { - .ctl_name = NET_ROSE_ROUTING_CONTROL, .procname = "routing_control", .data = &sysctl_rose_routing_control, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_route, .extra2 = &max_route }, { - .ctl_name = NET_ROSE_LINK_FAIL_TIMEOUT, .procname = "link_fail_timeout", .data = &sysctl_rose_link_fail_timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_ftimer, .extra2 = &max_ftimer }, { - .ctl_name = NET_ROSE_MAX_VCS, .procname = "maximum_virtual_circuits", .data = &sysctl_rose_maximum_vcs, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_maxvcs, .extra2 = &max_maxvcs }, { - .ctl_name = NET_ROSE_WINDOW_SIZE, .procname = "window_size", .data = &sysctl_rose_window_size, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_window, .extra2 = &max_window }, - { .ctl_name = 0 } + { } }; static struct ctl_path rose_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "rose", .ctl_name = NET_ROSE, }, + { .procname = "net", }, + { .procname = "rose", }, { } }; diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index ab7151da120f..c4ece9829541 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -59,180 +59,145 @@ extern int sysctl_sctp_wmem[3]; static ctl_table sctp_table[] = { { - .ctl_name = NET_SCTP_RTO_INITIAL, .procname = "rto_initial", .data = &sctp_rto_initial, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &one, .extra2 = &timer_max }, { - .ctl_name = NET_SCTP_RTO_MIN, .procname = "rto_min", .data = &sctp_rto_min, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &one, .extra2 = &timer_max }, { - .ctl_name = NET_SCTP_RTO_MAX, .procname = "rto_max", .data = &sctp_rto_max, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &one, .extra2 = &timer_max }, { - .ctl_name = NET_SCTP_VALID_COOKIE_LIFE, .procname = "valid_cookie_life", .data = &sctp_valid_cookie_life, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &one, .extra2 = &timer_max }, { - .ctl_name = NET_SCTP_MAX_BURST, .procname = "max_burst", .data = &sctp_max_burst, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &zero, .extra2 = &int_max }, { - .ctl_name = NET_SCTP_ASSOCIATION_MAX_RETRANS, .procname = "association_max_retrans", .data = &sctp_max_retrans_association, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &one, .extra2 = &int_max }, { - .ctl_name = NET_SCTP_SNDBUF_POLICY, .procname = "sndbuf_policy", .data = &sctp_sndbuf_policy, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, - .strategy = sysctl_intvec }, { - .ctl_name = NET_SCTP_RCVBUF_POLICY, .procname = "rcvbuf_policy", .data = &sctp_rcvbuf_policy, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, - .strategy = sysctl_intvec }, { - .ctl_name = NET_SCTP_PATH_MAX_RETRANS, .procname = "path_max_retrans", .data = &sctp_max_retrans_path, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &one, .extra2 = &int_max }, { - .ctl_name = NET_SCTP_MAX_INIT_RETRANSMITS, .procname = "max_init_retransmits", .data = &sctp_max_retrans_init, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &one, .extra2 = &int_max }, { - .ctl_name = NET_SCTP_HB_INTERVAL, .procname = "hb_interval", .data = &sctp_hb_interval, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &one, .extra2 = &timer_max }, { - .ctl_name = NET_SCTP_PRESERVE_ENABLE, .procname = "cookie_preserve_enable", .data = &sctp_cookie_preserve_enable, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, - .strategy = sysctl_intvec }, { - .ctl_name = NET_SCTP_RTO_ALPHA, .procname = "rto_alpha_exp_divisor", .data = &sctp_rto_alpha, .maxlen = sizeof(int), .mode = 0444, .proc_handler = proc_dointvec, - .strategy = sysctl_intvec }, { - .ctl_name = NET_SCTP_RTO_BETA, .procname = "rto_beta_exp_divisor", .data = &sctp_rto_beta, .maxlen = sizeof(int), .mode = 0444, .proc_handler = proc_dointvec, - .strategy = sysctl_intvec }, { - .ctl_name = NET_SCTP_ADDIP_ENABLE, .procname = "addip_enable", .data = &sctp_addip_enable, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, - .strategy = sysctl_intvec }, { - .ctl_name = NET_SCTP_PRSCTP_ENABLE, .procname = "prsctp_enable", .data = &sctp_prsctp_enable, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, - .strategy = sysctl_intvec }, { - .ctl_name = NET_SCTP_SACK_TIMEOUT, .procname = "sack_timeout", .data = &sctp_sack_timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &sack_timer_min, .extra2 = &sack_timer_max, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "sctp_mem", .data = &sysctl_sctp_mem, .maxlen = sizeof(sysctl_sctp_mem), @@ -240,7 +205,6 @@ static ctl_table sctp_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "sctp_rmem", .data = &sysctl_sctp_rmem, .maxlen = sizeof(sysctl_sctp_rmem), @@ -248,7 +212,6 @@ static ctl_table sctp_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "sctp_wmem", .data = &sysctl_sctp_wmem, .maxlen = sizeof(sysctl_sctp_wmem), @@ -256,40 +219,34 @@ static ctl_table sctp_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "auth_enable", .data = &sctp_auth_enable, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, - .strategy = sysctl_intvec }, { - .ctl_name = CTL_UNNUMBERED, .procname = "addip_noauth_enable", .data = &sctp_addip_noauth, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, - .strategy = sysctl_intvec }, { - .ctl_name = CTL_UNNUMBERED, .procname = "addr_scope_policy", .data = &sctp_scope_policy, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, .extra1 = &zero, .extra2 = &addr_scope_max, }, - { .ctl_name = 0 } + { } }; static struct ctl_path sctp_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "sctp", .ctl_name = NET_SCTP, }, + { .procname = "net", }, + { .procname = "sctp", }, { } }; diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c index 42f9748ae093..f0ce326d0178 100644 --- a/net/sunrpc/sysctl.c +++ b/net/sunrpc/sysctl.c @@ -168,17 +168,16 @@ static ctl_table debug_table[] = { .mode = 0444, .proc_handler = &proc_do_xprt, }, - { .ctl_name = 0 } + { } }; static ctl_table sunrpc_table[] = { { - .ctl_name = CTL_SUNRPC, .procname = "sunrpc", .mode = 0555, .child = debug_table }, - { .ctl_name = 0 } + { } }; #endif diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c index 35fb68b9c8ec..678cee22013f 100644 --- a/net/sunrpc/xprtrdma/svc_rdma.c +++ b/net/sunrpc/xprtrdma/svc_rdma.c @@ -121,7 +121,6 @@ static ctl_table svcrdma_parm_table[] = { .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, .extra1 = &min_max_requests, .extra2 = &max_max_requests }, @@ -131,7 +130,6 @@ static ctl_table svcrdma_parm_table[] = { .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, .extra1 = &min_max_inline, .extra2 = &max_max_inline }, @@ -141,7 +139,6 @@ static ctl_table svcrdma_parm_table[] = { .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, .extra1 = &min_ord, .extra2 = &max_ord, }, @@ -209,9 +206,7 @@ static ctl_table svcrdma_parm_table[] = { .mode = 0644, .proc_handler = &read_reset_stat, }, - { - .ctl_name = 0, - }, + { }, }; static ctl_table svcrdma_table[] = { @@ -220,21 +215,16 @@ static ctl_table svcrdma_table[] = { .mode = 0555, .child = svcrdma_parm_table }, - { - .ctl_name = 0, - }, + { }, }; static ctl_table svcrdma_root_table[] = { { - .ctl_name = CTL_SUNRPC, .procname = "sunrpc", .mode = 0555, .child = svcrdma_table }, - { - .ctl_name = 0, - }, + { }, }; void svc_rdma_cleanup(void) diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 9a63f669ece4..476816062243 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -86,79 +86,63 @@ static struct ctl_table_header *sunrpc_table_header; static ctl_table xr_tunables_table[] = { { - .ctl_name = CTL_UNNUMBERED, .procname = "rdma_slot_table_entries", .data = &xprt_rdma_slot_table_entries, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, .extra1 = &min_slot_table_size, .extra2 = &max_slot_table_size }, { - .ctl_name = CTL_UNNUMBERED, .procname = "rdma_max_inline_read", .data = &xprt_rdma_max_inline_read, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "rdma_max_inline_write", .data = &xprt_rdma_max_inline_write, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "rdma_inline_write_padding", .data = &xprt_rdma_inline_write_padding, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, .extra1 = &zero, .extra2 = &max_padding, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "rdma_memreg_strategy", .data = &xprt_rdma_memreg_strategy, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, .extra1 = &min_memreg, .extra2 = &max_memreg, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "rdma_pad_optimize", .data = &xprt_rdma_pad_optimize, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = &proc_dointvec, }, - { - .ctl_name = 0, - }, + { }, }; static ctl_table sunrpc_table[] = { { - .ctl_name = CTL_SUNRPC, .procname = "sunrpc", .mode = 0555, .child = xr_tunables_table }, - { - .ctl_name = 0, - }, + { }, }; #endif diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 37c5475ba258..8b9a2079f2e3 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -81,46 +81,38 @@ static struct ctl_table_header *sunrpc_table_header; */ static ctl_table xs_tunables_table[] = { { - .ctl_name = CTL_SLOTTABLE_UDP, .procname = "udp_slot_table_entries", .data = &xprt_udp_slot_table_entries, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, .extra1 = &min_slot_table_size, .extra2 = &max_slot_table_size }, { - .ctl_name = CTL_SLOTTABLE_TCP, .procname = "tcp_slot_table_entries", .data = &xprt_tcp_slot_table_entries, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, .extra1 = &min_slot_table_size, .extra2 = &max_slot_table_size }, { - .ctl_name = CTL_MIN_RESVPORT, .procname = "min_resvport", .data = &xprt_min_resvport, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, .extra1 = &xprt_min_resvport_limit, .extra2 = &xprt_max_resvport_limit }, { - .ctl_name = CTL_MAX_RESVPORT, .procname = "max_resvport", .data = &xprt_max_resvport, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, .extra1 = &xprt_min_resvport_limit, .extra2 = &xprt_max_resvport_limit }, @@ -130,23 +122,17 @@ static ctl_table xs_tunables_table[] = { .maxlen = sizeof(xs_tcp_fin_timeout), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, - .strategy = sysctl_jiffies - }, - { - .ctl_name = 0, }, + { }, }; static ctl_table sunrpc_table[] = { { - .ctl_name = CTL_SUNRPC, .procname = "sunrpc", .mode = 0555, .child = xs_tunables_table }, - { - .ctl_name = 0, - }, + { }, }; #endif diff --git a/net/unix/sysctl_net_unix.c b/net/unix/sysctl_net_unix.c index 83c093077ebc..708f5df6b7f0 100644 --- a/net/unix/sysctl_net_unix.c +++ b/net/unix/sysctl_net_unix.c @@ -16,19 +16,18 @@ static ctl_table unix_table[] = { { - .ctl_name = NET_UNIX_MAX_DGRAM_QLEN, .procname = "max_dgram_qlen", .data = &init_net.unx.sysctl_max_dgram_qlen, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec }, - { .ctl_name = 0 } + { } }; static struct ctl_path unix_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "unix", .ctl_name = NET_UNIX, }, + { .procname = "net", }, + { .procname = "unix", }, { }, }; diff --git a/net/x25/sysctl_net_x25.c b/net/x25/sysctl_net_x25.c index a5d3416522de..d2efd29f434e 100644 --- a/net/x25/sysctl_net_x25.c +++ b/net/x25/sysctl_net_x25.c @@ -19,62 +19,51 @@ static struct ctl_table_header *x25_table_header; static struct ctl_table x25_table[] = { { - .ctl_name = NET_X25_RESTART_REQUEST_TIMEOUT, .procname = "restart_request_timeout", .data = &sysctl_x25_restart_request_timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_timer, .extra2 = &max_timer, }, { - .ctl_name = NET_X25_CALL_REQUEST_TIMEOUT, .procname = "call_request_timeout", .data = &sysctl_x25_call_request_timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_timer, .extra2 = &max_timer, }, { - .ctl_name = NET_X25_RESET_REQUEST_TIMEOUT, .procname = "reset_request_timeout", .data = &sysctl_x25_reset_request_timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_timer, .extra2 = &max_timer, }, { - .ctl_name = NET_X25_CLEAR_REQUEST_TIMEOUT, .procname = "clear_request_timeout", .data = &sysctl_x25_clear_request_timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_timer, .extra2 = &max_timer, }, { - .ctl_name = NET_X25_ACK_HOLD_BACK_TIMEOUT, .procname = "acknowledgement_hold_back_timeout", .data = &sysctl_x25_ack_holdback_timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_timer, .extra2 = &max_timer, }, { - .ctl_name = NET_X25_FORWARD, .procname = "x25_forward", .data = &sysctl_x25_forward, .maxlen = sizeof(int), @@ -85,8 +74,8 @@ static struct ctl_table x25_table[] = { }; static struct ctl_path x25_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "x25", .ctl_name = NET_X25, }, + { .procname = "net", }, + { .procname = "x25", }, { } }; diff --git a/net/xfrm/xfrm_sysctl.c b/net/xfrm/xfrm_sysctl.c index 2e6ffb66f06f..2e221f2cad7e 100644 --- a/net/xfrm/xfrm_sysctl.c +++ b/net/xfrm/xfrm_sysctl.c @@ -13,28 +13,24 @@ static void __xfrm_sysctl_init(struct net *net) #ifdef CONFIG_SYSCTL static struct ctl_table xfrm_table[] = { { - .ctl_name = NET_CORE_AEVENT_ETIME, .procname = "xfrm_aevent_etime", .maxlen = sizeof(u32), .mode = 0644, .proc_handler = proc_dointvec }, { - .ctl_name = NET_CORE_AEVENT_RSEQTH, .procname = "xfrm_aevent_rseqth", .maxlen = sizeof(u32), .mode = 0644, .proc_handler = proc_dointvec }, { - .ctl_name = CTL_UNNUMBERED, .procname = "xfrm_larval_drop", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec }, { - .ctl_name = CTL_UNNUMBERED, .procname = "xfrm_acq_expires", .maxlen = sizeof(int), .mode = 0644, -- cgit v1.3-8-gc7d7 From 4739a9748e1bd7459f22f7e94e7d85710ca83954 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 3 Apr 2009 05:36:01 -0700 Subject: sysctl: Remove the last of the generic binary sysctl support Now that all of the users stopped using ctl_name and strategy it is safe to remove the fields from struct ctl_table, and it is safe to remove the stub strategy routines as well. Signed-off-by: Eric W. Biederman --- include/linux/sysctl.h | 3 --- kernel/sysctl.c | 44 -------------------------------------------- 2 files changed, 47 deletions(-) (limited to 'include') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 7c4aabc04673..4e40442777cf 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -1033,7 +1033,6 @@ extern ctl_handler sysctl_ms_jiffies; /* A sysctl table is an array of struct ctl_table: */ struct ctl_table { - int ctl_name; /* Binary ID */ const char *procname; /* Text ID for /proc/sys, or zero */ void *data; int maxlen; @@ -1041,7 +1040,6 @@ struct ctl_table struct ctl_table *child; struct ctl_table *parent; /* Automatically set */ proc_handler *proc_handler; /* Callback for text formatting */ - ctl_handler *strategy; /* Callback function for all r/w */ void *extra1; void *extra2; }; @@ -1075,7 +1073,6 @@ struct ctl_table_header /* struct ctl_path describes where in the hierarchy a table is added */ struct ctl_path { const char *procname; - int ctl_name; }; void register_sysctl_root(struct ctl_table_root *root); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index b6b6eb1abebb..b4a5763d6dc8 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1699,8 +1699,6 @@ static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q) * * The members of the &struct ctl_table structure are used as follows: * - * ctl_name - Dead - * * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not * enter a sysctl file * @@ -1715,8 +1713,6 @@ static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q) * * proc_handler - the text handler routine (described below) * - * strategy - Dead - * * de - for internal use by the sysctl routines * * extra1, extra2 - extra pointers usable by the proc handler routines @@ -2639,41 +2635,6 @@ int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write, #endif /* CONFIG_PROC_FS */ -int sysctl_data(struct ctl_table *table, - void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen) -{ - return -ENOSYS; -} - -int sysctl_string(struct ctl_table *table, - void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen) -{ - return -ENOSYS; -} - -int sysctl_intvec(struct ctl_table *table, - void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen) -{ - return -ENOSYS; -} - -int sysctl_jiffies(struct ctl_table *table, - void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen) -{ - return -ENOSYS; -} - -int sysctl_ms_jiffies(struct ctl_table *table, - void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen) -{ - return -ENOSYS; -} - /* * No sense putting this after each symbol definition, twice, * exception granted :-) @@ -2688,9 +2649,4 @@ EXPORT_SYMBOL(proc_doulongvec_minmax); EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax); EXPORT_SYMBOL(register_sysctl_table); EXPORT_SYMBOL(register_sysctl_paths); -EXPORT_SYMBOL(sysctl_intvec); -EXPORT_SYMBOL(sysctl_jiffies); -EXPORT_SYMBOL(sysctl_ms_jiffies); -EXPORT_SYMBOL(sysctl_string); -EXPORT_SYMBOL(sysctl_data); EXPORT_SYMBOL(unregister_sysctl_table); -- cgit v1.3-8-gc7d7 From 7aae816dae150caad8880357e42303935c0179a8 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 10 Nov 2009 16:08:04 +0000 Subject: ASoC: Add bit clock rate calculator utility functions Many devices need to calculate the bit clock rate desired to work out the clock configuration required for the device. Provide utility functions to do this using both hw_params structures and raw numbers. Signed-off-by: Mark Brown Acked-by: Liam Girdwood --- include/sound/soc.h | 5 ++++ sound/soc/Makefile | 2 +- sound/soc/soc-utils.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 74 insertions(+), 1 deletion(-) create mode 100644 sound/soc/soc-utils.c (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index 7f3a4c5028da..310a21949a3e 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -227,6 +227,11 @@ int snd_soc_codec_set_cache_io(struct snd_soc_codec *codec, void snd_soc_free_pcms(struct snd_soc_device *socdev); int snd_soc_new_pcms(struct snd_soc_device *socdev, int idx, const char *xid); +/* Utility functions to get clock rates from various things */ +int snd_soc_calc_frame_size(int sample_size, int channels, int tdm_slots); +int snd_soc_params_to_frame_size(struct snd_pcm_hw_params *params); +int snd_soc_params_to_bclk(struct snd_pcm_hw_params *parms); + /* set runtime hw params */ int snd_soc_set_runtime_hwparams(struct snd_pcm_substream *substream, const struct snd_pcm_hardware *hw); diff --git a/sound/soc/Makefile b/sound/soc/Makefile index 0c5eac01bf2e..1470141d4167 100644 --- a/sound/soc/Makefile +++ b/sound/soc/Makefile @@ -1,4 +1,4 @@ -snd-soc-core-objs := soc-core.o soc-dapm.o soc-jack.o soc-cache.o +snd-soc-core-objs := soc-core.o soc-dapm.o soc-jack.o soc-cache.o soc-utils.o obj-$(CONFIG_SND_SOC) += snd-soc-core.o obj-$(CONFIG_SND_SOC) += codecs/ diff --git a/sound/soc/soc-utils.c b/sound/soc/soc-utils.c new file mode 100644 index 000000000000..b16aaaeb0aab --- /dev/null +++ b/sound/soc/soc-utils.c @@ -0,0 +1,68 @@ +/* + * soc-util.c -- ALSA SoC Audio Layer utility functions + * + * Copyright 2009 Wolfson Microelectronics PLC. + * + * Author: Mark Brown + * Liam Girdwood + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include +#include +#include +#include + +int snd_soc_calc_frame_size(int sample_size, int channels, int tdm_slots) +{ + return sample_size * channels * tdm_slots; +} +EXPORT_SYMBOL_GPL(snd_soc_calc_frame_size); + +int snd_soc_params_to_frame_size(struct snd_pcm_hw_params *params) +{ + int sample_size; + + switch (params_format(params)) { + case SNDRV_PCM_FORMAT_S16_LE: + case SNDRV_PCM_FORMAT_S16_BE: + sample_size = 16; + break; + case SNDRV_PCM_FORMAT_S20_3LE: + case SNDRV_PCM_FORMAT_S20_3BE: + sample_size = 20; + break; + case SNDRV_PCM_FORMAT_S24_LE: + case SNDRV_PCM_FORMAT_S24_BE: + sample_size = 24; + break; + case SNDRV_PCM_FORMAT_S32_LE: + case SNDRV_PCM_FORMAT_S32_BE: + sample_size = 32; + break; + default: + return -ENOTSUPP; + } + + return snd_soc_calc_frame_size(sample_size, params_channels(params), + 1); +} +EXPORT_SYMBOL_GPL(snd_soc_params_to_frame_size); + +int snd_soc_params_to_bclk(struct snd_pcm_hw_params *params) +{ + int ret; + + ret = snd_soc_params_to_frame_size(params); + + if (ret > 0) + return ret * params_rate(params); + else + return ret; +} +EXPORT_SYMBOL_GPL(snd_soc_params_to_bclk); -- cgit v1.3-8-gc7d7 From c871a05315d1a76034ea06feeda92081e1d608bf Mon Sep 17 00:00:00 2001 From: Joonyoung Shim Date: Thu, 12 Nov 2009 17:14:04 +0900 Subject: ASoC: Add jack_status_check callback function for GPIO jacks The jack_status_check callback function is the interface to check the status of the jack. Some target provides the method to distinguish what is the jack inserted - headphone jack, microphone jack, tvout jack, etc, so we can implement it using the jack_status_check function. Signed-off-by: Joonyoung Shim Acked-by: Liam Girdwood Signed-off-by: Mark Brown --- include/sound/soc.h | 2 ++ sound/soc/soc-jack.c | 3 +++ 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index 310a21949a3e..13b117aac5d9 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -332,6 +332,8 @@ struct snd_soc_jack_gpio { int debounce_time; struct snd_soc_jack *jack; struct work_struct work; + + int (*jack_status_check)(void); }; #endif diff --git a/sound/soc/soc-jack.c b/sound/soc/soc-jack.c index 12124149601e..3c07a94c2e30 100644 --- a/sound/soc/soc-jack.c +++ b/sound/soc/soc-jack.c @@ -163,6 +163,9 @@ static void snd_soc_jack_gpio_detect(struct snd_soc_jack_gpio *gpio) else report = 0; + if (gpio->jack_status_check) + report = gpio->jack_status_check(); + snd_soc_jack_report(jack, report, gpio->report); } -- cgit v1.3-8-gc7d7 From 67178767b936fb47a3a5e88097cff41ccbda7acb Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 13 Nov 2009 10:06:34 +0100 Subject: tracing: Rename 'lockdep' event subsystem into 'lock' Lockdep events subsystem gathers various locking related events such as a request, release, contention or acquisition of a lock. The name of this event subsystem is a bit of a misnomer since these events are not quite related to lockdep but more generally to locking, ie: these events are not reporting lock dependencies or possible deadlock scenario but pure locking events. Hence this rename. Signed-off-by: Frederic Weisbecker Acked-by: Peter Zijlstra Acked-by: Hitoshi Mitake Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith Cc: Paul Mackerras Cc: Steven Rostedt Cc: Li Zefan LKML-Reference: <1258103194-843-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- include/trace/events/lock.h | 96 ++++++++++++++++++++++++++++++++++++++++++ include/trace/events/lockdep.h | 96 ------------------------------------------ kernel/lockdep.c | 2 +- 3 files changed, 97 insertions(+), 97 deletions(-) create mode 100644 include/trace/events/lock.h delete mode 100644 include/trace/events/lockdep.h (limited to 'include') diff --git a/include/trace/events/lock.h b/include/trace/events/lock.h new file mode 100644 index 000000000000..a870ba125aa8 --- /dev/null +++ b/include/trace/events/lock.h @@ -0,0 +1,96 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM lock + +#if !defined(_TRACE_LOCK_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_LOCK_H + +#include +#include + +#ifdef CONFIG_LOCKDEP + +TRACE_EVENT(lock_acquire, + + TP_PROTO(struct lockdep_map *lock, unsigned int subclass, + int trylock, int read, int check, + struct lockdep_map *next_lock, unsigned long ip), + + TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip), + + TP_STRUCT__entry( + __field(unsigned int, flags) + __string(name, lock->name) + ), + + TP_fast_assign( + __entry->flags = (trylock ? 1 : 0) | (read ? 2 : 0); + __assign_str(name, lock->name); + ), + + TP_printk("%s%s%s", (__entry->flags & 1) ? "try " : "", + (__entry->flags & 2) ? "read " : "", + __get_str(name)) +); + +TRACE_EVENT(lock_release, + + TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip), + + TP_ARGS(lock, nested, ip), + + TP_STRUCT__entry( + __string(name, lock->name) + ), + + TP_fast_assign( + __assign_str(name, lock->name); + ), + + TP_printk("%s", __get_str(name)) +); + +#ifdef CONFIG_LOCK_STAT + +TRACE_EVENT(lock_contended, + + TP_PROTO(struct lockdep_map *lock, unsigned long ip), + + TP_ARGS(lock, ip), + + TP_STRUCT__entry( + __string(name, lock->name) + ), + + TP_fast_assign( + __assign_str(name, lock->name); + ), + + TP_printk("%s", __get_str(name)) +); + +TRACE_EVENT(lock_acquired, + TP_PROTO(struct lockdep_map *lock, unsigned long ip, s64 waittime), + + TP_ARGS(lock, ip, waittime), + + TP_STRUCT__entry( + __string(name, lock->name) + __field(unsigned long, wait_usec) + __field(unsigned long, wait_nsec_rem) + ), + TP_fast_assign( + __assign_str(name, lock->name); + __entry->wait_nsec_rem = do_div(waittime, NSEC_PER_USEC); + __entry->wait_usec = (unsigned long) waittime; + ), + TP_printk("%s (%lu.%03lu us)", __get_str(name), __entry->wait_usec, + __entry->wait_nsec_rem) +); + +#endif +#endif + +#endif /* _TRACE_LOCK_H */ + +/* This part must be outside protection */ +#include diff --git a/include/trace/events/lockdep.h b/include/trace/events/lockdep.h deleted file mode 100644 index bcf1d209a00d..000000000000 --- a/include/trace/events/lockdep.h +++ /dev/null @@ -1,96 +0,0 @@ -#undef TRACE_SYSTEM -#define TRACE_SYSTEM lockdep - -#if !defined(_TRACE_LOCKDEP_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_LOCKDEP_H - -#include -#include - -#ifdef CONFIG_LOCKDEP - -TRACE_EVENT(lock_acquire, - - TP_PROTO(struct lockdep_map *lock, unsigned int subclass, - int trylock, int read, int check, - struct lockdep_map *next_lock, unsigned long ip), - - TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip), - - TP_STRUCT__entry( - __field(unsigned int, flags) - __string(name, lock->name) - ), - - TP_fast_assign( - __entry->flags = (trylock ? 1 : 0) | (read ? 2 : 0); - __assign_str(name, lock->name); - ), - - TP_printk("%s%s%s", (__entry->flags & 1) ? "try " : "", - (__entry->flags & 2) ? "read " : "", - __get_str(name)) -); - -TRACE_EVENT(lock_release, - - TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip), - - TP_ARGS(lock, nested, ip), - - TP_STRUCT__entry( - __string(name, lock->name) - ), - - TP_fast_assign( - __assign_str(name, lock->name); - ), - - TP_printk("%s", __get_str(name)) -); - -#ifdef CONFIG_LOCK_STAT - -TRACE_EVENT(lock_contended, - - TP_PROTO(struct lockdep_map *lock, unsigned long ip), - - TP_ARGS(lock, ip), - - TP_STRUCT__entry( - __string(name, lock->name) - ), - - TP_fast_assign( - __assign_str(name, lock->name); - ), - - TP_printk("%s", __get_str(name)) -); - -TRACE_EVENT(lock_acquired, - TP_PROTO(struct lockdep_map *lock, unsigned long ip, s64 waittime), - - TP_ARGS(lock, ip, waittime), - - TP_STRUCT__entry( - __string(name, lock->name) - __field(unsigned long, wait_usec) - __field(unsigned long, wait_nsec_rem) - ), - TP_fast_assign( - __assign_str(name, lock->name); - __entry->wait_nsec_rem = do_div(waittime, NSEC_PER_USEC); - __entry->wait_usec = (unsigned long) waittime; - ), - TP_printk("%s (%lu.%03lu us)", __get_str(name), __entry->wait_usec, - __entry->wait_nsec_rem) -); - -#endif -#endif - -#endif /* _TRACE_LOCKDEP_H */ - -/* This part must be outside protection */ -#include diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 9af56723c096..f5dcd36d3151 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -49,7 +49,7 @@ #include "lockdep_internals.h" #define CREATE_TRACE_POINTS -#include +#include #ifdef CONFIG_PROVE_LOCKING int prove_locking = 1; -- cgit v1.3-8-gc7d7 From 23af368e9a904f59256c27d371ce223d6cee0430 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 11 Nov 2009 14:05:25 +0000 Subject: clockevents: Use u32 for mult and shift factors The mult and shift factors of clock events differ in their data type from those of clock sources for no reason. u32 is sufficient for both. shift is always <= 32 and mult is limited to 2^32-1 to avoid 64bit multiplication overflows in the conversion. Preparatory patch for a generic mult/shift factor calculation function. Signed-off-by: Thomas Gleixner Tested-by: Mikael Pettersson Acked-by: Ralf Baechle Acked-by: Linus Walleij Cc: John Stultz LKML-Reference: <20091111134229.725664788@linutronix.de> --- include/linux/clockchips.h | 4 ++-- kernel/time/timer_list.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 3a1dbba4d3ae..3b5841016276 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -79,8 +79,8 @@ struct clock_event_device { unsigned int features; unsigned long max_delta_ns; unsigned long min_delta_ns; - unsigned long mult; - int shift; + u32 mult; + u32 shift; int rating; int irq; const struct cpumask *cpumask; diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index 1b5b7aa2fdfd..fa00da108a14 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -206,8 +206,8 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu) SEQ_printf(m, "%s\n", dev->name); SEQ_printf(m, " max_delta_ns: %lu\n", dev->max_delta_ns); SEQ_printf(m, " min_delta_ns: %lu\n", dev->min_delta_ns); - SEQ_printf(m, " mult: %lu\n", dev->mult); - SEQ_printf(m, " shift: %d\n", dev->shift); + SEQ_printf(m, " mult: %u\n", dev->mult); + SEQ_printf(m, " shift: %u\n", dev->shift); SEQ_printf(m, " mode: %d\n", dev->mode); SEQ_printf(m, " next_event: %Ld nsecs\n", (unsigned long long) ktime_to_ns(dev->next_event)); -- cgit v1.3-8-gc7d7 From 7d2f944a2b836c69a9d260a0a5f0d1720d57fdff Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 11 Nov 2009 14:05:29 +0000 Subject: clocksource: Provide a generic mult/shift factor calculation MIPS has two functions to calculcate the mult/shift factors for clock sources and clock events at run time. ARM needs such functions as well. Implement a function which calculates the mult/shift factors based on the frequencies to which and from which is converted. The function also has a parameter to specify the minimum conversion range in seconds. This range is guaranteed not to produce a 64bit overflow when a value is multiplied with the calculated mult factor. The larger the conversion range the less becomes the conversion accuracy. Provide two inline wrappers which handle clock events and clock sources. For clock events the "from" frequency is nano seconds per second which corresponds to 1GHz and "to" is the device frequency. For clock sources "from" is the device frequency and "to" is nano seconds per second. Signed-off-by: Thomas Gleixner Tested-by: Mikael Pettersson Acked-by: Ralf Baechle Acked-by: Linus Walleij Cc: John Stultz LKML-Reference: <20091111134229.766673305@linutronix.de> --- include/linux/clockchips.h | 7 ++++++ include/linux/clocksource.h | 10 +++++++++ kernel/time/clocksource.c | 53 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 70 insertions(+) (limited to 'include') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 3b5841016276..4d438b0bc10a 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -130,6 +130,13 @@ extern int clockevents_program_event(struct clock_event_device *dev, extern void clockevents_handle_noop(struct clock_event_device *dev); +static inline void +clockevents_calc_mult_shift(struct clock_event_device *ce, u32 freq, u32 minsec) +{ + return clocks_calc_mult_shift(&ce->mult, &ce->shift, NSEC_PER_SEC, + freq, minsec); +} + #ifdef CONFIG_GENERIC_CLOCKEVENTS extern void clockevents_notify(unsigned long reason, void *arg); #else diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 83d2fbd81b93..f57f88250526 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -279,6 +279,16 @@ extern void clocksource_resume(void); extern struct clocksource * __init __weak clocksource_default_clock(void); extern void clocksource_mark_unstable(struct clocksource *cs); +extern void +clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec); + +static inline void +clocksource_calc_mult_shift(struct clocksource *cs, u32 freq, u32 minsec) +{ + return clocks_calc_mult_shift(&cs->mult, &cs->shift, freq, + NSEC_PER_SEC, minsec); +} + #ifdef CONFIG_GENERIC_TIME_VSYSCALL extern void update_vsyscall(struct timespec *ts, struct clocksource *c); extern void update_vsyscall_tz(void); diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 5e18c6ab2c6a..407c0894ef37 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -107,6 +107,59 @@ u64 timecounter_cyc2time(struct timecounter *tc, } EXPORT_SYMBOL(timecounter_cyc2time); +/** + * clocks_calc_mult_shift - calculate mult/shift factors for scaled math of clocks + * @mult: pointer to mult variable + * @shift: pointer to shift variable + * @from: frequency to convert from + * @to: frequency to convert to + * @minsec: guaranteed runtime conversion range in seconds + * + * The function evaluates the shift/mult pair for the scaled math + * operations of clocksources and clockevents. + * + * @to and @from are frequency values in HZ. For clock sources @to is + * NSEC_PER_SEC == 1GHz and @from is the counter frequency. For clock + * event @to is the counter frequency and @from is NSEC_PER_SEC. + * + * The @minsec conversion range argument controls the time frame in + * seconds which must be covered by the runtime conversion with the + * calculated mult and shift factors. This guarantees that no 64bit + * overflow happens when the input value of the conversion is + * multiplied with the calculated mult factor. Larger ranges may + * reduce the conversion accuracy by chosing smaller mult and shift + * factors. + */ +void +clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec) +{ + u64 tmp; + u32 sft, sftacc= 32; + + /* + * Calculate the shift factor which is limiting the conversion + * range: + */ + tmp = ((u64)minsec * from) >> 32; + while (tmp) { + tmp >>=1; + sftacc--; + } + + /* + * Find the conversion shift/mult pair which has the best + * accuracy and fits the maxsec conversion range: + */ + for (sft = 32; sft > 0; sft--) { + tmp = (u64) to << sft; + do_div(tmp, from); + if ((tmp >> sftacc) == 0) + break; + } + *mult = tmp; + *shift = sft; +} + /*[Clocksource internal variables]--------- * curr_clocksource: * currently selected clocksource. -- cgit v1.3-8-gc7d7 From 98962465ed9e6ea99c38e0af63fe1dcb5a79dc25 Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Tue, 18 Aug 2009 12:45:10 -0500 Subject: nohz: Prevent clocksource wrapping during idle The dynamic tick allows the kernel to sleep for periods longer than a single tick, but it does not limit the sleep time currently. In the worst case the kernel could sleep longer than the wrap around time of the time keeping clock source which would result in losing track of time. Prevent this by limiting it to the safe maximum sleep time of the current time keeping clock source. The value is calculated when the clock source is registered. [ tglx: simplified the code a bit and massaged the commit msg ] Signed-off-by: Jon Hunter Cc: John Stultz LKML-Reference: <1250617512-23567-2-git-send-email-jon-hunter@ti.com> Signed-off-by: Thomas Gleixner --- include/linux/clocksource.h | 2 ++ include/linux/time.h | 1 + kernel/time/clocksource.c | 44 ++++++++++++++++++++++++++++++++++++++ kernel/time/tick-sched.c | 52 +++++++++++++++++++++++++++++++++------------ kernel/time/timekeeping.c | 11 ++++++++++ 5 files changed, 96 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index f57f88250526..279c5478e8a6 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -151,6 +151,7 @@ extern u64 timecounter_cyc2time(struct timecounter *tc, * subtraction of non 64 bit counters * @mult: cycle to nanosecond multiplier * @shift: cycle to nanosecond divisor (power of two) + * @max_idle_ns: max idle time permitted by the clocksource (nsecs) * @flags: flags describing special properties * @vread: vsyscall based read * @resume: resume function for the clocksource, if necessary @@ -168,6 +169,7 @@ struct clocksource { cycle_t mask; u32 mult; u32 shift; + u64 max_idle_ns; unsigned long flags; cycle_t (*vread)(void); void (*resume)(void); diff --git a/include/linux/time.h b/include/linux/time.h index fe04e5ef6a59..6e026e45a179 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -148,6 +148,7 @@ extern void monotonic_to_bootbased(struct timespec *ts); extern struct timespec timespec_trunc(struct timespec t, unsigned gran); extern int timekeeping_valid_for_hres(void); +extern u64 timekeeping_max_deferment(void); extern void update_wall_time(void); extern void update_xtime_cache(u64 nsec); extern void timekeeping_leap_insert(int leapsecond); diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 407c0894ef37..b65b242f04dd 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -468,6 +468,47 @@ void clocksource_touch_watchdog(void) #ifdef CONFIG_GENERIC_TIME +/** + * clocksource_max_deferment - Returns max time the clocksource can be deferred + * @cs: Pointer to clocksource + * + */ +static u64 clocksource_max_deferment(struct clocksource *cs) +{ + u64 max_nsecs, max_cycles; + + /* + * Calculate the maximum number of cycles that we can pass to the + * cyc2ns function without overflowing a 64-bit signed result. The + * maximum number of cycles is equal to ULLONG_MAX/cs->mult which + * is equivalent to the below. + * max_cycles < (2^63)/cs->mult + * max_cycles < 2^(log2((2^63)/cs->mult)) + * max_cycles < 2^(log2(2^63) - log2(cs->mult)) + * max_cycles < 2^(63 - log2(cs->mult)) + * max_cycles < 1 << (63 - log2(cs->mult)) + * Please note that we add 1 to the result of the log2 to account for + * any rounding errors, ensure the above inequality is satisfied and + * no overflow will occur. + */ + max_cycles = 1ULL << (63 - (ilog2(cs->mult) + 1)); + + /* + * The actual maximum number of cycles we can defer the clocksource is + * determined by the minimum of max_cycles and cs->mask. + */ + max_cycles = min_t(u64, max_cycles, (u64) cs->mask); + max_nsecs = clocksource_cyc2ns(max_cycles, cs->mult, cs->shift); + + /* + * To ensure that the clocksource does not wrap whilst we are idle, + * limit the time the clocksource can be deferred by 12.5%. Please + * note a margin of 12.5% is used because this can be computed with + * a shift, versus say 10% which would require division. + */ + return max_nsecs - (max_nsecs >> 5); +} + /** * clocksource_select - Select the best clocksource available * @@ -564,6 +605,9 @@ static void clocksource_enqueue(struct clocksource *cs) */ int clocksource_register(struct clocksource *cs) { + /* calculate max idle time permitted for this clocksource */ + cs->max_idle_ns = clocksource_max_deferment(cs); + mutex_lock(&clocksource_mutex); clocksource_enqueue(cs); clocksource_select(); diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index c65ba0faa98f..a80b4644fe6b 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -208,6 +208,7 @@ void tick_nohz_stop_sched_tick(int inidle) struct tick_sched *ts; ktime_t last_update, expires, now; struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; + u64 time_delta; int cpu; local_irq_save(flags); @@ -262,6 +263,17 @@ void tick_nohz_stop_sched_tick(int inidle) seq = read_seqbegin(&xtime_lock); last_update = last_jiffies_update; last_jiffies = jiffies; + + /* + * On SMP we really should only care for the CPU which + * has the do_timer duty assigned. All other CPUs can + * sleep as long as they want. + */ + if (cpu == tick_do_timer_cpu || + tick_do_timer_cpu == TICK_DO_TIMER_NONE) + time_delta = timekeeping_max_deferment(); + else + time_delta = KTIME_MAX; } while (read_seqretry(&xtime_lock, seq)); if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) || @@ -284,11 +296,26 @@ void tick_nohz_stop_sched_tick(int inidle) if ((long)delta_jiffies >= 1) { /* - * calculate the expiry time for the next timer wheel - * timer - */ - expires = ktime_add_ns(last_update, tick_period.tv64 * - delta_jiffies); + * calculate the expiry time for the next timer wheel + * timer. delta_jiffies >= NEXT_TIMER_MAX_DELTA signals + * that there is no timer pending or at least extremely + * far into the future (12 days for HZ=1000). In this + * case we set the expiry to the end of time. + */ + if (likely(delta_jiffies < NEXT_TIMER_MAX_DELTA)) { + /* + * Calculate the time delta for the next timer event. + * If the time delta exceeds the maximum time delta + * permitted by the current clocksource then adjust + * the time delta accordingly to ensure the + * clocksource does not wrap. + */ + time_delta = min_t(u64, time_delta, + tick_period.tv64 * delta_jiffies); + expires = ktime_add_ns(last_update, time_delta); + } else { + expires.tv64 = KTIME_MAX; + } /* * If this cpu is the one which updates jiffies, then @@ -332,22 +359,19 @@ void tick_nohz_stop_sched_tick(int inidle) ts->idle_sleeps++; + /* Mark expires */ + ts->idle_expires = expires; + /* - * delta_jiffies >= NEXT_TIMER_MAX_DELTA signals that - * there is no timer pending or at least extremly far - * into the future (12 days for HZ=1000). In this case - * we simply stop the tick timer: + * If the expiration time == KTIME_MAX, then + * in this case we simply stop the tick timer. */ - if (unlikely(delta_jiffies >= NEXT_TIMER_MAX_DELTA)) { - ts->idle_expires.tv64 = KTIME_MAX; + if (unlikely(expires.tv64 == KTIME_MAX)) { if (ts->nohz_mode == NOHZ_MODE_HIGHRES) hrtimer_cancel(&ts->sched_timer); goto out; } - /* Mark expiries */ - ts->idle_expires = expires; - if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { hrtimer_start(&ts->sched_timer, expires, HRTIMER_MODE_ABS_PINNED); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 96b3f0dfa5dc..5d4d4239a0aa 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -477,6 +477,17 @@ int timekeeping_valid_for_hres(void) return ret; } +/** + * timekeeping_max_deferment - Returns max time the clocksource can be deferred + * + * Caller must observe xtime_lock via read_seqbegin/read_seqretry to + * ensure that the clocksource does not change! + */ +u64 timekeeping_max_deferment(void) +{ + return timekeeper.clock->max_idle_ns; +} + /** * read_persistent_clock - Return time from the persistent clock. * -- cgit v1.3-8-gc7d7 From 27185016b806d5a1181ff501cae120582b2b27dd Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 12 Nov 2009 22:12:06 +0100 Subject: nohz: Track last do_timer() cpu The previous patch which limits the sleep time to the maximum deferment time of the time keeping clocksource has some limitations on SMP machines: if all CPUs are idle then for all CPUs the maximum sleep time is limited. Solve this by keeping track of which cpu had the do_timer() duty assigned last and limit the sleep time only for this cpu. Signed-off-by: Thomas Gleixner LKML-Reference: Cc: Jon Hunter Cc: John Stultz --- include/linux/tick.h | 2 ++ kernel/time/tick-sched.c | 52 ++++++++++++++++++++++++++---------------------- 2 files changed, 30 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/linux/tick.h b/include/linux/tick.h index 8dc082194b22..d2ae79e21be3 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -43,6 +43,7 @@ enum tick_nohz_mode { * @idle_exittime: Time when the idle state was left * @idle_sleeptime: Sum of the time slept in idle with sched tick stopped * @sleep_length: Duration of the current idle sleep + * @do_timer_lst: CPU was the last one doing do_timer before going idle */ struct tick_sched { struct hrtimer sched_timer; @@ -64,6 +65,7 @@ struct tick_sched { unsigned long last_jiffies; unsigned long next_jiffies; ktime_t idle_expires; + int do_timer_last; }; extern void __init tick_init(void); diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index a80b4644fe6b..df133bc29f89 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -263,17 +263,7 @@ void tick_nohz_stop_sched_tick(int inidle) seq = read_seqbegin(&xtime_lock); last_update = last_jiffies_update; last_jiffies = jiffies; - - /* - * On SMP we really should only care for the CPU which - * has the do_timer duty assigned. All other CPUs can - * sleep as long as they want. - */ - if (cpu == tick_do_timer_cpu || - tick_do_timer_cpu == TICK_DO_TIMER_NONE) - time_delta = timekeeping_max_deferment(); - else - time_delta = KTIME_MAX; + time_delta = timekeeping_max_deferment(); } while (read_seqretry(&xtime_lock, seq)); if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) || @@ -295,6 +285,29 @@ void tick_nohz_stop_sched_tick(int inidle) /* Schedule the tick, if we are at least one jiffie off */ if ((long)delta_jiffies >= 1) { + /* + * If this cpu is the one which updates jiffies, then + * give up the assignment and let it be taken by the + * cpu which runs the tick timer next, which might be + * this cpu as well. If we don't drop this here the + * jiffies might be stale and do_timer() never + * invoked. Keep track of the fact that it was the one + * which had the do_timer() duty last. If this cpu is + * the one which had the do_timer() duty last, we + * limit the sleep time to the timekeeping + * max_deferement value which we retrieved + * above. Otherwise we can sleep as long as we want. + */ + if (cpu == tick_do_timer_cpu) { + tick_do_timer_cpu = TICK_DO_TIMER_NONE; + ts->do_timer_last = 1; + } else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) { + time_delta = KTIME_MAX; + ts->do_timer_last = 0; + } else if (!ts->do_timer_last) { + time_delta = KTIME_MAX; + } + /* * calculate the expiry time for the next timer wheel * timer. delta_jiffies >= NEXT_TIMER_MAX_DELTA signals @@ -312,21 +325,12 @@ void tick_nohz_stop_sched_tick(int inidle) */ time_delta = min_t(u64, time_delta, tick_period.tv64 * delta_jiffies); - expires = ktime_add_ns(last_update, time_delta); - } else { - expires.tv64 = KTIME_MAX; } - /* - * If this cpu is the one which updates jiffies, then - * give up the assignment and let it be taken by the - * cpu which runs the tick timer next, which might be - * this cpu as well. If we don't drop this here the - * jiffies might be stale and do_timer() never - * invoked. - */ - if (cpu == tick_do_timer_cpu) - tick_do_timer_cpu = TICK_DO_TIMER_NONE; + if (time_delta < KTIME_MAX) + expires = ktime_add_ns(last_update, time_delta); + else + expires.tv64 = KTIME_MAX; if (delta_jiffies > 1) cpumask_set_cpu(cpu, nohz_cpu_mask); -- cgit v1.3-8-gc7d7 From 97813f2fe77804a4464564c75ba8d8826377feea Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Tue, 18 Aug 2009 12:45:11 -0500 Subject: nohz: Allow 32-bit machines to sleep for more than 2.15 seconds In the dynamic tick code, "max_delta_ns" (member of the "clock_event_device" structure) represents the maximum sleep time that can occur between timer events in nanoseconds. The variable, "max_delta_ns", is defined as an unsigned long which is a 32-bit integer for 32-bit machines and a 64-bit integer for 64-bit machines (if -m64 option is used for gcc). The value of max_delta_ns is set by calling the function "clockevent_delta2ns()" which returns a maximum value of LONG_MAX. For a 32-bit machine LONG_MAX is equal to 0x7fffffff and in nanoseconds this equates to ~2.15 seconds. Hence, the maximum sleep time for a 32-bit machine is ~2.15 seconds, where as for a 64-bit machine it will be many years. This patch changes the type of max_delta_ns to be "u64" instead of "unsigned long" so that this variable is a 64-bit type for both 32-bit and 64-bit machines. It also changes the maximum value returned by clockevent_delta2ns() to KTIME_MAX. Hence this allows a 32-bit machine to sleep for longer than ~2.15 seconds. Please note that this patch also changes "min_delta_ns" to be "u64" too and although this is unnecessary, it makes the patch simpler as it avoids to fixup all callers of clockevent_delta2ns(). [ tglx: changed "unsigned long long" to u64 as we use this data type through out the time code ] Signed-off-by: Jon Hunter Cc: John Stultz LKML-Reference: <1250617512-23567-3-git-send-email-jon-hunter@ti.com> Signed-off-by: Thomas Gleixner --- include/linux/clockchips.h | 8 ++++---- kernel/hrtimer.c | 3 ++- kernel/time/clockevents.c | 11 +++++------ kernel/time/tick-oneshot.c | 4 ++-- kernel/time/timer_list.c | 6 ++++-- 5 files changed, 17 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 4d438b0bc10a..0cf725bdd2a1 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -77,8 +77,8 @@ enum clock_event_nofitiers { struct clock_event_device { const char *name; unsigned int features; - unsigned long max_delta_ns; - unsigned long min_delta_ns; + u64 max_delta_ns; + u64 min_delta_ns; u32 mult; u32 shift; int rating; @@ -116,8 +116,8 @@ static inline unsigned long div_sc(unsigned long ticks, unsigned long nsec, } /* Clock event layer functions */ -extern unsigned long clockevent_delta2ns(unsigned long latch, - struct clock_event_device *evt); +extern u64 clockevent_delta2ns(unsigned long latch, + struct clock_event_device *evt); extern void clockevents_register_device(struct clock_event_device *dev); extern void clockevents_exchange_device(struct clock_event_device *old, diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 6d7020490f94..c215b74cd953 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1240,7 +1240,8 @@ hrtimer_interrupt_hanging(struct clock_event_device *dev, force_clock_reprogram = 1; dev->min_delta_ns = (unsigned long)try_time.tv64 * 3; printk(KERN_WARNING "hrtimer: interrupt too slow, " - "forcing clock min delta to %lu ns\n", dev->min_delta_ns); + "forcing clock min delta to %llu ns\n", + (unsigned long long) dev->min_delta_ns); } /* * High resolution timer interrupt diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 620b58abdc32..05e8aeedcdf3 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -37,10 +37,9 @@ static DEFINE_SPINLOCK(clockevents_lock); * * Math helper, returns latch value converted to nanoseconds (bound checked) */ -unsigned long clockevent_delta2ns(unsigned long latch, - struct clock_event_device *evt) +u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt) { - u64 clc = ((u64) latch << evt->shift); + u64 clc = (u64) latch << evt->shift; if (unlikely(!evt->mult)) { evt->mult = 1; @@ -50,10 +49,10 @@ unsigned long clockevent_delta2ns(unsigned long latch, do_div(clc, evt->mult); if (clc < 1000) clc = 1000; - if (clc > LONG_MAX) - clc = LONG_MAX; + if (clc > KTIME_MAX) + clc = KTIME_MAX; - return (unsigned long) clc; + return clc; } EXPORT_SYMBOL_GPL(clockevent_delta2ns); diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c index a96c0e2b89cf..0a8a213016f0 100644 --- a/kernel/time/tick-oneshot.c +++ b/kernel/time/tick-oneshot.c @@ -50,9 +50,9 @@ int tick_dev_program_event(struct clock_event_device *dev, ktime_t expires, dev->min_delta_ns += dev->min_delta_ns >> 1; printk(KERN_WARNING - "CE: %s increasing min_delta_ns to %lu nsec\n", + "CE: %s increasing min_delta_ns to %llu nsec\n", dev->name ? dev->name : "?", - dev->min_delta_ns << 1); + (unsigned long long) dev->min_delta_ns << 1); i = 0; } diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index fa00da108a14..665c76edbf17 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -204,8 +204,10 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu) return; } SEQ_printf(m, "%s\n", dev->name); - SEQ_printf(m, " max_delta_ns: %lu\n", dev->max_delta_ns); - SEQ_printf(m, " min_delta_ns: %lu\n", dev->min_delta_ns); + SEQ_printf(m, " max_delta_ns: %llu\n", + (unsigned long long) dev->max_delta_ns); + SEQ_printf(m, " min_delta_ns: %llu\n", + (unsigned long long) dev->min_delta_ns); SEQ_printf(m, " mult: %u\n", dev->mult); SEQ_printf(m, " shift: %u\n", dev->shift); SEQ_printf(m, " mode: %d\n", dev->mode); -- cgit v1.3-8-gc7d7 From 6beb000923882f6204ea2cfcd932e568e900803f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 9 Nov 2009 15:21:34 +0000 Subject: locking: Make inlining decision Kconfig based commit 892a7c67 (locking: Allow arch-inlined spinlocks) implements the selection of which lock functions are inlined based on defines in arch/.../spinlock.h: #define __always_inline__LOCK_FUNCTION Despite of the name __always_inline__* the lock functions can be built out of line depending on config options. Also if the arch does not set some inline defines the generic code might set them; again depending on config options. This makes it unnecessary hard to figure out when and which lock functions are inlined. Aside of that it makes it way harder and messier for -rt to manipulate the lock functions. Convert the inlining decision to CONFIG switches. Each lock function is inlined depending on CONFIG_INLINE_*. The configs implement the existing dependencies. The architecture code can select ARCH_INLINE_* to signal that it wants the corresponding lock function inlined. ARCH_INLINE_* is necessary as Kconfig ignores "depends on" restrictions when a config element is selected. No functional change. Signed-off-by: Thomas Gleixner LKML-Reference: <20091109151428.504477141@linutronix.de> Acked-by: Heiko Carstens Reviewed-by: Ingo Molnar Acked-by: Peter Zijlstra --- arch/s390/Kconfig | 28 ++++++ arch/s390/include/asm/spinlock.h | 29 ------ include/linux/spinlock_api_smp.h | 75 ++++++--------- init/Kconfig | 1 + kernel/Kconfig.locks | 199 +++++++++++++++++++++++++++++++++++++++ kernel/spinlock.c | 56 +++++------ 6 files changed, 284 insertions(+), 104 deletions(-) create mode 100644 kernel/Kconfig.locks (limited to 'include') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 43c0acad7160..16c673096a22 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -95,6 +95,34 @@ config S390 select HAVE_ARCH_TRACEHOOK select INIT_ALL_POSSIBLE select HAVE_PERF_EVENTS + select ARCH_INLINE_SPIN_TRYLOCK + select ARCH_INLINE_SPIN_TRYLOCK_BH + select ARCH_INLINE_SPIN_LOCK + select ARCH_INLINE_SPIN_LOCK_BH + select ARCH_INLINE_SPIN_LOCK_IRQ + select ARCH_INLINE_SPIN_LOCK_IRQSAVE + select ARCH_INLINE_SPIN_UNLOCK + select ARCH_INLINE_SPIN_UNLOCK_BH + select ARCH_INLINE_SPIN_UNLOCK_IRQ + select ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE + select ARCH_INLINE_READ_TRYLOCK + select ARCH_INLINE_READ_LOCK + select ARCH_INLINE_READ_LOCK_BH + select ARCH_INLINE_READ_LOCK_IRQ + select ARCH_INLINE_READ_LOCK_IRQSAVE + select ARCH_INLINE_READ_UNLOCK + select ARCH_INLINE_READ_UNLOCK_BH + select ARCH_INLINE_READ_UNLOCK_IRQ + select ARCH_INLINE_READ_UNLOCK_IRQRESTORE + select ARCH_INLINE_WRITE_TRYLOCK + select ARCH_INLINE_WRITE_LOCK + select ARCH_INLINE_WRITE_LOCK_BH + select ARCH_INLINE_WRITE_LOCK_IRQ + select ARCH_INLINE_WRITE_LOCK_IRQSAVE + select ARCH_INLINE_WRITE_UNLOCK + select ARCH_INLINE_WRITE_UNLOCK_BH + select ARCH_INLINE_WRITE_UNLOCK_IRQ + select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE config SCHED_OMIT_FRAME_POINTER bool diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index 41ce6861174e..c9af0d19c7ab 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -191,33 +191,4 @@ static inline int __raw_write_trylock(raw_rwlock_t *rw) #define _raw_read_relax(lock) cpu_relax() #define _raw_write_relax(lock) cpu_relax() -#define __always_inline__spin_lock -#define __always_inline__read_lock -#define __always_inline__write_lock -#define __always_inline__spin_lock_bh -#define __always_inline__read_lock_bh -#define __always_inline__write_lock_bh -#define __always_inline__spin_lock_irq -#define __always_inline__read_lock_irq -#define __always_inline__write_lock_irq -#define __always_inline__spin_lock_irqsave -#define __always_inline__read_lock_irqsave -#define __always_inline__write_lock_irqsave -#define __always_inline__spin_trylock -#define __always_inline__read_trylock -#define __always_inline__write_trylock -#define __always_inline__spin_trylock_bh -#define __always_inline__spin_unlock -#define __always_inline__read_unlock -#define __always_inline__write_unlock -#define __always_inline__spin_unlock_bh -#define __always_inline__read_unlock_bh -#define __always_inline__write_unlock_bh -#define __always_inline__spin_unlock_irq -#define __always_inline__read_unlock_irq -#define __always_inline__write_unlock_irq -#define __always_inline__spin_unlock_irqrestore -#define __always_inline__read_unlock_irqrestore -#define __always_inline__write_unlock_irqrestore - #endif /* __ASM_SPINLOCK_H */ diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h index 7a7e18fc2415..8264a7f459bc 100644 --- a/include/linux/spinlock_api_smp.h +++ b/include/linux/spinlock_api_smp.h @@ -60,137 +60,118 @@ void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags) void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags) __releases(lock); -/* - * We inline the unlock functions in the nondebug case: - */ -#if !defined(CONFIG_DEBUG_SPINLOCK) && !defined(CONFIG_PREEMPT) -#define __always_inline__spin_unlock -#define __always_inline__read_unlock -#define __always_inline__write_unlock -#define __always_inline__spin_unlock_irq -#define __always_inline__read_unlock_irq -#define __always_inline__write_unlock_irq -#endif - -#ifndef CONFIG_DEBUG_SPINLOCK -#ifndef CONFIG_GENERIC_LOCKBREAK - -#ifdef __always_inline__spin_lock +#ifdef CONFIG_INLINE_SPIN_LOCK #define _spin_lock(lock) __spin_lock(lock) #endif -#ifdef __always_inline__read_lock +#ifdef CONFIG_INLINE_READ_LOCK #define _read_lock(lock) __read_lock(lock) #endif -#ifdef __always_inline__write_lock +#ifdef CONFIG_INLINE_WRITE_LOCK #define _write_lock(lock) __write_lock(lock) #endif -#ifdef __always_inline__spin_lock_bh +#ifdef CONFIG_INLINE_SPIN_LOCK_BH #define _spin_lock_bh(lock) __spin_lock_bh(lock) #endif -#ifdef __always_inline__read_lock_bh +#ifdef CONFIG_INLINE_READ_LOCK_BH #define _read_lock_bh(lock) __read_lock_bh(lock) #endif -#ifdef __always_inline__write_lock_bh +#ifdef CONFIG_INLINE_WRITE_LOCK_BH #define _write_lock_bh(lock) __write_lock_bh(lock) #endif -#ifdef __always_inline__spin_lock_irq +#ifdef CONFIG_INLINE_SPIN_LOCK_IRQ #define _spin_lock_irq(lock) __spin_lock_irq(lock) #endif -#ifdef __always_inline__read_lock_irq +#ifdef CONFIG_INLINE_READ_LOCK_IRQ #define _read_lock_irq(lock) __read_lock_irq(lock) #endif -#ifdef __always_inline__write_lock_irq +#ifdef CONFIG_INLINE_WRITE_LOCK_IRQ #define _write_lock_irq(lock) __write_lock_irq(lock) #endif -#ifdef __always_inline__spin_lock_irqsave +#ifdef CONFIG_INLINE_SPIN_LOCK_IRQSAVE #define _spin_lock_irqsave(lock) __spin_lock_irqsave(lock) #endif -#ifdef __always_inline__read_lock_irqsave +#ifdef CONFIG_INLINE_READ_LOCK_IRQSAVE #define _read_lock_irqsave(lock) __read_lock_irqsave(lock) #endif -#ifdef __always_inline__write_lock_irqsave +#ifdef CONFIG_INLINE_WRITE_LOCK_IRQSAVE #define _write_lock_irqsave(lock) __write_lock_irqsave(lock) #endif -#endif /* !CONFIG_GENERIC_LOCKBREAK */ - -#ifdef __always_inline__spin_trylock +#ifdef CONFIG_INLINE_SPIN_TRYLOCK #define _spin_trylock(lock) __spin_trylock(lock) #endif -#ifdef __always_inline__read_trylock +#ifdef CONFIG_INLINE_READ_TRYLOCK #define _read_trylock(lock) __read_trylock(lock) #endif -#ifdef __always_inline__write_trylock +#ifdef CONFIG_INLINE_WRITE_TRYLOCK #define _write_trylock(lock) __write_trylock(lock) #endif -#ifdef __always_inline__spin_trylock_bh +#ifdef CONFIG_INLINE_SPIN_TRYLOCK_BH #define _spin_trylock_bh(lock) __spin_trylock_bh(lock) #endif -#ifdef __always_inline__spin_unlock +#ifdef CONFIG_INLINE_SPIN_UNLOCK #define _spin_unlock(lock) __spin_unlock(lock) #endif -#ifdef __always_inline__read_unlock +#ifdef CONFIG_INLINE_READ_UNLOCK #define _read_unlock(lock) __read_unlock(lock) #endif -#ifdef __always_inline__write_unlock +#ifdef CONFIG_INLINE_WRITE_UNLOCK #define _write_unlock(lock) __write_unlock(lock) #endif -#ifdef __always_inline__spin_unlock_bh +#ifdef CONFIG_INLINE_SPIN_UNLOCK_BH #define _spin_unlock_bh(lock) __spin_unlock_bh(lock) #endif -#ifdef __always_inline__read_unlock_bh +#ifdef CONFIG_INLINE_READ_UNLOCK_BH #define _read_unlock_bh(lock) __read_unlock_bh(lock) #endif -#ifdef __always_inline__write_unlock_bh +#ifdef CONFIG_INLINE_WRITE_UNLOCK_BH #define _write_unlock_bh(lock) __write_unlock_bh(lock) #endif -#ifdef __always_inline__spin_unlock_irq +#ifdef CONFIG_INLINE_SPIN_UNLOCK_IRQ #define _spin_unlock_irq(lock) __spin_unlock_irq(lock) #endif -#ifdef __always_inline__read_unlock_irq +#ifdef CONFIG_INLINE_READ_UNLOCK_IRQ #define _read_unlock_irq(lock) __read_unlock_irq(lock) #endif -#ifdef __always_inline__write_unlock_irq +#ifdef CONFIG_INLINE_WRITE_UNLOCK_IRQ #define _write_unlock_irq(lock) __write_unlock_irq(lock) #endif -#ifdef __always_inline__spin_unlock_irqrestore +#ifdef CONFIG_INLINE_SPIN_UNLOCK_IRQRESTORE #define _spin_unlock_irqrestore(lock, flags) __spin_unlock_irqrestore(lock, flags) #endif -#ifdef __always_inline__read_unlock_irqrestore +#ifdef CONFIG_INLINE_READ_UNLOCK_IRQRESTORE #define _read_unlock_irqrestore(lock, flags) __read_unlock_irqrestore(lock, flags) #endif -#ifdef __always_inline__write_unlock_irqrestore +#ifdef CONFIG_INLINE_WRITE_UNLOCK_IRQRESTORE #define _write_unlock_irqrestore(lock, flags) __write_unlock_irqrestore(lock, flags) #endif -#endif /* CONFIG_DEBUG_SPINLOCK */ - static inline int __spin_trylock(spinlock_t *lock) { preempt_disable(); diff --git a/init/Kconfig b/init/Kconfig index 9e03ef8b311e..06863dd7bf49 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1210,3 +1210,4 @@ source "block/Kconfig" config PREEMPT_NOTIFIERS bool +source "kernel/Kconfig.locks" diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks new file mode 100644 index 000000000000..d1f86db71451 --- /dev/null +++ b/kernel/Kconfig.locks @@ -0,0 +1,199 @@ +# +# The ARCH_INLINE foo is necessary because select ignores "depends on" +# +config ARCH_INLINE_SPIN_TRYLOCK + bool + +config ARCH_INLINE_SPIN_TRYLOCK_BH + bool + +config ARCH_INLINE_SPIN_LOCK + bool + +config ARCH_INLINE_SPIN_LOCK_BH + bool + +config ARCH_INLINE_SPIN_LOCK_IRQ + bool + +config ARCH_INLINE_SPIN_LOCK_IRQSAVE + bool + +config ARCH_INLINE_SPIN_UNLOCK + bool + +config ARCH_INLINE_SPIN_UNLOCK_BH + bool + +config ARCH_INLINE_SPIN_UNLOCK_IRQ + bool + +config ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE + bool + + +config ARCH_INLINE_READ_TRYLOCK + bool + +config ARCH_INLINE_READ_LOCK + bool + +config ARCH_INLINE_READ_LOCK_BH + bool + +config ARCH_INLINE_READ_LOCK_IRQ + bool + +config ARCH_INLINE_READ_LOCK_IRQSAVE + bool + +config ARCH_INLINE_READ_UNLOCK + bool + +config ARCH_INLINE_READ_UNLOCK_BH + bool + +config ARCH_INLINE_READ_UNLOCK_IRQ + bool + +config ARCH_INLINE_READ_UNLOCK_IRQRESTORE + bool + + +config ARCH_INLINE_WRITE_TRYLOCK + bool + +config ARCH_INLINE_WRITE_LOCK + bool + +config ARCH_INLINE_WRITE_LOCK_BH + bool + +config ARCH_INLINE_WRITE_LOCK_IRQ + bool + +config ARCH_INLINE_WRITE_LOCK_IRQSAVE + bool + +config ARCH_INLINE_WRITE_UNLOCK + bool + +config ARCH_INLINE_WRITE_UNLOCK_BH + bool + +config ARCH_INLINE_WRITE_UNLOCK_IRQ + bool + +config ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE + bool + +# +# lock_* functions are inlined when: +# - DEBUG_SPINLOCK=n and GENERIC_LOCKBREAK=n and ARCH_INLINE_*LOCK=y +# +# trylock_* functions are inlined when: +# - DEBUG_SPINLOCK=n and ARCH_INLINE_*LOCK=y +# +# unlock and unlock_irq functions are inlined when: +# - DEBUG_SPINLOCK=n and ARCH_INLINE_*LOCK=y +# or +# - DEBUG_SPINLOCK=n and PREEMPT=n +# +# unlock_bh and unlock_irqrestore functions are inlined when: +# - DEBUG_SPINLOCK=n and ARCH_INLINE_*LOCK=y +# + +config INLINE_SPIN_TRYLOCK + def_bool !DEBUG_SPINLOCK && ARCH_INLINE_SPIN_TRYLOCK + +config INLINE_SPIN_TRYLOCK_BH + def_bool !DEBUG_SPINLOCK && ARCH_INLINE_SPIN_TRYLOCK_BH + +config INLINE_SPIN_LOCK + def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && ARCH_INLINE_SPIN_LOCK + +config INLINE_SPIN_LOCK_BH + def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \ + ARCH_INLINE_SPIN_LOCK_BH + +config INLINE_SPIN_LOCK_IRQ + def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \ + ARCH_INLINE_SPIN_LOCK_IRQ + +config INLINE_SPIN_LOCK_IRQSAVE + def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \ + ARCH_INLINE_SPIN_LOCK_IRQSAVE + +config INLINE_SPIN_UNLOCK + def_bool !DEBUG_SPINLOCK && (!PREEMPT || ARCH_INLINE_SPIN_UNLOCK) + +config INLINE_SPIN_UNLOCK_BH + def_bool !DEBUG_SPINLOCK && ARCH_INLINE_SPIN_UNLOCK_BH + +config INLINE_SPIN_UNLOCK_IRQ + def_bool !DEBUG_SPINLOCK && (!PREEMPT || ARCH_INLINE_SPIN_UNLOCK_BH) + +config INLINE_SPIN_UNLOCK_IRQRESTORE + def_bool !DEBUG_SPINLOCK && ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE + + +config INLINE_READ_TRYLOCK + def_bool !DEBUG_SPINLOCK && ARCH_INLINE_READ_TRYLOCK + +config INLINE_READ_LOCK + def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && ARCH_INLINE_READ_LOCK + +config INLINE_READ_LOCK_BH + def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \ + ARCH_INLINE_READ_LOCK_BH + +config INLINE_READ_LOCK_IRQ + def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \ + ARCH_INLINE_READ_LOCK_IRQ + +config INLINE_READ_LOCK_IRQSAVE + def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \ + ARCH_INLINE_READ_LOCK_IRQSAVE + +config INLINE_READ_UNLOCK + def_bool !DEBUG_SPINLOCK && (!PREEMPT || ARCH_INLINE_READ_UNLOCK) + +config INLINE_READ_UNLOCK_BH + def_bool !DEBUG_SPINLOCK && ARCH_INLINE_READ_UNLOCK_BH + +config INLINE_READ_UNLOCK_IRQ + def_bool !DEBUG_SPINLOCK && (!PREEMPT || ARCH_INLINE_READ_UNLOCK_BH) + +config INLINE_READ_UNLOCK_IRQRESTORE + def_bool !DEBUG_SPINLOCK && ARCH_INLINE_READ_UNLOCK_IRQRESTORE + + +config INLINE_WRITE_TRYLOCK + def_bool !DEBUG_SPINLOCK && ARCH_INLINE_WRITE_TRYLOCK + +config INLINE_WRITE_LOCK + def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && ARCH_INLINE_WRITE_LOCK + +config INLINE_WRITE_LOCK_BH + def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \ + ARCH_INLINE_WRITE_LOCK_BH + +config INLINE_WRITE_LOCK_IRQ + def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \ + ARCH_INLINE_WRITE_LOCK_IRQ + +config INLINE_WRITE_LOCK_IRQSAVE + def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \ + ARCH_INLINE_WRITE_LOCK_IRQSAVE + +config INLINE_WRITE_UNLOCK + def_bool !DEBUG_SPINLOCK && (!PREEMPT || ARCH_INLINE_WRITE_UNLOCK) + +config INLINE_WRITE_UNLOCK_BH + def_bool !DEBUG_SPINLOCK && ARCH_INLINE_WRITE_UNLOCK_BH + +config INLINE_WRITE_UNLOCK_IRQ + def_bool !DEBUG_SPINLOCK && (!PREEMPT || ARCH_INLINE_WRITE_UNLOCK_BH) + +config INLINE_WRITE_UNLOCK_IRQRESTORE + def_bool !DEBUG_SPINLOCK && ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE diff --git a/kernel/spinlock.c b/kernel/spinlock.c index 5ddab730cb2f..235a9579a875 100644 --- a/kernel/spinlock.c +++ b/kernel/spinlock.c @@ -21,7 +21,7 @@ #include #include -#ifndef _spin_trylock +#ifndef CONFIG_INLINE_SPIN_TRYLOCK int __lockfunc _spin_trylock(spinlock_t *lock) { return __spin_trylock(lock); @@ -29,7 +29,7 @@ int __lockfunc _spin_trylock(spinlock_t *lock) EXPORT_SYMBOL(_spin_trylock); #endif -#ifndef _read_trylock +#ifndef CONFIG_INLINE_READ_TRYLOCK int __lockfunc _read_trylock(rwlock_t *lock) { return __read_trylock(lock); @@ -37,7 +37,7 @@ int __lockfunc _read_trylock(rwlock_t *lock) EXPORT_SYMBOL(_read_trylock); #endif -#ifndef _write_trylock +#ifndef CONFIG_INLINE_WRITE_TRYLOCK int __lockfunc _write_trylock(rwlock_t *lock) { return __write_trylock(lock); @@ -52,7 +52,7 @@ EXPORT_SYMBOL(_write_trylock); */ #if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC) -#ifndef _read_lock +#ifndef CONFIG_INLINE_READ_LOCK void __lockfunc _read_lock(rwlock_t *lock) { __read_lock(lock); @@ -60,7 +60,7 @@ void __lockfunc _read_lock(rwlock_t *lock) EXPORT_SYMBOL(_read_lock); #endif -#ifndef _spin_lock_irqsave +#ifndef CONFIG_INLINE_SPIN_LOCK_IRQSAVE unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock) { return __spin_lock_irqsave(lock); @@ -68,7 +68,7 @@ unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock) EXPORT_SYMBOL(_spin_lock_irqsave); #endif -#ifndef _spin_lock_irq +#ifndef CONFIG_INLINE_SPIN_LOCK_IRQ void __lockfunc _spin_lock_irq(spinlock_t *lock) { __spin_lock_irq(lock); @@ -76,7 +76,7 @@ void __lockfunc _spin_lock_irq(spinlock_t *lock) EXPORT_SYMBOL(_spin_lock_irq); #endif -#ifndef _spin_lock_bh +#ifndef CONFIG_INLINE_SPIN_LOCK_BH void __lockfunc _spin_lock_bh(spinlock_t *lock) { __spin_lock_bh(lock); @@ -84,7 +84,7 @@ void __lockfunc _spin_lock_bh(spinlock_t *lock) EXPORT_SYMBOL(_spin_lock_bh); #endif -#ifndef _read_lock_irqsave +#ifndef CONFIG_INLINE_READ_LOCK_IRQSAVE unsigned long __lockfunc _read_lock_irqsave(rwlock_t *lock) { return __read_lock_irqsave(lock); @@ -92,7 +92,7 @@ unsigned long __lockfunc _read_lock_irqsave(rwlock_t *lock) EXPORT_SYMBOL(_read_lock_irqsave); #endif -#ifndef _read_lock_irq +#ifndef CONFIG_INLINE_READ_LOCK_IRQ void __lockfunc _read_lock_irq(rwlock_t *lock) { __read_lock_irq(lock); @@ -100,7 +100,7 @@ void __lockfunc _read_lock_irq(rwlock_t *lock) EXPORT_SYMBOL(_read_lock_irq); #endif -#ifndef _read_lock_bh +#ifndef CONFIG_INLINE_READ_LOCK_BH void __lockfunc _read_lock_bh(rwlock_t *lock) { __read_lock_bh(lock); @@ -108,7 +108,7 @@ void __lockfunc _read_lock_bh(rwlock_t *lock) EXPORT_SYMBOL(_read_lock_bh); #endif -#ifndef _write_lock_irqsave +#ifndef CONFIG_INLINE_WRITE_LOCK_IRQSAVE unsigned long __lockfunc _write_lock_irqsave(rwlock_t *lock) { return __write_lock_irqsave(lock); @@ -116,7 +116,7 @@ unsigned long __lockfunc _write_lock_irqsave(rwlock_t *lock) EXPORT_SYMBOL(_write_lock_irqsave); #endif -#ifndef _write_lock_irq +#ifndef CONFIG_INLINE_WRITE_LOCK_IRQ void __lockfunc _write_lock_irq(rwlock_t *lock) { __write_lock_irq(lock); @@ -124,7 +124,7 @@ void __lockfunc _write_lock_irq(rwlock_t *lock) EXPORT_SYMBOL(_write_lock_irq); #endif -#ifndef _write_lock_bh +#ifndef CONFIG_INLINE_WRITE_LOCK_BH void __lockfunc _write_lock_bh(rwlock_t *lock) { __write_lock_bh(lock); @@ -132,7 +132,7 @@ void __lockfunc _write_lock_bh(rwlock_t *lock) EXPORT_SYMBOL(_write_lock_bh); #endif -#ifndef _spin_lock +#ifndef CONFIG_INLINE_SPIN_LOCK void __lockfunc _spin_lock(spinlock_t *lock) { __spin_lock(lock); @@ -140,7 +140,7 @@ void __lockfunc _spin_lock(spinlock_t *lock) EXPORT_SYMBOL(_spin_lock); #endif -#ifndef _write_lock +#ifndef CONFIG_INLINE_WRITE_LOCK void __lockfunc _write_lock(rwlock_t *lock) { __write_lock(lock); @@ -272,7 +272,7 @@ EXPORT_SYMBOL(_spin_lock_nest_lock); #endif -#ifndef _spin_unlock +#ifndef CONFIG_INLINE_SPIN_UNLOCK void __lockfunc _spin_unlock(spinlock_t *lock) { __spin_unlock(lock); @@ -280,7 +280,7 @@ void __lockfunc _spin_unlock(spinlock_t *lock) EXPORT_SYMBOL(_spin_unlock); #endif -#ifndef _write_unlock +#ifndef CONFIG_INLINE_WRITE_UNLOCK void __lockfunc _write_unlock(rwlock_t *lock) { __write_unlock(lock); @@ -288,7 +288,7 @@ void __lockfunc _write_unlock(rwlock_t *lock) EXPORT_SYMBOL(_write_unlock); #endif -#ifndef _read_unlock +#ifndef CONFIG_INLINE_READ_UNLOCK void __lockfunc _read_unlock(rwlock_t *lock) { __read_unlock(lock); @@ -296,7 +296,7 @@ void __lockfunc _read_unlock(rwlock_t *lock) EXPORT_SYMBOL(_read_unlock); #endif -#ifndef _spin_unlock_irqrestore +#ifndef CONFIG_INLINE_SPIN_UNLOCK_IRQRESTORE void __lockfunc _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags) { __spin_unlock_irqrestore(lock, flags); @@ -304,7 +304,7 @@ void __lockfunc _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags) EXPORT_SYMBOL(_spin_unlock_irqrestore); #endif -#ifndef _spin_unlock_irq +#ifndef CONFIG_INLINE_SPIN_UNLOCK_IRQ void __lockfunc _spin_unlock_irq(spinlock_t *lock) { __spin_unlock_irq(lock); @@ -312,7 +312,7 @@ void __lockfunc _spin_unlock_irq(spinlock_t *lock) EXPORT_SYMBOL(_spin_unlock_irq); #endif -#ifndef _spin_unlock_bh +#ifndef CONFIG_INLINE_SPIN_UNLOCK_BH void __lockfunc _spin_unlock_bh(spinlock_t *lock) { __spin_unlock_bh(lock); @@ -320,7 +320,7 @@ void __lockfunc _spin_unlock_bh(spinlock_t *lock) EXPORT_SYMBOL(_spin_unlock_bh); #endif -#ifndef _read_unlock_irqrestore +#ifndef CONFIG_INLINE_READ_UNLOCK_IRQRESTORE void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags) { __read_unlock_irqrestore(lock, flags); @@ -328,7 +328,7 @@ void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags) EXPORT_SYMBOL(_read_unlock_irqrestore); #endif -#ifndef _read_unlock_irq +#ifndef CONFIG_INLINE_READ_UNLOCK_IRQ void __lockfunc _read_unlock_irq(rwlock_t *lock) { __read_unlock_irq(lock); @@ -336,7 +336,7 @@ void __lockfunc _read_unlock_irq(rwlock_t *lock) EXPORT_SYMBOL(_read_unlock_irq); #endif -#ifndef _read_unlock_bh +#ifndef CONFIG_INLINE_READ_UNLOCK_BH void __lockfunc _read_unlock_bh(rwlock_t *lock) { __read_unlock_bh(lock); @@ -344,7 +344,7 @@ void __lockfunc _read_unlock_bh(rwlock_t *lock) EXPORT_SYMBOL(_read_unlock_bh); #endif -#ifndef _write_unlock_irqrestore +#ifndef CONFIG_INLINE_WRITE_UNLOCK_IRQRESTORE void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags) { __write_unlock_irqrestore(lock, flags); @@ -352,7 +352,7 @@ void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags) EXPORT_SYMBOL(_write_unlock_irqrestore); #endif -#ifndef _write_unlock_irq +#ifndef CONFIG_INLINE_WRITE_UNLOCK_IRQ void __lockfunc _write_unlock_irq(rwlock_t *lock) { __write_unlock_irq(lock); @@ -360,7 +360,7 @@ void __lockfunc _write_unlock_irq(rwlock_t *lock) EXPORT_SYMBOL(_write_unlock_irq); #endif -#ifndef _write_unlock_bh +#ifndef CONFIG_INLINE_WRITE_UNLOCK_BH void __lockfunc _write_unlock_bh(rwlock_t *lock) { __write_unlock_bh(lock); @@ -368,7 +368,7 @@ void __lockfunc _write_unlock_bh(rwlock_t *lock) EXPORT_SYMBOL(_write_unlock_bh); #endif -#ifndef _spin_trylock_bh +#ifndef CONFIG_INLINE_SPIN_TRYLOCK_BH int __lockfunc _spin_trylock_bh(spinlock_t *lock) { return __spin_trylock_bh(lock); -- cgit v1.3-8-gc7d7 From 572a9d7b6fc7f20f573664063324c086be310c42 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 10 Nov 2009 06:14:14 +0000 Subject: net: allow to propagate errors through ->ndo_hard_start_xmit() Currently the ->ndo_hard_start_xmit() callbacks are only permitted to return one of the NETDEV_TX codes. This prevents any kind of error propagation for virtual devices, like queue congestion of the underlying device in case of layered devices, or unreachability in case of tunnels. This patches changes the NET_XMIT codes to avoid clashes with the NETDEV_TX codes and changes the two callers of dev_hard_start_xmit() to expect either errno codes, NET_XMIT codes or NETDEV_TX codes as return value. In case of qdisc_restart(), all non NETDEV_TX codes are mapped to NETDEV_TX_OK since no error propagation is possible when using qdiscs. In case of dev_queue_xmit(), the error is propagated upwards. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netdevice.h | 43 ++++++++++++++++++++++++++++++++----------- net/core/dev.c | 32 ++++++++++++++++++++++++++------ net/sched/sch_generic.c | 9 ++++++++- 3 files changed, 66 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 083b5989cecb..8b266390b9e2 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -63,27 +63,48 @@ struct wireless_dev; #define HAVE_FREE_NETDEV /* free_netdev() */ #define HAVE_NETDEV_PRIV /* netdev_priv() */ -#define NET_XMIT_SUCCESS 0 -#define NET_XMIT_DROP 1 /* skb dropped */ -#define NET_XMIT_CN 2 /* congestion notification */ -#define NET_XMIT_POLICED 3 /* skb is shot by police */ -#define NET_XMIT_MASK 0xFFFF /* qdisc flags in net/sch_generic.h */ +/* + * Transmit return codes: transmit return codes originate from three different + * namespaces: + * + * - qdisc return codes + * - driver transmit return codes + * - errno values + * + * Drivers are allowed to return any one of those in their hard_start_xmit() + * function. Real network devices commonly used with qdiscs should only return + * the driver transmit return codes though - when qdiscs are used, the actual + * transmission happens asynchronously, so the value is not propagated to + * higher layers. Virtual network devices transmit synchronously, in this case + * the driver transmit return codes are consumed by dev_queue_xmit(), all + * others are propagated to higher layers. + */ + +/* qdisc ->enqueue() return codes. */ +#define NET_XMIT_SUCCESS 0x00 +#define NET_XMIT_DROP 0x10 /* skb dropped */ +#define NET_XMIT_CN 0x20 /* congestion notification */ +#define NET_XMIT_POLICED 0x30 /* skb is shot by police */ +#define NET_XMIT_MASK 0xf0 /* qdisc flags in net/sch_generic.h */ /* Backlog congestion levels */ -#define NET_RX_SUCCESS 0 /* keep 'em coming, baby */ -#define NET_RX_DROP 1 /* packet dropped */ +#define NET_RX_SUCCESS 0 /* keep 'em coming, baby */ +#define NET_RX_DROP 1 /* packet dropped */ /* NET_XMIT_CN is special. It does not guarantee that this packet is lost. It * indicates that the device will soon be dropping packets, or already drops * some packets of the same priority; prompting us to send less aggressively. */ -#define net_xmit_eval(e) ((e) == NET_XMIT_CN? 0 : (e)) +#define net_xmit_eval(e) ((e) == NET_XMIT_CN ? 0 : (e)) #define net_xmit_errno(e) ((e) != NET_XMIT_CN ? -ENOBUFS : 0) /* Driver transmit return codes */ +#define NETDEV_TX_MASK 0xf + enum netdev_tx { - NETDEV_TX_OK = 0, /* driver took care of packet */ - NETDEV_TX_BUSY, /* driver tx path was busy*/ - NETDEV_TX_LOCKED = -1, /* driver tx lock was already taken */ + __NETDEV_TX_MIN = INT_MIN, /* make sure enum is signed */ + NETDEV_TX_OK = 0, /* driver took care of packet */ + NETDEV_TX_BUSY = 1, /* driver tx path was busy*/ + NETDEV_TX_LOCKED = 2, /* driver tx lock was already taken */ }; typedef enum netdev_tx netdev_tx_t; diff --git a/net/core/dev.c b/net/core/dev.c index ad8e320ceba7..548340b57296 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1757,7 +1757,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq) { const struct net_device_ops *ops = dev->netdev_ops; - int rc; + int rc = NETDEV_TX_OK; if (likely(!skb->next)) { if (!list_empty(&ptype_all)) @@ -1805,6 +1805,8 @@ gso: nskb->next = NULL; rc = ops->ndo_start_xmit(nskb, dev); if (unlikely(rc != NETDEV_TX_OK)) { + if (rc & ~NETDEV_TX_MASK) + goto out_kfree_gso_skb; nskb->next = skb->next; skb->next = nskb; return rc; @@ -1814,11 +1816,12 @@ gso: return NETDEV_TX_BUSY; } while (skb->next); - skb->destructor = DEV_GSO_CB(skb)->destructor; - +out_kfree_gso_skb: + if (likely(skb->next == NULL)) + skb->destructor = DEV_GSO_CB(skb)->destructor; out_kfree_skb: kfree_skb(skb); - return NETDEV_TX_OK; + return rc; } static u32 skb_tx_hashrnd; @@ -1906,6 +1909,23 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, return rc; } +static inline bool dev_xmit_complete(int rc) +{ + /* successful transmission */ + if (rc == NETDEV_TX_OK) + return true; + + /* error while transmitting, driver consumed skb */ + if (rc < 0) + return true; + + /* error while queueing to a different device, driver consumed skb */ + if (rc & NET_XMIT_MASK) + return true; + + return false; +} + /** * dev_queue_xmit - transmit a buffer * @skb: buffer to transmit @@ -2003,8 +2023,8 @@ gso: HARD_TX_LOCK(dev, txq, cpu); if (!netif_tx_queue_stopped(txq)) { - rc = NET_XMIT_SUCCESS; - if (!dev_hard_start_xmit(skb, dev, txq)) { + rc = dev_hard_start_xmit(skb, dev, txq); + if (dev_xmit_complete(rc)) { HARD_TX_UNLOCK(dev, txq); goto out; } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 4ae6aa562f2b..b13821ad2fb6 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -120,8 +120,15 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, HARD_TX_LOCK(dev, txq, smp_processor_id()); if (!netif_tx_queue_stopped(txq) && - !netif_tx_queue_frozen(txq)) + !netif_tx_queue_frozen(txq)) { ret = dev_hard_start_xmit(skb, dev, txq); + + /* an error implies that the skb was consumed */ + if (ret < 0) + ret = NETDEV_TX_OK; + /* all NET_XMIT codes map to NETDEV_TX_OK */ + ret &= ~NET_XMIT_MASK; + } HARD_TX_UNLOCK(dev, txq); spin_lock(root_lock); -- cgit v1.3-8-gc7d7 From d9b263528e01bfbaf716b51f38606b3dfe5ac1e9 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Fri, 13 Nov 2009 14:57:00 -0500 Subject: x86, setup: Store the boot cursor state Add a field to store the boot cursor state and implement this for VGA on x86. This can then be used to set the default policy for the boot console. Signed-off-by: Matthew Garrett LKML-Reference: <1258142222-16092-1-git-send-email-mjg@redhat.com> Signed-off-by: H. Peter Anvin --- arch/x86/boot/video.c | 6 ++++++ include/linux/screen_info.h | 5 ++++- 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/x86/boot/video.c b/arch/x86/boot/video.c index d42da3802499..f767164cd5df 100644 --- a/arch/x86/boot/video.c +++ b/arch/x86/boot/video.c @@ -27,6 +27,12 @@ static void store_cursor_position(void) boot_params.screen_info.orig_x = oreg.dl; boot_params.screen_info.orig_y = oreg.dh; + + if (oreg.ch & 0x20) + boot_params.screen_info.flags |= VIDEO_FLAGS_NOCURSOR; + + if ((oreg.ch & 0x1f) > (oreg.cl & 0x1f)) + boot_params.screen_info.flags |= VIDEO_FLAGS_NOCURSOR; } static void store_video_mode(void) diff --git a/include/linux/screen_info.h b/include/linux/screen_info.h index 1ee2c05142f6..899fbb487c94 100644 --- a/include/linux/screen_info.h +++ b/include/linux/screen_info.h @@ -14,7 +14,8 @@ struct screen_info { __u16 orig_video_page; /* 0x04 */ __u8 orig_video_mode; /* 0x06 */ __u8 orig_video_cols; /* 0x07 */ - __u16 unused2; /* 0x08 */ + __u8 flags; /* 0x08 */ + __u8 unused2; /* 0x09 */ __u16 orig_video_ega_bx;/* 0x0a */ __u16 unused3; /* 0x0c */ __u8 orig_video_lines; /* 0x0e */ @@ -65,6 +66,8 @@ struct screen_info { #define VIDEO_TYPE_EFI 0x70 /* EFI graphic mode */ +#define VIDEO_FLAGS_NOCURSOR (1 << 0) /* The video mode has no cursor set */ + #ifdef __KERNEL__ extern struct screen_info screen_info; -- cgit v1.3-8-gc7d7 From 90a5e16992fa6105f7ebf3f29f5cf5feb1bbf7dc Mon Sep 17 00:00:00 2001 From: Rui Paulo Date: Wed, 11 Nov 2009 00:01:31 +0000 Subject: mac80211: implement RANN processing and forwarding Process the RANN (Root Annoucement) Frame and try to find the HWMP root station by sending a PREQ. Signed-off-by: Rui Paulo Signed-off-by: Javier Cardona Reviewed-by: Andrey Yurovsky Tested-by: Brian Cavagnolo Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 15 +++++++++ net/mac80211/ieee80211_i.h | 1 + net/mac80211/mesh_hwmp.c | 79 +++++++++++++++++++++++++++++++++++++++++----- net/mac80211/util.c | 4 +++ 4 files changed, 91 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 50c684db33c7..49b1abd2fe97 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -554,6 +554,20 @@ struct ieee80211_tim_ie { u8 virtual_map[1]; } __attribute__ ((packed)); +/** + * struct ieee80211_rann_ie + * + * This structure refers to "Root Announcement information element" + */ +struct ieee80211_rann_ie { + u8 rann_flags; + u8 rann_hopcount; + u8 rann_ttl; + u8 rann_addr[6]; + u32 rann_seq; + u32 rann_metric; +} __attribute__ ((packed)); + #define WLAN_SA_QUERY_TR_ID_LEN 2 struct ieee80211_mgmt { @@ -1070,6 +1084,7 @@ enum ieee80211_eid { WLAN_EID_PREQ = 68, WLAN_EID_PREP = 69, WLAN_EID_PERR = 70, + WLAN_EID_RANN = 49, /* compatible with FreeBSD */ /* 802.11h */ WLAN_EID_PWR_CONSTRAINT = 32, WLAN_EID_PWR_CAPABILITY = 33, diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 19b0c128d940..2a7d3d4067e1 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -804,6 +804,7 @@ struct ieee802_11_elems { u8 *preq; u8 *prep; u8 *perr; + struct ieee80211_rann_ie *rann; u8 *ch_switch_elem; u8 *country_elem; u8 *pwr_constr_elem; diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index db1a33098a88..7b9dd87cf9f2 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -28,6 +28,8 @@ /* Reply and forward */ #define MP_F_RF 0x2 +static void mesh_queue_preq(struct mesh_path *, u8); + static inline u32 u32_field_get(u8 *preq_elem, int offset, bool ae) { if (ae) @@ -81,7 +83,8 @@ static inline u32 u32_field_get(u8 *preq_elem, int offset, bool ae) enum mpath_frame_type { MPATH_PREQ = 0, MPATH_PREP, - MPATH_PERR + MPATH_PERR, + MPATH_RANN }; static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags, @@ -109,7 +112,8 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags, memcpy(mgmt->da, da, ETH_ALEN); memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); - /* BSSID is left zeroed, wildcard value */ + /* BSSID == SA */ + memcpy(mgmt->bssid, sdata->dev->dev_addr, ETH_ALEN); mgmt->u.action.category = MESH_PATH_SEL_CATEGORY; mgmt->u.action.u.mesh_action.action_code = MESH_PATH_SEL_ACTION; @@ -126,6 +130,12 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags, pos = skb_put(skb, 2 + ie_len); *pos++ = WLAN_EID_PREP; break; + case MPATH_RANN: + mhwmp_dbg("sending RANN from %pM\n", orig_addr); + ie_len = sizeof(struct ieee80211_rann_ie); + pos = skb_put(skb, 2 + ie_len); + *pos++ = WLAN_EID_RANN; + break; default: kfree_skb(skb); return -ENOTSUPP; @@ -143,8 +153,10 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags, pos += ETH_ALEN; memcpy(pos, &orig_dsn, 4); pos += 4; - memcpy(pos, &lifetime, 4); - pos += 4; + if (action != MPATH_RANN) { + memcpy(pos, &lifetime, 4); + pos += 4; + } memcpy(pos, &metric, 4); pos += 4; if (action == MPATH_PREQ) { @@ -152,9 +164,11 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags, *pos++ = 1; *pos++ = dst_flags; } - memcpy(pos, dst, ETH_ALEN); - pos += ETH_ALEN; - memcpy(pos, &dst_dsn, 4); + if (action != MPATH_RANN) { + memcpy(pos, dst, ETH_ALEN); + pos += ETH_ALEN; + memcpy(pos, &dst_dsn, 4); + } ieee80211_tx_skb(sdata, skb, 1); return 0; @@ -610,6 +624,54 @@ static void hwmp_perr_frame_process(struct ieee80211_sub_if_data *sdata, rcu_read_unlock(); } +static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, + struct ieee80211_rann_ie *rann) +{ + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + struct mesh_path *mpath; + u8 *ta; + u8 ttl, flags, hopcount; + u8 *orig_addr; + u32 orig_dsn, metric; + + ta = mgmt->sa; + ttl = rann->rann_ttl; + if (ttl <= 1) { + ifmsh->mshstats.dropped_frames_ttl++; + return; + } + ttl--; + flags = rann->rann_flags; + orig_addr = rann->rann_addr; + orig_dsn = rann->rann_seq; + hopcount = rann->rann_hopcount; + metric = rann->rann_metric; + mhwmp_dbg("received RANN from %pM\n", orig_addr); + + rcu_read_lock(); + mpath = mesh_path_lookup(orig_addr, sdata); + if (!mpath) { + mesh_path_add(orig_addr, sdata); + mpath = mesh_path_lookup(orig_addr, sdata); + if (!mpath) { + rcu_read_unlock(); + sdata->u.mesh.mshstats.dropped_frames_no_route++; + return; + } + mesh_queue_preq(mpath, + PREQ_Q_F_START | PREQ_Q_F_REFRESH); + } + if (mpath->dsn < orig_dsn) { + mesh_path_sel_frame_tx(MPATH_RANN, flags, orig_addr, + cpu_to_le32(orig_dsn), + 0, NULL, 0, sdata->dev->broadcast, + hopcount, ttl, 0, cpu_to_le32(metric), + 0, sdata); + mpath->dsn = orig_dsn; + } + rcu_read_unlock(); +} void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata, @@ -654,7 +716,8 @@ void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata, return; hwmp_perr_frame_process(sdata, mgmt, elems.perr); } - + if (elems.rann) + hwmp_rann_frame_process(sdata, mgmt, elems.rann); } /** diff --git a/net/mac80211/util.c b/net/mac80211/util.c index aedbaaa067e6..da86e1592f8c 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -685,6 +685,10 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, elems->perr = pos; elems->perr_len = elen; break; + case WLAN_EID_RANN: + if (elen >= sizeof(struct ieee80211_rann_ie)) + elems->rann = (void *)pos; + break; case WLAN_EID_CHANNEL_SWITCH: elems->ch_switch_elem = pos; elems->ch_switch_elem_len = elen; -- cgit v1.3-8-gc7d7 From d19b3bf6384e66ac6e11a61ee31ed2cfe149f4d8 Mon Sep 17 00:00:00 2001 From: Rui Paulo Date: Mon, 9 Nov 2009 23:46:55 +0000 Subject: mac80211: replace "destination" with "target" to follow the spec Resulting object files have the same MD5 as before. Signed-off-by: Rui Paulo Signed-off-by: Javier Cardona Reviewed-by: Andrey Yurovsky Tested-by: Brian Cavagnolo Signed-off-by: John W. Linville --- include/linux/nl80211.h | 8 +- include/net/cfg80211.h | 8 +- net/mac80211/cfg.c | 8 +- net/mac80211/ieee80211_i.h | 10 +- net/mac80211/mesh.c | 2 +- net/mac80211/mesh.h | 10 +- net/mac80211/mesh_hwmp.c | 221 ++++++++++++++++++++++---------------------- net/mac80211/mesh_pathtbl.c | 12 +-- net/wireless/nl80211.c | 6 +- 9 files changed, 144 insertions(+), 141 deletions(-) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 203adef972cf..7a0bd6ea6f63 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -899,14 +899,14 @@ enum nl80211_sta_info { * * @NL80211_MPATH_FLAG_ACTIVE: the mesh path is active * @NL80211_MPATH_FLAG_RESOLVING: the mesh path discovery process is running - * @NL80211_MPATH_FLAG_DSN_VALID: the mesh path contains a valid DSN + * @NL80211_MPATH_FLAG_SN_VALID: the mesh path contains a valid SN * @NL80211_MPATH_FLAG_FIXED: the mesh path has been manually set * @NL80211_MPATH_FLAG_RESOLVED: the mesh path discovery process succeeded */ enum nl80211_mpath_flags { NL80211_MPATH_FLAG_ACTIVE = 1<<0, NL80211_MPATH_FLAG_RESOLVING = 1<<1, - NL80211_MPATH_FLAG_DSN_VALID = 1<<2, + NL80211_MPATH_FLAG_SN_VALID = 1<<2, NL80211_MPATH_FLAG_FIXED = 1<<3, NL80211_MPATH_FLAG_RESOLVED = 1<<4, }; @@ -919,7 +919,7 @@ enum nl80211_mpath_flags { * * @__NL80211_MPATH_INFO_INVALID: attribute number 0 is reserved * @NL80211_ATTR_MPATH_FRAME_QLEN: number of queued frames for this destination - * @NL80211_ATTR_MPATH_DSN: destination sequence number + * @NL80211_ATTR_MPATH_SN: destination sequence number * @NL80211_ATTR_MPATH_METRIC: metric (cost) of this mesh path * @NL80211_ATTR_MPATH_EXPTIME: expiration time for the path, in msec from now * @NL80211_ATTR_MPATH_FLAGS: mesh path flags, enumerated in @@ -930,7 +930,7 @@ enum nl80211_mpath_flags { enum nl80211_mpath_info { __NL80211_MPATH_INFO_INVALID, NL80211_MPATH_INFO_FRAME_QLEN, - NL80211_MPATH_INFO_DSN, + NL80211_MPATH_INFO_SN, NL80211_MPATH_INFO_METRIC, NL80211_MPATH_INFO_EXPTIME, NL80211_MPATH_INFO_FLAGS, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 1ee41e4a92e2..7d0ae9c18286 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -420,7 +420,7 @@ enum monitor_flags { * in during get_station() or dump_station(). * * MPATH_INFO_FRAME_QLEN: @frame_qlen filled - * MPATH_INFO_DSN: @dsn filled + * MPATH_INFO_SN: @sn filled * MPATH_INFO_METRIC: @metric filled * MPATH_INFO_EXPTIME: @exptime filled * MPATH_INFO_DISCOVERY_TIMEOUT: @discovery_timeout filled @@ -429,7 +429,7 @@ enum monitor_flags { */ enum mpath_info_flags { MPATH_INFO_FRAME_QLEN = BIT(0), - MPATH_INFO_DSN = BIT(1), + MPATH_INFO_SN = BIT(1), MPATH_INFO_METRIC = BIT(2), MPATH_INFO_EXPTIME = BIT(3), MPATH_INFO_DISCOVERY_TIMEOUT = BIT(4), @@ -444,7 +444,7 @@ enum mpath_info_flags { * * @filled: bitfield of flags from &enum mpath_info_flags * @frame_qlen: number of queued frames for this destination - * @dsn: destination sequence number + * @sn: target sequence number * @metric: metric (cost) of this mesh path * @exptime: expiration time for the mesh path from now, in msecs * @flags: mesh path flags @@ -458,7 +458,7 @@ enum mpath_info_flags { struct mpath_info { u32 filled; u32 frame_qlen; - u32 dsn; + u32 sn; u32 metric; u32 exptime; u32 discovery_timeout; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 576b86f81d1b..81053587e72b 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -935,7 +935,7 @@ static void mpath_set_pinfo(struct mesh_path *mpath, u8 *next_hop, pinfo->generation = mesh_paths_generation; pinfo->filled = MPATH_INFO_FRAME_QLEN | - MPATH_INFO_DSN | + MPATH_INFO_SN | MPATH_INFO_METRIC | MPATH_INFO_EXPTIME | MPATH_INFO_DISCOVERY_TIMEOUT | @@ -943,7 +943,7 @@ static void mpath_set_pinfo(struct mesh_path *mpath, u8 *next_hop, MPATH_INFO_FLAGS; pinfo->frame_qlen = mpath->frame_queue.qlen; - pinfo->dsn = mpath->dsn; + pinfo->sn = mpath->sn; pinfo->metric = mpath->metric; if (time_before(jiffies, mpath->exp_time)) pinfo->exptime = jiffies_to_msecs(mpath->exp_time - jiffies); @@ -955,8 +955,8 @@ static void mpath_set_pinfo(struct mesh_path *mpath, u8 *next_hop, pinfo->flags |= NL80211_MPATH_FLAG_ACTIVE; if (mpath->flags & MESH_PATH_RESOLVING) pinfo->flags |= NL80211_MPATH_FLAG_RESOLVING; - if (mpath->flags & MESH_PATH_DSN_VALID) - pinfo->flags |= NL80211_MPATH_FLAG_DSN_VALID; + if (mpath->flags & MESH_PATH_SN_VALID) + pinfo->flags |= NL80211_MPATH_FLAG_SN_VALID; if (mpath->flags & MESH_PATH_FIXED) pinfo->flags |= NL80211_MPATH_FLAG_FIXED; if (mpath->flags & MESH_PATH_RESOLVING) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 2a7d3d4067e1..fb54ddb9b27d 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -373,14 +373,14 @@ struct ieee80211_if_mesh { u8 mesh_sp_id; /* Authentication Protocol Identifier */ u8 mesh_auth_id; - /* Local mesh Destination Sequence Number */ - u32 dsn; + /* Local mesh Sequence Number */ + u32 sn; /* Last used PREQ ID */ u32 preq_id; atomic_t mpaths; - /* Timestamp of last DSN update */ - unsigned long last_dsn_update; - /* Timestamp of last DSN sent */ + /* Timestamp of last SN update */ + unsigned long last_sn_update; + /* Timestamp of last SN sent */ unsigned long last_preq; struct mesh_rmc *rmc; spinlock_t mesh_preq_queue_lock; diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 222a31338390..0f3e1147ec4f 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -672,7 +672,7 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata) MESH_MIN_DISCOVERY_TIMEOUT; ifmsh->accepting_plinks = true; ifmsh->preq_id = 0; - ifmsh->dsn = 0; + ifmsh->sn = 0; atomic_set(&ifmsh->mpaths, 0); mesh_rmc_init(sdata); ifmsh->last_preq = jiffies; diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index 1d534c702406..ee687add3927 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -26,7 +26,7 @@ * * @MESH_PATH_ACTIVE: the mesh path can be used for forwarding * @MESH_PATH_RESOLVING: the discovery process is running for this mesh path - * @MESH_PATH_DSN_VALID: the mesh path contains a valid destination sequence + * @MESH_PATH_SN_VALID: the mesh path contains a valid destination sequence * number * @MESH_PATH_FIXED: the mesh path has been manually set and should not be * modified @@ -38,7 +38,7 @@ enum mesh_path_flags { MESH_PATH_ACTIVE = BIT(0), MESH_PATH_RESOLVING = BIT(1), - MESH_PATH_DSN_VALID = BIT(2), + MESH_PATH_SN_VALID = BIT(2), MESH_PATH_FIXED = BIT(3), MESH_PATH_RESOLVED = BIT(4), }; @@ -70,7 +70,7 @@ enum mesh_deferred_task_flags { * @timer: mesh path discovery timer * @frame_queue: pending queue for frames sent to this destination while the * path is unresolved - * @dsn: destination sequence number of the destination + * @sn: target sequence number * @metric: current metric to this destination * @hop_count: hops to destination * @exp_time: in jiffies, when the path will expire or when it expired @@ -94,7 +94,7 @@ struct mesh_path { struct timer_list timer; struct sk_buff_head frame_queue; struct rcu_head rcu; - u32 dsn; + u32 sn; u32 metric; u8 hop_count; unsigned long exp_time; @@ -280,7 +280,7 @@ void mesh_mpp_table_grow(void); u32 mesh_table_hash(u8 *addr, struct ieee80211_sub_if_data *sdata, struct mesh_table *tbl); /* Mesh paths */ -int mesh_path_error_tx(u8 ttl, u8 *dest, __le32 dest_dsn, __le16 dest_rcode, +int mesh_path_error_tx(u8 ttl, u8 *target, __le32 target_sn, __le16 target_rcode, u8 *ra, struct ieee80211_sub_if_data *sdata); void mesh_path_assign_nexthop(struct mesh_path *mpath, struct sta_info *sta); void mesh_path_flush_pending(struct mesh_path *mpath); diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 7d36f3a741a5..f03a27dfd616 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -56,33 +56,33 @@ static inline u32 u16_field_get(u8 *preq_elem, int offset, bool ae) #define PREQ_IE_TTL(x) (*(x + 2)) #define PREQ_IE_PREQ_ID(x) u32_field_get(x, 3, 0) #define PREQ_IE_ORIG_ADDR(x) (x + 7) -#define PREQ_IE_ORIG_DSN(x) u32_field_get(x, 13, 0); +#define PREQ_IE_ORIG_SN(x) u32_field_get(x, 13, 0); #define PREQ_IE_LIFETIME(x) u32_field_get(x, 17, AE_F_SET(x)); #define PREQ_IE_METRIC(x) u32_field_get(x, 21, AE_F_SET(x)); -#define PREQ_IE_DST_F(x) (*(AE_F_SET(x) ? x + 32 : x + 26)) -#define PREQ_IE_DST_ADDR(x) (AE_F_SET(x) ? x + 33 : x + 27) -#define PREQ_IE_DST_DSN(x) u32_field_get(x, 33, AE_F_SET(x)); +#define PREQ_IE_TARGET_F(x) (*(AE_F_SET(x) ? x + 32 : x + 26)) +#define PREQ_IE_TARGET_ADDR(x) (AE_F_SET(x) ? x + 33 : x + 27) +#define PREQ_IE_TARGET_SN(x) u32_field_get(x, 33, AE_F_SET(x)); #define PREP_IE_FLAGS(x) PREQ_IE_FLAGS(x) #define PREP_IE_HOPCOUNT(x) PREQ_IE_HOPCOUNT(x) #define PREP_IE_TTL(x) PREQ_IE_TTL(x) #define PREP_IE_ORIG_ADDR(x) (x + 3) -#define PREP_IE_ORIG_DSN(x) u32_field_get(x, 9, 0); +#define PREP_IE_ORIG_SN(x) u32_field_get(x, 9, 0); #define PREP_IE_LIFETIME(x) u32_field_get(x, 13, AE_F_SET(x)); #define PREP_IE_METRIC(x) u32_field_get(x, 17, AE_F_SET(x)); -#define PREP_IE_DST_ADDR(x) (AE_F_SET(x) ? x + 27 : x + 21) -#define PREP_IE_DST_DSN(x) u32_field_get(x, 27, AE_F_SET(x)); +#define PREP_IE_TARGET_ADDR(x) (AE_F_SET(x) ? x + 27 : x + 21) +#define PREP_IE_TARGET_SN(x) u32_field_get(x, 27, AE_F_SET(x)); #define PERR_IE_TTL(x) (*(x)) -#define PERR_IE_DST_FLAGS(x) (*(x + 2)) -#define PERR_IE_DST_ADDR(x) (x + 3) -#define PERR_IE_DST_DSN(x) u32_field_get(x, 9, 0); -#define PERR_IE_DST_RCODE(x) u16_field_get(x, 13, 0); +#define PERR_IE_TARGET_FLAGS(x) (*(x + 2)) +#define PERR_IE_TARGET_ADDR(x) (x + 3) +#define PERR_IE_TARGET_SN(x) u32_field_get(x, 9, 0); +#define PERR_IE_TARGET_RCODE(x) u16_field_get(x, 13, 0); #define MSEC_TO_TU(x) (x*1000/1024) -#define DSN_GT(x, y) ((long) (y) - (long) (x) < 0) -#define DSN_LT(x, y) ((long) (x) - (long) (y) < 0) +#define SN_GT(x, y) ((long) (y) - (long) (x) < 0) +#define SN_LT(x, y) ((long) (x) - (long) (y) < 0) #define net_traversal_jiffies(s) \ msecs_to_jiffies(s->u.mesh.mshcfg.dot11MeshHWMPnetDiameterTraversalTime) @@ -102,9 +102,10 @@ enum mpath_frame_type { }; static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags, - u8 *orig_addr, __le32 orig_dsn, u8 dst_flags, u8 *dst, - __le32 dst_dsn, u8 *da, u8 hop_count, u8 ttl, __le32 lifetime, - __le32 metric, __le32 preq_id, struct ieee80211_sub_if_data *sdata) + u8 *orig_addr, __le32 orig_sn, u8 target_flags, u8 *target, + __le32 target_sn, u8 *da, u8 hop_count, u8 ttl,__le32 lifetime, + __le32 metric, __le32 preq_id, + struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; struct sk_buff *skb = dev_alloc_skb(local->hw.extra_tx_headroom + 400); @@ -133,13 +134,13 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags, switch (action) { case MPATH_PREQ: - mhwmp_dbg("sending PREQ to %pM\n", dst); + mhwmp_dbg("sending PREQ to %pM\n", target); ie_len = 37; pos = skb_put(skb, 2 + ie_len); *pos++ = WLAN_EID_PREQ; break; case MPATH_PREP: - mhwmp_dbg("sending PREP to %pM\n", dst); + mhwmp_dbg("sending PREP to %pM\n", target); ie_len = 31; pos = skb_put(skb, 2 + ie_len); *pos++ = WLAN_EID_PREP; @@ -165,7 +166,7 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags, } memcpy(pos, orig_addr, ETH_ALEN); pos += ETH_ALEN; - memcpy(pos, &orig_dsn, 4); + memcpy(pos, &orig_sn, 4); pos += 4; if (action != MPATH_RANN) { memcpy(pos, &lifetime, 4); @@ -176,12 +177,12 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags, if (action == MPATH_PREQ) { /* destination count */ *pos++ = 1; - *pos++ = dst_flags; + *pos++ = target_flags; } if (action != MPATH_RANN) { - memcpy(pos, dst, ETH_ALEN); + memcpy(pos, target, ETH_ALEN); pos += ETH_ALEN; - memcpy(pos, &dst_dsn, 4); + memcpy(pos, &target_sn, 4); } ieee80211_tx_skb(sdata, skb, 1); @@ -191,12 +192,14 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags, /** * mesh_send_path error - Sends a PERR mesh management frame * - * @dst: broken destination - * @dst_dsn: dsn of the broken destination + * @target: broken destination + * @target_sn: SN of the broken destination + * @target_rcode: reason code for this PERR * @ra: node this frame is addressed to */ -int mesh_path_error_tx(u8 ttl, u8 *dst, __le32 dst_dsn, __le16 dst_rcode, - u8 *ra, struct ieee80211_sub_if_data *sdata) +int mesh_path_error_tx(u8 ttl, u8 *target, __le32 target_sn, + __le16 target_rcode, u8 *ra, + struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; struct sk_buff *skb = dev_alloc_skb(local->hw.extra_tx_headroom + 400); @@ -234,16 +237,16 @@ int mesh_path_error_tx(u8 ttl, u8 *dst, __le32 dst_dsn, __le16 dst_rcode, * bit 2 is set if we have a reason code */ *pos = 0; - if (!dst_dsn) + if (!target_sn) *pos |= MP_F_USN; - if (dst_rcode) + if (target_rcode) *pos |= MP_F_RCODE; pos++; - memcpy(pos, dst, ETH_ALEN); + memcpy(pos, target, ETH_ALEN); pos += ETH_ALEN; - memcpy(pos, &dst_dsn, 4); + memcpy(pos, &target_sn, 4); pos += 4; - memcpy(pos, &dst_rcode, 2); + memcpy(pos, &target_rcode, 2); ieee80211_tx_skb(sdata, skb, 1); return 0; @@ -324,7 +327,7 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata, struct sta_info *sta; bool fresh_info; u8 *orig_addr, *ta; - u32 orig_dsn, orig_metric; + u32 orig_sn, orig_metric; unsigned long orig_lifetime, exp_time; u32 last_hop_metric, new_metric; bool process = true; @@ -343,7 +346,7 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata, switch (action) { case MPATH_PREQ: orig_addr = PREQ_IE_ORIG_ADDR(hwmp_ie); - orig_dsn = PREQ_IE_ORIG_DSN(hwmp_ie); + orig_sn = PREQ_IE_ORIG_SN(hwmp_ie); orig_lifetime = PREQ_IE_LIFETIME(hwmp_ie); orig_metric = PREQ_IE_METRIC(hwmp_ie); break; @@ -356,7 +359,7 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata, * information from both PREQ and PREP frames. */ orig_addr = PREP_IE_ORIG_ADDR(hwmp_ie); - orig_dsn = PREP_IE_ORIG_DSN(hwmp_ie); + orig_sn = PREP_IE_ORIG_SN(hwmp_ie); orig_lifetime = PREP_IE_LIFETIME(hwmp_ie); orig_metric = PREP_IE_METRIC(hwmp_ie); break; @@ -382,9 +385,9 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata, if (mpath->flags & MESH_PATH_FIXED) fresh_info = false; else if ((mpath->flags & MESH_PATH_ACTIVE) && - (mpath->flags & MESH_PATH_DSN_VALID)) { - if (DSN_GT(mpath->dsn, orig_dsn) || - (mpath->dsn == orig_dsn && + (mpath->flags & MESH_PATH_SN_VALID)) { + if (SN_GT(mpath->sn, orig_sn) || + (mpath->sn == orig_sn && action == MPATH_PREQ && new_metric > mpath->metric)) { process = false; @@ -403,9 +406,9 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata, if (fresh_info) { mesh_path_assign_nexthop(mpath, sta); - mpath->flags |= MESH_PATH_DSN_VALID; + mpath->flags |= MESH_PATH_SN_VALID; mpath->metric = new_metric; - mpath->dsn = orig_dsn; + mpath->sn = orig_sn; mpath->exp_time = time_after(mpath->exp_time, exp_time) ? mpath->exp_time : exp_time; mesh_path_activate(mpath); @@ -444,7 +447,7 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata, if (fresh_info) { mesh_path_assign_nexthop(mpath, sta); - mpath->flags &= ~MESH_PATH_DSN_VALID; + mpath->flags &= ~MESH_PATH_SN_VALID; mpath->metric = last_hop_metric; mpath->exp_time = time_after(mpath->exp_time, exp_time) ? mpath->exp_time : exp_time; @@ -466,47 +469,47 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata, { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct mesh_path *mpath; - u8 *dst_addr, *orig_addr; - u8 dst_flags, ttl; - u32 orig_dsn, dst_dsn, lifetime; + u8 *target_addr, *orig_addr; + u8 target_flags, ttl; + u32 orig_sn, target_sn, lifetime; bool reply = false; bool forward = true; - /* Update destination DSN, if present */ - dst_addr = PREQ_IE_DST_ADDR(preq_elem); + /* Update target SN, if present */ + target_addr = PREQ_IE_TARGET_ADDR(preq_elem); orig_addr = PREQ_IE_ORIG_ADDR(preq_elem); - dst_dsn = PREQ_IE_DST_DSN(preq_elem); - orig_dsn = PREQ_IE_ORIG_DSN(preq_elem); - dst_flags = PREQ_IE_DST_F(preq_elem); + target_sn = PREQ_IE_TARGET_SN(preq_elem); + orig_sn = PREQ_IE_ORIG_SN(preq_elem); + target_flags = PREQ_IE_TARGET_F(preq_elem); mhwmp_dbg("received PREQ from %pM\n", orig_addr); - if (memcmp(dst_addr, sdata->dev->dev_addr, ETH_ALEN) == 0) { + if (memcmp(target_addr, sdata->dev->dev_addr, ETH_ALEN) == 0) { mhwmp_dbg("PREQ is for us\n"); forward = false; reply = true; metric = 0; - if (time_after(jiffies, ifmsh->last_dsn_update + + if (time_after(jiffies, ifmsh->last_sn_update + net_traversal_jiffies(sdata)) || - time_before(jiffies, ifmsh->last_dsn_update)) { - dst_dsn = ++ifmsh->dsn; - ifmsh->last_dsn_update = jiffies; + time_before(jiffies, ifmsh->last_sn_update)) { + target_sn = ++ifmsh->sn; + ifmsh->last_sn_update = jiffies; } } else { rcu_read_lock(); - mpath = mesh_path_lookup(dst_addr, sdata); + mpath = mesh_path_lookup(target_addr, sdata); if (mpath) { - if ((!(mpath->flags & MESH_PATH_DSN_VALID)) || - DSN_LT(mpath->dsn, dst_dsn)) { - mpath->dsn = dst_dsn; - mpath->flags |= MESH_PATH_DSN_VALID; - } else if ((!(dst_flags & MP_F_DO)) && + if ((!(mpath->flags & MESH_PATH_SN_VALID)) || + SN_LT(mpath->sn, target_sn)) { + mpath->sn = target_sn; + mpath->flags |= MESH_PATH_SN_VALID; + } else if ((!(target_flags & MP_F_DO)) && (mpath->flags & MESH_PATH_ACTIVE)) { reply = true; metric = mpath->metric; - dst_dsn = mpath->dsn; - if (dst_flags & MP_F_RF) - dst_flags |= MP_F_DO; + target_sn = mpath->sn; + if (target_flags & MP_F_RF) + target_flags |= MP_F_DO; else forward = false; } @@ -519,9 +522,9 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata, ttl = ifmsh->mshcfg.dot11MeshTTL; if (ttl != 0) { mhwmp_dbg("replying to the PREQ\n"); - mesh_path_sel_frame_tx(MPATH_PREP, 0, dst_addr, - cpu_to_le32(dst_dsn), 0, orig_addr, - cpu_to_le32(orig_dsn), mgmt->sa, 0, ttl, + mesh_path_sel_frame_tx(MPATH_PREP, 0, target_addr, + cpu_to_le32(target_sn), 0, orig_addr, + cpu_to_le32(orig_sn), mgmt->sa, 0, ttl, cpu_to_le32(lifetime), cpu_to_le32(metric), 0, sdata); } else @@ -544,8 +547,8 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata, preq_id = PREQ_IE_PREQ_ID(preq_elem); hopcount = PREQ_IE_HOPCOUNT(preq_elem) + 1; mesh_path_sel_frame_tx(MPATH_PREQ, flags, orig_addr, - cpu_to_le32(orig_dsn), dst_flags, dst_addr, - cpu_to_le32(dst_dsn), sdata->dev->broadcast, + cpu_to_le32(orig_sn), target_flags, target_addr, + cpu_to_le32(target_sn), sdata->dev->broadcast, hopcount, ttl, cpu_to_le32(lifetime), cpu_to_le32(metric), cpu_to_le32(preq_id), sdata); @@ -560,10 +563,10 @@ static void hwmp_prep_frame_process(struct ieee80211_sub_if_data *sdata, u8 *prep_elem, u32 metric) { struct mesh_path *mpath; - u8 *dst_addr, *orig_addr; + u8 *target_addr, *orig_addr; u8 ttl, hopcount, flags; u8 next_hop[ETH_ALEN]; - u32 dst_dsn, orig_dsn, lifetime; + u32 target_sn, orig_sn, lifetime; mhwmp_dbg("received PREP from %pM\n", PREP_IE_ORIG_ADDR(prep_elem)); @@ -573,8 +576,8 @@ static void hwmp_prep_frame_process(struct ieee80211_sub_if_data *sdata, * which corresponds with the originator of the PREQ which this PREP * replies */ - dst_addr = PREP_IE_DST_ADDR(prep_elem); - if (memcmp(dst_addr, sdata->dev->dev_addr, ETH_ALEN) == 0) + target_addr = PREP_IE_TARGET_ADDR(prep_elem); + if (memcmp(target_addr, sdata->dev->dev_addr, ETH_ALEN) == 0) /* destination, no forwarding required */ return; @@ -585,7 +588,7 @@ static void hwmp_prep_frame_process(struct ieee80211_sub_if_data *sdata, } rcu_read_lock(); - mpath = mesh_path_lookup(dst_addr, sdata); + mpath = mesh_path_lookup(target_addr, sdata); if (mpath) spin_lock_bh(&mpath->state_lock); else @@ -601,13 +604,13 @@ static void hwmp_prep_frame_process(struct ieee80211_sub_if_data *sdata, lifetime = PREP_IE_LIFETIME(prep_elem); hopcount = PREP_IE_HOPCOUNT(prep_elem) + 1; orig_addr = PREP_IE_ORIG_ADDR(prep_elem); - dst_dsn = PREP_IE_DST_DSN(prep_elem); - orig_dsn = PREP_IE_ORIG_DSN(prep_elem); + target_sn = PREP_IE_TARGET_SN(prep_elem); + orig_sn = PREP_IE_ORIG_SN(prep_elem); mesh_path_sel_frame_tx(MPATH_PREP, flags, orig_addr, - cpu_to_le32(orig_dsn), 0, dst_addr, - cpu_to_le32(dst_dsn), mpath->next_hop->sta.addr, hopcount, ttl, - cpu_to_le32(lifetime), cpu_to_le32(metric), + cpu_to_le32(orig_sn), 0, target_addr, + cpu_to_le32(target_sn), mpath->next_hop->sta.addr, hopcount, + ttl, cpu_to_le32(lifetime), cpu_to_le32(metric), 0, sdata); rcu_read_unlock(); @@ -627,10 +630,10 @@ static void hwmp_perr_frame_process(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct mesh_path *mpath; u8 ttl; - u8 *ta, *dst_addr; - u8 dst_flags; - u32 dst_dsn; - u16 dst_rcode; + u8 *ta, *target_addr; + u8 target_flags; + u32 target_sn; + u16 target_rcode; ta = mgmt->sa; ttl = PERR_IE_TTL(perr_elem); @@ -639,24 +642,24 @@ static void hwmp_perr_frame_process(struct ieee80211_sub_if_data *sdata, return; } ttl--; - dst_flags = PERR_IE_DST_FLAGS(perr_elem); - dst_addr = PERR_IE_DST_ADDR(perr_elem); - dst_dsn = PERR_IE_DST_DSN(perr_elem); - dst_rcode = PERR_IE_DST_RCODE(perr_elem); + target_flags = PERR_IE_TARGET_FLAGS(perr_elem); + target_addr = PERR_IE_TARGET_ADDR(perr_elem); + target_sn = PERR_IE_TARGET_SN(perr_elem); + target_rcode = PERR_IE_TARGET_RCODE(perr_elem); rcu_read_lock(); - mpath = mesh_path_lookup(dst_addr, sdata); + mpath = mesh_path_lookup(target_addr, sdata); if (mpath) { spin_lock_bh(&mpath->state_lock); if (mpath->flags & MESH_PATH_ACTIVE && memcmp(ta, mpath->next_hop->sta.addr, ETH_ALEN) == 0 && - (!(mpath->flags & MESH_PATH_DSN_VALID) || - DSN_GT(dst_dsn, mpath->dsn))) { + (!(mpath->flags & MESH_PATH_SN_VALID) || + SN_GT(target_sn, mpath->sn))) { mpath->flags &= ~MESH_PATH_ACTIVE; - mpath->dsn = dst_dsn; + mpath->sn = target_sn; spin_unlock_bh(&mpath->state_lock); - mesh_path_error_tx(ttl, dst_addr, cpu_to_le32(dst_dsn), - cpu_to_le16(dst_rcode), + mesh_path_error_tx(ttl, target_addr, cpu_to_le32(target_sn), + cpu_to_le16(target_rcode), sdata->dev->broadcast, sdata); } else spin_unlock_bh(&mpath->state_lock); @@ -673,7 +676,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, u8 *ta; u8 ttl, flags, hopcount; u8 *orig_addr; - u32 orig_dsn, metric; + u32 orig_sn, metric; ta = mgmt->sa; ttl = rann->rann_ttl; @@ -684,7 +687,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, ttl--; flags = rann->rann_flags; orig_addr = rann->rann_addr; - orig_dsn = rann->rann_seq; + orig_sn = rann->rann_seq; hopcount = rann->rann_hopcount; hopcount++; metric = rann->rann_metric; @@ -703,14 +706,14 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, mesh_queue_preq(mpath, PREQ_Q_F_START | PREQ_Q_F_REFRESH); } - if (mpath->dsn < orig_dsn) { + if (mpath->sn < orig_sn) { mesh_path_sel_frame_tx(MPATH_RANN, flags, orig_addr, - cpu_to_le32(orig_dsn), + cpu_to_le32(orig_sn), 0, NULL, 0, sdata->dev->broadcast, hopcount, ttl, 0, cpu_to_le32(metric + mpath->metric), 0, sdata); - mpath->dsn = orig_dsn; + mpath->sn = orig_sn; } rcu_read_unlock(); } @@ -823,7 +826,7 @@ void mesh_path_start_discovery(struct ieee80211_sub_if_data *sdata) struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct mesh_preq_queue *preq_node; struct mesh_path *mpath; - u8 ttl, dst_flags; + u8 ttl, target_flags; u32 lifetime; spin_lock_bh(&ifmsh->mesh_preq_queue_lock); @@ -865,11 +868,11 @@ void mesh_path_start_discovery(struct ieee80211_sub_if_data *sdata) ifmsh->last_preq = jiffies; - if (time_after(jiffies, ifmsh->last_dsn_update + + if (time_after(jiffies, ifmsh->last_sn_update + net_traversal_jiffies(sdata)) || - time_before(jiffies, ifmsh->last_dsn_update)) { - ++ifmsh->dsn; - sdata->u.mesh.last_dsn_update = jiffies; + time_before(jiffies, ifmsh->last_sn_update)) { + ++ifmsh->sn; + sdata->u.mesh.last_sn_update = jiffies; } lifetime = default_lifetime(sdata); ttl = sdata->u.mesh.mshcfg.dot11MeshTTL; @@ -880,14 +883,14 @@ void mesh_path_start_discovery(struct ieee80211_sub_if_data *sdata) } if (preq_node->flags & PREQ_Q_F_REFRESH) - dst_flags = MP_F_DO; + target_flags = MP_F_DO; else - dst_flags = MP_F_RF; + target_flags = MP_F_RF; spin_unlock_bh(&mpath->state_lock); mesh_path_sel_frame_tx(MPATH_PREQ, 0, sdata->dev->dev_addr, - cpu_to_le32(ifmsh->dsn), dst_flags, mpath->dst, - cpu_to_le32(mpath->dsn), sdata->dev->broadcast, 0, + cpu_to_le32(ifmsh->sn), target_flags, mpath->dst, + cpu_to_le32(mpath->sn), sdata->dev->broadcast, 0, ttl, cpu_to_le32(lifetime), 0, cpu_to_le32(ifmsh->preq_id++), sdata); mod_timer(&mpath->timer, jiffies + mpath->discovery_timeout); @@ -914,15 +917,15 @@ int mesh_nexthop_lookup(struct sk_buff *skb, struct sk_buff *skb_to_free = NULL; struct mesh_path *mpath; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - u8 *dst_addr = hdr->addr3; + u8 *target_addr = hdr->addr3; int err = 0; rcu_read_lock(); - mpath = mesh_path_lookup(dst_addr, sdata); + mpath = mesh_path_lookup(target_addr, sdata); if (!mpath) { - mesh_path_add(dst_addr, sdata); - mpath = mesh_path_lookup(dst_addr, sdata); + mesh_path_add(target_addr, sdata); + mpath = mesh_path_lookup(target_addr, sdata); if (!mpath) { sdata->u.mesh.mshstats.dropped_frames_no_route++; err = -ENOSPC; diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 5d541235ca2f..2cebdf52b7bc 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -463,10 +463,10 @@ void mesh_plink_broken(struct sta_info *sta) mpath->flags & MESH_PATH_ACTIVE && !(mpath->flags & MESH_PATH_FIXED)) { mpath->flags &= ~MESH_PATH_ACTIVE; - ++mpath->dsn; + ++mpath->sn; spin_unlock_bh(&mpath->state_lock); mesh_path_error_tx(MESH_TTL, mpath->dst, - cpu_to_le32(mpath->dsn), + cpu_to_le32(mpath->sn), PERR_RCODE_DEST_UNREACH, sdata->dev->broadcast, sdata); } else @@ -602,7 +602,7 @@ void mesh_path_discard_frame(struct sk_buff *skb, { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; struct mesh_path *mpath; - u32 dsn = 0; + u32 sn = 0; if (memcmp(hdr->addr4, sdata->dev->dev_addr, ETH_ALEN) != 0) { u8 *ra, *da; @@ -611,8 +611,8 @@ void mesh_path_discard_frame(struct sk_buff *skb, ra = hdr->addr1; mpath = mesh_path_lookup(da, sdata); if (mpath) - dsn = ++mpath->dsn; - mesh_path_error_tx(MESH_TTL, skb->data, cpu_to_le32(dsn), + sn = ++mpath->sn; + mesh_path_error_tx(MESH_TTL, skb->data, cpu_to_le32(sn), PERR_RCODE_NO_ROUTE, ra, sdata); } @@ -648,7 +648,7 @@ void mesh_path_fix_nexthop(struct mesh_path *mpath, struct sta_info *next_hop) { spin_lock_bh(&mpath->state_lock); mesh_path_assign_nexthop(mpath, next_hop); - mpath->dsn = 0xffff; + mpath->sn = 0xffff; mpath->metric = 0; mpath->hop_count = 0; mpath->exp_time = 0; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 8c8e4eae6a17..7c1999872503 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2116,9 +2116,9 @@ static int nl80211_send_mpath(struct sk_buff *msg, u32 pid, u32 seq, if (pinfo->filled & MPATH_INFO_FRAME_QLEN) NLA_PUT_U32(msg, NL80211_MPATH_INFO_FRAME_QLEN, pinfo->frame_qlen); - if (pinfo->filled & MPATH_INFO_DSN) - NLA_PUT_U32(msg, NL80211_MPATH_INFO_DSN, - pinfo->dsn); + if (pinfo->filled & MPATH_INFO_SN) + NLA_PUT_U32(msg, NL80211_MPATH_INFO_SN, + pinfo->sn); if (pinfo->filled & MPATH_INFO_METRIC) NLA_PUT_U32(msg, NL80211_MPATH_INFO_METRIC, pinfo->metric); -- cgit v1.3-8-gc7d7 From 63c5723bc3af8d4e86984dd4ff0c78218de418d0 Mon Sep 17 00:00:00 2001 From: Rui Paulo Date: Mon, 9 Nov 2009 23:46:57 +0000 Subject: mac80211: add nl80211/cfg80211 handling of the new mesh root mode option. Signed-off-by: Rui Paulo Signed-off-by: Javier Cardona Reviewed-by: Andrey Yurovsky Tested-by: Brian Cavagnolo Signed-off-by: John W. Linville --- include/linux/nl80211.h | 3 +++ include/net/cfg80211.h | 1 + net/mac80211/cfg.c | 7 +++++++ net/mac80211/debugfs_netdev.c | 2 ++ net/mac80211/mesh.c | 13 +++++++++++++ net/mac80211/mesh.h | 1 + net/wireless/nl80211.c | 6 ++++++ 7 files changed, 33 insertions(+) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 7a0bd6ea6f63..d2f276de9abe 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -1200,6 +1200,8 @@ enum nl80211_mntr_flags { * @NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME: The interval of time (in TUs) * that it takes for an HWMP information element to propagate across the mesh * + * @NL80211_MESHCONF_ROOTMODE: whether root mode is enabled or not + * * @NL80211_MESHCONF_ATTR_MAX: highest possible mesh configuration attribute * * @__NL80211_MESHCONF_ATTR_AFTER_LAST: internal use @@ -1219,6 +1221,7 @@ enum nl80211_meshconf_params { NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT, NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL, NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME, + NL80211_MESHCONF_HWMP_ROOTMODE, /* keep last */ __NL80211_MESHCONF_ATTR_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 7d0ae9c18286..0e4c51fc63e5 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -508,6 +508,7 @@ struct mesh_config { u32 dot11MeshHWMPactivePathTimeout; u16 dot11MeshHWMPpreqMinInterval; u16 dot11MeshHWMPnetDiameterTraversalTime; + u8 dot11MeshHWMPRootMode; }; /** diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 81053587e72b..7f18c8fa1880 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1029,7 +1029,10 @@ static int ieee80211_set_mesh_params(struct wiphy *wiphy, { struct mesh_config *conf; struct ieee80211_sub_if_data *sdata; + struct ieee80211_if_mesh *ifmsh; + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + ifmsh = &sdata->u.mesh; /* Set the config options which we are interested in setting */ conf = &(sdata->u.mesh.mshcfg); @@ -1064,6 +1067,10 @@ static int ieee80211_set_mesh_params(struct wiphy *wiphy, mask)) conf->dot11MeshHWMPnetDiameterTraversalTime = nconf->dot11MeshHWMPnetDiameterTraversalTime; + if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_ROOTMODE, mask)) { + conf->dot11MeshHWMPRootMode = nconf->dot11MeshHWMPRootMode; + ieee80211_mesh_root_setup(ifmsh); + } return 0; } diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 8782264f49e7..472b2039906c 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -149,6 +149,8 @@ IEEE80211_IF_FILE(path_refresh_time, u.mesh.mshcfg.path_refresh_time, DEC); IEEE80211_IF_FILE(min_discovery_timeout, u.mesh.mshcfg.min_discovery_timeout, DEC); +IEEE80211_IF_FILE(dot11MeshHWMPRootMode, + u.mesh.mshcfg.dot11MeshHWMPRootMode, DEC); #endif diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 88dcfe3030b1..05955dc6b3d3 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -373,6 +373,17 @@ static void ieee80211_mesh_path_root_timer(unsigned long data) ieee80211_queue_work(&local->hw, &ifmsh->work); } +void ieee80211_mesh_root_setup(struct ieee80211_if_mesh *ifmsh) +{ + if (ifmsh->mshcfg.dot11MeshHWMPRootMode) + set_bit(MESH_WORK_ROOT, &ifmsh->wrkq_flags); + else { + clear_bit(MESH_WORK_ROOT, &ifmsh->wrkq_flags); + /* stop running timer */ + del_timer_sync(&ifmsh->mesh_path_root_timer); + } +} + /** * ieee80211_fill_mesh_addresses - fill addresses of a locally originated mesh frame * @hdr: 802.11 frame header @@ -503,6 +514,7 @@ void ieee80211_mesh_restart(struct ieee80211_sub_if_data *sdata) add_timer(&ifmsh->mesh_path_timer); if (test_and_clear_bit(TMR_RUNNING_MPR, &ifmsh->timers_running)) add_timer(&ifmsh->mesh_path_root_timer); + ieee80211_mesh_root_setup(ifmsh); } #endif @@ -512,6 +524,7 @@ void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata) struct ieee80211_local *local = sdata->local; set_bit(MESH_WORK_HOUSEKEEPING, &ifmsh->wrkq_flags); + ieee80211_mesh_root_setup(ifmsh); ieee80211_queue_work(&local->hw, &ifmsh->work); sdata->vif.bss_conf.beacon_int = MESH_DEFAULT_BEACON_INTERVAL; ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON | diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index 00ee84258196..5ad6975bf28c 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -242,6 +242,7 @@ ieee80211_rx_result ieee80211_mesh_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata); void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata); +void ieee80211_mesh_root_setup(struct ieee80211_if_mesh *ifmsh); /* Mesh paths */ int mesh_nexthop_lookup(struct sk_buff *skb, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 7c1999872503..17bcad69428d 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2616,6 +2616,8 @@ static int nl80211_get_mesh_params(struct sk_buff *skb, cur_params.dot11MeshHWMPpreqMinInterval); NLA_PUT_U16(msg, NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME, cur_params.dot11MeshHWMPnetDiameterTraversalTime); + NLA_PUT_U8(msg, NL80211_MESHCONF_HWMP_ROOTMODE, + cur_params.dot11MeshHWMPRootMode); nla_nest_end(msg, pinfoattr); genlmsg_end(msg, hdr); err = genlmsg_reply(msg, info); @@ -2726,6 +2728,10 @@ static int nl80211_set_mesh_params(struct sk_buff *skb, struct genl_info *info) dot11MeshHWMPnetDiameterTraversalTime, mask, NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME, nla_get_u16); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, + dot11MeshHWMPRootMode, mask, + NL80211_MESHCONF_HWMP_ROOTMODE, + nla_get_u8); /* Apply changes */ err = rdev->ops->set_mesh_params(&rdev->wiphy, dev, &cfg, mask); -- cgit v1.3-8-gc7d7 From 61fa713c751683da915fa0c1aa502be85822c357 Mon Sep 17 00:00:00 2001 From: Holger Schurig Date: Wed, 11 Nov 2009 12:25:40 +0100 Subject: cfg80211: return channel noise via survey API This patch implements the NL80211_CMD_GET_SURVEY command and an get_survey() ops that a driver can implement. The goal of this command is to allow a drivers to report channel survey data (e.g. channel noise, channel occupation). For now, only the mechanism to report back channel noise has been implemented. In future, there will either be a survey-trigger command --- or the existing scan-trigger command will be enhanced. This will allow user-space to request survey for arbitrary channels. Note: any driver that cannot report channel noise should not report any value at all, e.g. made-up -92 dBm. Signed-off-by: Holger Schurig Signed-off-by: John W. Linville --- include/linux/nl80211.h | 34 ++++++++++++++++ include/net/cfg80211.h | 34 ++++++++++++++++ net/wireless/nl80211.c | 105 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 173 insertions(+) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index d2f276de9abe..45db17f81aa3 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -160,6 +160,11 @@ * @NL80211_CMD_SCAN_ABORTED: scan was aborted, for unspecified reasons, * partial scan results may be available * + * @NL80211_CMD_GET_SURVEY: get survey resuls, e.g. channel occupation + * or noise level + * @NL80211_CMD_NEW_SURVEY_RESULTS: survey data notification (as a reply to + * NL80211_CMD_GET_SURVEY and on the "scan" multicast group) + * * @NL80211_CMD_REG_CHANGE: indicates to userspace the regulatory domain * has been changed and provides details of the request information * that caused the change such as who initiated the regulatory request @@ -341,6 +346,9 @@ enum nl80211_commands { NL80211_CMD_SET_WIPHY_NETNS, + NL80211_CMD_GET_SURVEY, + NL80211_CMD_NEW_SURVEY_RESULTS, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -586,6 +594,10 @@ enum nl80211_commands { * * @NL80211_ATTR_4ADDR: Use 4-address frames on a virtual interface * + * @NL80211_ATTR_SURVEY_INFO: survey information about a channel, part of + * the survey response for %NL80211_CMD_GET_SURVEY, nested attribute + * containing info as possible, see &enum survey_info. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -718,6 +730,8 @@ enum nl80211_attrs { NL80211_ATTR_4ADDR, + NL80211_ATTR_SURVEY_INFO, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -1120,6 +1134,26 @@ enum nl80211_reg_rule_flags { NL80211_RRF_NO_IBSS = 1<<8, }; +/** + * enum nl80211_survey_info - survey information + * + * These attribute types are used with %NL80211_ATTR_SURVEY_INFO + * when getting information about a survey. + * + * @__NL80211_SURVEY_INFO_INVALID: attribute number 0 is reserved + * @NL80211_SURVEY_INFO_FREQUENCY: center frequency of channel + * @NL80211_SURVEY_INFO_NOISE: noise level of channel (u8, dBm) + */ +enum nl80211_survey_info { + __NL80211_SURVEY_INFO_INVALID, + NL80211_SURVEY_INFO_FREQUENCY, + NL80211_SURVEY_INFO_NOISE, + + /* keep last */ + __NL80211_SURVEY_INFO_AFTER_LAST, + NL80211_SURVEY_INFO_MAX = __NL80211_SURVEY_INFO_AFTER_LAST - 1 +}; + /** * enum nl80211_mntr_flags - monitor configuration flags * diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 0e4c51fc63e5..21710fc17eaf 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -234,6 +234,35 @@ struct key_params { u32 cipher; }; +/** + * enum survey_info_flags - survey information flags + * + * Used by the driver to indicate which info in &struct survey_info + * it has filled in during the get_survey(). + */ +enum survey_info_flags { + SURVEY_INFO_NOISE_DBM = 1<<0, +}; + +/** + * struct survey_info - channel survey response + * + * Used by dump_survey() to report back per-channel survey information. + * + * @channel: the channel this survey record reports, mandatory + * @filled: bitflag of flags from &enum survey_info_flags + * @noise: channel noise in dBm. This and all following fields are + * optional + * + * This structure can later be expanded with things like + * channel duty cycle etc. + */ +struct survey_info { + struct ieee80211_channel *channel; + u32 filled; + s8 noise; +}; + /** * struct beacon_parameters - beacon parameters * @@ -944,6 +973,8 @@ struct cfg80211_bitrate_mask { * @rfkill_poll: polls the hw rfkill line, use cfg80211 reporting * functions to adjust rfkill hw state * + * @dump_survey: get site survey information. + * * @testmode_cmd: run a test mode command */ struct cfg80211_ops { @@ -1063,6 +1094,9 @@ struct cfg80211_ops { const u8 *peer, const struct cfg80211_bitrate_mask *mask); + int (*dump_survey)(struct wiphy *wiphy, struct net_device *netdev, + int idx, struct survey_info *info); + /* some temporary stuff to finish wext */ int (*set_power_mgmt)(struct wiphy *wiphy, struct net_device *dev, bool enabled, int timeout); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 4dc139cdba5c..5c8b3bfada4b 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3245,6 +3245,106 @@ static int nl80211_dump_scan(struct sk_buff *skb, return err; } +static int nl80211_send_survey(struct sk_buff *msg, u32 pid, u32 seq, + int flags, struct net_device *dev, + struct survey_info *survey) +{ + void *hdr; + struct nlattr *infoattr; + + /* Survey without a channel doesn't make sense */ + if (!survey->channel) + return -EINVAL; + + hdr = nl80211hdr_put(msg, pid, seq, flags, + NL80211_CMD_NEW_SURVEY_RESULTS); + if (!hdr) + return -ENOMEM; + + NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, dev->ifindex); + + infoattr = nla_nest_start(msg, NL80211_ATTR_SURVEY_INFO); + if (!infoattr) + goto nla_put_failure; + + NLA_PUT_U32(msg, NL80211_SURVEY_INFO_FREQUENCY, + survey->channel->center_freq); + if (survey->filled & SURVEY_INFO_NOISE_DBM) + NLA_PUT_U8(msg, NL80211_SURVEY_INFO_NOISE, + survey->noise); + + nla_nest_end(msg, infoattr); + + return genlmsg_end(msg, hdr); + + nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +static int nl80211_dump_survey(struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct survey_info survey; + struct cfg80211_registered_device *dev; + struct net_device *netdev; + int ifidx = cb->args[0]; + int survey_idx = cb->args[1]; + int res; + + if (!ifidx) + ifidx = nl80211_get_ifidx(cb); + if (ifidx < 0) + return ifidx; + cb->args[0] = ifidx; + + rtnl_lock(); + + netdev = __dev_get_by_index(sock_net(skb->sk), ifidx); + if (!netdev) { + res = -ENODEV; + goto out_rtnl; + } + + dev = cfg80211_get_dev_from_ifindex(sock_net(skb->sk), ifidx); + if (IS_ERR(dev)) { + res = PTR_ERR(dev); + goto out_rtnl; + } + + if (!dev->ops->dump_survey) { + res = -EOPNOTSUPP; + goto out_err; + } + + while (1) { + res = dev->ops->dump_survey(&dev->wiphy, netdev, survey_idx, + &survey); + if (res == -ENOENT) + break; + if (res) + goto out_err; + + if (nl80211_send_survey(skb, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + netdev, + &survey) < 0) + goto out; + survey_idx++; + } + + out: + cb->args[1] = survey_idx; + res = skb->len; + out_err: + cfg80211_unlock_rdev(dev); + out_rtnl: + rtnl_unlock(); + + return res; +} + static bool nl80211_valid_auth_type(enum nl80211_auth_type auth_type) { return auth_type <= NL80211_AUTHTYPE_MAX; @@ -4322,6 +4422,11 @@ static struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, }, + { + .cmd = NL80211_CMD_GET_SURVEY, + .policy = nl80211_policy, + .dumpit = nl80211_dump_survey, + }, }; static struct genl_multicast_group nl80211_mlme_mcgrp = { .name = "mlme", -- cgit v1.3-8-gc7d7 From 687b16fb617bd446439425a368ad7c7bbd202c73 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 13 Nov 2009 13:16:15 +0100 Subject: hw-breakpoints: Provide an off-case for counter_arch_bp() If an arch doesn't support the hw breakpoints, counter_arch_bp() has no off case to cover the missing breakpoint info structure from the perf event. The result is a build error in non-x86 configs. Reported-by: Ingo Molnar Signed-off-by: Frederic Weisbecker Cc: Prasad LKML-Reference: <1258114575-32655-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar Cc: Prasad --- include/linux/hw_breakpoint.h | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h index 7eba9b92e5f3..18710e0c84bd 100644 --- a/include/linux/hw_breakpoint.h +++ b/include/linux/hw_breakpoint.h @@ -16,11 +16,6 @@ enum { HW_BREAKPOINT_X = 4, }; -static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp) -{ - return &bp->hw.info; -} - static inline unsigned long hw_breakpoint_addr(struct perf_event *bp) { return bp->attr.bp_addr; @@ -83,6 +78,11 @@ extern void release_bp_slot(struct perf_event *bp); extern void flush_ptrace_hw_breakpoint(struct task_struct *tsk); +static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp) +{ + return &bp->hw.info; +} + #else /* !CONFIG_HAVE_HW_BREAKPOINT */ static inline struct perf_event * @@ -126,6 +126,11 @@ static inline void release_bp_slot(struct perf_event *bp) { } static inline void flush_ptrace_hw_breakpoint(struct task_struct *tsk) { } +static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp) +{ + return NULL; +} + #endif /* CONFIG_HAVE_HW_BREAKPOINT */ #endif /* _LINUX_HW_BREAKPOINT_H */ -- cgit v1.3-8-gc7d7 From f6c06b6807ff9281295989ebad72523865325a4f Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Fri, 13 Nov 2009 15:14:11 -0500 Subject: vc: Add support for hiding the cursor when creating VTs Add support for setting a global default for whether or not a visible cursor should be enabled when creating VCs. The default will be to do so, unless overridden by the user at boot time or by a driver. Signed-off-by: Matthew Garrett LKML-Reference: <1258143251-5818-1-git-send-email-mjg@redhat.com> Signed-off-by: H. Peter Anvin --- Documentation/kernel-parameters.txt | 9 +++++++++ drivers/char/vt.c | 11 ++++++++++- drivers/video/console/vgacon.c | 2 ++ include/linux/vt_kern.h | 3 +++ 4 files changed, 24 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 9107b387e91f..dd42eb65fd27 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2729,6 +2729,15 @@ and is between 256 and 4096 characters. It is defined in the file Default is 1, i.e. UTF-8 mode is enabled for all newly opened terminals. + vt.global_cursor_default= + [VT] + Format=<-1|0|1> + Set system-wide default for whether a cursor + is shown on new VTs. Default is -1, + i.e. cursors will be created by default unless + overridden by individual drivers. 0 will hide + cursors, 1 will display them. + waveartist= [HW,OSS] Format: ,,, diff --git a/drivers/char/vt.c b/drivers/char/vt.c index 0c80c68cd047..1e3d728dbf7e 100644 --- a/drivers/char/vt.c +++ b/drivers/char/vt.c @@ -161,6 +161,8 @@ static void set_palette(struct vc_data *vc); static int printable; /* Is console ready for printing? */ int default_utf8 = true; module_param(default_utf8, int, S_IRUGO | S_IWUSR); +int global_cursor_default = -1; +module_param(global_cursor_default, int, S_IRUGO | S_IWUSR); /* * ignore_poke: don't unblank the screen when things are typed. This is @@ -775,6 +777,12 @@ int vc_allocate(unsigned int currcons) /* return 0 on success */ vc_cons[currcons].d = NULL; return -ENOMEM; } + + /* If no drivers have overridden us and the user didn't pass a + boot option, default to displaying the cursor */ + if (global_cursor_default == -1) + global_cursor_default = 1; + vc_init(vc, vc->vc_rows, vc->vc_cols, 1); vcs_make_sysfs(currcons); atomic_notifier_call_chain(&vt_notifier_list, VT_ALLOCATE, ¶m); @@ -1616,7 +1624,7 @@ static void reset_terminal(struct vc_data *vc, int do_clear) vc->vc_decscnm = 0; vc->vc_decom = 0; vc->vc_decawm = 1; - vc->vc_deccm = 1; + vc->vc_deccm = global_cursor_default; vc->vc_decim = 0; set_kbd(vc, decarm); @@ -4078,6 +4086,7 @@ EXPORT_SYMBOL(fg_console); EXPORT_SYMBOL(console_blank_hook); EXPORT_SYMBOL(console_blanked); EXPORT_SYMBOL(vc_cons); +EXPORT_SYMBOL(global_cursor_default); #ifndef VT_SINGLE_DRIVER EXPORT_SYMBOL(take_over_console); EXPORT_SYMBOL(give_up_console); diff --git a/drivers/video/console/vgacon.c b/drivers/video/console/vgacon.c index da55ccaf4d55..564643edb3c1 100644 --- a/drivers/video/console/vgacon.c +++ b/drivers/video/console/vgacon.c @@ -585,6 +585,8 @@ static void vgacon_init(struct vc_data *c, int init) vgacon_uni_pagedir[1]++; if (!vgacon_uni_pagedir[0] && p) con_set_default_unimap(c); + + hide_boot_cursor(screen_info.flags & VIDEO_FLAGS_NOCURSOR); } static void vgacon_deinit(struct vc_data *c) diff --git a/include/linux/vt_kern.h b/include/linux/vt_kern.h index c0c4e1103a73..7f56db4a79f0 100644 --- a/include/linux/vt_kern.h +++ b/include/linux/vt_kern.h @@ -110,6 +110,7 @@ extern char con_buf[CON_BUF_SIZE]; extern struct mutex con_buf_mtx; extern char vt_dont_switch; extern int default_utf8; +extern int global_cursor_default; struct vt_spawn_console { spinlock_t lock; @@ -130,4 +131,6 @@ struct vt_notifier_param { extern int register_vt_notifier(struct notifier_block *nb); extern int unregister_vt_notifier(struct notifier_block *nb); +extern void hide_boot_cursor(bool hide); + #endif /* _VT_KERN_H */ -- cgit v1.3-8-gc7d7 From 688bcaff291cf2fe2734e43f2793d4d05b850518 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 14 Nov 2009 01:12:47 +0100 Subject: hw-breakpoints: Fix build on !perf architectures the arch/alpha build fails with: In file included from tip/kernel/exit.c:52: tip/include/linux/hw_breakpoint.h: In function 'hw_breakpoint_addr': tip/include/linux/hw_breakpoint.h:21: error: 'struct perf_event' has no member named 'attr' [...] Move these helper inlines inside the CONFIG_HAVE_HW_BREAKPOINT ifdef. Cc: Frederic Weisbecker Cc: Prasad LKML-Reference: <1258114575-32655-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- include/linux/hw_breakpoint.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h index 18710e0c84bd..0b98cbf76da7 100644 --- a/include/linux/hw_breakpoint.h +++ b/include/linux/hw_breakpoint.h @@ -16,6 +16,8 @@ enum { HW_BREAKPOINT_X = 4, }; +#ifdef CONFIG_HAVE_HW_BREAKPOINT + static inline unsigned long hw_breakpoint_addr(struct perf_event *bp) { return bp->attr.bp_addr; @@ -31,7 +33,6 @@ static inline int hw_breakpoint_len(struct perf_event *bp) return bp->attr.bp_len; } -#ifdef CONFIG_HAVE_HW_BREAKPOINT extern struct perf_event * register_user_hw_breakpoint(unsigned long addr, int len, -- cgit v1.3-8-gc7d7 From 811cb50baf63461ce0bdb234927046131fc7fa8b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 13 Nov 2009 23:40:09 +0100 Subject: tracing: Fix event format export For some reason the export of the event print format to userspace uses '#fmt' which breaks if the format string is anything but a plain string, for example if it is built with macros then the macro names are exported instead of their contents. Use "\"%s\"", fmt instead of "%s", #fmt to export the string and not the way it is built. For example, in net/mac80211/driver-trace.h for the trace event drv_start there is: TP_printk( LOCAL_PR_FMT, LOCAL_PR_ARG ) Which use to produce: print fmt: LOCAL_PR_FMT, REC->wiphy_name Now produces: print fmt: "%s", REC->wiphy_name Signed-off-by: Johannes Berg LKML-Reference: <20091113224009.GB23942@elte.hu> Signed-off-by: Steven Rostedt --- include/trace/ftrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index cc0d9667e182..dacb8ef67000 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -159,7 +159,7 @@ #undef __get_str #undef TP_printk -#define TP_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args) +#define TP_printk(fmt, args...) "\"%s\", %s\n", fmt, __stringify(args) #undef TP_fast_assign #define TP_fast_assign(args...) args -- cgit v1.3-8-gc7d7 From bee7ca9ec03a26676ea2b1c28dc4039348eff3e1 Mon Sep 17 00:00:00 2001 From: William Allen Simpson Date: Tue, 10 Nov 2009 09:51:18 +0000 Subject: net: TCP_MSS_DEFAULT, TCP_MSS_DESIRED Define two symbols needed in both kernel and user space. Remove old (somewhat incorrect) kernel variant that wasn't used in most cases. Default should apply to both RMSS and SMSS (RFC2581). Replace numeric constants with defined symbols. Stand-alone patch, originally developed for TCPCT. Signed-off-by: William.Allen.Simpson@gmail.com Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 6 ++++++ include/net/tcp.h | 3 --- net/ipv4/tcp_input.c | 4 ++-- net/ipv4/tcp_ipv4.c | 6 +++--- net/ipv4/tcp_minisocks.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- 6 files changed, 13 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index eeecb8547a2a..32d7d77b4a01 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -81,6 +81,12 @@ enum { TCP_DATA_OFFSET = __cpu_to_be32(0xF0000000) }; +/* + * TCP general constants + */ +#define TCP_MSS_DEFAULT 536U /* IPv4 (RFC1122, RFC2581) */ +#define TCP_MSS_DESIRED 1220U /* IPv6 (tunneled), EDNS0 (RFC3226) */ + /* TCP socket options */ #define TCP_NODELAY 1 /* Turn off Nagle's algorithm. */ #define TCP_MAXSEG 2 /* Limit MSS */ diff --git a/include/net/tcp.h b/include/net/tcp.h index bf20f88fd033..325bfcf5c934 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -62,9 +62,6 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo); /* Minimal accepted MSS. It is (60+60+8) - (20+20). */ #define TCP_MIN_MSS 88U -/* Minimal RCV_MSS. */ -#define TCP_MIN_RCVMSS 536U - /* The least MTU to use for probing */ #define TCP_BASE_MSS 512 diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index be0c5bf7bfca..cc306ac6eb51 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -140,7 +140,7 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb) * "len" is invariant segment length, including TCP header. */ len += skb->data - skb_transport_header(skb); - if (len >= TCP_MIN_RCVMSS + sizeof(struct tcphdr) || + if (len >= TCP_MSS_DEFAULT + sizeof(struct tcphdr) || /* If PSH is not set, packet should be * full sized, provided peer TCP is not badly broken. * This observation (if it is correct 8)) allows @@ -411,7 +411,7 @@ void tcp_initialize_rcv_mss(struct sock *sk) unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache); hint = min(hint, tp->rcv_wnd / 2); - hint = min(hint, TCP_MIN_RCVMSS); + hint = min(hint, TCP_MSS_DEFAULT); hint = max(hint, TCP_MIN_MSS); inet_csk(sk)->icsk_ack.rcv_mss = hint; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 657ae334f125..cf7f2086e6e0 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -217,7 +217,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (inet->opt) inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen; - tp->rx_opt.mss_clamp = 536; + tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT; /* Socket identity is still unknown (sport may be zero). * However we set state to SYN-SENT and not releasing socket @@ -1268,7 +1268,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) goto drop_and_free; tcp_clear_options(&tmp_opt); - tmp_opt.mss_clamp = 536; + tmp_opt.mss_clamp = TCP_MSS_DEFAULT; tmp_opt.user_mss = tcp_sk(sk)->rx_opt.user_mss; tcp_parse_options(skb, &tmp_opt, 0, dst); @@ -1815,7 +1815,7 @@ static int tcp_v4_init_sock(struct sock *sk) */ tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; tp->snd_cwnd_clamp = ~0; - tp->mss_cache = 536; + tp->mss_cache = TCP_MSS_DEFAULT; tp->reordering = sysctl_tcp_reordering; icsk->icsk_ca_ops = &tcp_init_congestion_ops; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index a9d34e224cb6..4be22280e6b3 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -476,7 +476,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, if (newtp->af_specific->md5_lookup(sk, newsk)) newtp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED; #endif - if (skb->len >= TCP_MIN_RCVMSS+newtp->tcp_header_len) + if (skb->len >= TCP_MSS_DEFAULT + newtp->tcp_header_len) newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len; newtp->rx_opt.mss_clamp = req->mss; TCP_ECN_openreq_child(newtp, req); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 696a22f034e8..de709091b26d 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1851,7 +1851,7 @@ static int tcp_v6_init_sock(struct sock *sk) */ tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; tp->snd_cwnd_clamp = ~0; - tp->mss_cache = 536; + tp->mss_cache = TCP_MSS_DEFAULT; tp->reordering = sysctl_tcp_reordering; -- cgit v1.3-8-gc7d7 From ce81b76a39835a721cd168e0c0bcfe7132f1f66b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 11 Nov 2009 17:34:30 +0000 Subject: ipv6: use RCU to walk list of network devices No longer need read_lock(&dev_base_lock), use RCU instead. We also can avoid taking references on inet6_dev structs. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 10 ++++++++++ net/ipv6/anycast.c | 29 +++++++++++++-------------- net/ipv6/mcast.c | 51 +++++++++++++++++++++-------------------------- 3 files changed, 47 insertions(+), 43 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 8b266390b9e2..61425d0c6123 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1114,6 +1114,16 @@ static inline struct net_device *next_net_device(struct net_device *dev) return lh == &net->dev_base_head ? NULL : net_device_entry(lh); } +static inline struct net_device *next_net_device_rcu(struct net_device *dev) +{ + struct list_head *lh; + struct net *net; + + net = dev_net(dev); + lh = rcu_dereference(dev->dev_list.next); + return lh == &net->dev_base_head ? NULL : net_device_entry(lh); +} + static inline struct net_device *first_net_device(struct net *net) { return list_empty(&net->dev_base_head) ? NULL : diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 2f00ca83f049..f1c74c8ef9de 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -431,9 +431,9 @@ static inline struct ifacaddr6 *ac6_get_first(struct seq_file *seq) struct net *net = seq_file_net(seq); state->idev = NULL; - for_each_netdev(net, state->dev) { + for_each_netdev_rcu(net, state->dev) { struct inet6_dev *idev; - idev = in6_dev_get(state->dev); + idev = __in6_dev_get(state->dev); if (!idev) continue; read_lock_bh(&idev->lock); @@ -443,7 +443,6 @@ static inline struct ifacaddr6 *ac6_get_first(struct seq_file *seq) break; } read_unlock_bh(&idev->lock); - in6_dev_put(idev); } return im; } @@ -454,16 +453,15 @@ static struct ifacaddr6 *ac6_get_next(struct seq_file *seq, struct ifacaddr6 *im im = im->aca_next; while (!im) { - if (likely(state->idev != NULL)) { + if (likely(state->idev != NULL)) read_unlock_bh(&state->idev->lock); - in6_dev_put(state->idev); - } - state->dev = next_net_device(state->dev); + + state->dev = next_net_device_rcu(state->dev); if (!state->dev) { state->idev = NULL; break; } - state->idev = in6_dev_get(state->dev); + state->idev = __in6_dev_get(state->dev); if (!state->idev) continue; read_lock_bh(&state->idev->lock); @@ -482,29 +480,30 @@ static struct ifacaddr6 *ac6_get_idx(struct seq_file *seq, loff_t pos) } static void *ac6_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(dev_base_lock) + __acquires(RCU) { - read_lock(&dev_base_lock); + rcu_read_lock(); return ac6_get_idx(seq, *pos); } static void *ac6_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct ifacaddr6 *im; - im = ac6_get_next(seq, v); + struct ifacaddr6 *im = ac6_get_next(seq, v); + ++*pos; return im; } static void ac6_seq_stop(struct seq_file *seq, void *v) - __releases(dev_base_lock) + __releases(RCU) { struct ac6_iter_state *state = ac6_seq_private(seq); + if (likely(state->idev != NULL)) { read_unlock_bh(&state->idev->lock); - in6_dev_put(state->idev); + state->idev = NULL; } - read_unlock(&dev_base_lock); + rcu_read_unlock(); } static int ac6_seq_show(struct seq_file *seq, void *v) diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index f9fcf690bd5d..1f9c44442e65 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -2375,9 +2375,9 @@ static inline struct ifmcaddr6 *igmp6_mc_get_first(struct seq_file *seq) struct net *net = seq_file_net(seq); state->idev = NULL; - for_each_netdev(net, state->dev) { + for_each_netdev_rcu(net, state->dev) { struct inet6_dev *idev; - idev = in6_dev_get(state->dev); + idev = __in6_dev_get(state->dev); if (!idev) continue; read_lock_bh(&idev->lock); @@ -2387,7 +2387,6 @@ static inline struct ifmcaddr6 *igmp6_mc_get_first(struct seq_file *seq) break; } read_unlock_bh(&idev->lock); - in6_dev_put(idev); } return im; } @@ -2398,16 +2397,15 @@ static struct ifmcaddr6 *igmp6_mc_get_next(struct seq_file *seq, struct ifmcaddr im = im->next; while (!im) { - if (likely(state->idev != NULL)) { + if (likely(state->idev != NULL)) read_unlock_bh(&state->idev->lock); - in6_dev_put(state->idev); - } - state->dev = next_net_device(state->dev); + + state->dev = next_net_device_rcu(state->dev); if (!state->dev) { state->idev = NULL; break; } - state->idev = in6_dev_get(state->dev); + state->idev = __in6_dev_get(state->dev); if (!state->idev) continue; read_lock_bh(&state->idev->lock); @@ -2426,31 +2424,31 @@ static struct ifmcaddr6 *igmp6_mc_get_idx(struct seq_file *seq, loff_t pos) } static void *igmp6_mc_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(dev_base_lock) + __acquires(RCU) { - read_lock(&dev_base_lock); + rcu_read_lock(); return igmp6_mc_get_idx(seq, *pos); } static void *igmp6_mc_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct ifmcaddr6 *im; - im = igmp6_mc_get_next(seq, v); + struct ifmcaddr6 *im = igmp6_mc_get_next(seq, v); + ++*pos; return im; } static void igmp6_mc_seq_stop(struct seq_file *seq, void *v) - __releases(dev_base_lock) + __releases(RCU) { struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq); + if (likely(state->idev != NULL)) { read_unlock_bh(&state->idev->lock); - in6_dev_put(state->idev); state->idev = NULL; } state->dev = NULL; - read_unlock(&dev_base_lock); + rcu_read_unlock(); } static int igmp6_mc_seq_show(struct seq_file *seq, void *v) @@ -2507,9 +2505,9 @@ static inline struct ip6_sf_list *igmp6_mcf_get_first(struct seq_file *seq) state->idev = NULL; state->im = NULL; - for_each_netdev(net, state->dev) { + for_each_netdev_rcu(net, state->dev) { struct inet6_dev *idev; - idev = in6_dev_get(state->dev); + idev = __in6_dev_get(state->dev); if (unlikely(idev == NULL)) continue; read_lock_bh(&idev->lock); @@ -2525,7 +2523,6 @@ static inline struct ip6_sf_list *igmp6_mcf_get_first(struct seq_file *seq) spin_unlock_bh(&im->mca_lock); } read_unlock_bh(&idev->lock); - in6_dev_put(idev); } return psf; } @@ -2539,16 +2536,15 @@ static struct ip6_sf_list *igmp6_mcf_get_next(struct seq_file *seq, struct ip6_s spin_unlock_bh(&state->im->mca_lock); state->im = state->im->next; while (!state->im) { - if (likely(state->idev != NULL)) { + if (likely(state->idev != NULL)) read_unlock_bh(&state->idev->lock); - in6_dev_put(state->idev); - } - state->dev = next_net_device(state->dev); + + state->dev = next_net_device_rcu(state->dev); if (!state->dev) { state->idev = NULL; goto out; } - state->idev = in6_dev_get(state->dev); + state->idev = __in6_dev_get(state->dev); if (!state->idev) continue; read_lock_bh(&state->idev->lock); @@ -2573,9 +2569,9 @@ static struct ip6_sf_list *igmp6_mcf_get_idx(struct seq_file *seq, loff_t pos) } static void *igmp6_mcf_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(dev_base_lock) + __acquires(RCU) { - read_lock(&dev_base_lock); + rcu_read_lock(); return *pos ? igmp6_mcf_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; } @@ -2591,7 +2587,7 @@ static void *igmp6_mcf_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void igmp6_mcf_seq_stop(struct seq_file *seq, void *v) - __releases(dev_base_lock) + __releases(RCU) { struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq); if (likely(state->im != NULL)) { @@ -2600,11 +2596,10 @@ static void igmp6_mcf_seq_stop(struct seq_file *seq, void *v) } if (likely(state->idev != NULL)) { read_unlock_bh(&state->idev->lock); - in6_dev_put(state->idev); state->idev = NULL; } state->dev = NULL; - read_unlock(&dev_base_lock); + rcu_read_unlock(); } static int igmp6_mcf_seq_show(struct seq_file *seq, void *v) -- cgit v1.3-8-gc7d7 From 5256f2ef3a40d784b8369035bff3f4dc637a9801 Mon Sep 17 00:00:00 2001 From: Lucian Adrian Grijincu Date: Thu, 12 Nov 2009 05:07:26 +0000 Subject: inet: fix inet_bind_bucket_for_each The first "node" is supposed to be the cursor used in the for_each. The second "node" is ment literally and should not be macro expanded: it's the name of the hlist_node field from the inet_bind_bucket. This currently works because when inet_bind_bucket_for_each is called it's argument is still "node". Signed-off-by: Lucian Adrian Grijincu Signed-off-by: David S. Miller --- include/net/inet_hashtables.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 5b698b3b463d..41cbddd25b70 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -92,8 +92,8 @@ static inline struct net *ib_net(struct inet_bind_bucket *ib) return read_pnet(&ib->ib_net); } -#define inet_bind_bucket_for_each(tb, node, head) \ - hlist_for_each_entry(tb, node, head, node) +#define inet_bind_bucket_for_each(tb, pos, head) \ + hlist_for_each_entry(tb, pos, head, node) struct inet_bind_hashbucket { spinlock_t lock; -- cgit v1.3-8-gc7d7 From 2c1409a0a2b88585ec0c03f1de0aafa178c56313 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 12 Nov 2009 09:33:09 +0000 Subject: inetpeer: Optimize inet_getid() While investigating for network latencies, I found inet_getid() was a contention point for some workloads, as inet_peer_idlock is shared by all inet_getid() users regardless of peers. One way to fix this is to make ip_id_count an atomic_t instead of __u16, and use atomic_add_return(). In order to keep sizeof(struct inet_peer) = 64 on 64bit arches tcp_ts_stamp is also converted to __u32 instead of "unsigned long". Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inetpeer.h | 16 +++++----------- net/ipv4/inetpeer.c | 5 +---- net/ipv4/route.c | 2 +- net/ipv4/tcp_ipv4.c | 16 ++++++++-------- 4 files changed, 15 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index 35ad7b930467..87b1df0d4d8c 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -17,15 +17,15 @@ struct inet_peer { /* group together avl_left,avl_right,v4daddr to speedup lookups */ struct inet_peer *avl_left, *avl_right; __be32 v4daddr; /* peer's address */ - __u16 avl_height; - __u16 ip_id_count; /* IP ID for the next packet */ + __u32 avl_height; struct list_head unused; __u32 dtime; /* the time of last use of not * referenced entries */ atomic_t refcnt; atomic_t rid; /* Frag reception counter */ + atomic_t ip_id_count; /* IP ID for the next packet */ __u32 tcp_ts; - unsigned long tcp_ts_stamp; + __u32 tcp_ts_stamp; }; void inet_initpeers(void) __init; @@ -36,17 +36,11 @@ struct inet_peer *inet_getpeer(__be32 daddr, int create); /* can be called from BH context or outside */ extern void inet_putpeer(struct inet_peer *p); -extern spinlock_t inet_peer_idlock; /* can be called with or without local BH being disabled */ static inline __u16 inet_getid(struct inet_peer *p, int more) { - __u16 id; - - spin_lock_bh(&inet_peer_idlock); - id = p->ip_id_count; - p->ip_id_count += 1 + more; - spin_unlock_bh(&inet_peer_idlock); - return id; + more++; + return atomic_add_return(more, &p->ip_id_count) - more; } #endif /* _NET_INETPEER_H */ diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index b1fbe18feb5a..6bcfe52a9c87 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -67,9 +67,6 @@ * ip_id_count: idlock */ -/* Exported for inet_getid inline function. */ -DEFINE_SPINLOCK(inet_peer_idlock); - static struct kmem_cache *peer_cachep __read_mostly; #define node_height(x) x->avl_height @@ -390,7 +387,7 @@ struct inet_peer *inet_getpeer(__be32 daddr, int create) n->v4daddr = daddr; atomic_set(&n->refcnt, 1); atomic_set(&n->rid, 0); - n->ip_id_count = secure_ip_id(daddr); + atomic_set(&n->ip_id_count, secure_ip_id(daddr)); n->tcp_ts_stamp = 0; write_lock_bh(&peer_pool_lock); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index ff258b57680b..4284ceef7945 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2852,7 +2852,7 @@ static int rt_fill_info(struct net *net, error = rt->u.dst.error; expires = rt->u.dst.expires ? rt->u.dst.expires - jiffies : 0; if (rt->peer) { - id = rt->peer->ip_id_count; + id = atomic_read(&rt->peer->ip_id_count) & 0xffff; if (rt->peer->tcp_ts_stamp) { ts = rt->peer->tcp_ts; tsage = get_seconds() - rt->peer->tcp_ts_stamp; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index cf7f2086e6e0..df18ce04f41e 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -204,7 +204,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) * when trying new connection. */ if (peer != NULL && - peer->tcp_ts_stamp + TCP_PAWS_MSL >= get_seconds()) { + (u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) { tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp; tp->rx_opt.ts_recent = peer->tcp_ts; } @@ -1308,7 +1308,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) tcp_death_row.sysctl_tw_recycle && (peer = rt_get_peer((struct rtable *)dst)) != NULL && peer->v4daddr == saddr) { - if (get_seconds() < peer->tcp_ts_stamp + TCP_PAWS_MSL && + if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL && (s32)(peer->tcp_ts - req->ts_recent) > TCP_PAWS_WINDOW) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); @@ -1727,9 +1727,9 @@ int tcp_v4_remember_stamp(struct sock *sk) if (peer) { if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 || - (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() && - peer->tcp_ts_stamp <= tp->rx_opt.ts_recent_stamp)) { - peer->tcp_ts_stamp = tp->rx_opt.ts_recent_stamp; + ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL && + peer->tcp_ts_stamp <= (u32)tp->rx_opt.ts_recent_stamp)) { + peer->tcp_ts_stamp = (u32)tp->rx_opt.ts_recent_stamp; peer->tcp_ts = tp->rx_opt.ts_recent; } if (release_it) @@ -1748,9 +1748,9 @@ int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw) const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 || - (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() && - peer->tcp_ts_stamp <= tcptw->tw_ts_recent_stamp)) { - peer->tcp_ts_stamp = tcptw->tw_ts_recent_stamp; + ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL && + peer->tcp_ts_stamp <= (u32)tcptw->tw_ts_recent_stamp)) { + peer->tcp_ts_stamp = (u32)tcptw->tw_ts_recent_stamp; peer->tcp_ts = tcptw->tw_ts_recent; } inet_putpeer(peer); -- cgit v1.3-8-gc7d7 From 4c49b12853fbb5eff4849b7b6a1e895776f027a1 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Fri, 13 Nov 2009 21:47:33 -0800 Subject: perf_event: Fix invalid type in ioctl definition u64 is invalid in userspace headers, including ioctl definitions; use __u64 instead Signed-off-by: Arjan van de Ven Cc: LKML-Reference: <20091113214733.7cd76be9@infradead.org> Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 45b56faf5cdc..ec3768a81058 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -219,7 +219,7 @@ struct perf_event_attr { #define PERF_EVENT_IOC_DISABLE _IO ('$', 1) #define PERF_EVENT_IOC_REFRESH _IO ('$', 2) #define PERF_EVENT_IOC_RESET _IO ('$', 3) -#define PERF_EVENT_IOC_PERIOD _IOW('$', 4, u64) +#define PERF_EVENT_IOC_PERIOD _IOW('$', 4, __u64) #define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5) #define PERF_EVENT_IOC_SET_FILTER _IOW('$', 6, char *) -- cgit v1.3-8-gc7d7 From 6959450e567c1f17d3ce8489099fc56c3721d577 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Sat, 14 Nov 2009 20:46:38 +0900 Subject: swiotlb: Remove duplicate swiotlb_force extern declarations Signed-off-by: FUJITA Tomonori Cc: tony.luck@intel.com LKML-Reference: <1258199198-16657-4-git-send-email-fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar --- arch/ia64/include/asm/swiotlb.h | 2 -- arch/x86/include/asm/swiotlb.h | 4 ---- include/linux/swiotlb.h | 2 ++ 3 files changed, 2 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/arch/ia64/include/asm/swiotlb.h b/arch/ia64/include/asm/swiotlb.h index dcbaea7ce128..f0acde68aaea 100644 --- a/arch/ia64/include/asm/swiotlb.h +++ b/arch/ia64/include/asm/swiotlb.h @@ -4,8 +4,6 @@ #include #include -extern int swiotlb_force; - #ifdef CONFIG_SWIOTLB extern int swiotlb; extern void pci_swiotlb_init(void); diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h index 940f13a213f8..87ffcb12a1b8 100644 --- a/arch/x86/include/asm/swiotlb.h +++ b/arch/x86/include/asm/swiotlb.h @@ -3,10 +3,6 @@ #include -/* SWIOTLB interface */ - -extern int swiotlb_force; - #ifdef CONFIG_SWIOTLB extern int swiotlb; extern int pci_swiotlb_init(void); diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index eb9bdb4d4854..febedcf67c7e 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -7,6 +7,8 @@ struct device; struct dma_attrs; struct scatterlist; +extern int swiotlb_force; + /* * Maximum allowable number of contiguous slabs to map, * must be a power of 2. What is the appropriate value ? -- cgit v1.3-8-gc7d7 From 9a1654ba0b50402a6bd03c7b0fe9b0200a5ea7b1 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Sun, 15 Nov 2009 07:20:12 +0000 Subject: net: Optimize hard_start_xmit() return checking Recent changes in the TX error propagation require additional checking and masking of values returned from hard_start_xmit(), mainly to separate cases where skb was consumed. This aim can be simplified by changing the order of NETDEV_TX and NET_XMIT codes, because the latter are treated similarly to negative (ERRNO) values. After this change much simpler dev_xmit_complete() is also used in sch_direct_xmit(), so it is moved to netdevice.h. Additionally NET_RX definitions in netdevice.h are moved up from between TX codes to avoid confusion while reading the TX comment. Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- include/linux/netdevice.h | 42 ++++++++++++++++++++++++++++++------------ net/core/dev.c | 17 ----------------- net/sched/sch_generic.c | 23 +++++------------------ 3 files changed, 35 insertions(+), 47 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 61425d0c6123..7043f85e643d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -63,6 +63,10 @@ struct wireless_dev; #define HAVE_FREE_NETDEV /* free_netdev() */ #define HAVE_NETDEV_PRIV /* netdev_priv() */ +/* Backlog congestion levels */ +#define NET_RX_SUCCESS 0 /* keep 'em coming, baby */ +#define NET_RX_DROP 1 /* packet dropped */ + /* * Transmit return codes: transmit return codes originate from three different * namespaces: @@ -82,14 +86,10 @@ struct wireless_dev; /* qdisc ->enqueue() return codes. */ #define NET_XMIT_SUCCESS 0x00 -#define NET_XMIT_DROP 0x10 /* skb dropped */ -#define NET_XMIT_CN 0x20 /* congestion notification */ -#define NET_XMIT_POLICED 0x30 /* skb is shot by police */ -#define NET_XMIT_MASK 0xf0 /* qdisc flags in net/sch_generic.h */ - -/* Backlog congestion levels */ -#define NET_RX_SUCCESS 0 /* keep 'em coming, baby */ -#define NET_RX_DROP 1 /* packet dropped */ +#define NET_XMIT_DROP 0x01 /* skb dropped */ +#define NET_XMIT_CN 0x02 /* congestion notification */ +#define NET_XMIT_POLICED 0x03 /* skb is shot by police */ +#define NET_XMIT_MASK 0x0f /* qdisc flags in net/sch_generic.h */ /* NET_XMIT_CN is special. It does not guarantee that this packet is lost. It * indicates that the device will soon be dropping packets, or already drops @@ -98,16 +98,34 @@ struct wireless_dev; #define net_xmit_errno(e) ((e) != NET_XMIT_CN ? -ENOBUFS : 0) /* Driver transmit return codes */ -#define NETDEV_TX_MASK 0xf +#define NETDEV_TX_MASK 0xf0 enum netdev_tx { __NETDEV_TX_MIN = INT_MIN, /* make sure enum is signed */ - NETDEV_TX_OK = 0, /* driver took care of packet */ - NETDEV_TX_BUSY = 1, /* driver tx path was busy*/ - NETDEV_TX_LOCKED = 2, /* driver tx lock was already taken */ + NETDEV_TX_OK = 0x00, /* driver took care of packet */ + NETDEV_TX_BUSY = 0x10, /* driver tx path was busy*/ + NETDEV_TX_LOCKED = 0x20, /* driver tx lock was already taken */ }; typedef enum netdev_tx netdev_tx_t; +/* + * Current order: NETDEV_TX_MASK > NET_XMIT_MASK >= 0 is significant; + * hard_start_xmit() return < NET_XMIT_MASK means skb was consumed. + */ +static inline bool dev_xmit_complete(int rc) +{ + /* + * Positive cases with an skb consumed by a driver: + * - successful transmission (rc == NETDEV_TX_OK) + * - error while transmitting (rc < 0) + * - error while queueing to a different device (rc & NET_XMIT_MASK) + */ + if (likely(rc < NET_XMIT_MASK)) + return true; + + return false; +} + #endif #define MAX_ADDR_LEN 32 /* Largest hardware address length */ diff --git a/net/core/dev.c b/net/core/dev.c index 32045df1da5c..4b24d79414e3 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1924,23 +1924,6 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, return rc; } -static inline bool dev_xmit_complete(int rc) -{ - /* successful transmission */ - if (rc == NETDEV_TX_OK) - return true; - - /* error while transmitting, driver consumed skb */ - if (rc < 0) - return true; - - /* error while queueing to a different device, driver consumed skb */ - if (rc & NET_XMIT_MASK) - return true; - - return false; -} - /** * dev_queue_xmit - transmit a buffer * @skb: buffer to transmit diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index b13821ad2fb6..5173c1e1b19c 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -119,39 +119,26 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, spin_unlock(root_lock); HARD_TX_LOCK(dev, txq, smp_processor_id()); - if (!netif_tx_queue_stopped(txq) && - !netif_tx_queue_frozen(txq)) { + if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq)) ret = dev_hard_start_xmit(skb, dev, txq); - /* an error implies that the skb was consumed */ - if (ret < 0) - ret = NETDEV_TX_OK; - /* all NET_XMIT codes map to NETDEV_TX_OK */ - ret &= ~NET_XMIT_MASK; - } HARD_TX_UNLOCK(dev, txq); spin_lock(root_lock); - switch (ret) { - case NETDEV_TX_OK: - /* Driver sent out skb successfully */ + if (dev_xmit_complete(ret)) { + /* Driver sent out skb successfully or skb was consumed */ ret = qdisc_qlen(q); - break; - - case NETDEV_TX_LOCKED: + } else if (ret == NETDEV_TX_LOCKED) { /* Driver try lock failed */ ret = handle_dev_cpu_collision(skb, txq, q); - break; - - default: + } else { /* Driver returned NETDEV_TX_BUSY - requeue skb */ if (unlikely (ret != NETDEV_TX_BUSY && net_ratelimit())) printk(KERN_WARNING "BUG %s code %d qlen %d\n", dev->name, ret, q->q.qlen); ret = dev_requeue_skb(skb, q); - break; } if (ret && (netif_tx_queue_stopped(txq) || -- cgit v1.3-8-gc7d7 From b9f5d52670c27e71f04c466aee77e3a2eeca8080 Mon Sep 17 00:00:00 2001 From: Marin Mitov Date: Fri, 13 Nov 2009 07:58:41 +0000 Subject: remove deprecated and not used: print_mac() The function print_mac in net/ethernet/eth.c is marked __deprecated and not used. Remove it. Signed-off-by: Marin Mitov Signed-off-by: David S. Miller --- include/linux/if_ether.h | 4 ---- net/ethernet/eth.c | 7 ------- 2 files changed, 11 deletions(-) (limited to 'include') diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index 580b6004d00e..005e1525ab86 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h @@ -136,10 +136,6 @@ extern struct ctl_table ether_table[]; extern ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len); -/* - * Display a 6 byte device address (MAC) in a readable format. - */ -extern char *print_mac(char *buf, const unsigned char *addr) __deprecated; #define MAC_FMT "%02x:%02x:%02x:%02x:%02x:%02x" #define MAC_BUF_SIZE 18 #define DECLARE_MAC_BUF(var) char var[MAC_BUF_SIZE] diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 5a883affecd3..dd3db88f8f0a 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -393,10 +393,3 @@ ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len) return ((ssize_t) l); } EXPORT_SYMBOL(sysfs_format_mac); - -char *print_mac(char *buf, const unsigned char *addr) -{ - _format_mac_addr(buf, MAC_BUF_SIZE, addr, ETH_ALEN); - return buf; -} -EXPORT_SYMBOL(print_mac); -- cgit v1.3-8-gc7d7 From 559fdc3c1b624edb1933a875022fe7e27934d11c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 16 Nov 2009 12:45:14 +0100 Subject: perf_event: Optimize perf_output_lock() The purpose of perf_output_{un,}lock() is to: 1) avoid publishing incomplete data [ possible when publishing a head that is ahead of an entry that is still being written ] 2) guarantee fwd progress [ a simple refcount on pending writers doesn't need to drop to 0, making it so would end up implementing something like forced quiecent states of RCU ] To satisfy the above without undue complexity it serializes between CPUs, this means that a pending writer can only be the same cpu in a nested context, and since (under normal operation) a cpu always makes progress we're good -- if the head is only published when the bottom most writer completes. Now we don't need to disable IRQs in order to serialize between CPUs, disabling preemption ought to be sufficient, esp since we already deal with nesting due to NMIs. This avoids potentially expensive (and needless) local IRQ disable/enable ops. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <1258373161.26714.254.camel@laptop> Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 1 - kernel/perf_event.c | 21 +++++++++++---------- 2 files changed, 11 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index df4e73e33774..7f87563c8485 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -714,7 +714,6 @@ struct perf_output_handle { int nmi; int sample; int locked; - unsigned long flags; }; #ifdef CONFIG_PERF_EVENTS diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 6f4ed3b4cd73..3256e36ad251 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -2674,20 +2674,21 @@ static void perf_output_wakeup(struct perf_output_handle *handle) static void perf_output_lock(struct perf_output_handle *handle) { struct perf_mmap_data *data = handle->data; - int cpu; + int cur, cpu = get_cpu(); handle->locked = 0; - local_irq_save(handle->flags); - cpu = smp_processor_id(); - - if (in_nmi() && atomic_read(&data->lock) == cpu) - return; + for (;;) { + cur = atomic_cmpxchg(&data->lock, -1, cpu); + if (cur == -1) { + handle->locked = 1; + break; + } + if (cur == cpu) + break; - while (atomic_cmpxchg(&data->lock, -1, cpu) != -1) cpu_relax(); - - handle->locked = 1; + } } static void perf_output_unlock(struct perf_output_handle *handle) @@ -2733,7 +2734,7 @@ again: if (atomic_xchg(&data->wakeup, 0)) perf_output_wakeup(handle); out: - local_irq_restore(handle->flags); + put_cpu(); } void perf_output_copy(struct perf_output_handle *handle, -- cgit v1.3-8-gc7d7 From 0696b711e4be45fa104c12329f617beb29c03f78 Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Tue, 17 Nov 2009 13:49:50 +0800 Subject: timekeeping: Fix clock_gettime vsyscall time warp Since commit 0a544198 "timekeeping: Move NTP adjusted clock multiplier to struct timekeeper" the clock multiplier of vsyscall is updated with the unmodified clock multiplier of the clock source and not with the NTP adjusted multiplier of the timekeeper. This causes user space observerable time warps: new CLOCK-warp maximum: 120 nsecs, 00000025c337c537 -> 00000025c337c4bf Add a new argument "mult" to update_vsyscall() and hand in the timekeeping internal NTP adjusted multiplier. Signed-off-by: Lin Ming Cc: "Zhang Yanmin" Cc: Martin Schwidefsky Cc: Benjamin Herrenschmidt Cc: Tony Luck LKML-Reference: <1258436990.17765.83.camel@minggr.sh.intel.com> Signed-off-by: Thomas Gleixner --- arch/ia64/kernel/time.c | 4 ++-- arch/powerpc/kernel/time.c | 5 +++-- arch/s390/kernel/time.c | 3 ++- arch/x86/kernel/vsyscall_64.c | 5 +++-- include/linux/clocksource.h | 6 ++++-- kernel/time/timekeeping.c | 6 +++--- 6 files changed, 17 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index 4990495d7531..a35c661e5e89 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -473,7 +473,7 @@ void update_vsyscall_tz(void) { } -void update_vsyscall(struct timespec *wall, struct clocksource *c) +void update_vsyscall(struct timespec *wall, struct clocksource *c, u32 mult) { unsigned long flags; @@ -481,7 +481,7 @@ void update_vsyscall(struct timespec *wall, struct clocksource *c) /* copy fsyscall clock data */ fsyscall_gtod_data.clk_mask = c->mask; - fsyscall_gtod_data.clk_mult = c->mult; + fsyscall_gtod_data.clk_mult = mult; fsyscall_gtod_data.clk_shift = c->shift; fsyscall_gtod_data.clk_fsys_mmio = c->fsys_mmio; fsyscall_gtod_data.clk_cycle_last = c->cycle_last; diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index a136a11c490d..39713312fbc7 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -828,7 +828,8 @@ static cycle_t timebase_read(struct clocksource *cs) return (cycle_t)get_tb(); } -void update_vsyscall(struct timespec *wall_time, struct clocksource *clock) +void update_vsyscall(struct timespec *wall_time, struct clocksource *clock, + u32 mult) { u64 t2x, stamp_xsec; @@ -841,7 +842,7 @@ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock) /* XXX this assumes clock->shift == 22 */ /* 4611686018 ~= 2^(20+64-22) / 1e9 */ - t2x = (u64) clock->mult * 4611686018ULL; + t2x = (u64) mult * 4611686018ULL; stamp_xsec = (u64) xtime.tv_nsec * XSEC_PER_SEC; do_div(stamp_xsec, 1000000000); stamp_xsec += (u64) xtime.tv_sec * XSEC_PER_SEC; diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 34162a0b2caa..68e1ecf5ebab 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -214,7 +214,8 @@ struct clocksource * __init clocksource_default_clock(void) return &clocksource_tod; } -void update_vsyscall(struct timespec *wall_time, struct clocksource *clock) +void update_vsyscall(struct timespec *wall_time, struct clocksource *clock, + u32 mult) { if (clock != &clocksource_tod) return; diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 8cb4974ff599..62f39d79b775 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -73,7 +73,8 @@ void update_vsyscall_tz(void) write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); } -void update_vsyscall(struct timespec *wall_time, struct clocksource *clock) +void update_vsyscall(struct timespec *wall_time, struct clocksource *clock, + u32 mult) { unsigned long flags; @@ -82,7 +83,7 @@ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock) vsyscall_gtod_data.clock.vread = clock->vread; vsyscall_gtod_data.clock.cycle_last = clock->cycle_last; vsyscall_gtod_data.clock.mask = clock->mask; - vsyscall_gtod_data.clock.mult = clock->mult; + vsyscall_gtod_data.clock.mult = mult; vsyscall_gtod_data.clock.shift = clock->shift; vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 83d2fbd81b93..95e4995d9987 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -280,10 +280,12 @@ extern struct clocksource * __init __weak clocksource_default_clock(void); extern void clocksource_mark_unstable(struct clocksource *cs); #ifdef CONFIG_GENERIC_TIME_VSYSCALL -extern void update_vsyscall(struct timespec *ts, struct clocksource *c); +extern void +update_vsyscall(struct timespec *ts, struct clocksource *c, u32 mult); extern void update_vsyscall_tz(void); #else -static inline void update_vsyscall(struct timespec *ts, struct clocksource *c) +static inline void +update_vsyscall(struct timespec *ts, struct clocksource *c, u32 mult) { } diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index c3a4e2907eaa..2a6d3e3e2c3e 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -177,7 +177,7 @@ void timekeeping_leap_insert(int leapsecond) { xtime.tv_sec += leapsecond; wall_to_monotonic.tv_sec -= leapsecond; - update_vsyscall(&xtime, timekeeper.clock); + update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult); } #ifdef CONFIG_GENERIC_TIME @@ -337,7 +337,7 @@ int do_settimeofday(struct timespec *tv) timekeeper.ntp_error = 0; ntp_clear(); - update_vsyscall(&xtime, timekeeper.clock); + update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult); write_sequnlock_irqrestore(&xtime_lock, flags); @@ -811,7 +811,7 @@ void update_wall_time(void) update_xtime_cache(nsecs); /* check to see if there is a new clocksource to use */ - update_vsyscall(&xtime, timekeeper.clock); + update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult); } /** -- cgit v1.3-8-gc7d7 From c85e9d7739fc8d879c4293ea020760926d6f87cd Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Tue, 17 Nov 2009 10:16:32 -0500 Subject: znet: fix build failure from i82593.h relocation znet was including "wireless/i82593.h" (which is a bit wierd), and I missed that when I relocated i82593.h to drivers/staging/wavelan. Since I don't have ISA turned-on in my normal .config, I didn't see the build failures -- mea culpa! Signed-off-by: John W. Linville --- drivers/net/znet.c | 3 +- drivers/staging/wavelan/i82593.h | 229 --------------------------------- drivers/staging/wavelan/wavelan_cs.p.h | 2 +- include/linux/i82593.h | 229 +++++++++++++++++++++++++++++++++ 4 files changed, 231 insertions(+), 232 deletions(-) delete mode 100644 drivers/staging/wavelan/i82593.h create mode 100644 include/linux/i82593.h (limited to 'include') diff --git a/drivers/net/znet.c b/drivers/net/znet.c index b42347333750..443c4eee28c1 100644 --- a/drivers/net/znet.c +++ b/drivers/net/znet.c @@ -103,8 +103,7 @@ #include #include -/* This include could be elsewhere, since it is not wireless specific */ -#include "wireless/i82593.h" +#include static char version[] __initdata = "znet.c:v1.02 9/23/94 becker@scyld.com\n"; diff --git a/drivers/staging/wavelan/i82593.h b/drivers/staging/wavelan/i82593.h deleted file mode 100644 index afac5c7a323d..000000000000 --- a/drivers/staging/wavelan/i82593.h +++ /dev/null @@ -1,229 +0,0 @@ -/* - * Definitions for Intel 82593 CSMA/CD Core LAN Controller - * The definitions are taken from the 1992 users manual with Intel - * order number 297125-001. - * - * /usr/src/pc/RCS/i82593.h,v 1.1 1996/07/17 15:23:12 root Exp - * - * Copyright 1994, Anders Klemets - * - * HISTORY - * i82593.h,v - * Revision 1.4 2005/11/4 09:15:00 baroniunas - * Modified copyright with permission of author as follows: - * - * "If I82539.H is the only file with my copyright statement - * that is included in the Source Forge project, then you have - * my approval to change the copyright statement to be a GPL - * license, in the way you proposed on October 10." - * - * Revision 1.1 1996/07/17 15:23:12 root - * Initial revision - * - * Revision 1.3 1995/04/05 15:13:58 adj - * Initial alpha release - * - * Revision 1.2 1994/06/16 23:57:31 klemets - * Mirrored all the fields in the configuration block. - * - * Revision 1.1 1994/06/02 20:25:34 klemets - * Initial revision - * - * - */ -#ifndef _I82593_H -#define _I82593_H - -/* Intel 82593 CSMA/CD Core LAN Controller */ - -/* Port 0 Command Register definitions */ - -/* Execution operations */ -#define OP0_NOP 0 /* CHNL = 0 */ -#define OP0_SWIT_TO_PORT_1 0 /* CHNL = 1 */ -#define OP0_IA_SETUP 1 -#define OP0_CONFIGURE 2 -#define OP0_MC_SETUP 3 -#define OP0_TRANSMIT 4 -#define OP0_TDR 5 -#define OP0_DUMP 6 -#define OP0_DIAGNOSE 7 -#define OP0_TRANSMIT_NO_CRC 9 -#define OP0_RETRANSMIT 12 -#define OP0_ABORT 13 -/* Reception operations */ -#define OP0_RCV_ENABLE 8 -#define OP0_RCV_DISABLE 10 -#define OP0_STOP_RCV 11 -/* Status pointer control operations */ -#define OP0_FIX_PTR 15 /* CHNL = 1 */ -#define OP0_RLS_PTR 15 /* CHNL = 0 */ -#define OP0_RESET 14 - -#define CR0_CHNL (1 << 4) /* 0=Channel 0, 1=Channel 1 */ -#define CR0_STATUS_0 0x00 -#define CR0_STATUS_1 0x20 -#define CR0_STATUS_2 0x40 -#define CR0_STATUS_3 0x60 -#define CR0_INT_ACK (1 << 7) /* 0=No ack, 1=acknowledge */ - -/* Port 0 Status Register definitions */ - -#define SR0_NO_RESULT 0 /* dummy */ -#define SR0_EVENT_MASK 0x0f -#define SR0_IA_SETUP_DONE 1 -#define SR0_CONFIGURE_DONE 2 -#define SR0_MC_SETUP_DONE 3 -#define SR0_TRANSMIT_DONE 4 -#define SR0_TDR_DONE 5 -#define SR0_DUMP_DONE 6 -#define SR0_DIAGNOSE_PASSED 7 -#define SR0_TRANSMIT_NO_CRC_DONE 9 -#define SR0_RETRANSMIT_DONE 12 -#define SR0_EXECUTION_ABORTED 13 -#define SR0_END_OF_FRAME 8 -#define SR0_RECEPTION_ABORTED 10 -#define SR0_DIAGNOSE_FAILED 15 -#define SR0_STOP_REG_HIT 11 - -#define SR0_CHNL (1 << 4) -#define SR0_EXECUTION (1 << 5) -#define SR0_RECEPTION (1 << 6) -#define SR0_INTERRUPT (1 << 7) -#define SR0_BOTH_RX_TX (SR0_EXECUTION | SR0_RECEPTION) - -#define SR3_EXEC_STATE_MASK 0x03 -#define SR3_EXEC_IDLE 0 -#define SR3_TX_ABORT_IN_PROGRESS 1 -#define SR3_EXEC_ACTIVE 2 -#define SR3_ABORT_IN_PROGRESS 3 -#define SR3_EXEC_CHNL (1 << 2) -#define SR3_STP_ON_NO_RSRC (1 << 3) -#define SR3_RCVING_NO_RSRC (1 << 4) -#define SR3_RCV_STATE_MASK 0x60 -#define SR3_RCV_IDLE 0x00 -#define SR3_RCV_READY 0x20 -#define SR3_RCV_ACTIVE 0x40 -#define SR3_RCV_STOP_IN_PROG 0x60 -#define SR3_RCV_CHNL (1 << 7) - -/* Port 1 Command Register definitions */ - -#define OP1_NOP 0 -#define OP1_SWIT_TO_PORT_0 1 -#define OP1_INT_DISABLE 2 -#define OP1_INT_ENABLE 3 -#define OP1_SET_TS 5 -#define OP1_RST_TS 7 -#define OP1_POWER_DOWN 8 -#define OP1_RESET_RING_MNGMT 11 -#define OP1_RESET 14 -#define OP1_SEL_RST 15 - -#define CR1_STATUS_4 0x00 -#define CR1_STATUS_5 0x20 -#define CR1_STATUS_6 0x40 -#define CR1_STOP_REG_UPDATE (1 << 7) - -/* Receive frame status bits */ - -#define RX_RCLD (1 << 0) -#define RX_IA_MATCH (1 << 1) -#define RX_NO_AD_MATCH (1 << 2) -#define RX_NO_SFD (1 << 3) -#define RX_SRT_FRM (1 << 7) -#define RX_OVRRUN (1 << 8) -#define RX_ALG_ERR (1 << 10) -#define RX_CRC_ERR (1 << 11) -#define RX_LEN_ERR (1 << 12) -#define RX_RCV_OK (1 << 13) -#define RX_TYP_LEN (1 << 15) - -/* Transmit status bits */ - -#define TX_NCOL_MASK 0x0f -#define TX_FRTL (1 << 4) -#define TX_MAX_COL (1 << 5) -#define TX_HRT_BEAT (1 << 6) -#define TX_DEFER (1 << 7) -#define TX_UND_RUN (1 << 8) -#define TX_LOST_CTS (1 << 9) -#define TX_LOST_CRS (1 << 10) -#define TX_LTCOL (1 << 11) -#define TX_OK (1 << 13) -#define TX_COLL (1 << 15) - -struct i82593_conf_block { - u_char fifo_limit : 4, - forgnesi : 1, - fifo_32 : 1, - d6mod : 1, - throttle_enb : 1; - u_char throttle : 6, - cntrxint : 1, - contin : 1; - u_char addr_len : 3, - acloc : 1, - preamb_len : 2, - loopback : 2; - u_char lin_prio : 3, - tbofstop : 1, - exp_prio : 3, - bof_met : 1; - u_char : 4, - ifrm_spc : 4; - u_char : 5, - slottim_low : 3; - u_char slottim_hi : 3, - : 1, - max_retr : 4; - u_char prmisc : 1, - bc_dis : 1, - : 1, - crs_1 : 1, - nocrc_ins : 1, - crc_1632 : 1, - : 1, - crs_cdt : 1; - u_char cs_filter : 3, - crs_src : 1, - cd_filter : 3, - : 1; - u_char : 2, - min_fr_len : 6; - u_char lng_typ : 1, - lng_fld : 1, - rxcrc_xf : 1, - artx : 1, - sarec : 1, - tx_jabber : 1, /* why is this called max_len in the manual? */ - hash_1 : 1, - lbpkpol : 1; - u_char : 6, - fdx : 1, - : 1; - u_char dummy_6 : 6, /* supposed to be ones */ - mult_ia : 1, - dis_bof : 1; - u_char dummy_1 : 1, /* supposed to be one */ - tx_ifs_retrig : 2, - mc_all : 1, - rcv_mon : 2, - frag_acpt : 1, - tstrttrs : 1; - u_char fretx : 1, - runt_eop : 1, - hw_sw_pin : 1, - big_endn : 1, - syncrqs : 1, - sttlen : 1, - tx_eop : 1, - rx_eop : 1; - u_char rbuf_size : 5, - rcvstop : 1, - : 2; -}; - -#define I82593_MAX_MULTICAST_ADDRESSES 128 /* Hardware hashed filter */ - -#endif /* _I82593_H */ diff --git a/drivers/staging/wavelan/wavelan_cs.p.h b/drivers/staging/wavelan/wavelan_cs.p.h index 81d91531c4f9..8fbfaa8a5a67 100644 --- a/drivers/staging/wavelan/wavelan_cs.p.h +++ b/drivers/staging/wavelan/wavelan_cs.p.h @@ -446,7 +446,7 @@ #include /* Wavelan declarations */ -#include "i82593.h" /* Definitions for the Intel chip */ +#include /* Definitions for the Intel chip */ #include "wavelan_cs.h" /* Others bits of the hardware */ diff --git a/include/linux/i82593.h b/include/linux/i82593.h new file mode 100644 index 000000000000..afac5c7a323d --- /dev/null +++ b/include/linux/i82593.h @@ -0,0 +1,229 @@ +/* + * Definitions for Intel 82593 CSMA/CD Core LAN Controller + * The definitions are taken from the 1992 users manual with Intel + * order number 297125-001. + * + * /usr/src/pc/RCS/i82593.h,v 1.1 1996/07/17 15:23:12 root Exp + * + * Copyright 1994, Anders Klemets + * + * HISTORY + * i82593.h,v + * Revision 1.4 2005/11/4 09:15:00 baroniunas + * Modified copyright with permission of author as follows: + * + * "If I82539.H is the only file with my copyright statement + * that is included in the Source Forge project, then you have + * my approval to change the copyright statement to be a GPL + * license, in the way you proposed on October 10." + * + * Revision 1.1 1996/07/17 15:23:12 root + * Initial revision + * + * Revision 1.3 1995/04/05 15:13:58 adj + * Initial alpha release + * + * Revision 1.2 1994/06/16 23:57:31 klemets + * Mirrored all the fields in the configuration block. + * + * Revision 1.1 1994/06/02 20:25:34 klemets + * Initial revision + * + * + */ +#ifndef _I82593_H +#define _I82593_H + +/* Intel 82593 CSMA/CD Core LAN Controller */ + +/* Port 0 Command Register definitions */ + +/* Execution operations */ +#define OP0_NOP 0 /* CHNL = 0 */ +#define OP0_SWIT_TO_PORT_1 0 /* CHNL = 1 */ +#define OP0_IA_SETUP 1 +#define OP0_CONFIGURE 2 +#define OP0_MC_SETUP 3 +#define OP0_TRANSMIT 4 +#define OP0_TDR 5 +#define OP0_DUMP 6 +#define OP0_DIAGNOSE 7 +#define OP0_TRANSMIT_NO_CRC 9 +#define OP0_RETRANSMIT 12 +#define OP0_ABORT 13 +/* Reception operations */ +#define OP0_RCV_ENABLE 8 +#define OP0_RCV_DISABLE 10 +#define OP0_STOP_RCV 11 +/* Status pointer control operations */ +#define OP0_FIX_PTR 15 /* CHNL = 1 */ +#define OP0_RLS_PTR 15 /* CHNL = 0 */ +#define OP0_RESET 14 + +#define CR0_CHNL (1 << 4) /* 0=Channel 0, 1=Channel 1 */ +#define CR0_STATUS_0 0x00 +#define CR0_STATUS_1 0x20 +#define CR0_STATUS_2 0x40 +#define CR0_STATUS_3 0x60 +#define CR0_INT_ACK (1 << 7) /* 0=No ack, 1=acknowledge */ + +/* Port 0 Status Register definitions */ + +#define SR0_NO_RESULT 0 /* dummy */ +#define SR0_EVENT_MASK 0x0f +#define SR0_IA_SETUP_DONE 1 +#define SR0_CONFIGURE_DONE 2 +#define SR0_MC_SETUP_DONE 3 +#define SR0_TRANSMIT_DONE 4 +#define SR0_TDR_DONE 5 +#define SR0_DUMP_DONE 6 +#define SR0_DIAGNOSE_PASSED 7 +#define SR0_TRANSMIT_NO_CRC_DONE 9 +#define SR0_RETRANSMIT_DONE 12 +#define SR0_EXECUTION_ABORTED 13 +#define SR0_END_OF_FRAME 8 +#define SR0_RECEPTION_ABORTED 10 +#define SR0_DIAGNOSE_FAILED 15 +#define SR0_STOP_REG_HIT 11 + +#define SR0_CHNL (1 << 4) +#define SR0_EXECUTION (1 << 5) +#define SR0_RECEPTION (1 << 6) +#define SR0_INTERRUPT (1 << 7) +#define SR0_BOTH_RX_TX (SR0_EXECUTION | SR0_RECEPTION) + +#define SR3_EXEC_STATE_MASK 0x03 +#define SR3_EXEC_IDLE 0 +#define SR3_TX_ABORT_IN_PROGRESS 1 +#define SR3_EXEC_ACTIVE 2 +#define SR3_ABORT_IN_PROGRESS 3 +#define SR3_EXEC_CHNL (1 << 2) +#define SR3_STP_ON_NO_RSRC (1 << 3) +#define SR3_RCVING_NO_RSRC (1 << 4) +#define SR3_RCV_STATE_MASK 0x60 +#define SR3_RCV_IDLE 0x00 +#define SR3_RCV_READY 0x20 +#define SR3_RCV_ACTIVE 0x40 +#define SR3_RCV_STOP_IN_PROG 0x60 +#define SR3_RCV_CHNL (1 << 7) + +/* Port 1 Command Register definitions */ + +#define OP1_NOP 0 +#define OP1_SWIT_TO_PORT_0 1 +#define OP1_INT_DISABLE 2 +#define OP1_INT_ENABLE 3 +#define OP1_SET_TS 5 +#define OP1_RST_TS 7 +#define OP1_POWER_DOWN 8 +#define OP1_RESET_RING_MNGMT 11 +#define OP1_RESET 14 +#define OP1_SEL_RST 15 + +#define CR1_STATUS_4 0x00 +#define CR1_STATUS_5 0x20 +#define CR1_STATUS_6 0x40 +#define CR1_STOP_REG_UPDATE (1 << 7) + +/* Receive frame status bits */ + +#define RX_RCLD (1 << 0) +#define RX_IA_MATCH (1 << 1) +#define RX_NO_AD_MATCH (1 << 2) +#define RX_NO_SFD (1 << 3) +#define RX_SRT_FRM (1 << 7) +#define RX_OVRRUN (1 << 8) +#define RX_ALG_ERR (1 << 10) +#define RX_CRC_ERR (1 << 11) +#define RX_LEN_ERR (1 << 12) +#define RX_RCV_OK (1 << 13) +#define RX_TYP_LEN (1 << 15) + +/* Transmit status bits */ + +#define TX_NCOL_MASK 0x0f +#define TX_FRTL (1 << 4) +#define TX_MAX_COL (1 << 5) +#define TX_HRT_BEAT (1 << 6) +#define TX_DEFER (1 << 7) +#define TX_UND_RUN (1 << 8) +#define TX_LOST_CTS (1 << 9) +#define TX_LOST_CRS (1 << 10) +#define TX_LTCOL (1 << 11) +#define TX_OK (1 << 13) +#define TX_COLL (1 << 15) + +struct i82593_conf_block { + u_char fifo_limit : 4, + forgnesi : 1, + fifo_32 : 1, + d6mod : 1, + throttle_enb : 1; + u_char throttle : 6, + cntrxint : 1, + contin : 1; + u_char addr_len : 3, + acloc : 1, + preamb_len : 2, + loopback : 2; + u_char lin_prio : 3, + tbofstop : 1, + exp_prio : 3, + bof_met : 1; + u_char : 4, + ifrm_spc : 4; + u_char : 5, + slottim_low : 3; + u_char slottim_hi : 3, + : 1, + max_retr : 4; + u_char prmisc : 1, + bc_dis : 1, + : 1, + crs_1 : 1, + nocrc_ins : 1, + crc_1632 : 1, + : 1, + crs_cdt : 1; + u_char cs_filter : 3, + crs_src : 1, + cd_filter : 3, + : 1; + u_char : 2, + min_fr_len : 6; + u_char lng_typ : 1, + lng_fld : 1, + rxcrc_xf : 1, + artx : 1, + sarec : 1, + tx_jabber : 1, /* why is this called max_len in the manual? */ + hash_1 : 1, + lbpkpol : 1; + u_char : 6, + fdx : 1, + : 1; + u_char dummy_6 : 6, /* supposed to be ones */ + mult_ia : 1, + dis_bof : 1; + u_char dummy_1 : 1, /* supposed to be one */ + tx_ifs_retrig : 2, + mc_all : 1, + rcv_mon : 2, + frag_acpt : 1, + tstrttrs : 1; + u_char fretx : 1, + runt_eop : 1, + hw_sw_pin : 1, + big_endn : 1, + syncrqs : 1, + sttlen : 1, + tx_eop : 1, + rx_eop : 1; + u_char rbuf_size : 5, + rcvstop : 1, + : 2; +}; + +#define I82593_MAX_MULTICAST_ADDRESSES 128 /* Hardware hashed filter */ + +#endif /* _I82593_H */ -- cgit v1.3-8-gc7d7 From b753e03e5e7c6ee60e81cd6335c80dc26519f9d0 Mon Sep 17 00:00:00 2001 From: Krzysztof Helt Date: Tue, 17 Nov 2009 18:34:54 +0100 Subject: ALSA: cs4236: update control names Update control names to be more closer to their meaning. Change the "Mono" name to the "Beep" as this line is usually used to forward the PC beeper signal to sound card's output. Update names for both cs423x and wss. Clean up cs4235 controls according to the cs4235 doc. Rename some of the cs4235 controls to be consistent with the cs4236's ones. Also, delete one misnamed cs4231 register define. Signed-off-by: Krzysztof Helt Signed-off-by: Takashi Iwai --- include/sound/cs4231-regs.h | 1 - sound/isa/cs423x/cs4236_lib.c | 49 +++++++++++++++++++------------------------ sound/isa/wss/wss_lib.c | 8 +++---- 3 files changed, 25 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/include/sound/cs4231-regs.h b/include/sound/cs4231-regs.h index 92647532c454..66d28c2cb53d 100644 --- a/include/sound/cs4231-regs.h +++ b/include/sound/cs4231-regs.h @@ -70,7 +70,6 @@ #define AD1845_PWR_DOWN 0x1b /* power down control */ #define CS4235_LEFT_MASTER 0x1b /* left master output control */ #define CS4231_REC_FORMAT 0x1c /* clock and data format - record - bits 7-0 MCE */ -#define CS4231_PLY_VAR_FREQ 0x1d /* playback variable frequency */ #define AD1845_CLOCK 0x1d /* crystal clock select and total power down */ #define CS4235_RIGHT_MASTER 0x1d /* right master output control */ #define CS4231_REC_UPR_CNT 0x1e /* record upper count */ diff --git a/sound/isa/cs423x/cs4236_lib.c b/sound/isa/cs423x/cs4236_lib.c index 1b1ad1cad328..4c4024a73c6b 100644 --- a/sound/isa/cs423x/cs4236_lib.c +++ b/sound/isa/cs423x/cs4236_lib.c @@ -777,7 +777,7 @@ CS4236_DOUBLE("Mic Playback Switch", 0, CS4236_DOUBLE("Mic Capture Switch", 0, CS4236_LEFT_MIC, CS4236_RIGHT_MIC, 7, 7, 1, 1), CS4236_DOUBLE("Mic Volume", 0, CS4236_LEFT_MIC, CS4236_RIGHT_MIC, 0, 0, 31, 1), -CS4236_DOUBLE("Mic Playback Boost", 0, +CS4236_DOUBLE("Mic Playback Boost (+20dB)", 0, CS4236_LEFT_MIC, CS4236_RIGHT_MIC, 5, 5, 1, 0), WSS_DOUBLE("Line Playback Switch", 0, @@ -798,10 +798,10 @@ WSS_DOUBLE("CD Capture Switch", 0, CS4236_DOUBLE1("Mono Output Playback Switch", 0, CS4231_MONO_CTRL, CS4236_RIGHT_MIX_CTRL, 6, 7, 1, 1), -CS4236_DOUBLE1("Mono Playback Switch", 0, +CS4236_DOUBLE1("Beep Playback Switch", 0, CS4231_MONO_CTRL, CS4236_LEFT_MIX_CTRL, 7, 7, 1, 1), -WSS_SINGLE("Mono Playback Volume", 0, CS4231_MONO_CTRL, 0, 15, 1), -WSS_SINGLE("Mono Playback Bypass", 0, CS4231_MONO_CTRL, 5, 1, 0), +WSS_SINGLE("Beep Playback Volume", 0, CS4231_MONO_CTRL, 0, 15, 1), +WSS_SINGLE("Beep Bypass Playback Switch", 0, CS4231_MONO_CTRL, 5, 1, 0), WSS_DOUBLE("Capture Volume", 0, CS4231_LEFT_INPUT, CS4231_RIGHT_INPUT, 0, 0, 15, 0), @@ -815,31 +815,27 @@ CS4236_DOUBLE1("Digital Loopback Playback Volume", 0, static struct snd_kcontrol_new snd_cs4235_controls[] = { -WSS_DOUBLE("Master Switch", 0, +WSS_DOUBLE("Master Playback Switch", 0, CS4235_LEFT_MASTER, CS4235_RIGHT_MASTER, 7, 7, 1, 1), -WSS_DOUBLE("Master Volume", 0, +WSS_DOUBLE("Master Playback Volume", 0, CS4235_LEFT_MASTER, CS4235_RIGHT_MASTER, 0, 0, 31, 1), CS4235_OUTPUT_ACCU("Playback Volume", 0), -CS4236_DOUBLE("Master Digital Playback Switch", 0, - CS4236_LEFT_MASTER, CS4236_RIGHT_MASTER, 7, 7, 1, 1), -CS4236_DOUBLE("Master Digital Capture Switch", 0, - CS4236_DAC_MUTE, CS4236_DAC_MUTE, 7, 6, 1, 1), -CS4236_MASTER_DIGITAL("Master Digital Volume", 0), - -WSS_DOUBLE("Master Digital Playback Switch", 1, +WSS_DOUBLE("Synth Playback Switch", 1, CS4231_LEFT_LINE_IN, CS4231_RIGHT_LINE_IN, 7, 7, 1, 1), -WSS_DOUBLE("Master Digital Capture Switch", 1, +WSS_DOUBLE("Synth Capture Switch", 1, CS4231_LEFT_LINE_IN, CS4231_RIGHT_LINE_IN, 6, 6, 1, 1), -WSS_DOUBLE("Master Digital Volume", 1, +WSS_DOUBLE("Synth Volume", 1, CS4231_LEFT_LINE_IN, CS4231_RIGHT_LINE_IN, 0, 0, 31, 1), CS4236_DOUBLE("Capture Volume", 0, CS4236_LEFT_MIX_CTRL, CS4236_RIGHT_MIX_CTRL, 5, 5, 3, 1), -WSS_DOUBLE("PCM Switch", 0, +WSS_DOUBLE("PCM Playback Switch", 0, CS4231_LEFT_OUTPUT, CS4231_RIGHT_OUTPUT, 7, 7, 1, 1), +WSS_DOUBLE("PCM Capture Switch", 0, + CS4236_DAC_MUTE, CS4236_DAC_MUTE, 7, 6, 1, 1), WSS_DOUBLE("PCM Volume", 0, CS4231_LEFT_OUTPUT, CS4231_RIGHT_OUTPUT, 0, 0, 63, 1), @@ -855,28 +851,25 @@ CS4236_DOUBLE("Mic Capture Switch", 0, CS4236_DOUBLE("Mic Playback Switch", 0, CS4236_LEFT_MIC, CS4236_RIGHT_MIC, 6, 6, 1, 1), CS4236_SINGLE("Mic Volume", 0, CS4236_LEFT_MIC, 0, 31, 1), -CS4236_SINGLE("Mic Playback Boost", 0, CS4236_LEFT_MIC, 5, 1, 0), +CS4236_SINGLE("Mic Boost (+20dB)", 0, CS4236_LEFT_MIC, 5, 1, 0), -WSS_DOUBLE("Aux Playback Switch", 0, +WSS_DOUBLE("Line Playback Switch", 0, CS4231_AUX1_LEFT_INPUT, CS4231_AUX1_RIGHT_INPUT, 7, 7, 1, 1), -WSS_DOUBLE("Aux Capture Switch", 0, +WSS_DOUBLE("Line Capture Switch", 0, CS4231_AUX1_LEFT_INPUT, CS4231_AUX1_RIGHT_INPUT, 6, 6, 1, 1), -WSS_DOUBLE("Aux Volume", 0, +WSS_DOUBLE("Line Volume", 0, CS4231_AUX1_LEFT_INPUT, CS4231_AUX1_RIGHT_INPUT, 0, 0, 31, 1), -WSS_DOUBLE("Aux Playback Switch", 1, +WSS_DOUBLE("CD Playback Switch", 1, CS4231_AUX2_LEFT_INPUT, CS4231_AUX2_RIGHT_INPUT, 7, 7, 1, 1), -WSS_DOUBLE("Aux Capture Switch", 1, +WSS_DOUBLE("CD Capture Switch", 1, CS4231_AUX2_LEFT_INPUT, CS4231_AUX2_RIGHT_INPUT, 6, 6, 1, 1), -WSS_DOUBLE("Aux Volume", 1, +WSS_DOUBLE("CD Volume", 1, CS4231_AUX2_LEFT_INPUT, CS4231_AUX2_RIGHT_INPUT, 0, 0, 31, 1), -CS4236_DOUBLE1("Master Mono Switch", 0, - CS4231_MONO_CTRL, CS4236_RIGHT_MIX_CTRL, 6, 7, 1, 1), - -CS4236_DOUBLE1("Mono Switch", 0, +CS4236_DOUBLE1("Beep Playback Switch", 0, CS4231_MONO_CTRL, CS4236_LEFT_MIX_CTRL, 7, 7, 1, 1), -WSS_SINGLE("Mono Volume", 0, CS4231_MONO_CTRL, 0, 15, 1), +WSS_SINGLE("Beep Playback Volume", 0, CS4231_MONO_CTRL, 0, 15, 1), WSS_DOUBLE("Analog Loopback Switch", 0, CS4231_LEFT_INPUT, CS4231_RIGHT_INPUT, 7, 7, 1, 0), diff --git a/sound/isa/wss/wss_lib.c b/sound/isa/wss/wss_lib.c index 705db0924375..5b9d6c18bc45 100644 --- a/sound/isa/wss/wss_lib.c +++ b/sound/isa/wss/wss_lib.c @@ -2224,7 +2224,7 @@ WSS_DOUBLE_TLV("Capture Volume", 0, CS4231_LEFT_INPUT, CS4231_RIGHT_INPUT, .get = snd_wss_get_mux, .put = snd_wss_put_mux, }, -WSS_DOUBLE("Mic Boost", 0, +WSS_DOUBLE("Mic Boost (+20dB)", 0, CS4231_LEFT_INPUT, CS4231_RIGHT_INPUT, 5, 5, 1, 0), WSS_SINGLE("Loopback Capture Switch", 0, CS4231_LOOPBACK, 0, 1, 0), @@ -2235,14 +2235,14 @@ WSS_DOUBLE("Line Playback Switch", 0, WSS_DOUBLE_TLV("Line Playback Volume", 0, CS4231_LEFT_LINE_IN, CS4231_RIGHT_LINE_IN, 0, 0, 31, 1, db_scale_5bit_12db_max), -WSS_SINGLE("Mono Playback Switch", 0, +WSS_SINGLE("Beep Playback Switch", 0, CS4231_MONO_CTRL, 7, 1, 1), -WSS_SINGLE_TLV("Mono Playback Volume", 0, +WSS_SINGLE_TLV("Beep Playback Volume", 0, CS4231_MONO_CTRL, 0, 15, 1, db_scale_4bit), WSS_SINGLE("Mono Output Playback Switch", 0, CS4231_MONO_CTRL, 6, 1, 1), -WSS_SINGLE("Mono Output Playback Bypass", 0, +WSS_SINGLE("Beep Bypass Playback Switch", 0, CS4231_MONO_CTRL, 5, 1, 0), }; -- cgit v1.3-8-gc7d7 From d83345adf96bc13a5e360f4649a2e68ef968dec0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 16 Nov 2009 03:36:51 +0000 Subject: net: add dev_txq_stats_fold() helper Some drivers ndo_get_stats() method need to perform txqueue stats folding. Move folding from dev_get_stats() to a new dev_txq_stats_fold() function Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + net/core/dev.c | 48 ++++++++++++++++++++++++++++------------------- 2 files changed, 30 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7043f85e643d..c8fa4627de00 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1941,6 +1941,7 @@ extern void netdev_features_change(struct net_device *dev); extern void dev_load(struct net *net, const char *name); extern void dev_mcast_init(void); extern const struct net_device_stats *dev_get_stats(struct net_device *dev); +extern void dev_txq_stats_fold(const struct net_device *dev, struct net_device_stats *stats); extern int netdev_max_backlog; extern int weight_p; diff --git a/net/core/dev.c b/net/core/dev.c index d867522290b9..c3e0578d29d1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5168,6 +5168,32 @@ void netdev_run_todo(void) } } +/** + * dev_txq_stats_fold - fold tx_queues stats + * @dev: device to get statistics from + * @stats: struct net_device_stats to hold results + */ +void dev_txq_stats_fold(const struct net_device *dev, + struct net_device_stats *stats) +{ + unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0; + unsigned int i; + struct netdev_queue *txq; + + for (i = 0; i < dev->num_tx_queues; i++) { + txq = netdev_get_tx_queue(dev, i); + tx_bytes += txq->tx_bytes; + tx_packets += txq->tx_packets; + tx_dropped += txq->tx_dropped; + } + if (tx_bytes || tx_packets || tx_dropped) { + stats->tx_bytes = tx_bytes; + stats->tx_packets = tx_packets; + stats->tx_dropped = tx_dropped; + } +} +EXPORT_SYMBOL(dev_txq_stats_fold); + /** * dev_get_stats - get network device statistics * @dev: device to get statistics from @@ -5182,25 +5208,9 @@ const struct net_device_stats *dev_get_stats(struct net_device *dev) if (ops->ndo_get_stats) return ops->ndo_get_stats(dev); - else { - unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0; - struct net_device_stats *stats = &dev->stats; - unsigned int i; - struct netdev_queue *txq; - - for (i = 0; i < dev->num_tx_queues; i++) { - txq = netdev_get_tx_queue(dev, i); - tx_bytes += txq->tx_bytes; - tx_packets += txq->tx_packets; - tx_dropped += txq->tx_dropped; - } - if (tx_bytes || tx_packets || tx_dropped) { - stats->tx_bytes = tx_bytes; - stats->tx_packets = tx_packets; - stats->tx_dropped = tx_dropped; - } - return stats; - } + + dev_txq_stats_fold(dev, &dev->stats); + return &dev->stats; } EXPORT_SYMBOL(dev_get_stats); -- cgit v1.3-8-gc7d7 From 395264d509aec45149745843d9a737140a1ece16 Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Mon, 16 Nov 2009 13:49:35 +0000 Subject: net: introduce NETDEV_UNREGISTER_PERNET This new event is called once for each unique net namespace in batched unregister operations (with the argument set to a random device from that namespace) and once per device in non-batched unregister operations. It allows us to factorize some device unregister work such as clearing the routing cache. Signed-off-by: Octavian Purdila Signed-off-by: David S. Miller --- include/linux/notifier.h | 1 + net/core/dev.c | 29 +++++++++++++++++++++++++++-- 2 files changed, 28 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/notifier.h b/include/linux/notifier.h index 29714b8441b1..b0c3671d463c 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -202,6 +202,7 @@ static inline int notifier_to_errno(int ret) #define NETDEV_BONDING_OLDTYPE 0x000E #define NETDEV_BONDING_NEWTYPE 0x000F #define NETDEV_POST_INIT 0x0010 +#define NETDEV_UNREGISTER_PERNET 0x0011 #define SYS_DOWN 0x0001 /* Notify of system down */ #define SYS_RESTART SYS_DOWN diff --git a/net/core/dev.c b/net/core/dev.c index c3e0578d29d1..e25fe5d9343b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1344,6 +1344,7 @@ rollback: nb->notifier_call(nb, NETDEV_DOWN, dev); } nb->notifier_call(nb, NETDEV_UNREGISTER, dev); + nb->notifier_call(nb, NETDEV_UNREGISTER_PERNET, dev); } } @@ -4721,7 +4722,8 @@ static void net_set_todo(struct net_device *dev) static void rollback_registered_many(struct list_head *head) { - struct net_device *dev; + struct net_device *dev, *aux, *fdev; + LIST_HEAD(pernet_list); BUG_ON(dev_boot_phase); ASSERT_RTNL(); @@ -4779,8 +4781,24 @@ static void rollback_registered_many(struct list_head *head) synchronize_net(); - list_for_each_entry(dev, head, unreg_list) + list_for_each_entry_safe(dev, aux, head, unreg_list) { + int new_net = 1; + list_for_each_entry(fdev, &pernet_list, unreg_list) { + if (dev_net(dev) == dev_net(fdev)) { + new_net = 0; + dev_put(dev); + break; + } + } + if (new_net) + list_move(&dev->unreg_list, &pernet_list); + } + + list_for_each_entry_safe(dev, aux, &pernet_list, unreg_list) { + call_netdevice_notifiers(NETDEV_UNREGISTER_PERNET, dev); + list_move(&dev->unreg_list, head); dev_put(dev); + } } static void rollback_registered(struct net_device *dev) @@ -5074,6 +5092,8 @@ static void netdev_wait_allrefs(struct net_device *dev) /* Rebroadcast unregister notification */ call_netdevice_notifiers(NETDEV_UNREGISTER, dev); + /* don't resend NETDEV_UNREGISTER_PERNET, _PERNET users + * should have already handle it the first time */ if (test_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) { @@ -5385,6 +5405,10 @@ EXPORT_SYMBOL(unregister_netdevice_queue); * unregister_netdevice_many - unregister many devices * @head: list of devices * + * WARNING: Calling this modifies the given list + * (in rollback_registered_many). It may change the order of the elements + * in the list. However, you can assume it does not add or delete elements + * to/from the list. */ void unregister_netdevice_many(struct list_head *head) { @@ -5504,6 +5528,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char this device. They should clean all the things. */ call_netdevice_notifiers(NETDEV_UNREGISTER, dev); + call_netdevice_notifiers(NETDEV_UNREGISTER_PERNET, dev); /* * Flush the unicast and multicast chains -- cgit v1.3-8-gc7d7 From e014debecd3ee3832e6476b3a9c948edfcfd1250 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 17 Nov 2009 05:59:21 +0000 Subject: linkwatch: linkwatch_forget_dev() to speedup device dismantle MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Herbert Xu a écrit : > On Tue, Nov 17, 2009 at 04:26:04AM -0800, David Miller wrote: >> Really, the link watch stuff is just due for a redesign. I don't >> think a simple hack is going to cut it this time, sorry Eric :-) > > I have no objections against any redesigns, but since the only > caller of linkwatch_forget_dev runs in process context with the > RTNL, it could also legally emit those events. Thanks guys, here an updated version then, before linkwatch surgery ? In this version, I force the event to be sent synchronously. [PATCH net-next-2.6] linkwatch: linkwatch_forget_dev() to speedup device dismantle time ip link del eth3.103 ; time ip link del eth3.104 ; time ip link del eth3.105 real 0m0.266s user 0m0.000s sys 0m0.001s real 0m0.770s user 0m0.000s sys 0m0.000s real 0m1.022s user 0m0.000s sys 0m0.000s One problem of current schem in vlan dismantle phase is the holding of device done by following chain : vlan_dev_stop() -> netif_carrier_off(dev) -> linkwatch_fire_event(dev) -> dev_hold() ... And __linkwatch_run_queue() runs up to one second later... A generic fix to this problem is to add a linkwatch_forget_dev() method to unlink the device from the list of watched devices. dev->link_watch_next becomes dev->link_watch_list (and use a bit more memory), to be able to unlink device in O(1). After patch : time ip link del eth3.103 ; time ip link del eth3.104 ; time ip link del eth3.105 real 0m0.024s user 0m0.000s sys 0m0.000s real 0m0.032s user 0m0.000s sys 0m0.001s real 0m0.033s user 0m0.000s sys 0m0.000s Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +- net/core/dev.c | 3 ++ net/core/link_watch.c | 94 ++++++++++++++++++++++++++++------------------- 3 files changed, 62 insertions(+), 38 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c8fa4627de00..97873e31661c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -896,7 +896,7 @@ struct net_device { /* device index hash chain */ struct hlist_node index_hlist; - struct net_device *link_watch_next; + struct list_head link_watch_list; /* register/unregister state machine */ enum { NETREG_UNINITIALIZED=0, @@ -1600,6 +1600,7 @@ static inline void dev_hold(struct net_device *dev) */ extern void linkwatch_fire_event(struct net_device *dev); +extern void linkwatch_forget_dev(struct net_device *dev); /** * netif_carrier_ok - test if carrier present diff --git a/net/core/dev.c b/net/core/dev.c index e25fe5d9343b..c128af708ebf 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5085,6 +5085,8 @@ static void netdev_wait_allrefs(struct net_device *dev) { unsigned long rebroadcast_time, warning_time; + linkwatch_forget_dev(dev); + rebroadcast_time = warning_time = jiffies; while (atomic_read(&dev->refcnt) != 0) { if (time_after(jiffies, rebroadcast_time + 1 * HZ)) { @@ -5311,6 +5313,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, INIT_LIST_HEAD(&dev->napi_list); INIT_LIST_HEAD(&dev->unreg_list); + INIT_LIST_HEAD(&dev->link_watch_list); dev->priv_flags = IFF_XMIT_DST_RELEASE; setup(dev); strcpy(dev->name, name); diff --git a/net/core/link_watch.c b/net/core/link_watch.c index bf8f7af699d7..5910b555a54a 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -35,7 +35,7 @@ static unsigned long linkwatch_nextevent; static void linkwatch_event(struct work_struct *dummy); static DECLARE_DELAYED_WORK(linkwatch_work, linkwatch_event); -static struct net_device *lweventlist; +static LIST_HEAD(lweventlist); static DEFINE_SPINLOCK(lweventlist_lock); static unsigned char default_operstate(const struct net_device *dev) @@ -89,8 +89,10 @@ static void linkwatch_add_event(struct net_device *dev) unsigned long flags; spin_lock_irqsave(&lweventlist_lock, flags); - dev->link_watch_next = lweventlist; - lweventlist = dev; + if (list_empty(&dev->link_watch_list)) { + list_add_tail(&dev->link_watch_list, &lweventlist); + dev_hold(dev); + } spin_unlock_irqrestore(&lweventlist_lock, flags); } @@ -133,9 +135,35 @@ static void linkwatch_schedule_work(int urgent) } +static void linkwatch_do_dev(struct net_device *dev) +{ + /* + * Make sure the above read is complete since it can be + * rewritten as soon as we clear the bit below. + */ + smp_mb__before_clear_bit(); + + /* We are about to handle this device, + * so new events can be accepted + */ + clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state); + + rfc2863_policy(dev); + if (dev->flags & IFF_UP) { + if (netif_carrier_ok(dev)) + dev_activate(dev); + else + dev_deactivate(dev); + + netdev_state_change(dev); + } + dev_put(dev); +} + static void __linkwatch_run_queue(int urgent_only) { - struct net_device *next; + struct net_device *dev; + LIST_HEAD(wrk); /* * Limit the number of linkwatch events to one @@ -153,46 +181,40 @@ static void __linkwatch_run_queue(int urgent_only) clear_bit(LW_URGENT, &linkwatch_flags); spin_lock_irq(&lweventlist_lock); - next = lweventlist; - lweventlist = NULL; - spin_unlock_irq(&lweventlist_lock); + list_splice_init(&lweventlist, &wrk); - while (next) { - struct net_device *dev = next; + while (!list_empty(&wrk)) { - next = dev->link_watch_next; + dev = list_first_entry(&wrk, struct net_device, link_watch_list); + list_del_init(&dev->link_watch_list); if (urgent_only && !linkwatch_urgent_event(dev)) { - linkwatch_add_event(dev); + list_add_tail(&dev->link_watch_list, &lweventlist); continue; } - - /* - * Make sure the above read is complete since it can be - * rewritten as soon as we clear the bit below. - */ - smp_mb__before_clear_bit(); - - /* We are about to handle this device, - * so new events can be accepted - */ - clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state); - - rfc2863_policy(dev); - if (dev->flags & IFF_UP) { - if (netif_carrier_ok(dev)) - dev_activate(dev); - else - dev_deactivate(dev); - - netdev_state_change(dev); - } - - dev_put(dev); + spin_unlock_irq(&lweventlist_lock); + linkwatch_do_dev(dev); + spin_lock_irq(&lweventlist_lock); } - if (lweventlist) + if (!list_empty(&lweventlist)) linkwatch_schedule_work(0); + spin_unlock_irq(&lweventlist_lock); +} + +void linkwatch_forget_dev(struct net_device *dev) +{ + unsigned long flags; + int clean = 0; + + spin_lock_irqsave(&lweventlist_lock, flags); + if (!list_empty(&dev->link_watch_list)) { + list_del_init(&dev->link_watch_list); + clean = 1; + } + spin_unlock_irqrestore(&lweventlist_lock, flags); + if (clean) + linkwatch_do_dev(dev); } @@ -216,8 +238,6 @@ void linkwatch_fire_event(struct net_device *dev) bool urgent = linkwatch_urgent_event(dev); if (!test_and_set_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) { - dev_hold(dev); - linkwatch_add_event(dev); } else if (!urgent) return; -- cgit v1.3-8-gc7d7 From 2ea6dec4a22a6f66f6633876212fd4d195cf8277 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 17 Nov 2009 14:27:27 -0800 Subject: generic-ipi: Add smp_call_function_any() Andrew points out that acpi-cpufreq uses cpumask_any, when it really would prefer to use the same CPU if possible (to avoid an IPI). In general, this seems a good idea to offer. [ tglx: Documented selection preference and Inlined the UP case to avoid the copy of smp_call_function_single() and the extra EXPORT ] Signed-off-by: Rusty Russell Cc: Ingo Molnar Cc: Venkatesh Pallipadi Cc: Len Brown Cc: Zhao Yakui Cc: Dave Jones Cc: Thomas Gleixner Cc: Mike Galbraith Cc: "Zhang, Yanmin" Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner --- include/linux/smp.h | 11 ++++++++++- kernel/smp.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/smp.h b/include/linux/smp.h index 39c64bae776d..7a0570e6a596 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -76,6 +76,9 @@ void smp_call_function_many(const struct cpumask *mask, void __smp_call_function_single(int cpuid, struct call_single_data *data, int wait); +int smp_call_function_any(const struct cpumask *mask, + void (*func)(void *info), void *info, int wait); + /* * Generic and arch helpers */ @@ -137,9 +140,15 @@ static inline void smp_send_reschedule(int cpu) { } #define smp_prepare_boot_cpu() do {} while (0) #define smp_call_function_many(mask, func, info, wait) \ (up_smp_call_function(func, info)) -static inline void init_call_single_data(void) +static inline void init_call_single_data(void) { } + +static inline int +smp_call_function_any(const struct cpumask *mask, void (*func)(void *info), + void *info, int wait) { + return smp_call_function_single(0, func, info, wait); } + #endif /* !SMP */ /* diff --git a/kernel/smp.c b/kernel/smp.c index 8bd618f0364d..a8c76069cf50 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -319,6 +319,51 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info, } EXPORT_SYMBOL(smp_call_function_single); +/* + * smp_call_function_any - Run a function on any of the given cpus + * @mask: The mask of cpus it can run on. + * @func: The function to run. This must be fast and non-blocking. + * @info: An arbitrary pointer to pass to the function. + * @wait: If true, wait until function has completed. + * + * Returns 0 on success, else a negative status code (if no cpus were online). + * Note that @wait will be implicitly turned on in case of allocation failures, + * since we fall back to on-stack allocation. + * + * Selection preference: + * 1) current cpu if in @mask + * 2) any cpu of current node if in @mask + * 3) any other online cpu in @mask + */ +int smp_call_function_any(const struct cpumask *mask, + void (*func)(void *info), void *info, int wait) +{ + unsigned int cpu; + const struct cpumask *nodemask; + int ret; + + /* Try for same CPU (cheapest) */ + cpu = get_cpu(); + if (cpumask_test_cpu(cpu, mask)) + goto call; + + /* Try for same node. */ + nodemask = cpumask_of_node(cpu); + for (cpu = cpumask_first_and(nodemask, mask); cpu < nr_cpu_ids; + cpu = cpumask_next_and(cpu, nodemask, mask)) { + if (cpu_online(cpu)) + goto call; + } + + /* Any online will do: smp_call_function_single handles nr_cpu_ids. */ + cpu = cpumask_any_and(mask, cpu_online_mask); +call: + ret = smp_call_function_single(cpu, func, info, wait); + put_cpu(); + return ret; +} +EXPORT_SYMBOL_GPL(smp_call_function_any); + /** * __smp_call_function_single(): Run a function on another CPU * @cpu: The CPU to run on. -- cgit v1.3-8-gc7d7 From 60a0a52df149286a25fddf9b2d0cfe77cf0bc516 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 16 Nov 2009 02:30:16 -0800 Subject: sysctl: kill dead ctl_handler definitions. When removing the sysctl strategy routines I overlooked their definitions in sysctl.h. So remove those unnecessary definitions now. Signed-off-by: Eric W. Biederman --- include/linux/sysctl.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 4e40442777cf..b4f6adc6a02c 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -972,10 +972,6 @@ extern int sysctl_perm(struct ctl_table_root *root, typedef struct ctl_table ctl_table; -typedef int ctl_handler (struct ctl_table *table, - void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen); - typedef int proc_handler (struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos); @@ -996,13 +992,6 @@ extern int proc_doulongvec_minmax(struct ctl_table *, int, extern int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int, void __user *, size_t *, loff_t *); -extern ctl_handler sysctl_data; -extern ctl_handler sysctl_string; -extern ctl_handler sysctl_intvec; -extern ctl_handler sysctl_jiffies; -extern ctl_handler sysctl_ms_jiffies; - - /* * Register a set of sysctl names by calling register_sysctl_table * with an initialised array of struct ctl_table's. An entry with -- cgit v1.3-8-gc7d7 From 86926d0096279b9739ceeff40f68d3c33b9119a9 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 16 Nov 2009 02:40:01 -0800 Subject: sysctl: Remove CTL_NONE and CTL_UNNUMBERED Now that the sysctl structures no longer have a ctl_name field there is no reason to retain the definitions for CTL_NONE and CTL_UNNUMBERED, or to explain their historic usage. Signed-off-by: Eric W. Biederman --- Documentation/sysctl/ctl_unnumbered.txt | 22 ---------------------- include/linux/sysctl.h | 9 --------- 2 files changed, 31 deletions(-) delete mode 100644 Documentation/sysctl/ctl_unnumbered.txt (limited to 'include') diff --git a/Documentation/sysctl/ctl_unnumbered.txt b/Documentation/sysctl/ctl_unnumbered.txt deleted file mode 100644 index 23003a8ea3e7..000000000000 --- a/Documentation/sysctl/ctl_unnumbered.txt +++ /dev/null @@ -1,22 +0,0 @@ - -Except for a few extremely rare exceptions user space applications do not use -the binary sysctl interface. Instead everyone uses /proc/sys/... with -readable ascii names. - -Recently the kernel has started supporting setting the binary sysctl value to -CTL_UNNUMBERED so we no longer need to assign a binary sysctl path to allow -sysctls to show up in /proc/sys. - -Assigning binary sysctl numbers is an endless source of conflicts in sysctl.h, -breaking of the user space ABI (because of those conflicts), and maintenance -problems. A complete pass through all of the sysctl users revealed multiple -instances where the sysctl binary interface was broken and had gone undetected -for years. - -So please do not add new binary sysctl numbers. They are unneeded and -problematic. - -If you really need a new binary sysctl number please first merge your sysctl -into the kernel and then as a separate patch allocate a binary sysctl number. - -(ebiederm@xmission.com, June 2007) diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index b4f6adc6a02c..c83a86a22381 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -15,9 +15,6 @@ ** The kernel will then return -ENOTDIR to any application using ** the old binary interface. ** - ** For new interfaces unless you really need a binary number - ** please use CTL_UNNUMBERED. - ** **************************************************************** **************************************************************** */ @@ -50,12 +47,6 @@ struct __sysctl_args { /* Top-level names: */ -/* For internal pattern-matching use only: */ -#ifdef __KERNEL__ -#define CTL_NONE 0 -#define CTL_UNNUMBERED CTL_NONE /* sysctl without a binary number */ -#endif - enum { CTL_KERN=1, /* General kernel info and control */ -- cgit v1.3-8-gc7d7 From eeb74a9d45f781ec6f47b9e0a75a6a427b53f165 Mon Sep 17 00:00:00 2001 From: Rémi Denis-Courmont Date: Wed, 18 Nov 2009 10:08:26 -0800 Subject: Phonet: convert devices list to RCU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- include/net/phonet/pn_dev.h | 2 +- net/phonet/pn_dev.c | 63 +++++++++++++++++++++++++++++---------------- net/phonet/pn_netlink.c | 6 ++--- 3 files changed, 45 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/include/net/phonet/pn_dev.h b/include/net/phonet/pn_dev.h index afa7defceb14..d7b989ca3d63 100644 --- a/include/net/phonet/pn_dev.h +++ b/include/net/phonet/pn_dev.h @@ -25,7 +25,7 @@ struct phonet_device_list { struct list_head list; - spinlock_t lock; + struct mutex lock; }; struct phonet_device_list *phonet_device_list(struct net *net); diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index d5ad7947d771..d87388c94b00 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -61,7 +61,8 @@ static struct phonet_device *__phonet_device_alloc(struct net_device *dev) pnd->netdev = dev; bitmap_zero(pnd->addrs, 64); - list_add(&pnd->list, &pndevs->list); + BUG_ON(!mutex_is_locked(&pndevs->lock)); + list_add_rcu(&pnd->list, &pndevs->list); return pnd; } @@ -70,6 +71,7 @@ static struct phonet_device *__phonet_get(struct net_device *dev) struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev)); struct phonet_device *pnd; + BUG_ON(!mutex_is_locked(&pndevs->lock)); list_for_each_entry(pnd, &pndevs->list, list) { if (pnd->netdev == dev) return pnd; @@ -77,6 +79,18 @@ static struct phonet_device *__phonet_get(struct net_device *dev) return NULL; } +static struct phonet_device *__phonet_get_rcu(struct net_device *dev) +{ + struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev)); + struct phonet_device *pnd; + + list_for_each_entry_rcu(pnd, &pndevs->list, list) { + if (pnd->netdev == dev) + return pnd; + } + return NULL; +} + static void phonet_device_destroy(struct net_device *dev) { struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev)); @@ -84,11 +98,11 @@ static void phonet_device_destroy(struct net_device *dev) ASSERT_RTNL(); - spin_lock_bh(&pndevs->lock); + mutex_lock(&pndevs->lock); pnd = __phonet_get(dev); if (pnd) - list_del(&pnd->list); - spin_unlock_bh(&pndevs->lock); + list_del_rcu(&pnd->list); + mutex_unlock(&pndevs->lock); if (pnd) { u8 addr; @@ -106,8 +120,8 @@ struct net_device *phonet_device_get(struct net *net) struct phonet_device *pnd; struct net_device *dev = NULL; - spin_lock_bh(&pndevs->lock); - list_for_each_entry(pnd, &pndevs->list, list) { + rcu_read_lock(); + list_for_each_entry_rcu(pnd, &pndevs->list, list) { dev = pnd->netdev; BUG_ON(!dev); @@ -118,7 +132,7 @@ struct net_device *phonet_device_get(struct net *net) } if (dev) dev_hold(dev); - spin_unlock_bh(&pndevs->lock); + rcu_read_unlock(); return dev; } @@ -128,7 +142,7 @@ int phonet_address_add(struct net_device *dev, u8 addr) struct phonet_device *pnd; int err = 0; - spin_lock_bh(&pndevs->lock); + mutex_lock(&pndevs->lock); /* Find or create Phonet-specific device data */ pnd = __phonet_get(dev); if (pnd == NULL) @@ -137,7 +151,7 @@ int phonet_address_add(struct net_device *dev, u8 addr) err = -ENOMEM; else if (test_and_set_bit(addr >> 2, pnd->addrs)) err = -EEXIST; - spin_unlock_bh(&pndevs->lock); + mutex_unlock(&pndevs->lock); return err; } @@ -147,27 +161,32 @@ int phonet_address_del(struct net_device *dev, u8 addr) struct phonet_device *pnd; int err = 0; - spin_lock_bh(&pndevs->lock); + mutex_lock(&pndevs->lock); pnd = __phonet_get(dev); - if (!pnd || !test_and_clear_bit(addr >> 2, pnd->addrs)) + if (!pnd || !test_and_clear_bit(addr >> 2, pnd->addrs)) { err = -EADDRNOTAVAIL; - else if (bitmap_empty(pnd->addrs, 64)) { - list_del(&pnd->list); + pnd = NULL; + } else if (bitmap_empty(pnd->addrs, 64)) + list_del_rcu(&pnd->list); + else + pnd = NULL; + mutex_unlock(&pndevs->lock); + + if (pnd) { + synchronize_rcu(); kfree(pnd); } - spin_unlock_bh(&pndevs->lock); return err; } /* Gets a source address toward a destination, through a interface. */ u8 phonet_address_get(struct net_device *dev, u8 daddr) { - struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev)); struct phonet_device *pnd; u8 saddr; - spin_lock_bh(&pndevs->lock); - pnd = __phonet_get(dev); + rcu_read_lock(); + pnd = __phonet_get_rcu(dev); if (pnd) { BUG_ON(bitmap_empty(pnd->addrs, 64)); @@ -178,7 +197,7 @@ u8 phonet_address_get(struct net_device *dev, u8 daddr) saddr = find_first_bit(pnd->addrs, 64) << 2; } else saddr = PN_NO_ADDR; - spin_unlock_bh(&pndevs->lock); + rcu_read_unlock(); if (saddr == PN_NO_ADDR) { /* Fallback to another device */ @@ -200,8 +219,8 @@ int phonet_address_lookup(struct net *net, u8 addr) struct phonet_device *pnd; int err = -EADDRNOTAVAIL; - spin_lock_bh(&pndevs->lock); - list_for_each_entry(pnd, &pndevs->list, list) { + rcu_read_lock(); + list_for_each_entry_rcu(pnd, &pndevs->list, list) { /* Don't allow unregistering devices! */ if ((pnd->netdev->reg_state != NETREG_REGISTERED) || ((pnd->netdev->flags & IFF_UP)) != IFF_UP) @@ -213,7 +232,7 @@ int phonet_address_lookup(struct net *net, u8 addr) } } found: - spin_unlock_bh(&pndevs->lock); + rcu_read_unlock(); return err; } @@ -304,7 +323,7 @@ static int phonet_init_net(struct net *net) } INIT_LIST_HEAD(&pnn->pndevs.list); - spin_lock_init(&pnn->pndevs.lock); + mutex_init(&pnn->pndevs.lock); mutex_init(&pnn->routes.lock); net_assign_generic(net, phonet_net_id, pnn); return 0; diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index 609e509b369b..2e6c7eb8e76a 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -131,8 +131,8 @@ static int getaddr_dumpit(struct sk_buff *skb, struct netlink_callback *cb) int addr_idx = 0, addr_start_idx = cb->args[1]; pndevs = phonet_device_list(sock_net(skb->sk)); - spin_lock_bh(&pndevs->lock); - list_for_each_entry(pnd, &pndevs->list, list) { + rcu_read_lock(); + list_for_each_entry_rcu(pnd, &pndevs->list, list) { u8 addr; if (dev_idx > dev_start_idx) @@ -154,7 +154,7 @@ static int getaddr_dumpit(struct sk_buff *skb, struct netlink_callback *cb) } out: - spin_unlock_bh(&pndevs->lock); + rcu_read_unlock(); cb->args[0] = dev_idx; cb->args[1] = addr_idx; -- cgit v1.3-8-gc7d7 From 599bf6a4d64d8399e99514e0e1ef02e97e43238f Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 15 Nov 2009 23:07:30 +0100 Subject: mac80211: add the total ampdu length to tx info Signed-off-by: Felix Fietkau Signed-off-by: John W. Linville --- include/net/mac80211.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 2c10eac637d8..3b3defbe6fc1 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -390,10 +390,12 @@ struct ieee80211_tx_rate { * @control: union for control data * @status: union for status data * @driver_data: array of driver_data pointers - * @ampdu_ack_len: number of aggregated frames. + * @ampdu_ack_len: number of acked aggregated frames. * relevant only if IEEE80211_TX_STATUS_AMPDU was set. * @ampdu_ack_map: block ack bit map for the aggregation. * relevant only if IEEE80211_TX_STATUS_AMPDU was set. + * @ampdu_len: number of aggregated frames. + * relevant only if IEEE80211_TX_STATUS_AMPDU was set. * @ack_signal: signal strength of the ACK frame */ struct ieee80211_tx_info { @@ -428,7 +430,8 @@ struct ieee80211_tx_info { u8 ampdu_ack_len; u64 ampdu_ack_map; int ack_signal; - /* 8 bytes free */ + u8 ampdu_len; + /* 7 bytes free */ } status; struct { struct ieee80211_tx_rate driver_rates[ -- cgit v1.3-8-gc7d7 From c951ad3550ab40071bb0f222ba6125845769c08a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 16 Nov 2009 12:00:38 +0100 Subject: mac80211: convert aggregation to operate on vifs/stas The entire aggregation code currently operates on the hw pointer and station addresses, but that needs to change to make stations purely per-vif; As one step preparing for that make the aggregation code callable with the station, or by the combination of virtual interface and station address. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ar9170/main.c | 5 +- drivers/net/wireless/ath/ath9k/main.c | 5 +- drivers/net/wireless/ath/ath9k/rc.c | 2 +- drivers/net/wireless/iwlwifi/iwl-agn-rs.c | 2 +- drivers/net/wireless/iwlwifi/iwl-agn.c | 1 + drivers/net/wireless/iwlwifi/iwl-tx.c | 8 +-- include/net/mac80211.h | 28 +++++---- net/mac80211/agg-rx.c | 6 +- net/mac80211/agg-tx.c | 96 ++++++++++++------------------- net/mac80211/driver-ops.h | 5 +- net/mac80211/driver-trace.h | 9 ++- net/mac80211/ht.c | 3 +- net/mac80211/ieee80211_i.h | 3 +- net/mac80211/main.c | 8 +-- 14 files changed, 84 insertions(+), 97 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/ath/ar9170/main.c b/drivers/net/wireless/ath/ar9170/main.c index bd2a276c870a..f9d6db8d013e 100644 --- a/drivers/net/wireless/ath/ar9170/main.c +++ b/drivers/net/wireless/ath/ar9170/main.c @@ -2441,6 +2441,7 @@ static int ar9170_conf_tx(struct ieee80211_hw *hw, u16 queue, } static int ar9170_ampdu_action(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, enum ieee80211_ampdu_mlme_action action, struct ieee80211_sta *sta, u16 tid, u16 *ssn) { @@ -2470,7 +2471,7 @@ static int ar9170_ampdu_action(struct ieee80211_hw *hw, tid_info->state = AR9170_TID_STATE_PROGRESS; tid_info->active = false; spin_unlock_irqrestore(&ar->tx_ampdu_list_lock, flags); - ieee80211_start_tx_ba_cb_irqsafe(hw, sta->addr, tid); + ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid); break; case IEEE80211_AMPDU_TX_STOP: @@ -2480,7 +2481,7 @@ static int ar9170_ampdu_action(struct ieee80211_hw *hw, tid_info->active = false; skb_queue_purge(&tid_info->queue); spin_unlock_irqrestore(&ar->tx_ampdu_list_lock, flags); - ieee80211_stop_tx_ba_cb_irqsafe(hw, sta->addr, tid); + ieee80211_stop_tx_ba_cb_irqsafe(vif, sta->addr, tid); break; case IEEE80211_AMPDU_TX_OPERATIONAL: diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index 3229c3993568..16bdb1b549b4 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -3078,6 +3078,7 @@ static void ath9k_reset_tsf(struct ieee80211_hw *hw) } static int ath9k_ampdu_action(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, enum ieee80211_ampdu_mlme_action action, struct ieee80211_sta *sta, u16 tid, u16 *ssn) @@ -3095,11 +3096,11 @@ static int ath9k_ampdu_action(struct ieee80211_hw *hw, break; case IEEE80211_AMPDU_TX_START: ath_tx_aggr_start(sc, sta, tid, ssn); - ieee80211_start_tx_ba_cb_irqsafe(hw, sta->addr, tid); + ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid); break; case IEEE80211_AMPDU_TX_STOP: ath_tx_aggr_stop(sc, sta, tid); - ieee80211_stop_tx_ba_cb_irqsafe(hw, sta->addr, tid); + ieee80211_stop_tx_ba_cb_irqsafe(vif, sta->addr, tid); break; case IEEE80211_AMPDU_TX_OPERATIONAL: ath_tx_aggr_resume(sc, sta, tid); diff --git a/drivers/net/wireless/ath/ath9k/rc.c b/drivers/net/wireless/ath/ath9k/rc.c index ea4b9081b4d0..1d96777b4cd2 100644 --- a/drivers/net/wireless/ath/ath9k/rc.c +++ b/drivers/net/wireless/ath/ath9k/rc.c @@ -1353,7 +1353,7 @@ static void ath_tx_status(void *priv, struct ieee80211_supported_band *sband, an = (struct ath_node *)sta->drv_priv; if(ath_tx_aggr_check(sc, an, tid)) - ieee80211_start_tx_ba_session(sc->hw, hdr->addr1, tid); + ieee80211_start_tx_ba_session(sta, tid); } } diff --git a/drivers/net/wireless/iwlwifi/iwl-agn-rs.c b/drivers/net/wireless/iwlwifi/iwl-agn-rs.c index 43edd8fd4405..2f09e3b7f0eb 100644 --- a/drivers/net/wireless/iwlwifi/iwl-agn-rs.c +++ b/drivers/net/wireless/iwlwifi/iwl-agn-rs.c @@ -301,7 +301,7 @@ static void rs_tl_turn_on_agg_for_tid(struct iwl_priv *priv, if (rs_tl_get_load(lq_data, tid) > IWL_AGG_LOAD_THRESHOLD) { IWL_DEBUG_HT(priv, "Starting Tx agg: STA: %pM tid: %d\n", sta->addr, tid); - ieee80211_start_tx_ba_session(priv->hw, sta->addr, tid); + ieee80211_start_tx_ba_session(sta, tid); } } diff --git a/drivers/net/wireless/iwlwifi/iwl-agn.c b/drivers/net/wireless/iwlwifi/iwl-agn.c index 6385cdf24d13..b80cd0bc5845 100644 --- a/drivers/net/wireless/iwlwifi/iwl-agn.c +++ b/drivers/net/wireless/iwlwifi/iwl-agn.c @@ -2691,6 +2691,7 @@ static int iwl_mac_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, } static int iwl_mac_ampdu_action(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, enum ieee80211_ampdu_mlme_action action, struct ieee80211_sta *sta, u16 tid, u16 *ssn) { diff --git a/drivers/net/wireless/iwlwifi/iwl-tx.c b/drivers/net/wireless/iwlwifi/iwl-tx.c index ebfc460115d6..f3dff2ecc406 100644 --- a/drivers/net/wireless/iwlwifi/iwl-tx.c +++ b/drivers/net/wireless/iwlwifi/iwl-tx.c @@ -1312,7 +1312,7 @@ int iwl_tx_agg_start(struct iwl_priv *priv, const u8 *ra, u16 tid, u16 *ssn) if (tid_data->tfds_in_queue == 0) { IWL_DEBUG_HT(priv, "HW queue is empty\n"); tid_data->agg.state = IWL_AGG_ON; - ieee80211_start_tx_ba_cb_irqsafe(priv->hw, ra, tid); + ieee80211_start_tx_ba_cb_irqsafe(priv->vif, ra, tid); } else { IWL_DEBUG_HT(priv, "HW queue is NOT empty: %d packets in HW queue\n", tid_data->tfds_in_queue); @@ -1377,7 +1377,7 @@ int iwl_tx_agg_stop(struct iwl_priv *priv , const u8 *ra, u16 tid) if (ret) return ret; - ieee80211_stop_tx_ba_cb_irqsafe(priv->hw, ra, tid); + ieee80211_stop_tx_ba_cb_irqsafe(priv->vif, ra, tid); return 0; } @@ -1401,7 +1401,7 @@ int iwl_txq_check_empty(struct iwl_priv *priv, int sta_id, u8 tid, int txq_id) priv->cfg->ops->lib->txq_agg_disable(priv, txq_id, ssn, tx_fifo); tid_data->agg.state = IWL_AGG_OFF; - ieee80211_stop_tx_ba_cb_irqsafe(priv->hw, addr, tid); + ieee80211_stop_tx_ba_cb_irqsafe(priv->vif, addr, tid); } break; case IWL_EMPTYING_HW_QUEUE_ADDBA: @@ -1409,7 +1409,7 @@ int iwl_txq_check_empty(struct iwl_priv *priv, int sta_id, u8 tid, int txq_id) if (tid_data->tfds_in_queue == 0) { IWL_DEBUG_HT(priv, "HW queue empty: continue ADDBA flow\n"); tid_data->agg.state = IWL_AGG_ON; - ieee80211_start_tx_ba_cb_irqsafe(priv->hw, addr, tid); + ieee80211_start_tx_ba_cb_irqsafe(priv->vif, addr, tid); } break; } diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 3b3defbe6fc1..4af0ffb98aaf 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1508,6 +1508,7 @@ struct ieee80211_ops { void (*reset_tsf)(struct ieee80211_hw *hw); int (*tx_last_beacon)(struct ieee80211_hw *hw); int (*ampdu_action)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, enum ieee80211_ampdu_mlme_action action, struct ieee80211_sta *sta, u16 tid, u16 *ssn); @@ -2029,8 +2030,7 @@ void ieee80211_queue_delayed_work(struct ieee80211_hw *hw, /** * ieee80211_start_tx_ba_session - Start a tx Block Ack session. - * @hw: pointer as obtained from ieee80211_alloc_hw(). - * @ra: receiver address of the BA session recipient + * @sta: the station for which to start a BA session * @tid: the TID to BA on. * * Return: success if addBA request was sent, failure otherwise @@ -2039,22 +2039,22 @@ void ieee80211_queue_delayed_work(struct ieee80211_hw *hw, * the need to start aggregation on a certain RA/TID, the session level * will be managed by the mac80211. */ -int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid); +int ieee80211_start_tx_ba_session(struct ieee80211_sta *sta, u16 tid); /** * ieee80211_start_tx_ba_cb - low level driver ready to aggregate. - * @hw: pointer as obtained from ieee80211_alloc_hw(). + * @vif: &struct ieee80211_vif pointer from &struct ieee80211_if_init_conf * @ra: receiver address of the BA session recipient. * @tid: the TID to BA on. * * This function must be called by low level driver once it has * finished with preparations for the BA session. */ -void ieee80211_start_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u16 tid); +void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid); /** * ieee80211_start_tx_ba_cb_irqsafe - low level driver ready to aggregate. - * @hw: pointer as obtained from ieee80211_alloc_hw(). + * @vif: &struct ieee80211_vif pointer from &struct ieee80211_if_init_conf * @ra: receiver address of the BA session recipient. * @tid: the TID to BA on. * @@ -2062,13 +2062,12 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u16 tid); * finished with preparations for the BA session. * This version of the function is IRQ-safe. */ -void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_hw *hw, const u8 *ra, +void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_vif *vif, const u8 *ra, u16 tid); /** * ieee80211_stop_tx_ba_session - Stop a Block Ack session. - * @hw: pointer as obtained from ieee80211_alloc_hw(). - * @ra: receiver address of the BA session recipient + * @sta: the station whose BA session to stop * @tid: the TID to stop BA. * @initiator: if indicates initiator DELBA frame will be sent. * @@ -2078,24 +2077,23 @@ void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_hw *hw, const u8 *ra, * the need to stop aggregation on a certain RA/TID, the session level * will be managed by the mac80211. */ -int ieee80211_stop_tx_ba_session(struct ieee80211_hw *hw, - u8 *ra, u16 tid, +int ieee80211_stop_tx_ba_session(struct ieee80211_sta *sta, u16 tid, enum ieee80211_back_parties initiator); /** * ieee80211_stop_tx_ba_cb - low level driver ready to stop aggregate. - * @hw: pointer as obtained from ieee80211_alloc_hw(). + * @vif: &struct ieee80211_vif pointer from &struct ieee80211_if_init_conf * @ra: receiver address of the BA session recipient. * @tid: the desired TID to BA on. * * This function must be called by low level driver once it has * finished with preparations for the BA session tear down. */ -void ieee80211_stop_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u8 tid); +void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid); /** * ieee80211_stop_tx_ba_cb_irqsafe - low level driver ready to stop aggregate. - * @hw: pointer as obtained from ieee80211_alloc_hw(). + * @vif: &struct ieee80211_vif pointer from &struct ieee80211_if_init_conf * @ra: receiver address of the BA session recipient. * @tid: the desired TID to BA on. * @@ -2103,7 +2101,7 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u8 tid); * finished with preparations for the BA session tear down. * This version of the function is IRQ-safe. */ -void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_hw *hw, const u8 *ra, +void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_vif *vif, const u8 *ra, u16 tid); /** diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index bc064d7933ff..f3a5c9e0578d 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -41,7 +41,8 @@ void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, sta->sta.addr, tid); #endif /* CONFIG_MAC80211_HT_DEBUG */ - if (drv_ampdu_action(local, IEEE80211_AMPDU_RX_STOP, + if (drv_ampdu_action(local, &sta->sdata->vif, + IEEE80211_AMPDU_RX_STOP, &sta->sta, tid, NULL)) printk(KERN_DEBUG "HW problem - can not stop rx " "aggregation for tid %d\n", tid); @@ -284,7 +285,8 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, goto end; } - ret = drv_ampdu_action(local, IEEE80211_AMPDU_RX_START, + ret = drv_ampdu_action(local, &sta->sdata->vif, + IEEE80211_AMPDU_RX_START, &sta->sta, tid, &start_seq_num); #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "Rx A-MPDU request on tid %d result %d\n", tid, ret); diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index b09948ceec4a..6ddd11466df2 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -138,7 +138,8 @@ static int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, *state = HT_AGG_STATE_REQ_STOP_BA_MSK | (initiator << HT_AGG_STATE_INITIATOR_SHIFT); - ret = drv_ampdu_action(local, IEEE80211_AMPDU_TX_STOP, + ret = drv_ampdu_action(local, &sta->sdata->vif, + IEEE80211_AMPDU_TX_STOP, &sta->sta, tid, NULL); /* HW shall not deny going back to legacy */ @@ -196,11 +197,11 @@ static inline int ieee80211_ac_from_tid(int tid) return ieee802_1d_to_ac[tid & 7]; } -int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) +int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid) { - struct ieee80211_local *local = hw_to_local(hw); - struct sta_info *sta; - struct ieee80211_sub_if_data *sdata; + struct sta_info *sta = container_of(pubsta, struct sta_info, sta); + struct ieee80211_sub_if_data *sdata = sta->sdata; + struct ieee80211_local *local = sdata->local; u8 *state; int ret = 0; u16 start_seq_num; @@ -208,52 +209,37 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) if (WARN_ON(!local->ops->ampdu_action)) return -EINVAL; - if ((tid >= STA_TID_NUM) || !(hw->flags & IEEE80211_HW_AMPDU_AGGREGATION)) + if ((tid >= STA_TID_NUM) || + !(local->hw.flags & IEEE80211_HW_AMPDU_AGGREGATION)) return -EINVAL; #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "Open BA session requested for %pM tid %u\n", - ra, tid); + pubsta->addr, tid); #endif /* CONFIG_MAC80211_HT_DEBUG */ - rcu_read_lock(); - - sta = sta_info_get(local, ra); - if (!sta) { -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "Could not find the station\n"); -#endif - ret = -ENOENT; - goto unlock; - } - /* * The aggregation code is not prepared to handle * anything but STA/AP due to the BSSID handling. * IBSS could work in the code but isn't supported * by drivers or the standard. */ - if (sta->sdata->vif.type != NL80211_IFTYPE_STATION && - sta->sdata->vif.type != NL80211_IFTYPE_AP_VLAN && - sta->sdata->vif.type != NL80211_IFTYPE_AP) { - ret = -EINVAL; - goto unlock; - } + if (sdata->vif.type != NL80211_IFTYPE_STATION && + sdata->vif.type != NL80211_IFTYPE_AP_VLAN && + sdata->vif.type != NL80211_IFTYPE_AP) + return -EINVAL; if (test_sta_flags(sta, WLAN_STA_SUSPEND)) { #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "Suspend in progress. " "Denying BA session request\n"); #endif - ret = -EINVAL; - goto unlock; + return -EINVAL; } spin_lock_bh(&sta->lock); spin_lock(&local->ampdu_lock); - sdata = sta->sdata; - /* we have tried too many times, receiver does not want A-MPDU */ if (sta->ampdu_mlme.addba_req_num[tid] > HT_AGG_MAX_RETRIES) { ret = -EBUSY; @@ -310,8 +296,9 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) start_seq_num = sta->tid_seq[tid]; - ret = drv_ampdu_action(local, IEEE80211_AMPDU_TX_START, - &sta->sta, tid, &start_seq_num); + ret = drv_ampdu_action(local, &sdata->vif, + IEEE80211_AMPDU_TX_START, + pubsta, tid, &start_seq_num); if (ret) { #ifdef CONFIG_MAC80211_HT_DEBUG @@ -336,7 +323,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) sta->ampdu_mlme.dialog_token_allocator; sta->ampdu_mlme.tid_tx[tid]->ssn = start_seq_num; - ieee80211_send_addba_request(sta->sdata, ra, tid, + ieee80211_send_addba_request(sdata, pubsta->addr, tid, sta->ampdu_mlme.tid_tx[tid]->dialog_token, sta->ampdu_mlme.tid_tx[tid]->ssn, 0x40, 5000); @@ -348,7 +335,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "activated addBA response timer on tid %d\n", tid); #endif - goto unlock; + return 0; err_free: kfree(sta->ampdu_mlme.tid_tx[tid]); @@ -360,8 +347,6 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) err_unlock_sta: spin_unlock(&local->ampdu_lock); spin_unlock_bh(&sta->lock); - unlock: - rcu_read_unlock(); return ret; } EXPORT_SYMBOL(ieee80211_start_tx_ba_session); @@ -428,13 +413,15 @@ static void ieee80211_agg_tx_operational(struct ieee80211_local *local, ieee80211_agg_splice_finish(local, sta, tid); spin_unlock(&local->ampdu_lock); - drv_ampdu_action(local, IEEE80211_AMPDU_TX_OPERATIONAL, + drv_ampdu_action(local, &sta->sdata->vif, + IEEE80211_AMPDU_TX_OPERATIONAL, &sta->sta, tid, NULL); } -void ieee80211_start_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u16 tid) +void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid) { - struct ieee80211_local *local = hw_to_local(hw); + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct ieee80211_local *local = sdata->local; struct sta_info *sta; u8 *state; @@ -483,10 +470,11 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u16 tid) } EXPORT_SYMBOL(ieee80211_start_tx_ba_cb); -void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_hw *hw, +void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_vif *vif, const u8 *ra, u16 tid) { - struct ieee80211_local *local = hw_to_local(hw); + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct ieee80211_local *local = sdata->local; struct ieee80211_ra_tid *ra_tid; struct sk_buff *skb = dev_alloc_skb(0); @@ -535,13 +523,12 @@ int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, return ret; } -int ieee80211_stop_tx_ba_session(struct ieee80211_hw *hw, - u8 *ra, u16 tid, +int ieee80211_stop_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, enum ieee80211_back_parties initiator) { - struct ieee80211_local *local = hw_to_local(hw); - struct sta_info *sta; - int ret = 0; + struct sta_info *sta = container_of(pubsta, struct sta_info, sta); + struct ieee80211_sub_if_data *sdata = sta->sdata; + struct ieee80211_local *local = sdata->local; if (WARN_ON(!local->ops->ampdu_action)) return -EINVAL; @@ -549,22 +536,14 @@ int ieee80211_stop_tx_ba_session(struct ieee80211_hw *hw, if (tid >= STA_TID_NUM) return -EINVAL; - rcu_read_lock(); - sta = sta_info_get(local, ra); - if (!sta) { - rcu_read_unlock(); - return -ENOENT; - } - - ret = __ieee80211_stop_tx_ba_session(sta, tid, initiator); - rcu_read_unlock(); - return ret; + return __ieee80211_stop_tx_ba_session(sta, tid, initiator); } EXPORT_SYMBOL(ieee80211_stop_tx_ba_session); -void ieee80211_stop_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u8 tid) +void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid) { - struct ieee80211_local *local = hw_to_local(hw); + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct ieee80211_local *local = sdata->local; struct sta_info *sta; u8 *state; @@ -627,10 +606,11 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u8 tid) } EXPORT_SYMBOL(ieee80211_stop_tx_ba_cb); -void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_hw *hw, +void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_vif *vif, const u8 *ra, u16 tid) { - struct ieee80211_local *local = hw_to_local(hw); + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct ieee80211_local *local = sdata->local; struct ieee80211_ra_tid *ra_tid; struct sk_buff *skb = dev_alloc_skb(0); diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 020a94a31106..921dd9c9ff62 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -239,15 +239,16 @@ static inline int drv_tx_last_beacon(struct ieee80211_local *local) } static inline int drv_ampdu_action(struct ieee80211_local *local, + struct ieee80211_vif *vif, enum ieee80211_ampdu_mlme_action action, struct ieee80211_sta *sta, u16 tid, u16 *ssn) { int ret = -EOPNOTSUPP; if (local->ops->ampdu_action) - ret = local->ops->ampdu_action(&local->hw, action, + ret = local->ops->ampdu_action(&local->hw, vif, action, sta, tid, ssn); - trace_drv_ampdu_action(local, action, sta, tid, ssn, ret); + trace_drv_ampdu_action(local, vif, action, sta, tid, ssn, ret); return ret; } diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h index 37b9051afcf3..b8fef1d11369 100644 --- a/net/mac80211/driver-trace.h +++ b/net/mac80211/driver-trace.h @@ -634,11 +634,12 @@ TRACE_EVENT(drv_tx_last_beacon, TRACE_EVENT(drv_ampdu_action, TP_PROTO(struct ieee80211_local *local, + struct ieee80211_vif *vif, enum ieee80211_ampdu_mlme_action action, struct ieee80211_sta *sta, u16 tid, u16 *ssn, int ret), - TP_ARGS(local, action, sta, tid, ssn, ret), + TP_ARGS(local, vif, action, sta, tid, ssn, ret), TP_STRUCT__entry( LOCAL_ENTRY @@ -647,10 +648,12 @@ TRACE_EVENT(drv_ampdu_action, __field(u16, tid) __field(u16, ssn) __field(int, ret) + VIF_ENTRY ), TP_fast_assign( LOCAL_ASSIGN; + VIF_ASSIGN; STA_ASSIGN; __entry->ret = ret; __entry->action = action; @@ -659,8 +662,8 @@ TRACE_EVENT(drv_ampdu_action, ), TP_printk( - LOCAL_PR_FMT STA_PR_FMT " action:%d tid:%d ret:%d", - LOCAL_PR_ARG, STA_PR_ARG, __entry->action, __entry->tid, __entry->ret + LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " action:%d tid:%d ret:%d", + LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->action, __entry->tid, __entry->ret ) ); #endif /* !__MAC80211_DRIVER_TRACE || TRACE_HEADER_MULTI_READ */ diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 48ef1a282b91..345c8ee50175 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -141,7 +141,6 @@ void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, struct ieee80211_mgmt *mgmt, size_t len) { - struct ieee80211_local *local = sdata->local; u16 tid, params; u16 initiator; @@ -164,7 +163,7 @@ void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata, sta->ampdu_mlme.tid_state_tx[tid] = HT_AGG_STATE_OPERATIONAL; spin_unlock_bh(&sta->lock); - ieee80211_stop_tx_ba_session(&local->hw, sta->sta.addr, tid, + ieee80211_stop_tx_ba_session(&sta->sta, tid, WLAN_BACK_RECIPIENT); } } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index b63b99fb2fd3..b7598db5ade2 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -771,8 +771,9 @@ IEEE80211_DEV_TO_SUB_IF(struct net_device *dev) return netdev_priv(dev); } -/* this struct represents 802.11n's RA/TID combination */ +/* this struct represents 802.11n's RA/TID combination along with our vif */ struct ieee80211_ra_tid { + struct ieee80211_vif *vif; u8 ra[ETH_ALEN]; u16 tid; }; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index c74a6a1935b3..f4be97c3b747 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -296,14 +296,14 @@ static void ieee80211_tasklet_handler(unsigned long data) break; case IEEE80211_DELBA_MSG: ra_tid = (struct ieee80211_ra_tid *) &skb->cb; - ieee80211_stop_tx_ba_cb(local_to_hw(local), - ra_tid->ra, ra_tid->tid); + ieee80211_stop_tx_ba_cb(ra_tid->vif, ra_tid->ra, + ra_tid->tid); dev_kfree_skb(skb); break; case IEEE80211_ADDBA_MSG: ra_tid = (struct ieee80211_ra_tid *) &skb->cb; - ieee80211_start_tx_ba_cb(local_to_hw(local), - ra_tid->ra, ra_tid->tid); + ieee80211_start_tx_ba_cb(ra_tid->vif, ra_tid->ra, + ra_tid->tid); dev_kfree_skb(skb); break ; default: -- cgit v1.3-8-gc7d7 From c95cf3d09adc9afe7816a13a920b6df36062a3fe Mon Sep 17 00:00:00 2001 From: David-John Willis Date: Tue, 17 Nov 2009 18:50:09 +0200 Subject: wl1251: add NVS in EEPROM support wl1251 supports also that NVS is stored in a separate EEPROM, add support for that. kvalo: use platform data instead Kconfig and use kernel style Signed-off-by: David-John Willis Signed-off-by: Kalle Valo Signed-off-by: John W. Linville --- drivers/net/wireless/wl12xx/wl1251.h | 1 + drivers/net/wireless/wl12xx/wl1251_boot.c | 20 +++++++++++++------- drivers/net/wireless/wl12xx/wl1251_reg.h | 6 ++++++ drivers/net/wireless/wl12xx/wl1251_spi.c | 2 ++ include/linux/spi/wl12xx.h | 1 + 5 files changed, 23 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/wl12xx/wl1251.h b/drivers/net/wireless/wl12xx/wl1251.h index a839466664f0..054533f7a124 100644 --- a/drivers/net/wireless/wl12xx/wl1251.h +++ b/drivers/net/wireless/wl12xx/wl1251.h @@ -269,6 +269,7 @@ struct wl1251 { void (*set_power)(bool enable); int irq; + bool use_eeprom; enum wl1251_state state; struct mutex mutex; diff --git a/drivers/net/wireless/wl12xx/wl1251_boot.c b/drivers/net/wireless/wl12xx/wl1251_boot.c index 5094f24ad034..2e733e7bdfd4 100644 --- a/drivers/net/wireless/wl12xx/wl1251_boot.c +++ b/drivers/net/wireless/wl12xx/wl1251_boot.c @@ -494,13 +494,19 @@ int wl1251_boot(struct wl1251 *wl) goto out; /* 2. start processing NVS file */ - ret = wl1251_boot_upload_nvs(wl); - if (ret < 0) - goto out; - - /* write firmware's last address (ie. it's length) to - * ACX_EEPROMLESS_IND_REG */ - wl1251_reg_write32(wl, ACX_EEPROMLESS_IND_REG, wl->fw_len); + if (wl->use_eeprom) { + wl1251_reg_write32(wl, ACX_REG_EE_START, START_EEPROM_MGR); + msleep(4000); + wl1251_reg_write32(wl, ACX_EEPROMLESS_IND_REG, USE_EEPROM); + } else { + ret = wl1251_boot_upload_nvs(wl); + if (ret < 0) + goto out; + + /* write firmware's last address (ie. it's length) to + * ACX_EEPROMLESS_IND_REG */ + wl1251_reg_write32(wl, ACX_EEPROMLESS_IND_REG, wl->fw_len); + } /* 6. read the EEPROM parameters */ tmp = wl1251_reg_read32(wl, SCR_PAD2); diff --git a/drivers/net/wireless/wl12xx/wl1251_reg.h b/drivers/net/wireless/wl12xx/wl1251_reg.h index 06e1bd94a739..0ca3b4326056 100644 --- a/drivers/net/wireless/wl12xx/wl1251_reg.h +++ b/drivers/net/wireless/wl12xx/wl1251_reg.h @@ -370,6 +370,7 @@ enum wl12xx_acx_int_reg { EEPROM location specified in the EE_ADDR register. The Wlan hardware hardware clears this bit automatically. *===============================================*/ +#define EE_CTL (REGISTERS_BASE + 0x2000) #define ACX_EE_CTL_REG EE_CTL #define EE_WRITE 0x00000001ul #define EE_READ 0x00000002ul @@ -380,6 +381,7 @@ enum wl12xx_acx_int_reg { This register specifies the address within the EEPROM from/to which to read/write data. ===============================================*/ +#define EE_ADDR (REGISTERS_BASE + 0x2008) #define ACX_EE_ADDR_REG EE_ADDR /*=============================================== @@ -389,8 +391,12 @@ enum wl12xx_acx_int_reg { data from the EEPROM or the write data to be written to the EEPROM. ===============================================*/ +#define EE_DATA (REGISTERS_BASE + 0x2004) #define ACX_EE_DATA_REG EE_DATA +#define EEPROM_ACCESS_TO 10000 /* timeout counter */ +#define START_EEPROM_MGR 0x00000001 + /*=============================================== EEPROM Base Address - 32bit RW ------------------------------------------ diff --git a/drivers/net/wireless/wl12xx/wl1251_spi.c b/drivers/net/wireless/wl12xx/wl1251_spi.c index 2cf8a2169d43..9cc8c323830f 100644 --- a/drivers/net/wireless/wl12xx/wl1251_spi.c +++ b/drivers/net/wireless/wl12xx/wl1251_spi.c @@ -270,6 +270,8 @@ static int __devinit wl1251_spi_probe(struct spi_device *spi) return -ENODEV; } + wl->use_eeprom = pdata->use_eeprom; + ret = request_irq(wl->irq, wl1251_irq, 0, DRIVER_NAME, wl); if (ret < 0) { wl1251_error("request_irq() failed: %d", ret); diff --git a/include/linux/spi/wl12xx.h b/include/linux/spi/wl12xx.h index 11430cab2aad..aed64ed3dc8a 100644 --- a/include/linux/spi/wl12xx.h +++ b/include/linux/spi/wl12xx.h @@ -26,6 +26,7 @@ struct wl12xx_platform_data { void (*set_power)(bool enable); + bool use_eeprom; }; #endif -- cgit v1.3-8-gc7d7 From af65cd96dd4ea8ea5adc6ee850e61a407cd1067a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 17 Nov 2009 18:18:36 +0100 Subject: mac80211: make software rate control optional Some devices implement the entire rate control in firmware in some way, like wl1271 or like iwlwifi which does some things in software but not a lot. Therefore generic software rate control is rather useless for them and just adds avoidable overhead to the transmit path. It's fairly simple to let drivers indicate that they do not need rate control, but they need to fulfil a number of conditions that we encode in WARN_ONs. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 14 ++++++++++++++ net/mac80211/debugfs.c | 2 +- net/mac80211/rate.c | 12 +++++++++++- net/mac80211/rate.h | 9 ++++++--- net/mac80211/sta_info.c | 29 ++++++++++++++++++++++------- net/mac80211/tx.c | 3 ++- 6 files changed, 56 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 4af0ffb98aaf..f34f4ca7016c 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -855,6 +855,19 @@ enum ieee80211_tkip_key_type { * any particular flags. There are some exceptions to this rule, * however, so you are advised to review these flags carefully. * + * @IEEE80211_HW_HAS_RATE_CONTROL: + * The hardware or firmware includes rate control, and cannot be + * controlled by the stack. As such, no rate control algorithm + * should be instantiated, and the TX rate reported to userspace + * will be taken from the TX status instead of the rate control + * algorithm. + * Note that this requires that the driver implement a number of + * callbacks so it has the correct information, it needs to have + * the @set_rts_threshold callback and must look at the BSS config + * @use_cts_prot for G/N protection, @use_short_slot for slot + * timing in 2.4 GHz and @use_short_preamble for preambles for + * CCK frames. + * * @IEEE80211_HW_RX_INCLUDES_FCS: * Indicates that received frames passed to the stack include * the FCS at the end. @@ -913,6 +926,7 @@ enum ieee80211_tkip_key_type { * avoid waking up cpu. */ enum ieee80211_hw_flags { + IEEE80211_HW_HAS_RATE_CONTROL = 1<<0, IEEE80211_HW_RX_INCLUDES_FCS = 1<<1, IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING = 1<<2, IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE = 1<<3, diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 82c807723b6f..e4b54093d41b 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -52,7 +52,7 @@ DEBUGFS_READONLY_FILE(total_ps_buffered, 20, "%d", DEBUGFS_READONLY_FILE(wep_iv, 20, "%#08x", local->wep_iv & 0xffffff); DEBUGFS_READONLY_FILE(rate_ctrl_alg, 100, "%s", - local->rate_ctrl ? local->rate_ctrl->ops->name : ""); + local->rate_ctrl ? local->rate_ctrl->ops->name : "hw/driver"); static ssize_t tsf_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index ccda7454fb17..b9007f80cb92 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -284,9 +284,16 @@ int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local, struct rate_control_ref *ref, *old; ASSERT_RTNL(); + if (local->open_count) return -EBUSY; + if (local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL) { + if (WARN_ON(!local->ops->set_rts_threshold)) + return -EINVAL; + return 0; + } + ref = rate_control_alloc(name, local); if (!ref) { printk(KERN_WARNING "%s: Failed to select rate control " @@ -305,7 +312,6 @@ int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local, "algorithm '%s'\n", wiphy_name(local->hw.wiphy), ref->ops->name); - return 0; } @@ -314,6 +320,10 @@ void rate_control_deinitialize(struct ieee80211_local *local) struct rate_control_ref *ref; ref = local->rate_ctrl; + + if (!ref) + return; + local->rate_ctrl = NULL; rate_control_put(ref); } diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h index 2ab5ad9e71ce..cb9bd1f65e27 100644 --- a/net/mac80211/rate.h +++ b/net/mac80211/rate.h @@ -59,6 +59,9 @@ static inline void rate_control_rate_init(struct sta_info *sta) void *priv_sta = sta->rate_ctrl_priv; struct ieee80211_supported_band *sband; + if (!ref) + return; + sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; ref->ops->rate_init(ref->priv, sband, ista, priv_sta); @@ -72,7 +75,7 @@ static inline void rate_control_rate_update(struct ieee80211_local *local, struct ieee80211_sta *ista = &sta->sta; void *priv_sta = sta->rate_ctrl_priv; - if (ref->ops->rate_update) + if (ref && ref->ops->rate_update) ref->ops->rate_update(ref->priv, sband, ista, priv_sta, changed); } @@ -97,7 +100,7 @@ static inline void rate_control_add_sta_debugfs(struct sta_info *sta) { #ifdef CONFIG_MAC80211_DEBUGFS struct rate_control_ref *ref = sta->rate_ctrl; - if (sta->debugfs.dir && ref->ops->add_sta_debugfs) + if (ref && sta->debugfs.dir && ref->ops->add_sta_debugfs) ref->ops->add_sta_debugfs(ref->priv, sta->rate_ctrl_priv, sta->debugfs.dir); #endif @@ -107,7 +110,7 @@ static inline void rate_control_remove_sta_debugfs(struct sta_info *sta) { #ifdef CONFIG_MAC80211_DEBUGFS struct rate_control_ref *ref = sta->rate_ctrl; - if (ref->ops->remove_sta_debugfs) + if (ref && ref->ops->remove_sta_debugfs) ref->ops->remove_sta_debugfs(ref->priv, sta->rate_ctrl_priv); #endif } diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index aa017a56afc8..71f370dd24bc 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -148,8 +148,10 @@ struct sta_info *sta_info_get_by_idx(struct ieee80211_sub_if_data *sdata, static void __sta_info_free(struct ieee80211_local *local, struct sta_info *sta) { - rate_control_free_sta(sta); - rate_control_put(sta->rate_ctrl); + if (sta->rate_ctrl) { + rate_control_free_sta(sta); + rate_control_put(sta->rate_ctrl); + } #ifdef CONFIG_MAC80211_VERBOSE_DEBUG printk(KERN_DEBUG "%s: Destroyed STA %pM\n", @@ -277,6 +279,23 @@ static void sta_unblock(struct work_struct *wk) ieee80211_sta_ps_deliver_poll_response(sta); } +static int sta_prepare_rate_control(struct ieee80211_local *local, + struct sta_info *sta, gfp_t gfp) +{ + if (local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL) + return 0; + + sta->rate_ctrl = rate_control_get(local->rate_ctrl); + sta->rate_ctrl_priv = rate_control_alloc_sta(sta->rate_ctrl, + &sta->sta, gfp); + if (!sta->rate_ctrl_priv) { + rate_control_put(sta->rate_ctrl); + return -ENOMEM; + } + + return 0; +} + struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, u8 *addr, gfp_t gfp) { @@ -296,11 +315,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, sta->local = local; sta->sdata = sdata; - sta->rate_ctrl = rate_control_get(local->rate_ctrl); - sta->rate_ctrl_priv = rate_control_alloc_sta(sta->rate_ctrl, - &sta->sta, gfp); - if (!sta->rate_ctrl_priv) { - rate_control_put(sta->rate_ctrl); + if (sta_prepare_rate_control(local, sta, gfp)) { kfree(sta); return NULL; } diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 3466e1f7fd12..bd916437e2fb 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1219,7 +1219,8 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx) CALL_TXH(ieee80211_tx_h_ps_buf); CALL_TXH(ieee80211_tx_h_select_key); CALL_TXH(ieee80211_tx_h_michael_mic_add); - CALL_TXH(ieee80211_tx_h_rate_ctrl); + if (!(tx->local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL)) + CALL_TXH(ieee80211_tx_h_rate_ctrl); CALL_TXH(ieee80211_tx_h_misc); CALL_TXH(ieee80211_tx_h_sequence); CALL_TXH(ieee80211_tx_h_fragment); -- cgit v1.3-8-gc7d7 From 0878c3504f92f1bf063d0890a9960d4b9e6c4618 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Wed, 18 Nov 2009 16:48:00 +0100 Subject: rfkill: Add missing description for RFKILL_TYPE_GPS Signed-off-by: Marcel Holtmann Signed-off-by: Janakiram Sistla Signed-off-by: John W. Linville --- include/linux/rfkill.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index 3392c59d2706..a75e9e566ce5 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -35,6 +35,7 @@ * @RFKILL_TYPE_UWB: switch is on a ultra wideband device. * @RFKILL_TYPE_WIMAX: switch is on a WiMAX device. * @RFKILL_TYPE_WWAN: switch is on a wireless WAN device. + * @RFKILL_TYPE_GPS: switch is on a GPS device. * @NUM_RFKILL_TYPES: number of defined rfkill types */ enum rfkill_type { -- cgit v1.3-8-gc7d7 From 875405a7793e9c35fab33819e7e5df7a98b6064c Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Wed, 18 Nov 2009 16:48:01 +0100 Subject: rfkill: Add constant for RFKILL_TYPE_FM radio devices Signed-off-by: Marcel Holtmann Signed-off-by: Janakiram Sistla Signed-off-by: John W. Linville --- include/linux/rfkill.h | 2 ++ net/rfkill/core.c | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index a75e9e566ce5..97059d08a626 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -36,6 +36,7 @@ * @RFKILL_TYPE_WIMAX: switch is on a WiMAX device. * @RFKILL_TYPE_WWAN: switch is on a wireless WAN device. * @RFKILL_TYPE_GPS: switch is on a GPS device. + * @RFKILL_TYPE_FM: switch is on a FM radio device. * @NUM_RFKILL_TYPES: number of defined rfkill types */ enum rfkill_type { @@ -46,6 +47,7 @@ enum rfkill_type { RFKILL_TYPE_WIMAX, RFKILL_TYPE_WWAN, RFKILL_TYPE_GPS, + RFKILL_TYPE_FM, NUM_RFKILL_TYPES, }; diff --git a/net/rfkill/core.c b/net/rfkill/core.c index ba2efb960c60..09f4e161799b 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -592,11 +592,13 @@ static const char *rfkill_get_type_str(enum rfkill_type type) return "wwan"; case RFKILL_TYPE_GPS: return "gps"; + case RFKILL_TYPE_FM: + return "fm"; default: BUG(); } - BUILD_BUG_ON(NUM_RFKILL_TYPES != RFKILL_TYPE_GPS + 1); + BUILD_BUG_ON(NUM_RFKILL_TYPES != RFKILL_TYPE_FM + 1); } static ssize_t rfkill_type_show(struct device *dev, -- cgit v1.3-8-gc7d7 From 136cfa28615ccce0f9374811480e0b81c4191ea5 Mon Sep 17 00:00:00 2001 From: Rui Paulo Date: Wed, 18 Nov 2009 18:40:00 +0000 Subject: mac80211: use a structure to hold the mesh config information element Signed-off-by: Rui Paulo Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 16 +++++++++++++++- net/mac80211/ieee80211_i.h | 3 +-- net/mac80211/mesh.c | 23 ++++++++--------------- net/mac80211/util.c | 4 ++-- net/wireless/scan.c | 9 +++++---- 5 files changed, 31 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 49b1abd2fe97..afa8e0ac27a7 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -115,7 +115,6 @@ #define IEEE80211_MAX_SSID_LEN 32 #define IEEE80211_MAX_MESH_ID_LEN 32 -#define IEEE80211_MESH_CONFIG_LEN 7 #define IEEE80211_QOS_CTL_LEN 2 #define IEEE80211_QOS_CTL_TID_MASK 0x000F @@ -554,6 +553,21 @@ struct ieee80211_tim_ie { u8 virtual_map[1]; } __attribute__ ((packed)); +/** + * struct ieee80211_meshconf_ie + * + * This structure refers to "Mesh Configuration information element" + */ +struct ieee80211_meshconf_ie { + u8 meshconf_psel; + u8 meshconf_pmetric; + u8 meshconf_congest; + u8 meshconf_synch; + u8 meshconf_auth; + u8 meshconf_form; + u8 meshconf_cap; +} __attribute__ ((packed)); + /** * struct ieee80211_rann_ie * diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 74e79935de0a..87d27f450a05 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -798,7 +798,7 @@ struct ieee802_11_elems { u8 *wmm_param; struct ieee80211_ht_cap *ht_cap_elem; struct ieee80211_ht_info *ht_info_elem; - u8 *mesh_config; + struct ieee80211_meshconf_ie *mesh_config; u8 *mesh_id; u8 *peer_link; u8 *preq; @@ -826,7 +826,6 @@ struct ieee802_11_elems { u8 ext_supp_rates_len; u8 wmm_info_len; u8 wmm_param_len; - u8 mesh_config_len; u8 mesh_id_len; u8 peer_link_len; u8 preq_len; diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 3a0683ba357b..51adb1115215 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -16,12 +16,6 @@ #define IEEE80211_MESH_HOUSEKEEPING_INTERVAL (60 * HZ) #define IEEE80211_MESH_RANN_INTERVAL (1 * HZ) -#define MESHCONF_PP_OFFSET 0 /* Path Selection Protocol */ -#define MESHCONF_PM_OFFSET 1 /* Path Selection Metric */ -#define MESHCONF_CC_OFFSET 2 /* Congestion Control Mode */ -#define MESHCONF_SP_OFFSET 3 /* Synchronization Protocol */ -#define MESHCONF_AUTH_OFFSET 4 /* Authentication Protocol */ -#define MESHCONF_CAPAB_OFFSET 6 #define MESHCONF_CAPAB_ACCEPT_PLINKS 0x01 #define MESHCONF_CAPAB_FORWARDING 0x08 @@ -87,12 +81,11 @@ bool mesh_matches_local(struct ieee802_11_elems *ie, struct ieee80211_sub_if_dat */ if (ifmsh->mesh_id_len == ie->mesh_id_len && memcmp(ifmsh->mesh_id, ie->mesh_id, ie->mesh_id_len) == 0 && - (ifmsh->mesh_pp_id == *(ie->mesh_config + MESHCONF_PP_OFFSET))&& - (ifmsh->mesh_pm_id == *(ie->mesh_config + MESHCONF_PM_OFFSET))&& - (ifmsh->mesh_cc_id == *(ie->mesh_config + MESHCONF_CC_OFFSET))&& - (ifmsh->mesh_sp_id == *(ie->mesh_config + MESHCONF_SP_OFFSET))&& - (ifmsh->mesh_auth_id == *(ie->mesh_config + - MESHCONF_AUTH_OFFSET))) + (ifmsh->mesh_pp_id == ie->mesh_config->meshconf_psel) && + (ifmsh->mesh_pm_id == ie->mesh_config->meshconf_pmetric) && + (ifmsh->mesh_cc_id == ie->mesh_config->meshconf_congest) && + (ifmsh->mesh_sp_id == ie->mesh_config->meshconf_synch) && + (ifmsh->mesh_auth_id == ie->mesh_config->meshconf_auth)) return true; return false; @@ -105,7 +98,7 @@ bool mesh_matches_local(struct ieee802_11_elems *ie, struct ieee80211_sub_if_dat */ bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie) { - return (*(ie->mesh_config + MESHCONF_CAPAB_OFFSET) & + return (ie->mesh_config->meshconf_cap & MESHCONF_CAPAB_ACCEPT_PLINKS) != 0; } @@ -262,9 +255,9 @@ void mesh_mgmt_ies_add(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata) if (sdata->u.mesh.mesh_id_len) memcpy(pos, sdata->u.mesh.mesh_id, sdata->u.mesh.mesh_id_len); - pos = skb_put(skb, 2 + IEEE80211_MESH_CONFIG_LEN); + pos = skb_put(skb, 2 + sizeof(struct ieee80211_meshconf_ie)); *pos++ = WLAN_EID_MESH_CONFIG; - *pos++ = IEEE80211_MESH_CONFIG_LEN; + *pos++ = sizeof(struct ieee80211_meshconf_ie); /* Active path selection protocol ID */ *pos++ = sdata->u.mesh.mesh_pp_id; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 5ae1bf389849..2fb0432ac830 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -666,8 +666,8 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, elems->mesh_id_len = elen; break; case WLAN_EID_MESH_CONFIG: - elems->mesh_config = pos; - elems->mesh_config_len = elen; + if (elen >= sizeof(struct ieee80211_meshconf_ie)) + elems->mesh_config = (void *)pos; break; case WLAN_EID_PEER_LINK: elems->peer_link = pos; diff --git a/net/wireless/scan.c b/net/wireless/scan.c index e2d344ff6745..d03447d68cdd 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -217,7 +217,7 @@ static bool is_mesh(struct cfg80211_bss *a, a->len_information_elements); if (!ie) return false; - if (ie[1] != IEEE80211_MESH_CONFIG_LEN) + if (ie[1] != sizeof(struct ieee80211_meshconf_ie)) return false; /* @@ -225,7 +225,8 @@ static bool is_mesh(struct cfg80211_bss *a, * comparing since that may differ between stations taking * part in the same mesh. */ - return memcmp(ie + 2, meshcfg, IEEE80211_MESH_CONFIG_LEN - 2) == 0; + return memcmp(ie + 2, meshcfg, + sizeof(struct ieee80211_meshconf_ie) - 2) == 0; } static int cmp_bss(struct cfg80211_bss *a, @@ -399,7 +400,7 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, res->pub.information_elements, res->pub.len_information_elements); if (!meshid || !meshcfg || - meshcfg[1] != IEEE80211_MESH_CONFIG_LEN) { + meshcfg[1] != sizeof(struct ieee80211_meshconf_ie)) { /* bogus mesh */ kref_put(&res->ref, bss_release); return NULL; @@ -865,7 +866,7 @@ ieee80211_bss(struct wiphy *wiphy, struct iw_request_info *info, break; case WLAN_EID_MESH_CONFIG: ismesh = true; - if (ie[1] != IEEE80211_MESH_CONFIG_LEN) + if (ie[1] != sizeof(struct ieee80211_meshconf_ie)) break; buf = kmalloc(50, GFP_ATOMIC); if (!buf) -- cgit v1.3-8-gc7d7 From 192dcf1d1775736627280a5dd4cb0f605b21857a Mon Sep 17 00:00:00 2001 From: Josh Stone Date: Wed, 18 Nov 2009 13:06:55 -0800 Subject: tracing: Remove the stale include/trace/power.h Commit 6161352 moved the power tracing to include/trace/events/, but left the old header behind. No one is using the old header, and its declarations are now incorrect, so it should be removed. Signed-off-by: Josh Stone Acked-by: Arjan van de Ven Cc: Frank Ch. Eigler Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Frederic Weisbecker LKML-Reference: <1258578415-14752-1-git-send-email-jistone@redhat.com> Signed-off-by: Ingo Molnar --- include/trace/power.h | 32 -------------------------------- 1 file changed, 32 deletions(-) delete mode 100644 include/trace/power.h (limited to 'include') diff --git a/include/trace/power.h b/include/trace/power.h deleted file mode 100644 index ef204666e983..000000000000 --- a/include/trace/power.h +++ /dev/null @@ -1,32 +0,0 @@ -#ifndef _TRACE_POWER_H -#define _TRACE_POWER_H - -#include -#include - -enum { - POWER_NONE = 0, - POWER_CSTATE = 1, - POWER_PSTATE = 2, -}; - -struct power_trace { - ktime_t stamp; - ktime_t end; - int type; - int state; -}; - -DECLARE_TRACE(power_start, - TP_PROTO(struct power_trace *it, unsigned int type, unsigned int state), - TP_ARGS(it, type, state)); - -DECLARE_TRACE(power_mark, - TP_PROTO(struct power_trace *it, unsigned int type, unsigned int state), - TP_ARGS(it, type, state)); - -DECLARE_TRACE(power_end, - TP_PROTO(struct power_trace *it), - TP_ARGS(it)); - -#endif /* _TRACE_POWER_H */ -- cgit v1.3-8-gc7d7 From 386e50cc7d82b3799ea6f53267f04f123ae05afe Mon Sep 17 00:00:00 2001 From: Andrew Hendry Date: Wed, 18 Nov 2009 23:30:41 -0800 Subject: X25: Enable setting of cause and diagnostic fields Adds SIOCX25SCAUSEDIAG, allowing X.25 programs to set the cause and diagnostic fields. Normally used to indicate status upon closing connections. Signed-off-by: Andrew Hendry Signed-off-by: David S. Miller --- include/linux/x25.h | 1 + net/x25/af_x25.c | 12 ++++++++++++ net/x25/x25_subr.c | 6 ++++++ 3 files changed, 19 insertions(+) (limited to 'include') diff --git a/include/linux/x25.h b/include/linux/x25.h index d035e4e87d07..6450a7f12074 100644 --- a/include/linux/x25.h +++ b/include/linux/x25.h @@ -25,6 +25,7 @@ #define SIOCX25SENDCALLACCPT (SIOCPROTOPRIVATE + 9) #define SIOCX25GDTEFACILITIES (SIOCPROTOPRIVATE + 10) #define SIOCX25SDTEFACILITIES (SIOCPROTOPRIVATE + 11) +#define SIOCX25SCAUSEDIAG (SIOCPROTOPRIVATE + 12) /* * Values for {get,set}sockopt. diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 39ce03e07d18..ac7dba46fa33 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -1471,6 +1471,17 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) break; } + case SIOCX25SCAUSEDIAG: { + struct x25_causediag causediag; + rc = -EFAULT; + if (copy_from_user(&causediag, argp, sizeof(causediag))) + break; + x25->causediag = causediag; + rc = 0; + break; + + } + case SIOCX25SCUDMATCHLEN: { struct x25_subaddr sub_addr; rc = -EINVAL; @@ -1639,6 +1650,7 @@ static int compat_x25_ioctl(struct socket *sock, unsigned int cmd, case SIOCX25GCALLUSERDATA: case SIOCX25SCALLUSERDATA: case SIOCX25GCAUSEDIAG: + case SIOCX25SCAUSEDIAG: case SIOCX25SCUDMATCHLEN: case SIOCX25CALLACCPTAPPRV: case SIOCX25SENDCALLACCPT: diff --git a/net/x25/x25_subr.c b/net/x25/x25_subr.c index 511a5986af3e..352b32d216fc 100644 --- a/net/x25/x25_subr.c +++ b/net/x25/x25_subr.c @@ -225,6 +225,12 @@ void x25_write_internal(struct sock *sk, int frametype) break; case X25_CLEAR_REQUEST: + dptr = skb_put(skb, 3); + *dptr++ = frametype; + *dptr++ = x25->causediag.cause; + *dptr++ = x25->causediag.diagnostic; + break; + case X25_RESET_REQUEST: dptr = skb_put(skb, 3); *dptr++ = frametype; -- cgit v1.3-8-gc7d7 From 5be83de54c16944dea9c16c6a5a53c1fa75ed304 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 19 Nov 2009 00:56:28 +0100 Subject: cfg80211: convert bools into flags We've accumulated a number of options for wiphys which make more sense as flags as we keep adding more. Convert the existing ones. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/ath/regd.c | 5 +-- drivers/net/wireless/iwlwifi/iwl-agn.c | 8 ++-- drivers/net/wireless/iwlwifi/iwl3945-base.c | 6 +-- drivers/net/wireless/mac80211_hwsim.c | 18 ++++----- drivers/net/wireless/p54/main.c | 2 +- include/net/cfg80211.h | 62 ++++++++++++++++------------- net/mac80211/main.c | 2 +- net/wireless/Kconfig | 6 --- net/wireless/core.c | 13 ++++-- net/wireless/nl80211.c | 2 +- net/wireless/reg.c | 13 +++--- 11 files changed, 70 insertions(+), 67 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/ath/regd.c b/drivers/net/wireless/ath/regd.c index 077bcc142cde..039ac490465c 100644 --- a/drivers/net/wireless/ath/regd.c +++ b/drivers/net/wireless/ath/regd.c @@ -450,7 +450,7 @@ ath_regd_init_wiphy(struct ath_regulatory *reg, const struct ieee80211_regdomain *regd; wiphy->reg_notifier = reg_notifier; - wiphy->strict_regulatory = true; + wiphy->flags |= WIPHY_FLAG_STRICT_REGULATORY; if (ath_is_world_regd(reg)) { /* @@ -458,8 +458,7 @@ ath_regd_init_wiphy(struct ath_regulatory *reg, * saved on the wiphy orig_* parameters */ regd = ath_world_regdomain(reg); - wiphy->custom_regulatory = true; - wiphy->strict_regulatory = false; + wiphy->flags |= WIPHY_FLAG_CUSTOM_REGULATORY; } else { /* * This gets applied in the case of the absense of CRDA, diff --git a/drivers/net/wireless/iwlwifi/iwl-agn.c b/drivers/net/wireless/iwlwifi/iwl-agn.c index b80cd0bc5845..e8f405a01e3d 100644 --- a/drivers/net/wireless/iwlwifi/iwl-agn.c +++ b/drivers/net/wireless/iwlwifi/iwl-agn.c @@ -2400,16 +2400,14 @@ static int iwl_setup_mac(struct iwl_priv *priv) BIT(NL80211_IFTYPE_STATION) | BIT(NL80211_IFTYPE_ADHOC); - hw->wiphy->custom_regulatory = true; - - /* Firmware does not support this */ - hw->wiphy->disable_beacon_hints = true; + hw->wiphy->flags |= WIPHY_FLAG_STRICT_REGULATORY | + WIPHY_FLAG_DISABLE_BEACON_HINTS; /* * For now, disable PS by default because it affects * RX performance significantly. */ - hw->wiphy->ps_default = false; + hw->wiphy->flags &= ~WIPHY_FLAG_PS_ON_BY_DEFAULT; hw->wiphy->max_scan_ssids = PROBE_OPTION_MAX; /* we create the 802.11 header and a zero-length SSID element */ diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c index 31f3c42b835c..5b7e80e5bab4 100644 --- a/drivers/net/wireless/iwlwifi/iwl3945-base.c +++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c @@ -3904,10 +3904,8 @@ static int iwl3945_setup_mac(struct iwl_priv *priv) BIT(NL80211_IFTYPE_STATION) | BIT(NL80211_IFTYPE_ADHOC); - hw->wiphy->custom_regulatory = true; - - /* Firmware does not support this */ - hw->wiphy->disable_beacon_hints = true; + hw->wiphy->flags |= WIPHY_FLAG_STRICT_REGULATORY | + WIPHY_FLAG_DISABLE_BEACON_HINTS; hw->wiphy->max_scan_ssids = PROBE_OPTION_MAX_3945; /* we create the 802.11 header and a zero-length SSID element */ diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index fc4ec48eda12..88e41176e7fd 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -1146,46 +1146,46 @@ static int __init init_mac80211_hwsim(void) break; case HWSIM_REGTEST_WORLD_ROAM: if (i == 0) { - hw->wiphy->custom_regulatory = true; + hw->wiphy->flags |= WIPHY_FLAG_CUSTOM_REGULATORY; wiphy_apply_custom_regulatory(hw->wiphy, &hwsim_world_regdom_custom_01); } break; case HWSIM_REGTEST_CUSTOM_WORLD: - hw->wiphy->custom_regulatory = true; + hw->wiphy->flags |= WIPHY_FLAG_CUSTOM_REGULATORY; wiphy_apply_custom_regulatory(hw->wiphy, &hwsim_world_regdom_custom_01); break; case HWSIM_REGTEST_CUSTOM_WORLD_2: if (i == 0) { - hw->wiphy->custom_regulatory = true; + hw->wiphy->flags |= WIPHY_FLAG_CUSTOM_REGULATORY; wiphy_apply_custom_regulatory(hw->wiphy, &hwsim_world_regdom_custom_01); } else if (i == 1) { - hw->wiphy->custom_regulatory = true; + hw->wiphy->flags |= WIPHY_FLAG_CUSTOM_REGULATORY; wiphy_apply_custom_regulatory(hw->wiphy, &hwsim_world_regdom_custom_02); } break; case HWSIM_REGTEST_STRICT_ALL: - hw->wiphy->strict_regulatory = true; + hw->wiphy->flags |= WIPHY_FLAG_STRICT_REGULATORY; break; case HWSIM_REGTEST_STRICT_FOLLOW: case HWSIM_REGTEST_STRICT_AND_DRIVER_REG: if (i == 0) - hw->wiphy->strict_regulatory = true; + hw->wiphy->flags |= WIPHY_FLAG_STRICT_REGULATORY; break; case HWSIM_REGTEST_ALL: if (i == 0) { - hw->wiphy->custom_regulatory = true; + hw->wiphy->flags |= WIPHY_FLAG_CUSTOM_REGULATORY; wiphy_apply_custom_regulatory(hw->wiphy, &hwsim_world_regdom_custom_01); } else if (i == 1) { - hw->wiphy->custom_regulatory = true; + hw->wiphy->flags |= WIPHY_FLAG_CUSTOM_REGULATORY; wiphy_apply_custom_regulatory(hw->wiphy, &hwsim_world_regdom_custom_02); } else if (i == 4) - hw->wiphy->strict_regulatory = true; + hw->wiphy->flags |= WIPHY_FLAG_STRICT_REGULATORY; break; default: break; diff --git a/drivers/net/wireless/p54/main.c b/drivers/net/wireless/p54/main.c index 4d486bf9f725..18012dbfb45d 100644 --- a/drivers/net/wireless/p54/main.c +++ b/drivers/net/wireless/p54/main.c @@ -579,7 +579,7 @@ struct ieee80211_hw *p54_init_common(size_t priv_data_len) * For now, disable PS by default because it affects * link stability significantly. */ - dev->wiphy->ps_default = false; + dev->wiphy->flags &= ~WIPHY_FLAG_PS_ON_BY_DEFAULT; mutex_init(&priv->conf_mutex); mutex_init(&priv->eeprom_mutex); diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 21710fc17eaf..eca36abca8f5 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1108,27 +1108,45 @@ struct cfg80211_ops { */ /** - * struct wiphy - wireless hardware description - * @idx: the wiphy index assigned to this item - * @class_dev: the class device representing /sys/class/ieee80211/ - * @custom_regulatory: tells us the driver for this device + * enum wiphy_flags - wiphy capability flags + * + * @WIPHY_FLAG_CUSTOM_REGULATORY: tells us the driver for this device * has its own custom regulatory domain and cannot identify the * ISO / IEC 3166 alpha2 it belongs to. When this is enabled * we will disregard the first regulatory hint (when the * initiator is %REGDOM_SET_BY_CORE). - * @strict_regulatory: tells us the driver for this device will ignore - * regulatory domain settings until it gets its own regulatory domain - * via its regulatory_hint(). After its gets its own regulatory domain - * it will only allow further regulatory domain settings to further - * enhance compliance. For example if channel 13 and 14 are disabled - * by this regulatory domain no user regulatory domain can enable these - * channels at a later time. This can be used for devices which do not - * have calibration information gauranteed for frequencies or settings - * outside of its regulatory domain. - * @disable_beacon_hints: enable this if your driver needs to ensure that - * passive scan flags and beaconing flags may not be lifted by cfg80211 - * due to regulatory beacon hints. For more information on beacon + * @WIPHY_FLAG_STRICT_REGULATORY: tells us the driver for this device will + * ignore regulatory domain settings until it gets its own regulatory + * domain via its regulatory_hint(). After its gets its own regulatory + * domain it will only allow further regulatory domain settings to + * further enhance compliance. For example if channel 13 and 14 are + * disabled by this regulatory domain no user regulatory domain can + * enable these channels at a later time. This can be used for devices + * which do not have calibration information gauranteed for frequencies + * or settings outside of its regulatory domain. + * @WIPHY_FLAG_DISABLE_BEACON_HINTS: enable this if your driver needs to ensure + * that passive scan flags and beaconing flags may not be lifted by + * cfg80211 due to regulatory beacon hints. For more information on beacon * hints read the documenation for regulatory_hint_found_beacon() + * @WIPHY_FLAG_NETNS_OK: if not set, do not allow changing the netns of this + * wiphy at all + * @WIPHY_FLAG_PS_ON_BY_DEFAULT: if set to true, powersave will be enabled + * by default -- this flag will be set depending on the kernel's default + * on wiphy_new(), but can be changed by the driver if it has a good + * reason to override the default + */ +enum wiphy_flags { + WIPHY_FLAG_CUSTOM_REGULATORY = BIT(0), + WIPHY_FLAG_STRICT_REGULATORY = BIT(1), + WIPHY_FLAG_DISABLE_BEACON_HINTS = BIT(2), + WIPHY_FLAG_NETNS_OK = BIT(3), + WIPHY_FLAG_PS_ON_BY_DEFAULT = BIT(4), +}; + +/** + * struct wiphy - wireless hardware description + * @idx: the wiphy index assigned to this item + * @class_dev: the class device representing /sys/class/ieee80211/ * @reg_notifier: the driver's regulatory notification callback * @regd: the driver's regulatory domain, if one was requested via * the regulatory_hint() API. This can be used by the driver @@ -1143,11 +1161,6 @@ struct cfg80211_ops { * -1 = fragmentation disabled, only odd values >= 256 used * @rts_threshold: RTS threshold (dot11RTSThreshold); -1 = RTS/CTS disabled * @net: the network namespace this wiphy currently lives in - * @netnsok: if set to false, do not allow changing the netns of this - * wiphy at all - * @ps_default: default for powersave, will be set depending on the - * kernel's default on wiphy_new(), but can be changed by the - * driver if it has a good reason to override the default */ struct wiphy { /* assign these fields before you register the wiphy */ @@ -1158,12 +1171,7 @@ struct wiphy { /* Supported interface modes, OR together BIT(NL80211_IFTYPE_...) */ u16 interface_modes; - bool custom_regulatory; - bool strict_regulatory; - bool disable_beacon_hints; - - bool netnsok; - bool ps_default; + u32 flags; enum cfg80211_signal_type signal_type; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index e843a912849c..8084b622e97e 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -328,7 +328,7 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, if (!wiphy) return NULL; - wiphy->netnsok = true; + wiphy->flags |= WIPHY_FLAG_NETNS_OK; wiphy->privid = mac80211_wiphy_privid; /* Yes, putting cfg80211_bss into ieee80211_bss is a hack */ diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index 614bdcec1c80..90e93a5701aa 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -74,12 +74,6 @@ config CFG80211_REG_DEBUG If unsure, say N. -config CFG80211_DEFAULT_PS_VALUE - int - default 1 if CFG80211_DEFAULT_PS - default 0 - depends on CFG80211 - config CFG80211_DEFAULT_PS bool "enable powersave by default" depends on CFG80211 diff --git a/net/wireless/core.c b/net/wireless/core.c index 02835172b227..e2cc6e7522dd 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -230,7 +230,7 @@ int cfg80211_switch_netns(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev; int err = 0; - if (!rdev->wiphy.netnsok) + if (!(rdev->wiphy.flags & WIPHY_FLAG_NETNS_OK)) return -EOPNOTSUPP; list_for_each_entry(wdev, &rdev->netdev_list, list) { @@ -367,7 +367,9 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) rdev->wiphy.dev.class = &ieee80211_class; rdev->wiphy.dev.platform_data = rdev; - rdev->wiphy.ps_default = CONFIG_CFG80211_DEFAULT_PS_VALUE; +#ifdef CONFIG_CFG80211_DEFAULT_PS + rdev->wiphy.flags |= WIPHY_FLAG_PS_ON_BY_DEFAULT; +#endif wiphy_net_set(&rdev->wiphy, &init_net); @@ -482,7 +484,7 @@ int wiphy_register(struct wiphy *wiphy) if (IS_ERR(rdev->wiphy.debugfsdir)) rdev->wiphy.debugfsdir = NULL; - if (wiphy->custom_regulatory) { + if (wiphy->flags & WIPHY_FLAG_CUSTOM_REGULATORY) { struct regulatory_request request; request.wiphy_idx = get_wiphy_idx(wiphy); @@ -680,7 +682,10 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, wdev->wext.default_key = -1; wdev->wext.default_mgmt_key = -1; wdev->wext.connect.auth_type = NL80211_AUTHTYPE_AUTOMATIC; - wdev->wext.ps = wdev->wiphy->ps_default; + if (wdev->wiphy->flags & WIPHY_FLAG_PS_ON_BY_DEFAULT) + wdev->wext.ps = true; + else + wdev->wext.ps = false; wdev->wext.ps_timeout = 100; if (rdev->ops->set_power_mgmt) if (rdev->ops->set_power_mgmt(wdev->wiphy, dev, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 37264d56bace..6634188f9453 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -561,7 +561,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, CMD(deauth, DEAUTHENTICATE); CMD(disassoc, DISASSOCIATE); CMD(join_ibss, JOIN_IBSS); - if (dev->wiphy.netnsok) { + if (dev->wiphy.flags & WIPHY_FLAG_NETNS_OK) { i++; NLA_PUT_U32(msg, i, NL80211_CMD_SET_WIPHY_NETNS); } diff --git a/net/wireless/reg.c b/net/wireless/reg.c index f256dfffbf46..1f33017737fd 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1008,7 +1008,7 @@ static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band, if (last_request->initiator == NL80211_REGDOM_SET_BY_DRIVER && request_wiphy && request_wiphy == wiphy && - request_wiphy->strict_regulatory) { + request_wiphy->flags & WIPHY_FLAG_STRICT_REGULATORY) { /* * This gaurantees the driver's requested regulatory domain * will always be used as a base for further regulatory @@ -1051,13 +1051,13 @@ static bool ignore_reg_update(struct wiphy *wiphy, if (!last_request) return true; if (initiator == NL80211_REGDOM_SET_BY_CORE && - wiphy->custom_regulatory) + wiphy->flags & WIPHY_FLAG_CUSTOM_REGULATORY) return true; /* * wiphy->regd will be set once the device has its own * desired regulatory domain set */ - if (wiphy->strict_regulatory && !wiphy->regd && + if (wiphy->flags & WIPHY_FLAG_STRICT_REGULATORY && !wiphy->regd && !is_world_regdom(last_request->alpha2)) return true; return false; @@ -1093,7 +1093,7 @@ static void handle_reg_beacon(struct wiphy *wiphy, chan->beacon_found = true; - if (wiphy->disable_beacon_hints) + if (wiphy->flags & WIPHY_FLAG_DISABLE_BEACON_HINTS) return; chan_before.center_freq = chan->center_freq; @@ -1164,7 +1164,7 @@ static bool reg_is_world_roaming(struct wiphy *wiphy) return true; if (last_request && last_request->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE && - wiphy->custom_regulatory) + wiphy->flags & WIPHY_FLAG_CUSTOM_REGULATORY) return true; return false; } @@ -1591,7 +1591,8 @@ static void reg_process_hint(struct regulatory_request *reg_request) r = __regulatory_hint(wiphy, reg_request); /* This is required so that the orig_* parameters are saved */ - if (r == -EALREADY && wiphy && wiphy->strict_regulatory) + if (r == -EALREADY && wiphy && + wiphy->flags & WIPHY_FLAG_STRICT_REGULATORY) wiphy_update_regulatory(wiphy, reg_request->initiator); out: mutex_unlock(®_mutex); -- cgit v1.3-8-gc7d7 From 9bc383de37090ba7ca3ff32a12c9d809dc5867f0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 19 Nov 2009 11:55:19 +0100 Subject: cfg80211: introduce capability for 4addr mode It's very likely that not many devices will support four-address mode in station or AP mode so introduce capability bits for both modes, set them in mac80211 and check them when userspace tries to use the mode. Also, keep track of 4addr in cfg80211 (wireless_dev) and not in mac80211 any more. mac80211 can also be improved for the VLAN case by not looking at the 4addr flag but maintaining the station pointer for it correctly. However, keep track of use_4addr for station mode in mac80211 to avoid all the derefs. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/cfg80211.h | 11 +++++++++++ net/mac80211/cfg.c | 21 ++++++++------------- net/mac80211/ieee80211_i.h | 4 ++-- net/mac80211/iface.c | 12 ++++++++---- net/mac80211/main.c | 4 +++- net/mac80211/rx.c | 14 +++++++++----- net/mac80211/tx.c | 7 +++---- net/wireless/nl80211.c | 34 +++++++++++++++++++++++++++++++++- net/wireless/util.c | 5 +++++ 9 files changed, 82 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index eca36abca8f5..d1e05aeb0c09 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1134,6 +1134,9 @@ struct cfg80211_ops { * by default -- this flag will be set depending on the kernel's default * on wiphy_new(), but can be changed by the driver if it has a good * reason to override the default + * @WIPHY_FLAG_4ADDR_AP: supports 4addr mode even on AP (with a single station + * on a VLAN interface) + * @WIPHY_FLAG_4ADDR_STATION: supports 4addr mode even as a station */ enum wiphy_flags { WIPHY_FLAG_CUSTOM_REGULATORY = BIT(0), @@ -1141,6 +1144,8 @@ enum wiphy_flags { WIPHY_FLAG_DISABLE_BEACON_HINTS = BIT(2), WIPHY_FLAG_NETNS_OK = BIT(3), WIPHY_FLAG_PS_ON_BY_DEFAULT = BIT(4), + WIPHY_FLAG_4ADDR_AP = BIT(5), + WIPHY_FLAG_4ADDR_STATION = BIT(6), }; /** @@ -1366,6 +1371,10 @@ struct cfg80211_cached_keys; * @ssid_len: (private) Used by the internal configuration code * @wext: (private) Used by the internal wireless extensions compat code * @wext_bssid: (private) Used by the internal wireless extensions compat code + * @use_4addr: indicates 4addr mode is used on this interface, must be + * set by driver (if supported) on add_interface BEFORE registering the + * netdev and may otherwise be used by driver read-only, will be update + * by cfg80211 on change_interface */ struct wireless_dev { struct wiphy *wiphy; @@ -1379,6 +1388,8 @@ struct wireless_dev { struct work_struct cleanup_work; + bool use_4addr; + /* currently used for IBSS and SME - might be rearranged later */ u8 ssid[IEEE80211_MAX_SSID_LEN]; u8 ssid_len; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 7d591816ed10..c484a882140e 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -42,15 +42,6 @@ static bool nl80211_params_check(enum nl80211_iftype type, if (!nl80211_type_check(type)) return false; - if (params->use_4addr > 0) { - switch(type) { - case NL80211_IFTYPE_AP_VLAN: - case NL80211_IFTYPE_STATION: - break; - default: - return false; - } - } return true; } @@ -107,12 +98,16 @@ static int ieee80211_change_iface(struct wiphy *wiphy, params->mesh_id_len, params->mesh_id); - if (params->use_4addr >= 0) - sdata->use_4addr = !!params->use_4addr; - if (sdata->vif.type != NL80211_IFTYPE_MONITOR || !flags) return 0; + if (type == NL80211_IFTYPE_AP_VLAN && + params && params->use_4addr == 0) + rcu_assign_pointer(sdata->u.vlan.sta, NULL); + else if (type == NL80211_IFTYPE_STATION && + params && params->use_4addr >= 0) + sdata->u.mgd.use_4addr = params->use_4addr; + sdata->u.mntr_flags = *flags; return 0; } @@ -827,7 +822,7 @@ static int ieee80211_change_station(struct wiphy *wiphy, return -EINVAL; } - if (vlansdata->use_4addr) { + if (params->vlan->ieee80211_ptr->use_4addr) { if (vlansdata->u.vlan.sta) return -EBUSY; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 87d27f450a05..f13d76c9b575 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -312,6 +312,8 @@ struct ieee80211_if_managed { } mfp; /* management frame protection */ int wmm_last_param_set; + + u8 use_4addr; }; enum ieee80211_ibss_request { @@ -459,8 +461,6 @@ struct ieee80211_sub_if_data { int force_unicast_rateidx; /* forced TX rateidx for unicast frames */ int max_ratectrl_rateidx; /* max TX rateidx for rate control */ - bool use_4addr; /* use 4-address frames */ - union { struct ieee80211_if_ap ap; struct ieee80211_if_wds wds; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 1f02b0610e82..1bf12a26b45e 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -752,7 +752,8 @@ int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata, ieee80211_mandatory_rates(sdata->local, sdata->local->hw.conf.channel->band); sdata->drop_unencrypted = 0; - sdata->use_4addr = 0; + if (type == NL80211_IFTYPE_STATION) + sdata->u.mgd.use_4addr = false; return 0; } @@ -810,6 +811,12 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, /* setup type-dependent data */ ieee80211_setup_sdata(sdata, type); + if (params) { + ndev->ieee80211_ptr->use_4addr = params->use_4addr; + if (type == NL80211_IFTYPE_STATION) + sdata->u.mgd.use_4addr = params->use_4addr; + } + ret = register_netdevice(ndev); if (ret) goto fail; @@ -820,9 +827,6 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, params->mesh_id_len, params->mesh_id); - if (params && params->use_4addr >= 0) - sdata->use_4addr = !!params->use_4addr; - mutex_lock(&local->iflist_mtx); list_add_tail_rcu(&sdata->list, &local->interfaces); mutex_unlock(&local->iflist_mtx); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 8084b622e97e..dd8ec8d5e8b2 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -328,7 +328,9 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, if (!wiphy) return NULL; - wiphy->flags |= WIPHY_FLAG_NETNS_OK; + wiphy->flags |= WIPHY_FLAG_NETNS_OK | + WIPHY_FLAG_4ADDR_AP | + WIPHY_FLAG_4ADDR_STATION; wiphy->privid = mac80211_wiphy_privid; /* Yes, putting cfg80211_bss into ieee80211_bss is a hack */ diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 775365f856c9..96f13ad05d3c 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1192,10 +1192,13 @@ __ieee80211_data_to_8023(struct ieee80211_rx_data *rx) struct net_device *dev = sdata->dev; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; - if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN && !sdata->use_4addr && - ieee80211_has_a4(hdr->frame_control)) + if (ieee80211_has_a4(hdr->frame_control) && + sdata->vif.type == NL80211_IFTYPE_AP_VLAN && !sdata->u.vlan.sta) return -1; - if (sdata->use_4addr && is_multicast_ether_addr(hdr->addr1)) + + if (is_multicast_ether_addr(hdr->addr1) && + ((sdata->vif.type == NL80211_IFTYPE_AP_VLAN && sdata->u.vlan.sta) || + (sdata->vif.type == NL80211_IFTYPE_STATION && sdata->u.mgd.use_4addr))) return -1; return ieee80211_data_to_8023(rx->skb, dev->dev_addr, sdata->vif.type); @@ -1245,7 +1248,8 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx) if ((sdata->vif.type == NL80211_IFTYPE_AP || sdata->vif.type == NL80211_IFTYPE_AP_VLAN) && !(sdata->flags & IEEE80211_SDATA_DONT_BRIDGE_PACKETS) && - (rx->flags & IEEE80211_RX_RA_MATCH) && !rx->sdata->use_4addr) { + (rx->flags & IEEE80211_RX_RA_MATCH) && + (sdata->vif.type != NL80211_IFTYPE_AP_VLAN || !sdata->u.vlan.sta)) { if (is_multicast_ether_addr(ehdr->h_dest)) { /* * send multicast frames both to higher layers in @@ -2007,7 +2011,7 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata, switch (sdata->vif.type) { case NL80211_IFTYPE_STATION: - if (!bssid && !sdata->use_4addr) + if (!bssid && !sdata->u.mgd.use_4addr) return 0; if (!multicast && compare_ether_addr(sdata->dev->dev_addr, hdr->addr1) != 0) { diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index b3c1faeb5927..5af2f40ea4db 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1051,7 +1051,7 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, hdr = (struct ieee80211_hdr *) skb->data; - if ((sdata->vif.type == NL80211_IFTYPE_AP_VLAN) && sdata->use_4addr) + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) tx->sta = rcu_dereference(sdata->u.vlan.sta); if (!tx->sta) tx->sta = sta_info_get(local, hdr->addr1); @@ -1632,8 +1632,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, switch (sdata->vif.type) { case NL80211_IFTYPE_AP_VLAN: rcu_read_lock(); - if (sdata->use_4addr) - sta = rcu_dereference(sdata->u.vlan.sta); + sta = rcu_dereference(sdata->u.vlan.sta); if (sta) { fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS); /* RA TA DA SA */ @@ -1727,7 +1726,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, #endif case NL80211_IFTYPE_STATION: memcpy(hdr.addr1, sdata->u.mgd.bssid, ETH_ALEN); - if (sdata->use_4addr && ethertype != ETH_P_PAE) { + if (sdata->u.mgd.use_4addr && ethertype != ETH_P_PAE) { fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS); /* RA TA DA SA */ memcpy(hdr.addr2, dev->dev_addr, ETH_ALEN); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 6634188f9453..b7b0f67b0c61 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -968,6 +968,28 @@ static int parse_monitor_flags(struct nlattr *nla, u32 *mntrflags) return 0; } +static int nl80211_valid_4addr(struct cfg80211_registered_device *rdev, + u8 use_4addr, enum nl80211_iftype iftype) +{ + if (!use_4addr) + return 0; + + switch (iftype) { + case NL80211_IFTYPE_AP_VLAN: + if (rdev->wiphy.flags & WIPHY_FLAG_4ADDR_AP) + return 0; + break; + case NL80211_IFTYPE_STATION: + if (rdev->wiphy.flags & WIPHY_FLAG_4ADDR_STATION) + return 0; + break; + default: + break; + } + + return -EOPNOTSUPP; +} + static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev; @@ -1011,6 +1033,9 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_4ADDR]) { params.use_4addr = !!nla_get_u8(info->attrs[NL80211_ATTR_4ADDR]); change = true; + err = nl80211_valid_4addr(rdev, params.use_4addr, ntype); + if (err) + goto unlock; } else { params.use_4addr = -1; } @@ -1034,6 +1059,9 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) else err = 0; + if (!err && params.use_4addr != -1) + dev->ieee80211_ptr->use_4addr = params.use_4addr; + unlock: dev_put(dev); cfg80211_unlock_rdev(rdev); @@ -1081,8 +1109,12 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) params.mesh_id_len = nla_len(info->attrs[NL80211_ATTR_MESH_ID]); } - if (info->attrs[NL80211_ATTR_4ADDR]) + if (info->attrs[NL80211_ATTR_4ADDR]) { params.use_4addr = !!nla_get_u8(info->attrs[NL80211_ATTR_4ADDR]); + err = nl80211_valid_4addr(rdev, params.use_4addr, type); + if (err) + goto unlock; + } err = parse_monitor_flags(type == NL80211_IFTYPE_MONITOR ? info->attrs[NL80211_ATTR_MNTR_FLAGS] : NULL, diff --git a/net/wireless/util.c b/net/wireless/util.c index 5aa39f7cf9b9..17a7a4cfc617 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -659,6 +659,8 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, return -EOPNOTSUPP; if (ntype != otype) { + dev->ieee80211_ptr->use_4addr = false; + switch (otype) { case NL80211_IFTYPE_ADHOC: cfg80211_leave_ibss(rdev, dev, false); @@ -682,5 +684,8 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, WARN_ON(!err && dev->ieee80211_ptr->iftype != ntype); + if (!err && params && params->use_4addr != -1) + dev->ieee80211_ptr->use_4addr = params->use_4addr; + return err; } -- cgit v1.3-8-gc7d7 From ad4bb6f8883a13bb0f65b194dae36c62a02ac779 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 19 Nov 2009 00:56:30 +0100 Subject: cfg80211: disallow bridging managed/adhoc interfaces A number of people have tried to add a wireless interface (in managed mode) to a bridge and then complained that it doesn't work. It cannot work, however, because in 802.11 networks all packets need to be acknowledged and as such need to be sent to the right address. Promiscuous doesn't help here. The wireless address format used for these links has only space for three addresses, the * transmitter, which must be equal to the sender (origin) * receiver (on the wireless medium), which is the AP in the case of managed mode * the recipient (destination), which is on the APs local network segment In an IBSS, it is similar, but the receiver and recipient must match and the third address is used as the BSSID. To avoid such mistakes in the future, disallow adding a wireless interface to a bridge. Felix has recently added a four-address mode to the AP and client side that can be used (after negotiating that it is possible, which must happen out-of-band by setting up both sides) for bridging, so allow that case. Signed-off-by: Johannes Berg Acked-by: Stephen Hemminger Signed-off-by: John W. Linville --- include/linux/if.h | 1 + net/bridge/br_if.c | 4 ++++ net/wireless/core.c | 4 ++++ net/wireless/nl80211.c | 12 ++++++++---- net/wireless/util.c | 31 +++++++++++++++++++++++++++++++ 5 files changed, 48 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/if.h b/include/linux/if.h index 3b2a46bf8f8d..3a9f410a296b 100644 --- a/include/linux/if.h +++ b/include/linux/if.h @@ -70,6 +70,7 @@ #define IFF_XMIT_DST_RELEASE 0x400 /* dev_hard_start_xmit() is allowed to * release skb->dst */ +#define IFF_DONT_BRIDGE 0x800 /* disallow bridging this ether dev */ #define IF_GET_IFACE 0x0001 /* for querying only */ #define IF_GET_PROTO 0x0002 diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index a6f74b2b9571..a2cbe61f6e65 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -390,6 +390,10 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) if (dev->br_port != NULL) return -EBUSY; + /* No bridging devices that dislike that (e.g. wireless) */ + if (dev->priv_flags & IFF_DONT_BRIDGE) + return -EOPNOTSUPP; + p = new_nbp(br, dev); if (IS_ERR(p)) return PTR_ERR(p); diff --git a/net/wireless/core.c b/net/wireless/core.c index e2cc6e7522dd..fc5e9b508607 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -697,6 +697,10 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, #endif if (!dev->ethtool_ops) dev->ethtool_ops = &cfg80211_ethtool_ops; + + if ((wdev->iftype == NL80211_IFTYPE_STATION || + wdev->iftype == NL80211_IFTYPE_ADHOC) && !wdev->use_4addr) + dev->priv_flags |= IFF_DONT_BRIDGE; break; case NETDEV_GOING_DOWN: switch (wdev->iftype) { diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index b7b0f67b0c61..149539ade15e 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -969,10 +969,14 @@ static int parse_monitor_flags(struct nlattr *nla, u32 *mntrflags) } static int nl80211_valid_4addr(struct cfg80211_registered_device *rdev, - u8 use_4addr, enum nl80211_iftype iftype) + struct net_device *netdev, u8 use_4addr, + enum nl80211_iftype iftype) { - if (!use_4addr) + if (!use_4addr) { + if (netdev && netdev->br_port) + return -EBUSY; return 0; + } switch (iftype) { case NL80211_IFTYPE_AP_VLAN: @@ -1033,7 +1037,7 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_4ADDR]) { params.use_4addr = !!nla_get_u8(info->attrs[NL80211_ATTR_4ADDR]); change = true; - err = nl80211_valid_4addr(rdev, params.use_4addr, ntype); + err = nl80211_valid_4addr(rdev, dev, params.use_4addr, ntype); if (err) goto unlock; } else { @@ -1111,7 +1115,7 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_4ADDR]) { params.use_4addr = !!nla_get_u8(info->attrs[NL80211_ATTR_4ADDR]); - err = nl80211_valid_4addr(rdev, params.use_4addr, type); + err = nl80211_valid_4addr(rdev, NULL, params.use_4addr, type); if (err) goto unlock; } diff --git a/net/wireless/util.c b/net/wireless/util.c index 17a7a4cfc617..59361fdcb5d0 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -658,6 +658,11 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, !(rdev->wiphy.interface_modes & (1 << ntype))) return -EOPNOTSUPP; + /* if it's part of a bridge, reject changing type to station/ibss */ + if (dev->br_port && (ntype == NL80211_IFTYPE_ADHOC || + ntype == NL80211_IFTYPE_STATION)) + return -EBUSY; + if (ntype != otype) { dev->ieee80211_ptr->use_4addr = false; @@ -687,5 +692,31 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, if (!err && params && params->use_4addr != -1) dev->ieee80211_ptr->use_4addr = params->use_4addr; + if (!err) { + dev->priv_flags &= ~IFF_DONT_BRIDGE; + switch (ntype) { + case NL80211_IFTYPE_STATION: + if (dev->ieee80211_ptr->use_4addr) + break; + /* fall through */ + case NL80211_IFTYPE_ADHOC: + dev->priv_flags |= IFF_DONT_BRIDGE; + break; + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: + case NL80211_IFTYPE_WDS: + case NL80211_IFTYPE_MESH_POINT: + /* bridging OK */ + break; + case NL80211_IFTYPE_MONITOR: + /* monitor can't bridge anyway */ + break; + case NL80211_IFTYPE_UNSPECIFIED: + case __NL80211_IFTYPE_AFTER_LAST: + /* not happening */ + break; + } + } + return err; } -- cgit v1.3-8-gc7d7 From 7351c6bd482712e5e3ec9dffc547de0e0863efb0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 19 Nov 2009 01:08:30 +0100 Subject: mac80211: request TX status where needed Right now all frames mac80211 hands to the driver have the IEEE80211_TX_CTL_REQ_TX_STATUS flag set to request TX status. This isn't really necessary, only the injected frames need TX status (the latter for hostapd) so move setting this flag. The rate control algorithms also need TX status, but they don't require it. Also, rt2x00 uses that bit for its own purposes and seems to require it being set for all frames, but that can be fixed in rt2x00. This doesn't really change anything for any drivers but in the future drivers using hw-rate control may opt to not report TX status for frames that don't have the IEEE80211_TX_CTL_REQ_TX_STATUS flag set. Signed-off-by: Johannes Berg Acked-by: Ivo van Doorn [rt2x00 bits] Signed-off-by: John W. Linville --- drivers/net/wireless/rt2x00/rt2x00dev.c | 11 ++++++----- drivers/net/wireless/rt2x00/rt2x00lib.h | 4 +++- drivers/net/wireless/rt2x00/rt2x00mac.c | 5 ++--- drivers/net/wireless/rt2x00/rt2x00queue.c | 6 +++++- drivers/net/wireless/rt2x00/rt2x00queue.h | 5 ++++- include/net/mac80211.h | 2 +- net/mac80211/tx.c | 4 ++-- 7 files changed, 23 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/rt2x00/rt2x00dev.c b/drivers/net/wireless/rt2x00/rt2x00dev.c index 6c6d0ac35549..4a4b7e42fe6e 100644 --- a/drivers/net/wireless/rt2x00/rt2x00dev.c +++ b/drivers/net/wireless/rt2x00/rt2x00dev.c @@ -205,6 +205,7 @@ void rt2x00lib_txdone(struct queue_entry *entry, enum data_queue_qid qid = skb_get_queue_mapping(entry->skb); unsigned int header_length = ieee80211_get_hdrlen_from_skb(entry->skb); u8 rate_idx, rate_flags, retry_rates; + u8 skbdesc_flags = skbdesc->flags; unsigned int i; bool success; @@ -287,12 +288,12 @@ void rt2x00lib_txdone(struct queue_entry *entry, } /* - * Only send the status report to mac80211 when TX status was - * requested by it. If this was a extra frame coming through - * a mac80211 library call (RTS/CTS) then we should not send the - * status report back. + * Only send the status report to mac80211 when it's a frame + * that originated in mac80211. If this was a extra frame coming + * through a mac80211 library call (RTS/CTS) then we should not + * send the status report back. */ - if (tx_info->flags & IEEE80211_TX_CTL_REQ_TX_STATUS) + if (!(skbdesc_flags & SKBDESC_NOT_MAC80211)) ieee80211_tx_status_irqsafe(rt2x00dev->hw, entry->skb); else dev_kfree_skb_irq(entry->skb); diff --git a/drivers/net/wireless/rt2x00/rt2x00lib.h b/drivers/net/wireless/rt2x00/rt2x00lib.h index c1f48acaee41..be2e37fb4071 100644 --- a/drivers/net/wireless/rt2x00/rt2x00lib.h +++ b/drivers/net/wireless/rt2x00/rt2x00lib.h @@ -162,8 +162,10 @@ void rt2x00queue_remove_l2pad(struct sk_buff *skb, unsigned int header_length); * rt2x00queue_write_tx_frame - Write TX frame to hardware * @queue: Queue over which the frame should be send * @skb: The skb to send + * @local: frame is not from mac80211 */ -int rt2x00queue_write_tx_frame(struct data_queue *queue, struct sk_buff *skb); +int rt2x00queue_write_tx_frame(struct data_queue *queue, struct sk_buff *skb, + bool local); /** * rt2x00queue_update_beacon - Send new beacon from mac80211 to hardware diff --git a/drivers/net/wireless/rt2x00/rt2x00mac.c b/drivers/net/wireless/rt2x00/rt2x00mac.c index eed093d34532..9c90ceb0ffcc 100644 --- a/drivers/net/wireless/rt2x00/rt2x00mac.c +++ b/drivers/net/wireless/rt2x00/rt2x00mac.c @@ -66,7 +66,6 @@ static int rt2x00mac_tx_rts_cts(struct rt2x00_dev *rt2x00dev, rts_info = IEEE80211_SKB_CB(skb); rts_info->control.rates[0].flags &= ~IEEE80211_TX_RC_USE_RTS_CTS; rts_info->control.rates[0].flags &= ~IEEE80211_TX_RC_USE_CTS_PROTECT; - rts_info->flags &= ~IEEE80211_TX_CTL_REQ_TX_STATUS; if (tx_info->control.rates[0].flags & IEEE80211_TX_RC_USE_CTS_PROTECT) rts_info->flags |= IEEE80211_TX_CTL_NO_ACK; @@ -91,7 +90,7 @@ static int rt2x00mac_tx_rts_cts(struct rt2x00_dev *rt2x00dev, frag_skb->data, data_length, tx_info, (struct ieee80211_rts *)(skb->data)); - retval = rt2x00queue_write_tx_frame(queue, skb); + retval = rt2x00queue_write_tx_frame(queue, skb, true); if (retval) { dev_kfree_skb_any(skb); WARNING(rt2x00dev, "Failed to send RTS/CTS frame.\n"); @@ -153,7 +152,7 @@ int rt2x00mac_tx(struct ieee80211_hw *hw, struct sk_buff *skb) goto exit_fail; } - if (rt2x00queue_write_tx_frame(queue, skb)) + if (rt2x00queue_write_tx_frame(queue, skb, false)) goto exit_fail; if (rt2x00queue_threshold(queue)) diff --git a/drivers/net/wireless/rt2x00/rt2x00queue.c b/drivers/net/wireless/rt2x00/rt2x00queue.c index 02972a036bce..eaedee8c05c8 100644 --- a/drivers/net/wireless/rt2x00/rt2x00queue.c +++ b/drivers/net/wireless/rt2x00/rt2x00queue.c @@ -454,7 +454,8 @@ static void rt2x00queue_write_tx_descriptor(struct queue_entry *entry, rt2x00dev->ops->lib->kick_tx_queue(rt2x00dev, queue->qid); } -int rt2x00queue_write_tx_frame(struct data_queue *queue, struct sk_buff *skb) +int rt2x00queue_write_tx_frame(struct data_queue *queue, struct sk_buff *skb, + bool local) { struct ieee80211_tx_info *tx_info; struct queue_entry *entry = rt2x00queue_get_entry(queue, Q_INDEX); @@ -495,6 +496,9 @@ int rt2x00queue_write_tx_frame(struct data_queue *queue, struct sk_buff *skb) skbdesc->tx_rate_idx = rate_idx; skbdesc->tx_rate_flags = rate_flags; + if (local) + skbdesc->flags |= SKBDESC_NOT_MAC80211; + /* * When hardware encryption is supported, and this frame * is to be encrypted, we should strip the IV/EIV data from diff --git a/drivers/net/wireless/rt2x00/rt2x00queue.h b/drivers/net/wireless/rt2x00/rt2x00queue.h index 97c7895c0ece..70775e5ba1ac 100644 --- a/drivers/net/wireless/rt2x00/rt2x00queue.h +++ b/drivers/net/wireless/rt2x00/rt2x00queue.h @@ -94,12 +94,15 @@ enum data_queue_qid { * mac80211 but was stripped for processing by the driver. * @SKBDESC_L2_PADDED: Payload has been padded for 4-byte alignment, * the padded bytes are located between header and payload. + * @SKBDESC_NOT_MAC80211: Frame didn't originate from mac80211, + * don't try to pass it back. */ enum skb_frame_desc_flags { SKBDESC_DMA_MAPPED_RX = 1 << 0, SKBDESC_DMA_MAPPED_TX = 1 << 1, SKBDESC_IV_STRIPPED = 1 << 2, - SKBDESC_L2_PADDED = 1 << 3 + SKBDESC_L2_PADDED = 1 << 3, + SKBDESC_NOT_MAC80211 = 1 << 4, }; /** diff --git a/include/net/mac80211.h b/include/net/mac80211.h index f34f4ca7016c..3754ea405c88 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -219,7 +219,7 @@ struct ieee80211_bss_conf { * * These flags are used with the @flags member of &ieee80211_tx_info. * - * @IEEE80211_TX_CTL_REQ_TX_STATUS: request TX status callback for this frame. + * @IEEE80211_TX_CTL_REQ_TX_STATUS: require TX status callback for this frame. * @IEEE80211_TX_CTL_ASSIGN_SEQ: The driver has to assign a sequence * number to this frame, taking care of not overwriting the fragment * number and increasing the sequence number only when the diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 5af2f40ea4db..943def2b07df 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1443,8 +1443,6 @@ static void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, msecs_to_jiffies(local->hw.conf.dynamic_ps_timeout)); } - info->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS; - rcu_read_lock(); if (unlikely(sdata->vif.type == NL80211_IFTYPE_MONITOR)) { @@ -1575,6 +1573,8 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb, memset(info, 0, sizeof(*info)); + info->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS; + /* pass the radiotap header up to xmit */ ieee80211_xmit(IEEE80211_DEV_TO_SUB_IF(dev), skb); return NETDEV_TX_OK; -- cgit v1.3-8-gc7d7 From a58ce43f2fb17b728395ff530f019ca53c80145f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 19 Nov 2009 12:45:42 +0100 Subject: mac80211: avoid spurious deauth frames/messages With WEXT, it happens frequently that the SME requests an authentication but then deauthenticates right away because some new parameters came along. Every time this happens we print a deauth message and send a deauth frame, but both of that is rather confusing. Avoid it by aborting the authentication process silently, and telling cfg80211 about that. The patch looks larger than it really is: __cfg80211_auth_remove() is split out from cfg80211_send_auth_timeout(), there's no new code except __cfg80211_auth_canceled() (a one-liner) and the mac80211 bits (7 new lines of code). Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/cfg80211.h | 12 ++++++++++++ net/mac80211/mlme.c | 17 +++++++++++++++++ net/wireless/mlme.c | 36 +++++++++++++++++++++++++----------- 3 files changed, 54 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index d1e05aeb0c09..a6492e9bca97 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1838,6 +1838,18 @@ void cfg80211_send_rx_auth(struct net_device *dev, const u8 *buf, size_t len); */ void cfg80211_send_auth_timeout(struct net_device *dev, const u8 *addr); +/** + * __cfg80211_auth_canceled - notify cfg80211 that authentication was canceled + * @dev: network device + * @addr: The MAC address of the device with which the authentication timed out + * + * When a pending authentication had no action yet, the driver may decide + * to not send a deauth frame, but in that case must calls this function + * to tell cfg80211 about this decision. It is only valid to call this + * function within the deauth() callback. + */ +void __cfg80211_auth_canceled(struct net_device *dev, const u8 *addr); + /** * cfg80211_send_rx_assoc - notification of processed association * @dev: network device diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index f399547306c3..6dc7b5ad9a41 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2508,6 +2508,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_mgd_work *wk; const u8 *bssid = NULL; + bool not_auth_yet = false; mutex_lock(&ifmgd->mtx); @@ -2517,12 +2518,28 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, } else list_for_each_entry(wk, &ifmgd->work_list, list) { if (&wk->bss->cbss == req->bss) { bssid = req->bss->bssid; + if (wk->state == IEEE80211_MGD_STATE_PROBE) + not_auth_yet = true; list_del(&wk->list); kfree(wk); break; } } + /* + * If somebody requests authentication and we haven't + * sent out an auth frame yet there's no need to send + * out a deauth frame either. If the state was PROBE, + * then this is the case. If it's AUTH we have sent a + * frame, and if it's IDLE we have completed the auth + * process already. + */ + if (not_auth_yet) { + mutex_unlock(&ifmgd->mtx); + __cfg80211_auth_canceled(sdata->dev, bssid); + return 0; + } + /* * cfg80211 should catch this ... but it's racy since * we can receive a deauth frame, process it, hand it diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 622af5649b9a..1001db4912f7 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -243,21 +243,12 @@ void cfg80211_send_disassoc(struct net_device *dev, const u8 *buf, size_t len) } EXPORT_SYMBOL(cfg80211_send_disassoc); -void cfg80211_send_auth_timeout(struct net_device *dev, const u8 *addr) +static void __cfg80211_auth_remove(struct wireless_dev *wdev, const u8 *addr) { - struct wireless_dev *wdev = dev->ieee80211_ptr; - struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); int i; bool done = false; - wdev_lock(wdev); - - nl80211_send_auth_timeout(rdev, dev, addr, GFP_KERNEL); - if (wdev->sme_state == CFG80211_SME_CONNECTING) - __cfg80211_connect_result(dev, addr, NULL, 0, NULL, 0, - WLAN_STATUS_UNSPECIFIED_FAILURE, - false, NULL); + ASSERT_WDEV_LOCK(wdev); for (i = 0; addr && i < MAX_AUTH_BSSES; i++) { if (wdev->authtry_bsses[i] && @@ -272,6 +263,29 @@ void cfg80211_send_auth_timeout(struct net_device *dev, const u8 *addr) } WARN_ON(!done); +} + +void __cfg80211_auth_canceled(struct net_device *dev, const u8 *addr) +{ + __cfg80211_auth_remove(dev->ieee80211_ptr, addr); +} +EXPORT_SYMBOL(__cfg80211_auth_canceled); + +void cfg80211_send_auth_timeout(struct net_device *dev, const u8 *addr) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + + wdev_lock(wdev); + + nl80211_send_auth_timeout(rdev, dev, addr, GFP_KERNEL); + if (wdev->sme_state == CFG80211_SME_CONNECTING) + __cfg80211_connect_result(dev, addr, NULL, 0, NULL, 0, + WLAN_STATUS_UNSPECIFIED_FAILURE, + false, NULL); + + __cfg80211_auth_remove(wdev, addr); wdev_unlock(wdev); } -- cgit v1.3-8-gc7d7 From 3d7a641e544e428191667e8b1f83f96fa46dbd65 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Nov 2009 18:10:23 +0000 Subject: SLOW_WORK: Wait for outstanding work items belonging to a module to clear Wait for outstanding slow work items belonging to a module to clear when unregistering that module as a user of the facility. This prevents the put_ref code of a work item from being taken away before it returns. Signed-off-by: David Howells --- Documentation/slow-work.txt | 13 ++++- fs/fscache/main.c | 6 +- fs/fscache/object.c | 1 + fs/fscache/operation.c | 1 + fs/gfs2/main.c | 4 +- fs/gfs2/recovery.c | 1 + include/linux/slow-work.h | 8 ++- kernel/slow-work.c | 132 ++++++++++++++++++++++++++++++++++++++++++-- 8 files changed, 150 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/Documentation/slow-work.txt b/Documentation/slow-work.txt index ebc50f808ea4..f12fda31dcdc 100644 --- a/Documentation/slow-work.txt +++ b/Documentation/slow-work.txt @@ -64,9 +64,11 @@ USING SLOW WORK ITEMS Firstly, a module or subsystem wanting to make use of slow work items must register its interest: - int ret = slow_work_register_user(); + int ret = slow_work_register_user(struct module *module); -This will return 0 if successful, or a -ve error upon failure. +This will return 0 if successful, or a -ve error upon failure. The module +pointer should be the module interested in using this facility (almost +certainly THIS_MODULE). Slow work items may then be set up by: @@ -110,7 +112,12 @@ operation. When all a module's slow work items have been processed, and the module has no further interest in the facility, it should unregister its interest: - slow_work_unregister_user(); + slow_work_unregister_user(struct module *module); + +The module pointer is used to wait for all outstanding work items for that +module before completing the unregistration. This prevents the put_ref() code +from being taken away before it completes. module should almost certainly be +THIS_MODULE. =============== diff --git a/fs/fscache/main.c b/fs/fscache/main.c index 4de41b597499..add6bdb53f04 100644 --- a/fs/fscache/main.c +++ b/fs/fscache/main.c @@ -48,7 +48,7 @@ static int __init fscache_init(void) { int ret; - ret = slow_work_register_user(); + ret = slow_work_register_user(THIS_MODULE); if (ret < 0) goto error_slow_work; @@ -80,7 +80,7 @@ error_kobj: error_cookie_jar: fscache_proc_cleanup(); error_proc: - slow_work_unregister_user(); + slow_work_unregister_user(THIS_MODULE); error_slow_work: return ret; } @@ -97,7 +97,7 @@ static void __exit fscache_exit(void) kobject_put(fscache_root); kmem_cache_destroy(fscache_cookie_jar); fscache_proc_cleanup(); - slow_work_unregister_user(); + slow_work_unregister_user(THIS_MODULE); printk(KERN_NOTICE "FS-Cache: Unloaded\n"); } diff --git a/fs/fscache/object.c b/fs/fscache/object.c index 392a41b1b79d..d236eb1d6f37 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -45,6 +45,7 @@ static void fscache_enqueue_dependents(struct fscache_object *); static void fscache_dequeue_object(struct fscache_object *); const struct slow_work_ops fscache_object_slow_work_ops = { + .owner = THIS_MODULE, .get_ref = fscache_object_slow_work_get_ref, .put_ref = fscache_object_slow_work_put_ref, .execute = fscache_object_slow_work_execute, diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index e7f8d53b8b6b..f1a2857b2ff5 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c @@ -453,6 +453,7 @@ static void fscache_op_execute(struct slow_work *work) } const struct slow_work_ops fscache_op_slow_work_ops = { + .owner = THIS_MODULE, .get_ref = fscache_op_get_ref, .put_ref = fscache_op_put_ref, .execute = fscache_op_execute, diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index eacd78a5d082..5b31f7741a8f 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -114,7 +114,7 @@ static int __init init_gfs2_fs(void) if (error) goto fail_unregister; - error = slow_work_register_user(); + error = slow_work_register_user(THIS_MODULE); if (error) goto fail_slow; @@ -163,7 +163,7 @@ static void __exit exit_gfs2_fs(void) gfs2_unregister_debugfs(); unregister_filesystem(&gfs2_fs_type); unregister_filesystem(&gfs2meta_fs_type); - slow_work_unregister_user(); + slow_work_unregister_user(THIS_MODULE); kmem_cache_destroy(gfs2_quotad_cachep); kmem_cache_destroy(gfs2_rgrpd_cachep); diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index 59d2695509d3..b2bb779f09ed 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c @@ -593,6 +593,7 @@ fail: } struct slow_work_ops gfs2_recover_ops = { + .owner = THIS_MODULE, .get_ref = gfs2_recover_get_ref, .put_ref = gfs2_recover_put_ref, .execute = gfs2_recover_work, diff --git a/include/linux/slow-work.h b/include/linux/slow-work.h index b65c8881f07a..9adb2b30754f 100644 --- a/include/linux/slow-work.h +++ b/include/linux/slow-work.h @@ -24,6 +24,9 @@ struct slow_work; * The operations used to support slow work items */ struct slow_work_ops { + /* owner */ + struct module *owner; + /* get a ref on a work item * - return 0 if successful, -ve if not */ @@ -42,6 +45,7 @@ struct slow_work_ops { * queued */ struct slow_work { + struct module *owner; /* the owning module */ unsigned long flags; #define SLOW_WORK_PENDING 0 /* item pending (further) execution */ #define SLOW_WORK_EXECUTING 1 /* item currently executing */ @@ -84,8 +88,8 @@ static inline void vslow_work_init(struct slow_work *work, } extern int slow_work_enqueue(struct slow_work *work); -extern int slow_work_register_user(void); -extern void slow_work_unregister_user(void); +extern int slow_work_register_user(struct module *owner); +extern void slow_work_unregister_user(struct module *owner); #ifdef CONFIG_SYSCTL extern ctl_table slow_work_sysctls[]; diff --git a/kernel/slow-work.c b/kernel/slow-work.c index 0d31135efbf4..dd08f376e406 100644 --- a/kernel/slow-work.c +++ b/kernel/slow-work.c @@ -22,6 +22,8 @@ #define SLOW_WORK_OOM_TIMEOUT (5 * HZ) /* can't start new threads for 5s after * OOM */ +#define SLOW_WORK_THREAD_LIMIT 255 /* abs maximum number of slow-work threads */ + static void slow_work_cull_timeout(unsigned long); static void slow_work_oom_timeout(unsigned long); @@ -46,7 +48,7 @@ static unsigned vslow_work_proportion = 50; /* % of threads that may process #ifdef CONFIG_SYSCTL static const int slow_work_min_min_threads = 2; -static int slow_work_max_max_threads = 255; +static int slow_work_max_max_threads = SLOW_WORK_THREAD_LIMIT; static const int slow_work_min_vslow = 1; static const int slow_work_max_vslow = 99; @@ -97,6 +99,23 @@ static DEFINE_TIMER(slow_work_cull_timer, slow_work_cull_timeout, 0, 0); static DEFINE_TIMER(slow_work_oom_timer, slow_work_oom_timeout, 0, 0); static struct slow_work slow_work_new_thread; /* new thread starter */ +/* + * slow work ID allocation (use slow_work_queue_lock) + */ +static DECLARE_BITMAP(slow_work_ids, SLOW_WORK_THREAD_LIMIT); + +/* + * Unregistration tracking to prevent put_ref() from disappearing during module + * unload + */ +#ifdef CONFIG_MODULES +static struct module *slow_work_thread_processing[SLOW_WORK_THREAD_LIMIT]; +static struct module *slow_work_unreg_module; +static struct slow_work *slow_work_unreg_work_item; +static DECLARE_WAIT_QUEUE_HEAD(slow_work_unreg_wq); +static DEFINE_MUTEX(slow_work_unreg_sync_lock); +#endif + /* * The queues of work items and the lock governing access to them. These are * shared between all the CPUs. It doesn't make sense to have per-CPU queues @@ -149,8 +168,11 @@ static unsigned slow_work_calc_vsmax(void) * Attempt to execute stuff queued on a slow thread. Return true if we managed * it, false if there was nothing to do. */ -static bool slow_work_execute(void) +static bool slow_work_execute(int id) { +#ifdef CONFIG_MODULES + struct module *module; +#endif struct slow_work *work = NULL; unsigned vsmax; bool very_slow; @@ -186,6 +208,12 @@ static bool slow_work_execute(void) } else { very_slow = false; /* avoid the compiler warning */ } + +#ifdef CONFIG_MODULES + if (work) + slow_work_thread_processing[id] = work->owner; +#endif + spin_unlock_irq(&slow_work_queue_lock); if (!work) @@ -219,7 +247,18 @@ static bool slow_work_execute(void) spin_unlock_irq(&slow_work_queue_lock); } + /* sort out the race between module unloading and put_ref() */ work->ops->put_ref(work); + +#ifdef CONFIG_MODULES + module = slow_work_thread_processing[id]; + slow_work_thread_processing[id] = NULL; + smp_mb(); + if (slow_work_unreg_work_item == work || + slow_work_unreg_module == module) + wake_up_all(&slow_work_unreg_wq); +#endif + return true; auto_requeue: @@ -232,6 +271,7 @@ auto_requeue: else list_add_tail(&work->link, &slow_work_queue); spin_unlock_irq(&slow_work_queue_lock); + slow_work_thread_processing[id] = NULL; return true; } @@ -368,13 +408,22 @@ static inline bool slow_work_available(int vsmax) */ static int slow_work_thread(void *_data) { - int vsmax; + int vsmax, id; DEFINE_WAIT(wait); set_freezable(); set_user_nice(current, -5); + /* allocate ourselves an ID */ + spin_lock_irq(&slow_work_queue_lock); + id = find_first_zero_bit(slow_work_ids, SLOW_WORK_THREAD_LIMIT); + BUG_ON(id < 0 || id >= SLOW_WORK_THREAD_LIMIT); + __set_bit(id, slow_work_ids); + spin_unlock_irq(&slow_work_queue_lock); + + sprintf(current->comm, "kslowd%03u", id); + for (;;) { vsmax = vslow_work_proportion; vsmax *= atomic_read(&slow_work_thread_count); @@ -395,7 +444,7 @@ static int slow_work_thread(void *_data) vsmax *= atomic_read(&slow_work_thread_count); vsmax /= 100; - if (slow_work_available(vsmax) && slow_work_execute()) { + if (slow_work_available(vsmax) && slow_work_execute(id)) { cond_resched(); if (list_empty(&slow_work_queue) && list_empty(&vslow_work_queue) && @@ -412,6 +461,10 @@ static int slow_work_thread(void *_data) break; } + spin_lock_irq(&slow_work_queue_lock); + __clear_bit(id, slow_work_ids); + spin_unlock_irq(&slow_work_queue_lock); + if (atomic_dec_and_test(&slow_work_thread_count)) complete_and_exit(&slow_work_last_thread_exited, 0); return 0; @@ -475,6 +528,7 @@ static void slow_work_new_thread_execute(struct slow_work *work) } static const struct slow_work_ops slow_work_new_thread_ops = { + .owner = THIS_MODULE, .get_ref = slow_work_new_thread_get_ref, .put_ref = slow_work_new_thread_put_ref, .execute = slow_work_new_thread_execute, @@ -546,12 +600,13 @@ static int slow_work_max_threads_sysctl(struct ctl_table *table, int write, /** * slow_work_register_user - Register a user of the facility + * @module: The module about to make use of the facility * * Register a user of the facility, starting up the initial threads if there * aren't any other users at this point. This will return 0 if successful, or * an error if not. */ -int slow_work_register_user(void) +int slow_work_register_user(struct module *module) { struct task_struct *p; int loop; @@ -598,14 +653,79 @@ error: } EXPORT_SYMBOL(slow_work_register_user); +/* + * wait for all outstanding items from the calling module to complete + * - note that more items may be queued whilst we're waiting + */ +static void slow_work_wait_for_items(struct module *module) +{ + DECLARE_WAITQUEUE(myself, current); + struct slow_work *work; + int loop; + + mutex_lock(&slow_work_unreg_sync_lock); + add_wait_queue(&slow_work_unreg_wq, &myself); + + for (;;) { + spin_lock_irq(&slow_work_queue_lock); + + /* first of all, we wait for the last queued item in each list + * to be processed */ + list_for_each_entry_reverse(work, &vslow_work_queue, link) { + if (work->owner == module) { + set_current_state(TASK_UNINTERRUPTIBLE); + slow_work_unreg_work_item = work; + goto do_wait; + } + } + list_for_each_entry_reverse(work, &slow_work_queue, link) { + if (work->owner == module) { + set_current_state(TASK_UNINTERRUPTIBLE); + slow_work_unreg_work_item = work; + goto do_wait; + } + } + + /* then we wait for the items being processed to finish */ + slow_work_unreg_module = module; + smp_mb(); + for (loop = 0; loop < SLOW_WORK_THREAD_LIMIT; loop++) { + if (slow_work_thread_processing[loop] == module) + goto do_wait; + } + spin_unlock_irq(&slow_work_queue_lock); + break; /* okay, we're done */ + + do_wait: + spin_unlock_irq(&slow_work_queue_lock); + schedule(); + slow_work_unreg_work_item = NULL; + slow_work_unreg_module = NULL; + } + + remove_wait_queue(&slow_work_unreg_wq, &myself); + mutex_unlock(&slow_work_unreg_sync_lock); +} + /** * slow_work_unregister_user - Unregister a user of the facility + * @module: The module whose items should be cleared * * Unregister a user of the facility, killing all the threads if this was the * last one. + * + * This waits for all the work items belonging to the nominated module to go + * away before proceeding. */ -void slow_work_unregister_user(void) +void slow_work_unregister_user(struct module *module) { + /* first of all, wait for all outstanding items from the calling module + * to complete */ + if (module) + slow_work_wait_for_items(module); + + /* then we can actually go about shutting down the facility if need + * be */ mutex_lock(&slow_work_user_lock); BUG_ON(slow_work_user_count <= 0); -- cgit v1.3-8-gc7d7 From 0160950297c08f8233c89b9f9e7dd59cfb080809 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 19 Nov 2009 18:10:43 +0000 Subject: SLOW_WORK: Add support for cancellation of slow work Add support for cancellation of queued slow work and delayed slow work items. The cancellation functions will wait for items that are pending or undergoing execution to be discarded by the slow work facility. Attempting to enqueue work that is in the process of being cancelled will result in ECANCELED. Signed-off-by: Jens Axboe Signed-off-by: David Howells --- Documentation/slow-work.txt | 12 ++++++- include/linux/slow-work.h | 2 ++ kernel/slow-work.c | 81 +++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 88 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/Documentation/slow-work.txt b/Documentation/slow-work.txt index c655c517fc68..2e384bd4dead 100644 --- a/Documentation/slow-work.txt +++ b/Documentation/slow-work.txt @@ -108,7 +108,17 @@ on the item, 0 otherwise. The items are reference counted, so there ought to be no need for a flush -operation. When all a module's slow work items have been processed, and the +operation. But as the reference counting is optional, means to cancel +existing work items are also included: + + cancel_slow_work(&myitem); + +can be used to cancel pending work. The above cancel function waits for +existing work to have been executed (or prevent execution of them, depending +on timing). + + +When all a module's slow work items have been processed, and the module has no further interest in the facility, it should unregister its interest: diff --git a/include/linux/slow-work.h b/include/linux/slow-work.h index 9adb2b30754f..eef20182d5b4 100644 --- a/include/linux/slow-work.h +++ b/include/linux/slow-work.h @@ -51,6 +51,7 @@ struct slow_work { #define SLOW_WORK_EXECUTING 1 /* item currently executing */ #define SLOW_WORK_ENQ_DEFERRED 2 /* item enqueue deferred */ #define SLOW_WORK_VERY_SLOW 3 /* item is very slow */ +#define SLOW_WORK_CANCELLING 4 /* item is being cancelled, don't enqueue */ const struct slow_work_ops *ops; /* operations table for this item */ struct list_head link; /* link in queue */ }; @@ -88,6 +89,7 @@ static inline void vslow_work_init(struct slow_work *work, } extern int slow_work_enqueue(struct slow_work *work); +extern void slow_work_cancel(struct slow_work *work); extern int slow_work_register_user(struct module *owner); extern void slow_work_unregister_user(struct module *owner); diff --git a/kernel/slow-work.c b/kernel/slow-work.c index fccf421eb5c1..671cc434532a 100644 --- a/kernel/slow-work.c +++ b/kernel/slow-work.c @@ -236,12 +236,17 @@ static bool slow_work_execute(int id) if (!test_and_clear_bit(SLOW_WORK_PENDING, &work->flags)) BUG(); - work->ops->execute(work); + /* don't execute if the work is in the process of being cancelled */ + if (!test_bit(SLOW_WORK_CANCELLING, &work->flags)) + work->ops->execute(work); if (very_slow) atomic_dec(&vslow_work_executing_count); clear_bit_unlock(SLOW_WORK_EXECUTING, &work->flags); + /* wake up anyone waiting for this work to be complete */ + wake_up_bit(&work->flags, SLOW_WORK_EXECUTING); + /* if someone tried to enqueue the item whilst we were executing it, * then it'll be left unenqueued to avoid multiple threads trying to * execute it simultaneously @@ -314,11 +319,16 @@ auto_requeue: * allowed to pick items to execute. This ensures that very slow items won't * overly block ones that are just ordinarily slow. * - * Returns 0 if successful, -EAGAIN if not. + * Returns 0 if successful, -EAGAIN if not (or -ECANCELED if cancelled work is + * attempted queued) */ int slow_work_enqueue(struct slow_work *work) { unsigned long flags; + int ret; + + if (test_bit(SLOW_WORK_CANCELLING, &work->flags)) + return -ECANCELED; BUG_ON(slow_work_user_count <= 0); BUG_ON(!work); @@ -335,6 +345,9 @@ int slow_work_enqueue(struct slow_work *work) if (!test_and_set_bit_lock(SLOW_WORK_PENDING, &work->flags)) { spin_lock_irqsave(&slow_work_queue_lock, flags); + if (unlikely(test_bit(SLOW_WORK_CANCELLING, &work->flags))) + goto cancelled; + /* we promise that we will not attempt to execute the work * function in more than one thread simultaneously * @@ -352,8 +365,9 @@ int slow_work_enqueue(struct slow_work *work) if (test_bit(SLOW_WORK_EXECUTING, &work->flags)) { set_bit(SLOW_WORK_ENQ_DEFERRED, &work->flags); } else { - if (slow_work_get_ref(work) < 0) - goto cant_get_ref; + ret = slow_work_get_ref(work); + if (ret < 0) + goto failed; if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags)) list_add_tail(&work->link, &vslow_work_queue); else @@ -365,12 +379,67 @@ int slow_work_enqueue(struct slow_work *work) } return 0; -cant_get_ref: +cancelled: + ret = -ECANCELED; +failed: spin_unlock_irqrestore(&slow_work_queue_lock, flags); - return -EAGAIN; + return ret; } EXPORT_SYMBOL(slow_work_enqueue); +static int slow_work_wait(void *word) +{ + schedule(); + return 0; +} + +/** + * slow_work_cancel - Cancel a slow work item + * @work: The work item to cancel + * + * This function will cancel a previously enqueued work item. If we cannot + * cancel the work item, it is guarenteed to have run when this function + * returns. + */ +void slow_work_cancel(struct slow_work *work) +{ + bool wait = true, put = false; + + set_bit(SLOW_WORK_CANCELLING, &work->flags); + + spin_lock_irq(&slow_work_queue_lock); + + if (test_bit(SLOW_WORK_PENDING, &work->flags) && + !list_empty(&work->link)) { + /* the link in the pending queue holds a reference on the item + * that we will need to release */ + list_del_init(&work->link); + wait = false; + put = true; + clear_bit(SLOW_WORK_PENDING, &work->flags); + + } else if (test_and_clear_bit(SLOW_WORK_ENQ_DEFERRED, &work->flags)) { + /* the executor is holding our only reference on the item, so + * we merely need to wait for it to finish executing */ + clear_bit(SLOW_WORK_PENDING, &work->flags); + } + + spin_unlock_irq(&slow_work_queue_lock); + + /* the EXECUTING flag is set by the executor whilst the spinlock is set + * and before the item is dequeued - so assuming the above doesn't + * actually dequeue it, simply waiting for the EXECUTING flag to be + * released here should be sufficient */ + if (wait) + wait_on_bit(&work->flags, SLOW_WORK_EXECUTING, slow_work_wait, + TASK_UNINTERRUPTIBLE); + + clear_bit(SLOW_WORK_CANCELLING, &work->flags); + if (put) + slow_work_put_ref(work); +} +EXPORT_SYMBOL(slow_work_cancel); + /* * Schedule a cull of the thread pool at some time in the near future */ -- cgit v1.3-8-gc7d7 From 6b8268b17a1ffc942bc72d7d00274e433d6b6719 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 19 Nov 2009 18:10:47 +0000 Subject: SLOW_WORK: Add delayed_slow_work support This adds support for starting slow work with a delay, similar to the functionality we have for workqueues. Signed-off-by: Jens Axboe Signed-off-by: David Howells --- Documentation/slow-work.txt | 16 +++++- include/linux/slow-work.h | 29 ++++++++++ kernel/slow-work.c | 129 +++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 171 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/Documentation/slow-work.txt b/Documentation/slow-work.txt index 2e384bd4dead..a9d1b0ffdded 100644 --- a/Documentation/slow-work.txt +++ b/Documentation/slow-work.txt @@ -41,6 +41,13 @@ expand files, provided the time taken to do so isn't too long. Operations of both types may sleep during execution, thus tying up the thread loaned to it. +A further class of work item is available, based on the slow work item class: + + (*) Delayed slow work items. + +These are slow work items that have a timer to defer queueing of the item for +a while. + THREAD-TO-CLASS ALLOCATION -------------------------- @@ -93,6 +100,10 @@ Slow work items may then be set up by: slow_work_init(&myitem, &myitem_ops); + or: + + delayed_slow_work_init(&myitem, &myitem_ops); + or: vslow_work_init(&myitem, &myitem_ops); @@ -104,7 +115,9 @@ A suitably set up work item can then be enqueued for processing: int ret = slow_work_enqueue(&myitem); This will return a -ve error if the thread pool is unable to gain a reference -on the item, 0 otherwise. +on the item, 0 otherwise, or (for delayed work): + + int ret = delayed_slow_work_enqueue(&myitem, my_jiffy_delay); The items are reference counted, so there ought to be no need for a flush @@ -112,6 +125,7 @@ operation. But as the reference counting is optional, means to cancel existing work items are also included: cancel_slow_work(&myitem); + cancel_delayed_slow_work(&myitem); can be used to cancel pending work. The above cancel function waits for existing work to have been executed (or prevent execution of them, depending diff --git a/include/linux/slow-work.h b/include/linux/slow-work.h index eef20182d5b4..b245b9a9cc0b 100644 --- a/include/linux/slow-work.h +++ b/include/linux/slow-work.h @@ -17,6 +17,7 @@ #ifdef CONFIG_SLOW_WORK #include +#include struct slow_work; @@ -52,10 +53,16 @@ struct slow_work { #define SLOW_WORK_ENQ_DEFERRED 2 /* item enqueue deferred */ #define SLOW_WORK_VERY_SLOW 3 /* item is very slow */ #define SLOW_WORK_CANCELLING 4 /* item is being cancelled, don't enqueue */ +#define SLOW_WORK_DELAYED 5 /* item is struct delayed_slow_work with active timer */ const struct slow_work_ops *ops; /* operations table for this item */ struct list_head link; /* link in queue */ }; +struct delayed_slow_work { + struct slow_work work; + struct timer_list timer; +}; + /** * slow_work_init - Initialise a slow work item * @work: The work item to initialise @@ -71,6 +78,20 @@ static inline void slow_work_init(struct slow_work *work, INIT_LIST_HEAD(&work->link); } +/** + * slow_work_init - Initialise a delayed slow work item + * @work: The work item to initialise + * @ops: The operations to use to handle the slow work item + * + * Initialise a delayed slow work item. + */ +static inline void delayed_slow_work_init(struct delayed_slow_work *dwork, + const struct slow_work_ops *ops) +{ + init_timer(&dwork->timer); + slow_work_init(&dwork->work, ops); +} + /** * vslow_work_init - Initialise a very slow work item * @work: The work item to initialise @@ -93,6 +114,14 @@ extern void slow_work_cancel(struct slow_work *work); extern int slow_work_register_user(struct module *owner); extern void slow_work_unregister_user(struct module *owner); +extern int delayed_slow_work_enqueue(struct delayed_slow_work *dwork, + unsigned long delay); + +static inline void delayed_slow_work_cancel(struct delayed_slow_work *dwork) +{ + slow_work_cancel(&dwork->work); +} + #ifdef CONFIG_SYSCTL extern ctl_table slow_work_sysctls[]; #endif diff --git a/kernel/slow-work.c b/kernel/slow-work.c index 671cc434532a..f67e1daae93d 100644 --- a/kernel/slow-work.c +++ b/kernel/slow-work.c @@ -406,11 +406,40 @@ void slow_work_cancel(struct slow_work *work) bool wait = true, put = false; set_bit(SLOW_WORK_CANCELLING, &work->flags); + smp_mb(); + + /* if the work item is a delayed work item with an active timer, we + * need to wait for the timer to finish _before_ getting the spinlock, + * lest we deadlock against the timer routine + * + * the timer routine will leave DELAYED set if it notices the + * CANCELLING flag in time + */ + if (test_bit(SLOW_WORK_DELAYED, &work->flags)) { + struct delayed_slow_work *dwork = + container_of(work, struct delayed_slow_work, work); + del_timer_sync(&dwork->timer); + } spin_lock_irq(&slow_work_queue_lock); - if (test_bit(SLOW_WORK_PENDING, &work->flags) && - !list_empty(&work->link)) { + if (test_bit(SLOW_WORK_DELAYED, &work->flags)) { + /* the timer routine aborted or never happened, so we are left + * holding the timer's reference on the item and should just + * drop the pending flag and wait for any ongoing execution to + * finish */ + struct delayed_slow_work *dwork = + container_of(work, struct delayed_slow_work, work); + + BUG_ON(timer_pending(&dwork->timer)); + BUG_ON(!list_empty(&work->link)); + + clear_bit(SLOW_WORK_DELAYED, &work->flags); + put = true; + clear_bit(SLOW_WORK_PENDING, &work->flags); + + } else if (test_bit(SLOW_WORK_PENDING, &work->flags) && + !list_empty(&work->link)) { /* the link in the pending queue holds a reference on the item * that we will need to release */ list_del_init(&work->link); @@ -440,6 +469,102 @@ void slow_work_cancel(struct slow_work *work) } EXPORT_SYMBOL(slow_work_cancel); +/* + * Handle expiry of the delay timer, indicating that a delayed slow work item + * should now be queued if not cancelled + */ +static void delayed_slow_work_timer(unsigned long data) +{ + struct slow_work *work = (struct slow_work *) data; + unsigned long flags; + bool queued = false, put = false; + + spin_lock_irqsave(&slow_work_queue_lock, flags); + if (likely(!test_bit(SLOW_WORK_CANCELLING, &work->flags))) { + clear_bit(SLOW_WORK_DELAYED, &work->flags); + + if (test_bit(SLOW_WORK_EXECUTING, &work->flags)) { + /* we discard the reference the timer was holding in + * favour of the one the executor holds */ + set_bit(SLOW_WORK_ENQ_DEFERRED, &work->flags); + put = true; + } else { + if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags)) + list_add_tail(&work->link, &vslow_work_queue); + else + list_add_tail(&work->link, &slow_work_queue); + queued = true; + } + } + + spin_unlock_irqrestore(&slow_work_queue_lock, flags); + if (put) + slow_work_put_ref(work); + if (queued) + wake_up(&slow_work_thread_wq); +} + +/** + * delayed_slow_work_enqueue - Schedule a delayed slow work item for processing + * @dwork: The delayed work item to queue + * @delay: When to start executing the work, in jiffies from now + * + * This is similar to slow_work_enqueue(), but it adds a delay before the work + * is actually queued for processing. + * + * The item can have delayed processing requested on it whilst it is being + * executed. The delay will begin immediately, and if it expires before the + * item finishes executing, the item will be placed back on the queue when it + * has done executing. + */ +int delayed_slow_work_enqueue(struct delayed_slow_work *dwork, + unsigned long delay) +{ + struct slow_work *work = &dwork->work; + unsigned long flags; + int ret; + + if (delay == 0) + return slow_work_enqueue(&dwork->work); + + BUG_ON(slow_work_user_count <= 0); + BUG_ON(!work); + BUG_ON(!work->ops); + + if (test_bit(SLOW_WORK_CANCELLING, &work->flags)) + return -ECANCELED; + + if (!test_and_set_bit_lock(SLOW_WORK_PENDING, &work->flags)) { + spin_lock_irqsave(&slow_work_queue_lock, flags); + + if (test_bit(SLOW_WORK_CANCELLING, &work->flags)) + goto cancelled; + + /* the timer holds a reference whilst it is pending */ + ret = work->ops->get_ref(work); + if (ret < 0) + goto cant_get_ref; + + if (test_and_set_bit(SLOW_WORK_DELAYED, &work->flags)) + BUG(); + dwork->timer.expires = jiffies + delay; + dwork->timer.data = (unsigned long) work; + dwork->timer.function = delayed_slow_work_timer; + add_timer(&dwork->timer); + + spin_unlock_irqrestore(&slow_work_queue_lock, flags); + } + + return 0; + +cancelled: + ret = -ECANCELED; +cant_get_ref: + spin_unlock_irqrestore(&slow_work_queue_lock, flags); + return ret; +} +EXPORT_SYMBOL(delayed_slow_work_enqueue); + /* * Schedule a cull of the thread pool at some time in the near future */ -- cgit v1.3-8-gc7d7 From 8fba10a42d191de612e60e7009c8f0313f90a9b3 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Nov 2009 18:10:51 +0000 Subject: SLOW_WORK: Allow the work items to be viewed through a /proc file Allow the executing and queued work items to be viewed through a /proc file for debugging purposes. The contents look something like the following: THR PID ITEM ADDR FL MARK DESC === ===== ================ == ===== ========== 0 3005 ffff880023f52348 a 952ms FSC: OBJ17d3: LOOK 1 3006 ffff880024e33668 2 160ms FSC: OBJ17e5 OP60d3b: Write1/Store fl=2 2 3165 ffff8800296dd180 a 424ms FSC: OBJ17e4: LOOK 3 4089 ffff8800262c8d78 a 212ms FSC: OBJ17ea: CRTN 4 4090 ffff88002792bed8 2 388ms FSC: OBJ17e8 OP60d36: Write1/Store fl=2 5 4092 ffff88002a0ef308 2 388ms FSC: OBJ17e7 OP60d2e: Write1/Store fl=2 6 4094 ffff88002abaf4b8 2 132ms FSC: OBJ17e2 OP60d4e: Write1/Store fl=2 7 4095 ffff88002bb188e0 a 388ms FSC: OBJ17e9: CRTN vsq - ffff880023d99668 1 308ms FSC: OBJ17e0 OP60f91: Write1/EnQ fl=2 vsq - ffff8800295d1740 1 212ms FSC: OBJ16be OP4d4b6: Write1/EnQ fl=2 vsq - ffff880025ba3308 1 160ms FSC: OBJ179a OP58dec: Write1/EnQ fl=2 vsq - ffff880024ec83e0 1 160ms FSC: OBJ17ae OP599f2: Write1/EnQ fl=2 vsq - ffff880026618e00 1 160ms FSC: OBJ17e6 OP60d33: Write1/EnQ fl=2 vsq - ffff880025a2a4b8 1 132ms FSC: OBJ16a2 OP4d583: Write1/EnQ fl=2 vsq - ffff880023cbe6d8 9 212ms FSC: OBJ17eb: LOOK vsq - ffff880024d37590 9 212ms FSC: OBJ17ec: LOOK vsq - ffff880027746cb0 9 212ms FSC: OBJ17ed: LOOK vsq - ffff880024d37ae8 9 212ms FSC: OBJ17ee: LOOK vsq - ffff880024d37cb0 9 212ms FSC: OBJ17ef: LOOK vsq - ffff880025036550 9 212ms FSC: OBJ17f0: LOOK vsq - ffff8800250368e0 9 212ms FSC: OBJ17f1: LOOK vsq - ffff880025036aa8 9 212ms FSC: OBJ17f2: LOOK In the 'THR' column, executing items show the thread they're occupying and queued threads indicate which queue they're on. 'PID' shows the process ID of a slow-work thread that's executing something. 'FL' shows the work item flags. 'MARK' indicates how long since an item was queued or began executing. Lastly, the 'DESC' column permits the owner of an item to give some information. Signed-off-by: David Howells --- Documentation/slow-work.txt | 60 +++++++++++- include/linux/slow-work.h | 11 +++ init/Kconfig | 10 ++ kernel/Makefile | 1 + kernel/slow-work-proc.c | 227 ++++++++++++++++++++++++++++++++++++++++++++ kernel/slow-work.c | 44 ++++++--- kernel/slow-work.h | 72 ++++++++++++++ 7 files changed, 413 insertions(+), 12 deletions(-) create mode 100644 kernel/slow-work-proc.c create mode 100644 kernel/slow-work.h (limited to 'include') diff --git a/Documentation/slow-work.txt b/Documentation/slow-work.txt index a9d1b0ffdded..f120238e70fe 100644 --- a/Documentation/slow-work.txt +++ b/Documentation/slow-work.txt @@ -149,7 +149,8 @@ ITEM OPERATIONS =============== Each work item requires a table of operations of type struct slow_work_ops. -Only ->execute() is required, getting and putting of a reference are optional. +Only ->execute() is required; the getting and putting of a reference and the +describing of an item are all optional. (*) Get a reference on an item: @@ -179,6 +180,16 @@ Only ->execute() is required, getting and putting of a reference are optional. This should perform the work required of the item. It may sleep, it may perform disk I/O and it may wait for locks. + (*) View an item through /proc: + + void (*desc)(struct slow_work *work, struct seq_file *m); + + If supplied, this should print to 'm' a small string describing the work + the item is to do. This should be no more than about 40 characters, and + shouldn't include a newline character. + + See the 'Viewing executing and queued items' section below. + ================== POOL CONFIGURATION @@ -203,3 +214,50 @@ The slow-work thread pool has a number of configurables: is bounded to between 1 and one fewer than the number of active threads. This ensures there is always at least one thread that can process very slow work items, and always at least one thread that won't. + + +================================== +VIEWING EXECUTING AND QUEUED ITEMS +================================== + +If CONFIG_SLOW_WORK_PROC is enabled, a proc file is made available: + + /proc/slow_work_rq + +through which the list of work items being executed and the queues of items to +be executed may be viewed. The owner of a work item is given the chance to +add some information of its own. + +The contents look something like the following: + + THR PID ITEM ADDR FL MARK DESC + === ===== ================ == ===== ========== + 0 3005 ffff880023f52348 a 952ms FSC: OBJ17d3: LOOK + 1 3006 ffff880024e33668 2 160ms FSC: OBJ17e5 OP60d3b: Write1/Store fl=2 + 2 3165 ffff8800296dd180 a 424ms FSC: OBJ17e4: LOOK + 3 4089 ffff8800262c8d78 a 212ms FSC: OBJ17ea: CRTN + 4 4090 ffff88002792bed8 2 388ms FSC: OBJ17e8 OP60d36: Write1/Store fl=2 + 5 4092 ffff88002a0ef308 2 388ms FSC: OBJ17e7 OP60d2e: Write1/Store fl=2 + 6 4094 ffff88002abaf4b8 2 132ms FSC: OBJ17e2 OP60d4e: Write1/Store fl=2 + 7 4095 ffff88002bb188e0 a 388ms FSC: OBJ17e9: CRTN + vsq - ffff880023d99668 1 308ms FSC: OBJ17e0 OP60f91: Write1/EnQ fl=2 + vsq - ffff8800295d1740 1 212ms FSC: OBJ16be OP4d4b6: Write1/EnQ fl=2 + vsq - ffff880025ba3308 1 160ms FSC: OBJ179a OP58dec: Write1/EnQ fl=2 + vsq - ffff880024ec83e0 1 160ms FSC: OBJ17ae OP599f2: Write1/EnQ fl=2 + vsq - ffff880026618e00 1 160ms FSC: OBJ17e6 OP60d33: Write1/EnQ fl=2 + vsq - ffff880025a2a4b8 1 132ms FSC: OBJ16a2 OP4d583: Write1/EnQ fl=2 + vsq - ffff880023cbe6d8 9 212ms FSC: OBJ17eb: LOOK + vsq - ffff880024d37590 9 212ms FSC: OBJ17ec: LOOK + vsq - ffff880027746cb0 9 212ms FSC: OBJ17ed: LOOK + vsq - ffff880024d37ae8 9 212ms FSC: OBJ17ee: LOOK + vsq - ffff880024d37cb0 9 212ms FSC: OBJ17ef: LOOK + vsq - ffff880025036550 9 212ms FSC: OBJ17f0: LOOK + vsq - ffff8800250368e0 9 212ms FSC: OBJ17f1: LOOK + vsq - ffff880025036aa8 9 212ms FSC: OBJ17f2: LOOK + +In the 'THR' column, executing items show the thread they're occupying and +queued threads indicate which queue they're on. 'PID' shows the process ID of +a slow-work thread that's executing something. 'FL' shows the work item flags. +'MARK' indicates how long since an item was queued or began executing. Lastly, +the 'DESC' column permits the owner of an item to give some information. + diff --git a/include/linux/slow-work.h b/include/linux/slow-work.h index b245b9a9cc0b..f41485145ed1 100644 --- a/include/linux/slow-work.h +++ b/include/linux/slow-work.h @@ -20,6 +20,9 @@ #include struct slow_work; +#ifdef CONFIG_SLOW_WORK_PROC +struct seq_file; +#endif /* * The operations used to support slow work items @@ -38,6 +41,11 @@ struct slow_work_ops { /* execute a work item */ void (*execute)(struct slow_work *work); + +#ifdef CONFIG_SLOW_WORK_PROC + /* describe a work item for /proc */ + void (*desc)(struct slow_work *work, struct seq_file *m); +#endif }; /* @@ -56,6 +64,9 @@ struct slow_work { #define SLOW_WORK_DELAYED 5 /* item is struct delayed_slow_work with active timer */ const struct slow_work_ops *ops; /* operations table for this item */ struct list_head link; /* link in queue */ +#ifdef CONFIG_SLOW_WORK_PROC + struct timespec mark; /* jiffies at which queued or exec begun */ +#endif }; struct delayed_slow_work { diff --git a/init/Kconfig b/init/Kconfig index 9e03ef8b311e..ab5c64801fe5 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1098,6 +1098,16 @@ config SLOW_WORK See Documentation/slow-work.txt. +config SLOW_WORK_PROC + bool "Slow work debugging through /proc" + default n + depends on SLOW_WORK && PROC_FS + help + Display the contents of the slow work run queue through /proc, + including items currently executing. + + See Documentation/slow-work.txt. + endmenu # General setup config HAVE_GENERIC_DMA_COHERENT diff --git a/kernel/Makefile b/kernel/Makefile index b8d4cd8ac0b9..776ffed1556d 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -94,6 +94,7 @@ obj-$(CONFIG_X86_DS) += trace/ obj-$(CONFIG_RING_BUFFER) += trace/ obj-$(CONFIG_SMP) += sched_cpupri.o obj-$(CONFIG_SLOW_WORK) += slow-work.o +obj-$(CONFIG_SLOW_WORK_PROC) += slow-work-proc.o obj-$(CONFIG_PERF_EVENTS) += perf_event.o ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) diff --git a/kernel/slow-work-proc.c b/kernel/slow-work-proc.c new file mode 100644 index 000000000000..3988032571f5 --- /dev/null +++ b/kernel/slow-work-proc.c @@ -0,0 +1,227 @@ +/* Slow work debugging + * + * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include "slow-work.h" + +#define ITERATOR_SHIFT (BITS_PER_LONG - 4) +#define ITERATOR_SELECTOR (0xfUL << ITERATOR_SHIFT) +#define ITERATOR_COUNTER (~ITERATOR_SELECTOR) + +void slow_work_new_thread_desc(struct slow_work *work, struct seq_file *m) +{ + seq_puts(m, "Slow-work: New thread"); +} + +/* + * Render the time mark field on a work item into a 5-char time with units plus + * a space + */ +static void slow_work_print_mark(struct seq_file *m, struct slow_work *work) +{ + struct timespec now, diff; + + now = CURRENT_TIME; + diff = timespec_sub(now, work->mark); + + if (diff.tv_sec < 0) + seq_puts(m, " -ve "); + else if (diff.tv_sec == 0 && diff.tv_nsec < 1000) + seq_printf(m, "%3luns ", diff.tv_nsec); + else if (diff.tv_sec == 0 && diff.tv_nsec < 1000000) + seq_printf(m, "%3luus ", diff.tv_nsec / 1000); + else if (diff.tv_sec == 0 && diff.tv_nsec < 1000000000) + seq_printf(m, "%3lums ", diff.tv_nsec / 1000000); + else if (diff.tv_sec <= 1) + seq_puts(m, " 1s "); + else if (diff.tv_sec < 60) + seq_printf(m, "%4lus ", diff.tv_sec); + else if (diff.tv_sec < 60 * 60) + seq_printf(m, "%4lum ", diff.tv_sec / 60); + else if (diff.tv_sec < 60 * 60 * 24) + seq_printf(m, "%4luh ", diff.tv_sec / 3600); + else + seq_puts(m, "exces "); +} + +/* + * Describe a slow work item for /proc + */ +static int slow_work_runqueue_show(struct seq_file *m, void *v) +{ + struct slow_work *work; + struct list_head *p = v; + unsigned long id; + + switch ((unsigned long) v) { + case 1: + seq_puts(m, "THR PID ITEM ADDR FL MARK DESC\n"); + return 0; + case 2: + seq_puts(m, "=== ===== ================ == ===== ==========\n"); + return 0; + + case 3 ... 3 + SLOW_WORK_THREAD_LIMIT - 1: + id = (unsigned long) v - 3; + + read_lock(&slow_work_execs_lock); + work = slow_work_execs[id]; + if (work) { + smp_read_barrier_depends(); + + seq_printf(m, "%3lu %5d %16p %2lx ", + id, slow_work_pids[id], work, work->flags); + slow_work_print_mark(m, work); + + if (work->ops->desc) + work->ops->desc(work, m); + seq_putc(m, '\n'); + } + read_unlock(&slow_work_execs_lock); + return 0; + + default: + work = list_entry(p, struct slow_work, link); + seq_printf(m, "%3s - %16p %2lx ", + work->flags & SLOW_WORK_VERY_SLOW ? "vsq" : "sq", + work, work->flags); + slow_work_print_mark(m, work); + + if (work->ops->desc) + work->ops->desc(work, m); + seq_putc(m, '\n'); + return 0; + } +} + +/* + * map the iterator to a work item + */ +static void *slow_work_runqueue_index(struct seq_file *m, loff_t *_pos) +{ + struct list_head *p; + unsigned long count, id; + + switch (*_pos >> ITERATOR_SHIFT) { + case 0x0: + if (*_pos == 0) + *_pos = 1; + if (*_pos < 3) + return (void *)(unsigned long) *_pos; + if (*_pos < 3 + SLOW_WORK_THREAD_LIMIT) + for (id = *_pos - 3; + id < SLOW_WORK_THREAD_LIMIT; + id++, (*_pos)++) + if (slow_work_execs[id]) + return (void *)(unsigned long) *_pos; + *_pos = 0x1UL << ITERATOR_SHIFT; + + case 0x1: + count = *_pos & ITERATOR_COUNTER; + list_for_each(p, &slow_work_queue) { + if (count == 0) + return p; + count--; + } + *_pos = 0x2UL << ITERATOR_SHIFT; + + case 0x2: + count = *_pos & ITERATOR_COUNTER; + list_for_each(p, &vslow_work_queue) { + if (count == 0) + return p; + count--; + } + *_pos = 0x3UL << ITERATOR_SHIFT; + + default: + return NULL; + } +} + +/* + * set up the iterator to start reading from the first line + */ +static void *slow_work_runqueue_start(struct seq_file *m, loff_t *_pos) +{ + spin_lock_irq(&slow_work_queue_lock); + return slow_work_runqueue_index(m, _pos); +} + +/* + * move to the next line + */ +static void *slow_work_runqueue_next(struct seq_file *m, void *v, loff_t *_pos) +{ + struct list_head *p = v; + unsigned long selector = *_pos >> ITERATOR_SHIFT; + + (*_pos)++; + switch (selector) { + case 0x0: + return slow_work_runqueue_index(m, _pos); + + case 0x1: + if (*_pos >> ITERATOR_SHIFT == 0x1) { + p = p->next; + if (p != &slow_work_queue) + return p; + } + *_pos = 0x2UL << ITERATOR_SHIFT; + p = &vslow_work_queue; + + case 0x2: + if (*_pos >> ITERATOR_SHIFT == 0x2) { + p = p->next; + if (p != &vslow_work_queue) + return p; + } + *_pos = 0x3UL << ITERATOR_SHIFT; + + default: + return NULL; + } +} + +/* + * clean up after reading + */ +static void slow_work_runqueue_stop(struct seq_file *m, void *v) +{ + spin_unlock_irq(&slow_work_queue_lock); +} + +static const struct seq_operations slow_work_runqueue_ops = { + .start = slow_work_runqueue_start, + .stop = slow_work_runqueue_stop, + .next = slow_work_runqueue_next, + .show = slow_work_runqueue_show, +}; + +/* + * open "/proc/slow_work_rq" to list queue contents + */ +static int slow_work_runqueue_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &slow_work_runqueue_ops); +} + +const struct file_operations slow_work_runqueue_fops = { + .owner = THIS_MODULE, + .open = slow_work_runqueue_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; diff --git a/kernel/slow-work.c b/kernel/slow-work.c index f67e1daae93d..b763bc2d2670 100644 --- a/kernel/slow-work.c +++ b/kernel/slow-work.c @@ -16,13 +16,8 @@ #include #include #include - -#define SLOW_WORK_CULL_TIMEOUT (5 * HZ) /* cull threads 5s after running out of - * things to do */ -#define SLOW_WORK_OOM_TIMEOUT (5 * HZ) /* can't start new threads for 5s after - * OOM */ - -#define SLOW_WORK_THREAD_LIMIT 255 /* abs maximum number of slow-work threads */ +#include +#include "slow-work.h" static void slow_work_cull_timeout(unsigned long); static void slow_work_oom_timeout(unsigned long); @@ -116,6 +111,15 @@ static DECLARE_WAIT_QUEUE_HEAD(slow_work_unreg_wq); static DEFINE_MUTEX(slow_work_unreg_sync_lock); #endif +/* + * Data for tracking currently executing items for indication through /proc + */ +#ifdef CONFIG_SLOW_WORK_PROC +struct slow_work *slow_work_execs[SLOW_WORK_THREAD_LIMIT]; +pid_t slow_work_pids[SLOW_WORK_THREAD_LIMIT]; +DEFINE_RWLOCK(slow_work_execs_lock); +#endif + /* * The queues of work items and the lock governing access to them. These are * shared between all the CPUs. It doesn't make sense to have per-CPU queues @@ -124,9 +128,9 @@ static DEFINE_MUTEX(slow_work_unreg_sync_lock); * There are two queues of work items: one for slow work items, and one for * very slow work items. */ -static LIST_HEAD(slow_work_queue); -static LIST_HEAD(vslow_work_queue); -static DEFINE_SPINLOCK(slow_work_queue_lock); +LIST_HEAD(slow_work_queue); +LIST_HEAD(vslow_work_queue); +DEFINE_SPINLOCK(slow_work_queue_lock); /* * The thread controls. A variable used to signal to the threads that they @@ -182,7 +186,7 @@ static unsigned slow_work_calc_vsmax(void) * Attempt to execute stuff queued on a slow thread. Return true if we managed * it, false if there was nothing to do. */ -static bool slow_work_execute(int id) +static noinline bool slow_work_execute(int id) { #ifdef CONFIG_MODULES struct module *module; @@ -227,6 +231,10 @@ static bool slow_work_execute(int id) if (work) slow_work_thread_processing[id] = work->owner; #endif + if (work) { + slow_work_mark_time(work); + slow_work_begin_exec(id, work); + } spin_unlock_irq(&slow_work_queue_lock); @@ -247,6 +255,8 @@ static bool slow_work_execute(int id) /* wake up anyone waiting for this work to be complete */ wake_up_bit(&work->flags, SLOW_WORK_EXECUTING); + slow_work_end_exec(id, work); + /* if someone tried to enqueue the item whilst we were executing it, * then it'll be left unenqueued to avoid multiple threads trying to * execute it simultaneously @@ -285,6 +295,7 @@ auto_requeue: * - we transfer our ref on the item back to the appropriate queue * - don't wake another thread up as we're awake already */ + slow_work_mark_time(work); if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags)) list_add_tail(&work->link, &vslow_work_queue); else @@ -368,6 +379,7 @@ int slow_work_enqueue(struct slow_work *work) ret = slow_work_get_ref(work); if (ret < 0) goto failed; + slow_work_mark_time(work); if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags)) list_add_tail(&work->link, &vslow_work_queue); else @@ -489,6 +501,7 @@ static void delayed_slow_work_timer(unsigned long data) set_bit(SLOW_WORK_ENQ_DEFERRED, &work->flags); put = true; } else { + slow_work_mark_time(work); if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags)) list_add_tail(&work->link, &vslow_work_queue); else @@ -627,6 +640,7 @@ static int slow_work_thread(void *_data) id = find_first_zero_bit(slow_work_ids, SLOW_WORK_THREAD_LIMIT); BUG_ON(id < 0 || id >= SLOW_WORK_THREAD_LIMIT); __set_bit(id, slow_work_ids); + slow_work_set_thread_pid(id, current->pid); spin_unlock_irq(&slow_work_queue_lock); sprintf(current->comm, "kslowd%03u", id); @@ -669,6 +683,7 @@ static int slow_work_thread(void *_data) } spin_lock_irq(&slow_work_queue_lock); + slow_work_set_thread_pid(id, 0); __clear_bit(id, slow_work_ids); spin_unlock_irq(&slow_work_queue_lock); @@ -722,6 +737,9 @@ static void slow_work_new_thread_execute(struct slow_work *work) static const struct slow_work_ops slow_work_new_thread_ops = { .owner = THIS_MODULE, .execute = slow_work_new_thread_execute, +#ifdef CONFIG_SLOW_WORK_PROC + .desc = slow_work_new_thread_desc, +#endif }; /* @@ -948,6 +966,10 @@ static int __init init_slow_work(void) #ifdef CONFIG_SYSCTL if (slow_work_max_max_threads < nr_cpus * 2) slow_work_max_max_threads = nr_cpus * 2; +#endif +#ifdef CONFIG_SLOW_WORK_PROC + proc_create("slow_work_rq", S_IFREG | 0400, NULL, + &slow_work_runqueue_fops); #endif return 0; } diff --git a/kernel/slow-work.h b/kernel/slow-work.h new file mode 100644 index 000000000000..3c2f007f3ad6 --- /dev/null +++ b/kernel/slow-work.h @@ -0,0 +1,72 @@ +/* Slow work private definitions + * + * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#define SLOW_WORK_CULL_TIMEOUT (5 * HZ) /* cull threads 5s after running out of + * things to do */ +#define SLOW_WORK_OOM_TIMEOUT (5 * HZ) /* can't start new threads for 5s after + * OOM */ + +#define SLOW_WORK_THREAD_LIMIT 255 /* abs maximum number of slow-work threads */ + +/* + * slow-work.c + */ +#ifdef CONFIG_SLOW_WORK_PROC +extern struct slow_work *slow_work_execs[]; +extern pid_t slow_work_pids[]; +extern rwlock_t slow_work_execs_lock; +#endif + +extern struct list_head slow_work_queue; +extern struct list_head vslow_work_queue; +extern spinlock_t slow_work_queue_lock; + +/* + * slow-work-proc.c + */ +#ifdef CONFIG_SLOW_WORK_PROC +extern const struct file_operations slow_work_runqueue_fops; + +extern void slow_work_new_thread_desc(struct slow_work *, struct seq_file *); +#endif + +/* + * Helper functions + */ +static inline void slow_work_set_thread_pid(int id, pid_t pid) +{ +#ifdef CONFIG_SLOW_WORK_PROC + slow_work_pids[id] = pid; +#endif +} + +static inline void slow_work_mark_time(struct slow_work *work) +{ +#ifdef CONFIG_SLOW_WORK_PROC + work->mark = CURRENT_TIME; +#endif +} + +static inline void slow_work_begin_exec(int id, struct slow_work *work) +{ +#ifdef CONFIG_SLOW_WORK_PROC + slow_work_execs[id] = work; +#endif +} + +static inline void slow_work_end_exec(int id, struct slow_work *work) +{ +#ifdef CONFIG_SLOW_WORK_PROC + write_lock(&slow_work_execs_lock); + slow_work_execs[id] = NULL; + write_unlock(&slow_work_execs_lock); +#endif +} -- cgit v1.3-8-gc7d7 From 31ba99d304494cb28fa8671ccc769c5543e1165d Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Nov 2009 18:10:53 +0000 Subject: SLOW_WORK: Allow the owner of a work item to determine if it is queued or not Add a function (slow_work_is_queued()) to permit the owner of a work item to determine if the item is queued or not. The work item is counted as being queued if it is actually on the queue, not just if it is pending. If it is executing and pending, then it is not on the queue, but will rather be put back on the queue when execution finishes. This permits a caller to quickly work out if it may be able to put another, dependent work item on the queue behind it, or whether it will have to wait till that is finished. This can be used by CacheFiles to work out whether the creation a new object can be immediately deferred when it has to wait for an old object to be deleted, or whether a wait must take place. If a wait is necessary, then the slow-work thread can otherwise get blocked, preventing the deletion from taking place. Signed-off-by: David Howells --- Documentation/slow-work.txt | 15 +++++++++++++++ include/linux/slow-work.h | 19 +++++++++++++++++++ 2 files changed, 34 insertions(+) (limited to 'include') diff --git a/Documentation/slow-work.txt b/Documentation/slow-work.txt index f120238e70fe..0169c9d9dd16 100644 --- a/Documentation/slow-work.txt +++ b/Documentation/slow-work.txt @@ -144,6 +144,21 @@ from being taken away before it completes. module should almost certainly be THIS_MODULE. +================ +HELPER FUNCTIONS +================ + +The slow-work facility provides a function by which it can be determined +whether or not an item is queued for later execution: + + bool queued = slow_work_is_queued(struct slow_work *work); + +If it returns false, then the item is not on the queue (it may be executing +with a requeue pending). This can be used to work out whether an item on which +another depends is on the queue, thus allowing a dependent item to be queued +after it. + + =============== ITEM OPERATIONS =============== diff --git a/include/linux/slow-work.h b/include/linux/slow-work.h index f41485145ed1..bfd3ab4c8898 100644 --- a/include/linux/slow-work.h +++ b/include/linux/slow-work.h @@ -120,6 +120,25 @@ static inline void vslow_work_init(struct slow_work *work, INIT_LIST_HEAD(&work->link); } +/** + * slow_work_is_queued - Determine if a slow work item is on the work queue + * work: The work item to test + * + * Determine if the specified slow-work item is on the work queue. This + * returns true if it is actually on the queue. + * + * If the item is executing and has been marked for requeue when execution + * finishes, then false will be returned. + * + * Anyone wishing to wait for completion of execution can wait on the + * SLOW_WORK_EXECUTING bit. + */ +static inline bool slow_work_is_queued(struct slow_work *work) +{ + unsigned long flags = work->flags; + return flags & SLOW_WORK_PENDING && !(flags & SLOW_WORK_EXECUTING); +} + extern int slow_work_enqueue(struct slow_work *work); extern void slow_work_cancel(struct slow_work *work); extern int slow_work_register_user(struct module *owner); -- cgit v1.3-8-gc7d7 From 3bde31a4ac225cb5805be02eff6eaaf7e0766ccd Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Nov 2009 18:10:57 +0000 Subject: SLOW_WORK: Allow a requeueable work item to sleep till the thread is needed Add a function to allow a requeueable work item to sleep till the thread processing it is needed by the slow-work facility to perform other work. Sometimes a work item can't progress immediately, but must wait for the completion of another work item that's currently being processed by another slow-work thread. In some circumstances, the waiting item could instead - theoretically - put itself back on the queue and yield its thread back to the slow-work facility, thus waiting till it gets processing time again before attempting to progress. This would allow other work items processing time on that thread. However, this only works if there is something on the queue for it to queue behind - otherwise it will just get a thread again immediately, and will end up cycling between the queue and the thread, eating up valuable CPU time. So, slow_work_sleep_till_thread_needed() is provided such that an item can put itself on a wait queue that will wake it up when the event it is actually interested in occurs, then call this function in lieu of calling schedule(). This function will then sleep until either the item's event occurs or another work item appears on the queue. If another work item is queued, but the item's event hasn't occurred, then the work item should requeue itself and yield the thread back to the slow-work facility by returning. This can be used by CacheFiles for an object that is being created on one thread to wait for an object being deleted on another thread where there is nothing on the queue for the creation to go and wait behind. As soon as an item appears on the queue that could be given thread time instead, CacheFiles can stick the creating object back on the queue and return to the slow-work facility - assuming the object deletion didn't also complete. Signed-off-by: David Howells --- Documentation/slow-work.txt | 44 +++++++++++++++++++++ include/linux/slow-work.h | 3 ++ kernel/slow-work.c | 94 ++++++++++++++++++++++++++++++++++++++++----- 3 files changed, 132 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/Documentation/slow-work.txt b/Documentation/slow-work.txt index 0169c9d9dd16..52bc31433723 100644 --- a/Documentation/slow-work.txt +++ b/Documentation/slow-work.txt @@ -158,6 +158,50 @@ with a requeue pending). This can be used to work out whether an item on which another depends is on the queue, thus allowing a dependent item to be queued after it. +If the above shows an item on which another depends not to be queued, then the +owner of the dependent item might need to wait. However, to avoid locking up +the threads unnecessarily be sleeping in them, it can make sense under some +circumstances to return the work item to the queue, thus deferring it until +some other items have had a chance to make use of the yielded thread. + +To yield a thread and defer an item, the work function should simply enqueue +the work item again and return. However, this doesn't work if there's nothing +actually on the queue, as the thread just vacated will jump straight back into +the item's work function, thus busy waiting on a CPU. + +Instead, the item should use the thread to wait for the dependency to go away, +but rather than using schedule() or schedule_timeout() to sleep, it should use +the following function: + + bool requeue = slow_work_sleep_till_thread_needed( + struct slow_work *work, + signed long *_timeout); + +This will add a second wait and then sleep, such that it will be woken up if +either something appears on the queue that could usefully make use of the +thread - and behind which this item can be queued, or if the event the caller +set up to wait for happens. True will be returned if something else appeared +on the queue and this work function should perhaps return, of false if +something else woke it up. The timeout is as for schedule_timeout(). + +For example: + + wq = bit_waitqueue(&my_flags, MY_BIT); + init_wait(&wait); + requeue = false; + do { + prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE); + if (!test_bit(MY_BIT, &my_flags)) + break; + requeue = slow_work_sleep_till_thread_needed(&my_work, + &timeout); + } while (timeout > 0 && !requeue); + finish_wait(wq, &wait); + if (!test_bit(MY_BIT, &my_flags) + goto do_my_thing; + if (requeue) + return; // to slow_work + =============== ITEM OPERATIONS diff --git a/include/linux/slow-work.h b/include/linux/slow-work.h index bfd3ab4c8898..5035a2691739 100644 --- a/include/linux/slow-work.h +++ b/include/linux/slow-work.h @@ -152,6 +152,9 @@ static inline void delayed_slow_work_cancel(struct delayed_slow_work *dwork) slow_work_cancel(&dwork->work); } +extern bool slow_work_sleep_till_thread_needed(struct slow_work *work, + signed long *_timeout); + #ifdef CONFIG_SYSCTL extern ctl_table slow_work_sysctls[]; #endif diff --git a/kernel/slow-work.c b/kernel/slow-work.c index b763bc2d2670..da94f3c101af 100644 --- a/kernel/slow-work.c +++ b/kernel/slow-work.c @@ -132,6 +132,15 @@ LIST_HEAD(slow_work_queue); LIST_HEAD(vslow_work_queue); DEFINE_SPINLOCK(slow_work_queue_lock); +/* + * The following are two wait queues that get pinged when a work item is placed + * on an empty queue. These allow work items that are hogging a thread by + * sleeping in a way that could be deferred to yield their thread and enqueue + * themselves. + */ +static DECLARE_WAIT_QUEUE_HEAD(slow_work_queue_waits_for_occupation); +static DECLARE_WAIT_QUEUE_HEAD(vslow_work_queue_waits_for_occupation); + /* * The thread controls. A variable used to signal to the threads that they * should exit when the queue is empty, a waitqueue used by the threads to wait @@ -305,6 +314,50 @@ auto_requeue: return true; } +/** + * slow_work_sleep_till_thread_needed - Sleep till thread needed by other work + * work: The work item under execution that wants to sleep + * _timeout: Scheduler sleep timeout + * + * Allow a requeueable work item to sleep on a slow-work processor thread until + * that thread is needed to do some other work or the sleep is interrupted by + * some other event. + * + * The caller must set up a wake up event before calling this and must have set + * the appropriate sleep mode (such as TASK_UNINTERRUPTIBLE) and tested its own + * condition before calling this function as no test is made here. + * + * False is returned if there is nothing on the queue; true is returned if the + * work item should be requeued + */ +bool slow_work_sleep_till_thread_needed(struct slow_work *work, + signed long *_timeout) +{ + wait_queue_head_t *wfo_wq; + struct list_head *queue; + + DEFINE_WAIT(wait); + + if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags)) { + wfo_wq = &vslow_work_queue_waits_for_occupation; + queue = &vslow_work_queue; + } else { + wfo_wq = &slow_work_queue_waits_for_occupation; + queue = &slow_work_queue; + } + + if (!list_empty(queue)) + return true; + + add_wait_queue_exclusive(wfo_wq, &wait); + if (list_empty(queue)) + *_timeout = schedule_timeout(*_timeout); + finish_wait(wfo_wq, &wait); + + return !list_empty(queue); +} +EXPORT_SYMBOL(slow_work_sleep_till_thread_needed); + /** * slow_work_enqueue - Schedule a slow work item for processing * @work: The work item to queue @@ -335,6 +388,8 @@ auto_requeue: */ int slow_work_enqueue(struct slow_work *work) { + wait_queue_head_t *wfo_wq; + struct list_head *queue; unsigned long flags; int ret; @@ -354,6 +409,14 @@ int slow_work_enqueue(struct slow_work *work) * maintaining our promise */ if (!test_and_set_bit_lock(SLOW_WORK_PENDING, &work->flags)) { + if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags)) { + wfo_wq = &vslow_work_queue_waits_for_occupation; + queue = &vslow_work_queue; + } else { + wfo_wq = &slow_work_queue_waits_for_occupation; + queue = &slow_work_queue; + } + spin_lock_irqsave(&slow_work_queue_lock, flags); if (unlikely(test_bit(SLOW_WORK_CANCELLING, &work->flags))) @@ -380,11 +443,13 @@ int slow_work_enqueue(struct slow_work *work) if (ret < 0) goto failed; slow_work_mark_time(work); - if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags)) - list_add_tail(&work->link, &vslow_work_queue); - else - list_add_tail(&work->link, &slow_work_queue); + list_add_tail(&work->link, queue); wake_up(&slow_work_thread_wq); + + /* if someone who could be requeued is sleeping on a + * thread, then ask them to yield their thread */ + if (work->link.prev == queue) + wake_up(wfo_wq); } spin_unlock_irqrestore(&slow_work_queue_lock, flags); @@ -487,9 +552,19 @@ EXPORT_SYMBOL(slow_work_cancel); */ static void delayed_slow_work_timer(unsigned long data) { + wait_queue_head_t *wfo_wq; + struct list_head *queue; struct slow_work *work = (struct slow_work *) data; unsigned long flags; - bool queued = false, put = false; + bool queued = false, put = false, first = false; + + if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags)) { + wfo_wq = &vslow_work_queue_waits_for_occupation; + queue = &vslow_work_queue; + } else { + wfo_wq = &slow_work_queue_waits_for_occupation; + queue = &slow_work_queue; + } spin_lock_irqsave(&slow_work_queue_lock, flags); if (likely(!test_bit(SLOW_WORK_CANCELLING, &work->flags))) { @@ -502,17 +577,18 @@ static void delayed_slow_work_timer(unsigned long data) put = true; } else { slow_work_mark_time(work); - if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags)) - list_add_tail(&work->link, &vslow_work_queue); - else - list_add_tail(&work->link, &slow_work_queue); + list_add_tail(&work->link, queue); queued = true; + if (work->link.prev == queue) + first = true; } } spin_unlock_irqrestore(&slow_work_queue_lock, flags); if (put) slow_work_put_ref(work); + if (first) + wake_up(wfo_wq); if (queued) wake_up(&slow_work_thread_wq); } -- cgit v1.3-8-gc7d7 From 440f0affe247e9990c8f8778f1861da4fd7d5e50 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Nov 2009 18:11:01 +0000 Subject: FS-Cache: Annotate slow-work runqueue proc lines for FS-Cache work items Annotate slow-work runqueue proc lines for FS-Cache work items. Objects include the object ID and the state. Operations include the object ID, the operation ID and the operation type and state. Signed-off-by: David Howells --- fs/fscache/object.c | 41 ++++++++++++++++++++++++++++++++++++++++- fs/fscache/operation.c | 29 +++++++++++++++++++++++++++++ fs/fscache/page.c | 27 ++++++++++++++++++++++----- include/linux/fscache-cache.h | 12 ++++++++++++ 4 files changed, 103 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/fs/fscache/object.c b/fs/fscache/object.c index d236eb1d6f37..615b63dd9ecc 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -14,9 +14,10 @@ #define FSCACHE_DEBUG_LEVEL COOKIE #include +#include #include "internal.h" -const char *fscache_object_states[] = { +const char *fscache_object_states[FSCACHE_OBJECT__NSTATES] = { [FSCACHE_OBJECT_INIT] = "OBJECT_INIT", [FSCACHE_OBJECT_LOOKING_UP] = "OBJECT_LOOKING_UP", [FSCACHE_OBJECT_CREATING] = "OBJECT_CREATING", @@ -33,9 +34,28 @@ const char *fscache_object_states[] = { }; EXPORT_SYMBOL(fscache_object_states); +static const char fscache_object_states_short[FSCACHE_OBJECT__NSTATES][5] = { + [FSCACHE_OBJECT_INIT] = "INIT", + [FSCACHE_OBJECT_LOOKING_UP] = "LOOK", + [FSCACHE_OBJECT_CREATING] = "CRTN", + [FSCACHE_OBJECT_AVAILABLE] = "AVBL", + [FSCACHE_OBJECT_ACTIVE] = "ACTV", + [FSCACHE_OBJECT_UPDATING] = "UPDT", + [FSCACHE_OBJECT_DYING] = "DYNG", + [FSCACHE_OBJECT_LC_DYING] = "LCDY", + [FSCACHE_OBJECT_ABORT_INIT] = "ABTI", + [FSCACHE_OBJECT_RELEASING] = "RELS", + [FSCACHE_OBJECT_RECYCLING] = "RCYC", + [FSCACHE_OBJECT_WITHDRAWING] = "WTHD", + [FSCACHE_OBJECT_DEAD] = "DEAD", +}; + static void fscache_object_slow_work_put_ref(struct slow_work *); static int fscache_object_slow_work_get_ref(struct slow_work *); static void fscache_object_slow_work_execute(struct slow_work *); +#ifdef CONFIG_SLOW_WORK_PROC +static void fscache_object_slow_work_desc(struct slow_work *, struct seq_file *); +#endif static void fscache_initialise_object(struct fscache_object *); static void fscache_lookup_object(struct fscache_object *); static void fscache_object_available(struct fscache_object *); @@ -49,6 +69,9 @@ const struct slow_work_ops fscache_object_slow_work_ops = { .get_ref = fscache_object_slow_work_get_ref, .put_ref = fscache_object_slow_work_put_ref, .execute = fscache_object_slow_work_execute, +#ifdef CONFIG_SLOW_WORK_PROC + .desc = fscache_object_slow_work_desc, +#endif }; EXPORT_SYMBOL(fscache_object_slow_work_ops); @@ -326,6 +349,22 @@ static void fscache_object_slow_work_execute(struct slow_work *work) fscache_enqueue_object(object); } +/* + * describe an object for slow-work debugging + */ +#ifdef CONFIG_SLOW_WORK_PROC +static void fscache_object_slow_work_desc(struct slow_work *work, + struct seq_file *m) +{ + struct fscache_object *object = + container_of(work, struct fscache_object, work); + + seq_printf(m, "FSC: OBJ%x: %s", + object->debug_id, + fscache_object_states_short[object->state]); +} +#endif + /* * initialise an object * - check the specified object's parent to see if we can make use of it diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index f1a2857b2ff5..91bbe6f0377c 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c @@ -13,6 +13,7 @@ #define FSCACHE_DEBUG_LEVEL OPERATION #include +#include #include "internal.h" atomic_t fscache_op_debug_id; @@ -31,6 +32,8 @@ void fscache_enqueue_operation(struct fscache_operation *op) _enter("{OBJ%x OP%x,%u}", op->object->debug_id, op->debug_id, atomic_read(&op->usage)); + fscache_set_op_state(op, "EnQ"); + ASSERT(op->processor != NULL); ASSERTCMP(op->object->state, >=, FSCACHE_OBJECT_AVAILABLE); ASSERTCMP(atomic_read(&op->usage), >, 0); @@ -67,6 +70,8 @@ EXPORT_SYMBOL(fscache_enqueue_operation); static void fscache_run_op(struct fscache_object *object, struct fscache_operation *op) { + fscache_set_op_state(op, "Run"); + object->n_in_progress++; if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags)) wake_up_bit(&op->flags, FSCACHE_OP_WAITING); @@ -87,6 +92,8 @@ int fscache_submit_exclusive_op(struct fscache_object *object, _enter("{OBJ%x OP%x},", object->debug_id, op->debug_id); + fscache_set_op_state(op, "SubmitX"); + spin_lock(&object->lock); ASSERTCMP(object->n_ops, >=, object->n_in_progress); ASSERTCMP(object->n_ops, >=, object->n_exclusive); @@ -190,6 +197,8 @@ int fscache_submit_op(struct fscache_object *object, ASSERTCMP(atomic_read(&op->usage), >, 0); + fscache_set_op_state(op, "Submit"); + spin_lock(&object->lock); ASSERTCMP(object->n_ops, >=, object->n_in_progress); ASSERTCMP(object->n_ops, >=, object->n_exclusive); @@ -298,6 +307,8 @@ void fscache_put_operation(struct fscache_operation *op) if (!atomic_dec_and_test(&op->usage)) return; + fscache_set_op_state(op, "Put"); + _debug("PUT OP"); if (test_and_set_bit(FSCACHE_OP_DEAD, &op->flags)) BUG(); @@ -452,9 +463,27 @@ static void fscache_op_execute(struct slow_work *work) _leave(""); } +/* + * describe an operation for slow-work debugging + */ +#ifdef CONFIG_SLOW_WORK_PROC +static void fscache_op_desc(struct slow_work *work, struct seq_file *m) +{ + struct fscache_operation *op = + container_of(work, struct fscache_operation, slow_work); + + seq_printf(m, "FSC: OBJ%x OP%x: %s/%s fl=%lx", + op->object->debug_id, op->debug_id, + op->name, op->state, op->flags); +} +#endif + const struct slow_work_ops fscache_op_slow_work_ops = { .owner = THIS_MODULE, .get_ref = fscache_op_get_ref, .put_ref = fscache_op_put_ref, .execute = fscache_op_execute, +#ifdef CONFIG_SLOW_WORK_PROC + .desc = fscache_op_desc, +#endif }; diff --git a/fs/fscache/page.c b/fs/fscache/page.c index 2568e0eb644f..e8bbc395cef6 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -63,14 +63,19 @@ static void fscache_end_page_write(struct fscache_cookie *cookie, struct page *p static void fscache_attr_changed_op(struct fscache_operation *op) { struct fscache_object *object = op->object; + int ret; _enter("{OBJ%x OP%x}", object->debug_id, op->debug_id); fscache_stat(&fscache_n_attr_changed_calls); - if (fscache_object_is_active(object) && - object->cache->ops->attr_changed(object) < 0) - fscache_abort_object(object); + if (fscache_object_is_active(object)) { + fscache_set_op_state(op, "CallFS"); + ret = object->cache->ops->attr_changed(object); + fscache_set_op_state(op, "Done"); + if (ret < 0) + fscache_abort_object(object); + } _leave(""); } @@ -99,6 +104,7 @@ int __fscache_attr_changed(struct fscache_cookie *cookie) fscache_operation_init(op, NULL); fscache_operation_init_slow(op, fscache_attr_changed_op); op->flags = FSCACHE_OP_SLOW | (1 << FSCACHE_OP_EXCLUSIVE); + fscache_set_op_name(op, "Attr"); spin_lock(&cookie->lock); @@ -184,6 +190,7 @@ static struct fscache_retrieval *fscache_alloc_retrieval( op->start_time = jiffies; INIT_WORK(&op->op.fast_work, fscache_retrieval_work); INIT_LIST_HEAD(&op->to_do); + fscache_set_op_name(&op->op, "Retr"); return op; } @@ -257,6 +264,7 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie, _leave(" = -ENOMEM"); return -ENOMEM; } + fscache_set_op_name(&op->op, "RetrRA1"); spin_lock(&cookie->lock); @@ -369,6 +377,7 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie, op = fscache_alloc_retrieval(mapping, end_io_func, context); if (!op) return -ENOMEM; + fscache_set_op_name(&op->op, "RetrRAN"); spin_lock(&cookie->lock); @@ -461,6 +470,7 @@ int __fscache_alloc_page(struct fscache_cookie *cookie, op = fscache_alloc_retrieval(page->mapping, NULL, NULL); if (!op) return -ENOMEM; + fscache_set_op_name(&op->op, "RetrAL1"); spin_lock(&cookie->lock); @@ -529,6 +539,8 @@ static void fscache_write_op(struct fscache_operation *_op) _enter("{OP%x,%d}", op->op.debug_id, atomic_read(&op->op.usage)); + fscache_set_op_state(&op->op, "GetPage"); + spin_lock(&cookie->lock); spin_lock(&object->lock); @@ -559,13 +571,17 @@ static void fscache_write_op(struct fscache_operation *_op) spin_unlock(&cookie->lock); if (page) { + fscache_set_op_state(&op->op, "Store"); ret = object->cache->ops->write_page(op, page); + fscache_set_op_state(&op->op, "EndWrite"); fscache_end_page_write(cookie, page); page_cache_release(page); - if (ret < 0) + if (ret < 0) { + fscache_set_op_state(&op->op, "Abort"); fscache_abort_object(object); - else + } else { fscache_enqueue_operation(&op->op); + } } _leave(""); @@ -634,6 +650,7 @@ int __fscache_write_page(struct fscache_cookie *cookie, fscache_operation_init(&op->op, fscache_release_write_op); fscache_operation_init_slow(&op->op, fscache_write_op); op->op.flags = FSCACHE_OP_SLOW | (1 << FSCACHE_OP_WAITING); + fscache_set_op_name(&op->op, "Write1"); ret = radix_tree_preload(gfp & ~__GFP_HIGHMEM); if (ret < 0) diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 84d3532dd3ea..7a9847ccd192 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -102,6 +102,16 @@ struct fscache_operation { /* operation releaser */ fscache_operation_release_t release; + +#ifdef CONFIG_SLOW_WORK_PROC + const char *name; /* operation name */ + const char *state; /* operation state */ +#define fscache_set_op_name(OP, N) do { (OP)->name = (N); } while(0) +#define fscache_set_op_state(OP, S) do { (OP)->state = (S); } while(0) +#else +#define fscache_set_op_name(OP, N) do { } while(0) +#define fscache_set_op_state(OP, S) do { } while(0) +#endif }; extern atomic_t fscache_op_debug_id; @@ -125,6 +135,7 @@ static inline void fscache_operation_init(struct fscache_operation *op, op->debug_id = atomic_inc_return(&fscache_op_debug_id); op->release = release; INIT_LIST_HEAD(&op->pend_link); + fscache_set_op_state(op, "Init"); } /** @@ -337,6 +348,7 @@ struct fscache_object { FSCACHE_OBJECT_RECYCLING, /* retiring object */ FSCACHE_OBJECT_WITHDRAWING, /* withdrawing object */ FSCACHE_OBJECT_DEAD, /* object is now dead */ + FSCACHE_OBJECT__NSTATES } state; int debug_id; /* debugging ID */ -- cgit v1.3-8-gc7d7 From 4fbf4291aa15926cd4fdca0ffe9122e89d0459db Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Nov 2009 18:11:04 +0000 Subject: FS-Cache: Allow the current state of all objects to be dumped Allow the current state of all fscache objects to be dumped by doing: cat /proc/fs/fscache/objects By default, all objects and all fields will be shown. This can be restricted by adding a suitable key to one of the caller's keyrings (such as the session keyring): keyctl add user fscache:objlist "" @s The are: K Show hexdump of object key (don't show if not given) A Show hexdump of object aux data (don't show if not given) And paired restrictions: C Show objects that have a cookie c Show objects that don't have a cookie B Show objects that are busy b Show objects that aren't busy W Show objects that have pending writes w Show objects that don't have pending writes R Show objects that have outstanding reads r Show objects that don't have outstanding reads S Show objects that have slow work queued s Show objects that don't have slow work queued If neither side of a restriction pair is given, then both are implied. For example: keyctl add user fscache:objlist KB @s shows objects that are busy, and lists their object keys, but does not dump their auxiliary data. It also implies "CcWwRrSs", but as 'B' is given, 'b' is not implied. Signed-off-by: David Howells --- Documentation/filesystems/caching/fscache.txt | 81 +++++ fs/cachefiles/interface.c | 1 + fs/cachefiles/rdwr.c | 6 +- fs/fscache/Kconfig | 7 + fs/fscache/Makefile | 1 + fs/fscache/cache.c | 1 + fs/fscache/cookie.c | 2 + fs/fscache/internal.h | 13 + fs/fscache/object-list.c | 432 ++++++++++++++++++++++++++ fs/fscache/object.c | 2 +- fs/fscache/operation.c | 3 + fs/fscache/page.c | 6 + fs/fscache/proc.c | 13 + include/linux/fscache-cache.h | 13 + 14 files changed, 578 insertions(+), 3 deletions(-) create mode 100644 fs/fscache/object-list.c (limited to 'include') diff --git a/Documentation/filesystems/caching/fscache.txt b/Documentation/filesystems/caching/fscache.txt index 9e94b9491d89..cac09e11ca30 100644 --- a/Documentation/filesystems/caching/fscache.txt +++ b/Documentation/filesystems/caching/fscache.txt @@ -299,6 +299,87 @@ proc files. jiffy range covered, and the SECS field the equivalent number of seconds. +=========== +OBJECT LIST +=========== + +If CONFIG_FSCACHE_OBJECT_LIST is enabled, the FS-Cache facility will maintain a +list of all the objects currently allocated and allow them to be viewed +through: + + /proc/fs/fscache/objects + +This will look something like: + + [root@andromeda ~]# head /proc/fs/fscache/objects + OBJECT PARENT STAT CHLDN OPS OOP IPR EX READS EM EV F S | NETFS_COOKIE_DEF TY FL NETFS_DATA OBJECT_KEY, AUX_DATA + ======== ======== ==== ===== === === === == ===== == == = = | ================ == == ================ ================ + 17e4b 2 ACTV 0 0 0 0 0 0 7b 4 0 8 | NFS.fh DT 0 ffff88001dd82820 010006017edcf8bbc93b43298fdfbe71e50b57b13a172c0117f38472, e567634700000000000000000000000063f2404a000000000000000000000000c9030000000000000000000063f2404a + 1693a 2 ACTV 0 0 0 0 0 0 7b 4 0 8 | NFS.fh DT 0 ffff88002db23380 010006017edcf8bbc93b43298fdfbe71e50b57b1e0162c01a2df0ea6, 420ebc4a000000000000000000000000420ebc4a0000000000000000000000000e1801000000000000000000420ebc4a + +where the first set of columns before the '|' describe the object: + + COLUMN DESCRIPTION + ======= =============================================================== + OBJECT Object debugging ID (appears as OBJ%x in some debug messages) + PARENT Debugging ID of parent object + STAT Object state + CHLDN Number of child objects of this object + OPS Number of outstanding operations on this object + OOP Number of outstanding child object management operations + IPR + EX Number of outstanding exclusive operations + READS Number of outstanding read operations + EM Object's event mask + EV Events raised on this object + F Object flags + S Object slow-work work item flags + +and the second set of columns describe the object's cookie, if present: + + COLUMN DESCRIPTION + =============== ======================================================= + NETFS_COOKIE_DEF Name of netfs cookie definition + TY Cookie type (IX - index, DT - data, hex - special) + FL Cookie flags + NETFS_DATA Netfs private data stored in the cookie + OBJECT_KEY Object key } 1 column, with separating comma + AUX_DATA Object aux data } presence may be configured + +The data shown may be filtered by attaching the a key to an appropriate keyring +before viewing the file. Something like: + + keyctl add user fscache:objlist @s + +where are a selection of the following letters: + + K Show hexdump of object key (don't show if not given) + A Show hexdump of object aux data (don't show if not given) + +and the following paired letters: + + C Show objects that have a cookie + c Show objects that don't have a cookie + B Show objects that are busy + b Show objects that aren't busy + W Show objects that have pending writes + w Show objects that don't have pending writes + R Show objects that have outstanding reads + r Show objects that don't have outstanding reads + S Show objects that have slow work queued + s Show objects that don't have slow work queued + +If neither side of a letter pair is given, then both are implied. For example: + + keyctl add user fscache:objlist KB @s + +shows objects that are busy, and lists their object keys, but does not dump +their auxiliary data. It also implies "CcWwRrSs", but as 'B' is given, 'b' is +not implied. + +By default all objects and all fields will be shown. + + ========= DEBUGGING ========= diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c index 431accd475a7..dd7f852746cb 100644 --- a/fs/cachefiles/interface.c +++ b/fs/cachefiles/interface.c @@ -331,6 +331,7 @@ static void cachefiles_put_object(struct fscache_object *_object) } cache = object->fscache.cache; + fscache_object_destroy(&object->fscache); kmem_cache_free(cachefiles_object_jar, object); fscache_object_destroyed(cache); } diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index a69787e7dd96..3304646dae84 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -333,7 +333,8 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op, shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits; - op->op.flags = FSCACHE_OP_FAST; + op->op.flags &= FSCACHE_OP_KEEP_FLAGS; + op->op.flags |= FSCACHE_OP_FAST; op->op.processor = cachefiles_read_copier; pagevec_init(&pagevec, 0); @@ -639,7 +640,8 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op, pagevec_init(&pagevec, 0); - op->op.flags = FSCACHE_OP_FAST; + op->op.flags &= FSCACHE_OP_KEEP_FLAGS; + op->op.flags |= FSCACHE_OP_FAST; op->op.processor = cachefiles_read_copier; INIT_LIST_HEAD(&backpages); diff --git a/fs/fscache/Kconfig b/fs/fscache/Kconfig index 9bbb8ce7bea0..864dac20a242 100644 --- a/fs/fscache/Kconfig +++ b/fs/fscache/Kconfig @@ -54,3 +54,10 @@ config FSCACHE_DEBUG enabled by setting bits in /sys/modules/fscache/parameter/debug. See Documentation/filesystems/caching/fscache.txt for more information. + +config FSCACHE_OBJECT_LIST + bool "Maintain global object list for debugging purposes" + depends on FSCACHE && PROC_FS + help + Maintain a global list of active fscache objects that can be + retrieved through /proc/fs/fscache/objects for debugging purposes diff --git a/fs/fscache/Makefile b/fs/fscache/Makefile index 91571b95aacc..6d561531cb36 100644 --- a/fs/fscache/Makefile +++ b/fs/fscache/Makefile @@ -15,5 +15,6 @@ fscache-y := \ fscache-$(CONFIG_PROC_FS) += proc.o fscache-$(CONFIG_FSCACHE_STATS) += stats.o fscache-$(CONFIG_FSCACHE_HISTOGRAM) += histogram.o +fscache-$(CONFIG_FSCACHE_OBJECT_LIST) += object-list.o obj-$(CONFIG_FSCACHE) := fscache.o diff --git a/fs/fscache/cache.c b/fs/fscache/cache.c index e21985bbb1fb..724384ef96de 100644 --- a/fs/fscache/cache.c +++ b/fs/fscache/cache.c @@ -263,6 +263,7 @@ int fscache_add_cache(struct fscache_cache *cache, spin_lock(&cache->object_list_lock); list_add_tail(&ifsdef->cache_link, &cache->object_list); spin_unlock(&cache->object_list_lock); + fscache_objlist_add(ifsdef); /* add the cache's netfs definition index object to the top level index * cookie as a known backing object */ diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index 72fd18f6c71f..9b5187328230 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -349,6 +349,8 @@ static int fscache_attach_object(struct fscache_cookie *cookie, object->cookie = cookie; atomic_inc(&cookie->usage); hlist_add_head(&object->cookie_link, &cookie->backing_objects); + + fscache_objlist_add(object); ret = 0; cant_attach_object: diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index 1c341304621f..fe02973a9516 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -88,10 +88,23 @@ extern int fscache_wait_bit_interruptible(void *); /* * object.c */ +extern const char fscache_object_states_short[FSCACHE_OBJECT__NSTATES][5]; + extern void fscache_withdrawing_object(struct fscache_cache *, struct fscache_object *); extern void fscache_enqueue_object(struct fscache_object *); +/* + * object-list.c + */ +#ifdef CONFIG_FSCACHE_OBJECT_LIST +extern const struct file_operations fscache_objlist_fops; + +extern void fscache_objlist_add(struct fscache_object *); +#else +#define fscache_objlist_add(object) do {} while(0) +#endif + /* * operation.c */ diff --git a/fs/fscache/object-list.c b/fs/fscache/object-list.c new file mode 100644 index 000000000000..e590242fa41a --- /dev/null +++ b/fs/fscache/object-list.c @@ -0,0 +1,432 @@ +/* Global fscache object list maintainer and viewer + * + * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#define FSCACHE_DEBUG_LEVEL COOKIE +#include +#include +#include +#include +#include "internal.h" + +static struct rb_root fscache_object_list; +static DEFINE_RWLOCK(fscache_object_list_lock); + +struct fscache_objlist_data { + unsigned long config; /* display configuration */ +#define FSCACHE_OBJLIST_CONFIG_KEY 0x00000001 /* show object keys */ +#define FSCACHE_OBJLIST_CONFIG_AUX 0x00000002 /* show object auxdata */ +#define FSCACHE_OBJLIST_CONFIG_COOKIE 0x00000004 /* show objects with cookies */ +#define FSCACHE_OBJLIST_CONFIG_NOCOOKIE 0x00000008 /* show objects without cookies */ +#define FSCACHE_OBJLIST_CONFIG_BUSY 0x00000010 /* show busy objects */ +#define FSCACHE_OBJLIST_CONFIG_IDLE 0x00000020 /* show idle objects */ +#define FSCACHE_OBJLIST_CONFIG_PENDWR 0x00000040 /* show objects with pending writes */ +#define FSCACHE_OBJLIST_CONFIG_NOPENDWR 0x00000080 /* show objects without pending writes */ +#define FSCACHE_OBJLIST_CONFIG_READS 0x00000100 /* show objects with active reads */ +#define FSCACHE_OBJLIST_CONFIG_NOREADS 0x00000200 /* show objects without active reads */ +#define FSCACHE_OBJLIST_CONFIG_EVENTS 0x00000400 /* show objects with events */ +#define FSCACHE_OBJLIST_CONFIG_NOEVENTS 0x00000800 /* show objects without no events */ +#define FSCACHE_OBJLIST_CONFIG_WORK 0x00001000 /* show objects with slow work */ +#define FSCACHE_OBJLIST_CONFIG_NOWORK 0x00002000 /* show objects without slow work */ + + u8 buf[512]; /* key and aux data buffer */ +}; + +/* + * Add an object to the object list + * - we use the address of the fscache_object structure as the key into the + * tree + */ +void fscache_objlist_add(struct fscache_object *obj) +{ + struct fscache_object *xobj; + struct rb_node **p = &fscache_object_list.rb_node, *parent = NULL; + + write_lock(&fscache_object_list_lock); + + while (*p) { + parent = *p; + xobj = rb_entry(parent, struct fscache_object, objlist_link); + + if (obj < xobj) + p = &(*p)->rb_left; + else if (obj > xobj) + p = &(*p)->rb_right; + else + BUG(); + } + + rb_link_node(&obj->objlist_link, parent, p); + rb_insert_color(&obj->objlist_link, &fscache_object_list); + + write_unlock(&fscache_object_list_lock); +} + +/** + * fscache_object_destroy - Note that a cache object is about to be destroyed + * @object: The object to be destroyed + * + * Note the imminent destruction and deallocation of a cache object record. + */ +void fscache_object_destroy(struct fscache_object *obj) +{ + write_lock(&fscache_object_list_lock); + + BUG_ON(RB_EMPTY_ROOT(&fscache_object_list)); + rb_erase(&obj->objlist_link, &fscache_object_list); + + write_unlock(&fscache_object_list_lock); +} +EXPORT_SYMBOL(fscache_object_destroy); + +/* + * find the object in the tree on or after the specified index + */ +static struct fscache_object *fscache_objlist_lookup(loff_t *_pos) +{ + struct fscache_object *pobj, *obj, *minobj = NULL; + struct rb_node *p; + unsigned long pos; + + if (*_pos >= (unsigned long) ERR_PTR(-ENOENT)) + return NULL; + pos = *_pos; + + /* banners (can't represent line 0 by pos 0 as that would involve + * returning a NULL pointer) */ + if (pos == 0) + return (struct fscache_object *) ++(*_pos); + if (pos < 3) + return (struct fscache_object *)pos; + + pobj = (struct fscache_object *)pos; + p = fscache_object_list.rb_node; + while (p) { + obj = rb_entry(p, struct fscache_object, objlist_link); + if (pobj < obj) { + if (!minobj || minobj > obj) + minobj = obj; + p = p->rb_left; + } else if (pobj > obj) { + p = p->rb_right; + } else { + minobj = obj; + break; + } + obj = NULL; + } + + if (!minobj) + *_pos = (unsigned long) ERR_PTR(-ENOENT); + else if (minobj != obj) + *_pos = (unsigned long) minobj; + return minobj; +} + +/* + * set up the iterator to start reading from the first line + */ +static void *fscache_objlist_start(struct seq_file *m, loff_t *_pos) + __acquires(&fscache_object_list_lock) +{ + read_lock(&fscache_object_list_lock); + return fscache_objlist_lookup(_pos); +} + +/* + * move to the next line + */ +static void *fscache_objlist_next(struct seq_file *m, void *v, loff_t *_pos) +{ + (*_pos)++; + return fscache_objlist_lookup(_pos); +} + +/* + * clean up after reading + */ +static void fscache_objlist_stop(struct seq_file *m, void *v) + __releases(&fscache_object_list_lock) +{ + read_unlock(&fscache_object_list_lock); +} + +/* + * display an object + */ +static int fscache_objlist_show(struct seq_file *m, void *v) +{ + struct fscache_objlist_data *data = m->private; + struct fscache_object *obj = v; + unsigned long config = data->config; + uint16_t keylen, auxlen; + char _type[3], *type; + bool no_cookie; + u8 *buf = data->buf, *p; + + if ((unsigned long) v == 1) { + seq_puts(m, "OBJECT PARENT STAT CHLDN OPS OOP IPR EX READS" + " EM EV F S" + " | NETFS_COOKIE_DEF TY FL NETFS_DATA"); + if (config & (FSCACHE_OBJLIST_CONFIG_KEY | + FSCACHE_OBJLIST_CONFIG_AUX)) + seq_puts(m, " "); + if (config & FSCACHE_OBJLIST_CONFIG_KEY) + seq_puts(m, "OBJECT_KEY"); + if ((config & (FSCACHE_OBJLIST_CONFIG_KEY | + FSCACHE_OBJLIST_CONFIG_AUX)) == + (FSCACHE_OBJLIST_CONFIG_KEY | FSCACHE_OBJLIST_CONFIG_AUX)) + seq_puts(m, ", "); + if (config & FSCACHE_OBJLIST_CONFIG_AUX) + seq_puts(m, "AUX_DATA"); + seq_puts(m, "\n"); + return 0; + } + + if ((unsigned long) v == 2) { + seq_puts(m, "======== ======== ==== ===== === === === == =====" + " == == = =" + " | ================ == == ================"); + if (config & (FSCACHE_OBJLIST_CONFIG_KEY | + FSCACHE_OBJLIST_CONFIG_AUX)) + seq_puts(m, " ================"); + seq_puts(m, "\n"); + return 0; + } + + /* filter out any unwanted objects */ +#define FILTER(criterion, _yes, _no) \ + do { \ + unsigned long yes = FSCACHE_OBJLIST_CONFIG_##_yes; \ + unsigned long no = FSCACHE_OBJLIST_CONFIG_##_no; \ + if (criterion) { \ + if (!(config & yes)) \ + return 0; \ + } else { \ + if (!(config & no)) \ + return 0; \ + } \ + } while(0) + + if (~config) { + FILTER(obj->cookie, + COOKIE, NOCOOKIE); + FILTER(obj->state != FSCACHE_OBJECT_ACTIVE || + obj->n_ops != 0 || + obj->n_obj_ops != 0 || + obj->flags || + !list_empty(&obj->dependents), + BUSY, IDLE); + FILTER(test_bit(FSCACHE_OBJECT_PENDING_WRITE, &obj->flags), + PENDWR, NOPENDWR); + FILTER(atomic_read(&obj->n_reads), + READS, NOREADS); + FILTER(obj->events & obj->event_mask, + EVENTS, NOEVENTS); + FILTER(obj->work.flags & ~(1UL << SLOW_WORK_VERY_SLOW), + WORK, NOWORK); + } + + seq_printf(m, + "%8x %8x %s %5u %3u %3u %3u %2u %5u %2lx %2lx %1lx %1lx | ", + obj->debug_id, + obj->parent ? obj->parent->debug_id : -1, + fscache_object_states_short[obj->state], + obj->n_children, + obj->n_ops, + obj->n_obj_ops, + obj->n_in_progress, + obj->n_exclusive, + atomic_read(&obj->n_reads), + obj->event_mask & FSCACHE_OBJECT_EVENTS_MASK, + obj->events, + obj->flags, + obj->work.flags); + + no_cookie = true; + keylen = auxlen = 0; + if (obj->cookie) { + spin_lock(&obj->lock); + if (obj->cookie) { + switch (obj->cookie->def->type) { + case 0: + type = "IX"; + break; + case 1: + type = "DT"; + break; + default: + sprintf(_type, "%02u", + obj->cookie->def->type); + type = _type; + break; + } + + seq_printf(m, "%-16s %s %2lx %16p", + obj->cookie->def->name, + type, + obj->cookie->flags, + obj->cookie->netfs_data); + + if (obj->cookie->def->get_key && + config & FSCACHE_OBJLIST_CONFIG_KEY) + keylen = obj->cookie->def->get_key( + obj->cookie->netfs_data, + buf, 400); + + if (obj->cookie->def->get_aux && + config & FSCACHE_OBJLIST_CONFIG_AUX) + auxlen = obj->cookie->def->get_aux( + obj->cookie->netfs_data, + buf + keylen, 512 - keylen); + + no_cookie = false; + } + spin_unlock(&obj->lock); + + if (!no_cookie && (keylen > 0 || auxlen > 0)) { + seq_printf(m, " "); + for (p = buf; keylen > 0; keylen--) + seq_printf(m, "%02x", *p++); + if (auxlen > 0) { + if (config & FSCACHE_OBJLIST_CONFIG_KEY) + seq_printf(m, ", "); + for (; auxlen > 0; auxlen--) + seq_printf(m, "%02x", *p++); + } + } + } + + if (no_cookie) + seq_printf(m, "\n"); + else + seq_printf(m, "\n"); + return 0; +} + +static const struct seq_operations fscache_objlist_ops = { + .start = fscache_objlist_start, + .stop = fscache_objlist_stop, + .next = fscache_objlist_next, + .show = fscache_objlist_show, +}; + +/* + * get the configuration for filtering the list + */ +static void fscache_objlist_config(struct fscache_objlist_data *data) +{ +#ifdef CONFIG_KEYS + struct user_key_payload *confkey; + unsigned long config; + struct key *key; + const char *buf; + int len; + + key = request_key(&key_type_user, "fscache:objlist", NULL); + if (IS_ERR(key)) + goto no_config; + + config = 0; + rcu_read_lock(); + + confkey = key->payload.data; + buf = confkey->data; + + for (len = confkey->datalen - 1; len >= 0; len--) { + switch (buf[len]) { + case 'K': config |= FSCACHE_OBJLIST_CONFIG_KEY; break; + case 'A': config |= FSCACHE_OBJLIST_CONFIG_AUX; break; + case 'C': config |= FSCACHE_OBJLIST_CONFIG_COOKIE; break; + case 'c': config |= FSCACHE_OBJLIST_CONFIG_NOCOOKIE; break; + case 'B': config |= FSCACHE_OBJLIST_CONFIG_BUSY; break; + case 'b': config |= FSCACHE_OBJLIST_CONFIG_IDLE; break; + case 'W': config |= FSCACHE_OBJLIST_CONFIG_PENDWR; break; + case 'w': config |= FSCACHE_OBJLIST_CONFIG_NOPENDWR; break; + case 'R': config |= FSCACHE_OBJLIST_CONFIG_READS; break; + case 'r': config |= FSCACHE_OBJLIST_CONFIG_NOREADS; break; + case 'S': config |= FSCACHE_OBJLIST_CONFIG_WORK; break; + case 's': config |= FSCACHE_OBJLIST_CONFIG_NOWORK; break; + } + } + + rcu_read_unlock(); + key_put(key); + + if (!(config & (FSCACHE_OBJLIST_CONFIG_COOKIE | FSCACHE_OBJLIST_CONFIG_NOCOOKIE))) + config |= FSCACHE_OBJLIST_CONFIG_COOKIE | FSCACHE_OBJLIST_CONFIG_NOCOOKIE; + if (!(config & (FSCACHE_OBJLIST_CONFIG_BUSY | FSCACHE_OBJLIST_CONFIG_IDLE))) + config |= FSCACHE_OBJLIST_CONFIG_BUSY | FSCACHE_OBJLIST_CONFIG_IDLE; + if (!(config & (FSCACHE_OBJLIST_CONFIG_PENDWR | FSCACHE_OBJLIST_CONFIG_NOPENDWR))) + config |= FSCACHE_OBJLIST_CONFIG_PENDWR | FSCACHE_OBJLIST_CONFIG_NOPENDWR; + if (!(config & (FSCACHE_OBJLIST_CONFIG_READS | FSCACHE_OBJLIST_CONFIG_NOREADS))) + config |= FSCACHE_OBJLIST_CONFIG_READS | FSCACHE_OBJLIST_CONFIG_NOREADS; + if (!(config & (FSCACHE_OBJLIST_CONFIG_EVENTS | FSCACHE_OBJLIST_CONFIG_NOEVENTS))) + config |= FSCACHE_OBJLIST_CONFIG_EVENTS | FSCACHE_OBJLIST_CONFIG_NOEVENTS; + if (!(config & (FSCACHE_OBJLIST_CONFIG_WORK | FSCACHE_OBJLIST_CONFIG_NOWORK))) + config |= FSCACHE_OBJLIST_CONFIG_WORK | FSCACHE_OBJLIST_CONFIG_NOWORK; + + data->config = config; + return; + +no_config: +#endif + data->config = ULONG_MAX; +} + +/* + * open "/proc/fs/fscache/objects" to provide a list of active objects + * - can be configured by a user-defined key added to the caller's keyrings + */ +static int fscache_objlist_open(struct inode *inode, struct file *file) +{ + struct fscache_objlist_data *data; + struct seq_file *m; + int ret; + + ret = seq_open(file, &fscache_objlist_ops); + if (ret < 0) + return ret; + + m = file->private_data; + + /* buffer for key extraction */ + data = kmalloc(sizeof(struct fscache_objlist_data), GFP_KERNEL); + if (!data) { + seq_release(inode, file); + return -ENOMEM; + } + + /* get the configuration key */ + fscache_objlist_config(data); + + m->private = data; + return 0; +} + +/* + * clean up on close + */ +static int fscache_objlist_release(struct inode *inode, struct file *file) +{ + struct seq_file *m = file->private_data; + + kfree(m->private); + m->private = NULL; + return seq_release(inode, file); +} + +const struct file_operations fscache_objlist_fops = { + .owner = THIS_MODULE, + .open = fscache_objlist_open, + .read = seq_read, + .llseek = seq_lseek, + .release = fscache_objlist_release, +}; diff --git a/fs/fscache/object.c b/fs/fscache/object.c index 615b63dd9ecc..ad1644f073bd 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -34,7 +34,7 @@ const char *fscache_object_states[FSCACHE_OBJECT__NSTATES] = { }; EXPORT_SYMBOL(fscache_object_states); -static const char fscache_object_states_short[FSCACHE_OBJECT__NSTATES][5] = { +const char fscache_object_states_short[FSCACHE_OBJECT__NSTATES][5] = { [FSCACHE_OBJECT_INIT] = "INIT", [FSCACHE_OBJECT_LOOKING_UP] = "LOOK", [FSCACHE_OBJECT_CREATING] = "CRTN", diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index 91bbe6f0377c..09e43b6e822f 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c @@ -322,6 +322,9 @@ void fscache_put_operation(struct fscache_operation *op) object = op->object; + if (test_bit(FSCACHE_OP_DEC_READ_CNT, &op->flags)) + atomic_dec(&object->n_reads); + /* now... we may get called with the object spinlock held, so we * complete the cleanup here only if we can immediately acquire the * lock, and defer it otherwise */ diff --git a/fs/fscache/page.c b/fs/fscache/page.c index e8bbc395cef6..c5973e38ce39 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -275,6 +275,9 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie, ASSERTCMP(object->state, >, FSCACHE_OBJECT_LOOKING_UP); + atomic_inc(&object->n_reads); + set_bit(FSCACHE_OP_DEC_READ_CNT, &op->op.flags); + if (fscache_submit_op(object, &op->op) < 0) goto nobufs_unlock; spin_unlock(&cookie->lock); @@ -386,6 +389,9 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie, object = hlist_entry(cookie->backing_objects.first, struct fscache_object, cookie_link); + atomic_inc(&object->n_reads); + set_bit(FSCACHE_OP_DEC_READ_CNT, &op->op.flags); + if (fscache_submit_op(object, &op->op) < 0) goto nobufs_unlock; spin_unlock(&cookie->lock); diff --git a/fs/fscache/proc.c b/fs/fscache/proc.c index beeab44bc31a..1d9e4951a597 100644 --- a/fs/fscache/proc.c +++ b/fs/fscache/proc.c @@ -37,10 +37,20 @@ int __init fscache_proc_init(void) goto error_histogram; #endif +#ifdef CONFIG_FSCACHE_OBJECT_LIST + if (!proc_create("fs/fscache/objects", S_IFREG | 0444, NULL, + &fscache_objlist_fops)) + goto error_objects; +#endif + _leave(" = 0"); return 0; +#ifdef CONFIG_FSCACHE_OBJECT_LIST +error_objects: +#endif #ifdef CONFIG_FSCACHE_HISTOGRAM + remove_proc_entry("fs/fscache/histogram", NULL); error_histogram: #endif #ifdef CONFIG_FSCACHE_STATS @@ -58,6 +68,9 @@ error_dir: */ void fscache_proc_cleanup(void) { +#ifdef CONFIG_FSCACHE_OBJECT_LIST + remove_proc_entry("fs/fscache/objects", NULL); +#endif #ifdef CONFIG_FSCACHE_HISTOGRAM remove_proc_entry("fs/fscache/histogram", NULL); #endif diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 7a9847ccd192..184cbdfbcc99 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -91,6 +91,8 @@ struct fscache_operation { #define FSCACHE_OP_WAITING 4 /* cleared when op is woken */ #define FSCACHE_OP_EXCLUSIVE 5 /* exclusive op, other ops must wait */ #define FSCACHE_OP_DEAD 6 /* op is now dead */ +#define FSCACHE_OP_DEC_READ_CNT 7 /* decrement object->n_reads on destruction */ +#define FSCACHE_OP_KEEP_FLAGS 0xc0 /* flags to keep when repurposing an op */ atomic_t usage; unsigned debug_id; /* debugging ID */ @@ -357,6 +359,7 @@ struct fscache_object { int n_obj_ops; /* number of object ops outstanding on object */ int n_in_progress; /* number of ops in progress */ int n_exclusive; /* number of exclusive ops queued */ + atomic_t n_reads; /* number of read ops in progress */ spinlock_t lock; /* state and operations lock */ unsigned long lookup_jif; /* time at which lookup started */ @@ -370,6 +373,7 @@ struct fscache_object { #define FSCACHE_OBJECT_EV_RELEASE 4 /* T if netfs requested object release */ #define FSCACHE_OBJECT_EV_RETIRE 5 /* T if netfs requested object retirement */ #define FSCACHE_OBJECT_EV_WITHDRAW 6 /* T if cache requested object withdrawal */ +#define FSCACHE_OBJECT_EVENTS_MASK 0x7f /* mask of all events*/ unsigned long flags; #define FSCACHE_OBJECT_LOCK 0 /* T if object is busy being processed */ @@ -385,6 +389,9 @@ struct fscache_object { struct list_head dependents; /* FIFO of dependent objects */ struct list_head dep_link; /* link in parent's dependents list */ struct list_head pending_ops; /* unstarted operations on this object */ +#ifdef CONFIG_FSCACHE_OBJECT_LIST + struct rb_node objlist_link; /* link in global object list */ +#endif pgoff_t store_limit; /* current storage limit */ }; @@ -434,6 +441,12 @@ void fscache_object_init(struct fscache_object *object, extern void fscache_object_lookup_negative(struct fscache_object *object); extern void fscache_obtained_object(struct fscache_object *object); +#ifdef CONFIG_FSCACHE_OBJECT_LIST +extern void fscache_object_destroy(struct fscache_object *object); +#else +#define fscache_object_destroy(object) do {} while(0) +#endif + /** * fscache_object_destroyed - Note destruction of an object in a cache * @cache: The cache from which the object came -- cgit v1.3-8-gc7d7 From 1bccf513ac49d44604ba1cddcc29f5886e70f1b6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Nov 2009 18:11:25 +0000 Subject: FS-Cache: Fix lock misorder in fscache_write_op() FS-Cache has two structs internally for keeping track of the internal state of a cached file: the fscache_cookie struct, which represents the netfs's state, and fscache_object struct, which represents the cache's state. Each has a pointer that points to the other (when both are in existence), and each has a spinlock for pointer maintenance. Since netfs operations approach these structures from the cookie side, they get the cookie lock first, then the object lock. Cache operations, on the other hand, approach from the object side, and get the object lock first. It is not then permitted for a cache operation to get the cookie lock whilst it is holding the object lock lest deadlock occur; instead, it must do one of two things: (1) increment the cookie usage counter, drop the object lock and then get both locks in order, or (2) simply hold the object lock as certain parts of the cookie may not be altered whilst the object lock is held. It is also not permitted to follow either pointer without holding the lock at the end you start with. To break the pointers between the cookie and the object, both locks must be held. fscache_write_op(), however, violates the locking rules: It attempts to get the cookie lock without (a) checking that the cookie pointer is a valid pointer, and (b) holding the object lock to protect the cookie pointer whilst it follows it. This is so that it can access the pending page store tree without interference from __fscache_write_page(). This is fixed by splitting the cookie lock, such that the page store tracking tree is protected by its own lock, and checking that the cookie pointer is non-NULL before we attempt to follow it whilst holding the object lock. The new lock is subordinate to both the cookie lock and the object lock, and so should be taken after those. Signed-off-by: David Howells --- Documentation/filesystems/caching/fscache.txt | 3 ++ fs/fscache/cookie.c | 1 + fs/fscache/internal.h | 4 +++ fs/fscache/page.c | 52 ++++++++++++++++++--------- fs/fscache/stats.c | 10 ++++-- include/linux/fscache-cache.h | 1 + 6 files changed, 52 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/Documentation/filesystems/caching/fscache.txt b/Documentation/filesystems/caching/fscache.txt index 0a77868f4977..9cf2cfbc81c9 100644 --- a/Documentation/filesystems/caching/fscache.txt +++ b/Documentation/filesystems/caching/fscache.txt @@ -269,6 +269,9 @@ proc files. oom=N Number of store reqs failed -ENOMEM ops=N Number of store reqs submitted run=N Number of store reqs granted CPU time + pgs=N Number of pages given store req processing time + rxd=N Number of store reqs deleted from tracking tree + olm=N Number of store reqs over store limit Ops pend=N Number of times async ops added to pending queues run=N Number of times async ops given CPU time enq=N Number of times async ops queued for processing diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index e6854f5222f5..f979659c1b3f 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -36,6 +36,7 @@ void fscache_cookie_init_once(void *_cookie) memset(cookie, 0, sizeof(*cookie)); spin_lock_init(&cookie->lock); + spin_lock_init(&cookie->stores_lock); INIT_HLIST_HEAD(&cookie->backing_objects); } diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index 50324ad2b194..ba1853fa1ff9 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -17,6 +17,7 @@ * - cache->object_list_lock * - object->lock * - object->parent->lock + * - cookie->stores_lock * - fscache_thread_lock * */ @@ -174,6 +175,9 @@ extern atomic_t fscache_n_stores_nobufs; extern atomic_t fscache_n_stores_oom; extern atomic_t fscache_n_store_ops; extern atomic_t fscache_n_store_calls; +extern atomic_t fscache_n_store_pages; +extern atomic_t fscache_n_store_radix_deletes; +extern atomic_t fscache_n_store_pages_over_limit; extern atomic_t fscache_n_marks; extern atomic_t fscache_n_uncaches; diff --git a/fs/fscache/page.c b/fs/fscache/page.c index e6f2e61133a1..3ea8897bc217 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -45,16 +45,26 @@ EXPORT_SYMBOL(__fscache_wait_on_page_write); /* * note that a page has finished being written to the cache */ -static void fscache_end_page_write(struct fscache_cookie *cookie, struct page *page) +static void fscache_end_page_write(struct fscache_object *object, + struct page *page) { - struct page *xpage; + struct fscache_cookie *cookie; + struct page *xpage = NULL; - spin_lock(&cookie->lock); - xpage = radix_tree_delete(&cookie->stores, page->index); - spin_unlock(&cookie->lock); - ASSERT(xpage != NULL); - - wake_up_bit(&cookie->flags, 0); + spin_lock(&object->lock); + cookie = object->cookie; + if (cookie) { + /* delete the page from the tree if it is now no longer + * pending */ + spin_lock(&cookie->stores_lock); + fscache_stat(&fscache_n_store_radix_deletes); + xpage = radix_tree_delete(&cookie->stores, page->index); + spin_unlock(&cookie->stores_lock); + wake_up_bit(&cookie->flags, 0); + } + spin_unlock(&object->lock); + if (xpage) + page_cache_release(xpage); } /* @@ -591,7 +601,7 @@ static void fscache_write_op(struct fscache_operation *_op) struct fscache_storage *op = container_of(_op, struct fscache_storage, op); struct fscache_object *object = op->op.object; - struct fscache_cookie *cookie = object->cookie; + struct fscache_cookie *cookie; struct page *page; unsigned n; void *results[1]; @@ -601,16 +611,17 @@ static void fscache_write_op(struct fscache_operation *_op) fscache_set_op_state(&op->op, "GetPage"); - spin_lock(&cookie->lock); spin_lock(&object->lock); + cookie = object->cookie; - if (!fscache_object_is_active(object)) { + if (!fscache_object_is_active(object) || !cookie) { spin_unlock(&object->lock); - spin_unlock(&cookie->lock); _leave(""); return; } + spin_lock(&cookie->stores_lock); + fscache_stat(&fscache_n_store_calls); /* find a page to store */ @@ -621,23 +632,25 @@ static void fscache_write_op(struct fscache_operation *_op) goto superseded; page = results[0]; _debug("gang %d [%lx]", n, page->index); - if (page->index > op->store_limit) + if (page->index > op->store_limit) { + fscache_stat(&fscache_n_store_pages_over_limit); goto superseded; + } radix_tree_tag_clear(&cookie->stores, page->index, FSCACHE_COOKIE_PENDING_TAG); + spin_unlock(&cookie->stores_lock); spin_unlock(&object->lock); - spin_unlock(&cookie->lock); if (page) { fscache_set_op_state(&op->op, "Store"); + fscache_stat(&fscache_n_store_pages); fscache_stat(&fscache_n_cop_write_page); ret = object->cache->ops->write_page(op, page); fscache_stat_d(&fscache_n_cop_write_page); fscache_set_op_state(&op->op, "EndWrite"); - fscache_end_page_write(cookie, page); - page_cache_release(page); + fscache_end_page_write(object, page); if (ret < 0) { fscache_set_op_state(&op->op, "Abort"); fscache_abort_object(object); @@ -653,9 +666,9 @@ superseded: /* this writer is going away and there aren't any more things to * write */ _debug("cease"); + spin_unlock(&cookie->stores_lock); clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags); spin_unlock(&object->lock); - spin_unlock(&cookie->lock); _leave(""); } @@ -731,6 +744,7 @@ int __fscache_write_page(struct fscache_cookie *cookie, /* add the page to the pending-storage radix tree on the backing * object */ spin_lock(&object->lock); + spin_lock(&cookie->stores_lock); _debug("store limit %llx", (unsigned long long) object->store_limit); @@ -751,6 +765,7 @@ int __fscache_write_page(struct fscache_cookie *cookie, if (test_and_set_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags)) goto already_pending; + spin_unlock(&cookie->stores_lock); spin_unlock(&object->lock); op->op.debug_id = atomic_inc_return(&fscache_op_debug_id); @@ -772,6 +787,7 @@ int __fscache_write_page(struct fscache_cookie *cookie, already_queued: fscache_stat(&fscache_n_stores_again); already_pending: + spin_unlock(&cookie->stores_lock); spin_unlock(&object->lock); spin_unlock(&cookie->lock); radix_tree_preload_end(); @@ -781,7 +797,9 @@ already_pending: return 0; submit_failed: + spin_lock(&cookie->stores_lock); radix_tree_delete(&cookie->stores, page->index); + spin_unlock(&cookie->stores_lock); page_cache_release(page); ret = -ENOBUFS; goto nobufs; diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c index 4c07439d1307..1d53ea68409e 100644 --- a/fs/fscache/stats.c +++ b/fs/fscache/stats.c @@ -58,6 +58,9 @@ atomic_t fscache_n_stores_nobufs; atomic_t fscache_n_stores_oom; atomic_t fscache_n_store_ops; atomic_t fscache_n_store_calls; +atomic_t fscache_n_store_pages; +atomic_t fscache_n_store_radix_deletes; +atomic_t fscache_n_store_pages_over_limit; atomic_t fscache_n_marks; atomic_t fscache_n_uncaches; @@ -200,9 +203,12 @@ static int fscache_stats_show(struct seq_file *m, void *v) atomic_read(&fscache_n_stores_again), atomic_read(&fscache_n_stores_nobufs), atomic_read(&fscache_n_stores_oom)); - seq_printf(m, "Stores : ops=%u run=%u\n", + seq_printf(m, "Stores : ops=%u run=%u pgs=%u rxd=%u olm=%u\n", atomic_read(&fscache_n_store_ops), - atomic_read(&fscache_n_store_calls)); + atomic_read(&fscache_n_store_calls), + atomic_read(&fscache_n_store_pages), + atomic_read(&fscache_n_store_radix_deletes), + atomic_read(&fscache_n_store_pages_over_limit)); seq_printf(m, "Ops : pend=%u run=%u enq=%u can=%u\n", atomic_read(&fscache_n_op_pend), diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 184cbdfbcc99..f3aa4bdafef6 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -310,6 +310,7 @@ struct fscache_cookie { atomic_t usage; /* number of users of this cookie */ atomic_t n_children; /* number of children of this cookie */ spinlock_t lock; + spinlock_t stores_lock; /* lock on page store tree */ struct hlist_head backing_objects; /* object(s) backing this file/index */ const struct fscache_cookie_def *def; /* definition */ struct fscache_cookie *parent; /* parent of this entry */ -- cgit v1.3-8-gc7d7 From 201a15428bd54f83eccec8b7c64a04b8f9431204 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Nov 2009 18:11:35 +0000 Subject: FS-Cache: Handle pages pending storage that get evicted under OOM conditions Handle netfs pages that the vmscan algorithm wants to evict from the pagecache under OOM conditions, but that are waiting for write to the cache. Under these conditions, vmscan calls the releasepage() function of the netfs, asking if a page can be discarded. The problem is typified by the following trace of a stuck process: kslowd005 D 0000000000000000 0 4253 2 0x00000080 ffff88001b14f370 0000000000000046 ffff880020d0d000 0000000000000007 0000000000000006 0000000000000001 ffff88001b14ffd8 ffff880020d0d2a8 000000000000ddf0 00000000000118c0 00000000000118c0 ffff880020d0d2a8 Call Trace: [] __fscache_wait_on_page_write+0x8b/0xa7 [fscache] [] ? autoremove_wake_function+0x0/0x34 [] ? __fscache_check_page_write+0x63/0x70 [fscache] [] nfs_fscache_release_page+0x4e/0xc4 [nfs] [] nfs_release_page+0x3c/0x41 [nfs] [] try_to_release_page+0x32/0x3b [] shrink_page_list+0x316/0x4ac [] shrink_inactive_list+0x392/0x67c [] ? __mutex_unlock_slowpath+0x100/0x10b [] ? trace_hardirqs_on_caller+0x10c/0x130 [] ? mutex_unlock+0x9/0xb [] shrink_list+0x8d/0x8f [] shrink_zone+0x278/0x33c [] ? ktime_get_ts+0xad/0xba [] try_to_free_pages+0x22e/0x392 [] ? isolate_pages_global+0x0/0x212 [] __alloc_pages_nodemask+0x3dc/0x5cf [] grab_cache_page_write_begin+0x65/0xaa [] ext3_write_begin+0x78/0x1eb [] generic_file_buffered_write+0x109/0x28c [] ? current_fs_time+0x22/0x29 [] __generic_file_aio_write+0x350/0x385 [] ? generic_file_aio_write+0x4a/0xae [] generic_file_aio_write+0x60/0xae [] do_sync_write+0xe3/0x120 [] ? autoremove_wake_function+0x0/0x34 [] ? __dentry_open+0x1a5/0x2b8 [] ? dentry_open+0x82/0x89 [] cachefiles_write_page+0x298/0x335 [cachefiles] [] fscache_write_op+0x178/0x2c2 [fscache] [] fscache_op_execute+0x7a/0xd1 [fscache] [] slow_work_execute+0x18f/0x2d1 [] slow_work_thread+0x1c5/0x308 [] ? autoremove_wake_function+0x0/0x34 [] ? slow_work_thread+0x0/0x308 [] kthread+0x7a/0x82 [] child_rip+0xa/0x20 [] ? restore_args+0x0/0x30 [] ? tg_shares_up+0x171/0x227 [] ? kthread+0x0/0x82 [] ? child_rip+0x0/0x20 In the above backtrace, the following is happening: (1) A page storage operation is being executed by a slow-work thread (fscache_write_op()). (2) FS-Cache farms the operation out to the cache to perform (cachefiles_write_page()). (3) CacheFiles is then calling Ext3 to perform the actual write, using Ext3's standard write (do_sync_write()) under KERNEL_DS directly from the netfs page. (4) However, for Ext3 to perform the write, it must allocate some memory, in particular, it must allocate at least one page cache page into which it can copy the data from the netfs page. (5) Under OOM conditions, the memory allocator can't immediately come up with a page, so it uses vmscan to find something to discard (try_to_free_pages()). (6) vmscan finds a clean netfs page it might be able to discard (possibly the one it's trying to write out). (7) The netfs is called to throw the page away (nfs_release_page()) - but it's called with __GFP_WAIT, so the netfs decides to wait for the store to complete (__fscache_wait_on_page_write()). (8) This blocks a slow-work processing thread - possibly against itself. The system ends up stuck because it can't write out any netfs pages to the cache without allocating more memory. To avoid this, we make FS-Cache cancel some writes that aren't in the middle of actually being performed. This means that some data won't make it into the cache this time. To support this, a new FS-Cache function is added fscache_maybe_release_page() that replaces what the netfs releasepage() functions used to do with respect to the cache. The decisions fscache_maybe_release_page() makes are counted and displayed through /proc/fs/fscache/stats on a line labelled "VmScan". There are four counters provided: "nos=N" - pages that weren't pending storage; "gon=N" - pages that were pending storage when we first looked, but weren't by the time we got the object lock; "bsy=N" - pages that we ignored as they were actively being written when we looked; and "can=N" - pages that we cancelled the storage of. What I'd really like to do is alter the behaviour of the cancellation heuristics, depending on how necessary it is to expel pages. If there are plenty of other pages that aren't waiting to be written to the cache that could be ejected first, then it would be nice to hold up on immediate cancellation of cache writes - but I don't see a way of doing that. Signed-off-by: David Howells --- Documentation/filesystems/caching/fscache.txt | 4 ++ Documentation/filesystems/caching/netfs-api.txt | 21 ++++++- fs/9p/cache.c | 14 +---- fs/afs/file.c | 15 +---- fs/fscache/internal.h | 5 ++ fs/fscache/page.c | 79 ++++++++++++++++++++++++- fs/fscache/stats.c | 11 ++++ fs/nfs/fscache.c | 10 +--- include/linux/fscache-cache.h | 1 + include/linux/fscache.h | 27 +++++++++ 10 files changed, 152 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/Documentation/filesystems/caching/fscache.txt b/Documentation/filesystems/caching/fscache.txt index 057a3c71d524..7097fd29fb3d 100644 --- a/Documentation/filesystems/caching/fscache.txt +++ b/Documentation/filesystems/caching/fscache.txt @@ -272,6 +272,10 @@ proc files. pgs=N Number of pages given store req processing time rxd=N Number of store reqs deleted from tracking tree olm=N Number of store reqs over store limit + VmScan nos=N Number of release reqs against pages with no pending store + gon=N Number of release reqs against pages stored by time lock granted + bsy=N Number of release reqs ignored due to in-progress store + can=N Number of page stores cancelled due to release req Ops pend=N Number of times async ops added to pending queues run=N Number of times async ops given CPU time enq=N Number of times async ops queued for processing diff --git a/Documentation/filesystems/caching/netfs-api.txt b/Documentation/filesystems/caching/netfs-api.txt index 2666b1ed5e9e..1902c57b72ef 100644 --- a/Documentation/filesystems/caching/netfs-api.txt +++ b/Documentation/filesystems/caching/netfs-api.txt @@ -641,7 +641,7 @@ data file must be retired (see the relinquish cookie function below). Furthermore, note that this does not cancel the asynchronous read or write operation started by the read/alloc and write functions, so the page -invalidation and release functions must use: +invalidation functions must use: bool fscache_check_page_write(struct fscache_cookie *cookie, struct page *page); @@ -654,6 +654,25 @@ to see if a page is being written to the cache, and: to wait for it to finish if it is. +When releasepage() is being implemented, a special FS-Cache function exists to +manage the heuristics of coping with vmscan trying to eject pages, which may +conflict with the cache trying to write pages to the cache (which may itself +need to allocate memory): + + bool fscache_maybe_release_page(struct fscache_cookie *cookie, + struct page *page, + gfp_t gfp); + +This takes the netfs cookie, and the page and gfp arguments as supplied to +releasepage(). It will return false if the page cannot be released yet for +some reason and if it returns true, the page has been uncached and can now be +released. + +To make a page available for release, this function may wait for an outstanding +storage request to complete, or it may attempt to cancel the storage request - +in which case the page will not be stored in the cache this time. + + ========================== INDEX AND DATA FILE UPDATE ========================== diff --git a/fs/9p/cache.c b/fs/9p/cache.c index 51c94e26a346..bcc5357a9069 100644 --- a/fs/9p/cache.c +++ b/fs/9p/cache.c @@ -343,18 +343,7 @@ int __v9fs_fscache_release_page(struct page *page, gfp_t gfp) BUG_ON(!vcookie->fscache); - if (PageFsCache(page)) { - if (fscache_check_page_write(vcookie->fscache, page)) { - if (!(gfp & __GFP_WAIT)) - return 0; - fscache_wait_on_page_write(vcookie->fscache, page); - } - - fscache_uncache_page(vcookie->fscache, page); - ClearPageFsCache(page); - } - - return 1; + return fscache_maybe_release_page(vnode->cache, page, gfp); } void __v9fs_fscache_invalidate_page(struct page *page) @@ -368,7 +357,6 @@ void __v9fs_fscache_invalidate_page(struct page *page) fscache_wait_on_page_write(vcookie->fscache, page); BUG_ON(!PageLocked(page)); fscache_uncache_page(vcookie->fscache, page); - ClearPageFsCache(page); } } diff --git a/fs/afs/file.c b/fs/afs/file.c index 681c2a7b013f..39b301662f22 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -315,7 +315,6 @@ static void afs_invalidatepage(struct page *page, unsigned long offset) struct afs_vnode *vnode = AFS_FS_I(page->mapping->host); fscache_wait_on_page_write(vnode->cache, page); fscache_uncache_page(vnode->cache, page); - ClearPageFsCache(page); } #endif @@ -349,17 +348,9 @@ static int afs_releasepage(struct page *page, gfp_t gfp_flags) /* deny if page is being written to the cache and the caller hasn't * elected to wait */ #ifdef CONFIG_AFS_FSCACHE - if (PageFsCache(page)) { - if (fscache_check_page_write(vnode->cache, page)) { - if (!(gfp_flags & __GFP_WAIT)) { - _leave(" = F [cache busy]"); - return 0; - } - fscache_wait_on_page_write(vnode->cache, page); - } - - fscache_uncache_page(vnode->cache, page); - ClearPageFsCache(page); + if (!fscache_maybe_release_page(vnode->cache, page, gfp_flags)) { + _leave(" = F [cache busy]"); + return 0; } #endif diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index a0769872b19c..e5046519b153 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -180,6 +180,11 @@ extern atomic_t fscache_n_store_pages; extern atomic_t fscache_n_store_radix_deletes; extern atomic_t fscache_n_store_pages_over_limit; +extern atomic_t fscache_n_store_vmscan_not_storing; +extern atomic_t fscache_n_store_vmscan_gone; +extern atomic_t fscache_n_store_vmscan_busy; +extern atomic_t fscache_n_store_vmscan_cancelled; + extern atomic_t fscache_n_marks; extern atomic_t fscache_n_uncaches; diff --git a/fs/fscache/page.c b/fs/fscache/page.c index 022a5da8e130..fc76798bd968 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -42,6 +42,75 @@ void __fscache_wait_on_page_write(struct fscache_cookie *cookie, struct page *pa } EXPORT_SYMBOL(__fscache_wait_on_page_write); +/* + * decide whether a page can be released, possibly by cancelling a store to it + * - we're allowed to sleep if __GFP_WAIT is flagged + */ +bool __fscache_maybe_release_page(struct fscache_cookie *cookie, + struct page *page, + gfp_t gfp) +{ + struct page *xpage; + void *val; + + _enter("%p,%p,%x", cookie, page, gfp); + + rcu_read_lock(); + val = radix_tree_lookup(&cookie->stores, page->index); + if (!val) { + rcu_read_unlock(); + fscache_stat(&fscache_n_store_vmscan_not_storing); + __fscache_uncache_page(cookie, page); + return true; + } + + /* see if the page is actually undergoing storage - if so we can't get + * rid of it till the cache has finished with it */ + if (radix_tree_tag_get(&cookie->stores, page->index, + FSCACHE_COOKIE_STORING_TAG)) { + rcu_read_unlock(); + goto page_busy; + } + + /* the page is pending storage, so we attempt to cancel the store and + * discard the store request so that the page can be reclaimed */ + spin_lock(&cookie->stores_lock); + rcu_read_unlock(); + + if (radix_tree_tag_get(&cookie->stores, page->index, + FSCACHE_COOKIE_STORING_TAG)) { + /* the page started to undergo storage whilst we were looking, + * so now we can only wait or return */ + spin_unlock(&cookie->stores_lock); + goto page_busy; + } + + xpage = radix_tree_delete(&cookie->stores, page->index); + spin_unlock(&cookie->stores_lock); + + if (xpage) { + fscache_stat(&fscache_n_store_vmscan_cancelled); + fscache_stat(&fscache_n_store_radix_deletes); + ASSERTCMP(xpage, ==, page); + } else { + fscache_stat(&fscache_n_store_vmscan_gone); + } + + wake_up_bit(&cookie->flags, 0); + if (xpage) + page_cache_release(xpage); + __fscache_uncache_page(cookie, page); + return true; + +page_busy: + /* we might want to wait here, but that could deadlock the allocator as + * the slow-work threads writing to the cache may all end up sleeping + * on memory allocation */ + fscache_stat(&fscache_n_store_vmscan_busy); + return false; +} +EXPORT_SYMBOL(__fscache_maybe_release_page); + /* * note that a page has finished being written to the cache */ @@ -57,6 +126,8 @@ static void fscache_end_page_write(struct fscache_object *object, /* delete the page from the tree if it is now no longer * pending */ spin_lock(&cookie->stores_lock); + radix_tree_tag_clear(&cookie->stores, page->index, + FSCACHE_COOKIE_STORING_TAG); if (!radix_tree_tag_get(&cookie->stores, page->index, FSCACHE_COOKIE_PENDING_TAG)) { fscache_stat(&fscache_n_store_radix_deletes); @@ -640,8 +711,12 @@ static void fscache_write_op(struct fscache_operation *_op) goto superseded; } - radix_tree_tag_clear(&cookie->stores, page->index, - FSCACHE_COOKIE_PENDING_TAG); + if (page) { + radix_tree_tag_set(&cookie->stores, page->index, + FSCACHE_COOKIE_STORING_TAG); + radix_tree_tag_clear(&cookie->stores, page->index, + FSCACHE_COOKIE_PENDING_TAG); + } spin_unlock(&cookie->stores_lock); spin_unlock(&object->lock); diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c index 045ba396dbf2..cda69994e06d 100644 --- a/fs/fscache/stats.c +++ b/fs/fscache/stats.c @@ -63,6 +63,11 @@ atomic_t fscache_n_store_pages; atomic_t fscache_n_store_radix_deletes; atomic_t fscache_n_store_pages_over_limit; +atomic_t fscache_n_store_vmscan_not_storing; +atomic_t fscache_n_store_vmscan_gone; +atomic_t fscache_n_store_vmscan_busy; +atomic_t fscache_n_store_vmscan_cancelled; + atomic_t fscache_n_marks; atomic_t fscache_n_uncaches; @@ -211,6 +216,12 @@ static int fscache_stats_show(struct seq_file *m, void *v) atomic_read(&fscache_n_store_radix_deletes), atomic_read(&fscache_n_store_pages_over_limit)); + seq_printf(m, "VmScan : nos=%u gon=%u bsy=%u can=%u\n", + atomic_read(&fscache_n_store_vmscan_not_storing), + atomic_read(&fscache_n_store_vmscan_gone), + atomic_read(&fscache_n_store_vmscan_busy), + atomic_read(&fscache_n_store_vmscan_cancelled)); + seq_printf(m, "Ops : pend=%u run=%u enq=%u can=%u rej=%u\n", atomic_read(&fscache_n_op_pend), atomic_read(&fscache_n_op_run), diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index 70fad69eb959..fa588006588d 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -359,17 +359,13 @@ int nfs_fscache_release_page(struct page *page, gfp_t gfp) BUG_ON(!cookie); - if (fscache_check_page_write(cookie, page)) { - if (!(gfp & __GFP_WAIT)) - return 0; - fscache_wait_on_page_write(cookie, page); - } - if (PageFsCache(page)) { dfprintk(FSCACHE, "NFS: fscache releasepage (0x%p/0x%p/0x%p)\n", cookie, page, nfsi); - fscache_uncache_page(cookie, page); + if (!fscache_maybe_release_page(cookie, page, gfp)) + return 0; + nfs_add_fscache_stats(page->mapping->host, NFSIOS_FSCACHE_PAGES_UNCACHED, 1); } diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index f3aa4bdafef6..4750d5fb419f 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -317,6 +317,7 @@ struct fscache_cookie { void *netfs_data; /* back pointer to netfs */ struct radix_tree_root stores; /* pages to be stored on this cookie */ #define FSCACHE_COOKIE_PENDING_TAG 0 /* pages tag: pending write to cache */ +#define FSCACHE_COOKIE_STORING_TAG 1 /* pages tag: writing to cache */ unsigned long flags; #define FSCACHE_COOKIE_LOOKING_UP 0 /* T if non-index cookie being looked up still */ diff --git a/include/linux/fscache.h b/include/linux/fscache.h index 6d8ee466e0a0..595ce49288b7 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -202,6 +202,8 @@ extern int __fscache_write_page(struct fscache_cookie *, struct page *, gfp_t); extern void __fscache_uncache_page(struct fscache_cookie *, struct page *); extern bool __fscache_check_page_write(struct fscache_cookie *, struct page *); extern void __fscache_wait_on_page_write(struct fscache_cookie *, struct page *); +extern bool __fscache_maybe_release_page(struct fscache_cookie *, struct page *, + gfp_t); /** * fscache_register_netfs - Register a filesystem as desiring caching services @@ -615,4 +617,29 @@ void fscache_wait_on_page_write(struct fscache_cookie *cookie, __fscache_wait_on_page_write(cookie, page); } +/** + * fscache_maybe_release_page - Consider releasing a page, cancelling a store + * @cookie: The cookie representing the cache object + * @page: The netfs page that is being cached. + * @gfp: The gfp flags passed to releasepage() + * + * Consider releasing a page for the vmscan algorithm, on behalf of the netfs's + * releasepage() call. A storage request on the page may cancelled if it is + * not currently being processed. + * + * The function returns true if the page no longer has a storage request on it, + * and false if a storage request is left in place. If true is returned, the + * page will have been passed to fscache_uncache_page(). If false is returned + * the page cannot be freed yet. + */ +static inline +bool fscache_maybe_release_page(struct fscache_cookie *cookie, + struct page *page, + gfp_t gfp) +{ + if (fscache_cookie_valid(cookie) && PageFsCache(page)) + return __fscache_maybe_release_page(cookie, page, gfp); + return false; +} + #endif /* _LINUX_FSCACHE_H */ -- cgit v1.3-8-gc7d7 From 60d543ca724be155c2b6166e36a00c80b21bd810 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Nov 2009 18:11:45 +0000 Subject: FS-Cache: Start processing an object's operations on that object's death Start processing an object's operations when that object moves into the DYING state as the object cannot be destroyed until all its outstanding operations have completed. Furthermore, make sure that read and allocation operations handle being woken up on a dead object. Such events are recorded in the Allocs.abt and Retrvls.abt statistics as viewable through /proc/fs/fscache/stats. The code for waiting for object activation for the read and allocation operations is also extracted into its own function as it is much the same in all cases, differing only in the stats incremented. Signed-off-by: David Howells --- Documentation/filesystems/caching/fscache.txt | 2 + fs/fscache/internal.h | 5 ++ fs/fscache/object.c | 1 + fs/fscache/page.c | 112 +++++++++++++------------- fs/fscache/stats.c | 12 ++- include/linux/fscache-cache.h | 4 + 6 files changed, 75 insertions(+), 61 deletions(-) (limited to 'include') diff --git a/Documentation/filesystems/caching/fscache.txt b/Documentation/filesystems/caching/fscache.txt index 7097fd29fb3d..3c23411956bb 100644 --- a/Documentation/filesystems/caching/fscache.txt +++ b/Documentation/filesystems/caching/fscache.txt @@ -253,6 +253,7 @@ proc files. int=N Number of alloc reqs aborted -ERESTARTSYS ops=N Number of alloc reqs submitted owt=N Number of alloc reqs waited for CPU time + abt=N Number of alloc reqs aborted due to object death Retrvls n=N Number of retrieval (read) requests seen ok=N Number of successful retr reqs wt=N Number of retr reqs that waited on lookup completion @@ -262,6 +263,7 @@ proc files. oom=N Number of retr reqs failed -ENOMEM ops=N Number of retr reqs submitted owt=N Number of retr reqs waited for CPU time + abt=N Number of retr reqs aborted due to object death Stores n=N Number of storage (write) requests seen ok=N Number of successful store reqs agn=N Number of store reqs on a page already pending storage diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index 2bf463d26080..5b49a373689b 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -156,6 +156,7 @@ extern atomic_t fscache_n_allocs_ok; extern atomic_t fscache_n_allocs_wait; extern atomic_t fscache_n_allocs_nobufs; extern atomic_t fscache_n_allocs_intr; +extern atomic_t fscache_n_allocs_object_dead; extern atomic_t fscache_n_alloc_ops; extern atomic_t fscache_n_alloc_op_waits; @@ -166,6 +167,7 @@ extern atomic_t fscache_n_retrievals_nodata; extern atomic_t fscache_n_retrievals_nobufs; extern atomic_t fscache_n_retrievals_intr; extern atomic_t fscache_n_retrievals_nomem; +extern atomic_t fscache_n_retrievals_object_dead; extern atomic_t fscache_n_retrieval_ops; extern atomic_t fscache_n_retrieval_op_waits; @@ -249,9 +251,12 @@ static inline void fscache_stat_d(atomic_t *stat) atomic_dec(stat); } +#define __fscache_stat(stat) (stat) + extern const struct file_operations fscache_stats_fops; #else +#define __fscache_stat(stat) (NULL) #define fscache_stat(stat) do {} while (0) #endif diff --git a/fs/fscache/object.c b/fs/fscache/object.c index 74bc562a2cbc..c85c9f582166 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -201,6 +201,7 @@ static void fscache_object_state_machine(struct fscache_object *object) } spin_unlock(&object->lock); fscache_enqueue_dependents(object); + fscache_start_operations(object); goto terminal_transit; /* handle an abort during initialisation */ diff --git a/fs/fscache/page.c b/fs/fscache/page.c index fc76798bd968..c598ea4c4e7d 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -313,6 +313,43 @@ static int fscache_wait_for_deferred_lookup(struct fscache_cookie *cookie) return 0; } +/* + * wait for an object to become active (or dead) + */ +static int fscache_wait_for_retrieval_activation(struct fscache_object *object, + struct fscache_retrieval *op, + atomic_t *stat_op_waits, + atomic_t *stat_object_dead) +{ + int ret; + + if (!test_bit(FSCACHE_OP_WAITING, &op->op.flags)) + goto check_if_dead; + + _debug(">>> WT"); + fscache_stat(stat_op_waits); + if (wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING, + fscache_wait_bit_interruptible, + TASK_INTERRUPTIBLE) < 0) { + ret = fscache_cancel_op(&op->op); + if (ret == 0) + return -ERESTARTSYS; + + /* it's been removed from the pending queue by another party, + * so we should get to run shortly */ + wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING, + fscache_wait_bit, TASK_UNINTERRUPTIBLE); + } + _debug("<<< GO"); + +check_if_dead: + if (unlikely(fscache_object_is_dead(object))) { + fscache_stat(stat_object_dead); + return -ENOBUFS; + } + return 0; +} + /* * read a page from the cache or allocate a block in which to store it * - we return: @@ -376,25 +413,12 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie, /* we wait for the operation to become active, and then process it * *here*, in this thread, and not in the thread pool */ - if (test_bit(FSCACHE_OP_WAITING, &op->op.flags)) { - _debug(">>> WT"); - fscache_stat(&fscache_n_retrieval_op_waits); - if (wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING, - fscache_wait_bit_interruptible, - TASK_INTERRUPTIBLE) < 0) { - ret = fscache_cancel_op(&op->op); - if (ret == 0) { - ret = -ERESTARTSYS; - goto error; - } - - /* it's been removed from the pending queue by another - * party, so we should get to run shortly */ - wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING, - fscache_wait_bit, TASK_UNINTERRUPTIBLE); - } - _debug("<<< GO"); - } + ret = fscache_wait_for_retrieval_activation( + object, op, + __fscache_stat(&fscache_n_retrieval_op_waits), + __fscache_stat(&fscache_n_retrievals_object_dead)); + if (ret < 0) + goto error; /* ask the cache to honour the operation */ if (test_bit(FSCACHE_COOKIE_NO_DATA_YET, &object->cookie->flags)) { @@ -506,25 +530,12 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie, /* we wait for the operation to become active, and then process it * *here*, in this thread, and not in the thread pool */ - if (test_bit(FSCACHE_OP_WAITING, &op->op.flags)) { - _debug(">>> WT"); - fscache_stat(&fscache_n_retrieval_op_waits); - if (wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING, - fscache_wait_bit_interruptible, - TASK_INTERRUPTIBLE) < 0) { - ret = fscache_cancel_op(&op->op); - if (ret == 0) { - ret = -ERESTARTSYS; - goto error; - } - - /* it's been removed from the pending queue by another - * party, so we should get to run shortly */ - wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING, - fscache_wait_bit, TASK_UNINTERRUPTIBLE); - } - _debug("<<< GO"); - } + ret = fscache_wait_for_retrieval_activation( + object, op, + __fscache_stat(&fscache_n_retrieval_op_waits), + __fscache_stat(&fscache_n_retrievals_object_dead)); + if (ret < 0) + goto error; /* ask the cache to honour the operation */ if (test_bit(FSCACHE_COOKIE_NO_DATA_YET, &object->cookie->flags)) { @@ -612,25 +623,12 @@ int __fscache_alloc_page(struct fscache_cookie *cookie, fscache_stat(&fscache_n_alloc_ops); - if (test_bit(FSCACHE_OP_WAITING, &op->op.flags)) { - _debug(">>> WT"); - fscache_stat(&fscache_n_alloc_op_waits); - if (wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING, - fscache_wait_bit_interruptible, - TASK_INTERRUPTIBLE) < 0) { - ret = fscache_cancel_op(&op->op); - if (ret == 0) { - ret = -ERESTARTSYS; - goto error; - } - - /* it's been removed from the pending queue by another - * party, so we should get to run shortly */ - wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING, - fscache_wait_bit, TASK_UNINTERRUPTIBLE); - } - _debug("<<< GO"); - } + ret = fscache_wait_for_retrieval_activation( + object, op, + __fscache_stat(&fscache_n_alloc_op_waits), + __fscache_stat(&fscache_n_allocs_object_dead)); + if (ret < 0) + goto error; /* ask the cache to honour the operation */ fscache_stat(&fscache_n_cop_allocate_page); diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c index 9e15289eb5c1..05f77caf4a2d 100644 --- a/fs/fscache/stats.c +++ b/fs/fscache/stats.c @@ -39,6 +39,7 @@ atomic_t fscache_n_allocs_ok; atomic_t fscache_n_allocs_wait; atomic_t fscache_n_allocs_nobufs; atomic_t fscache_n_allocs_intr; +atomic_t fscache_n_allocs_object_dead; atomic_t fscache_n_alloc_ops; atomic_t fscache_n_alloc_op_waits; @@ -49,6 +50,7 @@ atomic_t fscache_n_retrievals_nodata; atomic_t fscache_n_retrievals_nobufs; atomic_t fscache_n_retrievals_intr; atomic_t fscache_n_retrievals_nomem; +atomic_t fscache_n_retrievals_object_dead; atomic_t fscache_n_retrieval_ops; atomic_t fscache_n_retrieval_op_waits; @@ -188,9 +190,10 @@ static int fscache_stats_show(struct seq_file *m, void *v) atomic_read(&fscache_n_allocs_wait), atomic_read(&fscache_n_allocs_nobufs), atomic_read(&fscache_n_allocs_intr)); - seq_printf(m, "Allocs : ops=%u owt=%u\n", + seq_printf(m, "Allocs : ops=%u owt=%u abt=%u\n", atomic_read(&fscache_n_alloc_ops), - atomic_read(&fscache_n_alloc_op_waits)); + atomic_read(&fscache_n_alloc_op_waits), + atomic_read(&fscache_n_allocs_object_dead)); seq_printf(m, "Retrvls: n=%u ok=%u wt=%u nod=%u nbf=%u" " int=%u oom=%u\n", @@ -201,9 +204,10 @@ static int fscache_stats_show(struct seq_file *m, void *v) atomic_read(&fscache_n_retrievals_nobufs), atomic_read(&fscache_n_retrievals_intr), atomic_read(&fscache_n_retrievals_nomem)); - seq_printf(m, "Retrvls: ops=%u owt=%u\n", + seq_printf(m, "Retrvls: ops=%u owt=%u abt=%u\n", atomic_read(&fscache_n_retrieval_ops), - atomic_read(&fscache_n_retrieval_op_waits)); + atomic_read(&fscache_n_retrieval_op_waits), + atomic_read(&fscache_n_retrievals_object_dead)); seq_printf(m, "Stores : n=%u ok=%u agn=%u nbf=%u oom=%u\n", atomic_read(&fscache_n_stores), diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 4750d5fb419f..907bb56c5888 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -404,6 +404,10 @@ extern const char *fscache_object_states[]; (obj)->state >= FSCACHE_OBJECT_AVAILABLE && \ (obj)->state < FSCACHE_OBJECT_DYING) +#define fscache_object_is_dead(obj) \ + (test_bit(FSCACHE_IOERROR, &(obj)->cache->flags) && \ + (obj)->state >= FSCACHE_OBJECT_DYING) + extern const struct slow_work_ops fscache_object_slow_work_ops; /** -- cgit v1.3-8-gc7d7 From a17754fb8c28af19cd70dcbec6d5b0773b94e0c1 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Nov 2009 18:11:52 +0000 Subject: CacheFiles: Don't write a full page if there's only a partial page to cache cachefiles_write_page() writes a full page to the backing file for the last page of the netfs file, even if the netfs file's last page is only a partial page. This causes the EOF on the backing file to be extended beyond the EOF of the netfs, and thus the backing file will be truncated by cachefiles_attr_changed() called from cachefiles_lookup_object(). So we need to limit the write we make to the backing file on that last page such that it doesn't push the EOF too far. Also, if a backing file that has a partial page at the end is expanded, we discard the partial page and refetch it on the basis that we then have a hole in the file with invalid data, and should the power go out... A better way to deal with this could be to record a note that the partial page contains invalid data until the correct data is written into it. This isn't a problem for netfs's that discard the whole backing file if the file size changes (such as NFS). Signed-off-by: David Howells --- fs/cachefiles/interface.c | 20 +++++++++++++++++--- fs/cachefiles/rdwr.c | 23 +++++++++++++++++++---- include/linux/fscache-cache.h | 3 +++ 3 files changed, 39 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c index dd7f852746cb..8e67abf05985 100644 --- a/fs/cachefiles/interface.c +++ b/fs/cachefiles/interface.c @@ -404,12 +404,26 @@ static int cachefiles_attr_changed(struct fscache_object *_object) if (oi_size == ni_size) return 0; - newattrs.ia_size = ni_size; - newattrs.ia_valid = ATTR_SIZE; - cachefiles_begin_secure(cache, &saved_cred); mutex_lock(&object->backer->d_inode->i_mutex); + + /* if there's an extension to a partial page at the end of the backing + * file, we need to discard the partial page so that we pick up new + * data after it */ + if (oi_size & ~PAGE_MASK && ni_size > oi_size) { + _debug("discard tail %llx", oi_size); + newattrs.ia_valid = ATTR_SIZE; + newattrs.ia_size = oi_size & PAGE_MASK; + ret = notify_change(object->backer, &newattrs); + if (ret < 0) + goto truncate_failed; + } + + newattrs.ia_valid = ATTR_SIZE; + newattrs.ia_size = ni_size; ret = notify_change(object->backer, &newattrs); + +truncate_failed: mutex_unlock(&object->backer->d_inode->i_mutex); cachefiles_end_secure(cache, saved_cred); diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index 3304646dae84..5a84fd7109ad 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -803,7 +803,8 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page) struct cachefiles_cache *cache; mm_segment_t old_fs; struct file *file; - loff_t pos; + loff_t pos, eof; + size_t len; void *data; int ret; @@ -837,15 +838,29 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page) ret = -EIO; if (file->f_op->write) { pos = (loff_t) page->index << PAGE_SHIFT; + + /* we mustn't write more data than we have, so we have + * to beware of a partial page at EOF */ + eof = object->fscache.store_limit_l; + len = PAGE_SIZE; + if (eof & ~PAGE_MASK) { + ASSERTCMP(pos, <, eof); + if (eof - pos < PAGE_SIZE) { + _debug("cut short %llx to %llx", + pos, eof); + len = eof - pos; + ASSERTCMP(pos + len, ==, eof); + } + } + data = kmap(page); old_fs = get_fs(); set_fs(KERNEL_DS); ret = file->f_op->write( - file, (const void __user *) data, PAGE_SIZE, - &pos); + file, (const void __user *) data, len, &pos); set_fs(old_fs); kunmap(page); - if (ret != PAGE_SIZE) + if (ret != len) ret = -EIO; } fput(file); diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 907bb56c5888..5db50002f3b5 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -395,6 +395,7 @@ struct fscache_object { struct rb_node objlist_link; /* link in global object list */ #endif pgoff_t store_limit; /* current storage limit */ + loff_t store_limit_l; /* current storage limit */ }; extern const char *fscache_object_states[]; @@ -439,6 +440,7 @@ void fscache_object_init(struct fscache_object *object, object->events = object->event_mask = 0; object->flags = 0; object->store_limit = 0; + object->store_limit_l = 0; object->cache = cache; object->cookie = cookie; object->parent = NULL; @@ -491,6 +493,7 @@ static inline void fscache_object_lookup_error(struct fscache_object *object) static inline void fscache_set_store_limit(struct fscache_object *object, loff_t i_size) { + object->store_limit_l = i_size; object->store_limit = i_size >> PAGE_SHIFT; if (i_size & ~PAGE_MASK) object->store_limit++; -- cgit v1.3-8-gc7d7 From fee096deb4f33897937b974cb2c5168bab7935be Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Nov 2009 18:12:05 +0000 Subject: CacheFiles: Catch an overly long wait for an old active object Catch an overly long wait for an old, dying active object when we want to replace it with a new one. The probability is that all the slow-work threads are hogged, and the delete can't get a look in. What we do instead is: (1) if there's nothing in the slow work queue, we sleep until either the dying object has finished dying or there is something in the slow work queue behind which we can queue our object. (2) if there is something in the slow work queue, we return ETIMEDOUT to fscache_lookup_object(), which then puts us back on the slow work queue, presumably behind the deletion that we're blocked by. We are then deferred for a while until we work our way back through the queue - without blocking a slow-work thread unnecessarily. A backtrace similar to the following may appear in the log without this patch: INFO: task kslowd004:5711 blocked for more than 120 seconds. "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. kslowd004 D 0000000000000000 0 5711 2 0x00000080 ffff88000340bb80 0000000000000046 ffff88002550d000 0000000000000000 ffff88002550d000 0000000000000007 ffff88000340bfd8 ffff88002550d2a8 000000000000ddf0 00000000000118c0 00000000000118c0 ffff88002550d2a8 Call Trace: [] ? trace_hardirqs_on+0xd/0xf [] ? cachefiles_wait_bit+0x0/0xd [cachefiles] [] cachefiles_wait_bit+0x9/0xd [cachefiles] [] __wait_on_bit+0x43/0x76 [] ? ext3_xattr_get+0x1ec/0x270 [] out_of_line_wait_on_bit+0x69/0x74 [] ? cachefiles_wait_bit+0x0/0xd [cachefiles] [] ? wake_bit_function+0x0/0x2e [] cachefiles_mark_object_active+0x203/0x23b [cachefiles] [] cachefiles_walk_to_object+0x558/0x827 [cachefiles] [] cachefiles_lookup_object+0xac/0x12a [cachefiles] [] fscache_lookup_object+0x1c7/0x214 [fscache] [] fscache_object_state_machine+0xa5/0x52d [fscache] [] fscache_object_slow_work_execute+0x5f/0xa0 [fscache] [] slow_work_execute+0x18f/0x2d1 [] slow_work_thread+0x1c5/0x308 [] ? autoremove_wake_function+0x0/0x34 [] ? slow_work_thread+0x0/0x308 [] kthread+0x7a/0x82 [] child_rip+0xa/0x20 [] ? restore_args+0x0/0x30 [] ? kthread+0x0/0x82 [] ? child_rip+0x0/0x20 1 lock held by kslowd004/5711: #0: (&sb->s_type->i_mutex_key#7/1){+.+.+.}, at: [] cachefiles_walk_to_object+0x1b3/0x827 [cachefiles] Signed-off-by: David Howells --- Documentation/filesystems/caching/fscache.txt | 1 + fs/cachefiles/interface.c | 6 +- fs/cachefiles/namei.c | 87 +++++++++++++++++++++------ fs/fscache/internal.h | 1 + fs/fscache/object.c | 10 ++- fs/fscache/stats.c | 4 +- include/linux/fscache-cache.h | 6 +- 7 files changed, 90 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/Documentation/filesystems/caching/fscache.txt b/Documentation/filesystems/caching/fscache.txt index 3c23411956bb..a91e2e2095b0 100644 --- a/Documentation/filesystems/caching/fscache.txt +++ b/Documentation/filesystems/caching/fscache.txt @@ -235,6 +235,7 @@ proc files. neg=N Number of negative lookups made pos=N Number of positive lookups made crt=N Number of objects created by lookup + tmo=N Number of lookups timed out and requeued Updates n=N Number of update cookie requests seen nul=N Number of upd reqs given a NULL parent run=N Number of upd reqs granted CPU time diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c index 8e67abf05985..9d3c426044ae 100644 --- a/fs/cachefiles/interface.c +++ b/fs/cachefiles/interface.c @@ -114,8 +114,9 @@ nomem_lookup_data: /* * attempt to look up the nominated node in this cache + * - return -ETIMEDOUT to be scheduled again */ -static void cachefiles_lookup_object(struct fscache_object *_object) +static int cachefiles_lookup_object(struct fscache_object *_object) { struct cachefiles_lookup_data *lookup_data; struct cachefiles_object *parent, *object; @@ -145,13 +146,14 @@ static void cachefiles_lookup_object(struct fscache_object *_object) object->fscache.cookie->def->type != FSCACHE_COOKIE_TYPE_INDEX) cachefiles_attr_changed(&object->fscache); - if (ret < 0) { + if (ret < 0 && ret != -ETIMEDOUT) { printk(KERN_WARNING "CacheFiles: Lookup failed error %d\n", ret); fscache_object_lookup_error(&object->fscache); } _leave(" [%d]", ret); + return ret; } /* diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index 00a0cda8f47a..14ac4806e291 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -21,12 +21,6 @@ #include #include "internal.h" -static int cachefiles_wait_bit(void *flags) -{ - schedule(); - return 0; -} - #define CACHEFILES_KEYBUF_SIZE 512 /* @@ -100,8 +94,8 @@ static noinline void cachefiles_printk_object(struct cachefiles_object *object, /* * record the fact that an object is now active */ -static void cachefiles_mark_object_active(struct cachefiles_cache *cache, - struct cachefiles_object *object) +static int cachefiles_mark_object_active(struct cachefiles_cache *cache, + struct cachefiles_object *object) { struct cachefiles_object *xobject; struct rb_node **_p, *_parent = NULL; @@ -139,8 +133,8 @@ try_again: rb_insert_color(&object->active_node, &cache->active_nodes); write_unlock(&cache->active_lock); - _leave(""); - return; + _leave(" = 0"); + return 0; /* an old object from a previous incarnation is hogging the slot - we * need to wait for it to be destroyed */ @@ -155,13 +149,64 @@ wait_for_old_object: atomic_inc(&xobject->usage); write_unlock(&cache->active_lock); - _debug(">>> wait"); - wait_on_bit(&xobject->flags, CACHEFILES_OBJECT_ACTIVE, - cachefiles_wait_bit, TASK_UNINTERRUPTIBLE); - _debug("<<< waited"); + if (test_bit(CACHEFILES_OBJECT_ACTIVE, &xobject->flags)) { + wait_queue_head_t *wq; + + signed long timeout = 60 * HZ; + wait_queue_t wait; + bool requeue; + + /* if the object we're waiting for is queued for processing, + * then just put ourselves on the queue behind it */ + if (slow_work_is_queued(&xobject->fscache.work)) { + _debug("queue OBJ%x behind OBJ%x immediately", + object->fscache.debug_id, + xobject->fscache.debug_id); + goto requeue; + } + + /* otherwise we sleep until either the object we're waiting for + * is done, or the slow-work facility wants the thread back to + * do other work */ + wq = bit_waitqueue(&xobject->flags, CACHEFILES_OBJECT_ACTIVE); + init_wait(&wait); + requeue = false; + do { + prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE); + if (!test_bit(CACHEFILES_OBJECT_ACTIVE, &xobject->flags)) + break; + requeue = slow_work_sleep_till_thread_needed( + &object->fscache.work, &timeout); + } while (timeout > 0 && !requeue); + finish_wait(wq, &wait); + + if (requeue && + test_bit(CACHEFILES_OBJECT_ACTIVE, &xobject->flags)) { + _debug("queue OBJ%x behind OBJ%x after wait", + object->fscache.debug_id, + xobject->fscache.debug_id); + goto requeue; + } + + if (timeout <= 0) { + printk(KERN_ERR "\n"); + printk(KERN_ERR "CacheFiles: Error: Overlong" + " wait for old active object to go away\n"); + cachefiles_printk_object(object, xobject); + goto requeue; + } + } + + ASSERT(!test_bit(CACHEFILES_OBJECT_ACTIVE, &xobject->flags)); cache->cache.ops->put_object(&xobject->fscache); goto try_again; + +requeue: + clear_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags); + cache->cache.ops->put_object(&xobject->fscache); + _leave(" = -ETIMEDOUT"); + return -ETIMEDOUT; } /* @@ -466,12 +511,15 @@ lookup_again: } /* note that we're now using this object */ - cachefiles_mark_object_active(cache, object); + ret = cachefiles_mark_object_active(cache, object); mutex_unlock(&dir->d_inode->i_mutex); dput(dir); dir = NULL; + if (ret == -ETIMEDOUT) + goto mark_active_timed_out; + _debug("=== OBTAINED_OBJECT ==="); if (object->new) { @@ -515,6 +563,10 @@ create_error: cachefiles_io_error(cache, "Create/mkdir failed"); goto error; +mark_active_timed_out: + _debug("mark active timed out"); + goto release_dentry; + check_error: _debug("check error %d", ret); write_lock(&cache->active_lock); @@ -522,7 +574,7 @@ check_error: clear_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags); wake_up_bit(&object->flags, CACHEFILES_OBJECT_ACTIVE); write_unlock(&cache->active_lock); - +release_dentry: dput(object->dentry); object->dentry = NULL; goto error_out; @@ -543,9 +595,6 @@ error: error_out2: dput(dir); error_out: - if (ret == -ENOSPC) - ret = -ENOBUFS; - _leave(" = error %d", -ret); return ret; } diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index 5b49a373689b..0ca2566e038c 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -215,6 +215,7 @@ extern atomic_t fscache_n_object_no_alloc; extern atomic_t fscache_n_object_lookups; extern atomic_t fscache_n_object_lookups_negative; extern atomic_t fscache_n_object_lookups_positive; +extern atomic_t fscache_n_object_lookups_timed_out; extern atomic_t fscache_n_object_created; extern atomic_t fscache_n_object_avail; extern atomic_t fscache_n_object_dead; diff --git a/fs/fscache/object.c b/fs/fscache/object.c index f3f952cf887e..e513ac599c8e 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -468,6 +468,7 @@ static void fscache_lookup_object(struct fscache_object *object) { struct fscache_cookie *cookie = object->cookie; struct fscache_object *parent; + int ret; _enter(""); @@ -493,12 +494,19 @@ static void fscache_lookup_object(struct fscache_object *object) fscache_stat(&fscache_n_object_lookups); fscache_stat(&fscache_n_cop_lookup_object); - object->cache->ops->lookup_object(object); + ret = object->cache->ops->lookup_object(object); fscache_stat_d(&fscache_n_cop_lookup_object); if (test_bit(FSCACHE_OBJECT_EV_ERROR, &object->events)) set_bit(FSCACHE_COOKIE_UNAVAILABLE, &cookie->flags); + if (ret == -ETIMEDOUT) { + /* probably stuck behind another object, so move this one to + * the back of the queue */ + fscache_stat(&fscache_n_object_lookups_timed_out); + set_bit(FSCACHE_OBJECT_EV_REQUEUE, &object->events); + } + _leave(""); } diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c index 05f77caf4a2d..46435f3aae68 100644 --- a/fs/fscache/stats.c +++ b/fs/fscache/stats.c @@ -98,6 +98,7 @@ atomic_t fscache_n_object_no_alloc; atomic_t fscache_n_object_lookups; atomic_t fscache_n_object_lookups_negative; atomic_t fscache_n_object_lookups_positive; +atomic_t fscache_n_object_lookups_timed_out; atomic_t fscache_n_object_created; atomic_t fscache_n_object_avail; atomic_t fscache_n_object_dead; @@ -160,10 +161,11 @@ static int fscache_stats_show(struct seq_file *m, void *v) atomic_read(&fscache_n_acquires_nobufs), atomic_read(&fscache_n_acquires_oom)); - seq_printf(m, "Lookups: n=%u neg=%u pos=%u crt=%u\n", + seq_printf(m, "Lookups: n=%u neg=%u pos=%u crt=%u tmo=%u\n", atomic_read(&fscache_n_object_lookups), atomic_read(&fscache_n_object_lookups_negative), atomic_read(&fscache_n_object_lookups_positive), + atomic_read(&fscache_n_object_lookups_timed_out), atomic_read(&fscache_n_object_created)); seq_printf(m, "Updates: n=%u nul=%u run=%u\n", diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 5db50002f3b5..7be0c6fbe880 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -234,8 +234,10 @@ struct fscache_cache_ops { struct fscache_object *(*alloc_object)(struct fscache_cache *cache, struct fscache_cookie *cookie); - /* look up the object for a cookie */ - void (*lookup_object)(struct fscache_object *object); + /* look up the object for a cookie + * - return -ETIMEDOUT to be requeued + */ + int (*lookup_object)(struct fscache_object *object); /* finished looking up */ void (*lookup_complete)(struct fscache_object *object); -- cgit v1.3-8-gc7d7 From fb141597550243b471f3bd526fe689aa3b74df25 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sun, 8 Nov 2009 19:45:54 -0800 Subject: Input: ucb1400_ts - allow passing IRQ through platfrom_data This patch allows UCB1400 to get IRQ GPIO from platform data. In case platform_data are not supplied or the IRQ supplied in the platform_data is negative, fall back to the old IRQ detection algorithm. Signed-off-by: Marek Vasut Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/ucb1400_ts.c | 11 +++++++---- drivers/mfd/ucb1400_core.c | 7 +++++++ include/linux/ucb1400.h | 4 ++++ 3 files changed, 18 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/input/touchscreen/ucb1400_ts.c b/drivers/input/touchscreen/ucb1400_ts.c index 095f84b1f56e..89dcbe7b4b02 100644 --- a/drivers/input/touchscreen/ucb1400_ts.c +++ b/drivers/input/touchscreen/ucb1400_ts.c @@ -355,10 +355,13 @@ static int ucb1400_ts_probe(struct platform_device *dev) goto err; } - error = ucb1400_ts_detect_irq(ucb); - if (error) { - printk(KERN_ERR "UCB1400: IRQ probe failed\n"); - goto err_free_devs; + /* Only in case the IRQ line wasn't supplied, try detecting it */ + if (ucb->irq < 0) { + error = ucb1400_ts_detect_irq(ucb); + if (error) { + printk(KERN_ERR "UCB1400: IRQ probe failed\n"); + goto err_free_devs; + } } init_waitqueue_head(&ucb->ts_wait); diff --git a/drivers/mfd/ucb1400_core.c b/drivers/mfd/ucb1400_core.c index fa294b6d600a..85fd9421be94 100644 --- a/drivers/mfd/ucb1400_core.c +++ b/drivers/mfd/ucb1400_core.c @@ -51,6 +51,7 @@ static int ucb1400_core_probe(struct device *dev) struct ucb1400_ts ucb_ts; struct ucb1400_gpio ucb_gpio; struct snd_ac97 *ac97; + struct ucb1400_pdata *pdata = dev->platform_data; memset(&ucb_ts, 0, sizeof(ucb_ts)); memset(&ucb_gpio, 0, sizeof(ucb_gpio)); @@ -88,6 +89,12 @@ static int ucb1400_core_probe(struct device *dev) /* TOUCHSCREEN */ ucb_ts.ac97 = ac97; + + if (pdata != NULL && pdata->irq >= 0) + ucb_ts.irq = pdata->irq; + else + ucb_ts.irq = -1; + ucb->ucb1400_ts = platform_device_alloc("ucb1400_ts", -1); if (!ucb->ucb1400_ts) { err = -ENOMEM; diff --git a/include/linux/ucb1400.h b/include/linux/ucb1400.h index adb44066680c..1b4790911052 100644 --- a/include/linux/ucb1400.h +++ b/include/linux/ucb1400.h @@ -110,6 +110,10 @@ struct ucb1400 { struct platform_device *ucb1400_gpio; }; +struct ucb1400_pdata { + int irq; +}; + static inline u16 ucb1400_reg_read(struct snd_ac97 *ac97, u16 reg) { return ac97->bus->ops->read(ac97, reg); -- cgit v1.3-8-gc7d7 From dad725d089b94bce8bbc769b7471dcfba3fbda0e Mon Sep 17 00:00:00 2001 From: Samu Onkalo Date: Fri, 13 Nov 2009 21:13:22 -0800 Subject: Input: input-polldev - add sysfs interface for controlling poll interval Sysfs entry for reading and setting of the polling interval. If the interval is set to 0, polling is stopped. Polling is restarted when interval is changed to non-zero. sysfs entries: poll = current polling interval in msec (RW) max = max allowed polling interval (RO) min = min allowed polling interval (RO) Minimum and maximum limit for interval can be set while setting up the device. Interval can be adjusted even if the input device is not currently open. [dtor@mail.ru: add kernel doc markup for the new fields] Signed-off-by: Samu Onkalo Signed-off-by: Dmitry Torokhov --- drivers/input/input-polldev.c | 111 +++++++++++++++++++++++++++++++++++++++--- include/linux/input-polldev.h | 11 ++++- 2 files changed, 115 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/input/input-polldev.c b/drivers/input/input-polldev.c index 910220c127cb..31874275fed0 100644 --- a/drivers/input/input-polldev.c +++ b/drivers/input/input-polldev.c @@ -56,14 +56,10 @@ static void input_polldev_stop_workqueue(void) mutex_unlock(&polldev_mutex); } -static void input_polled_device_work(struct work_struct *work) +static void input_polldev_queue_work(struct input_polled_dev *dev) { - struct input_polled_dev *dev = - container_of(work, struct input_polled_dev, work.work); unsigned long delay; - dev->poll(dev); - delay = msecs_to_jiffies(dev->poll_interval); if (delay >= HZ) delay = round_jiffies_relative(delay); @@ -71,6 +67,15 @@ static void input_polled_device_work(struct work_struct *work) queue_delayed_work(polldev_wq, &dev->work, delay); } +static void input_polled_device_work(struct work_struct *work) +{ + struct input_polled_dev *dev = + container_of(work, struct input_polled_dev, work.work); + + dev->poll(dev); + input_polldev_queue_work(dev); +} + static int input_open_polled_device(struct input_dev *input) { struct input_polled_dev *dev = input_get_drvdata(input); @@ -100,6 +105,83 @@ static void input_close_polled_device(struct input_dev *input) dev->close(dev); } +/* SYSFS interface */ + +static ssize_t input_polldev_get_poll(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct input_polled_dev *polldev = dev_get_drvdata(dev); + + return sprintf(buf, "%d\n", polldev->poll_interval); +} + +static ssize_t input_polldev_set_poll(struct device *dev, + struct device_attribute *attr, const char *buf, + size_t count) +{ + struct input_polled_dev *polldev = dev_get_drvdata(dev); + struct input_dev *input = polldev->input; + unsigned long interval; + + if (strict_strtoul(buf, 0, &interval)) + return -EINVAL; + + if (interval < polldev->poll_interval_min) + return -EINVAL; + + if (interval > polldev->poll_interval_max) + return -EINVAL; + + mutex_lock(&input->mutex); + + polldev->poll_interval = interval; + + if (input->users) { + cancel_delayed_work_sync(&polldev->work); + if (polldev->poll_interval > 0) + input_polldev_queue_work(polldev); + } + + mutex_unlock(&input->mutex); + + return count; +} + +static DEVICE_ATTR(poll, S_IRUGO | S_IWUSR, input_polldev_get_poll, + input_polldev_set_poll); + + +static ssize_t input_polldev_get_max(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct input_polled_dev *polldev = dev_get_drvdata(dev); + + return sprintf(buf, "%d\n", polldev->poll_interval_max); +} + +static DEVICE_ATTR(max, S_IRUGO, input_polldev_get_max, NULL); + +static ssize_t input_polldev_get_min(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct input_polled_dev *polldev = dev_get_drvdata(dev); + + return sprintf(buf, "%d\n", polldev->poll_interval_min); +} + +static DEVICE_ATTR(min, S_IRUGO, input_polldev_get_min, NULL); + +static struct attribute *sysfs_attrs[] = { + &dev_attr_poll.attr, + &dev_attr_max.attr, + &dev_attr_min.attr, + NULL +}; + +static struct attribute_group input_polldev_attribute_group = { + .attrs = sysfs_attrs +}; + /** * input_allocate_polled_device - allocated memory polled device * @@ -153,15 +235,29 @@ EXPORT_SYMBOL(input_free_polled_device); int input_register_polled_device(struct input_polled_dev *dev) { struct input_dev *input = dev->input; + int error; input_set_drvdata(input, dev); INIT_DELAYED_WORK(&dev->work, input_polled_device_work); if (!dev->poll_interval) dev->poll_interval = 500; + if (!dev->poll_interval_max) + dev->poll_interval_max = dev->poll_interval; input->open = input_open_polled_device; input->close = input_close_polled_device; - return input_register_device(input); + error = input_register_device(input); + if (error) + return error; + + error = sysfs_create_group(&input->dev.kobj, + &input_polldev_attribute_group); + if (error) { + input_unregister_device(input); + return error; + } + + return 0; } EXPORT_SYMBOL(input_register_polled_device); @@ -177,6 +273,9 @@ EXPORT_SYMBOL(input_register_polled_device); */ void input_unregister_polled_device(struct input_polled_dev *dev) { + sysfs_remove_group(&dev->input->dev.kobj, + &input_polldev_attribute_group); + input_unregister_device(dev->input); dev->input = NULL; } diff --git a/include/linux/input-polldev.h b/include/linux/input-polldev.h index 5c0ec68a965e..5e3dddf8f562 100644 --- a/include/linux/input-polldev.h +++ b/include/linux/input-polldev.h @@ -21,7 +21,12 @@ * longer being polled. Used to put device into low power mode. * @poll: driver-supplied method that polls the device and posts * input events (mandatory). - * @poll_interval: specifies how often the poll() method shoudl be called. + * @poll_interval: specifies how often the poll() method should be called. + * Defaults to 500 msec unless overriden when registering the device. + * @poll_interval_max: specifies upper bound for the poll interval. + * Defaults to the initial value of @poll_interval. + * @poll_interval_min: specifies lower bound for the poll interval. + * Defaults to 0. * @input: input device structire associated with the polled device. * Must be properly initialized by the driver (id, name, phys, bits). * @@ -36,8 +41,12 @@ struct input_polled_dev { void (*close)(struct input_polled_dev *dev); void (*poll)(struct input_polled_dev *dev); unsigned int poll_interval; /* msec */ + unsigned int poll_interval_max; /* msec */ + unsigned int poll_interval_min; /* msec */ struct input_dev *input; + +/* private: */ struct delayed_work work; }; -- cgit v1.3-8-gc7d7 From d69249f4b6857c0b23ceca270ae591381b16bba9 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 16 Nov 2009 22:12:20 -0800 Subject: Input: input-polldev, matrix-keypad - include in kernel doc Make sure that polled input device and matrix keypad APIs are included with the rest of input API when generating kernel documentation. Also description of absres was missing as well. Signed-off-by: Dmitry Torokhov --- Documentation/DocBook/device-drivers.tmpl | 9 +++++++++ include/linux/input.h | 1 + include/linux/input/matrix_keypad.h | 3 +++ 3 files changed, 13 insertions(+) (limited to 'include') diff --git a/Documentation/DocBook/device-drivers.tmpl b/Documentation/DocBook/device-drivers.tmpl index 94a20fe8fedf..e994d1d9fbe6 100644 --- a/Documentation/DocBook/device-drivers.tmpl +++ b/Documentation/DocBook/device-drivers.tmpl @@ -293,10 +293,19 @@ X!Idrivers/video/console/fonts.c Input Subsystem + Input core !Iinclude/linux/input.h !Edrivers/input/input.c !Edrivers/input/ff-core.c !Edrivers/input/ff-memless.c + + Polled input devices +!Iinclude/linux/input-polldev.h +!Edrivers/input/input-polldev.c + + Matrix keyboars/keypads +!Iinclude/linux/input/matrix_keypad.h + diff --git a/include/linux/input.h b/include/linux/input.h index 9a04e26daab2..56d8e048c646 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -1040,6 +1040,7 @@ struct ff_effect { * @absmin: minimum values for events coming from absolute axes * @absfuzz: describes noisiness for axes * @absflat: size of the center flat position (used by joydev) + * @absres: resolution used for events coming form absolute axes * @open: this method is called when the very first user calls * input_open_device(). The driver must prepare the device * to start generating events (start polling thread, diff --git a/include/linux/input/matrix_keypad.h b/include/linux/input/matrix_keypad.h index b3cd42d50e16..3bd018baae20 100644 --- a/include/linux/input/matrix_keypad.h +++ b/include/linux/input/matrix_keypad.h @@ -41,6 +41,9 @@ struct matrix_keymap_data { * @col_scan_delay_us: delay, measured in microseconds, that is * needed before we can keypad after activating column gpio * @debounce_ms: debounce interval in milliseconds + * @active_low: gpio polarity + * @wakeup: controls whether the device should be set up as wakeup + * source * * This structure represents platform-specific data that use used by * matrix_keypad driver to perform proper initialization. -- cgit v1.3-8-gc7d7 From 8629ea2eaba8ca0de2e38ce1b4a825e16255976e Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Thu, 3 Sep 2009 16:32:53 +0800 Subject: hrtimer: Fix /proc/timer_list regression commit 507e1231 (timer stats: Optimize by adding quick check to avoid function calls) introduced a regression in /proc/timer_list. /proc/timer_list shows now #0: , tick_sched_timer, S:01, <(null)>, /-1 instead of #0: , tick_sched_timer, S:01, hrtimer_start, swapper/0 Revert the hrtimer quick check for now. The optimization needs more thought, but this is neither 2.6.32-rc7 nor stable material. [ tglx: - Removed unrelated changes from the original patch - Prevent unneccesary call to timer_stats_update_stats - massaged the changelog ] Signed-off-by: Feng Tang LKML-Reference: Cc: Heiko Carstens Cc: stable@kernel.org Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index ff037f0b1b4e..9bace4b9f4fe 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -446,7 +446,7 @@ extern void timer_stats_update_stats(void *timer, pid_t pid, void *startf, static inline void timer_stats_account_hrtimer(struct hrtimer *timer) { - if (likely(!timer->start_site)) + if (likely(!timer_stats_active)) return; timer_stats_update_stats(timer, timer->start_pid, timer->start_site, timer->function, timer->start_comm, 0); @@ -457,8 +457,6 @@ extern void __timer_stats_hrtimer_set_start_info(struct hrtimer *timer, static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer) { - if (likely(!timer_stats_active)) - return; __timer_stats_hrtimer_set_start_info(timer, __builtin_return_address(0)); } -- cgit v1.3-8-gc7d7 From 8964be4a9a5ca8cab1219bb046db2f6d1936227c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 20 Nov 2009 15:35:04 -0800 Subject: net: rename skb->iif to skb->skb_iif To help grep games, rename iif to skb_iif Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/ieee802154/fakehard.c | 2 +- drivers/net/ifb.c | 6 +++--- include/linux/skbuff.h | 4 ++-- include/net/pkt_cls.h | 4 ++-- net/core/dev.c | 6 +++--- net/core/skbuff.c | 2 +- net/netlabel/netlabel_unlabeled.c | 2 +- net/sched/act_mirred.c | 2 +- net/sched/cls_flow.c | 2 +- security/selinux/hooks.c | 6 +++--- security/smack/smack_lsm.c | 4 ++-- 11 files changed, 20 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/drivers/ieee802154/fakehard.c b/drivers/ieee802154/fakehard.c index f877f13e3ab3..617549f30ef9 100644 --- a/drivers/ieee802154/fakehard.c +++ b/drivers/ieee802154/fakehard.c @@ -282,7 +282,7 @@ static int ieee802154_fake_close(struct net_device *dev) static netdev_tx_t ieee802154_fake_xmit(struct sk_buff *skb, struct net_device *dev) { - skb->iif = dev->ifindex; + skb->skb_iif = dev->ifindex; skb->dev = dev; dev->stats.tx_packets++; dev->stats.tx_bytes += skb->len; diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c index 69c25668dd63..f4081c0a2d9c 100644 --- a/drivers/net/ifb.c +++ b/drivers/net/ifb.c @@ -99,7 +99,7 @@ static void ri_tasklet(unsigned long dev) stats->tx_bytes +=skb->len; rcu_read_lock(); - skb->dev = dev_get_by_index_rcu(&init_net, skb->iif); + skb->dev = dev_get_by_index_rcu(&init_net, skb->skb_iif); if (!skb->dev) { rcu_read_unlock(); dev_kfree_skb(skb); @@ -107,7 +107,7 @@ static void ri_tasklet(unsigned long dev) break; } rcu_read_unlock(); - skb->iif = _dev->ifindex; + skb->skb_iif = _dev->ifindex; if (from & AT_EGRESS) { dp->st_rx_frm_egr++; @@ -172,7 +172,7 @@ static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev) stats->rx_packets++; stats->rx_bytes+=skb->len; - if (!(from & (AT_INGRESS|AT_EGRESS)) || !skb->iif) { + if (!(from & (AT_INGRESS|AT_EGRESS)) || !skb->skb_iif) { dev_kfree_skb(skb); stats->rx_dropped++; return NETDEV_TX_OK; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 63f47426977a..89eed8cdd318 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -299,7 +299,7 @@ typedef unsigned char *sk_buff_data_t; * @nfctinfo: Relationship of this skb to the connection * @nfct_reasm: netfilter conntrack re-assembly pointer * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c - * @iif: ifindex of device we arrived on + * @skb_iif: ifindex of device we arrived on * @queue_mapping: Queue mapping for multiqueue devices * @tc_index: Traffic control index * @tc_verd: traffic control verdict @@ -366,7 +366,7 @@ struct sk_buff { struct nf_bridge_info *nf_bridge; #endif - int iif; + int skb_iif; #ifdef CONFIG_NET_SCHED __u16 tc_index; /* traffic control index */ #ifdef CONFIG_NET_CLS_ACT diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 3dd210d073ca..dd3031aed9d5 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -343,9 +343,9 @@ tcf_match_indev(struct sk_buff *skb, char *indev) struct net_device *dev; if (indev[0]) { - if (!skb->iif) + if (!skb->skb_iif) return 0; - dev = __dev_get_by_index(dev_net(skb->dev), skb->iif); + dev = __dev_get_by_index(dev_net(skb->dev), skb->skb_iif); if (!dev || strcmp(indev, dev->name)) return 0; } diff --git a/net/core/dev.c b/net/core/dev.c index 9977288583b8..09f3d6b9c0c8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2287,7 +2287,7 @@ static int ing_filter(struct sk_buff *skb) if (MAX_RED_LOOP < ttl++) { printk(KERN_WARNING "Redir loop detected Dropping packet (%d->%d)\n", - skb->iif, dev->ifindex); + skb->skb_iif, dev->ifindex); return TC_ACT_SHOT; } @@ -2395,8 +2395,8 @@ int netif_receive_skb(struct sk_buff *skb) if (netpoll_receive_skb(skb)) return NET_RX_DROP; - if (!skb->iif) - skb->iif = skb->dev->ifindex; + if (!skb->skb_iif) + skb->skb_iif = skb->dev->ifindex; null_or_orig = NULL; orig_dev = skb->dev; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 739b8f4dd327..bfa3e7865a8c 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -549,7 +549,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) #endif new->protocol = old->protocol; new->mark = old->mark; - new->iif = old->iif; + new->skb_iif = old->skb_iif; __nf_copy(new, old); #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 3dfe2bac8623..98ed22ee2ff4 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -1550,7 +1550,7 @@ int netlbl_unlabel_getattr(const struct sk_buff *skb, struct netlbl_unlhsh_iface *iface; rcu_read_lock(); - iface = netlbl_unlhsh_search_iface_def(skb->iif); + iface = netlbl_unlhsh_search_iface_def(skb->skb_iif); if (iface == NULL) goto unlabel_getattr_nolabel; switch (family) { diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 797479369881..d329170243cb 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -185,7 +185,7 @@ static int tcf_mirred(struct sk_buff *skb, struct tc_action *a, skb2->tc_verd = SET_TC_FROM(skb2->tc_verd, at); skb2->dev = dev; - skb2->iif = skb->dev->ifindex; + skb2->skb_iif = skb->dev->ifindex; dev_queue_xmit(skb2); err = 0; diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 9402a7fd3785..e054c62857e1 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -171,7 +171,7 @@ static u32 flow_get_proto_dst(const struct sk_buff *skb) static u32 flow_get_iif(const struct sk_buff *skb) { - return skb->iif; + return skb->skb_iif; } static u32 flow_get_priority(const struct sk_buff *skb) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index bb230d5d7085..83a4aada0b4c 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -4085,7 +4085,7 @@ static int selinux_sock_rcv_skb_compat(struct sock *sk, struct sk_buff *skb, char *addrp; COMMON_AUDIT_DATA_INIT(&ad, NET); - ad.u.net.netif = skb->iif; + ad.u.net.netif = skb->skb_iif; ad.u.net.family = family; err = selinux_parse_skb(skb, &ad, &addrp, 1, NULL); if (err) @@ -4147,7 +4147,7 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) return 0; COMMON_AUDIT_DATA_INIT(&ad, NET); - ad.u.net.netif = skb->iif; + ad.u.net.netif = skb->skb_iif; ad.u.net.family = family; err = selinux_parse_skb(skb, &ad, &addrp, 1, NULL); if (err) @@ -4159,7 +4159,7 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) err = selinux_skb_peerlbl_sid(skb, family, &peer_sid); if (err) return err; - err = selinux_inet_sys_rcv_skb(skb->iif, addrp, family, + err = selinux_inet_sys_rcv_skb(skb->skb_iif, addrp, family, peer_sid, &ad); if (err) { selinux_netlbl_err(skb, err, 0); diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index c33b6bb9b6dd..529c9ca65878 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -2602,7 +2602,7 @@ static int smack_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) #ifdef CONFIG_AUDIT smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_NET); ad.a.u.net.family = sk->sk_family; - ad.a.u.net.netif = skb->iif; + ad.a.u.net.netif = skb->skb_iif; ipv4_skb_to_auditdata(skb, &ad.a, NULL); #endif /* @@ -2757,7 +2757,7 @@ static int smack_inet_conn_request(struct sock *sk, struct sk_buff *skb, #ifdef CONFIG_AUDIT smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_NET); ad.a.u.net.family = family; - ad.a.u.net.netif = skb->iif; + ad.a.u.net.netif = skb->skb_iif; ipv4_skb_to_auditdata(skb, &ad.a, NULL); #endif /* -- cgit v1.3-8-gc7d7 From 453f19eea7dbad837425e9b07d84568d14898794 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 20 Nov 2009 22:19:43 +0100 Subject: perf: Allow for custom overflow handlers in-kernel perf users might wish to have custom actions on the sample interrupt. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <20091120212508.222339539@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 6 ++++++ kernel/perf_event.c | 8 +++++++- 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index b5cdac0de370..a430ac3074af 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -567,6 +567,8 @@ struct perf_pending_entry { typedef void (*perf_callback_t)(struct perf_event *, void *); +struct perf_sample_data; + /** * struct perf_event - performance event kernel representation: */ @@ -658,6 +660,10 @@ struct perf_event { struct pid_namespace *ns; u64 id; + void (*overflow_handler)(struct perf_event *event, + int nmi, struct perf_sample_data *data, + struct pt_regs *regs); + #ifdef CONFIG_EVENT_PROFILE struct event_filter *filter; #endif diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 3852e2656bb0..1dfb6cc4fdea 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -3710,7 +3710,11 @@ static int __perf_event_overflow(struct perf_event *event, int nmi, perf_event_disable(event); } - perf_event_output(event, nmi, data, regs); + if (event->overflow_handler) + event->overflow_handler(event, nmi, data, regs); + else + perf_event_output(event, nmi, data, regs); + return ret; } @@ -4836,6 +4840,8 @@ inherit_event(struct perf_event *parent_event, if (parent_event->attr.freq) child_event->hw.sample_period = parent_event->hw.sample_period; + child_event->overflow_handler = parent_event->overflow_handler; + /* * Link it up in the child's context: */ -- cgit v1.3-8-gc7d7 From 59ed446f792cc07d37b1536b9c4664d14e25e425 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 20 Nov 2009 22:19:55 +0100 Subject: perf: Fix event scaling for inherited counters Properly account the full hierarchy of counters for both the count (we already did so) and the scale times (new). Signed-off-by: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <20091120212509.153379276@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 3 ++- kernel/perf_event.c | 48 +++++++++++++++++++++++++--------------------- 2 files changed, 28 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index a430ac3074af..36fe89f72641 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -782,7 +782,8 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, pid_t pid, perf_callback_t callback); -extern u64 perf_event_read_value(struct perf_event *event); +extern u64 perf_event_read_value(struct perf_event *event, + u64 *enabled, u64 *running); struct perf_sample_data { u64 type; diff --git a/kernel/perf_event.c b/kernel/perf_event.c index fdfae888a67c..80f40da9a01e 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -1774,14 +1774,25 @@ static int perf_event_read_size(struct perf_event *event) return size; } -u64 perf_event_read_value(struct perf_event *event) +u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) { struct perf_event *child; u64 total = 0; + *enabled = 0; + *running = 0; + total += perf_event_read(event); - list_for_each_entry(child, &event->child_list, child_list) + *enabled += event->total_time_enabled + + atomic64_read(&event->child_total_time_enabled); + *running += event->total_time_running + + atomic64_read(&event->child_total_time_running); + + list_for_each_entry(child, &event->child_list, child_list) { total += perf_event_read(child); + *enabled += child->total_time_enabled; + *running += child->total_time_running; + } return total; } @@ -1793,19 +1804,15 @@ static int perf_event_read_group(struct perf_event *event, struct perf_event *leader = event->group_leader, *sub; int n = 0, size = 0, ret = 0; u64 values[5]; - u64 count; + u64 count, enabled, running; - count = perf_event_read_value(leader); + count = perf_event_read_value(leader, &enabled, &running); values[n++] = 1 + leader->nr_siblings; - if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { - values[n++] = leader->total_time_enabled + - atomic64_read(&leader->child_total_time_enabled); - } - if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { - values[n++] = leader->total_time_running + - atomic64_read(&leader->child_total_time_running); - } + if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + values[n++] = enabled; + if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + values[n++] = running; values[n++] = count; if (read_format & PERF_FORMAT_ID) values[n++] = primary_event_id(leader); @@ -1820,7 +1827,7 @@ static int perf_event_read_group(struct perf_event *event, list_for_each_entry(sub, &leader->sibling_list, group_entry) { n = 0; - values[n++] = perf_event_read_value(sub); + values[n++] = perf_event_read_value(sub, &enabled, &running); if (read_format & PERF_FORMAT_ID) values[n++] = primary_event_id(sub); @@ -1838,18 +1845,15 @@ static int perf_event_read_group(struct perf_event *event, static int perf_event_read_one(struct perf_event *event, u64 read_format, char __user *buf) { + u64 enabled, running; u64 values[4]; int n = 0; - values[n++] = perf_event_read_value(event); - if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { - values[n++] = event->total_time_enabled + - atomic64_read(&event->child_total_time_enabled); - } - if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { - values[n++] = event->total_time_running + - atomic64_read(&event->child_total_time_running); - } + values[n++] = perf_event_read_value(event, &enabled, &running); + if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + values[n++] = enabled; + if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + values[n++] = running; if (read_format & PERF_FORMAT_ID) values[n++] = primary_event_id(event); -- cgit v1.3-8-gc7d7 From ce71b9df8893ec954e56c5979df6da274f20f65e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 22 Nov 2009 05:26:55 +0100 Subject: tracing: Use the perf recursion protection from trace event When we commit a trace to perf, we first check if we are recursing in the same buffer so that we don't mess-up the buffer with a recursing trace. But later on, we do the same check from perf to avoid commit recursion. The recursion check is desired early before we touch the buffer but we want to do this check only once. Then export the recursion protection from perf and use it from the trace events before submitting a trace. v2: Put appropriate Reported-by tag Reported-by: Peter Zijlstra Signed-off-by: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Steven Rostedt Cc: Masami Hiramatsu Cc: Jason Baron LKML-Reference: <1258864015-10579-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- include/linux/ftrace_event.h | 9 ++--- include/linux/perf_event.h | 4 +++ include/trace/ftrace.h | 23 +++++++------ kernel/perf_event.c | 68 +++++++++++++++++++++++++------------- kernel/trace/trace_event_profile.c | 14 ++++---- kernel/trace/trace_kprobe.c | 48 ++++++++++----------------- kernel/trace/trace_syscalls.c | 47 ++++++++++---------------- 7 files changed, 106 insertions(+), 107 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 43360c1d8f70..47bbdf9c38d0 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -137,13 +137,8 @@ struct ftrace_event_call { #define FTRACE_MAX_PROFILE_SIZE 2048 -struct perf_trace_buf { - char buf[FTRACE_MAX_PROFILE_SIZE]; - int recursion; -}; - -extern struct perf_trace_buf *perf_trace_buf; -extern struct perf_trace_buf *perf_trace_buf_nmi; +extern char *perf_trace_buf; +extern char *perf_trace_buf_nmi; #define MAX_FILTER_PRED 32 #define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */ diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 36fe89f72641..74e98b1d3391 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -874,6 +874,8 @@ extern int perf_output_begin(struct perf_output_handle *handle, extern void perf_output_end(struct perf_output_handle *handle); extern void perf_output_copy(struct perf_output_handle *handle, const void *buf, unsigned int len); +extern int perf_swevent_get_recursion_context(int **recursion); +extern void perf_swevent_put_recursion_context(int *recursion); #else static inline void perf_event_task_sched_in(struct task_struct *task, int cpu) { } @@ -902,6 +904,8 @@ static inline void perf_event_mmap(struct vm_area_struct *vma) { } static inline void perf_event_comm(struct task_struct *tsk) { } static inline void perf_event_fork(struct task_struct *tsk) { } static inline void perf_event_init(void) { } +static int perf_swevent_get_recursion_context(int **recursion) { return -1; } +static void perf_swevent_put_recursion_context(int *recursion) { } #endif diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 4945d1c99864..c222ef5238bf 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -724,16 +724,19 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ static void ftrace_profile_##call(proto) \ { \ struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ + extern int perf_swevent_get_recursion_context(int **recursion); \ + extern void perf_swevent_put_recursion_context(int *recursion); \ struct ftrace_event_call *event_call = &event_##call; \ extern void perf_tp_event(int, u64, u64, void *, int); \ struct ftrace_raw_##call *entry; \ - struct perf_trace_buf *trace_buf; \ u64 __addr = 0, __count = 1; \ unsigned long irq_flags; \ struct trace_entry *ent; \ int __entry_size; \ int __data_size; \ + char *trace_buf; \ char *raw_data; \ + int *recursion; \ int __cpu; \ int pc; \ \ @@ -749,6 +752,10 @@ static void ftrace_profile_##call(proto) \ return; \ \ local_irq_save(irq_flags); \ + \ + if (perf_swevent_get_recursion_context(&recursion)) \ + goto end_recursion; \ + \ __cpu = smp_processor_id(); \ \ if (in_nmi()) \ @@ -759,13 +766,7 @@ static void ftrace_profile_##call(proto) \ if (!trace_buf) \ goto end; \ \ - trace_buf = per_cpu_ptr(trace_buf, __cpu); \ - if (trace_buf->recursion++) \ - goto end_recursion; \ - \ - barrier(); \ - \ - raw_data = trace_buf->buf; \ + raw_data = per_cpu_ptr(trace_buf, __cpu); \ \ *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; \ entry = (struct ftrace_raw_##call *)raw_data; \ @@ -780,9 +781,9 @@ static void ftrace_profile_##call(proto) \ perf_tp_event(event_call->id, __addr, __count, entry, \ __entry_size); \ \ -end_recursion: \ - trace_buf->recursion--; \ -end: \ +end: \ + perf_swevent_put_recursion_context(recursion); \ +end_recursion: \ local_irq_restore(irq_flags); \ \ } diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 718fa939b1a7..aba822722300 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -3880,34 +3880,42 @@ static void perf_swevent_ctx_event(struct perf_event_context *ctx, } } -static int *perf_swevent_recursion_context(struct perf_cpu_context *cpuctx) +/* + * Must be called with preemption disabled + */ +int perf_swevent_get_recursion_context(int **recursion) { + struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); + if (in_nmi()) - return &cpuctx->recursion[3]; + *recursion = &cpuctx->recursion[3]; + else if (in_irq()) + *recursion = &cpuctx->recursion[2]; + else if (in_softirq()) + *recursion = &cpuctx->recursion[1]; + else + *recursion = &cpuctx->recursion[0]; - if (in_irq()) - return &cpuctx->recursion[2]; + if (**recursion) + return -1; - if (in_softirq()) - return &cpuctx->recursion[1]; + (**recursion)++; - return &cpuctx->recursion[0]; + return 0; } -static void do_perf_sw_event(enum perf_type_id type, u32 event_id, - u64 nr, int nmi, - struct perf_sample_data *data, - struct pt_regs *regs) +void perf_swevent_put_recursion_context(int *recursion) { - struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context); - int *recursion = perf_swevent_recursion_context(cpuctx); - struct perf_event_context *ctx; - - if (*recursion) - goto out; + (*recursion)--; +} - (*recursion)++; - barrier(); +static void __do_perf_sw_event(enum perf_type_id type, u32 event_id, + u64 nr, int nmi, + struct perf_sample_data *data, + struct pt_regs *regs) +{ + struct perf_event_context *ctx; + struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); rcu_read_lock(); perf_swevent_ctx_event(&cpuctx->ctx, type, event_id, @@ -3920,12 +3928,25 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id, if (ctx) perf_swevent_ctx_event(ctx, type, event_id, nr, nmi, data, regs); rcu_read_unlock(); +} - barrier(); - (*recursion)--; +static void do_perf_sw_event(enum perf_type_id type, u32 event_id, + u64 nr, int nmi, + struct perf_sample_data *data, + struct pt_regs *regs) +{ + int *recursion; + + preempt_disable(); + + if (perf_swevent_get_recursion_context(&recursion)) + goto out; + + __do_perf_sw_event(type, event_id, nr, nmi, data, regs); + perf_swevent_put_recursion_context(recursion); out: - put_cpu_var(perf_cpu_context); + preempt_enable(); } void __perf_sw_event(u32 event_id, u64 nr, int nmi, @@ -4159,7 +4180,8 @@ void perf_tp_event(int event_id, u64 addr, u64 count, void *record, if (!regs) regs = task_pt_regs(current); - do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, + /* Trace events already protected against recursion */ + __do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, &data, regs); } EXPORT_SYMBOL_GPL(perf_tp_event); diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c index e0d351b01f5a..d9c60f80aa0d 100644 --- a/kernel/trace/trace_event_profile.c +++ b/kernel/trace/trace_event_profile.c @@ -9,31 +9,33 @@ #include "trace.h" -struct perf_trace_buf *perf_trace_buf; +char *perf_trace_buf; EXPORT_SYMBOL_GPL(perf_trace_buf); -struct perf_trace_buf *perf_trace_buf_nmi; +char *perf_trace_buf_nmi; EXPORT_SYMBOL_GPL(perf_trace_buf_nmi); +typedef typeof(char [FTRACE_MAX_PROFILE_SIZE]) perf_trace_t ; + /* Count the events in use (per event id, not per instance) */ static int total_profile_count; static int ftrace_profile_enable_event(struct ftrace_event_call *event) { - struct perf_trace_buf *buf; + char *buf; int ret = -ENOMEM; if (atomic_inc_return(&event->profile_count)) return 0; if (!total_profile_count) { - buf = alloc_percpu(struct perf_trace_buf); + buf = (char *)alloc_percpu(perf_trace_t); if (!buf) goto fail_buf; rcu_assign_pointer(perf_trace_buf, buf); - buf = alloc_percpu(struct perf_trace_buf); + buf = (char *)alloc_percpu(perf_trace_t); if (!buf) goto fail_buf_nmi; @@ -79,7 +81,7 @@ int ftrace_profile_enable(int event_id) static void ftrace_profile_disable_event(struct ftrace_event_call *event) { - struct perf_trace_buf *buf, *nmi_buf; + char *buf, *nmi_buf; if (!atomic_add_negative(-1, &event->profile_count)) return; diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 3696476f307d..22e6f68b05b3 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1208,11 +1208,12 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); struct ftrace_event_call *call = &tp->call; struct kprobe_trace_entry *entry; - struct perf_trace_buf *trace_buf; struct trace_entry *ent; int size, __size, i, pc, __cpu; unsigned long irq_flags; + char *trace_buf; char *raw_data; + int *recursion; pc = preempt_count(); __size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args); @@ -1227,6 +1228,10 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, * This also protects the rcu read side */ local_irq_save(irq_flags); + + if (perf_swevent_get_recursion_context(&recursion)) + goto end_recursion; + __cpu = smp_processor_id(); if (in_nmi()) @@ -1237,18 +1242,7 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, if (!trace_buf) goto end; - trace_buf = per_cpu_ptr(trace_buf, __cpu); - - if (trace_buf->recursion++) - goto end_recursion; - - /* - * Make recursion update visible before entering perf_tp_event - * so that we protect from perf recursions. - */ - barrier(); - - raw_data = trace_buf->buf; + raw_data = per_cpu_ptr(trace_buf, __cpu); /* Zero dead bytes from alignment to avoid buffer leak to userspace */ *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; @@ -1263,9 +1257,9 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, entry->args[i] = call_fetch(&tp->args[i].fetch, regs); perf_tp_event(call->id, entry->ip, 1, entry, size); -end_recursion: - trace_buf->recursion--; end: + perf_swevent_put_recursion_context(recursion); +end_recursion: local_irq_restore(irq_flags); return 0; @@ -1278,10 +1272,11 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); struct ftrace_event_call *call = &tp->call; struct kretprobe_trace_entry *entry; - struct perf_trace_buf *trace_buf; struct trace_entry *ent; int size, __size, i, pc, __cpu; unsigned long irq_flags; + char *trace_buf; + int *recursion; char *raw_data; pc = preempt_count(); @@ -1297,6 +1292,10 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, * This also protects the rcu read side */ local_irq_save(irq_flags); + + if (perf_swevent_get_recursion_context(&recursion)) + goto end_recursion; + __cpu = smp_processor_id(); if (in_nmi()) @@ -1307,18 +1306,7 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, if (!trace_buf) goto end; - trace_buf = per_cpu_ptr(trace_buf, __cpu); - - if (trace_buf->recursion++) - goto end_recursion; - - /* - * Make recursion update visible before entering perf_tp_event - * so that we protect from perf recursions. - */ - barrier(); - - raw_data = trace_buf->buf; + raw_data = per_cpu_ptr(trace_buf, __cpu); /* Zero dead bytes from alignment to avoid buffer leak to userspace */ *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; @@ -1334,9 +1322,9 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, entry->args[i] = call_fetch(&tp->args[i].fetch, regs); perf_tp_event(call->id, entry->ret_ip, 1, entry, size); -end_recursion: - trace_buf->recursion--; end: + perf_swevent_put_recursion_context(recursion); +end_recursion: local_irq_restore(irq_flags); return 0; diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 51213b0aa81b..0bb934875263 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -477,10 +477,11 @@ static int sys_prof_refcount_exit; static void prof_syscall_enter(struct pt_regs *regs, long id) { struct syscall_metadata *sys_data; - struct perf_trace_buf *trace_buf; struct syscall_trace_enter *rec; unsigned long flags; + char *trace_buf; char *raw_data; + int *recursion; int syscall_nr; int size; int cpu; @@ -505,6 +506,9 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) /* Protect the per cpu buffer, begin the rcu read side */ local_irq_save(flags); + if (perf_swevent_get_recursion_context(&recursion)) + goto end_recursion; + cpu = smp_processor_id(); if (in_nmi()) @@ -515,18 +519,7 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) if (!trace_buf) goto end; - trace_buf = per_cpu_ptr(trace_buf, cpu); - - if (trace_buf->recursion++) - goto end_recursion; - - /* - * Make recursion update visible before entering perf_tp_event - * so that we protect from perf recursions. - */ - barrier(); - - raw_data = trace_buf->buf; + raw_data = per_cpu_ptr(trace_buf, cpu); /* zero the dead bytes from align to not leak stack to user */ *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; @@ -539,9 +532,9 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) (unsigned long *)&rec->args); perf_tp_event(sys_data->enter_id, 0, 1, rec, size); -end_recursion: - trace_buf->recursion--; end: + perf_swevent_put_recursion_context(recursion); +end_recursion: local_irq_restore(flags); } @@ -588,10 +581,11 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) { struct syscall_metadata *sys_data; struct syscall_trace_exit *rec; - struct perf_trace_buf *trace_buf; unsigned long flags; int syscall_nr; + char *trace_buf; char *raw_data; + int *recursion; int size; int cpu; @@ -617,6 +611,10 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) /* Protect the per cpu buffer, begin the rcu read side */ local_irq_save(flags); + + if (perf_swevent_get_recursion_context(&recursion)) + goto end_recursion; + cpu = smp_processor_id(); if (in_nmi()) @@ -627,18 +625,7 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) if (!trace_buf) goto end; - trace_buf = per_cpu_ptr(trace_buf, cpu); - - if (trace_buf->recursion++) - goto end_recursion; - - /* - * Make recursion update visible before entering perf_tp_event - * so that we protect from perf recursions. - */ - barrier(); - - raw_data = trace_buf->buf; + raw_data = per_cpu_ptr(trace_buf, cpu); /* zero the dead bytes from align to not leak stack to user */ *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; @@ -652,9 +639,9 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) perf_tp_event(sys_data->exit_id, 0, 1, rec, size); -end_recursion: - trace_buf->recursion--; end: + perf_swevent_put_recursion_context(recursion); +end_recursion: local_irq_restore(flags); } -- cgit v1.3-8-gc7d7 From 5093ebad5f2348076fdc3dac7d2358b1ad7f85f7 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 22 Nov 2009 05:21:35 +0100 Subject: hw-breakpoints: Separate the kernel part from breakpoint headers So that we can include this header from userspace tools, like perf tools, to get the breakpoint types and len definitions. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Prasad LKML-Reference: <1258863695-10464-4-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- include/linux/hw_breakpoint.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h index 0b98cbf76da7..4659e0c55ea6 100644 --- a/include/linux/hw_breakpoint.h +++ b/include/linux/hw_breakpoint.h @@ -16,6 +16,7 @@ enum { HW_BREAKPOINT_X = 4, }; +#ifdef __KERNEL__ #ifdef CONFIG_HAVE_HW_BREAKPOINT static inline unsigned long hw_breakpoint_addr(struct perf_event *bp) @@ -133,5 +134,6 @@ static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp) } #endif /* CONFIG_HAVE_HW_BREAKPOINT */ +#endif /* __KERNEL__ */ #endif /* _LINUX_HW_BREAKPOINT_H */ -- cgit v1.3-8-gc7d7 From 9f680ab41485edfdc96331b70afa7513aa0a7720 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 22 Nov 2009 08:53:49 -0800 Subject: rcu: Eliminate unneeded function wrapping The functions rcu_init() is a wrapper for __rcu_init(), and also sets up the CPU-hotplug notifier for rcu_barrier_cpu_hotplug(). But TINY_RCU doesn't need CPU-hotplug notification, and the rcu_barrier_cpu_hotplug() is a simple wrapper for rcu_cpu_notify(). So push rcu_init() out to kernel/rcutree.c and kernel/rcutiny.c and get rid of the wrapper function rcu_barrier_cpu_hotplug(). Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <12589088302320-git-send-email-> Signed-off-by: Ingo Molnar --- include/linux/rcutiny.h | 2 -- include/linux/rcutree.h | 3 --- kernel/rcupdate.c | 22 ---------------------- kernel/rcutiny.c | 11 +---------- kernel/rcutree.c | 17 ++++++++++++++--- 5 files changed, 15 insertions(+), 40 deletions(-) (limited to 'include') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 2c1fe8373e71..a3b6272af2dd 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -38,7 +38,6 @@ void rcu_bh_qs(int cpu); #define rcu_init_sched() do { } while (0) extern void rcu_check_callbacks(int cpu, int user); -extern void __rcu_init(void); /* * Return the number of grace periods. @@ -69,7 +68,6 @@ static inline void synchronize_rcu_bh_expedited(void) } struct notifier_block; -extern int rcu_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu); #ifdef CONFIG_NO_HZ diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 9642c6bcb399..111a65257350 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -34,8 +34,6 @@ struct notifier_block; extern void rcu_sched_qs(int cpu); extern void rcu_bh_qs(int cpu); -extern int rcu_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu); extern int rcu_needs_cpu(int cpu); extern int rcu_expedited_torture_stats(char *page); @@ -83,7 +81,6 @@ static inline void synchronize_rcu_bh_expedited(void) synchronize_sched_expedited(); } -extern void __rcu_init(void); extern void rcu_check_callbacks(int cpu, int user); extern long rcu_batches_completed(void); diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 7625f207f65e..eb6b534db318 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -161,28 +161,6 @@ EXPORT_SYMBOL_GPL(synchronize_rcu_bh); #endif /* #ifndef CONFIG_TINY_RCU */ -static int __cpuinit rcu_barrier_cpu_hotplug(struct notifier_block *self, - unsigned long action, void *hcpu) -{ - return rcu_cpu_notify(self, action, hcpu); -} - -void __init rcu_init(void) -{ - int i; - - __rcu_init(); - cpu_notifier(rcu_barrier_cpu_hotplug, 0); - - /* - * We don't need protection against CPU-hotplug here because - * this is called early in boot, before either interrupts - * or the scheduler are operational. - */ - for_each_online_cpu(i) - rcu_barrier_cpu_hotplug(NULL, CPU_UP_PREPARE, (void *)(long)i); -} - void rcu_scheduler_starting(void) { WARN_ON(num_online_cpus() != 1); diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index b33ec3aa377a..9f6d9ff2572c 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c @@ -177,15 +177,6 @@ static void rcu_process_callbacks(struct softirq_action *unused) __rcu_process_callbacks(&rcu_bh_ctrlblk); } -/* - * Null function to handle CPU being onlined. Longer term, we want to - * make TINY_RCU avoid using rcupdate.c, but later... - */ -int rcu_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) -{ - return NOTIFY_OK; -} - /* * Wait for a grace period to elapse. But it is illegal to invoke * synchronize_sched() from within an RCU read-side critical section. @@ -285,7 +276,7 @@ void rcu_barrier_sched(void) } EXPORT_SYMBOL_GPL(rcu_barrier_sched); -void __rcu_init(void) +void __init rcu_init(void) { open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); } diff --git a/kernel/rcutree.c b/kernel/rcutree.c index b79bfcd28e95..e3d3bbddbcd5 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1644,8 +1644,8 @@ static void __cpuinit rcu_online_cpu(int cpu) /* * Handle CPU online/offline notification events. */ -int __cpuinit rcu_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) +static int __cpuinit rcu_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu) { long cpu = (long)hcpu; @@ -1781,8 +1781,10 @@ do { \ } \ } while (0) -void __init __rcu_init(void) +void __init rcu_init(void) { + int i; + rcu_bootup_announce(); #ifdef CONFIG_RCU_CPU_STALL_DETECTOR printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n"); @@ -1791,6 +1793,15 @@ void __init __rcu_init(void) RCU_INIT_FLAVOR(&rcu_bh_state, rcu_bh_data); __rcu_init_preempt(); open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); + + /* + * We don't need protection against CPU-hotplug here because + * this is called early in boot, before either interrupts + * or the scheduler are operational. + */ + cpu_notifier(rcu_cpu_notify, 0); + for_each_online_cpu(i) + rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)i); } #include "rcutree_plugin.h" -- cgit v1.3-8-gc7d7 From 6ebb237bece23275d1da149b61a342f0d4d06a08 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 22 Nov 2009 08:53:50 -0800 Subject: rcu: Re-arrange code to reduce #ifdef pain Remove #ifdefs from kernel/rcupdate.c and include/linux/rcupdate.h by moving code to include/linux/rcutiny.h, include/linux/rcutree.h, and kernel/rcutree.c. Also remove some definitions that are no longer used. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <1258908830885-git-send-email-> Signed-off-by: Ingo Molnar --- include/linux/rcupdate.h | 12 ------ include/linux/rcutiny.h | 11 +++++ include/linux/rcutree.h | 4 +- kernel/rcupdate.c | 104 ----------------------------------------------- kernel/rcutree.c | 80 ++++++++++++++++++++++++++++++++++++ kernel/rcutree_plugin.h | 24 +++++++++++ 6 files changed, 118 insertions(+), 117 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 2f1bc42a3b82..24440f4bf476 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -52,11 +52,6 @@ struct rcu_head { }; /* Exported common interfaces */ -#ifdef CONFIG_TREE_PREEMPT_RCU -extern void synchronize_rcu(void); -#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ -#define synchronize_rcu synchronize_sched -#endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ extern void synchronize_rcu_bh(void); extern void synchronize_sched(void); extern void rcu_barrier(void); @@ -67,13 +62,6 @@ extern int sched_expedited_torture_stats(char *page); /* Internal to kernel */ extern void rcu_init(void); -extern void rcu_scheduler_starting(void); -#ifndef CONFIG_TINY_RCU -extern int rcu_needs_cpu(int cpu); -#else -static inline int rcu_needs_cpu(int cpu) { return 0; } -#endif -extern int rcu_scheduler_active; #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) #include diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index a3b6272af2dd..c4ba9a78721e 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -39,6 +39,11 @@ void rcu_bh_qs(int cpu); #define rcu_init_sched() do { } while (0) extern void rcu_check_callbacks(int cpu, int user); +static inline int rcu_needs_cpu(int cpu) +{ + return 0; +} + /* * Return the number of grace periods. */ @@ -57,6 +62,8 @@ static inline long rcu_batches_completed_bh(void) extern int rcu_expedited_torture_stats(char *page); +#define synchronize_rcu synchronize_sched + static inline void synchronize_rcu_expedited(void) { synchronize_sched(); @@ -86,6 +93,10 @@ static inline void rcu_exit_nohz(void) #endif /* #else #ifdef CONFIG_NO_HZ */ +static inline void rcu_scheduler_starting(void) +{ +} + static inline void exit_rcu(void) { } diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 111a65257350..c93eee5911b0 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -35,12 +35,14 @@ struct notifier_block; extern void rcu_sched_qs(int cpu); extern void rcu_bh_qs(int cpu); extern int rcu_needs_cpu(int cpu); +extern void rcu_scheduler_starting(void); extern int rcu_expedited_torture_stats(char *page); #ifdef CONFIG_TREE_PREEMPT_RCU extern void __rcu_read_lock(void); extern void __rcu_read_unlock(void); +extern void synchronize_rcu(void); extern void exit_rcu(void); #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ @@ -55,7 +57,7 @@ static inline void __rcu_read_unlock(void) preempt_enable(); } -#define __synchronize_sched() synchronize_rcu() +#define synchronize_rcu synchronize_sched static inline void exit_rcu(void) { diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index eb6b534db318..9b7fd4723878 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -44,7 +44,6 @@ #include #include #include -#include #ifdef CONFIG_DEBUG_LOCK_ALLOC static struct lock_class_key rcu_lock_key; @@ -53,8 +52,6 @@ struct lockdep_map rcu_lock_map = EXPORT_SYMBOL_GPL(rcu_lock_map); #endif -int rcu_scheduler_active __read_mostly; - /* * Awaken the corresponding synchronize_rcu() instance now that a * grace period has elapsed. @@ -66,104 +63,3 @@ void wakeme_after_rcu(struct rcu_head *head) rcu = container_of(head, struct rcu_synchronize, head); complete(&rcu->completion); } - -#ifndef CONFIG_TINY_RCU - -#ifdef CONFIG_TREE_PREEMPT_RCU - -/** - * synchronize_rcu - wait until a grace period has elapsed. - * - * Control will return to the caller some time after a full grace - * period has elapsed, in other words after all currently executing RCU - * read-side critical sections have completed. RCU read-side critical - * sections are delimited by rcu_read_lock() and rcu_read_unlock(), - * and may be nested. - */ -void synchronize_rcu(void) -{ - struct rcu_synchronize rcu; - - if (!rcu_scheduler_active) - return; - - init_completion(&rcu.completion); - /* Will wake me after RCU finished. */ - call_rcu(&rcu.head, wakeme_after_rcu); - /* Wait for it. */ - wait_for_completion(&rcu.completion); -} -EXPORT_SYMBOL_GPL(synchronize_rcu); - -#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ - -/** - * synchronize_sched - wait until an rcu-sched grace period has elapsed. - * - * Control will return to the caller some time after a full rcu-sched - * grace period has elapsed, in other words after all currently executing - * rcu-sched read-side critical sections have completed. These read-side - * critical sections are delimited by rcu_read_lock_sched() and - * rcu_read_unlock_sched(), and may be nested. Note that preempt_disable(), - * local_irq_disable(), and so on may be used in place of - * rcu_read_lock_sched(). - * - * This means that all preempt_disable code sequences, including NMI and - * hardware-interrupt handlers, in progress on entry will have completed - * before this primitive returns. However, this does not guarantee that - * softirq handlers will have completed, since in some kernels, these - * handlers can run in process context, and can block. - * - * This primitive provides the guarantees made by the (now removed) - * synchronize_kernel() API. In contrast, synchronize_rcu() only - * guarantees that rcu_read_lock() sections will have completed. - * In "classic RCU", these two guarantees happen to be one and - * the same, but can differ in realtime RCU implementations. - */ -void synchronize_sched(void) -{ - struct rcu_synchronize rcu; - - if (rcu_blocking_is_gp()) - return; - - init_completion(&rcu.completion); - /* Will wake me after RCU finished. */ - call_rcu_sched(&rcu.head, wakeme_after_rcu); - /* Wait for it. */ - wait_for_completion(&rcu.completion); -} -EXPORT_SYMBOL_GPL(synchronize_sched); - -/** - * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed. - * - * Control will return to the caller some time after a full rcu_bh grace - * period has elapsed, in other words after all currently executing rcu_bh - * read-side critical sections have completed. RCU read-side critical - * sections are delimited by rcu_read_lock_bh() and rcu_read_unlock_bh(), - * and may be nested. - */ -void synchronize_rcu_bh(void) -{ - struct rcu_synchronize rcu; - - if (rcu_blocking_is_gp()) - return; - - init_completion(&rcu.completion); - /* Will wake me after RCU finished. */ - call_rcu_bh(&rcu.head, wakeme_after_rcu); - /* Wait for it. */ - wait_for_completion(&rcu.completion); -} -EXPORT_SYMBOL_GPL(synchronize_rcu_bh); - -#endif /* #ifndef CONFIG_TINY_RCU */ - -void rcu_scheduler_starting(void) -{ - WARN_ON(num_online_cpus() != 1); - WARN_ON(nr_context_switches() > 0); - rcu_scheduler_active = 1; -} diff --git a/kernel/rcutree.c b/kernel/rcutree.c index e3d3bbddbcd5..4ca7e0292fd8 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -46,6 +46,7 @@ #include #include #include +#include #include "rcutree.h" @@ -79,6 +80,8 @@ DEFINE_PER_CPU(struct rcu_data, rcu_sched_data); struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state); DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); +static int rcu_scheduler_active __read_mostly; + /* * Return true if an RCU grace period is in progress. The ACCESS_ONCE()s @@ -1396,6 +1399,68 @@ void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) } EXPORT_SYMBOL_GPL(call_rcu_bh); +/** + * synchronize_sched - wait until an rcu-sched grace period has elapsed. + * + * Control will return to the caller some time after a full rcu-sched + * grace period has elapsed, in other words after all currently executing + * rcu-sched read-side critical sections have completed. These read-side + * critical sections are delimited by rcu_read_lock_sched() and + * rcu_read_unlock_sched(), and may be nested. Note that preempt_disable(), + * local_irq_disable(), and so on may be used in place of + * rcu_read_lock_sched(). + * + * This means that all preempt_disable code sequences, including NMI and + * hardware-interrupt handlers, in progress on entry will have completed + * before this primitive returns. However, this does not guarantee that + * softirq handlers will have completed, since in some kernels, these + * handlers can run in process context, and can block. + * + * This primitive provides the guarantees made by the (now removed) + * synchronize_kernel() API. In contrast, synchronize_rcu() only + * guarantees that rcu_read_lock() sections will have completed. + * In "classic RCU", these two guarantees happen to be one and + * the same, but can differ in realtime RCU implementations. + */ +void synchronize_sched(void) +{ + struct rcu_synchronize rcu; + + if (rcu_blocking_is_gp()) + return; + + init_completion(&rcu.completion); + /* Will wake me after RCU finished. */ + call_rcu_sched(&rcu.head, wakeme_after_rcu); + /* Wait for it. */ + wait_for_completion(&rcu.completion); +} +EXPORT_SYMBOL_GPL(synchronize_sched); + +/** + * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed. + * + * Control will return to the caller some time after a full rcu_bh grace + * period has elapsed, in other words after all currently executing rcu_bh + * read-side critical sections have completed. RCU read-side critical + * sections are delimited by rcu_read_lock_bh() and rcu_read_unlock_bh(), + * and may be nested. + */ +void synchronize_rcu_bh(void) +{ + struct rcu_synchronize rcu; + + if (rcu_blocking_is_gp()) + return; + + init_completion(&rcu.completion); + /* Will wake me after RCU finished. */ + call_rcu_bh(&rcu.head, wakeme_after_rcu); + /* Wait for it. */ + wait_for_completion(&rcu.completion); +} +EXPORT_SYMBOL_GPL(synchronize_rcu_bh); + /* * Check to see if there is any immediate RCU-related work to be done * by the current CPU, for the specified type of RCU, returning 1 if so. @@ -1480,6 +1545,21 @@ int rcu_needs_cpu(int cpu) rcu_preempt_needs_cpu(cpu); } +/* + * This function is invoked towards the end of the scheduler's initialization + * process. Before this is called, the idle task might contain + * RCU read-side critical sections (during which time, this idle + * task is booting the system). After this function is called, the + * idle tasks are prohibited from containing RCU read-side critical + * sections. + */ +void rcu_scheduler_starting(void) +{ + WARN_ON(num_online_cpus() != 1); + WARN_ON(nr_context_switches() > 0); + rcu_scheduler_active = 1; +} + static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL}; static atomic_t rcu_barrier_cpu_count; static DEFINE_MUTEX(rcu_barrier_mutex); diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 0bdb592eee66..1d295c789d3d 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -425,6 +425,30 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) } EXPORT_SYMBOL_GPL(call_rcu); +/** + * synchronize_rcu - wait until a grace period has elapsed. + * + * Control will return to the caller some time after a full grace + * period has elapsed, in other words after all currently executing RCU + * read-side critical sections have completed. RCU read-side critical + * sections are delimited by rcu_read_lock() and rcu_read_unlock(), + * and may be nested. + */ +void synchronize_rcu(void) +{ + struct rcu_synchronize rcu; + + if (!rcu_scheduler_active) + return; + + init_completion(&rcu.completion); + /* Will wake me after RCU finished. */ + call_rcu(&rcu.head, wakeme_after_rcu); + /* Wait for it. */ + wait_for_completion(&rcu.completion); +} +EXPORT_SYMBOL_GPL(synchronize_rcu); + /* * Wait for an rcu-preempt grace period. We are supposed to expedite the * grace period, but this is the crude slow compatability hack, so just -- cgit v1.3-8-gc7d7 From 3066eec68d21cf4d468809c0b7b1fe9ee59c8f32 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Thu, 22 Oct 2009 13:26:45 +0300 Subject: MFD: twl4030: add twl4030_codec MFD as a new child to the core New MFD child to twl4030 MFD device. Reason for the twl4030_codec MFD: the vibra control is actually in the codec part of the twl4030. If both the vibra and the audio functionality is needed from the twl4030 at the same time, than they need to control the codec power and APLL at the same time without breaking the other driver. Also these two has to be able to work without the need for the other driver. This MFD device will be used by the drivers, which needs resources from the twl4030 codec like audio and vibra. The platform specific configuration data is passed along to the child drivers (audio, vibra). Signed-off-by: Peter Ujfalusi Acked-by: Samuel Ortiz Signed-off-by: Mark Brown --- drivers/mfd/Kconfig | 6 + drivers/mfd/Makefile | 1 + drivers/mfd/twl4030-codec.c | 241 +++++++++++++++++++++++++++++++++ drivers/mfd/twl4030-core.c | 14 ++ include/linux/i2c/twl4030.h | 18 +++ include/linux/mfd/twl4030-codec.h | 271 ++++++++++++++++++++++++++++++++++++++ 6 files changed, 551 insertions(+) create mode 100644 drivers/mfd/twl4030-codec.c create mode 100644 include/linux/mfd/twl4030-codec.h (limited to 'include') diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 570be139f9df..08f2d07bf56a 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -121,6 +121,12 @@ config TWL4030_POWER and load scripts controling which resources are switched off/on or reset when a sleep, wakeup or warm reset event occurs. +config TWL4030_CODEC + bool + depends on TWL4030_CORE + select MFD_CORE + default n + config MFD_TMIO bool default n diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index f3b277b90d40..af0fc903cec8 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -26,6 +26,7 @@ obj-$(CONFIG_MENELAUS) += menelaus.o obj-$(CONFIG_TWL4030_CORE) += twl4030-core.o twl4030-irq.o obj-$(CONFIG_TWL4030_POWER) += twl4030-power.o +obj-$(CONFIG_TWL4030_CODEC) += twl4030-codec.o obj-$(CONFIG_MFD_MC13783) += mc13783-core.o diff --git a/drivers/mfd/twl4030-codec.c b/drivers/mfd/twl4030-codec.c new file mode 100644 index 000000000000..97103078dc2a --- /dev/null +++ b/drivers/mfd/twl4030-codec.c @@ -0,0 +1,241 @@ +/* + * MFD driver for twl4030 codec submodule + * + * Author: Peter Ujfalusi + * + * Copyright: (C) 2009 Nokia Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define TWL4030_CODEC_CELLS 2 + +static struct platform_device *twl4030_codec_dev; + +struct twl4030_codec_resource { + int request_count; + u8 reg; + u8 mask; +}; + +struct twl4030_codec { + struct mutex mutex; + struct twl4030_codec_resource resource[TWL4030_CODEC_RES_MAX]; + struct mfd_cell cells[TWL4030_CODEC_CELLS]; +}; + +/* + * Modify the resource, the function returns the content of the register + * after the modification. + */ +static int twl4030_codec_set_resource(enum twl4030_codec_res id, int enable) +{ + struct twl4030_codec *codec = platform_get_drvdata(twl4030_codec_dev); + u8 val; + + twl4030_i2c_read_u8(TWL4030_MODULE_AUDIO_VOICE, &val, + codec->resource[id].reg); + + if (enable) + val |= codec->resource[id].mask; + else + val &= ~codec->resource[id].mask; + + twl4030_i2c_write_u8(TWL4030_MODULE_AUDIO_VOICE, + val, codec->resource[id].reg); + + return val; +} + +static inline int twl4030_codec_get_resource(enum twl4030_codec_res id) +{ + struct twl4030_codec *codec = platform_get_drvdata(twl4030_codec_dev); + u8 val; + + twl4030_i2c_read_u8(TWL4030_MODULE_AUDIO_VOICE, &val, + codec->resource[id].reg); + + return val; +} + +/* + * Enable the resource. + * The function returns with error or the content of the register + */ +int twl4030_codec_enable_resource(enum twl4030_codec_res id) +{ + struct twl4030_codec *codec = platform_get_drvdata(twl4030_codec_dev); + int val; + + if (id >= TWL4030_CODEC_RES_MAX) { + dev_err(&twl4030_codec_dev->dev, + "Invalid resource ID (%u)\n", id); + return -EINVAL; + } + + mutex_lock(&codec->mutex); + if (!codec->resource[id].request_count) + /* Resource was disabled, enable it */ + val = twl4030_codec_set_resource(id, 1); + else + val = twl4030_codec_get_resource(id); + + codec->resource[id].request_count++; + mutex_unlock(&codec->mutex); + + return val; +} +EXPORT_SYMBOL_GPL(twl4030_codec_enable_resource); + +/* + * Disable the resource. + * The function returns with error or the content of the register + */ +int twl4030_codec_disable_resource(unsigned id) +{ + struct twl4030_codec *codec = platform_get_drvdata(twl4030_codec_dev); + int val; + + if (id >= TWL4030_CODEC_RES_MAX) { + dev_err(&twl4030_codec_dev->dev, + "Invalid resource ID (%u)\n", id); + return -EINVAL; + } + + mutex_lock(&codec->mutex); + if (!codec->resource[id].request_count) { + dev_err(&twl4030_codec_dev->dev, + "Resource has been disabled already (%u)\n", id); + mutex_unlock(&codec->mutex); + return -EPERM; + } + codec->resource[id].request_count--; + + if (!codec->resource[id].request_count) + /* Resource can be disabled now */ + val = twl4030_codec_set_resource(id, 0); + else + val = twl4030_codec_get_resource(id); + + mutex_unlock(&codec->mutex); + + return val; +} +EXPORT_SYMBOL_GPL(twl4030_codec_disable_resource); + +static int __devinit twl4030_codec_probe(struct platform_device *pdev) +{ + struct twl4030_codec *codec; + struct twl4030_codec_data *pdata = pdev->dev.platform_data; + struct mfd_cell *cell = NULL; + int ret, childs = 0; + + codec = kzalloc(sizeof(struct twl4030_codec), GFP_KERNEL); + if (!codec) + return -ENOMEM; + + platform_set_drvdata(pdev, codec); + + twl4030_codec_dev = pdev; + mutex_init(&codec->mutex); + + /* Codec power */ + codec->resource[TWL4030_CODEC_RES_POWER].reg = TWL4030_REG_CODEC_MODE; + codec->resource[TWL4030_CODEC_RES_POWER].mask = TWL4030_CODECPDZ; + + /* PLL */ + codec->resource[TWL4030_CODEC_RES_APLL].reg = TWL4030_REG_APLL_CTL; + codec->resource[TWL4030_CODEC_RES_APLL].mask = TWL4030_APLL_EN; + + if (pdata->audio) { + cell = &codec->cells[childs]; + cell->name = "twl4030_codec_audio"; + cell->platform_data = pdata->audio; + cell->data_size = sizeof(*pdata->audio); + childs++; + } + if (pdata->vibra) { + cell = &codec->cells[childs]; + cell->name = "twl4030_codec_vibra"; + cell->platform_data = pdata->vibra; + cell->data_size = sizeof(*pdata->vibra); + childs++; + } + + if (childs) + ret = mfd_add_devices(&pdev->dev, pdev->id, codec->cells, + childs, NULL, 0); + else { + dev_err(&pdev->dev, "No platform data found for childs\n"); + ret = -ENODEV; + } + + if (!ret) + return 0; + + platform_set_drvdata(pdev, NULL); + kfree(codec); + twl4030_codec_dev = NULL; + return ret; +} + +static int __devexit twl4030_codec_remove(struct platform_device *pdev) +{ + struct twl4030_codec *codec = platform_get_drvdata(pdev); + + mfd_remove_devices(&pdev->dev); + platform_set_drvdata(pdev, NULL); + kfree(codec); + twl4030_codec_dev = NULL; + + return 0; +} + +MODULE_ALIAS("platform:twl4030_codec"); + +static struct platform_driver twl4030_codec_driver = { + .probe = twl4030_codec_probe, + .remove = __devexit_p(twl4030_codec_remove), + .driver = { + .owner = THIS_MODULE, + .name = "twl4030_codec", + }, +}; + +static int __devinit twl4030_codec_init(void) +{ + return platform_driver_register(&twl4030_codec_driver); +} +module_init(twl4030_codec_init); + +static void __devexit twl4030_codec_exit(void) +{ + platform_driver_unregister(&twl4030_codec_driver); +} +module_exit(twl4030_codec_exit); + +MODULE_AUTHOR("Peter Ujfalusi "); +MODULE_LICENSE("GPL"); + diff --git a/drivers/mfd/twl4030-core.c b/drivers/mfd/twl4030-core.c index 6cb12502e306..cf462b90dee2 100644 --- a/drivers/mfd/twl4030-core.c +++ b/drivers/mfd/twl4030-core.c @@ -114,6 +114,12 @@ #define twl_has_watchdog() false #endif +#if defined(CONFIG_TWL4030_CODEC) || defined(CONFIG_TWL4030_CODEC_MODULE) +#define twl_has_codec() true +#else +#define twl_has_codec() false +#endif + /* Triton Core internal information (BEGIN) */ /* Last - for index max*/ @@ -601,6 +607,14 @@ add_children(struct twl4030_platform_data *pdata, unsigned long features) return PTR_ERR(child); } + if (twl_has_codec() && pdata->codec) { + child = add_child(1, "twl4030_codec", + pdata->codec, sizeof(*pdata->codec), + false, 0, 0); + if (IS_ERR(child)) + return PTR_ERR(child); + } + if (twl_has_regulator()) { /* child = add_regulator(TWL4030_REG_VPLL1, pdata->vpll1); diff --git a/include/linux/i2c/twl4030.h b/include/linux/i2c/twl4030.h index 508824ee35e6..ba61add3e05a 100644 --- a/include/linux/i2c/twl4030.h +++ b/include/linux/i2c/twl4030.h @@ -401,6 +401,23 @@ struct twl4030_power_data { extern void twl4030_power_init(struct twl4030_power_data *triton2_scripts); +struct twl4030_codec_audio_data { + unsigned int audio_mclk; + unsigned int ramp_delay_value; + unsigned int hs_extmute:1; + void (*set_hs_extmute)(int mute); +}; + +struct twl4030_codec_vibra_data { + unsigned int audio_mclk; + unsigned int coexist; +}; + +struct twl4030_codec_data { + struct twl4030_codec_audio_data *audio; + struct twl4030_codec_vibra_data *vibra; +}; + struct twl4030_platform_data { unsigned irq_base, irq_end; struct twl4030_bci_platform_data *bci; @@ -409,6 +426,7 @@ struct twl4030_platform_data { struct twl4030_keypad_data *keypad; struct twl4030_usb_data *usb; struct twl4030_power_data *power; + struct twl4030_codec_data *codec; /* LDO regulators */ struct regulator_init_data *vdac; diff --git a/include/linux/mfd/twl4030-codec.h b/include/linux/mfd/twl4030-codec.h new file mode 100644 index 000000000000..ef0a3041d759 --- /dev/null +++ b/include/linux/mfd/twl4030-codec.h @@ -0,0 +1,271 @@ +/* + * MFD driver for twl4030 codec submodule + * + * Author: Peter Ujfalusi + * + * Copyright: (C) 2009 Nokia Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + * + */ + +#ifndef __TWL4030_CODEC_H__ +#define __TWL4030_CODEC_H__ + +/* Codec registers */ +#define TWL4030_REG_CODEC_MODE 0x01 +#define TWL4030_REG_OPTION 0x02 +#define TWL4030_REG_UNKNOWN 0x03 +#define TWL4030_REG_MICBIAS_CTL 0x04 +#define TWL4030_REG_ANAMICL 0x05 +#define TWL4030_REG_ANAMICR 0x06 +#define TWL4030_REG_AVADC_CTL 0x07 +#define TWL4030_REG_ADCMICSEL 0x08 +#define TWL4030_REG_DIGMIXING 0x09 +#define TWL4030_REG_ATXL1PGA 0x0A +#define TWL4030_REG_ATXR1PGA 0x0B +#define TWL4030_REG_AVTXL2PGA 0x0C +#define TWL4030_REG_AVTXR2PGA 0x0D +#define TWL4030_REG_AUDIO_IF 0x0E +#define TWL4030_REG_VOICE_IF 0x0F +#define TWL4030_REG_ARXR1PGA 0x10 +#define TWL4030_REG_ARXL1PGA 0x11 +#define TWL4030_REG_ARXR2PGA 0x12 +#define TWL4030_REG_ARXL2PGA 0x13 +#define TWL4030_REG_VRXPGA 0x14 +#define TWL4030_REG_VSTPGA 0x15 +#define TWL4030_REG_VRX2ARXPGA 0x16 +#define TWL4030_REG_AVDAC_CTL 0x17 +#define TWL4030_REG_ARX2VTXPGA 0x18 +#define TWL4030_REG_ARXL1_APGA_CTL 0x19 +#define TWL4030_REG_ARXR1_APGA_CTL 0x1A +#define TWL4030_REG_ARXL2_APGA_CTL 0x1B +#define TWL4030_REG_ARXR2_APGA_CTL 0x1C +#define TWL4030_REG_ATX2ARXPGA 0x1D +#define TWL4030_REG_BT_IF 0x1E +#define TWL4030_REG_BTPGA 0x1F +#define TWL4030_REG_BTSTPGA 0x20 +#define TWL4030_REG_EAR_CTL 0x21 +#define TWL4030_REG_HS_SEL 0x22 +#define TWL4030_REG_HS_GAIN_SET 0x23 +#define TWL4030_REG_HS_POPN_SET 0x24 +#define TWL4030_REG_PREDL_CTL 0x25 +#define TWL4030_REG_PREDR_CTL 0x26 +#define TWL4030_REG_PRECKL_CTL 0x27 +#define TWL4030_REG_PRECKR_CTL 0x28 +#define TWL4030_REG_HFL_CTL 0x29 +#define TWL4030_REG_HFR_CTL 0x2A +#define TWL4030_REG_ALC_CTL 0x2B +#define TWL4030_REG_ALC_SET1 0x2C +#define TWL4030_REG_ALC_SET2 0x2D +#define TWL4030_REG_BOOST_CTL 0x2E +#define TWL4030_REG_SOFTVOL_CTL 0x2F +#define TWL4030_REG_DTMF_FREQSEL 0x30 +#define TWL4030_REG_DTMF_TONEXT1H 0x31 +#define TWL4030_REG_DTMF_TONEXT1L 0x32 +#define TWL4030_REG_DTMF_TONEXT2H 0x33 +#define TWL4030_REG_DTMF_TONEXT2L 0x34 +#define TWL4030_REG_DTMF_TONOFF 0x35 +#define TWL4030_REG_DTMF_WANONOFF 0x36 +#define TWL4030_REG_I2S_RX_SCRAMBLE_H 0x37 +#define TWL4030_REG_I2S_RX_SCRAMBLE_M 0x38 +#define TWL4030_REG_I2S_RX_SCRAMBLE_L 0x39 +#define TWL4030_REG_APLL_CTL 0x3A +#define TWL4030_REG_DTMF_CTL 0x3B +#define TWL4030_REG_DTMF_PGA_CTL2 0x3C +#define TWL4030_REG_DTMF_PGA_CTL1 0x3D +#define TWL4030_REG_MISC_SET_1 0x3E +#define TWL4030_REG_PCMBTMUX 0x3F +#define TWL4030_REG_RX_PATH_SEL 0x43 +#define TWL4030_REG_VDL_APGA_CTL 0x44 +#define TWL4030_REG_VIBRA_CTL 0x45 +#define TWL4030_REG_VIBRA_SET 0x46 +#define TWL4030_REG_VIBRA_PWM_SET 0x47 +#define TWL4030_REG_ANAMIC_GAIN 0x48 +#define TWL4030_REG_MISC_SET_2 0x49 + +/* Bitfield Definitions */ + +/* TWL4030_CODEC_MODE (0x01) Fields */ +#define TWL4030_APLL_RATE 0xF0 +#define TWL4030_APLL_RATE_8000 0x00 +#define TWL4030_APLL_RATE_11025 0x10 +#define TWL4030_APLL_RATE_12000 0x20 +#define TWL4030_APLL_RATE_16000 0x40 +#define TWL4030_APLL_RATE_22050 0x50 +#define TWL4030_APLL_RATE_24000 0x60 +#define TWL4030_APLL_RATE_32000 0x80 +#define TWL4030_APLL_RATE_44100 0x90 +#define TWL4030_APLL_RATE_48000 0xA0 +#define TWL4030_APLL_RATE_96000 0xE0 +#define TWL4030_SEL_16K 0x08 +#define TWL4030_CODECPDZ 0x02 +#define TWL4030_OPT_MODE 0x01 +#define TWL4030_OPTION_1 (1 << 0) +#define TWL4030_OPTION_2 (0 << 0) + +/* TWL4030_OPTION (0x02) Fields */ +#define TWL4030_ATXL1_EN (1 << 0) +#define TWL4030_ATXR1_EN (1 << 1) +#define TWL4030_ATXL2_VTXL_EN (1 << 2) +#define TWL4030_ATXR2_VTXR_EN (1 << 3) +#define TWL4030_ARXL1_VRX_EN (1 << 4) +#define TWL4030_ARXR1_EN (1 << 5) +#define TWL4030_ARXL2_EN (1 << 6) +#define TWL4030_ARXR2_EN (1 << 7) + +/* TWL4030_REG_MICBIAS_CTL (0x04) Fields */ +#define TWL4030_MICBIAS2_CTL 0x40 +#define TWL4030_MICBIAS1_CTL 0x20 +#define TWL4030_HSMICBIAS_EN 0x04 +#define TWL4030_MICBIAS2_EN 0x02 +#define TWL4030_MICBIAS1_EN 0x01 + +/* ANAMICL (0x05) Fields */ +#define TWL4030_CNCL_OFFSET_START 0x80 +#define TWL4030_OFFSET_CNCL_SEL 0x60 +#define TWL4030_OFFSET_CNCL_SEL_ARX1 0x00 +#define TWL4030_OFFSET_CNCL_SEL_ARX2 0x20 +#define TWL4030_OFFSET_CNCL_SEL_VRX 0x40 +#define TWL4030_OFFSET_CNCL_SEL_ALL 0x60 +#define TWL4030_MICAMPL_EN 0x10 +#define TWL4030_CKMIC_EN 0x08 +#define TWL4030_AUXL_EN 0x04 +#define TWL4030_HSMIC_EN 0x02 +#define TWL4030_MAINMIC_EN 0x01 + +/* ANAMICR (0x06) Fields */ +#define TWL4030_MICAMPR_EN 0x10 +#define TWL4030_AUXR_EN 0x04 +#define TWL4030_SUBMIC_EN 0x01 + +/* AVADC_CTL (0x07) Fields */ +#define TWL4030_ADCL_EN 0x08 +#define TWL4030_AVADC_CLK_PRIORITY 0x04 +#define TWL4030_ADCR_EN 0x02 + +/* TWL4030_REG_ADCMICSEL (0x08) Fields */ +#define TWL4030_DIGMIC1_EN 0x08 +#define TWL4030_TX2IN_SEL 0x04 +#define TWL4030_DIGMIC0_EN 0x02 +#define TWL4030_TX1IN_SEL 0x01 + +/* AUDIO_IF (0x0E) Fields */ +#define TWL4030_AIF_SLAVE_EN 0x80 +#define TWL4030_DATA_WIDTH 0x60 +#define TWL4030_DATA_WIDTH_16S_16W 0x00 +#define TWL4030_DATA_WIDTH_32S_16W 0x40 +#define TWL4030_DATA_WIDTH_32S_24W 0x60 +#define TWL4030_AIF_FORMAT 0x18 +#define TWL4030_AIF_FORMAT_CODEC 0x00 +#define TWL4030_AIF_FORMAT_LEFT 0x08 +#define TWL4030_AIF_FORMAT_RIGHT 0x10 +#define TWL4030_AIF_FORMAT_TDM 0x18 +#define TWL4030_AIF_TRI_EN 0x04 +#define TWL4030_CLK256FS_EN 0x02 +#define TWL4030_AIF_EN 0x01 + +/* VOICE_IF (0x0F) Fields */ +#define TWL4030_VIF_SLAVE_EN 0x80 +#define TWL4030_VIF_DIN_EN 0x40 +#define TWL4030_VIF_DOUT_EN 0x20 +#define TWL4030_VIF_SWAP 0x10 +#define TWL4030_VIF_FORMAT 0x08 +#define TWL4030_VIF_TRI_EN 0x04 +#define TWL4030_VIF_SUB_EN 0x02 +#define TWL4030_VIF_EN 0x01 + +/* EAR_CTL (0x21) */ +#define TWL4030_EAR_GAIN 0x30 + +/* HS_GAIN_SET (0x23) Fields */ +#define TWL4030_HSR_GAIN 0x0C +#define TWL4030_HSR_GAIN_PWR_DOWN 0x00 +#define TWL4030_HSR_GAIN_PLUS_6DB 0x04 +#define TWL4030_HSR_GAIN_0DB 0x08 +#define TWL4030_HSR_GAIN_MINUS_6DB 0x0C +#define TWL4030_HSL_GAIN 0x03 +#define TWL4030_HSL_GAIN_PWR_DOWN 0x00 +#define TWL4030_HSL_GAIN_PLUS_6DB 0x01 +#define TWL4030_HSL_GAIN_0DB 0x02 +#define TWL4030_HSL_GAIN_MINUS_6DB 0x03 + +/* HS_POPN_SET (0x24) Fields */ +#define TWL4030_VMID_EN 0x40 +#define TWL4030_EXTMUTE 0x20 +#define TWL4030_RAMP_DELAY 0x1C +#define TWL4030_RAMP_DELAY_20MS 0x00 +#define TWL4030_RAMP_DELAY_40MS 0x04 +#define TWL4030_RAMP_DELAY_81MS 0x08 +#define TWL4030_RAMP_DELAY_161MS 0x0C +#define TWL4030_RAMP_DELAY_323MS 0x10 +#define TWL4030_RAMP_DELAY_645MS 0x14 +#define TWL4030_RAMP_DELAY_1291MS 0x18 +#define TWL4030_RAMP_DELAY_2581MS 0x1C +#define TWL4030_RAMP_EN 0x02 + +/* PREDL_CTL (0x25) */ +#define TWL4030_PREDL_GAIN 0x30 + +/* PREDR_CTL (0x26) */ +#define TWL4030_PREDR_GAIN 0x30 + +/* PRECKL_CTL (0x27) */ +#define TWL4030_PRECKL_GAIN 0x30 + +/* PRECKR_CTL (0x28) */ +#define TWL4030_PRECKR_GAIN 0x30 + +/* HFL_CTL (0x29, 0x2A) Fields */ +#define TWL4030_HF_CTL_HB_EN 0x04 +#define TWL4030_HF_CTL_LOOP_EN 0x08 +#define TWL4030_HF_CTL_RAMP_EN 0x10 +#define TWL4030_HF_CTL_REF_EN 0x20 + +/* APLL_CTL (0x3A) Fields */ +#define TWL4030_APLL_EN 0x10 +#define TWL4030_APLL_INFREQ 0x0F +#define TWL4030_APLL_INFREQ_19200KHZ 0x05 +#define TWL4030_APLL_INFREQ_26000KHZ 0x06 +#define TWL4030_APLL_INFREQ_38400KHZ 0x0F + +/* REG_MISC_SET_1 (0x3E) Fields */ +#define TWL4030_CLK64_EN 0x80 +#define TWL4030_SCRAMBLE_EN 0x40 +#define TWL4030_FMLOOP_EN 0x20 +#define TWL4030_SMOOTH_ANAVOL_EN 0x02 +#define TWL4030_DIGMIC_LR_SWAP_EN 0x01 + +/* VIBRA_CTL (0x45) */ +#define TWL4030_VIBRA_EN 0x01 +#define TWL4030_VIBRA_DIR 0x02 +#define TWL4030_VIBRA_AUDIO_SEL_L1 (0x00 << 2) +#define TWL4030_VIBRA_AUDIO_SEL_R1 (0x01 << 2) +#define TWL4030_VIBRA_AUDIO_SEL_L2 (0x02 << 2) +#define TWL4030_VIBRA_AUDIO_SEL_R2 (0x03 << 2) +#define TWL4030_VIBRA_SEL 0x10 +#define TWL4030_VIBRA_DIR_SEL 0x20 + +/* TWL4030 codec resource IDs */ +enum twl4030_codec_res { + TWL4030_CODEC_RES_POWER = 0, + TWL4030_CODEC_RES_APLL, + TWL4030_CODEC_RES_MAX, +}; + +int twl4030_codec_disable_resource(enum twl4030_codec_res id); +int twl4030_codec_enable_resource(enum twl4030_codec_res id); + +#endif /* End of __TWL4030_CODEC_H__ */ -- cgit v1.3-8-gc7d7 From 26276069d2f51955cf549faab5d3a71a4b37ba23 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Wed, 4 Nov 2009 09:58:17 +0200 Subject: MFD: TWL4030: Add audio_mclk to the codec platform data Add audio_mclk to the platform data struct for the twl4030-codec MFD driver. Signed-off-by: Peter Ujfalusi Acked-by: Samuel Ortiz Signed-off-by: Mark Brown --- include/linux/i2c/twl4030.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/i2c/twl4030.h b/include/linux/i2c/twl4030.h index ba61add3e05a..5306a759cbde 100644 --- a/include/linux/i2c/twl4030.h +++ b/include/linux/i2c/twl4030.h @@ -414,6 +414,7 @@ struct twl4030_codec_vibra_data { }; struct twl4030_codec_data { + unsigned int audio_mclk; struct twl4030_codec_audio_data *audio; struct twl4030_codec_vibra_data *vibra; }; -- cgit v1.3-8-gc7d7 From cfaf6d2c1cb231f77e24109cb1460db429608bd4 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Wed, 4 Nov 2009 09:58:19 +0200 Subject: MFD: twl4030-codec: APLL_INFREQ handling in the MFD driver Configure the APLL_INFREQ field in the APLL_CTL register based on the platform data. Provide also a function for childs to query the audio_mclk frequency. Signed-off-by: Peter Ujfalusi Acked-by: Samuel Ortiz Signed-off-by: Mark Brown --- drivers/mfd/twl4030-codec.c | 35 +++++++++++++++++++++++++++++++++++ include/linux/mfd/twl4030-codec.h | 1 + 2 files changed, 36 insertions(+) (limited to 'include') diff --git a/drivers/mfd/twl4030-codec.c b/drivers/mfd/twl4030-codec.c index 97103078dc2a..77b914907d7c 100644 --- a/drivers/mfd/twl4030-codec.c +++ b/drivers/mfd/twl4030-codec.c @@ -41,6 +41,7 @@ struct twl4030_codec_resource { }; struct twl4030_codec { + unsigned int audio_mclk; struct mutex mutex; struct twl4030_codec_resource resource[TWL4030_CODEC_RES_MAX]; struct mfd_cell cells[TWL4030_CODEC_CELLS]; @@ -145,12 +146,45 @@ int twl4030_codec_disable_resource(unsigned id) } EXPORT_SYMBOL_GPL(twl4030_codec_disable_resource); +unsigned int twl4030_codec_get_mclk(void) +{ + struct twl4030_codec *codec = platform_get_drvdata(twl4030_codec_dev); + + return codec->audio_mclk; +} +EXPORT_SYMBOL_GPL(twl4030_codec_get_mclk); + static int __devinit twl4030_codec_probe(struct platform_device *pdev) { struct twl4030_codec *codec; struct twl4030_codec_data *pdata = pdev->dev.platform_data; struct mfd_cell *cell = NULL; int ret, childs = 0; + u8 val; + + if (!pdata) { + dev_err(&pdev->dev, "Platform data is missing\n"); + return -EINVAL; + } + + /* Configure APLL_INFREQ and disable APLL if enabled */ + val = 0; + switch (pdata->audio_mclk) { + case 19200000: + val |= TWL4030_APLL_INFREQ_19200KHZ; + break; + case 26000000: + val |= TWL4030_APLL_INFREQ_26000KHZ; + break; + case 38400000: + val |= TWL4030_APLL_INFREQ_38400KHZ; + break; + default: + dev_err(&pdev->dev, "Invalid audio_mclk\n"); + return -EINVAL; + } + twl4030_i2c_write_u8(TWL4030_MODULE_AUDIO_VOICE, + val, TWL4030_REG_APLL_CTL); codec = kzalloc(sizeof(struct twl4030_codec), GFP_KERNEL); if (!codec) @@ -160,6 +194,7 @@ static int __devinit twl4030_codec_probe(struct platform_device *pdev) twl4030_codec_dev = pdev; mutex_init(&codec->mutex); + codec->audio_mclk = pdata->audio_mclk; /* Codec power */ codec->resource[TWL4030_CODEC_RES_POWER].reg = TWL4030_REG_CODEC_MODE; diff --git a/include/linux/mfd/twl4030-codec.h b/include/linux/mfd/twl4030-codec.h index ef0a3041d759..2ec317c68e59 100644 --- a/include/linux/mfd/twl4030-codec.h +++ b/include/linux/mfd/twl4030-codec.h @@ -267,5 +267,6 @@ enum twl4030_codec_res { int twl4030_codec_disable_resource(enum twl4030_codec_res id); int twl4030_codec_enable_resource(enum twl4030_codec_res id); +unsigned int twl4030_codec_get_mclk(void); #endif /* End of __TWL4030_CODEC_H__ */ -- cgit v1.3-8-gc7d7 From 76b5c84f77c3abc92a3c4e185e7b78f17a0ed204 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 5 Nov 2009 22:59:46 -0800 Subject: Input: add new keycodes useful in mobile devices Add new codes for camera focus key, and camera lens cover, keypad slide, front proximity switches. Signed-off-by: Jani Nikula Signed-off-by: Dmitry Torokhov --- include/linux/input.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/input.h b/include/linux/input.h index c2b1a7d244d9..84b501ab0d8f 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -595,6 +595,8 @@ struct input_absinfo { #define KEY_NUMERIC_STAR 0x20a #define KEY_NUMERIC_POUND 0x20b +#define KEY_CAMERA_FOCUS 0x210 + /* We avoid low common keys in module aliases so they don't get huge. */ #define KEY_MIN_INTERESTING KEY_MUTE #define KEY_MAX 0x2ff @@ -677,6 +679,9 @@ struct input_absinfo { #define SW_LINEOUT_INSERT 0x06 /* set = inserted */ #define SW_JACK_PHYSICAL_INSERT 0x07 /* set = mechanical switch set */ #define SW_VIDEOOUT_INSERT 0x08 /* set = inserted */ +#define SW_CAMERA_LENS_COVER 0x09 /* set = lens covered */ +#define SW_KEYPAD_SLIDE 0x0a /* set = keypad slide out */ +#define SW_FRONT_PROXIMITY 0x0b /* set = front proximity sensor active */ #define SW_MAX 0x0f #define SW_CNT (SW_MAX+1) -- cgit v1.3-8-gc7d7 From 9aeba6297151abcb1b34f3237e4c028aae500ce4 Mon Sep 17 00:00:00 2001 From: Krzysztof Helt Date: Sun, 22 Nov 2009 17:23:45 +0100 Subject: ALSA: opti-miro: make miro.h header available outside the alsa directory Move the miro.h header to the include/sound directory. It can be used in the Miro PCM20 radio driver (v4l). Signed-off-by: Krzysztof Helt Signed-off-by: Takashi Iwai --- include/sound/aci.h | 73 ++++++++++++++++++++++++++++++++++++++++++++++++ sound/isa/opti9xx/miro.c | 2 +- sound/isa/opti9xx/miro.h | 73 ------------------------------------------------ 3 files changed, 74 insertions(+), 74 deletions(-) create mode 100644 include/sound/aci.h delete mode 100644 sound/isa/opti9xx/miro.h (limited to 'include') diff --git a/include/sound/aci.h b/include/sound/aci.h new file mode 100644 index 000000000000..bb796d06d0f2 --- /dev/null +++ b/include/sound/aci.h @@ -0,0 +1,73 @@ +#ifndef _ACI_H_ +#define _ACI_H_ + +#define ACI_REG_COMMAND 0 /* write register offset */ +#define ACI_REG_STATUS 1 /* read register offset */ +#define ACI_REG_BUSY 2 /* busy register offset */ +#define ACI_REG_RDS 2 /* PCM20: RDS register offset */ +#define ACI_MINTIME 500 /* ACI time out limit */ + +#define ACI_SET_MUTE 0x0d +#define ACI_SET_POWERAMP 0x0f +#define ACI_SET_TUNERMUTE 0xa3 +#define ACI_SET_TUNERMONO 0xa4 +#define ACI_SET_IDE 0xd0 +#define ACI_SET_WSS 0xd1 +#define ACI_SET_SOLOMODE 0xd2 +#define ACI_SET_PREAMP 0x03 +#define ACI_GET_PREAMP 0x21 +#define ACI_WRITE_TUNE 0xa7 +#define ACI_READ_TUNERSTEREO 0xa8 +#define ACI_READ_TUNERSTATION 0xa9 +#define ACI_READ_VERSION 0xf1 +#define ACI_READ_IDCODE 0xf2 +#define ACI_INIT 0xff +#define ACI_STATUS 0xf0 +#define ACI_S_GENERAL 0x00 +#define ACI_ERROR_OP 0xdf + +/* ACI Mixer */ + +/* These are the values for the right channel GET registers. + Add an offset of 0x01 for the left channel register. + (left=right+0x01) */ + +#define ACI_GET_MASTER 0x03 +#define ACI_GET_MIC 0x05 +#define ACI_GET_LINE 0x07 +#define ACI_GET_CD 0x09 +#define ACI_GET_SYNTH 0x0b +#define ACI_GET_PCM 0x0d +#define ACI_GET_LINE1 0x10 /* Radio on PCM20 */ +#define ACI_GET_LINE2 0x12 + +#define ACI_GET_EQ1 0x22 /* from Bass ... */ +#define ACI_GET_EQ2 0x24 +#define ACI_GET_EQ3 0x26 +#define ACI_GET_EQ4 0x28 +#define ACI_GET_EQ5 0x2a +#define ACI_GET_EQ6 0x2c +#define ACI_GET_EQ7 0x2e /* ... to Treble */ + +/* And these are the values for the right channel SET registers. + For left channel access you have to add an offset of 0x08. + MASTER is an exception, which needs an offset of 0x01 */ + +#define ACI_SET_MASTER 0x00 +#define ACI_SET_MIC 0x30 +#define ACI_SET_LINE 0x31 +#define ACI_SET_CD 0x34 +#define ACI_SET_SYNTH 0x33 +#define ACI_SET_PCM 0x32 +#define ACI_SET_LINE1 0x35 /* Radio on PCM20 */ +#define ACI_SET_LINE2 0x36 + +#define ACI_SET_EQ1 0x40 /* from Bass ... */ +#define ACI_SET_EQ2 0x41 +#define ACI_SET_EQ3 0x42 +#define ACI_SET_EQ4 0x43 +#define ACI_SET_EQ5 0x44 +#define ACI_SET_EQ6 0x45 +#define ACI_SET_EQ7 0x46 /* ... to Treble */ + +#endif /* _ACI_H_ */ diff --git a/sound/isa/opti9xx/miro.c b/sound/isa/opti9xx/miro.c index db4a4fbdc5ca..932a067ef980 100644 --- a/sound/isa/opti9xx/miro.c +++ b/sound/isa/opti9xx/miro.c @@ -40,7 +40,7 @@ #define SNDRV_LEGACY_FIND_FREE_IRQ #define SNDRV_LEGACY_FIND_FREE_DMA #include -#include "miro.h" +#include MODULE_AUTHOR("Martin Langer "); MODULE_LICENSE("GPL"); diff --git a/sound/isa/opti9xx/miro.h b/sound/isa/opti9xx/miro.h deleted file mode 100644 index 6e1385b8e07e..000000000000 --- a/sound/isa/opti9xx/miro.h +++ /dev/null @@ -1,73 +0,0 @@ -#ifndef _MIRO_H_ -#define _MIRO_H_ - -#define ACI_REG_COMMAND 0 /* write register offset */ -#define ACI_REG_STATUS 1 /* read register offset */ -#define ACI_REG_BUSY 2 /* busy register offset */ -#define ACI_REG_RDS 2 /* PCM20: RDS register offset */ -#define ACI_MINTIME 500 /* ACI time out limit */ - -#define ACI_SET_MUTE 0x0d -#define ACI_SET_POWERAMP 0x0f -#define ACI_SET_TUNERMUTE 0xa3 -#define ACI_SET_TUNERMONO 0xa4 -#define ACI_SET_IDE 0xd0 -#define ACI_SET_WSS 0xd1 -#define ACI_SET_SOLOMODE 0xd2 -#define ACI_SET_PREAMP 0x03 -#define ACI_GET_PREAMP 0x21 -#define ACI_WRITE_TUNE 0xa7 -#define ACI_READ_TUNERSTEREO 0xa8 -#define ACI_READ_TUNERSTATION 0xa9 -#define ACI_READ_VERSION 0xf1 -#define ACI_READ_IDCODE 0xf2 -#define ACI_INIT 0xff -#define ACI_STATUS 0xf0 -#define ACI_S_GENERAL 0x00 -#define ACI_ERROR_OP 0xdf - -/* ACI Mixer */ - -/* These are the values for the right channel GET registers. - Add an offset of 0x01 for the left channel register. - (left=right+0x01) */ - -#define ACI_GET_MASTER 0x03 -#define ACI_GET_MIC 0x05 -#define ACI_GET_LINE 0x07 -#define ACI_GET_CD 0x09 -#define ACI_GET_SYNTH 0x0b -#define ACI_GET_PCM 0x0d -#define ACI_GET_LINE1 0x10 /* Radio on PCM20 */ -#define ACI_GET_LINE2 0x12 - -#define ACI_GET_EQ1 0x22 /* from Bass ... */ -#define ACI_GET_EQ2 0x24 -#define ACI_GET_EQ3 0x26 -#define ACI_GET_EQ4 0x28 -#define ACI_GET_EQ5 0x2a -#define ACI_GET_EQ6 0x2c -#define ACI_GET_EQ7 0x2e /* ... to Treble */ - -/* And these are the values for the right channel SET registers. - For left channel access you have to add an offset of 0x08. - MASTER is an exception, which needs an offset of 0x01 */ - -#define ACI_SET_MASTER 0x00 -#define ACI_SET_MIC 0x30 -#define ACI_SET_LINE 0x31 -#define ACI_SET_CD 0x34 -#define ACI_SET_SYNTH 0x33 -#define ACI_SET_PCM 0x32 -#define ACI_SET_LINE1 0x35 /* Radio on PCM20 */ -#define ACI_SET_LINE2 0x36 - -#define ACI_SET_EQ1 0x40 /* from Bass ... */ -#define ACI_SET_EQ2 0x41 -#define ACI_SET_EQ3 0x42 -#define ACI_SET_EQ4 0x43 -#define ACI_SET_EQ5 0x44 -#define ACI_SET_EQ6 0x45 -#define ACI_SET_EQ7 0x46 /* ... to Treble */ - -#endif /* _MIRO_H_ */ -- cgit v1.3-8-gc7d7 From 9dc9120c774e1d7e3d939542200bd44829c0059d Mon Sep 17 00:00:00 2001 From: Krzysztof Helt Date: Sun, 22 Nov 2009 17:26:34 +0100 Subject: ALSA: opti-miro: expose ACI mixer to outside drivers The ACI mixer is used to control the radio FM module installed on the Miro PCM20 sound card. Expose ACI mixer outside the sound card driver. Signed-off-by: Krzysztof Helt Signed-off-by: Takashi Iwai --- include/sound/aci.h | 17 ++++ sound/isa/opti9xx/miro.c | 250 +++++++++++++++++++++++++++-------------------- 2 files changed, 162 insertions(+), 105 deletions(-) (limited to 'include') diff --git a/include/sound/aci.h b/include/sound/aci.h index bb796d06d0f2..ee639d355ef0 100644 --- a/include/sound/aci.h +++ b/include/sound/aci.h @@ -70,4 +70,21 @@ #define ACI_SET_EQ6 0x45 #define ACI_SET_EQ7 0x46 /* ... to Treble */ +struct snd_miro_aci { + unsigned long aci_port; + int aci_vendor; + int aci_product; + int aci_version; + int aci_amp; + int aci_preamp; + int aci_solomode; + + struct mutex aci_mutex; +}; + +int snd_aci_cmd(struct snd_miro_aci *aci, int write1, int write2, int write3); + +struct snd_miro_aci *snd_aci_get_aci(void); + #endif /* _ACI_H_ */ + diff --git a/sound/isa/opti9xx/miro.c b/sound/isa/opti9xx/miro.c index 932a067ef980..40b64cd54c85 100644 --- a/sound/isa/opti9xx/miro.c +++ b/sound/isa/opti9xx/miro.c @@ -96,7 +96,6 @@ MODULE_PARM_DESC(ide, "enable ide port"); #define OPTi9XX_MC_REG(n) n - struct snd_miro { unsigned short hardware; unsigned char password; @@ -120,17 +119,11 @@ struct snd_miro { long mpu_port; int mpu_irq; - unsigned long aci_port; - int aci_vendor; - int aci_product; - int aci_version; - int aci_amp; - int aci_preamp; - int aci_solomode; - - struct mutex aci_mutex; + struct snd_miro_aci *aci; }; +static struct snd_miro_aci aci_device; + static char * snd_opti9xx_names[] = { "unkown", "82C928", "82C929", @@ -142,13 +135,14 @@ static char * snd_opti9xx_names[] = { * ACI control */ -static int aci_busy_wait(struct snd_miro * miro) +static int aci_busy_wait(struct snd_miro_aci *aci) { long timeout; unsigned char byte; - for (timeout = 1; timeout <= ACI_MINTIME+30; timeout++) { - if (((byte=inb(miro->aci_port + ACI_REG_BUSY)) & 1) == 0) { + for (timeout = 1; timeout <= ACI_MINTIME + 30; timeout++) { + byte = inb(aci->aci_port + ACI_REG_BUSY); + if ((byte & 1) == 0) { if (timeout >= ACI_MINTIME) snd_printd("aci ready in round %ld.\n", timeout-ACI_MINTIME); @@ -174,10 +168,10 @@ static int aci_busy_wait(struct snd_miro * miro) return -EBUSY; } -static inline int aci_write(struct snd_miro * miro, unsigned char byte) +static inline int aci_write(struct snd_miro_aci *aci, unsigned char byte) { - if (aci_busy_wait(miro) >= 0) { - outb(byte, miro->aci_port + ACI_REG_COMMAND); + if (aci_busy_wait(aci) >= 0) { + outb(byte, aci->aci_port + ACI_REG_COMMAND); return 0; } else { snd_printk(KERN_ERR "aci busy, aci_write(0x%x) stopped.\n", byte); @@ -185,12 +179,12 @@ static inline int aci_write(struct snd_miro * miro, unsigned char byte) } } -static inline int aci_read(struct snd_miro * miro) +static inline int aci_read(struct snd_miro_aci *aci) { unsigned char byte; - if (aci_busy_wait(miro) >= 0) { - byte=inb(miro->aci_port + ACI_REG_STATUS); + if (aci_busy_wait(aci) >= 0) { + byte = inb(aci->aci_port + ACI_REG_STATUS); return byte; } else { snd_printk(KERN_ERR "aci busy, aci_read() stopped.\n"); @@ -198,39 +192,49 @@ static inline int aci_read(struct snd_miro * miro) } } -static int aci_cmd(struct snd_miro * miro, int write1, int write2, int write3) +int snd_aci_cmd(struct snd_miro_aci *aci, int write1, int write2, int write3) { int write[] = {write1, write2, write3}; int value, i; - if (mutex_lock_interruptible(&miro->aci_mutex)) + if (mutex_lock_interruptible(&aci->aci_mutex)) return -EINTR; for (i=0; i<3; i++) { if (write[i]< 0 || write[i] > 255) break; else { - value = aci_write(miro, write[i]); + value = aci_write(aci, write[i]); if (value < 0) goto out; } } - value = aci_read(miro); + value = aci_read(aci); -out: mutex_unlock(&miro->aci_mutex); +out: mutex_unlock(&aci->aci_mutex); return value; } +EXPORT_SYMBOL(snd_aci_cmd); + +static int aci_getvalue(struct snd_miro_aci *aci, unsigned char index) +{ + return snd_aci_cmd(aci, ACI_STATUS, index, -1); +} -static int aci_getvalue(struct snd_miro * miro, unsigned char index) +static int aci_setvalue(struct snd_miro_aci *aci, unsigned char index, + int value) { - return aci_cmd(miro, ACI_STATUS, index, -1); + return snd_aci_cmd(aci, index, value, -1); } -static int aci_setvalue(struct snd_miro * miro, unsigned char index, int value) +struct snd_miro_aci *snd_aci_get_aci(void) { - return aci_cmd(miro, index, value, -1); + if (aci_device.aci_port == 0) + return NULL; + return &aci_device; } +EXPORT_SYMBOL(snd_aci_get_aci); /* * MIXER part @@ -244,8 +248,10 @@ static int snd_miro_get_capture(struct snd_kcontrol *kcontrol, struct snd_miro *miro = snd_kcontrol_chip(kcontrol); int value; - if ((value = aci_getvalue(miro, ACI_S_GENERAL)) < 0) { - snd_printk(KERN_ERR "snd_miro_get_capture() failed: %d\n", value); + value = aci_getvalue(miro->aci, ACI_S_GENERAL); + if (value < 0) { + snd_printk(KERN_ERR "snd_miro_get_capture() failed: %d\n", + value); return value; } @@ -262,13 +268,15 @@ static int snd_miro_put_capture(struct snd_kcontrol *kcontrol, value = !(ucontrol->value.integer.value[0]); - if ((error = aci_setvalue(miro, ACI_SET_SOLOMODE, value)) < 0) { - snd_printk(KERN_ERR "snd_miro_put_capture() failed: %d\n", error); + error = aci_setvalue(miro->aci, ACI_SET_SOLOMODE, value); + if (error < 0) { + snd_printk(KERN_ERR "snd_miro_put_capture() failed: %d\n", + error); return error; } - change = (value != miro->aci_solomode); - miro->aci_solomode = value; + change = (value != miro->aci->aci_solomode); + miro->aci->aci_solomode = value; return change; } @@ -290,7 +298,7 @@ static int snd_miro_get_preamp(struct snd_kcontrol *kcontrol, struct snd_miro *miro = snd_kcontrol_chip(kcontrol); int value; - if (miro->aci_version <= 176) { + if (miro->aci->aci_version <= 176) { /* OSS says it's not readable with versions < 176. @@ -298,12 +306,14 @@ static int snd_miro_get_preamp(struct snd_kcontrol *kcontrol, which is a PCM12 with aci_version = 176. */ - ucontrol->value.integer.value[0] = miro->aci_preamp; + ucontrol->value.integer.value[0] = miro->aci->aci_preamp; return 0; } - if ((value = aci_getvalue(miro, ACI_GET_PREAMP)) < 0) { - snd_printk(KERN_ERR "snd_miro_get_preamp() failed: %d\n", value); + value = aci_getvalue(miro->aci, ACI_GET_PREAMP); + if (value < 0) { + snd_printk(KERN_ERR "snd_miro_get_preamp() failed: %d\n", + value); return value; } @@ -320,13 +330,15 @@ static int snd_miro_put_preamp(struct snd_kcontrol *kcontrol, value = ucontrol->value.integer.value[0]; - if ((error = aci_setvalue(miro, ACI_SET_PREAMP, value)) < 0) { - snd_printk(KERN_ERR "snd_miro_put_preamp() failed: %d\n", error); + error = aci_setvalue(miro->aci, ACI_SET_PREAMP, value); + if (error < 0) { + snd_printk(KERN_ERR "snd_miro_put_preamp() failed: %d\n", + error); return error; } - change = (value != miro->aci_preamp); - miro->aci_preamp = value; + change = (value != miro->aci->aci_preamp); + miro->aci->aci_preamp = value; return change; } @@ -337,7 +349,7 @@ static int snd_miro_get_amp(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { struct snd_miro *miro = snd_kcontrol_chip(kcontrol); - ucontrol->value.integer.value[0] = miro->aci_amp; + ucontrol->value.integer.value[0] = miro->aci->aci_amp; return 0; } @@ -350,13 +362,14 @@ static int snd_miro_put_amp(struct snd_kcontrol *kcontrol, value = ucontrol->value.integer.value[0]; - if ((error = aci_setvalue(miro, ACI_SET_POWERAMP, value)) < 0) { + error = aci_setvalue(miro->aci, ACI_SET_POWERAMP, value); + if (error < 0) { snd_printk(KERN_ERR "snd_miro_put_amp() to %d failed: %d\n", value, error); return error; } - change = (value != miro->aci_amp); - miro->aci_amp = value; + change = (value != miro->aci->aci_amp); + miro->aci->aci_amp = value; return change; } @@ -405,12 +418,14 @@ static int snd_miro_get_double(struct snd_kcontrol *kcontrol, int right_reg = kcontrol->private_value & 0xff; int left_reg = right_reg + 1; - if ((right_val = aci_getvalue(miro, right_reg)) < 0) { + right_val = aci_getvalue(miro->aci, right_reg); + if (right_val < 0) { snd_printk(KERN_ERR "aci_getvalue(%d) failed: %d\n", right_reg, right_val); return right_val; } - if ((left_val = aci_getvalue(miro, left_reg)) < 0) { + left_val = aci_getvalue(miro->aci, left_reg); + if (left_val < 0) { snd_printk(KERN_ERR "aci_getvalue(%d) failed: %d\n", left_reg, left_val); return left_val; } @@ -446,6 +461,7 @@ static int snd_miro_put_double(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { struct snd_miro *miro = snd_kcontrol_chip(kcontrol); + struct snd_miro_aci *aci = miro->aci; int left, right, left_old, right_old; int setreg_left, setreg_right, getreg_left, getreg_right; int change, error; @@ -461,12 +477,14 @@ static int snd_miro_put_double(struct snd_kcontrol *kcontrol, getreg_right = kcontrol->private_value & 0xff; getreg_left = getreg_right + 1; - if ((left_old = aci_getvalue(miro, getreg_left)) < 0) { + left_old = aci_getvalue(aci, getreg_left); + if (left_old < 0) { snd_printk(KERN_ERR "aci_getvalue(%d) failed: %d\n", getreg_left, left_old); return left_old; } - if ((right_old = aci_getvalue(miro, getreg_right)) < 0) { + right_old = aci_getvalue(aci, getreg_right); + if (right_old < 0) { snd_printk(KERN_ERR "aci_getvalue(%d) failed: %d\n", getreg_right, right_old); return right_old; } @@ -485,13 +503,15 @@ static int snd_miro_put_double(struct snd_kcontrol *kcontrol, right_old = 0x80 - right_old; if (left >= 0) { - if ((error = aci_setvalue(miro, setreg_left, left)) < 0) { + error = aci_setvalue(aci, setreg_left, left); + if (error < 0) { snd_printk(KERN_ERR "aci_setvalue(%d) failed: %d\n", left, error); return error; } } else { - if ((error = aci_setvalue(miro, setreg_left, 0x80 - left)) < 0) { + error = aci_setvalue(aci, setreg_left, 0x80 - left); + if (error < 0) { snd_printk(KERN_ERR "aci_setvalue(%d) failed: %d\n", 0x80 - left, error); return error; @@ -499,13 +519,15 @@ static int snd_miro_put_double(struct snd_kcontrol *kcontrol, } if (right >= 0) { - if ((error = aci_setvalue(miro, setreg_right, right)) < 0) { + error = aci_setvalue(aci, setreg_right, right); + if (error < 0) { snd_printk(KERN_ERR "aci_setvalue(%d) failed: %d\n", right, error); return error; } } else { - if ((error = aci_setvalue(miro, setreg_right, 0x80 - right)) < 0) { + error = aci_setvalue(aci, setreg_right, 0x80 - right); + if (error < 0) { snd_printk(KERN_ERR "aci_setvalue(%d) failed: %d\n", 0x80 - right, error); return error; @@ -523,12 +545,14 @@ static int snd_miro_put_double(struct snd_kcontrol *kcontrol, left_old = 0x20 - left_old; right_old = 0x20 - right_old; - if ((error = aci_setvalue(miro, setreg_left, 0x20 - left)) < 0) { + error = aci_setvalue(aci, setreg_left, 0x20 - left); + if (error < 0) { snd_printk(KERN_ERR "aci_setvalue(%d) failed: %d\n", 0x20 - left, error); return error; } - if ((error = aci_setvalue(miro, setreg_right, 0x20 - right)) < 0) { + error = aci_setvalue(aci, setreg_right, 0x20 - right); + if (error < 0) { snd_printk(KERN_ERR "aci_setvalue(%d) failed: %d\n", 0x20 - right, error); return error; @@ -626,11 +650,13 @@ static unsigned char aci_init_values[][2] __devinitdata = { static int __devinit snd_set_aci_init_values(struct snd_miro *miro) { int idx, error; + struct snd_miro_aci *aci = miro->aci; /* enable WSS on PCM1 */ - if ((miro->aci_product == 'A') && wss) { - if ((error = aci_setvalue(miro, ACI_SET_WSS, wss)) < 0) { + if ((aci->aci_product == 'A') && wss) { + error = aci_setvalue(aci, ACI_SET_WSS, wss); + if (error < 0) { snd_printk(KERN_ERR "enabling WSS mode failed\n"); return error; } @@ -639,7 +665,8 @@ static int __devinit snd_set_aci_init_values(struct snd_miro *miro) /* enable IDE port */ if (ide) { - if ((error = aci_setvalue(miro, ACI_SET_IDE, ide)) < 0) { + error = aci_setvalue(aci, ACI_SET_IDE, ide); + if (error < 0) { snd_printk(KERN_ERR "enabling IDE port failed\n"); return error; } @@ -647,17 +674,18 @@ static int __devinit snd_set_aci_init_values(struct snd_miro *miro) /* set common aci values */ - for (idx = 0; idx < ARRAY_SIZE(aci_init_values); idx++) - if ((error = aci_setvalue(miro, aci_init_values[idx][0], - aci_init_values[idx][1])) < 0) { + for (idx = 0; idx < ARRAY_SIZE(aci_init_values); idx++) { + error = aci_setvalue(aci, aci_init_values[idx][0], + aci_init_values[idx][1]); + if (error < 0) { snd_printk(KERN_ERR "aci_setvalue(%d) failed: %d\n", aci_init_values[idx][0], error); return error; } - - miro->aci_amp = 0; - miro->aci_preamp = 0; - miro->aci_solomode = 1; + } + aci->aci_amp = 0; + aci->aci_preamp = 0; + aci->aci_solomode = 1; return 0; } @@ -688,7 +716,8 @@ static int __devinit snd_miro_mixer(struct snd_card *card, return err; } - if ((miro->aci_product == 'A') || (miro->aci_product == 'B')) { + if ((miro->aci->aci_product == 'A') || + (miro->aci->aci_product == 'B')) { /* PCM1/PCM12 with power-amp and Line 2 */ if ((err = snd_ctl_add(card, snd_ctl_new1(&snd_miro_line_control[0], miro))) < 0) return err; @@ -696,16 +725,17 @@ static int __devinit snd_miro_mixer(struct snd_card *card, return err; } - if ((miro->aci_product == 'B') || (miro->aci_product == 'C')) { + if ((miro->aci->aci_product == 'B') || + (miro->aci->aci_product == 'C')) { /* PCM12/PCM20 with mic-preamp */ if ((err = snd_ctl_add(card, snd_ctl_new1(&snd_miro_preamp_control[0], miro))) < 0) return err; - if (miro->aci_version >= 176) + if (miro->aci->aci_version >= 176) if ((err = snd_ctl_add(card, snd_ctl_new1(&snd_miro_capture_control[0], miro))) < 0) return err; } - if (miro->aci_product == 'C') { + if (miro->aci->aci_product == 'C') { /* PCM20 with radio and 7 band equalizer */ if ((err = snd_ctl_add(card, snd_ctl_new1(&snd_miro_radio_control[0], miro))) < 0) return err; @@ -843,14 +873,15 @@ static void snd_miro_proc_read(struct snd_info_entry * entry, struct snd_info_buffer *buffer) { struct snd_miro *miro = (struct snd_miro *) entry->private_data; + struct snd_miro_aci *aci = miro->aci; char* model = "unknown"; /* miroSOUND PCM1 pro, early PCM12 */ if ((miro->hardware == OPTi9XX_HW_82C929) && - (miro->aci_vendor == 'm') && - (miro->aci_product == 'A')) { - switch(miro->aci_version) { + (aci->aci_vendor == 'm') && + (aci->aci_product == 'A')) { + switch (aci->aci_version) { case 3: model = "miroSOUND PCM1 pro"; break; @@ -863,9 +894,9 @@ static void snd_miro_proc_read(struct snd_info_entry * entry, /* miroSOUND PCM12, PCM12 (Rev. E), PCM12 pnp */ if ((miro->hardware == OPTi9XX_HW_82C924) && - (miro->aci_vendor == 'm') && - (miro->aci_product == 'B')) { - switch(miro->aci_version) { + (aci->aci_vendor == 'm') && + (aci->aci_product == 'B')) { + switch (aci->aci_version) { case 4: model = "miroSOUND PCM12"; break; @@ -881,9 +912,9 @@ static void snd_miro_proc_read(struct snd_info_entry * entry, /* miroSOUND PCM20 radio */ if ((miro->hardware == OPTi9XX_HW_82C924) && - (miro->aci_vendor == 'm') && - (miro->aci_product == 'C')) { - switch(miro->aci_version) { + (aci->aci_vendor == 'm') && + (aci->aci_product == 'C')) { + switch (aci->aci_version) { case 7: model = "miroSOUND PCM20 radio (Rev. E)"; break; @@ -907,17 +938,17 @@ static void snd_miro_proc_read(struct snd_info_entry * entry, snd_iprintf(buffer, "ACI information:\n"); snd_iprintf(buffer, " vendor : "); - switch(miro->aci_vendor) { + switch (aci->aci_vendor) { case 'm': snd_iprintf(buffer, "Miro\n"); break; default: - snd_iprintf(buffer, "unknown (0x%x)\n", miro->aci_vendor); + snd_iprintf(buffer, "unknown (0x%x)\n", aci->aci_vendor); break; } snd_iprintf(buffer, " product : "); - switch(miro->aci_product) { + switch (aci->aci_product) { case 'A': snd_iprintf(buffer, "miroSOUND PCM1 pro / (early) PCM12\n"); break; @@ -928,19 +959,19 @@ static void snd_miro_proc_read(struct snd_info_entry * entry, snd_iprintf(buffer, "miroSOUND PCM20 radio\n"); break; default: - snd_iprintf(buffer, "unknown (0x%x)\n", miro->aci_product); + snd_iprintf(buffer, "unknown (0x%x)\n", aci->aci_product); break; } snd_iprintf(buffer, " firmware: %d (0x%x)\n", - miro->aci_version, miro->aci_version); + aci->aci_version, aci->aci_version); snd_iprintf(buffer, " port : 0x%lx-0x%lx\n", - miro->aci_port, miro->aci_port+2); + aci->aci_port, aci->aci_port+2); snd_iprintf(buffer, " wss : 0x%x\n", wss); snd_iprintf(buffer, " ide : 0x%x\n", ide); - snd_iprintf(buffer, " solomode: 0x%x\n", miro->aci_solomode); - snd_iprintf(buffer, " amp : 0x%x\n", miro->aci_amp); - snd_iprintf(buffer, " preamp : 0x%x\n", miro->aci_preamp); + snd_iprintf(buffer, " solomode: 0x%x\n", aci->aci_solomode); + snd_iprintf(buffer, " amp : 0x%x\n", aci->aci_amp); + snd_iprintf(buffer, " preamp : 0x%x\n", aci->aci_preamp); } static void __devinit snd_miro_proc_init(struct snd_card *card, @@ -1139,46 +1170,53 @@ static int __devinit snd_card_miro_detect(struct snd_card *card, } static int __devinit snd_card_miro_aci_detect(struct snd_card *card, - struct snd_miro * miro) + struct snd_miro *miro) { unsigned char regval; int i; + struct snd_miro_aci *aci = &aci_device; + + miro->aci = aci; - mutex_init(&miro->aci_mutex); + mutex_init(&aci->aci_mutex); /* get ACI port from OPTi9xx MC 4 */ regval=inb(miro->mc_base + 4); - miro->aci_port = (regval & 0x10) ? 0x344: 0x354; + aci->aci_port = (regval & 0x10) ? 0x344 : 0x354; - if ((miro->res_aci_port = request_region(miro->aci_port, 3, "miro aci")) == NULL) { + miro->res_aci_port = request_region(aci->aci_port, 3, "miro aci"); + if (miro->res_aci_port == NULL) { snd_printk(KERN_ERR "aci i/o area 0x%lx-0x%lx already used.\n", - miro->aci_port, miro->aci_port+2); + aci->aci_port, aci->aci_port+2); return -ENOMEM; } /* force ACI into a known state */ for (i = 0; i < 3; i++) - if (aci_cmd(miro, ACI_ERROR_OP, -1, -1) < 0) { + if (snd_aci_cmd(aci, ACI_ERROR_OP, -1, -1) < 0) { snd_printk(KERN_ERR "can't force aci into known state.\n"); return -ENXIO; } - if ((miro->aci_vendor=aci_cmd(miro, ACI_READ_IDCODE, -1, -1)) < 0 || - (miro->aci_product=aci_cmd(miro, ACI_READ_IDCODE, -1, -1)) < 0) { - snd_printk(KERN_ERR "can't read aci id on 0x%lx.\n", miro->aci_port); + aci->aci_vendor = snd_aci_cmd(aci, ACI_READ_IDCODE, -1, -1); + aci->aci_product = snd_aci_cmd(aci, ACI_READ_IDCODE, -1, -1); + if (aci->aci_vendor < 0 || aci->aci_product < 0) { + snd_printk(KERN_ERR "can't read aci id on 0x%lx.\n", + aci->aci_port); return -ENXIO; } - if ((miro->aci_version=aci_cmd(miro, ACI_READ_VERSION, -1, -1)) < 0) { + aci->aci_version = snd_aci_cmd(aci, ACI_READ_VERSION, -1, -1); + if (aci->aci_version < 0) { snd_printk(KERN_ERR "can't read aci version on 0x%lx.\n", - miro->aci_port); + aci->aci_port); return -ENXIO; } - if (aci_cmd(miro, ACI_INIT, -1, -1) < 0 || - aci_cmd(miro, ACI_ERROR_OP, ACI_ERROR_OP, ACI_ERROR_OP) < 0 || - aci_cmd(miro, ACI_ERROR_OP, ACI_ERROR_OP, ACI_ERROR_OP) < 0) { + if (snd_aci_cmd(aci, ACI_INIT, -1, -1) < 0 || + snd_aci_cmd(aci, ACI_ERROR_OP, ACI_ERROR_OP, ACI_ERROR_OP) < 0 || + snd_aci_cmd(aci, ACI_ERROR_OP, ACI_ERROR_OP, ACI_ERROR_OP) < 0) { snd_printk(KERN_ERR "can't initialize aci.\n"); return -ENXIO; } @@ -1191,6 +1229,7 @@ static void snd_card_miro_free(struct snd_card *card) struct snd_miro *miro = card->private_data; release_and_free_resource(miro->res_aci_port); + miro->aci->aci_port = 0; release_and_free_resource(miro->res_mc_base); } @@ -1250,7 +1289,6 @@ static int __devinit snd_miro_probe(struct device *devptr, unsigned int n) } miro->wss_base = port; - miro->mpu_port = mpu_port; miro->irq = irq; miro->mpu_irq = mpu_irq; miro->dma1 = dma1; @@ -1272,6 +1310,8 @@ static int __devinit snd_miro_probe(struct device *devptr, unsigned int n) return -EBUSY; } } + miro->mpu_port = mpu_port; + if (miro->irq == SNDRV_AUTO_IRQ) { if ((miro->irq = snd_legacy_find_free_irq(possible_irqs)) < 0) { snd_card_free(card); @@ -1339,9 +1379,9 @@ static int __devinit snd_miro_probe(struct device *devptr, unsigned int n) return error; } - if (miro->aci_vendor == 'm') { + if (miro->aci->aci_vendor == 'm') { /* It looks like a miro sound card. */ - switch (miro->aci_product) { + switch (miro->aci->aci_product) { case 'A': sprintf(card->shortname, "miroSOUND PCM1 pro / PCM12"); -- cgit v1.3-8-gc7d7 From c4832c7bbc3f7a4813347e871d7238651bf437d3 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 23 Nov 2009 10:34:39 +0100 Subject: netfilter: nf_ct_tcp: improve out-of-sync situation in TCP tracking Without this patch, if we receive a SYN packet from the client while the firewall is out-of-sync, we let it go through. Then, if we see the SYN/ACK reply coming from the server, we destroy the conntrack entry and drop the packet to trigger a new retransmission. Then, the retransmision from the client is used to start a new clean session. This patch improves the current handling. Basically, if we see an unexpected SYN packet, we annotate the TCP options. Then, if we see the reply SYN/ACK, this means that the firewall was indeed out-of-sync. Therefore, we set a clean new session from the existing entry based on the annotated values. This patch adds two new 8-bits fields that fit in a 16-bits gap of the ip_ct_tcp structure. This patch is particularly useful for conntrackd since the asynchronous nature of the state-synchronization allows to have backup nodes that are not perfect copies of the master. This helps to improve the recovery under some worst-case scenarios. I have tested this by creating lots of conntrack entries in wrong state: for ((i=1024;i<65535;i++)); do conntrack -I -p tcp -s 192.168.2.101 -d 192.168.2.2 --sport $i --dport 80 -t 800 --state ESTABLISHED -u ASSURED,SEEN_REPLY; done Then, I make some TCP connections: $ echo GET / | nc 192.168.2.2 80 The events show the result: [UPDATE] tcp 6 60 SYN_RECV src=192.168.2.101 dst=192.168.2.2 sport=33220 dport=80 src=192.168.2.2 dst=192.168.2.101 sport=80 dport=33220 [ASSURED] [UPDATE] tcp 6 432000 ESTABLISHED src=192.168.2.101 dst=192.168.2.2 sport=33220 dport=80 src=192.168.2.2 dst=192.168.2.101 sport=80 dport=33220 [ASSURED] [UPDATE] tcp 6 120 FIN_WAIT src=192.168.2.101 dst=192.168.2.2 sport=33220 dport=80 src=192.168.2.2 dst=192.168.2.101 sport=80 dport=33220 [ASSURED] [UPDATE] tcp 6 30 LAST_ACK src=192.168.2.101 dst=192.168.2.2 sport=33220 dport=80 src=192.168.2.2 dst=192.168.2.101 sport=80 dport=33220 [ASSURED] [UPDATE] tcp 6 120 TIME_WAIT src=192.168.2.101 dst=192.168.2.2 sport=33220 dport=80 src=192.168.2.2 dst=192.168.2.101 sport=80 dport=33220 [ASSURED] and tcpdump shows no retransmissions: 20:47:57.271951 IP 192.168.2.101.33221 > 192.168.2.2.www: S 435402517:435402517(0) win 5840 20:47:57.273538 IP 192.168.2.2.www > 192.168.2.101.33221: S 3509927945:3509927945(0) ack 435402518 win 5792 20:47:57.273608 IP 192.168.2.101.33221 > 192.168.2.2.www: . ack 3509927946 win 92 20:47:57.273693 IP 192.168.2.101.33221 > 192.168.2.2.www: P 435402518:435402524(6) ack 3509927946 win 92 20:47:57.275492 IP 192.168.2.2.www > 192.168.2.101.33221: . ack 435402524 win 362 20:47:57.276492 IP 192.168.2.2.www > 192.168.2.101.33221: P 3509927946:3509928082(136) ack 435402524 win 362 20:47:57.276515 IP 192.168.2.101.33221 > 192.168.2.2.www: . ack 3509928082 win 108 20:47:57.276521 IP 192.168.2.2.www > 192.168.2.101.33221: F 3509928082:3509928082(0) ack 435402524 win 362 20:47:57.277369 IP 192.168.2.101.33221 > 192.168.2.2.www: F 435402524:435402524(0) ack 3509928083 win 108 20:47:57.279491 IP 192.168.2.2.www > 192.168.2.101.33221: . ack 435402525 win 362 I also added a rule to log invalid packets, with no occurrences :-) . Signed-off-by: Pablo Neira Ayuso Acked-by: Jozsef Kadlecsik Signed-off-by: Patrick McHardy --- include/linux/netfilter/nf_conntrack_tcp.h | 3 ++ net/netfilter/nf_conntrack_proto_tcp.c | 51 ++++++++++++++++++++++++------ 2 files changed, 44 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/nf_conntrack_tcp.h b/include/linux/netfilter/nf_conntrack_tcp.h index 4352feed2377..ece22e94dcbf 100644 --- a/include/linux/netfilter/nf_conntrack_tcp.h +++ b/include/linux/netfilter/nf_conntrack_tcp.h @@ -67,6 +67,9 @@ struct ip_ct_tcp u_int32_t last_ack; /* Last sequence number seen in opposite dir */ u_int32_t last_end; /* Last seq + len */ u_int16_t last_win; /* Last window advertisement seen in dir */ + /* For SYN packets while we may be out-of-sync */ + u_int8_t last_wscale; /* Last window scaling factor seen */ + u_int8_t last_flags; /* Last flags set */ }; #endif /* __KERNEL__ */ diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 97a82ba75376..9cc6b5cb06af 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -908,23 +908,54 @@ static int tcp_packet(struct nf_conn *ct, /* b) This SYN/ACK acknowledges a SYN that we earlier * ignored as invalid. This means that the client and * the server are both in sync, while the firewall is - * not. We kill this session and block the SYN/ACK so - * that the client cannot but retransmit its SYN and - * thus initiate a clean new session. + * not. We get in sync from the previously annotated + * values. */ - spin_unlock_bh(&ct->lock); - if (LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, - "nf_ct_tcp: killing out of sync session "); - nf_ct_kill(ct); - return NF_DROP; + old_state = TCP_CONNTRACK_SYN_SENT; + new_state = TCP_CONNTRACK_SYN_RECV; + ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_end = + ct->proto.tcp.last_end; + ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxend = + ct->proto.tcp.last_end; + ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxwin = + ct->proto.tcp.last_win == 0 ? + 1 : ct->proto.tcp.last_win; + ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_scale = + ct->proto.tcp.last_wscale; + ct->proto.tcp.seen[ct->proto.tcp.last_dir].flags = + ct->proto.tcp.last_flags; + memset(&ct->proto.tcp.seen[dir], 0, + sizeof(struct ip_ct_tcp_state)); + break; } ct->proto.tcp.last_index = index; ct->proto.tcp.last_dir = dir; ct->proto.tcp.last_seq = ntohl(th->seq); ct->proto.tcp.last_end = segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th); - + ct->proto.tcp.last_win = ntohs(th->window); + + /* a) This is a SYN in ORIGINAL. The client and the server + * may be in sync but we are not. In that case, we annotate + * the TCP options and let the packet go through. If it is a + * valid SYN packet, the server will reply with a SYN/ACK, and + * then we'll get in sync. Otherwise, the server ignores it. */ + if (index == TCP_SYN_SET && dir == IP_CT_DIR_ORIGINAL) { + struct ip_ct_tcp_state seen = {}; + + ct->proto.tcp.last_flags = + ct->proto.tcp.last_wscale = 0; + tcp_options(skb, dataoff, th, &seen); + if (seen.flags & IP_CT_TCP_FLAG_WINDOW_SCALE) { + ct->proto.tcp.last_flags |= + IP_CT_TCP_FLAG_WINDOW_SCALE; + ct->proto.tcp.last_wscale = seen.td_scale; + } + if (seen.flags & IP_CT_TCP_FLAG_SACK_PERM) { + ct->proto.tcp.last_flags |= + IP_CT_TCP_FLAG_SACK_PERM; + } + } spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, -- cgit v1.3-8-gc7d7 From 4ed7c92d68a5387ba5f7030dc76eab03558e27f5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 23 Nov 2009 11:37:29 +0100 Subject: perf_events: Undo some recursion damage Make perf_swevent_get_recursion_context return a context number and disable preemption. This could be used to remove the IRQ disable from the trace bit and index the per-cpu buffer with. Signed-off-by: Peter Zijlstra Cc: Frederic Weisbecker Cc: Paul Mackerras LKML-Reference: <20091123103819.993226816@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 8 ++--- include/trace/ftrace.h | 17 ++++++----- kernel/perf_event.c | 71 +++++++++++++++++++------------------------ kernel/trace/trace_kprobe.c | 14 +++++---- kernel/trace/trace_syscalls.c | 14 +++++---- 5 files changed, 61 insertions(+), 63 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 74e98b1d3391..43adbd7f0010 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -874,8 +874,8 @@ extern int perf_output_begin(struct perf_output_handle *handle, extern void perf_output_end(struct perf_output_handle *handle); extern void perf_output_copy(struct perf_output_handle *handle, const void *buf, unsigned int len); -extern int perf_swevent_get_recursion_context(int **recursion); -extern void perf_swevent_put_recursion_context(int *recursion); +extern int perf_swevent_get_recursion_context(void); +extern void perf_swevent_put_recursion_context(int rctx); #else static inline void perf_event_task_sched_in(struct task_struct *task, int cpu) { } @@ -904,8 +904,8 @@ static inline void perf_event_mmap(struct vm_area_struct *vma) { } static inline void perf_event_comm(struct task_struct *tsk) { } static inline void perf_event_fork(struct task_struct *tsk) { } static inline void perf_event_init(void) { } -static int perf_swevent_get_recursion_context(int **recursion) { return -1; } -static void perf_swevent_put_recursion_context(int *recursion) { } +static inline int perf_swevent_get_recursion_context(void) { return -1; } +static inline void perf_swevent_put_recursion_context(int rctx) { } #endif diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index c222ef5238bf..c3417c13e3ed 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -724,8 +724,8 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ static void ftrace_profile_##call(proto) \ { \ struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ - extern int perf_swevent_get_recursion_context(int **recursion); \ - extern void perf_swevent_put_recursion_context(int *recursion); \ + extern int perf_swevent_get_recursion_context(void); \ + extern void perf_swevent_put_recursion_context(int rctx); \ struct ftrace_event_call *event_call = &event_##call; \ extern void perf_tp_event(int, u64, u64, void *, int); \ struct ftrace_raw_##call *entry; \ @@ -736,8 +736,8 @@ static void ftrace_profile_##call(proto) \ int __data_size; \ char *trace_buf; \ char *raw_data; \ - int *recursion; \ int __cpu; \ + int rctx; \ int pc; \ \ pc = preempt_count(); \ @@ -753,8 +753,9 @@ static void ftrace_profile_##call(proto) \ \ local_irq_save(irq_flags); \ \ - if (perf_swevent_get_recursion_context(&recursion)) \ - goto end_recursion; \ + rctx = perf_swevent_get_recursion_context(); \ + if (rctx < 0) \ + goto end_recursion; \ \ __cpu = smp_processor_id(); \ \ @@ -781,9 +782,9 @@ static void ftrace_profile_##call(proto) \ perf_tp_event(event_call->id, __addr, __count, entry, \ __entry_size); \ \ -end: \ - perf_swevent_put_recursion_context(recursion); \ -end_recursion: \ +end: \ + perf_swevent_put_recursion_context(rctx); \ +end_recursion: \ local_irq_restore(irq_flags); \ \ } diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 50f11b5f8c3d..0b0d5f72fe7d 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -3869,45 +3869,50 @@ static void perf_swevent_ctx_event(struct perf_event_context *ctx, } } -/* - * Must be called with preemption disabled - */ -int perf_swevent_get_recursion_context(int **recursion) +int perf_swevent_get_recursion_context(void) { - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); + struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context); + int rctx; if (in_nmi()) - *recursion = &cpuctx->recursion[3]; + rctx = 3; else if (in_irq()) - *recursion = &cpuctx->recursion[2]; + rctx = 2; else if (in_softirq()) - *recursion = &cpuctx->recursion[1]; + rctx = 1; else - *recursion = &cpuctx->recursion[0]; + rctx = 0; - if (**recursion) + if (cpuctx->recursion[rctx]) { + put_cpu_var(perf_cpu_context); return -1; + } - (**recursion)++; + cpuctx->recursion[rctx]++; + barrier(); - return 0; + return rctx; } EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context); -void perf_swevent_put_recursion_context(int *recursion) +void perf_swevent_put_recursion_context(int rctx) { - (*recursion)--; + struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); + barrier(); + cpuctx->recursion[rctx]++; + put_cpu_var(perf_cpu_context); } EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context); -static void __do_perf_sw_event(enum perf_type_id type, u32 event_id, - u64 nr, int nmi, - struct perf_sample_data *data, - struct pt_regs *regs) +static void do_perf_sw_event(enum perf_type_id type, u32 event_id, + u64 nr, int nmi, + struct perf_sample_data *data, + struct pt_regs *regs) { + struct perf_cpu_context *cpuctx; struct perf_event_context *ctx; - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); + cpuctx = &__get_cpu_var(perf_cpu_context); rcu_read_lock(); perf_swevent_ctx_event(&cpuctx->ctx, type, event_id, nr, nmi, data, regs); @@ -3921,34 +3926,22 @@ static void __do_perf_sw_event(enum perf_type_id type, u32 event_id, rcu_read_unlock(); } -static void do_perf_sw_event(enum perf_type_id type, u32 event_id, - u64 nr, int nmi, - struct perf_sample_data *data, - struct pt_regs *regs) -{ - int *recursion; - - preempt_disable(); - - if (perf_swevent_get_recursion_context(&recursion)) - goto out; - - __do_perf_sw_event(type, event_id, nr, nmi, data, regs); - - perf_swevent_put_recursion_context(recursion); -out: - preempt_enable(); -} - void __perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr) { struct perf_sample_data data; + int rctx; + + rctx = perf_swevent_get_recursion_context(); + if (rctx < 0) + return; data.addr = addr; data.raw = NULL; do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, &data, regs); + + perf_swevent_put_recursion_context(rctx); } static void perf_swevent_read(struct perf_event *event) @@ -4172,7 +4165,7 @@ void perf_tp_event(int event_id, u64 addr, u64 count, void *record, regs = task_pt_regs(current); /* Trace events already protected against recursion */ - __do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, + do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, &data, regs); } EXPORT_SYMBOL_GPL(perf_tp_event); diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 22e6f68b05b3..79ce6a2bd74f 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1213,7 +1213,7 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, unsigned long irq_flags; char *trace_buf; char *raw_data; - int *recursion; + int rctx; pc = preempt_count(); __size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args); @@ -1229,7 +1229,8 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, */ local_irq_save(irq_flags); - if (perf_swevent_get_recursion_context(&recursion)) + rctx = perf_swevent_get_recursion_context(); + if (rctx < 0) goto end_recursion; __cpu = smp_processor_id(); @@ -1258,7 +1259,7 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, perf_tp_event(call->id, entry->ip, 1, entry, size); end: - perf_swevent_put_recursion_context(recursion); + perf_swevent_put_recursion_context(rctx); end_recursion: local_irq_restore(irq_flags); @@ -1276,8 +1277,8 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, int size, __size, i, pc, __cpu; unsigned long irq_flags; char *trace_buf; - int *recursion; char *raw_data; + int rctx; pc = preempt_count(); __size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args); @@ -1293,7 +1294,8 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, */ local_irq_save(irq_flags); - if (perf_swevent_get_recursion_context(&recursion)) + rctx = perf_swevent_get_recursion_context(); + if (rctx < 0) goto end_recursion; __cpu = smp_processor_id(); @@ -1323,7 +1325,7 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, perf_tp_event(call->id, entry->ret_ip, 1, entry, size); end: - perf_swevent_put_recursion_context(recursion); + perf_swevent_put_recursion_context(rctx); end_recursion: local_irq_restore(irq_flags); diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 41b6dd963daa..9189cbe86079 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -481,8 +481,8 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) unsigned long flags; char *trace_buf; char *raw_data; - int *recursion; int syscall_nr; + int rctx; int size; int cpu; @@ -506,7 +506,8 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) /* Protect the per cpu buffer, begin the rcu read side */ local_irq_save(flags); - if (perf_swevent_get_recursion_context(&recursion)) + rctx = perf_swevent_get_recursion_context(); + if (rctx < 0) goto end_recursion; cpu = smp_processor_id(); @@ -530,7 +531,7 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) perf_tp_event(sys_data->enter_id, 0, 1, rec, size); end: - perf_swevent_put_recursion_context(recursion); + perf_swevent_put_recursion_context(rctx); end_recursion: local_irq_restore(flags); } @@ -582,7 +583,7 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) int syscall_nr; char *trace_buf; char *raw_data; - int *recursion; + int rctx; int size; int cpu; @@ -609,7 +610,8 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) /* Protect the per cpu buffer, begin the rcu read side */ local_irq_save(flags); - if (perf_swevent_get_recursion_context(&recursion)) + rctx = perf_swevent_get_recursion_context(); + if (rctx < 0) goto end_recursion; cpu = smp_processor_id(); @@ -634,7 +636,7 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) perf_tp_event(sys_data->exit_id, 0, 1, rec, size); end: - perf_swevent_put_recursion_context(recursion); + perf_swevent_put_recursion_context(rctx); end_recursion: local_irq_restore(flags); } -- cgit v1.3-8-gc7d7 From e6db4876575f3fdd5b1df2cbff826df95ab9af6a Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 23 Nov 2009 15:42:32 +0100 Subject: hw-breakpoints: Include only linux/perf_event.h from kernel part of bp headers As userspace only needs the breakpoints enum types from the breakpoints headers. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Prasad LKML-Reference: <1258987355-8751-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- include/linux/hw_breakpoint.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h index 4659e0c55ea6..76a48ab9a81e 100644 --- a/include/linux/hw_breakpoint.h +++ b/include/linux/hw_breakpoint.h @@ -1,8 +1,6 @@ #ifndef _LINUX_HW_BREAKPOINT_H #define _LINUX_HW_BREAKPOINT_H -#include - enum { HW_BREAKPOINT_LEN_1 = 1, HW_BREAKPOINT_LEN_2 = 2, @@ -19,6 +17,8 @@ enum { #ifdef __KERNEL__ #ifdef CONFIG_HAVE_HW_BREAKPOINT +#include + static inline unsigned long hw_breakpoint_addr(struct perf_event *bp) { return bp->attr.bp_addr; -- cgit v1.3-8-gc7d7 From 475cba4ec8ee6b427cc3567692e6f48dd483c069 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 23 Nov 2009 15:53:52 -0500 Subject: sctp: implement definition for SACK-IMMEDIATELY extension This patch implement the definition for SACK-IMMEDIATELY extension. Section 3. The I-bit in the DATA Chunk Header The following Figure 1 shows the extended DATA chunk. 0 1 2 3 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Type = 0 | Res |I|U|B|E| Length | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | TSN | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Stream Identifier | Stream Sequence Number | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Payload Protocol Identifier | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ \ \ / User Data / \ \ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ Figure 1 The only difference between the DATA chunk in Figure 1 and the DATA chunk defined in [RFC4960] is the addition of the I-bit in the flags field of the chunk header. Signed-off-by: Wei Yongjun Signed-off-by: Vlad Yasevich --- include/linux/sctp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index b464b9d3d242..c20d3ce673c0 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -242,6 +242,7 @@ enum { SCTP_DATA_FIRST_FRAG = 0x02, SCTP_DATA_NOT_FRAG = 0x03, SCTP_DATA_UNORDERED = 0x04, + SCTP_DATA_SACK_IMM = 0x08, }; enum { SCTP_DATA_FRAG_MASK = 0x03, }; -- cgit v1.3-8-gc7d7 From b93d6471748de2ce02cc24774b774deb306a57a8 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 23 Nov 2009 15:53:56 -0500 Subject: sctp: implement the sender side for SACK-IMMEDIATELY extension This patch implement the sender side for SACK-IMMEDIATELY extension. Section 4.1. Sender Side Considerations Whenever the sender of a DATA chunk can benefit from the corresponding SACK chunk being sent back without delay, the sender MAY set the I-bit in the DATA chunk header. Reasons for setting the I-bit include o The sender is in the SHUTDOWN-PENDING state. o The application requests to set the I-bit of the last DATA chunk of a user message when providing the user message to the SCTP implementation. Signed-off-by: Wei Yongjun Signed-off-by: Vlad Yasevich --- include/net/sctp/user.h | 1 + net/sctp/chunk.c | 15 ++++++++++++++- net/sctp/outqueue.c | 7 +++++++ 3 files changed, 22 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/sctp/user.h b/include/net/sctp/user.h index be2334aaf52e..50b2431405f2 100644 --- a/include/net/sctp/user.h +++ b/include/net/sctp/user.h @@ -206,6 +206,7 @@ enum sctp_sinfo_flags { SCTP_UNORDERED = 1, /* Send/receive message unordered. */ SCTP_ADDR_OVER = 2, /* Override the primary destination. */ SCTP_ABORT=4, /* Send an ABORT message to the peer. */ + SCTP_SACK_IMMEDIATELY = 8, /* SACK should be sent without delay */ SCTP_EOF=MSG_FIN, /* Initiate graceful shutdown process. */ }; diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index acf7c4d128f7..8e4320040f05 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -263,9 +263,18 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, if (0 == i) frag |= SCTP_DATA_FIRST_FRAG; - if ((i == (whole - 1)) && !over) + if ((i == (whole - 1)) && !over) { frag |= SCTP_DATA_LAST_FRAG; + /* The application requests to set the I-bit of the + * last DATA chunk of a user message when providing + * the user message to the SCTP implementation. + */ + if ((sinfo->sinfo_flags & SCTP_EOF) || + (sinfo->sinfo_flags & SCTP_SACK_IMMEDIATELY)) + frag |= SCTP_DATA_SACK_IMM; + } + chunk = sctp_make_datafrag_empty(asoc, sinfo, len, frag, 0); if (!chunk) @@ -297,6 +306,10 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, else frag = SCTP_DATA_LAST_FRAG; + if ((sinfo->sinfo_flags & SCTP_EOF) || + (sinfo->sinfo_flags & SCTP_SACK_IMMEDIATELY)) + frag |= SCTP_DATA_SACK_IMM; + chunk = sctp_make_datafrag_empty(asoc, sinfo, over, frag, 0); if (!chunk) diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index c9f20e28521b..5732661c87d3 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -1011,6 +1011,13 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) break; case SCTP_XMIT_OK: + /* The sender is in the SHUTDOWN-PENDING state, + * The sender MAY set the I-bit in the DATA + * chunk header. + */ + if (asoc->state == SCTP_STATE_SHUTDOWN_PENDING) + chunk->chunk_hdr->flags |= SCTP_DATA_SACK_IMM; + break; default: -- cgit v1.3-8-gc7d7 From 6383cfb3ed3c5c0bea06da0099c219ef4237ecf5 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Mon, 23 Nov 2009 15:53:56 -0500 Subject: sctp: Fix malformed "Invalid Stream Identifier" error The "Invalid Stream Identifier" error has a 16 bit reserved field at the end, thus making the parameter length be 8 bytes. We've never supplied that reserved field making wireshark tag the packet as malformed. Reported-by: Chris Dischino Signed-off-by: Vlad Yasevich --- include/net/sctp/sm.h | 3 ++- net/sctp/sm_make_chunk.c | 13 +++++++++---- net/sctp/sm_statefuns.c | 13 ++++++++----- 3 files changed, 19 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index c1dd89365833..851c813adb3a 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -243,7 +243,8 @@ struct sctp_chunk *sctp_make_op_error(const struct sctp_association *, const struct sctp_chunk *chunk, __be16 cause_code, const void *payload, - size_t paylen); + size_t paylen, + size_t reserve_tail); struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *, union sctp_addr *, diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 9d881a61ac02..9e732916b671 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -987,7 +987,10 @@ static void *sctp_addto_param(struct sctp_chunk *chunk, int len, target = skb_put(chunk->skb, len); - memcpy(target, data, len); + if (data) + memcpy(target, data, len); + else + memset(target, 0, len); /* Adjust the chunk length field. */ chunk->chunk_hdr->length = htons(chunklen + len); @@ -1129,16 +1132,18 @@ nodata: struct sctp_chunk *sctp_make_op_error(const struct sctp_association *asoc, const struct sctp_chunk *chunk, __be16 cause_code, const void *payload, - size_t paylen) + size_t paylen, size_t reserve_tail) { struct sctp_chunk *retval; - retval = sctp_make_op_error_space(asoc, chunk, paylen); + retval = sctp_make_op_error_space(asoc, chunk, paylen + reserve_tail); if (!retval) goto nodata; - sctp_init_cause(retval, cause_code, paylen); + sctp_init_cause(retval, cause_code, paylen + reserve_tail); sctp_addto_chunk(retval, paylen, payload); + if (reserve_tail) + sctp_addto_param(retval, reserve_tail, NULL); nodata: return retval; diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 8ee24c9dc7e9..16a603527df2 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -1720,7 +1720,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_a(const struct sctp_endpoint *ep, err = sctp_make_op_error(asoc, chunk, SCTP_ERROR_COOKIE_IN_SHUTDOWN, - NULL, 0); + NULL, 0, 0); if (err) sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(err)); @@ -3977,7 +3977,7 @@ sctp_disposition_t sctp_sf_eat_auth(const struct sctp_endpoint *ep, err_chunk = sctp_make_op_error(asoc, chunk, SCTP_ERROR_UNSUP_HMAC, &auth_hdr->hmac_id, - sizeof(__u16)); + sizeof(__u16), 0); if (err_chunk) { sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(err_chunk)); @@ -4069,7 +4069,8 @@ sctp_disposition_t sctp_sf_unk_chunk(const struct sctp_endpoint *ep, hdr = unk_chunk->chunk_hdr; err_chunk = sctp_make_op_error(asoc, unk_chunk, SCTP_ERROR_UNKNOWN_CHUNK, hdr, - WORD_ROUND(ntohs(hdr->length))); + WORD_ROUND(ntohs(hdr->length)), + 0); if (err_chunk) { sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(err_chunk)); @@ -4088,7 +4089,8 @@ sctp_disposition_t sctp_sf_unk_chunk(const struct sctp_endpoint *ep, hdr = unk_chunk->chunk_hdr; err_chunk = sctp_make_op_error(asoc, unk_chunk, SCTP_ERROR_UNKNOWN_CHUNK, hdr, - WORD_ROUND(ntohs(hdr->length))); + WORD_ROUND(ntohs(hdr->length)), + 0); if (err_chunk) { sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(err_chunk)); @@ -6052,7 +6054,8 @@ static int sctp_eat_data(const struct sctp_association *asoc, err = sctp_make_op_error(asoc, chunk, SCTP_ERROR_INV_STRM, &data_hdr->stream, - sizeof(data_hdr->stream)); + sizeof(data_hdr->stream), + sizeof(u16)); if (err) sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(err)); -- cgit v1.3-8-gc7d7 From 90f2f5318b3a5b0898fef0fec9b91376c7de7a2c Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Mon, 23 Nov 2009 15:53:57 -0500 Subject: sctp: Update SWS avaoidance receiver side algorithm We currently send window update SACKs every time we free up 1 PMTU worth of data. That a lot more SACKs then necessary. Instead, we'll now send back the actuall window every time we send a sack, and do window-update SACKs when a fraction of the receive buffer has been opened. The fraction is controlled with a sysctl. Signed-off-by: Vlad Yasevich --- include/net/sctp/constants.h | 4 ++++ include/net/sctp/structs.h | 6 ++++++ net/sctp/associola.c | 5 +++-- net/sctp/protocol.c | 3 +++ net/sctp/sm_sideeffect.c | 3 +-- net/sctp/sysctl.c | 13 +++++++++++++ 6 files changed, 30 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 58f714a3b670..63908840eef0 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -308,6 +308,10 @@ enum { SCTP_MAX_GABS = 16 }; #define SCTP_DEFAULT_MINWINDOW 1500 /* default minimum rwnd size */ #define SCTP_DEFAULT_MAXWINDOW 65535 /* default rwnd size */ +#define SCTP_DEFAULT_RWND_SHIFT 4 /* by default, update on 1/16 of + * rcvbuf, which is 1/8 of initial + * window + */ #define SCTP_DEFAULT_MAXSEGMENT 1500 /* MTU size, this is the limit * to which we will raise the P-MTU. */ diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index cd2e18778f81..90876b657775 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -231,6 +231,11 @@ extern struct sctp_globals { /* Flag to indicate whether computing and verifying checksum * is disabled. */ int checksum_disable; + + /* Threshold for rwnd update SACKS. Receive buffer shifted this many + * bits is an indicator of when to send and window update SACK. + */ + int rwnd_update_shift; } sctp_globals; #define sctp_rto_initial (sctp_globals.rto_initial) @@ -267,6 +272,7 @@ extern struct sctp_globals { #define sctp_prsctp_enable (sctp_globals.prsctp_enable) #define sctp_auth_enable (sctp_globals.auth_enable) #define sctp_checksum_disable (sctp_globals.checksum_disable) +#define sctp_rwnd_upd_shift (sctp_globals.rwnd_update_shift) /* SCTP Socket type: UDP or TCP style. */ typedef enum { diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 8e755ebff3b8..37e982510bea 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1383,8 +1383,9 @@ static inline int sctp_peer_needs_update(struct sctp_association *asoc) case SCTP_STATE_SHUTDOWN_RECEIVED: case SCTP_STATE_SHUTDOWN_SENT: if ((asoc->rwnd > asoc->a_rwnd) && - ((asoc->rwnd - asoc->a_rwnd) >= - min_t(__u32, (asoc->base.sk->sk_rcvbuf >> 1), asoc->pathmtu))) + ((asoc->rwnd - asoc->a_rwnd) >= max_t(__u32, + (asoc->base.sk->sk_rcvbuf >> sctp_rwnd_upd_shift), + asoc->pathmtu))) return 1; break; default: diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 08ef203d36ac..a3c8988758b1 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1258,6 +1258,9 @@ SCTP_STATIC __init int sctp_init(void) /* Set SCOPE policy to enabled */ sctp_scope_policy = SCTP_SCOPE_POLICY_ENABLE; + /* Set the default rwnd update threshold */ + sctp_rwnd_upd_shift = SCTP_DEFAULT_RWND_SHIFT; + sctp_sysctl_register(); INIT_LIST_HEAD(&sctp_address_families); diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index eda4fe783be5..8ae67098e094 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -217,8 +217,7 @@ static int sctp_gen_sack(struct sctp_association *asoc, int force, sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART, SCTP_TO(SCTP_EVENT_TIMEOUT_SACK)); } else { - if (asoc->a_rwnd > asoc->rwnd) - asoc->a_rwnd = asoc->rwnd; + asoc->a_rwnd = asoc->rwnd; sack = sctp_make_sack(asoc); if (!sack) goto nomem; diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index ab7151da120f..ae03ded2bf1a 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -52,6 +52,7 @@ static int int_max = INT_MAX; static int sack_timer_min = 1; static int sack_timer_max = 500; static int addr_scope_max = 3; /* check sctp_scope_policy_t in include/net/sctp/constants.h for max entries */ +static int rwnd_scale_max = 16; extern int sysctl_sctp_mem[3]; extern int sysctl_sctp_rmem[3]; @@ -284,6 +285,18 @@ static ctl_table sctp_table[] = { .extra1 = &zero, .extra2 = &addr_scope_max, }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "rwnd_update_shift", + .data = &sctp_rwnd_upd_shift, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &one, + .extra2 = &rwnd_scale_max, + }, + { .ctl_name = 0 } }; -- cgit v1.3-8-gc7d7 From a242b41dedfe0fd51ab1c906daa703c09b196744 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Mon, 23 Nov 2009 15:53:58 -0500 Subject: sctp: remove deprecated SCTP_GET_*_OLD stuffs SCTP_GET_*_OLD stuffs are schedlued to be removed. Cc: Vlad Yasevich Signed-off-by: WANG Cong Signed-off-by: Vlad Yasevich --- Documentation/feature-removal-schedule.txt | 12 -- include/net/sctp/user.h | 8 - net/sctp/socket.c | 325 ----------------------------- 3 files changed, 345 deletions(-) (limited to 'include') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index bc693fffabe0..72ae06f73acf 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -302,18 +302,6 @@ Who: ocfs2-devel@oss.oracle.com --------------------------- -What: SCTP_GET_PEER_ADDRS_NUM_OLD, SCTP_GET_PEER_ADDRS_OLD, - SCTP_GET_LOCAL_ADDRS_NUM_OLD, SCTP_GET_LOCAL_ADDRS_OLD -When: June 2009 -Why: A newer version of the options have been introduced in 2005 that - removes the limitions of the old API. The sctp library has been - converted to use these new options at the same time. Any user - space app that directly uses the old options should convert to using - the new options. -Who: Vlad Yasevich - ---------------------------- - What: Ability for non root users to shm_get hugetlb pages based on mlock resource limits When: 2.6.31 diff --git a/include/net/sctp/user.h b/include/net/sctp/user.h index 50b2431405f2..fceab4d2413e 100644 --- a/include/net/sctp/user.h +++ b/include/net/sctp/user.h @@ -131,14 +131,6 @@ enum sctp_optname { #define SCTP_SOCKOPT_BINDX_REM SCTP_SOCKOPT_BINDX_REM SCTP_SOCKOPT_PEELOFF, /* peel off association. */ #define SCTP_SOCKOPT_PEELOFF SCTP_SOCKOPT_PEELOFF - SCTP_GET_PEER_ADDRS_NUM_OLD, /* Get number of peer addresss. */ -#define SCTP_GET_PEER_ADDRS_NUM_OLD SCTP_GET_PEER_ADDRS_NUM_OLD - SCTP_GET_PEER_ADDRS_OLD, /* Get all peer addresss. */ -#define SCTP_GET_PEER_ADDRS_OLD SCTP_GET_PEER_ADDRS_OLD - SCTP_GET_LOCAL_ADDRS_NUM_OLD, /* Get number of local addresss. */ -#define SCTP_GET_LOCAL_ADDRS_NUM_OLD SCTP_GET_LOCAL_ADDRS_NUM_OLD - SCTP_GET_LOCAL_ADDRS_OLD, /* Get all local addresss. */ -#define SCTP_GET_LOCAL_ADDRS_OLD SCTP_GET_LOCAL_ADDRS_OLD SCTP_SOCKOPT_CONNECTX_OLD, /* CONNECTX old requests. */ #define SCTP_SOCKOPT_CONNECTX_OLD SCTP_SOCKOPT_CONNECTX_OLD SCTP_GET_PEER_ADDRS, /* Get all peer addresss. */ diff --git a/net/sctp/socket.c b/net/sctp/socket.c index a4577a75c6c0..d2681a6bc6fa 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4348,90 +4348,6 @@ static int sctp_getsockopt_initmsg(struct sock *sk, int len, char __user *optval return 0; } -static int sctp_getsockopt_peer_addrs_num_old(struct sock *sk, int len, - char __user *optval, - int __user *optlen) -{ - sctp_assoc_t id; - struct sctp_association *asoc; - struct list_head *pos; - int cnt = 0; - - if (len < sizeof(sctp_assoc_t)) - return -EINVAL; - - if (copy_from_user(&id, optval, sizeof(sctp_assoc_t))) - return -EFAULT; - - printk(KERN_WARNING "SCTP: Use of SCTP_GET_PEER_ADDRS_NUM_OLD " - "socket option deprecated\n"); - /* For UDP-style sockets, id specifies the association to query. */ - asoc = sctp_id2assoc(sk, id); - if (!asoc) - return -EINVAL; - - list_for_each(pos, &asoc->peer.transport_addr_list) { - cnt ++; - } - - return cnt; -} - -/* - * Old API for getting list of peer addresses. Does not work for 32-bit - * programs running on a 64-bit kernel - */ -static int sctp_getsockopt_peer_addrs_old(struct sock *sk, int len, - char __user *optval, - int __user *optlen) -{ - struct sctp_association *asoc; - int cnt = 0; - struct sctp_getaddrs_old getaddrs; - struct sctp_transport *from; - void __user *to; - union sctp_addr temp; - struct sctp_sock *sp = sctp_sk(sk); - int addrlen; - - if (len < sizeof(struct sctp_getaddrs_old)) - return -EINVAL; - - len = sizeof(struct sctp_getaddrs_old); - - if (copy_from_user(&getaddrs, optval, len)) - return -EFAULT; - - if (getaddrs.addr_num <= 0) return -EINVAL; - - printk(KERN_WARNING "SCTP: Use of SCTP_GET_PEER_ADDRS_OLD " - "socket option deprecated\n"); - - /* For UDP-style sockets, id specifies the association to query. */ - asoc = sctp_id2assoc(sk, getaddrs.assoc_id); - if (!asoc) - return -EINVAL; - - to = (void __user *)getaddrs.addrs; - list_for_each_entry(from, &asoc->peer.transport_addr_list, - transports) { - memcpy(&temp, &from->ipaddr, sizeof(temp)); - sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp); - addrlen = sctp_get_af_specific(sk->sk_family)->sockaddr_len; - if (copy_to_user(to, &temp, addrlen)) - return -EFAULT; - to += addrlen ; - cnt ++; - if (cnt >= getaddrs.addr_num) break; - } - getaddrs.addr_num = cnt; - if (put_user(len, optlen)) - return -EFAULT; - if (copy_to_user(optval, &getaddrs, len)) - return -EFAULT; - - return 0; -} static int sctp_getsockopt_peer_addrs(struct sock *sk, int len, char __user *optval, int __user *optlen) @@ -4484,125 +4400,6 @@ static int sctp_getsockopt_peer_addrs(struct sock *sk, int len, return 0; } -static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len, - char __user *optval, - int __user *optlen) -{ - sctp_assoc_t id; - struct sctp_bind_addr *bp; - struct sctp_association *asoc; - struct sctp_sockaddr_entry *addr; - int cnt = 0; - - if (len < sizeof(sctp_assoc_t)) - return -EINVAL; - - if (copy_from_user(&id, optval, sizeof(sctp_assoc_t))) - return -EFAULT; - - printk(KERN_WARNING "SCTP: Use of SCTP_GET_LOCAL_ADDRS_NUM_OLD " - "socket option deprecated\n"); - - /* - * For UDP-style sockets, id specifies the association to query. - * If the id field is set to the value '0' then the locally bound - * addresses are returned without regard to any particular - * association. - */ - if (0 == id) { - bp = &sctp_sk(sk)->ep->base.bind_addr; - } else { - asoc = sctp_id2assoc(sk, id); - if (!asoc) - return -EINVAL; - bp = &asoc->base.bind_addr; - } - - /* If the endpoint is bound to 0.0.0.0 or ::0, count the valid - * addresses from the global local address list. - */ - if (sctp_list_single_entry(&bp->address_list)) { - addr = list_entry(bp->address_list.next, - struct sctp_sockaddr_entry, list); - if (sctp_is_any(sk, &addr->a)) { - rcu_read_lock(); - list_for_each_entry_rcu(addr, - &sctp_local_addr_list, list) { - if (!addr->valid) - continue; - - if ((PF_INET == sk->sk_family) && - (AF_INET6 == addr->a.sa.sa_family)) - continue; - - if ((PF_INET6 == sk->sk_family) && - inet_v6_ipv6only(sk) && - (AF_INET == addr->a.sa.sa_family)) - continue; - - cnt++; - } - rcu_read_unlock(); - } else { - cnt = 1; - } - goto done; - } - - /* Protection on the bound address list is not needed, - * since in the socket option context we hold the socket lock, - * so there is no way that the bound address list can change. - */ - list_for_each_entry(addr, &bp->address_list, list) { - cnt ++; - } -done: - return cnt; -} - -/* Helper function that copies local addresses to user and returns the number - * of addresses copied. - */ -static int sctp_copy_laddrs_old(struct sock *sk, __u16 port, - int max_addrs, void *to, - int *bytes_copied) -{ - struct sctp_sockaddr_entry *addr; - union sctp_addr temp; - int cnt = 0; - int addrlen; - - rcu_read_lock(); - list_for_each_entry_rcu(addr, &sctp_local_addr_list, list) { - if (!addr->valid) - continue; - - if ((PF_INET == sk->sk_family) && - (AF_INET6 == addr->a.sa.sa_family)) - continue; - if ((PF_INET6 == sk->sk_family) && - inet_v6_ipv6only(sk) && - (AF_INET == addr->a.sa.sa_family)) - continue; - memcpy(&temp, &addr->a, sizeof(temp)); - if (!temp.v4.sin_port) - temp.v4.sin_port = htons(port); - - sctp_get_pf_specific(sk->sk_family)->addr_v4map(sctp_sk(sk), - &temp); - addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len; - memcpy(to, &temp, addrlen); - - to += addrlen; - *bytes_copied += addrlen; - cnt ++; - if (cnt >= max_addrs) break; - } - rcu_read_unlock(); - - return cnt; -} - static int sctp_copy_laddrs(struct sock *sk, __u16 port, void *to, size_t space_left, int *bytes_copied) { @@ -4646,112 +4443,6 @@ static int sctp_copy_laddrs(struct sock *sk, __u16 port, void *to, return cnt; } -/* Old API for getting list of local addresses. Does not work for 32-bit - * programs running on a 64-bit kernel - */ -static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len, - char __user *optval, int __user *optlen) -{ - struct sctp_bind_addr *bp; - struct sctp_association *asoc; - int cnt = 0; - struct sctp_getaddrs_old getaddrs; - struct sctp_sockaddr_entry *addr; - void __user *to; - union sctp_addr temp; - struct sctp_sock *sp = sctp_sk(sk); - int addrlen; - int err = 0; - void *addrs; - void *buf; - int bytes_copied = 0; - - if (len < sizeof(struct sctp_getaddrs_old)) - return -EINVAL; - - len = sizeof(struct sctp_getaddrs_old); - if (copy_from_user(&getaddrs, optval, len)) - return -EFAULT; - - if (getaddrs.addr_num <= 0 || - getaddrs.addr_num >= (INT_MAX / sizeof(union sctp_addr))) - return -EINVAL; - - printk(KERN_WARNING "SCTP: Use of SCTP_GET_LOCAL_ADDRS_OLD " - "socket option deprecated\n"); - - /* - * For UDP-style sockets, id specifies the association to query. - * If the id field is set to the value '0' then the locally bound - * addresses are returned without regard to any particular - * association. - */ - if (0 == getaddrs.assoc_id) { - bp = &sctp_sk(sk)->ep->base.bind_addr; - } else { - asoc = sctp_id2assoc(sk, getaddrs.assoc_id); - if (!asoc) - return -EINVAL; - bp = &asoc->base.bind_addr; - } - - to = getaddrs.addrs; - - /* Allocate space for a local instance of packed array to hold all - * the data. We store addresses here first and then put write them - * to the user in one shot. - */ - addrs = kmalloc(sizeof(union sctp_addr) * getaddrs.addr_num, - GFP_KERNEL); - if (!addrs) - return -ENOMEM; - - /* If the endpoint is bound to 0.0.0.0 or ::0, get the valid - * addresses from the global local address list. - */ - if (sctp_list_single_entry(&bp->address_list)) { - addr = list_entry(bp->address_list.next, - struct sctp_sockaddr_entry, list); - if (sctp_is_any(sk, &addr->a)) { - cnt = sctp_copy_laddrs_old(sk, bp->port, - getaddrs.addr_num, - addrs, &bytes_copied); - goto copy_getaddrs; - } - } - - buf = addrs; - /* Protection on the bound address list is not needed since - * in the socket option context we hold a socket lock and - * thus the bound address list can't change. - */ - list_for_each_entry(addr, &bp->address_list, list) { - memcpy(&temp, &addr->a, sizeof(temp)); - sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp); - addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len; - memcpy(buf, &temp, addrlen); - buf += addrlen; - bytes_copied += addrlen; - cnt ++; - if (cnt >= getaddrs.addr_num) break; - } - -copy_getaddrs: - /* copy the entire address list into the user provided space */ - if (copy_to_user(to, addrs, bytes_copied)) { - err = -EFAULT; - goto error; - } - - /* copy the leading structure back to user */ - getaddrs.addr_num = cnt; - if (copy_to_user(optval, &getaddrs, len)) - err = -EFAULT; - -error: - kfree(addrs); - return err; -} static int sctp_getsockopt_local_addrs(struct sock *sk, int len, char __user *optval, int __user *optlen) @@ -5602,22 +5293,6 @@ SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname, case SCTP_INITMSG: retval = sctp_getsockopt_initmsg(sk, len, optval, optlen); break; - case SCTP_GET_PEER_ADDRS_NUM_OLD: - retval = sctp_getsockopt_peer_addrs_num_old(sk, len, optval, - optlen); - break; - case SCTP_GET_LOCAL_ADDRS_NUM_OLD: - retval = sctp_getsockopt_local_addrs_num_old(sk, len, optval, - optlen); - break; - case SCTP_GET_PEER_ADDRS_OLD: - retval = sctp_getsockopt_peer_addrs_old(sk, len, optval, - optlen); - break; - case SCTP_GET_LOCAL_ADDRS_OLD: - retval = sctp_getsockopt_local_addrs_old(sk, len, optval, - optlen); - break; case SCTP_GET_PEER_ADDRS: retval = sctp_getsockopt_peer_addrs(sk, len, optval, optlen); -- cgit v1.3-8-gc7d7 From 245cba7e55929dc2b10b7d915bfba0168eeeed17 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Mon, 23 Nov 2009 15:53:58 -0500 Subject: sctp: Remove useless last_time_used variable The transport last_time_used variable is rather useless. It was only used when determining if CWND needs to be updated due to idle transport. However, idle transport detection was based on a Heartbeat timer and last_time_used was not incremented when sending Heartbeats. As a result the check for cwnd reduction was always true. We can get rid of the variable and just base our cwnd manipulation on the HB timer (like the code comment sais). We also have to call into the cwnd manipulation function regardless of whether HBs are enabled or not. That way we will detect idle transports if the user has disabled Heartbeats. Signed-off-by: Vlad Yasevich --- include/net/sctp/structs.h | 6 ------ net/sctp/output.c | 2 -- net/sctp/sm_statefuns.c | 5 +++-- net/sctp/transport.c | 7 ++----- 4 files changed, 5 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 90876b657775..ac794a6823eb 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -950,12 +950,6 @@ struct sctp_transport { /* Source address. */ union sctp_addr saddr; - /* When was the last time(in jiffies) that a data packet was sent on - * this transport? This is used to adjust the cwnd when the transport - * becomes inactive. - */ - unsigned long last_time_used; - /* Heartbeat interval: The endpoint sends out a Heartbeat chunk to * the destination address every heartbeat interval. */ diff --git a/net/sctp/output.c b/net/sctp/output.c index 5cbda8f1ddfd..9e8e0ea844be 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -557,8 +557,6 @@ int sctp_packet_transmit(struct sctp_packet *packet) struct timer_list *timer; unsigned long timeout; - tp->last_time_used = jiffies; - /* Restart the AUTOCLOSE timer when sending data. */ if (sctp_state(asoc, ESTABLISHED) && asoc->autoclose) { timer = &asoc->timers[SCTP_EVENT_TIMEOUT_AUTOCLOSE]; diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 16a603527df2..1ef9de9bbae9 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -996,14 +996,15 @@ sctp_disposition_t sctp_sf_sendbeat_8_3(const struct sctp_endpoint *ep, sctp_sf_heartbeat(ep, asoc, type, arg, commands)) return SCTP_DISPOSITION_NOMEM; + /* Set transport error counter and association error counter * when sending heartbeat. */ - sctp_add_cmd_sf(commands, SCTP_CMD_TRANSPORT_IDLE, - SCTP_TRANSPORT(transport)); sctp_add_cmd_sf(commands, SCTP_CMD_TRANSPORT_HB_SENT, SCTP_TRANSPORT(transport)); } + sctp_add_cmd_sf(commands, SCTP_CMD_TRANSPORT_IDLE, + SCTP_TRANSPORT(transport)); sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMER_UPDATE, SCTP_TRANSPORT(transport)); diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 3b141bb32faf..2df29cbdaf5a 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -83,7 +83,6 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, peer->fast_recovery = 0; peer->last_time_heard = jiffies; - peer->last_time_used = jiffies; peer->last_time_ecne_reduced = jiffies; peer->init_sent_count = 0; @@ -565,10 +564,8 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport, * to be done every RTO interval, we do it every hearbeat * interval. */ - if (time_after(jiffies, transport->last_time_used + - transport->rto)) - transport->cwnd = max(transport->cwnd/2, - 4*transport->asoc->pathmtu); + transport->cwnd = max(transport->cwnd/2, + 4*transport->asoc->pathmtu); break; } -- cgit v1.3-8-gc7d7 From a5b03ad2143c5bc9ae76533e8321fe66258b4f35 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Mon, 23 Nov 2009 15:53:59 -0500 Subject: sctp: Turn the enum socket options into defines Recent attempt to remove deprecated socket options demonstrated that removing options from the enum space will have severe binary compatibility issues. The reason is that it changes the subsequent enum space and causes option values to be redefined. To solve this, and to get rid of the ugly double statements for every option, we simply convert to the #define scheme. Signed-off-by: Vlad Yasevich --- include/net/sctp/user.h | 125 +++++++++++++++++------------------------------- 1 file changed, 43 insertions(+), 82 deletions(-) (limited to 'include') diff --git a/include/net/sctp/user.h b/include/net/sctp/user.h index fceab4d2413e..2b2769c5ca9f 100644 --- a/include/net/sctp/user.h +++ b/include/net/sctp/user.h @@ -60,88 +60,49 @@ typedef __s32 sctp_assoc_t; /* The following symbols come from the Sockets API Extensions for * SCTP . */ -enum sctp_optname { - SCTP_RTOINFO, -#define SCTP_RTOINFO SCTP_RTOINFO - SCTP_ASSOCINFO, -#define SCTP_ASSOCINFO SCTP_ASSOCINFO - SCTP_INITMSG, -#define SCTP_INITMSG SCTP_INITMSG - SCTP_NODELAY, /* Get/set nodelay option. */ -#define SCTP_NODELAY SCTP_NODELAY - SCTP_AUTOCLOSE, -#define SCTP_AUTOCLOSE SCTP_AUTOCLOSE - SCTP_SET_PEER_PRIMARY_ADDR, -#define SCTP_SET_PEER_PRIMARY_ADDR SCTP_SET_PEER_PRIMARY_ADDR - SCTP_PRIMARY_ADDR, -#define SCTP_PRIMARY_ADDR SCTP_PRIMARY_ADDR - SCTP_ADAPTATION_LAYER, -#define SCTP_ADAPTATION_LAYER SCTP_ADAPTATION_LAYER - SCTP_DISABLE_FRAGMENTS, -#define SCTP_DISABLE_FRAGMENTS SCTP_DISABLE_FRAGMENTS - SCTP_PEER_ADDR_PARAMS, -#define SCTP_PEER_ADDR_PARAMS SCTP_PEER_ADDR_PARAMS - SCTP_DEFAULT_SEND_PARAM, -#define SCTP_DEFAULT_SEND_PARAM SCTP_DEFAULT_SEND_PARAM - SCTP_EVENTS, -#define SCTP_EVENTS SCTP_EVENTS - SCTP_I_WANT_MAPPED_V4_ADDR, /* Turn on/off mapped v4 addresses */ -#define SCTP_I_WANT_MAPPED_V4_ADDR SCTP_I_WANT_MAPPED_V4_ADDR - SCTP_MAXSEG, /* Get/set maximum fragment. */ -#define SCTP_MAXSEG SCTP_MAXSEG - SCTP_STATUS, -#define SCTP_STATUS SCTP_STATUS - SCTP_GET_PEER_ADDR_INFO, -#define SCTP_GET_PEER_ADDR_INFO SCTP_GET_PEER_ADDR_INFO - SCTP_DELAYED_ACK, -#define SCTP_DELAYED_ACK_TIME SCTP_DELAYED_ACK -#define SCTP_DELAYED_ACK SCTP_DELAYED_ACK - SCTP_CONTEXT, /* Receive Context */ -#define SCTP_CONTEXT SCTP_CONTEXT - SCTP_FRAGMENT_INTERLEAVE, -#define SCTP_FRAGMENT_INTERLEAVE SCTP_FRAGMENT_INTERLEAVE - SCTP_PARTIAL_DELIVERY_POINT, /* Set/Get partial delivery point */ -#define SCTP_PARTIAL_DELIVERY_POINT SCTP_PARTIAL_DELIVERY_POINT - SCTP_MAX_BURST, /* Set/Get max burst */ -#define SCTP_MAX_BURST SCTP_MAX_BURST - SCTP_AUTH_CHUNK, /* Set only: add a chunk type to authenticat */ -#define SCTP_AUTH_CHUNK SCTP_AUTH_CHUNK - SCTP_HMAC_IDENT, -#define SCTP_HMAC_IDENT SCTP_HMAC_IDENT - SCTP_AUTH_KEY, -#define SCTP_AUTH_KEY SCTP_AUTH_KEY - SCTP_AUTH_ACTIVE_KEY, -#define SCTP_AUTH_ACTIVE_KEY SCTP_AUTH_ACTIVE_KEY - SCTP_AUTH_DELETE_KEY, -#define SCTP_AUTH_DELETE_KEY SCTP_AUTH_DELETE_KEY - SCTP_PEER_AUTH_CHUNKS, /* Read only */ -#define SCTP_PEER_AUTH_CHUNKS SCTP_PEER_AUTH_CHUNKS - SCTP_LOCAL_AUTH_CHUNKS, /* Read only */ -#define SCTP_LOCAL_AUTH_CHUNKS SCTP_LOCAL_AUTH_CHUNKS - SCTP_GET_ASSOC_NUMBER, /* Read only */ -#define SCTP_GET_ASSOC_NUMBER SCTP_GET_ASSOC_NUMBER - - - /* Internal Socket Options. Some of the sctp library functions are - * implemented using these socket options. - */ - SCTP_SOCKOPT_BINDX_ADD = 100,/* BINDX requests for adding addresses. */ -#define SCTP_SOCKOPT_BINDX_ADD SCTP_SOCKOPT_BINDX_ADD - SCTP_SOCKOPT_BINDX_REM, /* BINDX requests for removing addresses. */ -#define SCTP_SOCKOPT_BINDX_REM SCTP_SOCKOPT_BINDX_REM - SCTP_SOCKOPT_PEELOFF, /* peel off association. */ -#define SCTP_SOCKOPT_PEELOFF SCTP_SOCKOPT_PEELOFF - SCTP_SOCKOPT_CONNECTX_OLD, /* CONNECTX old requests. */ -#define SCTP_SOCKOPT_CONNECTX_OLD SCTP_SOCKOPT_CONNECTX_OLD - SCTP_GET_PEER_ADDRS, /* Get all peer addresss. */ -#define SCTP_GET_PEER_ADDRS SCTP_GET_PEER_ADDRS - SCTP_GET_LOCAL_ADDRS, /* Get all local addresss. */ -#define SCTP_GET_LOCAL_ADDRS SCTP_GET_LOCAL_ADDRS - SCTP_SOCKOPT_CONNECTX, /* CONNECTX requests. */ -#define SCTP_SOCKOPT_CONNECTX SCTP_SOCKOPT_CONNECTX - SCTP_SOCKOPT_CONNECTX3, /* CONNECTX requests. (new implementation) */ -#define SCTP_SOCKOPT_CONNECTX3 SCTP_SOCKOPT_CONNECTX3 -}; +#define SCTP_RTOINFO 0 +#define SCTP_ASSOCINFO 1 +#define SCTP_INITMSG 2 +#define SCTP_NODELAY 3 /* Get/set nodelay option. */ +#define SCTP_AUTOCLOSE 4 +#define SCTP_SET_PEER_PRIMARY_ADDR 5 +#define SCTP_PRIMARY_ADDR 6 +#define SCTP_ADAPTATION_LAYER 7 +#define SCTP_DISABLE_FRAGMENTS 8 +#define SCTP_PEER_ADDR_PARAMS 9 +#define SCTP_DEFAULT_SEND_PARAM 10 +#define SCTP_EVENTS 11 +#define SCTP_I_WANT_MAPPED_V4_ADDR 12 /* Turn on/off mapped v4 addresses */ +#define SCTP_MAXSEG 13 /* Get/set maximum fragment. */ +#define SCTP_STATUS 14 +#define SCTP_GET_PEER_ADDR_INFO 15 +#define SCTP_DELAYED_ACK_TIME 16 +#define SCTP_DELAYED_ACK SCTP_DELAYED_ACK_TIME +#define SCTP_CONTEXT 17 +#define SCTP_FRAGMENT_INTERLEAVE 18 +#define SCTP_PARTIAL_DELIVERY_POINT 19 /* Set/Get partial delivery point */ +#define SCTP_MAX_BURST 20 /* Set/Get max burst */ +#define SCTP_AUTH_CHUNK 21 /* Set only: add a chunk type to authenticate */ +#define SCTP_HMAC_IDENT 22 +#define SCTP_AUTH_KEY 23 +#define SCTP_AUTH_ACTIVE_KEY 24 +#define SCTP_AUTH_DELETE_KEY 25 +#define SCTP_PEER_AUTH_CHUNKS 26 /* Read only */ +#define SCTP_LOCAL_AUTH_CHUNKS 27 /* Read only */ +#define SCTP_GET_ASSOC_NUMBER 28 /* Read only */ + +/* Internal Socket Options. Some of the sctp library functions are + * implemented using these socket options. + */ +#define SCTP_SOCKOPT_BINDX_ADD 100 /* BINDX requests for adding addrs */ +#define SCTP_SOCKOPT_BINDX_REM 101 /* BINDX requests for removing addrs. */ +#define SCTP_SOCKOPT_PEELOFF 102 /* peel off association. */ +/* Options 104-106 are deprecated and removed. Do not use this space */ +#define SCTP_SOCKOPT_CONNECTX_OLD 107 /* CONNECTX old requests. */ +#define SCTP_GET_PEER_ADDRS 108 /* Get all peer addresss. */ +#define SCTP_GET_LOCAL_ADDRS 109 /* Get all local addresss. */ +#define SCTP_SOCKOPT_CONNECTX 110 /* CONNECTX requests. */ +#define SCTP_SOCKOPT_CONNECTX3 111 /* CONNECTX requests (updated) */ /* * 5.2.1 SCTP Initiation Structure (SCTP_INIT) -- cgit v1.3-8-gc7d7 From 46d5a808558181e03a4760d2188cc9879445738a Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Mon, 23 Nov 2009 15:54:00 -0500 Subject: sctp: Update max.burst implementation Current implementation of max.burst ends up limiting new data during cwnd decay period. The decay is happening becuase the connection is idle and we are allowed to fill the congestion window. The point of max.burst is to limit micro-bursts in response to large acks. This still happens, as max.burst is still applied to each transmit opportunity. It will also apply if a very large send is made (greater then allowed by burst). Tested-by: Florian Niederbacher Signed-off-by: Vlad Yasevich --- include/net/sctp/structs.h | 4 ++++ net/sctp/associola.c | 1 + net/sctp/output.c | 23 ----------------------- net/sctp/outqueue.c | 15 +++++++++++++++ net/sctp/transport.c | 38 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 58 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index ac794a6823eb..bc3f8d879c5c 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -942,6 +942,8 @@ struct sctp_transport { /* Data that has been sent, but not acknowledged. */ __u32 flight_size; + __u32 burst_limited; /* Holds old cwnd when max.burst is applied */ + /* TSN marking the fast recovery exit point */ __u32 fast_recovery_exit; @@ -1070,6 +1072,8 @@ void sctp_transport_put(struct sctp_transport *); void sctp_transport_update_rto(struct sctp_transport *, __u32); void sctp_transport_raise_cwnd(struct sctp_transport *, __u32, __u32); void sctp_transport_lower_cwnd(struct sctp_transport *, sctp_lower_cwnd_t); +void sctp_transport_burst_limited(struct sctp_transport *); +void sctp_transport_burst_reset(struct sctp_transport *); unsigned long sctp_transport_timeout(struct sctp_transport *); void sctp_transport_reset(struct sctp_transport *); void sctp_transport_update_pmtu(struct sctp_transport *, u32); diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 37e982510bea..880dae2ca87b 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -738,6 +738,7 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, peer->partial_bytes_acked = 0; peer->flight_size = 0; + peer->burst_limited = 0; /* Set the transport's RTO.initial value */ peer->rto = asoc->rto_initial; diff --git a/net/sctp/output.c b/net/sctp/output.c index 9e8e0ea844be..b210d2077e28 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -615,7 +615,6 @@ static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet, sctp_xmit_t retval = SCTP_XMIT_OK; size_t datasize, rwnd, inflight, flight_size; struct sctp_transport *transport = packet->transport; - __u32 max_burst_bytes; struct sctp_association *asoc = transport->asoc; struct sctp_outq *q = &asoc->outqueue; @@ -648,28 +647,6 @@ static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet, } } - /* sctpimpguide-05 2.14.2 - * D) When the time comes for the sender to - * transmit new DATA chunks, the protocol parameter Max.Burst MUST - * first be applied to limit how many new DATA chunks may be sent. - * The limit is applied by adjusting cwnd as follows: - * if ((flightsize + Max.Burst * MTU) < cwnd) - * cwnd = flightsize + Max.Burst * MTU - */ - max_burst_bytes = asoc->max_burst * asoc->pathmtu; - if ((flight_size + max_burst_bytes) < transport->cwnd) { - transport->cwnd = flight_size + max_burst_bytes; - SCTP_DEBUG_PRINTK("%s: cwnd limited by max_burst: " - "transport: %p, cwnd: %d, " - "ssthresh: %d, flight_size: %d, " - "pba: %d\n", - __func__, transport, - transport->cwnd, - transport->ssthresh, - transport->flight_size, - transport->partial_bytes_acked); - } - /* RFC 2960 6.1 Transmission of DATA Chunks * * B) At any given time, the sender MUST NOT transmit new data diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 5732661c87d3..2f2377369e2b 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -931,6 +931,14 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) goto sctp_flush_out; } + /* Apply Max.Burst limitation to the current transport in + * case it will be used for new data. We are going to + * rest it before we return, but we want to apply the limit + * to the currently queued data. + */ + if (transport) + sctp_transport_burst_limited(transport); + /* Finally, transmit new packets. */ while ((chunk = sctp_outq_dequeue_data(q)) != NULL) { /* RFC 2960 6.5 Every DATA chunk MUST carry a valid @@ -976,6 +984,10 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) packet = &transport->packet; sctp_packet_config(packet, vtag, asoc->peer.ecn_capable); + /* We've switched transports, so apply the + * Burst limit to the new transport. + */ + sctp_transport_burst_limited(transport); } SCTP_DEBUG_PRINTK("sctp_outq_flush(%p, %p[%s]), ", @@ -1070,6 +1082,9 @@ sctp_flush_out: packet = &t->packet; if (!sctp_packet_empty(packet)) error = sctp_packet_transmit(packet); + + /* Clear the burst limited state, if any */ + sctp_transport_burst_reset(t); } return error; diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 2df29cbdaf5a..9a6cb22d129f 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -576,6 +576,43 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport, transport->cwnd, transport->ssthresh); } +/* Apply Max.Burst limit to the congestion window: + * sctpimpguide-05 2.14.2 + * D) When the time comes for the sender to + * transmit new DATA chunks, the protocol parameter Max.Burst MUST + * first be applied to limit how many new DATA chunks may be sent. + * The limit is applied by adjusting cwnd as follows: + * if ((flightsize+ Max.Burst * MTU) < cwnd) + * cwnd = flightsize + Max.Burst * MTU + */ + +void sctp_transport_burst_limited(struct sctp_transport *t) +{ + struct sctp_association *asoc = t->asoc; + u32 old_cwnd = t->cwnd; + u32 max_burst_bytes; + + if (t->burst_limited) + return; + + max_burst_bytes = t->flight_size + (asoc->max_burst * asoc->pathmtu); + if (max_burst_bytes < old_cwnd) { + t->cwnd = max_burst_bytes; + t->burst_limited = old_cwnd; + } +} + +/* Restore the old cwnd congestion window, after the burst had it's + * desired effect. + */ +void sctp_transport_burst_reset(struct sctp_transport *t) +{ + if (t->burst_limited) { + t->cwnd = t->burst_limited; + t->burst_limited = 0; + } +} + /* What is the next timeout value for this transport? */ unsigned long sctp_transport_timeout(struct sctp_transport *t) { @@ -598,6 +635,7 @@ void sctp_transport_reset(struct sctp_transport *t) * (see Section 6.2.1) */ t->cwnd = min(4*asoc->pathmtu, max_t(__u32, 2*asoc->pathmtu, 4380)); + t->burst_limited = 0; t->ssthresh = asoc->peer.i.a_rwnd; t->last_rto = t->rto = asoc->rto_initial; t->rtt = 0; -- cgit v1.3-8-gc7d7 From fa7c27ee9394fc0d52404b2a89882e95868a60b9 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 23 Nov 2009 22:30:12 +0100 Subject: hw-breakpoints: Fix misordered ifdef Fix a misplaced ifdef. We need the perf event headers also in off-case to avoid the following build error: include/linux/hw_breakpoint.h:94: error: expected declaration specifiers or '...' before 'perf_callback_t' include/linux/hw_breakpoint.h:102: error: expected declaration specifiers or '...' before 'perf_callback_t' include/linux/hw_breakpoint.h:109: error: expected declaration specifiers or '...' before 'perf_callback_t' include/linux/hw_breakpoint.h:116: error: expected declaration specifiers or '...' before 'perf_callback_t' Reported-by: Kisskb-bot by Michael Ellerman Signed-off-by: Frederic Weisbecker Cc: Prasad LKML-Reference: <1259011812-8093-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- include/linux/hw_breakpoint.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h index 76a48ab9a81e..c9f7f7c7b0e0 100644 --- a/include/linux/hw_breakpoint.h +++ b/include/linux/hw_breakpoint.h @@ -15,10 +15,11 @@ enum { }; #ifdef __KERNEL__ -#ifdef CONFIG_HAVE_HW_BREAKPOINT #include +#ifdef CONFIG_HAVE_HW_BREAKPOINT + static inline unsigned long hw_breakpoint_addr(struct perf_event *bp) { return bp->attr.bp_addr; -- cgit v1.3-8-gc7d7 From b3a222e52e4d4be77cc4520a57af1a4a0d8222d1 Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Mon, 23 Nov 2009 16:21:30 -0600 Subject: remove CONFIG_SECURITY_FILE_CAPABILITIES compile option As far as I know, all distros currently ship kernels with default CONFIG_SECURITY_FILE_CAPABILITIES=y. Since having the option on leaves a 'no_file_caps' option to boot without file capabilities, the main reason to keep the option is that turning it off saves you (on my s390x partition) 5k. In particular, vmlinux sizes came to: without patch fscaps=n: 53598392 without patch fscaps=y: 53603406 with this patch applied: 53603342 with the security-next tree. Against this we must weigh the fact that there is no simple way for userspace to figure out whether file capabilities are supported, while things like per-process securebits, capability bounding sets, and adding bits to pI if CAP_SETPCAP is in pE are not supported with SECURITY_FILE_CAPABILITIES=n, leaving a bit of a problem for applications wanting to know whether they can use them and/or why something failed. It also adds another subtly different set of semantics which we must maintain at the risk of severe security regressions. So this patch removes the SECURITY_FILE_CAPABILITIES compile option. It drops the kernel size by about 50k over the stock SECURITY_FILE_CAPABILITIES=y kernel, by removing the cap_limit_ptraced_target() function. Changelog: Nov 20: remove cap_limit_ptraced_target() as it's logic was ifndef'ed. Signed-off-by: Serge E. Hallyn Acked-by: Andrew G. Morgan" Signed-off-by: James Morris --- include/linux/capability.h | 2 -- include/linux/init_task.h | 4 --- kernel/capability.c | 2 -- security/Kconfig | 9 ------ security/commoncap.c | 72 ++-------------------------------------------- 5 files changed, 2 insertions(+), 87 deletions(-) (limited to 'include') diff --git a/include/linux/capability.h b/include/linux/capability.h index c8f2a5f70ed5..39e5ff512fbe 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -92,9 +92,7 @@ struct vfs_cap_data { #define _KERNEL_CAPABILITY_VERSION _LINUX_CAPABILITY_VERSION_3 #define _KERNEL_CAPABILITY_U32S _LINUX_CAPABILITY_U32S_3 -#ifdef CONFIG_SECURITY_FILE_CAPABILITIES extern int file_caps_enabled; -#endif typedef struct kernel_cap_struct { __u32 cap[_KERNEL_CAPABILITY_U32S]; diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 21a6f5d9af22..8d10aa7fd4c9 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -83,16 +83,12 @@ extern struct group_info init_groups; #define INIT_IDS #endif -#ifdef CONFIG_SECURITY_FILE_CAPABILITIES /* * Because of the reduced scope of CAP_SETPCAP when filesystem * capabilities are in effect, it is safe to allow CAP_SETPCAP to * be available in the default configuration. */ # define CAP_INIT_BSET CAP_FULL_SET -#else -# define CAP_INIT_BSET CAP_INIT_EFF_SET -#endif #ifdef CONFIG_TREE_PREEMPT_RCU #define INIT_TASK_RCU_PREEMPT(tsk) \ diff --git a/kernel/capability.c b/kernel/capability.c index c450375e855f..7f876e60521f 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -29,7 +29,6 @@ EXPORT_SYMBOL(__cap_empty_set); EXPORT_SYMBOL(__cap_full_set); EXPORT_SYMBOL(__cap_init_eff_set); -#ifdef CONFIG_SECURITY_FILE_CAPABILITIES int file_caps_enabled = 1; static int __init file_caps_disable(char *str) @@ -38,7 +37,6 @@ static int __init file_caps_disable(char *str) return 1; } __setup("no_file_caps", file_caps_disable); -#endif /* * More recent versions of libcap are available from: diff --git a/security/Kconfig b/security/Kconfig index 95cc08913ca1..226b9556b25f 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -91,15 +91,6 @@ config SECURITY_PATH implement pathname based access controls. If you are unsure how to answer this question, answer N. -config SECURITY_FILE_CAPABILITIES - bool "File POSIX Capabilities" - default n - help - This enables filesystem capabilities, allowing you to give - binaries a subset of root's powers without using setuid 0. - - If in doubt, answer N. - config INTEL_TXT bool "Enable Intel(R) Trusted Execution Technology (Intel(R) TXT)" depends on HAVE_INTEL_TXT diff --git a/security/commoncap.c b/security/commoncap.c index 45b87af4ae5d..f800fdb3de94 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -173,7 +173,6 @@ int cap_capget(struct task_struct *target, kernel_cap_t *effective, */ static inline int cap_inh_is_capped(void) { -#ifdef CONFIG_SECURITY_FILE_CAPABILITIES /* they are so limited unless the current task has the CAP_SETPCAP * capability @@ -181,7 +180,6 @@ static inline int cap_inh_is_capped(void) if (cap_capable(current, current_cred(), CAP_SETPCAP, SECURITY_CAP_AUDIT) == 0) return 0; -#endif return 1; } @@ -239,8 +237,6 @@ static inline void bprm_clear_caps(struct linux_binprm *bprm) bprm->cap_effective = false; } -#ifdef CONFIG_SECURITY_FILE_CAPABILITIES - /** * cap_inode_need_killpriv - Determine if inode change affects privileges * @dentry: The inode/dentry in being changed with change marked ATTR_KILL_PRIV @@ -421,49 +417,6 @@ out: return rc; } -#else -int cap_inode_need_killpriv(struct dentry *dentry) -{ - return 0; -} - -int cap_inode_killpriv(struct dentry *dentry) -{ - return 0; -} - -int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps) -{ - memset(cpu_caps, 0, sizeof(struct cpu_vfs_cap_data)); - return -ENODATA; -} - -static inline int get_file_caps(struct linux_binprm *bprm, bool *effective) -{ - bprm_clear_caps(bprm); - return 0; -} -#endif - -/* - * Determine whether a exec'ing process's new permitted capabilities should be - * limited to just what it already has. - * - * This prevents processes that are being ptraced from gaining access to - * CAP_SETPCAP, unless the process they're tracing already has it, and the - * binary they're executing has filecaps that elevate it. - * - * Returns 1 if they should be limited, 0 if they are not. - */ -static inline int cap_limit_ptraced_target(void) -{ -#ifndef CONFIG_SECURITY_FILE_CAPABILITIES - if (capable(CAP_SETPCAP)) - return 0; -#endif - return 1; -} - /** * cap_bprm_set_creds - Set up the proposed credentials for execve(). * @bprm: The execution parameters, including the proposed creds @@ -523,9 +476,8 @@ skip: new->euid = new->uid; new->egid = new->gid; } - if (cap_limit_ptraced_target()) - new->cap_permitted = cap_intersect(new->cap_permitted, - old->cap_permitted); + new->cap_permitted = cap_intersect(new->cap_permitted, + old->cap_permitted); } new->suid = new->fsuid = new->euid; @@ -739,7 +691,6 @@ int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags) return 0; } -#ifdef CONFIG_SECURITY_FILE_CAPABILITIES /* * Rationale: code calling task_setscheduler, task_setioprio, and * task_setnice, assumes that @@ -820,22 +771,6 @@ static long cap_prctl_drop(struct cred *new, unsigned long cap) return 0; } -#else -int cap_task_setscheduler (struct task_struct *p, int policy, - struct sched_param *lp) -{ - return 0; -} -int cap_task_setioprio (struct task_struct *p, int ioprio) -{ - return 0; -} -int cap_task_setnice (struct task_struct *p, int nice) -{ - return 0; -} -#endif - /** * cap_task_prctl - Implement process control functions for this security module * @option: The process control function requested @@ -866,7 +801,6 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, error = !!cap_raised(new->cap_bset, arg2); goto no_change; -#ifdef CONFIG_SECURITY_FILE_CAPABILITIES case PR_CAPBSET_DROP: error = cap_prctl_drop(new, arg2); if (error < 0) @@ -917,8 +851,6 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, error = new->securebits; goto no_change; -#endif /* def CONFIG_SECURITY_FILE_CAPABILITIES */ - case PR_GET_KEEPCAPS: if (issecure(SECURE_KEEP_CAPS)) error = 1; -- cgit v1.3-8-gc7d7 From c9286b7e293a1ea054e857ff3f5a23d0ad8d4f36 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 16 Nov 2009 19:50:38 +0100 Subject: locking: Remove unused prototype commit 910067d1(remove generic__raw_read_trylock()) removed the implementation but left the prototype around. Remove it. Signed-off-by: Thomas Gleixner --- include/linux/spinlock.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index f0ca7a7a1757..faf1482028df 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -79,8 +79,6 @@ */ #include -extern int __lockfunc generic__raw_read_trylock(raw_rwlock_t *lock); - /* * Pull the __raw*() functions/declarations (UP-nondebug doesnt need them): */ -- cgit v1.3-8-gc7d7 From a49ed0bf427a8328a3296eebedc7697fe5098dbf Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 16 Nov 2009 19:57:50 +0100 Subject: locking: Use __[SPIN|RW]_LOCK_UNLOCKED in [spin|rw]_lock_init() SPIN_LOCK_UNLOCKED and RW_LOCK_UNLOCKED are deprecated. Replace them with the __*_LOCK_UNLOCKED variants. Signed-off-by: Thomas Gleixner --- include/linux/spinlock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index faf1482028df..71dccfeb0d88 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -100,7 +100,7 @@ do { \ #else # define spin_lock_init(lock) \ - do { *(lock) = SPIN_LOCK_UNLOCKED; } while (0) + do { *(lock) = __SPIN_LOCK_UNLOCKED(lock); } while (0) #endif #ifdef CONFIG_DEBUG_SPINLOCK @@ -114,7 +114,7 @@ do { \ } while (0) #else # define rwlock_init(lock) \ - do { *(lock) = RW_LOCK_UNLOCKED; } while (0) + do { *(lock) = __RW_LOCK_UNLOCKED(lock); } while (0) #endif #define spin_is_locked(lock) __raw_spin_is_locked(&(lock)->raw_lock) -- cgit v1.3-8-gc7d7 From ad85dfe67bbf13d5fa20764e4ce801a1e6e526d8 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 18 Nov 2009 15:52:51 +0100 Subject: DRBD: Now the code is 8.3.6 + 3 fixes (without compat crap) Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 18942ad115d9..99a4d76694ed 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -53,7 +53,7 @@ extern const char *drbd_buildtag(void); -#define REL_VERSION "8.3.5" +#define REL_VERSION "8.3.6" #define API_VERSION 88 #define PRO_VERSION_MIN 86 #define PRO_VERSION_MAX 91 -- cgit v1.3-8-gc7d7 From ff038f5c37c2070829004a0678372766c2b32180 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 18 Nov 2009 20:27:27 -0500 Subject: tracing: Create new TRACE_EVENT_TEMPLATE There are some places in the kernel that define several tracepoints and they are all identical besides the name. The code to enable, disable and record is created for every trace point even if most of the code is identical. This patch adds TRACE_EVENT_TEMPLATE that lets the developer create a template TRACE_EVENT and create trace points with DEFINE_EVENT, which is based off of a given template. Each trace point used by this will share most of the code, and bring down the size of the kernel when there are several duplicate events. Usage is: TRACE_EVENT_TEMPLATE(name, proto, args, tstruct, assign, print); Which would be the same as defining a normal TRACE_EVENT. To create the trace events that the trace points will use: DEFINE_EVENT(template, name, proto, args) is done. The template is the name of the TRACE_EVENT_TEMPLATE to use. The name is the name of the trace point. The parameters proto and args must be the same as the proto and args of the template. If they are not the same, then a compile error will result. I tried hard removing this duplication but the C preprocessor is not powerful enough (or my CPP magic experience points is not at a high enough level) to not need them. A lot of trace events are coming in with new XFS development. Most of the trace points are identical except for the name. The following shows the advantage of having TRACE_EVENT_TEMPLATE: $ size fs/xfs/xfs.o.* text data bss dec hex filename 452114 2788 3520 458422 6feb6 fs/xfs/xfs.o.old 638482 38116 3744 680342 a6196 fs/xfs/xfs.o.template 996954 38116 4480 1039550 fdcbe fs/xfs/xfs.o.trace xfs.o.old is without any tracepoints. xfs.o.template uses the new TRACE_EVENT_TEMPLATE. xfs.o.trace uses the current TRACE_EVENT macros. Requested-by: Christoph Hellwig Signed-off-by: Steven Rostedt --- include/linux/tracepoint.h | 4 ++ include/trace/define_trace.h | 6 ++ include/trace/ftrace.h | 149 +++++++++++++++++++++++++++++++------------ 3 files changed, 117 insertions(+), 42 deletions(-) (limited to 'include') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 2aac8a83e89b..88a5b5a809ec 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -280,6 +280,10 @@ static inline void tracepoint_synchronize_unregister(void) * TRACE_EVENT_FN to perform any (un)registration work. */ +#define TRACE_EVENT_TEMPLATE(name, proto, args, tstruct, assign, print) +#define DEFINE_EVENT(template, name, proto, args) \ + DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) + #define TRACE_EVENT(name, proto, args, struct, assign, print) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) #define TRACE_EVENT_FN(name, proto, args, struct, \ diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index 2a4b3bf74033..244985814a43 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h @@ -31,6 +31,10 @@ assign, print, reg, unreg) \ DEFINE_TRACE_FN(name, reg, unreg) +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, name, proto, args) \ + DEFINE_TRACE(name) + #undef DECLARE_TRACE #define DECLARE_TRACE(name, proto, args) \ DEFINE_TRACE(name) @@ -63,6 +67,8 @@ #undef TRACE_EVENT #undef TRACE_EVENT_FN +#undef TRACE_EVENT_TEMPLATE +#undef DEFINE_EVENT #undef TRACE_HEADER_MULTI_READ /* Only undef what we defined in this file */ diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index c3417c13e3ed..2969f65d8002 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -18,6 +18,26 @@ #include +/* + * TRACE_EVENT_TEMPLATE can be used to add a generic function + * handlers for events. That is, if all events have the same + * parameters and just have distinct trace points. + * Each tracepoint can be defined with DEFINE_EVENT and that + * will map the TRACE_EVENT_TEMPLATE to the tracepoint. + * + * TRACE_EVENT is a one to one mapping between tracepoint and template. + */ +#undef TRACE_EVENT +#define TRACE_EVENT(name, proto, args, tstruct, assign, print) \ + TRACE_EVENT_TEMPLATE(name, \ + PARAMS(proto), \ + PARAMS(args), \ + PARAMS(tstruct), \ + PARAMS(assign), \ + PARAMS(print)); \ + DEFINE_EVENT(name, name, PARAMS(proto), PARAMS(args)); + + #undef __field #define __field(type, item) type item; @@ -36,13 +56,15 @@ #undef TP_STRUCT__entry #define TP_STRUCT__entry(args...) args -#undef TRACE_EVENT -#define TRACE_EVENT(name, proto, args, tstruct, assign, print) \ - struct ftrace_raw_##name { \ - struct trace_entry ent; \ - tstruct \ - char __data[0]; \ - }; \ +#undef TRACE_EVENT_TEMPLATE +#define TRACE_EVENT_TEMPLATE(name, proto, args, tstruct, assign, print) \ + struct ftrace_raw_##name { \ + struct trace_entry ent; \ + tstruct \ + char __data[0]; \ + }; +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, name, proto, args) \ static struct ftrace_event_call event_##name #undef __cpparg @@ -89,12 +111,15 @@ #undef __string #define __string(item, src) __dynamic_array(char, item, -1) -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ +#undef TRACE_EVENT_TEMPLATE +#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) \ struct ftrace_data_offsets_##call { \ tstruct; \ }; +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, name, proto, args) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) /* @@ -170,8 +195,8 @@ #undef TP_perf_assign #define TP_perf_assign(args...) -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, func, print) \ +#undef TRACE_EVENT_TEMPLATE +#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, func, print) \ static int \ ftrace_format_##call(struct ftrace_event_call *unused, \ struct trace_seq *s) \ @@ -186,6 +211,9 @@ ftrace_format_##call(struct ftrace_event_call *unused, \ return ret; \ } +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, name, proto, args) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) /* @@ -255,10 +283,11 @@ ftrace_format_##call(struct ftrace_event_call *unused, \ ftrace_print_symbols_seq(p, value, symbols); \ }) -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ +#undef TRACE_EVENT_TEMPLATE +#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) \ static enum print_line_t \ -ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ +ftrace_raw_output_id_##call(int event_id, const char *name, \ + struct trace_iterator *iter, int flags) \ { \ struct trace_seq *s = &iter->seq; \ struct ftrace_raw_##call *field; \ @@ -268,7 +297,7 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ \ entry = iter->ent; \ \ - if (entry->type != event_##call.id) { \ + if (entry->type != event_id) { \ WARN_ON_ONCE(1); \ return TRACE_TYPE_UNHANDLED; \ } \ @@ -277,14 +306,25 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ \ p = &get_cpu_var(ftrace_event_seq); \ trace_seq_init(p); \ - ret = trace_seq_printf(s, #call ": " print); \ + ret = trace_seq_printf(s, "%s: ", name); \ + if (ret) \ + ret = trace_seq_printf(s, print); \ put_cpu(); \ if (!ret) \ return TRACE_TYPE_PARTIAL_LINE; \ \ return TRACE_TYPE_HANDLED; \ } - + +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, name, proto, args) \ +static enum print_line_t \ +ftrace_raw_output_##name(struct trace_iterator *iter, int flags) \ +{ \ + return ftrace_raw_output_id_##template(event_##name.id, \ + #name, iter, flags); \ +} + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) #undef __field_ext @@ -318,8 +358,8 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ #undef __string #define __string(item, src) __dynamic_array(char, item, -1) -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, func, print) \ +#undef TRACE_EVENT_TEMPLATE +#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, func, print) \ static int \ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ { \ @@ -335,6 +375,9 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ return ret; \ } +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, name, proto, args) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) /* @@ -361,10 +404,10 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ __data_size += (len) * sizeof(type); #undef __string -#define __string(item, src) __dynamic_array(char, item, strlen(src) + 1) \ +#define __string(item, src) __dynamic_array(char, item, strlen(src) + 1) -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ +#undef TRACE_EVENT_TEMPLATE +#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) \ static inline int ftrace_get_offsets_##call( \ struct ftrace_data_offsets_##call *__data_offsets, proto) \ { \ @@ -376,6 +419,9 @@ static inline int ftrace_get_offsets_##call( \ return __data_size; \ } +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, name, proto, args) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) #ifdef CONFIG_EVENT_PROFILE @@ -397,19 +443,22 @@ static inline int ftrace_get_offsets_##call( \ * */ -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ +#undef TRACE_EVENT_TEMPLATE +#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) + +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, name, proto, args) \ \ -static void ftrace_profile_##call(proto); \ +static void ftrace_profile_##name(proto); \ \ -static int ftrace_profile_enable_##call(struct ftrace_event_call *unused)\ +static int ftrace_profile_enable_##name(struct ftrace_event_call *unused)\ { \ - return register_trace_##call(ftrace_profile_##call); \ + return register_trace_##name(ftrace_profile_##name); \ } \ \ -static void ftrace_profile_disable_##call(struct ftrace_event_call *unused)\ +static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\ { \ - unregister_trace_##call(ftrace_profile_##call); \ + unregister_trace_##name(ftrace_profile_##name); \ } #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) @@ -550,15 +599,13 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *unused)\ #define __assign_str(dst, src) \ strcpy(__get_str(dst), src); -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ - \ -static struct ftrace_event_call event_##call; \ +#undef TRACE_EVENT_TEMPLATE +#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) \ \ -static void ftrace_raw_event_##call(proto) \ +static void ftrace_raw_event_id_##call(struct ftrace_event_call *event_call, \ + proto) \ { \ struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ - struct ftrace_event_call *event_call = &event_##call; \ struct ring_buffer_event *event; \ struct ftrace_raw_##call *entry; \ struct ring_buffer *buffer; \ @@ -572,7 +619,7 @@ static void ftrace_raw_event_##call(proto) \ __data_size = ftrace_get_offsets_##call(&__data_offsets, args); \ \ event = trace_current_buffer_lock_reserve(&buffer, \ - event_##call.id, \ + event_call->id, \ sizeof(*entry) + __data_size, \ irq_flags, pc); \ if (!event) \ @@ -587,6 +634,14 @@ static void ftrace_raw_event_##call(proto) \ if (!filter_current_check_discard(buffer, event_call, entry, event)) \ trace_nowake_buffer_unlock_commit(buffer, \ event, irq_flags, pc); \ +} + +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, call, proto, args) \ + \ +static void ftrace_raw_event_##call(proto) \ +{ \ + ftrace_raw_event_id_##template(&event_##call, args); \ } \ \ static int ftrace_raw_reg_event_##call(struct ftrace_event_call *unused)\ @@ -630,8 +685,8 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ .raw_init = ftrace_raw_init_event_##call, \ .regfunc = ftrace_raw_reg_event_##call, \ .unregfunc = ftrace_raw_unreg_event_##call, \ - .show_format = ftrace_format_##call, \ - .define_fields = ftrace_define_fields_##call, \ + .show_format = ftrace_format_##template, \ + .define_fields = ftrace_define_fields_##template, \ _TRACE_PROFILE_INIT(call) \ } @@ -719,14 +774,15 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ #undef __perf_count #define __perf_count(c) __count = (c) -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ -static void ftrace_profile_##call(proto) \ +#undef TRACE_EVENT_TEMPLATE +#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) \ +static void \ +ftrace_profile_templ_##call(struct ftrace_event_call *event_call, \ + proto) \ { \ struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ extern int perf_swevent_get_recursion_context(void); \ extern void perf_swevent_put_recursion_context(int rctx); \ - struct ftrace_event_call *event_call = &event_##call; \ extern void perf_tp_event(int, u64, u64, void *, int); \ struct ftrace_raw_##call *entry; \ u64 __addr = 0, __count = 1; \ @@ -789,6 +845,15 @@ end_recursion: \ \ } +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, call, proto, args) \ +static void ftrace_profile_##call(proto) \ +{ \ + struct ftrace_event_call *event_call = &event_##call; \ + \ + ftrace_profile_templ_##template(event_call, args); \ +} + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) #endif /* CONFIG_EVENT_PROFILE */ -- cgit v1.3-8-gc7d7 From e5bc9721684e9412f3e0465222f317c362a8ab47 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 18 Nov 2009 20:36:26 -0500 Subject: tracing: Create new DEFINE_EVENT_PRINT After creating the TRACE_EVENT_TEMPLATE I started to look at other trace points to see what duplication was made. I noticed that there are several trace points where they are almost identical except for the name and the output format. Since TRACE_EVENT_TEMPLATE was successful in bringing down the size of trace events, I added a DEFINE_EVENT_PRINT. DEFINE_EVENT_PRINT is used just like DEFINE_EVENT is. That is, the DEFINE_EVENT_PRINT also uses a TRACE_EVENT_TEMPLATE, but it allows the developer to overwrite the print format. If there are two or more TRACE_EVENTS that are identical except for the name and print, then they can be converted to use a TRACE_EVENT_TEMPLATE. Since the TRACE_EVENT_TEMPLATE already does the print output, the first trace event would have its print format held in the TRACE_EVENT_TEMPLATE and be defined with a DEFINE_EVENT. The rest will use the DEFINE_EVENT_PRINT and override the print format. Converting the sched trace points to both DEFINE_EVENT and DEFINE_EVENT_PRINT. Five were converted to DEFINE_EVENT and two were converted to DEFINE_EVENT_PRINT. I was able to get the following: $ size kernel/sched.o-* text data bss dec hex filename 79299 6776 2520 88595 15a13 kernel/sched.o-notrace 101941 11896 2584 116421 1c6c5 kernel/sched.o-templ 104779 11896 2584 119259 1d1db kernel/sched.o-trace sched.o-notrace is the scheduler compiled with no trace points. sched.o-templ is with the use of DEFINE_EVENT and DEFINE_EVENT_PRINT sched.o-trace is the current trace events. Signed-off-by: Steven Rostedt --- include/linux/tracepoint.h | 2 + include/trace/define_trace.h | 5 ++ include/trace/ftrace.h | 123 +++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 126 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 88a5b5a809ec..7063383cca13 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -283,6 +283,8 @@ static inline void tracepoint_synchronize_unregister(void) #define TRACE_EVENT_TEMPLATE(name, proto, args, tstruct, assign, print) #define DEFINE_EVENT(template, name, proto, args) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ + DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) #define TRACE_EVENT(name, proto, args, struct, assign, print) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index 244985814a43..5d7d855ae21e 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h @@ -35,6 +35,10 @@ #define DEFINE_EVENT(template, name, proto, args) \ DEFINE_TRACE(name) +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ + DEFINE_TRACE(name) + #undef DECLARE_TRACE #define DECLARE_TRACE(name, proto, args) \ DEFINE_TRACE(name) @@ -69,6 +73,7 @@ #undef TRACE_EVENT_FN #undef TRACE_EVENT_TEMPLATE #undef DEFINE_EVENT +#undef DEFINE_EVENT_PRINT #undef TRACE_HEADER_MULTI_READ /* Only undef what we defined in this file */ diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 2969f65d8002..b0461772bc8d 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -67,6 +67,10 @@ #define DEFINE_EVENT(template, name, proto, args) \ static struct ftrace_event_call event_##name +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ + DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) + #undef __cpparg #define __cpparg(arg...) arg @@ -120,6 +124,10 @@ #undef DEFINE_EVENT #define DEFINE_EVENT(template, name, proto, args) +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ + DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) /* @@ -198,15 +206,28 @@ #undef TRACE_EVENT_TEMPLATE #define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, func, print) \ static int \ -ftrace_format_##call(struct ftrace_event_call *unused, \ - struct trace_seq *s) \ +ftrace_format_setup_##call(struct ftrace_event_call *unused, \ + struct trace_seq *s) \ { \ struct ftrace_raw_##call field __attribute__((unused)); \ int ret = 0; \ \ tstruct; \ \ - trace_seq_printf(s, "\nprint fmt: " print); \ + return ret; \ +} \ + \ +static int \ +ftrace_format_##call(struct ftrace_event_call *unused, \ + struct trace_seq *s) \ +{ \ + int ret = 0; \ + \ + ret = ftrace_format_setup_##call(unused, s); \ + if (!ret) \ + return ret; \ + \ + ret = trace_seq_printf(s, "\nprint fmt: " print); \ \ return ret; \ } @@ -214,6 +235,23 @@ ftrace_format_##call(struct ftrace_event_call *unused, \ #undef DEFINE_EVENT #define DEFINE_EVENT(template, name, proto, args) +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ +static int \ +ftrace_format_##name(struct ftrace_event_call *unused, \ + struct trace_seq *s) \ +{ \ + int ret = 0; \ + \ + ret = ftrace_format_setup_##template(unused, s); \ + if (!ret) \ + return ret; \ + \ + trace_seq_printf(s, "\nprint fmt: " print); \ + \ + return ret; \ +} + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) /* @@ -325,6 +363,38 @@ ftrace_raw_output_##name(struct trace_iterator *iter, int flags) \ #name, iter, flags); \ } +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, call, proto, args, print) \ +static enum print_line_t \ +ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ +{ \ + struct trace_seq *s = &iter->seq; \ + struct ftrace_raw_##template *field; \ + struct trace_entry *entry; \ + struct trace_seq *p; \ + int ret; \ + \ + entry = iter->ent; \ + \ + if (entry->type != event_##call.id) { \ + WARN_ON_ONCE(1); \ + return TRACE_TYPE_UNHANDLED; \ + } \ + \ + field = (typeof(field))entry; \ + \ + p = &get_cpu_var(ftrace_event_seq); \ + trace_seq_init(p); \ + ret = trace_seq_printf(s, "%s: ", #call); \ + if (ret) \ + ret = trace_seq_printf(s, print); \ + put_cpu(); \ + if (!ret) \ + return TRACE_TYPE_PARTIAL_LINE; \ + \ + return TRACE_TYPE_HANDLED; \ +} + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) #undef __field_ext @@ -378,6 +448,10 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ #undef DEFINE_EVENT #define DEFINE_EVENT(template, name, proto, args) +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ + DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) /* @@ -422,6 +496,10 @@ static inline int ftrace_get_offsets_##call( \ #undef DEFINE_EVENT #define DEFINE_EVENT(template, name, proto, args) +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ + DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) #ifdef CONFIG_EVENT_PROFILE @@ -461,6 +539,10 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\ unregister_trace_##name(ftrace_profile_##name); \ } +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ + DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) #endif @@ -674,7 +756,19 @@ static int ftrace_raw_init_event_##call(struct ftrace_event_call *unused)\ event_##call.id = id; \ INIT_LIST_HEAD(&event_##call.fields); \ return 0; \ -} \ +} + +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ + DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +#undef TRACE_EVENT_TEMPLATE +#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) + +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, call, proto, args) \ \ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ @@ -690,6 +784,23 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ _TRACE_PROFILE_INIT(call) \ } +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, call, proto, args, print) \ + \ +static struct ftrace_event_call __used \ +__attribute__((__aligned__(4))) \ +__attribute__((section("_ftrace_events"))) event_##call = { \ + .name = #call, \ + .system = __stringify(TRACE_SYSTEM), \ + .event = &ftrace_event_type_##call, \ + .raw_init = ftrace_raw_init_event_##call, \ + .regfunc = ftrace_raw_reg_event_##call, \ + .unregfunc = ftrace_raw_unreg_event_##call, \ + .show_format = ftrace_format_##call, \ + .define_fields = ftrace_define_fields_##template, \ + _TRACE_PROFILE_INIT(call) \ +} + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) /* @@ -854,6 +965,10 @@ static void ftrace_profile_##call(proto) \ ftrace_profile_templ_##template(event_call, args); \ } +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ + DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) #endif /* CONFIG_EVENT_PROFILE */ -- cgit v1.3-8-gc7d7 From 75ec29ab848a7e92a41aaafaeb33d1afbc839be4 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 18 Nov 2009 20:48:08 -0500 Subject: tracing: Convert some sched trace events to DEFINE_EVENT and _PRINT Converting some of the scheduler trace events to use the TRACE_EVENT_TEMPLATE, DEFINE_EVENT and DEFINE_EVENT_PRINT helped to save some space: $ size kernel/sched.o-* text data bss dec hex filename 79299 6776 2520 88595 15a13 kernel/sched.o-notrace 101941 11896 2584 116421 1c6c5 kernel/sched.o-templ 104779 11896 2584 119259 1d1db kernel/sched.o-trace sched.o-notrace is without any tracepoints compiled sched.o-templ is with this patch sched.o-trace is the tracepoints before this patch The trace events converted to DEFINE_EVENT: sched_wakeup, sched_wakeup_new, sched_process_free, sched_process_exit, and sched_stat_wait. The trace events converted to DEFINE_EVENT_PRINT: sched_stat_sleep and sched_stat_iowait. Note, since the TRACE_EVENT_TEMPLATE always uses a print, the sched_stat_wait print format is defined in the template and this template is used by sched_stat_sleep and sched_stat_iowait. But the later two override the print format. Signed-off-by: Steven Rostedt --- include/trace/events/sched.h | 170 +++++++++++++------------------------------ 1 file changed, 52 insertions(+), 118 deletions(-) (limited to 'include') diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index b50b9856c59f..238f74b58486 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -83,7 +83,7 @@ TRACE_EVENT(sched_wait_task, * (NOTE: the 'rq' argument is not used by generic trace events, * but used by the latency tracer plugin. ) */ -TRACE_EVENT(sched_wakeup, +TRACE_EVENT_TEMPLATE(sched_wakeup_template, TP_PROTO(struct rq *rq, struct task_struct *p, int success), @@ -110,38 +110,19 @@ TRACE_EVENT(sched_wakeup, __entry->success, __entry->target_cpu) ); +DEFINE_EVENT(sched_wakeup_template, sched_wakeup, + TP_PROTO(struct rq *rq, struct task_struct *p, int success), + TP_ARGS(rq, p, success)); + /* * Tracepoint for waking up a new task: * * (NOTE: the 'rq' argument is not used by generic trace events, * but used by the latency tracer plugin. ) */ -TRACE_EVENT(sched_wakeup_new, - - TP_PROTO(struct rq *rq, struct task_struct *p, int success), - - TP_ARGS(rq, p, success), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - __field( int, success ) - __field( int, target_cpu ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - __entry->success = success; - __entry->target_cpu = task_cpu(p); - ), - - TP_printk("comm=%s pid=%d prio=%d success=%d target_cpu=%03d", - __entry->comm, __entry->pid, __entry->prio, - __entry->success, __entry->target_cpu) -); +DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new, + TP_PROTO(struct rq *rq, struct task_struct *p, int success), + TP_ARGS(rq, p, success)); /* * Tracepoint for task switches, performed by the scheduler: @@ -216,10 +197,7 @@ TRACE_EVENT(sched_migrate_task, __entry->orig_cpu, __entry->dest_cpu) ); -/* - * Tracepoint for freeing a task: - */ -TRACE_EVENT(sched_process_free, +TRACE_EVENT_TEMPLATE(sched_process_template, TP_PROTO(struct task_struct *p), @@ -242,29 +220,19 @@ TRACE_EVENT(sched_process_free, ); /* - * Tracepoint for a task exiting: + * Tracepoint for freeing a task: */ -TRACE_EVENT(sched_process_exit, - - TP_PROTO(struct task_struct *p), +DEFINE_EVENT(sched_process_template, sched_process_free, + TP_PROTO(struct task_struct *p), + TP_ARGS(p)); + - TP_ARGS(p), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - ), - - TP_printk("comm=%s pid=%d prio=%d", - __entry->comm, __entry->pid, __entry->prio) -); +/* + * Tracepoint for a task exiting: + */ +DEFINE_EVENT(sched_process_template, sched_process_exit, + TP_PROTO(struct task_struct *p), + TP_ARGS(p)); /* * Tracepoint for a waiting task: @@ -348,12 +316,7 @@ TRACE_EVENT(sched_signal_send, * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE * adding sched_stat support to SCHED_FIFO/RR would be welcome. */ - -/* - * Tracepoint for accounting wait time (time the task is runnable - * but not actually running due to scheduler contention). - */ -TRACE_EVENT(sched_stat_wait, +TRACE_EVENT_TEMPLATE(sched_stat_template, TP_PROTO(struct task_struct *tsk, u64 delay), @@ -379,6 +342,37 @@ TRACE_EVENT(sched_stat_wait, (unsigned long long)__entry->delay) ); + +/* + * Tracepoint for accounting wait time (time the task is runnable + * but not actually running due to scheduler contention). + */ +DEFINE_EVENT(sched_stat_template, sched_stat_wait, + TP_PROTO(struct task_struct *tsk, u64 delay), + TP_ARGS(tsk, delay)); + +/* + * Tracepoint for accounting sleep time (time the task is not runnable, + * including iowait, see below). + */ +DEFINE_EVENT_PRINT(sched_stat_template, sched_stat_sleep, + TP_PROTO(struct task_struct *tsk, u64 delay), + TP_ARGS(tsk, delay), + TP_printk("task: %s:%d sleep: %Lu [ns]", + __entry->comm, __entry->pid, + (unsigned long long)__entry->delay)); + +/* + * Tracepoint for accounting iowait time (time the task is not runnable + * due to waiting on IO to complete). + */ +DEFINE_EVENT_PRINT(sched_stat_template, sched_stat_iowait, + TP_PROTO(struct task_struct *tsk, u64 delay), + TP_ARGS(tsk, delay), + TP_printk("task: %s:%d iowait: %Lu [ns]", + __entry->comm, __entry->pid, + (unsigned long long)__entry->delay)); + /* * Tracepoint for accounting runtime (time the task is executing * on a CPU). @@ -412,66 +406,6 @@ TRACE_EVENT(sched_stat_runtime, (unsigned long long)__entry->vruntime) ); -/* - * Tracepoint for accounting sleep time (time the task is not runnable, - * including iowait, see below). - */ -TRACE_EVENT(sched_stat_sleep, - - TP_PROTO(struct task_struct *tsk, u64 delay), - - TP_ARGS(tsk, delay), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( u64, delay ) - ), - - TP_fast_assign( - memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); - __entry->pid = tsk->pid; - __entry->delay = delay; - ) - TP_perf_assign( - __perf_count(delay); - ), - - TP_printk("comm=%s pid=%d delay=%Lu [ns]", - __entry->comm, __entry->pid, - (unsigned long long)__entry->delay) -); - -/* - * Tracepoint for accounting iowait time (time the task is not runnable - * due to waiting on IO to complete). - */ -TRACE_EVENT(sched_stat_iowait, - - TP_PROTO(struct task_struct *tsk, u64 delay), - - TP_ARGS(tsk, delay), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( u64, delay ) - ), - - TP_fast_assign( - memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); - __entry->pid = tsk->pid; - __entry->delay = delay; - ) - TP_perf_assign( - __perf_count(delay); - ), - - TP_printk("comm=%s pid=%d delay=%Lu [ns]", - __entry->comm, __entry->pid, - (unsigned long long)__entry->delay) -); - #endif /* _TRACE_SCHED_H */ /* This part must be outside protection */ -- cgit v1.3-8-gc7d7 From cc3316e7a97cdbfc34633e20195f8c98b9ff9ff5 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Wed, 14 Oct 2009 09:01:39 +0800 Subject: ACPICA: Update version to 20091013 Version 20091013. Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown --- include/acpi/acpixf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index e723b0fd8e41..0e7efeacf6cb 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -47,7 +47,7 @@ /* Current ACPICA subsystem version in YYYYMMDD format */ -#define ACPI_CA_VERSION 0x20090903 +#define ACPI_CA_VERSION 0x20091013 #include "actypes.h" #include "actbl.h" -- cgit v1.3-8-gc7d7 From 2263576cfc6e8f6ab038126c3254404b9fcb1c33 Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Fri, 13 Nov 2009 10:06:08 +0800 Subject: ACPICA: Add post-order callback to acpi_walk_namespace The existing interface only has a pre-order callback. This change adds an additional parameter for a post-order callback which will be more useful for bus scans. ACPICA BZ 779. Also update the external calls to acpi_walk_namespace. http://www.acpica.org/bugzilla/show_bug.cgi?id=779 Signed-off-by: Lin Ming Signed-off-by: Bob Moore Signed-off-by: Len Brown --- arch/ia64/sn/kernel/io_acpi_init.c | 2 +- arch/x86/kernel/cpu/cpufreq/longhaul.c | 2 +- drivers/acpi/acpi_memhotplug.c | 4 +- drivers/acpi/acpica/acnamesp.h | 3 +- drivers/acpi/acpica/dsinit.c | 2 +- drivers/acpi/acpica/evgpeblk.c | 8 +- drivers/acpi/acpica/evregion.c | 6 +- drivers/acpi/acpica/nsdump.c | 4 +- drivers/acpi/acpica/nsdumpdv.c | 3 +- drivers/acpi/acpica/nsinit.c | 8 +- drivers/acpi/acpica/nswalk.c | 200 +++++++++++++++++++-------------- drivers/acpi/acpica/nsxfeval.c | 32 +++--- drivers/acpi/container.c | 4 +- drivers/acpi/dock.c | 8 +- drivers/acpi/ec.c | 2 +- drivers/acpi/glue.c | 2 +- drivers/acpi/pci_slot.c | 8 +- drivers/acpi/processor_core.c | 4 +- drivers/acpi/scan.c | 2 +- drivers/acpi/video_detect.c | 6 +- drivers/gpu/drm/i915/intel_lvds.c | 2 +- drivers/i2c/busses/i2c-scmi.c | 2 +- drivers/pci/hotplug/acpi_pcihp.c | 2 +- drivers/pci/hotplug/acpiphp_glue.c | 22 ++-- drivers/pci/hotplug/acpiphp_ibm.c | 2 +- drivers/platform/x86/intel_menlow.c | 2 +- drivers/platform/x86/sony-laptop.c | 2 +- drivers/platform/x86/thinkpad_acpi.c | 2 +- include/acpi/acpixf.h | 3 +- 29 files changed, 197 insertions(+), 152 deletions(-) (limited to 'include') diff --git a/arch/ia64/sn/kernel/io_acpi_init.c b/arch/ia64/sn/kernel/io_acpi_init.c index fd50ff94302b..66f633bff059 100644 --- a/arch/ia64/sn/kernel/io_acpi_init.c +++ b/arch/ia64/sn/kernel/io_acpi_init.c @@ -390,7 +390,7 @@ sn_acpi_get_pcidev_info(struct pci_dev *dev, struct pcidev_info **pcidev_info, pcidev_match.handle = NULL; acpi_walk_namespace(ACPI_TYPE_DEVICE, rootbus_handle, ACPI_UINT32_MAX, - find_matching_device, &pcidev_match, NULL); + find_matching_device, NULL, &pcidev_match, NULL); if (!pcidev_match.handle) { printk(KERN_ERR diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c index cabd2fa3fc93..7e7eea4f8261 100644 --- a/arch/x86/kernel/cpu/cpufreq/longhaul.c +++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c @@ -885,7 +885,7 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy) /* Find ACPI data for processor */ acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, - ACPI_UINT32_MAX, &longhaul_walk_callback, + ACPI_UINT32_MAX, &longhaul_walk_callback, NULL, NULL, (void *)&pr); /* Check ACPI support for C3 state */ diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c index 28ccdbc05ac8..3597d73f28f6 100644 --- a/drivers/acpi/acpi_memhotplug.c +++ b/drivers/acpi/acpi_memhotplug.c @@ -537,7 +537,7 @@ static int __init acpi_memory_device_init(void) status = acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX, - acpi_memory_register_notify_handler, + acpi_memory_register_notify_handler, NULL, NULL, NULL); if (ACPI_FAILURE(status)) { @@ -561,7 +561,7 @@ static void __exit acpi_memory_device_exit(void) */ status = acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX, - acpi_memory_deregister_notify_handler, + acpi_memory_deregister_notify_handler, NULL, NULL, NULL); if (ACPI_FAILURE(status)) diff --git a/drivers/acpi/acpica/acnamesp.h b/drivers/acpi/acpica/acnamesp.h index 09a2764c734b..168e60893a2a 100644 --- a/drivers/acpi/acpica/acnamesp.h +++ b/drivers/acpi/acpica/acnamesp.h @@ -104,7 +104,8 @@ acpi_ns_walk_namespace(acpi_object_type type, acpi_handle start_object, u32 max_depth, u32 flags, - acpi_walk_callback user_function, + acpi_walk_callback pre_order_visit, + acpi_walk_callback post_order_visit, void *context, void **return_value); struct acpi_namespace_node *acpi_ns_get_next_node(struct acpi_namespace_node diff --git a/drivers/acpi/acpica/dsinit.c b/drivers/acpi/acpica/dsinit.c index 3aae13f30c5e..f23fa0be6fc2 100644 --- a/drivers/acpi/acpica/dsinit.c +++ b/drivers/acpi/acpica/dsinit.c @@ -192,7 +192,7 @@ acpi_ds_initialize_objects(u32 table_index, status = acpi_ns_walk_namespace(ACPI_TYPE_ANY, start_node, ACPI_UINT32_MAX, ACPI_NS_WALK_UNLOCK, acpi_ds_init_one_object, - &info, NULL); + NULL, &info, NULL); if (ACPI_FAILURE(status)) { ACPI_EXCEPTION((AE_INFO, status, "During WalkNamespace")); } diff --git a/drivers/acpi/acpica/evgpeblk.c b/drivers/acpi/acpica/evgpeblk.c index a60aaa7635f3..247920900187 100644 --- a/drivers/acpi/acpica/evgpeblk.c +++ b/drivers/acpi/acpica/evgpeblk.c @@ -945,8 +945,8 @@ acpi_ev_create_gpe_block(struct acpi_namespace_node *gpe_device, status = acpi_ns_walk_namespace(ACPI_TYPE_METHOD, gpe_device, ACPI_UINT32_MAX, ACPI_NS_WALK_NO_UNLOCK, - acpi_ev_save_method_info, gpe_block, - NULL); + acpi_ev_save_method_info, NULL, + gpe_block, NULL); /* Return the new block */ @@ -1022,8 +1022,8 @@ acpi_ev_initialize_gpe_block(struct acpi_namespace_node *gpe_device, status = acpi_ns_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX, ACPI_NS_WALK_UNLOCK, - acpi_ev_match_prw_and_gpe, &gpe_info, - NULL); + acpi_ev_match_prw_and_gpe, NULL, + &gpe_info, NULL); } /* diff --git a/drivers/acpi/acpica/evregion.c b/drivers/acpi/acpica/evregion.c index c9fa040725d4..582b0af01e99 100644 --- a/drivers/acpi/acpica/evregion.c +++ b/drivers/acpi/acpica/evregion.c @@ -1025,8 +1025,8 @@ acpi_ev_install_space_handler(struct acpi_namespace_node * node, */ status = acpi_ns_walk_namespace(ACPI_TYPE_ANY, node, ACPI_UINT32_MAX, ACPI_NS_WALK_UNLOCK, - acpi_ev_install_handler, handler_obj, - NULL); + acpi_ev_install_handler, NULL, + handler_obj, NULL); unlock_and_exit: return_ACPI_STATUS(status); @@ -1062,7 +1062,7 @@ acpi_ev_execute_reg_methods(struct acpi_namespace_node *node, */ status = acpi_ns_walk_namespace(ACPI_TYPE_ANY, node, ACPI_UINT32_MAX, ACPI_NS_WALK_UNLOCK, acpi_ev_reg_run, - &space_id, NULL); + NULL, &space_id, NULL); return_ACPI_STATUS(status); } diff --git a/drivers/acpi/acpica/nsdump.c b/drivers/acpi/acpica/nsdump.c index 2bad613db73a..2deb986861ca 100644 --- a/drivers/acpi/acpica/nsdump.c +++ b/drivers/acpi/acpica/nsdump.c @@ -634,8 +634,8 @@ acpi_ns_dump_objects(acpi_object_type type, (void)acpi_ns_walk_namespace(type, start_handle, max_depth, ACPI_NS_WALK_NO_UNLOCK | ACPI_NS_WALK_TEMP_NODES, - acpi_ns_dump_one_object, (void *)&info, - NULL); + acpi_ns_dump_one_object, NULL, + (void *)&info, NULL); } #endif /* ACPI_FUTURE_USAGE */ diff --git a/drivers/acpi/acpica/nsdumpdv.c b/drivers/acpi/acpica/nsdumpdv.c index 0fe87f1aef16..36be7f0e97ec 100644 --- a/drivers/acpi/acpica/nsdumpdv.c +++ b/drivers/acpi/acpica/nsdumpdv.c @@ -131,7 +131,8 @@ void acpi_ns_dump_root_devices(void) status = acpi_ns_walk_namespace(ACPI_TYPE_DEVICE, sys_bus_handle, ACPI_UINT32_MAX, ACPI_NS_WALK_NO_UNLOCK, - acpi_ns_dump_one_device, NULL, NULL); + acpi_ns_dump_one_device, NULL, NULL, + NULL); } #endif diff --git a/drivers/acpi/acpica/nsinit.c b/drivers/acpi/acpica/nsinit.c index 1d5b360eb25b..4f8abac231d2 100644 --- a/drivers/acpi/acpica/nsinit.c +++ b/drivers/acpi/acpica/nsinit.c @@ -96,7 +96,7 @@ acpi_status acpi_ns_initialize_objects(void) /* Walk entire namespace from the supplied root */ status = acpi_walk_namespace(ACPI_TYPE_ANY, ACPI_ROOT_OBJECT, - ACPI_UINT32_MAX, acpi_ns_init_one_object, + ACPI_UINT32_MAX, acpi_ns_init_one_object, NULL, &info, NULL); if (ACPI_FAILURE(status)) { ACPI_EXCEPTION((AE_INFO, status, "During WalkNamespace")); @@ -156,7 +156,8 @@ acpi_status acpi_ns_initialize_devices(void) status = acpi_ns_walk_namespace(ACPI_TYPE_ANY, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX, FALSE, - acpi_ns_find_ini_methods, &info, NULL); + acpi_ns_find_ini_methods, NULL, &info, + NULL); if (ACPI_FAILURE(status)) { goto error_exit; } @@ -189,7 +190,8 @@ acpi_status acpi_ns_initialize_devices(void) status = acpi_ns_walk_namespace(ACPI_TYPE_ANY, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX, FALSE, - acpi_ns_init_one_device, &info, NULL); + acpi_ns_init_one_device, NULL, &info, + NULL); ACPI_FREE(info.evaluate_info); if (ACPI_FAILURE(status)) { diff --git a/drivers/acpi/acpica/nswalk.c b/drivers/acpi/acpica/nswalk.c index 35539df5c75d..d7e6b52b4482 100644 --- a/drivers/acpi/acpica/nswalk.c +++ b/drivers/acpi/acpica/nswalk.c @@ -165,24 +165,27 @@ struct acpi_namespace_node *acpi_ns_get_next_node_typed(acpi_object_type type, * max_depth - Depth to which search is to reach * Flags - Whether to unlock the NS before invoking * the callback routine - * user_function - Called when an object of "Type" is found - * Context - Passed to user function - * return_value - from the user_function if terminated early. - * Otherwise, returns NULL. + * pre_order_visit - Called during tree pre-order visit + * when an object of "Type" is found + * post_order_visit - Called during tree post-order visit + * when an object of "Type" is found + * Context - Passed to user function(s) above + * return_value - from the user_function if terminated + * early. Otherwise, returns NULL. * RETURNS: Status * * DESCRIPTION: Performs a modified depth-first walk of the namespace tree, * starting (and ending) at the node specified by start_handle. - * The user_function is called whenever a node that matches - * the type parameter is found. If the user function returns + * The callback function is called whenever a node that matches + * the type parameter is found. If the callback function returns * a non-zero value, the search is terminated immediately and * this value is returned to the caller. * * The point of this procedure is to provide a generic namespace * walk routine that can be called from multiple places to - * provide multiple services; the User Function can be tailored - * to each task, whether it is a print function, a compare - * function, etc. + * provide multiple services; the callback function(s) can be + * tailored to each task, whether it is a print function, + * a compare function, etc. * ******************************************************************************/ @@ -191,7 +194,8 @@ acpi_ns_walk_namespace(acpi_object_type type, acpi_handle start_node, u32 max_depth, u32 flags, - acpi_walk_callback user_function, + acpi_walk_callback pre_order_visit, + acpi_walk_callback post_order_visit, void *context, void **return_value) { acpi_status status; @@ -200,6 +204,7 @@ acpi_ns_walk_namespace(acpi_object_type type, struct acpi_namespace_node *parent_node; acpi_object_type child_type; u32 level; + u8 node_previously_visited = FALSE; ACPI_FUNCTION_TRACE(ns_walk_namespace); @@ -212,7 +217,7 @@ acpi_ns_walk_namespace(acpi_object_type type, /* Null child means "get first node" */ parent_node = start_node; - child_node = NULL; + child_node = acpi_ns_get_next_node(parent_node, NULL); child_type = ACPI_TYPE_ANY; level = 1; @@ -221,102 +226,129 @@ acpi_ns_walk_namespace(acpi_object_type type, * started. When Level is zero, the loop is done because we have * bubbled up to (and passed) the original parent handle (start_entry) */ - while (level > 0) { + while (level > 0 && child_node) { + status = AE_OK; - /* Get the next node in this scope. Null if not found */ + /* Found next child, get the type if we are not searching for ANY */ - status = AE_OK; - child_node = acpi_ns_get_next_node(parent_node, child_node); - if (child_node) { + if (type != ACPI_TYPE_ANY) { + child_type = child_node->type; + } - /* Found next child, get the type if we are not searching for ANY */ + /* + * Ignore all temporary namespace nodes (created during control + * method execution) unless told otherwise. These temporary nodes + * can cause a race condition because they can be deleted during + * the execution of the user function (if the namespace is + * unlocked before invocation of the user function.) Only the + * debugger namespace dump will examine the temporary nodes. + */ + if ((child_node->flags & ANOBJ_TEMPORARY) && + !(flags & ACPI_NS_WALK_TEMP_NODES)) { + status = AE_CTRL_DEPTH; + } - if (type != ACPI_TYPE_ANY) { - child_type = child_node->type; - } + /* Type must match requested type */ + else if (child_type == type) { /* - * Ignore all temporary namespace nodes (created during control - * method execution) unless told otherwise. These temporary nodes - * can cause a race condition because they can be deleted during - * the execution of the user function (if the namespace is - * unlocked before invocation of the user function.) Only the - * debugger namespace dump will examine the temporary nodes. + * Found a matching node, invoke the user callback function. + * Unlock the namespace if flag is set. */ - if ((child_node->flags & ANOBJ_TEMPORARY) && - !(flags & ACPI_NS_WALK_TEMP_NODES)) { - status = AE_CTRL_DEPTH; + if (flags & ACPI_NS_WALK_UNLOCK) { + mutex_status = + acpi_ut_release_mutex(ACPI_MTX_NAMESPACE); + if (ACPI_FAILURE(mutex_status)) { + return_ACPI_STATUS(mutex_status); + } } - /* Type must match requested type */ - - else if (child_type == type) { - /* - * Found a matching node, invoke the user callback function. - * Unlock the namespace if flag is set. - */ - if (flags & ACPI_NS_WALK_UNLOCK) { - mutex_status = - acpi_ut_release_mutex - (ACPI_MTX_NAMESPACE); - if (ACPI_FAILURE(mutex_status)) { - return_ACPI_STATUS - (mutex_status); - } + /* + * Invoke the user function, either pre-order or post-order + * or both. + */ + if (!node_previously_visited) { + if (pre_order_visit) { + status = + pre_order_visit(child_node, level, + context, + return_value); } + } else { + if (post_order_visit) { + status = + post_order_visit(child_node, level, + context, + return_value); + } + } - status = - user_function(child_node, level, context, - return_value); - - if (flags & ACPI_NS_WALK_UNLOCK) { - mutex_status = - acpi_ut_acquire_mutex - (ACPI_MTX_NAMESPACE); - if (ACPI_FAILURE(mutex_status)) { - return_ACPI_STATUS - (mutex_status); - } + if (flags & ACPI_NS_WALK_UNLOCK) { + mutex_status = + acpi_ut_acquire_mutex(ACPI_MTX_NAMESPACE); + if (ACPI_FAILURE(mutex_status)) { + return_ACPI_STATUS(mutex_status); } + } - switch (status) { - case AE_OK: - case AE_CTRL_DEPTH: + switch (status) { + case AE_OK: + case AE_CTRL_DEPTH: - /* Just keep going */ - break; + /* Just keep going */ + break; - case AE_CTRL_TERMINATE: + case AE_CTRL_TERMINATE: - /* Exit now, with OK status */ + /* Exit now, with OK status */ - return_ACPI_STATUS(AE_OK); + return_ACPI_STATUS(AE_OK); - default: + default: - /* All others are valid exceptions */ + /* All others are valid exceptions */ - return_ACPI_STATUS(status); - } + return_ACPI_STATUS(status); + } + } + + /* + * Depth first search: Attempt to go down another level in the + * namespace if we are allowed to. Don't go any further if we have + * reached the caller specified maximum depth or if the user + * function has specified that the maximum depth has been reached. + */ + if (!node_previously_visited && + (level < max_depth) && (status != AE_CTRL_DEPTH)) { + if (child_node->child) { + + /* There is at least one child of this node, visit it */ + + level++; + parent_node = child_node; + child_node = + acpi_ns_get_next_node(parent_node, NULL); + continue; } + } - /* - * Depth first search: Attempt to go down another level in the - * namespace if we are allowed to. Don't go any further if we have - * reached the caller specified maximum depth or if the user - * function has specified that the maximum depth has been reached. - */ - if ((level < max_depth) && (status != AE_CTRL_DEPTH)) { - if (child_node->child) { + /* No more children, re-visit this node */ - /* There is at least one child of this node, visit it */ + if (!node_previously_visited) { + node_previously_visited = TRUE; + continue; + } - level++; - parent_node = child_node; - child_node = NULL; - } - } - } else { + /* No more children, visit peers */ + + child_node = acpi_ns_get_next_node(parent_node, child_node); + if (child_node) { + node_previously_visited = FALSE; + } + + /* No peers, re-visit parent */ + + else { /* * No more children of this node (acpi_ns_get_next_node failed), go * back upwards in the namespace tree to the node's parent. @@ -324,6 +356,8 @@ acpi_ns_walk_namespace(acpi_object_type type, level--; child_node = parent_node; parent_node = acpi_ns_get_parent_node(parent_node); + + node_previously_visited = TRUE; } } diff --git a/drivers/acpi/acpica/nsxfeval.c b/drivers/acpi/acpica/nsxfeval.c index 4929dbdbc8f0..f2bd1da77001 100644 --- a/drivers/acpi/acpica/nsxfeval.c +++ b/drivers/acpi/acpica/nsxfeval.c @@ -433,8 +433,11 @@ static void acpi_ns_resolve_references(struct acpi_evaluate_info *info) * PARAMETERS: Type - acpi_object_type to search for * start_object - Handle in namespace where search begins * max_depth - Depth to which search is to reach - * user_function - Called when an object of "Type" is found - * Context - Passed to user function + * pre_order_visit - Called during tree pre-order visit + * when an object of "Type" is found + * post_order_visit - Called during tree post-order visit + * when an object of "Type" is found + * Context - Passed to user function(s) above * return_value - Location where return value of * user_function is put if terminated early * @@ -443,16 +446,16 @@ static void acpi_ns_resolve_references(struct acpi_evaluate_info *info) * * DESCRIPTION: Performs a modified depth-first walk of the namespace tree, * starting (and ending) at the object specified by start_handle. - * The user_function is called whenever an object that matches - * the type parameter is found. If the user function returns + * The callback function is called whenever an object that matches + * the type parameter is found. If the callback function returns * a non-zero value, the search is terminated immediately and this * value is returned to the caller. * * The point of this procedure is to provide a generic namespace * walk routine that can be called from multiple places to - * provide multiple services; the User Function can be tailored - * to each task, whether it is a print function, a compare - * function, etc. + * provide multiple services; the callback function(s) can be + * tailored to each task, whether it is a print function, + * a compare function, etc. * ******************************************************************************/ @@ -460,7 +463,8 @@ acpi_status acpi_walk_namespace(acpi_object_type type, acpi_handle start_object, u32 max_depth, - acpi_walk_callback user_function, + acpi_walk_callback pre_order_visit, + acpi_walk_callback post_order_visit, void *context, void **return_value) { acpi_status status; @@ -469,7 +473,8 @@ acpi_walk_namespace(acpi_object_type type, /* Parameter validation */ - if ((type > ACPI_TYPE_LOCAL_MAX) || (!max_depth) || (!user_function)) { + if ((type > ACPI_TYPE_LOCAL_MAX) || + (!max_depth) || (!pre_order_visit && !post_order_visit)) { return_ACPI_STATUS(AE_BAD_PARAMETER); } @@ -501,8 +506,9 @@ acpi_walk_namespace(acpi_object_type type, } status = acpi_ns_walk_namespace(type, start_object, max_depth, - ACPI_NS_WALK_UNLOCK, user_function, - context, return_value); + ACPI_NS_WALK_UNLOCK, pre_order_visit, + post_order_visit, context, + return_value); (void)acpi_ut_release_mutex(ACPI_MTX_NAMESPACE); @@ -681,8 +687,8 @@ acpi_get_devices(const char *HID, status = acpi_ns_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX, ACPI_NS_WALK_UNLOCK, - acpi_ns_get_device_callback, &info, - return_value); + acpi_ns_get_device_callback, NULL, + &info, return_value); (void)acpi_ut_release_mutex(ACPI_MTX_NAMESPACE); return_ACPI_STATUS(status); diff --git a/drivers/acpi/container.c b/drivers/acpi/container.c index 642bb305cb65..5faf6c21257d 100644 --- a/drivers/acpi/container.c +++ b/drivers/acpi/container.c @@ -258,7 +258,7 @@ static int __init acpi_container_init(void) acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX, - container_walk_namespace_cb, &action, NULL); + container_walk_namespace_cb, NULL, &action, NULL); return (0); } @@ -271,7 +271,7 @@ static void __exit acpi_container_exit(void) acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX, - container_walk_namespace_cb, &action, NULL); + container_walk_namespace_cb, NULL, &action, NULL); acpi_bus_unregister_driver(&acpi_container_driver); diff --git a/drivers/acpi/dock.c b/drivers/acpi/dock.c index 7338b6a3e049..30be3c148f7e 100644 --- a/drivers/acpi/dock.c +++ b/drivers/acpi/dock.c @@ -1030,8 +1030,8 @@ static int dock_add(acpi_handle handle) /* Find dependent devices */ acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT, - ACPI_UINT32_MAX, find_dock_devices, dock_station, - NULL); + ACPI_UINT32_MAX, find_dock_devices, NULL, + dock_station, NULL); /* add the dock station as a device dependent on itself */ dd = alloc_dock_dependent_device(handle); @@ -1127,11 +1127,11 @@ static int __init dock_init(void) /* look for a dock station */ acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT, - ACPI_UINT32_MAX, find_dock, NULL, NULL); + ACPI_UINT32_MAX, find_dock, NULL, NULL, NULL); /* look for bay */ acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT, - ACPI_UINT32_MAX, find_bay, NULL, NULL); + ACPI_UINT32_MAX, find_bay, NULL, NULL, NULL); if (!dock_station_count) { printk(KERN_INFO PREFIX "No dock devices found.\n"); return 0; diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index baef28c1e630..75b147f5c8fd 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -820,7 +820,7 @@ static int acpi_ec_add(struct acpi_device *device) /* Find and register all query methods */ acpi_walk_namespace(ACPI_TYPE_METHOD, ec->handle, 1, - acpi_ec_register_query_methods, ec, NULL); + acpi_ec_register_query_methods, NULL, ec, NULL); if (!first_ec) first_ec = ec; diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c index c6645f26224b..4c8fcff662cf 100644 --- a/drivers/acpi/glue.c +++ b/drivers/acpi/glue.c @@ -113,7 +113,7 @@ acpi_handle acpi_get_child(acpi_handle parent, acpi_integer address) if (!parent) return NULL; acpi_walk_namespace(ACPI_TYPE_DEVICE, parent, - 1, do_acpi_find_child, &find, NULL); + 1, do_acpi_find_child, NULL, &find, NULL); return find.handle; } diff --git a/drivers/acpi/pci_slot.c b/drivers/acpi/pci_slot.c index 45da2bae36c8..11f219743204 100644 --- a/drivers/acpi/pci_slot.c +++ b/drivers/acpi/pci_slot.c @@ -219,12 +219,12 @@ walk_p2p_bridge(acpi_handle handle, u32 lvl, void *context, void **rv) dbg("p2p bridge walk, pci_bus = %x\n", dev->subordinate->number); status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, (u32)1, - user_function, &child_context, NULL); + user_function, NULL, &child_context, NULL); if (ACPI_FAILURE(status)) goto out; status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, (u32)1, - walk_p2p_bridge, &child_context, NULL); + walk_p2p_bridge, NULL, &child_context, NULL); out: pci_dev_put(dev); return AE_OK; @@ -277,12 +277,12 @@ walk_root_bridge(acpi_handle handle, acpi_walk_callback user_function) dbg("root bridge walk, pci_bus = %x\n", pci_bus->number); status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, (u32)1, - user_function, &context, NULL); + user_function, NULL, &context, NULL); if (ACPI_FAILURE(status)) return status; status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, (u32)1, - walk_p2p_bridge, &context, NULL); + walk_p2p_bridge, NULL, &context, NULL); if (ACPI_FAILURE(status)) err("%s: walk_p2p_bridge failure - %d\n", __func__, status); diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c index ec742a4e5635..cb4283f5a79d 100644 --- a/drivers/acpi/processor_core.c +++ b/drivers/acpi/processor_core.c @@ -1102,7 +1102,7 @@ void acpi_processor_install_hotplug_notify(void) acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX, - processor_walk_namespace_cb, &action, NULL); + processor_walk_namespace_cb, NULL, &action, NULL); #endif register_hotcpu_notifier(&acpi_cpu_notifier); } @@ -1115,7 +1115,7 @@ void acpi_processor_uninstall_hotplug_notify(void) acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX, - processor_walk_namespace_cb, &action, NULL); + processor_walk_namespace_cb, NULL, &action, NULL); #endif unregister_hotcpu_notifier(&acpi_cpu_notifier); } diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 14a7481c97d7..ff9f6226085d 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1332,7 +1332,7 @@ static int acpi_bus_scan(acpi_handle handle, struct acpi_bus_ops *ops, status = acpi_bus_check_add(handle, 0, ops, &device); if (ACPI_SUCCESS(status)) acpi_walk_namespace(ACPI_TYPE_ANY, handle, ACPI_UINT32_MAX, - acpi_bus_check_add, ops, &device); + acpi_bus_check_add, NULL, ops, &device); if (child) *child = device; diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index 575593a8b4e6..8c1b431616df 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -101,7 +101,7 @@ long acpi_is_video_device(struct acpi_device *device) /* Only check for backlight functionality if one of the above hit. */ if (video_caps) acpi_walk_namespace(ACPI_TYPE_DEVICE, device->handle, - ACPI_UINT32_MAX, acpi_backlight_cap_match, + ACPI_UINT32_MAX, acpi_backlight_cap_match, NULL, &video_caps, NULL); return video_caps; @@ -151,7 +151,7 @@ long acpi_video_get_capabilities(acpi_handle graphics_handle) if (!graphics_handle) { /* Only do the global walk through all graphics devices once */ acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT, - ACPI_UINT32_MAX, find_video, + ACPI_UINT32_MAX, find_video, NULL, &caps, NULL); /* There might be boot param flags set already... */ acpi_video_support |= caps; @@ -173,7 +173,7 @@ long acpi_video_get_capabilities(acpi_handle graphics_handle) return 0; } acpi_walk_namespace(ACPI_TYPE_DEVICE, graphics_handle, - ACPI_UINT32_MAX, find_video, + ACPI_UINT32_MAX, find_video, NULL, &caps, NULL); } ACPI_DEBUG_PRINT((ACPI_DB_INFO, "We have 0x%lX video support %s %s\n", diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index 05598ae10c4b..eb365021bb5a 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -899,7 +899,7 @@ static int intel_lid_present(void) acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX, - check_lid_device, &lid_present, NULL); + check_lid_device, NULL, &lid_present, NULL); return lid_present; } diff --git a/drivers/i2c/busses/i2c-scmi.c b/drivers/i2c/busses/i2c-scmi.c index b4a55d407bf5..365e0becaf12 100644 --- a/drivers/i2c/busses/i2c-scmi.c +++ b/drivers/i2c/busses/i2c-scmi.c @@ -363,7 +363,7 @@ static int acpi_smbus_cmi_add(struct acpi_device *device) smbus_cmi->cap_write = 0; acpi_walk_namespace(ACPI_TYPE_METHOD, smbus_cmi->handle, 1, - acpi_smbus_cmi_query_methods, smbus_cmi, NULL); + acpi_smbus_cmi_query_methods, NULL, smbus_cmi, NULL); if (smbus_cmi->cap_info == 0) goto err; diff --git a/drivers/pci/hotplug/acpi_pcihp.c b/drivers/pci/hotplug/acpi_pcihp.c index a73028ec52e5..0f32571b94df 100644 --- a/drivers/pci/hotplug/acpi_pcihp.c +++ b/drivers/pci/hotplug/acpi_pcihp.c @@ -471,7 +471,7 @@ int acpi_pci_detect_ejectable(acpi_handle handle) return found; acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, 1, - check_hotplug, (void *)&found, NULL); + check_hotplug, NULL, (void *)&found, NULL); return found; } EXPORT_SYMBOL_GPL(acpi_pci_detect_ejectable); diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index 58d25a163a8b..df1b0ea089d1 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -266,7 +266,7 @@ static int detect_ejectable_slots(acpi_handle handle) int found = acpi_pci_detect_ejectable(handle); if (!found) { acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, (u32)1, - is_pci_dock_device, (void *)&found, NULL); + is_pci_dock_device, NULL, (void *)&found, NULL); } return found; } @@ -281,7 +281,7 @@ static void init_bridge_misc(struct acpiphp_bridge *bridge) /* register all slot objects under this bridge */ status = acpi_walk_namespace(ACPI_TYPE_DEVICE, bridge->handle, (u32)1, - register_slot, bridge, NULL); + register_slot, NULL, bridge, NULL); if (ACPI_FAILURE(status)) { list_del(&bridge->list); return; @@ -447,7 +447,7 @@ find_p2p_bridge(acpi_handle handle, u32 lvl, void *context, void **rv) /* search P2P bridges under this p2p bridge */ status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, (u32)1, - find_p2p_bridge, NULL, NULL); + find_p2p_bridge, NULL, NULL, NULL); if (ACPI_FAILURE(status)) warn("find_p2p_bridge failed (error code = 0x%x)\n", status); @@ -485,7 +485,7 @@ static int add_bridge(acpi_handle handle) /* search P2P bridges under this host bridge */ status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, (u32)1, - find_p2p_bridge, NULL, NULL); + find_p2p_bridge, NULL, NULL, NULL); if (ACPI_FAILURE(status)) warn("find_p2p_bridge failed (error code = 0x%x)\n", status); @@ -573,7 +573,7 @@ cleanup_p2p_bridge(acpi_handle handle, u32 lvl, void *context, void **rv) /* cleanup p2p bridges under this P2P bridge in a depth-first manner */ acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, (u32)1, - cleanup_p2p_bridge, NULL, NULL); + cleanup_p2p_bridge, NULL, NULL, NULL); bridge = acpiphp_handle_to_bridge(handle); if (bridge) @@ -589,7 +589,7 @@ static void remove_bridge(acpi_handle handle) /* cleanup p2p bridges under this host bridge in a depth-first manner */ acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, - (u32)1, cleanup_p2p_bridge, NULL, NULL); + (u32)1, cleanup_p2p_bridge, NULL, NULL, NULL); /* * On root bridges with hotplug slots directly underneath (ie, @@ -778,7 +778,7 @@ static int acpiphp_configure_ioapics(acpi_handle handle) { ioapic_add(handle, 0, NULL, NULL); acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, - ACPI_UINT32_MAX, ioapic_add, NULL, NULL); + ACPI_UINT32_MAX, ioapic_add, NULL, NULL, NULL); return 0; } @@ -786,7 +786,7 @@ static int acpiphp_unconfigure_ioapics(acpi_handle handle) { ioapic_remove(handle, 0, NULL, NULL); acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, - ACPI_UINT32_MAX, ioapic_remove, NULL, NULL); + ACPI_UINT32_MAX, ioapic_remove, NULL, NULL, NULL); return 0; } @@ -1367,7 +1367,7 @@ static void handle_hotplug_event_bridge(acpi_handle handle, u32 type, void *cont bridge = acpiphp_handle_to_bridge(handle); if (type == ACPI_NOTIFY_BUS_CHECK) { acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, ACPI_UINT32_MAX, - count_sub_bridges, &num_sub_bridges, NULL); + count_sub_bridges, NULL, &num_sub_bridges, NULL); } if (!bridge && !num_sub_bridges) { @@ -1388,7 +1388,7 @@ static void handle_hotplug_event_bridge(acpi_handle handle, u32 type, void *cont } if (num_sub_bridges) acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, - ACPI_UINT32_MAX, check_sub_bridges, NULL, NULL); + ACPI_UINT32_MAX, check_sub_bridges, NULL, NULL, NULL); break; case ACPI_NOTIFY_DEVICE_CHECK: @@ -1512,7 +1512,7 @@ int __init acpiphp_glue_init(void) int num = 0; acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT, - ACPI_UINT32_MAX, find_root_bridges, &num, NULL); + ACPI_UINT32_MAX, find_root_bridges, NULL, &num, NULL); if (num <= 0) return -1; diff --git a/drivers/pci/hotplug/acpiphp_ibm.c b/drivers/pci/hotplug/acpiphp_ibm.c index e7be66dbac21..aa5df485f8cf 100644 --- a/drivers/pci/hotplug/acpiphp_ibm.c +++ b/drivers/pci/hotplug/acpiphp_ibm.c @@ -434,7 +434,7 @@ static int __init ibm_acpiphp_init(void) dbg("%s\n", __func__); if (acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT, - ACPI_UINT32_MAX, ibm_find_acpi_device, + ACPI_UINT32_MAX, ibm_find_acpi_device, NULL, &ibm_acpi_handle, NULL) != FOUND_APCI) { err("%s: acpi_walk_namespace failed\n", __func__); retval = -ENODEV; diff --git a/drivers/platform/x86/intel_menlow.c b/drivers/platform/x86/intel_menlow.c index 29432a50be45..f0a90a6bf396 100644 --- a/drivers/platform/x86/intel_menlow.c +++ b/drivers/platform/x86/intel_menlow.c @@ -510,7 +510,7 @@ static int __init intel_menlow_module_init(void) /* Looking for sensors in each ACPI thermal zone */ status = acpi_walk_namespace(ACPI_TYPE_THERMAL, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX, - intel_menlow_register_sensor, NULL, NULL); + intel_menlow_register_sensor, NULL, NULL, NULL); if (ACPI_FAILURE(status)) return -ENODEV; diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c index a2a742c8ff7e..7a2cc8a5c975 100644 --- a/drivers/platform/x86/sony-laptop.c +++ b/drivers/platform/x86/sony-laptop.c @@ -1203,7 +1203,7 @@ static int sony_nc_add(struct acpi_device *device) if (debug) { status = acpi_walk_namespace(ACPI_TYPE_METHOD, sony_nc_acpi_handle, - 1, sony_walk_callback, NULL, NULL); + 1, sony_walk_callback, NULL, NULL, NULL); if (ACPI_FAILURE(status)) { printk(KERN_WARNING DRV_PFX "unable to walk acpi resources\n"); result = -ENODEV; diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index d93108d148fc..22cb50fe2b2c 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -1087,7 +1087,7 @@ static int __init tpacpi_check_std_acpi_brightness_support(void) */ status = acpi_walk_namespace(ACPI_TYPE_METHOD, vid_handle, 3, - tpacpi_acpi_walk_find_bcl, NULL, + tpacpi_acpi_walk_find_bcl, NULL, NULL, &bcl_ptr); if (ACPI_SUCCESS(status) && bcl_levels > 2) { diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 0e7efeacf6cb..f9b8b28ad802 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -154,7 +154,8 @@ acpi_status acpi_walk_namespace(acpi_object_type type, acpi_handle start_object, u32 max_depth, - acpi_walk_callback user_function, + acpi_walk_callback pre_order_visit, + acpi_walk_callback post_order_visit, void *context, void **return_value); acpi_status -- cgit v1.3-8-gc7d7 From b00eb796f1b67c46036b5490e83b31741f1eebaf Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Fri, 13 Nov 2009 10:01:43 +0800 Subject: ACPICA: Update version to 20091112. Version 20091112. Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown --- include/acpi/acpixf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index f9b8b28ad802..5e1ad3cd1bbd 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -47,7 +47,7 @@ /* Current ACPICA subsystem version in YYYYMMDD format */ -#define ACPI_CA_VERSION 0x20091013 +#define ACPI_CA_VERSION 0x20091112 #include "actypes.h" #include "actbl.h" -- cgit v1.3-8-gc7d7 From 35a8a3fdcd4f973a5430e868f2f2a5c363803a5b Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 25 Nov 2009 17:50:00 +0100 Subject: drbd: moved CN_IDX_DRBD and CN_VAL_DRBD to the right file Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/connector.h | 2 ++ include/linux/drbd.h | 7 ------- 2 files changed, 2 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/connector.h b/include/linux/connector.h index 3a14615fd35c..72ba63eb83c5 100644 --- a/include/linux/connector.h +++ b/include/linux/connector.h @@ -43,6 +43,8 @@ #define CN_DST_VAL 0x1 #define CN_IDX_DM 0x7 /* Device Mapper */ #define CN_VAL_DM_USERSPACE_LOG 0x1 +#define CN_IDX_DRBD 0x8 +#define CN_VAL_DRBD 0x1 #define CN_NETLINK_USERS 8 diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 99a4d76694ed..e84f4733cb55 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -322,13 +322,6 @@ enum drbd_timeout_flag { #define DRBD_NL_CREATE_DEVICE 0x01 #define DRBD_NL_SET_DEFAULTS 0x02 -/* The following line should be moved over to linux/connector.h - * when the time comes */ -#ifndef CN_IDX_DRBD -# define CN_IDX_DRBD 0x4 -/* Ubuntu "intrepid ibex" release defined CN_IDX_DRBD as 0x6 */ -#endif -#define CN_VAL_DRBD 0x1 /* For searching a vacant cn_idx value */ #define CN_IDX_STEP 6977 -- cgit v1.3-8-gc7d7 From b8007ef7422270864eae523cb38d7522a53a94d3 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 3 Nov 2009 13:45:32 +0800 Subject: tracing: Separate raw syscall from syscall tracer The current syscall tracer mixes raw syscalls and real syscalls. echo 1 > events/syscalls/enable And we get these from the output: (XXXX insteads " grep-20914 [001] 588211.446347" .. etc) XXXX: sys_read(fd: 3, buf: 80609a8, count: 7000) XXXX: sys_enter: NR 3 (3, 80609a8, 7000, a, 1000, bfce8ef8) XXXX: sys_read -> 0x138 XXXX: sys_exit: NR 3 = 312 XXXX: sys_read(fd: 3, buf: 8060ae0, count: 7000) XXXX: sys_enter: NR 3 (3, 8060ae0, 7000, a, 1000, bfce8ef8) XXXX: sys_read -> 0x138 XXXX: sys_exit: NR 3 = 312 There are 2 drawbacks here. A) two almost identical records are saved in ringbuffer when a syscall enters or exits. (4 records for every syscall) This wastes precious space in the ring buffer. B) the lines including "sys_enter/sys_exit" produces hardly any useful information for the output (no labels). The user can use this method to prevent these drawbacks: echo 1 > events/syscalls/enable echo 0 > events/syscalls/sys_enter/enable echo 0 > events/syscalls/sys_exit/enable But this is not user friendly. So we separate raw syscall from syscall tracer. After this fix applied: syscall tracer's output (echo 1 > events/syscalls/enable): XXXX: sys_read(fd: 3, buf: bfe87d88, count: 200) XXXX: sys_read -> 0x200 XXXX: sys_fstat64(fd: 3, statbuf: bfe87c98) XXXX: sys_fstat64 -> 0x0 XXXX: sys_close(fd: 3) raw syscall tracer's output (echo 1 > events/raw_syscalls/enable): XXXX: sys_enter: NR 175 (0, bf92bf18, bf92bf98, 8, b748cff4, bf92bef8) XXXX: sys_exit: NR 175 = 0 XXXX: sys_enter: NR 175 (2, bf92bf98, 0, 8, b748cff4, bf92bef8) XXXX: sys_exit: NR 175 = 0 XXXX: sys_enter: NR 3 (9, bf927f9c, 4000, b77e2518, b77dce60, bf92bff8) Signed-off-by: Lai Jiangshan LKML-Reference: <4AEFC37C.5080609@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- include/trace/events/syscalls.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/syscalls.h b/include/trace/events/syscalls.h index 397dff2dbd5a..fb726ac7caee 100644 --- a/include/trace/events/syscalls.h +++ b/include/trace/events/syscalls.h @@ -1,5 +1,6 @@ #undef TRACE_SYSTEM -#define TRACE_SYSTEM syscalls +#define TRACE_SYSTEM raw_syscalls +#define TRACE_INCLUDE_FILE syscalls #if !defined(_TRACE_EVENTS_SYSCALLS_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_EVENTS_SYSCALLS_H -- cgit v1.3-8-gc7d7 From c0fa59df7214e546f8a37bc677867ac7b67b5c93 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 19 Nov 2009 11:36:10 +0000 Subject: ASoC: Add BCLK calculation utility for TDM mode too Acked-by: Liam Girdwood Signed-off-by: Mark Brown --- include/sound/soc.h | 1 + sound/soc/soc-utils.c | 6 ++++++ 2 files changed, 7 insertions(+) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index 13b117aac5d9..0d7718f9280d 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -230,6 +230,7 @@ int snd_soc_new_pcms(struct snd_soc_device *socdev, int idx, const char *xid); /* Utility functions to get clock rates from various things */ int snd_soc_calc_frame_size(int sample_size, int channels, int tdm_slots); int snd_soc_params_to_frame_size(struct snd_pcm_hw_params *params); +int snd_soc_calc_bclk(int fs, int sample_size, int channels, int tdm_slots); int snd_soc_params_to_bclk(struct snd_pcm_hw_params *parms); /* set runtime hw params */ diff --git a/sound/soc/soc-utils.c b/sound/soc/soc-utils.c index b16aaaeb0aab..1d07b931f3d8 100644 --- a/sound/soc/soc-utils.c +++ b/sound/soc/soc-utils.c @@ -54,6 +54,12 @@ int snd_soc_params_to_frame_size(struct snd_pcm_hw_params *params) } EXPORT_SYMBOL_GPL(snd_soc_params_to_frame_size); +int snd_soc_calc_bclk(int fs, int sample_size, int channels, int tdm_slots) +{ + return fs * snd_soc_calc_frame_size(sample_size, channels, tdm_slots); +} +EXPORT_SYMBOL_GPL(snd_soc_calc_bclk); + int snd_soc_params_to_bclk(struct snd_pcm_hw_params *params) { int ret; -- cgit v1.3-8-gc7d7 From 4e242d1616781f9f1f0b01abf878700b259cd8b5 Mon Sep 17 00:00:00 2001 From: Martin Willi Date: Wed, 25 Nov 2009 00:29:51 +0000 Subject: xfrm: Define new XFRM netlink auth attribute with specified truncation bits The new XFRMA_ALG_AUTH_TRUNC attribute taking a xfrm_algo_auth as argument allows the installation of authentication algorithms with a truncation length specified in userspace, i.e. SHA256 with 128 bit instead of 96 bit truncation. Signed-off-by: Martin Willi Signed-off-by: David S. Miller --- include/linux/xfrm.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index 3246f0e66bcc..29e04beb1fc9 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -90,6 +90,13 @@ struct xfrm_algo { char alg_key[0]; }; +struct xfrm_algo_auth { + char alg_name[64]; + unsigned int alg_key_len; /* in bits */ + unsigned int alg_trunc_len; /* in bits */ + char alg_key[0]; +}; + struct xfrm_algo_aead { char alg_name[64]; unsigned int alg_key_len; /* in bits */ @@ -274,6 +281,7 @@ enum xfrm_attr_type_t { XFRMA_MIGRATE, XFRMA_ALG_AEAD, /* struct xfrm_algo_aead */ XFRMA_KMADDRESS, /* struct xfrm_user_kmaddress */ + XFRMA_ALG_AUTH_TRUNC, /* struct xfrm_algo_auth */ __XFRMA_MAX #define XFRMA_MAX (__XFRMA_MAX - 1) -- cgit v1.3-8-gc7d7 From 4447bb33f09444920a8f1d89e1540137429351b6 Mon Sep 17 00:00:00 2001 From: Martin Willi Date: Wed, 25 Nov 2009 00:29:52 +0000 Subject: xfrm: Store aalg in xfrm_state with a user specified truncation length Adding a xfrm_state requires an authentication algorithm specified either as xfrm_algo or as xfrm_algo_auth with a specific truncation length. For compatibility, both attributes are dumped to userspace, and we also accept both attributes, but prefer the new syntax. If no truncation length is specified, or the authentication algorithm is specified using xfrm_algo, the truncation length from the algorithm description in the kernel is used. Signed-off-by: Martin Willi Signed-off-by: David S. Miller --- include/net/xfrm.h | 12 ++++- net/xfrm/xfrm_state.c | 2 +- net/xfrm/xfrm_user.c | 129 ++++++++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 133 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 93d184b91a8c..6d85861ab990 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -162,7 +162,7 @@ struct xfrm_state { struct xfrm_lifetime_cfg lft; /* Data for transformer */ - struct xfrm_algo *aalg; + struct xfrm_algo_auth *aalg; struct xfrm_algo *ealg; struct xfrm_algo *calg; struct xfrm_algo_aead *aead; @@ -1532,12 +1532,22 @@ static inline int xfrm_alg_len(struct xfrm_algo *alg) return sizeof(*alg) + ((alg->alg_key_len + 7) / 8); } +static inline int xfrm_alg_auth_len(struct xfrm_algo_auth *alg) +{ + return sizeof(*alg) + ((alg->alg_key_len + 7) / 8); +} + #ifdef CONFIG_XFRM_MIGRATE static inline struct xfrm_algo *xfrm_algo_clone(struct xfrm_algo *orig) { return kmemdup(orig, xfrm_alg_len(orig), GFP_KERNEL); } +static inline struct xfrm_algo_auth *xfrm_algo_auth_clone(struct xfrm_algo_auth *orig) +{ + return kmemdup(orig, xfrm_alg_auth_len(orig), GFP_KERNEL); +} + static inline void xfrm_states_put(struct xfrm_state **states, int n) { int i; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index e9ac0cec0877..d847f1a52b44 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1114,7 +1114,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp) x->props.saddr = orig->props.saddr; if (orig->aalg) { - x->aalg = xfrm_algo_clone(orig->aalg); + x->aalg = xfrm_algo_auth_clone(orig->aalg); if (!x->aalg) goto error; } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index b95a2d64eb59..fb42d778d278 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -62,6 +62,22 @@ static int verify_one_alg(struct nlattr **attrs, enum xfrm_attr_type_t type) return 0; } +static int verify_auth_trunc(struct nlattr **attrs) +{ + struct nlattr *rt = attrs[XFRMA_ALG_AUTH_TRUNC]; + struct xfrm_algo_auth *algp; + + if (!rt) + return 0; + + algp = nla_data(rt); + if (nla_len(rt) < xfrm_alg_auth_len(algp)) + return -EINVAL; + + algp->alg_name[CRYPTO_MAX_ALG_NAME - 1] = '\0'; + return 0; +} + static int verify_aead(struct nlattr **attrs) { struct nlattr *rt = attrs[XFRMA_ALG_AEAD]; @@ -128,7 +144,8 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, err = -EINVAL; switch (p->id.proto) { case IPPROTO_AH: - if (!attrs[XFRMA_ALG_AUTH] || + if ((!attrs[XFRMA_ALG_AUTH] && + !attrs[XFRMA_ALG_AUTH_TRUNC]) || attrs[XFRMA_ALG_AEAD] || attrs[XFRMA_ALG_CRYPT] || attrs[XFRMA_ALG_COMP]) @@ -139,10 +156,12 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, if (attrs[XFRMA_ALG_COMP]) goto out; if (!attrs[XFRMA_ALG_AUTH] && + !attrs[XFRMA_ALG_AUTH_TRUNC] && !attrs[XFRMA_ALG_CRYPT] && !attrs[XFRMA_ALG_AEAD]) goto out; if ((attrs[XFRMA_ALG_AUTH] || + attrs[XFRMA_ALG_AUTH_TRUNC] || attrs[XFRMA_ALG_CRYPT]) && attrs[XFRMA_ALG_AEAD]) goto out; @@ -152,6 +171,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, if (!attrs[XFRMA_ALG_COMP] || attrs[XFRMA_ALG_AEAD] || attrs[XFRMA_ALG_AUTH] || + attrs[XFRMA_ALG_AUTH_TRUNC] || attrs[XFRMA_ALG_CRYPT]) goto out; break; @@ -161,6 +181,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, case IPPROTO_ROUTING: if (attrs[XFRMA_ALG_COMP] || attrs[XFRMA_ALG_AUTH] || + attrs[XFRMA_ALG_AUTH_TRUNC] || attrs[XFRMA_ALG_AEAD] || attrs[XFRMA_ALG_CRYPT] || attrs[XFRMA_ENCAP] || @@ -176,6 +197,8 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, if ((err = verify_aead(attrs))) goto out; + if ((err = verify_auth_trunc(attrs))) + goto out; if ((err = verify_one_alg(attrs, XFRMA_ALG_AUTH))) goto out; if ((err = verify_one_alg(attrs, XFRMA_ALG_CRYPT))) @@ -229,6 +252,66 @@ static int attach_one_algo(struct xfrm_algo **algpp, u8 *props, return 0; } +static int attach_auth(struct xfrm_algo_auth **algpp, u8 *props, + struct nlattr *rta) +{ + struct xfrm_algo *ualg; + struct xfrm_algo_auth *p; + struct xfrm_algo_desc *algo; + + if (!rta) + return 0; + + ualg = nla_data(rta); + + algo = xfrm_aalg_get_byname(ualg->alg_name, 1); + if (!algo) + return -ENOSYS; + *props = algo->desc.sadb_alg_id; + + p = kmalloc(sizeof(*p) + (ualg->alg_key_len + 7) / 8, GFP_KERNEL); + if (!p) + return -ENOMEM; + + strcpy(p->alg_name, algo->name); + p->alg_key_len = ualg->alg_key_len; + p->alg_trunc_len = algo->uinfo.auth.icv_truncbits; + memcpy(p->alg_key, ualg->alg_key, (ualg->alg_key_len + 7) / 8); + + *algpp = p; + return 0; +} + +static int attach_auth_trunc(struct xfrm_algo_auth **algpp, u8 *props, + struct nlattr *rta) +{ + struct xfrm_algo_auth *p, *ualg; + struct xfrm_algo_desc *algo; + + if (!rta) + return 0; + + ualg = nla_data(rta); + + algo = xfrm_aalg_get_byname(ualg->alg_name, 1); + if (!algo) + return -ENOSYS; + if (ualg->alg_trunc_len > algo->uinfo.auth.icv_fullbits) + return -EINVAL; + *props = algo->desc.sadb_alg_id; + + p = kmemdup(ualg, xfrm_alg_auth_len(ualg), GFP_KERNEL); + if (!p) + return -ENOMEM; + + strcpy(p->alg_name, algo->name); + if (!p->alg_trunc_len) + p->alg_trunc_len = algo->uinfo.auth.icv_truncbits; + + *algpp = p; + return 0; +} + static int attach_aead(struct xfrm_algo_aead **algpp, u8 *props, struct nlattr *rta) { @@ -332,10 +415,14 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, if ((err = attach_aead(&x->aead, &x->props.ealgo, attrs[XFRMA_ALG_AEAD]))) goto error; - if ((err = attach_one_algo(&x->aalg, &x->props.aalgo, - xfrm_aalg_get_byname, - attrs[XFRMA_ALG_AUTH]))) + if ((err = attach_auth_trunc(&x->aalg, &x->props.aalgo, + attrs[XFRMA_ALG_AUTH_TRUNC]))) goto error; + if (!x->props.aalgo) { + if ((err = attach_auth(&x->aalg, &x->props.aalgo, + attrs[XFRMA_ALG_AUTH]))) + goto error; + } if ((err = attach_one_algo(&x->ealg, &x->props.ealgo, xfrm_ealg_get_byname, attrs[XFRMA_ALG_CRYPT]))) @@ -548,6 +635,24 @@ static int copy_sec_ctx(struct xfrm_sec_ctx *s, struct sk_buff *skb) return 0; } +static int copy_to_user_auth(struct xfrm_algo_auth *auth, struct sk_buff *skb) +{ + struct xfrm_algo *algo; + struct nlattr *nla; + + nla = nla_reserve(skb, XFRMA_ALG_AUTH, + sizeof(*algo) + (auth->alg_key_len + 7) / 8); + if (!nla) + return -EMSGSIZE; + + algo = nla_data(nla); + strcpy(algo->alg_name, auth->alg_name); + memcpy(algo->alg_key, auth->alg_key, (auth->alg_key_len + 7) / 8); + algo->alg_key_len = auth->alg_key_len; + + return 0; +} + /* Don't change this without updating xfrm_sa_len! */ static int copy_to_user_state_extra(struct xfrm_state *x, struct xfrm_usersa_info *p, @@ -563,8 +668,13 @@ static int copy_to_user_state_extra(struct xfrm_state *x, if (x->aead) NLA_PUT(skb, XFRMA_ALG_AEAD, aead_len(x->aead), x->aead); - if (x->aalg) - NLA_PUT(skb, XFRMA_ALG_AUTH, xfrm_alg_len(x->aalg), x->aalg); + if (x->aalg) { + if (copy_to_user_auth(x->aalg, skb)) + goto nla_put_failure; + + NLA_PUT(skb, XFRMA_ALG_AUTH_TRUNC, + xfrm_alg_auth_len(x->aalg), x->aalg); + } if (x->ealg) NLA_PUT(skb, XFRMA_ALG_CRYPT, xfrm_alg_len(x->ealg), x->ealg); if (x->calg) @@ -2117,8 +2227,11 @@ static inline size_t xfrm_sa_len(struct xfrm_state *x) size_t l = 0; if (x->aead) l += nla_total_size(aead_len(x->aead)); - if (x->aalg) - l += nla_total_size(xfrm_alg_len(x->aalg)); + if (x->aalg) { + l += nla_total_size(sizeof(struct xfrm_algo) + + (x->aalg->alg_key_len + 7) / 8); + l += nla_total_size(xfrm_alg_auth_len(x->aalg)); + } if (x->ealg) l += nla_total_size(xfrm_alg_len(x->ealg)); if (x->calg) -- cgit v1.3-8-gc7d7 From 091ad3658e3c76c5fb05f65bfb64a0246f8f31b5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 26 Nov 2009 09:04:55 +0100 Subject: events: Rename TRACE_EVENT_TEMPLATE() to DECLARE_EVENT_CLASS() It is not quite obvious at first sight what TRACE_EVENT_TEMPLATE does: does it define an event as well beyond defining a template? To clarify this, rename it to DECLARE_EVENT_CLASS, which follows the various 'DECLARE_*()' idioms we already have in the kernel: DECLARE_EVENT_CLASS(class) DEFINE_EVENT(class, event1) DEFINE_EVENT(class, event2) DEFINE_EVENT(class, event3) To complete this logic we should also rename TRACE_EVENT() to: DEFINE_SINGLE_EVENT(single_event) ... but in a more quiet moment of the kernel cycle. Cc: Pekka Enberg Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B0E286A.2000405@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/tracepoint.h | 2 +- include/trace/define_trace.h | 2 +- include/trace/events/sched.h | 6 +++--- include/trace/ftrace.h | 46 ++++++++++++++++++++++---------------------- 4 files changed, 28 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 7063383cca13..f59604ed0ec6 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -280,7 +280,7 @@ static inline void tracepoint_synchronize_unregister(void) * TRACE_EVENT_FN to perform any (un)registration work. */ -#define TRACE_EVENT_TEMPLATE(name, proto, args, tstruct, assign, print) +#define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print) #define DEFINE_EVENT(template, name, proto, args) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) #define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index 5d7d855ae21e..5acfb1eb4df9 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h @@ -71,7 +71,7 @@ #undef TRACE_EVENT #undef TRACE_EVENT_FN -#undef TRACE_EVENT_TEMPLATE +#undef DECLARE_EVENT_CLASS #undef DEFINE_EVENT #undef DEFINE_EVENT_PRINT #undef TRACE_HEADER_MULTI_READ diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 238f74b58486..5ce795021851 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -83,7 +83,7 @@ TRACE_EVENT(sched_wait_task, * (NOTE: the 'rq' argument is not used by generic trace events, * but used by the latency tracer plugin. ) */ -TRACE_EVENT_TEMPLATE(sched_wakeup_template, +DECLARE_EVENT_CLASS(sched_wakeup_template, TP_PROTO(struct rq *rq, struct task_struct *p, int success), @@ -197,7 +197,7 @@ TRACE_EVENT(sched_migrate_task, __entry->orig_cpu, __entry->dest_cpu) ); -TRACE_EVENT_TEMPLATE(sched_process_template, +DECLARE_EVENT_CLASS(sched_process_template, TP_PROTO(struct task_struct *p), @@ -316,7 +316,7 @@ TRACE_EVENT(sched_signal_send, * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE * adding sched_stat support to SCHED_FIFO/RR would be welcome. */ -TRACE_EVENT_TEMPLATE(sched_stat_template, +DECLARE_EVENT_CLASS(sched_stat_template, TP_PROTO(struct task_struct *tsk, u64 delay), diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index b0461772bc8d..2c9c073e45ad 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -19,17 +19,17 @@ #include /* - * TRACE_EVENT_TEMPLATE can be used to add a generic function + * DECLARE_EVENT_CLASS can be used to add a generic function * handlers for events. That is, if all events have the same * parameters and just have distinct trace points. * Each tracepoint can be defined with DEFINE_EVENT and that - * will map the TRACE_EVENT_TEMPLATE to the tracepoint. + * will map the DECLARE_EVENT_CLASS to the tracepoint. * * TRACE_EVENT is a one to one mapping between tracepoint and template. */ #undef TRACE_EVENT #define TRACE_EVENT(name, proto, args, tstruct, assign, print) \ - TRACE_EVENT_TEMPLATE(name, \ + DECLARE_EVENT_CLASS(name, \ PARAMS(proto), \ PARAMS(args), \ PARAMS(tstruct), \ @@ -56,8 +56,8 @@ #undef TP_STRUCT__entry #define TP_STRUCT__entry(args...) args -#undef TRACE_EVENT_TEMPLATE -#define TRACE_EVENT_TEMPLATE(name, proto, args, tstruct, assign, print) \ +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print) \ struct ftrace_raw_##name { \ struct trace_entry ent; \ tstruct \ @@ -115,8 +115,8 @@ #undef __string #define __string(item, src) __dynamic_array(char, item, -1) -#undef TRACE_EVENT_TEMPLATE -#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) \ +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ struct ftrace_data_offsets_##call { \ tstruct; \ }; @@ -203,8 +203,8 @@ #undef TP_perf_assign #define TP_perf_assign(args...) -#undef TRACE_EVENT_TEMPLATE -#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, func, print) \ +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, func, print) \ static int \ ftrace_format_setup_##call(struct ftrace_event_call *unused, \ struct trace_seq *s) \ @@ -321,8 +321,8 @@ ftrace_format_##name(struct ftrace_event_call *unused, \ ftrace_print_symbols_seq(p, value, symbols); \ }) -#undef TRACE_EVENT_TEMPLATE -#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) \ +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ static enum print_line_t \ ftrace_raw_output_id_##call(int event_id, const char *name, \ struct trace_iterator *iter, int flags) \ @@ -428,8 +428,8 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ #undef __string #define __string(item, src) __dynamic_array(char, item, -1) -#undef TRACE_EVENT_TEMPLATE -#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, func, print) \ +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, func, print) \ static int \ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ { \ @@ -480,8 +480,8 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ #undef __string #define __string(item, src) __dynamic_array(char, item, strlen(src) + 1) -#undef TRACE_EVENT_TEMPLATE -#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) \ +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ static inline int ftrace_get_offsets_##call( \ struct ftrace_data_offsets_##call *__data_offsets, proto) \ { \ @@ -521,8 +521,8 @@ static inline int ftrace_get_offsets_##call( \ * */ -#undef TRACE_EVENT_TEMPLATE -#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) #undef DEFINE_EVENT #define DEFINE_EVENT(template, name, proto, args) \ @@ -681,8 +681,8 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\ #define __assign_str(dst, src) \ strcpy(__get_str(dst), src); -#undef TRACE_EVENT_TEMPLATE -#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) \ +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ \ static void ftrace_raw_event_id_##call(struct ftrace_event_call *event_call, \ proto) \ @@ -764,8 +764,8 @@ static int ftrace_raw_init_event_##call(struct ftrace_event_call *unused)\ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) -#undef TRACE_EVENT_TEMPLATE -#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) #undef DEFINE_EVENT #define DEFINE_EVENT(template, call, proto, args) \ @@ -885,8 +885,8 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ #undef __perf_count #define __perf_count(c) __count = (c) -#undef TRACE_EVENT_TEMPLATE -#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) \ +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ static void \ ftrace_profile_templ_##call(struct ftrace_event_call *event_call, \ proto) \ -- cgit v1.3-8-gc7d7 From 925684d6d589e40e41007edf47c69e729d911263 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 26 Nov 2009 15:03:23 +0800 Subject: tracing: Convert module refcnt events to DEFINE_EVENT Use DECLARE_EVENT_CLASS to remove duplicate code: text data bss dec hex filename 29854 1980 128 31962 7cda kernel/module.o.old 28750 1980 128 30858 788a kernel/module.o Two events are converted: module_refcnt: module_get, module_put No change in functionality. Signed-off-by: Li Zefan Cc: Rusty Russell Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B0E283B.3010508@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/trace/events/module.h | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/trace/events/module.h b/include/trace/events/module.h index 84160fb18478..4b0f48ba16a6 100644 --- a/include/trace/events/module.h +++ b/include/trace/events/module.h @@ -51,7 +51,7 @@ TRACE_EVENT(module_free, TP_printk("%s", __get_str(name)) ); -TRACE_EVENT(module_get, +DECLARE_EVENT_CLASS(module_refcnt, TP_PROTO(struct module *mod, unsigned long ip, int refcnt), @@ -73,26 +73,18 @@ TRACE_EVENT(module_get, __get_str(name), (void *)__entry->ip, __entry->refcnt) ); -TRACE_EVENT(module_put, +DEFINE_EVENT(module_refcnt, module_get, TP_PROTO(struct module *mod, unsigned long ip, int refcnt), - TP_ARGS(mod, ip, refcnt), + TP_ARGS(mod, ip, refcnt) +); - TP_STRUCT__entry( - __field( unsigned long, ip ) - __field( int, refcnt ) - __string( name, mod->name ) - ), +DEFINE_EVENT(module_refcnt, module_put, - TP_fast_assign( - __entry->ip = ip; - __entry->refcnt = refcnt; - __assign_str(name, mod->name); - ), + TP_PROTO(struct module *mod, unsigned long ip, int refcnt), - TP_printk("%s call_site=%pf refcnt=%d", - __get_str(name), (void *)__entry->ip, __entry->refcnt) + TP_ARGS(mod, ip, refcnt) ); TRACE_EVENT(module_request, -- cgit v1.3-8-gc7d7 From 53d0422c2d10808fddb2c30859193bfea164c7e3 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 26 Nov 2009 15:04:10 +0800 Subject: tracing: Convert some kmem events to DEFINE_EVENT Use DECLARE_EVENT_CLASS to remove duplicate code: text data bss dec hex filename 333987 69800 27228 431015 693a7 mm/built-in.o.old 330030 69800 27228 427058 68432 mm/built-in.o 8 events are converted: kmem_alloc: kmalloc, kmem_cache_alloc kmem_alloc_node: kmalloc_node, kmem_cache_alloc_node kmem_free: kfree, kmem_cache_free mm_page: mm_page_alloc_zone_locked, mm_page_pcpu_drain No change in functionality. Signed-off-by: Li Zefan Acked-by: Pekka Enberg Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Mel Gorman LKML-Reference: <4B0E286A.2000405@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/trace/events/kmem.h | 130 ++++++++++++++------------------------------ mm/page_alloc.c | 4 +- mm/util.c | 3 - 3 files changed, 43 insertions(+), 94 deletions(-) (limited to 'include') diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index eaf46bdd18a5..3adca0ca9dbe 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h @@ -44,7 +44,7 @@ {(unsigned long)__GFP_MOVABLE, "GFP_MOVABLE"} \ ) : "GFP_NOWAIT" -TRACE_EVENT(kmalloc, +DECLARE_EVENT_CLASS(kmem_alloc, TP_PROTO(unsigned long call_site, const void *ptr, @@ -78,41 +78,23 @@ TRACE_EVENT(kmalloc, show_gfp_flags(__entry->gfp_flags)) ); -TRACE_EVENT(kmem_cache_alloc, +DEFINE_EVENT(kmem_alloc, kmalloc, - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags), + TP_PROTO(unsigned long call_site, const void *ptr, + size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags), - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags), + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags) +); - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - __field( size_t, bytes_req ) - __field( size_t, bytes_alloc ) - __field( gfp_t, gfp_flags ) - ), +DEFINE_EVENT(kmem_alloc, kmem_cache_alloc, - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - __entry->bytes_req = bytes_req; - __entry->bytes_alloc = bytes_alloc; - __entry->gfp_flags = gfp_flags; - ), + TP_PROTO(unsigned long call_site, const void *ptr, + size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags), - TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s", - __entry->call_site, - __entry->ptr, - __entry->bytes_req, - __entry->bytes_alloc, - show_gfp_flags(__entry->gfp_flags)) + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags) ); -TRACE_EVENT(kmalloc_node, +DECLARE_EVENT_CLASS(kmem_alloc_node, TP_PROTO(unsigned long call_site, const void *ptr, @@ -150,45 +132,25 @@ TRACE_EVENT(kmalloc_node, __entry->node) ); -TRACE_EVENT(kmem_cache_alloc_node, +DEFINE_EVENT(kmem_alloc_node, kmalloc_node, - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags, - int node), + TP_PROTO(unsigned long call_site, const void *ptr, + size_t bytes_req, size_t bytes_alloc, + gfp_t gfp_flags, int node), - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node), + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node) +); - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - __field( size_t, bytes_req ) - __field( size_t, bytes_alloc ) - __field( gfp_t, gfp_flags ) - __field( int, node ) - ), +DEFINE_EVENT(kmem_alloc_node, kmem_cache_alloc_node, - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - __entry->bytes_req = bytes_req; - __entry->bytes_alloc = bytes_alloc; - __entry->gfp_flags = gfp_flags; - __entry->node = node; - ), + TP_PROTO(unsigned long call_site, const void *ptr, + size_t bytes_req, size_t bytes_alloc, + gfp_t gfp_flags, int node), - TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d", - __entry->call_site, - __entry->ptr, - __entry->bytes_req, - __entry->bytes_alloc, - show_gfp_flags(__entry->gfp_flags), - __entry->node) + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node) ); -TRACE_EVENT(kfree, +DECLARE_EVENT_CLASS(kmem_free, TP_PROTO(unsigned long call_site, const void *ptr), @@ -207,23 +169,18 @@ TRACE_EVENT(kfree, TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) ); -TRACE_EVENT(kmem_cache_free, +DEFINE_EVENT(kmem_free, kfree, TP_PROTO(unsigned long call_site, const void *ptr), - TP_ARGS(call_site, ptr), + TP_ARGS(call_site, ptr) +); - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - ), +DEFINE_EVENT(kmem_free, kmem_cache_free, - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - ), + TP_PROTO(unsigned long call_site, const void *ptr), - TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) + TP_ARGS(call_site, ptr) ); TRACE_EVENT(mm_page_free_direct, @@ -299,7 +256,7 @@ TRACE_EVENT(mm_page_alloc, show_gfp_flags(__entry->gfp_flags)) ); -TRACE_EVENT(mm_page_alloc_zone_locked, +DECLARE_EVENT_CLASS(mm_page, TP_PROTO(struct page *page, unsigned int order, int migratetype), @@ -325,29 +282,22 @@ TRACE_EVENT(mm_page_alloc_zone_locked, __entry->order == 0) ); -TRACE_EVENT(mm_page_pcpu_drain, +DEFINE_EVENT(mm_page, mm_page_alloc_zone_locked, - TP_PROTO(struct page *page, int order, int migratetype), + TP_PROTO(struct page *page, unsigned int order, int migratetype), - TP_ARGS(page, order, migratetype), + TP_ARGS(page, order, migratetype) +); - TP_STRUCT__entry( - __field( struct page *, page ) - __field( int, order ) - __field( int, migratetype ) - ), +DEFINE_EVENT_PRINT(mm_page, mm_page_pcpu_drain, - TP_fast_assign( - __entry->page = page; - __entry->order = order; - __entry->migratetype = migratetype; - ), + TP_PROTO(struct page *page, unsigned int order, int migratetype), + + TP_ARGS(page, order, migratetype), TP_printk("page=%p pfn=%lu order=%d migratetype=%d", - __entry->page, - page_to_pfn(__entry->page), - __entry->order, - __entry->migratetype) + __entry->page, page_to_pfn(__entry->page), + __entry->order, __entry->migratetype) ); TRACE_EVENT(mm_page_alloc_extfrag, diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 2bc2ac63f41e..bdb22f55d006 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -48,12 +48,14 @@ #include #include #include -#include #include #include #include "internal.h" +#define CREATE_TRACE_POINTS +#include + /* * Array of node states. */ diff --git a/mm/util.c b/mm/util.c index 7c35ad95f927..15d197571b4d 100644 --- a/mm/util.c +++ b/mm/util.c @@ -6,9 +6,6 @@ #include #include -#define CREATE_TRACE_POINTS -#include - /** * kstrdup - allocate space for and copy an existing string * @s: the string to duplicate -- cgit v1.3-8-gc7d7 From c467307c1a812c3150b27a68c2b2d3397bb40a4f Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 26 Nov 2009 15:04:31 +0800 Subject: tracing: Convert softirq events to DEFINE_EVENT Use DECLARE_EVENT_CLASS to remove duplicate code: text data bss dec hex filename 12781 952 36 13769 35c9 kernel/softirq.o.old 11981 952 32 12965 32a5 kernel/softirq.o Two events are converted: softirq: softirq_entry, softirq_exit No change in functionality. Signed-off-by: Li Zefan Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B0E287F.4030708@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/trace/events/irq.h | 46 +++++++++++++++++++++------------------------- 1 file changed, 21 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h index dcfcd4407623..0e4cfb694fe7 100644 --- a/include/trace/events/irq.h +++ b/include/trace/events/irq.h @@ -82,18 +82,7 @@ TRACE_EVENT(irq_handler_exit, __entry->irq, __entry->ret ? "handled" : "unhandled") ); -/** - * softirq_entry - called immediately before the softirq handler - * @h: pointer to struct softirq_action - * @vec: pointer to first struct softirq_action in softirq_vec array - * - * The @h parameter, contains a pointer to the struct softirq_action - * which has a pointer to the action handler that is called. By subtracting - * the @vec pointer from the @h pointer, we can determine the softirq - * number. Also, when used in combination with the softirq_exit tracepoint - * we can determine the softirq latency. - */ -TRACE_EVENT(softirq_entry, +DECLARE_EVENT_CLASS(softirq, TP_PROTO(struct softirq_action *h, struct softirq_action *vec), @@ -111,6 +100,24 @@ TRACE_EVENT(softirq_entry, show_softirq_name(__entry->vec)) ); +/** + * softirq_entry - called immediately before the softirq handler + * @h: pointer to struct softirq_action + * @vec: pointer to first struct softirq_action in softirq_vec array + * + * The @h parameter, contains a pointer to the struct softirq_action + * which has a pointer to the action handler that is called. By subtracting + * the @vec pointer from the @h pointer, we can determine the softirq + * number. Also, when used in combination with the softirq_exit tracepoint + * we can determine the softirq latency. + */ +DEFINE_EVENT(softirq, softirq_entry, + + TP_PROTO(struct softirq_action *h, struct softirq_action *vec), + + TP_ARGS(h, vec) +); + /** * softirq_exit - called immediately after the softirq handler returns * @h: pointer to struct softirq_action @@ -122,22 +129,11 @@ TRACE_EVENT(softirq_entry, * combination with the softirq_entry tracepoint we can determine the softirq * latency. */ -TRACE_EVENT(softirq_exit, +DEFINE_EVENT(softirq, softirq_exit, TP_PROTO(struct softirq_action *h, struct softirq_action *vec), - TP_ARGS(h, vec), - - TP_STRUCT__entry( - __field( int, vec ) - ), - - TP_fast_assign( - __entry->vec = (int)(h - vec); - ), - - TP_printk("vec=%d [action=%s]", __entry->vec, - show_softirq_name(__entry->vec)) + TP_ARGS(h, vec) ); #endif /* _TRACE_IRQ_H */ -- cgit v1.3-8-gc7d7 From 382ece710bf88b08440b598731361e5a47582b62 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 26 Nov 2009 15:05:03 +0800 Subject: tracing: Convert some workqueue events to DEFINE_EVENT Use DECLARE_EVENT_CLASS to remove duplicate code: text data bss dec hex filename 13171 800 72 14043 36db kernel/workqueue.o.old 12243 800 68 13111 3337 kernel/workqueue.o Two events are converted: workqueue: workqueue_insertion, workqueue_execution No change in functionality. Signed-off-by: Li Zefan Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B0E289F.5010104@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/trace/events/workqueue.h | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/trace/events/workqueue.h b/include/trace/events/workqueue.h index e4612dbd7ba6..d6c974474e70 100644 --- a/include/trace/events/workqueue.h +++ b/include/trace/events/workqueue.h @@ -8,7 +8,7 @@ #include #include -TRACE_EVENT(workqueue_insertion, +DECLARE_EVENT_CLASS(workqueue, TP_PROTO(struct task_struct *wq_thread, struct work_struct *work), @@ -30,26 +30,18 @@ TRACE_EVENT(workqueue_insertion, __entry->thread_pid, __entry->func) ); -TRACE_EVENT(workqueue_execution, +DEFINE_EVENT(workqueue, workqueue_insertion, TP_PROTO(struct task_struct *wq_thread, struct work_struct *work), - TP_ARGS(wq_thread, work), + TP_ARGS(wq_thread, work) +); - TP_STRUCT__entry( - __array(char, thread_comm, TASK_COMM_LEN) - __field(pid_t, thread_pid) - __field(work_func_t, func) - ), +DEFINE_EVENT(workqueue, workqueue_execution, - TP_fast_assign( - memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN); - __entry->thread_pid = wq_thread->pid; - __entry->func = work->func; - ), + TP_PROTO(struct task_struct *wq_thread, struct work_struct *work), - TP_printk("thread=%s:%d func=%pf", __entry->thread_comm, - __entry->thread_pid, __entry->func) + TP_ARGS(wq_thread, work) ); /* Trace the creation of one workqueue thread on a cpu */ -- cgit v1.3-8-gc7d7 From 7703466b4c0a21b88d701882bef0d45bcb0a0281 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 26 Nov 2009 15:05:38 +0800 Subject: tracing: Convert some power events to DEFINE_EVENT Use DECLARE_EVENT_CLASS to remove duplicate code: text data bss dec hex filename 4312 524 12 4848 12f0 kernel/trace/power-traces.o.old 3455 524 8 3987 f93 kernel/trace/power-traces.o Two events are converted: power: power_start, power_frequency No change in functionality. Signed-off-by: Li Zefan Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Arjan van de Ven LKML-Reference: <4B0E28C2.1090906@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/trace/events/power.h | 36 +++++++++++++++--------------------- 1 file changed, 15 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/trace/events/power.h b/include/trace/events/power.h index 9bb96e5a2848..c4efe9b8280d 100644 --- a/include/trace/events/power.h +++ b/include/trace/events/power.h @@ -16,7 +16,7 @@ enum { }; #endif -TRACE_EVENT(power_start, +DECLARE_EVENT_CLASS(power, TP_PROTO(unsigned int type, unsigned int state), @@ -35,42 +35,36 @@ TRACE_EVENT(power_start, TP_printk("type=%lu state=%lu", (unsigned long)__entry->type, (unsigned long)__entry->state) ); -TRACE_EVENT(power_end, - - TP_PROTO(int dummy), +DEFINE_EVENT(power, power_start, - TP_ARGS(dummy), + TP_PROTO(unsigned int type, unsigned int state), - TP_STRUCT__entry( - __field( u64, dummy ) - ), + TP_ARGS(type, state) +); - TP_fast_assign( - __entry->dummy = 0xffff; - ), +DEFINE_EVENT(power, power_frequency, - TP_printk("dummy=%lu", (unsigned long)__entry->dummy) + TP_PROTO(unsigned int type, unsigned int state), + TP_ARGS(type, state) ); +TRACE_EVENT(power_end, -TRACE_EVENT(power_frequency, - - TP_PROTO(unsigned int type, unsigned int state), + TP_PROTO(int dummy), - TP_ARGS(type, state), + TP_ARGS(dummy), TP_STRUCT__entry( - __field( u64, type ) - __field( u64, state ) + __field( u64, dummy ) ), TP_fast_assign( - __entry->type = type; - __entry->state = state; + __entry->dummy = 0xffff; ), - TP_printk("type=%lu state=%lu", (unsigned long)__entry->type, (unsigned long) __entry->state) + TP_printk("dummy=%lu", (unsigned long)__entry->dummy) + ); #endif /* _TRACE_POWER_H */ -- cgit v1.3-8-gc7d7 From 77ca1e0294f25fc26053ba14353e703158acef26 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 26 Nov 2009 15:06:14 +0800 Subject: tracing: Convert some block events to DEFINE_EVENT use DECLARE_EVENT_CLASS to remove duplicate code: text data bss dec hex filename 53570 3284 184 57038 dece block/blk-core.o.old 43702 3284 144 47130 b81a block/blk-core.o 12 events are converted: block_rq: block_rq_insert, block_rq_issue block_rq_with_error: block_rq_{abort, requeue, complete} block_bio: block_bio_{backmerge, frontmerge, queue} block_get_rq: block_getrq, block_sleeprq block_unplug: block_unplug_timer, block_unplug_io No change in functionality. Signed-off-by: Li Zefan Cc: Jens Axboe Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B0E28E6.7060609@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/trace/events/block.h | 202 +++++++++---------------------------------- 1 file changed, 42 insertions(+), 160 deletions(-) (limited to 'include') diff --git a/include/trace/events/block.h b/include/trace/events/block.h index 00405b5f624a..5fb72733331e 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -8,7 +8,7 @@ #include #include -TRACE_EVENT(block_rq_abort, +DECLARE_EVENT_CLASS(block_rq_with_error, TP_PROTO(struct request_queue *q, struct request *rq), @@ -40,41 +40,28 @@ TRACE_EVENT(block_rq_abort, __entry->nr_sector, __entry->errors) ); -TRACE_EVENT(block_rq_insert, +DEFINE_EVENT(block_rq_with_error, block_rq_abort, TP_PROTO(struct request_queue *q, struct request *rq), - TP_ARGS(q, rq), + TP_ARGS(q, rq) +); - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( sector_t, sector ) - __field( unsigned int, nr_sector ) - __field( unsigned int, bytes ) - __array( char, rwbs, 6 ) - __array( char, comm, TASK_COMM_LEN ) - __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) - ), +DEFINE_EVENT(block_rq_with_error, block_rq_requeue, - TP_fast_assign( - __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; - __entry->sector = blk_pc_request(rq) ? 0 : blk_rq_pos(rq); - __entry->nr_sector = blk_pc_request(rq) ? 0 : blk_rq_sectors(rq); - __entry->bytes = blk_pc_request(rq) ? blk_rq_bytes(rq) : 0; + TP_PROTO(struct request_queue *q, struct request *rq), - blk_fill_rwbs_rq(__entry->rwbs, rq); - blk_dump_cmd(__get_str(cmd), rq); - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); - ), + TP_ARGS(q, rq) +); - TP_printk("%d,%d %s %u (%s) %llu + %u [%s]", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->rwbs, __entry->bytes, __get_str(cmd), - (unsigned long long)__entry->sector, - __entry->nr_sector, __entry->comm) +DEFINE_EVENT(block_rq_with_error, block_rq_complete, + + TP_PROTO(struct request_queue *q, struct request *rq), + + TP_ARGS(q, rq) ); -TRACE_EVENT(block_rq_issue, +DECLARE_EVENT_CLASS(block_rq, TP_PROTO(struct request_queue *q, struct request *rq), @@ -86,7 +73,7 @@ TRACE_EVENT(block_rq_issue, __field( unsigned int, nr_sector ) __field( unsigned int, bytes ) __array( char, rwbs, 6 ) - __array( char, comm, TASK_COMM_LEN ) + __array( char, comm, TASK_COMM_LEN ) __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) ), @@ -108,68 +95,18 @@ TRACE_EVENT(block_rq_issue, __entry->nr_sector, __entry->comm) ); -TRACE_EVENT(block_rq_requeue, +DEFINE_EVENT(block_rq, block_rq_insert, TP_PROTO(struct request_queue *q, struct request *rq), - TP_ARGS(q, rq), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( sector_t, sector ) - __field( unsigned int, nr_sector ) - __field( int, errors ) - __array( char, rwbs, 6 ) - __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) - ), - - TP_fast_assign( - __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; - __entry->sector = blk_pc_request(rq) ? 0 : blk_rq_pos(rq); - __entry->nr_sector = blk_pc_request(rq) ? 0 : blk_rq_sectors(rq); - __entry->errors = rq->errors; - - blk_fill_rwbs_rq(__entry->rwbs, rq); - blk_dump_cmd(__get_str(cmd), rq); - ), - - TP_printk("%d,%d %s (%s) %llu + %u [%d]", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->rwbs, __get_str(cmd), - (unsigned long long)__entry->sector, - __entry->nr_sector, __entry->errors) + TP_ARGS(q, rq) ); -TRACE_EVENT(block_rq_complete, +DEFINE_EVENT(block_rq, block_rq_issue, TP_PROTO(struct request_queue *q, struct request *rq), - TP_ARGS(q, rq), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( sector_t, sector ) - __field( unsigned int, nr_sector ) - __field( int, errors ) - __array( char, rwbs, 6 ) - __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) - ), - - TP_fast_assign( - __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; - __entry->sector = blk_pc_request(rq) ? 0 : blk_rq_pos(rq); - __entry->nr_sector = blk_pc_request(rq) ? 0 : blk_rq_sectors(rq); - __entry->errors = rq->errors; - - blk_fill_rwbs_rq(__entry->rwbs, rq); - blk_dump_cmd(__get_str(cmd), rq); - ), - - TP_printk("%d,%d %s (%s) %llu + %u [%d]", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->rwbs, __get_str(cmd), - (unsigned long long)__entry->sector, - __entry->nr_sector, __entry->errors) + TP_ARGS(q, rq) ); TRACE_EVENT(block_bio_bounce, @@ -228,7 +165,7 @@ TRACE_EVENT(block_bio_complete, __entry->nr_sector, __entry->error) ); -TRACE_EVENT(block_bio_backmerge, +DECLARE_EVENT_CLASS(block_bio, TP_PROTO(struct request_queue *q, struct bio *bio), @@ -256,63 +193,28 @@ TRACE_EVENT(block_bio_backmerge, __entry->nr_sector, __entry->comm) ); -TRACE_EVENT(block_bio_frontmerge, +DEFINE_EVENT(block_bio, block_bio_backmerge, TP_PROTO(struct request_queue *q, struct bio *bio), - TP_ARGS(q, bio), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( sector_t, sector ) - __field( unsigned, nr_sector ) - __array( char, rwbs, 6 ) - __array( char, comm, TASK_COMM_LEN ) - ), - - TP_fast_assign( - __entry->dev = bio->bi_bdev->bd_dev; - __entry->sector = bio->bi_sector; - __entry->nr_sector = bio->bi_size >> 9; - blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); - ), - - TP_printk("%d,%d %s %llu + %u [%s]", - MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, - (unsigned long long)__entry->sector, - __entry->nr_sector, __entry->comm) + TP_ARGS(q, bio) ); -TRACE_EVENT(block_bio_queue, +DEFINE_EVENT(block_bio, block_bio_frontmerge, TP_PROTO(struct request_queue *q, struct bio *bio), - TP_ARGS(q, bio), + TP_ARGS(q, bio) +); - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( sector_t, sector ) - __field( unsigned int, nr_sector ) - __array( char, rwbs, 6 ) - __array( char, comm, TASK_COMM_LEN ) - ), +DEFINE_EVENT(block_bio, block_bio_queue, - TP_fast_assign( - __entry->dev = bio->bi_bdev->bd_dev; - __entry->sector = bio->bi_sector; - __entry->nr_sector = bio->bi_size >> 9; - blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); - ), + TP_PROTO(struct request_queue *q, struct bio *bio), - TP_printk("%d,%d %s %llu + %u [%s]", - MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, - (unsigned long long)__entry->sector, - __entry->nr_sector, __entry->comm) + TP_ARGS(q, bio) ); -TRACE_EVENT(block_getrq, +DECLARE_EVENT_CLASS(block_get_rq, TP_PROTO(struct request_queue *q, struct bio *bio, int rw), @@ -341,33 +243,18 @@ TRACE_EVENT(block_getrq, __entry->nr_sector, __entry->comm) ); -TRACE_EVENT(block_sleeprq, +DEFINE_EVENT(block_get_rq, block_getrq, TP_PROTO(struct request_queue *q, struct bio *bio, int rw), - TP_ARGS(q, bio, rw), + TP_ARGS(q, bio, rw) +); - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( sector_t, sector ) - __field( unsigned int, nr_sector ) - __array( char, rwbs, 6 ) - __array( char, comm, TASK_COMM_LEN ) - ), +DEFINE_EVENT(block_get_rq, block_sleeprq, - TP_fast_assign( - __entry->dev = bio ? bio->bi_bdev->bd_dev : 0; - __entry->sector = bio ? bio->bi_sector : 0; - __entry->nr_sector = bio ? bio->bi_size >> 9 : 0; - blk_fill_rwbs(__entry->rwbs, - bio ? bio->bi_rw : 0, __entry->nr_sector); - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); - ), + TP_PROTO(struct request_queue *q, struct bio *bio, int rw), - TP_printk("%d,%d %s %llu + %u [%s]", - MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, - (unsigned long long)__entry->sector, - __entry->nr_sector, __entry->comm) + TP_ARGS(q, bio, rw) ); TRACE_EVENT(block_plug, @@ -387,7 +274,7 @@ TRACE_EVENT(block_plug, TP_printk("[%s]", __entry->comm) ); -TRACE_EVENT(block_unplug_timer, +DECLARE_EVENT_CLASS(block_unplug, TP_PROTO(struct request_queue *q), @@ -406,23 +293,18 @@ TRACE_EVENT(block_unplug_timer, TP_printk("[%s] %d", __entry->comm, __entry->nr_rq) ); -TRACE_EVENT(block_unplug_io, +DEFINE_EVENT(block_unplug, block_unplug_timer, TP_PROTO(struct request_queue *q), - TP_ARGS(q), + TP_ARGS(q) +); - TP_STRUCT__entry( - __field( int, nr_rq ) - __array( char, comm, TASK_COMM_LEN ) - ), +DEFINE_EVENT(block_unplug, block_unplug_io, - TP_fast_assign( - __entry->nr_rq = q->rq.count[READ] + q->rq.count[WRITE]; - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); - ), + TP_PROTO(struct request_queue *q), - TP_printk("[%s] %d", __entry->comm, __entry->nr_rq) + TP_ARGS(q) ); TRACE_EVENT(block_split, -- cgit v1.3-8-gc7d7 From 071688f36e7eba3e37b2fc48e35bfdab99b80b4d Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 26 Nov 2009 15:06:55 +0800 Subject: tracing: Convert some jbd2 events to DEFINE_EVENT Use DECLARE_EVENT_CLASS to remove duplicate code: text data bss dec hex filename 34903 1693 448 37044 90b4 fs/jbd2/journal.o.old 31931 1693 416 34040 84f8 fs/jbd2/journal.o Four events are converted: jbd2_commit: jbd2_start_commit, jbd2_commit_{locking, flushing, logging} No change in functionality. Signed-off-by: Li Zefan Cc: Theodore Ts'o Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B0E290F.7030909@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/trace/events/jbd2.h | 63 ++++++++------------------------------------- 1 file changed, 11 insertions(+), 52 deletions(-) (limited to 'include') diff --git a/include/trace/events/jbd2.h b/include/trace/events/jbd2.h index 3c60b75adb9e..96b370a050de 100644 --- a/include/trace/events/jbd2.h +++ b/include/trace/events/jbd2.h @@ -30,7 +30,7 @@ TRACE_EVENT(jbd2_checkpoint, jbd2_dev_to_name(__entry->dev), __entry->result) ); -TRACE_EVENT(jbd2_start_commit, +DECLARE_EVENT_CLASS(jbd2_commit, TP_PROTO(journal_t *journal, transaction_t *commit_transaction), @@ -53,73 +53,32 @@ TRACE_EVENT(jbd2_start_commit, __entry->sync_commit) ); -TRACE_EVENT(jbd2_commit_locking, +DEFINE_EVENT(jbd2_commit, jbd2_start_commit, TP_PROTO(journal_t *journal, transaction_t *commit_transaction), - TP_ARGS(journal, commit_transaction), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( char, sync_commit ) - __field( int, transaction ) - ), - - TP_fast_assign( - __entry->dev = journal->j_fs_dev->bd_dev; - __entry->sync_commit = commit_transaction->t_synchronous_commit; - __entry->transaction = commit_transaction->t_tid; - ), - - TP_printk("dev %s transaction %d sync %d", - jbd2_dev_to_name(__entry->dev), __entry->transaction, - __entry->sync_commit) + TP_ARGS(journal, commit_transaction) ); -TRACE_EVENT(jbd2_commit_flushing, +DEFINE_EVENT(jbd2_commit, jbd2_commit_locking, TP_PROTO(journal_t *journal, transaction_t *commit_transaction), - TP_ARGS(journal, commit_transaction), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( char, sync_commit ) - __field( int, transaction ) - ), - - TP_fast_assign( - __entry->dev = journal->j_fs_dev->bd_dev; - __entry->sync_commit = commit_transaction->t_synchronous_commit; - __entry->transaction = commit_transaction->t_tid; - ), - - TP_printk("dev %s transaction %d sync %d", - jbd2_dev_to_name(__entry->dev), __entry->transaction, - __entry->sync_commit) + TP_ARGS(journal, commit_transaction) ); -TRACE_EVENT(jbd2_commit_logging, +DEFINE_EVENT(jbd2_commit, jbd2_commit_flushing, TP_PROTO(journal_t *journal, transaction_t *commit_transaction), - TP_ARGS(journal, commit_transaction), + TP_ARGS(journal, commit_transaction) +); - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( char, sync_commit ) - __field( int, transaction ) - ), +DEFINE_EVENT(jbd2_commit, jbd2_commit_logging, - TP_fast_assign( - __entry->dev = journal->j_fs_dev->bd_dev; - __entry->sync_commit = commit_transaction->t_synchronous_commit; - __entry->transaction = commit_transaction->t_tid; - ), + TP_PROTO(journal_t *journal, transaction_t *commit_transaction), - TP_printk("dev %s transaction %d sync %d", - jbd2_dev_to_name(__entry->dev), __entry->transaction, - __entry->sync_commit) + TP_ARGS(journal, commit_transaction) ); TRACE_EVENT(jbd2_end_commit, -- cgit v1.3-8-gc7d7 From b5eb34c3592545c756e50d882c08417eb60740a7 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 26 Nov 2009 15:07:36 +0800 Subject: tracing: Convert some ext4 events to DEFINE_TRACE Use DECLARE_EVENT_CLASS to remove duplicate code: text data bss dec hex filename 294695 6104 340 301139 49853 fs/ext4/ext4.o.old 289983 6104 324 296411 485db fs/ext4/ext4.o 5 events are convertd: ext4__write_begin: ext4_write_begin, ext4_da_write_begin ext4__write_end: ext4_{ordered, writeback, journalled}_write_end No change in functionality. Signed-off-by: Li Zefan Cc: Theodore Ts'o Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B0E2938.2040708@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/trace/events/ext4.h | 129 ++++++++++++-------------------------------- 1 file changed, 35 insertions(+), 94 deletions(-) (limited to 'include') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index d09550bf3f95..318f76535bd4 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -90,7 +90,7 @@ TRACE_EVENT(ext4_allocate_inode, (unsigned long) __entry->dir, __entry->mode) ); -TRACE_EVENT(ext4_write_begin, +DECLARE_EVENT_CLASS(ext4__write_begin, TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, unsigned int flags), @@ -118,7 +118,23 @@ TRACE_EVENT(ext4_write_begin, __entry->pos, __entry->len, __entry->flags) ); -TRACE_EVENT(ext4_ordered_write_end, +DEFINE_EVENT(ext4__write_begin, ext4_write_begin, + + TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, + unsigned int flags), + + TP_ARGS(inode, pos, len, flags) +); + +DEFINE_EVENT(ext4__write_begin, ext4_da_write_begin, + + TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, + unsigned int flags), + + TP_ARGS(inode, pos, len, flags) +); + +DECLARE_EVENT_CLASS(ext4__write_end, TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, unsigned int copied), @@ -145,57 +161,36 @@ TRACE_EVENT(ext4_ordered_write_end, __entry->pos, __entry->len, __entry->copied) ); -TRACE_EVENT(ext4_writeback_write_end, +DEFINE_EVENT(ext4__write_end, ext4_ordered_write_end, + TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, unsigned int copied), - TP_ARGS(inode, pos, len, copied), + TP_ARGS(inode, pos, len, copied) +); - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( ino_t, ino ) - __field( loff_t, pos ) - __field( unsigned int, len ) - __field( unsigned int, copied ) - ), +DEFINE_EVENT(ext4__write_end, ext4_writeback_write_end, - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->ino = inode->i_ino; - __entry->pos = pos; - __entry->len = len; - __entry->copied = copied; - ), + TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, + unsigned int copied), - TP_printk("dev %s ino %lu pos %llu len %u copied %u", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, - __entry->pos, __entry->len, __entry->copied) + TP_ARGS(inode, pos, len, copied) ); -TRACE_EVENT(ext4_journalled_write_end, +DEFINE_EVENT(ext4__write_end, ext4_journalled_write_end, + TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, unsigned int copied), - TP_ARGS(inode, pos, len, copied), - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( ino_t, ino ) - __field( loff_t, pos ) - __field( unsigned int, len ) - __field( unsigned int, copied ) - ), + TP_ARGS(inode, pos, len, copied) +); - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->ino = inode->i_ino; - __entry->pos = pos; - __entry->len = len; - __entry->copied = copied; - ), +DEFINE_EVENT(ext4__write_end, ext4_da_write_end, - TP_printk("dev %s ino %lu pos %llu len %u copied %u", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, - __entry->pos, __entry->len, __entry->copied) + TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, + unsigned int copied), + + TP_ARGS(inode, pos, len, copied) ); TRACE_EVENT(ext4_writepage, @@ -337,60 +332,6 @@ TRACE_EVENT(ext4_da_writepages_result, (unsigned long) __entry->writeback_index) ); -TRACE_EVENT(ext4_da_write_begin, - TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, - unsigned int flags), - - TP_ARGS(inode, pos, len, flags), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( ino_t, ino ) - __field( loff_t, pos ) - __field( unsigned int, len ) - __field( unsigned int, flags ) - ), - - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->ino = inode->i_ino; - __entry->pos = pos; - __entry->len = len; - __entry->flags = flags; - ), - - TP_printk("dev %s ino %lu pos %llu len %u flags %u", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, - __entry->pos, __entry->len, __entry->flags) -); - -TRACE_EVENT(ext4_da_write_end, - TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, - unsigned int copied), - - TP_ARGS(inode, pos, len, copied), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( ino_t, ino ) - __field( loff_t, pos ) - __field( unsigned int, len ) - __field( unsigned int, copied ) - ), - - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->ino = inode->i_ino; - __entry->pos = pos; - __entry->len = len; - __entry->copied = copied; - ), - - TP_printk("dev %s ino %lu pos %llu len %u copied %u", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, - __entry->pos, __entry->len, __entry->copied) -); - TRACE_EVENT(ext4_discard_blocks, TP_PROTO(struct super_block *sb, unsigned long long blk, unsigned long long count), -- cgit v1.3-8-gc7d7 From 470dda7417f284b9cfc96560b2acd98df63798a2 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 26 Nov 2009 15:08:01 +0800 Subject: tracing: Restore original format of sched events The original format for sched_stat_iowait and sched_stat_sleep: $ cat events/sched/sched_stat_iowait/format ... print fmt: "comm=%s pid=%d delay=%Lu [ns]", ... $ cat events/sched/sched_stat_sleep/format ... print fmt: "comm=%s pid=%d delay=%Lu [ns]", ... But commit commit 75ec29ab848a7e92a41aaafaeb33d1afbc839be4 ("tracing: Convert some sched trace events to DEFINE_EVENT and _PRINT") broke the format: $ cat events/sched/sched_stat_iowait/format print fmt: "task: %s:%d iowait: %Lu [ns]", ... $ cat events/sched/sched_stat_sleep/format print fmt: "task: %s:%d sleep: %Lu [ns]", ... No change in functionality. Signed-off-by: Li Zefan Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B0E2951.9050800@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/trace/events/sched.h | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 5ce795021851..9d316b22388c 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -355,23 +355,17 @@ DEFINE_EVENT(sched_stat_template, sched_stat_wait, * Tracepoint for accounting sleep time (time the task is not runnable, * including iowait, see below). */ -DEFINE_EVENT_PRINT(sched_stat_template, sched_stat_sleep, - TP_PROTO(struct task_struct *tsk, u64 delay), - TP_ARGS(tsk, delay), - TP_printk("task: %s:%d sleep: %Lu [ns]", - __entry->comm, __entry->pid, - (unsigned long long)__entry->delay)); +DEFINE_EVENT(sched_stat_template, sched_stat_sleep, + TP_PROTO(struct task_struct *tsk, u64 delay), + TP_ARGS(tsk, delay)); /* * Tracepoint for accounting iowait time (time the task is not runnable * due to waiting on IO to complete). */ -DEFINE_EVENT_PRINT(sched_stat_template, sched_stat_iowait, - TP_PROTO(struct task_struct *tsk, u64 delay), - TP_ARGS(tsk, delay), - TP_printk("task: %s:%d iowait: %Lu [ns]", - __entry->comm, __entry->pid, - (unsigned long long)__entry->delay)); +DEFINE_EVENT(sched_stat_template, sched_stat_iowait, + TP_PROTO(struct task_struct *tsk, u64 delay), + TP_ARGS(tsk, delay)); /* * Tracepoint for accounting runtime (time the task is executing -- cgit v1.3-8-gc7d7 From 2d4dc890b5c8fabd818a8586607e6843c4375e62 Mon Sep 17 00:00:00 2001 From: Ilya Loginov Date: Thu, 26 Nov 2009 09:16:19 +0100 Subject: block: add helpers to run flush_dcache_page() against a bio and a request's pages Mtdblock driver doesn't call flush_dcache_page for pages in request. So, this causes problems on architectures where the icache doesn't fill from the dcache or with dcache aliases. The patch fixes this. The ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE symbol was introduced to avoid pointless empty cache-thrashing loops on architectures for which flush_dcache_page() is a no-op. Every architecture was provided with this flush pages on architectires where ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE is equal 1 or do nothing otherwise. See "fix mtd_blkdevs problem with caches on some architectures" discussion on LKML for more information. Signed-off-by: Ilya Loginov Cc: Ingo Molnar Cc: David Woodhouse Cc: Peter Horton Cc: "Ed L. Cashin" Signed-off-by: Jens Axboe --- arch/alpha/include/asm/cacheflush.h | 1 + arch/arm/include/asm/cacheflush.h | 1 + arch/avr32/include/asm/cacheflush.h | 1 + arch/blackfin/include/asm/cacheflush.h | 2 ++ arch/cris/include/asm/cacheflush.h | 1 + arch/frv/include/asm/cacheflush.h | 1 + arch/h8300/include/asm/cacheflush.h | 1 + arch/ia64/include/asm/cacheflush.h | 1 + arch/m32r/include/asm/cacheflush.h | 3 +++ arch/m68k/include/asm/cacheflush_mm.h | 1 + arch/m68k/include/asm/cacheflush_no.h | 1 + arch/microblaze/include/asm/cacheflush.h | 1 + arch/mips/include/asm/cacheflush.h | 1 + arch/mn10300/include/asm/cacheflush.h | 1 + arch/parisc/include/asm/cacheflush.h | 1 + arch/powerpc/include/asm/cacheflush.h | 1 + arch/s390/include/asm/cacheflush.h | 1 + arch/score/include/asm/cacheflush.h | 1 + arch/sh/include/asm/cacheflush.h | 1 + arch/sparc/include/asm/cacheflush_32.h | 1 + arch/sparc/include/asm/cacheflush_64.h | 1 + arch/x86/include/asm/cacheflush.h | 1 + arch/xtensa/include/asm/cacheflush.h | 1 + block/blk-core.c | 19 +++++++++++++++++++ drivers/mtd/mtd_blkdevs.c | 2 ++ fs/bio.c | 12 ++++++++++++ include/asm-generic/cacheflush.h | 1 + include/linux/bio.h | 12 ++++++++++++ include/linux/blkdev.h | 11 +++++++++++ 29 files changed, 83 insertions(+) (limited to 'include') diff --git a/arch/alpha/include/asm/cacheflush.h b/arch/alpha/include/asm/cacheflush.h index b686cc7fc44e..01d71e1c8a9e 100644 --- a/arch/alpha/include/asm/cacheflush.h +++ b/arch/alpha/include/asm/cacheflush.h @@ -9,6 +9,7 @@ #define flush_cache_dup_mm(mm) do { } while (0) #define flush_cache_range(vma, start, end) do { } while (0) #define flush_cache_page(vma, vmaddr, pfn) do { } while (0) +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 #define flush_dcache_page(page) do { } while (0) #define flush_dcache_mmap_lock(mapping) do { } while (0) #define flush_dcache_mmap_unlock(mapping) do { } while (0) diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index fd03fb63a332..247b7b0adc2a 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -408,6 +408,7 @@ extern void flush_ptrace_access(struct vm_area_struct *vma, struct page *page, * about to change to user space. This is the same method as used on SPARC64. * See update_mmu_cache for the user space part. */ +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 extern void flush_dcache_page(struct page *); extern void __flush_dcache_page(struct address_space *mapping, struct page *page); diff --git a/arch/avr32/include/asm/cacheflush.h b/arch/avr32/include/asm/cacheflush.h index 670674749b20..96e53820bbbd 100644 --- a/arch/avr32/include/asm/cacheflush.h +++ b/arch/avr32/include/asm/cacheflush.h @@ -107,6 +107,7 @@ extern void flush_icache_page(struct vm_area_struct *vma, struct page *page); * do something here, but only for certain configurations. No such * configurations exist at this time. */ +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 #define flush_dcache_page(page) do { } while (0) #define flush_dcache_mmap_lock(page) do { } while (0) #define flush_dcache_mmap_unlock(page) do { } while (0) diff --git a/arch/blackfin/include/asm/cacheflush.h b/arch/blackfin/include/asm/cacheflush.h index af03a36c7a4e..417eaac7fe99 100644 --- a/arch/blackfin/include/asm/cacheflush.h +++ b/arch/blackfin/include/asm/cacheflush.h @@ -68,9 +68,11 @@ do { memcpy(dst, src, len); \ #endif #if defined(CONFIG_BFIN_EXTMEM_WRITEBACK) || defined(CONFIG_BFIN_L2_WRITEBACK) # define flush_dcache_range(start,end) blackfin_dcache_flush_range((start), (end)) +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 # define flush_dcache_page(page) blackfin_dflush_page(page_address(page)) #else # define flush_dcache_range(start,end) do { } while (0) +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 # define flush_dcache_page(page) do { } while (0) #endif diff --git a/arch/cris/include/asm/cacheflush.h b/arch/cris/include/asm/cacheflush.h index cf60e3f69f8d..36795bca605e 100644 --- a/arch/cris/include/asm/cacheflush.h +++ b/arch/cris/include/asm/cacheflush.h @@ -12,6 +12,7 @@ #define flush_cache_dup_mm(mm) do { } while (0) #define flush_cache_range(vma, start, end) do { } while (0) #define flush_cache_page(vma, vmaddr, pfn) do { } while (0) +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 #define flush_dcache_page(page) do { } while (0) #define flush_dcache_mmap_lock(mapping) do { } while (0) #define flush_dcache_mmap_unlock(mapping) do { } while (0) diff --git a/arch/frv/include/asm/cacheflush.h b/arch/frv/include/asm/cacheflush.h index 432a69e7f3d4..edbac54ae015 100644 --- a/arch/frv/include/asm/cacheflush.h +++ b/arch/frv/include/asm/cacheflush.h @@ -47,6 +47,7 @@ static inline void __flush_cache_all(void) } /* dcache/icache coherency... */ +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 #ifdef CONFIG_MMU extern void flush_dcache_page(struct page *page); #else diff --git a/arch/h8300/include/asm/cacheflush.h b/arch/h8300/include/asm/cacheflush.h index 5ffdca217b95..4cf2df20c1ce 100644 --- a/arch/h8300/include/asm/cacheflush.h +++ b/arch/h8300/include/asm/cacheflush.h @@ -15,6 +15,7 @@ #define flush_cache_dup_mm(mm) do { } while (0) #define flush_cache_range(vma,a,b) #define flush_cache_page(vma,p,pfn) +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 #define flush_dcache_page(page) #define flush_dcache_mmap_lock(mapping) #define flush_dcache_mmap_unlock(mapping) diff --git a/arch/ia64/include/asm/cacheflush.h b/arch/ia64/include/asm/cacheflush.h index c8ce2719fee8..429eefc93ee7 100644 --- a/arch/ia64/include/asm/cacheflush.h +++ b/arch/ia64/include/asm/cacheflush.h @@ -25,6 +25,7 @@ #define flush_cache_vmap(start, end) do { } while (0) #define flush_cache_vunmap(start, end) do { } while (0) +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 #define flush_dcache_page(page) \ do { \ clear_bit(PG_arch_1, &(page)->flags); \ diff --git a/arch/m32r/include/asm/cacheflush.h b/arch/m32r/include/asm/cacheflush.h index 78587c958146..8e8e04516c39 100644 --- a/arch/m32r/include/asm/cacheflush.h +++ b/arch/m32r/include/asm/cacheflush.h @@ -12,6 +12,7 @@ extern void _flush_cache_copyback_all(void); #define flush_cache_dup_mm(mm) do { } while (0) #define flush_cache_range(vma, start, end) do { } while (0) #define flush_cache_page(vma, vmaddr, pfn) do { } while (0) +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 #define flush_dcache_page(page) do { } while (0) #define flush_dcache_mmap_lock(mapping) do { } while (0) #define flush_dcache_mmap_unlock(mapping) do { } while (0) @@ -33,6 +34,7 @@ extern void smp_flush_cache_all(void); #define flush_cache_dup_mm(mm) do { } while (0) #define flush_cache_range(vma, start, end) do { } while (0) #define flush_cache_page(vma, vmaddr, pfn) do { } while (0) +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 #define flush_dcache_page(page) do { } while (0) #define flush_dcache_mmap_lock(mapping) do { } while (0) #define flush_dcache_mmap_unlock(mapping) do { } while (0) @@ -46,6 +48,7 @@ extern void smp_flush_cache_all(void); #define flush_cache_dup_mm(mm) do { } while (0) #define flush_cache_range(vma, start, end) do { } while (0) #define flush_cache_page(vma, vmaddr, pfn) do { } while (0) +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 #define flush_dcache_page(page) do { } while (0) #define flush_dcache_mmap_lock(mapping) do { } while (0) #define flush_dcache_mmap_unlock(mapping) do { } while (0) diff --git a/arch/m68k/include/asm/cacheflush_mm.h b/arch/m68k/include/asm/cacheflush_mm.h index 16bf375fdbe1..73de7c89d8e0 100644 --- a/arch/m68k/include/asm/cacheflush_mm.h +++ b/arch/m68k/include/asm/cacheflush_mm.h @@ -128,6 +128,7 @@ static inline void __flush_page_to_ram(void *vaddr) } } +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 #define flush_dcache_page(page) __flush_page_to_ram(page_address(page)) #define flush_dcache_mmap_lock(mapping) do { } while (0) #define flush_dcache_mmap_unlock(mapping) do { } while (0) diff --git a/arch/m68k/include/asm/cacheflush_no.h b/arch/m68k/include/asm/cacheflush_no.h index c65f00a94553..89f195656be7 100644 --- a/arch/m68k/include/asm/cacheflush_no.h +++ b/arch/m68k/include/asm/cacheflush_no.h @@ -12,6 +12,7 @@ #define flush_cache_range(vma, start, end) __flush_cache_all() #define flush_cache_page(vma, vmaddr) do { } while (0) #define flush_dcache_range(start,len) __flush_cache_all() +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 #define flush_dcache_page(page) do { } while (0) #define flush_dcache_mmap_lock(mapping) do { } while (0) #define flush_dcache_mmap_unlock(mapping) do { } while (0) diff --git a/arch/microblaze/include/asm/cacheflush.h b/arch/microblaze/include/asm/cacheflush.h index f989d6aad648..088076e657b3 100644 --- a/arch/microblaze/include/asm/cacheflush.h +++ b/arch/microblaze/include/asm/cacheflush.h @@ -37,6 +37,7 @@ #define flush_cache_page(vma, vmaddr, pfn) do { } while (0) #define flush_dcache_range(start, end) __invalidate_dcache_range(start, end) +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 #define flush_dcache_page(page) do { } while (0) #define flush_dcache_mmap_lock(mapping) do { } while (0) #define flush_dcache_mmap_unlock(mapping) do { } while (0) diff --git a/arch/mips/include/asm/cacheflush.h b/arch/mips/include/asm/cacheflush.h index 03b1d69b142f..40bb9fde205f 100644 --- a/arch/mips/include/asm/cacheflush.h +++ b/arch/mips/include/asm/cacheflush.h @@ -38,6 +38,7 @@ extern void (*flush_cache_range)(struct vm_area_struct *vma, extern void (*flush_cache_page)(struct vm_area_struct *vma, unsigned long page, unsigned long pfn); extern void __flush_dcache_page(struct page *page); +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 static inline void flush_dcache_page(struct page *page) { if (cpu_has_dc_aliases || !cpu_has_ic_fills_f_dc) diff --git a/arch/mn10300/include/asm/cacheflush.h b/arch/mn10300/include/asm/cacheflush.h index 1a55d61f0d06..29e692f7f030 100644 --- a/arch/mn10300/include/asm/cacheflush.h +++ b/arch/mn10300/include/asm/cacheflush.h @@ -26,6 +26,7 @@ #define flush_cache_page(vma, vmaddr, pfn) do {} while (0) #define flush_cache_vmap(start, end) do {} while (0) #define flush_cache_vunmap(start, end) do {} while (0) +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 #define flush_dcache_page(page) do {} while (0) #define flush_dcache_mmap_lock(mapping) do {} while (0) #define flush_dcache_mmap_unlock(mapping) do {} while (0) diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h index 724395143f26..7a73b615c23d 100644 --- a/arch/parisc/include/asm/cacheflush.h +++ b/arch/parisc/include/asm/cacheflush.h @@ -42,6 +42,7 @@ void flush_cache_mm(struct mm_struct *mm); #define flush_cache_vmap(start, end) flush_cache_all() #define flush_cache_vunmap(start, end) flush_cache_all() +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 extern void flush_dcache_page(struct page *page); #define flush_dcache_mmap_lock(mapping) \ diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h index ba667a383b8c..ab9e402518e8 100644 --- a/arch/powerpc/include/asm/cacheflush.h +++ b/arch/powerpc/include/asm/cacheflush.h @@ -25,6 +25,7 @@ #define flush_cache_vmap(start, end) do { } while (0) #define flush_cache_vunmap(start, end) do { } while (0) +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 extern void flush_dcache_page(struct page *page); #define flush_dcache_mmap_lock(mapping) do { } while (0) #define flush_dcache_mmap_unlock(mapping) do { } while (0) diff --git a/arch/s390/include/asm/cacheflush.h b/arch/s390/include/asm/cacheflush.h index 49d5af916d01..405cc97c6249 100644 --- a/arch/s390/include/asm/cacheflush.h +++ b/arch/s390/include/asm/cacheflush.h @@ -10,6 +10,7 @@ #define flush_cache_dup_mm(mm) do { } while (0) #define flush_cache_range(vma, start, end) do { } while (0) #define flush_cache_page(vma, vmaddr, pfn) do { } while (0) +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 #define flush_dcache_page(page) do { } while (0) #define flush_dcache_mmap_lock(mapping) do { } while (0) #define flush_dcache_mmap_unlock(mapping) do { } while (0) diff --git a/arch/score/include/asm/cacheflush.h b/arch/score/include/asm/cacheflush.h index 07cc8fc457cd..caaba24036e3 100644 --- a/arch/score/include/asm/cacheflush.h +++ b/arch/score/include/asm/cacheflush.h @@ -16,6 +16,7 @@ extern void flush_icache_range(unsigned long start, unsigned long end); extern void flush_dcache_range(unsigned long start, unsigned long end); #define flush_cache_dup_mm(mm) do {} while (0) +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 #define flush_dcache_page(page) do {} while (0) #define flush_dcache_mmap_lock(mapping) do {} while (0) #define flush_dcache_mmap_unlock(mapping) do {} while (0) diff --git a/arch/sh/include/asm/cacheflush.h b/arch/sh/include/asm/cacheflush.h index c29918f3c819..dda96eb3e7c0 100644 --- a/arch/sh/include/asm/cacheflush.h +++ b/arch/sh/include/asm/cacheflush.h @@ -42,6 +42,7 @@ extern void flush_cache_page(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn); extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 extern void flush_dcache_page(struct page *page); extern void flush_icache_range(unsigned long start, unsigned long end); extern void flush_icache_page(struct vm_area_struct *vma, diff --git a/arch/sparc/include/asm/cacheflush_32.h b/arch/sparc/include/asm/cacheflush_32.h index 68ac10910271..2e468773f250 100644 --- a/arch/sparc/include/asm/cacheflush_32.h +++ b/arch/sparc/include/asm/cacheflush_32.h @@ -75,6 +75,7 @@ BTFIXUPDEF_CALL(void, flush_sig_insns, struct mm_struct *, unsigned long) extern void sparc_flush_page_to_ram(struct page *page); +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 #define flush_dcache_page(page) sparc_flush_page_to_ram(page) #define flush_dcache_mmap_lock(mapping) do { } while (0) #define flush_dcache_mmap_unlock(mapping) do { } while (0) diff --git a/arch/sparc/include/asm/cacheflush_64.h b/arch/sparc/include/asm/cacheflush_64.h index c43321729b3b..b95384033e89 100644 --- a/arch/sparc/include/asm/cacheflush_64.h +++ b/arch/sparc/include/asm/cacheflush_64.h @@ -37,6 +37,7 @@ extern void flush_dcache_page_all(struct mm_struct *mm, struct page *page); #endif extern void __flush_dcache_range(unsigned long start, unsigned long end); +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 extern void flush_dcache_page(struct page *page); #define flush_icache_page(vma, pg) do { } while(0) diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h index b54f6afe7ec4..9076add593a8 100644 --- a/arch/x86/include/asm/cacheflush.h +++ b/arch/x86/include/asm/cacheflush.h @@ -12,6 +12,7 @@ static inline void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { } static inline void flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long pfn) { } +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 static inline void flush_dcache_page(struct page *page) { } static inline void flush_dcache_mmap_lock(struct address_space *mapping) { } static inline void flush_dcache_mmap_unlock(struct address_space *mapping) { } diff --git a/arch/xtensa/include/asm/cacheflush.h b/arch/xtensa/include/asm/cacheflush.h index b7b8fbe47c77..a508f2f73bd7 100644 --- a/arch/xtensa/include/asm/cacheflush.h +++ b/arch/xtensa/include/asm/cacheflush.h @@ -101,6 +101,7 @@ static inline void __invalidate_icache_page_alias(unsigned long virt, #define flush_cache_vmap(start,end) flush_cache_all() #define flush_cache_vunmap(start,end) flush_cache_all() +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 extern void flush_dcache_page(struct page*); extern void flush_cache_range(struct vm_area_struct*, ulong, ulong); extern void flush_cache_page(struct vm_area_struct*, unsigned long, unsigned long); diff --git a/block/blk-core.c b/block/blk-core.c index 71da5111120c..718897e6d37f 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2358,6 +2358,25 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq, rq->rq_disk = bio->bi_bdev->bd_disk; } +#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE +/** + * rq_flush_dcache_pages - Helper function to flush all pages in a request + * @rq: the request to be flushed + * + * Description: + * Flush all pages in @rq. + */ +void rq_flush_dcache_pages(struct request *rq) +{ + struct req_iterator iter; + struct bio_vec *bvec; + + rq_for_each_segment(bvec, rq, iter) + flush_dcache_page(bvec->bv_page); +} +EXPORT_SYMBOL_GPL(rq_flush_dcache_pages); +#endif + /** * blk_lld_busy - Check if underlying low-level drivers of a device are busy * @q : the queue of the device being checked diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index 8ca17a3e96ea..64e2b379a350 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -59,12 +59,14 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr, for (; nsect > 0; nsect--, block++, buf += tr->blksize) if (tr->readsect(dev, block, buf)) return -EIO; + rq_flush_dcache_pages(req); return 0; case WRITE: if (!tr->writesect) return -EIO; + rq_flush_dcache_pages(req); for (; nsect > 0; nsect--, block++, buf += tr->blksize) if (tr->writesect(dev, block, buf)) return -EIO; diff --git a/fs/bio.c b/fs/bio.c index 12da5db8682c..e23a63f4f7de 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -1393,6 +1393,18 @@ void bio_check_pages_dirty(struct bio *bio) } } +#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE +void bio_flush_dcache_pages(struct bio *bi) +{ + int i; + struct bio_vec *bvec; + + bio_for_each_segment(bvec, bi, i) + flush_dcache_page(bvec->bv_page); +} +EXPORT_SYMBOL(bio_flush_dcache_pages); +#endif + /** * bio_endio - end I/O on a bio * @bio: bio diff --git a/include/asm-generic/cacheflush.h b/include/asm-generic/cacheflush.h index ba4ec39a1131..57b5c3c82e86 100644 --- a/include/asm-generic/cacheflush.h +++ b/include/asm-generic/cacheflush.h @@ -13,6 +13,7 @@ #define flush_cache_dup_mm(mm) do { } while (0) #define flush_cache_range(vma, start, end) do { } while (0) #define flush_cache_page(vma, vmaddr, pfn) do { } while (0) +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 #define flush_dcache_page(page) do { } while (0) #define flush_dcache_mmap_lock(mapping) do { } while (0) #define flush_dcache_mmap_unlock(mapping) do { } while (0) diff --git a/include/linux/bio.h b/include/linux/bio.h index 474792b825d0..7fc5606e6ea5 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -391,6 +391,18 @@ extern struct bio *bio_copy_kern(struct request_queue *, void *, unsigned int, gfp_t, int); extern void bio_set_pages_dirty(struct bio *bio); extern void bio_check_pages_dirty(struct bio *bio); + +#ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE +# error "You should define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE for your platform" +#endif +#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE +extern void bio_flush_dcache_pages(struct bio *bi); +#else +static inline void bio_flush_dcache_pages(struct bio *bi) +{ +} +#endif + extern struct bio *bio_copy_user(struct request_queue *, struct rq_map_data *, unsigned long, unsigned int, int, gfp_t); extern struct bio *bio_copy_user_iov(struct request_queue *, diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1cc02972fbe2..e727f6c44c44 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -752,6 +752,17 @@ struct req_iterator { #define rq_iter_last(rq, _iter) \ (_iter.bio->bi_next == NULL && _iter.i == _iter.bio->bi_vcnt-1) +#ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE +# error "You should define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE for your platform" +#endif +#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE +extern void rq_flush_dcache_pages(struct request *rq); +#else +static inline void rq_flush_dcache_pages(struct request *rq) +{ +} +#endif + extern int blk_register_queue(struct gendisk *disk); extern void blk_unregister_queue(struct gendisk *disk); extern void register_disk(struct gendisk *dev); -- cgit v1.3-8-gc7d7 From d9449ce35a1e8fb58dd2d419f9215562a14ecca0 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Thu, 26 Nov 2009 09:45:40 +0100 Subject: Fix regression in direct writes performance due to WRITE_ODIRECT flag removal There seems to be a regression in direct write path due to following commit in for-2.6.33 branch of block tree. commit 1af60fbd759d31f565552fea315c2033947cfbe6 Author: Jeff Moyer Date: Fri Oct 2 18:56:53 2009 -0400 block: get rid of the WRITE_ODIRECT flag Marking direct writes as WRITE_SYNC_PLUG instead of WRITE_ODIRECT, sets the NOIDLE flag in bio and hence in request. This tells CFQ to not expect more request from the queue and not idle on it (despite the fact that queue's think time is less and it is not seeky). So direct writers lose big time when competing with sequential readers. Using fio, I have run one direct writer and two sequential readers and following are the results with 2.6.32-rc7 kernel and with for-2.6.33 branch. Test ==== 1 direct writer and 2 sequential reader running simultaneously. [global] directory=/mnt/sdc/fio/ runtime=10 [seqwrite] rw=write size=4G direct=1 [seqread] rw=read size=2G numjobs=2 2.6.32-rc7 ========== direct writes: aggrb=2,968KB/s readers : aggrb=101MB/s for-2.6.33 branch ================= direct write: aggrb=19KB/s readers aggrb=137MB/s This patch brings back the WRITE_ODIRECT flag, with the difference that we don't set the BIO_RW_UNPLUG flag so that device is not unplugged after submission of request and an explicit unplug from submitter is required. That way we fix the jeff's issue of not enough merging taking place in aio path as well as make sure direct writes get their fair share. After the fix ============= for-2.6.33 + fix ---------------- direct writes: aggrb=2,728KB/s reads: aggrb=103MB/s Thanks Vivek Signed-off-by: Vivek Goyal Signed-off-by: Jens Axboe --- fs/direct-io.c | 2 +- include/linux/fs.h | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/fs/direct-io.c b/fs/direct-io.c index 3af761c8c5cc..b912270942fa 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -1124,7 +1124,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, int acquire_i_mutex = 0; if (rw & WRITE) - rw = WRITE_SYNC_PLUG; + rw = WRITE_ODIRECT_PLUG; if (bdev) bdev_blkbits = blksize_bits(bdev_logical_block_size(bdev)); diff --git a/include/linux/fs.h b/include/linux/fs.h index 2f5fca4147c2..79cea8051736 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -129,6 +129,7 @@ struct inodes_stat_t { * WRITE_SYNC Like WRITE_SYNC_PLUG, but also unplugs the device * immediately after submission. The write equivalent * of READ_SYNC. + * WRITE_ODIRECT_PLUG Special case write for O_DIRECT only. * SWRITE_SYNC * SWRITE_SYNC_PLUG Like WRITE_SYNC/WRITE_SYNC_PLUG, but locks the buffer. * See SWRITE. @@ -150,6 +151,7 @@ struct inodes_stat_t { #define READ_META (READ | (1 << BIO_RW_META)) #define WRITE_SYNC_PLUG (WRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE)) #define WRITE_SYNC (WRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG)) +#define WRITE_ODIRECT_PLUG (WRITE | (1 << BIO_RW_SYNCIO)) #define SWRITE_SYNC_PLUG \ (SWRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE)) #define SWRITE_SYNC (SWRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG)) -- cgit v1.3-8-gc7d7 From d1eb650ff4130972fa21462fa49cd35a2865403b Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 24 Nov 2009 16:56:45 -0500 Subject: tracepoint: Move signal sending tracepoint to events/signal.h Move signal sending event to events/signal.h. This patch also renames sched_signal_send event to signal_generate. Changes in v4: - Fix a typo of task_struct pointer. Changes in v3: - Add docbook style comments Changes in v2: - Add siginfo argument - Add siginfo storing macro Signed-off-by: Masami Hiramatsu Reviewed-by: Jason Baron Acked-by: Roland McGrath Cc: systemtap Cc: DLE Cc: Oleg Nesterov LKML-Reference: <20091124215645.30449.60208.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar --- Documentation/DocBook/tracepoint.tmpl | 5 +++ include/trace/events/sched.h | 25 ------------- include/trace/events/signal.h | 66 +++++++++++++++++++++++++++++++++++ kernel/signal.c | 5 +-- 4 files changed, 74 insertions(+), 27 deletions(-) create mode 100644 include/trace/events/signal.h (limited to 'include') diff --git a/Documentation/DocBook/tracepoint.tmpl b/Documentation/DocBook/tracepoint.tmpl index b0756d0fd579..8bca1d5cec09 100644 --- a/Documentation/DocBook/tracepoint.tmpl +++ b/Documentation/DocBook/tracepoint.tmpl @@ -86,4 +86,9 @@ !Iinclude/trace/events/irq.h + + SIGNAL +!Iinclude/trace/events/signal.h + + diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 9d316b22388c..cfceb0b73e20 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -287,31 +287,6 @@ TRACE_EVENT(sched_process_fork, __entry->child_comm, __entry->child_pid) ); -/* - * Tracepoint for sending a signal: - */ -TRACE_EVENT(sched_signal_send, - - TP_PROTO(int sig, struct task_struct *p), - - TP_ARGS(sig, p), - - TP_STRUCT__entry( - __field( int, sig ) - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->sig = sig; - ), - - TP_printk("sig=%d comm=%s pid=%d", - __entry->sig, __entry->comm, __entry->pid) -); - /* * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE * adding sched_stat support to SCHED_FIFO/RR would be welcome. diff --git a/include/trace/events/signal.h b/include/trace/events/signal.h new file mode 100644 index 000000000000..ef51756a801d --- /dev/null +++ b/include/trace/events/signal.h @@ -0,0 +1,66 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM signal + +#if !defined(_TRACE_SIGNAL_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_SIGNAL_H + +#include +#include +#include + +#define TP_STORE_SIGINFO(__entry, info) \ + do { \ + if (info == SEND_SIG_NOINFO) { \ + __entry->errno = 0; \ + __entry->code = SI_USER; \ + } else if (info == SEND_SIG_PRIV) { \ + __entry->errno = 0; \ + __entry->code = SI_KERNEL; \ + } else { \ + __entry->errno = info->si_errno; \ + __entry->code = info->si_code; \ + } \ + } while (0) + +/** + * signal_generate - called when a signal is generated + * @sig: signal number + * @info: pointer to struct siginfo + * @task: pointer to struct task_struct + * + * Current process sends a 'sig' signal to 'task' process with + * 'info' siginfo. If 'info' is SEND_SIG_NOINFO or SEND_SIG_PRIV, + * 'info' is not a pointer and you can't access its field. Instead, + * SEND_SIG_NOINFO means that si_code is SI_USER, and SEND_SIG_PRIV + * means that si_code is SI_KERNEL. + */ +TRACE_EVENT(signal_generate, + + TP_PROTO(int sig, struct siginfo *info, struct task_struct *task), + + TP_ARGS(sig, info, task), + + TP_STRUCT__entry( + __field( int, sig ) + __field( int, errno ) + __field( int, code ) + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + ), + + TP_fast_assign( + __entry->sig = sig; + TP_STORE_SIGINFO(__entry, info); + memcpy(__entry->comm, task->comm, TASK_COMM_LEN); + __entry->pid = task->pid; + ), + + TP_printk("sig=%d errno=%d code=%d comm=%s pid=%d", + __entry->sig, __entry->errno, __entry->code, + __entry->comm, __entry->pid) +); + +#endif /* _TRACE_SIGNAL_H */ + +/* This part must be outside protection */ +#include diff --git a/kernel/signal.c b/kernel/signal.c index 6705320784fd..a1e0cc6b32c3 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -27,7 +27,8 @@ #include #include #include -#include +#define CREATE_TRACE_POINTS +#include #include #include @@ -834,7 +835,7 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t, struct sigqueue *q; int override_rlimit; - trace_sched_signal_send(sig, t); + trace_signal_generate(sig, info, t); assert_spin_locked(&t->sighand->siglock); -- cgit v1.3-8-gc7d7 From f9d4257e01d266e67420cc99d456b6d4c8464f54 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 24 Nov 2009 16:56:51 -0500 Subject: tracepoint: Add signal deliver event Add a tracepoint where a process gets a signal. This tracepoint shows signal-number, sa-handler and sa-flag. Changes in v3: - Add docbook style comments Changes in v2: - Add siginfo argument - Fix comment Signed-off-by: Masami Hiramatsu Reviewed-by: Jason Baron Acked-by: Roland McGrath Cc: systemtap Cc: DLE Cc: Oleg Nesterov LKML-Reference: <20091124215651.30449.20926.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar --- include/trace/events/signal.h | 39 +++++++++++++++++++++++++++++++++++++++ kernel/signal.c | 3 +++ 2 files changed, 42 insertions(+) (limited to 'include') diff --git a/include/trace/events/signal.h b/include/trace/events/signal.h index ef51756a801d..a6d71de0dc0d 100644 --- a/include/trace/events/signal.h +++ b/include/trace/events/signal.h @@ -60,6 +60,45 @@ TRACE_EVENT(signal_generate, __entry->comm, __entry->pid) ); +/** + * signal_deliver - called when a signal is delivered + * @sig: signal number + * @info: pointer to struct siginfo + * @ka: pointer to struct k_sigaction + * + * A 'sig' signal is delivered to current process with 'info' siginfo, + * and it will be handled by 'ka'. ka->sa.sa_handler can be SIG_IGN or + * SIG_DFL. + * Note that some signals reported by signal_generate tracepoint can be + * lost, ignored or modified (by debugger) before hitting this tracepoint. + * This means, this can show which signals are actually delivered, but + * matching generated signals and delivered signals may not be correct. + */ +TRACE_EVENT(signal_deliver, + + TP_PROTO(int sig, struct siginfo *info, struct k_sigaction *ka), + + TP_ARGS(sig, info, ka), + + TP_STRUCT__entry( + __field( int, sig ) + __field( int, errno ) + __field( int, code ) + __field( unsigned long, sa_handler ) + __field( unsigned long, sa_flags ) + ), + + TP_fast_assign( + __entry->sig = sig; + TP_STORE_SIGINFO(__entry, info); + __entry->sa_handler = (unsigned long)ka->sa.sa_handler; + __entry->sa_flags = ka->sa.sa_flags; + ), + + TP_printk("sig=%d errno=%d code=%d sa_handler=%lx sa_flags=%lx", + __entry->sig, __entry->errno, __entry->code, + __entry->sa_handler, __entry->sa_flags) +); #endif /* _TRACE_SIGNAL_H */ /* This part must be outside protection */ diff --git a/kernel/signal.c b/kernel/signal.c index a1e0cc6b32c3..349d44937406 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1840,6 +1840,9 @@ relock: ka = &sighand->action[signr-1]; } + /* Trace actually delivered signals. */ + trace_signal_deliver(signr, info, ka); + if (ka->sa.sa_handler == SIG_IGN) /* Do nothing. */ continue; if (ka->sa.sa_handler != SIG_DFL) { -- cgit v1.3-8-gc7d7 From ba005e1f417295d28cd1563ab82bc33af07fb16a Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 24 Nov 2009 16:56:58 -0500 Subject: tracepoint: Add signal loss events Add signal_overflow_fail and signal_lose_info tracepoints for signal-lost events. Changes in v3: - Add docbook style comments Changes in v2: - Use siginfo string macro Suggested-by: Roland McGrath Reviewed-by: Jason Baron Signed-off-by: Masami Hiramatsu Acked-by: Roland McGrath Cc: systemtap Cc: DLE Cc: Oleg Nesterov LKML-Reference: <20091124215658.30449.9934.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar --- include/trace/events/signal.h | 68 +++++++++++++++++++++++++++++++++++++++++++ kernel/signal.c | 19 ++++++++---- 2 files changed, 82 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/trace/events/signal.h b/include/trace/events/signal.h index a6d71de0dc0d..a510b75ac304 100644 --- a/include/trace/events/signal.h +++ b/include/trace/events/signal.h @@ -99,6 +99,74 @@ TRACE_EVENT(signal_deliver, __entry->sig, __entry->errno, __entry->code, __entry->sa_handler, __entry->sa_flags) ); + +/** + * signal_overflow_fail - called when signal queue is overflow + * @sig: signal number + * @group: signal to process group or not (bool) + * @info: pointer to struct siginfo + * + * Kernel fails to generate 'sig' signal with 'info' siginfo, because + * siginfo queue is overflow, and the signal is dropped. + * 'group' is not 0 if the signal will be sent to a process group. + * 'sig' is always one of RT signals. + */ +TRACE_EVENT(signal_overflow_fail, + + TP_PROTO(int sig, int group, struct siginfo *info), + + TP_ARGS(sig, group, info), + + TP_STRUCT__entry( + __field( int, sig ) + __field( int, group ) + __field( int, errno ) + __field( int, code ) + ), + + TP_fast_assign( + __entry->sig = sig; + __entry->group = group; + TP_STORE_SIGINFO(__entry, info); + ), + + TP_printk("sig=%d group=%d errno=%d code=%d", + __entry->sig, __entry->group, __entry->errno, __entry->code) +); + +/** + * signal_lose_info - called when siginfo is lost + * @sig: signal number + * @group: signal to process group or not (bool) + * @info: pointer to struct siginfo + * + * Kernel generates 'sig' signal but loses 'info' siginfo, because siginfo + * queue is overflow. + * 'group' is not 0 if the signal will be sent to a process group. + * 'sig' is always one of non-RT signals. + */ +TRACE_EVENT(signal_lose_info, + + TP_PROTO(int sig, int group, struct siginfo *info), + + TP_ARGS(sig, group, info), + + TP_STRUCT__entry( + __field( int, sig ) + __field( int, group ) + __field( int, errno ) + __field( int, code ) + ), + + TP_fast_assign( + __entry->sig = sig; + __entry->group = group; + TP_STORE_SIGINFO(__entry, info); + ), + + TP_printk("sig=%d group=%d errno=%d code=%d", + __entry->sig, __entry->group, __entry->errno, __entry->code) +); #endif /* _TRACE_SIGNAL_H */ /* This part must be outside protection */ diff --git a/kernel/signal.c b/kernel/signal.c index 349d44937406..93e72e5feae6 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -897,12 +897,21 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t, break; } } else if (!is_si_special(info)) { - if (sig >= SIGRTMIN && info->si_code != SI_USER) - /* - * Queue overflow, abort. We may abort if the signal was rt - * and sent by user using something other than kill(). - */ + if (sig >= SIGRTMIN && info->si_code != SI_USER) { + /* + * Queue overflow, abort. We may abort if the + * signal was rt and sent by user using something + * other than kill(). + */ + trace_signal_overflow_fail(sig, group, info); return -EAGAIN; + } else { + /* + * This is a silent loss of information. We still + * send the signal, but the *info bits are lost. + */ + trace_signal_lose_info(sig, group, info); + } } out_set: -- cgit v1.3-8-gc7d7 From d180c5bccec02612256fd8076ff3c1fac3429553 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Thu, 26 Nov 2009 14:48:30 +0900 Subject: sched: Introduce task_times() to replace task_{u,s}time() pair Functions task_{u,s}time() are called in pair in almost all cases. However task_stime() is implemented to call task_utime() from its inside, so such paired calls run task_utime() twice. It means we do heavy divisions (div_u64 + do_div) twice to get utime and stime which can be obtained at same time by one set of divisions. This patch introduces a function task_times(*tsk, *utime, *stime) to retrieve utime and stime at once in better, optimized way. Signed-off-by: Hidetoshi Seto Acked-by: Peter Zijlstra Cc: Stanislaw Gruszka Cc: Spencer Candland Cc: Oleg Nesterov Cc: Balbir Singh Cc: Americo Wang LKML-Reference: <4B0E16AE.906@jp.fujitsu.com> Signed-off-by: Ingo Molnar --- fs/proc/array.c | 3 +-- include/linux/sched.h | 1 + kernel/exit.c | 7 +++++-- kernel/sched.c | 55 ++++++++++++++++++++++++++++++++------------------- kernel/sys.c | 3 +-- 5 files changed, 43 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/fs/proc/array.c b/fs/proc/array.c index e209f64ab27b..330deda70d08 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -535,8 +535,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, if (!whole) { min_flt = task->min_flt; maj_flt = task->maj_flt; - utime = task_utime(task); - stime = task_stime(task); + task_times(task, &utime, &stime); gtime = task_gtime(task); } diff --git a/include/linux/sched.h b/include/linux/sched.h index 78ba664474f3..fe6ae1516640 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1723,6 +1723,7 @@ static inline void put_task_struct(struct task_struct *t) extern cputime_t task_utime(struct task_struct *p); extern cputime_t task_stime(struct task_struct *p); extern cputime_t task_gtime(struct task_struct *p); +extern void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st); /* * Per process flags diff --git a/kernel/exit.c b/kernel/exit.c index f7864ac2ecc1..29068ab2670a 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -91,6 +91,8 @@ static void __exit_signal(struct task_struct *tsk) if (atomic_dec_and_test(&sig->count)) posix_cpu_timers_exit_group(tsk); else { + cputime_t utime, stime; + /* * If there is any task waiting for the group exit * then notify it: @@ -110,8 +112,9 @@ static void __exit_signal(struct task_struct *tsk) * We won't ever get here for the group leader, since it * will have been the last reference on the signal_struct. */ - sig->utime = cputime_add(sig->utime, task_utime(tsk)); - sig->stime = cputime_add(sig->stime, task_stime(tsk)); + task_times(tsk, &utime, &stime); + sig->utime = cputime_add(sig->utime, utime); + sig->stime = cputime_add(sig->stime, stime); sig->gtime = cputime_add(sig->gtime, task_gtime(tsk)); sig->min_flt += tsk->min_flt; sig->maj_flt += tsk->maj_flt; diff --git a/kernel/sched.c b/kernel/sched.c index 315ba4059f93..475a6f2b7158 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5191,6 +5191,14 @@ cputime_t task_stime(struct task_struct *p) { return p->stime; } + +void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) +{ + if (ut) + *ut = task_utime(p); + if (st) + *st = task_stime(p); +} #else #ifndef nsecs_to_cputime @@ -5198,41 +5206,48 @@ cputime_t task_stime(struct task_struct *p) msecs_to_cputime(div_u64((__nsecs), NSEC_PER_MSEC)) #endif -cputime_t task_utime(struct task_struct *p) +void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) { - cputime_t utime = p->utime, total = utime + p->stime; - u64 temp; + cputime_t rtime, utime = p->utime, total = utime + p->stime; /* * Use CFS's precise accounting: */ - temp = (u64)nsecs_to_cputime(p->se.sum_exec_runtime); + rtime = nsecs_to_cputime(p->se.sum_exec_runtime); if (total) { - temp *= utime; + u64 temp; + + temp = (u64)(rtime * utime); do_div(temp, total); - } - utime = (cputime_t)temp; + utime = (cputime_t)temp; + } else + utime = rtime; + /* + * Compare with previous values, to keep monotonicity: + */ p->prev_utime = max(p->prev_utime, utime); - return p->prev_utime; + p->prev_stime = max(p->prev_stime, rtime - p->prev_utime); + + if (ut) + *ut = p->prev_utime; + if (st) + *st = p->prev_stime; +} + +cputime_t task_utime(struct task_struct *p) +{ + cputime_t utime; + task_times(p, &utime, NULL); + return utime; } cputime_t task_stime(struct task_struct *p) { cputime_t stime; - - /* - * Use CFS's precise accounting. (we subtract utime from - * the total, to make sure the total observed by userspace - * grows monotonically - apps rely on that): - */ - stime = nsecs_to_cputime(p->se.sum_exec_runtime) - task_utime(p); - - if (stime >= 0) - p->prev_stime = max(p->prev_stime, stime); - - return p->prev_stime; + task_times(p, NULL, &stime); + return stime; } #endif diff --git a/kernel/sys.c b/kernel/sys.c index ce17760d9c51..bbdfce0d4347 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1346,8 +1346,7 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) utime = stime = cputime_zero; if (who == RUSAGE_THREAD) { - utime = task_utime(current); - stime = task_stime(current); + task_times(current, &utime, &stime); accumulate_thread_rusage(p, r); maxrss = p->signal->maxrss; goto out; -- cgit v1.3-8-gc7d7 From d5b7c78e975302a1bab28263266c39ecb71acad4 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Thu, 26 Nov 2009 14:49:05 +0900 Subject: sched: Remove task_{u,s,g}time() Now all task_{u,s}time() pairs are replaced by task_times(). And task_gtime() is too simple to be an inline function. Cleanup them all. Signed-off-by: Hidetoshi Seto Acked-by: Peter Zijlstra Cc: Stanislaw Gruszka Cc: Spencer Candland Cc: Oleg Nesterov Cc: Balbir Singh Cc: Americo Wang LKML-Reference: <4B0E16D1.70902@jp.fujitsu.com> Signed-off-by: Ingo Molnar --- fs/proc/array.c | 4 ++-- include/linux/sched.h | 3 --- kernel/exit.c | 2 +- kernel/sched.c | 33 ++------------------------------- 4 files changed, 5 insertions(+), 37 deletions(-) (limited to 'include') diff --git a/fs/proc/array.c b/fs/proc/array.c index 330deda70d08..ca61a88aed66 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -511,7 +511,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, do { min_flt += t->min_flt; maj_flt += t->maj_flt; - gtime = cputime_add(gtime, task_gtime(t)); + gtime = cputime_add(gtime, t->gtime); t = next_thread(t); } while (t != task); @@ -536,7 +536,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, min_flt = task->min_flt; maj_flt = task->maj_flt; task_times(task, &utime, &stime); - gtime = task_gtime(task); + gtime = task->gtime; } /* scale priority and nice values from timeslices to -20..20 */ diff --git a/include/linux/sched.h b/include/linux/sched.h index fe6ae1516640..0395b0f4df3a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1720,9 +1720,6 @@ static inline void put_task_struct(struct task_struct *t) __put_task_struct(t); } -extern cputime_t task_utime(struct task_struct *p); -extern cputime_t task_stime(struct task_struct *p); -extern cputime_t task_gtime(struct task_struct *p); extern void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st); /* diff --git a/kernel/exit.c b/kernel/exit.c index 29068ab2670a..2eaf68b634e3 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -115,7 +115,7 @@ static void __exit_signal(struct task_struct *tsk) task_times(tsk, &utime, &stime); sig->utime = cputime_add(sig->utime, utime); sig->stime = cputime_add(sig->stime, stime); - sig->gtime = cputime_add(sig->gtime, task_gtime(tsk)); + sig->gtime = cputime_add(sig->gtime, tsk->gtime); sig->min_flt += tsk->min_flt; sig->maj_flt += tsk->maj_flt; sig->nvcsw += tsk->nvcsw; diff --git a/kernel/sched.c b/kernel/sched.c index 475a6f2b7158..82251c21f785 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5182,22 +5182,12 @@ void account_idle_ticks(unsigned long ticks) * Use precise platform statistics if available: */ #ifdef CONFIG_VIRT_CPU_ACCOUNTING -cputime_t task_utime(struct task_struct *p) -{ - return p->utime; -} - -cputime_t task_stime(struct task_struct *p) -{ - return p->stime; -} - void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) { if (ut) - *ut = task_utime(p); + *ut = p->utime; if (st) - *st = task_stime(p); + *st = p->stime; } #else @@ -5235,27 +5225,8 @@ void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) if (st) *st = p->prev_stime; } - -cputime_t task_utime(struct task_struct *p) -{ - cputime_t utime; - task_times(p, &utime, NULL); - return utime; -} - -cputime_t task_stime(struct task_struct *p) -{ - cputime_t stime; - task_times(p, NULL, &stime); - return stime; -} #endif -inline cputime_t task_gtime(struct task_struct *p) -{ - return p->gtime; -} - /* * This function gets called by the timer code, with HZ frequency. * We call it with interrupts disabled. -- cgit v1.3-8-gc7d7 From b7b20df91d43d5e59578b8fc16e895c0c8cbd9b5 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Thu, 26 Nov 2009 14:49:27 +0900 Subject: sched, time: Define nsecs_to_jiffies() Use of msecs_to_jiffies() for nsecs_to_cputime() have some problems: - The type of msecs_to_jiffies()'s argument is unsigned int, so it cannot convert msecs greater than UINT_MAX = about 49.7 days. - msecs_to_jiffies() returns MAX_JIFFY_OFFSET if MSB of argument is set, assuming that input was negative value. So it cannot convert msecs greater than INT_MAX = about 24.8 days too. This patch defines a new function nsecs_to_jiffies() that can deal greater values, and that can deal all incoming values as unsigned. Signed-off-by: Hidetoshi Seto Acked-by: Peter Zijlstra Cc: Stanislaw Gruszka Cc: Spencer Candland Cc: Oleg Nesterov Cc: Balbir Singh Cc: Amrico Wang Cc: Thomas Gleixner Cc: John Stultz LKML-Reference: <4B0E16E7.5070307@jp.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/jiffies.h | 1 + kernel/sched.c | 3 +-- kernel/time.c | 30 ++++++++++++++++++++++++++++++ 3 files changed, 32 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index 1a9cf78bfce5..6811f4bfc6e7 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -307,6 +307,7 @@ extern clock_t jiffies_to_clock_t(long x); extern unsigned long clock_t_to_jiffies(unsigned long x); extern u64 jiffies_64_to_clock_t(u64 x); extern u64 nsec_to_clock_t(u64 x); +extern unsigned long nsecs_to_jiffies(u64 n); #define TIMESTAMP_SIZE 30 diff --git a/kernel/sched.c b/kernel/sched.c index 82251c21f785..b3d4e2be95aa 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5192,8 +5192,7 @@ void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) #else #ifndef nsecs_to_cputime -# define nsecs_to_cputime(__nsecs) \ - msecs_to_cputime(div_u64((__nsecs), NSEC_PER_MSEC)) +# define nsecs_to_cputime(__nsecs) nsecs_to_jiffies(__nsecs) #endif void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) diff --git a/kernel/time.c b/kernel/time.c index 2e2e469a7fec..804798005d19 100644 --- a/kernel/time.c +++ b/kernel/time.c @@ -662,6 +662,36 @@ u64 nsec_to_clock_t(u64 x) #endif } +/** + * nsecs_to_jiffies - Convert nsecs in u64 to jiffies + * + * @n: nsecs in u64 + * + * Unlike {m,u}secs_to_jiffies, type of input is not unsigned int but u64. + * And this doesn't return MAX_JIFFY_OFFSET since this function is designed + * for scheduler, not for use in device drivers to calculate timeout value. + * + * note: + * NSEC_PER_SEC = 10^9 = (5^9 * 2^9) = (1953125 * 512) + * ULLONG_MAX ns = 18446744073.709551615 secs = about 584 years + */ +unsigned long nsecs_to_jiffies(u64 n) +{ +#if (NSEC_PER_SEC % HZ) == 0 + /* Common case, HZ = 100, 128, 200, 250, 256, 500, 512, 1000 etc. */ + return div_u64(n, NSEC_PER_SEC / HZ); +#elif (HZ % 512) == 0 + /* overflow after 292 years if HZ = 1024 */ + return div_u64(n * HZ / 512, NSEC_PER_SEC / 512); +#else + /* + * Generic case - optimized for cases where HZ is a multiple of 3. + * overflow after 64.99 years, exact for HZ = 60, 72, 90, 120 etc. + */ + return div_u64(n * 9, (9ull * NSEC_PER_SEC + HZ / 2) / HZ); +#endif +} + #if (BITS_PER_LONG < 64) u64 get_jiffies_64(void) { -- cgit v1.3-8-gc7d7 From 860dc73608a091e0b325218acc2701709d5f221a Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Thu, 19 Nov 2009 17:48:29 -0500 Subject: [SCSI] fix async scan add/remove race resulting in an oops Async scanning introduced a very wide window where the SCSI device is up and running but has not yet been added to sysfs. We delay the adding until all scans have completed to retain the same ordering as sync scanning. This delay in visibility causes an oops if a device is removed before we make it visible because the SCSI removal routines have an inbuilt assumption that if a device is in SDEV_RUNNING state, it must be visible (which is not necessarily true in the async scanning case). Fix this by introducing an additional is_visible flag which we can use to condition the tear down so we do the right thing for running but not yet made visible. Reported-by: Alexey Kuznetsov Signed-off-by: James Bottomley --- drivers/scsi/scsi_scan.c | 18 +++---------- drivers/scsi/scsi_sysfs.c | 63 ++++++++++++++++++++-------------------------- include/scsi/scsi_device.h | 1 + 3 files changed, 32 insertions(+), 50 deletions(-) (limited to 'include') diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 0547a7f44d42..47291bcff0d5 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -952,16 +952,6 @@ static int scsi_add_lun(struct scsi_device *sdev, unsigned char *inq_result, return SCSI_SCAN_LUN_PRESENT; } -static inline void scsi_destroy_sdev(struct scsi_device *sdev) -{ - scsi_device_set_state(sdev, SDEV_DEL); - if (sdev->host->hostt->slave_destroy) - sdev->host->hostt->slave_destroy(sdev); - transport_destroy_device(&sdev->sdev_gendev); - put_device(&sdev->sdev_dev); - put_device(&sdev->sdev_gendev); -} - #ifdef CONFIG_SCSI_LOGGING /** * scsi_inq_str - print INQUIRY data from min to max index, strip trailing whitespace @@ -1139,7 +1129,7 @@ static int scsi_probe_and_add_lun(struct scsi_target *starget, } } } else - scsi_destroy_sdev(sdev); + __scsi_remove_device(sdev); out: return res; } @@ -1500,7 +1490,7 @@ static int scsi_report_lun_scan(struct scsi_target *starget, int bflags, /* * the sdev we used didn't appear in the report luns scan */ - scsi_destroy_sdev(sdev); + __scsi_remove_device(sdev); return ret; } @@ -1710,7 +1700,7 @@ static void scsi_sysfs_add_devices(struct Scsi_Host *shost) shost_for_each_device(sdev, shost) { if (!scsi_host_scan_allowed(shost) || scsi_sysfs_add_sdev(sdev) != 0) - scsi_destroy_sdev(sdev); + __scsi_remove_device(sdev); } } @@ -1943,7 +1933,7 @@ void scsi_free_host_dev(struct scsi_device *sdev) { BUG_ON(sdev->id != sdev->host->this_id); - scsi_destroy_sdev(sdev); + __scsi_remove_device(sdev); } EXPORT_SYMBOL(scsi_free_host_dev); diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 5c7eb63a19d1..392d8db33905 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -854,82 +854,73 @@ int scsi_sysfs_add_sdev(struct scsi_device *sdev) transport_configure_device(&starget->dev); error = device_add(&sdev->sdev_gendev); if (error) { - put_device(sdev->sdev_gendev.parent); printk(KERN_INFO "error 1\n"); - return error; + goto out_remove; } error = device_add(&sdev->sdev_dev); if (error) { printk(KERN_INFO "error 2\n"); - goto clean_device; + device_del(&sdev->sdev_gendev); + goto out_remove; } + transport_add_device(&sdev->sdev_gendev); + sdev->is_visible = 1; /* create queue files, which may be writable, depending on the host */ if (sdev->host->hostt->change_queue_depth) error = device_create_file(&sdev->sdev_gendev, &sdev_attr_queue_depth_rw); else error = device_create_file(&sdev->sdev_gendev, &dev_attr_queue_depth); - if (error) { - __scsi_remove_device(sdev); - goto out; - } + if (error) + goto out_remove; + if (sdev->host->hostt->change_queue_type) error = device_create_file(&sdev->sdev_gendev, &sdev_attr_queue_type_rw); else error = device_create_file(&sdev->sdev_gendev, &dev_attr_queue_type); - if (error) { - __scsi_remove_device(sdev); - goto out; - } + if (error) + goto out_remove; error = bsg_register_queue(rq, &sdev->sdev_gendev, NULL, NULL); if (error) + /* we're treating error on bsg register as non-fatal, + * so pretend nothing went wrong */ sdev_printk(KERN_INFO, sdev, "Failed to register bsg queue, errno=%d\n", error); - /* we're treating error on bsg register as non-fatal, so pretend - * nothing went wrong */ - error = 0; - /* add additional host specific attributes */ if (sdev->host->hostt->sdev_attrs) { for (i = 0; sdev->host->hostt->sdev_attrs[i]; i++) { error = device_create_file(&sdev->sdev_gendev, sdev->host->hostt->sdev_attrs[i]); - if (error) { - __scsi_remove_device(sdev); - goto out; - } + if (error) + goto out_remove; } } - transport_add_device(&sdev->sdev_gendev); - out: - return error; - - clean_device: - scsi_device_set_state(sdev, SDEV_CANCEL); - - device_del(&sdev->sdev_gendev); - transport_destroy_device(&sdev->sdev_gendev); - put_device(&sdev->sdev_dev); - put_device(&sdev->sdev_gendev); + return 0; + out_remove: + __scsi_remove_device(sdev); return error; + } void __scsi_remove_device(struct scsi_device *sdev) { struct device *dev = &sdev->sdev_gendev; - if (scsi_device_set_state(sdev, SDEV_CANCEL) != 0) - return; + if (sdev->is_visible) { + if (scsi_device_set_state(sdev, SDEV_CANCEL) != 0) + return; - bsg_unregister_queue(sdev->request_queue); - device_unregister(&sdev->sdev_dev); - transport_remove_device(dev); - device_del(dev); + bsg_unregister_queue(sdev->request_queue); + device_unregister(&sdev->sdev_dev); + transport_remove_device(dev); + device_del(dev); + } else + put_device(&sdev->sdev_dev); scsi_device_set_state(sdev, SDEV_DEL); if (sdev->host->hostt->slave_destroy) sdev->host->hostt->slave_destroy(sdev); diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 9af48cbf0036..f097ae340bc1 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -145,6 +145,7 @@ struct scsi_device { unsigned retry_hwerror:1; /* Retry HARDWARE_ERROR */ unsigned last_sector_bug:1; /* do not use multisector accesses on SD_LAST_BUGGY_SECTORS */ + unsigned is_visible:1; /* is the device visible in sysfs */ DECLARE_BITMAP(supported_events, SDEV_EVT_MAXBITS); /* supported events */ struct list_head event_list; /* asserted events */ -- cgit v1.3-8-gc7d7 From 445409602c09219767c06497c0dc2285eac244ed Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 26 Nov 2009 06:07:08 +0000 Subject: veth: move loopback logic to common location The veth driver contains code to forward an skb from the start_xmit function of one network device into the receive path of another device. Moving that code into a common location lets us reuse the code for direct forwarding of data between macvlan ports, and possibly in other drivers. Signed-off-by: Arnd Bergmann Acked-by: Patrick McHardy Signed-off-by: David S. Miller --- drivers/net/veth.c | 17 +++-------------- include/linux/netdevice.h | 2 ++ net/core/dev.c | 40 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 45 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 2d657f2314cb..6c4b5a2d7877 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -155,8 +155,6 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev) struct veth_net_stats *stats, *rcv_stats; int length, cpu; - skb_orphan(skb); - priv = netdev_priv(dev); rcv = priv->peer; rcv_priv = netdev_priv(rcv); @@ -168,20 +166,12 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev) if (!(rcv->flags & IFF_UP)) goto tx_drop; - if (skb->len > (rcv->mtu + MTU_PAD)) - goto rx_drop; - - skb->tstamp.tv64 = 0; - skb->pkt_type = PACKET_HOST; - skb->protocol = eth_type_trans(skb, rcv); if (dev->features & NETIF_F_NO_CSUM) skb->ip_summed = rcv_priv->ip_summed; - skb->mark = 0; - secpath_reset(skb); - nf_reset(skb); - - length = skb->len; + length = skb->len + ETH_HLEN; + if (dev_forward_skb(rcv, skb) != NET_RX_SUCCESS) + goto rx_drop; stats->tx_bytes += length; stats->tx_packets++; @@ -189,7 +179,6 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev) rcv_stats->rx_bytes += length; rcv_stats->rx_packets++; - netif_rx(skb); return NETDEV_TX_OK; tx_drop: diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 97873e31661c..9428793775a0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1562,6 +1562,8 @@ extern int dev_set_mac_address(struct net_device *, extern int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq); +extern int dev_forward_skb(struct net_device *dev, + struct sk_buff *skb); extern int netdev_budget; diff --git a/net/core/dev.c b/net/core/dev.c index e65af6041415..7775e8b48deb 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -105,6 +105,7 @@ #include #include #include +#include #include #include #include @@ -1419,6 +1420,45 @@ static inline void net_timestamp(struct sk_buff *skb) skb->tstamp.tv64 = 0; } +/** + * dev_forward_skb - loopback an skb to another netif + * + * @dev: destination network device + * @skb: buffer to forward + * + * return values: + * NET_RX_SUCCESS (no congestion) + * NET_RX_DROP (packet was dropped) + * + * dev_forward_skb can be used for injecting an skb from the + * start_xmit function of one device into the receive queue + * of another device. + * + * The receiving device may be in another namespace, so + * we have to clear all information in the skb that could + * impact namespace isolation. + */ +int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) +{ + skb_orphan(skb); + + if (!(dev->flags & IFF_UP)) + return NET_RX_DROP; + + if (skb->len > (dev->mtu + dev->hard_header_len)) + return NET_RX_DROP; + + skb_dst_drop(skb); + skb->tstamp.tv64 = 0; + skb->pkt_type = PACKET_HOST; + skb->protocol = eth_type_trans(skb, dev); + skb->mark = 0; + secpath_reset(skb); + nf_reset(skb); + return netif_rx(skb); +} +EXPORT_SYMBOL_GPL(dev_forward_skb); + /* * Support routine. Sends outgoing frames to any network * taps currently in use. -- cgit v1.3-8-gc7d7 From 27c0b1a850cdea6298f573d835782f3337be913c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 26 Nov 2009 06:07:11 +0000 Subject: macvlan: export macvlan mode through netlink In order to support all three modes of macvlan at runtime, extend the existing netlink protocol to allow choosing the mode per macvlan slave interface. This depends on a matching patch to iproute2 in order to become accessible in user land. Signed-off-by: Arnd Bergmann Acked-by: Patrick McHardy Signed-off-by: David S. Miller --- drivers/net/macvlan.c | 56 +++++++++++++++++++++++++++++++++++++++++++------ include/linux/if_link.h | 15 +++++++++++++ 2 files changed, 65 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index d6bd84353f44..322112c7358a 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -33,12 +33,6 @@ #define MACVLAN_HASH_SIZE (1 << BITS_PER_BYTE) -enum macvlan_mode { - MACVLAN_MODE_PRIVATE = 1, - MACVLAN_MODE_VEPA = 2, - MACVLAN_MODE_BRIDGE = 4, -}; - struct macvlan_port { struct net_device *dev; struct hlist_head vlan_hash[MACVLAN_HASH_SIZE]; @@ -614,6 +608,17 @@ static int macvlan_validate(struct nlattr *tb[], struct nlattr *data[]) if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) return -EADDRNOTAVAIL; } + + if (data && data[IFLA_MACVLAN_MODE]) { + switch (nla_get_u32(data[IFLA_MACVLAN_MODE])) { + case MACVLAN_MODE_PRIVATE: + case MACVLAN_MODE_VEPA: + case MACVLAN_MODE_BRIDGE: + break; + default: + return -EINVAL; + } + } return 0; } @@ -678,6 +683,10 @@ static int macvlan_newlink(struct net *src_net, struct net_device *dev, vlan->dev = dev; vlan->port = port; + vlan->mode = MACVLAN_MODE_VEPA; + if (data && data[IFLA_MACVLAN_MODE]) + vlan->mode = nla_get_u32(data[IFLA_MACVLAN_MODE]); + err = register_netdevice(dev); if (err < 0) return err; @@ -699,6 +708,36 @@ static void macvlan_dellink(struct net_device *dev, struct list_head *head) macvlan_port_destroy(port->dev); } +static int macvlan_changelink(struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) +{ + struct macvlan_dev *vlan = netdev_priv(dev); + if (data && data[IFLA_MACVLAN_MODE]) + vlan->mode = nla_get_u32(data[IFLA_MACVLAN_MODE]); + return 0; +} + +static size_t macvlan_get_size(const struct net_device *dev) +{ + return nla_total_size(4); +} + +static int macvlan_fill_info(struct sk_buff *skb, + const struct net_device *dev) +{ + struct macvlan_dev *vlan = netdev_priv(dev); + + NLA_PUT_U32(skb, IFLA_MACVLAN_MODE, vlan->mode); + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +static const struct nla_policy macvlan_policy[IFLA_MACVLAN_MAX + 1] = { + [IFLA_MACVLAN_MODE] = { .type = NLA_U32 }, +}; + static struct rtnl_link_ops macvlan_link_ops __read_mostly = { .kind = "macvlan", .priv_size = sizeof(struct macvlan_dev), @@ -707,6 +746,11 @@ static struct rtnl_link_ops macvlan_link_ops __read_mostly = { .validate = macvlan_validate, .newlink = macvlan_newlink, .dellink = macvlan_dellink, + .maxtype = IFLA_MACVLAN_MAX, + .policy = macvlan_policy, + .changelink = macvlan_changelink, + .get_size = macvlan_get_size, + .fill_info = macvlan_fill_info, }; static int macvlan_device_event(struct notifier_block *unused, diff --git a/include/linux/if_link.h b/include/linux/if_link.h index 1d3b2425f661..6674791622ca 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -181,4 +181,19 @@ struct ifla_vlan_qos_mapping { __u32 to; }; +/* MACVLAN section */ +enum { + IFLA_MACVLAN_UNSPEC, + IFLA_MACVLAN_MODE, + __IFLA_MACVLAN_MAX, +}; + +#define IFLA_MACVLAN_MAX (__IFLA_MACVLAN_MAX - 1) + +enum macvlan_mode { + MACVLAN_MODE_PRIVATE = 1, /* don't talk to other macvlans */ + MACVLAN_MODE_VEPA = 2, /* talk to other ports through ext bridge */ + MACVLAN_MODE_BRIDGE = 4, /* talk to bridge ports directly */ +}; + #endif /* _LINUX_IF_LINK_H */ -- cgit v1.3-8-gc7d7 From 5e7565930524410f097f5b04f8aba663089a6ffc Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 25 Nov 2009 07:54:54 +0000 Subject: vlan: support "loose binding" to the underlying network device Currently the UP/DOWN state of VLANs is synchronized to the state of the underlying device, meaning all VLANs are set down once the underlying device is set down. This causes all routes to the VLAN devices to vanish. Add a flag to specify a "loose binding" mode, in which only the operstate is transfered, but the VLAN device state is independant. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 1 + net/8021q/vlan.c | 9 +++++++-- net/8021q/vlan_dev.c | 6 ++++-- net/8021q/vlan_netlink.c | 3 ++- 4 files changed, 14 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 153f6b9e722c..3d870fda8c4f 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -339,6 +339,7 @@ enum vlan_ioctl_cmds { enum vlan_flags { VLAN_FLAG_REORDER_HDR = 0x1, VLAN_FLAG_GVRP = 0x2, + VLAN_FLAG_LOOSE_BINDING = 0x4, }; enum vlan_name_types { diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 1483243edf14..225aa2fac0e3 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -431,6 +431,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, struct vlan_group *grp; int i, flgs; struct net_device *vlandev; + struct vlan_dev_info *vlan; LIST_HEAD(list); if (is_vlan_dev(dev)) @@ -507,7 +508,9 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, if (!(flgs & IFF_UP)) continue; - dev_change_flags(vlandev, flgs & ~IFF_UP); + vlan = vlan_dev_info(vlandev); + if (!(vlan->flags & VLAN_FLAG_LOOSE_BINDING)) + dev_change_flags(vlandev, flgs & ~IFF_UP); vlan_transfer_operstate(dev, vlandev); } break; @@ -523,7 +526,9 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, if (flgs & IFF_UP) continue; - dev_change_flags(vlandev, flgs | IFF_UP); + vlan = vlan_dev_info(vlandev); + if (!(vlan->flags & VLAN_FLAG_LOOSE_BINDING)) + dev_change_flags(vlandev, flgs | IFF_UP); vlan_transfer_operstate(dev, vlandev); } break; diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index de0dc6bacbe8..b7889782047e 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -431,7 +431,8 @@ int vlan_dev_change_flags(const struct net_device *dev, u32 flags, u32 mask) struct vlan_dev_info *vlan = vlan_dev_info(dev); u32 old_flags = vlan->flags; - if (mask & ~(VLAN_FLAG_REORDER_HDR | VLAN_FLAG_GVRP)) + if (mask & ~(VLAN_FLAG_REORDER_HDR | VLAN_FLAG_GVRP | + VLAN_FLAG_LOOSE_BINDING)) return -EINVAL; vlan->flags = (old_flags & ~mask) | (flags & mask); @@ -456,7 +457,8 @@ static int vlan_dev_open(struct net_device *dev) struct net_device *real_dev = vlan->real_dev; int err; - if (!(real_dev->flags & IFF_UP)) + if (!(real_dev->flags & IFF_UP) && + !(vlan->flags & VLAN_FLAG_LOOSE_BINDING)) return -ENETDOWN; if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr)) { diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index 3c9cf6a8e7fb..ddc105734af7 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -60,7 +60,8 @@ static int vlan_validate(struct nlattr *tb[], struct nlattr *data[]) if (data[IFLA_VLAN_FLAGS]) { flags = nla_data(data[IFLA_VLAN_FLAGS]); if ((flags->flags & flags->mask) & - ~(VLAN_FLAG_REORDER_HDR | VLAN_FLAG_GVRP)) + ~(VLAN_FLAG_REORDER_HDR | VLAN_FLAG_GVRP | + VLAN_FLAG_LOOSE_BINDING)) return -EINVAL; } -- cgit v1.3-8-gc7d7 From 5fa10b28e57f94a90535cfeafe89dcee9f47d540 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 27 Nov 2009 04:55:53 +0100 Subject: hw-breakpoints: Use struct perf_event_attr to define user breakpoints In-kernel user breakpoints are created using functions in which we pass breakpoint parameters as individual variables: address, length and type. Although it fits well for x86, this just does not scale across archictectures that may support this api later as these may have more or different needs. Pass in a perf_event_attr structure instead because it is meant to evolve as much as possible into a generic hardware breakpoint parameter structure. Reported-by: K.Prasad Signed-off-by: Frederic Weisbecker LKML-Reference: <1259294154-5197-1-git-send-regression-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/ptrace.c | 74 ++++++++++++++++++++---------------- include/linux/hw_breakpoint.h | 36 ++++++++---------- kernel/hw_breakpoint.c | 87 +++++++++---------------------------------- 3 files changed, 75 insertions(+), 122 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 75e0cd847bd6..2941b32ea666 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -593,6 +593,34 @@ static unsigned long ptrace_get_dr7(struct perf_event *bp[]) return dr7; } +static struct perf_event * +ptrace_modify_breakpoint(struct perf_event *bp, int len, int type, + struct task_struct *tsk) +{ + int err; + int gen_len, gen_type; + DEFINE_BREAKPOINT_ATTR(attr); + + /* + * We shoud have at least an inactive breakpoint at this + * slot. It means the user is writing dr7 without having + * written the address register first + */ + if (!bp) + return ERR_PTR(-EINVAL); + + err = arch_bp_generic_fields(len, type, &gen_len, &gen_type); + if (err) + return ERR_PTR(err); + + attr = bp->attr; + attr.bp_len = gen_len; + attr.bp_type = gen_type; + attr.disabled = 0; + + return modify_user_hw_breakpoint(bp, &attr, bp->callback, tsk); +} + /* * Handle ptrace writes to debug register 7. */ @@ -603,7 +631,6 @@ static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data) int i, orig_ret = 0, rc = 0; int enabled, second_pass = 0; unsigned len, type; - int gen_len, gen_type; struct perf_event *bp; data &= ~DR_CONTROL_RESERVED; @@ -634,33 +661,12 @@ restore: continue; } - /* - * We shoud have at least an inactive breakpoint at this - * slot. It means the user is writing dr7 without having - * written the address register first - */ - if (!bp) { - rc = -EINVAL; - break; - } - - rc = arch_bp_generic_fields(len, type, &gen_len, &gen_type); - if (rc) - break; - - /* - * This is a temporary thing as bp is unregistered/registered - * to simulate modification - */ - bp = modify_user_hw_breakpoint(bp, bp->attr.bp_addr, gen_len, - gen_type, bp->callback, - tsk, true); - thread->ptrace_bps[i] = NULL; + bp = ptrace_modify_breakpoint(bp, len, type, tsk); /* Incorrect bp, or we have a bug in bp API */ if (IS_ERR(bp)) { rc = PTR_ERR(bp); - bp = NULL; + thread->ptrace_bps[i] = NULL; break; } thread->ptrace_bps[i] = bp; @@ -707,24 +713,26 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr, { struct perf_event *bp; struct thread_struct *t = &tsk->thread; + DEFINE_BREAKPOINT_ATTR(attr); if (!t->ptrace_bps[nr]) { /* * Put stub len and type to register (reserve) an inactive but * correct bp */ - bp = register_user_hw_breakpoint(addr, HW_BREAKPOINT_LEN_1, - HW_BREAKPOINT_W, - ptrace_triggered, tsk, - false); + attr.bp_addr = addr; + attr.bp_len = HW_BREAKPOINT_LEN_1; + attr.bp_type = HW_BREAKPOINT_W; + attr.disabled = 1; + + bp = register_user_hw_breakpoint(&attr, ptrace_triggered, tsk); } else { bp = t->ptrace_bps[nr]; t->ptrace_bps[nr] = NULL; - bp = modify_user_hw_breakpoint(bp, addr, bp->attr.bp_len, - bp->attr.bp_type, - bp->callback, - tsk, - bp->attr.disabled); + + attr = bp->attr; + attr.bp_addr = addr; + bp = modify_user_hw_breakpoint(bp, &attr, bp->callback, tsk); } /* * CHECKME: the previous code returned -EIO if the addr wasn't a diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h index c9f7f7c7b0e0..5da472e434b7 100644 --- a/include/linux/hw_breakpoint.h +++ b/include/linux/hw_breakpoint.h @@ -20,6 +20,14 @@ enum { #ifdef CONFIG_HAVE_HW_BREAKPOINT +/* As it's for in-kernel or ptrace use, we want it to be pinned */ +#define DEFINE_BREAKPOINT_ATTR(name) \ +struct perf_event_attr name = { \ + .type = PERF_TYPE_BREAKPOINT, \ + .size = sizeof(name), \ + .pinned = 1, \ +}; + static inline unsigned long hw_breakpoint_addr(struct perf_event *bp) { return bp->attr.bp_addr; @@ -36,22 +44,16 @@ static inline int hw_breakpoint_len(struct perf_event *bp) } extern struct perf_event * -register_user_hw_breakpoint(unsigned long addr, - int len, - int type, +register_user_hw_breakpoint(struct perf_event_attr *attr, perf_callback_t triggered, - struct task_struct *tsk, - bool active); + struct task_struct *tsk); /* FIXME: only change from the attr, and don't unregister */ extern struct perf_event * modify_user_hw_breakpoint(struct perf_event *bp, - unsigned long addr, - int len, - int type, + struct perf_event_attr *attr, perf_callback_t triggered, - struct task_struct *tsk, - bool active); + struct task_struct *tsk); /* * Kernel breakpoints are not associated with any particular thread. @@ -89,20 +91,14 @@ static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp) #else /* !CONFIG_HAVE_HW_BREAKPOINT */ static inline struct perf_event * -register_user_hw_breakpoint(unsigned long addr, - int len, - int type, +register_user_hw_breakpoint(struct perf_event_attr *attr, perf_callback_t triggered, - struct task_struct *tsk, - bool active) { return NULL; } + struct task_struct *tsk) { return NULL; } static inline struct perf_event * modify_user_hw_breakpoint(struct perf_event *bp, - unsigned long addr, - int len, - int type, + struct perf_event_attr *attr, perf_callback_t triggered, - struct task_struct *tsk, - bool active) { return NULL; } + struct task_struct *tsk) { return NULL; } static inline struct perf_event * register_wide_hw_breakpoint_cpu(unsigned long addr, int len, diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index 32e1018191be..2a47514f12fd 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -289,90 +289,32 @@ int register_perf_hw_breakpoint(struct perf_event *bp) return __register_perf_hw_breakpoint(bp); } -/* - * Register a breakpoint bound to a task and a given cpu. - * If cpu is -1, the breakpoint is active for the task in every cpu - * If the task is -1, the breakpoint is active for every tasks in the given - * cpu. - */ -static struct perf_event * -register_user_hw_breakpoint_cpu(unsigned long addr, - int len, - int type, - perf_callback_t triggered, - pid_t pid, - int cpu, - bool active) -{ - struct perf_event_attr *attr; - struct perf_event *bp; - - attr = kzalloc(sizeof(*attr), GFP_KERNEL); - if (!attr) - return ERR_PTR(-ENOMEM); - - attr->type = PERF_TYPE_BREAKPOINT; - attr->size = sizeof(*attr); - attr->bp_addr = addr; - attr->bp_len = len; - attr->bp_type = type; - /* - * Such breakpoints are used by debuggers to trigger signals when - * we hit the excepted memory op. We can't miss such events, they - * must be pinned. - */ - attr->pinned = 1; - - if (!active) - attr->disabled = 1; - - bp = perf_event_create_kernel_counter(attr, cpu, pid, triggered); - kfree(attr); - - return bp; -} - /** * register_user_hw_breakpoint - register a hardware breakpoint for user space - * @addr: is the memory address that triggers the breakpoint - * @len: the length of the access to the memory (1 byte, 2 bytes etc...) - * @type: the type of the access to the memory (read/write/exec) + * @attr: breakpoint attributes * @triggered: callback to trigger when we hit the breakpoint * @tsk: pointer to 'task_struct' of the process to which the address belongs - * @active: should we activate it while registering it - * */ struct perf_event * -register_user_hw_breakpoint(unsigned long addr, - int len, - int type, +register_user_hw_breakpoint(struct perf_event_attr *attr, perf_callback_t triggered, - struct task_struct *tsk, - bool active) + struct task_struct *tsk) { - return register_user_hw_breakpoint_cpu(addr, len, type, triggered, - tsk->pid, -1, active); + return perf_event_create_kernel_counter(attr, -1, tsk->pid, triggered); } EXPORT_SYMBOL_GPL(register_user_hw_breakpoint); /** * modify_user_hw_breakpoint - modify a user-space hardware breakpoint * @bp: the breakpoint structure to modify - * @addr: is the memory address that triggers the breakpoint - * @len: the length of the access to the memory (1 byte, 2 bytes etc...) - * @type: the type of the access to the memory (read/write/exec) + * @attr: new breakpoint attributes * @triggered: callback to trigger when we hit the breakpoint * @tsk: pointer to 'task_struct' of the process to which the address belongs - * @active: should we activate it while registering it */ struct perf_event * -modify_user_hw_breakpoint(struct perf_event *bp, - unsigned long addr, - int len, - int type, +modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr, perf_callback_t triggered, - struct task_struct *tsk, - bool active) + struct task_struct *tsk) { /* * FIXME: do it without unregistering @@ -381,8 +323,7 @@ modify_user_hw_breakpoint(struct perf_event *bp, */ unregister_hw_breakpoint(bp); - return register_user_hw_breakpoint(addr, len, type, triggered, - tsk, active); + return perf_event_create_kernel_counter(attr, -1, tsk->pid, triggered); } EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint); @@ -406,8 +347,16 @@ register_kernel_hw_breakpoint_cpu(unsigned long addr, int cpu, bool active) { - return register_user_hw_breakpoint_cpu(addr, len, type, triggered, - -1, cpu, active); + DEFINE_BREAKPOINT_ATTR(attr); + + attr.bp_addr = addr; + attr.bp_len = len; + attr.bp_type = type; + + if (!active) + attr.disabled = 1; + + return perf_event_create_kernel_counter(&attr, cpu, -1, triggered); } /** -- cgit v1.3-8-gc7d7 From dd1853c3f493f6d22d9e5390b192a07b73d2ac0a Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 27 Nov 2009 04:55:54 +0100 Subject: hw-breakpoints: Use struct perf_event_attr to define kernel breakpoints Kernel breakpoints are created using functions in which we pass breakpoint parameters as individual variables: address, length and type. Although it fits well for x86, this just does not scale across architectures that may support this api later as these may have more or different needs. Pass in a perf_event_attr structure instead because it is meant to evolve as much as possible into a generic hardware breakpoint parameter structure. Reported-by: K.Prasad Signed-off-by: Frederic Weisbecker LKML-Reference: <1259294154-5197-2-git-send-regression-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- include/linux/hw_breakpoint.h | 35 ++++++++++++--------------- kernel/hw_breakpoint.c | 35 ++++----------------------- kernel/trace/trace_ksym.c | 42 ++++++++++++++++----------------- samples/hw_breakpoint/data_breakpoint.c | 10 ++++---- 4 files changed, 44 insertions(+), 78 deletions(-) (limited to 'include') diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h index 5da472e434b7..a03daed08c59 100644 --- a/include/linux/hw_breakpoint.h +++ b/include/linux/hw_breakpoint.h @@ -28,6 +28,13 @@ struct perf_event_attr name = { \ .pinned = 1, \ }; +static inline void hw_breakpoint_init(struct perf_event_attr *attr) +{ + attr->type = PERF_TYPE_BREAKPOINT; + attr->size = sizeof(*attr); + attr->pinned = 1; +} + static inline unsigned long hw_breakpoint_addr(struct perf_event *bp) { return bp->attr.bp_addr; @@ -59,19 +66,13 @@ modify_user_hw_breakpoint(struct perf_event *bp, * Kernel breakpoints are not associated with any particular thread. */ extern struct perf_event * -register_wide_hw_breakpoint_cpu(unsigned long addr, - int len, - int type, +register_wide_hw_breakpoint_cpu(struct perf_event_attr *attr, perf_callback_t triggered, - int cpu, - bool active); + int cpu); extern struct perf_event ** -register_wide_hw_breakpoint(unsigned long addr, - int len, - int type, - perf_callback_t triggered, - bool active); +register_wide_hw_breakpoint(struct perf_event_attr *attr, + perf_callback_t triggered); extern int register_perf_hw_breakpoint(struct perf_event *bp); extern int __register_perf_hw_breakpoint(struct perf_event *bp); @@ -100,18 +101,12 @@ modify_user_hw_breakpoint(struct perf_event *bp, perf_callback_t triggered, struct task_struct *tsk) { return NULL; } static inline struct perf_event * -register_wide_hw_breakpoint_cpu(unsigned long addr, - int len, - int type, +register_wide_hw_breakpoint_cpu(struct perf_event_attr *attr, perf_callback_t triggered, - int cpu, - bool active) { return NULL; } + int cpu) { return NULL; } static inline struct perf_event ** -register_wide_hw_breakpoint(unsigned long addr, - int len, - int type, - perf_callback_t triggered, - bool active) { return NULL; } +register_wide_hw_breakpoint(struct perf_event_attr *attr, + perf_callback_t triggered) { return NULL; } static inline int register_perf_hw_breakpoint(struct perf_event *bp) { return -ENOSYS; } static inline int diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index 2a47514f12fd..cf5ee1628411 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -339,42 +339,16 @@ void unregister_hw_breakpoint(struct perf_event *bp) } EXPORT_SYMBOL_GPL(unregister_hw_breakpoint); -static struct perf_event * -register_kernel_hw_breakpoint_cpu(unsigned long addr, - int len, - int type, - perf_callback_t triggered, - int cpu, - bool active) -{ - DEFINE_BREAKPOINT_ATTR(attr); - - attr.bp_addr = addr; - attr.bp_len = len; - attr.bp_type = type; - - if (!active) - attr.disabled = 1; - - return perf_event_create_kernel_counter(&attr, cpu, -1, triggered); -} - /** * register_wide_hw_breakpoint - register a wide breakpoint in the kernel - * @addr: is the memory address that triggers the breakpoint - * @len: the length of the access to the memory (1 byte, 2 bytes etc...) - * @type: the type of the access to the memory (read/write/exec) + * @attr: breakpoint attributes * @triggered: callback to trigger when we hit the breakpoint - * @active: should we activate it while registering it * * @return a set of per_cpu pointers to perf events */ struct perf_event ** -register_wide_hw_breakpoint(unsigned long addr, - int len, - int type, - perf_callback_t triggered, - bool active) +register_wide_hw_breakpoint(struct perf_event_attr *attr, + perf_callback_t triggered) { struct perf_event **cpu_events, **pevent, *bp; long err; @@ -386,8 +360,7 @@ register_wide_hw_breakpoint(unsigned long addr, for_each_possible_cpu(cpu) { pevent = per_cpu_ptr(cpu_events, cpu); - bp = register_kernel_hw_breakpoint_cpu(addr, len, type, - triggered, cpu, active); + bp = perf_event_create_kernel_counter(attr, cpu, -1, triggered); *pevent = bp; diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c index c538b15b95d6..ddfa0fd43bc0 100644 --- a/kernel/trace/trace_ksym.c +++ b/kernel/trace/trace_ksym.c @@ -42,9 +42,7 @@ struct trace_ksym { struct perf_event **ksym_hbp; - unsigned long ksym_addr; - int type; - int len; + struct perf_event_attr attr; #ifdef CONFIG_PROFILE_KSYM_TRACER unsigned long counter; #endif @@ -71,7 +69,7 @@ void ksym_collect_stats(unsigned long hbp_hit_addr) rcu_read_lock(); hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) { - if ((entry->ksym_addr == hbp_hit_addr) && + if ((entry->attr.bp_addr == hbp_hit_addr) && (entry->counter <= MAX_UL_INT)) { entry->counter++; break; @@ -192,14 +190,15 @@ int process_new_ksym_entry(char *ksymname, int op, unsigned long addr) if (!entry) return -ENOMEM; - entry->type = op; - entry->ksym_addr = addr; - entry->len = HW_BREAKPOINT_LEN_4; + hw_breakpoint_init(&entry->attr); + + entry->attr.bp_type = op; + entry->attr.bp_addr = addr; + entry->attr.bp_len = HW_BREAKPOINT_LEN_4; ret = -EAGAIN; - entry->ksym_hbp = register_wide_hw_breakpoint(entry->ksym_addr, - entry->len, entry->type, - ksym_hbp_handler, true); + entry->ksym_hbp = register_wide_hw_breakpoint(&entry->attr, + ksym_hbp_handler); if (IS_ERR(entry->ksym_hbp)) { ret = PTR_ERR(entry->ksym_hbp); @@ -236,12 +235,12 @@ static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf, mutex_lock(&ksym_tracer_mutex); hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) { - ret = trace_seq_printf(s, "%pS:", (void *)entry->ksym_addr); - if (entry->type == HW_BREAKPOINT_R) + ret = trace_seq_printf(s, "%pS:", (void *)entry->attr.bp_addr); + if (entry->attr.bp_type == HW_BREAKPOINT_R) ret = trace_seq_puts(s, "r--\n"); - else if (entry->type == HW_BREAKPOINT_W) + else if (entry->attr.bp_type == HW_BREAKPOINT_W) ret = trace_seq_puts(s, "-w-\n"); - else if (entry->type == (HW_BREAKPOINT_W | HW_BREAKPOINT_R)) + else if (entry->attr.bp_type == (HW_BREAKPOINT_W | HW_BREAKPOINT_R)) ret = trace_seq_puts(s, "rw-\n"); WARN_ON_ONCE(!ret); } @@ -317,9 +316,9 @@ static ssize_t ksym_trace_filter_write(struct file *file, ret = -EINVAL; hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) { - if (entry->ksym_addr == ksym_addr) { + if (entry->attr.bp_addr == ksym_addr) { /* Check for malformed request: (6) */ - if (entry->type != op) + if (entry->attr.bp_type != op) changed = 1; else goto out; @@ -328,13 +327,12 @@ static ssize_t ksym_trace_filter_write(struct file *file, } if (changed) { unregister_wide_hw_breakpoint(entry->ksym_hbp); - entry->type = op; + entry->attr.bp_type = op; ret = 0; if (op > 0) { entry->ksym_hbp = - register_wide_hw_breakpoint(entry->ksym_addr, - entry->len, entry->type, - ksym_hbp_handler, true); + register_wide_hw_breakpoint(&entry->attr, + ksym_hbp_handler); if (IS_ERR(entry->ksym_hbp)) ret = PTR_ERR(entry->ksym_hbp); else @@ -489,7 +487,7 @@ static int ksym_tracer_stat_show(struct seq_file *m, void *v) entry = hlist_entry(stat, struct trace_ksym, ksym_hlist); - access_type = entry->type; + access_type = entry->attr.bp_type; switch (access_type) { case HW_BREAKPOINT_R: @@ -505,7 +503,7 @@ static int ksym_tracer_stat_show(struct seq_file *m, void *v) seq_puts(m, " NA "); } - if (lookup_symbol_name(entry->ksym_addr, fn_name) >= 0) + if (lookup_symbol_name(entry->attr.bp_addr, fn_name) >= 0) seq_printf(m, " %-36s", fn_name); else seq_printf(m, " %-36s", ""); diff --git a/samples/hw_breakpoint/data_breakpoint.c b/samples/hw_breakpoint/data_breakpoint.c index ee7f9fbaffbd..29525500df00 100644 --- a/samples/hw_breakpoint/data_breakpoint.c +++ b/samples/hw_breakpoint/data_breakpoint.c @@ -51,13 +51,13 @@ static void sample_hbp_handler(struct perf_event *temp, void *data) static int __init hw_break_module_init(void) { int ret; - unsigned long addr; + DEFINE_BREAKPOINT_ATTR(attr); - addr = kallsyms_lookup_name(ksym_name); + attr.bp_addr = kallsyms_lookup_name(ksym_name); + attr.bp_len = HW_BREAKPOINT_LEN_4; + attr.bp_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R; - sample_hbp = register_wide_hw_breakpoint(addr, HW_BREAKPOINT_LEN_4, - HW_BREAKPOINT_W | HW_BREAKPOINT_R, - sample_hbp_handler, true); + sample_hbp = register_wide_hw_breakpoint(&attr, sample_hbp_handler); if (IS_ERR(sample_hbp)) { ret = PTR_ERR(sample_hbp); goto fail; -- cgit v1.3-8-gc7d7 From c8602edf3f9471466755329b78d309f2a01dd449 Mon Sep 17 00:00:00 2001 From: Thomas Kunze Date: Tue, 10 Feb 2009 14:54:57 +0100 Subject: move drivers/mfd/*.h to include/linux/mfd So drivers like collie_battery driver can use those files easier. --- drivers/mfd/mcp-core.c | 2 +- drivers/mfd/mcp-sa11x0.c | 2 +- drivers/mfd/mcp.h | 66 ----------- drivers/mfd/ucb1x00-assabet.c | 2 +- drivers/mfd/ucb1x00-core.c | 2 +- drivers/mfd/ucb1x00-ts.c | 2 +- drivers/mfd/ucb1x00.h | 255 ------------------------------------------ include/linux/mfd/mcp.h | 68 +++++++++++ include/linux/mfd/ucb1x00.h | 255 ++++++++++++++++++++++++++++++++++++++++++ 9 files changed, 328 insertions(+), 326 deletions(-) delete mode 100644 drivers/mfd/mcp.h delete mode 100644 drivers/mfd/ucb1x00.h create mode 100644 include/linux/mfd/mcp.h create mode 100644 include/linux/mfd/ucb1x00.h (limited to 'include') diff --git a/drivers/mfd/mcp-core.c b/drivers/mfd/mcp-core.c index 57271cb3b316..84815f9ef636 100644 --- a/drivers/mfd/mcp-core.c +++ b/drivers/mfd/mcp-core.c @@ -17,11 +17,11 @@ #include #include #include +#include #include #include -#include "mcp.h" #define to_mcp(d) container_of(d, struct mcp, attached_device) #define to_mcp_driver(d) container_of(d, struct mcp_driver, drv) diff --git a/drivers/mfd/mcp-sa11x0.c b/drivers/mfd/mcp-sa11x0.c index 62b32dabf629..212189815c8e 100644 --- a/drivers/mfd/mcp-sa11x0.c +++ b/drivers/mfd/mcp-sa11x0.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -28,7 +29,6 @@ #include -#include "mcp.h" struct mcp_sa11x0 { u32 mccr0; diff --git a/drivers/mfd/mcp.h b/drivers/mfd/mcp.h deleted file mode 100644 index c093a93b8808..000000000000 --- a/drivers/mfd/mcp.h +++ /dev/null @@ -1,66 +0,0 @@ -/* - * linux/drivers/mfd/mcp.h - * - * Copyright (C) 2001 Russell King, All Rights Reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License. - */ -#ifndef MCP_H -#define MCP_H - -struct mcp_ops; - -struct mcp { - struct module *owner; - struct mcp_ops *ops; - spinlock_t lock; - int use_count; - unsigned int sclk_rate; - unsigned int rw_timeout; - dma_device_t dma_audio_rd; - dma_device_t dma_audio_wr; - dma_device_t dma_telco_rd; - dma_device_t dma_telco_wr; - struct device attached_device; -}; - -struct mcp_ops { - void (*set_telecom_divisor)(struct mcp *, unsigned int); - void (*set_audio_divisor)(struct mcp *, unsigned int); - void (*reg_write)(struct mcp *, unsigned int, unsigned int); - unsigned int (*reg_read)(struct mcp *, unsigned int); - void (*enable)(struct mcp *); - void (*disable)(struct mcp *); -}; - -void mcp_set_telecom_divisor(struct mcp *, unsigned int); -void mcp_set_audio_divisor(struct mcp *, unsigned int); -void mcp_reg_write(struct mcp *, unsigned int, unsigned int); -unsigned int mcp_reg_read(struct mcp *, unsigned int); -void mcp_enable(struct mcp *); -void mcp_disable(struct mcp *); -#define mcp_get_sclk_rate(mcp) ((mcp)->sclk_rate) - -struct mcp *mcp_host_alloc(struct device *, size_t); -int mcp_host_register(struct mcp *); -void mcp_host_unregister(struct mcp *); - -struct mcp_driver { - struct device_driver drv; - int (*probe)(struct mcp *); - void (*remove)(struct mcp *); - int (*suspend)(struct mcp *, pm_message_t); - int (*resume)(struct mcp *); -}; - -int mcp_driver_register(struct mcp_driver *); -void mcp_driver_unregister(struct mcp_driver *); - -#define mcp_get_drvdata(mcp) dev_get_drvdata(&(mcp)->attached_device) -#define mcp_set_drvdata(mcp,d) dev_set_drvdata(&(mcp)->attached_device, d) - -#define mcp_priv(mcp) ((void *)((mcp)+1)) - -#endif diff --git a/drivers/mfd/ucb1x00-assabet.c b/drivers/mfd/ucb1x00-assabet.c index 86fed4870f93..cea9da60850d 100644 --- a/drivers/mfd/ucb1x00-assabet.c +++ b/drivers/mfd/ucb1x00-assabet.c @@ -14,10 +14,10 @@ #include #include #include +#include #include -#include "ucb1x00.h" #define UCB1X00_ATTR(name,input)\ static ssize_t name##_show(struct device *dev, struct device_attribute *attr, \ diff --git a/drivers/mfd/ucb1x00-core.c b/drivers/mfd/ucb1x00-core.c index 60c3988f3cf3..f9de7891e57f 100644 --- a/drivers/mfd/ucb1x00-core.c +++ b/drivers/mfd/ucb1x00-core.c @@ -25,11 +25,11 @@ #include #include #include +#include #include #include -#include "ucb1x00.h" static DEFINE_MUTEX(ucb1x00_mutex); static LIST_HEAD(ucb1x00_drivers); diff --git a/drivers/mfd/ucb1x00-ts.c b/drivers/mfd/ucb1x00-ts.c index 61b7d3eb9a2f..000cb414a78a 100644 --- a/drivers/mfd/ucb1x00-ts.c +++ b/drivers/mfd/ucb1x00-ts.c @@ -30,12 +30,12 @@ #include #include #include +#include #include #include #include -#include "ucb1x00.h" struct ucb1x00_ts { diff --git a/drivers/mfd/ucb1x00.h b/drivers/mfd/ucb1x00.h deleted file mode 100644 index a8ad8a0ed5db..000000000000 --- a/drivers/mfd/ucb1x00.h +++ /dev/null @@ -1,255 +0,0 @@ -/* - * linux/drivers/mfd/ucb1x00.h - * - * Copyright (C) 2001 Russell King, All Rights Reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License. - */ -#ifndef UCB1200_H -#define UCB1200_H - -#define UCB_IO_DATA 0x00 -#define UCB_IO_DIR 0x01 - -#define UCB_IO_0 (1 << 0) -#define UCB_IO_1 (1 << 1) -#define UCB_IO_2 (1 << 2) -#define UCB_IO_3 (1 << 3) -#define UCB_IO_4 (1 << 4) -#define UCB_IO_5 (1 << 5) -#define UCB_IO_6 (1 << 6) -#define UCB_IO_7 (1 << 7) -#define UCB_IO_8 (1 << 8) -#define UCB_IO_9 (1 << 9) - -#define UCB_IE_RIS 0x02 -#define UCB_IE_FAL 0x03 -#define UCB_IE_STATUS 0x04 -#define UCB_IE_CLEAR 0x04 -#define UCB_IE_ADC (1 << 11) -#define UCB_IE_TSPX (1 << 12) -#define UCB_IE_TSMX (1 << 13) -#define UCB_IE_TCLIP (1 << 14) -#define UCB_IE_ACLIP (1 << 15) - -#define UCB_IRQ_TSPX 12 - -#define UCB_TC_A 0x05 -#define UCB_TC_A_LOOP (1 << 7) /* UCB1200 */ -#define UCB_TC_A_AMPL (1 << 7) /* UCB1300 */ - -#define UCB_TC_B 0x06 -#define UCB_TC_B_VOICE_ENA (1 << 3) -#define UCB_TC_B_CLIP (1 << 4) -#define UCB_TC_B_ATT (1 << 6) -#define UCB_TC_B_SIDE_ENA (1 << 11) -#define UCB_TC_B_MUTE (1 << 13) -#define UCB_TC_B_IN_ENA (1 << 14) -#define UCB_TC_B_OUT_ENA (1 << 15) - -#define UCB_AC_A 0x07 -#define UCB_AC_B 0x08 -#define UCB_AC_B_LOOP (1 << 8) -#define UCB_AC_B_MUTE (1 << 13) -#define UCB_AC_B_IN_ENA (1 << 14) -#define UCB_AC_B_OUT_ENA (1 << 15) - -#define UCB_TS_CR 0x09 -#define UCB_TS_CR_TSMX_POW (1 << 0) -#define UCB_TS_CR_TSPX_POW (1 << 1) -#define UCB_TS_CR_TSMY_POW (1 << 2) -#define UCB_TS_CR_TSPY_POW (1 << 3) -#define UCB_TS_CR_TSMX_GND (1 << 4) -#define UCB_TS_CR_TSPX_GND (1 << 5) -#define UCB_TS_CR_TSMY_GND (1 << 6) -#define UCB_TS_CR_TSPY_GND (1 << 7) -#define UCB_TS_CR_MODE_INT (0 << 8) -#define UCB_TS_CR_MODE_PRES (1 << 8) -#define UCB_TS_CR_MODE_POS (2 << 8) -#define UCB_TS_CR_BIAS_ENA (1 << 11) -#define UCB_TS_CR_TSPX_LOW (1 << 12) -#define UCB_TS_CR_TSMX_LOW (1 << 13) - -#define UCB_ADC_CR 0x0a -#define UCB_ADC_SYNC_ENA (1 << 0) -#define UCB_ADC_VREFBYP_CON (1 << 1) -#define UCB_ADC_INP_TSPX (0 << 2) -#define UCB_ADC_INP_TSMX (1 << 2) -#define UCB_ADC_INP_TSPY (2 << 2) -#define UCB_ADC_INP_TSMY (3 << 2) -#define UCB_ADC_INP_AD0 (4 << 2) -#define UCB_ADC_INP_AD1 (5 << 2) -#define UCB_ADC_INP_AD2 (6 << 2) -#define UCB_ADC_INP_AD3 (7 << 2) -#define UCB_ADC_EXT_REF (1 << 5) -#define UCB_ADC_START (1 << 7) -#define UCB_ADC_ENA (1 << 15) - -#define UCB_ADC_DATA 0x0b -#define UCB_ADC_DAT_VAL (1 << 15) -#define UCB_ADC_DAT(x) (((x) & 0x7fe0) >> 5) - -#define UCB_ID 0x0c -#define UCB_ID_1200 0x1004 -#define UCB_ID_1300 0x1005 -#define UCB_ID_TC35143 0x9712 - -#define UCB_MODE 0x0d -#define UCB_MODE_DYN_VFLAG_ENA (1 << 12) -#define UCB_MODE_AUD_OFF_CAN (1 << 13) - -#include "mcp.h" - -struct ucb1x00_irq { - void *devid; - void (*fn)(int, void *); -}; - -struct ucb1x00 { - spinlock_t lock; - struct mcp *mcp; - unsigned int irq; - struct semaphore adc_sem; - spinlock_t io_lock; - u16 id; - u16 io_dir; - u16 io_out; - u16 adc_cr; - u16 irq_fal_enbl; - u16 irq_ris_enbl; - struct ucb1x00_irq irq_handler[16]; - struct device dev; - struct list_head node; - struct list_head devs; -}; - -struct ucb1x00_driver; - -struct ucb1x00_dev { - struct list_head dev_node; - struct list_head drv_node; - struct ucb1x00 *ucb; - struct ucb1x00_driver *drv; - void *priv; -}; - -struct ucb1x00_driver { - struct list_head node; - struct list_head devs; - int (*add)(struct ucb1x00_dev *dev); - void (*remove)(struct ucb1x00_dev *dev); - int (*suspend)(struct ucb1x00_dev *dev, pm_message_t state); - int (*resume)(struct ucb1x00_dev *dev); -}; - -#define classdev_to_ucb1x00(cd) container_of(cd, struct ucb1x00, dev) - -int ucb1x00_register_driver(struct ucb1x00_driver *); -void ucb1x00_unregister_driver(struct ucb1x00_driver *); - -/** - * ucb1x00_clkrate - return the UCB1x00 SIB clock rate - * @ucb: UCB1x00 structure describing chip - * - * Return the SIB clock rate in Hz. - */ -static inline unsigned int ucb1x00_clkrate(struct ucb1x00 *ucb) -{ - return mcp_get_sclk_rate(ucb->mcp); -} - -/** - * ucb1x00_enable - enable the UCB1x00 SIB clock - * @ucb: UCB1x00 structure describing chip - * - * Enable the SIB clock. This can be called multiple times. - */ -static inline void ucb1x00_enable(struct ucb1x00 *ucb) -{ - mcp_enable(ucb->mcp); -} - -/** - * ucb1x00_disable - disable the UCB1x00 SIB clock - * @ucb: UCB1x00 structure describing chip - * - * Disable the SIB clock. The SIB clock will only be disabled - * when the number of ucb1x00_enable calls match the number of - * ucb1x00_disable calls. - */ -static inline void ucb1x00_disable(struct ucb1x00 *ucb) -{ - mcp_disable(ucb->mcp); -} - -/** - * ucb1x00_reg_write - write a UCB1x00 register - * @ucb: UCB1x00 structure describing chip - * @reg: UCB1x00 4-bit register index to write - * @val: UCB1x00 16-bit value to write - * - * Write the UCB1x00 register @reg with value @val. The SIB - * clock must be running for this function to return. - */ -static inline void ucb1x00_reg_write(struct ucb1x00 *ucb, unsigned int reg, unsigned int val) -{ - mcp_reg_write(ucb->mcp, reg, val); -} - -/** - * ucb1x00_reg_read - read a UCB1x00 register - * @ucb: UCB1x00 structure describing chip - * @reg: UCB1x00 4-bit register index to write - * - * Read the UCB1x00 register @reg and return its value. The SIB - * clock must be running for this function to return. - */ -static inline unsigned int ucb1x00_reg_read(struct ucb1x00 *ucb, unsigned int reg) -{ - return mcp_reg_read(ucb->mcp, reg); -} -/** - * ucb1x00_set_audio_divisor - - * @ucb: UCB1x00 structure describing chip - * @div: SIB clock divisor - */ -static inline void ucb1x00_set_audio_divisor(struct ucb1x00 *ucb, unsigned int div) -{ - mcp_set_audio_divisor(ucb->mcp, div); -} - -/** - * ucb1x00_set_telecom_divisor - - * @ucb: UCB1x00 structure describing chip - * @div: SIB clock divisor - */ -static inline void ucb1x00_set_telecom_divisor(struct ucb1x00 *ucb, unsigned int div) -{ - mcp_set_telecom_divisor(ucb->mcp, div); -} - -void ucb1x00_io_set_dir(struct ucb1x00 *ucb, unsigned int, unsigned int); -void ucb1x00_io_write(struct ucb1x00 *ucb, unsigned int, unsigned int); -unsigned int ucb1x00_io_read(struct ucb1x00 *ucb); - -#define UCB_NOSYNC (0) -#define UCB_SYNC (1) - -unsigned int ucb1x00_adc_read(struct ucb1x00 *ucb, int adc_channel, int sync); -void ucb1x00_adc_enable(struct ucb1x00 *ucb); -void ucb1x00_adc_disable(struct ucb1x00 *ucb); - -/* - * Which edges of the IRQ do you want to control today? - */ -#define UCB_RISING (1 << 0) -#define UCB_FALLING (1 << 1) - -int ucb1x00_hook_irq(struct ucb1x00 *ucb, unsigned int idx, void (*fn)(int, void *), void *devid); -void ucb1x00_enable_irq(struct ucb1x00 *ucb, unsigned int idx, int edges); -void ucb1x00_disable_irq(struct ucb1x00 *ucb, unsigned int idx, int edges); -int ucb1x00_free_irq(struct ucb1x00 *ucb, unsigned int idx, void *devid); - -#endif diff --git a/include/linux/mfd/mcp.h b/include/linux/mfd/mcp.h new file mode 100644 index 000000000000..be95e09fd746 --- /dev/null +++ b/include/linux/mfd/mcp.h @@ -0,0 +1,68 @@ +/* + * linux/drivers/mfd/mcp.h + * + * Copyright (C) 2001 Russell King, All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License. + */ +#ifndef MCP_H +#define MCP_H + +#include + +struct mcp_ops; + +struct mcp { + struct module *owner; + struct mcp_ops *ops; + spinlock_t lock; + int use_count; + unsigned int sclk_rate; + unsigned int rw_timeout; + dma_device_t dma_audio_rd; + dma_device_t dma_audio_wr; + dma_device_t dma_telco_rd; + dma_device_t dma_telco_wr; + struct device attached_device; +}; + +struct mcp_ops { + void (*set_telecom_divisor)(struct mcp *, unsigned int); + void (*set_audio_divisor)(struct mcp *, unsigned int); + void (*reg_write)(struct mcp *, unsigned int, unsigned int); + unsigned int (*reg_read)(struct mcp *, unsigned int); + void (*enable)(struct mcp *); + void (*disable)(struct mcp *); +}; + +void mcp_set_telecom_divisor(struct mcp *, unsigned int); +void mcp_set_audio_divisor(struct mcp *, unsigned int); +void mcp_reg_write(struct mcp *, unsigned int, unsigned int); +unsigned int mcp_reg_read(struct mcp *, unsigned int); +void mcp_enable(struct mcp *); +void mcp_disable(struct mcp *); +#define mcp_get_sclk_rate(mcp) ((mcp)->sclk_rate) + +struct mcp *mcp_host_alloc(struct device *, size_t); +int mcp_host_register(struct mcp *); +void mcp_host_unregister(struct mcp *); + +struct mcp_driver { + struct device_driver drv; + int (*probe)(struct mcp *); + void (*remove)(struct mcp *); + int (*suspend)(struct mcp *, pm_message_t); + int (*resume)(struct mcp *); +}; + +int mcp_driver_register(struct mcp_driver *); +void mcp_driver_unregister(struct mcp_driver *); + +#define mcp_get_drvdata(mcp) dev_get_drvdata(&(mcp)->attached_device) +#define mcp_set_drvdata(mcp,d) dev_set_drvdata(&(mcp)->attached_device, d) + +#define mcp_priv(mcp) ((void *)((mcp)+1)) + +#endif diff --git a/include/linux/mfd/ucb1x00.h b/include/linux/mfd/ucb1x00.h new file mode 100644 index 000000000000..eac346336382 --- /dev/null +++ b/include/linux/mfd/ucb1x00.h @@ -0,0 +1,255 @@ +/* + * linux/include/mfd/ucb1x00.h + * + * Copyright (C) 2001 Russell King, All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License. + */ +#ifndef UCB1200_H +#define UCB1200_H + +#include +#define UCB_IO_DATA 0x00 +#define UCB_IO_DIR 0x01 + +#define UCB_IO_0 (1 << 0) +#define UCB_IO_1 (1 << 1) +#define UCB_IO_2 (1 << 2) +#define UCB_IO_3 (1 << 3) +#define UCB_IO_4 (1 << 4) +#define UCB_IO_5 (1 << 5) +#define UCB_IO_6 (1 << 6) +#define UCB_IO_7 (1 << 7) +#define UCB_IO_8 (1 << 8) +#define UCB_IO_9 (1 << 9) + +#define UCB_IE_RIS 0x02 +#define UCB_IE_FAL 0x03 +#define UCB_IE_STATUS 0x04 +#define UCB_IE_CLEAR 0x04 +#define UCB_IE_ADC (1 << 11) +#define UCB_IE_TSPX (1 << 12) +#define UCB_IE_TSMX (1 << 13) +#define UCB_IE_TCLIP (1 << 14) +#define UCB_IE_ACLIP (1 << 15) + +#define UCB_IRQ_TSPX 12 + +#define UCB_TC_A 0x05 +#define UCB_TC_A_LOOP (1 << 7) /* UCB1200 */ +#define UCB_TC_A_AMPL (1 << 7) /* UCB1300 */ + +#define UCB_TC_B 0x06 +#define UCB_TC_B_VOICE_ENA (1 << 3) +#define UCB_TC_B_CLIP (1 << 4) +#define UCB_TC_B_ATT (1 << 6) +#define UCB_TC_B_SIDE_ENA (1 << 11) +#define UCB_TC_B_MUTE (1 << 13) +#define UCB_TC_B_IN_ENA (1 << 14) +#define UCB_TC_B_OUT_ENA (1 << 15) + +#define UCB_AC_A 0x07 +#define UCB_AC_B 0x08 +#define UCB_AC_B_LOOP (1 << 8) +#define UCB_AC_B_MUTE (1 << 13) +#define UCB_AC_B_IN_ENA (1 << 14) +#define UCB_AC_B_OUT_ENA (1 << 15) + +#define UCB_TS_CR 0x09 +#define UCB_TS_CR_TSMX_POW (1 << 0) +#define UCB_TS_CR_TSPX_POW (1 << 1) +#define UCB_TS_CR_TSMY_POW (1 << 2) +#define UCB_TS_CR_TSPY_POW (1 << 3) +#define UCB_TS_CR_TSMX_GND (1 << 4) +#define UCB_TS_CR_TSPX_GND (1 << 5) +#define UCB_TS_CR_TSMY_GND (1 << 6) +#define UCB_TS_CR_TSPY_GND (1 << 7) +#define UCB_TS_CR_MODE_INT (0 << 8) +#define UCB_TS_CR_MODE_PRES (1 << 8) +#define UCB_TS_CR_MODE_POS (2 << 8) +#define UCB_TS_CR_BIAS_ENA (1 << 11) +#define UCB_TS_CR_TSPX_LOW (1 << 12) +#define UCB_TS_CR_TSMX_LOW (1 << 13) + +#define UCB_ADC_CR 0x0a +#define UCB_ADC_SYNC_ENA (1 << 0) +#define UCB_ADC_VREFBYP_CON (1 << 1) +#define UCB_ADC_INP_TSPX (0 << 2) +#define UCB_ADC_INP_TSMX (1 << 2) +#define UCB_ADC_INP_TSPY (2 << 2) +#define UCB_ADC_INP_TSMY (3 << 2) +#define UCB_ADC_INP_AD0 (4 << 2) +#define UCB_ADC_INP_AD1 (5 << 2) +#define UCB_ADC_INP_AD2 (6 << 2) +#define UCB_ADC_INP_AD3 (7 << 2) +#define UCB_ADC_EXT_REF (1 << 5) +#define UCB_ADC_START (1 << 7) +#define UCB_ADC_ENA (1 << 15) + +#define UCB_ADC_DATA 0x0b +#define UCB_ADC_DAT_VAL (1 << 15) +#define UCB_ADC_DAT(x) (((x) & 0x7fe0) >> 5) + +#define UCB_ID 0x0c +#define UCB_ID_1200 0x1004 +#define UCB_ID_1300 0x1005 +#define UCB_ID_TC35143 0x9712 + +#define UCB_MODE 0x0d +#define UCB_MODE_DYN_VFLAG_ENA (1 << 12) +#define UCB_MODE_AUD_OFF_CAN (1 << 13) + + +struct ucb1x00_irq { + void *devid; + void (*fn)(int, void *); +}; + +struct ucb1x00 { + spinlock_t lock; + struct mcp *mcp; + unsigned int irq; + struct semaphore adc_sem; + spinlock_t io_lock; + u16 id; + u16 io_dir; + u16 io_out; + u16 adc_cr; + u16 irq_fal_enbl; + u16 irq_ris_enbl; + struct ucb1x00_irq irq_handler[16]; + struct device dev; + struct list_head node; + struct list_head devs; +}; + +struct ucb1x00_driver; + +struct ucb1x00_dev { + struct list_head dev_node; + struct list_head drv_node; + struct ucb1x00 *ucb; + struct ucb1x00_driver *drv; + void *priv; +}; + +struct ucb1x00_driver { + struct list_head node; + struct list_head devs; + int (*add)(struct ucb1x00_dev *dev); + void (*remove)(struct ucb1x00_dev *dev); + int (*suspend)(struct ucb1x00_dev *dev, pm_message_t state); + int (*resume)(struct ucb1x00_dev *dev); +}; + +#define classdev_to_ucb1x00(cd) container_of(cd, struct ucb1x00, dev) + +int ucb1x00_register_driver(struct ucb1x00_driver *); +void ucb1x00_unregister_driver(struct ucb1x00_driver *); + +/** + * ucb1x00_clkrate - return the UCB1x00 SIB clock rate + * @ucb: UCB1x00 structure describing chip + * + * Return the SIB clock rate in Hz. + */ +static inline unsigned int ucb1x00_clkrate(struct ucb1x00 *ucb) +{ + return mcp_get_sclk_rate(ucb->mcp); +} + +/** + * ucb1x00_enable - enable the UCB1x00 SIB clock + * @ucb: UCB1x00 structure describing chip + * + * Enable the SIB clock. This can be called multiple times. + */ +static inline void ucb1x00_enable(struct ucb1x00 *ucb) +{ + mcp_enable(ucb->mcp); +} + +/** + * ucb1x00_disable - disable the UCB1x00 SIB clock + * @ucb: UCB1x00 structure describing chip + * + * Disable the SIB clock. The SIB clock will only be disabled + * when the number of ucb1x00_enable calls match the number of + * ucb1x00_disable calls. + */ +static inline void ucb1x00_disable(struct ucb1x00 *ucb) +{ + mcp_disable(ucb->mcp); +} + +/** + * ucb1x00_reg_write - write a UCB1x00 register + * @ucb: UCB1x00 structure describing chip + * @reg: UCB1x00 4-bit register index to write + * @val: UCB1x00 16-bit value to write + * + * Write the UCB1x00 register @reg with value @val. The SIB + * clock must be running for this function to return. + */ +static inline void ucb1x00_reg_write(struct ucb1x00 *ucb, unsigned int reg, unsigned int val) +{ + mcp_reg_write(ucb->mcp, reg, val); +} + +/** + * ucb1x00_reg_read - read a UCB1x00 register + * @ucb: UCB1x00 structure describing chip + * @reg: UCB1x00 4-bit register index to write + * + * Read the UCB1x00 register @reg and return its value. The SIB + * clock must be running for this function to return. + */ +static inline unsigned int ucb1x00_reg_read(struct ucb1x00 *ucb, unsigned int reg) +{ + return mcp_reg_read(ucb->mcp, reg); +} +/** + * ucb1x00_set_audio_divisor - + * @ucb: UCB1x00 structure describing chip + * @div: SIB clock divisor + */ +static inline void ucb1x00_set_audio_divisor(struct ucb1x00 *ucb, unsigned int div) +{ + mcp_set_audio_divisor(ucb->mcp, div); +} + +/** + * ucb1x00_set_telecom_divisor - + * @ucb: UCB1x00 structure describing chip + * @div: SIB clock divisor + */ +static inline void ucb1x00_set_telecom_divisor(struct ucb1x00 *ucb, unsigned int div) +{ + mcp_set_telecom_divisor(ucb->mcp, div); +} + +void ucb1x00_io_set_dir(struct ucb1x00 *ucb, unsigned int, unsigned int); +void ucb1x00_io_write(struct ucb1x00 *ucb, unsigned int, unsigned int); +unsigned int ucb1x00_io_read(struct ucb1x00 *ucb); + +#define UCB_NOSYNC (0) +#define UCB_SYNC (1) + +unsigned int ucb1x00_adc_read(struct ucb1x00 *ucb, int adc_channel, int sync); +void ucb1x00_adc_enable(struct ucb1x00 *ucb); +void ucb1x00_adc_disable(struct ucb1x00 *ucb); + +/* + * Which edges of the IRQ do you want to control today? + */ +#define UCB_RISING (1 << 0) +#define UCB_FALLING (1 << 1) + +int ucb1x00_hook_irq(struct ucb1x00 *ucb, unsigned int idx, void (*fn)(int, void *), void *devid); +void ucb1x00_enable_irq(struct ucb1x00 *ucb, unsigned int idx, int edges); +void ucb1x00_disable_irq(struct ucb1x00 *ucb, unsigned int idx, int edges); +int ucb1x00_free_irq(struct ucb1x00 *ucb, unsigned int idx, void *devid); + +#endif -- cgit v1.3-8-gc7d7 From 9ca3dc805cd0d89c44f88b9a399061946781323a Mon Sep 17 00:00:00 2001 From: Thomas Kunze Date: Tue, 10 Feb 2009 14:50:56 +0100 Subject: add gpiolib support to ucb1x00 The old access methods to the gpios will be removed when all users has been converted. (mainly ucb1x00-ts) --- arch/arm/mach-sa1100/include/mach/mcp.h | 1 + drivers/mfd/mcp-sa11x0.c | 1 + drivers/mfd/ucb1x00-core.c | 87 ++++++++++++++++++++++++++++++++- include/linux/mfd/mcp.h | 1 + include/linux/mfd/ucb1x00.h | 3 ++ 5 files changed, 91 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/arch/arm/mach-sa1100/include/mach/mcp.h b/arch/arm/mach-sa1100/include/mach/mcp.h index fb8b09a57ad7..ed1a331508a7 100644 --- a/arch/arm/mach-sa1100/include/mach/mcp.h +++ b/arch/arm/mach-sa1100/include/mach/mcp.h @@ -16,6 +16,7 @@ struct mcp_plat_data { u32 mccr0; u32 mccr1; unsigned int sclk_rate; + int gpio_base; }; #endif diff --git a/drivers/mfd/mcp-sa11x0.c b/drivers/mfd/mcp-sa11x0.c index 212189815c8e..258427232728 100644 --- a/drivers/mfd/mcp-sa11x0.c +++ b/drivers/mfd/mcp-sa11x0.c @@ -163,6 +163,7 @@ static int mcp_sa11x0_probe(struct platform_device *pdev) mcp->dma_audio_wr = DMA_Ser4MCP0Wr; mcp->dma_telco_rd = DMA_Ser4MCP1Rd; mcp->dma_telco_wr = DMA_Ser4MCP1Wr; + mcp->gpio_base = data->gpio_base; platform_set_drvdata(pdev, mcp); diff --git a/drivers/mfd/ucb1x00-core.c b/drivers/mfd/ucb1x00-core.c index f9de7891e57f..252b74188ec2 100644 --- a/drivers/mfd/ucb1x00-core.c +++ b/drivers/mfd/ucb1x00-core.c @@ -26,11 +26,11 @@ #include #include #include +#include #include #include - static DEFINE_MUTEX(ucb1x00_mutex); static LIST_HEAD(ucb1x00_drivers); static LIST_HEAD(ucb1x00_devices); @@ -108,6 +108,60 @@ unsigned int ucb1x00_io_read(struct ucb1x00 *ucb) return ucb1x00_reg_read(ucb, UCB_IO_DATA); } +static void ucb1x00_gpio_set(struct gpio_chip *chip, unsigned offset, int value) +{ + struct ucb1x00 *ucb = container_of(chip, struct ucb1x00, gpio); + unsigned long flags; + + spin_lock_irqsave(&ucb->io_lock, flags); + if (value) + ucb->io_out |= 1 << offset; + else + ucb->io_out &= ~(1 << offset); + + ucb1x00_reg_write(ucb, UCB_IO_DATA, ucb->io_out); + spin_unlock_irqrestore(&ucb->io_lock, flags); +} + +static int ucb1x00_gpio_get(struct gpio_chip *chip, unsigned offset) +{ + struct ucb1x00 *ucb = container_of(chip, struct ucb1x00, gpio); + return ucb1x00_reg_read(ucb, UCB_IO_DATA) & (1 << offset); +} + +static int ucb1x00_gpio_direction_input(struct gpio_chip *chip, unsigned offset) +{ + struct ucb1x00 *ucb = container_of(chip, struct ucb1x00, gpio); + unsigned long flags; + + spin_lock_irqsave(&ucb->io_lock, flags); + ucb->io_dir &= ~(1 << offset); + ucb1x00_reg_write(ucb, UCB_IO_DIR, ucb->io_dir); + spin_unlock_irqrestore(&ucb->io_lock, flags); + + return 0; +} + +static int ucb1x00_gpio_direction_output(struct gpio_chip *chip, unsigned offset + , int value) +{ + struct ucb1x00 *ucb = container_of(chip, struct ucb1x00, gpio); + unsigned long flags; + + spin_lock_irqsave(&ucb->io_lock, flags); + ucb->io_dir |= (1 << offset); + ucb1x00_reg_write(ucb, UCB_IO_DIR, ucb->io_dir); + + if (value) + ucb->io_out |= 1 << offset; + else + ucb->io_out &= ~(1 << offset); + ucb1x00_reg_write(ucb, UCB_IO_DATA, ucb->io_out); + spin_unlock_irqrestore(&ucb->io_lock, flags); + + return 0; +} + /* * UCB1300 data sheet says we must: * 1. enable ADC => 5us (including reference startup time) @@ -476,6 +530,7 @@ static int ucb1x00_probe(struct mcp *mcp) struct ucb1x00_driver *drv; unsigned int id; int ret = -ENODEV; + int temp; mcp_enable(mcp); id = mcp_reg_read(mcp, UCB_ID); @@ -508,12 +563,27 @@ static int ucb1x00_probe(struct mcp *mcp) goto err_free; } + ucb->gpio.base = -1; + if (mcp->gpio_base != 0) { + ucb->gpio.label = dev_name(&ucb->dev); + ucb->gpio.base = mcp->gpio_base; + ucb->gpio.ngpio = 10; + ucb->gpio.set = ucb1x00_gpio_set; + ucb->gpio.get = ucb1x00_gpio_get; + ucb->gpio.direction_input = ucb1x00_gpio_direction_input; + ucb->gpio.direction_output = ucb1x00_gpio_direction_output; + ret = gpiochip_add(&ucb->gpio); + if (ret) + goto err_free; + } else + dev_info(&ucb->dev, "gpio_base not set so no gpiolib support"); + ret = request_irq(ucb->irq, ucb1x00_irq, IRQF_TRIGGER_RISING, "UCB1x00", ucb); if (ret) { printk(KERN_ERR "ucb1x00: unable to grab irq%d: %d\n", ucb->irq, ret); - goto err_free; + goto err_gpio; } mcp_set_drvdata(mcp, ucb); @@ -522,6 +592,7 @@ static int ucb1x00_probe(struct mcp *mcp) if (ret) goto err_irq; + INIT_LIST_HEAD(&ucb->devs); mutex_lock(&ucb1x00_mutex); list_add(&ucb->node, &ucb1x00_devices); @@ -529,10 +600,14 @@ static int ucb1x00_probe(struct mcp *mcp) ucb1x00_add_dev(ucb, drv); } mutex_unlock(&ucb1x00_mutex); + goto out; err_irq: free_irq(ucb->irq, ucb); + err_gpio: + if (ucb->gpio.base != -1) + temp = gpiochip_remove(&ucb->gpio); err_free: kfree(ucb); err_disable: @@ -545,6 +620,7 @@ static void ucb1x00_remove(struct mcp *mcp) { struct ucb1x00 *ucb = mcp_get_drvdata(mcp); struct list_head *l, *n; + int ret; mutex_lock(&ucb1x00_mutex); list_del(&ucb->node); @@ -554,6 +630,12 @@ static void ucb1x00_remove(struct mcp *mcp) } mutex_unlock(&ucb1x00_mutex); + if (ucb->gpio.base != -1) { + ret = gpiochip_remove(&ucb->gpio); + if (ret) + dev_err(&ucb->dev, "Can't remove gpio chip: %d\n", ret); + } + free_irq(ucb->irq, ucb); device_unregister(&ucb->dev); } @@ -604,6 +686,7 @@ static int ucb1x00_resume(struct mcp *mcp) struct ucb1x00 *ucb = mcp_get_drvdata(mcp); struct ucb1x00_dev *dev; + ucb1x00_reg_write(ucb, UCB_IO_DIR, ucb->io_dir); mutex_lock(&ucb1x00_mutex); list_for_each_entry(dev, &ucb->devs, dev_node) { if (dev->drv->resume) diff --git a/include/linux/mfd/mcp.h b/include/linux/mfd/mcp.h index be95e09fd746..ee496708e38b 100644 --- a/include/linux/mfd/mcp.h +++ b/include/linux/mfd/mcp.h @@ -26,6 +26,7 @@ struct mcp { dma_device_t dma_telco_rd; dma_device_t dma_telco_wr; struct device attached_device; + int gpio_base; }; struct mcp_ops { diff --git a/include/linux/mfd/ucb1x00.h b/include/linux/mfd/ucb1x00.h index eac346336382..aa9c3789bed4 100644 --- a/include/linux/mfd/ucb1x00.h +++ b/include/linux/mfd/ucb1x00.h @@ -11,6 +11,8 @@ #define UCB1200_H #include +#include + #define UCB_IO_DATA 0x00 #define UCB_IO_DIR 0x01 @@ -123,6 +125,7 @@ struct ucb1x00 { struct device dev; struct list_head node; struct list_head devs; + struct gpio_chip gpio; }; struct ucb1x00_driver; -- cgit v1.3-8-gc7d7 From f5560da549ea2e32dd41e36548c0e7dee3d4aabb Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 13 Dec 2006 19:46:38 +0900 Subject: pcmcia: Pass struct pcmcia_device to pcmcia_release_window() No logic changes, just pass struct pcmcia_device to pcmcia_release_window(). [linux@dominikbrodowski.net: update to 2.6.31] CC: netdev@vger.kernel.org CC: Jiri Kosina Signed-off-by: Magnus Damm Signed-off-by: Dominik Brodowski --- drivers/char/pcmcia/ipwireless/main.c | 14 +++++++------- drivers/mtd/maps/pcmciamtd.c | 2 +- drivers/net/pcmcia/fmvj18x_cs.c | 4 ++-- drivers/net/pcmcia/ibmtr_cs.c | 2 +- drivers/net/pcmcia/pcnet_cs.c | 4 ++-- drivers/net/wireless/ray_cs.c | 4 ++-- drivers/pcmcia/pcmcia_resource.c | 5 +++-- include/pcmcia/ds.h | 2 +- 8 files changed, 19 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/drivers/char/pcmcia/ipwireless/main.c b/drivers/char/pcmcia/ipwireless/main.c index 24bffa4ece49..91ca23d2b042 100644 --- a/drivers/char/pcmcia/ipwireless/main.c +++ b/drivers/char/pcmcia/ipwireless/main.c @@ -168,15 +168,15 @@ static int ipwireless_probe(struct pcmcia_device *p_dev, return 0; exit3: - pcmcia_release_window(ipw->handle_attr_memory); + pcmcia_release_window(p_dev, ipw->handle_attr_memory); exit2: if (ipw->common_memory) { release_mem_region(ipw->request_common_memory.Base, ipw->request_common_memory.Size); iounmap(ipw->common_memory); - pcmcia_release_window(ipw->handle_common_memory); + pcmcia_release_window(p_dev, ipw->handle_common_memory); } else - pcmcia_release_window(ipw->handle_common_memory); + pcmcia_release_window(p_dev, ipw->handle_common_memory); exit1: release_resource(io_resource); pcmcia_disable_device(p_dev); @@ -260,13 +260,13 @@ exit: release_mem_region(ipw->request_attr_memory.Base, ipw->request_attr_memory.Size); iounmap(ipw->attr_memory); - pcmcia_release_window(ipw->handle_attr_memory); + pcmcia_release_window(link, ipw->handle_attr_memory); } if (ipw->common_memory) { release_mem_region(ipw->request_common_memory.Base, ipw->request_common_memory.Size); iounmap(ipw->common_memory); - pcmcia_release_window(ipw->handle_common_memory); + pcmcia_release_window(link, ipw->handle_common_memory); } pcmcia_disable_device(link); return -1; @@ -287,9 +287,9 @@ static void release_ipwireless(struct ipw_dev *ipw) iounmap(ipw->attr_memory); } if (ipw->common_memory) - pcmcia_release_window(ipw->handle_common_memory); + pcmcia_release_window(ipw->link, ipw->handle_common_memory); if (ipw->attr_memory) - pcmcia_release_window(ipw->handle_attr_memory); + pcmcia_release_window(ipw->link, ipw->handle_attr_memory); /* Break the link with Card Services */ pcmcia_disable_device(ipw->link); diff --git a/drivers/mtd/maps/pcmciamtd.c b/drivers/mtd/maps/pcmciamtd.c index b698dbaaf9e6..16d9985aadcd 100644 --- a/drivers/mtd/maps/pcmciamtd.c +++ b/drivers/mtd/maps/pcmciamtd.c @@ -344,7 +344,7 @@ static void pcmciamtd_release(struct pcmcia_device *link) iounmap(dev->win_base); dev->win_base = NULL; } - pcmcia_release_window(link->win); + pcmcia_release_window(link, link->win); } pcmcia_disable_device(link); } diff --git a/drivers/net/pcmcia/fmvj18x_cs.c b/drivers/net/pcmcia/fmvj18x_cs.c index fdec5c333a2a..61726c82be46 100644 --- a/drivers/net/pcmcia/fmvj18x_cs.c +++ b/drivers/net/pcmcia/fmvj18x_cs.c @@ -599,7 +599,7 @@ static int fmvj18x_get_hwinfo(struct pcmcia_device *link, u_char *node_id) } iounmap(base); - j = pcmcia_release_window(link->win); + j = pcmcia_release_window(link, link->win); return (i != 0x200) ? 0 : -1; } /* fmvj18x_get_hwinfo */ @@ -666,7 +666,7 @@ static void fmvj18x_release(struct pcmcia_device *link) tmp = lp->base; lp->base = NULL; /* set NULL before iounmap */ iounmap(tmp); - j = pcmcia_release_window(link->win); + j = pcmcia_release_window(link, link->win); } pcmcia_disable_device(link); diff --git a/drivers/net/pcmcia/ibmtr_cs.c b/drivers/net/pcmcia/ibmtr_cs.c index 0d914f3b2941..7a985ca079d7 100644 --- a/drivers/net/pcmcia/ibmtr_cs.c +++ b/drivers/net/pcmcia/ibmtr_cs.c @@ -328,7 +328,7 @@ static void ibmtr_release(struct pcmcia_device *link) if (link->win) { struct tok_info *ti = netdev_priv(dev); iounmap(ti->mmio); - pcmcia_release_window(info->sram_win_handle); + pcmcia_release_window(link, info->sram_win_handle); } pcmcia_disable_device(link); } diff --git a/drivers/net/pcmcia/pcnet_cs.c b/drivers/net/pcmcia/pcnet_cs.c index 80ab9de1c4df..4156a6c5811e 100644 --- a/drivers/net/pcmcia/pcnet_cs.c +++ b/drivers/net/pcmcia/pcnet_cs.c @@ -337,7 +337,7 @@ static hw_info_t *get_hwinfo(struct pcmcia_device *link) } iounmap(virt); - j = pcmcia_release_window(link->win); + j = pcmcia_release_window(link, link->win); return (i < NR_INFO) ? hw_info+i : NULL; } /* get_hwinfo */ @@ -1513,7 +1513,7 @@ static int setup_shmem_window(struct pcmcia_device *link, int start_pg, pcnet_reset_8390(dev); if (i != (TX_PAGES<<8)) { iounmap(info->base); - pcmcia_release_window(link->win); + pcmcia_release_window(link, link->win); info->base = NULL; link->win = NULL; goto failed; } diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c index 28db7914a5d1..ed328750de45 100644 --- a/drivers/net/wireless/ray_cs.c +++ b/drivers/net/wireless/ray_cs.c @@ -820,10 +820,10 @@ static void ray_release(struct pcmcia_device *link) iounmap(local->rmem); iounmap(local->amem); /* Do bother checking to see if these succeed or not */ - i = pcmcia_release_window(local->amem_handle); + i = pcmcia_release_window(link, local->amem_handle); if (i != 0) dev_dbg(&link->dev, "ReleaseWindow(local->amem) ret = %x\n", i); - i = pcmcia_release_window(local->rmem_handle); + i = pcmcia_release_window(link, local->rmem_handle); if (i != 0) dev_dbg(&link->dev, "ReleaseWindow(local->rmem) ret = %x\n", i); pcmcia_disable_device(link); diff --git a/drivers/pcmcia/pcmcia_resource.c b/drivers/pcmcia/pcmcia_resource.c index cda48ea9b6fd..820a6e5868e8 100644 --- a/drivers/pcmcia/pcmcia_resource.c +++ b/drivers/pcmcia/pcmcia_resource.c @@ -442,9 +442,10 @@ static int pcmcia_release_irq(struct pcmcia_device *p_dev, irq_req_t *req) } /* pcmcia_release_irq */ -int pcmcia_release_window(window_handle_t win) +int pcmcia_release_window(struct pcmcia_device *p_dev, window_handle_t wh) { struct pcmcia_socket *s; + window_handle_t win = wh; if ((win == NULL) || (win->magic != WINDOW_MAGIC)) return -EINVAL; @@ -891,7 +892,7 @@ void pcmcia_disable_device(struct pcmcia_device *p_dev) { pcmcia_release_io(p_dev, &p_dev->io); pcmcia_release_irq(p_dev, &p_dev->irq); if (p_dev->win) - pcmcia_release_window(p_dev->win); + pcmcia_release_window(p_dev, p_dev->win); } EXPORT_SYMBOL(pcmcia_disable_device); diff --git a/include/pcmcia/ds.h b/include/pcmcia/ds.h index d82392de4e92..40b098d7aa41 100644 --- a/include/pcmcia/ds.h +++ b/include/pcmcia/ds.h @@ -201,7 +201,7 @@ int pcmcia_request_configuration(struct pcmcia_device *p_dev, int pcmcia_request_window(struct pcmcia_device **p_dev, win_req_t *req, window_handle_t *wh); -int pcmcia_release_window(window_handle_t win); +int pcmcia_release_window(struct pcmcia_device *p_dev, window_handle_t win); int pcmcia_get_mem_page(window_handle_t win, memreq_t *req); int pcmcia_map_mem_page(window_handle_t win, memreq_t *req); -- cgit v1.3-8-gc7d7 From 868575d1e87ff2091800aea816972ddb46de60d5 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 13 Dec 2006 19:46:43 +0900 Subject: pcmcia: Pass struct pcmcia_device to pcmcia_map_mem_page() No logic changes, just pass struct pcmcia_device to pcmcia_map_mem_page() [linux@dominikbrodowski.net: update to 2.6.31] CC: netdev@vger.kernel.org CC: linux-wireless@vger.kernel.org CC: linux-scsi@vger.kernel.org CC: Jiri Kosina Acked-by: Karsten Keil (for ISDN) Signed-off-by: Magnus Damm Signed-off-by: Dominik Brodowski --- drivers/char/pcmcia/ipwireless/main.c | 4 ++-- drivers/isdn/hisax/sedlbauer_cs.c | 2 +- drivers/mtd/maps/pcmciamtd.c | 2 +- drivers/net/pcmcia/fmvj18x_cs.c | 4 ++-- drivers/net/pcmcia/ibmtr_cs.c | 4 ++-- drivers/net/pcmcia/pcnet_cs.c | 4 ++-- drivers/net/pcmcia/smc91c92_cs.c | 2 +- drivers/net/pcmcia/xirc2ps_cs.c | 2 +- drivers/net/wireless/airo_cs.c | 2 +- drivers/net/wireless/b43/pcmcia.c | 2 +- drivers/net/wireless/netwave_cs.c | 2 +- drivers/net/wireless/ray_cs.c | 6 +++--- drivers/net/wireless/wavelan_cs.c | 2 +- drivers/pcmcia/pcmcia_resource.c | 5 ++++- drivers/scsi/pcmcia/nsp_cs.c | 2 +- drivers/staging/comedi/drivers/ni_daq_700.c | 2 +- drivers/staging/comedi/drivers/ni_daq_dio24.c | 2 +- drivers/staging/comedi/drivers/ni_labpc_cs.c | 2 +- include/pcmcia/ds.h | 3 ++- 19 files changed, 29 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/drivers/char/pcmcia/ipwireless/main.c b/drivers/char/pcmcia/ipwireless/main.c index 91ca23d2b042..10af1c0dc368 100644 --- a/drivers/char/pcmcia/ipwireless/main.c +++ b/drivers/char/pcmcia/ipwireless/main.c @@ -125,7 +125,7 @@ static int ipwireless_probe(struct pcmcia_device *p_dev, memreq_common_memory.CardOffset = cfg->mem.win[0].card_addr; memreq_common_memory.Page = 0; - ret = pcmcia_map_mem_page(ipw->handle_common_memory, + ret = pcmcia_map_mem_page(p_dev, ipw->handle_common_memory, &memreq_common_memory); if (ret != 0) @@ -154,7 +154,7 @@ static int ipwireless_probe(struct pcmcia_device *p_dev, memreq_attr_memory.CardOffset = 0; memreq_attr_memory.Page = 0; - ret = pcmcia_map_mem_page(ipw->handle_attr_memory, + ret = pcmcia_map_mem_page(p_dev, ipw->handle_attr_memory, &memreq_attr_memory); if (ret != 0) diff --git a/drivers/isdn/hisax/sedlbauer_cs.c b/drivers/isdn/hisax/sedlbauer_cs.c index 33d7530527c7..598e17a551b7 100644 --- a/drivers/isdn/hisax/sedlbauer_cs.c +++ b/drivers/isdn/hisax/sedlbauer_cs.c @@ -276,7 +276,7 @@ static int sedlbauer_config_check(struct pcmcia_device *p_dev, return -ENODEV; map.Page = 0; map.CardOffset = mem->win[0].card_addr; - if (pcmcia_map_mem_page(p_dev->win, &map) != 0) + if (pcmcia_map_mem_page(p_dev, p_dev->win, &map) != 0) return -ENODEV; } return 0; diff --git a/drivers/mtd/maps/pcmciamtd.c b/drivers/mtd/maps/pcmciamtd.c index 16d9985aadcd..80b9005c3736 100644 --- a/drivers/mtd/maps/pcmciamtd.c +++ b/drivers/mtd/maps/pcmciamtd.c @@ -118,7 +118,7 @@ static caddr_t remap_window(struct map_info *map, unsigned long to) DEBUG(2, "Remapping window from 0x%8.8x to 0x%8.8x", dev->offset, mrq.CardOffset); mrq.Page = 0; - ret = pcmcia_map_mem_page(win, &mrq); + ret = pcmcia_map_mem_page(dev->p_dev, win, &mrq); if (ret != 0) return NULL; dev->offset = mrq.CardOffset; diff --git a/drivers/net/pcmcia/fmvj18x_cs.c b/drivers/net/pcmcia/fmvj18x_cs.c index 61726c82be46..6b9c79e4a2c0 100644 --- a/drivers/net/pcmcia/fmvj18x_cs.c +++ b/drivers/net/pcmcia/fmvj18x_cs.c @@ -574,7 +574,7 @@ static int fmvj18x_get_hwinfo(struct pcmcia_device *link, u_char *node_id) base = ioremap(req.Base, req.Size); mem.Page = 0; mem.CardOffset = 0; - pcmcia_map_mem_page(link->win, &mem); + pcmcia_map_mem_page(link, link->win, &mem); /* * MBH10304 CISTPL_FUNCE_LAN_NODE_ID format @@ -630,7 +630,7 @@ static int fmvj18x_setup_mfc(struct pcmcia_device *link) mem.Page = 0; mem.CardOffset = 0; - i = pcmcia_map_mem_page(link->win, &mem); + i = pcmcia_map_mem_page(link, link->win, &mem); if (i != 0) { iounmap(lp->base); lp->base = NULL; diff --git a/drivers/net/pcmcia/ibmtr_cs.c b/drivers/net/pcmcia/ibmtr_cs.c index 7a985ca079d7..6fc89eba9c8f 100644 --- a/drivers/net/pcmcia/ibmtr_cs.c +++ b/drivers/net/pcmcia/ibmtr_cs.c @@ -252,7 +252,7 @@ static int __devinit ibmtr_config(struct pcmcia_device *link) mem.CardOffset = mmiobase; mem.Page = 0; - ret = pcmcia_map_mem_page(link->win, &mem); + ret = pcmcia_map_mem_page(link, link->win, &mem); if (ret) goto failed; ti->mmio = ioremap(req.Base, req.Size); @@ -269,7 +269,7 @@ static int __devinit ibmtr_config(struct pcmcia_device *link) mem.CardOffset = srambase; mem.Page = 0; - ret = pcmcia_map_mem_page(info->sram_win_handle, &mem); + ret = pcmcia_map_mem_page(link, info->sram_win_handle, &mem); if (ret) goto failed; diff --git a/drivers/net/pcmcia/pcnet_cs.c b/drivers/net/pcmcia/pcnet_cs.c index 4156a6c5811e..75e68946455e 100644 --- a/drivers/net/pcmcia/pcnet_cs.c +++ b/drivers/net/pcmcia/pcnet_cs.c @@ -325,7 +325,7 @@ static hw_info_t *get_hwinfo(struct pcmcia_device *link) mem.Page = 0; for (i = 0; i < NR_INFO; i++) { mem.CardOffset = hw_info[i].offset & ~(req.Size-1); - pcmcia_map_mem_page(link->win, &mem); + pcmcia_map_mem_page(link, link->win, &mem); base = &virt[hw_info[i].offset & (req.Size-1)]; if ((readb(base+0) == hw_info[i].a0) && (readb(base+2) == hw_info[i].a1) && @@ -1499,7 +1499,7 @@ static int setup_shmem_window(struct pcmcia_device *link, int start_pg, offset = mem.CardOffset % window_size; mem.CardOffset -= offset; mem.Page = 0; - ret = pcmcia_map_mem_page(link->win, &mem); + ret = pcmcia_map_mem_page(link, link->win, &mem); if (ret) goto failed; diff --git a/drivers/net/pcmcia/smc91c92_cs.c b/drivers/net/pcmcia/smc91c92_cs.c index 580ec444a654..4ceaa45d7d81 100644 --- a/drivers/net/pcmcia/smc91c92_cs.c +++ b/drivers/net/pcmcia/smc91c92_cs.c @@ -480,7 +480,7 @@ static int mhz_mfc_config(struct pcmcia_device *link) mem.CardOffset = mem.Page = 0; if (smc->manfid == MANFID_MOTOROLA) mem.CardOffset = link->conf.ConfigBase; - i = pcmcia_map_mem_page(link->win, &mem); + i = pcmcia_map_mem_page(link, link->win, &mem); if ((i == 0) && (smc->manfid == MANFID_MEGAHERTZ) diff --git a/drivers/net/pcmcia/xirc2ps_cs.c b/drivers/net/pcmcia/xirc2ps_cs.c index fbf926539285..9bc4d6002ae0 100644 --- a/drivers/net/pcmcia/xirc2ps_cs.c +++ b/drivers/net/pcmcia/xirc2ps_cs.c @@ -932,7 +932,7 @@ xirc2ps_config(struct pcmcia_device * link) local->dingo_ccr = ioremap(req.Base,0x1000) + 0x0800; mem.CardOffset = 0x0; mem.Page = 0; - if ((err = pcmcia_map_mem_page(link->win, &mem))) + if ((err = pcmcia_map_mem_page(link, link->win, &mem))) goto config_error; /* Setup the CCRs; there are no infos in the CIS about the Ethernet diff --git a/drivers/net/wireless/airo_cs.c b/drivers/net/wireless/airo_cs.c index a1b84fc48afd..2aa6978d76ca 100644 --- a/drivers/net/wireless/airo_cs.c +++ b/drivers/net/wireless/airo_cs.c @@ -261,7 +261,7 @@ static int airo_cs_config_check(struct pcmcia_device *p_dev, return -ENODEV; map.Page = 0; map.CardOffset = mem->win[0].card_addr; - if (pcmcia_map_mem_page(p_dev->win, &map) != 0) + if (pcmcia_map_mem_page(p_dev, p_dev->win, &map) != 0) return -ENODEV; } /* If we got this far, we're cool! */ diff --git a/drivers/net/wireless/b43/pcmcia.c b/drivers/net/wireless/b43/pcmcia.c index cd14b7e8ba80..c0a34bbcb8d5 100644 --- a/drivers/net/wireless/b43/pcmcia.c +++ b/drivers/net/wireless/b43/pcmcia.c @@ -93,7 +93,7 @@ static int __devinit b43_pcmcia_probe(struct pcmcia_device *dev) mem.CardOffset = 0; mem.Page = 0; - res = pcmcia_map_mem_page(dev->win, &mem); + res = pcmcia_map_mem_page(dev, dev->win, &mem); if (res != 0) goto err_disable; diff --git a/drivers/net/wireless/netwave_cs.c b/drivers/net/wireless/netwave_cs.c index cbd85de0c601..4a8c62126c55 100644 --- a/drivers/net/wireless/netwave_cs.c +++ b/drivers/net/wireless/netwave_cs.c @@ -763,7 +763,7 @@ static int netwave_pcmcia_config(struct pcmcia_device *link) { if (ret) goto failed; mem.CardOffset = 0x20000; mem.Page = 0; - ret = pcmcia_map_mem_page(link->win, &mem); + ret = pcmcia_map_mem_page(link, link->win, &mem); if (ret) goto failed; diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c index ed328750de45..f719ffcb3c74 100644 --- a/drivers/net/wireless/ray_cs.c +++ b/drivers/net/wireless/ray_cs.c @@ -443,7 +443,7 @@ static int ray_config(struct pcmcia_device *link) goto failed; mem.CardOffset = 0x0000; mem.Page = 0; - ret = pcmcia_map_mem_page(link->win, &mem); + ret = pcmcia_map_mem_page(link, link->win, &mem); if (ret) goto failed; local->sram = ioremap(req.Base, req.Size); @@ -459,7 +459,7 @@ static int ray_config(struct pcmcia_device *link) goto failed; mem.CardOffset = 0x8000; mem.Page = 0; - ret = pcmcia_map_mem_page(local->rmem_handle, &mem); + ret = pcmcia_map_mem_page(link, local->rmem_handle, &mem); if (ret) goto failed; local->rmem = ioremap(req.Base, req.Size); @@ -475,7 +475,7 @@ static int ray_config(struct pcmcia_device *link) goto failed; mem.CardOffset = 0x0000; mem.Page = 0; - ret = pcmcia_map_mem_page(local->amem_handle, &mem); + ret = pcmcia_map_mem_page(link, local->amem_handle, &mem); if (ret) goto failed; local->amem = ioremap(req.Base, req.Size); diff --git a/drivers/net/wireless/wavelan_cs.c b/drivers/net/wireless/wavelan_cs.c index 2fad4ac89827..d4df25d329d4 100644 --- a/drivers/net/wireless/wavelan_cs.c +++ b/drivers/net/wireless/wavelan_cs.c @@ -3885,7 +3885,7 @@ wv_pcmcia_config(struct pcmcia_device * link) dev->mem_end = dev->mem_start + req.Size; mem.CardOffset = 0; mem.Page = 0; - i = pcmcia_map_mem_page(link->win, &mem); + i = pcmcia_map_mem_page(link, link->win, &mem); if (i != 0) break; diff --git a/drivers/pcmcia/pcmcia_resource.c b/drivers/pcmcia/pcmcia_resource.c index 820a6e5868e8..b60952a3df93 100644 --- a/drivers/pcmcia/pcmcia_resource.c +++ b/drivers/pcmcia/pcmcia_resource.c @@ -234,9 +234,12 @@ int pcmcia_get_mem_page(window_handle_t win, memreq_t *req) EXPORT_SYMBOL(pcmcia_get_mem_page); -int pcmcia_map_mem_page(window_handle_t win, memreq_t *req) +int pcmcia_map_mem_page(struct pcmcia_device *p_dev, window_handle_t wh, + memreq_t *req) { struct pcmcia_socket *s; + window_handle_t win = wh; + if ((win == NULL) || (win->magic != WINDOW_MAGIC)) return -EINVAL; s = win->sock; diff --git a/drivers/scsi/pcmcia/nsp_cs.c b/drivers/scsi/pcmcia/nsp_cs.c index e32c344d7ad8..c54108fc89a1 100644 --- a/drivers/scsi/pcmcia/nsp_cs.c +++ b/drivers/scsi/pcmcia/nsp_cs.c @@ -1687,7 +1687,7 @@ static int nsp_cs_config_check(struct pcmcia_device *p_dev, if (pcmcia_request_window(&p_dev, &cfg_mem->req, &p_dev->win) != 0) goto next_entry; map.Page = 0; map.CardOffset = mem->win[0].card_addr; - if (pcmcia_map_mem_page(p_dev->win, &map) != 0) + if (pcmcia_map_mem_page(p_dev, p_dev->win, &map) != 0) goto next_entry; cfg_mem->data->MmioAddress = (unsigned long) ioremap_nocache(cfg_mem->req.Base, cfg_mem->req.Size); diff --git a/drivers/staging/comedi/drivers/ni_daq_700.c b/drivers/staging/comedi/drivers/ni_daq_700.c index e06d5b2bc336..aeb85b3ec7a7 100644 --- a/drivers/staging/comedi/drivers/ni_daq_700.c +++ b/drivers/staging/comedi/drivers/ni_daq_700.c @@ -616,7 +616,7 @@ static int dio700_pcmcia_config_loop(struct pcmcia_device *p_dev, return -ENODEV; map.Page = 0; map.CardOffset = mem->win[0].card_addr; - if (pcmcia_map_mem_page(p_dev->win, &map)) + if (pcmcia_map_mem_page(p_dev, p_dev->win, &map)) return -ENODEV; } /* If we got this far, we're cool! */ diff --git a/drivers/staging/comedi/drivers/ni_daq_dio24.c b/drivers/staging/comedi/drivers/ni_daq_dio24.c index 9257a4224eab..0968a67b97cb 100644 --- a/drivers/staging/comedi/drivers/ni_daq_dio24.c +++ b/drivers/staging/comedi/drivers/ni_daq_dio24.c @@ -367,7 +367,7 @@ static int dio24_pcmcia_config_loop(struct pcmcia_device *p_dev, return -ENODEV; map.Page = 0; map.CardOffset = mem->win[0].card_addr; - if (pcmcia_map_mem_page(p_dev->win, &map)) + if (pcmcia_map_mem_page(p_dev, p_dev->win, &map)) return -ENODEV; } /* If we got this far, we're cool! */ diff --git a/drivers/staging/comedi/drivers/ni_labpc_cs.c b/drivers/staging/comedi/drivers/ni_labpc_cs.c index 28ff4a68cab3..b4c7dfa54638 100644 --- a/drivers/staging/comedi/drivers/ni_labpc_cs.c +++ b/drivers/staging/comedi/drivers/ni_labpc_cs.c @@ -346,7 +346,7 @@ static int labpc_pcmcia_config_loop(struct pcmcia_device *p_dev, return -ENODEV; map.Page = 0; map.CardOffset = mem->win[0].card_addr; - if (pcmcia_map_mem_page(p_dev->win, &map)) + if (pcmcia_map_mem_page(p_dev, p_dev->win, &map)) return -ENODEV; } /* If we got this far, we're cool! */ diff --git a/include/pcmcia/ds.h b/include/pcmcia/ds.h index 40b098d7aa41..f240bfa454f8 100644 --- a/include/pcmcia/ds.h +++ b/include/pcmcia/ds.h @@ -204,7 +204,8 @@ int pcmcia_request_window(struct pcmcia_device **p_dev, win_req_t *req, int pcmcia_release_window(struct pcmcia_device *p_dev, window_handle_t win); int pcmcia_get_mem_page(window_handle_t win, memreq_t *req); -int pcmcia_map_mem_page(window_handle_t win, memreq_t *req); +int pcmcia_map_mem_page(struct pcmcia_device *p_dev, window_handle_t win, + memreq_t *req); int pcmcia_modify_configuration(struct pcmcia_device *p_dev, modconf_t *mod); void pcmcia_disable_device(struct pcmcia_device *p_dev); -- cgit v1.3-8-gc7d7 From 16456ebabfec3f8f509fc18b45f256d066a1b360 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 13 Dec 2006 19:46:48 +0900 Subject: pcmcia: Pass struct pcmcia_socket to pcmcia_get_mem_page() No logic changes, just pass struct pcmcia_socket to pcmcia_get_mem_page() [linux@dominikbrodowski.net: update to 2.6.31] Signed-off-by: Magnus Damm Signed-off-by: Dominik Brodowski --- drivers/pcmcia/cs_internal.h | 2 ++ drivers/pcmcia/pcmcia_ioctl.c | 2 +- drivers/pcmcia/pcmcia_resource.c | 5 ++++- include/pcmcia/ds.h | 2 -- 4 files changed, 7 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/pcmcia/cs_internal.h b/drivers/pcmcia/cs_internal.h index 0a3ada970bf8..8e09a4998c84 100644 --- a/drivers/pcmcia/cs_internal.h +++ b/drivers/pcmcia/cs_internal.h @@ -227,6 +227,8 @@ extern void pcmcia_put_dev(struct pcmcia_device *p_dev); struct pcmcia_device *pcmcia_device_add(struct pcmcia_socket *s, unsigned int function); +int pcmcia_get_mem_page(struct pcmcia_socket *skt, window_handle_t win, + memreq_t *req); /* pcmcia_ioctl.c */ extern void __init pcmcia_setup_ioctl(void); diff --git a/drivers/pcmcia/pcmcia_ioctl.c b/drivers/pcmcia/pcmcia_ioctl.c index 056fd131c89c..c829ead5cdb7 100644 --- a/drivers/pcmcia/pcmcia_ioctl.c +++ b/drivers/pcmcia/pcmcia_ioctl.c @@ -924,7 +924,7 @@ static int ds_ioctl(struct inode * inode, struct file * file, buf->win_info.handle->index + 1, &buf->win_info.window); break; case DS_GET_MEM_PAGE: - ret = pcmcia_get_mem_page(buf->win_info.handle, + ret = pcmcia_get_mem_page(s, buf->win_info.handle, &buf->win_info.map); break; case DS_REPLACE_CIS: diff --git a/drivers/pcmcia/pcmcia_resource.c b/drivers/pcmcia/pcmcia_resource.c index b60952a3df93..a092749f79cb 100644 --- a/drivers/pcmcia/pcmcia_resource.c +++ b/drivers/pcmcia/pcmcia_resource.c @@ -223,8 +223,11 @@ EXPORT_SYMBOL(pcmcia_get_window); * * Change the card address of an already open memory window. */ -int pcmcia_get_mem_page(window_handle_t win, memreq_t *req) +int pcmcia_get_mem_page(struct pcmcia_socket *skt, window_handle_t wh, + memreq_t *req) { + window_handle_t win = wh; + if ((win == NULL) || (win->magic != WINDOW_MAGIC)) return -EINVAL; req->Page = 0; diff --git a/include/pcmcia/ds.h b/include/pcmcia/ds.h index f240bfa454f8..cbf5f05745f2 100644 --- a/include/pcmcia/ds.h +++ b/include/pcmcia/ds.h @@ -202,8 +202,6 @@ int pcmcia_request_configuration(struct pcmcia_device *p_dev, int pcmcia_request_window(struct pcmcia_device **p_dev, win_req_t *req, window_handle_t *wh); int pcmcia_release_window(struct pcmcia_device *p_dev, window_handle_t win); - -int pcmcia_get_mem_page(window_handle_t win, memreq_t *req); int pcmcia_map_mem_page(struct pcmcia_device *p_dev, window_handle_t win, memreq_t *req); -- cgit v1.3-8-gc7d7 From 0bdf9b3dd3cfa5cbd5d55172c19f5dd166208e17 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 13 Dec 2006 19:46:53 +0900 Subject: pcmcia: Change window_handle_t logic to unsigned long Logic changes based on top of the other patches: This set of patches changed window_handle_t from being a pointer to an unsigned long. The unsigned long is now a simple index into socket->win[]. Going from a pointer to unsigned long should leave the user space interface unchanged unless I'm mistaken. This change results in code that is less error prone and a user space interface which is much cleaner and safer. A nice side effect is that we are also are able to remove all members except one from window_t. [ linux@dominikbrodowski.net: Update to 2.6.31. Also, a plain "index" to socket->win[] does not work, as several codepaths rely on "window_handle_t" being non-zero if used. Therefore, set the window_handle_t to the socket->win[] index + 1. ] CC: netdev@vger.kernel.org Signed-off-by: Magnus Damm Signed-off-by: Dominik Brodowski --- drivers/net/pcmcia/pcnet_cs.c | 2 +- drivers/pcmcia/cs_internal.h | 4 +-- drivers/pcmcia/pcmcia_ioctl.c | 4 +-- drivers/pcmcia/pcmcia_resource.c | 59 ++++++++++++++++++++-------------------- include/pcmcia/cs_types.h | 3 +- include/pcmcia/ss.h | 5 ---- 6 files changed, 36 insertions(+), 41 deletions(-) (limited to 'include') diff --git a/drivers/net/pcmcia/pcnet_cs.c b/drivers/net/pcmcia/pcnet_cs.c index 75e68946455e..518b094c9f6d 100644 --- a/drivers/net/pcmcia/pcnet_cs.c +++ b/drivers/net/pcmcia/pcnet_cs.c @@ -1514,7 +1514,7 @@ static int setup_shmem_window(struct pcmcia_device *link, int start_pg, if (i != (TX_PAGES<<8)) { iounmap(info->base); pcmcia_release_window(link, link->win); - info->base = NULL; link->win = NULL; + info->base = NULL; link->win = 0; goto failed; } diff --git a/drivers/pcmcia/cs_internal.h b/drivers/pcmcia/cs_internal.h index 8e09a4998c84..6df41de39d16 100644 --- a/drivers/pcmcia/cs_internal.h +++ b/drivers/pcmcia/cs_internal.h @@ -149,8 +149,8 @@ extern struct list_head pcmcia_socket_list; extern struct class pcmcia_socket_class; int pcmcia_get_window(struct pcmcia_socket *s, - window_handle_t *handle, - int idx, + window_handle_t *wh_out, + window_handle_t wh, win_req_t *req); int pccard_register_pcmcia(struct pcmcia_socket *s, struct pcmcia_callback *c); struct pcmcia_socket *pcmcia_get_socket_by_nr(unsigned int nr); diff --git a/drivers/pcmcia/pcmcia_ioctl.c b/drivers/pcmcia/pcmcia_ioctl.c index c829ead5cdb7..6245fde02b79 100644 --- a/drivers/pcmcia/pcmcia_ioctl.c +++ b/drivers/pcmcia/pcmcia_ioctl.c @@ -916,12 +916,12 @@ static int ds_ioctl(struct inode * inode, struct file * file, goto free_out; break; case DS_GET_FIRST_WINDOW: - ret = pcmcia_get_window(s, &buf->win_info.handle, 0, + ret = pcmcia_get_window(s, &buf->win_info.handle, 1, &buf->win_info.window); break; case DS_GET_NEXT_WINDOW: ret = pcmcia_get_window(s, &buf->win_info.handle, - buf->win_info.handle->index + 1, &buf->win_info.window); + buf->win_info.handle + 1, &buf->win_info.window); break; case DS_GET_MEM_PAGE: ret = pcmcia_get_mem_page(s, buf->win_info.handle, diff --git a/drivers/pcmcia/pcmcia_resource.c b/drivers/pcmcia/pcmcia_resource.c index a092749f79cb..ae6abc7833d4 100644 --- a/drivers/pcmcia/pcmcia_resource.c +++ b/drivers/pcmcia/pcmcia_resource.c @@ -187,15 +187,19 @@ EXPORT_SYMBOL(pcmcia_access_configuration_register); /** pcmcia_get_window */ -int pcmcia_get_window(struct pcmcia_socket *s, window_handle_t *handle, - int idx, win_req_t *req) +int pcmcia_get_window(struct pcmcia_socket *s, window_handle_t *wh_out, + window_handle_t wh, win_req_t *req) { window_t *win; - int w; + window_handle_t w; if (!s || !(s->state & SOCKET_PRESENT)) return -ENODEV; - for (w = idx; w < MAX_WIN; w++) + + wh--; + if (wh >= MAX_WIN) + return -EINVAL; + for (w = wh; w < MAX_WIN; w++) if (s->state & SOCKET_WIN_REQ(w)) break; if (w == MAX_WIN) @@ -213,7 +217,8 @@ int pcmcia_get_window(struct pcmcia_socket *s, window_handle_t *handle, req->Attributes |= WIN_DATA_WIDTH_16; if (win->ctl.flags & MAP_USE_WAIT) req->Attributes |= WIN_USE_WAIT; - *handle = win; + + *wh_out = w++; return 0; } /* pcmcia_get_window */ EXPORT_SYMBOL(pcmcia_get_window); @@ -226,12 +231,12 @@ EXPORT_SYMBOL(pcmcia_get_window); int pcmcia_get_mem_page(struct pcmcia_socket *skt, window_handle_t wh, memreq_t *req) { - window_handle_t win = wh; - - if ((win == NULL) || (win->magic != WINDOW_MAGIC)) + wh--; + if (wh >= MAX_WIN) return -EINVAL; + req->Page = 0; - req->CardOffset = win->ctl.card_start; + req->CardOffset = skt->win[wh].ctl.card_start; return 0; } /* pcmcia_get_mem_page */ EXPORT_SYMBOL(pcmcia_get_mem_page); @@ -240,18 +245,17 @@ EXPORT_SYMBOL(pcmcia_get_mem_page); int pcmcia_map_mem_page(struct pcmcia_device *p_dev, window_handle_t wh, memreq_t *req) { - struct pcmcia_socket *s; - window_handle_t win = wh; + struct pcmcia_socket *s = p_dev->socket; - if ((win == NULL) || (win->magic != WINDOW_MAGIC)) + wh--; + if (wh >= MAX_WIN) return -EINVAL; - s = win->sock; if (req->Page != 0) { dev_dbg(&s->dev, "failure: requested page is zero\n"); return -EINVAL; } - win->ctl.card_start = req->CardOffset; - if (s->ops->set_mem_map(s, &win->ctl) != 0) { + s->win[wh].ctl.card_start = req->CardOffset; + if (s->ops->set_mem_map(s, &s->win[wh].ctl) != 0) { dev_dbg(&s->dev, "failed to set_mem_map\n"); return -EIO; } @@ -450,13 +454,16 @@ static int pcmcia_release_irq(struct pcmcia_device *p_dev, irq_req_t *req) int pcmcia_release_window(struct pcmcia_device *p_dev, window_handle_t wh) { - struct pcmcia_socket *s; - window_handle_t win = wh; + struct pcmcia_socket *s = p_dev->socket; + window_t *win; - if ((win == NULL) || (win->magic != WINDOW_MAGIC)) + wh--; + if (wh >= MAX_WIN) return -EINVAL; - s = win->sock; - if (!(win->handle->_win & CLIENT_WIN_REQ(win->index))) { + + win = &s->win[wh]; + + if (!(p_dev->_win & CLIENT_WIN_REQ(wh))) { dev_dbg(&s->dev, "not releasing unknown window\n"); return -EINVAL; } @@ -464,7 +471,7 @@ int pcmcia_release_window(struct pcmcia_device *p_dev, window_handle_t wh) /* Shut down memory window */ win->ctl.flags &= ~MAP_ACTIVE; s->ops->set_mem_map(s, &win->ctl); - s->state &= ~SOCKET_WIN_REQ(win->index); + s->state &= ~SOCKET_WIN_REQ(wh); /* Release system memory */ if (win->ctl.res) { @@ -472,9 +479,7 @@ int pcmcia_release_window(struct pcmcia_device *p_dev, window_handle_t wh) kfree(win->ctl.res); win->ctl.res = NULL; } - win->handle->_win &= ~CLIENT_WIN_REQ(win->index); - - win->magic = 0; + p_dev->_win &= ~CLIENT_WIN_REQ(wh); return 0; } /* pcmcia_release_window */ @@ -847,10 +852,6 @@ int pcmcia_request_window(struct pcmcia_device **p_dev, win_req_t *req, window_h } win = &s->win[w]; - win->magic = WINDOW_MAGIC; - win->index = w; - win->handle = *p_dev; - win->sock = s; if (!(s->features & SS_CAP_STATIC_MAP)) { win->ctl.res = pcmcia_find_mem_region(req->Base, req->Size, align, @@ -887,7 +888,7 @@ int pcmcia_request_window(struct pcmcia_device **p_dev, win_req_t *req, window_h } else { req->Base = win->ctl.res->start; } - *wh = win; + *wh = w + 1; return 0; } /* pcmcia_request_window */ diff --git a/include/pcmcia/cs_types.h b/include/pcmcia/cs_types.h index 315965a37930..f5e3b8386c8f 100644 --- a/include/pcmcia/cs_types.h +++ b/include/pcmcia/cs_types.h @@ -26,8 +26,7 @@ typedef u_int event_t; typedef u_char cisdata_t; typedef u_short page_t; -struct window_t; -typedef struct window_t *window_handle_t; +typedef unsigned long window_handle_t; struct region_t; typedef struct region_t *memory_handle_t; diff --git a/include/pcmcia/ss.h b/include/pcmcia/ss.h index 753da9b087d3..6301c3f4f19e 100644 --- a/include/pcmcia/ss.h +++ b/include/pcmcia/ss.h @@ -107,12 +107,7 @@ typedef struct io_window_t { struct resource *res; } io_window_t; -#define WINDOW_MAGIC 0xB35C typedef struct window_t { - u_short magic; - u_short index; - struct pcmcia_device *handle; - struct pcmcia_socket *sock; pccard_mem_map ctl; } window_t; -- cgit v1.3-8-gc7d7 From 82f88e36004162f49a9340ffbbaebe89016e4835 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 3 Nov 2009 01:16:12 +0100 Subject: pcmcia: remove unused "window_t" typedef Signed-off-by: Dominik Brodowski --- drivers/pcmcia/pcmcia_ioctl.c | 18 ++++++++-------- drivers/pcmcia/pcmcia_resource.c | 46 ++++++++++++++++++++-------------------- include/pcmcia/ss.h | 6 +----- 3 files changed, 33 insertions(+), 37 deletions(-) (limited to 'include') diff --git a/drivers/pcmcia/pcmcia_ioctl.c b/drivers/pcmcia/pcmcia_ioctl.c index 38b3a26a3ff3..c4d7908fa37f 100644 --- a/drivers/pcmcia/pcmcia_ioctl.c +++ b/drivers/pcmcia/pcmcia_ioctl.c @@ -224,7 +224,7 @@ static int pcmcia_adjust_resource_info(adjust_t *adj) static int pcmcia_get_window(struct pcmcia_socket *s, window_handle_t *wh_out, window_handle_t wh, win_req_t *req) { - window_t *win; + pccard_mem_map *win; window_handle_t w; wh--; @@ -238,17 +238,17 @@ static int pcmcia_get_window(struct pcmcia_socket *s, window_handle_t *wh_out, if (w == MAX_WIN) return -EINVAL; win = &s->win[w]; - req->Base = win->ctl.res->start; - req->Size = win->ctl.res->end - win->ctl.res->start + 1; - req->AccessSpeed = win->ctl.speed; + req->Base = win->res->start; + req->Size = win->res->end - win->res->start + 1; + req->AccessSpeed = win->speed; req->Attributes = 0; - if (win->ctl.flags & MAP_ATTRIB) + if (win->flags & MAP_ATTRIB) req->Attributes |= WIN_MEMORY_TYPE_AM; - if (win->ctl.flags & MAP_ACTIVE) + if (win->flags & MAP_ACTIVE) req->Attributes |= WIN_ENABLE; - if (win->ctl.flags & MAP_16BIT) + if (win->flags & MAP_16BIT) req->Attributes |= WIN_DATA_WIDTH_16; - if (win->ctl.flags & MAP_USE_WAIT) + if (win->flags & MAP_USE_WAIT) req->Attributes |= WIN_USE_WAIT; *wh_out = w + 1; @@ -268,7 +268,7 @@ static int pcmcia_get_mem_page(struct pcmcia_socket *skt, window_handle_t wh, return -EINVAL; req->Page = 0; - req->CardOffset = skt->win[wh].ctl.card_start; + req->CardOffset = skt->win[wh].card_start; return 0; } /* pcmcia_get_mem_page */ diff --git a/drivers/pcmcia/pcmcia_resource.c b/drivers/pcmcia/pcmcia_resource.c index ae68b26a7050..50468543d607 100644 --- a/drivers/pcmcia/pcmcia_resource.c +++ b/drivers/pcmcia/pcmcia_resource.c @@ -197,8 +197,8 @@ int pcmcia_map_mem_page(struct pcmcia_device *p_dev, window_handle_t wh, dev_dbg(&s->dev, "failure: requested page is zero\n"); return -EINVAL; } - s->win[wh].ctl.card_start = req->CardOffset; - if (s->ops->set_mem_map(s, &s->win[wh].ctl) != 0) { + s->win[wh].card_start = req->CardOffset; + if (s->ops->set_mem_map(s, &s->win[wh]) != 0) { dev_dbg(&s->dev, "failed to set_mem_map\n"); return -EIO; } @@ -398,7 +398,7 @@ static int pcmcia_release_irq(struct pcmcia_device *p_dev, irq_req_t *req) int pcmcia_release_window(struct pcmcia_device *p_dev, window_handle_t wh) { struct pcmcia_socket *s = p_dev->socket; - window_t *win; + pccard_mem_map *win; wh--; if (wh >= MAX_WIN) @@ -412,15 +412,15 @@ int pcmcia_release_window(struct pcmcia_device *p_dev, window_handle_t wh) } /* Shut down memory window */ - win->ctl.flags &= ~MAP_ACTIVE; - s->ops->set_mem_map(s, &win->ctl); + win->flags &= ~MAP_ACTIVE; + s->ops->set_mem_map(s, win); s->state &= ~SOCKET_WIN_REQ(wh); /* Release system memory */ - if (win->ctl.res) { - release_resource(win->ctl.res); - kfree(win->ctl.res); - win->ctl.res = NULL; + if (win->res) { + release_resource(win->res); + kfree(win->res); + win->res = NULL; } p_dev->_win &= ~CLIENT_WIN_REQ(wh); @@ -755,7 +755,7 @@ EXPORT_SYMBOL(pcmcia_request_irq); int pcmcia_request_window(struct pcmcia_device **p_dev, win_req_t *req, window_handle_t *wh) { struct pcmcia_socket *s = (*p_dev)->socket; - window_t *win; + pccard_mem_map *win; u_long align; int w; @@ -797,9 +797,9 @@ int pcmcia_request_window(struct pcmcia_device **p_dev, win_req_t *req, window_h win = &s->win[w]; if (!(s->features & SS_CAP_STATIC_MAP)) { - win->ctl.res = pcmcia_find_mem_region(req->Base, req->Size, align, + win->res = pcmcia_find_mem_region(req->Base, req->Size, align, (req->Attributes & WIN_MAP_BELOW_1MB), s); - if (!win->ctl.res) { + if (!win->res) { dev_dbg(&s->dev, "allocating mem region failed\n"); return -EINVAL; } @@ -807,19 +807,19 @@ int pcmcia_request_window(struct pcmcia_device **p_dev, win_req_t *req, window_h (*p_dev)->_win |= CLIENT_WIN_REQ(w); /* Configure the socket controller */ - win->ctl.map = w+1; - win->ctl.flags = 0; - win->ctl.speed = req->AccessSpeed; + win->map = w+1; + win->flags = 0; + win->speed = req->AccessSpeed; if (req->Attributes & WIN_MEMORY_TYPE) - win->ctl.flags |= MAP_ATTRIB; + win->flags |= MAP_ATTRIB; if (req->Attributes & WIN_ENABLE) - win->ctl.flags |= MAP_ACTIVE; + win->flags |= MAP_ACTIVE; if (req->Attributes & WIN_DATA_WIDTH_16) - win->ctl.flags |= MAP_16BIT; + win->flags |= MAP_16BIT; if (req->Attributes & WIN_USE_WAIT) - win->ctl.flags |= MAP_USE_WAIT; - win->ctl.card_start = 0; - if (s->ops->set_mem_map(s, &win->ctl) != 0) { + win->flags |= MAP_USE_WAIT; + win->card_start = 0; + if (s->ops->set_mem_map(s, win) != 0) { dev_dbg(&s->dev, "failed to set memory mapping\n"); return -EIO; } @@ -827,9 +827,9 @@ int pcmcia_request_window(struct pcmcia_device **p_dev, win_req_t *req, window_h /* Return window handle */ if (s->features & SS_CAP_STATIC_MAP) { - req->Base = win->ctl.static_start; + req->Base = win->static_start; } else { - req->Base = win->ctl.res->start; + req->Base = win->res->start; } *wh = w + 1; diff --git a/include/pcmcia/ss.h b/include/pcmcia/ss.h index 6301c3f4f19e..d85f725be7e3 100644 --- a/include/pcmcia/ss.h +++ b/include/pcmcia/ss.h @@ -107,10 +107,6 @@ typedef struct io_window_t { struct resource *res; } io_window_t; -typedef struct window_t { - pccard_mem_map ctl; -} window_t; - /* Maximum number of IO windows per socket */ #define MAX_IO_WIN 2 @@ -150,7 +146,7 @@ struct pcmcia_socket { u_int Config; } irq; io_window_t io[MAX_IO_WIN]; - window_t win[MAX_WIN]; + pccard_mem_map win[MAX_WIN]; struct list_head cis_cache; size_t fake_cis_len; u8 *fake_cis; -- cgit v1.3-8-gc7d7 From 6838b03fc6564ea07d0cd87ea6e198d90ab1fc3e Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 3 Nov 2009 01:31:52 +0100 Subject: pcmcia: pcmcia_request_window() doesn't need a pointer to a pointer pcmcia_request_window() only needs a pointer to struct pcmcia_device, not a pointer to a pointer. CC: netdev@vger.kernel.org CC: linux-wireless@vger.kernel.org CC: linux-scsi@vger.kernel.org CC: Jiri Kosina Acked-by: Karsten Keil (for ISDN) Signed-off-by: Dominik Brodowski --- drivers/char/pcmcia/ipwireless/main.c | 4 ++-- drivers/isdn/hisax/sedlbauer_cs.c | 2 +- drivers/mtd/maps/pcmciamtd.c | 2 +- drivers/net/pcmcia/fmvj18x_cs.c | 4 ++-- drivers/net/pcmcia/ibmtr_cs.c | 4 ++-- drivers/net/pcmcia/pcnet_cs.c | 4 ++-- drivers/net/pcmcia/smc91c92_cs.c | 2 +- drivers/net/pcmcia/xirc2ps_cs.c | 2 +- drivers/net/wireless/airo_cs.c | 2 +- drivers/net/wireless/b43/pcmcia.c | 2 +- drivers/net/wireless/netwave_cs.c | 2 +- drivers/net/wireless/ray_cs.c | 6 +++--- drivers/net/wireless/wavelan_cs.c | 2 +- drivers/pcmcia/pcmcia_resource.c | 6 +++--- drivers/scsi/pcmcia/nsp_cs.c | 2 +- drivers/staging/comedi/drivers/ni_daq_700.c | 2 +- drivers/staging/comedi/drivers/ni_daq_dio24.c | 2 +- drivers/staging/comedi/drivers/ni_labpc_cs.c | 2 +- include/pcmcia/ds.h | 2 +- 19 files changed, 27 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/drivers/char/pcmcia/ipwireless/main.c b/drivers/char/pcmcia/ipwireless/main.c index 10af1c0dc368..082146a26c87 100644 --- a/drivers/char/pcmcia/ipwireless/main.c +++ b/drivers/char/pcmcia/ipwireless/main.c @@ -116,7 +116,7 @@ static int ipwireless_probe(struct pcmcia_device *p_dev, ipw->request_common_memory.Size = 0x1000; ipw->request_common_memory.AccessSpeed = 0; - ret = pcmcia_request_window(&p_dev, &ipw->request_common_memory, + ret = pcmcia_request_window(p_dev, &ipw->request_common_memory, &ipw->handle_common_memory); if (ret != 0) @@ -145,7 +145,7 @@ static int ipwireless_probe(struct pcmcia_device *p_dev, ipw->request_attr_memory.Size = 0; /* this used to be 0x1000 */ ipw->request_attr_memory.AccessSpeed = 0; - ret = pcmcia_request_window(&p_dev, &ipw->request_attr_memory, + ret = pcmcia_request_window(p_dev, &ipw->request_attr_memory, &ipw->handle_attr_memory); if (ret != 0) diff --git a/drivers/isdn/hisax/sedlbauer_cs.c b/drivers/isdn/hisax/sedlbauer_cs.c index 598e17a551b7..331716fc6b30 100644 --- a/drivers/isdn/hisax/sedlbauer_cs.c +++ b/drivers/isdn/hisax/sedlbauer_cs.c @@ -272,7 +272,7 @@ static int sedlbauer_config_check(struct pcmcia_device *p_dev, req->Base = mem->win[0].host_addr; req->Size = mem->win[0].len; req->AccessSpeed = 0; - if (pcmcia_request_window(&p_dev, req, &p_dev->win) != 0) + if (pcmcia_request_window(p_dev, req, &p_dev->win) != 0) return -ENODEV; map.Page = 0; map.CardOffset = mem->win[0].card_addr; diff --git a/drivers/mtd/maps/pcmciamtd.c b/drivers/mtd/maps/pcmciamtd.c index 80b9005c3736..689d6a79ffc0 100644 --- a/drivers/mtd/maps/pcmciamtd.c +++ b/drivers/mtd/maps/pcmciamtd.c @@ -530,7 +530,7 @@ static int pcmciamtd_config(struct pcmcia_device *link) int ret; DEBUG(2, "requesting window with size = %dKiB memspeed = %d", req.Size >> 10, req.AccessSpeed); - ret = pcmcia_request_window(&link, &req, &link->win); + ret = pcmcia_request_window(link, &req, &link->win); DEBUG(2, "ret = %d dev->win_size = %d", ret, dev->win_size); if(ret) { req.Size >>= 1; diff --git a/drivers/net/pcmcia/fmvj18x_cs.c b/drivers/net/pcmcia/fmvj18x_cs.c index 6b9c79e4a2c0..85f7c4562707 100644 --- a/drivers/net/pcmcia/fmvj18x_cs.c +++ b/drivers/net/pcmcia/fmvj18x_cs.c @@ -567,7 +567,7 @@ static int fmvj18x_get_hwinfo(struct pcmcia_device *link, u_char *node_id) req.Attributes = WIN_DATA_WIDTH_8|WIN_MEMORY_TYPE_AM|WIN_ENABLE; req.Base = 0; req.Size = 0; req.AccessSpeed = 0; - i = pcmcia_request_window(&link, &req, &link->win); + i = pcmcia_request_window(link, &req, &link->win); if (i != 0) return -1; @@ -618,7 +618,7 @@ static int fmvj18x_setup_mfc(struct pcmcia_device *link) req.Attributes = WIN_DATA_WIDTH_8|WIN_MEMORY_TYPE_AM|WIN_ENABLE; req.Base = 0; req.Size = 0; req.AccessSpeed = 0; - i = pcmcia_request_window(&link, &req, &link->win); + i = pcmcia_request_window(link, &req, &link->win); if (i != 0) return -1; diff --git a/drivers/net/pcmcia/ibmtr_cs.c b/drivers/net/pcmcia/ibmtr_cs.c index 6fc89eba9c8f..14fe5fa35d86 100644 --- a/drivers/net/pcmcia/ibmtr_cs.c +++ b/drivers/net/pcmcia/ibmtr_cs.c @@ -246,7 +246,7 @@ static int __devinit ibmtr_config(struct pcmcia_device *link) req.Base = 0; req.Size = 0x2000; req.AccessSpeed = 250; - ret = pcmcia_request_window(&link, &req, &link->win); + ret = pcmcia_request_window(link, &req, &link->win); if (ret) goto failed; @@ -263,7 +263,7 @@ static int __devinit ibmtr_config(struct pcmcia_device *link) req.Base = 0; req.Size = sramsize * 1024; req.AccessSpeed = 250; - ret = pcmcia_request_window(&link, &req, &info->sram_win_handle); + ret = pcmcia_request_window(link, &req, &info->sram_win_handle); if (ret) goto failed; diff --git a/drivers/net/pcmcia/pcnet_cs.c b/drivers/net/pcmcia/pcnet_cs.c index 518b094c9f6d..d06a0ce7cb89 100644 --- a/drivers/net/pcmcia/pcnet_cs.c +++ b/drivers/net/pcmcia/pcnet_cs.c @@ -317,7 +317,7 @@ static hw_info_t *get_hwinfo(struct pcmcia_device *link) req.Attributes = WIN_DATA_WIDTH_8|WIN_MEMORY_TYPE_AM|WIN_ENABLE; req.Base = 0; req.Size = 0; req.AccessSpeed = 0; - i = pcmcia_request_window(&link, &req, &link->win); + i = pcmcia_request_window(link, &req, &link->win); if (i != 0) return NULL; @@ -1491,7 +1491,7 @@ static int setup_shmem_window(struct pcmcia_device *link, int start_pg, req.Attributes |= WIN_USE_WAIT; req.Base = 0; req.Size = window_size; req.AccessSpeed = mem_speed; - ret = pcmcia_request_window(&link, &req, &link->win); + ret = pcmcia_request_window(link, &req, &link->win); if (ret) goto failed; diff --git a/drivers/net/pcmcia/smc91c92_cs.c b/drivers/net/pcmcia/smc91c92_cs.c index 4ceaa45d7d81..c6ca3764ab6d 100644 --- a/drivers/net/pcmcia/smc91c92_cs.c +++ b/drivers/net/pcmcia/smc91c92_cs.c @@ -472,7 +472,7 @@ static int mhz_mfc_config(struct pcmcia_device *link) req.Attributes = WIN_DATA_WIDTH_8|WIN_MEMORY_TYPE_AM|WIN_ENABLE; req.Base = req.Size = 0; req.AccessSpeed = 0; - i = pcmcia_request_window(&link, &req, &link->win); + i = pcmcia_request_window(link, &req, &link->win); if (i != 0) return -ENODEV; diff --git a/drivers/net/pcmcia/xirc2ps_cs.c b/drivers/net/pcmcia/xirc2ps_cs.c index 9bc4d6002ae0..265852ab1a2d 100644 --- a/drivers/net/pcmcia/xirc2ps_cs.c +++ b/drivers/net/pcmcia/xirc2ps_cs.c @@ -926,7 +926,7 @@ xirc2ps_config(struct pcmcia_device * link) req.Attributes = WIN_DATA_WIDTH_8|WIN_MEMORY_TYPE_AM|WIN_ENABLE; req.Base = req.Size = 0; req.AccessSpeed = 0; - if ((err = pcmcia_request_window(&link, &req, &link->win))) + if ((err = pcmcia_request_window(link, &req, &link->win))) goto config_error; local->dingo_ccr = ioremap(req.Base,0x1000) + 0x0800; diff --git a/drivers/net/wireless/airo_cs.c b/drivers/net/wireless/airo_cs.c index 2aa6978d76ca..34d775c18a31 100644 --- a/drivers/net/wireless/airo_cs.c +++ b/drivers/net/wireless/airo_cs.c @@ -257,7 +257,7 @@ static int airo_cs_config_check(struct pcmcia_device *p_dev, req->Base = mem->win[0].host_addr; req->Size = mem->win[0].len; req->AccessSpeed = 0; - if (pcmcia_request_window(&p_dev, req, &p_dev->win) != 0) + if (pcmcia_request_window(p_dev, req, &p_dev->win) != 0) return -ENODEV; map.Page = 0; map.CardOffset = mem->win[0].card_addr; diff --git a/drivers/net/wireless/b43/pcmcia.c b/drivers/net/wireless/b43/pcmcia.c index c0a34bbcb8d5..2588358294be 100644 --- a/drivers/net/wireless/b43/pcmcia.c +++ b/drivers/net/wireless/b43/pcmcia.c @@ -87,7 +87,7 @@ static int __devinit b43_pcmcia_probe(struct pcmcia_device *dev) win.Base = 0; win.Size = SSB_CORE_SIZE; win.AccessSpeed = 250; - res = pcmcia_request_window(&dev, &win, &dev->win); + res = pcmcia_request_window(dev, &win, &dev->win); if (res != 0) goto err_kfree_ssb; diff --git a/drivers/net/wireless/netwave_cs.c b/drivers/net/wireless/netwave_cs.c index 4a8c62126c55..bd4eff79bd13 100644 --- a/drivers/net/wireless/netwave_cs.c +++ b/drivers/net/wireless/netwave_cs.c @@ -759,7 +759,7 @@ static int netwave_pcmcia_config(struct pcmcia_device *link) { req.Attributes = WIN_DATA_WIDTH_8|WIN_MEMORY_TYPE_CM|WIN_ENABLE; req.Base = 0; req.Size = 0x8000; req.AccessSpeed = mem_speed; - ret = pcmcia_request_window(&link, &req, &link->win); + ret = pcmcia_request_window(link, &req, &link->win); if (ret) goto failed; mem.CardOffset = 0x20000; mem.Page = 0; diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c index f719ffcb3c74..66e2d10cd754 100644 --- a/drivers/net/wireless/ray_cs.c +++ b/drivers/net/wireless/ray_cs.c @@ -438,7 +438,7 @@ static int ray_config(struct pcmcia_device *link) req.Base = 0; req.Size = 0x8000; req.AccessSpeed = ray_mem_speed; - ret = pcmcia_request_window(&link, &req, &link->win); + ret = pcmcia_request_window(link, &req, &link->win); if (ret) goto failed; mem.CardOffset = 0x0000; @@ -454,7 +454,7 @@ static int ray_config(struct pcmcia_device *link) req.Base = 0; req.Size = 0x4000; req.AccessSpeed = ray_mem_speed; - ret = pcmcia_request_window(&link, &req, &local->rmem_handle); + ret = pcmcia_request_window(link, &req, &local->rmem_handle); if (ret) goto failed; mem.CardOffset = 0x8000; @@ -470,7 +470,7 @@ static int ray_config(struct pcmcia_device *link) req.Base = 0; req.Size = 0x1000; req.AccessSpeed = ray_mem_speed; - ret = pcmcia_request_window(&link, &req, &local->amem_handle); + ret = pcmcia_request_window(link, &req, &local->amem_handle); if (ret) goto failed; mem.CardOffset = 0x0000; diff --git a/drivers/net/wireless/wavelan_cs.c b/drivers/net/wireless/wavelan_cs.c index d4df25d329d4..f8c5166fffe0 100644 --- a/drivers/net/wireless/wavelan_cs.c +++ b/drivers/net/wireless/wavelan_cs.c @@ -3876,7 +3876,7 @@ wv_pcmcia_config(struct pcmcia_device * link) req.Attributes = WIN_DATA_WIDTH_8|WIN_MEMORY_TYPE_AM|WIN_ENABLE; req.Base = req.Size = 0; req.AccessSpeed = mem_speed; - i = pcmcia_request_window(&link, &req, &link->win); + i = pcmcia_request_window(link, &req, &link->win); if (i != 0) break; diff --git a/drivers/pcmcia/pcmcia_resource.c b/drivers/pcmcia/pcmcia_resource.c index 50468543d607..1cf7d54fb7e2 100644 --- a/drivers/pcmcia/pcmcia_resource.c +++ b/drivers/pcmcia/pcmcia_resource.c @@ -752,9 +752,9 @@ EXPORT_SYMBOL(pcmcia_request_irq); * Request_window() establishes a mapping between card memory space * and system memory space. */ -int pcmcia_request_window(struct pcmcia_device **p_dev, win_req_t *req, window_handle_t *wh) +int pcmcia_request_window(struct pcmcia_device *p_dev, win_req_t *req, window_handle_t *wh) { - struct pcmcia_socket *s = (*p_dev)->socket; + struct pcmcia_socket *s = p_dev->socket; pccard_mem_map *win; u_long align; int w; @@ -804,7 +804,7 @@ int pcmcia_request_window(struct pcmcia_device **p_dev, win_req_t *req, window_h return -EINVAL; } } - (*p_dev)->_win |= CLIENT_WIN_REQ(w); + p_dev->_win |= CLIENT_WIN_REQ(w); /* Configure the socket controller */ win->map = w+1; diff --git a/drivers/scsi/pcmcia/nsp_cs.c b/drivers/scsi/pcmcia/nsp_cs.c index c54108fc89a1..9dfd6f510b65 100644 --- a/drivers/scsi/pcmcia/nsp_cs.c +++ b/drivers/scsi/pcmcia/nsp_cs.c @@ -1684,7 +1684,7 @@ static int nsp_cs_config_check(struct pcmcia_device *p_dev, if (cfg_mem->req.Size < 0x1000) cfg_mem->req.Size = 0x1000; cfg_mem->req.AccessSpeed = 0; - if (pcmcia_request_window(&p_dev, &cfg_mem->req, &p_dev->win) != 0) + if (pcmcia_request_window(p_dev, &cfg_mem->req, &p_dev->win) != 0) goto next_entry; map.Page = 0; map.CardOffset = mem->win[0].card_addr; if (pcmcia_map_mem_page(p_dev, p_dev->win, &map) != 0) diff --git a/drivers/staging/comedi/drivers/ni_daq_700.c b/drivers/staging/comedi/drivers/ni_daq_700.c index aeb85b3ec7a7..7328a84ac500 100644 --- a/drivers/staging/comedi/drivers/ni_daq_700.c +++ b/drivers/staging/comedi/drivers/ni_daq_700.c @@ -612,7 +612,7 @@ static int dio700_pcmcia_config_loop(struct pcmcia_device *p_dev, if (req->Size < 0x1000) req->Size = 0x1000; req->AccessSpeed = 0; - if (pcmcia_request_window(&p_dev, req, &p_dev->win)) + if (pcmcia_request_window(p_dev, req, &p_dev->win)) return -ENODEV; map.Page = 0; map.CardOffset = mem->win[0].card_addr; diff --git a/drivers/staging/comedi/drivers/ni_daq_dio24.c b/drivers/staging/comedi/drivers/ni_daq_dio24.c index 0968a67b97cb..505631553ef7 100644 --- a/drivers/staging/comedi/drivers/ni_daq_dio24.c +++ b/drivers/staging/comedi/drivers/ni_daq_dio24.c @@ -363,7 +363,7 @@ static int dio24_pcmcia_config_loop(struct pcmcia_device *p_dev, if (req->Size < 0x1000) req->Size = 0x1000; req->AccessSpeed = 0; - if (pcmcia_request_window(&p_dev, req, &p_dev->win)) + if (pcmcia_request_window(p_dev, req, &p_dev->win)) return -ENODEV; map.Page = 0; map.CardOffset = mem->win[0].card_addr; diff --git a/drivers/staging/comedi/drivers/ni_labpc_cs.c b/drivers/staging/comedi/drivers/ni_labpc_cs.c index b4c7dfa54638..7fb5058ad43c 100644 --- a/drivers/staging/comedi/drivers/ni_labpc_cs.c +++ b/drivers/staging/comedi/drivers/ni_labpc_cs.c @@ -342,7 +342,7 @@ static int labpc_pcmcia_config_loop(struct pcmcia_device *p_dev, if (req->Size < 0x1000) req->Size = 0x1000; req->AccessSpeed = 0; - if (pcmcia_request_window(&p_dev, req, &p_dev->win)) + if (pcmcia_request_window(p_dev, req, &p_dev->win)) return -ENODEV; map.Page = 0; map.CardOffset = mem->win[0].card_addr; diff --git a/include/pcmcia/ds.h b/include/pcmcia/ds.h index cbf5f05745f2..d6c55fdf8d01 100644 --- a/include/pcmcia/ds.h +++ b/include/pcmcia/ds.h @@ -199,7 +199,7 @@ int pcmcia_request_irq(struct pcmcia_device *p_dev, irq_req_t *req); int pcmcia_request_configuration(struct pcmcia_device *p_dev, config_req_t *req); -int pcmcia_request_window(struct pcmcia_device **p_dev, win_req_t *req, +int pcmcia_request_window(struct pcmcia_device *p_dev, win_req_t *req, window_handle_t *wh); int pcmcia_release_window(struct pcmcia_device *p_dev, window_handle_t win); int pcmcia_map_mem_page(struct pcmcia_device *p_dev, window_handle_t win, -- cgit v1.3-8-gc7d7 From dd2e5a156525f11754d9b1e0583f6bb49c253d62 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 3 Nov 2009 10:27:34 +0100 Subject: pcmcia: remove deprecated handle_to_dev() macro Update remaining users and remove deprecated handle_to_dev() macro CC: Harald Welte CC: netdev@vger.kernel.org CC: linux-wireless@vger.kernel.org CC: linux-serial@vger.kernel.org Signed-off-by: Dominik Brodowski --- drivers/char/pcmcia/cm4000_cs.c | 2 +- drivers/char/pcmcia/cm4040_cs.c | 6 +++--- drivers/net/pcmcia/3c574_cs.c | 2 +- drivers/net/pcmcia/3c589_cs.c | 2 +- drivers/net/pcmcia/axnet_cs.c | 2 +- drivers/net/pcmcia/com20020_cs.c | 2 +- drivers/net/pcmcia/fmvj18x_cs.c | 2 +- drivers/net/pcmcia/ibmtr_cs.c | 2 +- drivers/net/pcmcia/nmclan_cs.c | 2 +- drivers/net/pcmcia/pcnet_cs.c | 2 +- drivers/net/pcmcia/smc91c92_cs.c | 2 +- drivers/net/pcmcia/xirc2ps_cs.c | 2 +- drivers/net/wireless/airo_cs.c | 2 +- drivers/net/wireless/atmel_cs.c | 4 ++-- drivers/net/wireless/hostap/hostap_cs.c | 2 +- drivers/net/wireless/libertas/if_cs.c | 4 ++-- drivers/net/wireless/netwave_cs.c | 2 +- drivers/net/wireless/orinoco/orinoco_cs.c | 2 +- drivers/net/wireless/orinoco/spectrum_cs.c | 2 +- drivers/net/wireless/ray_cs.c | 2 +- drivers/net/wireless/wavelan_cs.c | 2 +- drivers/net/wireless/wl3501_cs.c | 2 +- drivers/serial/serial_cs.c | 2 +- drivers/usb/host/sl811_cs.c | 2 +- include/pcmcia/ds.h | 3 --- sound/pcmcia/pdaudiocf/pdaudiocf.c | 2 +- sound/pcmcia/vx/vxpocket.c | 2 +- 27 files changed, 30 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/drivers/char/pcmcia/cm4000_cs.c b/drivers/char/pcmcia/cm4000_cs.c index 1611c4fe97fc..2db4c0a29b05 100644 --- a/drivers/char/pcmcia/cm4000_cs.c +++ b/drivers/char/pcmcia/cm4000_cs.c @@ -45,7 +45,7 @@ /* #define ATR_CSUM */ -#define reader_to_dev(x) (&handle_to_dev(x->p_dev)) +#define reader_to_dev(x) (&x->p_dev->dev) /* n (debug level) is ignored */ /* additional debug output may be enabled by re-compiling with diff --git a/drivers/char/pcmcia/cm4040_cs.c b/drivers/char/pcmcia/cm4040_cs.c index 38790db561a1..a6a70e476bea 100644 --- a/drivers/char/pcmcia/cm4040_cs.c +++ b/drivers/char/pcmcia/cm4040_cs.c @@ -39,7 +39,7 @@ #include "cm4040_cs.h" -#define reader_to_dev(x) (&handle_to_dev(x->p_dev)) +#define reader_to_dev(x) (&x->p_dev->dev) /* n (debug level) is ignored */ /* additional debug output may be enabled by re-compiling with @@ -539,7 +539,7 @@ static int cm4040_config_check(struct pcmcia_device *p_dev, p_dev->io.IOAddrLines = cfg->io.flags & CISTPL_IO_LINES_MASK; rc = pcmcia_request_io(p_dev, &p_dev->io); - dev_printk(KERN_INFO, &handle_to_dev(p_dev), + dev_printk(KERN_INFO, &p_dev->dev, "pcmcia_request_io returned 0x%x\n", rc); return rc; } @@ -561,7 +561,7 @@ static int reader_config(struct pcmcia_device *link, int devno) fail_rc = pcmcia_request_configuration(link, &link->conf); if (fail_rc != 0) { - dev_printk(KERN_INFO, &handle_to_dev(link), + dev_printk(KERN_INFO, &link->dev, "pcmcia_request_configuration failed 0x%x\n", fail_rc); goto cs_release; diff --git a/drivers/net/pcmcia/3c574_cs.c b/drivers/net/pcmcia/3c574_cs.c index dbef5d9cc9db..8b65e18ab230 100644 --- a/drivers/net/pcmcia/3c574_cs.c +++ b/drivers/net/pcmcia/3c574_cs.c @@ -449,7 +449,7 @@ static int tc574_config(struct pcmcia_device *link) } link->dev_node = &lp->node; - SET_NETDEV_DEV(dev, &handle_to_dev(link)); + SET_NETDEV_DEV(dev, &link->dev); if (register_netdev(dev) != 0) { printk(KERN_NOTICE "3c574_cs: register_netdev() failed\n"); diff --git a/drivers/net/pcmcia/3c589_cs.c b/drivers/net/pcmcia/3c589_cs.c index 6eac62b7143c..c43c21ddb2d1 100644 --- a/drivers/net/pcmcia/3c589_cs.c +++ b/drivers/net/pcmcia/3c589_cs.c @@ -316,7 +316,7 @@ static int tc589_config(struct pcmcia_device *link) printk(KERN_ERR "3c589_cs: invalid if_port requested\n"); link->dev_node = &lp->node; - SET_NETDEV_DEV(dev, &handle_to_dev(link)); + SET_NETDEV_DEV(dev, &link->dev); if (register_netdev(dev) != 0) { printk(KERN_ERR "3c589_cs: register_netdev() failed\n"); diff --git a/drivers/net/pcmcia/axnet_cs.c b/drivers/net/pcmcia/axnet_cs.c index 5af2ccfdb52d..0552dddd587f 100644 --- a/drivers/net/pcmcia/axnet_cs.c +++ b/drivers/net/pcmcia/axnet_cs.c @@ -400,7 +400,7 @@ static int axnet_config(struct pcmcia_device *link) info->phy_id = (i < 32) ? i : -1; link->dev_node = &info->node; - SET_NETDEV_DEV(dev, &handle_to_dev(link)); + SET_NETDEV_DEV(dev, &link->dev); if (register_netdev(dev) != 0) { printk(KERN_NOTICE "axnet_cs: register_netdev() failed\n"); diff --git a/drivers/net/pcmcia/com20020_cs.c b/drivers/net/pcmcia/com20020_cs.c index 9a2e5006570b..51e9cb0a6d1e 100644 --- a/drivers/net/pcmcia/com20020_cs.c +++ b/drivers/net/pcmcia/com20020_cs.c @@ -302,7 +302,7 @@ static int com20020_config(struct pcmcia_device *link) lp->card_flags = ARC_CAN_10MBIT; /* pretend all of them can 10Mbit */ link->dev_node = &info->node; - SET_NETDEV_DEV(dev, &handle_to_dev(link)); + SET_NETDEV_DEV(dev, &link->dev); i = com20020_found(dev, 0); /* calls register_netdev */ diff --git a/drivers/net/pcmcia/fmvj18x_cs.c b/drivers/net/pcmcia/fmvj18x_cs.c index 85f7c4562707..9b5ca37c6684 100644 --- a/drivers/net/pcmcia/fmvj18x_cs.c +++ b/drivers/net/pcmcia/fmvj18x_cs.c @@ -532,7 +532,7 @@ static int fmvj18x_config(struct pcmcia_device *link) lp->cardtype = cardtype; link->dev_node = &lp->node; - SET_NETDEV_DEV(dev, &handle_to_dev(link)); + SET_NETDEV_DEV(dev, &link->dev); if (register_netdev(dev) != 0) { printk(KERN_NOTICE "fmvj18x_cs: register_netdev() failed\n"); diff --git a/drivers/net/pcmcia/ibmtr_cs.c b/drivers/net/pcmcia/ibmtr_cs.c index 14fe5fa35d86..76706e12d731 100644 --- a/drivers/net/pcmcia/ibmtr_cs.c +++ b/drivers/net/pcmcia/ibmtr_cs.c @@ -287,7 +287,7 @@ static int __devinit ibmtr_config(struct pcmcia_device *link) ibmtr_hw_setup(dev, mmiobase); link->dev_node = &info->node; - SET_NETDEV_DEV(dev, &handle_to_dev(link)); + SET_NETDEV_DEV(dev, &link->dev); i = ibmtr_probe_card(dev); if (i != 0) { diff --git a/drivers/net/pcmcia/nmclan_cs.c b/drivers/net/pcmcia/nmclan_cs.c index a5363483ec6f..2d0c6f93ed8b 100644 --- a/drivers/net/pcmcia/nmclan_cs.c +++ b/drivers/net/pcmcia/nmclan_cs.c @@ -701,7 +701,7 @@ static int nmclan_config(struct pcmcia_device *link) printk(KERN_NOTICE "nmclan_cs: invalid if_port requested\n"); link->dev_node = &lp->node; - SET_NETDEV_DEV(dev, &handle_to_dev(link)); + SET_NETDEV_DEV(dev, &link->dev); i = register_netdev(dev); if (i != 0) { diff --git a/drivers/net/pcmcia/pcnet_cs.c b/drivers/net/pcmcia/pcnet_cs.c index d06a0ce7cb89..30baee7b86a2 100644 --- a/drivers/net/pcmcia/pcnet_cs.c +++ b/drivers/net/pcmcia/pcnet_cs.c @@ -646,7 +646,7 @@ static int pcnet_config(struct pcmcia_device *link) mii_phy_probe(dev); link->dev_node = &info->node; - SET_NETDEV_DEV(dev, &handle_to_dev(link)); + SET_NETDEV_DEV(dev, &link->dev); if (register_netdev(dev) != 0) { printk(KERN_NOTICE "pcnet_cs: register_netdev() failed\n"); diff --git a/drivers/net/pcmcia/smc91c92_cs.c b/drivers/net/pcmcia/smc91c92_cs.c index c6ca3764ab6d..2e795de06cb3 100644 --- a/drivers/net/pcmcia/smc91c92_cs.c +++ b/drivers/net/pcmcia/smc91c92_cs.c @@ -964,7 +964,7 @@ static int smc91c92_config(struct pcmcia_device *link) } link->dev_node = &smc->node; - SET_NETDEV_DEV(dev, &handle_to_dev(link)); + SET_NETDEV_DEV(dev, &link->dev); if (register_netdev(dev) != 0) { printk(KERN_ERR "smc91c92_cs: register_netdev() failed\n"); diff --git a/drivers/net/pcmcia/xirc2ps_cs.c b/drivers/net/pcmcia/xirc2ps_cs.c index 265852ab1a2d..f75ac716390d 100644 --- a/drivers/net/pcmcia/xirc2ps_cs.c +++ b/drivers/net/pcmcia/xirc2ps_cs.c @@ -992,7 +992,7 @@ xirc2ps_config(struct pcmcia_device * link) do_reset(dev, 1); /* a kludge to make the cem56 work */ link->dev_node = &local->node; - SET_NETDEV_DEV(dev, &handle_to_dev(link)); + SET_NETDEV_DEV(dev, &link->dev); if ((err=register_netdev(dev))) { printk(KNOT_XIRC "register_netdev() failed\n"); diff --git a/drivers/net/wireless/airo_cs.c b/drivers/net/wireless/airo_cs.c index 34d775c18a31..7d3a96fdf5a3 100644 --- a/drivers/net/wireless/airo_cs.c +++ b/drivers/net/wireless/airo_cs.c @@ -322,7 +322,7 @@ static int airo_config(struct pcmcia_device *link) goto failed; ((local_info_t *)link->priv)->eth_dev = init_airo_card(link->irq.AssignedIRQ, - link->io.BasePort1, 1, &handle_to_dev(link)); + link->io.BasePort1, 1, &link->dev); if (!((local_info_t *)link->priv)->eth_dev) goto failed; diff --git a/drivers/net/wireless/atmel_cs.c b/drivers/net/wireless/atmel_cs.c index 78385722a768..18a58b859223 100644 --- a/drivers/net/wireless/atmel_cs.c +++ b/drivers/net/wireless/atmel_cs.c @@ -260,7 +260,7 @@ static int atmel_config(struct pcmcia_device *link) struct pcmcia_device_id *did; dev = link->priv; - did = dev_get_drvdata(&handle_to_dev(link)); + did = dev_get_drvdata(&link->dev); dev_dbg(&link->dev, "atmel_config\n"); @@ -309,7 +309,7 @@ static int atmel_config(struct pcmcia_device *link) init_atmel_card(link->irq.AssignedIRQ, link->io.BasePort1, did ? did->driver_info : ATMEL_FW_TYPE_NONE, - &handle_to_dev(link), + &link->dev, card_present, link); if (!((local_info_t*)link->priv)->eth_dev) diff --git a/drivers/net/wireless/hostap/hostap_cs.c b/drivers/net/wireless/hostap/hostap_cs.c index ca3ab849ac03..243e912729b9 100644 --- a/drivers/net/wireless/hostap/hostap_cs.c +++ b/drivers/net/wireless/hostap/hostap_cs.c @@ -623,7 +623,7 @@ static int prism2_config(struct pcmcia_device *link) /* Need to allocate net_device before requesting IRQ handler */ dev = prism2_init_local_data(&prism2_pccard_funcs, 0, - &handle_to_dev(link)); + &link->dev); if (dev == NULL) goto failed; link->priv = dev; diff --git a/drivers/net/wireless/libertas/if_cs.c b/drivers/net/wireless/libertas/if_cs.c index cb40c386fc77..f2b16559b686 100644 --- a/drivers/net/wireless/libertas/if_cs.c +++ b/drivers/net/wireless/libertas/if_cs.c @@ -590,7 +590,7 @@ static int if_cs_prog_helper(struct if_cs_card *card) /* TODO: make firmware file configurable */ ret = request_firmware(&fw, "libertas_cs_helper.fw", - &handle_to_dev(card->p_dev)); + &card->p_dev->dev); if (ret) { lbs_pr_err("can't load helper firmware\n"); ret = -ENODEV; @@ -663,7 +663,7 @@ static int if_cs_prog_real(struct if_cs_card *card) /* TODO: make firmware file configurable */ ret = request_firmware(&fw, "libertas_cs.fw", - &handle_to_dev(card->p_dev)); + &card->p_dev->dev); if (ret) { lbs_pr_err("can't load firmware\n"); ret = -ENODEV; diff --git a/drivers/net/wireless/netwave_cs.c b/drivers/net/wireless/netwave_cs.c index bd4eff79bd13..f5333b7d2226 100644 --- a/drivers/net/wireless/netwave_cs.c +++ b/drivers/net/wireless/netwave_cs.c @@ -773,7 +773,7 @@ static int netwave_pcmcia_config(struct pcmcia_device *link) { dev->irq = link->irq.AssignedIRQ; dev->base_addr = link->io.BasePort1; - SET_NETDEV_DEV(dev, &handle_to_dev(link)); + SET_NETDEV_DEV(dev, &link->dev); if (register_netdev(dev) != 0) { printk(KERN_DEBUG "netwave_cs: register_netdev() failed\n"); diff --git a/drivers/net/wireless/orinoco/orinoco_cs.c b/drivers/net/wireless/orinoco/orinoco_cs.c index da626ec663d0..688b39823459 100644 --- a/drivers/net/wireless/orinoco/orinoco_cs.c +++ b/drivers/net/wireless/orinoco/orinoco_cs.c @@ -109,7 +109,7 @@ orinoco_cs_probe(struct pcmcia_device *link) struct orinoco_private *priv; struct orinoco_pccard *card; - priv = alloc_orinocodev(sizeof(*card), &handle_to_dev(link), + priv = alloc_orinocodev(sizeof(*card), &link->dev, orinoco_cs_hard_reset, NULL); if (!priv) return -ENOMEM; diff --git a/drivers/net/wireless/orinoco/spectrum_cs.c b/drivers/net/wireless/orinoco/spectrum_cs.c index 700010e9e346..c609371d1525 100644 --- a/drivers/net/wireless/orinoco/spectrum_cs.c +++ b/drivers/net/wireless/orinoco/spectrum_cs.c @@ -182,7 +182,7 @@ spectrum_cs_probe(struct pcmcia_device *link) struct orinoco_private *priv; struct orinoco_pccard *card; - priv = alloc_orinocodev(sizeof(*card), &handle_to_dev(link), + priv = alloc_orinocodev(sizeof(*card), &link->dev, spectrum_cs_hard_reset, spectrum_cs_stop_firmware); if (!priv) diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c index 66e2d10cd754..5e0f4c3eac38 100644 --- a/drivers/net/wireless/ray_cs.c +++ b/drivers/net/wireless/ray_cs.c @@ -488,7 +488,7 @@ static int ray_config(struct pcmcia_device *link) return -ENODEV; } - SET_NETDEV_DEV(dev, &handle_to_dev(link)); + SET_NETDEV_DEV(dev, &link->dev); i = register_netdev(dev); if (i != 0) { printk("ray_config register_netdev() failed\n"); diff --git a/drivers/net/wireless/wavelan_cs.c b/drivers/net/wireless/wavelan_cs.c index f8c5166fffe0..df3579aef505 100644 --- a/drivers/net/wireless/wavelan_cs.c +++ b/drivers/net/wireless/wavelan_cs.c @@ -3899,7 +3899,7 @@ wv_pcmcia_config(struct pcmcia_device * link) lp->mem, dev->irq, (u_int) dev->base_addr); #endif - SET_NETDEV_DEV(dev, &handle_to_dev(link)); + SET_NETDEV_DEV(dev, &link->dev); i = register_netdev(dev); if(i != 0) { diff --git a/drivers/net/wireless/wl3501_cs.c b/drivers/net/wireless/wl3501_cs.c index 7e8e269b4cb6..9a956c786738 100644 --- a/drivers/net/wireless/wl3501_cs.c +++ b/drivers/net/wireless/wl3501_cs.c @@ -1975,7 +1975,7 @@ static int wl3501_config(struct pcmcia_device *link) dev->irq = link->irq.AssignedIRQ; dev->base_addr = link->io.BasePort1; - SET_NETDEV_DEV(dev, &handle_to_dev(link)); + SET_NETDEV_DEV(dev, &link->dev); if (register_netdev(dev)) { printk(KERN_NOTICE "wl3501_cs: register_netdev() failed\n"); goto failed; diff --git a/drivers/serial/serial_cs.c b/drivers/serial/serial_cs.c index 3b31bee85761..8d651a618455 100644 --- a/drivers/serial/serial_cs.c +++ b/drivers/serial/serial_cs.c @@ -387,7 +387,7 @@ static int setup_serial(struct pcmcia_device *handle, struct serial_info * info, port.irq = irq; port.flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | UPF_SHARE_IRQ; port.uartclk = 1843200; - port.dev = &handle_to_dev(handle); + port.dev = &handle->dev; if (buggy_uart) port.flags |= UPF_BUGGY_UART; diff --git a/drivers/usb/host/sl811_cs.c b/drivers/usb/host/sl811_cs.c index 46077420dea5..f87aba6db4e8 100644 --- a/drivers/usb/host/sl811_cs.c +++ b/drivers/usb/host/sl811_cs.c @@ -185,7 +185,7 @@ static int sl811_cs_config_check(struct pcmcia_device *p_dev, static int sl811_cs_config(struct pcmcia_device *link) { - struct device *parent = &handle_to_dev(link); + struct device *parent = &link->dev; local_info_t *dev = link->priv; int ret; diff --git a/include/pcmcia/ds.h b/include/pcmcia/ds.h index d6c55fdf8d01..d403c12f7978 100644 --- a/include/pcmcia/ds.h +++ b/include/pcmcia/ds.h @@ -138,9 +138,6 @@ struct pcmcia_device { #define to_pcmcia_dev(n) container_of(n, struct pcmcia_device, dev) #define to_pcmcia_drv(n) container_of(n, struct pcmcia_driver, drv) -/* deprecated -- don't use! */ -#define handle_to_dev(handle) (handle->dev) - /* * CIS access. diff --git a/sound/pcmcia/pdaudiocf/pdaudiocf.c b/sound/pcmcia/pdaudiocf/pdaudiocf.c index 64b859925c0b..447aaaee3be6 100644 --- a/sound/pcmcia/pdaudiocf/pdaudiocf.c +++ b/sound/pcmcia/pdaudiocf/pdaudiocf.c @@ -131,7 +131,7 @@ static int snd_pdacf_probe(struct pcmcia_device *link) return err; } - snd_card_set_dev(card, &handle_to_dev(link)); + snd_card_set_dev(card, &link->dev); pdacf->index = i; card_list[i] = card; diff --git a/sound/pcmcia/vx/vxpocket.c b/sound/pcmcia/vx/vxpocket.c index 1492744ad67f..5a5db48a91a9 100644 --- a/sound/pcmcia/vx/vxpocket.c +++ b/sound/pcmcia/vx/vxpocket.c @@ -244,7 +244,7 @@ static int vxpocket_config(struct pcmcia_device *link) if (ret) goto failed; - chip->dev = &handle_to_dev(link); + chip->dev = &link->dev; snd_card_set_dev(chip->card, chip->dev); if (snd_vxpocket_assign_resources(chip, link->io.BasePort1, link->irq.AssignedIRQ) < 0) -- cgit v1.3-8-gc7d7 From 5fa9167a1bf5f5a4b7282f5e7ac56a4a5a1fa044 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 8 Nov 2009 17:24:46 +0100 Subject: pcmcia: rework the irq_req_t typedef Most of the irq_req_t typedef'd struct can be re-worked quite easily: (1) IRQInfo2 was unused in any case, so drop it. (2) IRQInfo1 was used write-only, so drop it. (3) Instance (private data to be passed to the IRQ handler): Most PCMCIA drivers using pcmcia_request_irq() to actually register an IRQ handler set the "dev_id" to the same pointer as the "priv" pointer in struct pcmcia_device. Modify the two exceptions (ipwireless, ibmtr_cs) to also work this waym and set the IRQ handler's "dev_id" to p_dev->priv unconditionally. (4) Handler is to be of type irq_handler_t. (5) Handler != NULL already tells whether an IRQ handler is present. Therefore, we do not need the IRQ_HANDLER_PRESENT flag in irq_req_t.Attributes. CC: netdev@vger.kernel.org CC: linux-bluetooth@vger.kernel.org CC: linux-ide@vger.kernel.org CC: linux-wireless@vger.kernel.org CC: linux-scsi@vger.kernel.org CC: alsa-devel@alsa-project.org CC: Jaroslav Kysela CC: Jiri Kosina CC: Karsten Keil for the Bluetooth parts: Acked-by: Marcel Holtmann Signed-off-by: Dominik Brodowski --- drivers/ata/pata_pcmcia.c | 1 - drivers/bluetooth/bluecard_cs.c | 4 +--- drivers/bluetooth/bt3c_cs.c | 4 +--- drivers/bluetooth/btuart_cs.c | 4 +--- drivers/bluetooth/dtl1_cs.c | 4 +--- drivers/char/pcmcia/ipwireless/hardware.c | 8 ++++---- drivers/char/pcmcia/ipwireless/main.c | 6 +----- drivers/char/pcmcia/synclink_cs.c | 3 --- drivers/ide/ide-cs.c | 1 - drivers/isdn/hardware/avm/avm_cs.c | 2 -- drivers/isdn/hisax/avma1_cs.c | 2 -- drivers/isdn/hisax/elsa_cs.c | 1 - drivers/isdn/hisax/sedlbauer_cs.c | 1 - drivers/isdn/hisax/teles_cs.c | 1 - drivers/net/pcmcia/3c574_cs.c | 4 +--- drivers/net/pcmcia/3c589_cs.c | 4 +--- drivers/net/pcmcia/axnet_cs.c | 1 - drivers/net/pcmcia/com20020_cs.c | 8 +++----- drivers/net/pcmcia/fmvj18x_cs.c | 6 ++---- drivers/net/pcmcia/ibmtr_cs.c | 13 +++++++++---- drivers/net/pcmcia/nmclan_cs.c | 4 +--- drivers/net/pcmcia/pcnet_cs.c | 1 - drivers/net/pcmcia/smc91c92_cs.c | 8 +++----- drivers/net/pcmcia/xirc2ps_cs.c | 3 --- drivers/net/wireless/airo_cs.c | 1 - drivers/net/wireless/atmel_cs.c | 1 - drivers/net/wireless/b43/pcmcia.c | 2 -- drivers/net/wireless/hostap/hostap_cs.c | 5 +---- drivers/net/wireless/libertas/if_cs.c | 1 - drivers/net/wireless/netwave_cs.c | 5 +---- drivers/net/wireless/orinoco/orinoco_cs.c | 4 +--- drivers/net/wireless/orinoco/spectrum_cs.c | 4 +--- drivers/net/wireless/ray_cs.c | 4 +--- drivers/net/wireless/wavelan_cs.c | 5 ++--- drivers/net/wireless/wl3501_cs.c | 5 ++--- drivers/parport/parport_cs.c | 1 - drivers/pcmcia/pcmcia_resource.c | 18 +++++++++--------- drivers/scsi/pcmcia/aha152x_stub.c | 1 - drivers/scsi/pcmcia/fdomain_stub.c | 1 - drivers/scsi/pcmcia/nsp_cs.c | 4 +--- drivers/scsi/pcmcia/qlogic_stub.c | 1 - drivers/scsi/pcmcia/sym53c500_cs.c | 1 - drivers/serial/serial_cs.c | 1 - drivers/staging/comedi/drivers/cb_das16_cs.c | 1 - drivers/staging/comedi/drivers/das08_cs.c | 1 - drivers/staging/comedi/drivers/ni_daq_700.c | 1 - drivers/staging/comedi/drivers/ni_daq_dio24.c | 1 - drivers/staging/comedi/drivers/ni_labpc_cs.c | 1 - drivers/staging/comedi/drivers/ni_mio_cs.c | 4 ---- drivers/staging/comedi/drivers/quatech_daqp_cs.c | 4 +--- drivers/usb/host/sl811_cs.c | 1 - include/pcmcia/cs.h | 14 ++++++++------ sound/pcmcia/pdaudiocf/pdaudiocf.c | 4 +--- sound/pcmcia/vx/vxpocket.c | 4 +--- 54 files changed, 59 insertions(+), 136 deletions(-) (limited to 'include') diff --git a/drivers/ata/pata_pcmcia.c b/drivers/ata/pata_pcmcia.c index 5f94e214e17d..1b392c9e8531 100644 --- a/drivers/ata/pata_pcmcia.c +++ b/drivers/ata/pata_pcmcia.c @@ -268,7 +268,6 @@ static int pcmcia_init_one(struct pcmcia_device *pdev) pdev->io.Attributes2 = IO_DATA_PATH_WIDTH_8; pdev->io.IOAddrLines = 3; pdev->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; - pdev->irq.IRQInfo1 = IRQ_LEVEL_ID; pdev->conf.Attributes = CONF_ENABLE_IRQ; pdev->conf.IntType = INT_MEMORY_AND_IO; diff --git a/drivers/bluetooth/bluecard_cs.c b/drivers/bluetooth/bluecard_cs.c index 1e0c4d822972..2acdc605cb4b 100644 --- a/drivers/bluetooth/bluecard_cs.c +++ b/drivers/bluetooth/bluecard_cs.c @@ -867,11 +867,9 @@ static int bluecard_probe(struct pcmcia_device *link) link->io.Attributes1 = IO_DATA_PATH_WIDTH_8; link->io.NumPorts1 = 8; - link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING | IRQ_HANDLE_PRESENT; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; + link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; link->irq.Handler = bluecard_interrupt; - link->irq.Instance = info; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; diff --git a/drivers/bluetooth/bt3c_cs.c b/drivers/bluetooth/bt3c_cs.c index 9787fda45d84..d814a2755ccb 100644 --- a/drivers/bluetooth/bt3c_cs.c +++ b/drivers/bluetooth/bt3c_cs.c @@ -659,11 +659,9 @@ static int bt3c_probe(struct pcmcia_device *link) link->io.Attributes1 = IO_DATA_PATH_WIDTH_8; link->io.NumPorts1 = 8; - link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING | IRQ_HANDLE_PRESENT; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; + link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; link->irq.Handler = bt3c_interrupt; - link->irq.Instance = info; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; diff --git a/drivers/bluetooth/btuart_cs.c b/drivers/bluetooth/btuart_cs.c index f44d75217b2b..d339464dc15e 100644 --- a/drivers/bluetooth/btuart_cs.c +++ b/drivers/bluetooth/btuart_cs.c @@ -588,11 +588,9 @@ static int btuart_probe(struct pcmcia_device *link) link->io.Attributes1 = IO_DATA_PATH_WIDTH_8; link->io.NumPorts1 = 8; - link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING | IRQ_HANDLE_PRESENT; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; + link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; link->irq.Handler = btuart_interrupt; - link->irq.Instance = info; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; diff --git a/drivers/bluetooth/dtl1_cs.c b/drivers/bluetooth/dtl1_cs.c index 7cd8614a8ea9..4f02a6f3c980 100644 --- a/drivers/bluetooth/dtl1_cs.c +++ b/drivers/bluetooth/dtl1_cs.c @@ -573,11 +573,9 @@ static int dtl1_probe(struct pcmcia_device *link) link->io.Attributes1 = IO_DATA_PATH_WIDTH_8; link->io.NumPorts1 = 8; - link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING | IRQ_HANDLE_PRESENT; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; + link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; link->irq.Handler = dtl1_interrupt; - link->irq.Instance = info; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; diff --git a/drivers/char/pcmcia/ipwireless/hardware.c b/drivers/char/pcmcia/ipwireless/hardware.c index 4c1820cad712..99cffdab1056 100644 --- a/drivers/char/pcmcia/ipwireless/hardware.c +++ b/drivers/char/pcmcia/ipwireless/hardware.c @@ -1213,12 +1213,12 @@ static irqreturn_t ipwireless_handle_v2_v3_interrupt(int irq, irqreturn_t ipwireless_interrupt(int irq, void *dev_id) { - struct ipw_hardware *hw = dev_id; + struct ipw_dev *ipw = dev_id; - if (hw->hw_version == HW_VERSION_1) - return ipwireless_handle_v1_interrupt(irq, hw); + if (ipw->hardware->hw_version == HW_VERSION_1) + return ipwireless_handle_v1_interrupt(irq, ipw->hardware); else - return ipwireless_handle_v2_v3_interrupt(irq, hw); + return ipwireless_handle_v2_v3_interrupt(irq, ipw->hardware); } static void flush_packets_to_hw(struct ipw_hardware *hw) diff --git a/drivers/char/pcmcia/ipwireless/main.c b/drivers/char/pcmcia/ipwireless/main.c index 082146a26c87..dff24dae1485 100644 --- a/drivers/char/pcmcia/ipwireless/main.c +++ b/drivers/char/pcmcia/ipwireless/main.c @@ -93,8 +93,6 @@ static int ipwireless_probe(struct pcmcia_device *p_dev, p_dev->io.NumPorts1 = cfg->io.win[0].len; p_dev->io.IOAddrLines = 16; - p_dev->irq.IRQInfo1 = cfg->irq.IRQInfo1; - /* 0x40 causes it to generate level mode interrupts. */ /* 0x04 enables IREQ pin. */ p_dev->conf.ConfigIndex = cfg->index | 0x44; @@ -197,9 +195,8 @@ static int config_ipwireless(struct ipw_dev *ipw) link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; - link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING | IRQ_HANDLE_PRESENT; + link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; link->irq.Handler = ipwireless_interrupt; - link->irq.Instance = ipw->hardware; INIT_WORK(&ipw->work_reboot, signalled_reboot_work); @@ -315,7 +312,6 @@ static int ipwireless_attach(struct pcmcia_device *link) ipw->link = link; link->priv = ipw; - link->irq.Instance = ipw; /* Link this device into our device list. */ link->dev_node = &ipw->nodes[0]; diff --git a/drivers/char/pcmcia/synclink_cs.c b/drivers/char/pcmcia/synclink_cs.c index 09b2590adb8b..c31a0d913d37 100644 --- a/drivers/char/pcmcia/synclink_cs.c +++ b/drivers/char/pcmcia/synclink_cs.c @@ -554,7 +554,6 @@ static int mgslpc_probe(struct pcmcia_device *link) /* Interrupt setup */ link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; link->irq.Handler = NULL; link->conf.Attributes = 0; @@ -609,9 +608,7 @@ static int mgslpc_config(struct pcmcia_device *link) link->conf.ConfigIndex = 8; link->conf.Present = PRESENT_OPTION; - link->irq.Attributes |= IRQ_HANDLE_PRESENT; link->irq.Handler = mgslpc_isr; - link->irq.Instance = info; ret = pcmcia_request_irq(link, &link->irq); if (ret) diff --git a/drivers/ide/ide-cs.c b/drivers/ide/ide-cs.c index 6cee6c8d0782..dd6396384c25 100644 --- a/drivers/ide/ide-cs.c +++ b/drivers/ide/ide-cs.c @@ -103,7 +103,6 @@ static int ide_probe(struct pcmcia_device *link) link->io.Attributes2 = IO_DATA_PATH_WIDTH_8; link->io.IOAddrLines = 3; link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; diff --git a/drivers/isdn/hardware/avm/avm_cs.c b/drivers/isdn/hardware/avm/avm_cs.c index d388eadb9fdb..5a6ae646a636 100644 --- a/drivers/isdn/hardware/avm/avm_cs.c +++ b/drivers/isdn/hardware/avm/avm_cs.c @@ -111,8 +111,6 @@ static int avmcs_probe(struct pcmcia_device *p_dev) p_dev->irq.Attributes = IRQ_TYPE_EXCLUSIVE; p_dev->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING|IRQ_FIRST_SHARED; - p_dev->irq.IRQInfo1 = IRQ_LEVEL_ID; - /* General socket configuration */ p_dev->conf.Attributes = CONF_ENABLE_IRQ; p_dev->conf.IntType = INT_MEMORY_AND_IO; diff --git a/drivers/isdn/hisax/avma1_cs.c b/drivers/isdn/hisax/avma1_cs.c index 6d963f9a09ce..f9bdff39cf4a 100644 --- a/drivers/isdn/hisax/avma1_cs.c +++ b/drivers/isdn/hisax/avma1_cs.c @@ -123,8 +123,6 @@ static int avma1cs_probe(struct pcmcia_device *p_dev) p_dev->irq.Attributes = IRQ_TYPE_EXCLUSIVE; p_dev->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING|IRQ_FIRST_SHARED; - p_dev->irq.IRQInfo1 = IRQ_LEVEL_ID; - /* General socket configuration */ p_dev->conf.Attributes = CONF_ENABLE_IRQ; p_dev->conf.IntType = INT_MEMORY_AND_IO; diff --git a/drivers/isdn/hisax/elsa_cs.c b/drivers/isdn/hisax/elsa_cs.c index cdcd2979fcd4..a2f709f53974 100644 --- a/drivers/isdn/hisax/elsa_cs.c +++ b/drivers/isdn/hisax/elsa_cs.c @@ -138,7 +138,6 @@ static int elsa_cs_probe(struct pcmcia_device *link) /* Interrupt setup */ link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING|IRQ_FIRST_SHARED; - link->irq.IRQInfo1 = IRQ_LEVEL_ID|IRQ_SHARE_ID; link->irq.Handler = NULL; /* diff --git a/drivers/isdn/hisax/sedlbauer_cs.c b/drivers/isdn/hisax/sedlbauer_cs.c index 331716fc6b30..af5d393cc2d0 100644 --- a/drivers/isdn/hisax/sedlbauer_cs.c +++ b/drivers/isdn/hisax/sedlbauer_cs.c @@ -145,7 +145,6 @@ static int sedlbauer_probe(struct pcmcia_device *link) /* Interrupt setup */ link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING|IRQ_FIRST_SHARED; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; link->irq.Handler = NULL; /* diff --git a/drivers/isdn/hisax/teles_cs.c b/drivers/isdn/hisax/teles_cs.c index 7b11c15b3a97..ea705394ce2b 100644 --- a/drivers/isdn/hisax/teles_cs.c +++ b/drivers/isdn/hisax/teles_cs.c @@ -128,7 +128,6 @@ static int teles_probe(struct pcmcia_device *link) /* Interrupt setup */ link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING|IRQ_FIRST_SHARED; - link->irq.IRQInfo1 = IRQ_LEVEL_ID|IRQ_SHARE_ID; link->irq.Handler = NULL; /* diff --git a/drivers/net/pcmcia/3c574_cs.c b/drivers/net/pcmcia/3c574_cs.c index 8b65e18ab230..17a27225cc98 100644 --- a/drivers/net/pcmcia/3c574_cs.c +++ b/drivers/net/pcmcia/3c574_cs.c @@ -283,10 +283,8 @@ static int tc574_probe(struct pcmcia_device *link) spin_lock_init(&lp->window_lock); link->io.NumPorts1 = 32; link->io.Attributes1 = IO_DATA_PATH_WIDTH_16; - link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING|IRQ_HANDLE_PRESENT; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; + link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; link->irq.Handler = &el3_interrupt; - link->irq.Instance = dev; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; link->conf.ConfigIndex = 1; diff --git a/drivers/net/pcmcia/3c589_cs.c b/drivers/net/pcmcia/3c589_cs.c index c43c21ddb2d1..6f8d7e2e5922 100644 --- a/drivers/net/pcmcia/3c589_cs.c +++ b/drivers/net/pcmcia/3c589_cs.c @@ -194,10 +194,8 @@ static int tc589_probe(struct pcmcia_device *link) spin_lock_init(&lp->lock); link->io.NumPorts1 = 16; link->io.Attributes1 = IO_DATA_PATH_WIDTH_16; - link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING|IRQ_HANDLE_PRESENT; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; + link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; link->irq.Handler = &el3_interrupt; - link->irq.Instance = dev; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; link->conf.ConfigIndex = 1; diff --git a/drivers/net/pcmcia/axnet_cs.c b/drivers/net/pcmcia/axnet_cs.c index 0552dddd587f..800597b82d18 100644 --- a/drivers/net/pcmcia/axnet_cs.c +++ b/drivers/net/pcmcia/axnet_cs.c @@ -170,7 +170,6 @@ static int axnet_probe(struct pcmcia_device *link) info->p_dev = link; link->priv = dev; link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; diff --git a/drivers/net/pcmcia/com20020_cs.c b/drivers/net/pcmcia/com20020_cs.c index 51e9cb0a6d1e..21d9c9d815d1 100644 --- a/drivers/net/pcmcia/com20020_cs.c +++ b/drivers/net/pcmcia/com20020_cs.c @@ -164,11 +164,10 @@ static int com20020_probe(struct pcmcia_device *p_dev) p_dev->io.NumPorts1 = 16; p_dev->io.IOAddrLines = 16; p_dev->irq.Attributes = IRQ_TYPE_EXCLUSIVE; - p_dev->irq.IRQInfo1 = IRQ_LEVEL_ID; p_dev->conf.Attributes = CONF_ENABLE_IRQ; p_dev->conf.IntType = INT_MEMORY_AND_IO; - p_dev->irq.Instance = info->dev = dev; + info->dev = dev; p_dev->priv = info; return com20020_config(p_dev); @@ -275,9 +274,8 @@ static int com20020_config(struct pcmcia_device *link) ioaddr = dev->base_addr = link->io.BasePort1; dev_dbg(&link->dev, "got ioaddr %Xh\n", ioaddr); - dev_dbg(&link->dev, "request IRQ %d (%Xh/%Xh)\n", - link->irq.AssignedIRQ, - link->irq.IRQInfo1, link->irq.IRQInfo2); + dev_dbg(&link->dev, "request IRQ %d\n", + link->irq.AssignedIRQ); i = pcmcia_request_irq(link, &link->irq); if (i != 0) { diff --git a/drivers/net/pcmcia/fmvj18x_cs.c b/drivers/net/pcmcia/fmvj18x_cs.c index 9b5ca37c6684..6e3e1ced6db4 100644 --- a/drivers/net/pcmcia/fmvj18x_cs.c +++ b/drivers/net/pcmcia/fmvj18x_cs.c @@ -255,10 +255,8 @@ static int fmvj18x_probe(struct pcmcia_device *link) link->io.IOAddrLines = 5; /* Interrupt setup */ - link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING|IRQ_HANDLE_PRESENT; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; + link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; link->irq.Handler = &fjn_interrupt; - link->irq.Instance = dev; /* General socket configuration */ link->conf.Attributes = CONF_ENABLE_IRQ; @@ -428,7 +426,7 @@ static int fmvj18x_config(struct pcmcia_device *link) if (link->io.NumPorts2 != 0) { link->irq.Attributes = - IRQ_TYPE_DYNAMIC_SHARING|IRQ_FIRST_SHARED|IRQ_HANDLE_PRESENT; + IRQ_TYPE_DYNAMIC_SHARING|IRQ_FIRST_SHARED; ret = mfc_try_io_port(link); if (ret != 0) goto failed; } else if (cardtype == UNGERMANN) { diff --git a/drivers/net/pcmcia/ibmtr_cs.c b/drivers/net/pcmcia/ibmtr_cs.c index 76706e12d731..37f4a6fdc3ef 100644 --- a/drivers/net/pcmcia/ibmtr_cs.c +++ b/drivers/net/pcmcia/ibmtr_cs.c @@ -119,6 +119,12 @@ static const struct ethtool_ops netdev_ethtool_ops = { .get_drvinfo = netdev_get_drvinfo, }; +static irqreturn_t ibmtr_interrupt(int irq, void *dev_id) { + ibmtr_dev_t *info = dev_id; + struct net_device *dev = info->dev; + return tok_interrupt(irq, dev); +}; + /*====================================================================== ibmtr_attach() creates an "instance" of the driver, allocating @@ -150,14 +156,13 @@ static int __devinit ibmtr_attach(struct pcmcia_device *link) link->io.Attributes1 = IO_DATA_PATH_WIDTH_8; link->io.NumPorts1 = 4; link->io.IOAddrLines = 16; - link->irq.Attributes = IRQ_TYPE_EXCLUSIVE | IRQ_HANDLE_PRESENT; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; - link->irq.Handler = &tok_interrupt; + link->irq.Attributes = IRQ_TYPE_EXCLUSIVE; + link->irq.Handler = ibmtr_interrupt; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; link->conf.Present = PRESENT_OPTION; - link->irq.Instance = info->dev = dev; + info->dev = dev; SET_ETHTOOL_OPS(dev, &netdev_ethtool_ops); diff --git a/drivers/net/pcmcia/nmclan_cs.c b/drivers/net/pcmcia/nmclan_cs.c index 2d0c6f93ed8b..dae5ef6b2609 100644 --- a/drivers/net/pcmcia/nmclan_cs.c +++ b/drivers/net/pcmcia/nmclan_cs.c @@ -463,10 +463,8 @@ static int nmclan_probe(struct pcmcia_device *link) link->io.NumPorts1 = 32; link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; link->io.IOAddrLines = 5; - link->irq.Attributes = IRQ_TYPE_EXCLUSIVE | IRQ_HANDLE_PRESENT; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; + link->irq.Attributes = IRQ_TYPE_EXCLUSIVE; link->irq.Handler = &mace_interrupt; - link->irq.Instance = dev; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; link->conf.ConfigIndex = 1; diff --git a/drivers/net/pcmcia/pcnet_cs.c b/drivers/net/pcmcia/pcnet_cs.c index 30baee7b86a2..de2d10085635 100644 --- a/drivers/net/pcmcia/pcnet_cs.c +++ b/drivers/net/pcmcia/pcnet_cs.c @@ -266,7 +266,6 @@ static int pcnet_probe(struct pcmcia_device *link) link->priv = dev; link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; diff --git a/drivers/net/pcmcia/smc91c92_cs.c b/drivers/net/pcmcia/smc91c92_cs.c index 2e795de06cb3..9e0da370912e 100644 --- a/drivers/net/pcmcia/smc91c92_cs.c +++ b/drivers/net/pcmcia/smc91c92_cs.c @@ -329,10 +329,8 @@ static int smc91c92_probe(struct pcmcia_device *link) link->io.NumPorts1 = 16; link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; link->io.IOAddrLines = 4; - link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING|IRQ_HANDLE_PRESENT; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; + link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; link->irq.Handler = &smc_interrupt; - link->irq.Instance = dev; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; @@ -456,7 +454,7 @@ static int mhz_mfc_config(struct pcmcia_device *link) link->conf.Attributes |= CONF_ENABLE_SPKR; link->conf.Status = CCSR_AUDIO_ENA; link->irq.Attributes = - IRQ_TYPE_DYNAMIC_SHARING|IRQ_FIRST_SHARED|IRQ_HANDLE_PRESENT; + IRQ_TYPE_DYNAMIC_SHARING|IRQ_FIRST_SHARED; link->io.IOAddrLines = 16; link->io.Attributes2 = IO_DATA_PATH_WIDTH_8; link->io.NumPorts2 = 8; @@ -655,7 +653,7 @@ static int osi_config(struct pcmcia_device *link) link->conf.Attributes |= CONF_ENABLE_SPKR; link->conf.Status = CCSR_AUDIO_ENA; link->irq.Attributes = - IRQ_TYPE_DYNAMIC_SHARING|IRQ_FIRST_SHARED|IRQ_HANDLE_PRESENT; + IRQ_TYPE_DYNAMIC_SHARING|IRQ_FIRST_SHARED; link->io.NumPorts1 = 64; link->io.Attributes2 = IO_DATA_PATH_WIDTH_8; link->io.NumPorts2 = 8; diff --git a/drivers/net/pcmcia/xirc2ps_cs.c b/drivers/net/pcmcia/xirc2ps_cs.c index f75ac716390d..fe504b7f369f 100644 --- a/drivers/net/pcmcia/xirc2ps_cs.c +++ b/drivers/net/pcmcia/xirc2ps_cs.c @@ -556,7 +556,6 @@ xirc2ps_probe(struct pcmcia_device *link) link->conf.IntType = INT_MEMORY_AND_IO; link->conf.ConfigIndex = 1; link->irq.Handler = xirc2ps_interrupt; - link->irq.Instance = dev; /* Fill in card specific entries */ dev->netdev_ops = &netdev_ops; @@ -835,8 +834,6 @@ xirc2ps_config(struct pcmcia_device * link) link->io.IOAddrLines =10; link->io.Attributes1 = IO_DATA_PATH_WIDTH_16; - link->irq.Attributes = IRQ_HANDLE_PRESENT; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; if (local->modem) { int pass; diff --git a/drivers/net/wireless/airo_cs.c b/drivers/net/wireless/airo_cs.c index 7d3a96fdf5a3..f6036fb42319 100644 --- a/drivers/net/wireless/airo_cs.c +++ b/drivers/net/wireless/airo_cs.c @@ -134,7 +134,6 @@ static int airo_probe(struct pcmcia_device *p_dev) /* Interrupt setup */ p_dev->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; - p_dev->irq.IRQInfo1 = IRQ_LEVEL_ID; p_dev->irq.Handler = NULL; /* diff --git a/drivers/net/wireless/atmel_cs.c b/drivers/net/wireless/atmel_cs.c index 18a58b859223..32407911842f 100644 --- a/drivers/net/wireless/atmel_cs.c +++ b/drivers/net/wireless/atmel_cs.c @@ -143,7 +143,6 @@ static int atmel_probe(struct pcmcia_device *p_dev) /* Interrupt setup */ p_dev->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; - p_dev->irq.IRQInfo1 = IRQ_LEVEL_ID; p_dev->irq.Handler = NULL; /* diff --git a/drivers/net/wireless/b43/pcmcia.c b/drivers/net/wireless/b43/pcmcia.c index 2588358294be..984174bc7b0f 100644 --- a/drivers/net/wireless/b43/pcmcia.c +++ b/drivers/net/wireless/b43/pcmcia.c @@ -98,9 +98,7 @@ static int __devinit b43_pcmcia_probe(struct pcmcia_device *dev) goto err_disable; dev->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; - dev->irq.IRQInfo1 = IRQ_LEVEL_ID; dev->irq.Handler = NULL; /* The handler is registered later. */ - dev->irq.Instance = NULL; res = pcmcia_request_irq(dev, &dev->irq); if (res != 0) goto err_disable; diff --git a/drivers/net/wireless/hostap/hostap_cs.c b/drivers/net/wireless/hostap/hostap_cs.c index 243e912729b9..c9640a3e02c9 100644 --- a/drivers/net/wireless/hostap/hostap_cs.c +++ b/drivers/net/wireless/hostap/hostap_cs.c @@ -641,11 +641,8 @@ static int prism2_config(struct pcmcia_device *link) * irq structure is initialized. */ if (link->conf.Attributes & CONF_ENABLE_IRQ) { - link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING | - IRQ_HANDLE_PRESENT; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; + link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; link->irq.Handler = prism2_interrupt; - link->irq.Instance = dev; ret = pcmcia_request_irq(link, &link->irq); if (ret) goto failed; diff --git a/drivers/net/wireless/libertas/if_cs.c b/drivers/net/wireless/libertas/if_cs.c index f2b16559b686..b1d84592b959 100644 --- a/drivers/net/wireless/libertas/if_cs.c +++ b/drivers/net/wireless/libertas/if_cs.c @@ -837,7 +837,6 @@ static int if_cs_probe(struct pcmcia_device *p_dev) p_dev->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; p_dev->irq.Handler = NULL; - p_dev->irq.IRQInfo1 = IRQ_INFO2_VALID | IRQ_LEVEL_ID; p_dev->conf.Attributes = 0; p_dev->conf.IntType = INT_MEMORY_AND_IO; diff --git a/drivers/net/wireless/netwave_cs.c b/drivers/net/wireless/netwave_cs.c index f5333b7d2226..e61e6b9440ab 100644 --- a/drivers/net/wireless/netwave_cs.c +++ b/drivers/net/wireless/netwave_cs.c @@ -384,8 +384,7 @@ static int netwave_probe(struct pcmcia_device *link) link->io.IOAddrLines = 5; /* Interrupt setup */ - link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING | IRQ_HANDLE_PRESENT; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; + link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; link->irq.Handler = &netwave_interrupt; /* General socket configuration */ @@ -404,8 +403,6 @@ static int netwave_probe(struct pcmcia_device *link) dev->watchdog_timeo = TX_TIMEOUT; - link->irq.Instance = dev; - return netwave_pcmcia_config( link); } /* netwave_attach */ diff --git a/drivers/net/wireless/orinoco/orinoco_cs.c b/drivers/net/wireless/orinoco/orinoco_cs.c index 688b39823459..f27bb8367c98 100644 --- a/drivers/net/wireless/orinoco/orinoco_cs.c +++ b/drivers/net/wireless/orinoco/orinoco_cs.c @@ -120,10 +120,8 @@ orinoco_cs_probe(struct pcmcia_device *link) link->priv = priv; /* Interrupt setup */ - link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING | IRQ_HANDLE_PRESENT; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; + link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; link->irq.Handler = orinoco_interrupt; - link->irq.Instance = priv; /* General socket configuration defaults can go here. In this * client, we assume very little, and rely on the CIS for diff --git a/drivers/net/wireless/orinoco/spectrum_cs.c b/drivers/net/wireless/orinoco/spectrum_cs.c index c609371d1525..59bda240fdc2 100644 --- a/drivers/net/wireless/orinoco/spectrum_cs.c +++ b/drivers/net/wireless/orinoco/spectrum_cs.c @@ -194,10 +194,8 @@ spectrum_cs_probe(struct pcmcia_device *link) link->priv = priv; /* Interrupt setup */ - link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING | IRQ_HANDLE_PRESENT; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; + link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; link->irq.Handler = orinoco_interrupt; - link->irq.Instance = priv; /* General socket configuration defaults can go here. In this * client, we assume very little, and rely on the CIS for diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c index 5e0f4c3eac38..91213f9e2c4f 100644 --- a/drivers/net/wireless/ray_cs.c +++ b/drivers/net/wireless/ray_cs.c @@ -323,8 +323,7 @@ static int ray_probe(struct pcmcia_device *p_dev) p_dev->io.IOAddrLines = 5; /* Interrupt setup. For PCMCIA, driver takes what's given */ - p_dev->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING | IRQ_HANDLE_PRESENT; - p_dev->irq.IRQInfo1 = IRQ_LEVEL_ID; + p_dev->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; p_dev->irq.Handler = &ray_interrupt; /* General socket configuration */ @@ -333,7 +332,6 @@ static int ray_probe(struct pcmcia_device *p_dev) p_dev->conf.ConfigIndex = 1; p_dev->priv = dev; - p_dev->irq.Instance = dev; local->finder = p_dev; local->card_status = CARD_INSERTED; diff --git a/drivers/net/wireless/wavelan_cs.c b/drivers/net/wireless/wavelan_cs.c index df3579aef505..33918fd5b231 100644 --- a/drivers/net/wireless/wavelan_cs.c +++ b/drivers/net/wireless/wavelan_cs.c @@ -4438,8 +4438,7 @@ wavelan_probe(struct pcmcia_device *p_dev) p_dev->io.IOAddrLines = 3; /* Interrupt setup */ - p_dev->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING | IRQ_HANDLE_PRESENT; - p_dev->irq.IRQInfo1 = IRQ_LEVEL_ID; + p_dev->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; p_dev->irq.Handler = wavelan_interrupt; /* General socket configuration */ @@ -4451,7 +4450,7 @@ wavelan_probe(struct pcmcia_device *p_dev) if (!dev) return -ENOMEM; - p_dev->priv = p_dev->irq.Instance = dev; + p_dev->priv = dev; lp = netdev_priv(dev); diff --git a/drivers/net/wireless/wl3501_cs.c b/drivers/net/wireless/wl3501_cs.c index 9a956c786738..5f0401a52cff 100644 --- a/drivers/net/wireless/wl3501_cs.c +++ b/drivers/net/wireless/wl3501_cs.c @@ -1898,8 +1898,7 @@ static int wl3501_probe(struct pcmcia_device *p_dev) p_dev->io.IOAddrLines = 5; /* Interrupt setup */ - p_dev->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING | IRQ_HANDLE_PRESENT; - p_dev->irq.IRQInfo1 = IRQ_LEVEL_ID; + p_dev->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; p_dev->irq.Handler = wl3501_interrupt; /* General socket configuration */ @@ -1922,7 +1921,7 @@ static int wl3501_probe(struct pcmcia_device *p_dev) dev->wireless_handlers = &wl3501_handler_def; SET_ETHTOOL_OPS(dev, &ops); netif_stop_queue(dev); - p_dev->priv = p_dev->irq.Instance = dev; + p_dev->priv = dev; return wl3501_config(p_dev); out_link: diff --git a/drivers/parport/parport_cs.c b/drivers/parport/parport_cs.c index e56a4dea6717..7dd370fa3439 100644 --- a/drivers/parport/parport_cs.c +++ b/drivers/parport/parport_cs.c @@ -106,7 +106,6 @@ static int parport_probe(struct pcmcia_device *link) link->io.Attributes1 = IO_DATA_PATH_WIDTH_8; link->io.Attributes2 = IO_DATA_PATH_WIDTH_8; link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; diff --git a/drivers/pcmcia/pcmcia_resource.c b/drivers/pcmcia/pcmcia_resource.c index 1cf7d54fb7e2..a8bf8c1b45ed 100644 --- a/drivers/pcmcia/pcmcia_resource.c +++ b/drivers/pcmcia/pcmcia_resource.c @@ -383,8 +383,8 @@ static int pcmcia_release_irq(struct pcmcia_device *p_dev, irq_req_t *req) s->irq.AssignedIRQ = 0; } - if (req->Attributes & IRQ_HANDLE_PRESENT) { - free_irq(req->AssignedIRQ, req->Instance); + if (req->Handler) { + free_irq(req->AssignedIRQ, p_dev->priv); } #ifdef CONFIG_PCMCIA_PROBE @@ -664,7 +664,7 @@ int pcmcia_request_irq(struct pcmcia_device *p_dev, irq_req_t *req) /* if the underlying IRQ infrastructure allows for it, only allocate * the IRQ, but do not enable it */ - if (!(req->Attributes & IRQ_HANDLE_PRESENT)) + if (!(req->Handler)) type |= IRQ_NOAUTOEN; #endif /* IRQ_NOAUTOEN */ @@ -674,7 +674,7 @@ int pcmcia_request_irq(struct pcmcia_device *p_dev, irq_req_t *req) } else { int try; u32 mask = s->irq_mask; - void *data = &p_dev->dev.driver; /* something unique to this device */ + void *data = p_dev; /* something unique to this device */ for (try = 0; try < 64; try++) { irq = try % 32; @@ -691,12 +691,12 @@ int pcmcia_request_irq(struct pcmcia_device *p_dev, irq_req_t *req) * registering a dummy handle works, i.e. if the IRQ isn't * marked as used by the kernel resource management core */ ret = request_irq(irq, - (req->Attributes & IRQ_HANDLE_PRESENT) ? req->Handler : test_action, + (req->Handler) ? req->Handler : test_action, type, p_dev->devname, - (req->Attributes & IRQ_HANDLE_PRESENT) ? req->Instance : data); + (req->Handler) ? p_dev->priv : data); if (!ret) { - if (!(req->Attributes & IRQ_HANDLE_PRESENT)) + if (!req->Handler) free_irq(irq, data); break; } @@ -713,9 +713,9 @@ int pcmcia_request_irq(struct pcmcia_device *p_dev, irq_req_t *req) irq = s->pci_irq; } - if (ret && (req->Attributes & IRQ_HANDLE_PRESENT)) { + if (ret && req->Handler) { ret = request_irq(irq, req->Handler, type, - p_dev->devname, req->Instance); + p_dev->devname, p_dev->priv); if (ret) { dev_printk(KERN_INFO, &s->dev, "request_irq() failed\n"); diff --git a/drivers/scsi/pcmcia/aha152x_stub.c b/drivers/scsi/pcmcia/aha152x_stub.c index 4329e4e5043d..528733b4a392 100644 --- a/drivers/scsi/pcmcia/aha152x_stub.c +++ b/drivers/scsi/pcmcia/aha152x_stub.c @@ -106,7 +106,6 @@ static int aha152x_probe(struct pcmcia_device *link) link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; link->io.IOAddrLines = 10; link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; link->conf.Present = PRESENT_OPTION; diff --git a/drivers/scsi/pcmcia/fdomain_stub.c b/drivers/scsi/pcmcia/fdomain_stub.c index 5792b55c9d3c..914040684079 100644 --- a/drivers/scsi/pcmcia/fdomain_stub.c +++ b/drivers/scsi/pcmcia/fdomain_stub.c @@ -89,7 +89,6 @@ static int fdomain_probe(struct pcmcia_device *link) link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; link->io.IOAddrLines = 10; link->irq.Attributes = IRQ_TYPE_EXCLUSIVE; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; link->conf.Present = PRESENT_OPTION; diff --git a/drivers/scsi/pcmcia/nsp_cs.c b/drivers/scsi/pcmcia/nsp_cs.c index 9dfd6f510b65..c2341af587a3 100644 --- a/drivers/scsi/pcmcia/nsp_cs.c +++ b/drivers/scsi/pcmcia/nsp_cs.c @@ -1564,12 +1564,10 @@ static int nsp_cs_probe(struct pcmcia_device *link) link->io.IOAddrLines = 10; /* not used */ /* Interrupt setup */ - link->irq.Attributes = IRQ_TYPE_EXCLUSIVE | IRQ_HANDLE_PRESENT; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; + link->irq.Attributes = IRQ_TYPE_EXCLUSIVE; /* Interrupt handler */ link->irq.Handler = &nspintr; - link->irq.Instance = info; link->irq.Attributes |= IRQF_SHARED; /* General socket configuration */ diff --git a/drivers/scsi/pcmcia/qlogic_stub.c b/drivers/scsi/pcmcia/qlogic_stub.c index 65d7ad58433f..f85f094870b4 100644 --- a/drivers/scsi/pcmcia/qlogic_stub.c +++ b/drivers/scsi/pcmcia/qlogic_stub.c @@ -162,7 +162,6 @@ static int qlogic_probe(struct pcmcia_device *link) link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; link->io.IOAddrLines = 10; link->irq.Attributes = IRQ_TYPE_EXCLUSIVE; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; link->conf.Present = PRESENT_OPTION; diff --git a/drivers/scsi/pcmcia/sym53c500_cs.c b/drivers/scsi/pcmcia/sym53c500_cs.c index 851a41ce4f06..e7564d8f0cbf 100644 --- a/drivers/scsi/pcmcia/sym53c500_cs.c +++ b/drivers/scsi/pcmcia/sym53c500_cs.c @@ -867,7 +867,6 @@ SYM53C500_probe(struct pcmcia_device *link) link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; link->io.IOAddrLines = 10; link->irq.Attributes = IRQ_TYPE_EXCLUSIVE; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; diff --git a/drivers/serial/serial_cs.c b/drivers/serial/serial_cs.c index 8d651a618455..5d6f947ef7ff 100644 --- a/drivers/serial/serial_cs.c +++ b/drivers/serial/serial_cs.c @@ -334,7 +334,6 @@ static int serial_probe(struct pcmcia_device *link) link->io.Attributes1 = IO_DATA_PATH_WIDTH_8; link->io.NumPorts1 = 8; link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; link->conf.Attributes = CONF_ENABLE_IRQ; if (do_sound) { link->conf.Attributes |= CONF_ENABLE_SPKR; diff --git a/drivers/staging/comedi/drivers/cb_das16_cs.c b/drivers/staging/comedi/drivers/cb_das16_cs.c index 9e758027efee..39923cb388be 100644 --- a/drivers/staging/comedi/drivers/cb_das16_cs.c +++ b/drivers/staging/comedi/drivers/cb_das16_cs.c @@ -703,7 +703,6 @@ static int das16cs_pcmcia_attach(struct pcmcia_device *link) /* Initialize the pcmcia_device structure */ /* Interrupt setup */ link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; link->irq.Handler = NULL; link->conf.Attributes = 0; diff --git a/drivers/staging/comedi/drivers/das08_cs.c b/drivers/staging/comedi/drivers/das08_cs.c index 384a77a37c26..9b945e5fdd32 100644 --- a/drivers/staging/comedi/drivers/das08_cs.c +++ b/drivers/staging/comedi/drivers/das08_cs.c @@ -173,7 +173,6 @@ static int das08_pcmcia_attach(struct pcmcia_device *link) /* Interrupt setup */ link->irq.Attributes = IRQ_TYPE_EXCLUSIVE; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; link->irq.Handler = NULL; /* diff --git a/drivers/staging/comedi/drivers/ni_daq_700.c b/drivers/staging/comedi/drivers/ni_daq_700.c index 7328a84ac500..ef5e1183d47d 100644 --- a/drivers/staging/comedi/drivers/ni_daq_700.c +++ b/drivers/staging/comedi/drivers/ni_daq_700.c @@ -503,7 +503,6 @@ static int dio700_cs_attach(struct pcmcia_device *link) /* Interrupt setup */ link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; link->irq.Handler = NULL; /* diff --git a/drivers/staging/comedi/drivers/ni_daq_dio24.c b/drivers/staging/comedi/drivers/ni_daq_dio24.c index 505631553ef7..9017be3a92f1 100644 --- a/drivers/staging/comedi/drivers/ni_daq_dio24.c +++ b/drivers/staging/comedi/drivers/ni_daq_dio24.c @@ -254,7 +254,6 @@ static int dio24_cs_attach(struct pcmcia_device *link) /* Interrupt setup */ link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; link->irq.Handler = NULL; /* diff --git a/drivers/staging/comedi/drivers/ni_labpc_cs.c b/drivers/staging/comedi/drivers/ni_labpc_cs.c index 7fb5058ad43c..7d514b3ee754 100644 --- a/drivers/staging/comedi/drivers/ni_labpc_cs.c +++ b/drivers/staging/comedi/drivers/ni_labpc_cs.c @@ -230,7 +230,6 @@ static int labpc_cs_attach(struct pcmcia_device *link) /* Interrupt setup */ link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING | IRQ_FORCED_PULSE; - link->irq.IRQInfo1 = IRQ_INFO2_VALID | IRQ_PULSE_ID; link->irq.Handler = NULL; /* diff --git a/drivers/staging/comedi/drivers/ni_mio_cs.c b/drivers/staging/comedi/drivers/ni_mio_cs.c index ca7ab4abdc2e..d692f4bb47ea 100644 --- a/drivers/staging/comedi/drivers/ni_mio_cs.c +++ b/drivers/staging/comedi/drivers/ni_mio_cs.c @@ -274,7 +274,6 @@ static int cs_attach(struct pcmcia_device *link) link->io.Attributes1 = IO_DATA_PATH_WIDTH_16; link->io.NumPorts1 = 16; link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; @@ -325,9 +324,6 @@ static int mio_pcmcia_config_loop(struct pcmcia_device *p_dev, p_dev->io.IOAddrLines = cfg->io.flags & CISTPL_IO_LINES_MASK; p_dev->io.NumPorts2 = 0; - p_dev->irq.IRQInfo1 = cfg->irq.IRQInfo1; - p_dev->irq.IRQInfo2 = cfg->irq.IRQInfo2; - for (base = 0x000; base < 0x400; base += 0x20) { p_dev->io.BasePort1 = base; ret = pcmcia_request_io(p_dev, &p_dev->io); diff --git a/drivers/staging/comedi/drivers/quatech_daqp_cs.c b/drivers/staging/comedi/drivers/quatech_daqp_cs.c index 48e7c27ed87a..5256fd933162 100644 --- a/drivers/staging/comedi/drivers/quatech_daqp_cs.c +++ b/drivers/staging/comedi/drivers/quatech_daqp_cs.c @@ -1041,10 +1041,8 @@ static int daqp_cs_attach(struct pcmcia_device *link) link->priv = local; /* Interrupt setup */ - link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING | IRQ_HANDLE_PRESENT; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; + link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; link->irq.Handler = daqp_interrupt; - link->irq.Instance = local; /* General socket configuration defaults can go here. In this diff --git a/drivers/usb/host/sl811_cs.c b/drivers/usb/host/sl811_cs.c index f87aba6db4e8..39d253e841f6 100644 --- a/drivers/usb/host/sl811_cs.c +++ b/drivers/usb/host/sl811_cs.c @@ -243,7 +243,6 @@ static int sl811_cs_probe(struct pcmcia_device *link) /* Initialize */ link->irq.Attributes = IRQ_TYPE_EXCLUSIVE; - link->irq.IRQInfo1 = IRQ_INFO2_VALID|IRQ_LEVEL_ID; link->irq.Handler = NULL; link->conf.Attributes = 0; diff --git a/include/pcmcia/cs.h b/include/pcmcia/cs.h index 904468a191ef..afc2bfb9e917 100644 --- a/include/pcmcia/cs.h +++ b/include/pcmcia/cs.h @@ -15,6 +15,10 @@ #ifndef _LINUX_CS_H #define _LINUX_CS_H +#ifdef __KERNEL__ +#include +#endif + /* For AccessConfigurationRegister */ typedef struct conf_reg_t { u_char Function; @@ -111,11 +115,9 @@ typedef struct io_req_t { /* For RequestIRQ and ReleaseIRQ */ typedef struct irq_req_t { - u_int Attributes; - u_int AssignedIRQ; - u_int IRQInfo1, IRQInfo2; /* IRQInfo2 is ignored */ - void *Handler; - void *Instance; + u_int Attributes; + u_int AssignedIRQ; + irq_handler_t Handler; } irq_req_t; /* Attributes for RequestIRQ and ReleaseIRQ */ @@ -125,7 +127,7 @@ typedef struct irq_req_t { #define IRQ_TYPE_DYNAMIC_SHARING 0x02 #define IRQ_FORCED_PULSE 0x04 #define IRQ_FIRST_SHARED 0x08 -#define IRQ_HANDLE_PRESENT 0x10 +//#define IRQ_HANDLE_PRESENT 0x10 #define IRQ_PULSE_ALLOCATED 0x100 /* Bits in IRQInfo1 field */ diff --git a/sound/pcmcia/pdaudiocf/pdaudiocf.c b/sound/pcmcia/pdaudiocf/pdaudiocf.c index 447aaaee3be6..7717e01fc071 100644 --- a/sound/pcmcia/pdaudiocf/pdaudiocf.c +++ b/sound/pcmcia/pdaudiocf/pdaudiocf.c @@ -142,12 +142,10 @@ static int snd_pdacf_probe(struct pcmcia_device *link) link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; link->io.NumPorts1 = 16; - link->irq.Attributes = IRQ_TYPE_EXCLUSIVE | IRQ_HANDLE_PRESENT | IRQ_FORCED_PULSE; + link->irq.Attributes = IRQ_TYPE_EXCLUSIVE | IRQ_FORCED_PULSE; // link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING|IRQ_FIRST_SHARED; - link->irq.IRQInfo1 = 0 /* | IRQ_LEVEL_ID */; link->irq.Handler = pdacf_interrupt; - link->irq.Instance = pdacf; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; link->conf.ConfigIndex = 1; diff --git a/sound/pcmcia/vx/vxpocket.c b/sound/pcmcia/vx/vxpocket.c index 5a5db48a91a9..7be3b3357045 100644 --- a/sound/pcmcia/vx/vxpocket.c +++ b/sound/pcmcia/vx/vxpocket.c @@ -161,11 +161,9 @@ static int snd_vxpocket_new(struct snd_card *card, int ibl, link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; link->io.NumPorts1 = 16; - link->irq.Attributes = IRQ_TYPE_EXCLUSIVE | IRQ_HANDLE_PRESENT; + link->irq.Attributes = IRQ_TYPE_EXCLUSIVE; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; link->irq.Handler = &snd_vx_irq_handler; - link->irq.Instance = chip; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; -- cgit v1.3-8-gc7d7 From 8c0c709eea5cbab97fb464cd68b06f24acc58ee1 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 25 Nov 2009 17:46:15 +0100 Subject: mac80211: move cmntr flag out of rx flags The RX flags should soon be used only for flags that cannot change within an a-MPDU, so move the cooked monitor flag into the RX status flags. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 4 ++++ net/mac80211/ieee80211_i.h | 3 +-- net/mac80211/rx.c | 4 ++-- 3 files changed, 7 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 3754ea405c88..1d75b960da06 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -513,6 +513,9 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) * @RX_FLAG_HT: HT MCS was used and rate_idx is MCS index * @RX_FLAG_40MHZ: HT40 (40 MHz) was used * @RX_FLAG_SHORT_GI: Short guard interval was used + * @RX_FLAG_INTERNAL_CMTR: set internally after frame was reported + * on cooked monitor to avoid double-reporting it for multiple + * virtual interfaces */ enum mac80211_rx_flags { RX_FLAG_MMIC_ERROR = 1<<0, @@ -526,6 +529,7 @@ enum mac80211_rx_flags { RX_FLAG_HT = 1<<9, RX_FLAG_40MHZ = 1<<10, RX_FLAG_SHORT_GI = 1<<11, + RX_FLAG_INTERNAL_CMTR = 1<<12, }; /** diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 04093e84ebd7..ba5d3637b956 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -163,8 +163,7 @@ typedef unsigned __bitwise__ ieee80211_rx_result; /* frame is destined to interface currently processed (incl. multicast frames) */ #define IEEE80211_RX_RA_MATCH BIT(1) #define IEEE80211_RX_AMSDU BIT(2) -#define IEEE80211_RX_CMNTR_REPORTED BIT(3) -#define IEEE80211_RX_FRAGMENTED BIT(4) +#define IEEE80211_RX_FRAGMENTED BIT(3) struct ieee80211_rx_data { struct sk_buff *skb; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 96f13ad05d3c..097bb0343b91 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1868,7 +1868,7 @@ static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx, struct net_device *prev_dev = NULL; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); - if (rx->flags & IEEE80211_RX_CMNTR_REPORTED) + if (status->flag & RX_FLAG_INTERNAL_CMTR) goto out_free_skb; if (skb_headroom(skb) < sizeof(*rthdr) && @@ -1929,7 +1929,7 @@ static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx, } else goto out_free_skb; - rx->flags |= IEEE80211_RX_CMNTR_REPORTED; + status->flag |= RX_FLAG_INTERNAL_CMTR; return; out_free_skb: -- cgit v1.3-8-gc7d7 From 67fbb16be69d138a3b6645ec5395b487cb915c58 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Tue, 24 Nov 2009 23:59:15 +0100 Subject: nl80211: PMKSA caching support This is an interface to set, delete and flush PMKIDs through nl80211. Main users would be fullmac devices which firmwares are capable of generating the RSN IEs for the re-association requests, e.g. iwmc3200wifi. Signed-off-by: Samuel Ortiz Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 2 + include/linux/nl80211.h | 11 +++++ include/net/cfg80211.h | 28 +++++++++++ net/wireless/nl80211.c | 120 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 161 insertions(+) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index afa8e0ac27a7..d9724a28c0c2 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1266,6 +1266,8 @@ enum ieee80211_sa_query_action { #define WLAN_MAX_KEY_LEN 32 +#define WLAN_PMKID_LEN 16 + /** * ieee80211_get_qos_ctl - get pointer to qos control bytes * @hdr: the frame diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 45db17f81aa3..da8ea2e19273 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -349,6 +349,10 @@ enum nl80211_commands { NL80211_CMD_GET_SURVEY, NL80211_CMD_NEW_SURVEY_RESULTS, + NL80211_CMD_SET_PMKSA, + NL80211_CMD_DEL_PMKSA, + NL80211_CMD_FLUSH_PMKSA, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -598,6 +602,10 @@ enum nl80211_commands { * the survey response for %NL80211_CMD_GET_SURVEY, nested attribute * containing info as possible, see &enum survey_info. * + * @NL80211_ATTR_PMKID: PMK material for PMKSA caching. + * @NL80211_ATTR_MAX_NUM_PMKIDS: maximum number of PMKIDs a firmware can + * cache, a wiphy attribute. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -732,6 +740,9 @@ enum nl80211_attrs { NL80211_ATTR_SURVEY_INFO, + NL80211_ATTR_PMKID, + NL80211_ATTR_MAX_NUM_PMKIDS, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index a6492e9bca97..0884b9a0f778 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -871,6 +871,19 @@ struct cfg80211_bitrate_mask { u32 fixed; /* fixed bitrate, 0 == not fixed */ u32 maxrate; /* in kbps, 0 == no limit */ }; +/** + * struct cfg80211_pmksa - PMK Security Association + * + * This structure is passed to the set/del_pmksa() method for PMKSA + * caching. + * + * @bssid: The AP's BSSID. + * @pmkid: The PMK material itself. + */ +struct cfg80211_pmksa { + u8 *bssid; + u8 *pmkid; +}; /** * struct cfg80211_ops - backend description for wireless configuration @@ -976,6 +989,13 @@ struct cfg80211_bitrate_mask { * @dump_survey: get site survey information. * * @testmode_cmd: run a test mode command + * + * @set_pmksa: Cache a PMKID for a BSSID. This is mostly useful for fullmac + * devices running firmwares capable of generating the (re) association + * RSN IE. It allows for faster roaming between WPA2 BSSIDs. + * @del_pmksa: Delete a cached PMKID. + * @flush_pmksa: Flush all cached PMKIDs. + * */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy); @@ -1097,6 +1117,12 @@ struct cfg80211_ops { int (*dump_survey)(struct wiphy *wiphy, struct net_device *netdev, int idx, struct survey_info *info); + int (*set_pmksa)(struct wiphy *wiphy, struct net_device *netdev, + struct cfg80211_pmksa *pmksa); + int (*del_pmksa)(struct wiphy *wiphy, struct net_device *netdev, + struct cfg80211_pmksa *pmksa); + int (*flush_pmksa)(struct wiphy *wiphy, struct net_device *netdev); + /* some temporary stuff to finish wext */ int (*set_power_mgmt)(struct wiphy *wiphy, struct net_device *dev, bool enabled, int timeout); @@ -1195,6 +1221,8 @@ struct wiphy { char fw_version[ETHTOOL_BUSINFO_LEN]; u32 hw_version; + u8 max_num_pmkids; + /* If multiple wiphys are registered and you're handed e.g. * a regular netdev with assigned ieee80211_ptr, you won't * know whether it points to a wiphy your driver has registered diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 149539ade15e..a6028433e3a0 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -139,6 +139,8 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { [NL80211_ATTR_WPA_VERSIONS] = { .type = NLA_U32 }, [NL80211_ATTR_PID] = { .type = NLA_U32 }, [NL80211_ATTR_4ADDR] = { .type = NLA_U8 }, + [NL80211_ATTR_PMKID] = { .type = NLA_BINARY, + .len = WLAN_PMKID_LEN }, }; /* policy for the attributes */ @@ -450,6 +452,9 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, sizeof(u32) * dev->wiphy.n_cipher_suites, dev->wiphy.cipher_suites); + NLA_PUT_U8(msg, NL80211_ATTR_MAX_NUM_PMKIDS, + dev->wiphy.max_num_pmkids); + nl_modes = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_IFTYPES); if (!nl_modes) goto nla_put_failure; @@ -561,6 +566,9 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, CMD(deauth, DEAUTHENTICATE); CMD(disassoc, DISASSOCIATE); CMD(join_ibss, JOIN_IBSS); + CMD(set_pmksa, SET_PMKSA); + CMD(del_pmksa, DEL_PMKSA); + CMD(flush_pmksa, FLUSH_PMKSA); if (dev->wiphy.flags & WIPHY_FLAG_NETNS_OK) { i++; NLA_PUT_U32(msg, i, NL80211_CMD_SET_WIPHY_NETNS); @@ -4221,6 +4229,99 @@ static int nl80211_wiphy_netns(struct sk_buff *skb, struct genl_info *info) return err; } +static int nl80211_setdel_pmksa(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev; + int (*rdev_ops)(struct wiphy *wiphy, struct net_device *dev, + struct cfg80211_pmksa *pmksa) = NULL; + int err; + struct net_device *dev; + struct cfg80211_pmksa pmksa; + + memset(&pmksa, 0, sizeof(struct cfg80211_pmksa)); + + if (!info->attrs[NL80211_ATTR_MAC]) + return -EINVAL; + + if (!info->attrs[NL80211_ATTR_PMKID]) + return -EINVAL; + + rtnl_lock(); + + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); + if (err) + goto out_rtnl; + + pmksa.pmkid = nla_data(info->attrs[NL80211_ATTR_PMKID]); + pmksa.bssid = nla_data(info->attrs[NL80211_ATTR_MAC]); + + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) { + err = -EOPNOTSUPP; + goto out; + } + + switch (info->genlhdr->cmd) { + case NL80211_CMD_SET_PMKSA: + rdev_ops = rdev->ops->set_pmksa; + break; + case NL80211_CMD_DEL_PMKSA: + rdev_ops = rdev->ops->del_pmksa; + break; + default: + WARN_ON(1); + break; + } + + if (!rdev_ops) { + err = -EOPNOTSUPP; + goto out; + } + + err = rdev_ops(&rdev->wiphy, dev, &pmksa); + + out: + cfg80211_unlock_rdev(rdev); + dev_put(dev); + out_rtnl: + rtnl_unlock(); + + return err; +} + +static int nl80211_flush_pmksa(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev; + int err; + struct net_device *dev; + + rtnl_lock(); + + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); + if (err) + goto out_rtnl; + + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) { + err = -EOPNOTSUPP; + goto out; + } + + if (!rdev->ops->flush_pmksa) { + err = -EOPNOTSUPP; + goto out; + } + + err = rdev->ops->flush_pmksa(&rdev->wiphy, dev); + + out: + cfg80211_unlock_rdev(rdev); + dev_put(dev); + out_rtnl: + rtnl_unlock(); + + return err; + +} + static struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_GET_WIPHY, @@ -4465,6 +4566,25 @@ static struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .dumpit = nl80211_dump_survey, }, + { + .cmd = NL80211_CMD_SET_PMKSA, + .doit = nl80211_setdel_pmksa, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = NL80211_CMD_DEL_PMKSA, + .doit = nl80211_setdel_pmksa, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = NL80211_CMD_FLUSH_PMKSA, + .doit = nl80211_flush_pmksa, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, + }; static struct genl_multicast_group nl80211_mlme_mcgrp = { .name = "mlme", -- cgit v1.3-8-gc7d7 From 5fdd4baef6195a1f2960e901c8877e2105f832ca Mon Sep 17 00:00:00 2001 From: Andrei Pelinescu-Onciul Date: Sun, 29 Nov 2009 00:14:02 -0800 Subject: sctp: on T3_RTX retransmit all the in-flight chunks When retransmitting due to T3 timeout, retransmit all the in-flight chunks for the corresponding transport/path, including chunks sent less then 1 rto ago. This is the correct behaviour according to rfc4960 section 6.3.3 E3 and "Note: Any DATA chunks that were sent to the address for which the T3-rtx timer expired but did not fit in one MTU (rule E3 above) should be marked for retransmission and sent as soon as cwnd allows (normally, when a SACK arrives). ". This fixes problems when more then one path is present and the T3 retransmission of the first chunk that timeouts stops the T3 timer for the initial active path, leaving all the other in-flight chunks waiting forever or until a new chunk is transmitted on the same path and timeouts (and this will happen only if the cwnd allows sending new chunks, but since cwnd was dropped to MTU by the timeout => it will wait until the first heartbeat). Example: 10 packets in flight, sent at 0.1 s intervals on the primary path. The primary path is down and the first packet timeouts. The first packet is retransmitted on another path, the T3 timer for the primary path is stopped and cwnd is set to MTU. All the other 9 in-flight packets will not be retransmitted (unless more new packets are sent on the primary path which depend on cwnd allowing it, and even in this case the 9 packets will be retransmitted only after a new packet timeouts which even in the best case would be more then RTO). This commit reverts d0ce92910bc04e107b2f3f2048f07e94f570035d and also removes the now unused transport->last_rto, introduced in b6157d8e03e1e780660a328f7183bcbfa4a93a19. p.s The problem is not only when multiple paths are there. It can happen in a single homed environment. If the application stops sending data, it possible to have a hung association. Signed-off-by: Andrei Pelinescu-Onciul Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 1 - net/sctp/outqueue.c | 10 ---------- net/sctp/sm_sideeffect.c | 1 - net/sctp/transport.c | 5 ++--- 4 files changed, 2 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index cd2e18778f81..0a474568b003 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -893,7 +893,6 @@ struct sctp_transport { */ /* RTO : The current retransmission timeout value. */ unsigned long rto; - unsigned long last_rto; __u32 rtt; /* This is the most recent RTT. */ diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index c9f20e28521b..23e5e97aa617 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -423,16 +423,6 @@ void sctp_retransmit_mark(struct sctp_outq *q, if ((reason == SCTP_RTXR_FAST_RTX && (chunk->fast_retransmit == SCTP_NEED_FRTX)) || (reason != SCTP_RTXR_FAST_RTX && !chunk->tsn_gap_acked)) { - /* If this chunk was sent less then 1 rto ago, do not - * retransmit this chunk, but give the peer time - * to acknowlege it. Do this only when - * retransmitting due to T3 timeout. - */ - if (reason == SCTP_RTXR_T3_RTX && - time_before(jiffies, chunk->sent_at + - transport->last_rto)) - continue; - /* RFC 2960 6.2.1 Processing a Received SACK * * C) Any time a DATA chunk is marked for diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 8674d4919556..efa516b47e81 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -480,7 +480,6 @@ static void sctp_do_8_2_transport_strike(struct sctp_association *asoc, * that indicates that we have an outstanding HB. */ if (!is_hb || transport->hb_sent) { - transport->last_rto = transport->rto; transport->rto = min((transport->rto * 2), transport->asoc->rto_max); } } diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 3b141bb32faf..37a1184d789f 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -74,7 +74,7 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, * given destination transport address, set RTO to the protocol * parameter 'RTO.Initial'. */ - peer->last_rto = peer->rto = msecs_to_jiffies(sctp_rto_initial); + peer->rto = msecs_to_jiffies(sctp_rto_initial); peer->rtt = 0; peer->rttvar = 0; peer->srtt = 0; @@ -386,7 +386,6 @@ void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt) tp->rto = tp->asoc->rto_max; tp->rtt = rtt; - tp->last_rto = tp->rto; /* Reset rto_pending so that a new RTT measurement is started when a * new data chunk is sent. @@ -602,7 +601,7 @@ void sctp_transport_reset(struct sctp_transport *t) */ t->cwnd = min(4*asoc->pathmtu, max_t(__u32, 2*asoc->pathmtu, 4380)); t->ssthresh = asoc->peer.i.a_rwnd; - t->last_rto = t->rto = asoc->rto_initial; + t->rto = asoc->rto_initial; t->rtt = 0; t->srtt = 0; t->rttvar = 0; -- cgit v1.3-8-gc7d7 From 2f5517aefcfbdd7fdf0f03b13d292a10d445887f Mon Sep 17 00:00:00 2001 From: andrew hendry Date: Tue, 24 Nov 2009 15:15:26 +0000 Subject: X25: Move SYSCTL ifdefs into header Moves the CONFIG_SYSCTL ifdefs in x25_init into header. Signed-off-by: Andrew Hendry Acked-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- include/net/x25.h | 6 ++++++ net/x25/af_x25.c | 4 ---- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/x25.h b/include/net/x25.h index 2cda04011568..9baa07dc7d17 100644 --- a/include/net/x25.h +++ b/include/net/x25.h @@ -287,8 +287,14 @@ extern unsigned long x25_display_timer(struct sock *); extern void x25_check_rbuf(struct sock *); /* sysctl_net_x25.c */ +#ifdef CONFIG_SYSCTL extern void x25_register_sysctl(void); extern void x25_unregister_sysctl(void); +#else +static inline void x25_register_sysctl(void) {}; +static inline void x25_unregister_sysctl(void) {}; +#endif /* CONFIG_SYSCTL */ + struct x25_skb_cb { unsigned flags; }; diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 2a3a513af3cb..f327ef5cb0e9 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -1729,9 +1729,7 @@ static int __init x25_init(void) printk(KERN_INFO "X.25 for Linux Version 0.2\n"); -#ifdef CONFIG_SYSCTL x25_register_sysctl(); -#endif x25_proc_init(); out: return rc; @@ -1744,9 +1742,7 @@ static void __exit x25_exit(void) x25_link_free(); x25_route_free(); -#ifdef CONFIG_SYSCTL x25_unregister_sysctl(); -#endif unregister_netdevice_notifier(&x25_dev_notifier); -- cgit v1.3-8-gc7d7 From 5789d290cda6854b03986031df02b965572279df Mon Sep 17 00:00:00 2001 From: PJ Waskiewicz Date: Wed, 25 Nov 2009 00:11:30 +0000 Subject: ethtool: Add Direct Attach support to connector port reporting This patch allows a base driver to specify Direct Attach as the type of port through the ethtool interface. Signed-off-by: Peter P Waskiewicz Jr Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- include/linux/ethtool.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index edd03b79e8f8..bcaa0e0a54d3 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -631,6 +631,8 @@ struct ethtool_ops { #define PORT_MII 0x02 #define PORT_FIBRE 0x03 #define PORT_BNC 0x04 +#define PORT_DA 0x05 +#define PORT_NONE 0xef #define PORT_OTHER 0xff /* Which transceiver to use. */ -- cgit v1.3-8-gc7d7 From 8e7cac79808b62f242069a6ac88d364d35621371 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 29 Nov 2009 16:34:48 +0200 Subject: core: Fix user return notifier on fork() fork() clones all thread_info flags, including TIF_USER_RETURN_NOTIFY; if the new task is first scheduled on a cpu which doesn't have user return notifiers set, this causes user return notifiers to trigger without any way of clearing itself. This is easy to trigger with a forky workload on the host in parallel with kvm, resulting in a cpu in an endless loop on the verge of returning to userspace. Fix by dropping the TIF_USER_RETURN_NOTIFY immediately after fork. Signed-off-by: Avi Kivity LKML-Reference: <1259505288-16559-1-git-send-email-avi@redhat.com> Signed-off-by: Ingo Molnar --- include/linux/user-return-notifier.h | 7 +++++++ kernel/fork.c | 2 ++ 2 files changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/user-return-notifier.h b/include/linux/user-return-notifier.h index b6ac056291d7..9c4a445bb43c 100644 --- a/include/linux/user-return-notifier.h +++ b/include/linux/user-return-notifier.h @@ -26,6 +26,11 @@ static inline void propagate_user_return_notify(struct task_struct *prev, void fire_user_return_notifiers(void); +static inline void clear_user_return_notifier(struct task_struct *p) +{ + clear_tsk_thread_flag(p, TIF_USER_RETURN_NOTIFY); +} + #else struct user_return_notifier {}; @@ -37,6 +42,8 @@ static inline void propagate_user_return_notify(struct task_struct *prev, static inline void fire_user_return_notifiers(void) {} +static inline void clear_user_return_notifier(struct task_struct *p) {} + #endif #endif diff --git a/kernel/fork.c b/kernel/fork.c index 266c6af6ef1b..1b7512d5a64a 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -64,6 +64,7 @@ #include #include #include +#include #include #include @@ -249,6 +250,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) goto out; setup_thread_stack(tsk, orig); + clear_user_return_notifier(tsk); stackend = end_of_stack(tsk); *stackend = STACK_END_MAGIC; /* for overflow detection */ -- cgit v1.3-8-gc7d7 From 827d42c9ac91ddd728e4f4a31fefb906ef2ceff7 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 22 Nov 2009 12:28:41 +0100 Subject: mac80211: fix spurious delBA handling Lennert Buytenhek noticed that delBA handling in mac80211 was broken and has remotely triggerable problems, some of which are due to some code shuffling I did that ended up changing the order in which things were done -- this was commit d75636ef9c1af224f1097941879d5a8db7cd04e5 Author: Johannes Berg Date: Tue Feb 10 21:25:53 2009 +0100 mac80211: RX aggregation: clean up stop session and other parts were already present in the original commit d92684e66091c0f0101819619b315b4bb8b5bcc5 Author: Ron Rindjunsky Date: Mon Jan 28 14:07:22 2008 +0200 mac80211: A-MPDU Tx add delBA from recipient support The first problem is that I moved a BUG_ON before various checks -- thereby making it possible to hit. As the comment indicates, the BUG_ON can be removed since the ampdu_action callback must already exist when the state is != IDLE. The second problem isn't easily exploitable but there's a race condition due to unconditionally setting the state to OPERATIONAL when a delBA frame is received, even when no aggregation session was ever initiated. All the drivers accept stopping the session even then, but that opens a race window where crashes could happen before the driver accepts it. Right now, a WARN_ON may happen with non-HT drivers, while the race opens only for HT drivers. For this case, there are two things necessary to fix it: 1) don't process spurious delBA frames, and be more careful about the session state; don't drop the lock 2) HT drivers need to be prepared to handle a session stop even before the session was really started -- this is true for all drivers (that support aggregation) but iwlwifi which can be fixed easily. The other HT drivers (ath9k and ar9170) are behaving properly already. Reported-by: Lennert Buytenhek Cc: stable@kernel.org Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/iwlwifi/iwl-tx.c | 10 +++++++++- include/net/mac80211.h | 6 ++++++ net/mac80211/agg-tx.c | 15 +++++++-------- net/mac80211/ht.c | 8 +++----- net/mac80211/ieee80211_i.h | 2 ++ 5 files changed, 27 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/iwlwifi/iwl-tx.c b/drivers/net/wireless/iwlwifi/iwl-tx.c index fb9bcfa6d947..b7e196e3c8d3 100644 --- a/drivers/net/wireless/iwlwifi/iwl-tx.c +++ b/drivers/net/wireless/iwlwifi/iwl-tx.c @@ -1277,8 +1277,16 @@ int iwl_tx_agg_stop(struct iwl_priv *priv , const u8 *ra, u16 tid) return -ENXIO; } + if (priv->stations[sta_id].tid[tid].agg.state == + IWL_EMPTYING_HW_QUEUE_ADDBA) { + IWL_DEBUG_HT(priv, "AGG stop before setup done\n"); + ieee80211_stop_tx_ba_cb_irqsafe(priv->hw, ra, tid); + priv->stations[sta_id].tid[tid].agg.state = IWL_AGG_OFF; + return 0; + } + if (priv->stations[sta_id].tid[tid].agg.state != IWL_AGG_ON) - IWL_WARN(priv, "Stopping AGG while state not IWL_AGG_ON\n"); + IWL_WARN(priv, "Stopping AGG while state not ON or starting\n"); tid_data = &priv->stations[sta_id].tid[tid]; ssn = (tid_data->seq_number & IEEE80211_SCTL_SEQ) >> 4; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index c75b960c8ac8..998c30fc8981 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1283,6 +1283,12 @@ enum ieee80211_filter_flags { * * These flags are used with the ampdu_action() callback in * &struct ieee80211_ops to indicate which action is needed. + * + * Note that drivers MUST be able to deal with a TX aggregation + * session being stopped even before they OK'ed starting it by + * calling ieee80211_start_tx_ba_cb(_irqsafe), because the peer + * might receive the addBA frame and send a delBA right away! + * * @IEEE80211_AMPDU_RX_START: start Rx aggregation * @IEEE80211_AMPDU_RX_STOP: stop Rx aggregation * @IEEE80211_AMPDU_TX_START: start Tx aggregation diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 63224d1ee8dd..89e238b001de 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -123,13 +123,18 @@ void ieee80211_send_bar(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, u1 ieee80211_tx_skb(sdata, skb, 0); } -static int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, - enum ieee80211_back_parties initiator) +int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, + enum ieee80211_back_parties initiator) { struct ieee80211_local *local = sta->local; int ret; u8 *state; +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "Tx BA session stop requested for %pM tid %u\n", + sta->sta.addr, tid); +#endif /* CONFIG_MAC80211_HT_DEBUG */ + state = &sta->ampdu_mlme.tid_state_tx[tid]; if (*state == HT_AGG_STATE_OPERATIONAL) @@ -143,7 +148,6 @@ static int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, /* HW shall not deny going back to legacy */ if (WARN_ON(ret)) { - *state = HT_AGG_STATE_OPERATIONAL; /* * We may have pending packets get stuck in this case... * Not bothering with a workaround for now. @@ -525,11 +529,6 @@ int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, goto unlock; } -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "Tx BA session stop requested for %pM tid %u\n", - sta->sta.addr, tid); -#endif /* CONFIG_MAC80211_HT_DEBUG */ - ret = ___ieee80211_stop_tx_ba_session(sta, tid, initiator); unlock: diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 48ef1a282b91..cdc58e61d921 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -141,7 +141,6 @@ void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, struct ieee80211_mgmt *mgmt, size_t len) { - struct ieee80211_local *local = sdata->local; u16 tid, params; u16 initiator; @@ -161,10 +160,9 @@ void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata, WLAN_BACK_INITIATOR, 0); else { /* WLAN_BACK_RECIPIENT */ spin_lock_bh(&sta->lock); - sta->ampdu_mlme.tid_state_tx[tid] = - HT_AGG_STATE_OPERATIONAL; + if (sta->ampdu_mlme.tid_state_tx[tid] & HT_ADDBA_REQUESTED_MSK) + ___ieee80211_stop_tx_ba_session(sta, tid, + WLAN_BACK_RECIPIENT); spin_unlock_bh(&sta->lock); - ieee80211_stop_tx_ba_session(&local->hw, sta->sta.addr, tid, - WLAN_BACK_RECIPIENT); } } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index a910bf1f092f..10d316e455de 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1091,6 +1091,8 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, enum ieee80211_back_parties initiator); +int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, + enum ieee80211_back_parties initiator); /* Spectrum management */ void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, -- cgit v1.3-8-gc7d7 From c41562b1626b578e9ce2aae5b3363ee2f142c635 Mon Sep 17 00:00:00 2001 From: Jun Nie Date: Tue, 10 Nov 2009 15:11:36 +0800 Subject: pxa168fb: remove useless vsync/hsync invert flag fb_var_screeninfo.var has already encoded this information. Signed-off-by: Jun Nie Signed-off-by: Eric Miao --- include/video/pxa168fb.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/video/pxa168fb.h b/include/video/pxa168fb.h index b5cc72fe0461..8c2f385a90ea 100644 --- a/include/video/pxa168fb.h +++ b/include/video/pxa168fb.h @@ -117,8 +117,6 @@ struct pxa168fb_mach_info { unsigned invert_composite_blank:1; unsigned invert_pix_val_ena:1; unsigned invert_pixclock:1; - unsigned invert_vsync:1; - unsigned invert_hsync:1; unsigned panel_rbswap:1; unsigned active:1; unsigned enable_lcd:1; -- cgit v1.3-8-gc7d7 From b3a8549593696f5f3efcdbf280e2c8e0fe894855 Mon Sep 17 00:00:00 2001 From: Haojian Zhuang Date: Thu, 5 Nov 2009 10:27:13 -0500 Subject: backlight: da903x_bl: control WLED output current in da9034 Update WLED output current source before changing brightness. Signed-off-by: Haojian Zhuang Signed-off-by: Eric Miao --- drivers/video/backlight/da903x_bl.c | 7 +++++++ include/linux/mfd/da903x.h | 4 ++++ 2 files changed, 11 insertions(+) (limited to 'include') diff --git a/drivers/video/backlight/da903x_bl.c b/drivers/video/backlight/da903x_bl.c index 701a1081e199..7fcb0eb54c60 100644 --- a/drivers/video/backlight/da903x_bl.c +++ b/drivers/video/backlight/da903x_bl.c @@ -25,6 +25,7 @@ #define DA9034_WLED_CONTROL1 0x3C #define DA9034_WLED_CONTROL2 0x3D +#define DA9034_WLED_ISET(x) ((x) & 0x1f) #define DA9034_WLED_BOOST_EN (1 << 5) @@ -101,6 +102,7 @@ static struct backlight_ops da903x_backlight_ops = { static int da903x_backlight_probe(struct platform_device *pdev) { + struct da9034_backlight_pdata *pdata = pdev->dev.platform_data; struct da903x_backlight_data *data; struct backlight_device *bl; int max_brightness; @@ -127,6 +129,11 @@ static int da903x_backlight_probe(struct platform_device *pdev) data->da903x_dev = pdev->dev.parent; data->current_brightness = 0; + /* adjust the WLED output current */ + if (pdata) + da903x_write(data->da903x_dev, DA9034_WLED_CONTROL2, + DA9034_WLED_ISET(pdata->output_current)); + bl = backlight_device_register(pdev->name, data->da903x_dev, data, &da903x_backlight_ops); if (IS_ERR(bl)) { diff --git a/include/linux/mfd/da903x.h b/include/linux/mfd/da903x.h index c63b65c94429..0aa3a1a49ee3 100644 --- a/include/linux/mfd/da903x.h +++ b/include/linux/mfd/da903x.h @@ -96,6 +96,10 @@ struct da9034_touch_pdata { int y_inverted; }; +struct da9034_backlight_pdata { + int output_current; /* output current of WLED, from 0-31 (in mA) */ +}; + /* DA9030 battery charger data */ struct power_supply_info; -- cgit v1.3-8-gc7d7 From 9996508b3353063f2d6c48c1a28a84543d72d70b Mon Sep 17 00:00:00 2001 From: Ian Molton Date: Tue, 1 Dec 2009 14:47:32 +0800 Subject: hwrng: core - Replace u32 in driver API with byte array This patch implements a new method by which hw_random hardware drivers can pass data to the core more efficiently, using a shared buffer. The old methods have been retained as a compatability layer until all the drivers have been updated. Signed-off-by: Ian Molton Acked-by: Matt Mackall Acked-by: Rusty Russell Signed-off-by: Herbert Xu --- drivers/char/hw_random/core.c | 107 +++++++++++++++++++++++++----------------- include/linux/hw_random.h | 7 ++- 2 files changed, 69 insertions(+), 45 deletions(-) (limited to 'include') diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index fc93e2fc7c71..82367262f3a8 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -52,7 +52,8 @@ static struct hwrng *current_rng; static LIST_HEAD(rng_list); static DEFINE_MUTEX(rng_mutex); - +static int data_avail; +static u8 rng_buffer[SMP_CACHE_BYTES] __cacheline_aligned; static inline int hwrng_init(struct hwrng *rng) { @@ -67,19 +68,6 @@ static inline void hwrng_cleanup(struct hwrng *rng) rng->cleanup(rng); } -static inline int hwrng_data_present(struct hwrng *rng, int wait) -{ - if (!rng->data_present) - return 1; - return rng->data_present(rng, wait); -} - -static inline int hwrng_data_read(struct hwrng *rng, u32 *data) -{ - return rng->data_read(rng, data); -} - - static int rng_dev_open(struct inode *inode, struct file *filp) { /* enforce read-only access to this chrdev */ @@ -91,54 +79,87 @@ static int rng_dev_open(struct inode *inode, struct file *filp) return 0; } +static inline int rng_get_data(struct hwrng *rng, u8 *buffer, size_t size, + int wait) { + int present; + + if (rng->read) + return rng->read(rng, (void *)buffer, size, wait); + + if (rng->data_present) + present = rng->data_present(rng, wait); + else + present = 1; + + if (present) + return rng->data_read(rng, (u32 *)buffer); + + return 0; +} + static ssize_t rng_dev_read(struct file *filp, char __user *buf, size_t size, loff_t *offp) { - u32 data; ssize_t ret = 0; int err = 0; - int bytes_read; + int bytes_read, len; while (size) { - err = -ERESTARTSYS; - if (mutex_lock_interruptible(&rng_mutex)) + if (mutex_lock_interruptible(&rng_mutex)) { + err = -ERESTARTSYS; goto out; + } + if (!current_rng) { - mutex_unlock(&rng_mutex); err = -ENODEV; - goto out; + goto out_unlock; } - bytes_read = 0; - if (hwrng_data_present(current_rng, - !(filp->f_flags & O_NONBLOCK))) - bytes_read = hwrng_data_read(current_rng, &data); - mutex_unlock(&rng_mutex); - - err = -EAGAIN; - if (!bytes_read && (filp->f_flags & O_NONBLOCK)) - goto out; - if (bytes_read < 0) { - err = bytes_read; - goto out; + if (!data_avail) { + bytes_read = rng_get_data(current_rng, rng_buffer, + sizeof(rng_buffer), + !(filp->f_flags & O_NONBLOCK)); + if (bytes_read < 0) { + err = bytes_read; + goto out_unlock; + } + data_avail = bytes_read; } - err = -EFAULT; - while (bytes_read && size) { - if (put_user((u8)data, buf++)) - goto out; - size--; - ret++; - bytes_read--; - data >>= 8; + if (!data_avail) { + if (filp->f_flags & O_NONBLOCK) { + err = -EAGAIN; + goto out_unlock; + } + } else { + len = data_avail; + if (len > size) + len = size; + + data_avail -= len; + + if (copy_to_user(buf + ret, rng_buffer + data_avail, + len)) { + err = -EFAULT; + goto out_unlock; + } + + size -= len; + ret += len; } + mutex_unlock(&rng_mutex); + if (need_resched()) schedule_timeout_interruptible(1); - err = -ERESTARTSYS; - if (signal_pending(current)) + + if (signal_pending(current)) { + err = -ERESTARTSYS; goto out; + } } +out_unlock: + mutex_unlock(&rng_mutex); out: return ret ? : err; } @@ -280,7 +301,7 @@ int hwrng_register(struct hwrng *rng) struct hwrng *old_rng, *tmp; if (rng->name == NULL || - rng->data_read == NULL) + (rng->data_read == NULL && rng->read == NULL)) goto out; mutex_lock(&rng_mutex); diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h index 7244456e7e65..9bede7633f74 100644 --- a/include/linux/hw_random.h +++ b/include/linux/hw_random.h @@ -22,10 +22,12 @@ * @cleanup: Cleanup callback (can be NULL). * @data_present: Callback to determine if data is available * on the RNG. If NULL, it is assumed that - * there is always data available. + * there is always data available. *OBSOLETE* * @data_read: Read data from the RNG device. * Returns the number of lower random bytes in "data". - * Must not be NULL. + * Must not be NULL. *OSOLETE* + * @read: New API. drivers can fill up to max bytes of data + * into the buffer. The buffer is aligned for any type. * @priv: Private data, for use by the RNG driver. */ struct hwrng { @@ -34,6 +36,7 @@ struct hwrng { void (*cleanup)(struct hwrng *rng); int (*data_present)(struct hwrng *rng, int wait); int (*data_read)(struct hwrng *rng, u32 *data); + int (*read)(struct hwrng *rng, void *data, size_t max, bool wait); unsigned long priv; /* internal. */ -- cgit v1.3-8-gc7d7 From 7716977b6ae5a0cdd0afab5c6035c4d0ce53f599 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 30 Nov 2009 13:24:18 +0000 Subject: mfd: Correct WM831X_MAX_ISEL_VALUE There was confusion between the array size and the highest ISEL value possible. Reported-by: Dan Carpenter Signed-off-by: Mark Brown Signed-off-by: Samuel Ortiz --- drivers/mfd/wm831x-core.c | 2 +- include/linux/mfd/wm831x/regulator.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/mfd/wm831x-core.c b/drivers/mfd/wm831x-core.c index 49b7885c2702..7f27576ca046 100644 --- a/drivers/mfd/wm831x-core.c +++ b/drivers/mfd/wm831x-core.c @@ -29,7 +29,7 @@ /* Current settings - values are 2*2^(reg_val/4) microamps. These are * exported since they are used by multiple drivers. */ -int wm831x_isinkv_values[WM831X_ISINK_MAX_ISEL] = { +int wm831x_isinkv_values[WM831X_ISINK_MAX_ISEL + 1] = { 2, 2, 3, diff --git a/include/linux/mfd/wm831x/regulator.h b/include/linux/mfd/wm831x/regulator.h index f95466343fb2..955d30fc6a27 100644 --- a/include/linux/mfd/wm831x/regulator.h +++ b/include/linux/mfd/wm831x/regulator.h @@ -1212,7 +1212,7 @@ #define WM831X_LDO1_OK_SHIFT 0 /* LDO1_OK */ #define WM831X_LDO1_OK_WIDTH 1 /* LDO1_OK */ -#define WM831X_ISINK_MAX_ISEL 56 -extern int wm831x_isinkv_values[WM831X_ISINK_MAX_ISEL]; +#define WM831X_ISINK_MAX_ISEL 55 +extern int wm831x_isinkv_values[WM831X_ISINK_MAX_ISEL + 1]; #endif -- cgit v1.3-8-gc7d7 From f13a48bd798a159291ca583b95453171b88b7448 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 1 Dec 2009 15:36:11 +0000 Subject: SLOW_WORK: Move slow_work's proc file to debugfs Move slow_work's debugging proc file to debugfs. Signed-off-by: David Howells Requested-and-acked-by: Ingo Molnar Signed-off-by: Linus Torvalds --- Documentation/slow-work.txt | 4 +- include/linux/slow-work.h | 8 +- init/Kconfig | 8 +- kernel/Makefile | 2 +- kernel/slow-work-debugfs.c | 227 ++++++++++++++++++++++++++++++++++++++++++++ kernel/slow-work-proc.c | 227 -------------------------------------------- kernel/slow-work.c | 18 ++-- kernel/slow-work.h | 6 +- 8 files changed, 253 insertions(+), 247 deletions(-) create mode 100644 kernel/slow-work-debugfs.c delete mode 100644 kernel/slow-work-proc.c (limited to 'include') diff --git a/Documentation/slow-work.txt b/Documentation/slow-work.txt index 52bc31433723..9dbf4470c7e1 100644 --- a/Documentation/slow-work.txt +++ b/Documentation/slow-work.txt @@ -279,9 +279,9 @@ The slow-work thread pool has a number of configurables: VIEWING EXECUTING AND QUEUED ITEMS ================================== -If CONFIG_SLOW_WORK_PROC is enabled, a proc file is made available: +If CONFIG_SLOW_WORK_DEBUG is enabled, a debugfs file is made available: - /proc/slow_work_rq + /sys/kernel/debug/slow_work/runqueue through which the list of work items being executed and the queues of items to be executed may be viewed. The owner of a work item is given the chance to diff --git a/include/linux/slow-work.h b/include/linux/slow-work.h index 5035a2691739..13337bf6c3f5 100644 --- a/include/linux/slow-work.h +++ b/include/linux/slow-work.h @@ -20,7 +20,7 @@ #include struct slow_work; -#ifdef CONFIG_SLOW_WORK_PROC +#ifdef CONFIG_SLOW_WORK_DEBUG struct seq_file; #endif @@ -42,8 +42,8 @@ struct slow_work_ops { /* execute a work item */ void (*execute)(struct slow_work *work); -#ifdef CONFIG_SLOW_WORK_PROC - /* describe a work item for /proc */ +#ifdef CONFIG_SLOW_WORK_DEBUG + /* describe a work item for debugfs */ void (*desc)(struct slow_work *work, struct seq_file *m); #endif }; @@ -64,7 +64,7 @@ struct slow_work { #define SLOW_WORK_DELAYED 5 /* item is struct delayed_slow_work with active timer */ const struct slow_work_ops *ops; /* operations table for this item */ struct list_head link; /* link in queue */ -#ifdef CONFIG_SLOW_WORK_PROC +#ifdef CONFIG_SLOW_WORK_DEBUG struct timespec mark; /* jiffies at which queued or exec begun */ #endif }; diff --git a/init/Kconfig b/init/Kconfig index ab5c64801fe5..39923ccc287b 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1098,12 +1098,12 @@ config SLOW_WORK See Documentation/slow-work.txt. -config SLOW_WORK_PROC - bool "Slow work debugging through /proc" +config SLOW_WORK_DEBUG + bool "Slow work debugging through debugfs" default n - depends on SLOW_WORK && PROC_FS + depends on SLOW_WORK && DEBUG_FS help - Display the contents of the slow work run queue through /proc, + Display the contents of the slow work run queue through debugfs, including items currently executing. See Documentation/slow-work.txt. diff --git a/kernel/Makefile b/kernel/Makefile index 776ffed1556d..d7c13d249b2d 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -94,7 +94,7 @@ obj-$(CONFIG_X86_DS) += trace/ obj-$(CONFIG_RING_BUFFER) += trace/ obj-$(CONFIG_SMP) += sched_cpupri.o obj-$(CONFIG_SLOW_WORK) += slow-work.o -obj-$(CONFIG_SLOW_WORK_PROC) += slow-work-proc.o +obj-$(CONFIG_SLOW_WORK_DEBUG) += slow-work-debugfs.o obj-$(CONFIG_PERF_EVENTS) += perf_event.o ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) diff --git a/kernel/slow-work-debugfs.c b/kernel/slow-work-debugfs.c new file mode 100644 index 000000000000..e45c43645298 --- /dev/null +++ b/kernel/slow-work-debugfs.c @@ -0,0 +1,227 @@ +/* Slow work debugging + * + * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include "slow-work.h" + +#define ITERATOR_SHIFT (BITS_PER_LONG - 4) +#define ITERATOR_SELECTOR (0xfUL << ITERATOR_SHIFT) +#define ITERATOR_COUNTER (~ITERATOR_SELECTOR) + +void slow_work_new_thread_desc(struct slow_work *work, struct seq_file *m) +{ + seq_puts(m, "Slow-work: New thread"); +} + +/* + * Render the time mark field on a work item into a 5-char time with units plus + * a space + */ +static void slow_work_print_mark(struct seq_file *m, struct slow_work *work) +{ + struct timespec now, diff; + + now = CURRENT_TIME; + diff = timespec_sub(now, work->mark); + + if (diff.tv_sec < 0) + seq_puts(m, " -ve "); + else if (diff.tv_sec == 0 && diff.tv_nsec < 1000) + seq_printf(m, "%3luns ", diff.tv_nsec); + else if (diff.tv_sec == 0 && diff.tv_nsec < 1000000) + seq_printf(m, "%3luus ", diff.tv_nsec / 1000); + else if (diff.tv_sec == 0 && diff.tv_nsec < 1000000000) + seq_printf(m, "%3lums ", diff.tv_nsec / 1000000); + else if (diff.tv_sec <= 1) + seq_puts(m, " 1s "); + else if (diff.tv_sec < 60) + seq_printf(m, "%4lus ", diff.tv_sec); + else if (diff.tv_sec < 60 * 60) + seq_printf(m, "%4lum ", diff.tv_sec / 60); + else if (diff.tv_sec < 60 * 60 * 24) + seq_printf(m, "%4luh ", diff.tv_sec / 3600); + else + seq_puts(m, "exces "); +} + +/* + * Describe a slow work item for debugfs + */ +static int slow_work_runqueue_show(struct seq_file *m, void *v) +{ + struct slow_work *work; + struct list_head *p = v; + unsigned long id; + + switch ((unsigned long) v) { + case 1: + seq_puts(m, "THR PID ITEM ADDR FL MARK DESC\n"); + return 0; + case 2: + seq_puts(m, "=== ===== ================ == ===== ==========\n"); + return 0; + + case 3 ... 3 + SLOW_WORK_THREAD_LIMIT - 1: + id = (unsigned long) v - 3; + + read_lock(&slow_work_execs_lock); + work = slow_work_execs[id]; + if (work) { + smp_read_barrier_depends(); + + seq_printf(m, "%3lu %5d %16p %2lx ", + id, slow_work_pids[id], work, work->flags); + slow_work_print_mark(m, work); + + if (work->ops->desc) + work->ops->desc(work, m); + seq_putc(m, '\n'); + } + read_unlock(&slow_work_execs_lock); + return 0; + + default: + work = list_entry(p, struct slow_work, link); + seq_printf(m, "%3s - %16p %2lx ", + work->flags & SLOW_WORK_VERY_SLOW ? "vsq" : "sq", + work, work->flags); + slow_work_print_mark(m, work); + + if (work->ops->desc) + work->ops->desc(work, m); + seq_putc(m, '\n'); + return 0; + } +} + +/* + * map the iterator to a work item + */ +static void *slow_work_runqueue_index(struct seq_file *m, loff_t *_pos) +{ + struct list_head *p; + unsigned long count, id; + + switch (*_pos >> ITERATOR_SHIFT) { + case 0x0: + if (*_pos == 0) + *_pos = 1; + if (*_pos < 3) + return (void *)(unsigned long) *_pos; + if (*_pos < 3 + SLOW_WORK_THREAD_LIMIT) + for (id = *_pos - 3; + id < SLOW_WORK_THREAD_LIMIT; + id++, (*_pos)++) + if (slow_work_execs[id]) + return (void *)(unsigned long) *_pos; + *_pos = 0x1UL << ITERATOR_SHIFT; + + case 0x1: + count = *_pos & ITERATOR_COUNTER; + list_for_each(p, &slow_work_queue) { + if (count == 0) + return p; + count--; + } + *_pos = 0x2UL << ITERATOR_SHIFT; + + case 0x2: + count = *_pos & ITERATOR_COUNTER; + list_for_each(p, &vslow_work_queue) { + if (count == 0) + return p; + count--; + } + *_pos = 0x3UL << ITERATOR_SHIFT; + + default: + return NULL; + } +} + +/* + * set up the iterator to start reading from the first line + */ +static void *slow_work_runqueue_start(struct seq_file *m, loff_t *_pos) +{ + spin_lock_irq(&slow_work_queue_lock); + return slow_work_runqueue_index(m, _pos); +} + +/* + * move to the next line + */ +static void *slow_work_runqueue_next(struct seq_file *m, void *v, loff_t *_pos) +{ + struct list_head *p = v; + unsigned long selector = *_pos >> ITERATOR_SHIFT; + + (*_pos)++; + switch (selector) { + case 0x0: + return slow_work_runqueue_index(m, _pos); + + case 0x1: + if (*_pos >> ITERATOR_SHIFT == 0x1) { + p = p->next; + if (p != &slow_work_queue) + return p; + } + *_pos = 0x2UL << ITERATOR_SHIFT; + p = &vslow_work_queue; + + case 0x2: + if (*_pos >> ITERATOR_SHIFT == 0x2) { + p = p->next; + if (p != &vslow_work_queue) + return p; + } + *_pos = 0x3UL << ITERATOR_SHIFT; + + default: + return NULL; + } +} + +/* + * clean up after reading + */ +static void slow_work_runqueue_stop(struct seq_file *m, void *v) +{ + spin_unlock_irq(&slow_work_queue_lock); +} + +static const struct seq_operations slow_work_runqueue_ops = { + .start = slow_work_runqueue_start, + .stop = slow_work_runqueue_stop, + .next = slow_work_runqueue_next, + .show = slow_work_runqueue_show, +}; + +/* + * open "/sys/kernel/debug/slow_work/runqueue" to list queue contents + */ +static int slow_work_runqueue_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &slow_work_runqueue_ops); +} + +const struct file_operations slow_work_runqueue_fops = { + .owner = THIS_MODULE, + .open = slow_work_runqueue_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; diff --git a/kernel/slow-work-proc.c b/kernel/slow-work-proc.c deleted file mode 100644 index 3988032571f5..000000000000 --- a/kernel/slow-work-proc.c +++ /dev/null @@ -1,227 +0,0 @@ -/* Slow work debugging - * - * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. - */ - -#include -#include -#include -#include -#include -#include "slow-work.h" - -#define ITERATOR_SHIFT (BITS_PER_LONG - 4) -#define ITERATOR_SELECTOR (0xfUL << ITERATOR_SHIFT) -#define ITERATOR_COUNTER (~ITERATOR_SELECTOR) - -void slow_work_new_thread_desc(struct slow_work *work, struct seq_file *m) -{ - seq_puts(m, "Slow-work: New thread"); -} - -/* - * Render the time mark field on a work item into a 5-char time with units plus - * a space - */ -static void slow_work_print_mark(struct seq_file *m, struct slow_work *work) -{ - struct timespec now, diff; - - now = CURRENT_TIME; - diff = timespec_sub(now, work->mark); - - if (diff.tv_sec < 0) - seq_puts(m, " -ve "); - else if (diff.tv_sec == 0 && diff.tv_nsec < 1000) - seq_printf(m, "%3luns ", diff.tv_nsec); - else if (diff.tv_sec == 0 && diff.tv_nsec < 1000000) - seq_printf(m, "%3luus ", diff.tv_nsec / 1000); - else if (diff.tv_sec == 0 && diff.tv_nsec < 1000000000) - seq_printf(m, "%3lums ", diff.tv_nsec / 1000000); - else if (diff.tv_sec <= 1) - seq_puts(m, " 1s "); - else if (diff.tv_sec < 60) - seq_printf(m, "%4lus ", diff.tv_sec); - else if (diff.tv_sec < 60 * 60) - seq_printf(m, "%4lum ", diff.tv_sec / 60); - else if (diff.tv_sec < 60 * 60 * 24) - seq_printf(m, "%4luh ", diff.tv_sec / 3600); - else - seq_puts(m, "exces "); -} - -/* - * Describe a slow work item for /proc - */ -static int slow_work_runqueue_show(struct seq_file *m, void *v) -{ - struct slow_work *work; - struct list_head *p = v; - unsigned long id; - - switch ((unsigned long) v) { - case 1: - seq_puts(m, "THR PID ITEM ADDR FL MARK DESC\n"); - return 0; - case 2: - seq_puts(m, "=== ===== ================ == ===== ==========\n"); - return 0; - - case 3 ... 3 + SLOW_WORK_THREAD_LIMIT - 1: - id = (unsigned long) v - 3; - - read_lock(&slow_work_execs_lock); - work = slow_work_execs[id]; - if (work) { - smp_read_barrier_depends(); - - seq_printf(m, "%3lu %5d %16p %2lx ", - id, slow_work_pids[id], work, work->flags); - slow_work_print_mark(m, work); - - if (work->ops->desc) - work->ops->desc(work, m); - seq_putc(m, '\n'); - } - read_unlock(&slow_work_execs_lock); - return 0; - - default: - work = list_entry(p, struct slow_work, link); - seq_printf(m, "%3s - %16p %2lx ", - work->flags & SLOW_WORK_VERY_SLOW ? "vsq" : "sq", - work, work->flags); - slow_work_print_mark(m, work); - - if (work->ops->desc) - work->ops->desc(work, m); - seq_putc(m, '\n'); - return 0; - } -} - -/* - * map the iterator to a work item - */ -static void *slow_work_runqueue_index(struct seq_file *m, loff_t *_pos) -{ - struct list_head *p; - unsigned long count, id; - - switch (*_pos >> ITERATOR_SHIFT) { - case 0x0: - if (*_pos == 0) - *_pos = 1; - if (*_pos < 3) - return (void *)(unsigned long) *_pos; - if (*_pos < 3 + SLOW_WORK_THREAD_LIMIT) - for (id = *_pos - 3; - id < SLOW_WORK_THREAD_LIMIT; - id++, (*_pos)++) - if (slow_work_execs[id]) - return (void *)(unsigned long) *_pos; - *_pos = 0x1UL << ITERATOR_SHIFT; - - case 0x1: - count = *_pos & ITERATOR_COUNTER; - list_for_each(p, &slow_work_queue) { - if (count == 0) - return p; - count--; - } - *_pos = 0x2UL << ITERATOR_SHIFT; - - case 0x2: - count = *_pos & ITERATOR_COUNTER; - list_for_each(p, &vslow_work_queue) { - if (count == 0) - return p; - count--; - } - *_pos = 0x3UL << ITERATOR_SHIFT; - - default: - return NULL; - } -} - -/* - * set up the iterator to start reading from the first line - */ -static void *slow_work_runqueue_start(struct seq_file *m, loff_t *_pos) -{ - spin_lock_irq(&slow_work_queue_lock); - return slow_work_runqueue_index(m, _pos); -} - -/* - * move to the next line - */ -static void *slow_work_runqueue_next(struct seq_file *m, void *v, loff_t *_pos) -{ - struct list_head *p = v; - unsigned long selector = *_pos >> ITERATOR_SHIFT; - - (*_pos)++; - switch (selector) { - case 0x0: - return slow_work_runqueue_index(m, _pos); - - case 0x1: - if (*_pos >> ITERATOR_SHIFT == 0x1) { - p = p->next; - if (p != &slow_work_queue) - return p; - } - *_pos = 0x2UL << ITERATOR_SHIFT; - p = &vslow_work_queue; - - case 0x2: - if (*_pos >> ITERATOR_SHIFT == 0x2) { - p = p->next; - if (p != &vslow_work_queue) - return p; - } - *_pos = 0x3UL << ITERATOR_SHIFT; - - default: - return NULL; - } -} - -/* - * clean up after reading - */ -static void slow_work_runqueue_stop(struct seq_file *m, void *v) -{ - spin_unlock_irq(&slow_work_queue_lock); -} - -static const struct seq_operations slow_work_runqueue_ops = { - .start = slow_work_runqueue_start, - .stop = slow_work_runqueue_stop, - .next = slow_work_runqueue_next, - .show = slow_work_runqueue_show, -}; - -/* - * open "/proc/slow_work_rq" to list queue contents - */ -static int slow_work_runqueue_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &slow_work_runqueue_ops); -} - -const struct file_operations slow_work_runqueue_fops = { - .owner = THIS_MODULE, - .open = slow_work_runqueue_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; diff --git a/kernel/slow-work.c b/kernel/slow-work.c index b5c17f15f9de..00889bd3c590 100644 --- a/kernel/slow-work.c +++ b/kernel/slow-work.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include "slow-work.h" static void slow_work_cull_timeout(unsigned long); @@ -138,7 +138,7 @@ static void slow_work_clear_thread_processing(int id) {} /* * Data for tracking currently executing items for indication through /proc */ -#ifdef CONFIG_SLOW_WORK_PROC +#ifdef CONFIG_SLOW_WORK_DEBUG struct slow_work *slow_work_execs[SLOW_WORK_THREAD_LIMIT]; pid_t slow_work_pids[SLOW_WORK_THREAD_LIMIT]; DEFINE_RWLOCK(slow_work_execs_lock); @@ -823,7 +823,7 @@ static void slow_work_new_thread_execute(struct slow_work *work) static const struct slow_work_ops slow_work_new_thread_ops = { .owner = THIS_MODULE, .execute = slow_work_new_thread_execute, -#ifdef CONFIG_SLOW_WORK_PROC +#ifdef CONFIG_SLOW_WORK_DEBUG .desc = slow_work_new_thread_desc, #endif }; @@ -1055,9 +1055,15 @@ static int __init init_slow_work(void) if (slow_work_max_max_threads < nr_cpus * 2) slow_work_max_max_threads = nr_cpus * 2; #endif -#ifdef CONFIG_SLOW_WORK_PROC - proc_create("slow_work_rq", S_IFREG | 0400, NULL, - &slow_work_runqueue_fops); +#ifdef CONFIG_SLOW_WORK_DEBUG + { + struct dentry *dbdir; + + dbdir = debugfs_create_dir("slow_work", NULL); + if (dbdir && !IS_ERR(dbdir)) + debugfs_create_file("runqueue", S_IFREG | 0400, dbdir, + NULL, &slow_work_runqueue_fops); + } #endif return 0; } diff --git a/kernel/slow-work.h b/kernel/slow-work.h index 3c2f007f3ad6..321f3c59d732 100644 --- a/kernel/slow-work.h +++ b/kernel/slow-work.h @@ -19,7 +19,7 @@ /* * slow-work.c */ -#ifdef CONFIG_SLOW_WORK_PROC +#ifdef CONFIG_SLOW_WORK_DEBUG extern struct slow_work *slow_work_execs[]; extern pid_t slow_work_pids[]; extern rwlock_t slow_work_execs_lock; @@ -30,9 +30,9 @@ extern struct list_head vslow_work_queue; extern spinlock_t slow_work_queue_lock; /* - * slow-work-proc.c + * slow-work-debugfs.c */ -#ifdef CONFIG_SLOW_WORK_PROC +#ifdef CONFIG_SLOW_WORK_DEBUG extern const struct file_operations slow_work_runqueue_fops; extern void slow_work_new_thread_desc(struct slow_work *, struct seq_file *); -- cgit v1.3-8-gc7d7 From bf56a4ea9f1683c5b223fd3a5dbea23f1fa91c34 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 1 Dec 2009 16:23:20 +0800 Subject: trace_syscalls: Remove unused event_syscall_enter and event_syscall_exit fix event_enter_##sname->event fix event_exit_##sname->event remove unused event_syscall_enter and event_syscall_exit Signed-off-by: Lai Jiangshan Acked-by: Jason Baron Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B14D278.4090209@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/syscalls.h | 4 ++-- include/trace/syscall.h | 2 -- kernel/trace/trace_syscalls.c | 8 -------- 3 files changed, 2 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index b50974a93af0..2f7c539ab96d 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -178,7 +178,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ event_enter_##sname = { \ .name = "sys_enter"#sname, \ .system = "syscalls", \ - .event = &event_syscall_enter, \ + .event = &enter_syscall_print_##sname, \ .raw_init = init_enter_##sname, \ .show_format = syscall_enter_format, \ .define_fields = syscall_enter_define_fields, \ @@ -214,7 +214,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ event_exit_##sname = { \ .name = "sys_exit"#sname, \ .system = "syscalls", \ - .event = &event_syscall_exit, \ + .event = &exit_syscall_print_##sname, \ .raw_init = init_exit_##sname, \ .show_format = syscall_exit_format, \ .define_fields = syscall_exit_define_fields, \ diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 51ee17d3632a..5f8827c92db7 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -37,8 +37,6 @@ extern unsigned long arch_syscall_addr(int nr); extern int syscall_name_to_nr(char *name); void set_syscall_enter_id(int num, int id); void set_syscall_exit_id(int num, int id); -extern struct trace_event event_syscall_enter; -extern struct trace_event event_syscall_exit; extern int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s); diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 63aa8070365d..00d6e176f5b6 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -444,14 +444,6 @@ void unreg_event_syscall_exit(struct ftrace_event_call *call) mutex_unlock(&syscall_trace_lock); } -struct trace_event event_syscall_enter = { - .trace = print_syscall_enter, -}; - -struct trace_event event_syscall_exit = { - .trace = print_syscall_exit, -}; - int __init init_ftrace_syscalls(void) { struct syscall_metadata *meta; -- cgit v1.3-8-gc7d7 From 31c16b13349970b2684248c7d8608d2a96ae135d Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 1 Dec 2009 16:23:30 +0800 Subject: trace_syscalls: Set event_enter_##sname->data to its metadata Set event_enter_##sname->data to its metadata, it makes codes simpler. Signed-off-by: Lai Jiangshan Acked-by: Jason Baron Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B14D282.7050709@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/syscalls.h | 6 ++++-- include/trace/syscall.h | 2 +- kernel/trace/trace_syscalls.c | 36 +++++++++++------------------------- 3 files changed, 16 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 2f7c539ab96d..d3c9fd01a110 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -153,6 +153,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ #define __SC_STR_TDECL6(t, a, ...) #t, __SC_STR_TDECL5(__VA_ARGS__) #define SYSCALL_TRACE_ENTER_EVENT(sname) \ + static const struct syscall_metadata __syscall_meta_##sname; \ static struct ftrace_event_call event_enter_##sname; \ struct trace_event enter_syscall_print_##sname = { \ .trace = print_syscall_enter, \ @@ -184,11 +185,12 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ .define_fields = syscall_enter_define_fields, \ .regfunc = reg_event_syscall_enter, \ .unregfunc = unreg_event_syscall_enter, \ - .data = "sys"#sname, \ + .data = (void *)&__syscall_meta_##sname,\ TRACE_SYS_ENTER_PROFILE_INIT(sname) \ } #define SYSCALL_TRACE_EXIT_EVENT(sname) \ + static const struct syscall_metadata __syscall_meta_##sname; \ static struct ftrace_event_call event_exit_##sname; \ struct trace_event exit_syscall_print_##sname = { \ .trace = print_syscall_exit, \ @@ -220,7 +222,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ .define_fields = syscall_exit_define_fields, \ .regfunc = reg_event_syscall_exit, \ .unregfunc = unreg_event_syscall_exit, \ - .data = "sys"#sname, \ + .data = (void *)&__syscall_meta_##sname,\ TRACE_SYS_EXIT_PROFILE_INIT(sname) \ } diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 5f8827c92db7..c5265c81c4e7 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -34,7 +34,7 @@ struct syscall_metadata { #ifdef CONFIG_FTRACE_SYSCALLS extern unsigned long arch_syscall_addr(int nr); -extern int syscall_name_to_nr(char *name); +extern int syscall_name_to_nr(const char *name); void set_syscall_enter_id(int num, int id); void set_syscall_exit_id(int num, int id); diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 00d6e176f5b6..39649b1675dd 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -51,7 +51,7 @@ static struct syscall_metadata *syscall_nr_to_meta(int nr) return syscalls_metadata[nr]; } -int syscall_name_to_nr(char *name) +int syscall_name_to_nr(const char *name) { int i; @@ -172,18 +172,11 @@ extern char *__bad_type_size(void); int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s) { int i; - int nr; int ret; - struct syscall_metadata *entry; + struct syscall_metadata *entry = call->data; struct syscall_trace_enter trace; int offset = offsetof(struct syscall_trace_enter, args); - nr = syscall_name_to_nr(call->data); - entry = syscall_nr_to_meta(nr); - - if (!entry) - return 0; - ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;" "\tsigned:%u;\n", SYSCALL_FIELD(int, nr)); @@ -245,18 +238,11 @@ int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s) int syscall_enter_define_fields(struct ftrace_event_call *call) { struct syscall_trace_enter trace; - struct syscall_metadata *meta; + struct syscall_metadata *meta = call->data; int ret; - int nr; int i; int offset = offsetof(typeof(trace), args); - nr = syscall_name_to_nr(call->data); - meta = syscall_nr_to_meta(nr); - - if (!meta) - return 0; - ret = trace_define_common_fields(call); if (ret) return ret; @@ -366,9 +352,9 @@ int reg_event_syscall_enter(struct ftrace_event_call *call) { int ret = 0; int num; - char *name; + const char *name; - name = (char *)call->data; + name = ((struct syscall_metadata *)call->data)->name; num = syscall_name_to_nr(name); if (num < 0 || num >= NR_syscalls) return -ENOSYS; @@ -389,9 +375,9 @@ int reg_event_syscall_enter(struct ftrace_event_call *call) void unreg_event_syscall_enter(struct ftrace_event_call *call) { int num; - char *name; + const char *name; - name = (char *)call->data; + name = ((struct syscall_metadata *)call->data)->name; num = syscall_name_to_nr(name); if (num < 0 || num >= NR_syscalls) return; @@ -407,9 +393,9 @@ int reg_event_syscall_exit(struct ftrace_event_call *call) { int ret = 0; int num; - char *name; + const char *name; - name = call->data; + name = ((struct syscall_metadata *)call->data)->name; num = syscall_name_to_nr(name); if (num < 0 || num >= NR_syscalls) return -ENOSYS; @@ -430,9 +416,9 @@ int reg_event_syscall_exit(struct ftrace_event_call *call) void unreg_event_syscall_exit(struct ftrace_event_call *call) { int num; - char *name; + const char *name; - name = call->data; + name = ((struct syscall_metadata *)call->data)->name; num = syscall_name_to_nr(name); if (num < 0 || num >= NR_syscalls) return; -- cgit v1.3-8-gc7d7 From fcc19438dda38dacc8c144e2db3ebc6b9fd4f8b8 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 1 Dec 2009 16:23:36 +0800 Subject: trace_syscalls: Remove enter_id exit_id use ->enter_event->id instead of ->enter_id use ->exit_event->id instead of ->exit_id Signed-off-by: Lai Jiangshan Acked-by: Jason Baron Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B14D288.7030001@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/syscalls.h | 2 -- include/trace/syscall.h | 6 ------ kernel/trace/trace_syscalls.c | 30 ++++++++++-------------------- 3 files changed, 10 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index d3c9fd01a110..b9af87560adb 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -168,7 +168,6 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ if (!id) \ return -ENODEV; \ event_enter_##sname.id = id; \ - set_syscall_enter_id(num, id); \ INIT_LIST_HEAD(&event_enter_##sname.fields); \ return 0; \ } \ @@ -205,7 +204,6 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ if (!id) \ return -ENODEV; \ event_exit_##sname.id = id; \ - set_syscall_exit_id(num, id); \ INIT_LIST_HEAD(&event_exit_##sname.fields); \ return 0; \ } \ diff --git a/include/trace/syscall.h b/include/trace/syscall.h index c5265c81c4e7..ca09561cd578 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -15,8 +15,6 @@ * @nb_args: number of parameters it takes * @types: list of types as strings * @args: list of args as strings (args[i] matches types[i]) - * @enter_id: associated ftrace enter event id - * @exit_id: associated ftrace exit event id * @enter_event: associated syscall_enter trace event * @exit_event: associated syscall_exit trace event */ @@ -25,8 +23,6 @@ struct syscall_metadata { int nb_args; const char **types; const char **args; - int enter_id; - int exit_id; struct ftrace_event_call *enter_event; struct ftrace_event_call *exit_event; @@ -35,8 +31,6 @@ struct syscall_metadata { #ifdef CONFIG_FTRACE_SYSCALLS extern unsigned long arch_syscall_addr(int nr); extern int syscall_name_to_nr(const char *name); -void set_syscall_enter_id(int num, int id); -void set_syscall_exit_id(int num, int id); extern int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s); diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 39649b1675dd..27eb18d69222 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -67,16 +67,6 @@ int syscall_name_to_nr(const char *name) return -1; } -void set_syscall_enter_id(int num, int id) -{ - syscalls_metadata[num]->enter_id = id; -} - -void set_syscall_exit_id(int num, int id) -{ - syscalls_metadata[num]->exit_id = id; -} - enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags) { @@ -93,7 +83,7 @@ print_syscall_enter(struct trace_iterator *iter, int flags) if (!entry) goto end; - if (entry->enter_id != ent->type) { + if (entry->enter_event->id != ent->type) { WARN_ON_ONCE(1); goto end; } @@ -148,7 +138,7 @@ print_syscall_exit(struct trace_iterator *iter, int flags) return TRACE_TYPE_HANDLED; } - if (entry->exit_id != ent->type) { + if (entry->exit_event->id != ent->type) { WARN_ON_ONCE(1); return TRACE_TYPE_UNHANDLED; } @@ -302,8 +292,8 @@ void ftrace_syscall_enter(struct pt_regs *regs, long id) size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args; - event = trace_current_buffer_lock_reserve(&buffer, sys_data->enter_id, - size, 0, 0); + event = trace_current_buffer_lock_reserve(&buffer, + sys_data->enter_event->id, size, 0, 0); if (!event) return; @@ -334,8 +324,8 @@ void ftrace_syscall_exit(struct pt_regs *regs, long ret) if (!sys_data) return; - event = trace_current_buffer_lock_reserve(&buffer, sys_data->exit_id, - sizeof(*entry), 0, 0); + event = trace_current_buffer_lock_reserve(&buffer, + sys_data->exit_event->id, sizeof(*entry), 0, 0); if (!event) return; @@ -510,11 +500,11 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) rec = (struct syscall_trace_enter *) raw_data; tracing_generic_entry_update(&rec->ent, 0, 0); - rec->ent.type = sys_data->enter_id; + rec->ent.type = sys_data->enter_event->id; rec->nr = syscall_nr; syscall_get_arguments(current, regs, 0, sys_data->nb_args, (unsigned long *)&rec->args); - perf_tp_event(sys_data->enter_id, 0, 1, rec, size); + perf_tp_event(sys_data->enter_event->id, 0, 1, rec, size); end: perf_swevent_put_recursion_context(rctx); @@ -615,11 +605,11 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) rec = (struct syscall_trace_exit *)raw_data; tracing_generic_entry_update(&rec->ent, 0, 0); - rec->ent.type = sys_data->exit_id; + rec->ent.type = sys_data->exit_event->id; rec->nr = syscall_nr; rec->ret = syscall_get_return_value(current, regs); - perf_tp_event(sys_data->exit_id, 0, 1, rec, size); + perf_tp_event(sys_data->exit_event->id, 0, 1, rec, size); end: perf_swevent_put_recursion_context(rctx); -- cgit v1.3-8-gc7d7 From c252f65793874b56d50395ab604db465ce688665 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 1 Dec 2009 16:23:47 +0800 Subject: trace_syscalls: Add syscall_nr field to struct syscall_metadata Add syscall_nr field to struct syscall_metadata, it helps us to get syscall number easier. Signed-off-by: Lai Jiangshan Acked-by: Jason Baron Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B14D293.6090800@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/syscalls.h | 4 ++-- include/trace/syscall.h | 3 ++- kernel/trace/trace_syscalls.c | 22 +++++++++------------- 3 files changed, 13 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index b9af87560adb..3c280d7ecb76 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -161,7 +161,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ static int init_enter_##sname(struct ftrace_event_call *call) \ { \ int num, id; \ - num = syscall_name_to_nr("sys"#sname); \ + num = __syscall_meta_##sname.syscall_nr; \ if (num < 0) \ return -ENOSYS; \ id = register_ftrace_event(&enter_syscall_print_##sname);\ @@ -197,7 +197,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ static int init_exit_##sname(struct ftrace_event_call *call) \ { \ int num, id; \ - num = syscall_name_to_nr("sys"#sname); \ + num = __syscall_meta_##sname.syscall_nr; \ if (num < 0) \ return -ENOSYS; \ id = register_ftrace_event(&exit_syscall_print_##sname);\ diff --git a/include/trace/syscall.h b/include/trace/syscall.h index ca09561cd578..1531eef3071f 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -12,6 +12,7 @@ * A syscall entry in the ftrace syscalls array. * * @name: name of the syscall + * @syscall_nr: number of the syscall * @nb_args: number of parameters it takes * @types: list of types as strings * @args: list of args as strings (args[i] matches types[i]) @@ -20,6 +21,7 @@ */ struct syscall_metadata { const char *name; + int syscall_nr; int nb_args; const char **types; const char **args; @@ -30,7 +32,6 @@ struct syscall_metadata { #ifdef CONFIG_FTRACE_SYSCALLS extern unsigned long arch_syscall_addr(int nr); -extern int syscall_name_to_nr(const char *name); extern int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s); diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 27eb18d69222..144cc14d8551 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -51,7 +51,7 @@ static struct syscall_metadata *syscall_nr_to_meta(int nr) return syscalls_metadata[nr]; } -int syscall_name_to_nr(const char *name) +static int syscall_name_to_nr(const char *name) { int i; @@ -342,10 +342,8 @@ int reg_event_syscall_enter(struct ftrace_event_call *call) { int ret = 0; int num; - const char *name; - name = ((struct syscall_metadata *)call->data)->name; - num = syscall_name_to_nr(name); + num = ((struct syscall_metadata *)call->data)->syscall_nr; if (num < 0 || num >= NR_syscalls) return -ENOSYS; mutex_lock(&syscall_trace_lock); @@ -365,10 +363,8 @@ int reg_event_syscall_enter(struct ftrace_event_call *call) void unreg_event_syscall_enter(struct ftrace_event_call *call) { int num; - const char *name; - name = ((struct syscall_metadata *)call->data)->name; - num = syscall_name_to_nr(name); + num = ((struct syscall_metadata *)call->data)->syscall_nr; if (num < 0 || num >= NR_syscalls) return; mutex_lock(&syscall_trace_lock); @@ -383,10 +379,8 @@ int reg_event_syscall_exit(struct ftrace_event_call *call) { int ret = 0; int num; - const char *name; - name = ((struct syscall_metadata *)call->data)->name; - num = syscall_name_to_nr(name); + num = ((struct syscall_metadata *)call->data)->syscall_nr; if (num < 0 || num >= NR_syscalls) return -ENOSYS; mutex_lock(&syscall_trace_lock); @@ -406,10 +400,8 @@ int reg_event_syscall_exit(struct ftrace_event_call *call) void unreg_event_syscall_exit(struct ftrace_event_call *call) { int num; - const char *name; - name = ((struct syscall_metadata *)call->data)->name; - num = syscall_name_to_nr(name); + num = ((struct syscall_metadata *)call->data)->syscall_nr; if (num < 0 || num >= NR_syscalls) return; mutex_lock(&syscall_trace_lock); @@ -436,6 +428,10 @@ int __init init_ftrace_syscalls(void) for (i = 0; i < NR_syscalls; i++) { addr = arch_syscall_addr(i); meta = find_syscall_meta(addr); + if (!meta) + continue; + + meta->syscall_nr = i; syscalls_metadata[i] = meta; } -- cgit v1.3-8-gc7d7 From a1301da0997bf73c44dbe584e9070a13adc89672 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 1 Dec 2009 16:23:55 +0800 Subject: trace_syscalls: Remove duplicate init_enter_##sname() use only one init_syscall_trace instead of many init_enter_##sname()/init_exit_##sname() Signed-off-by: Lai Jiangshan Acked-by: Jason Baron Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B14D29B.6090708@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/syscalls.h | 30 ++---------------------------- include/trace/syscall.h | 1 + kernel/trace/trace_syscalls.c | 12 ++++++++++++ 3 files changed, 15 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 3c280d7ecb76..cf0d923ea40e 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -158,19 +158,6 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ struct trace_event enter_syscall_print_##sname = { \ .trace = print_syscall_enter, \ }; \ - static int init_enter_##sname(struct ftrace_event_call *call) \ - { \ - int num, id; \ - num = __syscall_meta_##sname.syscall_nr; \ - if (num < 0) \ - return -ENOSYS; \ - id = register_ftrace_event(&enter_syscall_print_##sname);\ - if (!id) \ - return -ENODEV; \ - event_enter_##sname.id = id; \ - INIT_LIST_HEAD(&event_enter_##sname.fields); \ - return 0; \ - } \ TRACE_SYS_ENTER_PROFILE(sname); \ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ @@ -179,7 +166,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ .name = "sys_enter"#sname, \ .system = "syscalls", \ .event = &enter_syscall_print_##sname, \ - .raw_init = init_enter_##sname, \ + .raw_init = init_syscall_trace, \ .show_format = syscall_enter_format, \ .define_fields = syscall_enter_define_fields, \ .regfunc = reg_event_syscall_enter, \ @@ -194,19 +181,6 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ struct trace_event exit_syscall_print_##sname = { \ .trace = print_syscall_exit, \ }; \ - static int init_exit_##sname(struct ftrace_event_call *call) \ - { \ - int num, id; \ - num = __syscall_meta_##sname.syscall_nr; \ - if (num < 0) \ - return -ENOSYS; \ - id = register_ftrace_event(&exit_syscall_print_##sname);\ - if (!id) \ - return -ENODEV; \ - event_exit_##sname.id = id; \ - INIT_LIST_HEAD(&event_exit_##sname.fields); \ - return 0; \ - } \ TRACE_SYS_EXIT_PROFILE(sname); \ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ @@ -215,7 +189,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ .name = "sys_exit"#sname, \ .system = "syscalls", \ .event = &exit_syscall_print_##sname, \ - .raw_init = init_exit_##sname, \ + .raw_init = init_syscall_trace, \ .show_format = syscall_exit_format, \ .define_fields = syscall_exit_define_fields, \ .regfunc = reg_event_syscall_exit, \ diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 1531eef3071f..dff9371e5274 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -32,6 +32,7 @@ struct syscall_metadata { #ifdef CONFIG_FTRACE_SYSCALLS extern unsigned long arch_syscall_addr(int nr); +extern int init_syscall_trace(struct ftrace_event_call *call); extern int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s); diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 144cc14d8551..c6514093c95a 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -412,6 +412,18 @@ void unreg_event_syscall_exit(struct ftrace_event_call *call) mutex_unlock(&syscall_trace_lock); } +int init_syscall_trace(struct ftrace_event_call *call) +{ + int id; + + id = register_ftrace_event(call->event); + if (!id) + return -ENODEV; + call->id = id; + INIT_LIST_HEAD(&call->fields); + return 0; +} + int __init init_ftrace_syscalls(void) { struct syscall_metadata *meta; -- cgit v1.3-8-gc7d7 From 3bbe84e9d385205d638035ee9dcc4db1b486ea08 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 1 Dec 2009 16:24:01 +0800 Subject: trace_syscalls: Simplify syscall profile use only one prof_sysenter_enable() instead of prof_sysenter_enable_##sname() use only one prof_sysenter_disable() instead of prof_sysenter_disable_##sname() use only one prof_sysexit_enable() instead of prof_sysexit_enable_##sname() use only one prof_sysexit_disable() instead of prof_sysexit_disable_##sname() Signed-off-by: Lai Jiangshan Acked-by: Jason Baron Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B14D2A1.8060304@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/syscalls.h | 31 ++++--------------------------- include/trace/syscall.h | 8 ++++---- kernel/trace/trace_syscalls.c | 24 ++++++++---------------- 3 files changed, 16 insertions(+), 47 deletions(-) (limited to 'include') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index cf0d923ea40e..c2df3a593236 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -99,37 +99,16 @@ struct perf_event_attr; #define __SC_TEST6(t6, a6, ...) __SC_TEST(t6); __SC_TEST5(__VA_ARGS__) #ifdef CONFIG_EVENT_PROFILE -#define TRACE_SYS_ENTER_PROFILE(sname) \ -static int prof_sysenter_enable_##sname(struct ftrace_event_call *unused) \ -{ \ - return reg_prof_syscall_enter("sys"#sname); \ -} \ - \ -static void prof_sysenter_disable_##sname(struct ftrace_event_call *unused) \ -{ \ - unreg_prof_syscall_enter("sys"#sname); \ -} - -#define TRACE_SYS_EXIT_PROFILE(sname) \ -static int prof_sysexit_enable_##sname(struct ftrace_event_call *unused) \ -{ \ - return reg_prof_syscall_exit("sys"#sname); \ -} \ - \ -static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ -{ \ - unreg_prof_syscall_exit("sys"#sname); \ -} #define TRACE_SYS_ENTER_PROFILE_INIT(sname) \ .profile_count = ATOMIC_INIT(-1), \ - .profile_enable = prof_sysenter_enable_##sname, \ - .profile_disable = prof_sysenter_disable_##sname, + .profile_enable = prof_sysenter_enable, \ + .profile_disable = prof_sysenter_disable, #define TRACE_SYS_EXIT_PROFILE_INIT(sname) \ .profile_count = ATOMIC_INIT(-1), \ - .profile_enable = prof_sysexit_enable_##sname, \ - .profile_disable = prof_sysexit_disable_##sname, + .profile_enable = prof_sysexit_enable, \ + .profile_disable = prof_sysexit_disable, #else #define TRACE_SYS_ENTER_PROFILE(sname) #define TRACE_SYS_ENTER_PROFILE_INIT(sname) @@ -158,7 +137,6 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ struct trace_event enter_syscall_print_##sname = { \ .trace = print_syscall_enter, \ }; \ - TRACE_SYS_ENTER_PROFILE(sname); \ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_events"))) \ @@ -181,7 +159,6 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ struct trace_event exit_syscall_print_##sname = { \ .trace = print_syscall_exit, \ }; \ - TRACE_SYS_EXIT_PROFILE(sname); \ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_events"))) \ diff --git a/include/trace/syscall.h b/include/trace/syscall.h index dff9371e5274..961fda3556bb 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -50,10 +50,10 @@ enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags); enum print_line_t print_syscall_exit(struct trace_iterator *iter, int flags); #endif #ifdef CONFIG_EVENT_PROFILE -int reg_prof_syscall_enter(char *name); -void unreg_prof_syscall_enter(char *name); -int reg_prof_syscall_exit(char *name); -void unreg_prof_syscall_exit(char *name); +int prof_sysenter_enable(struct ftrace_event_call *call); +void prof_sysenter_disable(struct ftrace_event_call *call); +int prof_sysexit_enable(struct ftrace_event_call *call); +void prof_sysexit_disable(struct ftrace_event_call *call); #endif diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index c6514093c95a..1e85b6cc26aa 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -520,14 +520,12 @@ end_recursion: local_irq_restore(flags); } -int reg_prof_syscall_enter(char *name) +int prof_sysenter_enable(struct ftrace_event_call *call) { int ret = 0; int num; - num = syscall_name_to_nr(name); - if (num < 0 || num >= NR_syscalls) - return -ENOSYS; + num = ((struct syscall_metadata *)call->data)->syscall_nr; mutex_lock(&syscall_trace_lock); if (!sys_prof_refcount_enter) @@ -543,13 +541,11 @@ int reg_prof_syscall_enter(char *name) return ret; } -void unreg_prof_syscall_enter(char *name) +void prof_sysenter_disable(struct ftrace_event_call *call) { int num; - num = syscall_name_to_nr(name); - if (num < 0 || num >= NR_syscalls) - return; + num = ((struct syscall_metadata *)call->data)->syscall_nr; mutex_lock(&syscall_trace_lock); sys_prof_refcount_enter--; @@ -625,14 +621,12 @@ end_recursion: local_irq_restore(flags); } -int reg_prof_syscall_exit(char *name) +int prof_sysexit_enable(struct ftrace_event_call *call) { int ret = 0; int num; - num = syscall_name_to_nr(name); - if (num < 0 || num >= NR_syscalls) - return -ENOSYS; + num = ((struct syscall_metadata *)call->data)->syscall_nr; mutex_lock(&syscall_trace_lock); if (!sys_prof_refcount_exit) @@ -648,13 +642,11 @@ int reg_prof_syscall_exit(char *name) return ret; } -void unreg_prof_syscall_exit(char *name) +void prof_sysexit_disable(struct ftrace_event_call *call) { int num; - num = syscall_name_to_nr(name); - if (num < 0 || num >= NR_syscalls) - return; + num = ((struct syscall_metadata *)call->data)->syscall_nr; mutex_lock(&syscall_trace_lock); sys_prof_refcount_exit--; -- cgit v1.3-8-gc7d7 From a5ee155136b4a8f4ab0e4c9c064b661da475e298 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 29 Nov 2009 15:45:58 +0000 Subject: net: NETDEV_UNREGISTER_PERNET -> NETDEV_UNREGISTER_BATCH The motivation for an additional notifier in batched netdevice notification (rt_do_flush) only needs to be called once per batch not once per namespace. For further batching improvements I need a guarantee that the netdevices are unregistered in order allowing me to unregister an all of the network devices in a network namespace at the same time with the guarantee that the loopback device is really and truly unregistered last. Additionally it appears that we moved the route cache flush after the final synchronize_net, which seems wrong and there was no explanation. So I have restored the original location of the final synchronize_net. Cc: Octavian Purdila Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- include/linux/notifier.h | 2 +- include/net/route.h | 1 + net/core/dev.c | 36 +++++++++--------------------------- net/ipv4/fib_frontend.c | 4 +++- net/ipv4/route.c | 6 ++++++ 5 files changed, 20 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/include/linux/notifier.h b/include/linux/notifier.h index b0c3671d463c..fee6c2f68075 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -202,7 +202,7 @@ static inline int notifier_to_errno(int ret) #define NETDEV_BONDING_OLDTYPE 0x000E #define NETDEV_BONDING_NEWTYPE 0x000F #define NETDEV_POST_INIT 0x0010 -#define NETDEV_UNREGISTER_PERNET 0x0011 +#define NETDEV_UNREGISTER_BATCH 0x0011 #define SYS_DOWN 0x0001 /* Notify of system down */ #define SYS_RESTART SYS_DOWN diff --git a/include/net/route.h b/include/net/route.h index cfb4c071a136..bce6dd68d27b 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -108,6 +108,7 @@ extern int ip_rt_init(void); extern void ip_rt_redirect(__be32 old_gw, __be32 dst, __be32 new_gw, __be32 src, struct net_device *dev); extern void rt_cache_flush(struct net *net, int how); +extern void rt_cache_flush_batch(void); extern int __ip_route_output_key(struct net *, struct rtable **, const struct flowi *flp); extern int ip_route_output_key(struct net *, struct rtable **, struct flowi *flp); extern int ip_route_output_flow(struct net *, struct rtable **rp, struct flowi *flp, struct sock *sk, int flags); diff --git a/net/core/dev.c b/net/core/dev.c index 5d131c2f84cc..bb37ee1e0901 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1353,7 +1353,7 @@ rollback: nb->notifier_call(nb, NETDEV_DOWN, dev); } nb->notifier_call(nb, NETDEV_UNREGISTER, dev); - nb->notifier_call(nb, NETDEV_UNREGISTER_PERNET, dev); + nb->notifier_call(nb, NETDEV_UNREGISTER_BATCH, dev); } } @@ -4771,8 +4771,7 @@ static void net_set_todo(struct net_device *dev) static void rollback_registered_many(struct list_head *head) { - struct net_device *dev, *aux, *fdev; - LIST_HEAD(pernet_list); + struct net_device *dev; BUG_ON(dev_boot_phase); ASSERT_RTNL(); @@ -4828,26 +4827,14 @@ static void rollback_registered_many(struct list_head *head) netdev_unregister_kobject(dev); } - synchronize_net(); + /* Process any work delayed until the end of the batch */ + dev = list_entry(head->next, struct net_device, unreg_list); + call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev); - list_for_each_entry_safe(dev, aux, head, unreg_list) { - int new_net = 1; - list_for_each_entry(fdev, &pernet_list, unreg_list) { - if (net_eq(dev_net(dev), dev_net(fdev))) { - new_net = 0; - dev_put(dev); - break; - } - } - if (new_net) - list_move(&dev->unreg_list, &pernet_list); - } + synchronize_net(); - list_for_each_entry_safe(dev, aux, &pernet_list, unreg_list) { - call_netdevice_notifiers(NETDEV_UNREGISTER_PERNET, dev); - list_move(&dev->unreg_list, head); + list_for_each_entry(dev, head, unreg_list) dev_put(dev); - } } static void rollback_registered(struct net_device *dev) @@ -5129,7 +5116,7 @@ static void netdev_wait_allrefs(struct net_device *dev) /* Rebroadcast unregister notification */ call_netdevice_notifiers(NETDEV_UNREGISTER, dev); - /* don't resend NETDEV_UNREGISTER_PERNET, _PERNET users + /* don't resend NETDEV_UNREGISTER_BATCH, _BATCH users * should have already handle it the first time */ if (test_bit(__LINK_STATE_LINKWATCH_PENDING, @@ -5442,11 +5429,6 @@ EXPORT_SYMBOL(unregister_netdevice_queue); /** * unregister_netdevice_many - unregister many devices * @head: list of devices - * - * WARNING: Calling this modifies the given list - * (in rollback_registered_many). It may change the order of the elements - * in the list. However, you can assume it does not add or delete elements - * to/from the list. */ void unregister_netdevice_many(struct list_head *head) { @@ -5555,7 +5537,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char this device. They should clean all the things. */ call_netdevice_notifiers(NETDEV_UNREGISTER, dev); - call_netdevice_notifiers(NETDEV_UNREGISTER_PERNET, dev); + call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev); /* * Flush the unicast and multicast chains diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 6c1e56aef1f4..3b373a8b0473 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -959,9 +959,11 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo break; case NETDEV_CHANGEMTU: case NETDEV_CHANGE: - case NETDEV_UNREGISTER_PERNET: rt_cache_flush(dev_net(dev), 0); break; + case NETDEV_UNREGISTER_BATCH: + rt_cache_flush_batch(); + break; } return NOTIFY_DONE; } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 9889fbd96487..90cdcfc32937 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -900,6 +900,12 @@ void rt_cache_flush(struct net *net, int delay) rt_do_flush(!in_softirq()); } +/* Flush previous cache invalidated entries from the cache */ +void rt_cache_flush_batch(void) +{ + rt_do_flush(!in_softirq()); +} + /* * We change rt_genid and let gc do the cleanup */ -- cgit v1.3-8-gc7d7 From dcbccbd4f1f6ad0f0e169d4b2e816e42bde06f82 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 29 Nov 2009 22:25:26 +0000 Subject: net: Implement for_each_netdev_reverse. I will need this shortly to implement network namespace shutdown batching. For sanity sake network devices should be removed in the reverse order they were created in. Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9428793775a0..daf13d367498 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1112,6 +1112,8 @@ extern rwlock_t dev_base_lock; /* Device list lock */ #define for_each_netdev(net, d) \ list_for_each_entry(d, &(net)->dev_base_head, dev_list) +#define for_each_netdev_reverse(net, d) \ + list_for_each_entry_reverse(d, &(net)->dev_base_head, dev_list) #define for_each_netdev_rcu(net, d) \ list_for_each_entry_rcu(d, &(net)->dev_base_head, dev_list) #define for_each_netdev_safe(net, d, n) \ -- cgit v1.3-8-gc7d7 From 2b035b39970740722598f7a9d548835f9bdd730f Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 29 Nov 2009 22:25:27 +0000 Subject: net: Batch network namespace destruction. It is fairly common to kill several network namespaces at once. Either because they are nested one inside the other or because they are cooperating in multiple machine networking experiments. As the network stack control logic does not parallelize easily batch up multiple network namespaces existing together. To get the full benefit of batching the virtual network devices to be removed must be all removed in one batch. For that purpose I have added a loop after the last network device operations have run that batches up all remaining network devices and deletes them. An extra benefit is that the reorganization slightly shrinks the size of the per network namespace data structures replaceing a work_struct with a list_head. In a trivial test with 4K namespaces this change reduced the cost of a destroying 4K namespaces from 7+ minutes (at 12% cpu) to 44 seconds (at 60% cpu). The bulk of that 44s was spent in inet_twsk_purge. Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- include/net/net_namespace.h | 2 +- net/core/net_namespace.c | 66 +++++++++++++++++++++++++++++++++++++++------ 2 files changed, 59 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 0addd45038ac..d69b4796030f 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -42,7 +42,7 @@ struct net { */ #endif struct list_head list; /* list of network namespaces */ - struct work_struct work; /* work struct for freeing */ + struct list_head cleanup_list; /* namespaces on death row */ struct proc_dir_entry *proc_net; struct proc_dir_entry *proc_net_stat; diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 86ed7f44d083..a42caa2b909b 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -8,8 +8,10 @@ #include #include #include +#include #include #include +#include /* * Our network namespace constructor/destructor lists @@ -27,6 +29,20 @@ EXPORT_SYMBOL(init_net); #define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */ +static void unregister_netdevices(struct net *net, struct list_head *list) +{ + struct net_device *dev; + /* At exit all network devices most be removed from a network + * namespace. Do this in the reverse order of registeration. + */ + for_each_netdev_reverse(net, dev) { + if (dev->rtnl_link_ops) + dev->rtnl_link_ops->dellink(dev, list); + else + unregister_netdevice_queue(dev, list); + } +} + /* * setup_net runs the initializers for the network namespace object. */ @@ -59,6 +75,13 @@ out_undo: list_for_each_entry_continue_reverse(ops, &pernet_list, list) { if (ops->exit) ops->exit(net); + if (&ops->list == first_device) { + LIST_HEAD(dev_kill_list); + rtnl_lock(); + unregister_netdevices(net, &dev_kill_list); + unregister_netdevice_many(&dev_kill_list); + rtnl_unlock(); + } } rcu_barrier(); @@ -147,18 +170,26 @@ struct net *copy_net_ns(unsigned long flags, struct net *old_net) return net_create(); } +static DEFINE_SPINLOCK(cleanup_list_lock); +static LIST_HEAD(cleanup_list); /* Must hold cleanup_list_lock to touch */ + static void cleanup_net(struct work_struct *work) { struct pernet_operations *ops; - struct net *net; + struct net *net, *tmp; + LIST_HEAD(net_kill_list); - net = container_of(work, struct net, work); + /* Atomically snapshot the list of namespaces to cleanup */ + spin_lock_irq(&cleanup_list_lock); + list_replace_init(&cleanup_list, &net_kill_list); + spin_unlock_irq(&cleanup_list_lock); mutex_lock(&net_mutex); /* Don't let anyone else find us. */ rtnl_lock(); - list_del_rcu(&net->list); + list_for_each_entry(net, &net_kill_list, cleanup_list) + list_del_rcu(&net->list); rtnl_unlock(); /* @@ -170,8 +201,18 @@ static void cleanup_net(struct work_struct *work) /* Run all of the network namespace exit methods */ list_for_each_entry_reverse(ops, &pernet_list, list) { - if (ops->exit) - ops->exit(net); + if (ops->exit) { + list_for_each_entry(net, &net_kill_list, cleanup_list) + ops->exit(net); + } + if (&ops->list == first_device) { + LIST_HEAD(dev_kill_list); + rtnl_lock(); + list_for_each_entry(net, &net_kill_list, cleanup_list) + unregister_netdevices(net, &dev_kill_list); + unregister_netdevice_many(&dev_kill_list); + rtnl_unlock(); + } } mutex_unlock(&net_mutex); @@ -182,14 +223,23 @@ static void cleanup_net(struct work_struct *work) rcu_barrier(); /* Finally it is safe to free my network namespace structure */ - net_free(net); + list_for_each_entry_safe(net, tmp, &net_kill_list, cleanup_list) { + list_del_init(&net->cleanup_list); + net_free(net); + } } +static DECLARE_WORK(net_cleanup_work, cleanup_net); void __put_net(struct net *net) { /* Cleanup the network namespace in process context */ - INIT_WORK(&net->work, cleanup_net); - queue_work(netns_wq, &net->work); + unsigned long flags; + + spin_lock_irqsave(&cleanup_list_lock, flags); + list_add(&net->cleanup_list, &cleanup_list); + spin_unlock_irqrestore(&cleanup_list_lock, flags); + + queue_work(netns_wq, &net_cleanup_work); } EXPORT_SYMBOL_GPL(__put_net); -- cgit v1.3-8-gc7d7 From f875bae065334907796da12523f9df85c89f5712 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 29 Nov 2009 22:25:28 +0000 Subject: net: Automatically allocate per namespace data. To get the full benefit of batched network namespace cleanup netowrk device deletion needs to be performed by the generic code. When using register_pernet_gen_device and freeing the data in exit_net it is impossible to delay allocation until after exit_net has called as the device uninit methods are no longer safe. To correct this, and to simplify working with per network namespace data I have moved allocation and deletion of per network namespace data into the network namespace core. The core now frees the data only after all of the network namespace exit routines have run. Now it is only required to set the new fields .id and .size in the pernet_operations structure if you want network namespace data to be managed for you automatically. This makes the current register_pernet_gen_device and register_pernet_gen_subsys routines unnecessary. For the moment I have left them as compatibility wrappers in net_namespace.h They will be removed once all of the users have been updated. Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- include/net/net_namespace.h | 28 ++++++- net/core/net_namespace.c | 188 ++++++++++++++++++++++++-------------------- 2 files changed, 126 insertions(+), 90 deletions(-) (limited to 'include') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index d69b4796030f..080774b9dbf3 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -236,6 +236,8 @@ struct pernet_operations { struct list_head list; int (*init)(struct net *net); void (*exit)(struct net *net); + int *id; + size_t size; }; /* @@ -259,12 +261,30 @@ struct pernet_operations { */ extern int register_pernet_subsys(struct pernet_operations *); extern void unregister_pernet_subsys(struct pernet_operations *); -extern int register_pernet_gen_subsys(int *id, struct pernet_operations *); -extern void unregister_pernet_gen_subsys(int id, struct pernet_operations *); extern int register_pernet_device(struct pernet_operations *); extern void unregister_pernet_device(struct pernet_operations *); -extern int register_pernet_gen_device(int *id, struct pernet_operations *); -extern void unregister_pernet_gen_device(int id, struct pernet_operations *); + +static inline int register_pernet_gen_subsys(int *id, struct pernet_operations *ops) +{ + ops->id = id; + return register_pernet_subsys(ops); +} + +static inline void unregister_pernet_gen_subsys(int id, struct pernet_operations *ops) +{ + return unregister_pernet_subsys(ops); +} + +static inline int register_pernet_gen_device(int *id, struct pernet_operations *ops) +{ + ops->id = id; + return register_pernet_device(ops); +} + +static inline void unregister_pernet_gen_device(int id, struct pernet_operations *ops) +{ + return unregister_pernet_device(ops); +} struct ctl_path; struct ctl_table; diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index a42caa2b909b..9679ad292da9 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -43,13 +43,40 @@ static void unregister_netdevices(struct net *net, struct list_head *list) } } +static int ops_init(const struct pernet_operations *ops, struct net *net) +{ + int err; + if (ops->id && ops->size) { + void *data = kzalloc(ops->size, GFP_KERNEL); + if (!data) + return -ENOMEM; + + err = net_assign_generic(net, *ops->id, data); + if (err) { + kfree(data); + return err; + } + } + if (ops->init) + return ops->init(net); + return 0; +} + +static void ops_free(const struct pernet_operations *ops, struct net *net) +{ + if (ops->id && ops->size) { + int id = *ops->id; + kfree(net_generic(net, id)); + } +} + /* * setup_net runs the initializers for the network namespace object. */ static __net_init int setup_net(struct net *net) { /* Must be called with net_mutex held */ - struct pernet_operations *ops; + const struct pernet_operations *ops, *saved_ops; int error = 0; atomic_set(&net->count, 1); @@ -59,11 +86,9 @@ static __net_init int setup_net(struct net *net) #endif list_for_each_entry(ops, &pernet_list, list) { - if (ops->init) { - error = ops->init(net); - if (error < 0) - goto out_undo; - } + error = ops_init(ops, net); + if (error < 0) + goto out_undo; } out: return error; @@ -72,6 +97,7 @@ out_undo: /* Walk through the list backwards calling the exit functions * for the pernet modules whose init functions did not fail. */ + saved_ops = ops; list_for_each_entry_continue_reverse(ops, &pernet_list, list) { if (ops->exit) ops->exit(net); @@ -83,6 +109,9 @@ out_undo: rtnl_unlock(); } } + ops = saved_ops; + list_for_each_entry_continue_reverse(ops, &pernet_list, list) + ops_free(ops, net); rcu_barrier(); goto out; @@ -175,7 +204,7 @@ static LIST_HEAD(cleanup_list); /* Must hold cleanup_list_lock to touch */ static void cleanup_net(struct work_struct *work) { - struct pernet_operations *ops; + const struct pernet_operations *ops; struct net *net, *tmp; LIST_HEAD(net_kill_list); @@ -214,6 +243,13 @@ static void cleanup_net(struct work_struct *work) rtnl_unlock(); } } + /* Free the net generic variables */ + list_for_each_entry_reverse(ops, &pernet_list, list) { + if (ops->size && ops->id) { + list_for_each_entry(net, &net_kill_list, cleanup_list) + ops_free(ops, net); + } + } mutex_unlock(&net_mutex); @@ -309,16 +345,16 @@ static int __init net_ns_init(void) pure_initcall(net_ns_init); #ifdef CONFIG_NET_NS -static int register_pernet_operations(struct list_head *list, - struct pernet_operations *ops) +static int __register_pernet_operations(struct list_head *list, + struct pernet_operations *ops) { struct net *net, *undo_net; int error; list_add_tail(&ops->list, list); - if (ops->init) { + if (ops->init || (ops->id && ops->size)) { for_each_net(net) { - error = ops->init(net); + error = ops_init(ops, net); if (error) goto out_undo; } @@ -336,10 +372,18 @@ out_undo: } } undone: + if (ops->size && ops->id) { + for_each_net(undo_net) { + if (net_eq(undo_net, net)) + goto freed; + ops_free(ops, undo_net); + } + } +freed: return error; } -static void unregister_pernet_operations(struct pernet_operations *ops) +static void __unregister_pernet_operations(struct pernet_operations *ops) { struct net *net; @@ -347,27 +391,66 @@ static void unregister_pernet_operations(struct pernet_operations *ops) if (ops->exit) for_each_net(net) ops->exit(net); + if (ops->id && ops->size) + for_each_net(net) + ops_free(ops, net); } #else -static int register_pernet_operations(struct list_head *list, - struct pernet_operations *ops) +static int __register_pernet_operations(struct list_head *list, + struct pernet_operations *ops) { - if (ops->init == NULL) - return 0; - return ops->init(&init_net); + int err = 0; + err = ops_init(ops, &init_net); + if (err) + ops_free(ops, &init_net); + return err; + } -static void unregister_pernet_operations(struct pernet_operations *ops) +static void __unregister_pernet_operations(struct pernet_operations *ops) { if (ops->exit) ops->exit(&init_net); + ops_free(ops, &init_net); } -#endif + +#endif /* CONFIG_NET_NS */ static DEFINE_IDA(net_generic_ids); +static int register_pernet_operations(struct list_head *list, + struct pernet_operations *ops) +{ + int error; + + if (ops->id) { +again: + error = ida_get_new_above(&net_generic_ids, 1, ops->id); + if (error < 0) { + if (error == -EAGAIN) { + ida_pre_get(&net_generic_ids, GFP_KERNEL); + goto again; + } + return error; + } + } + error = __register_pernet_operations(list, ops); + if (error && ops->id) + ida_remove(&net_generic_ids, *ops->id); + + return error; +} + +static void unregister_pernet_operations(struct pernet_operations *ops) +{ + + __unregister_pernet_operations(ops); + if (ops->id) + ida_remove(&net_generic_ids, *ops->id); +} + /** * register_pernet_subsys - register a network namespace subsystem * @ops: pernet operations structure for the subsystem @@ -414,38 +497,6 @@ void unregister_pernet_subsys(struct pernet_operations *module) } EXPORT_SYMBOL_GPL(unregister_pernet_subsys); -int register_pernet_gen_subsys(int *id, struct pernet_operations *ops) -{ - int rv; - - mutex_lock(&net_mutex); -again: - rv = ida_get_new_above(&net_generic_ids, 1, id); - if (rv < 0) { - if (rv == -EAGAIN) { - ida_pre_get(&net_generic_ids, GFP_KERNEL); - goto again; - } - goto out; - } - rv = register_pernet_operations(first_device, ops); - if (rv < 0) - ida_remove(&net_generic_ids, *id); -out: - mutex_unlock(&net_mutex); - return rv; -} -EXPORT_SYMBOL_GPL(register_pernet_gen_subsys); - -void unregister_pernet_gen_subsys(int id, struct pernet_operations *ops) -{ - mutex_lock(&net_mutex); - unregister_pernet_operations(ops); - ida_remove(&net_generic_ids, id); - mutex_unlock(&net_mutex); -} -EXPORT_SYMBOL_GPL(unregister_pernet_gen_subsys); - /** * register_pernet_device - register a network namespace device * @ops: pernet operations structure for the subsystem @@ -477,30 +528,6 @@ int register_pernet_device(struct pernet_operations *ops) } EXPORT_SYMBOL_GPL(register_pernet_device); -int register_pernet_gen_device(int *id, struct pernet_operations *ops) -{ - int error; - mutex_lock(&net_mutex); -again: - error = ida_get_new_above(&net_generic_ids, 1, id); - if (error) { - if (error == -EAGAIN) { - ida_pre_get(&net_generic_ids, GFP_KERNEL); - goto again; - } - goto out; - } - error = register_pernet_operations(&pernet_list, ops); - if (error) - ida_remove(&net_generic_ids, *id); - else if (first_device == &pernet_list) - first_device = &ops->list; -out: - mutex_unlock(&net_mutex); - return error; -} -EXPORT_SYMBOL_GPL(register_pernet_gen_device); - /** * unregister_pernet_device - unregister a network namespace netdevice * @ops: pernet operations structure to manipulate @@ -520,17 +547,6 @@ void unregister_pernet_device(struct pernet_operations *ops) } EXPORT_SYMBOL_GPL(unregister_pernet_device); -void unregister_pernet_gen_device(int id, struct pernet_operations *ops) -{ - mutex_lock(&net_mutex); - if (&ops->list == first_device) - first_device = first_device->next; - unregister_pernet_operations(ops); - ida_remove(&net_generic_ids, id); - mutex_unlock(&net_mutex); -} -EXPORT_SYMBOL_GPL(unregister_pernet_gen_device); - static void net_generic_release(struct rcu_head *rcu) { struct net_generic *ng; -- cgit v1.3-8-gc7d7 From 65c0cfafce9575319fb6f70080fbe226e5617e3b Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 29 Nov 2009 15:46:17 +0000 Subject: net: remove [un]register_pernet_gen_... and update the docs. No that all of the callers have been updated to set fields in struct pernet_operations, and simplified to let the network namespace core handle the allocation and freeing of the storage for them, remove the surpurpflous methods and update the docs to the new style. Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- include/net/net_namespace.h | 22 ---------------------- include/net/netns/generic.h | 8 +++++--- 2 files changed, 5 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 080774b9dbf3..24a8c5591f63 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -264,28 +264,6 @@ extern void unregister_pernet_subsys(struct pernet_operations *); extern int register_pernet_device(struct pernet_operations *); extern void unregister_pernet_device(struct pernet_operations *); -static inline int register_pernet_gen_subsys(int *id, struct pernet_operations *ops) -{ - ops->id = id; - return register_pernet_subsys(ops); -} - -static inline void unregister_pernet_gen_subsys(int id, struct pernet_operations *ops) -{ - return unregister_pernet_subsys(ops); -} - -static inline int register_pernet_gen_device(int *id, struct pernet_operations *ops) -{ - ops->id = id; - return register_pernet_device(ops); -} - -static inline void unregister_pernet_gen_device(int id, struct pernet_operations *ops) -{ - return unregister_pernet_device(ops); -} - struct ctl_path; struct ctl_table; struct ctl_table_header; diff --git a/include/net/netns/generic.h b/include/net/netns/generic.h index 0c04fd2a700b..ff4982ab84b6 100644 --- a/include/net/netns/generic.h +++ b/include/net/netns/generic.h @@ -12,9 +12,11 @@ * stuff on the struct net without explicit struct net modification * * The rules are simple: - * 1. register the ops with register_pernet_gen_device to get the id - * of your private pointer; - * 2. call net_assign_generic() to put the private data on the struct + * 1. set pernet_operations->id. After register_pernet_device you + * will have the id of your private pointer. + * 2. Either set pernet_operations->size (to have the code allocate and + * free a private structure pointed to from struct net ) or + * call net_assign_generic() to put the private data on the struct * net (most preferably this should be done in the ->init callback * of the ops registered); * 3. do not change this pointer while the net is alive; -- cgit v1.3-8-gc7d7 From 66d2a5952eab875f1286e04f738ef029afdaf013 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 1 Dec 2009 21:54:35 -0800 Subject: Input: keyboard - fix lack of locking when traversing handler->h_list Keyboard handler should not attempt to traverse handler->h_list on its own, without any locking, otherwise it races with registering and unregistering of input handles which leads to crashes. Introduce input_handler_for_each_handle() helper that allows safely iterate over all handles attached to a particular handler and switch keyboard handler to use it. Reported-by: Jim Paradis Signed-off-by: Dmitry Torokhov --- drivers/char/keyboard.c | 202 +++++++++++++++++++++++++----------------------- drivers/input/input.c | 37 ++++++++- include/linux/input.h | 10 ++- 3 files changed, 148 insertions(+), 101 deletions(-) (limited to 'include') diff --git a/drivers/char/keyboard.c b/drivers/char/keyboard.c index ca090e57e161..ff8e9345f3c9 100644 --- a/drivers/char/keyboard.c +++ b/drivers/char/keyboard.c @@ -46,8 +46,6 @@ extern void ctrl_alt_del(void); -#define to_handle_h(n) container_of(n, struct input_handle, h_node) - /* * Exported functions/variables */ @@ -191,78 +189,85 @@ EXPORT_SYMBOL_GPL(unregister_keyboard_notifier); * etc.). So this means that scancodes for the extra function keys won't * be valid for the first event device, but will be for the second. */ + +struct getset_keycode_data { + unsigned int scancode; + unsigned int keycode; + int error; +}; + +static int getkeycode_helper(struct input_handle *handle, void *data) +{ + struct getset_keycode_data *d = data; + + d->error = input_get_keycode(handle->dev, d->scancode, &d->keycode); + + return d->error == 0; /* stop as soon as we successfully get one */ +} + int getkeycode(unsigned int scancode) { - struct input_handle *handle; - int keycode; - int error = -ENODEV; + struct getset_keycode_data d = { scancode, 0, -ENODEV }; - list_for_each_entry(handle, &kbd_handler.h_list, h_node) { - error = input_get_keycode(handle->dev, scancode, &keycode); - if (!error) - return keycode; - } + input_handler_for_each_handle(&kbd_handler, &d, getkeycode_helper); - return error; + return d.error ?: d.keycode; +} + +static int setkeycode_helper(struct input_handle *handle, void *data) +{ + struct getset_keycode_data *d = data; + + d->error = input_set_keycode(handle->dev, d->scancode, d->keycode); + + return d->error == 0; /* stop as soon as we successfully set one */ } int setkeycode(unsigned int scancode, unsigned int keycode) { - struct input_handle *handle; - int error = -ENODEV; + struct getset_keycode_data d = { scancode, keycode, -ENODEV }; - list_for_each_entry(handle, &kbd_handler.h_list, h_node) { - error = input_set_keycode(handle->dev, scancode, keycode); - if (!error) - break; - } + input_handler_for_each_handle(&kbd_handler, &d, setkeycode_helper); - return error; + return d.error; } /* * Making beeps and bells. */ -static void kd_nosound(unsigned long ignored) + +static int kd_sound_helper(struct input_handle *handle, void *data) { - struct input_handle *handle; + unsigned int *hz = data; + struct input_dev *dev = handle->dev; - list_for_each_entry(handle, &kbd_handler.h_list, h_node) { - if (test_bit(EV_SND, handle->dev->evbit)) { - if (test_bit(SND_TONE, handle->dev->sndbit)) - input_inject_event(handle, EV_SND, SND_TONE, 0); - if (test_bit(SND_BELL, handle->dev->sndbit)) - input_inject_event(handle, EV_SND, SND_BELL, 0); - } + if (test_bit(EV_SND, dev->evbit)) { + if (test_bit(SND_TONE, dev->sndbit)) + input_inject_event(handle, EV_SND, SND_TONE, *hz); + if (test_bit(SND_BELL, handle->dev->sndbit)) + input_inject_event(handle, EV_SND, SND_BELL, *hz ? 1 : 0); } + + return 0; +} + +static void kd_nosound(unsigned long ignored) +{ + static unsigned int zero; + + input_handler_for_each_handle(&kbd_handler, &zero, kd_sound_helper); } static DEFINE_TIMER(kd_mksound_timer, kd_nosound, 0, 0); void kd_mksound(unsigned int hz, unsigned int ticks) { - struct list_head *node; + del_timer_sync(&kd_mksound_timer); - del_timer(&kd_mksound_timer); + input_handler_for_each_handle(&kbd_handler, &hz, kd_sound_helper); - if (hz) { - list_for_each_prev(node, &kbd_handler.h_list) { - struct input_handle *handle = to_handle_h(node); - if (test_bit(EV_SND, handle->dev->evbit)) { - if (test_bit(SND_TONE, handle->dev->sndbit)) { - input_inject_event(handle, EV_SND, SND_TONE, hz); - break; - } - if (test_bit(SND_BELL, handle->dev->sndbit)) { - input_inject_event(handle, EV_SND, SND_BELL, 1); - break; - } - } - } - if (ticks) - mod_timer(&kd_mksound_timer, jiffies + ticks); - } else - kd_nosound(0); + if (hz && ticks) + mod_timer(&kd_mksound_timer, jiffies + ticks); } EXPORT_SYMBOL(kd_mksound); @@ -270,27 +275,34 @@ EXPORT_SYMBOL(kd_mksound); * Setting the keyboard rate. */ -int kbd_rate(struct kbd_repeat *rep) +static int kbd_rate_helper(struct input_handle *handle, void *data) { - struct list_head *node; - unsigned int d = 0; - unsigned int p = 0; - - list_for_each(node, &kbd_handler.h_list) { - struct input_handle *handle = to_handle_h(node); - struct input_dev *dev = handle->dev; - - if (test_bit(EV_REP, dev->evbit)) { - if (rep->delay > 0) - input_inject_event(handle, EV_REP, REP_DELAY, rep->delay); - if (rep->period > 0) - input_inject_event(handle, EV_REP, REP_PERIOD, rep->period); - d = dev->rep[REP_DELAY]; - p = dev->rep[REP_PERIOD]; - } + struct input_dev *dev = handle->dev; + struct kbd_repeat *rep = data; + + if (test_bit(EV_REP, dev->evbit)) { + + if (rep[0].delay > 0) + input_inject_event(handle, + EV_REP, REP_DELAY, rep[0].delay); + if (rep[0].period > 0) + input_inject_event(handle, + EV_REP, REP_PERIOD, rep[0].period); + + rep[1].delay = dev->rep[REP_DELAY]; + rep[1].period = dev->rep[REP_PERIOD]; } - rep->delay = d; - rep->period = p; + + return 0; +} + +int kbd_rate(struct kbd_repeat *rep) +{ + struct kbd_repeat data[2] = { *rep }; + + input_handler_for_each_handle(&kbd_handler, data, kbd_rate_helper); + *rep = data[1]; /* Copy currently used settings */ + return 0; } @@ -998,36 +1010,36 @@ static inline unsigned char getleds(void) return leds; } +static int kbd_update_leds_helper(struct input_handle *handle, void *data) +{ + unsigned char leds = *(unsigned char *)data; + + if (test_bit(EV_LED, handle->dev->evbit)) { + input_inject_event(handle, EV_LED, LED_SCROLLL, !!(leds & 0x01)); + input_inject_event(handle, EV_LED, LED_NUML, !!(leds & 0x02)); + input_inject_event(handle, EV_LED, LED_CAPSL, !!(leds & 0x04)); + input_inject_event(handle, EV_SYN, SYN_REPORT, 0); + } + + return 0; +} + /* - * This routine is the bottom half of the keyboard interrupt - * routine, and runs with all interrupts enabled. It does - * console changing, led setting and copy_to_cooked, which can - * take a reasonably long time. - * - * Aside from timing (which isn't really that important for - * keyboard interrupts as they happen often), using the software - * interrupt routines for this thing allows us to easily mask - * this when we don't want any of the above to happen. - * This allows for easy and efficient race-condition prevention - * for kbd_start => input_inject_event(dev, EV_LED, ...) => ... + * This is the tasklet that updates LED state on all keyboards + * attached to the box. The reason we use tasklet is that we + * need to handle the scenario when keyboard handler is not + * registered yet but we already getting updates form VT to + * update led state. */ - static void kbd_bh(unsigned long dummy) { - struct list_head *node; unsigned char leds = getleds(); if (leds != ledstate) { - list_for_each(node, &kbd_handler.h_list) { - struct input_handle *handle = to_handle_h(node); - input_inject_event(handle, EV_LED, LED_SCROLLL, !!(leds & 0x01)); - input_inject_event(handle, EV_LED, LED_NUML, !!(leds & 0x02)); - input_inject_event(handle, EV_LED, LED_CAPSL, !!(leds & 0x04)); - input_inject_event(handle, EV_SYN, SYN_REPORT, 0); - } + input_handler_for_each_handle(&kbd_handler, &leds, + kbd_update_leds_helper); + ledstate = leds; } - - ledstate = leds; } DECLARE_TASKLET_DISABLED(keyboard_tasklet, kbd_bh, 0); @@ -1370,15 +1382,11 @@ static void kbd_disconnect(struct input_handle *handle) */ static void kbd_start(struct input_handle *handle) { - unsigned char leds = ledstate; - tasklet_disable(&keyboard_tasklet); - if (leds != 0xff) { - input_inject_event(handle, EV_LED, LED_SCROLLL, !!(leds & 0x01)); - input_inject_event(handle, EV_LED, LED_NUML, !!(leds & 0x02)); - input_inject_event(handle, EV_LED, LED_CAPSL, !!(leds & 0x04)); - input_inject_event(handle, EV_SYN, SYN_REPORT, 0); - } + + if (ledstate != 0xff) + kbd_update_leds_helper(handle, &ledstate); + tasklet_enable(&keyboard_tasklet); } diff --git a/drivers/input/input.c b/drivers/input/input.c index cc763c96fada..5d6421bde4ba 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -1650,6 +1650,38 @@ void input_unregister_handler(struct input_handler *handler) } EXPORT_SYMBOL(input_unregister_handler); +/** + * input_handler_for_each_handle - handle iterator + * @handler: input handler to iterate + * @data: data for the callback + * @fn: function to be called for each handle + * + * Iterate over @bus's list of devices, and call @fn for each, passing + * it @data and stop when @fn returns a non-zero value. The function is + * using RCU to traverse the list and therefore may be usind in atonic + * contexts. The @fn callback is invoked from RCU critical section and + * thus must not sleep. + */ +int input_handler_for_each_handle(struct input_handler *handler, void *data, + int (*fn)(struct input_handle *, void *)) +{ + struct input_handle *handle; + int retval = 0; + + rcu_read_lock(); + + list_for_each_entry_rcu(handle, &handler->h_list, h_node) { + retval = fn(handle, data); + if (retval) + break; + } + + rcu_read_unlock(); + + return retval; +} +EXPORT_SYMBOL(input_handler_for_each_handle); + /** * input_register_handle - register a new input handle * @handle: handle to register @@ -1683,7 +1715,7 @@ int input_register_handle(struct input_handle *handle) * we can't be racing with input_unregister_handle() * and so separate lock is not needed here. */ - list_add_tail(&handle->h_node, &handler->h_list); + list_add_tail_rcu(&handle->h_node, &handler->h_list); if (handler->start) handler->start(handle); @@ -1706,7 +1738,7 @@ void input_unregister_handle(struct input_handle *handle) { struct input_dev *dev = handle->dev; - list_del_init(&handle->h_node); + list_del_rcu(&handle->h_node); /* * Take dev->mutex to prevent race with input_release_device(). @@ -1714,6 +1746,7 @@ void input_unregister_handle(struct input_handle *handle) mutex_lock(&dev->mutex); list_del_rcu(&handle->d_node); mutex_unlock(&dev->mutex); + synchronize_rcu(); } EXPORT_SYMBOL(input_unregister_handle); diff --git a/include/linux/input.h b/include/linux/input.h index 56d8e048c646..db563bbac9dd 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -1021,9 +1021,12 @@ struct ff_effect { * @keycodesize: size of elements in keycode table * @keycode: map of scancodes to keycodes for this device * @setkeycode: optional method to alter current keymap, used to implement - * sparse keymaps. If not supplied default mechanism will be used + * sparse keymaps. If not supplied default mechanism will be used. + * The method is being called while holding event_lock and thus must + * not sleep * @getkeycode: optional method to retrieve current keymap. If not supplied - * default mechanism will be used + * default mechanism will be used. The method is being called while + * holding event_lock and thus must not sleep * @ff: force feedback structure associated with the device if device * supports force feedback effects * @repeat_key: stores key code of the last key pressed; used to implement @@ -1295,6 +1298,9 @@ void input_unregister_device(struct input_dev *); int __must_check input_register_handler(struct input_handler *); void input_unregister_handler(struct input_handler *); +int input_handler_for_each_handle(struct input_handler *, void *data, + int (*fn)(struct input_handle *, void *)); + int input_register_handle(struct input_handle *); void input_unregister_handle(struct input_handle *); -- cgit v1.3-8-gc7d7 From 8592e6486a177a02f048567cb928bc3a1f9a86c3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 2 Dec 2009 12:56:46 +0900 Subject: sched: Revert 498657a478c60be092208422fefa9c7b248729c2 498657a478c60be092208422fefa9c7b248729c2 incorrectly assumed that preempt wasn't disabled around context_switch() and thus was fixing imaginary problem. It also broke KVM because it depended on ->sched_in() to be called with irq enabled so that it can do smp calls from there. Revert the incorrect commit and add comment describing different contexts under with the two callbacks are invoked. Avi: spotted transposed in/out in the added comment. Signed-off-by: Tejun Heo Acked-by: Avi Kivity Cc: peterz@infradead.org Cc: efault@gmx.de Cc: rusty@rustcorp.com.au LKML-Reference: <1259726212-30259-2-git-send-email-tj@kernel.org> Signed-off-by: Ingo Molnar --- include/linux/preempt.h | 5 +++++ kernel/sched.c | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 72b1a10a59b6..2e681d9555bd 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -105,6 +105,11 @@ struct preempt_notifier; * @sched_out: we've just been preempted * notifier: struct preempt_notifier for the task being preempted * next: the task that's kicking us out + * + * Please note that sched_in and out are called under different + * contexts. sched_out is called with rq lock held and irq disabled + * while sched_in is called without rq lock and irq enabled. This + * difference is intentional and depended upon by its users. */ struct preempt_ops { void (*sched_in)(struct preempt_notifier *notifier, int cpu); diff --git a/kernel/sched.c b/kernel/sched.c index b3d4e2be95aa..1031cae39c4c 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2768,9 +2768,9 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev) prev_state = prev->state; finish_arch_switch(prev); perf_event_task_sched_in(current, cpu_of(rq)); - fire_sched_in_preempt_notifiers(current); finish_lock_switch(rq, prev); + fire_sched_in_preempt_notifiers(current); if (mm) mmdrop(mm); if (unlikely(prev_state == TASK_DEAD)) { -- cgit v1.3-8-gc7d7 From 6b62fe019e39edfd1dbe3f224ecd0a87d9365223 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 2 Dec 2009 07:23:10 +0100 Subject: tracing/syscalls: Make syscall events print callbacks static enter_syscall_print_##sname and exit_syscall_print_##sname don't need to have a global scope. Make them static. Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Jason Baron Cc: Lai Jiangshan LKML-Reference: <1259734990-9034-1-git-send-regression-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- include/linux/syscalls.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index c2df3a593236..e79e2f3ccc51 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -134,7 +134,7 @@ struct perf_event_attr; #define SYSCALL_TRACE_ENTER_EVENT(sname) \ static const struct syscall_metadata __syscall_meta_##sname; \ static struct ftrace_event_call event_enter_##sname; \ - struct trace_event enter_syscall_print_##sname = { \ + static struct trace_event enter_syscall_print_##sname = { \ .trace = print_syscall_enter, \ }; \ static struct ftrace_event_call __used \ @@ -156,7 +156,7 @@ struct perf_event_attr; #define SYSCALL_TRACE_EXIT_EVENT(sname) \ static const struct syscall_metadata __syscall_meta_##sname; \ static struct ftrace_event_call event_exit_##sname; \ - struct trace_event exit_syscall_print_##sname = { \ + static struct trace_event exit_syscall_print_##sname = { \ .trace = print_syscall_exit, \ }; \ static struct ftrace_event_call __used \ -- cgit v1.3-8-gc7d7 From fa1452e808732ae10e8b1267fd75fc2d028d634b Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Mon, 30 Nov 2009 14:59:44 +0900 Subject: locking, task_struct: Reduce size on TRACE_IRQFLAGS and 64bit Reorder task_struct field for TRACE_IRQFLAGS to remove padding on 64-bit. Signed-off-by: Hiroshi Shimamoto Cc: Peter Zijlstra LKML-Reference: <4B135F50.8070302@ct.jp.nec.com> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 75e6e60bf583..49be8f7c05f6 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1421,17 +1421,17 @@ struct task_struct { #endif #ifdef CONFIG_TRACE_IRQFLAGS unsigned int irq_events; - int hardirqs_enabled; unsigned long hardirq_enable_ip; - unsigned int hardirq_enable_event; unsigned long hardirq_disable_ip; + unsigned int hardirq_enable_event; unsigned int hardirq_disable_event; - int softirqs_enabled; + int hardirqs_enabled; + int hardirq_context; unsigned long softirq_disable_ip; - unsigned int softirq_disable_event; unsigned long softirq_enable_ip; + unsigned int softirq_disable_event; unsigned int softirq_enable_event; - int hardirq_context; + int softirqs_enabled; int softirq_context; #endif #ifdef CONFIG_LOCKDEP -- cgit v1.3-8-gc7d7 From d99ca3b977fc5a93141304f571475c2af9e6c1c5 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Wed, 2 Dec 2009 17:26:47 +0900 Subject: sched, cputime: Cleanups related to task_times() - Remove if({u,s}t)s because no one call it with NULL now. - Use cputime_{add,sub}(). - Add ifndef-endif for prev_{u,s}time since they are used only when !VIRT_CPU_ACCOUNTING. Signed-off-by: Hidetoshi Seto Cc: Peter Zijlstra Cc: Spencer Candland Cc: Americo Wang Cc: Oleg Nesterov Cc: Balbir Singh Cc: Stanislaw Gruszka LKML-Reference: <4B1624C7.7040302@jp.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 ++ kernel/fork.c | 2 ++ kernel/sched.c | 16 ++++++---------- 3 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 0395b0f4df3a..dff85e58264e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1331,7 +1331,9 @@ struct task_struct { cputime_t utime, stime, utimescaled, stimescaled; cputime_t gtime; +#ifndef CONFIG_VIRT_CPU_ACCOUNTING cputime_t prev_utime, prev_stime; +#endif unsigned long nvcsw, nivcsw; /* context switch counts */ struct timespec start_time; /* monotonic time */ struct timespec real_start_time; /* boot based time */ diff --git a/kernel/fork.c b/kernel/fork.c index 166b8c49257c..ad7cb6d1193c 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1066,8 +1066,10 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->gtime = cputime_zero; p->utimescaled = cputime_zero; p->stimescaled = cputime_zero; +#ifndef CONFIG_VIRT_CPU_ACCOUNTING p->prev_utime = cputime_zero; p->prev_stime = cputime_zero; +#endif p->default_timer_slack_ns = current->timer_slack_ns; diff --git a/kernel/sched.c b/kernel/sched.c index 4883fee99314..17e2c1db2bde 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5184,10 +5184,8 @@ void account_idle_ticks(unsigned long ticks) #ifdef CONFIG_VIRT_CPU_ACCOUNTING void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) { - if (ut) - *ut = p->utime; - if (st) - *st = p->stime; + *ut = p->utime; + *st = p->stime; } #else @@ -5197,7 +5195,7 @@ void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) { - cputime_t rtime, utime = p->utime, total = utime + p->stime; + cputime_t rtime, utime = p->utime, total = cputime_add(utime, p->stime); /* * Use CFS's precise accounting: @@ -5217,12 +5215,10 @@ void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) * Compare with previous values, to keep monotonicity: */ p->prev_utime = max(p->prev_utime, utime); - p->prev_stime = max(p->prev_stime, rtime - p->prev_utime); + p->prev_stime = max(p->prev_stime, cputime_sub(rtime, p->prev_utime)); - if (ut) - *ut = p->prev_utime; - if (st) - *st = p->prev_stime; + *ut = p->prev_utime; + *st = p->prev_stime; } #endif -- cgit v1.3-8-gc7d7 From 0cf55e1ec08bb5a22e068309e2d8ba1180ab4239 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Wed, 2 Dec 2009 17:28:07 +0900 Subject: sched, cputime: Introduce thread_group_times() This is a real fix for problem of utime/stime values decreasing described in the thread: http://lkml.org/lkml/2009/11/3/522 Now cputime is accounted in the following way: - {u,s}time in task_struct are increased every time when the thread is interrupted by a tick (timer interrupt). - When a thread exits, its {u,s}time are added to signal->{u,s}time, after adjusted by task_times(). - When all threads in a thread_group exits, accumulated {u,s}time (and also c{u,s}time) in signal struct are added to c{u,s}time in signal struct of the group's parent. So {u,s}time in task struct are "raw" tick count, while {u,s}time and c{u,s}time in signal struct are "adjusted" values. And accounted values are used by: - task_times(), to get cputime of a thread: This function returns adjusted values that originates from raw {u,s}time and scaled by sum_exec_runtime that accounted by CFS. - thread_group_cputime(), to get cputime of a thread group: This function returns sum of all {u,s}time of living threads in the group, plus {u,s}time in the signal struct that is sum of adjusted cputimes of all exited threads belonged to the group. The problem is the return value of thread_group_cputime(), because it is mixed sum of "raw" value and "adjusted" value: group's {u,s}time = foreach(thread){{u,s}time} + exited({u,s}time) This misbehavior can break {u,s}time monotonicity. Assume that if there is a thread that have raw values greater than adjusted values (e.g. interrupted by 1000Hz ticks 50 times but only runs 45ms) and if it exits, cputime will decrease (e.g. -5ms). To fix this, we could do: group's {u,s}time = foreach(t){task_times(t)} + exited({u,s}time) But task_times() contains hard divisions, so applying it for every thread should be avoided. This patch fixes the above problem in the following way: - Modify thread's exit (= __exit_signal()) not to use task_times(). It means {u,s}time in signal struct accumulates raw values instead of adjusted values. As the result it makes thread_group_cputime() to return pure sum of "raw" values. - Introduce a new function thread_group_times(*task, *utime, *stime) that converts "raw" values of thread_group_cputime() to "adjusted" values, in same calculation procedure as task_times(). - Modify group's exit (= wait_task_zombie()) to use this introduced thread_group_times(). It make c{u,s}time in signal struct to have adjusted values like before this patch. - Replace some thread_group_cputime() by thread_group_times(). This replacements are only applied where conveys the "adjusted" cputime to users, and where already uses task_times() near by it. (i.e. sys_times(), getrusage(), and /proc//stat.) This patch have a positive side effect: - Before this patch, if a group contains many short-life threads (e.g. runs 0.9ms and not interrupted by ticks), the group's cputime could be invisible since thread's cputime was accumulated after adjusted: imagine adjustment function as adj(ticks, runtime), {adj(0, 0.9) + adj(0, 0.9) + ....} = {0 + 0 + ....} = 0. After this patch it will not happen because the adjustment is applied after accumulated. v2: - remove if()s, put new variables into signal_struct. Signed-off-by: Hidetoshi Seto Acked-by: Peter Zijlstra Cc: Spencer Candland Cc: Americo Wang Cc: Oleg Nesterov Cc: Balbir Singh Cc: Stanislaw Gruszka LKML-Reference: <4B162517.8040909@jp.fujitsu.com> Signed-off-by: Ingo Molnar --- fs/proc/array.c | 5 +---- include/linux/sched.h | 4 ++++ kernel/exit.c | 23 ++++++++++++----------- kernel/fork.c | 3 +++ kernel/sched.c | 41 +++++++++++++++++++++++++++++++++++++++++ kernel/sys.c | 18 ++++++++---------- 6 files changed, 69 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/fs/proc/array.c b/fs/proc/array.c index ca61a88aed66..2571da43c736 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -506,7 +506,6 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, /* add up live thread stats at the group level */ if (whole) { - struct task_cputime cputime; struct task_struct *t = task; do { min_flt += t->min_flt; @@ -517,9 +516,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, min_flt += sig->min_flt; maj_flt += sig->maj_flt; - thread_group_cputime(task, &cputime); - utime = cputime.utime; - stime = cputime.stime; + thread_group_times(task, &utime, &stime); gtime = cputime_add(gtime, sig->gtime); } diff --git a/include/linux/sched.h b/include/linux/sched.h index dff85e58264e..34238bd10ebf 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -624,6 +624,9 @@ struct signal_struct { cputime_t utime, stime, cutime, cstime; cputime_t gtime; cputime_t cgtime; +#ifndef CONFIG_VIRT_CPU_ACCOUNTING + cputime_t prev_utime, prev_stime; +#endif unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw; unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt; unsigned long inblock, oublock, cinblock, coublock; @@ -1723,6 +1726,7 @@ static inline void put_task_struct(struct task_struct *t) } extern void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st); +extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st); /* * Per process flags diff --git a/kernel/exit.c b/kernel/exit.c index 2eaf68b634e3..b221ad65fd20 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -91,8 +91,6 @@ static void __exit_signal(struct task_struct *tsk) if (atomic_dec_and_test(&sig->count)) posix_cpu_timers_exit_group(tsk); else { - cputime_t utime, stime; - /* * If there is any task waiting for the group exit * then notify it: @@ -112,9 +110,8 @@ static void __exit_signal(struct task_struct *tsk) * We won't ever get here for the group leader, since it * will have been the last reference on the signal_struct. */ - task_times(tsk, &utime, &stime); - sig->utime = cputime_add(sig->utime, utime); - sig->stime = cputime_add(sig->stime, stime); + sig->utime = cputime_add(sig->utime, tsk->utime); + sig->stime = cputime_add(sig->stime, tsk->stime); sig->gtime = cputime_add(sig->gtime, tsk->gtime); sig->min_flt += tsk->min_flt; sig->maj_flt += tsk->maj_flt; @@ -1208,6 +1205,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) struct signal_struct *psig; struct signal_struct *sig; unsigned long maxrss; + cputime_t tgutime, tgstime; /* * The resource counters for the group leader are in its @@ -1223,20 +1221,23 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) * need to protect the access to parent->signal fields, * as other threads in the parent group can be right * here reaping other children at the same time. + * + * We use thread_group_times() to get times for the thread + * group, which consolidates times for all threads in the + * group including the group leader. */ + thread_group_times(p, &tgutime, &tgstime); spin_lock_irq(&p->real_parent->sighand->siglock); psig = p->real_parent->signal; sig = p->signal; psig->cutime = cputime_add(psig->cutime, - cputime_add(p->utime, - cputime_add(sig->utime, - sig->cutime))); + cputime_add(tgutime, + sig->cutime)); psig->cstime = cputime_add(psig->cstime, - cputime_add(p->stime, - cputime_add(sig->stime, - sig->cstime))); + cputime_add(tgstime, + sig->cstime)); psig->cgtime = cputime_add(psig->cgtime, cputime_add(p->gtime, diff --git a/kernel/fork.c b/kernel/fork.c index ad7cb6d1193c..3d6f121bbe8a 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -884,6 +884,9 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero; sig->gtime = cputime_zero; sig->cgtime = cputime_zero; +#ifndef CONFIG_VIRT_CPU_ACCOUNTING + sig->prev_utime = sig->prev_stime = cputime_zero; +#endif sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0; sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0; sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0; diff --git a/kernel/sched.c b/kernel/sched.c index 17e2c1db2bde..e6ba726941ae 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5187,6 +5187,16 @@ void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) *ut = p->utime; *st = p->stime; } + +void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st) +{ + struct task_cputime cputime; + + thread_group_cputime(p, &cputime); + + *ut = cputime.utime; + *st = cputime.stime; +} #else #ifndef nsecs_to_cputime @@ -5220,6 +5230,37 @@ void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) *ut = p->prev_utime; *st = p->prev_stime; } + +/* + * Must be called with siglock held. + */ +void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st) +{ + struct signal_struct *sig = p->signal; + struct task_cputime cputime; + cputime_t rtime, utime, total; + + thread_group_cputime(p, &cputime); + + total = cputime_add(cputime.utime, cputime.stime); + rtime = nsecs_to_cputime(cputime.sum_exec_runtime); + + if (total) { + u64 temp; + + temp = (u64)(rtime * cputime.utime); + do_div(temp, total); + utime = (cputime_t)temp; + } else + utime = rtime; + + sig->prev_utime = max(sig->prev_utime, utime); + sig->prev_stime = max(sig->prev_stime, + cputime_sub(rtime, sig->prev_utime)); + + *ut = sig->prev_utime; + *st = sig->prev_stime; +} #endif /* diff --git a/kernel/sys.c b/kernel/sys.c index bbdfce0d4347..9968c5fb55b9 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -911,16 +911,15 @@ change_okay: void do_sys_times(struct tms *tms) { - struct task_cputime cputime; - cputime_t cutime, cstime; + cputime_t tgutime, tgstime, cutime, cstime; - thread_group_cputime(current, &cputime); spin_lock_irq(¤t->sighand->siglock); + thread_group_times(current, &tgutime, &tgstime); cutime = current->signal->cutime; cstime = current->signal->cstime; spin_unlock_irq(¤t->sighand->siglock); - tms->tms_utime = cputime_to_clock_t(cputime.utime); - tms->tms_stime = cputime_to_clock_t(cputime.stime); + tms->tms_utime = cputime_to_clock_t(tgutime); + tms->tms_stime = cputime_to_clock_t(tgstime); tms->tms_cutime = cputime_to_clock_t(cutime); tms->tms_cstime = cputime_to_clock_t(cstime); } @@ -1338,8 +1337,7 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) { struct task_struct *t; unsigned long flags; - cputime_t utime, stime; - struct task_cputime cputime; + cputime_t tgutime, tgstime, utime, stime; unsigned long maxrss = 0; memset((char *) r, 0, sizeof *r); @@ -1372,9 +1370,9 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) break; case RUSAGE_SELF: - thread_group_cputime(p, &cputime); - utime = cputime_add(utime, cputime.utime); - stime = cputime_add(stime, cputime.stime); + thread_group_times(p, &tgutime, &tgstime); + utime = cputime_add(utime, tgutime); + stime = cputime_add(stime, tgstime); r->ru_nvcsw += p->signal->nvcsw; r->ru_nivcsw += p->signal->nivcsw; r->ru_minflt += p->signal->min_flt; -- cgit v1.3-8-gc7d7 From c81c2d95449cd218c2022ce6014c52fef1eb1f66 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 2 Dec 2009 16:49:02 +0000 Subject: skbuff: remove skb_dma_map/unmap The two functions skb_dma_map/unmap are unsafe to use as they cause problems when packets are cloned and sent to multiple devices while a HW IOMMU is enabled. Due to this it is best to remove the code so it is not used by any other network driver maintainters. Signed-off-by: Alexander Duyck Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- include/linux/skbuff.h | 8 ------- net/core/Makefile | 1 - net/core/skb_dma_map.c | 65 -------------------------------------------------- 3 files changed, 74 deletions(-) delete mode 100644 net/core/skb_dma_map.c (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 89eed8cdd318..ae836fded530 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -416,14 +416,6 @@ struct sk_buff { #include -#ifdef CONFIG_HAS_DMA -#include -extern int skb_dma_map(struct device *dev, struct sk_buff *skb, - enum dma_data_direction dir); -extern void skb_dma_unmap(struct device *dev, struct sk_buff *skb, - enum dma_data_direction dir); -#endif - static inline struct dst_entry *skb_dst(const struct sk_buff *skb) { return (struct dst_entry *)skb->_skb_dst; diff --git a/net/core/Makefile b/net/core/Makefile index 796f46eece5f..08791ac3e05a 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -6,7 +6,6 @@ obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \ gen_stats.o gen_estimator.o net_namespace.o obj-$(CONFIG_SYSCTL) += sysctl_net_core.o -obj-$(CONFIG_HAS_DMA) += skb_dma_map.o obj-y += dev.o ethtool.o dev_mcast.o dst.o netevent.o \ neighbour.o rtnetlink.o utils.o link_watch.o filter.o diff --git a/net/core/skb_dma_map.c b/net/core/skb_dma_map.c deleted file mode 100644 index 79687dfd6957..000000000000 --- a/net/core/skb_dma_map.c +++ /dev/null @@ -1,65 +0,0 @@ -/* skb_dma_map.c: DMA mapping helpers for socket buffers. - * - * Copyright (C) David S. Miller - */ - -#include -#include -#include -#include - -int skb_dma_map(struct device *dev, struct sk_buff *skb, - enum dma_data_direction dir) -{ - struct skb_shared_info *sp = skb_shinfo(skb); - dma_addr_t map; - int i; - - map = dma_map_single(dev, skb->data, - skb_headlen(skb), dir); - if (dma_mapping_error(dev, map)) - goto out_err; - - sp->dma_head = map; - for (i = 0; i < sp->nr_frags; i++) { - skb_frag_t *fp = &sp->frags[i]; - - map = dma_map_page(dev, fp->page, fp->page_offset, - fp->size, dir); - if (dma_mapping_error(dev, map)) - goto unwind; - sp->dma_maps[i] = map; - } - - return 0; - -unwind: - while (--i >= 0) { - skb_frag_t *fp = &sp->frags[i]; - - dma_unmap_page(dev, sp->dma_maps[i], - fp->size, dir); - } - dma_unmap_single(dev, sp->dma_head, - skb_headlen(skb), dir); -out_err: - return -ENOMEM; -} -EXPORT_SYMBOL(skb_dma_map); - -void skb_dma_unmap(struct device *dev, struct sk_buff *skb, - enum dma_data_direction dir) -{ - struct skb_shared_info *sp = skb_shinfo(skb); - int i; - - dma_unmap_single(dev, sp->dma_head, - skb_headlen(skb), dir); - for (i = 0; i < sp->nr_frags; i++) { - skb_frag_t *fp = &sp->frags[i]; - - dma_unmap_page(dev, sp->dma_maps[i], - fp->size, dir); - } -} -EXPORT_SYMBOL(skb_dma_unmap); -- cgit v1.3-8-gc7d7 From 602da297e293eb2cbd28dcdbbe247593a46a853a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 2 Dec 2009 21:58:33 -0800 Subject: ide: Increase WAIT_DRQ to accomodate some CF cards and SSD drives. Based upon a patch by Philippe De Muyter, and feedback from Mark Lord and Robert Hancock. As noted by Mark Lord, the outdated ATA1 spec specifies a 20msec timeout for setting DRQ but lots of common devices overshoot this. Signed-off-by: David S. Miller --- include/linux/ide.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ide.h b/include/linux/ide.h index e4135d6e0556..0ec612959042 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -125,8 +125,8 @@ struct ide_io_ports { * Timeouts for various operations: */ enum { - /* spec allows up to 20ms */ - WAIT_DRQ = HZ / 10, /* 100ms */ + /* spec allows up to 20ms, but CF cards and SSD drives need more */ + WAIT_DRQ = 1 * HZ, /* 1s */ /* some laptops are very slow */ WAIT_READY = 5 * HZ, /* 5s */ /* should be less than 3ms (?), if all ATAPI CD is closed at boot */ -- cgit v1.3-8-gc7d7 From e6b4d11367519bc71729c09d05a126b133c755be Mon Sep 17 00:00:00 2001 From: William Allen Simpson Date: Wed, 2 Dec 2009 18:07:39 +0000 Subject: TCPCT part 1a: add request_values parameter for sending SYNACK Add optional function parameters associated with sending SYNACK. These parameters are not needed after sending SYNACK, and are not used for retransmission. Avoids extending struct tcp_request_sock, and avoids allocating kernel memory. Also affects DCCP as it uses common struct request_sock_ops, but this parameter is currently reserved for future use. Signed-off-by: William.Allen.Simpson@gmail.com Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/request_sock.h | 8 +++++++- include/net/tcp.h | 3 ++- net/dccp/ipv4.c | 5 +++-- net/dccp/ipv6.c | 5 +++-- net/dccp/minisocks.c | 2 +- net/ipv4/inet_connection_sock.c | 2 +- net/ipv4/tcp_ipv4.c | 18 ++++++++++-------- net/ipv4/tcp_minisocks.c | 2 +- net/ipv4/tcp_output.c | 3 ++- net/ipv6/tcp_ipv6.c | 27 ++++++++++++--------------- 10 files changed, 42 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/include/net/request_sock.h b/include/net/request_sock.h index c7190846e128..c9b50ebd9ce9 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -27,13 +27,19 @@ struct sk_buff; struct dst_entry; struct proto; +/* empty to "strongly type" an otherwise void parameter. + */ +struct request_values { +}; + struct request_sock_ops { int family; int obj_size; struct kmem_cache *slab; char *slab_name; int (*rtx_syn_ack)(struct sock *sk, - struct request_sock *req); + struct request_sock *req, + struct request_values *rvp); void (*send_ack)(struct sock *sk, struct sk_buff *skb, struct request_sock *req); void (*send_reset)(struct sock *sk, diff --git a/include/net/tcp.h b/include/net/tcp.h index 325bfcf5c934..ec183fda05d0 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -440,7 +440,8 @@ extern int tcp_connect(struct sock *sk); extern struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst, - struct request_sock *req); + struct request_sock *req, + struct request_values *rvp); extern int tcp_disconnect(struct sock *sk, int flags); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 2423a0866733..efbcfdc12796 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -477,7 +477,8 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk, return &rt->u.dst; } -static int dccp_v4_send_response(struct sock *sk, struct request_sock *req) +static int dccp_v4_send_response(struct sock *sk, struct request_sock *req, + struct request_values *rv_unused) { int err = -1; struct sk_buff *skb; @@ -626,7 +627,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) dreq->dreq_iss = dccp_v4_init_sequence(skb); dreq->dreq_service = service; - if (dccp_v4_send_response(sk, req)) + if (dccp_v4_send_response(sk, req, NULL)) goto drop_and_free; inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 50ea91a77705..6574215a1f51 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -241,7 +241,8 @@ out: } -static int dccp_v6_send_response(struct sock *sk, struct request_sock *req) +static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, + struct request_values *rv_unused) { struct inet6_request_sock *ireq6 = inet6_rsk(req); struct ipv6_pinfo *np = inet6_sk(sk); @@ -468,7 +469,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) dreq->dreq_iss = dccp_v6_init_sequence(skb); dreq->dreq_service = service; - if (dccp_v6_send_response(sk, req)) + if (dccp_v6_send_response(sk, req, NULL)) goto drop_and_free; inet6_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 5ca49cec95f5..af226a063141 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -184,7 +184,7 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, * counter (backoff, monitored by dccp_response_timer). */ req->retrans++; - req->rsk_ops->rtx_syn_ack(sk, req); + req->rsk_ops->rtx_syn_ack(sk, req, NULL); } /* Network Duplicate, discard packet */ return NULL; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 9b35c56d1023..ee16475f8fc3 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -531,7 +531,7 @@ void inet_csk_reqsk_queue_prune(struct sock *parent, &expire, &resend); if (!expire && (!resend || - !req->rsk_ops->rtx_syn_ack(parent, req) || + !req->rsk_ops->rtx_syn_ack(parent, req, NULL) || inet_rsk(req)->acked)) { unsigned long timeo; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index df18ce04f41e..649a36d99c73 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -742,8 +742,9 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, * This still operates on a request_sock only, not on a big * socket. */ -static int __tcp_v4_send_synack(struct sock *sk, struct request_sock *req, - struct dst_entry *dst) +static int __tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, + struct request_sock *req, + struct request_values *rvp) { const struct inet_request_sock *ireq = inet_rsk(req); int err = -1; @@ -753,7 +754,7 @@ static int __tcp_v4_send_synack(struct sock *sk, struct request_sock *req, if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL) return -1; - skb = tcp_make_synack(sk, dst, req); + skb = tcp_make_synack(sk, dst, req, rvp); if (skb) { struct tcphdr *th = tcp_hdr(skb); @@ -774,9 +775,10 @@ static int __tcp_v4_send_synack(struct sock *sk, struct request_sock *req, return err; } -static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req) +static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req, + struct request_values *rvp) { - return __tcp_v4_send_synack(sk, req, NULL); + return __tcp_v4_send_synack(sk, NULL, req, rvp); } /* @@ -1211,13 +1213,13 @@ static struct timewait_sock_ops tcp_timewait_sock_ops = { int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) { - struct inet_request_sock *ireq; struct tcp_options_received tmp_opt; struct request_sock *req; + struct inet_request_sock *ireq; + struct dst_entry *dst = NULL; __be32 saddr = ip_hdr(skb)->saddr; __be32 daddr = ip_hdr(skb)->daddr; __u32 isn = TCP_SKB_CB(skb)->when; - struct dst_entry *dst = NULL; #ifdef CONFIG_SYN_COOKIES int want_cookie = 0; #else @@ -1337,7 +1339,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) } tcp_rsk(req)->snt_isn = isn; - if (__tcp_v4_send_synack(sk, req, dst) || want_cookie) + if (__tcp_v4_send_synack(sk, dst, req, NULL) || want_cookie) goto drop_and_free; inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index ab32c181f749..d3f6bbfc76f0 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -531,7 +531,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, * Enforce "SYN-ACK" according to figure 8, figure 6 * of RFC793, fixed by RFC1122. */ - req->rsk_ops->rtx_syn_ack(sk, req); + req->rsk_ops->rtx_syn_ack(sk, req, NULL); return NULL; } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 875bc6dcd920..b8b25049f257 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2224,7 +2224,8 @@ int tcp_send_synack(struct sock *sk) /* Prepare a SYN-ACK. */ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, - struct request_sock *req) + struct request_sock *req, + struct request_values *rvp) { struct inet_request_sock *ireq = inet_rsk(req); struct tcp_sock *tp = tcp_sk(sk); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index de709091b26d..da6e24416d75 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -461,7 +461,8 @@ out: } -static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req) +static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, + struct request_values *rvp) { struct inet6_request_sock *treq = inet6_rsk(req); struct ipv6_pinfo *np = inet6_sk(sk); @@ -499,7 +500,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req) if ((err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) goto done; - skb = tcp_make_synack(sk, dst, req); + skb = tcp_make_synack(sk, dst, req, rvp); if (skb) { struct tcphdr *th = tcp_hdr(skb); @@ -1161,13 +1162,13 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) */ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) { + struct tcp_options_received tmp_opt; + struct request_sock *req; struct inet6_request_sock *treq; struct ipv6_pinfo *np = inet6_sk(sk); - struct tcp_options_received tmp_opt; struct tcp_sock *tp = tcp_sk(sk); - struct request_sock *req = NULL; - __u32 isn = TCP_SKB_CB(skb)->when; struct dst_entry *dst = __sk_dst_get(sk); + __u32 isn = TCP_SKB_CB(skb)->when; #ifdef CONFIG_SYN_COOKIES int want_cookie = 0; #else @@ -1239,23 +1240,19 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) isn = tcp_v6_init_sequence(skb); } - tcp_rsk(req)->snt_isn = isn; security_inet_conn_request(sk, skb, req); - if (tcp_v6_send_synack(sk, req)) - goto drop; + if (tcp_v6_send_synack(sk, req, NULL) || want_cookie) + goto drop_and_free; - if (!want_cookie) { - inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); - return 0; - } + inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); + return 0; +drop_and_free: + reqsk_free(req); drop: - if (req) - reqsk_free(req); - return 0; /* don't send reset */ } -- cgit v1.3-8-gc7d7 From da5c78c82629a167794436e4306b4cf1faddea90 Mon Sep 17 00:00:00 2001 From: William Allen Simpson Date: Wed, 2 Dec 2009 18:12:09 +0000 Subject: TCPCT part 1b: generate Responder Cookie secret Define (missing) hash message size for SHA1. Define hashing size constants specific to TCP cookies. Add new function: tcp_cookie_generator(). Maintain global secret values for tcp_cookie_generator(). This is a significantly revised implementation of earlier (15-year-old) Photuris [RFC-2522] code for the KA9Q cooperative multitasking platform. Linux RCU technique appears to be well-suited to this application, though neither of the circular queue items are freed. These functions will also be used in subsequent patches that implement additional features. Signed-off-by: William.Allen.Simpson@gmail.com Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/cryptohash.h | 1 + include/net/tcp.h | 8 +++ net/ipv4/tcp.c | 140 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 149 insertions(+) (limited to 'include') diff --git a/include/linux/cryptohash.h b/include/linux/cryptohash.h index c118b2ad9807..ec78a4bbe1d5 100644 --- a/include/linux/cryptohash.h +++ b/include/linux/cryptohash.h @@ -2,6 +2,7 @@ #define __CRYPTOHASH_H #define SHA_DIGEST_WORDS 5 +#define SHA_MESSAGE_BYTES (512 /*bits*/ / 8) #define SHA_WORKSPACE_WORDS 80 void sha_init(__u32 *buf); diff --git a/include/net/tcp.h b/include/net/tcp.h index ec183fda05d0..4a99a8e39121 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1478,6 +1478,14 @@ struct tcp_request_sock_ops { #endif }; +/* Using SHA1 for now, define some constants. + */ +#define COOKIE_DIGEST_WORDS (SHA_DIGEST_WORDS) +#define COOKIE_MESSAGE_WORDS (SHA_MESSAGE_BYTES / 4) +#define COOKIE_WORKSPACE_WORDS (COOKIE_DIGEST_WORDS + COOKIE_MESSAGE_WORDS) + +extern int tcp_cookie_generator(u32 *bakery); + extern void tcp_v4_init(void); extern void tcp_init(void); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 7d4648f8b3d3..ba03ac80435a 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -264,6 +264,7 @@ #include #include #include +#include #include #include @@ -2848,6 +2849,135 @@ EXPORT_SYMBOL(tcp_md5_hash_key); #endif +/** + * Each Responder maintains up to two secret values concurrently for + * efficient secret rollover. Each secret value has 4 states: + * + * Generating. (tcp_secret_generating != tcp_secret_primary) + * Generates new Responder-Cookies, but not yet used for primary + * verification. This is a short-term state, typically lasting only + * one round trip time (RTT). + * + * Primary. (tcp_secret_generating == tcp_secret_primary) + * Used both for generation and primary verification. + * + * Retiring. (tcp_secret_retiring != tcp_secret_secondary) + * Used for verification, until the first failure that can be + * verified by the newer Generating secret. At that time, this + * cookie's state is changed to Secondary, and the Generating + * cookie's state is changed to Primary. This is a short-term state, + * typically lasting only one round trip time (RTT). + * + * Secondary. (tcp_secret_retiring == tcp_secret_secondary) + * Used for secondary verification, after primary verification + * failures. This state lasts no more than twice the Maximum Segment + * Lifetime (2MSL). Then, the secret is discarded. + */ +struct tcp_cookie_secret { + /* The secret is divided into two parts. The digest part is the + * equivalent of previously hashing a secret and saving the state, + * and serves as an initialization vector (IV). The message part + * serves as the trailing secret. + */ + u32 secrets[COOKIE_WORKSPACE_WORDS]; + unsigned long expires; +}; + +#define TCP_SECRET_1MSL (HZ * TCP_PAWS_MSL) +#define TCP_SECRET_2MSL (HZ * TCP_PAWS_MSL * 2) +#define TCP_SECRET_LIFE (HZ * 600) + +static struct tcp_cookie_secret tcp_secret_one; +static struct tcp_cookie_secret tcp_secret_two; + +/* Essentially a circular list, without dynamic allocation. */ +static struct tcp_cookie_secret *tcp_secret_generating; +static struct tcp_cookie_secret *tcp_secret_primary; +static struct tcp_cookie_secret *tcp_secret_retiring; +static struct tcp_cookie_secret *tcp_secret_secondary; + +static DEFINE_SPINLOCK(tcp_secret_locker); + +/* Select a pseudo-random word in the cookie workspace. + */ +static inline u32 tcp_cookie_work(const u32 *ws, const int n) +{ + return ws[COOKIE_DIGEST_WORDS + ((COOKIE_MESSAGE_WORDS-1) & ws[n])]; +} + +/* Fill bakery[COOKIE_WORKSPACE_WORDS] with generator, updating as needed. + * Called in softirq context. + * Returns: 0 for success. + */ +int tcp_cookie_generator(u32 *bakery) +{ + unsigned long jiffy = jiffies; + + if (unlikely(time_after_eq(jiffy, tcp_secret_generating->expires))) { + spin_lock_bh(&tcp_secret_locker); + if (!time_after_eq(jiffy, tcp_secret_generating->expires)) { + /* refreshed by another */ + memcpy(bakery, + &tcp_secret_generating->secrets[0], + COOKIE_WORKSPACE_WORDS); + } else { + /* still needs refreshing */ + get_random_bytes(bakery, COOKIE_WORKSPACE_WORDS); + + /* The first time, paranoia assumes that the + * randomization function isn't as strong. But, + * this secret initialization is delayed until + * the last possible moment (packet arrival). + * Although that time is observable, it is + * unpredictably variable. Mash in the most + * volatile clock bits available, and expire the + * secret extra quickly. + */ + if (unlikely(tcp_secret_primary->expires == + tcp_secret_secondary->expires)) { + struct timespec tv; + + getnstimeofday(&tv); + bakery[COOKIE_DIGEST_WORDS+0] ^= + (u32)tv.tv_nsec; + + tcp_secret_secondary->expires = jiffy + + TCP_SECRET_1MSL + + (0x0f & tcp_cookie_work(bakery, 0)); + } else { + tcp_secret_secondary->expires = jiffy + + TCP_SECRET_LIFE + + (0xff & tcp_cookie_work(bakery, 1)); + tcp_secret_primary->expires = jiffy + + TCP_SECRET_2MSL + + (0x1f & tcp_cookie_work(bakery, 2)); + } + memcpy(&tcp_secret_secondary->secrets[0], + bakery, COOKIE_WORKSPACE_WORDS); + + rcu_assign_pointer(tcp_secret_generating, + tcp_secret_secondary); + rcu_assign_pointer(tcp_secret_retiring, + tcp_secret_primary); + /* + * Neither call_rcu() nor synchronize_rcu() needed. + * Retiring data is not freed. It is replaced after + * further (locked) pointer updates, and a quiet time + * (minimum 1MSL, maximum LIFE - 2MSL). + */ + } + spin_unlock_bh(&tcp_secret_locker); + } else { + rcu_read_lock_bh(); + memcpy(bakery, + &rcu_dereference(tcp_secret_generating)->secrets[0], + COOKIE_WORKSPACE_WORDS); + rcu_read_unlock_bh(); + } + return 0; +} +EXPORT_SYMBOL(tcp_cookie_generator); + void tcp_done(struct sock *sk) { if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV) @@ -2882,6 +3012,7 @@ void __init tcp_init(void) struct sk_buff *skb = NULL; unsigned long nr_pages, limit; int order, i, max_share; + unsigned long jiffy = jiffies; BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); @@ -2975,6 +3106,15 @@ void __init tcp_init(void) tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size); tcp_register_congestion_control(&tcp_reno); + + memset(&tcp_secret_one.secrets[0], 0, sizeof(tcp_secret_one.secrets)); + memset(&tcp_secret_two.secrets[0], 0, sizeof(tcp_secret_two.secrets)); + tcp_secret_one.expires = jiffy; /* past due */ + tcp_secret_two.expires = jiffy; /* past due */ + tcp_secret_generating = &tcp_secret_one; + tcp_secret_primary = &tcp_secret_one; + tcp_secret_retiring = &tcp_secret_two; + tcp_secret_secondary = &tcp_secret_two; } EXPORT_SYMBOL(tcp_close); -- cgit v1.3-8-gc7d7 From 519855c508b9a17878c0977a3cdefc09b59b30df Mon Sep 17 00:00:00 2001 From: William Allen Simpson Date: Wed, 2 Dec 2009 18:14:19 +0000 Subject: TCPCT part 1c: sysctl_tcp_cookie_size, socket option TCP_COOKIE_TRANSACTIONS Define sysctl (tcp_cookie_size) to turn on and off the cookie option default globally, instead of a compiled configuration option. Define per socket option (TCP_COOKIE_TRANSACTIONS) for setting constant data values, retrieving variable cookie values, and other facilities. Move inline tcp_clear_options() unchanged from net/tcp.h to linux/tcp.h, near its corresponding struct tcp_options_received (prior to changes). This is a straightforward re-implementation of an earlier (year-old) patch that no longer applies cleanly, with permission of the original author (Adam Langley): http://thread.gmane.org/gmane.linux.network/102586 These functions will also be used in subsequent patches that implement additional features. Requires: net: TCP_MSS_DEFAULT, TCP_MSS_DESIRED Signed-off-by: William.Allen.Simpson@gmail.com Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 8 ++++++++ include/linux/tcp.h | 33 ++++++++++++++++++++++++++++++++- include/net/tcp.h | 6 +----- net/ipv4/sysctl_net_ipv4.c | 8 ++++++++ net/ipv4/tcp_output.c | 3 +++ 5 files changed, 52 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 554440af675c..989f5538b8dd 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -164,6 +164,14 @@ tcp_congestion_control - STRING additional choices may be available based on kernel configuration. Default is set as part of kernel configuration. +tcp_cookie_size - INTEGER + Default size of TCP Cookie Transactions (TCPCT) option, that may be + overridden on a per socket basis by the TCPCT socket option. + Values greater than the maximum (16) are interpreted as the maximum. + Values greater than zero and less than the minimum (8) are interpreted + as the minimum. Odd values are interpreted as the next even value. + Default: 0 (off). + tcp_dsack - BOOLEAN Allows TCP to send "duplicate" SACKs. diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 32d7d77b4a01..eaa3113b3786 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -102,7 +102,9 @@ enum { #define TCP_QUICKACK 12 /* Block/reenable quick acks */ #define TCP_CONGESTION 13 /* Congestion control algorithm */ #define TCP_MD5SIG 14 /* TCP MD5 Signature (RFC2385) */ +#define TCP_COOKIE_TRANSACTIONS 15 /* TCP Cookie Transactions */ +/* for TCP_INFO socket option */ #define TCPI_OPT_TIMESTAMPS 1 #define TCPI_OPT_SACK 2 #define TCPI_OPT_WSCALE 4 @@ -174,6 +176,30 @@ struct tcp_md5sig { __u8 tcpm_key[TCP_MD5SIG_MAXKEYLEN]; /* key (binary) */ }; +/* for TCP_COOKIE_TRANSACTIONS (TCPCT) socket option */ +#define TCP_COOKIE_MIN 8 /* 64-bits */ +#define TCP_COOKIE_MAX 16 /* 128-bits */ +#define TCP_COOKIE_PAIR_SIZE (2*TCP_COOKIE_MAX) + +/* Flags for both getsockopt and setsockopt */ +#define TCP_COOKIE_IN_ALWAYS (1 << 0) /* Discard SYN without cookie */ +#define TCP_COOKIE_OUT_NEVER (1 << 1) /* Prohibit outgoing cookies, + * supercedes everything. */ + +/* Flags for getsockopt */ +#define TCP_S_DATA_IN (1 << 2) /* Was data received? */ +#define TCP_S_DATA_OUT (1 << 3) /* Was data sent? */ + +/* TCP_COOKIE_TRANSACTIONS data */ +struct tcp_cookie_transactions { + __u16 tcpct_flags; /* see above */ + __u8 __tcpct_pad1; /* zero */ + __u8 tcpct_cookie_desired; /* bytes */ + __u16 tcpct_s_data_desired; /* bytes of variable data */ + __u16 tcpct_used; /* bytes in value */ + __u8 tcpct_value[TCP_MSS_DEFAULT]; +}; + #ifdef __KERNEL__ #include @@ -227,6 +253,11 @@ struct tcp_options_received { u16 mss_clamp; /* Maximal mss, negotiated at connection setup */ }; +static inline void tcp_clear_options(struct tcp_options_received *rx_opt) +{ + rx_opt->tstamp_ok = rx_opt->sack_ok = rx_opt->wscale_ok = rx_opt->snd_wscale = 0; +} + /* This is the max number of SACKS that we'll generate and process. It's safe * to increse this, although since: * size = TCPOLEN_SACK_BASE_ALIGNED (4) + n * TCPOLEN_SACK_PERBLOCK (8) @@ -435,6 +466,6 @@ static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk) return (struct tcp_timewait_sock *)sk; } -#endif +#endif /* __KERNEL__ */ #endif /* _LINUX_TCP_H */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 4a99a8e39121..738b65f01e26 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -234,6 +234,7 @@ extern int sysctl_tcp_base_mss; extern int sysctl_tcp_workaround_signed_windows; extern int sysctl_tcp_slow_start_after_idle; extern int sysctl_tcp_max_ssthresh; +extern int sysctl_tcp_cookie_size; extern atomic_t tcp_memory_allocated; extern struct percpu_counter tcp_sockets_allocated; @@ -340,11 +341,6 @@ static inline void tcp_dec_quickack_mode(struct sock *sk, extern void tcp_enter_quickack_mode(struct sock *sk); -static inline void tcp_clear_options(struct tcp_options_received *rx_opt) -{ - rx_opt->tstamp_ok = rx_opt->sack_ok = rx_opt->wscale_ok = rx_opt->snd_wscale = 0; -} - #define TCP_ECN_OK 1 #define TCP_ECN_QUEUE_CWR 2 #define TCP_ECN_DEMAND_CWR 4 diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index c00323bae044..13f7ab6ad6a0 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -712,6 +712,14 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "tcp_cookie_size", + .data = &sysctl_tcp_cookie_size, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { .ctl_name = CTL_UNNUMBERED, .procname = "udp_mem", diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index b8b25049f257..307f318fe931 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -59,6 +59,9 @@ int sysctl_tcp_base_mss __read_mostly = 512; /* By default, RFC2861 behavior. */ int sysctl_tcp_slow_start_after_idle __read_mostly = 1; +int sysctl_tcp_cookie_size __read_mostly = 0; /* TCP_COOKIE_MAX */ + + /* Account for new data that has been sent to the network. */ static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb) { -- cgit v1.3-8-gc7d7 From 435cf559f02ea3a3159eb316f97dc88bdebe9432 Mon Sep 17 00:00:00 2001 From: William Allen Simpson Date: Wed, 2 Dec 2009 18:17:05 +0000 Subject: TCPCT part 1d: define TCP cookie option, extend existing struct's Data structures are carefully composed to require minimal additions. For example, the struct tcp_options_received cookie_plus variable fits between existing 16-bit and 8-bit variables, requiring no additional space (taking alignment into consideration). There are no additions to tcp_request_sock, and only 1 pointer in tcp_sock. This is a significantly revised implementation of an earlier (year-old) patch that no longer applies cleanly, with permission of the original author (Adam Langley): http://thread.gmane.org/gmane.linux.network/102586 The principle difference is using a TCP option to carry the cookie nonce, instead of a user configured offset in the data. This is more flexible and less subject to user configuration error. Such a cookie option has been suggested for many years, and is also useful without SYN data, allowing several related concepts to use the same extension option. "Re: SYN floods (was: does history repeat itself?)", September 9, 1996. http://www.merit.net/mail.archives/nanog/1996-09/msg00235.html "Re: what a new TCP header might look like", May 12, 1998. ftp://ftp.isi.edu/end2end/end2end-interest-1998.mail These functions will also be used in subsequent patches that implement additional features. Requires: TCPCT part 1a: add request_values parameter for sending SYNACK TCPCT part 1b: generate Responder Cookie secret TCPCT part 1c: sysctl_tcp_cookie_size, socket option TCP_COOKIE_TRANSACTIONS Signed-off-by: William.Allen.Simpson@gmail.com Signed-off-by: David S. Miller --- include/linux/tcp.h | 29 +++++++++++++---- include/net/tcp.h | 83 ++++++++++++++++++++++++++++++++++++++++++++++++ net/ipv4/tcp_ipv4.c | 20 ++++++++++++ net/ipv4/tcp_minisocks.c | 46 ++++++++++++++++++++++----- net/ipv6/tcp_ipv6.c | 13 ++++++++ 5 files changed, 177 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index eaa3113b3786..7fee8a4df931 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -247,31 +247,38 @@ struct tcp_options_received { sack_ok : 4, /* SACK seen on SYN packet */ snd_wscale : 4, /* Window scaling received from sender */ rcv_wscale : 4; /* Window scaling to send to receiver */ -/* SACKs data */ + u8 cookie_plus:6, /* bytes in authenticator/cookie option */ + cookie_out_never:1, + cookie_in_always:1; u8 num_sacks; /* Number of SACK blocks */ - u16 user_mss; /* mss requested by user in ioctl */ + u16 user_mss; /* mss requested by user in ioctl */ u16 mss_clamp; /* Maximal mss, negotiated at connection setup */ }; static inline void tcp_clear_options(struct tcp_options_received *rx_opt) { - rx_opt->tstamp_ok = rx_opt->sack_ok = rx_opt->wscale_ok = rx_opt->snd_wscale = 0; + rx_opt->tstamp_ok = rx_opt->sack_ok = 0; + rx_opt->wscale_ok = rx_opt->snd_wscale = 0; + rx_opt->cookie_plus = 0; } /* This is the max number of SACKS that we'll generate and process. It's safe - * to increse this, although since: + * to increase this, although since: * size = TCPOLEN_SACK_BASE_ALIGNED (4) + n * TCPOLEN_SACK_PERBLOCK (8) * only four options will fit in a standard TCP header */ #define TCP_NUM_SACKS 4 +struct tcp_cookie_values; +struct tcp_request_sock_ops; + struct tcp_request_sock { struct inet_request_sock req; #ifdef CONFIG_TCP_MD5SIG /* Only used by TCP MD5 Signature so far. */ const struct tcp_request_sock_ops *af_specific; #endif - u32 rcv_isn; - u32 snt_isn; + u32 rcv_isn; + u32 snt_isn; }; static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req) @@ -441,6 +448,12 @@ struct tcp_sock { /* TCP MD5 Signature Option information */ struct tcp_md5sig_info *md5sig_info; #endif + + /* When the cookie options are generated and exchanged, then this + * object holds a reference to them (cookie_values->kref). Also + * contains related tcp_cookie_transactions fields. + */ + struct tcp_cookie_values *cookie_values; }; static inline struct tcp_sock *tcp_sk(const struct sock *sk) @@ -459,6 +472,10 @@ struct tcp_timewait_sock { u16 tw_md5_keylen; u8 tw_md5_key[TCP_MD5SIG_MAXKEYLEN]; #endif + /* Few sockets in timewait have cookies; in that case, then this + * object holds a reference to them (tw_cookie_values->kref). + */ + struct tcp_cookie_values *tw_cookie_values; }; static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk) diff --git a/include/net/tcp.h b/include/net/tcp.h index 738b65f01e26..f9abd9becabd 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -164,6 +165,7 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo); #define TCPOPT_SACK 5 /* SACK Block */ #define TCPOPT_TIMESTAMP 8 /* Better RTT estimations/PAWS */ #define TCPOPT_MD5SIG 19 /* MD5 Signature (RFC2385) */ +#define TCPOPT_COOKIE 253 /* Cookie extension (experimental) */ /* * TCP option lengths @@ -174,6 +176,10 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo); #define TCPOLEN_SACK_PERM 2 #define TCPOLEN_TIMESTAMP 10 #define TCPOLEN_MD5SIG 18 +#define TCPOLEN_COOKIE_BASE 2 /* Cookie-less header extension */ +#define TCPOLEN_COOKIE_PAIR 3 /* Cookie pair header extension */ +#define TCPOLEN_COOKIE_MIN (TCPOLEN_COOKIE_BASE+TCP_COOKIE_MIN) +#define TCPOLEN_COOKIE_MAX (TCPOLEN_COOKIE_BASE+TCP_COOKIE_MAX) /* But this is what stacks really send out. */ #define TCPOLEN_TSTAMP_ALIGNED 12 @@ -1482,6 +1488,83 @@ struct tcp_request_sock_ops { extern int tcp_cookie_generator(u32 *bakery); +/** + * struct tcp_cookie_values - each socket needs extra space for the + * cookies, together with (optional) space for any SYN data. + * + * A tcp_sock contains a pointer to the current value, and this is + * cloned to the tcp_timewait_sock. + * + * @cookie_pair: variable data from the option exchange. + * + * @cookie_desired: user specified tcpct_cookie_desired. Zero + * indicates default (sysctl_tcp_cookie_size). + * After cookie sent, remembers size of cookie. + * Range 0, TCP_COOKIE_MIN to TCP_COOKIE_MAX. + * + * @s_data_desired: user specified tcpct_s_data_desired. When the + * constant payload is specified (@s_data_constant), + * holds its length instead. + * Range 0 to TCP_MSS_DESIRED. + * + * @s_data_payload: constant data that is to be included in the + * payload of SYN or SYNACK segments when the + * cookie option is present. + */ +struct tcp_cookie_values { + struct kref kref; + u8 cookie_pair[TCP_COOKIE_PAIR_SIZE]; + u8 cookie_pair_size; + u8 cookie_desired; + u16 s_data_desired:11, + s_data_constant:1, + s_data_in:1, + s_data_out:1, + s_data_unused:2; + u8 s_data_payload[0]; +}; + +static inline void tcp_cookie_values_release(struct kref *kref) +{ + kfree(container_of(kref, struct tcp_cookie_values, kref)); +} + +/* The length of constant payload data. Note that s_data_desired is + * overloaded, depending on s_data_constant: either the length of constant + * data (returned here) or the limit on variable data. + */ +static inline int tcp_s_data_size(const struct tcp_sock *tp) +{ + return (tp->cookie_values != NULL && tp->cookie_values->s_data_constant) + ? tp->cookie_values->s_data_desired + : 0; +} + +/** + * struct tcp_extend_values - tcp_ipv?.c to tcp_output.c workspace. + * + * As tcp_request_sock has already been extended in other places, the + * only remaining method is to pass stack values along as function + * parameters. These parameters are not needed after sending SYNACK. + * + * @cookie_bakery: cryptographic secret and message workspace. + * + * @cookie_plus: bytes in authenticator/cookie option, copied from + * struct tcp_options_received (above). + */ +struct tcp_extend_values { + struct request_values rv; + u32 cookie_bakery[COOKIE_WORKSPACE_WORDS]; + u8 cookie_plus:6, + cookie_out_never:1, + cookie_in_always:1; +}; + +static inline struct tcp_extend_values *tcp_xv(struct request_values *rvp) +{ + return (struct tcp_extend_values *)rvp; +} + extern void tcp_v4_init(void); extern void tcp_init(void); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 649a36d99c73..a2bcac9b388e 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1833,6 +1833,19 @@ static int tcp_v4_init_sock(struct sock *sk) tp->af_specific = &tcp_sock_ipv4_specific; #endif + /* TCP Cookie Transactions */ + if (sysctl_tcp_cookie_size > 0) { + /* Default, cookies without s_data_payload. */ + tp->cookie_values = + kzalloc(sizeof(*tp->cookie_values), + sk->sk_allocation); + if (tp->cookie_values != NULL) + kref_init(&tp->cookie_values->kref); + } + /* Presumed zeroed, in order of appearance: + * cookie_in_always, cookie_out_never, + * s_data_constant, s_data_in, s_data_out + */ sk->sk_sndbuf = sysctl_tcp_wmem[1]; sk->sk_rcvbuf = sysctl_tcp_rmem[1]; @@ -1886,6 +1899,13 @@ void tcp_v4_destroy_sock(struct sock *sk) sk->sk_sndmsg_page = NULL; } + /* TCP Cookie Transactions */ + if (tp->cookie_values != NULL) { + kref_put(&tp->cookie_values->kref, + tcp_cookie_values_release); + tp->cookie_values = NULL; + } + percpu_counter_dec(&tcp_sockets_allocated); } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index d3f6bbfc76f0..96852af43ca7 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -383,14 +383,43 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, const struct inet_request_sock *ireq = inet_rsk(req); struct tcp_request_sock *treq = tcp_rsk(req); struct inet_connection_sock *newicsk = inet_csk(newsk); - struct tcp_sock *newtp; + struct tcp_sock *newtp = tcp_sk(newsk); + struct tcp_sock *oldtp = tcp_sk(sk); + struct tcp_cookie_values *oldcvp = oldtp->cookie_values; + + /* TCP Cookie Transactions require space for the cookie pair, + * as it differs for each connection. There is no need to + * copy any s_data_payload stored at the original socket. + * Failure will prevent resuming the connection. + * + * Presumed copied, in order of appearance: + * cookie_in_always, cookie_out_never + */ + if (oldcvp != NULL) { + struct tcp_cookie_values *newcvp = + kzalloc(sizeof(*newtp->cookie_values), + GFP_ATOMIC); + + if (newcvp != NULL) { + kref_init(&newcvp->kref); + newcvp->cookie_desired = + oldcvp->cookie_desired; + newtp->cookie_values = newcvp; + } else { + /* Not Yet Implemented */ + newtp->cookie_values = NULL; + } + } /* Now setup tcp_sock */ - newtp = tcp_sk(newsk); newtp->pred_flags = 0; - newtp->rcv_wup = newtp->copied_seq = newtp->rcv_nxt = treq->rcv_isn + 1; - newtp->snd_sml = newtp->snd_una = newtp->snd_nxt = treq->snt_isn + 1; - newtp->snd_up = treq->snt_isn + 1; + + newtp->rcv_wup = newtp->copied_seq = + newtp->rcv_nxt = treq->rcv_isn + 1; + + newtp->snd_sml = newtp->snd_una = + newtp->snd_nxt = newtp->snd_up = + treq->snt_isn + 1 + tcp_s_data_size(oldtp); tcp_prequeue_init(newtp); @@ -423,8 +452,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, tcp_set_ca_state(newsk, TCP_CA_Open); tcp_init_xmit_timers(newsk); skb_queue_head_init(&newtp->out_of_order_queue); - newtp->write_seq = treq->snt_isn + 1; - newtp->pushed_seq = newtp->write_seq; + newtp->write_seq = newtp->pushed_seq = + treq->snt_isn + 1 + tcp_s_data_size(oldtp); newtp->rx_opt.saw_tstamp = 0; @@ -590,7 +619,8 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, * Invalid ACK: reset will be sent by listening socket */ if ((flg & TCP_FLAG_ACK) && - (TCP_SKB_CB(skb)->ack_seq != tcp_rsk(req)->snt_isn + 1)) + (TCP_SKB_CB(skb)->ack_seq != + tcp_rsk(req)->snt_isn + 1 + tcp_s_data_size(tcp_sk(sk)))) return sk; /* Also, it would be not so bad idea to check rcv_tsecr, which diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index da6e24416d75..f2ec38289a4a 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1864,6 +1864,19 @@ static int tcp_v6_init_sock(struct sock *sk) tp->af_specific = &tcp_sock_ipv6_specific; #endif + /* TCP Cookie Transactions */ + if (sysctl_tcp_cookie_size > 0) { + /* Default, cookies without s_data_payload. */ + tp->cookie_values = + kzalloc(sizeof(*tp->cookie_values), + sk->sk_allocation); + if (tp->cookie_values != NULL) + kref_init(&tp->cookie_values->kref); + } + /* Presumed zeroed, in order of appearance: + * cookie_in_always, cookie_out_never, + * s_data_constant, s_data_in, s_data_out + */ sk->sk_sndbuf = sysctl_tcp_wmem[1]; sk->sk_rcvbuf = sysctl_tcp_rmem[1]; -- cgit v1.3-8-gc7d7 From 4957faade11b3a278c3b3cade3411ddc20afa791 Mon Sep 17 00:00:00 2001 From: William Allen Simpson Date: Wed, 2 Dec 2009 18:25:27 +0000 Subject: TCPCT part 1g: Responder Cookie => Initiator Parse incoming TCP_COOKIE option(s). Calculate TCP_COOKIE option. Send optional data. This is a significantly revised implementation of an earlier (year-old) patch that no longer applies cleanly, with permission of the original author (Adam Langley): http://thread.gmane.org/gmane.linux.network/102586 Requires: TCPCT part 1a: add request_values parameter for sending SYNACK TCPCT part 1b: generate Responder Cookie secret TCPCT part 1c: sysctl_tcp_cookie_size, socket option TCP_COOKIE_TRANSACTIONS TCPCT part 1d: define TCP cookie option, extend existing struct's TCPCT part 1e: implement socket option TCP_COOKIE_TRANSACTIONS TCPCT part 1f: Initiator Cookie => Responder Signed-off-by: William.Allen.Simpson@gmail.com Signed-off-by: David S. Miller --- include/net/tcp.h | 1 + net/ipv4/syncookies.c | 5 ++- net/ipv4/tcp_input.c | 75 +++++++++++++++++++++++++++++----- net/ipv4/tcp_ipv4.c | 47 +++++++++++++++++++-- net/ipv4/tcp_minisocks.c | 14 ++++--- net/ipv4/tcp_output.c | 103 +++++++++++++++++++++++++++++++++++++++-------- net/ipv6/syncookies.c | 5 ++- net/ipv6/tcp_ipv6.c | 52 +++++++++++++++++++++++- 8 files changed, 259 insertions(+), 43 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index f9abd9becabd..28b04ff8c967 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -407,6 +407,7 @@ extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, extern void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, + u8 **hvpp, int estab, struct dst_entry *dst); diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 3146cc401748..26399ad2a289 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -253,6 +253,8 @@ EXPORT_SYMBOL(cookie_check_timestamp); struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, struct ip_options *opt) { + struct tcp_options_received tcp_opt; + u8 *hash_location; struct inet_request_sock *ireq; struct tcp_request_sock *treq; struct tcp_sock *tp = tcp_sk(sk); @@ -263,7 +265,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, int mss; struct rtable *rt; __u8 rcv_wscale; - struct tcp_options_received tcp_opt; if (!sysctl_tcp_syncookies || !th->ack) goto out; @@ -341,7 +342,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, /* check for timestamp cookie support */ memset(&tcp_opt, 0, sizeof(tcp_opt)); - tcp_parse_options(skb, &tcp_opt, 0, &rt->u.dst); + tcp_parse_options(skb, &tcp_opt, &hash_location, 0, &rt->u.dst); if (tcp_opt.saw_tstamp) cookie_check_timestamp(&tcp_opt); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index be166e0e11c5..57ae96a04220 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3698,7 +3698,7 @@ old_ack: * the fast version below fails. */ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, - int estab, struct dst_entry *dst) + u8 **hvpp, int estab, struct dst_entry *dst) { unsigned char *ptr; struct tcphdr *th = tcp_hdr(skb); @@ -3785,7 +3785,30 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, */ break; #endif - } + case TCPOPT_COOKIE: + /* This option is variable length. + */ + switch (opsize) { + case TCPOLEN_COOKIE_BASE: + /* not yet implemented */ + break; + case TCPOLEN_COOKIE_PAIR: + /* not yet implemented */ + break; + case TCPOLEN_COOKIE_MIN+0: + case TCPOLEN_COOKIE_MIN+2: + case TCPOLEN_COOKIE_MIN+4: + case TCPOLEN_COOKIE_MIN+6: + case TCPOLEN_COOKIE_MAX: + /* 16-bit multiple */ + opt_rx->cookie_plus = opsize; + *hvpp = ptr; + default: + /* ignore option */ + break; + }; + break; + }; ptr += opsize-2; length -= opsize; @@ -3813,17 +3836,20 @@ static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th) * If it is wrong it falls back on tcp_parse_options(). */ static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, - struct tcp_sock *tp) + struct tcp_sock *tp, u8 **hvpp) { - if (th->doff == sizeof(struct tcphdr) >> 2) { + /* In the spirit of fast parsing, compare doff directly to constant + * values. Because equality is used, short doff can be ignored here. + */ + if (th->doff == (sizeof(*th) / 4)) { tp->rx_opt.saw_tstamp = 0; return 0; } else if (tp->rx_opt.tstamp_ok && - th->doff == (sizeof(struct tcphdr)>>2)+(TCPOLEN_TSTAMP_ALIGNED>>2)) { + th->doff == ((sizeof(*th) + TCPOLEN_TSTAMP_ALIGNED) / 4)) { if (tcp_parse_aligned_timestamp(tp, th)) return 1; } - tcp_parse_options(skb, &tp->rx_opt, 1, NULL); + tcp_parse_options(skb, &tp->rx_opt, hvpp, 1, NULL); return 1; } @@ -5077,10 +5103,12 @@ out: static int tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, struct tcphdr *th, int syn_inerr) { + u8 *hash_location; struct tcp_sock *tp = tcp_sk(sk); /* RFC1323: H1. Apply PAWS check first. */ - if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp && + if (tcp_fast_parse_options(skb, th, tp, &hash_location) && + tp->rx_opt.saw_tstamp && tcp_paws_discard(sk, skb)) { if (!th->rst) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); @@ -5368,12 +5396,14 @@ discard: static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, struct tcphdr *th, unsigned len) { - struct tcp_sock *tp = tcp_sk(sk); + u8 *hash_location; struct inet_connection_sock *icsk = inet_csk(sk); - int saved_clamp = tp->rx_opt.mss_clamp; + struct tcp_sock *tp = tcp_sk(sk); struct dst_entry *dst = __sk_dst_get(sk); + struct tcp_cookie_values *cvp = tp->cookie_values; + int saved_clamp = tp->rx_opt.mss_clamp; - tcp_parse_options(skb, &tp->rx_opt, 0, dst); + tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0, dst); if (th->ack) { /* rfc793: @@ -5470,6 +5500,31 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, * Change state from SYN-SENT only after copied_seq * is initialized. */ tp->copied_seq = tp->rcv_nxt; + + if (cvp != NULL && + cvp->cookie_pair_size > 0 && + tp->rx_opt.cookie_plus > 0) { + int cookie_size = tp->rx_opt.cookie_plus + - TCPOLEN_COOKIE_BASE; + int cookie_pair_size = cookie_size + + cvp->cookie_desired; + + /* A cookie extension option was sent and returned. + * Note that each incoming SYNACK replaces the + * Responder cookie. The initial exchange is most + * fragile, as protection against spoofing relies + * entirely upon the sequence and timestamp (above). + * This replacement strategy allows the correct pair to + * pass through, while any others will be filtered via + * Responder verification later. + */ + if (sizeof(cvp->cookie_pair) >= cookie_pair_size) { + memcpy(&cvp->cookie_pair[cvp->cookie_desired], + hash_location, cookie_size); + cvp->cookie_pair_size = cookie_pair_size; + } + } + smp_mb(); tcp_set_state(sk, TCP_ESTABLISHED); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a2bcac9b388e..59c911f3889d 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1213,9 +1213,12 @@ static struct timewait_sock_ops tcp_timewait_sock_ops = { int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) { + struct tcp_extend_values tmp_ext; struct tcp_options_received tmp_opt; + u8 *hash_location; struct request_sock *req; struct inet_request_sock *ireq; + struct tcp_sock *tp = tcp_sk(sk); struct dst_entry *dst = NULL; __be32 saddr = ip_hdr(skb)->saddr; __be32 daddr = ip_hdr(skb)->daddr; @@ -1271,15 +1274,49 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) tcp_clear_options(&tmp_opt); tmp_opt.mss_clamp = TCP_MSS_DEFAULT; - tmp_opt.user_mss = tcp_sk(sk)->rx_opt.user_mss; + tmp_opt.user_mss = tp->rx_opt.user_mss; + tcp_parse_options(skb, &tmp_opt, &hash_location, 0, dst); + + if (tmp_opt.cookie_plus > 0 && + tmp_opt.saw_tstamp && + !tp->rx_opt.cookie_out_never && + (sysctl_tcp_cookie_size > 0 || + (tp->cookie_values != NULL && + tp->cookie_values->cookie_desired > 0))) { + u8 *c; + u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS]; + int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE; + + if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0) + goto drop_and_release; + + /* Secret recipe starts with IP addresses */ + *mess++ ^= daddr; + *mess++ ^= saddr; - tcp_parse_options(skb, &tmp_opt, 0, dst); + /* plus variable length Initiator Cookie */ + c = (u8 *)mess; + while (l-- > 0) + *c++ ^= *hash_location++; + +#ifdef CONFIG_SYN_COOKIES + want_cookie = 0; /* not our kind of cookie */ +#endif + tmp_ext.cookie_out_never = 0; /* false */ + tmp_ext.cookie_plus = tmp_opt.cookie_plus; + } else if (!tp->rx_opt.cookie_in_always) { + /* redundant indications, but ensure initialization. */ + tmp_ext.cookie_out_never = 1; /* true */ + tmp_ext.cookie_plus = 0; + } else { + goto drop_and_release; + } + tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always; if (want_cookie && !tmp_opt.saw_tstamp) tcp_clear_options(&tmp_opt); tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; - tcp_openreq_init(req, &tmp_opt, skb); if (security_inet_conn_request(sk, skb, req)) @@ -1339,7 +1376,9 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) } tcp_rsk(req)->snt_isn = isn; - if (__tcp_v4_send_synack(sk, dst, req, NULL) || want_cookie) + if (__tcp_v4_send_synack(sk, dst, req, + (struct request_values *)&tmp_ext) || + want_cookie) goto drop_and_free; inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 96852af43ca7..87accec8d097 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -90,13 +90,14 @@ enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, const struct tcphdr *th) { - struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); struct tcp_options_received tmp_opt; + u8 *hash_location; + struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); int paws_reject = 0; if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) { tmp_opt.tstamp_ok = 1; - tcp_parse_options(skb, &tmp_opt, 1, NULL); + tcp_parse_options(skb, &tmp_opt, &hash_location, 1, NULL); if (tmp_opt.saw_tstamp) { tmp_opt.ts_recent = tcptw->tw_ts_recent; @@ -518,15 +519,16 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct request_sock **prev) { + struct tcp_options_received tmp_opt; + u8 *hash_location; + struct sock *child; const struct tcphdr *th = tcp_hdr(skb); __be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK); int paws_reject = 0; - struct tcp_options_received tmp_opt; - struct sock *child; - if ((th->doff > (sizeof(struct tcphdr)>>2)) && (req->ts_recent)) { + if ((th->doff > (sizeof(*th) >> 2)) && (req->ts_recent)) { tmp_opt.tstamp_ok = 1; - tcp_parse_options(skb, &tmp_opt, 1, NULL); + tcp_parse_options(skb, &tmp_opt, &hash_location, 1, NULL); if (tmp_opt.saw_tstamp) { tmp_opt.ts_recent = req->ts_recent; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 35dd983a8a99..2ac8beff4d77 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -655,48 +655,77 @@ static unsigned tcp_synack_options(struct sock *sk, struct request_sock *req, unsigned mss, struct sk_buff *skb, struct tcp_out_options *opts, - struct tcp_md5sig_key **md5) { - unsigned size = 0; + struct tcp_md5sig_key **md5, + struct tcp_extend_values *xvp) +{ struct inet_request_sock *ireq = inet_rsk(req); - char doing_ts; + unsigned remaining = MAX_TCP_OPTION_SPACE; + u8 cookie_plus = (xvp != NULL && !xvp->cookie_out_never) ? + xvp->cookie_plus : + 0; + bool doing_ts = ireq->tstamp_ok; #ifdef CONFIG_TCP_MD5SIG *md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req); if (*md5) { opts->options |= OPTION_MD5; - size += TCPOLEN_MD5SIG_ALIGNED; + remaining -= TCPOLEN_MD5SIG_ALIGNED; + + /* We can't fit any SACK blocks in a packet with MD5 + TS + * options. There was discussion about disabling SACK + * rather than TS in order to fit in better with old, + * buggy kernels, but that was deemed to be unnecessary. + */ + doing_ts &= !ireq->sack_ok; } #else *md5 = NULL; #endif - /* we can't fit any SACK blocks in a packet with MD5 + TS - options. There was discussion about disabling SACK rather than TS in - order to fit in better with old, buggy kernels, but that was deemed - to be unnecessary. */ - doing_ts = ireq->tstamp_ok && !(*md5 && ireq->sack_ok); - + /* We always send an MSS option. */ opts->mss = mss; - size += TCPOLEN_MSS_ALIGNED; + remaining -= TCPOLEN_MSS_ALIGNED; if (likely(ireq->wscale_ok)) { opts->ws = ireq->rcv_wscale; opts->options |= OPTION_WSCALE; - size += TCPOLEN_WSCALE_ALIGNED; + remaining -= TCPOLEN_WSCALE_ALIGNED; } if (likely(doing_ts)) { opts->options |= OPTION_TS; opts->tsval = TCP_SKB_CB(skb)->when; opts->tsecr = req->ts_recent; - size += TCPOLEN_TSTAMP_ALIGNED; + remaining -= TCPOLEN_TSTAMP_ALIGNED; } if (likely(ireq->sack_ok)) { opts->options |= OPTION_SACK_ADVERTISE; if (unlikely(!doing_ts)) - size += TCPOLEN_SACKPERM_ALIGNED; + remaining -= TCPOLEN_SACKPERM_ALIGNED; } - return size; + /* Similar rationale to tcp_syn_options() applies here, too. + * If the options fit, the same options should fit now! + */ + if (*md5 == NULL && + doing_ts && + cookie_plus > TCPOLEN_COOKIE_BASE) { + int need = cookie_plus; /* has TCPOLEN_COOKIE_BASE */ + + if (0x2 & need) { + /* 32-bit multiple */ + need += 2; /* NOPs */ + } + if (need <= remaining) { + opts->options |= OPTION_COOKIE_EXTENSION; + opts->hash_size = cookie_plus - TCPOLEN_COOKIE_BASE; + remaining -= need; + } else { + /* There's no error return, so flag it. */ + xvp->cookie_out_never = 1; /* true */ + opts->hash_size = 0; + } + } + return MAX_TCP_OPTION_SPACE - remaining; } /* Compute TCP options for ESTABLISHED sockets. This is not the @@ -2365,6 +2394,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, struct request_values *rvp) { struct tcp_out_options opts; + struct tcp_extend_values *xvp = tcp_xv(rvp); struct inet_request_sock *ireq = inet_rsk(req); struct tcp_sock *tp = tcp_sk(sk); struct tcphdr *th; @@ -2408,8 +2438,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, #endif TCP_SKB_CB(skb)->when = tcp_time_stamp; tcp_header_size = tcp_synack_options(sk, req, mss, - skb, &opts, &md5) + - sizeof(struct tcphdr); + skb, &opts, &md5, xvp) + + sizeof(*th); skb_push(skb, tcp_header_size); skb_reset_transport_header(skb); @@ -2426,6 +2456,45 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, */ tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn, TCPCB_FLAG_SYN | TCPCB_FLAG_ACK); + + if (OPTION_COOKIE_EXTENSION & opts.options) { + const struct tcp_cookie_values *cvp = tp->cookie_values; + + if (cvp != NULL && + cvp->s_data_constant && + cvp->s_data_desired > 0) { + u8 *buf = skb_put(skb, cvp->s_data_desired); + + /* copy data directly from the listening socket. */ + memcpy(buf, cvp->s_data_payload, cvp->s_data_desired); + TCP_SKB_CB(skb)->end_seq += cvp->s_data_desired; + } + + if (opts.hash_size > 0) { + __u32 workspace[SHA_WORKSPACE_WORDS]; + u32 *mess = &xvp->cookie_bakery[COOKIE_DIGEST_WORDS]; + u32 *tail = &mess[COOKIE_MESSAGE_WORDS-1]; + + /* Secret recipe depends on the Timestamp, (future) + * Sequence and Acknowledgment Numbers, Initiator + * Cookie, and others handled by IP variant caller. + */ + *tail-- ^= opts.tsval; + *tail-- ^= tcp_rsk(req)->rcv_isn + 1; + *tail-- ^= TCP_SKB_CB(skb)->seq + 1; + + /* recommended */ + *tail-- ^= ((th->dest << 16) | th->source); + *tail-- ^= (u32)cvp; /* per sockopt */ + + sha_transform((__u32 *)&xvp->cookie_bakery[0], + (char *)mess, + &workspace[0]); + opts.hash_location = + (__u8 *)&xvp->cookie_bakery[0]; + } + } + th->seq = htonl(TCP_SKB_CB(skb)->seq); th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1); diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 612fc53e0bb9..5b9af508b8f2 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -159,6 +159,8 @@ static inline int cookie_check(struct sk_buff *skb, __u32 cookie) struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) { + struct tcp_options_received tcp_opt; + u8 *hash_location; struct inet_request_sock *ireq; struct inet6_request_sock *ireq6; struct tcp_request_sock *treq; @@ -171,7 +173,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) int mss; struct dst_entry *dst; __u8 rcv_wscale; - struct tcp_options_received tcp_opt; if (!sysctl_tcp_syncookies || !th->ack) goto out; @@ -254,7 +255,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) /* check for timestamp cookie support */ memset(&tcp_opt, 0, sizeof(tcp_opt)); - tcp_parse_options(skb, &tcp_opt, 0, dst); + tcp_parse_options(skb, &tcp_opt, &hash_location, 0, dst); if (tcp_opt.saw_tstamp) cookie_check_timestamp(&tcp_opt); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index f2ec38289a4a..fc0a4e5895ee 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1162,7 +1162,9 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) */ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) { + struct tcp_extend_values tmp_ext; struct tcp_options_received tmp_opt; + u8 *hash_location; struct request_sock *req; struct inet6_request_sock *treq; struct ipv6_pinfo *np = inet6_sk(sk); @@ -1206,8 +1208,52 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) tcp_clear_options(&tmp_opt); tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); tmp_opt.user_mss = tp->rx_opt.user_mss; + tcp_parse_options(skb, &tmp_opt, &hash_location, 0, dst); + + if (tmp_opt.cookie_plus > 0 && + tmp_opt.saw_tstamp && + !tp->rx_opt.cookie_out_never && + (sysctl_tcp_cookie_size > 0 || + (tp->cookie_values != NULL && + tp->cookie_values->cookie_desired > 0))) { + u8 *c; + u32 *d; + u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS]; + int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE; + + if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0) + goto drop_and_free; + + /* Secret recipe starts with IP addresses */ + d = &ipv6_hdr(skb)->daddr.s6_addr32[0]; + *mess++ ^= *d++; + *mess++ ^= *d++; + *mess++ ^= *d++; + *mess++ ^= *d++; + d = &ipv6_hdr(skb)->saddr.s6_addr32[0]; + *mess++ ^= *d++; + *mess++ ^= *d++; + *mess++ ^= *d++; + *mess++ ^= *d++; + + /* plus variable length Initiator Cookie */ + c = (u8 *)mess; + while (l-- > 0) + *c++ ^= *hash_location++; - tcp_parse_options(skb, &tmp_opt, 0, dst); +#ifdef CONFIG_SYN_COOKIES + want_cookie = 0; /* not our kind of cookie */ +#endif + tmp_ext.cookie_out_never = 0; /* false */ + tmp_ext.cookie_plus = tmp_opt.cookie_plus; + } else if (!tp->rx_opt.cookie_in_always) { + /* redundant indications, but ensure initialization. */ + tmp_ext.cookie_out_never = 1; /* true */ + tmp_ext.cookie_plus = 0; + } else { + goto drop_and_free; + } + tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always; if (want_cookie && !tmp_opt.saw_tstamp) tcp_clear_options(&tmp_opt); @@ -1244,7 +1290,9 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) security_inet_conn_request(sk, skb, req); - if (tcp_v6_send_synack(sk, req, NULL) || want_cookie) + if (tcp_v6_send_synack(sk, req, + (struct request_values *)&tmp_ext) || + want_cookie) goto drop_and_free; inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); -- cgit v1.3-8-gc7d7 From 8818a9d884e3a589899be3303958fff182e98e55 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Wed, 2 Dec 2009 22:24:02 -0800 Subject: tcp: clear hints to avoid a stale one (nfs only affected?) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Eric Dumazet mentioned in a context of another problem: "Well, it seems NFS reuses its socket, so maybe we miss some cleaning as spotted in this old patch" I've not check under which conditions that actually happens but if true, we need to make sure we don't accidently leave stale hints behind when the write queue had to be purged (whether reusing with NFS can actually happen if purging took place is something I'm not sure of). ...At least it compiles. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- include/net/tcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 28b04ff8c967..e2d2ca2509be 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1229,6 +1229,7 @@ static inline void tcp_write_queue_purge(struct sock *sk) while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) sk_wmem_free_skb(sk, skb); sk_mem_reclaim(sk); + tcp_clear_all_retrans_hints(tcp_sk(sk)); } static inline struct sk_buff *tcp_write_queue_head(struct sock *sk) -- cgit v1.3-8-gc7d7 From 7cff7ce94a7df2ccf5ac76b48ee0995fee2060df Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 9 Oct 2009 00:01:39 -0700 Subject: include/linux/compiler-gcc4.h: Fix build bug - gcc-4.0.2 doesn't understand __builtin_object_size Maybe 4.1.0 doesn't too, but this fixed it for me. Caused by: 4a31276: x86: Turn the copy_from_user check into an (optional) compile time warning 9f0cf4a: x86: Use __builtin_object_size() to validate the buffer size for copy_from_user() Signed-off-by: Andrew Morton Cc: Arjan van de Ven LKML-Reference: <200910090724.n997OQl6013538@imap1.linux-foundation.org> Signed-off-by: Ingo Molnar --- include/linux/compiler-gcc4.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h index 77542c57e20a..e6ef279ca20c 100644 --- a/include/linux/compiler-gcc4.h +++ b/include/linux/compiler-gcc4.h @@ -38,7 +38,9 @@ #endif +#if __GNUC_MINOR__ > 0 #define __compiletime_object_size(obj) __builtin_object_size(obj, 0) +#endif #if __GNUC_MINOR__ >= 4 #define __compiletime_warning(message) __attribute__((warning(message))) #define __compiletime_error(message) __attribute__((error(message))) -- cgit v1.3-8-gc7d7 From 1a6e4a8c276e122dbeb6f9c610f29735e4236bfd Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 24 Aug 2009 11:54:19 +0300 Subject: KVM: Move irq sharing information to irqchip level This removes assumptions that max GSIs is smaller than number of pins. Sharing is tracked on pin level not GSI level. [avi: no PIC on ia64] Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 1 - arch/x86/kvm/irq.h | 1 + include/linux/kvm_host.h | 2 +- virt/kvm/ioapic.h | 1 + virt/kvm/irq_comm.c | 59 +++++++++++++++++++++++++---------------- 5 files changed, 39 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 0b113f2b58cf..35d3236c9de4 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -410,7 +410,6 @@ struct kvm_arch{ gpa_t ept_identity_map_addr; unsigned long irq_sources_bitmap; - unsigned long irq_states[KVM_IOAPIC_NUM_PINS]; u64 vm_init_tsc; }; diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 7d6058a2fd38..c025a2362aae 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -71,6 +71,7 @@ struct kvm_pic { int output; /* intr from master PIC */ struct kvm_io_device dev; void (*ack_notifier)(void *opaque, int irq); + unsigned long irq_states[16]; }; struct kvm_pic *kvm_create_pic(struct kvm *kvm); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b7bbb5ddd7ae..1c7f8c49e4e8 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -120,7 +120,7 @@ struct kvm_kernel_irq_routing_entry { u32 gsi; u32 type; int (*set)(struct kvm_kernel_irq_routing_entry *e, - struct kvm *kvm, int level); + struct kvm *kvm, int irq_source_id, int level); union { struct { unsigned irqchip; diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h index 7080b713c160..6e461ade6365 100644 --- a/virt/kvm/ioapic.h +++ b/virt/kvm/ioapic.h @@ -41,6 +41,7 @@ struct kvm_ioapic { u32 irr; u32 pad; union kvm_ioapic_redirect_entry redirtbl[IOAPIC_NUM_PINS]; + unsigned long irq_states[IOAPIC_NUM_PINS]; struct kvm_io_device dev; struct kvm *kvm; void (*ack_notifier)(void *opaque, int irq); diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index 001663ff401a..9783f5c43dae 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -31,20 +31,39 @@ #include "ioapic.h" +static inline int kvm_irq_line_state(unsigned long *irq_state, + int irq_source_id, int level) +{ + /* Logical OR for level trig interrupt */ + if (level) + set_bit(irq_source_id, irq_state); + else + clear_bit(irq_source_id, irq_state); + + return !!(*irq_state); +} + static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, - struct kvm *kvm, int level) + struct kvm *kvm, int irq_source_id, int level) { #ifdef CONFIG_X86 - return kvm_pic_set_irq(pic_irqchip(kvm), e->irqchip.pin, level); + struct kvm_pic *pic = pic_irqchip(kvm); + level = kvm_irq_line_state(&pic->irq_states[e->irqchip.pin], + irq_source_id, level); + return kvm_pic_set_irq(pic, e->irqchip.pin, level); #else return -1; #endif } static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e, - struct kvm *kvm, int level) + struct kvm *kvm, int irq_source_id, int level) { - return kvm_ioapic_set_irq(kvm->arch.vioapic, e->irqchip.pin, level); + struct kvm_ioapic *ioapic = kvm->arch.vioapic; + level = kvm_irq_line_state(&ioapic->irq_states[e->irqchip.pin], + irq_source_id, level); + + return kvm_ioapic_set_irq(ioapic, e->irqchip.pin, level); } inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq) @@ -96,10 +115,13 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, } static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, - struct kvm *kvm, int level) + struct kvm *kvm, int irq_source_id, int level) { struct kvm_lapic_irq irq; + if (!level) + return -1; + trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data); irq.dest_id = (e->msi.address_lo & @@ -125,34 +147,19 @@ static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level) { struct kvm_kernel_irq_routing_entry *e; - unsigned long *irq_state, sig_level; int ret = -1; trace_kvm_set_irq(irq, level, irq_source_id); WARN_ON(!mutex_is_locked(&kvm->irq_lock)); - if (irq < KVM_IOAPIC_NUM_PINS) { - irq_state = (unsigned long *)&kvm->arch.irq_states[irq]; - - /* Logical OR for level trig interrupt */ - if (level) - set_bit(irq_source_id, irq_state); - else - clear_bit(irq_source_id, irq_state); - sig_level = !!(*irq_state); - } else if (!level) - return ret; - else /* Deal with MSI/MSI-X */ - sig_level = 1; - /* Not possible to detect if the guest uses the PIC or the * IOAPIC. So set the bit in both. The guest will ignore * writes to the unused one. */ list_for_each_entry(e, &kvm->irq_routing, link) if (e->gsi == irq) { - int r = e->set(e, kvm, sig_level); + int r = e->set(e, kvm, irq_source_id, level); if (r < 0) continue; @@ -232,8 +239,14 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id) printk(KERN_ERR "kvm: IRQ source ID out of range!\n"); return; } - for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++) - clear_bit(irq_source_id, &kvm->arch.irq_states[i]); + for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++) { + clear_bit(irq_source_id, &kvm->arch.vioapic->irq_states[i]); + if (i >= 16) + continue; +#ifdef CONFIG_X86 + clear_bit(irq_source_id, &pic_irqchip(kvm)->irq_states[i]); +#endif + } clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap); mutex_unlock(&kvm->irq_lock); } -- cgit v1.3-8-gc7d7 From 46e624b95c36d729bdf24010fff11d16f6fe94fa Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 24 Aug 2009 11:54:20 +0300 Subject: KVM: Change irq routing table to use gsi indexed array Use gsi indexed array instead of scanning all entries on each interrupt injection. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 21 ++++++++++-- virt/kvm/irq_comm.c | 88 +++++++++++++++++++++++++++++------------------- virt/kvm/kvm_main.c | 1 - 3 files changed, 71 insertions(+), 39 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 1c7f8c49e4e8..f403e66557fb 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -128,7 +128,17 @@ struct kvm_kernel_irq_routing_entry { } irqchip; struct msi_msg msi; }; - struct list_head link; + struct hlist_node link; +}; + +struct kvm_irq_routing_table { + struct kvm_kernel_irq_routing_entry *rt_entries; + u32 nr_rt_entries; + /* + * Array indexed by gsi. Each entry contains list of irq chips + * the gsi is connected to. + */ + struct hlist_head map[0]; }; struct kvm { @@ -166,7 +176,7 @@ struct kvm { struct mutex irq_lock; #ifdef CONFIG_HAVE_KVM_IRQCHIP - struct list_head irq_routing; /* of kvm_kernel_irq_routing_entry */ + struct kvm_irq_routing_table *irq_routing; struct hlist_head mask_notifier_list; #endif @@ -390,7 +400,12 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, struct kvm_irq_mask_notifier *kimn); void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask); -int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level); +#ifdef __KVM_HAVE_IOAPIC +void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic, + union kvm_ioapic_redirect_entry *entry, + unsigned long *deliver_bitmask); +#endif +int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level); void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin); void kvm_register_irq_ack_notifier(struct kvm *kvm, struct kvm_irq_ack_notifier *kian); diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index 9783f5c43dae..81950f6f6fd9 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -144,10 +144,12 @@ static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, * = 0 Interrupt was coalesced (previous irq is still pending) * > 0 Number of CPUs interrupt was delivered to */ -int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level) +int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level) { struct kvm_kernel_irq_routing_entry *e; int ret = -1; + struct kvm_irq_routing_table *irq_rt; + struct hlist_node *n; trace_kvm_set_irq(irq, level, irq_source_id); @@ -157,8 +159,9 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level) * IOAPIC. So set the bit in both. The guest will ignore * writes to the unused one. */ - list_for_each_entry(e, &kvm->irq_routing, link) - if (e->gsi == irq) { + irq_rt = kvm->irq_routing; + if (irq < irq_rt->nr_rt_entries) + hlist_for_each_entry(e, n, &irq_rt->map[irq], link) { int r = e->set(e, kvm, irq_source_id, level); if (r < 0) continue; @@ -170,20 +173,23 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level) void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) { - struct kvm_kernel_irq_routing_entry *e; struct kvm_irq_ack_notifier *kian; struct hlist_node *n; unsigned gsi = pin; + int i; trace_kvm_ack_irq(irqchip, pin); - list_for_each_entry(e, &kvm->irq_routing, link) + for (i = 0; i < kvm->irq_routing->nr_rt_entries; i++) { + struct kvm_kernel_irq_routing_entry *e; + e = &kvm->irq_routing->rt_entries[i]; if (e->type == KVM_IRQ_ROUTING_IRQCHIP && e->irqchip.irqchip == irqchip && e->irqchip.pin == pin) { gsi = e->gsi; break; } + } hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, link) if (kian->gsi == gsi) @@ -280,26 +286,30 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask) kimn->func(kimn, mask); } -static void __kvm_free_irq_routing(struct list_head *irq_routing) -{ - struct kvm_kernel_irq_routing_entry *e, *n; - - list_for_each_entry_safe(e, n, irq_routing, link) - kfree(e); -} - void kvm_free_irq_routing(struct kvm *kvm) { mutex_lock(&kvm->irq_lock); - __kvm_free_irq_routing(&kvm->irq_routing); + kfree(kvm->irq_routing); mutex_unlock(&kvm->irq_lock); } -static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e, +static int setup_routing_entry(struct kvm_irq_routing_table *rt, + struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue) { int r = -EINVAL; int delta; + struct kvm_kernel_irq_routing_entry *ei; + struct hlist_node *n; + + /* + * Do not allow GSI to be mapped to the same irqchip more than once. + * Allow only one to one mapping between GSI and MSI. + */ + hlist_for_each_entry(ei, n, &rt->map[ue->gsi], link) + if (ei->type == KVM_IRQ_ROUTING_MSI || + ue->u.irqchip.irqchip == ei->irqchip.irqchip) + return r; e->gsi = ue->gsi; e->type = ue->type; @@ -332,6 +342,8 @@ static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e, default: goto out; } + + hlist_add_head(&e->link, &rt->map[e->gsi]); r = 0; out: return r; @@ -343,43 +355,49 @@ int kvm_set_irq_routing(struct kvm *kvm, unsigned nr, unsigned flags) { - struct list_head irq_list = LIST_HEAD_INIT(irq_list); - struct list_head tmp = LIST_HEAD_INIT(tmp); - struct kvm_kernel_irq_routing_entry *e = NULL; - unsigned i; + struct kvm_irq_routing_table *new, *old; + u32 i, nr_rt_entries = 0; int r; + for (i = 0; i < nr; ++i) { + if (ue[i].gsi >= KVM_MAX_IRQ_ROUTES) + return -EINVAL; + nr_rt_entries = max(nr_rt_entries, ue[i].gsi); + } + + nr_rt_entries += 1; + + new = kzalloc(sizeof(*new) + (nr_rt_entries * sizeof(struct hlist_head)) + + (nr * sizeof(struct kvm_kernel_irq_routing_entry)), + GFP_KERNEL); + + if (!new) + return -ENOMEM; + + new->rt_entries = (void *)&new->map[nr_rt_entries]; + + new->nr_rt_entries = nr_rt_entries; + for (i = 0; i < nr; ++i) { r = -EINVAL; - if (ue->gsi >= KVM_MAX_IRQ_ROUTES) - goto out; if (ue->flags) goto out; - r = -ENOMEM; - e = kzalloc(sizeof(*e), GFP_KERNEL); - if (!e) - goto out; - r = setup_routing_entry(e, ue); + r = setup_routing_entry(new, &new->rt_entries[i], ue); if (r) goto out; ++ue; - list_add(&e->link, &irq_list); - e = NULL; } mutex_lock(&kvm->irq_lock); - list_splice(&kvm->irq_routing, &tmp); - INIT_LIST_HEAD(&kvm->irq_routing); - list_splice(&irq_list, &kvm->irq_routing); - INIT_LIST_HEAD(&irq_list); - list_splice(&tmp, &irq_list); + old = kvm->irq_routing; + kvm->irq_routing = new; mutex_unlock(&kvm->irq_lock); + new = old; r = 0; out: - kfree(e); - __kvm_free_irq_routing(&irq_list); + kfree(new); return r; } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 22b520b54411..3bee94892774 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -957,7 +957,6 @@ static struct kvm *kvm_create_vm(void) if (IS_ERR(kvm)) goto out; #ifdef CONFIG_HAVE_KVM_IRQCHIP - INIT_LIST_HEAD(&kvm->irq_routing); INIT_HLIST_HEAD(&kvm->mask_notifier_list); #endif -- cgit v1.3-8-gc7d7 From 3e71f88bc90792a187703860cf22fbed7c12cbd9 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 24 Aug 2009 11:54:21 +0300 Subject: KVM: Maintain back mapping from irqchip/pin to gsi Maintain back mapping from irqchip/pin to gsi to speedup interrupt acknowledgment notifications. [avi: build fix on non-x86/ia64] Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/ia64/include/asm/kvm.h | 1 + arch/x86/include/asm/kvm.h | 1 + include/linux/kvm_host.h | 9 +++++++++ virt/kvm/irq_comm.c | 31 ++++++++++++++----------------- 4 files changed, 25 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/arch/ia64/include/asm/kvm.h b/arch/ia64/include/asm/kvm.h index 18a7e49abbc5..bc90c75adf67 100644 --- a/arch/ia64/include/asm/kvm.h +++ b/arch/ia64/include/asm/kvm.h @@ -60,6 +60,7 @@ struct kvm_ioapic_state { #define KVM_IRQCHIP_PIC_MASTER 0 #define KVM_IRQCHIP_PIC_SLAVE 1 #define KVM_IRQCHIP_IOAPIC 2 +#define KVM_NR_IRQCHIPS 3 #define KVM_CONTEXT_SIZE 8*1024 diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index 4a5fe914dc59..f02e87a5206f 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h @@ -79,6 +79,7 @@ struct kvm_ioapic_state { #define KVM_IRQCHIP_PIC_MASTER 0 #define KVM_IRQCHIP_PIC_SLAVE 1 #define KVM_IRQCHIP_IOAPIC 2 +#define KVM_NR_IRQCHIPS 3 /* for KVM_GET_REGS and KVM_SET_REGS */ struct kvm_regs { diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index f403e66557fb..cc2d7493598b 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -131,7 +131,10 @@ struct kvm_kernel_irq_routing_entry { struct hlist_node link; }; +#ifdef __KVM_HAVE_IOAPIC + struct kvm_irq_routing_table { + int chip[KVM_NR_IRQCHIPS][KVM_IOAPIC_NUM_PINS]; struct kvm_kernel_irq_routing_entry *rt_entries; u32 nr_rt_entries; /* @@ -141,6 +144,12 @@ struct kvm_irq_routing_table { struct hlist_head map[0]; }; +#else + +struct kvm_irq_routing_table {}; + +#endif + struct kvm { spinlock_t mmu_lock; spinlock_t requests_lock; diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index 81950f6f6fd9..59cf8dae0062 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -175,25 +175,16 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) { struct kvm_irq_ack_notifier *kian; struct hlist_node *n; - unsigned gsi = pin; - int i; + int gsi; trace_kvm_ack_irq(irqchip, pin); - for (i = 0; i < kvm->irq_routing->nr_rt_entries; i++) { - struct kvm_kernel_irq_routing_entry *e; - e = &kvm->irq_routing->rt_entries[i]; - if (e->type == KVM_IRQ_ROUTING_IRQCHIP && - e->irqchip.irqchip == irqchip && - e->irqchip.pin == pin) { - gsi = e->gsi; - break; - } - } - - hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, link) - if (kian->gsi == gsi) - kian->irq_acked(kian); + gsi = kvm->irq_routing->chip[irqchip][pin]; + if (gsi != -1) + hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, + link) + if (kian->gsi == gsi) + kian->irq_acked(kian); } void kvm_register_irq_ack_notifier(struct kvm *kvm, @@ -332,6 +323,9 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt, } e->irqchip.irqchip = ue->u.irqchip.irqchip; e->irqchip.pin = ue->u.irqchip.pin + delta; + if (e->irqchip.pin >= KVM_IOAPIC_NUM_PINS) + goto out; + rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi; break; case KVM_IRQ_ROUTING_MSI: e->set = kvm_set_msi; @@ -356,7 +350,7 @@ int kvm_set_irq_routing(struct kvm *kvm, unsigned flags) { struct kvm_irq_routing_table *new, *old; - u32 i, nr_rt_entries = 0; + u32 i, j, nr_rt_entries = 0; int r; for (i = 0; i < nr; ++i) { @@ -377,6 +371,9 @@ int kvm_set_irq_routing(struct kvm *kvm, new->rt_entries = (void *)&new->map[nr_rt_entries]; new->nr_rt_entries = nr_rt_entries; + for (i = 0; i < 3; i++) + for (j = 0; j < KVM_IOAPIC_NUM_PINS; j++) + new->chip[i][j] = -1; for (i = 0; i < nr; ++i) { r = -EINVAL; -- cgit v1.3-8-gc7d7 From 136bdfeee7b5bc986fc94af3a40d7d13ea37bb95 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 24 Aug 2009 11:54:23 +0300 Subject: KVM: Move irq ack notifier list to arch independent code Mask irq notifier list is already there. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/ia64/include/asm/kvm_host.h | 1 - arch/x86/include/asm/kvm_host.h | 1 - include/linux/kvm_host.h | 1 + virt/kvm/irq_comm.c | 5 ++--- virt/kvm/kvm_main.c | 1 + 5 files changed, 4 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h index d9b6325a9328..a362e67e0ca6 100644 --- a/arch/ia64/include/asm/kvm_host.h +++ b/arch/ia64/include/asm/kvm_host.h @@ -475,7 +475,6 @@ struct kvm_arch { struct list_head assigned_dev_head; struct iommu_domain *iommu_domain; int iommu_flags; - struct hlist_head irq_ack_notifier_list; unsigned long irq_sources_bitmap; unsigned long irq_states[KVM_IOAPIC_NUM_PINS]; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 35d3236c9de4..a46e2dd9aca8 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -397,7 +397,6 @@ struct kvm_arch{ struct kvm_pic *vpic; struct kvm_ioapic *vioapic; struct kvm_pit *vpit; - struct hlist_head irq_ack_notifier_list; int vapics_in_nmi_mode; unsigned int tss_addr; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index cc2d7493598b..4aa5e1d9a797 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -187,6 +187,7 @@ struct kvm { #ifdef CONFIG_HAVE_KVM_IRQCHIP struct kvm_irq_routing_table *irq_routing; struct hlist_head mask_notifier_list; + struct hlist_head irq_ack_notifier_list; #endif #ifdef KVM_ARCH_WANT_MMU_NOTIFIER diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index fb861dd956fc..f01972595938 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -186,8 +186,7 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) rcu_read_unlock(); if (gsi != -1) - hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, - link) + hlist_for_each_entry(kian, n, &kvm->irq_ack_notifier_list, link) if (kian->gsi == gsi) kian->irq_acked(kian); } @@ -196,7 +195,7 @@ void kvm_register_irq_ack_notifier(struct kvm *kvm, struct kvm_irq_ack_notifier *kian) { mutex_lock(&kvm->irq_lock); - hlist_add_head(&kian->link, &kvm->arch.irq_ack_notifier_list); + hlist_add_head(&kian->link, &kvm->irq_ack_notifier_list); mutex_unlock(&kvm->irq_lock); } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 3bee94892774..6eca153e1a02 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -958,6 +958,7 @@ static struct kvm *kvm_create_vm(void) goto out; #ifdef CONFIG_HAVE_KVM_IRQCHIP INIT_HLIST_HEAD(&kvm->mask_notifier_list); + INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list); #endif #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET -- cgit v1.3-8-gc7d7 From bfd99ff5d483b11c32bca49fbff7a5ac59038b0a Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 26 Aug 2009 14:57:50 +0300 Subject: KVM: Move assigned device code to own file Signed-off-by: Avi Kivity --- arch/ia64/kvm/Makefile | 2 +- arch/x86/kvm/Makefile | 3 +- include/linux/kvm_host.h | 17 + virt/kvm/assigned-dev.c | 818 +++++++++++++++++++++++++++++++++++++++++++++++ virt/kvm/kvm_main.c | 798 +-------------------------------------------- 5 files changed, 840 insertions(+), 798 deletions(-) create mode 100644 virt/kvm/assigned-dev.c (limited to 'include') diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile index 0bb99b732908..1089b3e918ac 100644 --- a/arch/ia64/kvm/Makefile +++ b/arch/ia64/kvm/Makefile @@ -49,7 +49,7 @@ EXTRA_CFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/ EXTRA_AFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/ common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ - coalesced_mmio.o irq_comm.o) + coalesced_mmio.o irq_comm.o assigned-dev.o) ifeq ($(CONFIG_IOMMU_API),y) common-objs += $(addprefix ../../../virt/kvm/, iommu.o) diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index 0e7fe78d0f74..31a7035c4bd9 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -6,7 +6,8 @@ CFLAGS_svm.o := -I. CFLAGS_vmx.o := -I. kvm-y += $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ - coalesced_mmio.o irq_comm.o eventfd.o) + coalesced_mmio.o irq_comm.o eventfd.o \ + assigned-dev.o) kvm-$(CONFIG_IOMMU_API) += $(addprefix ../../../virt/kvm/, iommu.o) kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \ diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 4aa5e1d9a797..c0a1cc35f080 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -577,4 +577,21 @@ static inline bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu) return vcpu->kvm->bsp_vcpu_id == vcpu->vcpu_id; } #endif + +#ifdef __KVM_HAVE_DEVICE_ASSIGNMENT + +long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, + unsigned long arg); + +#else + +static inline long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, + unsigned long arg) +{ + return -ENOTTY; +} + #endif + +#endif + diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c new file mode 100644 index 000000000000..fd9c097b760a --- /dev/null +++ b/virt/kvm/assigned-dev.c @@ -0,0 +1,818 @@ +/* + * Kernel-based Virtual Machine - device assignment support + * + * Copyright (C) 2006-9 Red Hat, Inc + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "irq.h" + +static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head, + int assigned_dev_id) +{ + struct list_head *ptr; + struct kvm_assigned_dev_kernel *match; + + list_for_each(ptr, head) { + match = list_entry(ptr, struct kvm_assigned_dev_kernel, list); + if (match->assigned_dev_id == assigned_dev_id) + return match; + } + return NULL; +} + +static int find_index_from_host_irq(struct kvm_assigned_dev_kernel + *assigned_dev, int irq) +{ + int i, index; + struct msix_entry *host_msix_entries; + + host_msix_entries = assigned_dev->host_msix_entries; + + index = -1; + for (i = 0; i < assigned_dev->entries_nr; i++) + if (irq == host_msix_entries[i].vector) { + index = i; + break; + } + if (index < 0) { + printk(KERN_WARNING "Fail to find correlated MSI-X entry!\n"); + return 0; + } + + return index; +} + +static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) +{ + struct kvm_assigned_dev_kernel *assigned_dev; + struct kvm *kvm; + int i; + + assigned_dev = container_of(work, struct kvm_assigned_dev_kernel, + interrupt_work); + kvm = assigned_dev->kvm; + + spin_lock_irq(&assigned_dev->assigned_dev_lock); + if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { + struct kvm_guest_msix_entry *guest_entries = + assigned_dev->guest_msix_entries; + for (i = 0; i < assigned_dev->entries_nr; i++) { + if (!(guest_entries[i].flags & + KVM_ASSIGNED_MSIX_PENDING)) + continue; + guest_entries[i].flags &= ~KVM_ASSIGNED_MSIX_PENDING; + kvm_set_irq(assigned_dev->kvm, + assigned_dev->irq_source_id, + guest_entries[i].vector, 1); + } + } else + kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, + assigned_dev->guest_irq, 1); + + spin_unlock_irq(&assigned_dev->assigned_dev_lock); +} + +static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) +{ + unsigned long flags; + struct kvm_assigned_dev_kernel *assigned_dev = + (struct kvm_assigned_dev_kernel *) dev_id; + + spin_lock_irqsave(&assigned_dev->assigned_dev_lock, flags); + if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { + int index = find_index_from_host_irq(assigned_dev, irq); + if (index < 0) + goto out; + assigned_dev->guest_msix_entries[index].flags |= + KVM_ASSIGNED_MSIX_PENDING; + } + + schedule_work(&assigned_dev->interrupt_work); + + if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) { + disable_irq_nosync(irq); + assigned_dev->host_irq_disabled = true; + } + +out: + spin_unlock_irqrestore(&assigned_dev->assigned_dev_lock, flags); + return IRQ_HANDLED; +} + +/* Ack the irq line for an assigned device */ +static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) +{ + struct kvm_assigned_dev_kernel *dev; + unsigned long flags; + + if (kian->gsi == -1) + return; + + dev = container_of(kian, struct kvm_assigned_dev_kernel, + ack_notifier); + + kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0); + + /* The guest irq may be shared so this ack may be + * from another device. + */ + spin_lock_irqsave(&dev->assigned_dev_lock, flags); + if (dev->host_irq_disabled) { + enable_irq(dev->host_irq); + dev->host_irq_disabled = false; + } + spin_unlock_irqrestore(&dev->assigned_dev_lock, flags); +} + +static void deassign_guest_irq(struct kvm *kvm, + struct kvm_assigned_dev_kernel *assigned_dev) +{ + kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier); + assigned_dev->ack_notifier.gsi = -1; + + if (assigned_dev->irq_source_id != -1) + kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id); + assigned_dev->irq_source_id = -1; + assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK); +} + +/* The function implicit hold kvm->lock mutex due to cancel_work_sync() */ +static void deassign_host_irq(struct kvm *kvm, + struct kvm_assigned_dev_kernel *assigned_dev) +{ + /* + * In kvm_free_device_irq, cancel_work_sync return true if: + * 1. work is scheduled, and then cancelled. + * 2. work callback is executed. + * + * The first one ensured that the irq is disabled and no more events + * would happen. But for the second one, the irq may be enabled (e.g. + * for MSI). So we disable irq here to prevent further events. + * + * Notice this maybe result in nested disable if the interrupt type is + * INTx, but it's OK for we are going to free it. + * + * If this function is a part of VM destroy, please ensure that till + * now, the kvm state is still legal for probably we also have to wait + * interrupt_work done. + */ + if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { + int i; + for (i = 0; i < assigned_dev->entries_nr; i++) + disable_irq_nosync(assigned_dev-> + host_msix_entries[i].vector); + + cancel_work_sync(&assigned_dev->interrupt_work); + + for (i = 0; i < assigned_dev->entries_nr; i++) + free_irq(assigned_dev->host_msix_entries[i].vector, + (void *)assigned_dev); + + assigned_dev->entries_nr = 0; + kfree(assigned_dev->host_msix_entries); + kfree(assigned_dev->guest_msix_entries); + pci_disable_msix(assigned_dev->dev); + } else { + /* Deal with MSI and INTx */ + disable_irq_nosync(assigned_dev->host_irq); + cancel_work_sync(&assigned_dev->interrupt_work); + + free_irq(assigned_dev->host_irq, (void *)assigned_dev); + + if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI) + pci_disable_msi(assigned_dev->dev); + } + + assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK); +} + +static int kvm_deassign_irq(struct kvm *kvm, + struct kvm_assigned_dev_kernel *assigned_dev, + unsigned long irq_requested_type) +{ + unsigned long guest_irq_type, host_irq_type; + + if (!irqchip_in_kernel(kvm)) + return -EINVAL; + /* no irq assignment to deassign */ + if (!assigned_dev->irq_requested_type) + return -ENXIO; + + host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK; + guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK; + + if (host_irq_type) + deassign_host_irq(kvm, assigned_dev); + if (guest_irq_type) + deassign_guest_irq(kvm, assigned_dev); + + return 0; +} + +static void kvm_free_assigned_irq(struct kvm *kvm, + struct kvm_assigned_dev_kernel *assigned_dev) +{ + kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type); +} + +static void kvm_free_assigned_device(struct kvm *kvm, + struct kvm_assigned_dev_kernel + *assigned_dev) +{ + kvm_free_assigned_irq(kvm, assigned_dev); + + pci_reset_function(assigned_dev->dev); + + pci_release_regions(assigned_dev->dev); + pci_disable_device(assigned_dev->dev); + pci_dev_put(assigned_dev->dev); + + list_del(&assigned_dev->list); + kfree(assigned_dev); +} + +void kvm_free_all_assigned_devices(struct kvm *kvm) +{ + struct list_head *ptr, *ptr2; + struct kvm_assigned_dev_kernel *assigned_dev; + + list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) { + assigned_dev = list_entry(ptr, + struct kvm_assigned_dev_kernel, + list); + + kvm_free_assigned_device(kvm, assigned_dev); + } +} + +static int assigned_device_enable_host_intx(struct kvm *kvm, + struct kvm_assigned_dev_kernel *dev) +{ + dev->host_irq = dev->dev->irq; + /* Even though this is PCI, we don't want to use shared + * interrupts. Sharing host devices with guest-assigned devices + * on the same interrupt line is not a happy situation: there + * are going to be long delays in accepting, acking, etc. + */ + if (request_irq(dev->host_irq, kvm_assigned_dev_intr, + 0, "kvm_assigned_intx_device", (void *)dev)) + return -EIO; + return 0; +} + +#ifdef __KVM_HAVE_MSI +static int assigned_device_enable_host_msi(struct kvm *kvm, + struct kvm_assigned_dev_kernel *dev) +{ + int r; + + if (!dev->dev->msi_enabled) { + r = pci_enable_msi(dev->dev); + if (r) + return r; + } + + dev->host_irq = dev->dev->irq; + if (request_irq(dev->host_irq, kvm_assigned_dev_intr, 0, + "kvm_assigned_msi_device", (void *)dev)) { + pci_disable_msi(dev->dev); + return -EIO; + } + + return 0; +} +#endif + +#ifdef __KVM_HAVE_MSIX +static int assigned_device_enable_host_msix(struct kvm *kvm, + struct kvm_assigned_dev_kernel *dev) +{ + int i, r = -EINVAL; + + /* host_msix_entries and guest_msix_entries should have been + * initialized */ + if (dev->entries_nr == 0) + return r; + + r = pci_enable_msix(dev->dev, dev->host_msix_entries, dev->entries_nr); + if (r) + return r; + + for (i = 0; i < dev->entries_nr; i++) { + r = request_irq(dev->host_msix_entries[i].vector, + kvm_assigned_dev_intr, 0, + "kvm_assigned_msix_device", + (void *)dev); + /* FIXME: free requested_irq's on failure */ + if (r) + return r; + } + + return 0; +} + +#endif + +static int assigned_device_enable_guest_intx(struct kvm *kvm, + struct kvm_assigned_dev_kernel *dev, + struct kvm_assigned_irq *irq) +{ + dev->guest_irq = irq->guest_irq; + dev->ack_notifier.gsi = irq->guest_irq; + return 0; +} + +#ifdef __KVM_HAVE_MSI +static int assigned_device_enable_guest_msi(struct kvm *kvm, + struct kvm_assigned_dev_kernel *dev, + struct kvm_assigned_irq *irq) +{ + dev->guest_irq = irq->guest_irq; + dev->ack_notifier.gsi = -1; + dev->host_irq_disabled = false; + return 0; +} +#endif + +#ifdef __KVM_HAVE_MSIX +static int assigned_device_enable_guest_msix(struct kvm *kvm, + struct kvm_assigned_dev_kernel *dev, + struct kvm_assigned_irq *irq) +{ + dev->guest_irq = irq->guest_irq; + dev->ack_notifier.gsi = -1; + dev->host_irq_disabled = false; + return 0; +} +#endif + +static int assign_host_irq(struct kvm *kvm, + struct kvm_assigned_dev_kernel *dev, + __u32 host_irq_type) +{ + int r = -EEXIST; + + if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK) + return r; + + switch (host_irq_type) { + case KVM_DEV_IRQ_HOST_INTX: + r = assigned_device_enable_host_intx(kvm, dev); + break; +#ifdef __KVM_HAVE_MSI + case KVM_DEV_IRQ_HOST_MSI: + r = assigned_device_enable_host_msi(kvm, dev); + break; +#endif +#ifdef __KVM_HAVE_MSIX + case KVM_DEV_IRQ_HOST_MSIX: + r = assigned_device_enable_host_msix(kvm, dev); + break; +#endif + default: + r = -EINVAL; + } + + if (!r) + dev->irq_requested_type |= host_irq_type; + + return r; +} + +static int assign_guest_irq(struct kvm *kvm, + struct kvm_assigned_dev_kernel *dev, + struct kvm_assigned_irq *irq, + unsigned long guest_irq_type) +{ + int id; + int r = -EEXIST; + + if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK) + return r; + + id = kvm_request_irq_source_id(kvm); + if (id < 0) + return id; + + dev->irq_source_id = id; + + switch (guest_irq_type) { + case KVM_DEV_IRQ_GUEST_INTX: + r = assigned_device_enable_guest_intx(kvm, dev, irq); + break; +#ifdef __KVM_HAVE_MSI + case KVM_DEV_IRQ_GUEST_MSI: + r = assigned_device_enable_guest_msi(kvm, dev, irq); + break; +#endif +#ifdef __KVM_HAVE_MSIX + case KVM_DEV_IRQ_GUEST_MSIX: + r = assigned_device_enable_guest_msix(kvm, dev, irq); + break; +#endif + default: + r = -EINVAL; + } + + if (!r) { + dev->irq_requested_type |= guest_irq_type; + kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier); + } else + kvm_free_irq_source_id(kvm, dev->irq_source_id); + + return r; +} + +/* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */ +static int kvm_vm_ioctl_assign_irq(struct kvm *kvm, + struct kvm_assigned_irq *assigned_irq) +{ + int r = -EINVAL; + struct kvm_assigned_dev_kernel *match; + unsigned long host_irq_type, guest_irq_type; + + if (!capable(CAP_SYS_RAWIO)) + return -EPERM; + + if (!irqchip_in_kernel(kvm)) + return r; + + mutex_lock(&kvm->lock); + r = -ENODEV; + match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, + assigned_irq->assigned_dev_id); + if (!match) + goto out; + + host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK); + guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK); + + r = -EINVAL; + /* can only assign one type at a time */ + if (hweight_long(host_irq_type) > 1) + goto out; + if (hweight_long(guest_irq_type) > 1) + goto out; + if (host_irq_type == 0 && guest_irq_type == 0) + goto out; + + r = 0; + if (host_irq_type) + r = assign_host_irq(kvm, match, host_irq_type); + if (r) + goto out; + + if (guest_irq_type) + r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type); +out: + mutex_unlock(&kvm->lock); + return r; +} + +static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm, + struct kvm_assigned_irq + *assigned_irq) +{ + int r = -ENODEV; + struct kvm_assigned_dev_kernel *match; + + mutex_lock(&kvm->lock); + + match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, + assigned_irq->assigned_dev_id); + if (!match) + goto out; + + r = kvm_deassign_irq(kvm, match, assigned_irq->flags); +out: + mutex_unlock(&kvm->lock); + return r; +} + +static int kvm_vm_ioctl_assign_device(struct kvm *kvm, + struct kvm_assigned_pci_dev *assigned_dev) +{ + int r = 0; + struct kvm_assigned_dev_kernel *match; + struct pci_dev *dev; + + down_read(&kvm->slots_lock); + mutex_lock(&kvm->lock); + + match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, + assigned_dev->assigned_dev_id); + if (match) { + /* device already assigned */ + r = -EEXIST; + goto out; + } + + match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL); + if (match == NULL) { + printk(KERN_INFO "%s: Couldn't allocate memory\n", + __func__); + r = -ENOMEM; + goto out; + } + dev = pci_get_bus_and_slot(assigned_dev->busnr, + assigned_dev->devfn); + if (!dev) { + printk(KERN_INFO "%s: host device not found\n", __func__); + r = -EINVAL; + goto out_free; + } + if (pci_enable_device(dev)) { + printk(KERN_INFO "%s: Could not enable PCI device\n", __func__); + r = -EBUSY; + goto out_put; + } + r = pci_request_regions(dev, "kvm_assigned_device"); + if (r) { + printk(KERN_INFO "%s: Could not get access to device regions\n", + __func__); + goto out_disable; + } + + pci_reset_function(dev); + + match->assigned_dev_id = assigned_dev->assigned_dev_id; + match->host_busnr = assigned_dev->busnr; + match->host_devfn = assigned_dev->devfn; + match->flags = assigned_dev->flags; + match->dev = dev; + spin_lock_init(&match->assigned_dev_lock); + match->irq_source_id = -1; + match->kvm = kvm; + match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq; + INIT_WORK(&match->interrupt_work, + kvm_assigned_dev_interrupt_work_handler); + + list_add(&match->list, &kvm->arch.assigned_dev_head); + + if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) { + if (!kvm->arch.iommu_domain) { + r = kvm_iommu_map_guest(kvm); + if (r) + goto out_list_del; + } + r = kvm_assign_device(kvm, match); + if (r) + goto out_list_del; + } + +out: + mutex_unlock(&kvm->lock); + up_read(&kvm->slots_lock); + return r; +out_list_del: + list_del(&match->list); + pci_release_regions(dev); +out_disable: + pci_disable_device(dev); +out_put: + pci_dev_put(dev); +out_free: + kfree(match); + mutex_unlock(&kvm->lock); + up_read(&kvm->slots_lock); + return r; +} + +static int kvm_vm_ioctl_deassign_device(struct kvm *kvm, + struct kvm_assigned_pci_dev *assigned_dev) +{ + int r = 0; + struct kvm_assigned_dev_kernel *match; + + mutex_lock(&kvm->lock); + + match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, + assigned_dev->assigned_dev_id); + if (!match) { + printk(KERN_INFO "%s: device hasn't been assigned before, " + "so cannot be deassigned\n", __func__); + r = -EINVAL; + goto out; + } + + if (match->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) + kvm_deassign_device(kvm, match); + + kvm_free_assigned_device(kvm, match); + +out: + mutex_unlock(&kvm->lock); + return r; +} + + +#ifdef __KVM_HAVE_MSIX +static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm, + struct kvm_assigned_msix_nr *entry_nr) +{ + int r = 0; + struct kvm_assigned_dev_kernel *adev; + + mutex_lock(&kvm->lock); + + adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, + entry_nr->assigned_dev_id); + if (!adev) { + r = -EINVAL; + goto msix_nr_out; + } + + if (adev->entries_nr == 0) { + adev->entries_nr = entry_nr->entry_nr; + if (adev->entries_nr == 0 || + adev->entries_nr >= KVM_MAX_MSIX_PER_DEV) { + r = -EINVAL; + goto msix_nr_out; + } + + adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) * + entry_nr->entry_nr, + GFP_KERNEL); + if (!adev->host_msix_entries) { + r = -ENOMEM; + goto msix_nr_out; + } + adev->guest_msix_entries = kzalloc( + sizeof(struct kvm_guest_msix_entry) * + entry_nr->entry_nr, GFP_KERNEL); + if (!adev->guest_msix_entries) { + kfree(adev->host_msix_entries); + r = -ENOMEM; + goto msix_nr_out; + } + } else /* Not allowed set MSI-X number twice */ + r = -EINVAL; +msix_nr_out: + mutex_unlock(&kvm->lock); + return r; +} + +static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm, + struct kvm_assigned_msix_entry *entry) +{ + int r = 0, i; + struct kvm_assigned_dev_kernel *adev; + + mutex_lock(&kvm->lock); + + adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, + entry->assigned_dev_id); + + if (!adev) { + r = -EINVAL; + goto msix_entry_out; + } + + for (i = 0; i < adev->entries_nr; i++) + if (adev->guest_msix_entries[i].vector == 0 || + adev->guest_msix_entries[i].entry == entry->entry) { + adev->guest_msix_entries[i].entry = entry->entry; + adev->guest_msix_entries[i].vector = entry->gsi; + adev->host_msix_entries[i].entry = entry->entry; + break; + } + if (i == adev->entries_nr) { + r = -ENOSPC; + goto msix_entry_out; + } + +msix_entry_out: + mutex_unlock(&kvm->lock); + + return r; +} +#endif + +long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, + unsigned long arg) +{ + void __user *argp = (void __user *)arg; + int r = -ENOTTY; + + switch (ioctl) { + case KVM_ASSIGN_PCI_DEVICE: { + struct kvm_assigned_pci_dev assigned_dev; + + r = -EFAULT; + if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) + goto out; + r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev); + if (r) + goto out; + break; + } + case KVM_ASSIGN_IRQ: { + r = -EOPNOTSUPP; + break; + } +#ifdef KVM_CAP_ASSIGN_DEV_IRQ + case KVM_ASSIGN_DEV_IRQ: { + struct kvm_assigned_irq assigned_irq; + + r = -EFAULT; + if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq)) + goto out; + r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq); + if (r) + goto out; + break; + } + case KVM_DEASSIGN_DEV_IRQ: { + struct kvm_assigned_irq assigned_irq; + + r = -EFAULT; + if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq)) + goto out; + r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq); + if (r) + goto out; + break; + } +#endif +#ifdef KVM_CAP_DEVICE_DEASSIGNMENT + case KVM_DEASSIGN_PCI_DEVICE: { + struct kvm_assigned_pci_dev assigned_dev; + + r = -EFAULT; + if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) + goto out; + r = kvm_vm_ioctl_deassign_device(kvm, &assigned_dev); + if (r) + goto out; + break; + } +#endif +#ifdef KVM_CAP_IRQ_ROUTING + case KVM_SET_GSI_ROUTING: { + struct kvm_irq_routing routing; + struct kvm_irq_routing __user *urouting; + struct kvm_irq_routing_entry *entries; + + r = -EFAULT; + if (copy_from_user(&routing, argp, sizeof(routing))) + goto out; + r = -EINVAL; + if (routing.nr >= KVM_MAX_IRQ_ROUTES) + goto out; + if (routing.flags) + goto out; + r = -ENOMEM; + entries = vmalloc(routing.nr * sizeof(*entries)); + if (!entries) + goto out; + r = -EFAULT; + urouting = argp; + if (copy_from_user(entries, urouting->entries, + routing.nr * sizeof(*entries))) + goto out_free_irq_routing; + r = kvm_set_irq_routing(kvm, entries, routing.nr, + routing.flags); + out_free_irq_routing: + vfree(entries); + break; + } +#endif /* KVM_CAP_IRQ_ROUTING */ +#ifdef __KVM_HAVE_MSIX + case KVM_ASSIGN_SET_MSIX_NR: { + struct kvm_assigned_msix_nr entry_nr; + r = -EFAULT; + if (copy_from_user(&entry_nr, argp, sizeof entry_nr)) + goto out; + r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr); + if (r) + goto out; + break; + } + case KVM_ASSIGN_SET_MSIX_ENTRY: { + struct kvm_assigned_msix_entry entry; + r = -EFAULT; + if (copy_from_user(&entry, argp, sizeof entry)) + goto out; + r = kvm_vm_ioctl_set_msix_entry(kvm, &entry); + if (r) + goto out; + break; + } +#endif + } +out: + return r; +} + diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index c12c95b1b641..38e4d2c34ac1 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -53,12 +53,6 @@ #include "coalesced_mmio.h" #endif -#ifdef KVM_CAP_DEVICE_ASSIGNMENT -#include -#include -#include "irq.h" -#endif - #define CREATE_TRACE_POINTS #include @@ -90,608 +84,6 @@ static bool kvm_rebooting; static bool largepages_enabled = true; -#ifdef KVM_CAP_DEVICE_ASSIGNMENT -static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head, - int assigned_dev_id) -{ - struct list_head *ptr; - struct kvm_assigned_dev_kernel *match; - - list_for_each(ptr, head) { - match = list_entry(ptr, struct kvm_assigned_dev_kernel, list); - if (match->assigned_dev_id == assigned_dev_id) - return match; - } - return NULL; -} - -static int find_index_from_host_irq(struct kvm_assigned_dev_kernel - *assigned_dev, int irq) -{ - int i, index; - struct msix_entry *host_msix_entries; - - host_msix_entries = assigned_dev->host_msix_entries; - - index = -1; - for (i = 0; i < assigned_dev->entries_nr; i++) - if (irq == host_msix_entries[i].vector) { - index = i; - break; - } - if (index < 0) { - printk(KERN_WARNING "Fail to find correlated MSI-X entry!\n"); - return 0; - } - - return index; -} - -static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) -{ - struct kvm_assigned_dev_kernel *assigned_dev; - struct kvm *kvm; - int i; - - assigned_dev = container_of(work, struct kvm_assigned_dev_kernel, - interrupt_work); - kvm = assigned_dev->kvm; - - spin_lock_irq(&assigned_dev->assigned_dev_lock); - if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { - struct kvm_guest_msix_entry *guest_entries = - assigned_dev->guest_msix_entries; - for (i = 0; i < assigned_dev->entries_nr; i++) { - if (!(guest_entries[i].flags & - KVM_ASSIGNED_MSIX_PENDING)) - continue; - guest_entries[i].flags &= ~KVM_ASSIGNED_MSIX_PENDING; - kvm_set_irq(assigned_dev->kvm, - assigned_dev->irq_source_id, - guest_entries[i].vector, 1); - } - } else - kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, - assigned_dev->guest_irq, 1); - - spin_unlock_irq(&assigned_dev->assigned_dev_lock); -} - -static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) -{ - unsigned long flags; - struct kvm_assigned_dev_kernel *assigned_dev = - (struct kvm_assigned_dev_kernel *) dev_id; - - spin_lock_irqsave(&assigned_dev->assigned_dev_lock, flags); - if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { - int index = find_index_from_host_irq(assigned_dev, irq); - if (index < 0) - goto out; - assigned_dev->guest_msix_entries[index].flags |= - KVM_ASSIGNED_MSIX_PENDING; - } - - schedule_work(&assigned_dev->interrupt_work); - - if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) { - disable_irq_nosync(irq); - assigned_dev->host_irq_disabled = true; - } - -out: - spin_unlock_irqrestore(&assigned_dev->assigned_dev_lock, flags); - return IRQ_HANDLED; -} - -/* Ack the irq line for an assigned device */ -static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) -{ - struct kvm_assigned_dev_kernel *dev; - unsigned long flags; - - if (kian->gsi == -1) - return; - - dev = container_of(kian, struct kvm_assigned_dev_kernel, - ack_notifier); - - kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0); - - /* The guest irq may be shared so this ack may be - * from another device. - */ - spin_lock_irqsave(&dev->assigned_dev_lock, flags); - if (dev->host_irq_disabled) { - enable_irq(dev->host_irq); - dev->host_irq_disabled = false; - } - spin_unlock_irqrestore(&dev->assigned_dev_lock, flags); -} - -static void deassign_guest_irq(struct kvm *kvm, - struct kvm_assigned_dev_kernel *assigned_dev) -{ - kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier); - assigned_dev->ack_notifier.gsi = -1; - - if (assigned_dev->irq_source_id != -1) - kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id); - assigned_dev->irq_source_id = -1; - assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK); -} - -/* The function implicit hold kvm->lock mutex due to cancel_work_sync() */ -static void deassign_host_irq(struct kvm *kvm, - struct kvm_assigned_dev_kernel *assigned_dev) -{ - /* - * In kvm_free_device_irq, cancel_work_sync return true if: - * 1. work is scheduled, and then cancelled. - * 2. work callback is executed. - * - * The first one ensured that the irq is disabled and no more events - * would happen. But for the second one, the irq may be enabled (e.g. - * for MSI). So we disable irq here to prevent further events. - * - * Notice this maybe result in nested disable if the interrupt type is - * INTx, but it's OK for we are going to free it. - * - * If this function is a part of VM destroy, please ensure that till - * now, the kvm state is still legal for probably we also have to wait - * interrupt_work done. - */ - if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { - int i; - for (i = 0; i < assigned_dev->entries_nr; i++) - disable_irq_nosync(assigned_dev-> - host_msix_entries[i].vector); - - cancel_work_sync(&assigned_dev->interrupt_work); - - for (i = 0; i < assigned_dev->entries_nr; i++) - free_irq(assigned_dev->host_msix_entries[i].vector, - (void *)assigned_dev); - - assigned_dev->entries_nr = 0; - kfree(assigned_dev->host_msix_entries); - kfree(assigned_dev->guest_msix_entries); - pci_disable_msix(assigned_dev->dev); - } else { - /* Deal with MSI and INTx */ - disable_irq_nosync(assigned_dev->host_irq); - cancel_work_sync(&assigned_dev->interrupt_work); - - free_irq(assigned_dev->host_irq, (void *)assigned_dev); - - if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI) - pci_disable_msi(assigned_dev->dev); - } - - assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK); -} - -static int kvm_deassign_irq(struct kvm *kvm, - struct kvm_assigned_dev_kernel *assigned_dev, - unsigned long irq_requested_type) -{ - unsigned long guest_irq_type, host_irq_type; - - if (!irqchip_in_kernel(kvm)) - return -EINVAL; - /* no irq assignment to deassign */ - if (!assigned_dev->irq_requested_type) - return -ENXIO; - - host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK; - guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK; - - if (host_irq_type) - deassign_host_irq(kvm, assigned_dev); - if (guest_irq_type) - deassign_guest_irq(kvm, assigned_dev); - - return 0; -} - -static void kvm_free_assigned_irq(struct kvm *kvm, - struct kvm_assigned_dev_kernel *assigned_dev) -{ - kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type); -} - -static void kvm_free_assigned_device(struct kvm *kvm, - struct kvm_assigned_dev_kernel - *assigned_dev) -{ - kvm_free_assigned_irq(kvm, assigned_dev); - - pci_reset_function(assigned_dev->dev); - - pci_release_regions(assigned_dev->dev); - pci_disable_device(assigned_dev->dev); - pci_dev_put(assigned_dev->dev); - - list_del(&assigned_dev->list); - kfree(assigned_dev); -} - -void kvm_free_all_assigned_devices(struct kvm *kvm) -{ - struct list_head *ptr, *ptr2; - struct kvm_assigned_dev_kernel *assigned_dev; - - list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) { - assigned_dev = list_entry(ptr, - struct kvm_assigned_dev_kernel, - list); - - kvm_free_assigned_device(kvm, assigned_dev); - } -} - -static int assigned_device_enable_host_intx(struct kvm *kvm, - struct kvm_assigned_dev_kernel *dev) -{ - dev->host_irq = dev->dev->irq; - /* Even though this is PCI, we don't want to use shared - * interrupts. Sharing host devices with guest-assigned devices - * on the same interrupt line is not a happy situation: there - * are going to be long delays in accepting, acking, etc. - */ - if (request_irq(dev->host_irq, kvm_assigned_dev_intr, - 0, "kvm_assigned_intx_device", (void *)dev)) - return -EIO; - return 0; -} - -#ifdef __KVM_HAVE_MSI -static int assigned_device_enable_host_msi(struct kvm *kvm, - struct kvm_assigned_dev_kernel *dev) -{ - int r; - - if (!dev->dev->msi_enabled) { - r = pci_enable_msi(dev->dev); - if (r) - return r; - } - - dev->host_irq = dev->dev->irq; - if (request_irq(dev->host_irq, kvm_assigned_dev_intr, 0, - "kvm_assigned_msi_device", (void *)dev)) { - pci_disable_msi(dev->dev); - return -EIO; - } - - return 0; -} -#endif - -#ifdef __KVM_HAVE_MSIX -static int assigned_device_enable_host_msix(struct kvm *kvm, - struct kvm_assigned_dev_kernel *dev) -{ - int i, r = -EINVAL; - - /* host_msix_entries and guest_msix_entries should have been - * initialized */ - if (dev->entries_nr == 0) - return r; - - r = pci_enable_msix(dev->dev, dev->host_msix_entries, dev->entries_nr); - if (r) - return r; - - for (i = 0; i < dev->entries_nr; i++) { - r = request_irq(dev->host_msix_entries[i].vector, - kvm_assigned_dev_intr, 0, - "kvm_assigned_msix_device", - (void *)dev); - /* FIXME: free requested_irq's on failure */ - if (r) - return r; - } - - return 0; -} - -#endif - -static int assigned_device_enable_guest_intx(struct kvm *kvm, - struct kvm_assigned_dev_kernel *dev, - struct kvm_assigned_irq *irq) -{ - dev->guest_irq = irq->guest_irq; - dev->ack_notifier.gsi = irq->guest_irq; - return 0; -} - -#ifdef __KVM_HAVE_MSI -static int assigned_device_enable_guest_msi(struct kvm *kvm, - struct kvm_assigned_dev_kernel *dev, - struct kvm_assigned_irq *irq) -{ - dev->guest_irq = irq->guest_irq; - dev->ack_notifier.gsi = -1; - dev->host_irq_disabled = false; - return 0; -} -#endif -#ifdef __KVM_HAVE_MSIX -static int assigned_device_enable_guest_msix(struct kvm *kvm, - struct kvm_assigned_dev_kernel *dev, - struct kvm_assigned_irq *irq) -{ - dev->guest_irq = irq->guest_irq; - dev->ack_notifier.gsi = -1; - dev->host_irq_disabled = false; - return 0; -} -#endif - -static int assign_host_irq(struct kvm *kvm, - struct kvm_assigned_dev_kernel *dev, - __u32 host_irq_type) -{ - int r = -EEXIST; - - if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK) - return r; - - switch (host_irq_type) { - case KVM_DEV_IRQ_HOST_INTX: - r = assigned_device_enable_host_intx(kvm, dev); - break; -#ifdef __KVM_HAVE_MSI - case KVM_DEV_IRQ_HOST_MSI: - r = assigned_device_enable_host_msi(kvm, dev); - break; -#endif -#ifdef __KVM_HAVE_MSIX - case KVM_DEV_IRQ_HOST_MSIX: - r = assigned_device_enable_host_msix(kvm, dev); - break; -#endif - default: - r = -EINVAL; - } - - if (!r) - dev->irq_requested_type |= host_irq_type; - - return r; -} - -static int assign_guest_irq(struct kvm *kvm, - struct kvm_assigned_dev_kernel *dev, - struct kvm_assigned_irq *irq, - unsigned long guest_irq_type) -{ - int id; - int r = -EEXIST; - - if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK) - return r; - - id = kvm_request_irq_source_id(kvm); - if (id < 0) - return id; - - dev->irq_source_id = id; - - switch (guest_irq_type) { - case KVM_DEV_IRQ_GUEST_INTX: - r = assigned_device_enable_guest_intx(kvm, dev, irq); - break; -#ifdef __KVM_HAVE_MSI - case KVM_DEV_IRQ_GUEST_MSI: - r = assigned_device_enable_guest_msi(kvm, dev, irq); - break; -#endif -#ifdef __KVM_HAVE_MSIX - case KVM_DEV_IRQ_GUEST_MSIX: - r = assigned_device_enable_guest_msix(kvm, dev, irq); - break; -#endif - default: - r = -EINVAL; - } - - if (!r) { - dev->irq_requested_type |= guest_irq_type; - kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier); - } else - kvm_free_irq_source_id(kvm, dev->irq_source_id); - - return r; -} - -/* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */ -static int kvm_vm_ioctl_assign_irq(struct kvm *kvm, - struct kvm_assigned_irq *assigned_irq) -{ - int r = -EINVAL; - struct kvm_assigned_dev_kernel *match; - unsigned long host_irq_type, guest_irq_type; - - if (!capable(CAP_SYS_RAWIO)) - return -EPERM; - - if (!irqchip_in_kernel(kvm)) - return r; - - mutex_lock(&kvm->lock); - r = -ENODEV; - match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, - assigned_irq->assigned_dev_id); - if (!match) - goto out; - - host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK); - guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK); - - r = -EINVAL; - /* can only assign one type at a time */ - if (hweight_long(host_irq_type) > 1) - goto out; - if (hweight_long(guest_irq_type) > 1) - goto out; - if (host_irq_type == 0 && guest_irq_type == 0) - goto out; - - r = 0; - if (host_irq_type) - r = assign_host_irq(kvm, match, host_irq_type); - if (r) - goto out; - - if (guest_irq_type) - r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type); -out: - mutex_unlock(&kvm->lock); - return r; -} - -static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm, - struct kvm_assigned_irq - *assigned_irq) -{ - int r = -ENODEV; - struct kvm_assigned_dev_kernel *match; - - mutex_lock(&kvm->lock); - - match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, - assigned_irq->assigned_dev_id); - if (!match) - goto out; - - r = kvm_deassign_irq(kvm, match, assigned_irq->flags); -out: - mutex_unlock(&kvm->lock); - return r; -} - -static int kvm_vm_ioctl_assign_device(struct kvm *kvm, - struct kvm_assigned_pci_dev *assigned_dev) -{ - int r = 0; - struct kvm_assigned_dev_kernel *match; - struct pci_dev *dev; - - down_read(&kvm->slots_lock); - mutex_lock(&kvm->lock); - - match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, - assigned_dev->assigned_dev_id); - if (match) { - /* device already assigned */ - r = -EEXIST; - goto out; - } - - match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL); - if (match == NULL) { - printk(KERN_INFO "%s: Couldn't allocate memory\n", - __func__); - r = -ENOMEM; - goto out; - } - dev = pci_get_bus_and_slot(assigned_dev->busnr, - assigned_dev->devfn); - if (!dev) { - printk(KERN_INFO "%s: host device not found\n", __func__); - r = -EINVAL; - goto out_free; - } - if (pci_enable_device(dev)) { - printk(KERN_INFO "%s: Could not enable PCI device\n", __func__); - r = -EBUSY; - goto out_put; - } - r = pci_request_regions(dev, "kvm_assigned_device"); - if (r) { - printk(KERN_INFO "%s: Could not get access to device regions\n", - __func__); - goto out_disable; - } - - pci_reset_function(dev); - - match->assigned_dev_id = assigned_dev->assigned_dev_id; - match->host_busnr = assigned_dev->busnr; - match->host_devfn = assigned_dev->devfn; - match->flags = assigned_dev->flags; - match->dev = dev; - spin_lock_init(&match->assigned_dev_lock); - match->irq_source_id = -1; - match->kvm = kvm; - match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq; - INIT_WORK(&match->interrupt_work, - kvm_assigned_dev_interrupt_work_handler); - - list_add(&match->list, &kvm->arch.assigned_dev_head); - - if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) { - if (!kvm->arch.iommu_domain) { - r = kvm_iommu_map_guest(kvm); - if (r) - goto out_list_del; - } - r = kvm_assign_device(kvm, match); - if (r) - goto out_list_del; - } - -out: - mutex_unlock(&kvm->lock); - up_read(&kvm->slots_lock); - return r; -out_list_del: - list_del(&match->list); - pci_release_regions(dev); -out_disable: - pci_disable_device(dev); -out_put: - pci_dev_put(dev); -out_free: - kfree(match); - mutex_unlock(&kvm->lock); - up_read(&kvm->slots_lock); - return r; -} -#endif - -#ifdef KVM_CAP_DEVICE_DEASSIGNMENT -static int kvm_vm_ioctl_deassign_device(struct kvm *kvm, - struct kvm_assigned_pci_dev *assigned_dev) -{ - int r = 0; - struct kvm_assigned_dev_kernel *match; - - mutex_lock(&kvm->lock); - - match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, - assigned_dev->assigned_dev_id); - if (!match) { - printk(KERN_INFO "%s: device hasn't been assigned before, " - "so cannot be deassigned\n", __func__); - r = -EINVAL; - goto out; - } - - if (match->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) - kvm_deassign_device(kvm, match); - - kvm_free_assigned_device(kvm, match); - -out: - mutex_unlock(&kvm->lock); - return r; -} -#endif - inline int kvm_is_mmio_pfn(pfn_t pfn) { if (pfn_valid(pfn)) { @@ -1824,88 +1216,6 @@ static int kvm_vcpu_ioctl_set_sigmask(struct kvm_vcpu *vcpu, sigset_t *sigset) return 0; } -#ifdef __KVM_HAVE_MSIX -static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm, - struct kvm_assigned_msix_nr *entry_nr) -{ - int r = 0; - struct kvm_assigned_dev_kernel *adev; - - mutex_lock(&kvm->lock); - - adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, - entry_nr->assigned_dev_id); - if (!adev) { - r = -EINVAL; - goto msix_nr_out; - } - - if (adev->entries_nr == 0) { - adev->entries_nr = entry_nr->entry_nr; - if (adev->entries_nr == 0 || - adev->entries_nr >= KVM_MAX_MSIX_PER_DEV) { - r = -EINVAL; - goto msix_nr_out; - } - - adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) * - entry_nr->entry_nr, - GFP_KERNEL); - if (!adev->host_msix_entries) { - r = -ENOMEM; - goto msix_nr_out; - } - adev->guest_msix_entries = kzalloc( - sizeof(struct kvm_guest_msix_entry) * - entry_nr->entry_nr, GFP_KERNEL); - if (!adev->guest_msix_entries) { - kfree(adev->host_msix_entries); - r = -ENOMEM; - goto msix_nr_out; - } - } else /* Not allowed set MSI-X number twice */ - r = -EINVAL; -msix_nr_out: - mutex_unlock(&kvm->lock); - return r; -} - -static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm, - struct kvm_assigned_msix_entry *entry) -{ - int r = 0, i; - struct kvm_assigned_dev_kernel *adev; - - mutex_lock(&kvm->lock); - - adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, - entry->assigned_dev_id); - - if (!adev) { - r = -EINVAL; - goto msix_entry_out; - } - - for (i = 0; i < adev->entries_nr; i++) - if (adev->guest_msix_entries[i].vector == 0 || - adev->guest_msix_entries[i].entry == entry->entry) { - adev->guest_msix_entries[i].entry = entry->entry; - adev->guest_msix_entries[i].vector = entry->gsi; - adev->host_msix_entries[i].entry = entry->entry; - break; - } - if (i == adev->entries_nr) { - r = -ENOSPC; - goto msix_entry_out; - } - -msix_entry_out: - mutex_unlock(&kvm->lock); - - return r; -} -#endif - static long kvm_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -2163,112 +1473,6 @@ static long kvm_vm_ioctl(struct file *filp, r = 0; break; } -#endif -#ifdef KVM_CAP_DEVICE_ASSIGNMENT - case KVM_ASSIGN_PCI_DEVICE: { - struct kvm_assigned_pci_dev assigned_dev; - - r = -EFAULT; - if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) - goto out; - r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev); - if (r) - goto out; - break; - } - case KVM_ASSIGN_IRQ: { - r = -EOPNOTSUPP; - break; - } -#ifdef KVM_CAP_ASSIGN_DEV_IRQ - case KVM_ASSIGN_DEV_IRQ: { - struct kvm_assigned_irq assigned_irq; - - r = -EFAULT; - if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq)) - goto out; - r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq); - if (r) - goto out; - break; - } - case KVM_DEASSIGN_DEV_IRQ: { - struct kvm_assigned_irq assigned_irq; - - r = -EFAULT; - if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq)) - goto out; - r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq); - if (r) - goto out; - break; - } -#endif -#endif -#ifdef KVM_CAP_DEVICE_DEASSIGNMENT - case KVM_DEASSIGN_PCI_DEVICE: { - struct kvm_assigned_pci_dev assigned_dev; - - r = -EFAULT; - if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) - goto out; - r = kvm_vm_ioctl_deassign_device(kvm, &assigned_dev); - if (r) - goto out; - break; - } -#endif -#ifdef KVM_CAP_IRQ_ROUTING - case KVM_SET_GSI_ROUTING: { - struct kvm_irq_routing routing; - struct kvm_irq_routing __user *urouting; - struct kvm_irq_routing_entry *entries; - - r = -EFAULT; - if (copy_from_user(&routing, argp, sizeof(routing))) - goto out; - r = -EINVAL; - if (routing.nr >= KVM_MAX_IRQ_ROUTES) - goto out; - if (routing.flags) - goto out; - r = -ENOMEM; - entries = vmalloc(routing.nr * sizeof(*entries)); - if (!entries) - goto out; - r = -EFAULT; - urouting = argp; - if (copy_from_user(entries, urouting->entries, - routing.nr * sizeof(*entries))) - goto out_free_irq_routing; - r = kvm_set_irq_routing(kvm, entries, routing.nr, - routing.flags); - out_free_irq_routing: - vfree(entries); - break; - } -#endif /* KVM_CAP_IRQ_ROUTING */ -#ifdef __KVM_HAVE_MSIX - case KVM_ASSIGN_SET_MSIX_NR: { - struct kvm_assigned_msix_nr entry_nr; - r = -EFAULT; - if (copy_from_user(&entry_nr, argp, sizeof entry_nr)) - goto out; - r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr); - if (r) - goto out; - break; - } - case KVM_ASSIGN_SET_MSIX_ENTRY: { - struct kvm_assigned_msix_entry entry; - r = -EFAULT; - if (copy_from_user(&entry, argp, sizeof entry)) - goto out; - r = kvm_vm_ioctl_set_msix_entry(kvm, &entry); - if (r) - goto out; - break; - } #endif case KVM_IRQFD: { struct kvm_irqfd data; @@ -2301,6 +1505,8 @@ static long kvm_vm_ioctl(struct file *filp, #endif default: r = kvm_arch_vm_ioctl(filp, ioctl, arg); + if (r == -ENOTTY) + r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg); } out: return r; -- cgit v1.3-8-gc7d7 From 10474ae8945ce08622fd1f3464e55bd817bf2376 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Tue, 15 Sep 2009 11:37:46 +0200 Subject: KVM: Activate Virtualization On Demand X86 CPUs need to have some magic happening to enable the virtualization extensions on them. This magic can result in unpleasant results for users, like blocking other VMMs from working (vmx) or using invalid TLB entries (svm). Currently KVM activates virtualization when the respective kernel module is loaded. This blocks us from autoloading KVM modules without breaking other VMMs. To circumvent this problem at least a bit, this patch introduces on demand activation of virtualization. This means, that instead virtualization is enabled on creation of the first virtual machine and disabled on destruction of the last one. So using this, KVM can be easily autoloaded, while keeping other hypervisors usable. Signed-off-by: Alexander Graf Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/ia64/kvm/kvm-ia64.c | 8 ++-- arch/powerpc/kvm/powerpc.c | 3 +- arch/s390/kvm/kvm-s390.c | 3 +- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/svm.c | 13 ++++-- arch/x86/kvm/vmx.c | 11 +++-- arch/x86/kvm/x86.c | 4 +- include/linux/kvm_host.h | 2 +- virt/kvm/kvm_main.c | 90 +++++++++++++++++++++++++++++++++++------ 9 files changed, 108 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index f6471c882667..5fdeec5fddcf 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -124,7 +124,7 @@ long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 *opt_handler) static DEFINE_SPINLOCK(vp_lock); -void kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void *garbage) { long status; long tmp_base; @@ -137,7 +137,7 @@ void kvm_arch_hardware_enable(void *garbage) slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT); local_irq_restore(saved_psr); if (slot < 0) - return; + return -EINVAL; spin_lock(&vp_lock); status = ia64_pal_vp_init_env(kvm_vsa_base ? @@ -145,7 +145,7 @@ void kvm_arch_hardware_enable(void *garbage) __pa(kvm_vm_buffer), KVM_VM_BUFFER_BASE, &tmp_base); if (status != 0) { printk(KERN_WARNING"kvm: Failed to Enable VT Support!!!!\n"); - return ; + return -EINVAL; } if (!kvm_vsa_base) { @@ -154,6 +154,8 @@ void kvm_arch_hardware_enable(void *garbage) } spin_unlock(&vp_lock); ia64_ptr_entry(0x3, slot); + + return 0; } void kvm_arch_hardware_disable(void *garbage) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 95af62217b6b..5902bbc2411e 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -78,8 +78,9 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) return r; } -void kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void *garbage) { + return 0; } void kvm_arch_hardware_disable(void *garbage) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 00e2ce8e91f5..544505893c9f 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -74,9 +74,10 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { static unsigned long long *facilities; /* Section: not file related */ -void kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void *garbage) { /* every s390 is virtualization enabled ;-) */ + return 0; } void kvm_arch_hardware_disable(void *garbage) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index a46e2dd9aca8..295c7c4d9c90 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -459,7 +459,7 @@ struct descriptor_table { struct kvm_x86_ops { int (*cpu_has_kvm_support)(void); /* __init */ int (*disabled_by_bios)(void); /* __init */ - void (*hardware_enable)(void *dummy); /* __init */ + int (*hardware_enable)(void *dummy); void (*hardware_disable)(void *dummy); void (*check_processor_compatibility)(void *rtn); int (*hardware_setup)(void); /* __init */ diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index f54c4f9d2865..59fe4d54da11 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -316,7 +316,7 @@ static void svm_hardware_disable(void *garbage) cpu_svm_disable(); } -static void svm_hardware_enable(void *garbage) +static int svm_hardware_enable(void *garbage) { struct svm_cpu_data *svm_data; @@ -325,16 +325,20 @@ static void svm_hardware_enable(void *garbage) struct desc_struct *gdt; int me = raw_smp_processor_id(); + rdmsrl(MSR_EFER, efer); + if (efer & EFER_SVME) + return -EBUSY; + if (!has_svm()) { printk(KERN_ERR "svm_cpu_init: err EOPNOTSUPP on %d\n", me); - return; + return -EINVAL; } svm_data = per_cpu(svm_data, me); if (!svm_data) { printk(KERN_ERR "svm_cpu_init: svm_data is NULL on %d\n", me); - return; + return -EINVAL; } svm_data->asid_generation = 1; @@ -345,11 +349,12 @@ static void svm_hardware_enable(void *garbage) gdt = (struct desc_struct *)gdt_descr.base; svm_data->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS); - rdmsrl(MSR_EFER, efer); wrmsrl(MSR_EFER, efer | EFER_SVME); wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(svm_data->save_area) << PAGE_SHIFT); + + return 0; } static void svm_cpu_uninit(int cpu) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 73cb5dd960cf..a187570e4837 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1138,12 +1138,15 @@ static __init int vmx_disabled_by_bios(void) /* locked but not enabled */ } -static void hardware_enable(void *garbage) +static int hardware_enable(void *garbage) { int cpu = raw_smp_processor_id(); u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); u64 old; + if (read_cr4() & X86_CR4_VMXE) + return -EBUSY; + INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu)); rdmsrl(MSR_IA32_FEATURE_CONTROL, old); if ((old & (FEATURE_CONTROL_LOCKED | @@ -1158,6 +1161,10 @@ static void hardware_enable(void *garbage) asm volatile (ASM_VMX_VMXON_RAX : : "a"(&phys_addr), "m"(phys_addr) : "memory", "cc"); + + ept_sync_global(); + + return 0; } static void vmclear_local_vcpus(void) @@ -4040,8 +4047,6 @@ static int __init vmx_init(void) if (bypass_guest_pf) kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull); - ept_sync_global(); - return 0; out3: diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 829e3063e2ab..3d83de8bcbf4 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4691,9 +4691,9 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu) return kvm_x86_ops->vcpu_reset(vcpu); } -void kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void *garbage) { - kvm_x86_ops->hardware_enable(garbage); + return kvm_x86_ops->hardware_enable(garbage); } void kvm_arch_hardware_disable(void *garbage) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c0a1cc35f080..b985a29d8175 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -345,7 +345,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu); int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu); -void kvm_arch_hardware_enable(void *garbage); +int kvm_arch_hardware_enable(void *garbage); void kvm_arch_hardware_disable(void *garbage); int kvm_arch_hardware_setup(void); void kvm_arch_hardware_unsetup(void); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 38e4d2c34ac1..70c8cbea0a99 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -69,6 +69,8 @@ DEFINE_SPINLOCK(kvm_lock); LIST_HEAD(vm_list); static cpumask_var_t cpus_hardware_enabled; +static int kvm_usage_count = 0; +static atomic_t hardware_enable_failed; struct kmem_cache *kvm_vcpu_cache; EXPORT_SYMBOL_GPL(kvm_vcpu_cache); @@ -79,6 +81,8 @@ struct dentry *kvm_debugfs_dir; static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, unsigned long arg); +static int hardware_enable_all(void); +static void hardware_disable_all(void); static bool kvm_rebooting; @@ -339,6 +343,7 @@ static const struct mmu_notifier_ops kvm_mmu_notifier_ops = { static struct kvm *kvm_create_vm(void) { + int r = 0; struct kvm *kvm = kvm_arch_create_vm(); #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET struct page *page; @@ -346,6 +351,11 @@ static struct kvm *kvm_create_vm(void) if (IS_ERR(kvm)) goto out; + + r = hardware_enable_all(); + if (r) + goto out_err_nodisable; + #ifdef CONFIG_HAVE_KVM_IRQCHIP INIT_HLIST_HEAD(&kvm->mask_notifier_list); INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list); @@ -354,8 +364,8 @@ static struct kvm *kvm_create_vm(void) #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET page = alloc_page(GFP_KERNEL | __GFP_ZERO); if (!page) { - kfree(kvm); - return ERR_PTR(-ENOMEM); + r = -ENOMEM; + goto out_err; } kvm->coalesced_mmio_ring = (struct kvm_coalesced_mmio_ring *)page_address(page); @@ -363,15 +373,13 @@ static struct kvm *kvm_create_vm(void) #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) { - int err; kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops; - err = mmu_notifier_register(&kvm->mmu_notifier, current->mm); - if (err) { + r = mmu_notifier_register(&kvm->mmu_notifier, current->mm); + if (r) { #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET put_page(page); #endif - kfree(kvm); - return ERR_PTR(err); + goto out_err; } } #endif @@ -395,6 +403,12 @@ static struct kvm *kvm_create_vm(void) #endif out: return kvm; + +out_err: + hardware_disable_all(); +out_err_nodisable: + kfree(kvm); + return ERR_PTR(r); } /* @@ -453,6 +467,7 @@ static void kvm_destroy_vm(struct kvm *kvm) kvm_arch_flush_shadow(kvm); #endif kvm_arch_destroy_vm(kvm); + hardware_disable_all(); mmdrop(mm); } @@ -1644,11 +1659,21 @@ static struct miscdevice kvm_dev = { static void hardware_enable(void *junk) { int cpu = raw_smp_processor_id(); + int r; if (cpumask_test_cpu(cpu, cpus_hardware_enabled)) return; + cpumask_set_cpu(cpu, cpus_hardware_enabled); - kvm_arch_hardware_enable(NULL); + + r = kvm_arch_hardware_enable(NULL); + + if (r) { + cpumask_clear_cpu(cpu, cpus_hardware_enabled); + atomic_inc(&hardware_enable_failed); + printk(KERN_INFO "kvm: enabling virtualization on " + "CPU%d failed\n", cpu); + } } static void hardware_disable(void *junk) @@ -1661,11 +1686,52 @@ static void hardware_disable(void *junk) kvm_arch_hardware_disable(NULL); } +static void hardware_disable_all_nolock(void) +{ + BUG_ON(!kvm_usage_count); + + kvm_usage_count--; + if (!kvm_usage_count) + on_each_cpu(hardware_disable, NULL, 1); +} + +static void hardware_disable_all(void) +{ + spin_lock(&kvm_lock); + hardware_disable_all_nolock(); + spin_unlock(&kvm_lock); +} + +static int hardware_enable_all(void) +{ + int r = 0; + + spin_lock(&kvm_lock); + + kvm_usage_count++; + if (kvm_usage_count == 1) { + atomic_set(&hardware_enable_failed, 0); + on_each_cpu(hardware_enable, NULL, 1); + + if (atomic_read(&hardware_enable_failed)) { + hardware_disable_all_nolock(); + r = -EBUSY; + } + } + + spin_unlock(&kvm_lock); + + return r; +} + static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, void *v) { int cpu = (long)v; + if (!kvm_usage_count) + return NOTIFY_OK; + val &= ~CPU_TASKS_FROZEN; switch (val) { case CPU_DYING: @@ -1868,13 +1934,15 @@ static void kvm_exit_debug(void) static int kvm_suspend(struct sys_device *dev, pm_message_t state) { - hardware_disable(NULL); + if (kvm_usage_count) + hardware_disable(NULL); return 0; } static int kvm_resume(struct sys_device *dev) { - hardware_enable(NULL); + if (kvm_usage_count) + hardware_enable(NULL); return 0; } @@ -1949,7 +2017,6 @@ int kvm_init(void *opaque, unsigned int vcpu_size, goto out_free_1; } - on_each_cpu(hardware_enable, NULL, 1); r = register_cpu_notifier(&kvm_cpu_notifier); if (r) goto out_free_2; @@ -1999,7 +2066,6 @@ out_free_3: unregister_reboot_notifier(&kvm_reboot_notifier); unregister_cpu_notifier(&kvm_cpu_notifier); out_free_2: - on_each_cpu(hardware_disable, NULL, 1); out_free_1: kvm_arch_hardware_unsetup(); out_free_0a: -- cgit v1.3-8-gc7d7 From d255f4f2bac81eb798fcf76938147f1f6c756ae2 Mon Sep 17 00:00:00 2001 From: "Zhai, Edwin" Date: Fri, 9 Oct 2009 18:03:20 +0800 Subject: KVM: introduce kvm_vcpu_on_spin Introduce kvm_vcpu_on_spin, to be used by VMX/SVM to yield processing once the cpu detects pause-based looping. Signed-off-by: "Zhai, Edwin" Signed-off-by: Marcelo Tosatti --- include/linux/kvm_host.h | 1 + virt/kvm/kvm_main.c | 15 +++++++++++++++ 2 files changed, 16 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b985a29d8175..bd5a616d9373 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -286,6 +286,7 @@ int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn); void mark_page_dirty(struct kvm *kvm, gfn_t gfn); void kvm_vcpu_block(struct kvm_vcpu *vcpu); +void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu); void kvm_resched(struct kvm_vcpu *vcpu); void kvm_load_guest_fpu(struct kvm_vcpu *vcpu); void kvm_put_guest_fpu(struct kvm_vcpu *vcpu); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 70c8cbea0a99..cac69c4415df 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1108,6 +1108,21 @@ void kvm_resched(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_resched); +void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu) +{ + ktime_t expires; + DEFINE_WAIT(wait); + + prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); + + /* Sleep for 100 us, and hope lock-holder got scheduled */ + expires = ktime_add_ns(ktime_get(), 100000UL); + schedule_hrtimeout(&expires, HRTIMER_MODE_ABS); + + finish_wait(&vcpu->wq, &wait); +} +EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin); + static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { struct kvm_vcpu *vcpu = vma->vm_file->private_data; -- cgit v1.3-8-gc7d7 From ffde22ac53b6d6b1d7206f1172176a667eead778 Mon Sep 17 00:00:00 2001 From: Ed Swierk Date: Thu, 15 Oct 2009 15:21:43 -0700 Subject: KVM: Xen PV-on-HVM guest support Support for Xen PV-on-HVM guests can be implemented almost entirely in userspace, except for handling one annoying MSR that maps a Xen hypercall blob into guest address space. A generic mechanism to delegate MSR writes to userspace seems overkill and risks encouraging similar MSR abuse in the future. Thus this patch adds special support for the Xen HVM MSR. I implemented a new ioctl, KVM_XEN_HVM_CONFIG, that lets userspace tell KVM which MSR the guest will write to, as well as the starting address and size of the hypercall blobs (one each for 32-bit and 64-bit) that userspace has loaded from files. When the guest writes to the MSR, KVM copies one page of the blob from userspace to the guest. I've tested this patch with a hacked-up version of Gerd's userspace code, booting a number of guests (CentOS 5.3 i386 and x86_64, and FreeBSD 8.0-RC1 amd64) and exercising PV network and block devices. [jan: fix i386 build warning] [avi: future proof abi with a flags field] Signed-off-by: Ed Swierk Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- Documentation/kvm/api.txt | 24 +++++++++++++++++++++ arch/x86/include/asm/kvm.h | 1 + arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/x86.c | 46 +++++++++++++++++++++++++++++++++++++++++ include/linux/kvm.h | 16 ++++++++++++++ 5 files changed, 89 insertions(+) (limited to 'include') diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt index 5a4bc8cf6d04..3e8684e48506 100644 --- a/Documentation/kvm/api.txt +++ b/Documentation/kvm/api.txt @@ -593,6 +593,30 @@ struct kvm_irqchip { } chip; }; +4.27 KVM_XEN_HVM_CONFIG + +Capability: KVM_CAP_XEN_HVM +Architectures: x86 +Type: vm ioctl +Parameters: struct kvm_xen_hvm_config (in) +Returns: 0 on success, -1 on error + +Sets the MSR that the Xen HVM guest uses to initialize its hypercall +page, and provides the starting address and size of the hypercall +blobs in userspace. When the guest writes the MSR, kvm copies one +page of a blob (32- or 64-bit, depending on the vcpu mode) to guest +memory. + +struct kvm_xen_hvm_config { + __u32 flags; + __u32 msr; + __u64 blob_addr_32; + __u64 blob_addr_64; + __u8 blob_size_32; + __u8 blob_size_64; + __u8 pad2[30]; +}; + 5. The kvm_run structure Application code obtains a pointer to the kvm_run structure by diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index f02e87a5206f..ef9b4b73cce4 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h @@ -19,6 +19,7 @@ #define __KVM_HAVE_MSIX #define __KVM_HAVE_MCE #define __KVM_HAVE_PIT_STATE2 +#define __KVM_HAVE_XEN_HVM /* Architectural interrupt line count. */ #define KVM_NR_INTERRUPTS 256 diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 179a919f53a4..36f3b53f5c27 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -410,6 +410,8 @@ struct kvm_arch{ unsigned long irq_sources_bitmap; u64 vm_init_tsc; + + struct kvm_xen_hvm_config xen_hvm_config; }; struct kvm_vm_stat { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5d450cc6f841..bb842db3ee7c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -857,6 +857,38 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data) return 0; } +static int xen_hvm_config(struct kvm_vcpu *vcpu, u64 data) +{ + struct kvm *kvm = vcpu->kvm; + int lm = is_long_mode(vcpu); + u8 *blob_addr = lm ? (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_64 + : (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_32; + u8 blob_size = lm ? kvm->arch.xen_hvm_config.blob_size_64 + : kvm->arch.xen_hvm_config.blob_size_32; + u32 page_num = data & ~PAGE_MASK; + u64 page_addr = data & PAGE_MASK; + u8 *page; + int r; + + r = -E2BIG; + if (page_num >= blob_size) + goto out; + r = -ENOMEM; + page = kzalloc(PAGE_SIZE, GFP_KERNEL); + if (!page) + goto out; + r = -EFAULT; + if (copy_from_user(page, blob_addr + (page_num * PAGE_SIZE), PAGE_SIZE)) + goto out_free; + if (kvm_write_guest(kvm, page_addr, page, PAGE_SIZE)) + goto out_free; + r = 0; +out_free: + kfree(page); +out: + return r; +} + int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) { switch (msr) { @@ -972,6 +1004,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) "0x%x data 0x%llx\n", msr, data); break; default: + if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr)) + return xen_hvm_config(vcpu, data); if (!ignore_msrs) { pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", msr, data); @@ -1246,6 +1280,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_PIT2: case KVM_CAP_PIT_STATE2: case KVM_CAP_SET_IDENTITY_MAP_ADDR: + case KVM_CAP_XEN_HVM: r = 1; break; case KVM_CAP_COALESCED_MMIO: @@ -2441,6 +2476,17 @@ long kvm_arch_vm_ioctl(struct file *filp, r = 0; break; } + case KVM_XEN_HVM_CONFIG: { + r = -EFAULT; + if (copy_from_user(&kvm->arch.xen_hvm_config, argp, + sizeof(struct kvm_xen_hvm_config))) + goto out; + r = -EINVAL; + if (kvm->arch.xen_hvm_config.flags) + goto out; + r = 0; + break; + } default: ; } diff --git a/include/linux/kvm.h b/include/linux/kvm.h index f8f8900fc5ec..b694c1d2f918 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -436,6 +436,9 @@ struct kvm_ioeventfd { #endif #define KVM_CAP_IOEVENTFD 36 #define KVM_CAP_SET_IDENTITY_MAP_ADDR 37 +#ifdef __KVM_HAVE_XEN_HVM +#define KVM_CAP_XEN_HVM 38 +#endif #ifdef KVM_CAP_IRQ_ROUTING @@ -488,6 +491,18 @@ struct kvm_x86_mce { }; #endif +#ifdef KVM_CAP_XEN_HVM +struct kvm_xen_hvm_config { + __u32 flags; + __u32 msr; + __u64 blob_addr_32; + __u64 blob_addr_64; + __u8 blob_size_32; + __u8 blob_size_64; + __u8 pad2[30]; +}; +#endif + #define KVM_IRQFD_FLAG_DEASSIGN (1 << 0) struct kvm_irqfd { @@ -546,6 +561,7 @@ struct kvm_irqfd { #define KVM_CREATE_PIT2 _IOW(KVMIO, 0x77, struct kvm_pit_config) #define KVM_SET_BOOT_CPU_ID _IO(KVMIO, 0x78) #define KVM_IOEVENTFD _IOW(KVMIO, 0x79, struct kvm_ioeventfd) +#define KVM_XEN_HVM_CONFIG _IOW(KVMIO, 0x7a, struct kvm_xen_hvm_config) /* * ioctls for vcpu fds -- cgit v1.3-8-gc7d7 From afbcf7ab8d1bc8c2d04792f6d9e786e0adeb328d Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Fri, 16 Oct 2009 15:28:36 -0400 Subject: KVM: allow userspace to adjust kvmclock offset When we migrate a kvm guest that uses pvclock between two hosts, we may suffer a large skew. This is because there can be significant differences between the monotonic clock of the hosts involved. When a new host with a much larger monotonic time starts running the guest, the view of time will be significantly impacted. Situation is much worse when we do the opposite, and migrate to a host with a smaller monotonic clock. This proposed ioctl will allow userspace to inform us what is the monotonic clock value in the source host, so we can keep the time skew short, and more importantly, never goes backwards. Userspace may also need to trigger the current data, since from the first migration onwards, it won't be reflected by a simple call to clock_gettime() anymore. [marcelo: future-proof abi with a flags field] [jan: fix KVM_GET_CLOCK by clearing flags field instead of checking it] Signed-off-by: Glauber Costa Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- Documentation/kvm/api.txt | 36 +++++++++++++++++++++++++++++++++++ arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/x86.c | 42 ++++++++++++++++++++++++++++++++++++++++- include/linux/kvm.h | 10 ++++++++++ 4 files changed, 88 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt index 3e8684e48506..36594ba57723 100644 --- a/Documentation/kvm/api.txt +++ b/Documentation/kvm/api.txt @@ -617,6 +617,42 @@ struct kvm_xen_hvm_config { __u8 pad2[30]; }; +4.27 KVM_GET_CLOCK + +Capability: KVM_CAP_ADJUST_CLOCK +Architectures: x86 +Type: vm ioctl +Parameters: struct kvm_clock_data (out) +Returns: 0 on success, -1 on error + +Gets the current timestamp of kvmclock as seen by the current guest. In +conjunction with KVM_SET_CLOCK, it is used to ensure monotonicity on scenarios +such as migration. + +struct kvm_clock_data { + __u64 clock; /* kvmclock current value */ + __u32 flags; + __u32 pad[9]; +}; + +4.28 KVM_SET_CLOCK + +Capability: KVM_CAP_ADJUST_CLOCK +Architectures: x86 +Type: vm ioctl +Parameters: struct kvm_clock_data (in) +Returns: 0 on success, -1 on error + +Sets the current timestamp of kvmclock to the valued specific in its parameter. +In conjunction with KVM_GET_CLOCK, it is used to ensure monotonicity on scenarios +such as migration. + +struct kvm_clock_data { + __u64 clock; /* kvmclock current value */ + __u32 flags; + __u32 pad[9]; +}; + 5. The kvm_run structure Application code obtains a pointer to the kvm_run structure by diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 4d994ad5051a..0558ff8c32ae 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -413,6 +413,7 @@ struct kvm_arch{ unsigned long irq_sources_bitmap; u64 vm_init_tsc; + s64 kvmclock_offset; struct kvm_xen_hvm_config xen_hvm_config; }; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 13f30aac460b..e16cdc9ec0c1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -680,7 +680,8 @@ static void kvm_write_guest_time(struct kvm_vcpu *v) /* With all the info we got, fill in the values */ vcpu->hv_clock.system_time = ts.tv_nsec + - (NSEC_PER_SEC * (u64)ts.tv_sec); + (NSEC_PER_SEC * (u64)ts.tv_sec) + v->kvm->arch.kvmclock_offset; + /* * The interface expects us to write an even number signaling that the * update is finished. Since the guest won't see the intermediate @@ -1262,6 +1263,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_PIT_STATE2: case KVM_CAP_SET_IDENTITY_MAP_ADDR: case KVM_CAP_XEN_HVM: + case KVM_CAP_ADJUST_CLOCK: r = 1; break; case KVM_CAP_COALESCED_MMIO: @@ -2468,6 +2470,44 @@ long kvm_arch_vm_ioctl(struct file *filp, r = 0; break; } + case KVM_SET_CLOCK: { + struct timespec now; + struct kvm_clock_data user_ns; + u64 now_ns; + s64 delta; + + r = -EFAULT; + if (copy_from_user(&user_ns, argp, sizeof(user_ns))) + goto out; + + r = -EINVAL; + if (user_ns.flags) + goto out; + + r = 0; + ktime_get_ts(&now); + now_ns = timespec_to_ns(&now); + delta = user_ns.clock - now_ns; + kvm->arch.kvmclock_offset = delta; + break; + } + case KVM_GET_CLOCK: { + struct timespec now; + struct kvm_clock_data user_ns; + u64 now_ns; + + ktime_get_ts(&now); + now_ns = timespec_to_ns(&now); + user_ns.clock = kvm->arch.kvmclock_offset + now_ns; + user_ns.flags = 0; + + r = -EFAULT; + if (copy_to_user(argp, &user_ns, sizeof(user_ns))) + goto out; + r = 0; + break; + } + default: ; } diff --git a/include/linux/kvm.h b/include/linux/kvm.h index b694c1d2f918..6ed1a12ed526 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -439,6 +439,7 @@ struct kvm_ioeventfd { #ifdef __KVM_HAVE_XEN_HVM #define KVM_CAP_XEN_HVM 38 #endif +#define KVM_CAP_ADJUST_CLOCK 39 #ifdef KVM_CAP_IRQ_ROUTING @@ -512,6 +513,12 @@ struct kvm_irqfd { __u8 pad[20]; }; +struct kvm_clock_data { + __u64 clock; + __u32 flags; + __u32 pad[9]; +}; + /* * ioctls for VM fds */ @@ -562,6 +569,9 @@ struct kvm_irqfd { #define KVM_SET_BOOT_CPU_ID _IO(KVMIO, 0x78) #define KVM_IOEVENTFD _IOW(KVMIO, 0x79, struct kvm_ioeventfd) #define KVM_XEN_HVM_CONFIG _IOW(KVMIO, 0x7a, struct kvm_xen_hvm_config) +#define KVM_SET_CLOCK _IOW(KVMIO, 0x7b, struct kvm_clock_data) +#define KVM_GET_CLOCK _IOR(KVMIO, 0x7c, struct kvm_clock_data) + /* * ioctls for vcpu fds -- cgit v1.3-8-gc7d7 From c54d2aba27f0c505d61700d656c5943e96982e60 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 2 Nov 2009 17:20:28 +0100 Subject: KVM: Reorder IOCTLs in main kvm.h Obviously, people tend to extend this header at the bottom - more or less blindly. Ensure that deprecated stuff gets its own corner again by moving things to the top. Also add some comments and reindent IOCTLs to make them more readable and reduce the risk of number collisions. Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- include/linux/kvm.h | 235 ++++++++++++++++++++++++++-------------------------- 1 file changed, 117 insertions(+), 118 deletions(-) (limited to 'include') diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 6ed1a12ed526..ca62b8e056f9 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -14,12 +14,76 @@ #define KVM_API_VERSION 12 -/* for KVM_TRACE_ENABLE, deprecated */ +/* *** Deprecated interfaces *** */ + +#define KVM_TRC_SHIFT 16 + +#define KVM_TRC_ENTRYEXIT (1 << KVM_TRC_SHIFT) +#define KVM_TRC_HANDLER (1 << (KVM_TRC_SHIFT + 1)) + +#define KVM_TRC_VMENTRY (KVM_TRC_ENTRYEXIT + 0x01) +#define KVM_TRC_VMEXIT (KVM_TRC_ENTRYEXIT + 0x02) +#define KVM_TRC_PAGE_FAULT (KVM_TRC_HANDLER + 0x01) + +#define KVM_TRC_HEAD_SIZE 12 +#define KVM_TRC_CYCLE_SIZE 8 +#define KVM_TRC_EXTRA_MAX 7 + +#define KVM_TRC_INJ_VIRQ (KVM_TRC_HANDLER + 0x02) +#define KVM_TRC_REDELIVER_EVT (KVM_TRC_HANDLER + 0x03) +#define KVM_TRC_PEND_INTR (KVM_TRC_HANDLER + 0x04) +#define KVM_TRC_IO_READ (KVM_TRC_HANDLER + 0x05) +#define KVM_TRC_IO_WRITE (KVM_TRC_HANDLER + 0x06) +#define KVM_TRC_CR_READ (KVM_TRC_HANDLER + 0x07) +#define KVM_TRC_CR_WRITE (KVM_TRC_HANDLER + 0x08) +#define KVM_TRC_DR_READ (KVM_TRC_HANDLER + 0x09) +#define KVM_TRC_DR_WRITE (KVM_TRC_HANDLER + 0x0A) +#define KVM_TRC_MSR_READ (KVM_TRC_HANDLER + 0x0B) +#define KVM_TRC_MSR_WRITE (KVM_TRC_HANDLER + 0x0C) +#define KVM_TRC_CPUID (KVM_TRC_HANDLER + 0x0D) +#define KVM_TRC_INTR (KVM_TRC_HANDLER + 0x0E) +#define KVM_TRC_NMI (KVM_TRC_HANDLER + 0x0F) +#define KVM_TRC_VMMCALL (KVM_TRC_HANDLER + 0x10) +#define KVM_TRC_HLT (KVM_TRC_HANDLER + 0x11) +#define KVM_TRC_CLTS (KVM_TRC_HANDLER + 0x12) +#define KVM_TRC_LMSW (KVM_TRC_HANDLER + 0x13) +#define KVM_TRC_APIC_ACCESS (KVM_TRC_HANDLER + 0x14) +#define KVM_TRC_TDP_FAULT (KVM_TRC_HANDLER + 0x15) +#define KVM_TRC_GTLB_WRITE (KVM_TRC_HANDLER + 0x16) +#define KVM_TRC_STLB_WRITE (KVM_TRC_HANDLER + 0x17) +#define KVM_TRC_STLB_INVAL (KVM_TRC_HANDLER + 0x18) +#define KVM_TRC_PPC_INSTR (KVM_TRC_HANDLER + 0x19) + struct kvm_user_trace_setup { - __u32 buf_size; /* sub_buffer size of each per-cpu */ - __u32 buf_nr; /* the number of sub_buffers of each per-cpu */ + __u32 buf_size; + __u32 buf_nr; +}; + +#define __KVM_DEPRECATED_MAIN_W_0x06 \ + _IOW(KVMIO, 0x06, struct kvm_user_trace_setup) +#define __KVM_DEPRECATED_MAIN_0x07 _IO(KVMIO, 0x07) +#define __KVM_DEPRECATED_MAIN_0x08 _IO(KVMIO, 0x08) + +#define __KVM_DEPRECATED_VM_R_0x70 _IOR(KVMIO, 0x70, struct kvm_assigned_irq) + +struct kvm_breakpoint { + __u32 enabled; + __u32 padding; + __u64 address; +}; + +struct kvm_debug_guest { + __u32 enabled; + __u32 pad; + struct kvm_breakpoint breakpoints[4]; + __u32 singlestep; }; +#define __KVM_DEPRECATED_VCPU_W_0x87 _IOW(KVMIO, 0x87, struct kvm_debug_guest) + +/* *** End of deprecated interfaces *** */ + + /* for KVM_CREATE_MEMORY_REGION */ struct kvm_memory_region { __u32 slot; @@ -329,24 +393,6 @@ struct kvm_ioeventfd { __u8 pad[36]; }; -#define KVM_TRC_SHIFT 16 -/* - * kvm trace categories - */ -#define KVM_TRC_ENTRYEXIT (1 << KVM_TRC_SHIFT) -#define KVM_TRC_HANDLER (1 << (KVM_TRC_SHIFT + 1)) /* only 12 bits */ - -/* - * kvm trace action - */ -#define KVM_TRC_VMENTRY (KVM_TRC_ENTRYEXIT + 0x01) -#define KVM_TRC_VMEXIT (KVM_TRC_ENTRYEXIT + 0x02) -#define KVM_TRC_PAGE_FAULT (KVM_TRC_HANDLER + 0x01) - -#define KVM_TRC_HEAD_SIZE 12 -#define KVM_TRC_CYCLE_SIZE 8 -#define KVM_TRC_EXTRA_MAX 7 - #define KVMIO 0xAE /* @@ -367,12 +413,10 @@ struct kvm_ioeventfd { */ #define KVM_GET_VCPU_MMAP_SIZE _IO(KVMIO, 0x04) /* in bytes */ #define KVM_GET_SUPPORTED_CPUID _IOWR(KVMIO, 0x05, struct kvm_cpuid2) -/* - * ioctls for kvm trace - */ -#define KVM_TRACE_ENABLE _IOW(KVMIO, 0x06, struct kvm_user_trace_setup) -#define KVM_TRACE_PAUSE _IO(KVMIO, 0x07) -#define KVM_TRACE_DISABLE _IO(KVMIO, 0x08) +#define KVM_TRACE_ENABLE __KVM_DEPRECATED_MAIN_W_0x06 +#define KVM_TRACE_PAUSE __KVM_DEPRECATED_MAIN_0x07 +#define KVM_TRACE_DISABLE __KVM_DEPRECATED_MAIN_0x08 + /* * Extension capability list. */ @@ -522,56 +566,57 @@ struct kvm_clock_data { /* * ioctls for VM fds */ -#define KVM_SET_MEMORY_REGION _IOW(KVMIO, 0x40, struct kvm_memory_region) +#define KVM_SET_MEMORY_REGION _IOW(KVMIO, 0x40, struct kvm_memory_region) /* * KVM_CREATE_VCPU receives as a parameter the vcpu slot, and returns * a vcpu fd. */ -#define KVM_CREATE_VCPU _IO(KVMIO, 0x41) -#define KVM_GET_DIRTY_LOG _IOW(KVMIO, 0x42, struct kvm_dirty_log) -#define KVM_SET_MEMORY_ALIAS _IOW(KVMIO, 0x43, struct kvm_memory_alias) -#define KVM_SET_NR_MMU_PAGES _IO(KVMIO, 0x44) -#define KVM_GET_NR_MMU_PAGES _IO(KVMIO, 0x45) -#define KVM_SET_USER_MEMORY_REGION _IOW(KVMIO, 0x46,\ +#define KVM_CREATE_VCPU _IO(KVMIO, 0x41) +#define KVM_GET_DIRTY_LOG _IOW(KVMIO, 0x42, struct kvm_dirty_log) +#define KVM_SET_MEMORY_ALIAS _IOW(KVMIO, 0x43, struct kvm_memory_alias) +#define KVM_SET_NR_MMU_PAGES _IO(KVMIO, 0x44) +#define KVM_GET_NR_MMU_PAGES _IO(KVMIO, 0x45) +#define KVM_SET_USER_MEMORY_REGION _IOW(KVMIO, 0x46, \ struct kvm_userspace_memory_region) -#define KVM_SET_TSS_ADDR _IO(KVMIO, 0x47) -#define KVM_SET_IDENTITY_MAP_ADDR _IOW(KVMIO, 0x48, __u64) +#define KVM_SET_TSS_ADDR _IO(KVMIO, 0x47) +#define KVM_SET_IDENTITY_MAP_ADDR _IOW(KVMIO, 0x48, __u64) /* Device model IOC */ -#define KVM_CREATE_IRQCHIP _IO(KVMIO, 0x60) -#define KVM_IRQ_LINE _IOW(KVMIO, 0x61, struct kvm_irq_level) -#define KVM_GET_IRQCHIP _IOWR(KVMIO, 0x62, struct kvm_irqchip) -#define KVM_SET_IRQCHIP _IOR(KVMIO, 0x63, struct kvm_irqchip) -#define KVM_CREATE_PIT _IO(KVMIO, 0x64) -#define KVM_GET_PIT _IOWR(KVMIO, 0x65, struct kvm_pit_state) -#define KVM_SET_PIT _IOR(KVMIO, 0x66, struct kvm_pit_state) -#define KVM_IRQ_LINE_STATUS _IOWR(KVMIO, 0x67, struct kvm_irq_level) +#define KVM_CREATE_IRQCHIP _IO(KVMIO, 0x60) +#define KVM_IRQ_LINE _IOW(KVMIO, 0x61, struct kvm_irq_level) +#define KVM_GET_IRQCHIP _IOWR(KVMIO, 0x62, struct kvm_irqchip) +#define KVM_SET_IRQCHIP _IOR(KVMIO, 0x63, struct kvm_irqchip) +#define KVM_CREATE_PIT _IO(KVMIO, 0x64) +#define KVM_GET_PIT _IOWR(KVMIO, 0x65, struct kvm_pit_state) +#define KVM_SET_PIT _IOR(KVMIO, 0x66, struct kvm_pit_state) +#define KVM_IRQ_LINE_STATUS _IOWR(KVMIO, 0x67, struct kvm_irq_level) #define KVM_REGISTER_COALESCED_MMIO \ _IOW(KVMIO, 0x67, struct kvm_coalesced_mmio_zone) #define KVM_UNREGISTER_COALESCED_MMIO \ _IOW(KVMIO, 0x68, struct kvm_coalesced_mmio_zone) -#define KVM_ASSIGN_PCI_DEVICE _IOR(KVMIO, 0x69, \ - struct kvm_assigned_pci_dev) -#define KVM_SET_GSI_ROUTING _IOW(KVMIO, 0x6a, struct kvm_irq_routing) +#define KVM_ASSIGN_PCI_DEVICE _IOR(KVMIO, 0x69, \ + struct kvm_assigned_pci_dev) +#define KVM_SET_GSI_ROUTING _IOW(KVMIO, 0x6a, struct kvm_irq_routing) /* deprecated, replaced by KVM_ASSIGN_DEV_IRQ */ -#define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \ - struct kvm_assigned_irq) -#define KVM_ASSIGN_DEV_IRQ _IOW(KVMIO, 0x70, struct kvm_assigned_irq) -#define KVM_REINJECT_CONTROL _IO(KVMIO, 0x71) -#define KVM_DEASSIGN_PCI_DEVICE _IOW(KVMIO, 0x72, \ - struct kvm_assigned_pci_dev) -#define KVM_ASSIGN_SET_MSIX_NR \ - _IOW(KVMIO, 0x73, struct kvm_assigned_msix_nr) -#define KVM_ASSIGN_SET_MSIX_ENTRY \ - _IOW(KVMIO, 0x74, struct kvm_assigned_msix_entry) -#define KVM_DEASSIGN_DEV_IRQ _IOW(KVMIO, 0x75, struct kvm_assigned_irq) -#define KVM_IRQFD _IOW(KVMIO, 0x76, struct kvm_irqfd) -#define KVM_CREATE_PIT2 _IOW(KVMIO, 0x77, struct kvm_pit_config) -#define KVM_SET_BOOT_CPU_ID _IO(KVMIO, 0x78) -#define KVM_IOEVENTFD _IOW(KVMIO, 0x79, struct kvm_ioeventfd) -#define KVM_XEN_HVM_CONFIG _IOW(KVMIO, 0x7a, struct kvm_xen_hvm_config) -#define KVM_SET_CLOCK _IOW(KVMIO, 0x7b, struct kvm_clock_data) -#define KVM_GET_CLOCK _IOR(KVMIO, 0x7c, struct kvm_clock_data) - +#define KVM_ASSIGN_IRQ __KVM_DEPRECATED_VM_R_0x70 +#define KVM_ASSIGN_DEV_IRQ _IOW(KVMIO, 0x70, struct kvm_assigned_irq) +#define KVM_REINJECT_CONTROL _IO(KVMIO, 0x71) +#define KVM_DEASSIGN_PCI_DEVICE _IOW(KVMIO, 0x72, \ + struct kvm_assigned_pci_dev) +#define KVM_ASSIGN_SET_MSIX_NR _IOW(KVMIO, 0x73, \ + struct kvm_assigned_msix_nr) +#define KVM_ASSIGN_SET_MSIX_ENTRY _IOW(KVMIO, 0x74, \ + struct kvm_assigned_msix_entry) +#define KVM_DEASSIGN_DEV_IRQ _IOW(KVMIO, 0x75, struct kvm_assigned_irq) +#define KVM_IRQFD _IOW(KVMIO, 0x76, struct kvm_irqfd) +#define KVM_CREATE_PIT2 _IOW(KVMIO, 0x77, struct kvm_pit_config) +#define KVM_SET_BOOT_CPU_ID _IO(KVMIO, 0x78) +#define KVM_IOEVENTFD _IOW(KVMIO, 0x79, struct kvm_ioeventfd) +#define KVM_XEN_HVM_CONFIG _IOW(KVMIO, 0x7a, struct kvm_xen_hvm_config) +#define KVM_SET_CLOCK _IOW(KVMIO, 0x7b, struct kvm_clock_data) +#define KVM_GET_CLOCK _IOR(KVMIO, 0x7c, struct kvm_clock_data) +/* Available with KVM_CAP_PIT_STATE2 */ +#define KVM_GET_PIT2 _IOR(KVMIO, 0x9f, struct kvm_pit_state2) +#define KVM_SET_PIT2 _IOW(KVMIO, 0xa0, struct kvm_pit_state2) /* * ioctls for vcpu fds @@ -584,7 +629,7 @@ struct kvm_clock_data { #define KVM_TRANSLATE _IOWR(KVMIO, 0x85, struct kvm_translation) #define KVM_INTERRUPT _IOW(KVMIO, 0x86, struct kvm_interrupt) /* KVM_DEBUG_GUEST is no longer supported, use KVM_SET_GUEST_DEBUG instead */ -#define KVM_DEBUG_GUEST __KVM_DEPRECATED_DEBUG_GUEST +#define KVM_DEBUG_GUEST __KVM_DEPRECATED_VCPU_W_0x87 #define KVM_GET_MSRS _IOWR(KVMIO, 0x88, struct kvm_msrs) #define KVM_SET_MSRS _IOW(KVMIO, 0x89, struct kvm_msrs) #define KVM_SET_CPUID _IOW(KVMIO, 0x8a, struct kvm_cpuid) @@ -596,7 +641,7 @@ struct kvm_clock_data { #define KVM_SET_CPUID2 _IOW(KVMIO, 0x90, struct kvm_cpuid2) #define KVM_GET_CPUID2 _IOWR(KVMIO, 0x91, struct kvm_cpuid2) /* Available with KVM_CAP_VAPIC */ -#define KVM_TPR_ACCESS_REPORTING _IOWR(KVMIO, 0x92, struct kvm_tpr_access_ctl) +#define KVM_TPR_ACCESS_REPORTING _IOWR(KVMIO, 0x92, struct kvm_tpr_access_ctl) /* Available with KVM_CAP_VAPIC */ #define KVM_SET_VAPIC_ADDR _IOW(KVMIO, 0x93, struct kvm_vapic_addr) /* valid for virtual machine (for floating interrupt)_and_ vcpu */ @@ -608,67 +653,21 @@ struct kvm_clock_data { /* initial ipl psw for s390 */ #define KVM_S390_SET_INITIAL_PSW _IOW(KVMIO, 0x96, struct kvm_s390_psw) /* initial reset for s390 */ -#define KVM_S390_INITIAL_RESET _IO(KVMIO, 0x97) +#define KVM_S390_INITIAL_RESET _IO(KVMIO, 0x97) #define KVM_GET_MP_STATE _IOR(KVMIO, 0x98, struct kvm_mp_state) #define KVM_SET_MP_STATE _IOW(KVMIO, 0x99, struct kvm_mp_state) /* Available with KVM_CAP_NMI */ -#define KVM_NMI _IO(KVMIO, 0x9a) +#define KVM_NMI _IO(KVMIO, 0x9a) /* Available with KVM_CAP_SET_GUEST_DEBUG */ #define KVM_SET_GUEST_DEBUG _IOW(KVMIO, 0x9b, struct kvm_guest_debug) /* MCE for x86 */ #define KVM_X86_SETUP_MCE _IOW(KVMIO, 0x9c, __u64) #define KVM_X86_GET_MCE_CAP_SUPPORTED _IOR(KVMIO, 0x9d, __u64) #define KVM_X86_SET_MCE _IOW(KVMIO, 0x9e, struct kvm_x86_mce) - -/* - * Deprecated interfaces - */ -struct kvm_breakpoint { - __u32 enabled; - __u32 padding; - __u64 address; -}; - -struct kvm_debug_guest { - __u32 enabled; - __u32 pad; - struct kvm_breakpoint breakpoints[4]; - __u32 singlestep; -}; - -#define __KVM_DEPRECATED_DEBUG_GUEST _IOW(KVMIO, 0x87, struct kvm_debug_guest) - +/* IA64 stack access */ #define KVM_IA64_VCPU_GET_STACK _IOR(KVMIO, 0x9a, void *) #define KVM_IA64_VCPU_SET_STACK _IOW(KVMIO, 0x9b, void *) -#define KVM_GET_PIT2 _IOR(KVMIO, 0x9f, struct kvm_pit_state2) -#define KVM_SET_PIT2 _IOW(KVMIO, 0xa0, struct kvm_pit_state2) - -#define KVM_TRC_INJ_VIRQ (KVM_TRC_HANDLER + 0x02) -#define KVM_TRC_REDELIVER_EVT (KVM_TRC_HANDLER + 0x03) -#define KVM_TRC_PEND_INTR (KVM_TRC_HANDLER + 0x04) -#define KVM_TRC_IO_READ (KVM_TRC_HANDLER + 0x05) -#define KVM_TRC_IO_WRITE (KVM_TRC_HANDLER + 0x06) -#define KVM_TRC_CR_READ (KVM_TRC_HANDLER + 0x07) -#define KVM_TRC_CR_WRITE (KVM_TRC_HANDLER + 0x08) -#define KVM_TRC_DR_READ (KVM_TRC_HANDLER + 0x09) -#define KVM_TRC_DR_WRITE (KVM_TRC_HANDLER + 0x0A) -#define KVM_TRC_MSR_READ (KVM_TRC_HANDLER + 0x0B) -#define KVM_TRC_MSR_WRITE (KVM_TRC_HANDLER + 0x0C) -#define KVM_TRC_CPUID (KVM_TRC_HANDLER + 0x0D) -#define KVM_TRC_INTR (KVM_TRC_HANDLER + 0x0E) -#define KVM_TRC_NMI (KVM_TRC_HANDLER + 0x0F) -#define KVM_TRC_VMMCALL (KVM_TRC_HANDLER + 0x10) -#define KVM_TRC_HLT (KVM_TRC_HANDLER + 0x11) -#define KVM_TRC_CLTS (KVM_TRC_HANDLER + 0x12) -#define KVM_TRC_LMSW (KVM_TRC_HANDLER + 0x13) -#define KVM_TRC_APIC_ACCESS (KVM_TRC_HANDLER + 0x14) -#define KVM_TRC_TDP_FAULT (KVM_TRC_HANDLER + 0x15) -#define KVM_TRC_GTLB_WRITE (KVM_TRC_HANDLER + 0x16) -#define KVM_TRC_STLB_WRITE (KVM_TRC_HANDLER + 0x17) -#define KVM_TRC_STLB_INVAL (KVM_TRC_HANDLER + 0x18) -#define KVM_TRC_PPC_INSTR (KVM_TRC_HANDLER + 0x19) - #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) struct kvm_assigned_pci_dev { @@ -722,4 +721,4 @@ struct kvm_assigned_msix_entry { __u16 padding[3]; }; -#endif +#endif /* __LINUX_KVM_H */ -- cgit v1.3-8-gc7d7 From a9c7399d6cda0a092b347f8ee49bbe44f6e1fe66 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 4 Nov 2009 11:54:59 +0200 Subject: KVM: Allow internal errors reported to userspace to carry extra data Usually userspace will freeze the guest so we can inspect it, but some internal state is not available. Add extra data to internal error reporting so we can expose it to the debugger. Extra data is specific to the suberror. Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 1 + arch/x86/kvm/vmx.c | 1 + include/linux/kvm.h | 4 ++++ virt/kvm/kvm_main.c | 1 + 4 files changed, 7 insertions(+) (limited to 'include') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index a9024797b21f..4c3e5b2314cb 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2800,6 +2800,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code) case EMULATE_FAIL: vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; + vcpu->run->internal.ndata = 0; return 0; default: BUG(); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index c9cc9596e1a6..c0e66dd58a47 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3352,6 +3352,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) kvm_report_emulation_failure(vcpu, "emulation failure"); vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; + vcpu->run->internal.ndata = 0; ret = 0; goto out; } diff --git a/include/linux/kvm.h b/include/linux/kvm.h index ca62b8e056f9..172639e94392 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -251,6 +251,9 @@ struct kvm_run { } dcr; struct { __u32 suberror; + /* Available with KVM_CAP_INTERNAL_ERROR_DATA: */ + __u32 ndata; + __u64 data[16]; } internal; /* Fix the size of the union. */ char padding[256]; @@ -484,6 +487,7 @@ struct kvm_ioeventfd { #define KVM_CAP_XEN_HVM 38 #endif #define KVM_CAP_ADJUST_CLOCK 39 +#define KVM_CAP_INTERNAL_ERROR_DATA 40 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index bd44fb48ac43..f92ba138007a 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1653,6 +1653,7 @@ static long kvm_dev_ioctl_check_extension_generic(long arg) #ifdef CONFIG_KVM_APIC_ARCHITECTURE case KVM_CAP_SET_BOOT_CPU_ID: #endif + case KVM_CAP_INTERNAL_ERROR_DATA: return 1; #ifdef CONFIG_HAVE_KVM_IRQCHIP case KVM_CAP_IRQ_ROUTING: -- cgit v1.3-8-gc7d7 From 65ac7264043740572ba804edca03c374d70427c9 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 4 Nov 2009 11:59:01 +0200 Subject: KVM: VMX: Report unexpected simultaneous exceptions as internal errors These happen when we trap an exception when another exception is being delivered; we only expect these with MCEs and page faults. If something unexpected happens, things probably went south and we're better off reporting an internal error and freezing. Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 11 ++++++++--- include/linux/kvm.h | 1 + 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index c0e66dd58a47..22fcd27a0b58 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2744,9 +2744,14 @@ static int handle_exception(struct kvm_vcpu *vcpu) return handle_machine_check(vcpu); if ((vect_info & VECTORING_INFO_VALID_MASK) && - !is_page_fault(intr_info)) - printk(KERN_ERR "%s: unexpected, vectoring info 0x%x " - "intr info 0x%x\n", __func__, vect_info, intr_info); + !is_page_fault(intr_info)) { + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_SIMUL_EX; + vcpu->run->internal.ndata = 2; + vcpu->run->internal.data[0] = vect_info; + vcpu->run->internal.data[1] = intr_info; + return 0; + } if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR) return 1; /* already handled by vmx_vcpu_run() */ diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 172639e94392..976f4d181858 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -163,6 +163,7 @@ struct kvm_pit_config { /* For KVM_EXIT_INTERNAL_ERROR */ #define KVM_INTERNAL_ERROR_EMULATION 1 +#define KVM_INTERNAL_ERROR_SIMUL_EX 2 /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */ struct kvm_run { -- cgit v1.3-8-gc7d7 From 3cfc3092f40bc37c57ba556cfd8de4218f2135ab Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Thu, 12 Nov 2009 01:04:25 +0100 Subject: KVM: x86: Add KVM_GET/SET_VCPU_EVENTS This new IOCTL exports all yet user-invisible states related to exceptions, interrupts, and NMIs. Together with appropriate user space changes, this fixes sporadic problems of vmsave/restore, live migration and system reset. [avi: future-proof abi by adding a flags field] Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- Documentation/kvm/api.txt | 49 ++++++++++++++++++++++++++ arch/x86/include/asm/kvm.h | 28 +++++++++++++++ arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/svm.c | 22 ++++++++++++ arch/x86/kvm/vmx.c | 30 ++++++++++++++++ arch/x86/kvm/x86.c | 77 +++++++++++++++++++++++++++++++++++++++++ include/linux/kvm.h | 6 ++++ 7 files changed, 214 insertions(+) (limited to 'include') diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt index 36594ba57723..e1a114161027 100644 --- a/Documentation/kvm/api.txt +++ b/Documentation/kvm/api.txt @@ -653,6 +653,55 @@ struct kvm_clock_data { __u32 pad[9]; }; +4.29 KVM_GET_VCPU_EVENTS + +Capability: KVM_CAP_VCPU_EVENTS +Architectures: x86 +Type: vm ioctl +Parameters: struct kvm_vcpu_event (out) +Returns: 0 on success, -1 on error + +Gets currently pending exceptions, interrupts, and NMIs as well as related +states of the vcpu. + +struct kvm_vcpu_events { + struct { + __u8 injected; + __u8 nr; + __u8 has_error_code; + __u8 pad; + __u32 error_code; + } exception; + struct { + __u8 injected; + __u8 nr; + __u8 soft; + __u8 pad; + } interrupt; + struct { + __u8 injected; + __u8 pending; + __u8 masked; + __u8 pad; + } nmi; + __u32 sipi_vector; + __u32 flags; /* must be zero */ +}; + +4.30 KVM_SET_VCPU_EVENTS + +Capability: KVM_CAP_VCPU_EVENTS +Architectures: x86 +Type: vm ioctl +Parameters: struct kvm_vcpu_event (in) +Returns: 0 on success, -1 on error + +Set pending exceptions, interrupts, and NMIs as well as related states of the +vcpu. + +See KVM_GET_VCPU_EVENTS for the data structure. + + 5. The kvm_run structure Application code obtains a pointer to the kvm_run structure by diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index ef9b4b73cce4..950df434763f 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h @@ -20,6 +20,7 @@ #define __KVM_HAVE_MCE #define __KVM_HAVE_PIT_STATE2 #define __KVM_HAVE_XEN_HVM +#define __KVM_HAVE_VCPU_EVENTS /* Architectural interrupt line count. */ #define KVM_NR_INTERRUPTS 256 @@ -252,4 +253,31 @@ struct kvm_reinject_control { __u8 pit_reinject; __u8 reserved[31]; }; + +/* for KVM_GET/SET_VCPU_EVENTS */ +struct kvm_vcpu_events { + struct { + __u8 injected; + __u8 nr; + __u8 has_error_code; + __u8 pad; + __u32 error_code; + } exception; + struct { + __u8 injected; + __u8 nr; + __u8 soft; + __u8 pad; + } interrupt; + struct { + __u8 injected; + __u8 pending; + __u8 masked; + __u8 pad; + } nmi; + __u32 sipi_vector; + __u32 flags; + __u32 reserved[10]; +}; + #endif /* _ASM_X86_KVM_H */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 26a74b7bb6bc..06e085614dad 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -523,6 +523,8 @@ struct kvm_x86_ops { bool has_error_code, u32 error_code); int (*interrupt_allowed)(struct kvm_vcpu *vcpu); int (*nmi_allowed)(struct kvm_vcpu *vcpu); + bool (*get_nmi_mask)(struct kvm_vcpu *vcpu); + void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked); void (*enable_nmi_window)(struct kvm_vcpu *vcpu); void (*enable_irq_window)(struct kvm_vcpu *vcpu); void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 34b700f9e498..3de0b37ec038 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2499,6 +2499,26 @@ static int svm_nmi_allowed(struct kvm_vcpu *vcpu) !(svm->vcpu.arch.hflags & HF_NMI_MASK); } +static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm = to_svm(vcpu); + + return !!(svm->vcpu.arch.hflags & HF_NMI_MASK); +} + +static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) +{ + struct vcpu_svm *svm = to_svm(vcpu); + + if (masked) { + svm->vcpu.arch.hflags |= HF_NMI_MASK; + svm->vmcb->control.intercept |= (1UL << INTERCEPT_IRET); + } else { + svm->vcpu.arch.hflags &= ~HF_NMI_MASK; + svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET); + } +} + static int svm_interrupt_allowed(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -2946,6 +2966,8 @@ static struct kvm_x86_ops svm_x86_ops = { .queue_exception = svm_queue_exception, .interrupt_allowed = svm_interrupt_allowed, .nmi_allowed = svm_nmi_allowed, + .get_nmi_mask = svm_get_nmi_mask, + .set_nmi_mask = svm_set_nmi_mask, .enable_nmi_window = enable_nmi_window, .enable_irq_window = enable_irq_window, .update_cr8_intercept = update_cr8_intercept, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 22fcd27a0b58..778f059ae423 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2639,6 +2639,34 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) GUEST_INTR_STATE_NMI)); } +static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) +{ + if (!cpu_has_virtual_nmis()) + return to_vmx(vcpu)->soft_vnmi_blocked; + else + return !!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & + GUEST_INTR_STATE_NMI); +} + +static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + if (!cpu_has_virtual_nmis()) { + if (vmx->soft_vnmi_blocked != masked) { + vmx->soft_vnmi_blocked = masked; + vmx->vnmi_blocked_time = 0; + } + } else { + if (masked) + vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, + GUEST_INTR_STATE_NMI); + else + vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, + GUEST_INTR_STATE_NMI); + } +} + static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) { return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && @@ -3985,6 +4013,8 @@ static struct kvm_x86_ops vmx_x86_ops = { .queue_exception = vmx_queue_exception, .interrupt_allowed = vmx_interrupt_allowed, .nmi_allowed = vmx_nmi_allowed, + .get_nmi_mask = vmx_get_nmi_mask, + .set_nmi_mask = vmx_set_nmi_mask, .enable_nmi_window = enable_nmi_window, .enable_irq_window = enable_irq_window, .update_cr8_intercept = update_cr8_intercept, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ba8958dca3c4..35eea30821d6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1342,6 +1342,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_SET_IDENTITY_MAP_ADDR: case KVM_CAP_XEN_HVM: case KVM_CAP_ADJUST_CLOCK: + case KVM_CAP_VCPU_EVENTS: r = 1; break; case KVM_CAP_COALESCED_MMIO: @@ -1883,6 +1884,61 @@ static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu, return 0; } +static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, + struct kvm_vcpu_events *events) +{ + vcpu_load(vcpu); + + events->exception.injected = vcpu->arch.exception.pending; + events->exception.nr = vcpu->arch.exception.nr; + events->exception.has_error_code = vcpu->arch.exception.has_error_code; + events->exception.error_code = vcpu->arch.exception.error_code; + + events->interrupt.injected = vcpu->arch.interrupt.pending; + events->interrupt.nr = vcpu->arch.interrupt.nr; + events->interrupt.soft = vcpu->arch.interrupt.soft; + + events->nmi.injected = vcpu->arch.nmi_injected; + events->nmi.pending = vcpu->arch.nmi_pending; + events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu); + + events->sipi_vector = vcpu->arch.sipi_vector; + + events->flags = 0; + + vcpu_put(vcpu); +} + +static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, + struct kvm_vcpu_events *events) +{ + if (events->flags) + return -EINVAL; + + vcpu_load(vcpu); + + vcpu->arch.exception.pending = events->exception.injected; + vcpu->arch.exception.nr = events->exception.nr; + vcpu->arch.exception.has_error_code = events->exception.has_error_code; + vcpu->arch.exception.error_code = events->exception.error_code; + + vcpu->arch.interrupt.pending = events->interrupt.injected; + vcpu->arch.interrupt.nr = events->interrupt.nr; + vcpu->arch.interrupt.soft = events->interrupt.soft; + if (vcpu->arch.interrupt.pending && irqchip_in_kernel(vcpu->kvm)) + kvm_pic_clear_isr_ack(vcpu->kvm); + + vcpu->arch.nmi_injected = events->nmi.injected; + vcpu->arch.nmi_pending = events->nmi.pending; + kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked); + + vcpu->arch.sipi_vector = events->sipi_vector; + + vcpu_put(vcpu); + + return 0; +} + long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -2040,6 +2096,27 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce); break; } + case KVM_GET_VCPU_EVENTS: { + struct kvm_vcpu_events events; + + kvm_vcpu_ioctl_x86_get_vcpu_events(vcpu, &events); + + r = -EFAULT; + if (copy_to_user(argp, &events, sizeof(struct kvm_vcpu_events))) + break; + r = 0; + break; + } + case KVM_SET_VCPU_EVENTS: { + struct kvm_vcpu_events events; + + r = -EFAULT; + if (copy_from_user(&events, argp, sizeof(struct kvm_vcpu_events))) + break; + + r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events); + break; + } default: r = -EINVAL; } diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 976f4d181858..92045a92d714 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -489,6 +489,9 @@ struct kvm_ioeventfd { #endif #define KVM_CAP_ADJUST_CLOCK 39 #define KVM_CAP_INTERNAL_ERROR_DATA 40 +#ifdef __KVM_HAVE_VCPU_EVENTS +#define KVM_CAP_VCPU_EVENTS 41 +#endif #ifdef KVM_CAP_IRQ_ROUTING @@ -672,6 +675,9 @@ struct kvm_clock_data { /* IA64 stack access */ #define KVM_IA64_VCPU_GET_STACK _IOR(KVMIO, 0x9a, void *) #define KVM_IA64_VCPU_SET_STACK _IOW(KVMIO, 0x9b, void *) +/* Available with KVM_CAP_VCPU_EVENTS */ +#define KVM_GET_VCPU_EVENTS _IOR(KVMIO, 0x9f, struct kvm_vcpu_events) +#define KVM_SET_VCPU_EVENTS _IOW(KVMIO, 0xa0, struct kvm_vcpu_events) #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) -- cgit v1.3-8-gc7d7 From d7b0b5eb3000c6fb902f08c619fcd673a23d8fab Mon Sep 17 00:00:00 2001 From: Carsten Otte Date: Thu, 19 Nov 2009 14:21:16 +0100 Subject: KVM: s390: Make psw available on all exits, not just a subset This patch moves s390 processor status word into the base kvm_run struct and keeps it up-to date on all userspace exits. The userspace ABI is broken by this, however there are no applications in the wild using this. A capability check is provided so users can verify the updated API exists. Cc: stable@kernel.org Signed-off-by: Carsten Otte Signed-off-by: Avi Kivity --- arch/s390/include/asm/kvm.h | 3 ++- arch/s390/kvm/kvm-s390.c | 25 +++++++++++++++++-------- include/linux/kvm.h | 8 ++++++-- 3 files changed, 25 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/arch/s390/include/asm/kvm.h b/arch/s390/include/asm/kvm.h index 3dfcaeb5d7f4..82b32a100c7d 100644 --- a/arch/s390/include/asm/kvm.h +++ b/arch/s390/include/asm/kvm.h @@ -1,6 +1,5 @@ #ifndef __LINUX_KVM_S390_H #define __LINUX_KVM_S390_H - /* * asm-s390/kvm.h - KVM s390 specific structures and definitions * @@ -15,6 +14,8 @@ */ #include +#define __KVM_S390 + /* for KVM_GET_REGS and KVM_SET_REGS */ struct kvm_regs { /* general purpose regs for s390 */ diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 544505893c9f..f8bcaefd7d34 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -117,10 +117,16 @@ long kvm_arch_dev_ioctl(struct file *filp, int kvm_dev_ioctl_check_extension(long ext) { + int r; + switch (ext) { + case KVM_CAP_S390_PSW: + r = 1; + break; default: - return 0; + r = 0; } + return r; } /* Section: vm related */ @@ -420,8 +426,10 @@ static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw) vcpu_load(vcpu); if (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_RUNNING) rc = -EBUSY; - else - vcpu->arch.sie_block->gpsw = psw; + else { + vcpu->run->psw_mask = psw.mask; + vcpu->run->psw_addr = psw.addr; + } vcpu_put(vcpu); return rc; } @@ -509,9 +517,6 @@ rerun_vcpu: switch (kvm_run->exit_reason) { case KVM_EXIT_S390_SIEIC: - vcpu->arch.sie_block->gpsw.mask = kvm_run->s390_sieic.mask; - vcpu->arch.sie_block->gpsw.addr = kvm_run->s390_sieic.addr; - break; case KVM_EXIT_UNKNOWN: case KVM_EXIT_INTR: case KVM_EXIT_S390_RESET: @@ -520,6 +525,9 @@ rerun_vcpu: BUG(); } + vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask; + vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr; + might_fault(); do { @@ -539,8 +547,6 @@ rerun_vcpu: /* intercept cannot be handled in-kernel, prepare kvm-run */ kvm_run->exit_reason = KVM_EXIT_S390_SIEIC; kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode; - kvm_run->s390_sieic.mask = vcpu->arch.sie_block->gpsw.mask; - kvm_run->s390_sieic.addr = vcpu->arch.sie_block->gpsw.addr; kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa; kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb; rc = 0; @@ -552,6 +558,9 @@ rerun_vcpu: rc = 0; } + kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask; + kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr; + if (vcpu->sigset_active) sigprocmask(SIG_SETMASK, &sigsaved, NULL); diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 92045a92d714..2d241da07236 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -181,6 +181,11 @@ struct kvm_run { __u64 cr8; __u64 apic_base; +#ifdef __KVM_S390 + /* the processor status word for s390 */ + __u64 psw_mask; /* psw upper half */ + __u64 psw_addr; /* psw lower half */ +#endif union { /* KVM_EXIT_UNKNOWN */ struct { @@ -232,8 +237,6 @@ struct kvm_run { /* KVM_EXIT_S390_SIEIC */ struct { __u8 icptcode; - __u64 mask; /* psw upper half */ - __u64 addr; /* psw lower half */ __u16 ipa; __u32 ipb; } s390_sieic; @@ -492,6 +495,7 @@ struct kvm_ioeventfd { #ifdef __KVM_HAVE_VCPU_EVENTS #define KVM_CAP_VCPU_EVENTS 41 #endif +#define KVM_CAP_S390_PSW 42 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.3-8-gc7d7 From 6013efd8860bf15c1f86f365332642cfe557152f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 19 Nov 2009 15:36:45 +0900 Subject: libata: retry failed FLUSH if device didn't fail it If ATA device failed FLUSH, it means that the device failed to write out some amount of data and the error needs to be reported to upper layers. As retries can't recover the lost data, FLUSH failures need to be reported immediately in general. However, if FLUSH fails due to transmission errors, the FLUSH needs to be retried; otherwise, filesystems may switch to RO mode and/or raid array may drop a drive for a random transmission glitch. This condition can be rather easily reproduced on certain ahci controllers which go through a PHY event after powersave mode switch + ext4 combination. Powersave mode switch is often closely followed by flush from the filesystem failing the FLUSH with ATA bus error which makes the filesystem code believe that data is lost and drop to RO mode. This was reported in the following bugzilla bug. http://bugzilla.kernel.org/show_bug.cgi?id=14543 This patch makes libata EH retry FLUSH if it wasn't failed by the device. Signed-off-by: Tejun Heo Reported-by: Andrey Vihrov Signed-off-by: Jeff Garzik --- drivers/ata/libata-eh.c | 94 +++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/libata.h | 2 +- 2 files changed, 95 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index bba2ae5df1c2..0ea97c942ced 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -110,6 +110,13 @@ static const unsigned long ata_eh_identify_timeouts[] = { ULONG_MAX, }; +static const unsigned long ata_eh_flush_timeouts[] = { + 15000, /* be generous with flush */ + 15000, /* ditto */ + 30000, /* and even more generous */ + ULONG_MAX, +}; + static const unsigned long ata_eh_other_timeouts[] = { 5000, /* same rationale as identify timeout */ 10000, /* ditto */ @@ -147,6 +154,8 @@ ata_eh_cmd_timeout_table[ATA_EH_CMD_TIMEOUT_TABLE_SIZE] = { .timeouts = ata_eh_other_timeouts, }, { .commands = CMDS(ATA_CMD_INIT_DEV_PARAMS), .timeouts = ata_eh_other_timeouts, }, + { .commands = CMDS(ATA_CMD_FLUSH, ATA_CMD_FLUSH_EXT), + .timeouts = ata_eh_flush_timeouts }, }; #undef CMDS @@ -3112,6 +3121,82 @@ static int atapi_eh_clear_ua(struct ata_device *dev) return 0; } +/** + * ata_eh_maybe_retry_flush - Retry FLUSH if necessary + * @dev: ATA device which may need FLUSH retry + * + * If @dev failed FLUSH, it needs to be reported upper layer + * immediately as it means that @dev failed to remap and already + * lost at least a sector and further FLUSH retrials won't make + * any difference to the lost sector. However, if FLUSH failed + * for other reasons, for example transmission error, FLUSH needs + * to be retried. + * + * This function determines whether FLUSH failure retry is + * necessary and performs it if so. + * + * RETURNS: + * 0 if EH can continue, -errno if EH needs to be repeated. + */ +static int ata_eh_maybe_retry_flush(struct ata_device *dev) +{ + struct ata_link *link = dev->link; + struct ata_port *ap = link->ap; + struct ata_queued_cmd *qc; + struct ata_taskfile tf; + unsigned int err_mask; + int rc = 0; + + /* did flush fail for this device? */ + if (!ata_tag_valid(link->active_tag)) + return 0; + + qc = __ata_qc_from_tag(ap, link->active_tag); + if (qc->dev != dev || (qc->tf.command != ATA_CMD_FLUSH_EXT && + qc->tf.command != ATA_CMD_FLUSH)) + return 0; + + /* if the device failed it, it should be reported to upper layers */ + if (qc->err_mask & AC_ERR_DEV) + return 0; + + /* flush failed for some other reason, give it another shot */ + ata_tf_init(dev, &tf); + + tf.command = qc->tf.command; + tf.flags |= ATA_TFLAG_DEVICE; + tf.protocol = ATA_PROT_NODATA; + + ata_dev_printk(dev, KERN_WARNING, "retrying FLUSH 0x%x Emask 0x%x\n", + tf.command, qc->err_mask); + + err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0); + if (!err_mask) { + /* + * FLUSH is complete but there's no way to + * successfully complete a failed command from EH. + * Making sure retry is allowed at least once and + * retrying it should do the trick - whatever was in + * the cache is already on the platter and this won't + * cause infinite loop. + */ + qc->scsicmd->allowed = max(qc->scsicmd->allowed, 1); + } else { + ata_dev_printk(dev, KERN_WARNING, "FLUSH failed Emask 0x%x\n", + err_mask); + rc = -EIO; + + /* if device failed it, report it to upper layers */ + if (err_mask & AC_ERR_DEV) { + qc->err_mask |= AC_ERR_DEV; + qc->result_tf = tf; + if (!(ap->pflags & ATA_PFLAG_FROZEN)) + rc = 0; + } + } + return rc; +} + static int ata_link_nr_enabled(struct ata_link *link) { struct ata_device *dev; @@ -3455,6 +3540,15 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset, } } + /* retry flush if necessary */ + ata_for_each_dev(dev, link, ALL) { + if (dev->class != ATA_DEV_ATA) + continue; + rc = ata_eh_maybe_retry_flush(dev); + if (rc) + goto dev_fail; + } + /* configure link power saving */ if (ehc->i.action & ATA_EH_LPM) ata_for_each_dev(dev, link, ALL) diff --git a/include/linux/libata.h b/include/linux/libata.h index 87698640c091..ba07e84c9840 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -365,7 +365,7 @@ enum { /* This should match the actual table size of * ata_eh_cmd_timeout_table in libata-eh.c. */ - ATA_EH_CMD_TIMEOUT_TABLE_SIZE = 5, + ATA_EH_CMD_TIMEOUT_TABLE_SIZE = 6, /* Horkage types. May be set by libata or controller on drives (some horkage may be drive/controller pair dependant */ -- cgit v1.3-8-gc7d7 From 18f0f97850059303ed73b1f02084f55ca330a80c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 17 Nov 2009 10:00:47 -0500 Subject: libata: add translation for SCSI WRITE SAME (aka TRIM support) Add support for the ATA TRIM command in libata. We translate a WRITE SAME 16 command with the unmap bit set into an ATA TRIM command and export enough information in READ CAPACITY 16 and the block limits EVPD page so that the new SCSI layer discard support will driver this for us. Note that I hardcode the WRITE_SAME_16 opcode for now as the patch to introduce the symbolic is not in 2.6.32 yet but only in the SCSI tree - as soon as it is merged we can fix it up to properly use the symbolic name. Signed-off-by: Christoph Hellwig Signed-off-by: Jeff Garzik --- drivers/ata/libata-scsi.c | 98 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/ata.h | 13 +++++++ 2 files changed, 111 insertions(+) (limited to 'include') diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 512a3ee8caf6..340a616d226b 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -47,6 +47,7 @@ #include #include #include +#include #include "libata.h" @@ -1963,6 +1964,7 @@ static unsigned int ata_scsiop_inq_00(struct ata_scsi_args *args, u8 *rbuf) 0x80, /* page 0x80, unit serial no page */ 0x83, /* page 0x83, device ident page */ 0x89, /* page 0x89, ata info page */ + 0xb0, /* page 0xb0, block limits page */ 0xb1, /* page 0xb1, block device characteristics page */ }; @@ -2084,6 +2086,41 @@ static unsigned int ata_scsiop_inq_89(struct ata_scsi_args *args, u8 *rbuf) return 0; } +static unsigned int ata_scsiop_inq_b0(struct ata_scsi_args *args, u8 *rbuf) +{ + u32 min_io_sectors; + + rbuf[1] = 0xb0; + rbuf[3] = 0x3c; /* required VPD size with unmap support */ + + /* + * Optimal transfer length granularity. + * + * This is always one physical block, but for disks with a smaller + * logical than physical sector size we need to figure out what the + * latter is. + */ + if (ata_id_has_large_logical_sectors(args->id)) + min_io_sectors = ata_id_logical_per_physical_sectors(args->id); + else + min_io_sectors = 1; + put_unaligned_be16(min_io_sectors, &rbuf[6]); + + /* + * Optimal unmap granularity. + * + * The ATA spec doesn't even know about a granularity or alignment + * for the TRIM command. We can leave away most of the unmap related + * VPD page entries, but we have specifify a granularity to signal + * that we support some form of unmap - in thise case via WRITE SAME + * with the unmap bit set. + */ + if (ata_id_has_trim(args->id)) + put_unaligned_be32(1, &rbuf[28]); + + return 0; +} + static unsigned int ata_scsiop_inq_b1(struct ata_scsi_args *args, u8 *rbuf) { int form_factor = ata_id_form_factor(args->id); @@ -2373,6 +2410,9 @@ static unsigned int ata_scsiop_read_cap(struct ata_scsi_args *args, u8 *rbuf) rbuf[13] = log_per_phys; rbuf[14] = (lowest_aligned >> 8) & 0x3f; rbuf[15] = lowest_aligned; + + if (ata_id_has_trim(args->id)) + rbuf[14] |= 0x80; } return 0; @@ -2895,6 +2935,58 @@ static unsigned int ata_scsi_pass_thru(struct ata_queued_cmd *qc) return 1; } +static unsigned int ata_scsi_write_same_xlat(struct ata_queued_cmd *qc) +{ + struct ata_taskfile *tf = &qc->tf; + struct scsi_cmnd *scmd = qc->scsicmd; + struct ata_device *dev = qc->dev; + const u8 *cdb = scmd->cmnd; + u64 block; + u32 n_block; + u32 size; + void *buf; + + /* we may not issue DMA commands if no DMA mode is set */ + if (unlikely(!dev->dma_mode)) + goto invalid_fld; + + if (unlikely(scmd->cmd_len < 16)) + goto invalid_fld; + scsi_16_lba_len(cdb, &block, &n_block); + + /* for now we only support WRITE SAME with the unmap bit set */ + if (unlikely(!(cdb[1] & 0x8))) + goto invalid_fld; + + /* + * WRITE SAME always has a sector sized buffer as payload, this + * should never be a multiple entry S/G list. + */ + if (!scsi_sg_count(scmd)) + goto invalid_fld; + + buf = page_address(sg_page(scsi_sglist(scmd))); + size = ata_set_lba_range_entries(buf, 512 / 8, block, n_block); + + tf->protocol = ATA_PROT_DMA; + tf->hob_feature = 0; + tf->feature = ATA_DSM_TRIM; + tf->hob_nsect = (size / 512) >> 8; + tf->nsect = size / 512; + tf->command = ATA_CMD_DSM; + tf->flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE | ATA_TFLAG_LBA48 | + ATA_TFLAG_WRITE; + + ata_qc_set_pc_nbytes(qc); + + return 0; + + invalid_fld: + ata_scsi_set_sense(scmd, ILLEGAL_REQUEST, 0x24, 0x00); + /* "Invalid field in cdb" */ + return 1; +} + /** * ata_get_xlat_func - check if SCSI to ATA translation is possible * @dev: ATA device @@ -2919,6 +3011,9 @@ static inline ata_xlat_func_t ata_get_xlat_func(struct ata_device *dev, u8 cmd) case WRITE_16: return ata_scsi_rw_xlat; + case 0x93 /*WRITE_SAME_16*/: + return ata_scsi_write_same_xlat; + case SYNCHRONIZE_CACHE: if (ata_try_flush_cache(dev)) return ata_scsi_flush_xlat; @@ -3108,6 +3203,9 @@ void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd, case 0x89: ata_scsi_rbuf_fill(&args, ata_scsiop_inq_89); break; + case 0xb0: + ata_scsi_rbuf_fill(&args, ata_scsiop_inq_b0); + break; case 0xb1: ata_scsi_rbuf_fill(&args, ata_scsiop_inq_b1); break; diff --git a/include/linux/ata.h b/include/linux/ata.h index 4fb357312b3b..e2595e877e44 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -87,6 +87,7 @@ enum { ATA_ID_HW_CONFIG = 93, ATA_ID_SPG = 98, ATA_ID_LBA_CAPACITY_2 = 100, + ATA_ID_SECTOR_SIZE = 106, ATA_ID_LAST_LUN = 126, ATA_ID_DLF = 128, ATA_ID_CSFO = 129, @@ -638,6 +639,18 @@ static inline int ata_id_flush_ext_enabled(const u16 *id) return (id[ATA_ID_CFS_ENABLE_2] & 0x2400) == 0x2400; } +static inline int ata_id_has_large_logical_sectors(const u16 *id) +{ + if ((id[ATA_ID_SECTOR_SIZE] & 0xc000) != 0x4000) + return 0; + return id[ATA_ID_SECTOR_SIZE] & (1 << 13); +} + +static inline u8 ata_id_logical_per_physical_sectors(const u16 *id) +{ + return id[ATA_ID_SECTOR_SIZE] & 0xf; +} + static inline int ata_id_has_lba48(const u16 *id) { if ((id[ATA_ID_COMMAND_SET_2] & 0xC000) != 0x4000) -- cgit v1.3-8-gc7d7 From 98262f2762f0067375f83824d81ea929e37e6bfe Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Thu, 3 Dec 2009 09:24:48 +0100 Subject: block: Allow devices to indicate whether discarded blocks are zeroed The discard ioctl is used by mkfs utilities to clear a block device prior to putting metadata down. However, not all devices return zeroed blocks after a discard. Some drives return stale data, potentially containing old superblocks. It is therefore important to know whether discarded blocks are properly zeroed. Both ATA and SCSI drives have configuration bits that indicate whether zeroes are returned after a discard operation. Implement a block level interface that allows this information to be bubbled up the stack and queried via a new block device ioctl. Signed-off-by: Martin K. Petersen Signed-off-by: Jens Axboe --- block/blk-settings.c | 2 ++ block/blk-sysfs.c | 11 +++++++++++ block/compat_ioctl.c | 2 ++ block/ioctl.c | 2 ++ include/linux/blkdev.h | 14 ++++++++++++++ include/linux/fs.h | 1 + 6 files changed, 32 insertions(+) (limited to 'include') diff --git a/block/blk-settings.c b/block/blk-settings.c index 1ebc1fdb9144..dd1f1e0e196f 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -101,6 +101,7 @@ void blk_set_default_limits(struct queue_limits *lim) lim->discard_granularity = 0; lim->discard_alignment = 0; lim->discard_misaligned = 0; + lim->discard_zeroes_data = -1; lim->logical_block_size = lim->physical_block_size = lim->io_min = 512; lim->bounce_pfn = (unsigned long)(BLK_BOUNCE_ANY >> PAGE_SHIFT); lim->alignment_offset = 0; @@ -544,6 +545,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, t->io_min = max(t->io_min, b->io_min); t->no_cluster |= b->no_cluster; + t->discard_zeroes_data &= b->discard_zeroes_data; /* Bottom device offset aligned? */ if (offset && diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 3147145edc15..8606c9543fdd 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -136,6 +136,11 @@ static ssize_t queue_discard_max_show(struct request_queue *q, char *page) return queue_var_show(q->limits.max_discard_sectors << 9, page); } +static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *page) +{ + return queue_var_show(queue_discard_zeroes_data(q), page); +} + static ssize_t queue_max_sectors_store(struct request_queue *q, const char *page, size_t count) { @@ -313,6 +318,11 @@ static struct queue_sysfs_entry queue_discard_max_entry = { .show = queue_discard_max_show, }; +static struct queue_sysfs_entry queue_discard_zeroes_data_entry = { + .attr = {.name = "discard_zeroes_data", .mode = S_IRUGO }, + .show = queue_discard_zeroes_data_show, +}; + static struct queue_sysfs_entry queue_nonrot_entry = { .attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR }, .show = queue_nonrot_show, @@ -350,6 +360,7 @@ static struct attribute *default_attrs[] = { &queue_io_opt_entry.attr, &queue_discard_granularity_entry.attr, &queue_discard_max_entry.attr, + &queue_discard_zeroes_data_entry.attr, &queue_nonrot_entry.attr, &queue_nomerges_entry.attr, &queue_rq_affinity_entry.attr, diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c index 9bd086c1a4d5..4eb8e9ea4af5 100644 --- a/block/compat_ioctl.c +++ b/block/compat_ioctl.c @@ -747,6 +747,8 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) return compat_put_uint(arg, bdev_io_opt(bdev)); case BLKALIGNOFF: return compat_put_int(arg, bdev_alignment_offset(bdev)); + case BLKDISCARDZEROES: + return compat_put_uint(arg, bdev_discard_zeroes_data(bdev)); case BLKFLSBUF: case BLKROSET: case BLKDISCARD: diff --git a/block/ioctl.c b/block/ioctl.c index 1f4d1de12b09..be48ea51faee 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -280,6 +280,8 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, return put_uint(arg, bdev_io_opt(bdev)); case BLKALIGNOFF: return put_int(arg, bdev_alignment_offset(bdev)); + case BLKDISCARDZEROES: + return put_uint(arg, bdev_discard_zeroes_data(bdev)); case BLKSECTGET: return put_ushort(arg, queue_max_sectors(bdev_get_queue(bdev))); case BLKRASET: diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e727f6c44c44..784a919aa0d0 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -322,6 +322,7 @@ struct queue_limits { unsigned char misaligned; unsigned char discard_misaligned; unsigned char no_cluster; + signed char discard_zeroes_data; }; struct request_queue @@ -1150,6 +1151,19 @@ static inline int queue_sector_discard_alignment(struct request_queue *q, & (q->limits.discard_granularity - 1); } +static inline unsigned int queue_discard_zeroes_data(struct request_queue *q) +{ + if (q->limits.discard_zeroes_data == 1) + return 1; + + return 0; +} + +static inline unsigned int bdev_discard_zeroes_data(struct block_device *bdev) +{ + return queue_discard_zeroes_data(bdev_get_queue(bdev)); +} + static inline int queue_dma_alignment(struct request_queue *q) { return q ? q->dma_alignment : 511; diff --git a/include/linux/fs.h b/include/linux/fs.h index 79cea8051736..891f7d642e5c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -304,6 +304,7 @@ struct inodes_stat_t { #define BLKIOOPT _IO(0x12,121) #define BLKALIGNOFF _IO(0x12,122) #define BLKPBSZGET _IO(0x12,123) +#define BLKDISCARDZEROES _IO(0x12,124) #define BMAP_IOCTL 1 /* obsolete - kept for compatibility */ #define FIBMAP _IO(0x00,1) /* bmap access */ -- cgit v1.3-8-gc7d7 From 796bd9524731850967d437b7f47a86acc776ea89 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 29 Sep 2009 12:27:23 +0100 Subject: VFS: Add forget_all_cached_acls() This is required for cluster filesystems which want to use cached ACLs so that they can invalidate the cache when required. Signed-off-by: Steven Whitehouse Cc: Alexander Viro Cc: Christoph Hellwig --- include/linux/posix_acl.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h index 065a3652a3ea..67608161df6b 100644 --- a/include/linux/posix_acl.h +++ b/include/linux/posix_acl.h @@ -147,6 +147,20 @@ static inline void forget_cached_acl(struct inode *inode, int type) if (old != ACL_NOT_CACHED) posix_acl_release(old); } + +static inline void forget_all_cached_acls(struct inode *inode) +{ + struct posix_acl *old_access, *old_default; + spin_lock(&inode->i_lock); + old_access = inode->i_acl; + old_default = inode->i_default_acl; + inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED; + spin_unlock(&inode->i_lock); + if (old_access != ACL_NOT_CACHED) + posix_acl_release(old_access); + if (old_default != ACL_NOT_CACHED) + posix_acl_release(old_default); +} #endif static inline void cache_no_acl(struct inode *inode) -- cgit v1.3-8-gc7d7 From 86e931a35e93d94e6e91b57cc76456e16d188ea9 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 28 Sep 2009 12:35:17 +0100 Subject: VFS: Export dquot_send_warning Sending a message to userspace in a generic format to warn of events (e.g. quota exceeded) in the quota subsystem is a generically useful feature. This patch makes some minor changes to the send_message function from dquot.c renaming it quota_send_message, moving it to quota.c and exporting it for use by filesystems which do not use the dquot code. Signed-off-by: Steven Whitehouse --- fs/quota/Kconfig | 2 +- fs/quota/dquot.c | 93 +++++---------------------------------------------- fs/quota/quota.c | 93 +++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/quota.h | 11 ++++++ 4 files changed, 114 insertions(+), 85 deletions(-) (limited to 'include') diff --git a/fs/quota/Kconfig b/fs/quota/Kconfig index 8047e01ef46b..353e78a9ebee 100644 --- a/fs/quota/Kconfig +++ b/fs/quota/Kconfig @@ -17,7 +17,7 @@ config QUOTA config QUOTA_NETLINK_INTERFACE bool "Report quota messages through netlink interface" - depends on QUOTA && NET + depends on QUOTACTL && NET help If you say Y here, quota warnings (about exceeding softlimit, reaching hardlimit, etc.) will be reported through netlink interface. If unsure, diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 39b49c42a7ed..9b6ad908dcb2 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -77,10 +77,6 @@ #include #include #include /* for inode_lock, oddly enough.. */ -#ifdef CONFIG_QUOTA_NETLINK_INTERFACE -#include -#include -#endif #include @@ -1071,73 +1067,6 @@ static void print_warning(struct dquot *dquot, const int warntype) } #endif -#ifdef CONFIG_QUOTA_NETLINK_INTERFACE - -/* Netlink family structure for quota */ -static struct genl_family quota_genl_family = { - .id = GENL_ID_GENERATE, - .hdrsize = 0, - .name = "VFS_DQUOT", - .version = 1, - .maxattr = QUOTA_NL_A_MAX, -}; - -/* Send warning to userspace about user which exceeded quota */ -static void send_warning(const struct dquot *dquot, const char warntype) -{ - static atomic_t seq; - struct sk_buff *skb; - void *msg_head; - int ret; - int msg_size = 4 * nla_total_size(sizeof(u32)) + - 2 * nla_total_size(sizeof(u64)); - - /* We have to allocate using GFP_NOFS as we are called from a - * filesystem performing write and thus further recursion into - * the fs to free some data could cause deadlocks. */ - skb = genlmsg_new(msg_size, GFP_NOFS); - if (!skb) { - printk(KERN_ERR - "VFS: Not enough memory to send quota warning.\n"); - return; - } - msg_head = genlmsg_put(skb, 0, atomic_add_return(1, &seq), - "a_genl_family, 0, QUOTA_NL_C_WARNING); - if (!msg_head) { - printk(KERN_ERR - "VFS: Cannot store netlink header in quota warning.\n"); - goto err_out; - } - ret = nla_put_u32(skb, QUOTA_NL_A_QTYPE, dquot->dq_type); - if (ret) - goto attr_err_out; - ret = nla_put_u64(skb, QUOTA_NL_A_EXCESS_ID, dquot->dq_id); - if (ret) - goto attr_err_out; - ret = nla_put_u32(skb, QUOTA_NL_A_WARNING, warntype); - if (ret) - goto attr_err_out; - ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MAJOR, - MAJOR(dquot->dq_sb->s_dev)); - if (ret) - goto attr_err_out; - ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MINOR, - MINOR(dquot->dq_sb->s_dev)); - if (ret) - goto attr_err_out; - ret = nla_put_u64(skb, QUOTA_NL_A_CAUSED_ID, current_uid()); - if (ret) - goto attr_err_out; - genlmsg_end(skb, msg_head); - - genlmsg_multicast(skb, 0, quota_genl_family.id, GFP_NOFS); - return; -attr_err_out: - printk(KERN_ERR "VFS: Not enough space to compose quota message!\n"); -err_out: - kfree_skb(skb); -} -#endif /* * Write warnings to the console and send warning messages over netlink. * @@ -1145,18 +1074,20 @@ err_out: */ static void flush_warnings(struct dquot *const *dquots, char *warntype) { + struct dquot *dq; int i; - for (i = 0; i < MAXQUOTAS; i++) - if (dquots[i] && warntype[i] != QUOTA_NL_NOWARN && - !warning_issued(dquots[i], warntype[i])) { + for (i = 0; i < MAXQUOTAS; i++) { + dq = dquots[i]; + if (dq && warntype[i] != QUOTA_NL_NOWARN && + !warning_issued(dq, warntype[i])) { #ifdef CONFIG_PRINT_QUOTA_WARNING - print_warning(dquots[i], warntype[i]); -#endif -#ifdef CONFIG_QUOTA_NETLINK_INTERFACE - send_warning(dquots[i], warntype[i]); + print_warning(dq, warntype[i]); #endif + quota_send_warning(dq->dq_type, dq->dq_id, + dq->dq_sb->s_dev, warntype[i]); } + } } static int ignore_hardlimit(struct dquot *dquot) @@ -2607,12 +2538,6 @@ static int __init dquot_init(void) register_shrinker(&dqcache_shrinker); -#ifdef CONFIG_QUOTA_NETLINK_INTERFACE - if (genl_register_family("a_genl_family) != 0) - printk(KERN_ERR - "VFS: Failed to create quota netlink interface.\n"); -#endif - return 0; } module_init(dquot_init); diff --git a/fs/quota/quota.c b/fs/quota/quota.c index 95c5b42384b2..ee91e2756950 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c @@ -18,6 +18,8 @@ #include #include #include +#include +#include /* Check validity of generic quotactl commands */ static int generic_quotactl_valid(struct super_block *sb, int type, int cmd, @@ -525,3 +527,94 @@ asmlinkage long sys32_quotactl(unsigned int cmd, const char __user *special, return ret; } #endif + + +#ifdef CONFIG_QUOTA_NETLINK_INTERFACE + +/* Netlink family structure for quota */ +static struct genl_family quota_genl_family = { + .id = GENL_ID_GENERATE, + .hdrsize = 0, + .name = "VFS_DQUOT", + .version = 1, + .maxattr = QUOTA_NL_A_MAX, +}; + +/** + * quota_send_warning - Send warning to userspace about exceeded quota + * @type: The quota type: USRQQUOTA, GRPQUOTA,... + * @id: The user or group id of the quota that was exceeded + * @dev: The device on which the fs is mounted (sb->s_dev) + * @warntype: The type of the warning: QUOTA_NL_... + * + * This can be used by filesystems (including those which don't use + * dquot) to send a message to userspace relating to quota limits. + * + */ + +void quota_send_warning(short type, unsigned int id, dev_t dev, + const char warntype) +{ + static atomic_t seq; + struct sk_buff *skb; + void *msg_head; + int ret; + int msg_size = 4 * nla_total_size(sizeof(u32)) + + 2 * nla_total_size(sizeof(u64)); + + /* We have to allocate using GFP_NOFS as we are called from a + * filesystem performing write and thus further recursion into + * the fs to free some data could cause deadlocks. */ + skb = genlmsg_new(msg_size, GFP_NOFS); + if (!skb) { + printk(KERN_ERR + "VFS: Not enough memory to send quota warning.\n"); + return; + } + msg_head = genlmsg_put(skb, 0, atomic_add_return(1, &seq), + "a_genl_family, 0, QUOTA_NL_C_WARNING); + if (!msg_head) { + printk(KERN_ERR + "VFS: Cannot store netlink header in quota warning.\n"); + goto err_out; + } + ret = nla_put_u32(skb, QUOTA_NL_A_QTYPE, type); + if (ret) + goto attr_err_out; + ret = nla_put_u64(skb, QUOTA_NL_A_EXCESS_ID, id); + if (ret) + goto attr_err_out; + ret = nla_put_u32(skb, QUOTA_NL_A_WARNING, warntype); + if (ret) + goto attr_err_out; + ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MAJOR, MAJOR(dev)); + if (ret) + goto attr_err_out; + ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MINOR, MINOR(dev)); + if (ret) + goto attr_err_out; + ret = nla_put_u64(skb, QUOTA_NL_A_CAUSED_ID, current_uid()); + if (ret) + goto attr_err_out; + genlmsg_end(skb, msg_head); + + genlmsg_multicast(skb, 0, quota_genl_family.id, GFP_NOFS); + return; +attr_err_out: + printk(KERN_ERR "VFS: Not enough space to compose quota message!\n"); +err_out: + kfree_skb(skb); +} +EXPORT_SYMBOL(quota_send_warning); + +static int __init quota_init(void) +{ + if (genl_register_family("a_genl_family) != 0) + printk(KERN_ERR + "VFS: Failed to create quota netlink interface.\n"); + return 0; +}; + +module_init(quota_init); +#endif + diff --git a/include/linux/quota.h b/include/linux/quota.h index 78c48895b12a..ce9a9b2e5cd4 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -376,6 +376,17 @@ static inline unsigned int dquot_generic_flag(unsigned int flags, int type) return flags >> _DQUOT_STATE_FLAGS; } +#ifdef CONFIG_QUOTA_NETLINK_INTERFACE +extern void quota_send_warning(short type, unsigned int id, dev_t dev, + const char warntype); +#else +static inline void quota_send_warning(short type, unsigned int id, dev_t dev, + const char warntype) +{ + return; +} +#endif /* CONFIG_QUOTA_NETLINK_INTERFACE */ + struct quota_info { unsigned int flags; /* Flags for diskquotas on this device */ struct mutex dqio_mutex; /* lock device while I/O in progress */ -- cgit v1.3-8-gc7d7 From 0ab7d13fcbd7ce1658c563e345990ba453719deb Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 6 Nov 2009 16:20:51 +0000 Subject: GFS2: Tag all metadata with jid There are two spare field in the header common to all GFS2 metadata. One is just the right size to fit a journal id in it, and this patch updates the journal code so that each time a metadata block is modified, we tag it with the journal id of the node which is performing the modification. The reason for this is that it should make it much easier to debug issues which arise if we can tell which node was the last to modify a particular metadata block. Since the field is updated before the block is written into the journal, each journal should only contain metadata which is tagged with its own journal id. The one exception to this is the journal header block, which might have a different node's id in it, if that journal was recovered by another node in the cluster. Thus each journal will contain a record of which nodes recovered it, via the journal header. The other field in the metadata header could potentially be used to hold information about what kind of operation was performed, but for the time being we just zero it on each transaction so that if we use it for that in future, we'll know that the information (where it exists) is reliable. I did consider using the other field to hold the journal sequence number, however since in GFS2's journaling we write the modified data into the journal and not the original data, this gives no information as to what action caused the modification, so I think we can probably come up with a better use for those 64 bits in the future. Signed-off-by: Steven Whitehouse --- fs/gfs2/inode.c | 2 -- fs/gfs2/log.c | 2 ++ fs/gfs2/lops.c | 4 ++++ fs/gfs2/recovery.c | 2 ++ include/linux/gfs2_ondisk.h | 6 +++++- 5 files changed, 13 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 6380cd9314b0..26ba2a4c4a2d 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -947,9 +947,7 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC); str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI); - str->di_header.__pad0 = 0; str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI); - str->di_header.__pad1 = 0; str->di_num.no_addr = cpu_to_be64(ip->i_no_addr); str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino); str->di_mode = cpu_to_be32(ip->i_inode.i_mode); diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 13c6237c5f67..4511b08fc451 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -596,7 +596,9 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull) memset(lh, 0, sizeof(struct gfs2_log_header)); lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC); lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH); + lh->lh_header.__pad0 = cpu_to_be64(0); lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH); + lh->lh_header.mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid); lh->lh_sequence = cpu_to_be64(sdp->sd_log_sequence++); lh->lh_flags = cpu_to_be32(flags); lh->lh_tail = cpu_to_be32(tail); diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 9969ff062c5b..de97632ba32f 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -132,6 +132,7 @@ static struct buffer_head *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type) static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) { struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le); + struct gfs2_meta_header *mh; struct gfs2_trans *tr; lock_buffer(bd->bd_bh); @@ -148,6 +149,9 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags); gfs2_meta_check(sdp, bd->bd_bh); gfs2_pin(sdp, bd->bd_bh); + mh = (struct gfs2_meta_header *)bd->bd_bh->b_data; + mh->__pad0 = cpu_to_be64(0); + mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid); sdp->sd_log_num_buf++; list_add(&le->le_list, &sdp->sd_log_le_buf); tr->tr_num_buf_new++; diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index 09fa31965576..4b9bece3d437 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c @@ -410,7 +410,9 @@ static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header_host *hea memset(lh, 0, sizeof(struct gfs2_log_header)); lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC); lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH); + lh->lh_header.__pad0 = cpu_to_be64(0); lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH); + lh->lh_header.mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid); lh->lh_sequence = cpu_to_be64(head->lh_sequence + 1); lh->lh_flags = cpu_to_be32(GFS2_LOG_HEAD_UNMOUNT); lh->lh_blkno = cpu_to_be32(lblock); diff --git a/include/linux/gfs2_ondisk.h b/include/linux/gfs2_ondisk.h index b80c88dedbbb..81f90a59cda6 100644 --- a/include/linux/gfs2_ondisk.h +++ b/include/linux/gfs2_ondisk.h @@ -81,7 +81,11 @@ struct gfs2_meta_header { __be32 mh_type; __be64 __pad0; /* Was generation number in gfs1 */ __be32 mh_format; - __be32 __pad1; /* Was incarnation number in gfs1 */ + /* This union is to keep userspace happy */ + union { + __be32 mh_jid; /* Was incarnation number in gfs1 */ + __be32 __pad1; + }; }; /* -- cgit v1.3-8-gc7d7 From b17621fed6aa039387e35f9b4d34d98f213e5673 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Thu, 3 Dec 2009 13:54:25 +0100 Subject: writeback: introduce wbc.for_background It will lower the flush priority for NFS, and maybe more in future. Signed-off-by: Wu Fengguang Cc: Trond Myklebust Cc: Jens Axboe Cc: Christoph Hellwig Signed-off-by: Jens Axboe --- fs/fs-writeback.c | 1 + fs/nfs/write.c | 2 +- include/linux/writeback.h | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 0306c8e7d6b5..0793961f7699 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -738,6 +738,7 @@ static long wb_writeback(struct bdi_writeback *wb, .sync_mode = args->sync_mode, .older_than_this = NULL, .for_kupdate = args->for_kupdate, + .for_background = args->for_background, .range_cyclic = args->range_cyclic, }; unsigned long oldest_jif; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 53eb26c16b50..c84b5cc1a943 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -178,7 +178,7 @@ static int wb_priority(struct writeback_control *wbc) { if (wbc->for_reclaim) return FLUSH_HIGHPRI | FLUSH_STABLE; - if (wbc->for_kupdate) + if (wbc->for_kupdate || wbc->for_background) return FLUSH_LOWPRI; return 0; } diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 66ebddcff664..705f01fe413a 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -49,6 +49,7 @@ struct writeback_control { unsigned nonblocking:1; /* Don't get stuck on request queues */ unsigned encountered_congestion:1; /* An output: a queue is full */ unsigned for_kupdate:1; /* A kupdate writeback */ + unsigned for_background:1; /* A background writeback */ unsigned for_reclaim:1; /* Invoked from the page allocator */ unsigned range_cyclic:1; /* range_start is cyclic */ unsigned more_io:1; /* more io to be dispatched */ -- cgit v1.3-8-gc7d7 From 31e4c28d95e64f2d5d3c497a3ecf37c62de635b4 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Thu, 3 Dec 2009 12:59:42 -0500 Subject: blkio: Introduce blkio controller cgroup interface o This is basic implementation of blkio controller cgroup interface. This is the common interface visible to user space and should be used by different IO control policies as we implement those. Signed-off-by: Vivek Goyal Signed-off-by: Jens Axboe --- block/Kconfig | 13 ++++ block/Kconfig.iosched | 1 + block/Makefile | 1 + block/blk-cgroup.c | 177 ++++++++++++++++++++++++++++++++++++++++++ block/blk-cgroup.h | 58 ++++++++++++++ include/linux/cgroup_subsys.h | 6 ++ include/linux/iocontext.h | 4 + 7 files changed, 260 insertions(+) create mode 100644 block/blk-cgroup.c create mode 100644 block/blk-cgroup.h (limited to 'include') diff --git a/block/Kconfig b/block/Kconfig index 9be0b56eaee1..6ba1a8e3388b 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -77,6 +77,19 @@ config BLK_DEV_INTEGRITY T10/SCSI Data Integrity Field or the T13/ATA External Path Protection. If in doubt, say N. +config BLK_CGROUP + bool + depends on CGROUPS + default n + ---help--- + Generic block IO controller cgroup interface. This is the common + cgroup interface which should be used by various IO controlling + policies. + + Currently, CFQ IO scheduler uses it to recognize task groups and + control disk bandwidth allocation (proportional time slice allocation) + to such task groups. + endif # BLOCK config BLOCK_COMPAT diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched index 8bd105115a69..be0280deec29 100644 --- a/block/Kconfig.iosched +++ b/block/Kconfig.iosched @@ -23,6 +23,7 @@ config IOSCHED_DEADLINE config IOSCHED_CFQ tristate "CFQ I/O scheduler" + select BLK_CGROUP default y ---help--- The CFQ I/O scheduler tries to distribute bandwidth equally diff --git a/block/Makefile b/block/Makefile index 7914108952f2..cb2d515ebd6e 100644 --- a/block/Makefile +++ b/block/Makefile @@ -8,6 +8,7 @@ obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \ blk-iopoll.o ioctl.o genhd.o scsi_ioctl.o obj-$(CONFIG_BLK_DEV_BSG) += bsg.o +obj-$(CONFIG_BLK_CGROUP) += blk-cgroup.o obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c new file mode 100644 index 000000000000..4f6afd76ec59 --- /dev/null +++ b/block/blk-cgroup.c @@ -0,0 +1,177 @@ +/* + * Common Block IO controller cgroup interface + * + * Based on ideas and code from CFQ, CFS and BFQ: + * Copyright (C) 2003 Jens Axboe + * + * Copyright (C) 2008 Fabio Checconi + * Paolo Valente + * + * Copyright (C) 2009 Vivek Goyal + * Nauman Rafique + */ +#include +#include "blk-cgroup.h" + +struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT }; + +struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup) +{ + return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id), + struct blkio_cgroup, css); +} + +void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg, + struct blkio_group *blkg, void *key) +{ + unsigned long flags; + + spin_lock_irqsave(&blkcg->lock, flags); + rcu_assign_pointer(blkg->key, key); + hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list); + spin_unlock_irqrestore(&blkcg->lock, flags); +} + +int blkiocg_del_blkio_group(struct blkio_group *blkg) +{ + /* Implemented later */ + return 0; +} + +/* called under rcu_read_lock(). */ +struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key) +{ + struct blkio_group *blkg; + struct hlist_node *n; + void *__key; + + hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) { + __key = blkg->key; + if (__key == key) + return blkg; + } + + return NULL; +} + +#define SHOW_FUNCTION(__VAR) \ +static u64 blkiocg_##__VAR##_read(struct cgroup *cgroup, \ + struct cftype *cftype) \ +{ \ + struct blkio_cgroup *blkcg; \ + \ + blkcg = cgroup_to_blkio_cgroup(cgroup); \ + return (u64)blkcg->__VAR; \ +} + +SHOW_FUNCTION(weight); +#undef SHOW_FUNCTION + +static int +blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val) +{ + struct blkio_cgroup *blkcg; + + if (val < BLKIO_WEIGHT_MIN || val > BLKIO_WEIGHT_MAX) + return -EINVAL; + + blkcg = cgroup_to_blkio_cgroup(cgroup); + blkcg->weight = (unsigned int)val; + return 0; +} + +struct cftype blkio_files[] = { + { + .name = "weight", + .read_u64 = blkiocg_weight_read, + .write_u64 = blkiocg_weight_write, + }, +}; + +static int blkiocg_populate(struct cgroup_subsys *subsys, struct cgroup *cgroup) +{ + return cgroup_add_files(cgroup, subsys, blkio_files, + ARRAY_SIZE(blkio_files)); +} + +static void blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup) +{ + struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup); + + free_css_id(&blkio_subsys, &blkcg->css); + kfree(blkcg); +} + +static struct cgroup_subsys_state * +blkiocg_create(struct cgroup_subsys *subsys, struct cgroup *cgroup) +{ + struct blkio_cgroup *blkcg, *parent_blkcg; + + if (!cgroup->parent) { + blkcg = &blkio_root_cgroup; + goto done; + } + + /* Currently we do not support hierarchy deeper than two level (0,1) */ + parent_blkcg = cgroup_to_blkio_cgroup(cgroup->parent); + if (css_depth(&parent_blkcg->css) > 0) + return ERR_PTR(-EINVAL); + + blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL); + if (!blkcg) + return ERR_PTR(-ENOMEM); + + blkcg->weight = BLKIO_WEIGHT_DEFAULT; +done: + spin_lock_init(&blkcg->lock); + INIT_HLIST_HEAD(&blkcg->blkg_list); + + return &blkcg->css; +} + +/* + * We cannot support shared io contexts, as we have no mean to support + * two tasks with the same ioc in two different groups without major rework + * of the main cic data structures. For now we allow a task to change + * its cgroup only if it's the only owner of its ioc. + */ +static int blkiocg_can_attach(struct cgroup_subsys *subsys, + struct cgroup *cgroup, struct task_struct *tsk, + bool threadgroup) +{ + struct io_context *ioc; + int ret = 0; + + /* task_lock() is needed to avoid races with exit_io_context() */ + task_lock(tsk); + ioc = tsk->io_context; + if (ioc && atomic_read(&ioc->nr_tasks) > 1) + ret = -EINVAL; + task_unlock(tsk); + + return ret; +} + +static void blkiocg_attach(struct cgroup_subsys *subsys, struct cgroup *cgroup, + struct cgroup *prev, struct task_struct *tsk, + bool threadgroup) +{ + struct io_context *ioc; + + task_lock(tsk); + ioc = tsk->io_context; + if (ioc) + ioc->cgroup_changed = 1; + task_unlock(tsk); +} + +struct cgroup_subsys blkio_subsys = { + .name = "blkio", + .create = blkiocg_create, + .can_attach = blkiocg_can_attach, + .attach = blkiocg_attach, + .destroy = blkiocg_destroy, + .populate = blkiocg_populate, + .subsys_id = blkio_subsys_id, + .use_id = 1, +}; diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h new file mode 100644 index 000000000000..ba5703f69b42 --- /dev/null +++ b/block/blk-cgroup.h @@ -0,0 +1,58 @@ +#ifndef _BLK_CGROUP_H +#define _BLK_CGROUP_H +/* + * Common Block IO controller cgroup interface + * + * Based on ideas and code from CFQ, CFS and BFQ: + * Copyright (C) 2003 Jens Axboe + * + * Copyright (C) 2008 Fabio Checconi + * Paolo Valente + * + * Copyright (C) 2009 Vivek Goyal + * Nauman Rafique + */ + +#include + +struct blkio_cgroup { + struct cgroup_subsys_state css; + unsigned int weight; + spinlock_t lock; + struct hlist_head blkg_list; +}; + +struct blkio_group { + /* An rcu protected unique identifier for the group */ + void *key; + struct hlist_node blkcg_node; +}; + +#define BLKIO_WEIGHT_MIN 100 +#define BLKIO_WEIGHT_MAX 1000 +#define BLKIO_WEIGHT_DEFAULT 500 + +#ifdef CONFIG_BLK_CGROUP +extern struct blkio_cgroup blkio_root_cgroup; +extern struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup); +extern void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg, + struct blkio_group *blkg, void *key); +extern int blkiocg_del_blkio_group(struct blkio_group *blkg); +extern struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg, + void *key); +#else +static inline struct blkio_cgroup * +cgroup_to_blkio_cgroup(struct cgroup *cgroup) { return NULL; } + +static inline void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg, + struct blkio_group *blkg, void *key) +{ +} + +static inline int +blkiocg_del_blkio_group(struct blkio_group *blkg) { return 0; } + +static inline struct blkio_group * +blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key) { return NULL; } +#endif +#endif /* _BLK_CGROUP_H */ diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index 9c8d31bacf46..ccefff02b6cb 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -60,3 +60,9 @@ SUBSYS(net_cls) #endif /* */ + +#ifdef CONFIG_BLK_CGROUP +SUBSYS(blkio) +#endif + +/* */ diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index eb73632440f1..d61b0b8b5cd1 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -68,6 +68,10 @@ struct io_context { unsigned short ioprio; unsigned short ioprio_changed; +#ifdef CONFIG_BLK_CGROUP + unsigned short cgroup_changed; +#endif + /* * For request batching */ -- cgit v1.3-8-gc7d7 From 76bca88012e1d27de794f32cc551d6314d38b6d9 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Wed, 18 Nov 2009 00:40:39 +0100 Subject: Bluetooth: Turn hci_recv_frame into an exported function For future simplification it is important that the hci_recv_frame function is no longer an inline function. So move it into the module itself and export it. Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 23 +---------------------- net/bluetooth/hci_core.c | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 7b640aeddb64..8460f2283089 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -437,28 +437,7 @@ int hci_inquiry(void __user *arg); void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb); -/* Receive frame from HCI drivers */ -static inline int hci_recv_frame(struct sk_buff *skb) -{ - struct hci_dev *hdev = (struct hci_dev *) skb->dev; - if (!hdev || (!test_bit(HCI_UP, &hdev->flags) - && !test_bit(HCI_INIT, &hdev->flags))) { - kfree_skb(skb); - return -ENXIO; - } - - /* Incomming skb */ - bt_cb(skb)->incoming = 1; - - /* Time stamp */ - __net_timestamp(skb); - - /* Queue frame for rx task */ - skb_queue_tail(&hdev->rx_q, skb); - hci_sched_rx(hdev); - return 0; -} - +int hci_recv_frame(struct sk_buff *skb); int hci_recv_fragment(struct hci_dev *hdev, int type, void *data, int count); int hci_register_sysfs(struct hci_dev *hdev); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index e1da8f68759c..3fc90e543ec1 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -987,6 +987,29 @@ int hci_resume_dev(struct hci_dev *hdev) } EXPORT_SYMBOL(hci_resume_dev); +/* Receive frame from HCI drivers */ +int hci_recv_frame(struct sk_buff *skb) +{ + struct hci_dev *hdev = (struct hci_dev *) skb->dev; + if (!hdev || (!test_bit(HCI_UP, &hdev->flags) + && !test_bit(HCI_INIT, &hdev->flags))) { + kfree_skb(skb); + return -ENXIO; + } + + /* Incomming skb */ + bt_cb(skb)->incoming = 1; + + /* Time stamp */ + __net_timestamp(skb); + + /* Queue frame for rx task */ + skb_queue_tail(&hdev->rx_q, skb); + hci_sched_rx(hdev); + return 0; +} +EXPORT_SYMBOL(hci_recv_frame); + /* Receive packet type fragment */ #define __reassembly(hdev, type) ((hdev)->reassembly[(type) - 2]) -- cgit v1.3-8-gc7d7 From c78ae283145d3a8799b2fb01650166a66af3bff8 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Wed, 18 Nov 2009 01:02:54 +0100 Subject: Bluetooth: Unobfuscate tasklet_schedule usage The tasklet schedule function helpers are just an obfuscation. So remove them and call the schedule functions directly. Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 16 ---------------- net/bluetooth/hci_core.c | 18 ++++++++++++------ net/bluetooth/hci_event.c | 6 +++--- net/bluetooth/hci_sock.c | 6 +++--- 4 files changed, 18 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 8460f2283089..7b86094a894b 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -367,22 +367,6 @@ static inline void hci_conn_put(struct hci_conn *conn) } } -/* ----- HCI tasks ----- */ -static inline void hci_sched_cmd(struct hci_dev *hdev) -{ - tasklet_schedule(&hdev->cmd_task); -} - -static inline void hci_sched_rx(struct hci_dev *hdev) -{ - tasklet_schedule(&hdev->rx_task); -} - -static inline void hci_sched_tx(struct hci_dev *hdev) -{ - tasklet_schedule(&hdev->tx_task); -} - /* ----- HCI Devices ----- */ static inline void __hci_dev_put(struct hci_dev *d) { diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 3fc90e543ec1..94ba34982021 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -193,8 +193,9 @@ static void hci_init_req(struct hci_dev *hdev, unsigned long opt) while ((skb = skb_dequeue(&hdev->driver_init))) { bt_cb(skb)->pkt_type = HCI_COMMAND_PKT; skb->dev = (void *) hdev; + skb_queue_tail(&hdev->cmd_q, skb); - hci_sched_cmd(hdev); + tasklet_schedule(&hdev->cmd_task); } skb_queue_purge(&hdev->driver_init); @@ -1005,7 +1006,8 @@ int hci_recv_frame(struct sk_buff *skb) /* Queue frame for rx task */ skb_queue_tail(&hdev->rx_q, skb); - hci_sched_rx(hdev); + tasklet_schedule(&hdev->rx_task); + return 0; } EXPORT_SYMBOL(hci_recv_frame); @@ -1216,8 +1218,9 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param) bt_cb(skb)->pkt_type = HCI_COMMAND_PKT; skb->dev = (void *) hdev; + skb_queue_tail(&hdev->cmd_q, skb); - hci_sched_cmd(hdev); + tasklet_schedule(&hdev->cmd_task); return 0; } @@ -1294,7 +1297,8 @@ int hci_send_acl(struct hci_conn *conn, struct sk_buff *skb, __u16 flags) spin_unlock_bh(&conn->data_q.lock); } - hci_sched_tx(hdev); + tasklet_schedule(&hdev->tx_task); + return 0; } EXPORT_SYMBOL(hci_send_acl); @@ -1321,8 +1325,10 @@ int hci_send_sco(struct hci_conn *conn, struct sk_buff *skb) skb->dev = (void *) hdev; bt_cb(skb)->pkt_type = HCI_SCODATA_PKT; + skb_queue_tail(&conn->data_q, skb); - hci_sched_tx(hdev); + tasklet_schedule(&hdev->tx_task); + return 0; } EXPORT_SYMBOL(hci_send_sco); @@ -1635,7 +1641,7 @@ static void hci_cmd_task(unsigned long arg) hdev->cmd_last_tx = jiffies; } else { skb_queue_head(&hdev->cmd_q, skb); - hci_sched_cmd(hdev); + tasklet_schedule(&hdev->cmd_task); } } } diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index e99fe385fba2..28517bad796c 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1320,7 +1320,7 @@ static inline void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *sk if (ev->ncmd) { atomic_set(&hdev->cmd_cnt, 1); if (!skb_queue_empty(&hdev->cmd_q)) - hci_sched_cmd(hdev); + tasklet_schedule(&hdev->cmd_task); } } @@ -1386,7 +1386,7 @@ static inline void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb) if (ev->ncmd) { atomic_set(&hdev->cmd_cnt, 1); if (!skb_queue_empty(&hdev->cmd_q)) - hci_sched_cmd(hdev); + tasklet_schedule(&hdev->cmd_task); } } @@ -1454,7 +1454,7 @@ static inline void hci_num_comp_pkts_evt(struct hci_dev *hdev, struct sk_buff *s } } - hci_sched_tx(hdev); + tasklet_schedule(&hdev->tx_task); tasklet_enable(&hdev->tx_task); } diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index c3701f37aeb0..688cfebfbee0 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -445,10 +445,10 @@ static int hci_sock_sendmsg(struct kiocb *iocb, struct socket *sock, if (test_bit(HCI_RAW, &hdev->flags) || (ogf == 0x3f)) { skb_queue_tail(&hdev->raw_q, skb); - hci_sched_tx(hdev); + tasklet_schedule(&hdev->tx_task); } else { skb_queue_tail(&hdev->cmd_q, skb); - hci_sched_cmd(hdev); + tasklet_schedule(&hdev->cmd_task); } } else { if (!capable(CAP_NET_RAW)) { @@ -457,7 +457,7 @@ static int hci_sock_sendmsg(struct kiocb *iocb, struct socket *sock, } skb_queue_tail(&hdev->raw_q, skb); - hci_sched_tx(hdev); + tasklet_schedule(&hdev->tx_task); } err = len; -- cgit v1.3-8-gc7d7 From 9f121a5a80b4417c6db5a35e26d2e79c29c3fc0d Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Sat, 3 Oct 2009 02:34:38 -0300 Subject: Bluetooth: Fix sending ReqSeq on I-frames As specified by ERTM spec an ERTM channel can acknowledge received I-frames(the data frames) by sending an I-frame with the proper ReqSeq value (i.e. ReqSeq is set to BufferSeq). Until now we aren't setting the ReqSeq value on I-frame control bits. That way we can save sending S-frames(Supervise frames) only to acknowledge receipt of I-frames. It is very helpful to the full-duplex channel. ReqSeq is the packet sequence number sent in an acknowledgement frame to acknowledge receipt of frames up to (ReqSeq - 1). BufferSeq controls the receiver buffer, it is used to delay acknowledgement of new frames to not cause buffer overflow. BufferSeq value is not increased until frames are pulled by reassembly function. Signed-off-by: Gustavo F. Padovan Signed-off-by: Marcel Holtmann --- include/net/bluetooth/l2cap.h | 1 - net/bluetooth/l2cap.c | 8 ++++++-- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 9516f4b4a3c2..327eb57dab51 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -324,7 +324,6 @@ struct l2cap_pinfo { __u8 next_tx_seq; __u8 expected_ack_seq; - __u8 req_seq; __u8 expected_tx_seq; __u8 buffer_seq; __u8 buffer_seq_srej; diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 2d5d37545b10..78ab8811e5ef 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -1333,7 +1333,7 @@ static int l2cap_retransmit_frame(struct sock *sk, u8 tx_seq) tx_skb = skb_clone(skb, GFP_ATOMIC); bt_cb(skb)->retries++; control = get_unaligned_le16(tx_skb->data + L2CAP_HDR_SIZE); - control |= (pi->req_seq << L2CAP_CTRL_REQSEQ_SHIFT) + control |= (pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT) | (tx_seq << L2CAP_CTRL_TXSEQ_SHIFT); put_unaligned_le16(control, tx_skb->data + L2CAP_HDR_SIZE); @@ -1375,7 +1375,7 @@ static int l2cap_ertm_send(struct sock *sk) bt_cb(skb)->retries++; control = get_unaligned_le16(tx_skb->data + L2CAP_HDR_SIZE); - control |= (pi->req_seq << L2CAP_CTRL_REQSEQ_SHIFT) + control |= (pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT) | (pi->next_tx_seq << L2CAP_CTRL_TXSEQ_SHIFT); put_unaligned_le16(control, tx_skb->data + L2CAP_HDR_SIZE); @@ -3298,12 +3298,16 @@ static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, str { struct l2cap_pinfo *pi = l2cap_pi(sk); u8 tx_seq = __get_txseq(rx_control); + u8 req_seq = __get_reqseq(rx_control); u16 tx_control = 0; u8 sar = rx_control >> L2CAP_CTRL_SAR_SHIFT; int err = 0; BT_DBG("sk %p rx_control 0x%4.4x len %d", sk, rx_control, skb->len); + pi->expected_ack_seq = req_seq; + l2cap_drop_acked_frames(sk); + if (tx_seq == pi->expected_tx_seq) goto expected; -- cgit v1.3-8-gc7d7 From 4ec10d9720ef78cd81d8bcc30a3238665744569f Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Sat, 3 Oct 2009 02:34:39 -0300 Subject: Bluetooth: Implement RejActioned flag RejActioned is used to prevent retransmission when a entity is on the WAIT_F state, i.e., waiting for a frame with F-bit set due local busy condition or a expired retransmission timer. (When these two events raise they send a frame with the Poll bit set and enters in the WAIT_F state to wait for a frame with the Final bit set.) The local entity doesn't send I-frames(the data frames) until the receipt of a frame with F-bit set. When that happens it also set RejActioned to false. RejActioned is a mandatory feature of ERTM spec. Signed-off-by: Gustavo F. Padovan Signed-off-by: Marcel Holtmann --- include/net/bluetooth/l2cap.h | 1 + net/bluetooth/l2cap.c | 38 +++++++++++++++++++++++++++++++++++--- 2 files changed, 36 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 327eb57dab51..17a689f27a6a 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -374,6 +374,7 @@ struct l2cap_pinfo { #define L2CAP_CONN_SEND_PBIT 0x10 #define L2CAP_CONN_REMOTE_BUSY 0x20 #define L2CAP_CONN_LOCAL_BUSY 0x40 +#define L2CAP_CONN_REJ_ACT 0x80 #define __mod_retrans_timer() mod_timer(&l2cap_pi(sk)->retrans_timer, \ jiffies + msecs_to_jiffies(L2CAP_DEFAULT_RETRANS_TO)); diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 78ab8811e5ef..73bda0ae41d6 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -3362,6 +3362,16 @@ expected: return 0; } + if (rx_control & L2CAP_CTRL_FINAL) { + if (pi->conn_state & L2CAP_CONN_REJ_ACT) + pi->conn_state &= ~L2CAP_CONN_REJ_ACT; + else { + sk->sk_send_head = TX_QUEUE(sk)->next; + pi->next_tx_seq = pi->expected_ack_seq; + l2cap_ertm_send(sk); + } + } + pi->buffer_seq = (pi->buffer_seq + 1) % 64; err = l2cap_sar_reassembly_sdu(sk, skb, rx_control); @@ -3398,6 +3408,14 @@ static inline int l2cap_data_channel_sframe(struct sock *sk, u16 rx_control, str pi->expected_ack_seq = tx_seq; l2cap_drop_acked_frames(sk); + if (pi->conn_state & L2CAP_CONN_REJ_ACT) + pi->conn_state &= ~L2CAP_CONN_REJ_ACT; + else { + sk->sk_send_head = TX_QUEUE(sk)->next; + pi->next_tx_seq = pi->expected_ack_seq; + l2cap_ertm_send(sk); + } + if (!(pi->conn_state & L2CAP_CONN_WAIT_F)) break; @@ -3425,10 +3443,24 @@ static inline int l2cap_data_channel_sframe(struct sock *sk, u16 rx_control, str pi->expected_ack_seq = __get_reqseq(rx_control); l2cap_drop_acked_frames(sk); - sk->sk_send_head = TX_QUEUE(sk)->next; - pi->next_tx_seq = pi->expected_ack_seq; + if (rx_control & L2CAP_CTRL_FINAL) { + if (pi->conn_state & L2CAP_CONN_REJ_ACT) + pi->conn_state &= ~L2CAP_CONN_REJ_ACT; + else { + sk->sk_send_head = TX_QUEUE(sk)->next; + pi->next_tx_seq = pi->expected_ack_seq; + l2cap_ertm_send(sk); + } + } else { + sk->sk_send_head = TX_QUEUE(sk)->next; + pi->next_tx_seq = pi->expected_ack_seq; + l2cap_ertm_send(sk); - l2cap_ertm_send(sk); + if (pi->conn_state & L2CAP_CONN_WAIT_F) { + pi->srej_save_reqseq = tx_seq; + pi->conn_state |= L2CAP_CONN_REJ_ACT; + } + } break; -- cgit v1.3-8-gc7d7 From 8e182a90f91456335756d2ce304ad470795d98e1 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Mon, 30 Nov 2009 13:23:11 +0000 Subject: pata_piccolo: Driver for old Toshiba chipsets We were never able to get docs for this out of Toshiba for years. Dave Barnes produced a NetBSD driver however and from that we can fill in the needed tables. As we correct the PCI identifiers a bit also update the old ide generic driver at the same time so it stays compiling. Signed-off-by: Alan Cox Signed-off-by: Jeff Garzik --- drivers/ata/Kconfig | 9 +++ drivers/ata/Makefile | 1 + drivers/ata/ata_generic.c | 5 +- drivers/ata/pata_piccolo.c | 140 ++++++++++++++++++++++++++++++++++++++++++ drivers/ide/ide-pci-generic.c | 3 +- include/linux/pci_ids.h | 7 ++- 6 files changed, 160 insertions(+), 5 deletions(-) create mode 100644 drivers/ata/pata_piccolo.c (limited to 'include') diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index a47ca75aa078..676f08b004b3 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -667,6 +667,15 @@ config PATA_SIS If unsure, say N. +config PATA_TOSHIBA + tristate "Toshiba Piccolo support (Experimental)" + depends on PCI && EXPERIMENTAL + help + Support for the Toshiba Piccolo controllers. Currently only the + primary channel is supported by this driver. + + If unsure, say N. + config PATA_VIA tristate "VIA PATA support" depends on PCI diff --git a/drivers/ata/Makefile b/drivers/ata/Makefile index 01e126f343b3..d909435e9d81 100644 --- a/drivers/ata/Makefile +++ b/drivers/ata/Makefile @@ -63,6 +63,7 @@ obj-$(CONFIG_PATA_RZ1000) += pata_rz1000.o obj-$(CONFIG_PATA_SC1200) += pata_sc1200.o obj-$(CONFIG_PATA_SERVERWORKS) += pata_serverworks.o obj-$(CONFIG_PATA_SIL680) += pata_sil680.o +obj-$(CONFIG_PATA_TOSHIBA) += pata_piccolo.o obj-$(CONFIG_PATA_VIA) += pata_via.o obj-$(CONFIG_PATA_WINBOND) += pata_sl82c105.o obj-$(CONFIG_PATA_WINBOND_VLB) += pata_winbond.o diff --git a/drivers/ata/ata_generic.c b/drivers/ata/ata_generic.c index ecfd22b4f1ce..12e26c3c68e3 100644 --- a/drivers/ata/ata_generic.c +++ b/drivers/ata/ata_generic.c @@ -168,9 +168,12 @@ static struct pci_device_id ata_generic[] = { { PCI_DEVICE(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C561), }, { PCI_DEVICE(PCI_VENDOR_ID_OPTI, PCI_DEVICE_ID_OPTI_82C558), }, { PCI_DEVICE(PCI_VENDOR_ID_CENATEK,PCI_DEVICE_ID_CENATEK_IDE), }, - { PCI_DEVICE(PCI_VENDOR_ID_TOSHIBA,PCI_DEVICE_ID_TOSHIBA_PICCOLO), }, +#if !defined(CONFIG_PATA_TOSHIBA) && !defined(CONFIG_PATA_TOSHIBA_MODULE) { PCI_DEVICE(PCI_VENDOR_ID_TOSHIBA,PCI_DEVICE_ID_TOSHIBA_PICCOLO_1), }, { PCI_DEVICE(PCI_VENDOR_ID_TOSHIBA,PCI_DEVICE_ID_TOSHIBA_PICCOLO_2), }, + { PCI_DEVICE(PCI_VENDOR_ID_TOSHIBA,PCI_DEVICE_ID_TOSHIBA_PICCOLO_3), }, + { PCI_DEVICE(PCI_VENDOR_ID_TOSHIBA,PCI_DEVICE_ID_TOSHIBA_PICCOLO_5), }, +#endif /* Must come last. If you add entries adjust this table appropriately */ { PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_STORAGE_IDE << 8, 0xFFFFFF00UL, 1}, { 0, }, diff --git a/drivers/ata/pata_piccolo.c b/drivers/ata/pata_piccolo.c new file mode 100644 index 000000000000..bfe0180f3efa --- /dev/null +++ b/drivers/ata/pata_piccolo.c @@ -0,0 +1,140 @@ +/* + * pata_piccolo.c - Toshiba Piccolo PATA/SATA controller driver. + * + * This is basically an update to ata_generic.c to add Toshiba Piccolo support + * then split out to keep ata_generic "clean". + * + * Copyright 2005 Red Hat Inc, all rights reserved. + * + * Elements from ide/pci/generic.c + * Copyright (C) 2001-2002 Andre Hedrick + * Portions (C) Copyright 2002 Red Hat Inc + * + * May be copied or modified under the terms of the GNU General Public License + * + * The timing data tables/programming info are courtesy of the NetBSD driver + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRV_NAME "pata_piccolo" +#define DRV_VERSION "0.0.1" + + + +static void tosh_set_piomode(struct ata_port *ap, struct ata_device *adev) +{ + static const u16 pio[6] = { /* For reg 0x50 low word & E088 */ + 0x0566, 0x0433, 0x0311, 0x0201, 0x0200, 0x0100 + }; + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + u16 conf; + pci_read_config_word(pdev, 0x50, &conf); + conf &= 0xE088; + conf |= pio[adev->pio_mode - XFER_PIO_0]; + pci_write_config_word(pdev, 0x50, conf); +} + +static void tosh_set_dmamode(struct ata_port *ap, struct ata_device *adev) +{ + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + u32 conf; + pci_read_config_dword(pdev, 0x5C, &conf); + conf &= 0x78FFE088; /* Keep the other bits */ + if (adev->dma_mode >= XFER_UDMA_0) { + int udma = adev->dma_mode - XFER_UDMA_0; + conf |= 0x80000000; + conf |= (udma + 2) << 28; + conf |= (2 - udma) * 0x111; /* spread into three nibbles */ + } else { + static const u32 mwdma[4] = { + 0x0655, 0x0200, 0x0200, 0x0100 + }; + conf |= mwdma[adev->dma_mode - XFER_MW_DMA_0]; + } + pci_write_config_dword(pdev, 0x5C, conf); +} + + +static struct scsi_host_template tosh_sht = { + ATA_BMDMA_SHT(DRV_NAME), +}; + +static struct ata_port_operations tosh_port_ops = { + .inherits = &ata_bmdma_port_ops, + .cable_detect = ata_cable_unknown, + .set_piomode = tosh_set_piomode, + .set_dmamode = tosh_set_dmamode +}; + +/** + * ata_tosh_init - attach generic IDE + * @dev: PCI device found + * @id: match entry + * + * Called each time a matching IDE interface is found. We check if the + * interface is one we wish to claim and if so we perform any chip + * specific hacks then let the ATA layer do the heavy lifting. + */ + +static int ata_tosh_init_one(struct pci_dev *dev, const struct pci_device_id *id) +{ + static const struct ata_port_info info = { + .flags = ATA_FLAG_SLAVE_POSS, + .pio_mask = ATA_PIO5, + .mwdma_mask = ATA_MWDMA2, + .udma_mask = ATA_UDMA2, + .port_ops = &tosh_port_ops + }; + const struct ata_port_info *ppi[] = { &info, &ata_dummy_port_info }; + /* Just one port for the moment */ + return ata_pci_sff_init_one(dev, ppi, &tosh_sht, NULL); +} + +static struct pci_device_id ata_tosh[] = { + { PCI_DEVICE(PCI_VENDOR_ID_TOSHIBA,PCI_DEVICE_ID_TOSHIBA_PICCOLO_1), }, + { PCI_DEVICE(PCI_VENDOR_ID_TOSHIBA,PCI_DEVICE_ID_TOSHIBA_PICCOLO_2), }, + { PCI_DEVICE(PCI_VENDOR_ID_TOSHIBA,PCI_DEVICE_ID_TOSHIBA_PICCOLO_3), }, + { PCI_DEVICE(PCI_VENDOR_ID_TOSHIBA,PCI_DEVICE_ID_TOSHIBA_PICCOLO_5), }, + { 0, }, +}; + +static struct pci_driver ata_tosh_pci_driver = { + .name = DRV_NAME, + .id_table = ata_tosh, + .probe = ata_tosh_init_one, + .remove = ata_pci_remove_one, +#ifdef CONFIG_PM + .suspend = ata_pci_device_suspend, + .resume = ata_pci_device_resume, +#endif +}; + +static int __init ata_tosh_init(void) +{ + return pci_register_driver(&ata_tosh_pci_driver); +} + + +static void __exit ata_tosh_exit(void) +{ + pci_unregister_driver(&ata_tosh_pci_driver); +} + + +MODULE_AUTHOR("Alan Cox"); +MODULE_DESCRIPTION("Low level driver for Toshiba Piccolo ATA"); +MODULE_LICENSE("GPL"); +MODULE_DEVICE_TABLE(pci, ata_tosh); +MODULE_VERSION(DRV_VERSION); + +module_init(ata_tosh_init); +module_exit(ata_tosh_exit); + diff --git a/drivers/ide/ide-pci-generic.c b/drivers/ide/ide-pci-generic.c index 39d4e01f5c9c..a743e68a8903 100644 --- a/drivers/ide/ide-pci-generic.c +++ b/drivers/ide/ide-pci-generic.c @@ -162,9 +162,10 @@ static const struct pci_device_id generic_pci_tbl[] = { #ifdef CONFIG_BLK_DEV_IDE_SATA { PCI_VDEVICE(VIA, PCI_DEVICE_ID_VIA_8237_SATA), 5 }, #endif - { PCI_VDEVICE(TOSHIBA, PCI_DEVICE_ID_TOSHIBA_PICCOLO), 4 }, { PCI_VDEVICE(TOSHIBA, PCI_DEVICE_ID_TOSHIBA_PICCOLO_1), 4 }, { PCI_VDEVICE(TOSHIBA, PCI_DEVICE_ID_TOSHIBA_PICCOLO_2), 4 }, + { PCI_VDEVICE(TOSHIBA, PCI_DEVICE_ID_TOSHIBA_PICCOLO_3), 4 }, + { PCI_VDEVICE(TOSHIBA, PCI_DEVICE_ID_TOSHIBA_PICCOLO_5), 4 }, { PCI_VDEVICE(NETCELL, PCI_DEVICE_ID_REVOLUTION), 6 }, /* * Must come last. If you add entries adjust diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 84cf1f3b7838..9ca483be68d5 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1496,9 +1496,10 @@ #define PCI_DEVICE_ID_SBE_WANXL400 0x0104 #define PCI_VENDOR_ID_TOSHIBA 0x1179 -#define PCI_DEVICE_ID_TOSHIBA_PICCOLO 0x0102 -#define PCI_DEVICE_ID_TOSHIBA_PICCOLO_1 0x0103 -#define PCI_DEVICE_ID_TOSHIBA_PICCOLO_2 0x0105 +#define PCI_DEVICE_ID_TOSHIBA_PICCOLO_1 0x0101 +#define PCI_DEVICE_ID_TOSHIBA_PICCOLO_2 0x0102 +#define PCI_DEVICE_ID_TOSHIBA_PICCOLO_3 0x0103 +#define PCI_DEVICE_ID_TOSHIBA_PICCOLO_5 0x0105 #define PCI_DEVICE_ID_TOSHIBA_TOPIC95 0x060a #define PCI_DEVICE_ID_TOSHIBA_TOPIC97 0x060f #define PCI_DEVICE_ID_TOSHIBA_TOPIC100 0x0617 -- cgit v1.3-8-gc7d7 From 95514fd8ff0f30de7815950edfd84ef1e19fb1c8 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 25 Nov 2009 18:12:48 +0100 Subject: libata: add private driver field to struct ata_device This brings struct ata_device in-line with struct ata_{port,host}. Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Jeff Garzik --- include/linux/libata.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index ba07e84c9840..a5b3dc71e819 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -595,6 +595,7 @@ struct ata_device { unsigned int horkage; /* List of broken features */ unsigned long flags; /* ATA_DFLAG_xxx */ struct scsi_device *sdev; /* attached SCSI device */ + void *private_data; #ifdef CONFIG_ATA_ACPI acpi_handle acpi_handle; union acpi_object *gtf_cache; -- cgit v1.3-8-gc7d7 From d285834001df372f81045bb41092e54943e93c84 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 3 Dec 2009 01:25:53 +0000 Subject: net 01/05: fib_rules: rearrange struct fib_rule commit b8952893d5d86f69c4e499d191b98c6658f64b0f Author: Patrick McHardy Date: Thu Dec 3 12:05:22 2009 +0100 net: fib_rules: rearrange struct fib_rule The ifname member is only used to resolve interface names and is not needed during rule lookups. The target and ctarget members however are used during rule lookups and are currently located in a second cacheline. Move ifname further to the end to make sure both target and ctarget are located in the same cacheline as other members used during rule lookups. The layout on 64 bit changes from: struct fib_rule { ... u32 table; /* 56 4 */ u8 action; /* 60 1 */ /* XXX 3 bytes hole, try to pack */ /* --- cacheline 1 boundary (64 bytes) --- */ u32 target; /* 64 4 */ /* XXX 4 bytes hole, try to pack */ struct fib_rule * ctarget; /* 72 8 */ struct rcu_head rcu; /* 80 16 */ struct net * fr_net; /* 96 8 */ }; to: struct fib_rule { ... u32 table; /* 40 4 */ u8 action; /* 44 1 */ /* XXX 3 bytes hole, try to pack */ u32 target; /* 48 4 */ /* XXX 4 bytes hole, try to pack */ struct fib_rule * ctarget; /* 56 8 */ /* --- cacheline 1 boundary (64 bytes) --- */ char ifname[16]; /* 64 16 */ struct rcu_head rcu; /* 80 16 */ struct net * fr_net; /* 96 8 */ }; Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/fib_rules.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 2cd707b15d59..22fb323cd85e 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -11,7 +11,6 @@ struct fib_rule { struct list_head list; atomic_t refcnt; int ifindex; - char ifname[IFNAMSIZ]; u32 mark; u32 mark_mask; u32 pref; @@ -20,6 +19,7 @@ struct fib_rule { u8 action; u32 target; struct fib_rule * ctarget; + char ifname[IFNAMSIZ]; struct rcu_head rcu; struct net * fr_net; }; -- cgit v1.3-8-gc7d7 From 491deb24bf5bf7124141287aaf02c3219783ceab Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 3 Dec 2009 01:25:54 +0000 Subject: net 02/05: fib_rules: rename ifindex/ifname/FRA_IFNAME to iifindex/iifname/FRA_IIFNAME commit 229e77eec406ad68662f18e49fda8b5d366768c5 Author: Patrick McHardy Date: Thu Dec 3 12:05:23 2009 +0100 net: fib_rules: rename ifindex/ifname/FRA_IFNAME to iifindex/iifname/FRA_IIFNAME The next patch will add oif classification, rename interface related members and attributes to reflect that they're used for iif classification. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/fib_rules.h | 6 ++++-- include/net/fib_rules.h | 6 +++--- net/core/fib_rules.c | 36 ++++++++++++++++++------------------ 3 files changed, 25 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/linux/fib_rules.h b/include/linux/fib_rules.h index c7e5b700bb91..7e11bb2fa655 100644 --- a/include/linux/fib_rules.h +++ b/include/linux/fib_rules.h @@ -8,7 +8,8 @@ #define FIB_RULE_PERMANENT 0x00000001 #define FIB_RULE_INVERT 0x00000002 #define FIB_RULE_UNRESOLVED 0x00000004 -#define FIB_RULE_DEV_DETACHED 0x00000008 +#define FIB_RULE_IIF_DETACHED 0x00000008 +#define FIB_RULE_DEV_DETACHED FIB_RULE_IIF_DETACHED /* try to find source address in routing lookups */ #define FIB_RULE_FIND_SADDR 0x00010000 @@ -31,7 +32,8 @@ enum { FRA_UNSPEC, FRA_DST, /* destination address */ FRA_SRC, /* source address */ - FRA_IFNAME, /* interface name */ + FRA_IIFNAME, /* interface name */ +#define FRA_IFNAME FRA_IIFNAME FRA_GOTO, /* target to jump to (FR_ACT_GOTO) */ FRA_UNUSED2, FRA_PRIORITY, /* priority/preference */ diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 22fb323cd85e..62bebcb2a51c 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -10,7 +10,7 @@ struct fib_rule { struct list_head list; atomic_t refcnt; - int ifindex; + int iifindex; u32 mark; u32 mark_mask; u32 pref; @@ -19,7 +19,7 @@ struct fib_rule { u8 action; u32 target; struct fib_rule * ctarget; - char ifname[IFNAMSIZ]; + char iifname[IFNAMSIZ]; struct rcu_head rcu; struct net * fr_net; }; @@ -67,7 +67,7 @@ struct fib_rules_ops { }; #define FRA_GENERIC_POLICY \ - [FRA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \ + [FRA_IIFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \ [FRA_PRIORITY] = { .type = NLA_U32 }, \ [FRA_FWMARK] = { .type = NLA_U32 }, \ [FRA_FWMASK] = { .type = NLA_U32 }, \ diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index bd309384f8b8..8e8028cdc87f 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -135,7 +135,7 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, { int ret = 0; - if (rule->ifindex && (rule->ifindex != fl->iif)) + if (rule->iifindex && (rule->iifindex != fl->iif)) goto out; if ((rule->mark ^ fl->mark) & rule->mark_mask) @@ -248,14 +248,14 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (tb[FRA_PRIORITY]) rule->pref = nla_get_u32(tb[FRA_PRIORITY]); - if (tb[FRA_IFNAME]) { + if (tb[FRA_IIFNAME]) { struct net_device *dev; - rule->ifindex = -1; - nla_strlcpy(rule->ifname, tb[FRA_IFNAME], IFNAMSIZ); - dev = __dev_get_by_name(net, rule->ifname); + rule->iifindex = -1; + nla_strlcpy(rule->iifname, tb[FRA_IIFNAME], IFNAMSIZ); + dev = __dev_get_by_name(net, rule->iifname); if (dev) - rule->ifindex = dev->ifindex; + rule->iifindex = dev->ifindex; } if (tb[FRA_FWMARK]) { @@ -388,8 +388,8 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) (rule->pref != nla_get_u32(tb[FRA_PRIORITY]))) continue; - if (tb[FRA_IFNAME] && - nla_strcmp(tb[FRA_IFNAME], rule->ifname)) + if (tb[FRA_IIFNAME] && + nla_strcmp(tb[FRA_IIFNAME], rule->iifname)) continue; if (tb[FRA_FWMARK] && @@ -447,7 +447,7 @@ static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops, struct fib_rule *rule) { size_t payload = NLMSG_ALIGN(sizeof(struct fib_rule_hdr)) - + nla_total_size(IFNAMSIZ) /* FRA_IFNAME */ + + nla_total_size(IFNAMSIZ) /* FRA_IIFNAME */ + nla_total_size(4) /* FRA_PRIORITY */ + nla_total_size(4) /* FRA_TABLE */ + nla_total_size(4) /* FRA_FWMARK */ @@ -481,11 +481,11 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, if (rule->action == FR_ACT_GOTO && rule->ctarget == NULL) frh->flags |= FIB_RULE_UNRESOLVED; - if (rule->ifname[0]) { - NLA_PUT_STRING(skb, FRA_IFNAME, rule->ifname); + if (rule->iifname[0]) { + NLA_PUT_STRING(skb, FRA_IIFNAME, rule->iifname); - if (rule->ifindex == -1) - frh->flags |= FIB_RULE_DEV_DETACHED; + if (rule->iifindex == -1) + frh->flags |= FIB_RULE_IIF_DETACHED; } if (rule->pref) @@ -600,9 +600,9 @@ static void attach_rules(struct list_head *rules, struct net_device *dev) struct fib_rule *rule; list_for_each_entry(rule, rules, list) { - if (rule->ifindex == -1 && - strcmp(dev->name, rule->ifname) == 0) - rule->ifindex = dev->ifindex; + if (rule->iifindex == -1 && + strcmp(dev->name, rule->iifname) == 0) + rule->iifindex = dev->ifindex; } } @@ -611,8 +611,8 @@ static void detach_rules(struct list_head *rules, struct net_device *dev) struct fib_rule *rule; list_for_each_entry(rule, rules, list) - if (rule->ifindex == dev->ifindex) - rule->ifindex = -1; + if (rule->iifindex == dev->ifindex) + rule->iifindex = -1; } -- cgit v1.3-8-gc7d7 From 1b038a5e60c7812f19818e8a5df96d029e49c38f Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 3 Dec 2009 01:25:56 +0000 Subject: net 03/05: fib_rules: add oif classification commit 68144d350f4f6c348659c825cde6a82b34c27a91 Author: Patrick McHardy Date: Thu Dec 3 12:05:25 2009 +0100 net: fib_rules: add oif classification Support routing table lookup based on the flow's oif. This is useful to classify packets originating from sockets bound to interfaces differently. The route cache already includes the oif and needs no changes. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/fib_rules.h | 2 ++ include/net/fib_rules.h | 3 +++ net/core/fib_rules.c | 33 ++++++++++++++++++++++++++++++++- 3 files changed, 37 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fib_rules.h b/include/linux/fib_rules.h index 7e11bb2fa655..51da65b68b85 100644 --- a/include/linux/fib_rules.h +++ b/include/linux/fib_rules.h @@ -10,6 +10,7 @@ #define FIB_RULE_UNRESOLVED 0x00000004 #define FIB_RULE_IIF_DETACHED 0x00000008 #define FIB_RULE_DEV_DETACHED FIB_RULE_IIF_DETACHED +#define FIB_RULE_OIF_DETACHED 0x00000010 /* try to find source address in routing lookups */ #define FIB_RULE_FIND_SADDR 0x00010000 @@ -47,6 +48,7 @@ enum { FRA_UNUSED8, FRA_TABLE, /* Extended table id */ FRA_FWMASK, /* mask for netfilter mark */ + FRA_OIFNAME, __FRA_MAX }; diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 62bebcb2a51c..d4e875a58f8b 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -11,6 +11,7 @@ struct fib_rule { struct list_head list; atomic_t refcnt; int iifindex; + int oifindex; u32 mark; u32 mark_mask; u32 pref; @@ -20,6 +21,7 @@ struct fib_rule { u32 target; struct fib_rule * ctarget; char iifname[IFNAMSIZ]; + char oifname[IFNAMSIZ]; struct rcu_head rcu; struct net * fr_net; }; @@ -68,6 +70,7 @@ struct fib_rules_ops { #define FRA_GENERIC_POLICY \ [FRA_IIFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \ + [FRA_OIFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \ [FRA_PRIORITY] = { .type = NLA_U32 }, \ [FRA_FWMARK] = { .type = NLA_U32 }, \ [FRA_FWMASK] = { .type = NLA_U32 }, \ diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 8e8028cdc87f..d1a70ad4b544 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -138,6 +138,9 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, if (rule->iifindex && (rule->iifindex != fl->iif)) goto out; + if (rule->oifindex && (rule->oifindex != fl->oif)) + goto out; + if ((rule->mark ^ fl->mark) & rule->mark_mask) goto out; @@ -258,6 +261,16 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) rule->iifindex = dev->ifindex; } + if (tb[FRA_OIFNAME]) { + struct net_device *dev; + + rule->oifindex = -1; + nla_strlcpy(rule->oifname, tb[FRA_OIFNAME], IFNAMSIZ); + dev = __dev_get_by_name(net, rule->oifname); + if (dev) + rule->oifindex = dev->ifindex; + } + if (tb[FRA_FWMARK]) { rule->mark = nla_get_u32(tb[FRA_FWMARK]); if (rule->mark) @@ -392,6 +405,10 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) nla_strcmp(tb[FRA_IIFNAME], rule->iifname)) continue; + if (tb[FRA_OIFNAME] && + nla_strcmp(tb[FRA_OIFNAME], rule->oifname)) + continue; + if (tb[FRA_FWMARK] && (rule->mark != nla_get_u32(tb[FRA_FWMARK]))) continue; @@ -448,6 +465,7 @@ static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops, { size_t payload = NLMSG_ALIGN(sizeof(struct fib_rule_hdr)) + nla_total_size(IFNAMSIZ) /* FRA_IIFNAME */ + + nla_total_size(IFNAMSIZ) /* FRA_OIFNAME */ + nla_total_size(4) /* FRA_PRIORITY */ + nla_total_size(4) /* FRA_TABLE */ + nla_total_size(4) /* FRA_FWMARK */ @@ -488,6 +506,13 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, frh->flags |= FIB_RULE_IIF_DETACHED; } + if (rule->oifname[0]) { + NLA_PUT_STRING(skb, FRA_OIFNAME, rule->oifname); + + if (rule->oifindex == -1) + frh->flags |= FIB_RULE_OIF_DETACHED; + } + if (rule->pref) NLA_PUT_U32(skb, FRA_PRIORITY, rule->pref); @@ -603,6 +628,9 @@ static void attach_rules(struct list_head *rules, struct net_device *dev) if (rule->iifindex == -1 && strcmp(dev->name, rule->iifname) == 0) rule->iifindex = dev->ifindex; + if (rule->oifindex == -1 && + strcmp(dev->name, rule->oifname) == 0) + rule->oifindex = dev->ifindex; } } @@ -610,9 +638,12 @@ static void detach_rules(struct list_head *rules, struct net_device *dev) { struct fib_rule *rule; - list_for_each_entry(rule, rules, list) + list_for_each_entry(rule, rules, list) { if (rule->iifindex == dev->ifindex) rule->iifindex = -1; + if (rule->oifindex == dev->ifindex) + rule->oifindex = -1; + } } -- cgit v1.3-8-gc7d7 From 8153a10c08f1312af563bb92532002e46d3f504a Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 3 Dec 2009 01:25:58 +0000 Subject: ipv4 05/05: add sysctl to accept packets with local source addresses commit 8ec1e0ebe26087bfc5c0394ada5feb5758014fc8 Author: Patrick McHardy Date: Thu Dec 3 12:16:35 2009 +0100 ipv4: add sysctl to accept packets with local source addresses Change fib_validate_source() to accept packets with a local source address when the "accept_local" sysctl is set for the incoming inet device. Combined with the previous patches, this allows to communicate between multiple local interfaces over the wire. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 6 ++++++ include/linux/inetdevice.h | 1 + include/linux/sysctl.h | 1 + kernel/sysctl_check.c | 1 + net/ipv4/devinet.c | 1 + net/ipv4/fib_frontend.c | 11 +++++++---- 6 files changed, 17 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 989f5538b8dd..006b39dec87d 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -731,6 +731,12 @@ accept_source_route - BOOLEAN default TRUE (router) FALSE (host) +accept_local - BOOLEAN + Accept packets with local source addresses. In combination with + suitable routing, this can be used to direct packets between two + local interfaces over the wire and have them accepted properly. + default FALSE + rp_filter - INTEGER 0 - No source validation. 1 - Strict mode as defined in RFC3704 Strict Reverse Path diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index eecfa559bfb4..699e85c01a4d 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -83,6 +83,7 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev) #define IN_DEV_RPFILTER(in_dev) IN_DEV_MAXCONF((in_dev), RP_FILTER) #define IN_DEV_SOURCE_ROUTE(in_dev) IN_DEV_ANDCONF((in_dev), \ ACCEPT_SOURCE_ROUTE) +#define IN_DEV_ACCEPT_LOCAL(in_dev) IN_DEV_ORCONF((in_dev), ACCEPT_LOCAL) #define IN_DEV_BOOTP_RELAY(in_dev) IN_DEV_ANDCONF((in_dev), BOOTP_RELAY) #define IN_DEV_LOG_MARTIANS(in_dev) IN_DEV_ORCONF((in_dev), LOG_MARTIANS) diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 1e4743ee6831..9f047d73a216 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -490,6 +490,7 @@ enum NET_IPV4_CONF_PROMOTE_SECONDARIES=20, NET_IPV4_CONF_ARP_ACCEPT=21, NET_IPV4_CONF_ARP_NOTIFY=22, + NET_IPV4_CONF_ACCEPT_LOCAL=23, __NET_IPV4_CONF_MAX }; diff --git a/kernel/sysctl_check.c b/kernel/sysctl_check.c index b6e7aaea4604..f1d676e4b368 100644 --- a/kernel/sysctl_check.c +++ b/kernel/sysctl_check.c @@ -220,6 +220,7 @@ static const struct trans_ctl_table trans_net_ipv4_conf_vars_table[] = { { NET_IPV4_CONF_PROMOTE_SECONDARIES, "promote_secondaries" }, { NET_IPV4_CONF_ARP_ACCEPT, "arp_accept" }, { NET_IPV4_CONF_ARP_NOTIFY, "arp_notify" }, + { NET_IPV4_CONF_ACCEPT_LOCAL, "accept_local" }, {} }; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index c100709d6ddf..e3126612fcbb 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1468,6 +1468,7 @@ static struct devinet_sysctl_table { DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"), DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE, "accept_source_route"), + DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"), DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"), DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"), DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"), diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 3b373a8b0473..3323168ee52d 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -241,16 +241,17 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, .iif = oif }; struct fib_result res; - int no_addr, rpf; + int no_addr, rpf, accept_local; int ret; struct net *net; - no_addr = rpf = 0; + no_addr = rpf = accept_local = 0; rcu_read_lock(); in_dev = __in_dev_get_rcu(dev); if (in_dev) { no_addr = in_dev->ifa_list == NULL; rpf = IN_DEV_RPFILTER(in_dev); + accept_local = IN_DEV_ACCEPT_LOCAL(in_dev); } rcu_read_unlock(); @@ -260,8 +261,10 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, net = dev_net(dev); if (fib_lookup(net, &fl, &res)) goto last_resort; - if (res.type != RTN_UNICAST) - goto e_inval_res; + if (res.type != RTN_UNICAST) { + if (res.type != RTN_LOCAL || !accept_local) + goto e_inval_res; + } *spec_dst = FIB_RES_PREFSRC(res); fib_combine_itag(itag, &res); #ifdef CONFIG_IP_ROUTE_MULTIPATH -- cgit v1.3-8-gc7d7 From 72ad937abd0a43b7cf2c557ba1f2ec75e608c516 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 3 Dec 2009 02:29:03 +0000 Subject: net: Add support for batching network namespace cleanups - Add exit_list to struct net to support building lists of network namespaces to cleanup. - Add exit_batch to pernet_operations to allow running operations only once during a network namespace exit. Instead of once per network namespace. - Factor opt ops_exit_list and ops_exit_free so the logic with cleanup up a network namespace does not need to be duplicated. Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- include/net/net_namespace.h | 2 + net/core/net_namespace.c | 122 ++++++++++++++++++++++---------------------- 2 files changed, 63 insertions(+), 61 deletions(-) (limited to 'include') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 24a8c5591f63..f307e133d14c 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -43,6 +43,7 @@ struct net { #endif struct list_head list; /* list of network namespaces */ struct list_head cleanup_list; /* namespaces on death row */ + struct list_head exit_list; /* Use only net_mutex */ struct proc_dir_entry *proc_net; struct proc_dir_entry *proc_net_stat; @@ -236,6 +237,7 @@ struct pernet_operations { struct list_head list; int (*init)(struct net *net); void (*exit)(struct net *net); + void (*exit_batch)(struct list_head *net_exit_list); int *id; size_t size; }; diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 9679ad292da9..6c7f6e04dbbf 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -70,6 +70,36 @@ static void ops_free(const struct pernet_operations *ops, struct net *net) } } +static void ops_exit_list(const struct pernet_operations *ops, + struct list_head *net_exit_list) +{ + struct net *net; + if (ops->exit) { + list_for_each_entry(net, net_exit_list, exit_list) + ops->exit(net); + } + if (&ops->list == first_device) { + LIST_HEAD(dev_kill_list); + rtnl_lock(); + list_for_each_entry(net, net_exit_list, exit_list) + unregister_netdevices(net, &dev_kill_list); + unregister_netdevice_many(&dev_kill_list); + rtnl_unlock(); + } + if (ops->exit_batch) + ops->exit_batch(net_exit_list); +} + +static void ops_free_list(const struct pernet_operations *ops, + struct list_head *net_exit_list) +{ + struct net *net; + if (ops->size && ops->id) { + list_for_each_entry(net, net_exit_list, exit_list) + ops_free(ops, net); + } +} + /* * setup_net runs the initializers for the network namespace object. */ @@ -78,6 +108,7 @@ static __net_init int setup_net(struct net *net) /* Must be called with net_mutex held */ const struct pernet_operations *ops, *saved_ops; int error = 0; + LIST_HEAD(net_exit_list); atomic_set(&net->count, 1); @@ -97,21 +128,14 @@ out_undo: /* Walk through the list backwards calling the exit functions * for the pernet modules whose init functions did not fail. */ + list_add(&net->exit_list, &net_exit_list); saved_ops = ops; - list_for_each_entry_continue_reverse(ops, &pernet_list, list) { - if (ops->exit) - ops->exit(net); - if (&ops->list == first_device) { - LIST_HEAD(dev_kill_list); - rtnl_lock(); - unregister_netdevices(net, &dev_kill_list); - unregister_netdevice_many(&dev_kill_list); - rtnl_unlock(); - } - } + list_for_each_entry_continue_reverse(ops, &pernet_list, list) + ops_exit_list(ops, &net_exit_list); + ops = saved_ops; list_for_each_entry_continue_reverse(ops, &pernet_list, list) - ops_free(ops, net); + ops_free_list(ops, &net_exit_list); rcu_barrier(); goto out; @@ -207,6 +231,7 @@ static void cleanup_net(struct work_struct *work) const struct pernet_operations *ops; struct net *net, *tmp; LIST_HEAD(net_kill_list); + LIST_HEAD(net_exit_list); /* Atomically snapshot the list of namespaces to cleanup */ spin_lock_irq(&cleanup_list_lock); @@ -217,8 +242,10 @@ static void cleanup_net(struct work_struct *work) /* Don't let anyone else find us. */ rtnl_lock(); - list_for_each_entry(net, &net_kill_list, cleanup_list) + list_for_each_entry(net, &net_kill_list, cleanup_list) { list_del_rcu(&net->list); + list_add_tail(&net->exit_list, &net_exit_list); + } rtnl_unlock(); /* @@ -229,27 +256,12 @@ static void cleanup_net(struct work_struct *work) synchronize_rcu(); /* Run all of the network namespace exit methods */ - list_for_each_entry_reverse(ops, &pernet_list, list) { - if (ops->exit) { - list_for_each_entry(net, &net_kill_list, cleanup_list) - ops->exit(net); - } - if (&ops->list == first_device) { - LIST_HEAD(dev_kill_list); - rtnl_lock(); - list_for_each_entry(net, &net_kill_list, cleanup_list) - unregister_netdevices(net, &dev_kill_list); - unregister_netdevice_many(&dev_kill_list); - rtnl_unlock(); - } - } + list_for_each_entry_reverse(ops, &pernet_list, list) + ops_exit_list(ops, &net_exit_list); + /* Free the net generic variables */ - list_for_each_entry_reverse(ops, &pernet_list, list) { - if (ops->size && ops->id) { - list_for_each_entry(net, &net_kill_list, cleanup_list) - ops_free(ops, net); - } - } + list_for_each_entry_reverse(ops, &pernet_list, list) + ops_free_list(ops, &net_exit_list); mutex_unlock(&net_mutex); @@ -259,8 +271,8 @@ static void cleanup_net(struct work_struct *work) rcu_barrier(); /* Finally it is safe to free my network namespace structure */ - list_for_each_entry_safe(net, tmp, &net_kill_list, cleanup_list) { - list_del_init(&net->cleanup_list); + list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) { + list_del_init(&net->exit_list); net_free(net); } } @@ -348,8 +360,9 @@ pure_initcall(net_ns_init); static int __register_pernet_operations(struct list_head *list, struct pernet_operations *ops) { - struct net *net, *undo_net; + struct net *net; int error; + LIST_HEAD(net_exit_list); list_add_tail(&ops->list, list); if (ops->init || (ops->id && ops->size)) { @@ -357,6 +370,7 @@ static int __register_pernet_operations(struct list_head *list, error = ops_init(ops, net); if (error) goto out_undo; + list_add_tail(&net->exit_list, &net_exit_list); } } return 0; @@ -364,36 +378,21 @@ static int __register_pernet_operations(struct list_head *list, out_undo: /* If I have an error cleanup all namespaces I initialized */ list_del(&ops->list); - if (ops->exit) { - for_each_net(undo_net) { - if (net_eq(undo_net, net)) - goto undone; - ops->exit(undo_net); - } - } -undone: - if (ops->size && ops->id) { - for_each_net(undo_net) { - if (net_eq(undo_net, net)) - goto freed; - ops_free(ops, undo_net); - } - } -freed: + ops_exit_list(ops, &net_exit_list); + ops_free_list(ops, &net_exit_list); return error; } static void __unregister_pernet_operations(struct pernet_operations *ops) { struct net *net; + LIST_HEAD(net_exit_list); list_del(&ops->list); - if (ops->exit) - for_each_net(net) - ops->exit(net); - if (ops->id && ops->size) - for_each_net(net) - ops_free(ops, net); + for_each_net(net) + list_add_tail(&net->exit_list, &net_exit_list); + ops_exit_list(ops, &net_exit_list); + ops_free_list(ops, &net_exit_list); } #else @@ -411,9 +410,10 @@ static int __register_pernet_operations(struct list_head *list, static void __unregister_pernet_operations(struct pernet_operations *ops) { - if (ops->exit) - ops->exit(&init_net); - ops_free(ops, &init_net); + LIST_HEAD(net_exit_list); + list_add(&init_net.exit_list, &net_exit_list); + ops_exit_list(ops, &net_exit_list); + ops_free_list(ops, &net_exit_list); } #endif /* CONFIG_NET_NS */ -- cgit v1.3-8-gc7d7 From d79d792ef9f99cca463b6619a93e860d1c833a6e Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 3 Dec 2009 02:29:05 +0000 Subject: net: Allow xfrm_user_net_exit to batch efficiently. xfrm.nlsk is provided by the xfrm_user module and is access via rcu from other parts of the xfrm code. Add xfrm.nlsk_stash a copy of xfrm.nlsk that will never be set to NULL. This allows the synchronize_net and netlink_kernel_release to be deferred until a whole batch of xfrm.nlsk sockets have been set to NULL. Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- include/net/netns/xfrm.h | 1 + net/xfrm/xfrm_user.c | 18 ++++++++++-------- 2 files changed, 11 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 1ba912749caa..56f8e5585df7 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -43,6 +43,7 @@ struct netns_xfrm { struct work_struct policy_hash_work; struct sock *nlsk; + struct sock *nlsk_stash; u32 sysctl_aevent_etime; u32 sysctl_aevent_rseqth; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index fb42d778d278..1ada6186933c 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -2721,22 +2721,24 @@ static int __net_init xfrm_user_net_init(struct net *net) xfrm_netlink_rcv, NULL, THIS_MODULE); if (nlsk == NULL) return -ENOMEM; + net->xfrm.nlsk_stash = nlsk; /* Don't set to NULL */ rcu_assign_pointer(net->xfrm.nlsk, nlsk); return 0; } -static void __net_exit xfrm_user_net_exit(struct net *net) +static void __net_exit xfrm_user_net_exit(struct list_head *net_exit_list) { - struct sock *nlsk = net->xfrm.nlsk; - - rcu_assign_pointer(net->xfrm.nlsk, NULL); - synchronize_rcu(); - netlink_kernel_release(nlsk); + struct net *net; + list_for_each_entry(net, net_exit_list, exit_list) + rcu_assign_pointer(net->xfrm.nlsk, NULL); + synchronize_net(); + list_for_each_entry(net, net_exit_list, exit_list) + netlink_kernel_release(net->xfrm.nlsk_stash); } static struct pernet_operations xfrm_user_net_ops = { - .init = xfrm_user_net_init, - .exit = xfrm_user_net_exit, + .init = xfrm_user_net_init, + .exit_batch = xfrm_user_net_exit, }; static int __init xfrm_user_init(void) -- cgit v1.3-8-gc7d7 From e9c5158ac26affd5d8ce006521bdfb7148090e18 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 3 Dec 2009 12:22:55 -0800 Subject: net: Allow fib_rule_unregister to batch Refactor the code so fib_rules_register always takes a template instead of the actual fib_rules_ops structure that will be used. This is required for network namespace support so 2 out of the 3 callers already do this, it allows the error handling to be made common, and it allows fib_rules_unregister to free the template for hte caller. Modify fib_rules_unregister to use call_rcu instead of syncrhonize_rcu to allw multiple namespaces to be cleaned up in the same rcu grace period. Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- include/net/fib_rules.h | 3 ++- net/core/fib_rules.c | 36 +++++++++++++++++++++++++++++++++--- net/decnet/dn_rules.c | 22 ++++++++++++---------- net/ipv4/fib_rules.c | 12 +++--------- net/ipv6/fib6_rules.c | 22 +++++++--------------- 5 files changed, 57 insertions(+), 38 deletions(-) (limited to 'include') diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index d4e875a58f8b..c07ac9650ebc 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -66,6 +66,7 @@ struct fib_rules_ops { struct list_head rules_list; struct module *owner; struct net *fro_net; + struct rcu_head rcu; }; #define FRA_GENERIC_POLICY \ @@ -102,7 +103,7 @@ static inline u32 frh_get_table(struct fib_rule_hdr *frh, struct nlattr **nla) return frh->table; } -extern int fib_rules_register(struct fib_rules_ops *); +extern struct fib_rules_ops *fib_rules_register(struct fib_rules_ops *, struct net *); extern void fib_rules_unregister(struct fib_rules_ops *); extern void fib_rules_cleanup_ops(struct fib_rules_ops *); diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index ef0e7d9e664b..02a3b2c69c1e 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -72,7 +72,7 @@ static void flush_route_cache(struct fib_rules_ops *ops) ops->flush_cache(ops); } -int fib_rules_register(struct fib_rules_ops *ops) +static int __fib_rules_register(struct fib_rules_ops *ops) { int err = -EEXIST; struct fib_rules_ops *o; @@ -102,6 +102,28 @@ errout: return err; } +struct fib_rules_ops * +fib_rules_register(struct fib_rules_ops *tmpl, struct net *net) +{ + struct fib_rules_ops *ops; + int err; + + ops = kmemdup(tmpl, sizeof (*ops), GFP_KERNEL); + if (ops == NULL) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&ops->rules_list); + ops->fro_net = net; + + err = __fib_rules_register(ops); + if (err) { + kfree(ops); + ops = ERR_PTR(err); + } + + return ops; +} + EXPORT_SYMBOL_GPL(fib_rules_register); void fib_rules_cleanup_ops(struct fib_rules_ops *ops) @@ -115,6 +137,15 @@ void fib_rules_cleanup_ops(struct fib_rules_ops *ops) } EXPORT_SYMBOL_GPL(fib_rules_cleanup_ops); +static void fib_rules_put_rcu(struct rcu_head *head) +{ + struct fib_rules_ops *ops = container_of(head, struct fib_rules_ops, rcu); + struct net *net = ops->fro_net; + + release_net(net); + kfree(ops); +} + void fib_rules_unregister(struct fib_rules_ops *ops) { struct net *net = ops->fro_net; @@ -124,8 +155,7 @@ void fib_rules_unregister(struct fib_rules_ops *ops) fib_rules_cleanup_ops(ops); spin_unlock(&net->rules_mod_lock); - synchronize_rcu(); - release_net(net); + call_rcu(&ops->rcu, fib_rules_put_rcu); } EXPORT_SYMBOL_GPL(fib_rules_unregister); diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c index 72495f25269f..7466c546f286 100644 --- a/net/decnet/dn_rules.c +++ b/net/decnet/dn_rules.c @@ -33,7 +33,7 @@ #include #include -static struct fib_rules_ops dn_fib_rules_ops; +static struct fib_rules_ops *dn_fib_rules_ops; struct dn_fib_rule { @@ -56,7 +56,7 @@ int dn_fib_lookup(struct flowi *flp, struct dn_fib_res *res) }; int err; - err = fib_rules_lookup(&dn_fib_rules_ops, flp, 0, &arg); + err = fib_rules_lookup(dn_fib_rules_ops, flp, 0, &arg); res->r = arg.rule; return err; @@ -217,9 +217,9 @@ static u32 dn_fib_rule_default_pref(struct fib_rules_ops *ops) struct list_head *pos; struct fib_rule *rule; - if (!list_empty(&dn_fib_rules_ops.rules_list)) { - pos = dn_fib_rules_ops.rules_list.next; - if (pos->next != &dn_fib_rules_ops.rules_list) { + if (!list_empty(&dn_fib_rules_ops->rules_list)) { + pos = dn_fib_rules_ops->rules_list.next; + if (pos->next != &dn_fib_rules_ops->rules_list) { rule = list_entry(pos->next, struct fib_rule, list); if (rule->pref) return rule->pref - 1; @@ -234,7 +234,7 @@ static void dn_fib_rule_flush_cache(struct fib_rules_ops *ops) dn_rt_cache_flush(-1); } -static struct fib_rules_ops dn_fib_rules_ops = { +static struct fib_rules_ops dn_fib_rules_ops_template = { .family = AF_DECnet, .rule_size = sizeof(struct dn_fib_rule), .addr_size = sizeof(u16), @@ -247,21 +247,23 @@ static struct fib_rules_ops dn_fib_rules_ops = { .flush_cache = dn_fib_rule_flush_cache, .nlgroup = RTNLGRP_DECnet_RULE, .policy = dn_fib_rule_policy, - .rules_list = LIST_HEAD_INIT(dn_fib_rules_ops.rules_list), .owner = THIS_MODULE, .fro_net = &init_net, }; void __init dn_fib_rules_init(void) { - BUG_ON(fib_default_rule_add(&dn_fib_rules_ops, 0x7fff, + dn_fib_rules_ops = + fib_rules_register(&dn_fib_rules_ops_template, &init_net); + BUG_ON(IS_ERR(dn_fib_rules_ops)); + BUG_ON(fib_default_rule_add(dn_fib_rules_ops, 0x7fff, RT_TABLE_MAIN, 0)); - fib_rules_register(&dn_fib_rules_ops); } void __exit dn_fib_rules_cleanup(void) { - fib_rules_unregister(&dn_fib_rules_ops); + fib_rules_unregister(dn_fib_rules_ops); + rcu_barrier(); } diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 1239ed23cab6..ca2d07b1c706 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -301,13 +301,9 @@ int __net_init fib4_rules_init(struct net *net) int err; struct fib_rules_ops *ops; - ops = kmemdup(&fib4_rules_ops_template, sizeof(*ops), GFP_KERNEL); - if (ops == NULL) - return -ENOMEM; - INIT_LIST_HEAD(&ops->rules_list); - ops->fro_net = net; - - fib_rules_register(ops); + ops = fib_rules_register(&fib4_rules_ops_template, net); + if (IS_ERR(ops)) + return PTR_ERR(ops); err = fib_default_rules_init(ops); if (err < 0) @@ -318,12 +314,10 @@ int __net_init fib4_rules_init(struct net *net) fail: /* also cleans all rules already added */ fib_rules_unregister(ops); - kfree(ops); return err; } void __net_exit fib4_rules_exit(struct net *net) { fib_rules_unregister(net->ipv4.rules_ops); - kfree(net->ipv4.rules_ops); } diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 3b38f49f2c28..b7aa7c64cc4a 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -264,16 +264,14 @@ static struct fib_rules_ops fib6_rules_ops_template = { static int fib6_rules_net_init(struct net *net) { + struct fib_rules_ops *ops; int err = -ENOMEM; - net->ipv6.fib6_rules_ops = kmemdup(&fib6_rules_ops_template, - sizeof(*net->ipv6.fib6_rules_ops), - GFP_KERNEL); - if (!net->ipv6.fib6_rules_ops) - goto out; + ops = fib_rules_register(&fib6_rules_ops_template, net); + if (IS_ERR(ops)) + return PTR_ERR(ops); + net->ipv6.fib6_rules_ops = ops; - net->ipv6.fib6_rules_ops->fro_net = net; - INIT_LIST_HEAD(&net->ipv6.fib6_rules_ops->rules_list); err = fib_default_rule_add(net->ipv6.fib6_rules_ops, 0, RT6_TABLE_LOCAL, 0); @@ -283,25 +281,19 @@ static int fib6_rules_net_init(struct net *net) err = fib_default_rule_add(net->ipv6.fib6_rules_ops, 0x7FFE, RT6_TABLE_MAIN, 0); if (err) - goto out_fib6_default_rule_add; + goto out_fib6_rules_ops; - err = fib_rules_register(net->ipv6.fib6_rules_ops); - if (err) - goto out_fib6_default_rule_add; out: return err; -out_fib6_default_rule_add: - fib_rules_cleanup_ops(net->ipv6.fib6_rules_ops); out_fib6_rules_ops: - kfree(net->ipv6.fib6_rules_ops); + fib_rules_unregister(ops); goto out; } static void fib6_rules_net_exit(struct net *net) { fib_rules_unregister(net->ipv6.fib6_rules_ops); - kfree(net->ipv6.fib6_rules_ops); } static struct pernet_operations fib6_rules_net_ops = { -- cgit v1.3-8-gc7d7 From b099ce2602d806deb41caaa578731848995cdb2a Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 3 Dec 2009 02:29:09 +0000 Subject: net: Batch inet_twsk_purge This function walks the whole hashtable so there is no point in passing it a network namespace. Instead I purge all timewait sockets from dead network namespaces that I find. If the namespace is one of the once I am trying to purge I am guaranteed no new timewait sockets can be formed so this will get them all. If the namespace is one I am not acting for it might form a few more but I will call inet_twsk_purge again and shortly to get rid of them. In any even if the network namespace is dead timewait sockets are useless. Move the calls of inet_twsk_purge into batch_exit routines so that if I am killing a bunch of namespaces at once I will just call inet_twsk_purge once and save a lot of redundant unnecessary work. My simple 4k network namespace exit test the cleanup time dropped from roughly 8.2s to 1.6s. While the time spent running inet_twsk_purge fell to about 2ms. 1ms for ipv4 and 1ms for ipv6. Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- include/net/inet_timewait_sock.h | 6 +++--- net/ipv4/inet_timewait_sock.c | 10 +++++----- net/ipv4/tcp_ipv4.c | 11 ++++++++--- net/ipv6/tcp_ipv6.c | 11 ++++++++--- 4 files changed, 24 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 773b10fa38e4..4fd007f34dd5 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -212,14 +212,14 @@ extern void inet_twsk_schedule(struct inet_timewait_sock *tw, extern void inet_twsk_deschedule(struct inet_timewait_sock *tw, struct inet_timewait_death_row *twdr); -extern void inet_twsk_purge(struct net *net, struct inet_hashinfo *hashinfo, +extern void inet_twsk_purge(struct inet_hashinfo *hashinfo, struct inet_timewait_death_row *twdr, int family); static inline struct net *twsk_net(const struct inet_timewait_sock *twsk) { #ifdef CONFIG_NET_NS - return twsk->tw_net; + return rcu_dereference(twsk->tw_net); #else return &init_net; #endif @@ -229,7 +229,7 @@ static inline void twsk_net_set(struct inet_timewait_sock *twsk, struct net *net) { #ifdef CONFIG_NET_NS - twsk->tw_net = net; + rcu_assign_pointer(twsk->tw_net, net); #endif } #endif /* _INET_TIMEWAIT_SOCK_ */ diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index d38ca7c77b93..31f931ef3daf 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -421,7 +421,7 @@ out: EXPORT_SYMBOL_GPL(inet_twdr_twcal_tick); -void inet_twsk_purge(struct net *net, struct inet_hashinfo *hashinfo, +void inet_twsk_purge(struct inet_hashinfo *hashinfo, struct inet_timewait_death_row *twdr, int family) { struct inet_timewait_sock *tw; @@ -436,15 +436,15 @@ restart_rcu: restart: sk_nulls_for_each_rcu(sk, node, &head->twchain) { tw = inet_twsk(sk); - if (!net_eq(twsk_net(tw), net) || - tw->tw_family != family) + if ((tw->tw_family != family) || + atomic_read(&twsk_net(tw)->count)) continue; if (unlikely(!atomic_inc_not_zero(&tw->tw_refcnt))) continue; - if (unlikely(!net_eq(twsk_net(tw), net) || - tw->tw_family != family)) { + if (unlikely((tw->tw_family != family) || + atomic_read(&twsk_net(tw)->count))) { inet_twsk_put(tw); goto restart; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 59c911f3889d..fee9aabd5aa1 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2529,12 +2529,17 @@ static int __net_init tcp_sk_init(struct net *net) static void __net_exit tcp_sk_exit(struct net *net) { inet_ctl_sock_destroy(net->ipv4.tcp_sock); - inet_twsk_purge(net, &tcp_hashinfo, &tcp_death_row, AF_INET); +} + +static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list) +{ + inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET); } static struct pernet_operations __net_initdata tcp_sk_ops = { - .init = tcp_sk_init, - .exit = tcp_sk_exit, + .init = tcp_sk_init, + .exit = tcp_sk_exit, + .exit_batch = tcp_sk_exit_batch, }; void __init tcp_v4_init(void) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index fc0a4e5895ee..aadd7cef73b3 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -2184,12 +2184,17 @@ static int tcpv6_net_init(struct net *net) static void tcpv6_net_exit(struct net *net) { inet_ctl_sock_destroy(net->ipv6.tcp_sk); - inet_twsk_purge(net, &tcp_hashinfo, &tcp_death_row, AF_INET6); +} + +static void tcpv6_net_exit_batch(struct list_head *net_exit_list) +{ + inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET6); } static struct pernet_operations tcpv6_net_ops = { - .init = tcpv6_net_init, - .exit = tcpv6_net_exit, + .init = tcpv6_net_init, + .exit = tcpv6_net_exit, + .exit_batch = tcpv6_net_exit_batch, }; int __init tcpv6_init(void) -- cgit v1.3-8-gc7d7 From 141518c95870228da4e050fbe31a8f0c9df82c72 Mon Sep 17 00:00:00 2001 From: Matt Carlson Date: Thu, 3 Dec 2009 08:36:22 +0000 Subject: tg3: Add some VPD preprocessor constants This patch cleans up the VPD code by creating preprocessor definitions and using them in the place of hardcoded constants. Signed-off-by: Matt Carlson Reviewed-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/tg3.c | 15 ++++++++------- drivers/net/tg3.h | 10 ++++++++-- include/linux/ethtool.h | 3 ++- 3 files changed, 18 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index 36f2e1b8cbb1..2576effca845 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -12433,7 +12433,7 @@ skip_phy_reset: static void __devinit tg3_read_partno(struct tg3 *tp) { - unsigned char vpd_data[256]; /* in little-endian format */ + unsigned char vpd_data[TG3_NVM_VPD_LEN]; /* in little-endian format */ unsigned int i; u32 magic; @@ -12442,14 +12442,14 @@ static void __devinit tg3_read_partno(struct tg3 *tp) goto out_not_found; if (magic == TG3_EEPROM_MAGIC) { - for (i = 0; i < 256; i += 4) { + for (i = 0; i < TG3_NVM_VPD_LEN; i += 4) { u32 tmp; /* The data is in little-endian format in NVRAM. * Use the big-endian read routines to preserve * the byte order as it exists in NVRAM. */ - if (tg3_nvram_read_be32(tp, 0x100 + i, &tmp)) + if (tg3_nvram_read_be32(tp, TG3_NVM_VPD_OFF + i, &tmp)) goto out_not_found; memcpy(&vpd_data[i], &tmp, sizeof(tmp)); @@ -12458,7 +12458,7 @@ static void __devinit tg3_read_partno(struct tg3 *tp) int vpd_cap; vpd_cap = pci_find_capability(tp->pdev, PCI_CAP_ID_VPD); - for (i = 0; i < 256; i += 4) { + for (i = 0; i < TG3_NVM_VPD_LEN; i += 4) { u32 tmp, j = 0; __le32 v; u16 tmp16; @@ -12483,7 +12483,7 @@ static void __devinit tg3_read_partno(struct tg3 *tp) } /* Now parse and find the part number. */ - for (i = 0; i < 254; ) { + for (i = 0; i < TG3_NVM_VPD_LEN - 2; ) { unsigned char val = vpd_data[i]; unsigned int block_end; @@ -12502,7 +12502,7 @@ static void __devinit tg3_read_partno(struct tg3 *tp) (vpd_data[i + 2] << 8))); i += 3; - if (block_end > 256) + if (block_end > TG3_NVM_VPD_LEN) goto out_not_found; while (i < (block_end - 2)) { @@ -12511,7 +12511,8 @@ static void __devinit tg3_read_partno(struct tg3 *tp) int partno_len = vpd_data[i + 2]; i += 3; - if (partno_len > 24 || (partno_len + i) > 256) + if (partno_len > TG3_BPN_SIZE || + (partno_len + i) > TG3_NVM_VPD_LEN) goto out_not_found; memcpy(tp->board_part_number, diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h index 6a2c31071caf..cd30889650f8 100644 --- a/drivers/net/tg3.h +++ b/drivers/net/tg3.h @@ -1821,6 +1821,11 @@ #define TG3_OTP_DEFAULT 0x286c1640 + +/* Hardware Legacy NVRAM layout */ +#define TG3_NVM_VPD_OFF 0x100 +#define TG3_NVM_VPD_LEN 256 + /* Hardware Selfboot NVRAM layout */ #define TG3_NVM_HWSB_CFG1 0x00000004 #define TG3_NVM_HWSB_CFG1_MAJMSK 0xf8000000 @@ -2893,8 +2898,9 @@ struct tg3 { u32 led_ctrl; u32 phy_otp; - char board_part_number[24]; -#define TG3_VER_SIZE 32 +#define TG3_BPN_SIZE 24 + char board_part_number[TG3_BPN_SIZE]; +#define TG3_VER_SIZE ETHTOOL_FWVERS_LEN char fw_ver[TG3_VER_SIZE]; u32 nic_sram_data_cfg; u32 pci_clock_ctrl; diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index bcaa0e0a54d3..ef4a2d84d922 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -49,13 +49,14 @@ static inline __u32 ethtool_cmd_speed(struct ethtool_cmd *ep) return (ep->speed_hi << 16) | ep->speed; } +#define ETHTOOL_FWVERS_LEN 32 #define ETHTOOL_BUSINFO_LEN 32 /* these strings are set to whatever the driver author decides... */ struct ethtool_drvinfo { __u32 cmd; char driver[32]; /* driver short name, "tulip", "eepro100" */ char version[32]; /* driver version string */ - char fw_version[32]; /* firmware version string, if applicable */ + char fw_version[ETHTOOL_FWVERS_LEN]; /* firmware version string */ char bus_info[ETHTOOL_BUSINFO_LEN]; /* Bus info for this IF. */ /* For PCI devices, use pci_name(pci_dev). */ char reserved1[32]; -- cgit v1.3-8-gc7d7 From e78db4dfb1355a895f7ea50133b702b55b8ed184 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Thu, 26 Nov 2009 22:46:03 -0500 Subject: libata: Report zeroed read after TRIM and max discard size Our current TRIM payload is a single sector that can accommodate 64 * 65535 blocks being unmapped. Report this value in the Block Limits Maximum Unmap LBA count field. If a storage device supports TRIM and the DRAT and RZAT bits are set, report TPRZ=1 in Read Capacity(16). Signed-off-by: Martin K. Petersen Signed-off-by: Jeff Garzik --- drivers/ata/libata-scsi.c | 12 +++++++++--- include/linux/ata.h | 11 +++++++++++ 2 files changed, 20 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 340a616d226b..e1e186b9dfcc 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -2115,8 +2115,10 @@ static unsigned int ata_scsiop_inq_b0(struct ata_scsi_args *args, u8 *rbuf) * that we support some form of unmap - in thise case via WRITE SAME * with the unmap bit set. */ - if (ata_id_has_trim(args->id)) + if (ata_id_has_trim(args->id)) { + put_unaligned_be32(65535 * 512 / 8, &rbuf[20]); put_unaligned_be32(1, &rbuf[28]); + } return 0; } @@ -2411,8 +2413,12 @@ static unsigned int ata_scsiop_read_cap(struct ata_scsi_args *args, u8 *rbuf) rbuf[14] = (lowest_aligned >> 8) & 0x3f; rbuf[15] = lowest_aligned; - if (ata_id_has_trim(args->id)) - rbuf[14] |= 0x80; + if (ata_id_has_trim(args->id)) { + rbuf[14] |= 0x80; /* TPE */ + + if (ata_id_has_zero_after_trim(args->id)) + rbuf[14] |= 0x40; /* TPRZ */ + } } return 0; diff --git a/include/linux/ata.h b/include/linux/ata.h index e2595e877e44..dfa2298bf488 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -75,6 +75,7 @@ enum { ATA_ID_EIDE_DMA_TIME = 66, ATA_ID_EIDE_PIO = 67, ATA_ID_EIDE_PIO_IORDY = 68, + ATA_ID_ADDITIONAL_SUPP = 69, ATA_ID_QUEUE_DEPTH = 75, ATA_ID_MAJOR_VER = 80, ATA_ID_COMMAND_SET_1 = 82, @@ -816,6 +817,16 @@ static inline int ata_id_has_trim(const u16 *id) return 0; } +static inline int ata_id_has_zero_after_trim(const u16 *id) +{ + /* DSM supported, deterministic read, and read zero after trim set */ + if (ata_id_has_trim(id) && + (id[ATA_ID_ADDITIONAL_SUPP] & 0x4020) == 0x4020) + return 1; + + return 0; +} + static inline int ata_id_current_chs_valid(const u16 *id) { /* For ATA-1 devices, if the INITIALIZE DEVICE PARAMETERS command -- cgit v1.3-8-gc7d7 From d0634c4aea0b80447cbdc4c0db285004b860c455 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Thu, 26 Nov 2009 12:00:43 -0500 Subject: libata: Clarify ata_set_lba_range_entries function ata_set_lba_range_entries used the variable max for two different things which was confusing. Make the function take a buffer size in bytes as argument and return the used buffer size upon completion. Signed-off-by: Martin K. Petersen Signed-off-by: Jeff Garzik --- drivers/ata/libata-scsi.c | 2 +- include/linux/ata.h | 20 ++++++++++---------- 2 files changed, 11 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index e1e186b9dfcc..62e6b9ea96af 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -2972,7 +2972,7 @@ static unsigned int ata_scsi_write_same_xlat(struct ata_queued_cmd *qc) goto invalid_fld; buf = page_address(sg_page(scsi_sglist(scmd))); - size = ata_set_lba_range_entries(buf, 512 / 8, block, n_block); + size = ata_set_lba_range_entries(buf, 512, block, n_block); tf->protocol = ATA_PROT_DMA; tf->hob_feature = 0; diff --git a/include/linux/ata.h b/include/linux/ata.h index dfa2298bf488..38a6948ce0c2 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -982,17 +982,17 @@ static inline void ata_id_to_hd_driveid(u16 *id) } /* - * Write up to 'max' LBA Range Entries to the buffer that will cover the - * extent from sector to sector + count. This is used for TRIM and for - * ADD LBA(S) TO NV CACHE PINNED SET. + * Write LBA Range Entries to the buffer that will cover the extent from + * sector to sector + count. This is used for TRIM and for ADD LBA(S) + * TO NV CACHE PINNED SET. */ -static inline unsigned ata_set_lba_range_entries(void *_buffer, unsigned max, - u64 sector, unsigned long count) +static inline unsigned ata_set_lba_range_entries(void *_buffer, + unsigned buf_size, u64 sector, unsigned long count) { __le64 *buffer = _buffer; - unsigned i = 0; + unsigned i = 0, used_bytes; - while (i < max) { + while (i < buf_size / 8 ) { /* 6-byte LBA + 2-byte range per entry */ u64 entry = sector | ((u64)(count > 0xffff ? 0xffff : count) << 48); buffer[i++] = __cpu_to_le64(entry); @@ -1002,9 +1002,9 @@ static inline unsigned ata_set_lba_range_entries(void *_buffer, unsigned max, sector += 0xffff; } - max = ALIGN(i * 8, 512); - memset(buffer + i, 0, max - i * 8); - return max; + used_bytes = ALIGN(i * 8, 512); + memset(buffer + i, 0, used_bytes - i * 8); + return used_bytes; } static inline int is_multi_taskfile(struct ata_taskfile *tf) -- cgit v1.3-8-gc7d7 From 69ee472f2706371ca639de49b06df91615c07d8d Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 3 Dec 2009 15:31:18 -0800 Subject: usbnet & cdc-ether: Autosuspend for online devices Using remote wakeup and delayed transmission to allow online device to go into usb autosuspend. Minimal alternate support for devices that don't support remote wakeup. Signed-off-by: Oliver Neukum Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ether.c | 8 +++ drivers/net/usb/usbnet.c | 157 ++++++++++++++++++++++++++++++++++++-------- include/linux/usb/usbnet.h | 6 ++ 3 files changed, 142 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index 197912d9c04a..21e183a83b99 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -411,6 +411,12 @@ static int cdc_bind(struct usbnet *dev, struct usb_interface *intf) return 0; } +static int cdc_manage_power(struct usbnet *dev, int on) +{ + dev->intf->needs_remote_wakeup = on; + return 0; +} + static const struct driver_info cdc_info = { .description = "CDC Ethernet Device", .flags = FLAG_ETHER | FLAG_LINK_INTR, @@ -418,6 +424,7 @@ static const struct driver_info cdc_info = { .bind = cdc_bind, .unbind = usbnet_cdc_unbind, .status = cdc_status, + .manage_power = cdc_manage_power, }; static const struct driver_info mbm_info = { @@ -619,6 +626,7 @@ static struct usb_driver cdc_driver = { .suspend = usbnet_suspend, .resume = usbnet_resume, .reset_resume = usbnet_resume, + .supports_autosuspend = 1, }; diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 511e7bdc4573..035fab04c0a0 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -353,7 +353,8 @@ static void rx_submit (struct usbnet *dev, struct urb *urb, gfp_t flags) if (netif_running (dev->net) && netif_device_present (dev->net) && - !test_bit (EVENT_RX_HALT, &dev->flags)) { + !test_bit (EVENT_RX_HALT, &dev->flags) && + !test_bit (EVENT_DEV_ASLEEP, &dev->flags)) { switch (retval = usb_submit_urb (urb, GFP_ATOMIC)) { case -EPIPE: usbnet_defer_kevent (dev, EVENT_RX_HALT); @@ -611,15 +612,39 @@ EXPORT_SYMBOL_GPL(usbnet_unlink_rx_urbs); /*-------------------------------------------------------------------------*/ // precondition: never called in_interrupt +static void usbnet_terminate_urbs(struct usbnet *dev) +{ + DECLARE_WAIT_QUEUE_HEAD_ONSTACK(unlink_wakeup); + DECLARE_WAITQUEUE(wait, current); + int temp; + + /* ensure there are no more active urbs */ + add_wait_queue(&unlink_wakeup, &wait); + set_current_state(TASK_UNINTERRUPTIBLE); + dev->wait = &unlink_wakeup; + temp = unlink_urbs(dev, &dev->txq) + + unlink_urbs(dev, &dev->rxq); + + /* maybe wait for deletions to finish. */ + while (!skb_queue_empty(&dev->rxq) + && !skb_queue_empty(&dev->txq) + && !skb_queue_empty(&dev->done)) { + schedule_timeout(UNLINK_TIMEOUT_MS); + set_current_state(TASK_UNINTERRUPTIBLE); + if (netif_msg_ifdown(dev)) + devdbg(dev, "waited for %d urb completions", + temp); + } + set_current_state(TASK_RUNNING); + dev->wait = NULL; + remove_wait_queue(&unlink_wakeup, &wait); +} int usbnet_stop (struct net_device *net) { struct usbnet *dev = netdev_priv(net); struct driver_info *info = dev->driver_info; - int temp; int retval; - DECLARE_WAIT_QUEUE_HEAD_ONSTACK (unlink_wakeup); - DECLARE_WAITQUEUE (wait, current); netif_stop_queue (net); @@ -641,25 +666,8 @@ int usbnet_stop (struct net_device *net) info->description); } - if (!(info->flags & FLAG_AVOID_UNLINK_URBS)) { - /* ensure there are no more active urbs */ - add_wait_queue(&unlink_wakeup, &wait); - dev->wait = &unlink_wakeup; - temp = unlink_urbs(dev, &dev->txq) + - unlink_urbs(dev, &dev->rxq); - - /* maybe wait for deletions to finish. */ - while (!skb_queue_empty(&dev->rxq) && - !skb_queue_empty(&dev->txq) && - !skb_queue_empty(&dev->done)) { - msleep(UNLINK_TIMEOUT_MS); - if (netif_msg_ifdown(dev)) - devdbg(dev, "waited for %d urb completions", - temp); - } - dev->wait = NULL; - remove_wait_queue(&unlink_wakeup, &wait); - } + if (!(info->flags & FLAG_AVOID_UNLINK_URBS)) + usbnet_terminate_urbs(dev); usb_kill_urb(dev->interrupt); @@ -672,7 +680,10 @@ int usbnet_stop (struct net_device *net) dev->flags = 0; del_timer_sync (&dev->delay); tasklet_kill (&dev->bh); - usb_autopm_put_interface(dev->intf); + if (info->manage_power) + info->manage_power(dev, 0); + else + usb_autopm_put_interface(dev->intf); return 0; } @@ -753,6 +764,12 @@ int usbnet_open (struct net_device *net) // delay posting reads until we're fully open tasklet_schedule (&dev->bh); + if (info->manage_power) { + retval = info->manage_power(dev, 1); + if (retval < 0) + goto done; + usb_autopm_put_interface(dev->intf); + } return retval; done: usb_autopm_put_interface(dev->intf); @@ -881,11 +898,16 @@ kevent (struct work_struct *work) /* usb_clear_halt() needs a thread context */ if (test_bit (EVENT_TX_HALT, &dev->flags)) { unlink_urbs (dev, &dev->txq); + status = usb_autopm_get_interface(dev->intf); + if (status < 0) + goto fail_pipe; status = usb_clear_halt (dev->udev, dev->out); + usb_autopm_put_interface(dev->intf); if (status < 0 && status != -EPIPE && status != -ESHUTDOWN) { if (netif_msg_tx_err (dev)) +fail_pipe: deverr (dev, "can't clear tx halt, status %d", status); } else { @@ -896,11 +918,16 @@ kevent (struct work_struct *work) } if (test_bit (EVENT_RX_HALT, &dev->flags)) { unlink_urbs (dev, &dev->rxq); + status = usb_autopm_get_interface(dev->intf); + if (status < 0) + goto fail_halt; status = usb_clear_halt (dev->udev, dev->in); + usb_autopm_put_interface(dev->intf); if (status < 0 && status != -EPIPE && status != -ESHUTDOWN) { if (netif_msg_rx_err (dev)) +fail_halt: deverr (dev, "can't clear rx halt, status %d", status); } else { @@ -919,7 +946,12 @@ kevent (struct work_struct *work) clear_bit (EVENT_RX_MEMORY, &dev->flags); if (urb != NULL) { clear_bit (EVENT_RX_MEMORY, &dev->flags); + status = usb_autopm_get_interface(dev->intf); + if (status < 0) + goto fail_lowmem; rx_submit (dev, urb, GFP_KERNEL); + usb_autopm_put_interface(dev->intf); +fail_lowmem: tasklet_schedule (&dev->bh); } } @@ -929,11 +961,18 @@ kevent (struct work_struct *work) int retval = 0; clear_bit (EVENT_LINK_RESET, &dev->flags); + status = usb_autopm_get_interface(dev->intf); + if (status < 0) + goto skip_reset; if(info->link_reset && (retval = info->link_reset(dev)) < 0) { + usb_autopm_put_interface(dev->intf); +skip_reset: devinfo(dev, "link reset failed (%d) usbnet usb-%s-%s, %s", retval, dev->udev->bus->bus_name, dev->udev->devpath, info->description); + } else { + usb_autopm_put_interface(dev->intf); } } @@ -971,6 +1010,7 @@ static void tx_complete (struct urb *urb) case -EPROTO: case -ETIME: case -EILSEQ: + usb_mark_last_busy(dev->udev); if (!timer_pending (&dev->delay)) { mod_timer (&dev->delay, jiffies + THROTTLE_JIFFIES); @@ -987,6 +1027,7 @@ static void tx_complete (struct urb *urb) } } + usb_autopm_put_interface_async(dev->intf); urb->dev = NULL; entry->state = tx_done; defer_bh(dev, skb, &dev->txq); @@ -1057,14 +1098,34 @@ netdev_tx_t usbnet_start_xmit (struct sk_buff *skb, } } - spin_lock_irqsave (&dev->txq.lock, flags); + spin_lock_irqsave(&dev->txq.lock, flags); + retval = usb_autopm_get_interface_async(dev->intf); + if (retval < 0) { + spin_unlock_irqrestore(&dev->txq.lock, flags); + goto drop; + } + +#ifdef CONFIG_PM + /* if this triggers the device is still a sleep */ + if (test_bit(EVENT_DEV_ASLEEP, &dev->flags)) { + /* transmission will be done in resume */ + usb_anchor_urb(urb, &dev->deferred); + /* no use to process more packets */ + netif_stop_queue(net); + spin_unlock_irqrestore(&dev->txq.lock, flags); + devdbg(dev, "Delaying transmission for resumption"); + goto deferred; + } +#endif switch ((retval = usb_submit_urb (urb, GFP_ATOMIC))) { case -EPIPE: netif_stop_queue (net); usbnet_defer_kevent (dev, EVENT_TX_HALT); + usb_autopm_put_interface_async(dev->intf); break; default: + usb_autopm_put_interface_async(dev->intf); if (netif_msg_tx_err (dev)) devdbg (dev, "tx: submit urb err %d", retval); break; @@ -1088,6 +1149,9 @@ drop: devdbg (dev, "> tx, len %d, type 0x%x", length, skb->protocol); } +#ifdef CONFIG_PM +deferred: +#endif return NETDEV_TX_OK; } EXPORT_SYMBOL_GPL(usbnet_start_xmit); @@ -1263,6 +1327,7 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod) dev->bh.func = usbnet_bh; dev->bh.data = (unsigned long) dev; INIT_WORK (&dev->kevent, kevent); + init_usb_anchor(&dev->deferred); dev->delay.function = usbnet_bh; dev->delay.data = (unsigned long) dev; init_timer (&dev->delay); @@ -1382,13 +1447,23 @@ int usbnet_suspend (struct usb_interface *intf, pm_message_t message) struct usbnet *dev = usb_get_intfdata(intf); if (!dev->suspend_count++) { + spin_lock_irq(&dev->txq.lock); + /* don't autosuspend while transmitting */ + if (dev->txq.qlen && (message.event & PM_EVENT_AUTO)) { + spin_unlock_irq(&dev->txq.lock); + return -EBUSY; + } else { + set_bit(EVENT_DEV_ASLEEP, &dev->flags); + spin_unlock_irq(&dev->txq.lock); + } /* * accelerate emptying of the rx and queues, to avoid * having everything error out. */ netif_device_detach (dev->net); - (void) unlink_urbs (dev, &dev->rxq); - (void) unlink_urbs (dev, &dev->txq); + usbnet_terminate_urbs(dev); + usb_kill_urb(dev->interrupt); + /* * reattach so runtime management can use and * wake the device @@ -1402,10 +1477,34 @@ EXPORT_SYMBOL_GPL(usbnet_suspend); int usbnet_resume (struct usb_interface *intf) { struct usbnet *dev = usb_get_intfdata(intf); + struct sk_buff *skb; + struct urb *res; + int retval; + + if (!--dev->suspend_count) { + spin_lock_irq(&dev->txq.lock); + while ((res = usb_get_from_anchor(&dev->deferred))) { + + printk(KERN_INFO"%s has delayed data\n", __func__); + skb = (struct sk_buff *)res->context; + retval = usb_submit_urb(res, GFP_ATOMIC); + if (retval < 0) { + dev_kfree_skb_any(skb); + usb_free_urb(res); + usb_autopm_put_interface_async(dev->intf); + } else { + dev->net->trans_start = jiffies; + __skb_queue_tail(&dev->txq, skb); + } + } - if (!--dev->suspend_count) + smp_mb(); + clear_bit(EVENT_DEV_ASLEEP, &dev->flags); + spin_unlock_irq(&dev->txq.lock); + if (!(dev->txq.qlen >= TX_QLEN(dev))) + netif_start_queue(dev->net); tasklet_schedule (&dev->bh); - + } return 0; } EXPORT_SYMBOL_GPL(usbnet_resume); diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 8c84881f8478..8ce61359bf73 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -55,6 +55,7 @@ struct usbnet { struct sk_buff_head done; struct sk_buff_head rxq_pause; struct urb *interrupt; + struct usb_anchor deferred; struct tasklet_struct bh; struct work_struct kevent; @@ -65,6 +66,8 @@ struct usbnet { # define EVENT_STS_SPLIT 3 # define EVENT_LINK_RESET 4 # define EVENT_RX_PAUSED 5 +# define EVENT_DEV_WAKING 6 +# define EVENT_DEV_ASLEEP 7 }; static inline struct usb_driver *driver_of(struct usb_interface *intf) @@ -109,6 +112,9 @@ struct driver_info { /* see if peer is connected ... can sleep */ int (*check_connect)(struct usbnet *); + /* (dis)activate runtime power management */ + int (*manage_power)(struct usbnet *, int); + /* for status polling */ void (*status)(struct usbnet *, struct urb *); -- cgit v1.3-8-gc7d7 From fc4a7489663250360cd40d5adf06a08d1c5d54df Mon Sep 17 00:00:00 2001 From: Patrick Mullaney Date: Thu, 3 Dec 2009 15:59:22 -0800 Subject: netdevice: provide common routine for macvlan and vlan operstate management Provide common routine for the transition of operational state for a leaf device during a root device transition. Signed-off-by: Patrick Mullaney Acked-by: Arnd Bergmann Acked-by: Patrick McHardy Signed-off-by: David S. Miller --- drivers/net/macvlan.c | 24 +++--------------------- include/linux/netdevice.h | 3 +++ net/8021q/vlan.c | 29 ++++------------------------- net/core/dev.c | 27 +++++++++++++++++++++++++++ 4 files changed, 37 insertions(+), 46 deletions(-) (limited to 'include') diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 93c3e6edf702..21a9c9ab4b34 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -582,25 +582,6 @@ static void macvlan_port_destroy(struct net_device *dev) kfree(port); } -static void macvlan_transfer_operstate(struct net_device *dev) -{ - struct macvlan_dev *vlan = netdev_priv(dev); - const struct net_device *lowerdev = vlan->lowerdev; - - if (lowerdev->operstate == IF_OPER_DORMANT) - netif_dormant_on(dev); - else - netif_dormant_off(dev); - - if (netif_carrier_ok(lowerdev)) { - if (!netif_carrier_ok(dev)) - netif_carrier_on(dev); - } else { - if (netif_carrier_ok(dev)) - netif_carrier_off(dev); - } -} - static int macvlan_validate(struct nlattr *tb[], struct nlattr *data[]) { if (tb[IFLA_ADDRESS]) { @@ -693,7 +674,7 @@ static int macvlan_newlink(struct net *src_net, struct net_device *dev, return err; list_add_tail(&vlan->list, &port->vlans); - macvlan_transfer_operstate(dev); + netif_stacked_transfer_operstate(lowerdev, dev); return 0; } @@ -768,7 +749,8 @@ static int macvlan_device_event(struct notifier_block *unused, switch (event) { case NETDEV_CHANGE: list_for_each_entry(vlan, &port->vlans, list) - macvlan_transfer_operstate(vlan->dev); + netif_stacked_transfer_operstate(vlan->lowerdev, + vlan->dev); break; case NETDEV_FEAT_CHANGE: list_for_each_entry(vlan, &port->vlans, list) { diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index daf13d367498..a3fccc85b1a0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1981,6 +1981,9 @@ unsigned long netdev_increment_features(unsigned long all, unsigned long one, unsigned long mask); unsigned long netdev_fix_features(unsigned long features, const char *name); +void netif_stacked_transfer_operstate(const struct net_device *rootdev, + struct net_device *dev); + static inline int net_gso_ok(int features, int gso_type) { int feature = gso_type << NETIF_F_GSO_SHIFT; diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index ec3769295dac..33f90e7362cc 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -184,27 +184,6 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head) dev_put(real_dev); } -static void vlan_transfer_operstate(const struct net_device *dev, - struct net_device *vlandev) -{ - /* Have to respect userspace enforced dormant state - * of real device, also must allow supplicant running - * on VLAN device - */ - if (dev->operstate == IF_OPER_DORMANT) - netif_dormant_on(vlandev); - else - netif_dormant_off(vlandev); - - if (netif_carrier_ok(dev)) { - if (!netif_carrier_ok(vlandev)) - netif_carrier_on(vlandev); - } else { - if (netif_carrier_ok(vlandev)) - netif_carrier_off(vlandev); - } -} - int vlan_check_real_dev(struct net_device *real_dev, u16 vlan_id) { const char *name = real_dev->name; @@ -262,7 +241,7 @@ int register_vlan_dev(struct net_device *dev) /* Account for reference in struct vlan_dev_info */ dev_hold(real_dev); - vlan_transfer_operstate(real_dev, dev); + netif_stacked_transfer_operstate(real_dev, dev); linkwatch_fire_event(dev); /* _MUST_ call rfc2863_policy() */ /* So, got the sucker initialized, now lets place @@ -453,7 +432,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, if (!vlandev) continue; - vlan_transfer_operstate(dev, vlandev); + netif_stacked_transfer_operstate(dev, vlandev); } break; @@ -511,7 +490,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, vlan = vlan_dev_info(vlandev); if (!(vlan->flags & VLAN_FLAG_LOOSE_BINDING)) dev_change_flags(vlandev, flgs & ~IFF_UP); - vlan_transfer_operstate(dev, vlandev); + netif_stacked_transfer_operstate(dev, vlandev); } break; @@ -529,7 +508,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, vlan = vlan_dev_info(vlandev); if (!(vlan->flags & VLAN_FLAG_LOOSE_BINDING)) dev_change_flags(vlandev, flgs | IFF_UP); - vlan_transfer_operstate(dev, vlandev); + netif_stacked_transfer_operstate(dev, vlandev); } break; diff --git a/net/core/dev.c b/net/core/dev.c index 0913a08a87d6..c36a17aafcf3 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4900,6 +4900,33 @@ unsigned long netdev_fix_features(unsigned long features, const char *name) } EXPORT_SYMBOL(netdev_fix_features); +/** + * netif_stacked_transfer_operstate - transfer operstate + * @rootdev: the root or lower level device to transfer state from + * @dev: the device to transfer operstate to + * + * Transfer operational state from root to device. This is normally + * called when a stacking relationship exists between the root + * device and the device(a leaf device). + */ +void netif_stacked_transfer_operstate(const struct net_device *rootdev, + struct net_device *dev) +{ + if (rootdev->operstate == IF_OPER_DORMANT) + netif_dormant_on(dev); + else + netif_dormant_off(dev); + + if (netif_carrier_ok(rootdev)) { + if (!netif_carrier_ok(dev)) + netif_carrier_on(dev); + } else { + if (netif_carrier_ok(dev)) + netif_carrier_off(dev); + } +} +EXPORT_SYMBOL(netif_stacked_transfer_operstate); + /** * register_netdevice - register a network device * @dev: device to register -- cgit v1.3-8-gc7d7 From 13475a30b66cdb9250a34052c19ac98847373030 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 Dec 2009 22:31:19 +0000 Subject: tcp: connect() race with timewait reuse Its currently possible that several threads issuing a connect() find the same timewait socket and try to reuse it, leading to list corruptions. Condition for bug is that these threads bound their socket on same address/port of to-be-find timewait socket, and connected to same target. (SO_REUSEADDR needed) To fix this problem, we could unhash timewait socket while holding ehash lock, to make sure lookups/changes will be serialized. Only first thread finds the timewait socket, other ones find the established socket and return an EADDRNOTAVAIL error. This second version takes into account Evgeniy's review and makes sure inet_twsk_put() is called outside of locked sections. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_timewait_sock.h | 2 ++ net/ipv4/inet_hashtables.c | 10 +++++++--- net/ipv4/inet_timewait_sock.c | 38 ++++++++++++++++++++++++++++---------- net/ipv6/inet6_hashtables.c | 15 ++++++++++----- 4 files changed, 47 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 4fd007f34dd5..b801ade2295e 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -199,6 +199,8 @@ static inline __be32 inet_rcv_saddr(const struct sock *sk) extern void inet_twsk_put(struct inet_timewait_sock *tw); +extern int inet_twsk_unhash(struct inet_timewait_sock *tw); + extern struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int state); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 94ef51aa5bc9..21e5e32d8c60 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -286,6 +286,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, struct sock *sk2; const struct hlist_nulls_node *node; struct inet_timewait_sock *tw; + int twrefcnt = 0; spin_lock(lock); @@ -318,20 +319,23 @@ unique: sk->sk_hash = hash; WARN_ON(!sk_unhashed(sk)); __sk_nulls_add_node_rcu(sk, &head->chain); + if (tw) { + twrefcnt = inet_twsk_unhash(tw); + NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED); + } spin_unlock(lock); + if (twrefcnt) + inet_twsk_put(tw); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); if (twp) { *twp = tw; - NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED); } else if (tw) { /* Silly. Should hash-dance instead... */ inet_twsk_deschedule(tw, death_row); - NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED); inet_twsk_put(tw); } - return 0; not_unique: diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 31f931ef3daf..11a107a5af4f 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -14,22 +14,33 @@ #include #include + +/* + * unhash a timewait socket from established hash + * lock must be hold by caller + */ +int inet_twsk_unhash(struct inet_timewait_sock *tw) +{ + if (hlist_nulls_unhashed(&tw->tw_node)) + return 0; + + hlist_nulls_del_rcu(&tw->tw_node); + sk_nulls_node_init(&tw->tw_node); + return 1; +} + /* Must be called with locally disabled BHs. */ static void __inet_twsk_kill(struct inet_timewait_sock *tw, struct inet_hashinfo *hashinfo) { struct inet_bind_hashbucket *bhead; struct inet_bind_bucket *tb; + int refcnt; /* Unlink from established hashes. */ spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash); spin_lock(lock); - if (hlist_nulls_unhashed(&tw->tw_node)) { - spin_unlock(lock); - return; - } - hlist_nulls_del_rcu(&tw->tw_node); - sk_nulls_node_init(&tw->tw_node); + refcnt = inet_twsk_unhash(tw); spin_unlock(lock); /* Disassociate with bind bucket. */ @@ -37,9 +48,12 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw, hashinfo->bhash_size)]; spin_lock(&bhead->lock); tb = tw->tw_tb; - __hlist_del(&tw->tw_bind_node); - tw->tw_tb = NULL; - inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); + if (tb) { + __hlist_del(&tw->tw_bind_node); + tw->tw_tb = NULL; + inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); + refcnt++; + } spin_unlock(&bhead->lock); #ifdef SOCK_REFCNT_DEBUG if (atomic_read(&tw->tw_refcnt) != 1) { @@ -47,7 +61,10 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw, tw->tw_prot->name, tw, atomic_read(&tw->tw_refcnt)); } #endif - inet_twsk_put(tw); + while (refcnt) { + inet_twsk_put(tw); + refcnt--; + } } static noinline void inet_twsk_free(struct inet_timewait_sock *tw) @@ -92,6 +109,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, tw->tw_tb = icsk->icsk_bind_hash; WARN_ON(!icsk->icsk_bind_hash); inet_twsk_add_bind_node(tw, &tw->tw_tb->owners); + atomic_inc(&tw->tw_refcnt); spin_unlock(&bhead->lock); spin_lock(lock); diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 00c6a3e6cddf..c813e294ec0c 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -223,6 +223,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, struct sock *sk2; const struct hlist_nulls_node *node; struct inet_timewait_sock *tw; + int twrefcnt = 0; spin_lock(lock); @@ -250,19 +251,23 @@ unique: * in hash table socket with a funny identity. */ inet->inet_num = lport; inet->inet_sport = htons(lport); + sk->sk_hash = hash; WARN_ON(!sk_unhashed(sk)); __sk_nulls_add_node_rcu(sk, &head->chain); - sk->sk_hash = hash; + if (tw) { + twrefcnt = inet_twsk_unhash(tw); + NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED); + } spin_unlock(lock); + if (twrefcnt) + inet_twsk_put(tw); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); - if (twp != NULL) { + if (twp) { *twp = tw; - NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED); - } else if (tw != NULL) { + } else if (tw) { /* Silly. Should hash-dance instead... */ inet_twsk_deschedule(tw, death_row); - NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED); inet_twsk_put(tw); } -- cgit v1.3-8-gc7d7 From 83cf0a9b86ba12664ab0648f5afb118c981371e9 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Tue, 20 Oct 2009 11:11:52 +0200 Subject: comment typo fix: sybsystem -> subsystem Signed-off-by: Jean Delvare Signed-off-by: Jiri Kosina --- drivers/net/irda/smsc-ircc2.c | 2 +- include/sound/ac97_codec.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/net/irda/smsc-ircc2.c b/drivers/net/irda/smsc-ircc2.c index 1e8dd8c74a64..8f7d0d146f24 100644 --- a/drivers/net/irda/smsc-ircc2.c +++ b/drivers/net/irda/smsc-ircc2.c @@ -115,7 +115,7 @@ struct smsc_ircc_subsystem_configuration { unsigned short vendor; /* PCI vendor ID */ unsigned short device; /* PCI vendor ID */ unsigned short subvendor; /* PCI subsystem vendor ID */ - unsigned short subdevice; /* PCI sybsystem device ID */ + unsigned short subdevice; /* PCI subsystem device ID */ unsigned short sir_io; /* I/O port for SIR */ unsigned short fir_io; /* I/O port for FIR */ unsigned char fir_irq; /* FIR IRQ */ diff --git a/include/sound/ac97_codec.h b/include/sound/ac97_codec.h index 3dae3f799b9b..49400459b477 100644 --- a/include/sound/ac97_codec.h +++ b/include/sound/ac97_codec.h @@ -593,7 +593,7 @@ enum { struct ac97_quirk { unsigned short subvendor; /* PCI subsystem vendor id */ - unsigned short subdevice; /* PCI sybsystem device id */ + unsigned short subdevice; /* PCI subsystem device id */ unsigned short mask; /* device id bit mask, 0 = accept all */ unsigned int codec_id; /* codec id (if any), 0 = accept all */ const char *name; /* name shown as info */ -- cgit v1.3-8-gc7d7 From 96c085db0a0f22895917f09cf942853186b892fd Mon Sep 17 00:00:00 2001 From: Thiago Farina Date: Sun, 1 Nov 2009 16:47:35 -0500 Subject: sgivwfb: Make use of ARRAY_SIZE. Cleanup the usage of DBE_VT_SIZE since the kernel already defines the same macro for the same propose. Also clean up a surrounding whitespaces. Signed-off-by: Thiago Farina Signed-off-by: Jiri Kosina --- drivers/video/sgivwfb.c | 4 ++-- include/video/sgivw.h | 11 +++++------ 2 files changed, 7 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/video/sgivwfb.c b/drivers/video/sgivwfb.c index bba53714a7b1..f86012239bff 100644 --- a/drivers/video/sgivwfb.c +++ b/drivers/video/sgivwfb.c @@ -260,13 +260,13 @@ static int sgivwfb_check_var(struct fb_var_screeninfo *var, var->grayscale = 0; /* No grayscale for now */ /* determine valid resolution and timing */ - for (min_mode = 0; min_mode < DBE_VT_SIZE; min_mode++) { + for (min_mode = 0; min_mode < ARRAY_SIZE(dbeVTimings); min_mode++) { if (dbeVTimings[min_mode].width >= var->xres && dbeVTimings[min_mode].height >= var->yres) break; } - if (min_mode == DBE_VT_SIZE) + if (min_mode == ARRAY_SIZE(dbeVTimings)) return -EINVAL; /* Resolution to high */ /* XXX FIXME - should try to pick best refresh rate */ diff --git a/include/video/sgivw.h b/include/video/sgivw.h index 55f2a7c024af..f6aa5692e74b 100644 --- a/include/video/sgivw.h +++ b/include/video/sgivw.h @@ -351,7 +351,7 @@ typedef enum { struct dbe_timing_info { dbe_timing_t type; - int flags; + int flags; short width; /* Monitor resolution */ short height; int fields_sec; /* fields/sec (Hz -3 dec. places */ @@ -389,11 +389,11 @@ struct dbe_timing_info dbeVTimings[] = { { DBE_VT_640_480_60, /* flags, width, height, fields_sec, cfreq */ - 0, 640, 480, 59940, 25175, + 0, 640, 480, 59940, 25175, /* htotal, hblank_start, hblank_end, hsync_start, hsync_end */ - 800, 640, 800, 656, 752, + 800, 640, 800, 656, 752, /* vtotal, vblank_start, vblank_end, vsync_start, vsync_end */ - 525, 480, 525, 490, 492, + 525, 480, 525, 490, 492, /* pll_m, pll_n, pll_p */ 15, 2, 3 }, @@ -650,7 +650,7 @@ struct dbe_timing_info dbeVTimings[] = { /* pll_m, pll_n, pll_p */ 6, 1, 0 }, - + { DBE_VT_1920_1200_60, /* flags, width, height, fields_sec, cfreq */ @@ -676,7 +676,6 @@ struct dbe_timing_info dbeVTimings[] = { } }; -#define DBE_VT_SIZE (sizeof(dbeVTimings)/sizeof(dbeVTimings[0])) #endif // INCLUDE_TIMING_TABLE_DATA #endif // ! __SGIVWFB_H__ -- cgit v1.3-8-gc7d7 From 8a74770353d540d9b7641d22ea0401966f14cf2a Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Fri, 30 Oct 2009 18:03:39 -0200 Subject: sysctl: add missing comments Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Jiri Kosina --- include/linux/sysctl.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 1e4743ee6831..9a3c8f777caf 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -98,8 +98,8 @@ enum KERN_VERSION=4, /* string: compile time info */ KERN_SECUREMASK=5, /* struct: maximum rights mask */ KERN_PROF=6, /* table: profiling information */ - KERN_NODENAME=7, - KERN_DOMAINNAME=8, + KERN_NODENAME=7, /* string: hostname */ + KERN_DOMAINNAME=8, /* string: domainname */ KERN_PANIC=15, /* int: panic timeout */ KERN_REALROOTDEV=16, /* real root device to mount after initrd */ @@ -111,8 +111,8 @@ enum KERN_PPC_HTABRECLAIM=25, /* turn htab reclaimation on/off on PPC */ KERN_PPC_ZEROPAGED=26, /* turn idle page zeroing on/off on PPC */ KERN_PPC_POWERSAVE_NAP=27, /* use nap mode for power saving */ - KERN_MODPROBE=28, - KERN_SG_BIG_BUFF=29, + KERN_MODPROBE=28, /* string: modprobe path */ + KERN_SG_BIG_BUFF=29, /* int: sg driver reserved buffer size */ KERN_ACCT=30, /* BSD process accounting parameters */ KERN_PPC_L2CR=31, /* l2cr register on PPC */ @@ -159,7 +159,7 @@ enum KERN_ACPI_VIDEO_FLAGS=71, /* int: flags for setting up video after ACPI sleep */ KERN_IA64_UNALIGNED=72, /* int: ia64 unaligned userland trap enable */ KERN_COMPAT_LOG=73, /* int: print compat layer messages */ - KERN_MAX_LOCK_DEPTH=74, + KERN_MAX_LOCK_DEPTH=74, /* int: rtmutex's maximum lock depth */ KERN_NMI_WATCHDOG=75, /* int: enable/disable nmi watchdog */ KERN_PANIC_ON_NMI=76, /* int: whether we will panic on an unrecovered */ }; -- cgit v1.3-8-gc7d7 From af901ca181d92aac3a7dc265144a9081a86d8f39 Mon Sep 17 00:00:00 2001 From: André Goddard Rosa Date: Sat, 14 Nov 2009 13:09:05 -0200 Subject: tree-wide: fix assorted typos all over the place MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit That is "success", "unknown", "through", "performance", "[re|un]mapping" , "access", "default", "reasonable", "[con]currently", "temperature" , "channel", "[un]used", "application", "example","hierarchy", "therefore" , "[over|under]flow", "contiguous", "threshold", "enough" and others. Signed-off-by: André Goddard Rosa Signed-off-by: Jiri Kosina --- Documentation/ABI/testing/procfs-diskstats | 2 +- Documentation/ABI/testing/sysfs-block | 2 +- Documentation/DocBook/mtdnand.tmpl | 2 +- Documentation/DocBook/v4l/videodev2.h.xml | 2 +- Documentation/DocBook/writing-an-alsa-driver.tmpl | 2 +- Documentation/dvb/README.dvb-usb | 2 +- Documentation/lguest/lguest.c | 2 +- Documentation/scsi/ChangeLog.megaraid_sas | 2 +- Documentation/spi/spi-summary | 2 +- Documentation/sysctl/vm.txt | 2 +- Documentation/video4linux/gspca.txt | 2 +- Documentation/vm/page-types.c | 2 +- arch/alpha/mm/numa.c | 2 +- arch/arm/common/scoop.c | 2 +- .../mach-bcmring/include/mach/csp/dmacHw_priv.h | 2 +- arch/arm/mach-bcmring/include/mach/dma.h | 2 +- arch/arm/mach-lh7a40x/include/mach/hardware.h | 2 +- arch/arm/mach-orion5x/pci.c | 2 +- arch/arm/mach-pxa/include/mach/palmld.h | 2 +- arch/arm/mach-pxa/include/mach/palmt5.h | 2 +- arch/arm/mach-pxa/include/mach/palmtc.h | 2 +- arch/arm/mach-pxa/include/mach/palmte2.h | 2 +- arch/arm/mach-pxa/include/mach/palmtx.h | 2 +- arch/arm/mach-pxa/include/mach/palmz72.h | 2 +- arch/arm/mach-s3c6400/setup-sdhci.c | 2 +- arch/arm/mach-s3c6410/setup-sdhci.c | 2 +- arch/arm/mach-sa1100/dma.c | 2 +- arch/arm/plat-mxc/include/mach/iomux-mx3.h | 2 +- arch/arm/plat-mxc/include/mach/iomux-mxc91231.h | 2 +- arch/arm/plat-mxc/pwm.c | 2 +- arch/arm/plat-omap/dma.c | 2 +- arch/arm/plat-omap/include/mach/omap16xx.h | 2 +- arch/arm/plat-s3c24xx/include/plat/map.h | 2 +- arch/avr32/boards/hammerhead/Kconfig | 2 +- arch/blackfin/kernel/traps.c | 2 +- .../mach-bf518/include/mach/defBF51x_base.h | 4 +- .../mach-bf527/include/mach/defBF52x_base.h | 4 +- arch/blackfin/mach-bf537/include/mach/defBF534.h | 4 +- arch/blackfin/mach-bf548/include/mach/defBF544.h | 4 +- arch/blackfin/mach-bf548/include/mach/defBF547.h | 4 +- arch/blackfin/mach-bf548/include/mach/defBF548.h | 4 +- arch/blackfin/mach-bf548/include/mach/defBF549.h | 4 +- arch/cris/mm/fault.c | 2 +- arch/ia64/hp/common/sba_iommu.c | 2 +- arch/ia64/ia32/ia32_entry.S | 2 +- arch/ia64/include/asm/perfmon_default_smpl.h | 2 +- arch/ia64/include/asm/sn/shubio.h | 2 +- arch/ia64/kernel/esi.c | 2 +- arch/ia64/kernel/perfmon.c | 2 +- arch/m68k/ifpsp060/src/fpsp.S | 28 +++++----- arch/m68k/ifpsp060/src/pfpsp.S | 26 ++++----- arch/m68k/include/asm/bootinfo.h | 2 +- arch/microblaze/lib/memcpy.c | 2 +- arch/microblaze/lib/memmove.c | 2 +- arch/microblaze/lib/memset.c | 2 +- arch/mips/include/asm/mach-pnx833x/gpio.h | 2 +- arch/mips/include/asm/sgi/ioc.h | 2 +- arch/mips/include/asm/sibyte/sb1250_mac.h | 2 +- arch/mips/include/asm/sn/sn0/hubio.h | 2 +- arch/mips/kernel/smtc.c | 2 +- arch/mips/math-emu/dp_sub.c | 2 +- arch/mips/txx9/generic/smsc_fdc37m81x.c | 2 +- arch/powerpc/include/asm/reg_fsl_emb.h | 2 +- arch/powerpc/kernel/kgdb.c | 2 +- arch/powerpc/kernel/tau_6xx.c | 2 +- arch/powerpc/oprofile/op_model_cell.c | 4 +- arch/powerpc/platforms/52xx/mpc52xx_pci.c | 2 +- arch/powerpc/platforms/powermac/pci.c | 2 +- arch/powerpc/sysdev/dart_iommu.c | 2 +- arch/s390/math-emu/math.c | 4 +- arch/x86/include/asm/desc_defs.h | 4 +- arch/x86/include/asm/mmzone_32.h | 2 +- arch/x86/include/asm/uv/uv_bau.h | 2 +- arch/x86/kernel/acpi/boot.c | 2 +- arch/x86/kernel/amd_iommu.c | 4 +- arch/x86/kernel/cpu/perf_event.c | 2 +- arch/x86/kernel/kprobes.c | 4 +- arch/x86/mm/kmmio.c | 4 +- block/blk-iopoll.c | 2 +- drivers/ata/ata_piix.c | 2 +- drivers/ata/sata_fsl.c | 6 +-- drivers/atm/iphase.c | 2 +- drivers/base/dd.c | 2 +- drivers/bluetooth/btmrvl_sdio.c | 2 +- drivers/bluetooth/hci_ldisc.c | 2 +- drivers/char/mem.c | 2 +- drivers/char/mspec.c | 2 +- drivers/char/n_r3964.c | 2 +- drivers/char/rio/route.h | 2 +- drivers/crypto/hifn_795x.c | 2 +- drivers/dma/at_hdmac.c | 2 +- drivers/firewire/core-topology.c | 2 +- drivers/gpu/drm/drm_crtc.c | 4 +- drivers/gpu/drm/i915/i915_gem.c | 2 +- drivers/gpu/drm/i915/intel_fb.c | 2 +- drivers/gpu/drm/i915/intel_sdvo.c | 2 +- drivers/gpu/drm/radeon/r600.c | 4 +- drivers/gpu/drm/radeon/radeon_fb.c | 2 +- drivers/gpu/drm/radeon/radeon_state.c | 2 +- drivers/gpu/drm/radeon/radeon_ttm.c | 2 +- drivers/gpu/drm/radeon/rv770.c | 4 +- drivers/gpu/drm/ttm/ttm_bo_util.c | 2 +- drivers/hwmon/adm1029.c | 2 +- drivers/hwmon/lm93.c | 2 +- drivers/ieee1394/dv1394.c | 2 +- drivers/infiniband/hw/ipath/ipath_iba6110.c | 2 +- drivers/infiniband/hw/ipath/ipath_sd7220.c | 4 +- drivers/infiniband/hw/mlx4/qp.c | 2 +- drivers/input/serio/hp_sdc.c | 2 +- drivers/input/serio/hp_sdc_mlc.c | 2 +- drivers/input/touchscreen/atmel-wm97xx.c | 2 +- drivers/input/touchscreen/mainstone-wm97xx.c | 4 +- drivers/input/touchscreen/zylonite-wm97xx.c | 2 +- drivers/isdn/capi/capidrv.c | 2 +- drivers/isdn/hardware/eicon/di.c | 2 +- drivers/isdn/hardware/eicon/maintidi.c | 4 +- drivers/isdn/hardware/mISDN/hfcsusb.c | 2 +- drivers/isdn/hardware/mISDN/hfcsusb.h | 2 +- drivers/isdn/hardware/mISDN/mISDNisar.c | 2 +- drivers/isdn/hisax/hfc_usb.c | 2 +- drivers/isdn/i4l/isdn_ppp.c | 6 +-- drivers/isdn/i4l/isdn_ttyfax.c | 2 +- drivers/isdn/mISDN/dsp_core.c | 2 +- drivers/isdn/mISDN/tei.c | 2 +- drivers/macintosh/therm_windtunnel.c | 2 +- drivers/media/common/saa7146_i2c.c | 2 +- drivers/media/dvb/dvb-core/dvb_frontend.h | 2 +- drivers/media/dvb/dvb-usb/anysee.c | 2 +- drivers/media/dvb/dvb-usb/dibusb-mb.c | 2 +- drivers/media/dvb/dvb-usb/dvb-usb-remote.c | 2 +- drivers/media/dvb/dvb-usb/usb-urb.c | 4 +- drivers/media/dvb/frontends/au8522_decoder.c | 2 +- drivers/media/dvb/frontends/cx24110.c | 4 +- drivers/media/dvb/frontends/cx24113.c | 2 +- drivers/media/dvb/frontends/dib3000mb.c | 2 +- drivers/media/dvb/frontends/lgdt330x.c | 4 +- drivers/media/dvb/frontends/stb0899_drv.c | 2 +- drivers/media/dvb/ttpci/av7110.c | 4 +- drivers/media/dvb/ttpci/budget-patch.c | 2 +- drivers/media/radio/radio-mr800.c | 2 +- drivers/media/video/cx231xx/cx231xx-avcore.c | 8 +-- drivers/media/video/cx23885/cx23885-dvb.c | 2 +- drivers/media/video/cx88/cx88-core.c | 2 +- drivers/media/video/davinci/dm355_ccdc.c | 2 +- drivers/media/video/davinci/vpss.c | 2 +- drivers/media/video/gspca/sonixb.c | 2 +- drivers/media/video/gspca/spca500.c | 2 +- drivers/media/video/gspca/spca501.c | 6 +-- drivers/media/video/gspca/sunplus.c | 2 +- drivers/media/video/gspca/zc3xx.c | 2 +- drivers/media/video/pvrusb2/pvrusb2-hdw-internal.h | 2 +- drivers/media/video/s2255drv.c | 2 +- drivers/media/video/zoran/zoran.h | 2 +- drivers/message/i2o/i2o_block.c | 2 +- drivers/message/i2o/iop.c | 4 +- drivers/misc/sgi-gru/grufile.c | 2 +- drivers/mmc/host/s3cmci.c | 2 +- drivers/mtd/devices/slram.c | 2 +- drivers/mtd/nand/diskonchip.c | 2 +- drivers/mtd/nand/nand_ecc.c | 2 +- drivers/mtd/nand/s3c2410.c | 2 +- drivers/net/82596.c | 2 +- drivers/net/amd8111e.c | 7 ++- drivers/net/appletalk/cops.c | 2 +- drivers/net/ariadne.h | 2 +- drivers/net/atl1c/atl1c_main.c | 2 +- drivers/net/benet/be_cmds.h | 2 +- drivers/net/benet/be_main.c | 2 +- drivers/net/bnx2x_reg.h | 2 +- drivers/net/cxgb3/sge.c | 2 +- drivers/net/ehea/ehea_ethtool.c | 4 +- drivers/net/hamradio/baycom_ser_fdx.c | 2 +- drivers/net/iseries_veth.c | 2 +- drivers/net/lasi_82596.c | 2 +- drivers/net/lib82596.c | 2 +- drivers/net/mlx4/en_rx.c | 2 +- drivers/net/mlx4/en_tx.c | 2 +- drivers/net/mlx4/mlx4_en.h | 2 +- drivers/net/ps3_gelic_net.c | 2 +- drivers/net/sis900.c | 4 +- drivers/net/skfp/h/smc.h | 8 +-- drivers/net/skfp/skfddi.c | 2 +- drivers/net/smsc911x.c | 2 +- drivers/net/smsc911x.h | 2 +- drivers/net/spider_net.c | 2 +- drivers/net/stmmac/gmac.c | 2 +- drivers/net/stmmac/gmac.h | 4 +- drivers/net/tokenring/smctr.c | 2 +- drivers/net/ucc_geth.c | 2 +- drivers/net/ucc_geth.h | 20 +++---- drivers/net/usb/smsc95xx.c | 2 +- drivers/net/wan/lmc/lmc_main.c | 2 +- drivers/net/wimax/i2400m/rx.c | 2 +- drivers/net/wireless/ath/ath5k/phy.c | 6 +-- drivers/net/wireless/ath/ath9k/rc.c | 2 +- drivers/net/wireless/ipw2x00/ipw2100.c | 6 +-- drivers/net/wireless/ipw2x00/ipw2200.c | 8 +-- drivers/net/wireless/ipw2x00/libipw_module.c | 2 +- drivers/net/wireless/iwmc3200wifi/hal.c | 2 +- drivers/net/wireless/iwmc3200wifi/rx.c | 2 +- drivers/net/wireless/libertas/if_sdio.c | 2 +- drivers/net/wireless/prism54/isl_ioctl.c | 4 +- drivers/net/wireless/rt2x00/rt2400pci.h | 2 +- drivers/net/wireless/rt2x00/rt2500pci.h | 2 +- drivers/net/wireless/rt2x00/rt2500usb.h | 2 +- drivers/net/wireless/rt2x00/rt61pci.h | 2 +- drivers/net/wireless/rt2x00/rt73usb.h | 2 +- drivers/net/wireless/wavelan_cs.c | 2 +- drivers/net/wireless/zd1211rw/zd_mac.c | 2 +- drivers/parisc/ccio-dma.c | 2 +- drivers/platform/x86/thinkpad_acpi.c | 2 +- drivers/pnp/pnpbios/rsparser.c | 8 +-- drivers/ps3/ps3-sys-manager.c | 2 +- drivers/rtc/rtc-v3020.c | 2 +- drivers/s390/char/fs3270.c | 2 +- drivers/s390/cio/chp.c | 2 +- drivers/s390/cio/cmf.c | 2 +- drivers/sbus/char/envctrl.c | 4 +- drivers/scsi/53c700.c | 2 +- drivers/scsi/aacraid/aacraid.h | 6 +-- drivers/scsi/aacraid/comminit.c | 2 +- drivers/scsi/aic7xxx/aic79xx.seq | 2 +- drivers/scsi/aic7xxx/aic79xx_core.c | 2 +- drivers/scsi/aic7xxx/aic7xxx_core.c | 2 +- drivers/scsi/bfa/include/defs/bfa_defs_pport.h | 2 +- drivers/scsi/bfa/include/defs/bfa_defs_tsensor.h | 2 +- drivers/scsi/hptiop.c | 2 +- drivers/scsi/libfc/fc_lport.c | 2 +- drivers/scsi/libiscsi_tcp.c | 2 +- drivers/scsi/lpfc/lpfc_attr.c | 4 +- drivers/scsi/lpfc/lpfc_els.c | 4 +- drivers/scsi/lpfc/lpfc_init.c | 62 +++++++++++----------- drivers/scsi/lpfc/lpfc_sli.c | 10 ++-- drivers/scsi/megaraid.h | 2 +- drivers/scsi/megaraid/mbox_defs.h | 2 +- drivers/scsi/megaraid/megaraid_mbox.c | 2 +- drivers/scsi/mpt2sas/mpt2sas_scsih.c | 2 +- drivers/scsi/ncr53c8xx.c | 2 +- drivers/scsi/pmcraid.c | 6 +-- drivers/scsi/pmcraid.h | 6 +-- drivers/scsi/scsi_netlink.c | 2 +- drivers/scsi/scsi_transport_sas.c | 6 +-- drivers/scsi/sym53c8xx_2/sym_glue.c | 2 +- drivers/scsi/sym53c8xx_2/sym_hipd.c | 2 +- drivers/scsi/sym53c8xx_2/sym_hipd.h | 2 +- drivers/serial/8250_pnp.c | 4 +- drivers/serial/pmac_zilog.h | 2 +- drivers/serial/ucc_uart.c | 2 +- drivers/telephony/ixj.c | 4 +- drivers/usb/atm/ueagle-atm.c | 2 +- drivers/usb/class/usbtmc.c | 2 +- drivers/usb/core/message.c | 2 +- drivers/usb/gadget/f_acm.c | 2 +- drivers/usb/gadget/pxa27x_udc.c | 2 +- drivers/usb/host/fhci-sched.c | 2 +- drivers/usb/wusbcore/crypto.c | 2 +- drivers/usb/wusbcore/wa-xfer.c | 4 +- drivers/uwb/i1480/dfu/usb.c | 2 +- drivers/uwb/wlp/txrx.c | 2 +- drivers/video/aty/atyfb_base.c | 4 +- drivers/video/backlight/atmel-pwm-bl.c | 2 +- drivers/video/backlight/tosa_lcd.c | 2 +- drivers/video/console/sticore.c | 2 +- drivers/video/gbefb.c | 2 +- drivers/video/stifb.c | 4 +- drivers/video/tdfxfb.c | 2 +- drivers/video/via/dvi.c | 4 +- drivers/video/vt8623fb.c | 2 +- drivers/watchdog/coh901327_wdt.c | 2 +- drivers/watchdog/machzwd.c | 2 +- drivers/watchdog/wdrtas.c | 6 +-- fs/binfmt_elf.c | 2 +- fs/bio.c | 2 +- fs/btrfs/extent_map.c | 2 +- fs/cifs/README | 2 +- fs/cifs/cifsglob.h | 2 +- fs/cifs/inode.c | 4 +- fs/cifs/smbdes.c | 2 +- fs/dlm/plock.c | 2 +- fs/ext4/inode.c | 6 +-- fs/ext4/mballoc.c | 2 +- fs/jffs2/compr.c | 2 +- fs/jffs2/readinode.c | 2 +- fs/jffs2/xattr.c | 2 +- fs/jfs/jfs_dmap.c | 4 +- fs/ncpfs/ioctl.c | 2 +- fs/ntfs/compress.c | 2 +- fs/ntfs/file.c | 4 +- fs/ntfs/logfile.c | 2 +- fs/ocfs2/alloc.c | 2 +- fs/ocfs2/dlm/dlmmaster.c | 2 +- fs/ocfs2/dlmglue.c | 2 +- fs/ocfs2/journal.c | 2 +- fs/ocfs2/refcounttree.c | 2 +- fs/omfs/bitmap.c | 2 +- fs/ubifs/recovery.c | 2 +- fs/xfs/quota/xfs_dquot.h | 2 +- include/asm-generic/memory_model.h | 2 +- include/asm-generic/unistd.h | 2 +- include/linux/chio.h | 2 +- include/linux/mfd/ezx-pcap.h | 4 +- include/linux/pktcdvd.h | 2 +- include/linux/serial_reg.h | 8 +-- include/linux/videodev2.h | 2 +- include/net/sctp/structs.h | 2 +- include/net/tcp.h | 2 +- include/net/wimax.h | 2 +- include/sound/wm8993.h | 2 +- kernel/perf_event.c | 4 +- lib/Kconfig.debug | 2 +- lib/decompress_bunzip2.c | 2 +- lib/dma-debug.c | 2 +- lib/swiotlb.c | 2 +- mm/filemap.c | 2 +- mm/memcontrol.c | 4 +- mm/memory-failure.c | 2 +- net/ipv4/netfilter/ipt_ECN.c | 2 +- net/irda/irlap.c | 14 ++--- net/irda/irlap_event.c | 2 +- net/irda/irlmp.c | 4 +- net/mac80211/mesh_pathtbl.c | 4 +- net/netlabel/netlabel_domainhash.c | 2 +- net/sctp/sm_sideeffect.c | 2 +- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 2 +- net/wimax/op-reset.c | 2 +- scripts/kconfig/mconf.c | 2 +- security/selinux/netlabel.c | 2 +- security/selinux/ss/services.c | 2 +- sound/Kconfig | 2 +- sound/isa/cs423x/cs4236.c | 2 +- sound/isa/opti9xx/miro.c | 2 +- sound/isa/opti9xx/opti92x-ad1848.c | 2 +- sound/oss/dmasound/dmasound_paula.c | 2 +- sound/pci/ca0106/ca0106_proc.c | 2 +- sound/pci/cs46xx/imgs/cwcdma.asp | 9 ++-- sound/pci/emu10k1/emu10k1x.c | 2 +- sound/pci/hda/patch_cmedia.c | 2 +- sound/pci/hda/patch_realtek.c | 2 +- sound/pci/rme9652/hdspm.c | 4 +- sound/soc/codecs/uda134x.c | 4 +- sound/soc/codecs/wm8903.c | 6 +-- sound/soc/codecs/wm8993.c | 4 +- sound/soc/s3c24xx/s3c24xx_simtec.c | 2 +- sound/soc/s6000/s6000-pcm.c | 2 +- sound/sound_core.c | 2 +- 345 files changed, 516 insertions(+), 508 deletions(-) (limited to 'include') diff --git a/Documentation/ABI/testing/procfs-diskstats b/Documentation/ABI/testing/procfs-diskstats index 99233902e09e..f91a973a37fe 100644 --- a/Documentation/ABI/testing/procfs-diskstats +++ b/Documentation/ABI/testing/procfs-diskstats @@ -8,7 +8,7 @@ Description: 1 - major number 2 - minor mumber 3 - device name - 4 - reads completed succesfully + 4 - reads completed successfully 5 - reads merged 6 - sectors read 7 - time spent reading (ms) diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block index 5f3bedaf8e35..d2f90334bb93 100644 --- a/Documentation/ABI/testing/sysfs-block +++ b/Documentation/ABI/testing/sysfs-block @@ -4,7 +4,7 @@ Contact: Jerome Marchand Description: The /sys/block//stat files displays the I/O statistics of disk . They contain 11 fields: - 1 - reads completed succesfully + 1 - reads completed successfully 2 - reads merged 3 - sectors read 4 - time spent reading (ms) diff --git a/Documentation/DocBook/mtdnand.tmpl b/Documentation/DocBook/mtdnand.tmpl index df0d089d0fb9..f508a8a27fea 100644 --- a/Documentation/DocBook/mtdnand.tmpl +++ b/Documentation/DocBook/mtdnand.tmpl @@ -362,7 +362,7 @@ module_exit(board_cleanup); Multiple chip control - The nand driver can control chip arrays. Therefor the + The nand driver can control chip arrays. Therefore the board driver must provide an own select_chip function. This function must (de)select the requested chip. The function pointer in the nand_chip structure must diff --git a/Documentation/DocBook/v4l/videodev2.h.xml b/Documentation/DocBook/v4l/videodev2.h.xml index 97002060ac4f..26303e58345f 100644 --- a/Documentation/DocBook/v4l/videodev2.h.xml +++ b/Documentation/DocBook/v4l/videodev2.h.xml @@ -492,7 +492,7 @@ struct v4l2_jpegcompression { * you do, leave them untouched. * Inluding less markers will make the * resulting code smaller, but there will - * be fewer aplications which can read it. + * be fewer applications which can read it. * The presence of the APP and COM marker * is influenced by APP_len and COM_len * ONLY, not by this property! */ diff --git a/Documentation/DocBook/writing-an-alsa-driver.tmpl b/Documentation/DocBook/writing-an-alsa-driver.tmpl index 7a2e0e98986a..0d0f7b4d4b1a 100644 --- a/Documentation/DocBook/writing-an-alsa-driver.tmpl +++ b/Documentation/DocBook/writing-an-alsa-driver.tmpl @@ -5318,7 +5318,7 @@ struct _snd_pcm_runtime { pages of the given size and map them onto the virtually contiguous memory. The virtual pointer is addressed in runtime->dma_area. The physical address (runtime->dma_addr) is set to zero, - because the buffer is physically non-contigous. + because the buffer is physically non-contiguous. The physical address table is set up in sgbuf->table. You can get the physical address at a certain offset via snd_pcm_sgbuf_get_addr(). diff --git a/Documentation/dvb/README.dvb-usb b/Documentation/dvb/README.dvb-usb index bf2a9cdfe7bb..c8238e44ed6b 100644 --- a/Documentation/dvb/README.dvb-usb +++ b/Documentation/dvb/README.dvb-usb @@ -85,7 +85,7 @@ http://www.linuxtv.org/wiki/index.php/DVB_USB - moved transfer control (pid filter, fifo control) from usb driver to frontend, it seems better settled there (added xfer_ops-struct) - created a common files for frontends (mc/p/mb) - 2004-09-28 - added support for a new device (Unkown, vendor ID is Hyper-Paltek) + 2004-09-28 - added support for a new device (Unknown, vendor ID is Hyper-Paltek) 2004-09-20 - added support for a new device (Compro DVB-U2000), thanks to Amaury Demol for reporting - changed usb TS transfer method (several urbs, stopping transfer diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c index 098de5bce00a..42208511b5c0 100644 --- a/Documentation/lguest/lguest.c +++ b/Documentation/lguest/lguest.c @@ -304,7 +304,7 @@ static void *map_zeroed_pages(unsigned int num) addr = mmap(NULL, getpagesize() * num, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, fd, 0); if (addr == MAP_FAILED) - err(1, "Mmaping %u pages of /dev/zero", num); + err(1, "Mmapping %u pages of /dev/zero", num); /* * One neat mmap feature is that you can close the fd, and it diff --git a/Documentation/scsi/ChangeLog.megaraid_sas b/Documentation/scsi/ChangeLog.megaraid_sas index c851ef497795..84524e0cf9c3 100644 --- a/Documentation/scsi/ChangeLog.megaraid_sas +++ b/Documentation/scsi/ChangeLog.megaraid_sas @@ -185,7 +185,7 @@ ii. FW enables WCE bit in Mode Sense cmd for drives that are configured Disks are exposed with WCE=1. User is advised to enable Write Back mode only when the controller has battery backup. At this time Synhronize cache is not supported by the FW. Driver will short-cycle - the cmd and return sucess without sending down to FW. + the cmd and return success without sending down to FW. 1 Release Date : Sun Jan. 14 11:21:32 PDT 2007 - Sumant Patro /Bo Yang diff --git a/Documentation/spi/spi-summary b/Documentation/spi/spi-summary index deab51ddc33e..4884cb33845d 100644 --- a/Documentation/spi/spi-summary +++ b/Documentation/spi/spi-summary @@ -538,7 +538,7 @@ SPI MESSAGE QUEUE The bulk of the driver will be managing the I/O queue fed by transfer(). That queue could be purely conceptual. For example, a driver used only -for low-frequency sensor acess might be fine using synchronous PIO. +for low-frequency sensor access might be fine using synchronous PIO. But the queue will probably be very real, using message->queue, PIO, often DMA (especially if the root filesystem is in SPI flash), and diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index a6e360d2055c..fc5790d36cd9 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt @@ -370,7 +370,7 @@ The default is 1 percent. mmap_min_addr This file indicates the amount of address space which a user process will -be restricted from mmaping. Since kernel null dereference bugs could +be restricted from mmapping. Since kernel null dereference bugs could accidentally operate based on the information in the first couple of pages of memory userspace processes should not be allowed to write to them. By default this value is set to 0 and no protections will be enforced by the diff --git a/Documentation/video4linux/gspca.txt b/Documentation/video4linux/gspca.txt index 3f61825be499..6b29555b58b7 100644 --- a/Documentation/video4linux/gspca.txt +++ b/Documentation/video4linux/gspca.txt @@ -6,7 +6,7 @@ The modules are: xxxx vend:prod ---- -spca501 0000:0000 MystFromOri Unknow Camera +spca501 0000:0000 MystFromOri Unknown Camera m5602 0402:5602 ALi Video Camera Controller spca501 040a:0002 Kodak DVC-325 spca500 040a:0300 Kodak EZ200 diff --git a/Documentation/vm/page-types.c b/Documentation/vm/page-types.c index 3ec4f2a22585..aa7f4d0639c4 100644 --- a/Documentation/vm/page-types.c +++ b/Documentation/vm/page-types.c @@ -301,7 +301,7 @@ static char *page_flag_name(uint64_t flags) present = (flags >> i) & 1; if (!page_flag_names[i]) { if (present) - fatal("unkown flag bit %d\n", i); + fatal("unknown flag bit %d\n", i); continue; } buf[j++] = present ? page_flag_names[i][0] : '_'; diff --git a/arch/alpha/mm/numa.c b/arch/alpha/mm/numa.c index 10b403554b65..7b2c56d8f930 100644 --- a/arch/alpha/mm/numa.c +++ b/arch/alpha/mm/numa.c @@ -197,7 +197,7 @@ setup_memory_node(int nid, void *kernel_end) } if (bootmap_start == -1) - panic("couldn't find a contigous place for the bootmap"); + panic("couldn't find a contiguous place for the bootmap"); /* Allocate the bootmap and mark the whole MM as reserved. */ bootmap_size = init_bootmem_node(NODE_DATA(nid), bootmap_start, diff --git a/arch/arm/common/scoop.c b/arch/arm/common/scoop.c index 7713a08bb10c..37bda5f3dde3 100644 --- a/arch/arm/common/scoop.c +++ b/arch/arm/common/scoop.c @@ -82,7 +82,7 @@ static int scoop_gpio_get(struct gpio_chip *chip, unsigned offset) { struct scoop_dev *sdev = container_of(chip, struct scoop_dev, gpio); - /* XXX: I'm usure, but it seems so */ + /* XXX: I'm unsure, but it seems so */ return ioread16(sdev->base + SCOOP_GPRR) & (1 << (offset + 1)); } diff --git a/arch/arm/mach-bcmring/include/mach/csp/dmacHw_priv.h b/arch/arm/mach-bcmring/include/mach/csp/dmacHw_priv.h index 375066ad0186..cbf334d1c761 100644 --- a/arch/arm/mach-bcmring/include/mach/csp/dmacHw_priv.h +++ b/arch/arm/mach-bcmring/include/mach/csp/dmacHw_priv.h @@ -83,7 +83,7 @@ typedef struct { * @brief Get next available transaction width * * -* @return On sucess : Next avail able transaction width +* @return On success : Next available transaction width * On failure : dmacHw_TRANSACTION_WIDTH_8 * * @note diff --git a/arch/arm/mach-bcmring/include/mach/dma.h b/arch/arm/mach-bcmring/include/mach/dma.h index 847980c85c88..1f2c5319c056 100644 --- a/arch/arm/mach-bcmring/include/mach/dma.h +++ b/arch/arm/mach-bcmring/include/mach/dma.h @@ -651,7 +651,7 @@ int dma_map_add_region(DMA_MemMap_t *memMap, /* Stores state information about t /** * Creates a descriptor ring from a memory mapping. * -* @return 0 on sucess, error code otherwise. +* @return 0 on success, error code otherwise. */ /****************************************************************************/ diff --git a/arch/arm/mach-lh7a40x/include/mach/hardware.h b/arch/arm/mach-lh7a40x/include/mach/hardware.h index 48e827d2fa56..59d2ace35217 100644 --- a/arch/arm/mach-lh7a40x/include/mach/hardware.h +++ b/arch/arm/mach-lh7a40x/include/mach/hardware.h @@ -31,7 +31,7 @@ /* * This __REG() version gives the same results as the one above, except * that we are fooling gcc somehow so it generates far better and smaller - * assembly code for access to contigous registers. It's a shame that gcc + * assembly code for access to contiguous registers. It's a shame that gcc * doesn't guess this by itself. */ #include diff --git a/arch/arm/mach-orion5x/pci.c b/arch/arm/mach-orion5x/pci.c index 36dc5413cc97..bdf96eb523bc 100644 --- a/arch/arm/mach-orion5x/pci.c +++ b/arch/arm/mach-orion5x/pci.c @@ -463,7 +463,7 @@ static void __init orion5x_setup_pci_wins(struct mbus_dram_target_info *dram) writel(win_enable, PCI_BAR_ENABLE); /* - * Disable automatic update of address remaping when writing to BARs. + * Disable automatic update of address remapping when writing to BARs. */ orion5x_setbits(PCI_ADDR_DECODE_CTRL, 1); } diff --git a/arch/arm/mach-pxa/include/mach/palmld.h b/arch/arm/mach-pxa/include/mach/palmld.h index 8721b8010221..ae536e86d8e8 100644 --- a/arch/arm/mach-pxa/include/mach/palmld.h +++ b/arch/arm/mach-pxa/include/mach/palmld.h @@ -91,7 +91,7 @@ /* BATTERY */ #define PALMLD_BAT_MAX_VOLTAGE 4000 /* 4.00V maximum voltage */ #define PALMLD_BAT_MIN_VOLTAGE 3550 /* 3.55V critical voltage */ -#define PALMLD_BAT_MAX_CURRENT 0 /* unknokn */ +#define PALMLD_BAT_MAX_CURRENT 0 /* unknown */ #define PALMLD_BAT_MIN_CURRENT 0 /* unknown */ #define PALMLD_BAT_MAX_CHARGE 1 /* unknown */ #define PALMLD_BAT_MIN_CHARGE 1 /* unknown */ diff --git a/arch/arm/mach-pxa/include/mach/palmt5.h b/arch/arm/mach-pxa/include/mach/palmt5.h index d15662aba008..6baf7469d4ec 100644 --- a/arch/arm/mach-pxa/include/mach/palmt5.h +++ b/arch/arm/mach-pxa/include/mach/palmt5.h @@ -66,7 +66,7 @@ /* BATTERY */ #define PALMT5_BAT_MAX_VOLTAGE 4000 /* 4.00v current voltage */ #define PALMT5_BAT_MIN_VOLTAGE 3550 /* 3.55v critical voltage */ -#define PALMT5_BAT_MAX_CURRENT 0 /* unknokn */ +#define PALMT5_BAT_MAX_CURRENT 0 /* unknown */ #define PALMT5_BAT_MIN_CURRENT 0 /* unknown */ #define PALMT5_BAT_MAX_CHARGE 1 /* unknown */ #define PALMT5_BAT_MIN_CHARGE 1 /* unknown */ diff --git a/arch/arm/mach-pxa/include/mach/palmtc.h b/arch/arm/mach-pxa/include/mach/palmtc.h index 3dc9b074ab46..3f9dd3fd4638 100644 --- a/arch/arm/mach-pxa/include/mach/palmtc.h +++ b/arch/arm/mach-pxa/include/mach/palmtc.h @@ -68,7 +68,7 @@ /* BATTERY */ #define PALMTC_BAT_MAX_VOLTAGE 4000 /* 4.00V maximum voltage */ #define PALMTC_BAT_MIN_VOLTAGE 3550 /* 3.55V critical voltage */ -#define PALMTC_BAT_MAX_CURRENT 0 /* unknokn */ +#define PALMTC_BAT_MAX_CURRENT 0 /* unknown */ #define PALMTC_BAT_MIN_CURRENT 0 /* unknown */ #define PALMTC_BAT_MAX_CHARGE 1 /* unknown */ #define PALMTC_BAT_MIN_CHARGE 1 /* unknown */ diff --git a/arch/arm/mach-pxa/include/mach/palmte2.h b/arch/arm/mach-pxa/include/mach/palmte2.h index 12361341f9d8..f89e989a7637 100644 --- a/arch/arm/mach-pxa/include/mach/palmte2.h +++ b/arch/arm/mach-pxa/include/mach/palmte2.h @@ -59,7 +59,7 @@ /* BATTERY */ #define PALMTE2_BAT_MAX_VOLTAGE 4000 /* 4.00v current voltage */ #define PALMTE2_BAT_MIN_VOLTAGE 3550 /* 3.55v critical voltage */ -#define PALMTE2_BAT_MAX_CURRENT 0 /* unknokn */ +#define PALMTE2_BAT_MAX_CURRENT 0 /* unknown */ #define PALMTE2_BAT_MIN_CURRENT 0 /* unknown */ #define PALMTE2_BAT_MAX_CHARGE 1 /* unknown */ #define PALMTE2_BAT_MIN_CHARGE 1 /* unknown */ diff --git a/arch/arm/mach-pxa/include/mach/palmtx.h b/arch/arm/mach-pxa/include/mach/palmtx.h index 1be0db6ed55e..10abc4f2e8e4 100644 --- a/arch/arm/mach-pxa/include/mach/palmtx.h +++ b/arch/arm/mach-pxa/include/mach/palmtx.h @@ -94,7 +94,7 @@ /* BATTERY */ #define PALMTX_BAT_MAX_VOLTAGE 4000 /* 4.00v current voltage */ #define PALMTX_BAT_MIN_VOLTAGE 3550 /* 3.55v critical voltage */ -#define PALMTX_BAT_MAX_CURRENT 0 /* unknokn */ +#define PALMTX_BAT_MAX_CURRENT 0 /* unknown */ #define PALMTX_BAT_MIN_CURRENT 0 /* unknown */ #define PALMTX_BAT_MAX_CHARGE 1 /* unknown */ #define PALMTX_BAT_MIN_CHARGE 1 /* unknown */ diff --git a/arch/arm/mach-pxa/include/mach/palmz72.h b/arch/arm/mach-pxa/include/mach/palmz72.h index 2806ef69ba5a..2bbcf70dd935 100644 --- a/arch/arm/mach-pxa/include/mach/palmz72.h +++ b/arch/arm/mach-pxa/include/mach/palmz72.h @@ -49,7 +49,7 @@ /* Battery */ #define PALMZ72_BAT_MAX_VOLTAGE 4000 /* 4.00v current voltage */ #define PALMZ72_BAT_MIN_VOLTAGE 3550 /* 3.55v critical voltage */ -#define PALMZ72_BAT_MAX_CURRENT 0 /* unknokn */ +#define PALMZ72_BAT_MAX_CURRENT 0 /* unknown */ #define PALMZ72_BAT_MIN_CURRENT 0 /* unknown */ #define PALMZ72_BAT_MAX_CHARGE 1 /* unknown */ #define PALMZ72_BAT_MIN_CHARGE 1 /* unknown */ diff --git a/arch/arm/mach-s3c6400/setup-sdhci.c b/arch/arm/mach-s3c6400/setup-sdhci.c index b93dafbee1f4..1039937403be 100644 --- a/arch/arm/mach-s3c6400/setup-sdhci.c +++ b/arch/arm/mach-s3c6400/setup-sdhci.c @@ -30,7 +30,7 @@ char *s3c6400_hsmmc_clksrcs[4] = { [0] = "hsmmc", [1] = "hsmmc", [2] = "mmc_bus", - /* [3] = "48m", - note not succesfully used yet */ + /* [3] = "48m", - note not successfully used yet */ }; void s3c6400_setup_sdhci_cfg_card(struct platform_device *dev, diff --git a/arch/arm/mach-s3c6410/setup-sdhci.c b/arch/arm/mach-s3c6410/setup-sdhci.c index 20666f3bd478..816d2d9f9ef8 100644 --- a/arch/arm/mach-s3c6410/setup-sdhci.c +++ b/arch/arm/mach-s3c6410/setup-sdhci.c @@ -30,7 +30,7 @@ char *s3c6410_hsmmc_clksrcs[4] = { [0] = "hsmmc", [1] = "hsmmc", [2] = "mmc_bus", - /* [3] = "48m", - note not succesfully used yet */ + /* [3] = "48m", - note not successfully used yet */ }; diff --git a/arch/arm/mach-sa1100/dma.c b/arch/arm/mach-sa1100/dma.c index cb4521a6f42d..ad660350c296 100644 --- a/arch/arm/mach-sa1100/dma.c +++ b/arch/arm/mach-sa1100/dma.c @@ -65,7 +65,7 @@ static irqreturn_t dma_irq_handler(int irq, void *dev_id) /** - * sa1100_request_dma - allocate one of the SA11x0's DMA chanels + * sa1100_request_dma - allocate one of the SA11x0's DMA channels * @device: The SA11x0 peripheral targeted by this request * @device_id: An ascii name for the claiming device * @callback: Function to be called when the DMA completes diff --git a/arch/arm/plat-mxc/include/mach/iomux-mx3.h b/arch/arm/plat-mxc/include/mach/iomux-mx3.h index 446f86763816..0c7802bbeccb 100644 --- a/arch/arm/plat-mxc/include/mach/iomux-mx3.h +++ b/arch/arm/plat-mxc/include/mach/iomux-mx3.h @@ -112,7 +112,7 @@ enum iomux_gp_func { * setups a single pin: * - reserves the pin so that it is not claimed by another driver * - setups the iomux according to the configuration - * - if the pin is configured as a GPIO, we claim it throug kernel gpiolib + * - if the pin is configured as a GPIO, we claim it through kernel gpiolib */ int mxc_iomux_alloc_pin(const unsigned int pin, const char *label); /* diff --git a/arch/arm/plat-mxc/include/mach/iomux-mxc91231.h b/arch/arm/plat-mxc/include/mach/iomux-mxc91231.h index 9f13061192c8..3887f3fe29d4 100644 --- a/arch/arm/plat-mxc/include/mach/iomux-mxc91231.h +++ b/arch/arm/plat-mxc/include/mach/iomux-mxc91231.h @@ -48,7 +48,7 @@ * setups a single pin: * - reserves the pin so that it is not claimed by another driver * - setups the iomux according to the configuration - * - if the pin is configured as a GPIO, we claim it throug kernel gpiolib + * - if the pin is configured as a GPIO, we claim it through kernel gpiolib */ int mxc_iomux_alloc_pin(const unsigned int pin_mode, const char *label); /* diff --git a/arch/arm/plat-mxc/pwm.c b/arch/arm/plat-mxc/pwm.c index 5cdbd605ac05..4ff6dfe04283 100644 --- a/arch/arm/plat-mxc/pwm.c +++ b/arch/arm/plat-mxc/pwm.c @@ -94,7 +94,7 @@ int pwm_config(struct pwm_device *pwm, int duty_ns, int period_ns) * register to follow the ratio of duty_ns vs. period_ns * accordingly. * - * This is good enought for programming the brightness of + * This is good enough for programming the brightness of * the LCD backlight. * * The real implementation would divide PERCLK[0] first by diff --git a/arch/arm/plat-omap/dma.c b/arch/arm/plat-omap/dma.c index b53125f41293..0e308913291b 100644 --- a/arch/arm/plat-omap/dma.c +++ b/arch/arm/plat-omap/dma.c @@ -1232,7 +1232,7 @@ static void create_dma_lch_chain(int lch_head, int lch_queue) * OMAP_DMA_DYNAMIC_CHAIN * @params - Channel parameters * - * @return - Succes : 0 + * @return - Success : 0 * Failure: -EINVAL/-ENOMEM */ int omap_request_dma_chain(int dev_id, const char *dev_name, diff --git a/arch/arm/plat-omap/include/mach/omap16xx.h b/arch/arm/plat-omap/include/mach/omap16xx.h index 0e69b504c25f..7560b4d583a3 100644 --- a/arch/arm/plat-omap/include/mach/omap16xx.h +++ b/arch/arm/plat-omap/include/mach/omap16xx.h @@ -124,7 +124,7 @@ #define TIPB_SWITCH_BASE (0xfffbc800) #define OMAP16XX_MMCSD2_SSW_MPU_CONF (TIPB_SWITCH_BASE + 0x160) -/* UART3 Registers Maping through MPU bus */ +/* UART3 Registers Mapping through MPU bus */ #define UART3_RHR (OMAP_UART3_BASE + 0) #define UART3_THR (OMAP_UART3_BASE + 0) #define UART3_DLL (OMAP_UART3_BASE + 0) diff --git a/arch/arm/plat-s3c24xx/include/plat/map.h b/arch/arm/plat-s3c24xx/include/plat/map.h index c4d133436fc7..bd534d32b993 100644 --- a/arch/arm/plat-s3c24xx/include/plat/map.h +++ b/arch/arm/plat-s3c24xx/include/plat/map.h @@ -64,7 +64,7 @@ /* the calculation for the VA of this must ensure that * it is the same distance apart from the UART in the * phsyical address space, as the initial mapping for the IO - * is done as a 1:1 maping. This puts it (currently) at + * is done as a 1:1 mapping. This puts it (currently) at * 0xFA800000, which is not in the way of any current mapping * by the base system. */ diff --git a/arch/avr32/boards/hammerhead/Kconfig b/arch/avr32/boards/hammerhead/Kconfig index fda2331f9789..5c13d785cc70 100644 --- a/arch/avr32/boards/hammerhead/Kconfig +++ b/arch/avr32/boards/hammerhead/Kconfig @@ -24,7 +24,7 @@ config BOARD_HAMMERHEAD_SND bool "Atmel AC97 Sound support" help This enables Sound support for the Hammerhead board. You may - also go trough the ALSA settings to get it working. + also go through the ALSA settings to get it working. Choose 'Y' here if you have ordered a Corona daugther board and want to make your board funky. diff --git a/arch/blackfin/kernel/traps.c b/arch/blackfin/kernel/traps.c index 6b7325d634af..78cb3d38f899 100644 --- a/arch/blackfin/kernel/traps.c +++ b/arch/blackfin/kernel/traps.c @@ -619,7 +619,7 @@ asmlinkage notrace void trap_c(struct pt_regs *fp) /* * Similar to get_user, do some address checking, then dereference - * Return true on sucess, false on bad address + * Return true on success, false on bad address */ static bool get_instruction(unsigned short *val, unsigned short *address) { diff --git a/arch/blackfin/mach-bf518/include/mach/defBF51x_base.h b/arch/blackfin/mach-bf518/include/mach/defBF51x_base.h index e06f4112c695..f9fd2b2a2956 100644 --- a/arch/blackfin/mach-bf518/include/mach/defBF51x_base.h +++ b/arch/blackfin/mach-bf518/include/mach/defBF51x_base.h @@ -542,7 +542,7 @@ #define HMDMA0_CONTROL 0xFFC03300 /* Handshake MDMA0 Control Register */ #define HMDMA0_ECINIT 0xFFC03304 /* HMDMA0 Initial Edge Count Register */ #define HMDMA0_BCINIT 0xFFC03308 /* HMDMA0 Initial Block Count Register */ -#define HMDMA0_ECURGENT 0xFFC0330C /* HMDMA0 Urgent Edge Count Threshhold Register */ +#define HMDMA0_ECURGENT 0xFFC0330C /* HMDMA0 Urgent Edge Count Threshold Register */ #define HMDMA0_ECOVERFLOW 0xFFC03310 /* HMDMA0 Edge Count Overflow Interrupt Register */ #define HMDMA0_ECOUNT 0xFFC03314 /* HMDMA0 Current Edge Count Register */ #define HMDMA0_BCOUNT 0xFFC03318 /* HMDMA0 Current Block Count Register */ @@ -550,7 +550,7 @@ #define HMDMA1_CONTROL 0xFFC03340 /* Handshake MDMA1 Control Register */ #define HMDMA1_ECINIT 0xFFC03344 /* HMDMA1 Initial Edge Count Register */ #define HMDMA1_BCINIT 0xFFC03348 /* HMDMA1 Initial Block Count Register */ -#define HMDMA1_ECURGENT 0xFFC0334C /* HMDMA1 Urgent Edge Count Threshhold Register */ +#define HMDMA1_ECURGENT 0xFFC0334C /* HMDMA1 Urgent Edge Count Threshold Register */ #define HMDMA1_ECOVERFLOW 0xFFC03350 /* HMDMA1 Edge Count Overflow Interrupt Register */ #define HMDMA1_ECOUNT 0xFFC03354 /* HMDMA1 Current Edge Count Register */ #define HMDMA1_BCOUNT 0xFFC03358 /* HMDMA1 Current Block Count Register */ diff --git a/arch/blackfin/mach-bf527/include/mach/defBF52x_base.h b/arch/blackfin/mach-bf527/include/mach/defBF52x_base.h index f821700716ee..b9dbb73d7ef0 100644 --- a/arch/blackfin/mach-bf527/include/mach/defBF52x_base.h +++ b/arch/blackfin/mach-bf527/include/mach/defBF52x_base.h @@ -544,7 +544,7 @@ #define HMDMA0_CONTROL 0xFFC03300 /* Handshake MDMA0 Control Register */ #define HMDMA0_ECINIT 0xFFC03304 /* HMDMA0 Initial Edge Count Register */ #define HMDMA0_BCINIT 0xFFC03308 /* HMDMA0 Initial Block Count Register */ -#define HMDMA0_ECURGENT 0xFFC0330C /* HMDMA0 Urgent Edge Count Threshhold Register */ +#define HMDMA0_ECURGENT 0xFFC0330C /* HMDMA0 Urgent Edge Count Threshold Register */ #define HMDMA0_ECOVERFLOW 0xFFC03310 /* HMDMA0 Edge Count Overflow Interrupt Register */ #define HMDMA0_ECOUNT 0xFFC03314 /* HMDMA0 Current Edge Count Register */ #define HMDMA0_BCOUNT 0xFFC03318 /* HMDMA0 Current Block Count Register */ @@ -552,7 +552,7 @@ #define HMDMA1_CONTROL 0xFFC03340 /* Handshake MDMA1 Control Register */ #define HMDMA1_ECINIT 0xFFC03344 /* HMDMA1 Initial Edge Count Register */ #define HMDMA1_BCINIT 0xFFC03348 /* HMDMA1 Initial Block Count Register */ -#define HMDMA1_ECURGENT 0xFFC0334C /* HMDMA1 Urgent Edge Count Threshhold Register */ +#define HMDMA1_ECURGENT 0xFFC0334C /* HMDMA1 Urgent Edge Count Threshold Register */ #define HMDMA1_ECOVERFLOW 0xFFC03350 /* HMDMA1 Edge Count Overflow Interrupt Register */ #define HMDMA1_ECOUNT 0xFFC03354 /* HMDMA1 Current Edge Count Register */ #define HMDMA1_BCOUNT 0xFFC03358 /* HMDMA1 Current Block Count Register */ diff --git a/arch/blackfin/mach-bf537/include/mach/defBF534.h b/arch/blackfin/mach-bf537/include/mach/defBF534.h index cebb14feb1ba..a6d20ca57683 100644 --- a/arch/blackfin/mach-bf537/include/mach/defBF534.h +++ b/arch/blackfin/mach-bf537/include/mach/defBF534.h @@ -934,7 +934,7 @@ #define HMDMA0_CONTROL 0xFFC03300 /* Handshake MDMA0 Control Register */ #define HMDMA0_ECINIT 0xFFC03304 /* HMDMA0 Initial Edge Count Register */ #define HMDMA0_BCINIT 0xFFC03308 /* HMDMA0 Initial Block Count Register */ -#define HMDMA0_ECURGENT 0xFFC0330C /* HMDMA0 Urgent Edge Count Threshhold Register */ +#define HMDMA0_ECURGENT 0xFFC0330C /* HMDMA0 Urgent Edge Count Threshold Register */ #define HMDMA0_ECOVERFLOW 0xFFC03310 /* HMDMA0 Edge Count Overflow Interrupt Register */ #define HMDMA0_ECOUNT 0xFFC03314 /* HMDMA0 Current Edge Count Register */ #define HMDMA0_BCOUNT 0xFFC03318 /* HMDMA0 Current Block Count Register */ @@ -942,7 +942,7 @@ #define HMDMA1_CONTROL 0xFFC03340 /* Handshake MDMA1 Control Register */ #define HMDMA1_ECINIT 0xFFC03344 /* HMDMA1 Initial Edge Count Register */ #define HMDMA1_BCINIT 0xFFC03348 /* HMDMA1 Initial Block Count Register */ -#define HMDMA1_ECURGENT 0xFFC0334C /* HMDMA1 Urgent Edge Count Threshhold Register */ +#define HMDMA1_ECURGENT 0xFFC0334C /* HMDMA1 Urgent Edge Count Threshold Register */ #define HMDMA1_ECOVERFLOW 0xFFC03350 /* HMDMA1 Edge Count Overflow Interrupt Register */ #define HMDMA1_ECOUNT 0xFFC03354 /* HMDMA1 Current Edge Count Register */ #define HMDMA1_BCOUNT 0xFFC03358 /* HMDMA1 Current Block Count Register */ diff --git a/arch/blackfin/mach-bf548/include/mach/defBF544.h b/arch/blackfin/mach-bf548/include/mach/defBF544.h index dd414ae4ba4c..39f588dcd382 100644 --- a/arch/blackfin/mach-bf548/include/mach/defBF544.h +++ b/arch/blackfin/mach-bf548/include/mach/defBF544.h @@ -491,7 +491,7 @@ #define HMDMA0_CONTROL 0xffc04500 /* Handshake MDMA0 Control Register */ #define HMDMA0_ECINIT 0xffc04504 /* Handshake MDMA0 Initial Edge Count Register */ #define HMDMA0_BCINIT 0xffc04508 /* Handshake MDMA0 Initial Block Count Register */ -#define HMDMA0_ECURGENT 0xffc0450c /* Handshake MDMA0 Urgent Edge Count Threshhold Register */ +#define HMDMA0_ECURGENT 0xffc0450c /* Handshake MDMA0 Urgent Edge Count Threshold Register */ #define HMDMA0_ECOVERFLOW 0xffc04510 /* Handshake MDMA0 Edge Count Overflow Interrupt Register */ #define HMDMA0_ECOUNT 0xffc04514 /* Handshake MDMA0 Current Edge Count Register */ #define HMDMA0_BCOUNT 0xffc04518 /* Handshake MDMA0 Current Block Count Register */ @@ -501,7 +501,7 @@ #define HMDMA1_CONTROL 0xffc04540 /* Handshake MDMA1 Control Register */ #define HMDMA1_ECINIT 0xffc04544 /* Handshake MDMA1 Initial Edge Count Register */ #define HMDMA1_BCINIT 0xffc04548 /* Handshake MDMA1 Initial Block Count Register */ -#define HMDMA1_ECURGENT 0xffc0454c /* Handshake MDMA1 Urgent Edge Count Threshhold Register */ +#define HMDMA1_ECURGENT 0xffc0454c /* Handshake MDMA1 Urgent Edge Count Threshold Register */ #define HMDMA1_ECOVERFLOW 0xffc04550 /* Handshake MDMA1 Edge Count Overflow Interrupt Register */ #define HMDMA1_ECOUNT 0xffc04554 /* Handshake MDMA1 Current Edge Count Register */ #define HMDMA1_BCOUNT 0xffc04558 /* Handshake MDMA1 Current Block Count Register */ diff --git a/arch/blackfin/mach-bf548/include/mach/defBF547.h b/arch/blackfin/mach-bf548/include/mach/defBF547.h index 5a9dbabe0a68..c4dcf302d9f5 100644 --- a/arch/blackfin/mach-bf548/include/mach/defBF547.h +++ b/arch/blackfin/mach-bf548/include/mach/defBF547.h @@ -470,7 +470,7 @@ #define HMDMA0_CONTROL 0xffc04500 /* Handshake MDMA0 Control Register */ #define HMDMA0_ECINIT 0xffc04504 /* Handshake MDMA0 Initial Edge Count Register */ #define HMDMA0_BCINIT 0xffc04508 /* Handshake MDMA0 Initial Block Count Register */ -#define HMDMA0_ECURGENT 0xffc0450c /* Handshake MDMA0 Urgent Edge Count Threshhold Register */ +#define HMDMA0_ECURGENT 0xffc0450c /* Handshake MDMA0 Urgent Edge Count Threshold Register */ #define HMDMA0_ECOVERFLOW 0xffc04510 /* Handshake MDMA0 Edge Count Overflow Interrupt Register */ #define HMDMA0_ECOUNT 0xffc04514 /* Handshake MDMA0 Current Edge Count Register */ #define HMDMA0_BCOUNT 0xffc04518 /* Handshake MDMA0 Current Block Count Register */ @@ -480,7 +480,7 @@ #define HMDMA1_CONTROL 0xffc04540 /* Handshake MDMA1 Control Register */ #define HMDMA1_ECINIT 0xffc04544 /* Handshake MDMA1 Initial Edge Count Register */ #define HMDMA1_BCINIT 0xffc04548 /* Handshake MDMA1 Initial Block Count Register */ -#define HMDMA1_ECURGENT 0xffc0454c /* Handshake MDMA1 Urgent Edge Count Threshhold Register */ +#define HMDMA1_ECURGENT 0xffc0454c /* Handshake MDMA1 Urgent Edge Count Threshold Register */ #define HMDMA1_ECOVERFLOW 0xffc04550 /* Handshake MDMA1 Edge Count Overflow Interrupt Register */ #define HMDMA1_ECOUNT 0xffc04554 /* Handshake MDMA1 Current Edge Count Register */ #define HMDMA1_BCOUNT 0xffc04558 /* Handshake MDMA1 Current Block Count Register */ diff --git a/arch/blackfin/mach-bf548/include/mach/defBF548.h b/arch/blackfin/mach-bf548/include/mach/defBF548.h index 82cd593f7391..a5079980968c 100644 --- a/arch/blackfin/mach-bf548/include/mach/defBF548.h +++ b/arch/blackfin/mach-bf548/include/mach/defBF548.h @@ -853,7 +853,7 @@ #define HMDMA0_CONTROL 0xffc04500 /* Handshake MDMA0 Control Register */ #define HMDMA0_ECINIT 0xffc04504 /* Handshake MDMA0 Initial Edge Count Register */ #define HMDMA0_BCINIT 0xffc04508 /* Handshake MDMA0 Initial Block Count Register */ -#define HMDMA0_ECURGENT 0xffc0450c /* Handshake MDMA0 Urgent Edge Count Threshhold Register */ +#define HMDMA0_ECURGENT 0xffc0450c /* Handshake MDMA0 Urgent Edge Count Threshold Register */ #define HMDMA0_ECOVERFLOW 0xffc04510 /* Handshake MDMA0 Edge Count Overflow Interrupt Register */ #define HMDMA0_ECOUNT 0xffc04514 /* Handshake MDMA0 Current Edge Count Register */ #define HMDMA0_BCOUNT 0xffc04518 /* Handshake MDMA0 Current Block Count Register */ @@ -863,7 +863,7 @@ #define HMDMA1_CONTROL 0xffc04540 /* Handshake MDMA1 Control Register */ #define HMDMA1_ECINIT 0xffc04544 /* Handshake MDMA1 Initial Edge Count Register */ #define HMDMA1_BCINIT 0xffc04548 /* Handshake MDMA1 Initial Block Count Register */ -#define HMDMA1_ECURGENT 0xffc0454c /* Handshake MDMA1 Urgent Edge Count Threshhold Register */ +#define HMDMA1_ECURGENT 0xffc0454c /* Handshake MDMA1 Urgent Edge Count Threshold Register */ #define HMDMA1_ECOVERFLOW 0xffc04550 /* Handshake MDMA1 Edge Count Overflow Interrupt Register */ #define HMDMA1_ECOUNT 0xffc04554 /* Handshake MDMA1 Current Edge Count Register */ #define HMDMA1_BCOUNT 0xffc04558 /* Handshake MDMA1 Current Block Count Register */ diff --git a/arch/blackfin/mach-bf548/include/mach/defBF549.h b/arch/blackfin/mach-bf548/include/mach/defBF549.h index 6fc6e39ab61b..f7f043560c6f 100644 --- a/arch/blackfin/mach-bf548/include/mach/defBF549.h +++ b/arch/blackfin/mach-bf548/include/mach/defBF549.h @@ -1024,7 +1024,7 @@ #define HMDMA0_CONTROL 0xffc04500 /* Handshake MDMA0 Control Register */ #define HMDMA0_ECINIT 0xffc04504 /* Handshake MDMA0 Initial Edge Count Register */ #define HMDMA0_BCINIT 0xffc04508 /* Handshake MDMA0 Initial Block Count Register */ -#define HMDMA0_ECURGENT 0xffc0450c /* Handshake MDMA0 Urgent Edge Count Threshhold Register */ +#define HMDMA0_ECURGENT 0xffc0450c /* Handshake MDMA0 Urgent Edge Count Threshold Register */ #define HMDMA0_ECOVERFLOW 0xffc04510 /* Handshake MDMA0 Edge Count Overflow Interrupt Register */ #define HMDMA0_ECOUNT 0xffc04514 /* Handshake MDMA0 Current Edge Count Register */ #define HMDMA0_BCOUNT 0xffc04518 /* Handshake MDMA0 Current Block Count Register */ @@ -1034,7 +1034,7 @@ #define HMDMA1_CONTROL 0xffc04540 /* Handshake MDMA1 Control Register */ #define HMDMA1_ECINIT 0xffc04544 /* Handshake MDMA1 Initial Edge Count Register */ #define HMDMA1_BCINIT 0xffc04548 /* Handshake MDMA1 Initial Block Count Register */ -#define HMDMA1_ECURGENT 0xffc0454c /* Handshake MDMA1 Urgent Edge Count Threshhold Register */ +#define HMDMA1_ECURGENT 0xffc0454c /* Handshake MDMA1 Urgent Edge Count Threshold Register */ #define HMDMA1_ECOVERFLOW 0xffc04550 /* Handshake MDMA1 Edge Count Overflow Interrupt Register */ #define HMDMA1_ECOUNT 0xffc04554 /* Handshake MDMA1 Current Edge Count Register */ #define HMDMA1_BCOUNT 0xffc04558 /* Handshake MDMA1 Current Block Count Register */ diff --git a/arch/cris/mm/fault.c b/arch/cris/mm/fault.c index 4a7cdd9ea1ee..380df1a73a6e 100644 --- a/arch/cris/mm/fault.c +++ b/arch/cris/mm/fault.c @@ -209,7 +209,7 @@ do_page_fault(unsigned long address, struct pt_regs *regs, /* Are we prepared to handle this kernel fault? * * (The kernel has valid exception-points in the source - * when it acesses user-memory. When it fails in one + * when it accesses user-memory. When it fails in one * of those points, we find it in a table and do a jump * to some fixup code that loads an appropriate error * code) diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c index 674a8374c6d9..f332e3fe4237 100644 --- a/arch/ia64/hp/common/sba_iommu.c +++ b/arch/ia64/hp/common/sba_iommu.c @@ -1381,7 +1381,7 @@ sba_coalesce_chunks(struct ioc *ioc, struct device *dev, #endif /* - ** Not virtually contigous. + ** Not virtually contiguous. ** Terminate prev chunk. ** Start a new chunk. ** diff --git a/arch/ia64/ia32/ia32_entry.S b/arch/ia64/ia32/ia32_entry.S index af9405cd70e5..02d1fb732951 100644 --- a/arch/ia64/ia32/ia32_entry.S +++ b/arch/ia64/ia32/ia32_entry.S @@ -79,7 +79,7 @@ GLOBAL_ENTRY(ia32_ret_from_clone) (p6) br.cond.spnt .ia32_strace_check_retval ;; // prevent RAW on r8 END(ia32_ret_from_clone) - // fall thrugh + // fall through GLOBAL_ENTRY(ia32_ret_from_syscall) PT_REGS_UNWIND_INFO(0) diff --git a/arch/ia64/include/asm/perfmon_default_smpl.h b/arch/ia64/include/asm/perfmon_default_smpl.h index 48822c0811d8..74724b24c2b7 100644 --- a/arch/ia64/include/asm/perfmon_default_smpl.h +++ b/arch/ia64/include/asm/perfmon_default_smpl.h @@ -67,7 +67,7 @@ typedef struct { unsigned long ip; /* where did the overflow interrupt happened */ unsigned long tstamp; /* ar.itc when entering perfmon intr. handler */ - unsigned short cpu; /* cpu on which the overfow occured */ + unsigned short cpu; /* cpu on which the overflow occured */ unsigned short set; /* event set active when overflow ocurred */ int tgid; /* thread group id (for NPTL, this is getpid()) */ } pfm_default_smpl_entry_t; diff --git a/arch/ia64/include/asm/sn/shubio.h b/arch/ia64/include/asm/sn/shubio.h index 22a6f18a5313..6052422a22b3 100644 --- a/arch/ia64/include/asm/sn/shubio.h +++ b/arch/ia64/include/asm/sn/shubio.h @@ -3289,7 +3289,7 @@ typedef ii_icrb0_e_u_t icrbe_t; #define IIO_IIDSR_LVL_SHIFT 0 #define IIO_IIDSR_LVL_MASK 0x000000ff -/* Xtalk timeout threshhold register (IIO_IXTT) */ +/* Xtalk timeout threshold register (IIO_IXTT) */ #define IXTT_RRSP_TO_SHFT 55 /* read response timeout */ #define IXTT_RRSP_TO_MASK (0x1FULL << IXTT_RRSP_TO_SHFT) #define IXTT_RRSP_PS_SHFT 32 /* read responsed TO prescalar */ diff --git a/arch/ia64/kernel/esi.c b/arch/ia64/kernel/esi.c index d5764a3d74af..b091111270cb 100644 --- a/arch/ia64/kernel/esi.c +++ b/arch/ia64/kernel/esi.c @@ -84,7 +84,7 @@ static int __init esi_init (void) case ESI_DESC_ENTRY_POINT: break; default: - printk(KERN_WARNING "Unkown table type %d found in " + printk(KERN_WARNING "Unknown table type %d found in " "ESI table, ignoring rest of table\n", *p); return -ENODEV; } diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index f1782705b1f7..b3a1cb3e6b25 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -3523,7 +3523,7 @@ pfm_use_debug_registers(struct task_struct *task) * IA64_THREAD_DBG_VALID set. This indicates a task which was * able to use the debug registers for debugging purposes via * ptrace(). Therefore we know it was not using them for - * perfmormance monitoring, so we only decrement the number + * performance monitoring, so we only decrement the number * of "ptraced" debug register users to keep the count up to date */ int diff --git a/arch/m68k/ifpsp060/src/fpsp.S b/arch/m68k/ifpsp060/src/fpsp.S index 6c1a9a217887..73613b5f1ee5 100644 --- a/arch/m68k/ifpsp060/src/fpsp.S +++ b/arch/m68k/ifpsp060/src/fpsp.S @@ -753,7 +753,7 @@ fovfl_ovfl_on: bra.l _real_ovfl -# overflow occurred but is disabled. meanwhile, inexact is enabled. therefore, +# overflow occurred but is disabled. meanwhile, inexact is enabled. Therefore, # we must jump to real_inex(). fovfl_inex_on: @@ -1015,7 +1015,7 @@ funfl_unfl_on2: bra.l _real_unfl -# undeflow occurred but is disabled. meanwhile, inexact is enabled. therefore, +# underflow occurred but is disabled. meanwhile, inexact is enabled. Therefore, # we must jump to real_inex(). funfl_inex_on: @@ -2963,7 +2963,7 @@ iea_disabled: tst.w %d0 # is instr fmovm? bmi.b iea_dis_fmovm # yes -# instruction is using an extended precision immediate operand. therefore, +# instruction is using an extended precision immediate operand. Therefore, # the total instruction length is 16 bytes. iea_dis_immed: mov.l &0x10,%d0 # 16 bytes of instruction @@ -9624,7 +9624,7 @@ sok_dnrm: bge.b sok_norm2 # thank goodness no # the multiply factor that we're trying to create should be a denorm -# for the multiply to work. therefore, we're going to actually do a +# for the multiply to work. Therefore, we're going to actually do a # multiply with a denorm which will cause an unimplemented data type # exception to be put into the machine which will be caught and corrected # later. we don't do this with the DENORMs above because this method @@ -12216,7 +12216,7 @@ fin_sd_unfl_dis: # # operand will underflow AND underflow or inexact is enabled. -# therefore, we must return the result rounded to extended precision. +# Therefore, we must return the result rounded to extended precision. # fin_sd_unfl_ena: mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6) @@ -12746,7 +12746,7 @@ fdiv_zero_load_p: # # The destination was In Range and the source was a ZERO. The result, -# therefore, is an INF w/ the proper sign. +# Therefore, is an INF w/ the proper sign. # So, determine the sign and return a new INF (w/ the j-bit cleared). # global fdiv_inf_load # global for fsgldiv @@ -12996,7 +12996,7 @@ fneg_sd_unfl_dis: # # operand will underflow AND underflow is enabled. -# therefore, we must return the result rounded to extended precision. +# Therefore, we must return the result rounded to extended precision. # fneg_sd_unfl_ena: mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6) @@ -13611,7 +13611,7 @@ fabs_sd_unfl_dis: # # operand will underflow AND underflow is enabled. -# therefore, we must return the result rounded to extended precision. +# Therefore, we must return the result rounded to extended precision. # fabs_sd_unfl_ena: mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6) @@ -14973,7 +14973,7 @@ fadd_zero_2: # # the ZEROes have opposite signs: -# - therefore, we return +ZERO if the rounding modes are RN,RZ, or RP. +# - Therefore, we return +ZERO if the rounding modes are RN,RZ, or RP. # - -ZERO is returned in the case of RM. # fadd_zero_2_chk_rm: @@ -15425,7 +15425,7 @@ fsub_zero_2: # # the ZEROes have the same signs: -# - therefore, we return +ZERO if the rounding mode is RN,RZ, or RP +# - Therefore, we return +ZERO if the rounding mode is RN,RZ, or RP # - -ZERO is returned in the case of RM. # fsub_zero_2_chk_rm: @@ -15693,7 +15693,7 @@ fsqrt_sd_unfl_dis: # # operand will underflow AND underflow is enabled. -# therefore, we must return the result rounded to extended precision. +# Therefore, we must return the result rounded to extended precision. # fsqrt_sd_unfl_ena: mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6) @@ -21000,7 +21000,7 @@ fout_pack_type: tst.l %d0 bne.b fout_pack_set # "mantissa" is all zero which means that the answer is zero. but, the '040 -# algorithm allows the exponent to be non-zero. the 881/2 do not. therefore, +# algorithm allows the exponent to be non-zero. the 881/2 do not. Therefore, # if the mantissa is zero, I will zero the exponent, too. # the question now is whether the exponents sign bit is allowed to be non-zero # for a zero, also... @@ -21743,7 +21743,7 @@ denorm_set_stky: rts # # -# dnrm_lp(): normalize exponent/mantissa to specified threshhold # +# dnrm_lp(): normalize exponent/mantissa to specified threshold # # # # INPUT: # # %a0 : points to the operand to be denormalized # @@ -22402,7 +22402,7 @@ unnorm_shift: bgt.b unnorm_nrm_zero # yes; denorm only until exp = 0 # -# exponent would not go < 0. therefore, number stays normalized +# exponent would not go < 0. Therefore, number stays normalized # sub.w %d0, %d1 # shift exponent value mov.w FTEMP_EX(%a0), %d0 # load old exponent diff --git a/arch/m68k/ifpsp060/src/pfpsp.S b/arch/m68k/ifpsp060/src/pfpsp.S index 51b9f7d879dd..e71ba0ab013c 100644 --- a/arch/m68k/ifpsp060/src/pfpsp.S +++ b/arch/m68k/ifpsp060/src/pfpsp.S @@ -752,7 +752,7 @@ fovfl_ovfl_on: bra.l _real_ovfl -# overflow occurred but is disabled. meanwhile, inexact is enabled. therefore, +# overflow occurred but is disabled. meanwhile, inexact is enabled. Therefore, # we must jump to real_inex(). fovfl_inex_on: @@ -1014,7 +1014,7 @@ funfl_unfl_on2: bra.l _real_unfl -# undeflow occurred but is disabled. meanwhile, inexact is enabled. therefore, +# underflow occurred but is disabled. meanwhile, inexact is enabled. Therefore, # we must jump to real_inex(). funfl_inex_on: @@ -2962,7 +2962,7 @@ iea_disabled: tst.w %d0 # is instr fmovm? bmi.b iea_dis_fmovm # yes -# instruction is using an extended precision immediate operand. therefore, +# instruction is using an extended precision immediate operand. Therefore, # the total instruction length is 16 bytes. iea_dis_immed: mov.l &0x10,%d0 # 16 bytes of instruction @@ -5865,7 +5865,7 @@ denorm_set_stky: rts # # -# dnrm_lp(): normalize exponent/mantissa to specified threshhold # +# dnrm_lp(): normalize exponent/mantissa to specified threshold # # # # INPUT: # # %a0 : points to the operand to be denormalized # @@ -6524,7 +6524,7 @@ unnorm_shift: bgt.b unnorm_nrm_zero # yes; denorm only until exp = 0 # -# exponent would not go < 0. therefore, number stays normalized +# exponent would not go < 0. Therefore, number stays normalized # sub.w %d0, %d1 # shift exponent value mov.w FTEMP_EX(%a0), %d0 # load old exponent @@ -7901,7 +7901,7 @@ fout_pack_type: tst.l %d0 bne.b fout_pack_set # "mantissa" is all zero which means that the answer is zero. but, the '040 -# algorithm allows the exponent to be non-zero. the 881/2 do not. therefore, +# algorithm allows the exponent to be non-zero. the 881/2 do not. Therefore, # if the mantissa is zero, I will zero the exponent, too. # the question now is whether the exponents sign bit is allowed to be non-zero # for a zero, also... @@ -8647,7 +8647,7 @@ fin_sd_unfl_dis: # # operand will underflow AND underflow or inexact is enabled. -# therefore, we must return the result rounded to extended precision. +# Therefore, we must return the result rounded to extended precision. # fin_sd_unfl_ena: mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6) @@ -9177,7 +9177,7 @@ fdiv_zero_load_p: # # The destination was In Range and the source was a ZERO. The result, -# therefore, is an INF w/ the proper sign. +# Therefore, is an INF w/ the proper sign. # So, determine the sign and return a new INF (w/ the j-bit cleared). # global fdiv_inf_load # global for fsgldiv @@ -9427,7 +9427,7 @@ fneg_sd_unfl_dis: # # operand will underflow AND underflow is enabled. -# therefore, we must return the result rounded to extended precision. +# Therefore, we must return the result rounded to extended precision. # fneg_sd_unfl_ena: mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6) @@ -10042,7 +10042,7 @@ fabs_sd_unfl_dis: # # operand will underflow AND underflow is enabled. -# therefore, we must return the result rounded to extended precision. +# Therefore, we must return the result rounded to extended precision. # fabs_sd_unfl_ena: mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6) @@ -11404,7 +11404,7 @@ fadd_zero_2: # # the ZEROes have opposite signs: -# - therefore, we return +ZERO if the rounding modes are RN,RZ, or RP. +# - Therefore, we return +ZERO if the rounding modes are RN,RZ, or RP. # - -ZERO is returned in the case of RM. # fadd_zero_2_chk_rm: @@ -11856,7 +11856,7 @@ fsub_zero_2: # # the ZEROes have the same signs: -# - therefore, we return +ZERO if the rounding mode is RN,RZ, or RP +# - Therefore, we return +ZERO if the rounding mode is RN,RZ, or RP # - -ZERO is returned in the case of RM. # fsub_zero_2_chk_rm: @@ -12124,7 +12124,7 @@ fsqrt_sd_unfl_dis: # # operand will underflow AND underflow is enabled. -# therefore, we must return the result rounded to extended precision. +# Therefore, we must return the result rounded to extended precision. # fsqrt_sd_unfl_ena: mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6) diff --git a/arch/m68k/include/asm/bootinfo.h b/arch/m68k/include/asm/bootinfo.h index fb8a06b9ab6a..67e7a78ad96b 100644 --- a/arch/m68k/include/asm/bootinfo.h +++ b/arch/m68k/include/asm/bootinfo.h @@ -145,7 +145,7 @@ struct bi_record { /* * Macintosh hardware profile data - unused, see macintosh.h for - * resonable type values + * reasonable type values */ #define BI_MAC_VIA1BASE 0x8010 /* Mac VIA1 base address (always present) */ diff --git a/arch/microblaze/lib/memcpy.c b/arch/microblaze/lib/memcpy.c index 6a907c58a4bc..cc2108b6b260 100644 --- a/arch/microblaze/lib/memcpy.c +++ b/arch/microblaze/lib/memcpy.c @@ -9,7 +9,7 @@ * It is based on demo code originally Copyright 2001 by Intel Corp, taken from * http://www.embedded.com/showArticle.jhtml?articleID=19205567 * - * Attempts were made, unsuccesfully, to contact the original + * Attempts were made, unsuccessfully, to contact the original * author of this code (Michael Morrow, Intel). Below is the original * copyright notice. * diff --git a/arch/microblaze/lib/memmove.c b/arch/microblaze/lib/memmove.c index d4e9f49a71f7..0929198c5e68 100644 --- a/arch/microblaze/lib/memmove.c +++ b/arch/microblaze/lib/memmove.c @@ -9,7 +9,7 @@ * It is based on demo code originally Copyright 2001 by Intel Corp, taken from * http://www.embedded.com/showArticle.jhtml?articleID=19205567 * - * Attempts were made, unsuccesfully, to contact the original + * Attempts were made, unsuccessfully, to contact the original * author of this code (Michael Morrow, Intel). Below is the original * copyright notice. * diff --git a/arch/microblaze/lib/memset.c b/arch/microblaze/lib/memset.c index 941dc8f94b03..4df851d41a29 100644 --- a/arch/microblaze/lib/memset.c +++ b/arch/microblaze/lib/memset.c @@ -9,7 +9,7 @@ * It is based on demo code originally Copyright 2001 by Intel Corp, taken from * http://www.embedded.com/showArticle.jhtml?articleID=19205567 * - * Attempts were made, unsuccesfully, to contact the original + * Attempts were made, unsuccessfully, to contact the original * author of this code (Michael Morrow, Intel). Below is the original * copyright notice. * diff --git a/arch/mips/include/asm/mach-pnx833x/gpio.h b/arch/mips/include/asm/mach-pnx833x/gpio.h index 8de0eb9c98a3..ed3a88da70f6 100644 --- a/arch/mips/include/asm/mach-pnx833x/gpio.h +++ b/arch/mips/include/asm/mach-pnx833x/gpio.h @@ -24,7 +24,7 @@ /* BIG FAT WARNING: races danger! No protections exist here. Current users are only early init code, - when locking is not needed because no cuncurency yet exists there, + when locking is not needed because no concurrency yet exists there, and GPIO IRQ dispatcher, which does locking. However, if many uses will ever happen, proper locking will be needed - including locking between different uses diff --git a/arch/mips/include/asm/sgi/ioc.h b/arch/mips/include/asm/sgi/ioc.h index 343ed15f8dc4..57a971904cfe 100644 --- a/arch/mips/include/asm/sgi/ioc.h +++ b/arch/mips/include/asm/sgi/ioc.h @@ -164,7 +164,7 @@ struct sgioc_regs { u32 _unused5; u8 _write[3]; volatile u8 write; -#define SGIOC_WRITE_NTHRESH 0x01 /* use 4.5db threshhold */ +#define SGIOC_WRITE_NTHRESH 0x01 /* use 4.5db threshold */ #define SGIOC_WRITE_TPSPEED 0x02 /* use 100ohm TP speed */ #define SGIOC_WRITE_EPSEL 0x04 /* force cable mode: 1=AUI 0=TP */ #define SGIOC_WRITE_EASEL 0x08 /* 1=autoselect 0=manual cable selection */ diff --git a/arch/mips/include/asm/sibyte/sb1250_mac.h b/arch/mips/include/asm/sibyte/sb1250_mac.h index b6faf08ca81d..591b9061fd8e 100644 --- a/arch/mips/include/asm/sibyte/sb1250_mac.h +++ b/arch/mips/include/asm/sibyte/sb1250_mac.h @@ -212,7 +212,7 @@ #define G_MAC_TXD_WEIGHT1(x) _SB_GETVALUE(x, S_MAC_TXD_WEIGHT1, M_MAC_TXD_WEIGHT1) /* - * MAC Fifo Threshhold registers (Table 9-14) + * MAC Fifo Threshold registers (Table 9-14) * Register: MAC_THRSH_CFG_0 * Register: MAC_THRSH_CFG_1 * Register: MAC_THRSH_CFG_2 diff --git a/arch/mips/include/asm/sn/sn0/hubio.h b/arch/mips/include/asm/sn/sn0/hubio.h index d0c29d4de084..31c76c021bb6 100644 --- a/arch/mips/include/asm/sn/sn0/hubio.h +++ b/arch/mips/include/asm/sn/sn0/hubio.h @@ -825,7 +825,7 @@ typedef union iprb_u { struct { u64 rsvd1: 15, error: 1, /* Widget rcvd wr resp pkt w/ error */ - ovflow: 5, /* Over flow count. perf measurement */ + ovflow: 5, /* Overflow count. perf measurement */ fire_and_forget: 1, /* Launch Write without response */ mode: 2, /* Widget operation Mode */ rsvd2: 2, diff --git a/arch/mips/kernel/smtc.c b/arch/mips/kernel/smtc.c index 24630fd8ef60..a38e3ee95515 100644 --- a/arch/mips/kernel/smtc.c +++ b/arch/mips/kernel/smtc.c @@ -1331,7 +1331,7 @@ void smtc_get_new_mmu_context(struct mm_struct *mm, unsigned long cpu) if (!((asid += ASID_INC) & ASID_MASK) ) { if (cpu_has_vtag_icache) flush_icache_all(); - /* Traverse all online CPUs (hack requires contigous range) */ + /* Traverse all online CPUs (hack requires contiguous range) */ for_each_online_cpu(i) { /* * We don't need to worry about our own CPU, nor those of diff --git a/arch/mips/math-emu/dp_sub.c b/arch/mips/math-emu/dp_sub.c index b30c5b1f1a2c..a2127d685a0d 100644 --- a/arch/mips/math-emu/dp_sub.c +++ b/arch/mips/math-emu/dp_sub.c @@ -110,7 +110,7 @@ ieee754dp ieee754dp_sub(ieee754dp x, ieee754dp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): DPDNORMX; - /* FAAL THOROUGH */ + /* FALL THROUGH */ case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): /* normalize ym,ye */ diff --git a/arch/mips/txx9/generic/smsc_fdc37m81x.c b/arch/mips/txx9/generic/smsc_fdc37m81x.c index a2b2d62d88e3..8ebc3848f3ac 100644 --- a/arch/mips/txx9/generic/smsc_fdc37m81x.c +++ b/arch/mips/txx9/generic/smsc_fdc37m81x.c @@ -117,7 +117,7 @@ unsigned long __init smsc_fdc37m81x_init(unsigned long port) if (chip_id == SMSC_FDC37M81X_CHIP_ID) smsc_fdc37m81x_config_end(); else { - printk(KERN_WARNING "%s: unknow chip id 0x%02x\n", __func__, + printk(KERN_WARNING "%s: unknown chip id 0x%02x\n", __func__, chip_id); g_smsc_fdc37m81x_base = 0; } diff --git a/arch/powerpc/include/asm/reg_fsl_emb.h b/arch/powerpc/include/asm/reg_fsl_emb.h index 1e180a594589..0de404dfee8b 100644 --- a/arch/powerpc/include/asm/reg_fsl_emb.h +++ b/arch/powerpc/include/asm/reg_fsl_emb.h @@ -39,7 +39,7 @@ #define PMRN_PMLCB2 0x112 /* PM Local Control B2 */ #define PMRN_PMLCB3 0x113 /* PM Local Control B3 */ -#define PMLCB_THRESHMUL_MASK 0x0700 /* Threshhold Multiple Field */ +#define PMLCB_THRESHMUL_MASK 0x0700 /* Threshold Multiple Field */ #define PMLCB_THRESHMUL_SHIFT 8 #define PMLCB_THRESHOLD_MASK 0x003f /* Threshold Field */ diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index 641c74bb8e27..b6bd1eaa1c24 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -52,7 +52,7 @@ static struct hard_trap_info { 0x2030, 0x08 /* SIGFPE */ }, /* spe fp data */ { 0x2040, 0x08 /* SIGFPE */ }, /* spe fp data */ { 0x2050, 0x08 /* SIGFPE */ }, /* spe fp round */ - { 0x2060, 0x0e /* SIGILL */ }, /* performace monitor */ + { 0x2060, 0x0e /* SIGILL */ }, /* performance monitor */ { 0x2900, 0x08 /* SIGFPE */ }, /* apu unavailable */ { 0x3100, 0x0e /* SIGALRM */ }, /* fixed interval timer */ { 0x3200, 0x02 /* SIGINT */ }, /* watchdog */ diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c index c3a56d65c5a9..a753b72efbc0 100644 --- a/arch/powerpc/kernel/tau_6xx.c +++ b/arch/powerpc/kernel/tau_6xx.c @@ -59,7 +59,7 @@ void set_thresholds(unsigned long cpu) mtspr(SPRN_THRM1, THRM1_THRES(tau[cpu].low) | THRM1_V | THRM1_TIE | THRM1_TID); /* setup THRM2, - * threshold, valid bit, enable interrupts, interrupt when above threshhold + * threshold, valid bit, enable interrupts, interrupt when above threshold */ mtspr (SPRN_THRM2, THRM1_THRES(tau[cpu].high) | THRM1_V | THRM1_TIE); #else diff --git a/arch/powerpc/oprofile/op_model_cell.c b/arch/powerpc/oprofile/op_model_cell.c index 52c98edcd703..2c9e52267292 100644 --- a/arch/powerpc/oprofile/op_model_cell.c +++ b/arch/powerpc/oprofile/op_model_cell.c @@ -1594,7 +1594,7 @@ static void cell_handle_interrupt_spu(struct pt_regs *regs, * to a latch. The new values (interrupt setting bits, reset * counter value etc.) are not copied to the actual registers * until the performance monitor is enabled. In order to get - * this to work as desired, the permormance monitor needs to + * this to work as desired, the performance monitor needs to * be disabled while writing to the latches. This is a * HW design issue. */ @@ -1668,7 +1668,7 @@ static void cell_handle_interrupt_ppu(struct pt_regs *regs, * to a latch. The new values (interrupt setting bits, reset * counter value etc.) are not copied to the actual registers * until the performance monitor is enabled. In order to get - * this to work as desired, the permormance monitor needs to + * this to work as desired, the performance monitor needs to * be disabled while writing to the latches. This is a * HW design issue. */ diff --git a/arch/powerpc/platforms/52xx/mpc52xx_pci.c b/arch/powerpc/platforms/52xx/mpc52xx_pci.c index dd43114e9684..da110bd88346 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_pci.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_pci.c @@ -100,7 +100,7 @@ const struct of_device_id mpc52xx_pci_ids[] __initdata = { }; /* ======================================================================== */ -/* PCI configuration acess */ +/* PCI configuration access */ /* ======================================================================== */ static int diff --git a/arch/powerpc/platforms/powermac/pci.c b/arch/powerpc/platforms/powermac/pci.c index e81403b245b5..ab2027cdf893 100644 --- a/arch/powerpc/platforms/powermac/pci.c +++ b/arch/powerpc/platforms/powermac/pci.c @@ -302,7 +302,7 @@ static void __init setup_chaos(struct pci_controller *hose, * 1 -> Skip the device but act as if the access was successfull * (return 0xff's on reads, eventually, cache config space * accesses in a later version) - * -1 -> Hide the device (unsuccessful acess) + * -1 -> Hide the device (unsuccessful access) */ static int u3_ht_skip_device(struct pci_controller *hose, struct pci_bus *bus, unsigned int devfn) diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c index ae3c4db86fe8..bafc3f85360d 100644 --- a/arch/powerpc/sysdev/dart_iommu.c +++ b/arch/powerpc/sysdev/dart_iommu.c @@ -160,7 +160,7 @@ static int dart_build(struct iommu_table *tbl, long index, dp = ((unsigned int*)tbl->it_base) + index; - /* On U3, all memory is contigous, so we can move this + /* On U3, all memory is contiguous, so we can move this * out of the loop. */ l = npages; diff --git a/arch/s390/math-emu/math.c b/arch/s390/math-emu/math.c index 3ee78ccb617d..cd4e9c168dd7 100644 --- a/arch/s390/math-emu/math.c +++ b/arch/s390/math-emu/math.c @@ -2088,7 +2088,7 @@ int math_emu_ldr(__u8 *opcode) { __u16 opc = *((__u16 *) opcode); if ((opc & 0x90) == 0) { /* test if rx in {0,2,4,6} */ - /* we got an exception therfore ry can't be in {0,2,4,6} */ + /* we got an exception therefore ry can't be in {0,2,4,6} */ asm volatile( /* load rx from fp_regs.fprs[ry] */ " bras 1,0f\n" " ld 0,0(%1)\n" @@ -2118,7 +2118,7 @@ int math_emu_ler(__u8 *opcode) { __u16 opc = *((__u16 *) opcode); if ((opc & 0x90) == 0) { /* test if rx in {0,2,4,6} */ - /* we got an exception therfore ry can't be in {0,2,4,6} */ + /* we got an exception therefore ry can't be in {0,2,4,6} */ asm volatile( /* load rx from fp_regs.fprs[ry] */ " bras 1,0f\n" " le 0,0(%1)\n" diff --git a/arch/x86/include/asm/desc_defs.h b/arch/x86/include/asm/desc_defs.h index 9d6684849fd9..278441f39856 100644 --- a/arch/x86/include/asm/desc_defs.h +++ b/arch/x86/include/asm/desc_defs.h @@ -12,9 +12,9 @@ #include /* - * FIXME: Acessing the desc_struct through its fields is more elegant, + * FIXME: Accessing the desc_struct through its fields is more elegant, * and should be the one valid thing to do. However, a lot of open code - * still touches the a and b acessors, and doing this allow us to do it + * still touches the a and b accessors, and doing this allow us to do it * incrementally. We keep the signature as a struct, rather than an union, * so we can get rid of it transparently in the future -- glommer */ diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h index ede6998bd92c..91df7c51806c 100644 --- a/arch/x86/include/asm/mmzone_32.h +++ b/arch/x86/include/asm/mmzone_32.h @@ -47,7 +47,7 @@ static inline void resume_map_numa_kva(pgd_t *pgd) {} /* * generic node memory support, the following assumptions apply: * - * 1) memory comes in 64Mb contigious chunks which are either present or not + * 1) memory comes in 64Mb contiguous chunks which are either present or not * 2) we will not have more than 64Gb in total * * for now assume that 64Gb is max amount of RAM for whole system diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index 80e2984f521c..b414d2b401f6 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h @@ -55,7 +55,7 @@ #define DESC_STATUS_SOURCE_TIMEOUT 3 /* - * source side threshholds at which message retries print a warning + * source side thresholds at which message retries print a warning */ #define SOURCE_TIMEOUT_LIMIT 20 #define DESTINATION_TIMEOUT_LIMIT 20 diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 67e929b89875..1c2c4838d35c 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1122,7 +1122,7 @@ static int __init acpi_parse_madt_ioapic_entries(void) if (!acpi_sci_override_gsi) acpi_sci_ioapic_setup(acpi_gbl_FADT.sci_interrupt, 0, 0); - /* Fill in identity legacy mapings where no override */ + /* Fill in identity legacy mappings where no override */ mp_config_acpi_legacy_irqs(); count = diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 0285521e0a99..42ac5e000995 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1637,7 +1637,7 @@ retry: goto out; /* - * aperture was sucessfully enlarged by 128 MB, try + * aperture was successfully enlarged by 128 MB, try * allocation again */ goto retry; @@ -2396,7 +2396,7 @@ int __init amd_iommu_init_passthrough(void) struct pci_dev *dev = NULL; u16 devid, devid2; - /* allocate passthroug domain */ + /* allocate passthrough domain */ pt_domain = protection_domain_alloc(); if (!pt_domain) return -ENOMEM; diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index b5801c311846..35be5802ac1e 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1229,7 +1229,7 @@ x86_perf_event_set_period(struct perf_event *event, return 0; /* - * If we are way outside a reasoable range then just skip forward: + * If we are way outside a reasonable range then just skip forward: */ if (unlikely(left <= -period)) { left = period; diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 7b5169d2b000..7d377379fa4a 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -514,7 +514,7 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs, /* * Interrupts are disabled on entry as trap3 is an interrupt gate and they - * remain disabled thorough out this function. + * remain disabled throughout this function. */ static int __kprobes kprobe_handler(struct pt_regs *regs) { @@ -851,7 +851,7 @@ no_change: /* * Interrupts are disabled on entry as trap1 is an interrupt gate and they - * remain disabled thoroughout this function. + * remain disabled throughout this function. */ static int __kprobes post_kprobe_handler(struct pt_regs *regs) { diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 16ccbd77917f..d16d576beebf 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -203,7 +203,7 @@ static void disarm_kmmio_fault_page(struct kmmio_fault_page *f) */ /* * Interrupts are disabled on entry as trap3 is an interrupt gate - * and they remain disabled thorough out this function. + * and they remain disabled throughout this function. */ int kmmio_handler(struct pt_regs *regs, unsigned long addr) { @@ -302,7 +302,7 @@ no_kmmio: /* * Interrupts are disabled on entry as trap1 is an interrupt gate - * and they remain disabled thorough out this function. + * and they remain disabled throughout this function. * This must always get called as the pair to kmmio_handler(). */ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) diff --git a/block/blk-iopoll.c b/block/blk-iopoll.c index ca564202ed7a..58916afbbda5 100644 --- a/block/blk-iopoll.c +++ b/block/blk-iopoll.c @@ -28,7 +28,7 @@ static DEFINE_PER_CPU(struct list_head, blk_cpu_iopoll); * Description: * Add this blk_iopoll structure to the pending poll list and trigger the * raise of the blk iopoll softirq. The driver must already have gotten a - * succesful return from blk_iopoll_sched_prep() before calling this. + * successful return from blk_iopoll_sched_prep() before calling this. **/ void blk_iopoll_sched(struct blk_iopoll *iop) { diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c index 9ac4e378992e..3aadded05a05 100644 --- a/drivers/ata/ata_piix.c +++ b/drivers/ata/ata_piix.c @@ -599,7 +599,7 @@ static const struct ich_laptop ich_laptop[] = { { 0x27DF, 0x1028, 0x02b0 }, /* ICH7 on unknown Dell */ { 0x27DF, 0x1043, 0x1267 }, /* ICH7 on Asus W5F */ { 0x27DF, 0x103C, 0x30A1 }, /* ICH7 on HP Compaq nc2400 */ - { 0x27DF, 0x103C, 0x361a }, /* ICH7 on unkown HP */ + { 0x27DF, 0x103C, 0x361a }, /* ICH7 on unknown HP */ { 0x27DF, 0x1071, 0xD221 }, /* ICH7 on Hercules EC-900 */ { 0x27DF, 0x152D, 0x0778 }, /* ICH7 on unknown Intel */ { 0x24CA, 0x1025, 0x0061 }, /* ICH4 on ACER Aspire 2023WLMi */ diff --git a/drivers/ata/sata_fsl.c b/drivers/ata/sata_fsl.c index d344db42a002..0d9d2f20788a 100644 --- a/drivers/ata/sata_fsl.c +++ b/drivers/ata/sata_fsl.c @@ -43,9 +43,9 @@ enum { /* * SATA-FSL host controller supports a max. of (15+1) direct PRDEs, and * chained indirect PRDEs upto a max count of 63. - * We are allocating an array of 63 PRDEs contigiously, but PRDE#15 will + * We are allocating an array of 63 PRDEs contiguously, but PRDE#15 will * be setup as an indirect descriptor, pointing to it's next - * (contigious) PRDE. Though chained indirect PRDE arrays are + * (contiguous) PRDE. Though chained indirect PRDE arrays are * supported,it will be more efficient to use a direct PRDT and * a single chain/link to indirect PRDE array/PRDT. */ @@ -314,7 +314,7 @@ static unsigned int sata_fsl_fill_sg(struct ata_queued_cmd *qc, void *cmd_desc, u32 ttl_dwords = 0; /* - * NOTE : direct & indirect prdt's are contigiously allocated + * NOTE : direct & indirect prdt's are contiguously allocated */ struct prde *prd = (struct prde *)&((struct command_desc *) cmd_desc)->prdt; diff --git a/drivers/atm/iphase.c b/drivers/atm/iphase.c index b2c1b37ab2e4..f734b345ac71 100644 --- a/drivers/atm/iphase.c +++ b/drivers/atm/iphase.c @@ -1132,7 +1132,7 @@ static int rx_pkt(struct atm_dev *dev) IF_ERR(printk(" cause: packet time out\n");) } else { - IF_ERR(printk(" cause: buffer over flow\n");) + IF_ERR(printk(" cause: buffer overflow\n");) } goto out_free_desc; } diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 979d159b5cd1..ee95c76bfd3d 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -188,7 +188,7 @@ EXPORT_SYMBOL_GPL(wait_for_device_probe); * @dev: device to try to bind to the driver * * This function returns -ENODEV if the device is not registered, - * 1 if the device is bound sucessfully and 0 otherwise. + * 1 if the device is bound successfully and 0 otherwise. * * This function must be called with @dev->sem held. When called for a * USB interface, @dev->parent->sem must be held as well. diff --git a/drivers/bluetooth/btmrvl_sdio.c b/drivers/bluetooth/btmrvl_sdio.c index 5b33b85790f2..63bfc5436799 100644 --- a/drivers/bluetooth/btmrvl_sdio.c +++ b/drivers/bluetooth/btmrvl_sdio.c @@ -535,7 +535,7 @@ static int btmrvl_sdio_card_to_host(struct btmrvl_private *priv) break; default: - BT_ERR("Unknow packet type:%d", type); + BT_ERR("Unknown packet type:%d", type); print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, payload, blksz * buf_block_len); diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c index 4895f0e05322..aa0919386b8c 100644 --- a/drivers/bluetooth/hci_ldisc.c +++ b/drivers/bluetooth/hci_ldisc.c @@ -214,7 +214,7 @@ static int hci_uart_send_frame(struct sk_buff *skb) struct hci_uart *hu; if (!hdev) { - BT_ERR("Frame for uknown device (hdev=NULL)"); + BT_ERR("Frame for unknown device (hdev=NULL)"); return -ENODEV; } diff --git a/drivers/char/mem.c b/drivers/char/mem.c index a074fceb67d3..42e65cf8ab52 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -5,7 +5,7 @@ * * Added devfs support. * Jan-11-1998, C. Scott Ananian - * Shared /dev/zero mmaping support, Feb 2000, Kanoj Sarcar + * Shared /dev/zero mmapping support, Feb 2000, Kanoj Sarcar */ #include diff --git a/drivers/char/mspec.c b/drivers/char/mspec.c index 1997270bb6f4..ecb89d798e35 100644 --- a/drivers/char/mspec.c +++ b/drivers/char/mspec.c @@ -248,7 +248,7 @@ static const struct vm_operations_struct mspec_vm_ops = { /* * mspec_mmap * - * Called when mmaping the device. Initializes the vma with a fault handler + * Called when mmapping the device. Initializes the vma with a fault handler * and private data structure necessary to allocate, track, and free the * underlying pages. */ diff --git a/drivers/char/n_r3964.c b/drivers/char/n_r3964.c index 6934025a1ac1..c1d8b54c816d 100644 --- a/drivers/char/n_r3964.c +++ b/drivers/char/n_r3964.c @@ -602,7 +602,7 @@ static void receive_char(struct r3964_info *pInfo, const unsigned char c) } break; case R3964_WAIT_FOR_RX_REPEAT: - /* FALLTROUGH */ + /* FALLTHROUGH */ case R3964_IDLE: if (c == STX) { /* Prevent rx_queue from overflow: */ diff --git a/drivers/char/rio/route.h b/drivers/char/rio/route.h index 20ed73f3fd7b..46e963771c30 100644 --- a/drivers/char/rio/route.h +++ b/drivers/char/rio/route.h @@ -67,7 +67,7 @@ typedef struct COST_ROUTE COST_ROUTE; struct COST_ROUTE { unsigned char cost; /* Cost down this link */ - unsigned char route[NODE_BYTES]; /* Nodes thorough this route */ + unsigned char route[NODE_BYTES]; /* Nodes through this route */ }; typedef struct ROUTE_STR ROUTE_STR; diff --git a/drivers/crypto/hifn_795x.c b/drivers/crypto/hifn_795x.c index 5f753fc08730..09ad9154d86c 100644 --- a/drivers/crypto/hifn_795x.c +++ b/drivers/crypto/hifn_795x.c @@ -863,7 +863,7 @@ static int hifn_init_pubrng(struct hifn_device *dev) dev->dmareg |= HIFN_DMAIER_PUBDONE; hifn_write_1(dev, HIFN_1_DMA_IER, dev->dmareg); - dprintk("Chip %s: Public key engine has been sucessfully " + dprintk("Chip %s: Public key engine has been successfully " "initialised.\n", dev->name); } diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 7585c4164bd5..c52ac9efd0bf 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -99,7 +99,7 @@ static struct at_desc *atc_alloc_descriptor(struct dma_chan *chan, } /** - * atc_desc_get - get a unsused descriptor from free_list + * atc_desc_get - get an unused descriptor from free_list * @atchan: channel we want a new descriptor for */ static struct at_desc *atc_desc_get(struct at_dma_chan *atchan) diff --git a/drivers/firewire/core-topology.c b/drivers/firewire/core-topology.c index fddf2b358936..d373d17257e9 100644 --- a/drivers/firewire/core-topology.c +++ b/drivers/firewire/core-topology.c @@ -183,7 +183,7 @@ static inline struct fw_node *fw_node(struct list_head *l) * This function builds the tree representation of the topology given * by the self IDs from the latest bus reset. During the construction * of the tree, the function checks that the self IDs are valid and - * internally consistent. On succcess this function returns the + * internally consistent. On success this function returns the * fw_node corresponding to the local card otherwise NULL. */ static struct fw_node *build_tree(struct fw_card *card, diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index 5cae0b3eee9b..3f7c500b2115 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -272,7 +272,7 @@ EXPORT_SYMBOL(drm_mode_object_find); * functions & device file and adds it to the master fd list. * * RETURNS: - * Zero on success, error code on falure. + * Zero on success, error code on failure. */ int drm_framebuffer_init(struct drm_device *dev, struct drm_framebuffer *fb, const struct drm_framebuffer_funcs *funcs) @@ -2328,7 +2328,7 @@ int drm_mode_connector_property_set_ioctl(struct drm_device *dev, } else if (connector->funcs->set_property) ret = connector->funcs->set_property(connector, property, out_resp->value); - /* store the property value if succesful */ + /* store the property value if successful */ if (!ret) drm_connector_property_set_value(connector, property, out_resp->value); out: diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index abfc27b0c2ea..a2a3fa599923 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1309,7 +1309,7 @@ out_free_list: * i915_gem_release_mmap - remove physical page mappings * @obj: obj in question * - * Preserve the reservation of the mmaping with the DRM core code, but + * Preserve the reservation of the mmapping with the DRM core code, but * relinquish ownership of the pages back to the system. * * It is vital that we remove the page mapping if we have mapped a tiled diff --git a/drivers/gpu/drm/i915/intel_fb.c b/drivers/gpu/drm/i915/intel_fb.c index 2b0fe54cd92c..40fcf6fdef38 100644 --- a/drivers/gpu/drm/i915/intel_fb.c +++ b/drivers/gpu/drm/i915/intel_fb.c @@ -70,7 +70,7 @@ static struct drm_fb_helper_funcs intel_fb_helper_funcs = { /** - * Curretly it is assumed that the old framebuffer is reused. + * Currently it is assumed that the old framebuffer is reused. * * LOCKING * caller should hold the mode config lock. diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index 083bec2e50f9..e7fa3279e2f8 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -2726,7 +2726,7 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device) /* Wrap with our custom algo which switches to DDC mode */ intel_output->ddc_bus->algo = &intel_sdvo_i2c_bit_algo; - /* In defaut case sdvo lvds is false */ + /* In default case sdvo lvds is false */ intel_sdvo_get_capabilities(intel_output, &sdvo_priv->caps); if (intel_sdvo_output_setup(intel_output, diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 609719490ec2..00c739c44848 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -389,11 +389,11 @@ int r600_mc_init(struct radeon_device *rdev) * AGP so that GPU can catch out of VRAM/AGP access */ if (rdev->mc.gtt_location > rdev->mc.mc_vram_size) { - /* Enought place before */ + /* Enough place before */ rdev->mc.vram_location = rdev->mc.gtt_location - rdev->mc.mc_vram_size; } else if (tmp > rdev->mc.mc_vram_size) { - /* Enought place after */ + /* Enough place after */ rdev->mc.vram_location = rdev->mc.gtt_location + rdev->mc.gtt_size; } else { diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c index b38c4c8e2c61..d10eb43645c8 100644 --- a/drivers/gpu/drm/radeon/radeon_fb.c +++ b/drivers/gpu/drm/radeon/radeon_fb.c @@ -59,7 +59,7 @@ static struct fb_ops radeonfb_ops = { }; /** - * Curretly it is assumed that the old framebuffer is reused. + * Currently it is assumed that the old framebuffer is reused. * * LOCKING * caller should hold the mode config lock. diff --git a/drivers/gpu/drm/radeon/radeon_state.c b/drivers/gpu/drm/radeon/radeon_state.c index 38537d971a3e..067167cb39ca 100644 --- a/drivers/gpu/drm/radeon/radeon_state.c +++ b/drivers/gpu/drm/radeon/radeon_state.c @@ -1950,7 +1950,7 @@ static void radeon_apply_surface_regs(int surf_index, * Note that refcount can be at most 2, since during a free refcount=3 * might mean we have to allocate a new surface which might not always * be available. - * For example : we allocate three contigous surfaces ABC. If B is + * For example : we allocate three contiguous surfaces ABC. If B is * freed, we suddenly need two surfaces to store A and C, which might * not always be available. */ diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 765bd184b6fc..5a664000bf70 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -372,7 +372,7 @@ static int radeon_bo_move(struct ttm_buffer_object *bo, new_mem->mem_type == TTM_PL_SYSTEM) || (old_mem->mem_type == TTM_PL_SYSTEM && new_mem->mem_type == TTM_PL_TT)) { - /* bind is enought */ + /* bind is enough */ radeon_move_null(bo, new_mem); return 0; } diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c index 595ac638039d..9e9826ace305 100644 --- a/drivers/gpu/drm/radeon/rv770.c +++ b/drivers/gpu/drm/radeon/rv770.c @@ -807,11 +807,11 @@ int rv770_mc_init(struct radeon_device *rdev) * AGP so that GPU can catch out of VRAM/AGP access */ if (rdev->mc.gtt_location > rdev->mc.mc_vram_size) { - /* Enought place before */ + /* Enough place before */ rdev->mc.vram_location = rdev->mc.gtt_location - rdev->mc.mc_vram_size; } else if (tmp > rdev->mc.mc_vram_size) { - /* Enought place after */ + /* Enough place after */ rdev->mc.vram_location = rdev->mc.gtt_location + rdev->mc.gtt_size; } else { diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index c70927ecda21..61c5572d2b91 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -427,7 +427,7 @@ static int ttm_bo_kmap_ttm(struct ttm_buffer_object *bo, /* * We need to use vmap to get the desired page protection - * or to make the buffer object look contigous. + * or to make the buffer object look contiguous. */ prot = (mem->placement & TTM_PL_FLAG_CACHED) ? PAGE_KERNEL : diff --git a/drivers/hwmon/adm1029.c b/drivers/hwmon/adm1029.c index 36718150b475..e845b75ccee4 100644 --- a/drivers/hwmon/adm1029.c +++ b/drivers/hwmon/adm1029.c @@ -432,7 +432,7 @@ static int adm1029_remove(struct i2c_client *client) } /* -function that update the status of the chips (temperature for exemple) +function that update the status of the chips (temperature for example) */ static struct adm1029_data *adm1029_update_device(struct device *dev) { diff --git a/drivers/hwmon/lm93.c b/drivers/hwmon/lm93.c index fc36cadf36fb..c48a284b8314 100644 --- a/drivers/hwmon/lm93.c +++ b/drivers/hwmon/lm93.c @@ -928,7 +928,7 @@ static void lm93_update_client_common(struct lm93_data *data, data->prochot_interval = lm93_read_byte(client, LM93_REG_PROCHOT_INTERVAL); - /* Fan Boost Termperature registers */ + /* Fan Boost Temperature registers */ for (i = 0; i < 4; i++) data->boost[i] = lm93_read_byte(client, LM93_REG_BOOST(i)); diff --git a/drivers/ieee1394/dv1394.c b/drivers/ieee1394/dv1394.c index 2cd00b5b45b4..9fd4a0d3206e 100644 --- a/drivers/ieee1394/dv1394.c +++ b/drivers/ieee1394/dv1394.c @@ -125,7 +125,7 @@ 0 - no debugging messages 1 - some debugging messages, but none during DMA frame transmission 2 - lots of messages, including during DMA frame transmission - (will cause undeflows if your machine is too slow!) + (will cause underflows if your machine is too slow!) */ #define DV1394_DEBUG_LEVEL 0 diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c index 4bd39c8af80f..37d12e5efa49 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba6110.c +++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c @@ -381,7 +381,7 @@ static const ipath_err_t infinipath_hwe_htclnkbbyte1crcerr = #define IPATH_GPIO_SCL \ (1ULL << (_IPATH_GPIO_SCL_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT)) -/* keep the code below somewhat more readonable; not used elsewhere */ +/* keep the code below somewhat more readable; not used elsewhere */ #define _IPATH_HTLINK0_CRCBITS (infinipath_hwe_htclnkabyte0crcerr | \ infinipath_hwe_htclnkabyte1crcerr) #define _IPATH_HTLINK1_CRCBITS (infinipath_hwe_htclnkbbyte0crcerr | \ diff --git a/drivers/infiniband/hw/ipath/ipath_sd7220.c b/drivers/infiniband/hw/ipath/ipath_sd7220.c index aa47eb549520..2a68d9f624dd 100644 --- a/drivers/infiniband/hw/ipath/ipath_sd7220.c +++ b/drivers/infiniband/hw/ipath/ipath_sd7220.c @@ -614,7 +614,7 @@ static int epb_trans(struct ipath_devdata *dd, u16 reg, u64 i_val, u64 *o_vp) * @wd: Write Data - value to set in register * @mask: ones where data should be spliced into reg. * - * Basic register read/modify/write, with un-needed acesses elided. That is, + * Basic register read/modify/write, with un-needed accesses elided. That is, * a mask of zero will prevent write, while a mask of 0xFF will prevent read. * returns current (presumed, if a write was done) contents of selected * register, or <0 if errors. @@ -989,7 +989,7 @@ static struct rxeq_init { /* Set DFELTHFDR/HDR thresholds */ RXEQ_VAL(7, 8, 0, 0, 0, 0), /* FDR */ RXEQ_VAL(7, 0x21, 0, 0, 0, 0), /* HDR */ - /* Set TLTHFDR/HDR theshold */ + /* Set TLTHFDR/HDR threshold */ RXEQ_VAL(7, 9, 2, 2, 2, 2), /* FDR */ RXEQ_VAL(7, 0x23, 2, 2, 2, 2), /* HDR */ /* Set Preamp setting 2 (ZFR/ZCNT) */ diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 219b10397b4d..256a00c6aeea 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -352,7 +352,7 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, * anymore, so we do this only if selective signaling is off. * * Further, on 32-bit platforms, we can't use vmap() to make - * the QP buffer virtually contigious. Thus we have to use + * the QP buffer virtually contiguous. Thus we have to use * constant-sized WRs to make sure a WR is always fully within * a single page-sized chunk. * diff --git a/drivers/input/serio/hp_sdc.c b/drivers/input/serio/hp_sdc.c index 1c9410d1822c..bcc2d30ec245 100644 --- a/drivers/input/serio/hp_sdc.c +++ b/drivers/input/serio/hp_sdc.c @@ -955,7 +955,7 @@ static int __init hp_sdc_init_hppa(struct parisc_device *d) INIT_DELAYED_WORK(&moduleloader_work, request_module_delayed); ret = hp_sdc_init(); - /* after sucessfull initialization give SDC some time to settle + /* after successfull initialization give SDC some time to settle * and then load the hp_sdc_mlc upper layer driver */ if (!ret) schedule_delayed_work(&moduleloader_work, diff --git a/drivers/input/serio/hp_sdc_mlc.c b/drivers/input/serio/hp_sdc_mlc.c index 820e51673b26..7d2b820ef58d 100644 --- a/drivers/input/serio/hp_sdc_mlc.c +++ b/drivers/input/serio/hp_sdc_mlc.c @@ -125,7 +125,7 @@ static void hp_sdc_mlc_isr (int irq, void *dev_id, break; default: - printk(KERN_WARNING PREFIX "Unkown HIL Error status (%x)!\n", data); + printk(KERN_WARNING PREFIX "Unknown HIL Error status (%x)!\n", data); break; } diff --git a/drivers/input/touchscreen/atmel-wm97xx.c b/drivers/input/touchscreen/atmel-wm97xx.c index 35377f583e28..a12242f77e23 100644 --- a/drivers/input/touchscreen/atmel-wm97xx.c +++ b/drivers/input/touchscreen/atmel-wm97xx.c @@ -59,7 +59,7 @@ #define ATMEL_WM97XX_AC97C_IRQ (29) #define ATMEL_WM97XX_GPIO_DEFAULT (32+16) /* Pin 16 on port B. */ #else -#error Unkown CPU, this driver only supports AT32AP700X CPUs. +#error Unknown CPU, this driver only supports AT32AP700X CPUs. #endif struct continuous { diff --git a/drivers/input/touchscreen/mainstone-wm97xx.c b/drivers/input/touchscreen/mainstone-wm97xx.c index 8fc3b08deb3b..6cdcf2a6e036 100644 --- a/drivers/input/touchscreen/mainstone-wm97xx.c +++ b/drivers/input/touchscreen/mainstone-wm97xx.c @@ -14,7 +14,7 @@ * * Notes: * This is a wm97xx extended touch driver to capture touch - * data in a continuous manner on the Intel XScale archictecture + * data in a continuous manner on the Intel XScale architecture * * Features: * - codecs supported:- WM9705, WM9712, WM9713 @@ -131,7 +131,7 @@ static int wm97xx_acc_pen_down(struct wm97xx *wm) /* When the AC97 queue has been drained we need to allow time * to buffer up samples otherwise we end up spinning polling * for samples. The controller can't have a suitably low - * threashold set to use the notifications it gives. + * threshold set to use the notifications it gives. */ schedule_timeout_uninterruptible(1); diff --git a/drivers/input/touchscreen/zylonite-wm97xx.c b/drivers/input/touchscreen/zylonite-wm97xx.c index 41e4359c277c..eca54dbdf493 100644 --- a/drivers/input/touchscreen/zylonite-wm97xx.c +++ b/drivers/input/touchscreen/zylonite-wm97xx.c @@ -96,7 +96,7 @@ static int wm97xx_acc_pen_down(struct wm97xx *wm) /* When the AC97 queue has been drained we need to allow time * to buffer up samples otherwise we end up spinning polling * for samples. The controller can't have a suitably low - * threashold set to use the notifications it gives. + * threshold set to use the notifications it gives. */ msleep(1); diff --git a/drivers/isdn/capi/capidrv.c b/drivers/isdn/capi/capidrv.c index 3e6d17f42a98..66b7d7a86474 100644 --- a/drivers/isdn/capi/capidrv.c +++ b/drivers/isdn/capi/capidrv.c @@ -830,7 +830,7 @@ static void handle_controller(_cmsg * cmsg) case 0: break; case 1: s = "unknown class"; break; case 2: s = "unknown function"; break; - default: s = "unkown error"; break; + default: s = "unknown error"; break; } if (s) printk(KERN_INFO "capidrv-%d: %s from controller 0x%x function %d: %s\n", diff --git a/drivers/isdn/hardware/eicon/di.c b/drivers/isdn/hardware/eicon/di.c index b029d130eb21..cb14ae3e7154 100644 --- a/drivers/isdn/hardware/eicon/di.c +++ b/drivers/isdn/hardware/eicon/di.c @@ -806,7 +806,7 @@ static void xdi_xlog_request (byte Adapter, byte Id, DELIVERY - indication entered isdn_rc function RNR=... - application had returned RNR=... after the look ahead callback - RNum=0 - aplication had not returned any buffer to copy + RNum=0 - application had not returned any buffer to copy this indication and will copy it self COMPLETE - XDI had copied the data to the buffers provided bu the application and is about to issue the diff --git a/drivers/isdn/hardware/eicon/maintidi.c b/drivers/isdn/hardware/eicon/maintidi.c index 23960cb6eaab..e7cfb3b5647f 100644 --- a/drivers/isdn/hardware/eicon/maintidi.c +++ b/drivers/isdn/hardware/eicon/maintidi.c @@ -385,7 +385,7 @@ static int SuperTraceMessageInput (void* hLib) { } break; default: - diva_mnt_internal_dprintf (0, DLI_ERR, "Unknon IDI Ind (DMA mode): %02x", Ind); + diva_mnt_internal_dprintf (0, DLI_ERR, "Unknown IDI Ind (DMA mode): %02x", Ind); } p += (this_ind_length+1); total_length -= (4 + this_ind_length); @@ -420,7 +420,7 @@ static int SuperTraceMessageInput (void* hLib) { } break; default: - diva_mnt_internal_dprintf (0, DLI_ERR, "Unknon IDI Ind: %02x", Ind); + diva_mnt_internal_dprintf (0, DLI_ERR, "Unknown IDI Ind: %02x", Ind); } } } diff --git a/drivers/isdn/hardware/mISDN/hfcsusb.c b/drivers/isdn/hardware/mISDN/hfcsusb.c index fc46a26cb14f..a64bb6c67ba7 100644 --- a/drivers/isdn/hardware/mISDN/hfcsusb.c +++ b/drivers/isdn/hardware/mISDN/hfcsusb.c @@ -721,7 +721,7 @@ hfcsusb_setup_bch(struct bchannel *bch, int protocol) switch (protocol) { case (-1): /* used for init */ bch->state = -1; - /* fall trough */ + /* fall through */ case (ISDN_P_NONE): if (bch->state == ISDN_P_NONE) return 0; /* already in idle state */ diff --git a/drivers/isdn/hardware/mISDN/hfcsusb.h b/drivers/isdn/hardware/mISDN/hfcsusb.h index 43efe7358fa3..369196adae03 100644 --- a/drivers/isdn/hardware/mISDN/hfcsusb.h +++ b/drivers/isdn/hardware/mISDN/hfcsusb.h @@ -150,7 +150,7 @@ symbolic(struct hfcusb_symbolic_list list[], const int num) for (i = 0; list[i].name != NULL; i++) if (list[i].num == num) return list[i].name; - return ""; + return ""; } /* USB descriptor need to contain one of the following EndPoint combination: */ diff --git a/drivers/isdn/hardware/mISDN/mISDNisar.c b/drivers/isdn/hardware/mISDN/mISDNisar.c index de352a17673a..09095c747110 100644 --- a/drivers/isdn/hardware/mISDN/mISDNisar.c +++ b/drivers/isdn/hardware/mISDN/mISDNisar.c @@ -860,7 +860,7 @@ isar_pump_statev_modem(struct isar_ch *ch, u8 devt) { pr_debug("%s: pump stev GSTN CLEAR\n", ch->is->name); break; default: - pr_info("u%s: nknown pump stev %x\n", ch->is->name, devt); + pr_info("u%s: unknown pump stev %x\n", ch->is->name, devt); break; } } diff --git a/drivers/isdn/hisax/hfc_usb.c b/drivers/isdn/hisax/hfc_usb.c index 9de54202c90c..ad5831f37d84 100644 --- a/drivers/isdn/hisax/hfc_usb.c +++ b/drivers/isdn/hisax/hfc_usb.c @@ -1086,7 +1086,7 @@ hfc_usb_l2l1(struct hisax_if *my_hisax_if, int pr, void *arg) break; default: DBG(HFCUSB_DBG_STATES, - "HFC_USB: hfc_usb_d_l2l1: unkown state : %#x", pr); + "HFC_USB: hfc_usb_d_l2l1: unknown state : %#x", pr); break; } } diff --git a/drivers/isdn/i4l/isdn_ppp.c b/drivers/isdn/i4l/isdn_ppp.c index 2d14b64202a3..0f4ea7d16a15 100644 --- a/drivers/isdn/i4l/isdn_ppp.c +++ b/drivers/isdn/i4l/isdn_ppp.c @@ -836,7 +836,7 @@ isdn_ppp_write(int min, struct file *file, const char __user *buf, int count) unsigned short hl; struct sk_buff *skb; /* - * we need to reserve enought space in front of + * we need to reserve enough space in front of * sk_buff. old call to dev_alloc_skb only reserved * 16 bytes, now we are looking what the driver want */ @@ -1326,7 +1326,7 @@ isdn_ppp_xmit(struct sk_buff *skb, struct net_device *netdev) struct sk_buff *new_skb; unsigned short hl; /* - * we need to reserve enought space in front of + * we need to reserve enough space in front of * sk_buff. old call to dev_alloc_skb only reserved * 16 bytes, now we are looking what the driver want. */ @@ -1685,7 +1685,7 @@ static void isdn_ppp_mp_receive(isdn_net_dev * net_dev, isdn_net_local * lp, * * try to accomplish several tasks: * - reassemble any complete fragment sequence (non-null 'start' - * indicates there is a continguous sequence present) + * indicates there is a contiguous sequence present) * - discard any incomplete sequences that are below minseq -- due * to the fact that sender always increment sequence number, if there * is an incomplete sequence below minseq, no new fragments would diff --git a/drivers/isdn/i4l/isdn_ttyfax.c b/drivers/isdn/i4l/isdn_ttyfax.c index 78f7660c1d0e..4c41f191d4e2 100644 --- a/drivers/isdn/i4l/isdn_ttyfax.c +++ b/drivers/isdn/i4l/isdn_ttyfax.c @@ -470,7 +470,7 @@ isdn_tty_cmd_FCLASS2(char **p, modem_info * info) } return 0; } - /* BADMUL=value - dummy 0=disable errorchk disabled (treshold multiplier) */ + /* BADMUL=value - dummy 0=disable errorchk disabled (threshold multiplier) */ if (!strncmp(p[0], "BADMUL", 6)) { p[0] += 6; switch (*p[0]) { diff --git a/drivers/isdn/mISDN/dsp_core.c b/drivers/isdn/mISDN/dsp_core.c index 77ee2867c8b4..43ff4d3b046e 100644 --- a/drivers/isdn/mISDN/dsp_core.c +++ b/drivers/isdn/mISDN/dsp_core.c @@ -110,7 +110,7 @@ * crossconnections and conferences via software if not possible through * hardware. If hardware capability is available, hardware is used. * - * Echo: Is generated by CMX and is used to check performane of hard and + * Echo: Is generated by CMX and is used to check performance of hard and * software CMX. * * The CMX has special functions for conferences with one, two and more diff --git a/drivers/isdn/mISDN/tei.c b/drivers/isdn/mISDN/tei.c index e04bad6c5baf..6d4da6095885 100644 --- a/drivers/isdn/mISDN/tei.c +++ b/drivers/isdn/mISDN/tei.c @@ -725,7 +725,7 @@ tei_id_ver_tout_net(struct FsmInst *fi, int event, void *arg) if (tm->rcnt == 1) { if (*debug & DEBUG_L2_TEI) tm->tei_m.printdebug(fi, - "check req for tei %d sucessful\n", tm->l2->tei); + "check req for tei %d successful\n", tm->l2->tei); mISDN_FsmChangeState(fi, ST_TEI_NOP); } else if (tm->rcnt > 1) { /* duplicate assignment; remove */ diff --git a/drivers/macintosh/therm_windtunnel.c b/drivers/macintosh/therm_windtunnel.c index 8b9364434aa0..3fbe41b0ac07 100644 --- a/drivers/macintosh/therm_windtunnel.c +++ b/drivers/macintosh/therm_windtunnel.c @@ -15,7 +15,7 @@ * * WARNING: This driver has only been testen on Apple's * 1.25 MHz Dual G4 (March 03). It is tuned for a CPU - * temperatur around 57 C. + * temperature around 57 C. * * Copyright (C) 2003, 2004 Samuel Rydh (samuel@ibrium.se) * diff --git a/drivers/media/common/saa7146_i2c.c b/drivers/media/common/saa7146_i2c.c index 7e8f56815998..48cb154c7a46 100644 --- a/drivers/media/common/saa7146_i2c.c +++ b/drivers/media/common/saa7146_i2c.c @@ -98,7 +98,7 @@ static int saa7146_i2c_msg_cleanup(const struct i2c_msg *m, int num, __le32 *op) op_count++; - /* loop throgh all bytes of message i */ + /* loop through all bytes of message i */ for(j = 0; j < m[i].len; j++) { /* write back all bytes that could have been read */ m[i].buf[j] = (le32_to_cpu(op[op_count/3]) >> ((3-(op_count%3))*8)); diff --git a/drivers/media/dvb/dvb-core/dvb_frontend.h b/drivers/media/dvb/dvb-core/dvb_frontend.h index 810f07d63246..52e4ce4304ee 100644 --- a/drivers/media/dvb/dvb-core/dvb_frontend.h +++ b/drivers/media/dvb/dvb-core/dvb_frontend.h @@ -160,7 +160,7 @@ struct tuner_state { * search callback possible return status * * DVBFE_ALGO_SEARCH_SUCCESS - * The frontend search algorithm completed and returned succesfully + * The frontend search algorithm completed and returned successfully * * DVBFE_ALGO_SEARCH_ASLEEP * The frontend search algorithm is sleeping diff --git a/drivers/media/dvb/dvb-usb/anysee.c b/drivers/media/dvb/dvb-usb/anysee.c index 2ae7f648effe..bb69f3719f9a 100644 --- a/drivers/media/dvb/dvb-usb/anysee.c +++ b/drivers/media/dvb/dvb-usb/anysee.c @@ -344,7 +344,7 @@ static int anysee_frontend_attach(struct dvb_usb_adapter *adap) if (ret) return ret; - err("Unkown Anysee version: %02x %02x %02x. "\ + err("Unknown Anysee version: %02x %02x %02x. "\ "Please report the .", hw_info[0], hw_info[1], hw_info[2]); diff --git a/drivers/media/dvb/dvb-usb/dibusb-mb.c b/drivers/media/dvb/dvb-usb/dibusb-mb.c index eeef50bff4f9..5c0126dc1ff9 100644 --- a/drivers/media/dvb/dvb-usb/dibusb-mb.c +++ b/drivers/media/dvb/dvb-usb/dibusb-mb.c @@ -242,7 +242,7 @@ static struct dvb_usb_device_properties dibusb1_1_properties = { { &dibusb_dib3000mb_table[9], &dibusb_dib3000mb_table[11], NULL }, { &dibusb_dib3000mb_table[10], &dibusb_dib3000mb_table[12], NULL }, }, - { "Unkown USB1.1 DVB-T device ???? please report the name to the author", + { "Unknown USB1.1 DVB-T device ???? please report the name to the author", { &dibusb_dib3000mb_table[13], NULL }, { &dibusb_dib3000mb_table[14], NULL }, }, diff --git a/drivers/media/dvb/dvb-usb/dvb-usb-remote.c b/drivers/media/dvb/dvb-usb/dvb-usb-remote.c index edde87c6aa3a..6b5ded9e7d5d 100644 --- a/drivers/media/dvb/dvb-usb/dvb-usb-remote.c +++ b/drivers/media/dvb/dvb-usb/dvb-usb-remote.c @@ -259,7 +259,7 @@ int dvb_usb_nec_rc_key_to_event(struct dvb_usb_device *d, *state = REMOTE_KEY_REPEAT; break; default: - deb_err("unkown type of remote status: %d\n",keybuf[0]); + deb_err("unknown type of remote status: %d\n",keybuf[0]); break; } return 0; diff --git a/drivers/media/dvb/dvb-usb/usb-urb.c b/drivers/media/dvb/dvb-usb/usb-urb.c index 9da2cc95ca13..f9702e3756b6 100644 --- a/drivers/media/dvb/dvb-usb/usb-urb.c +++ b/drivers/media/dvb/dvb-usb/usb-urb.c @@ -56,7 +56,7 @@ static void usb_urb_complete(struct urb *urb) stream->complete(stream, b, urb->actual_length); break; default: - err("unkown endpoint type in completition handler."); + err("unknown endpoint type in completition handler."); return; } usb_submit_urb(urb,GFP_ATOMIC); @@ -228,7 +228,7 @@ int usb_urb_init(struct usb_data_stream *stream, struct usb_data_stream_properti case USB_ISOC: return usb_isoc_urb_init(stream); default: - err("unkown URB-type for data transfer."); + err("unknown URB-type for data transfer."); return -EINVAL; } } diff --git a/drivers/media/dvb/frontends/au8522_decoder.c b/drivers/media/dvb/frontends/au8522_decoder.c index 74981ee923c8..7c6431fe33e0 100644 --- a/drivers/media/dvb/frontends/au8522_decoder.c +++ b/drivers/media/dvb/frontends/au8522_decoder.c @@ -315,7 +315,7 @@ static void setup_decoder_defaults(struct au8522_state *state, u8 input_mode) if (input_mode == AU8522_INPUT_CONTROL_REG081H_SVIDEO_CH13 || input_mode == AU8522_INPUT_CONTROL_REG081H_SVIDEO_CH24) { /* Despite what the table says, for the HVR-950q we still need - to be in CVBS mode for the S-Video input (reason uknown). */ + to be in CVBS mode for the S-Video input (reason unknown). */ /* filter_coef_type = 3; */ filter_coef_type = 5; } else { diff --git a/drivers/media/dvb/frontends/cx24110.c b/drivers/media/dvb/frontends/cx24110.c index ffbcfabd83f0..00a4e8f03304 100644 --- a/drivers/media/dvb/frontends/cx24110.c +++ b/drivers/media/dvb/frontends/cx24110.c @@ -1,4 +1,4 @@ - /* +/* cx24110 - Single Chip Satellite Channel Receiver driver module Copyright (C) 2002 Peter Hettkamp based on @@ -96,7 +96,7 @@ static struct {u8 reg; u8 data;} cx24110_regdata[]= {0x42,0x00}, /* @ middle bytes " */ {0x43,0x00}, /* @ LSB " */ /* leave the carrier tracking loop parameters on default */ - /* leave the bit timing loop parameters at gefault */ + /* leave the bit timing loop parameters at default */ {0x56,0x4d}, /* set the filtune voltage to 2.7V, as recommended by */ /* the cx24108 data sheet for symbol rates above 15MS/s */ {0x57,0x00}, /* @ Filter sigma delta enabled, positive */ diff --git a/drivers/media/dvb/frontends/cx24113.c b/drivers/media/dvb/frontends/cx24113.c index 075b2b57cf09..e9ee55592fd3 100644 --- a/drivers/media/dvb/frontends/cx24113.c +++ b/drivers/media/dvb/frontends/cx24113.c @@ -589,7 +589,7 @@ struct dvb_frontend *cx24113_attach(struct dvb_frontend *fe, info("detected CX24113 variant\n"); break; case REV_CX24113: - info("sucessfully detected\n"); + info("successfully detected\n"); break; default: err("unsupported device id: %x\n", state->rev); diff --git a/drivers/media/dvb/frontends/dib3000mb.c b/drivers/media/dvb/frontends/dib3000mb.c index 136b9d2164d7..ad4c8cfd8090 100644 --- a/drivers/media/dvb/frontends/dib3000mb.c +++ b/drivers/media/dvb/frontends/dib3000mb.c @@ -154,7 +154,7 @@ static int dib3000mb_set_frontend(struct dvb_frontend* fe, case BANDWIDTH_AUTO: return -EOPNOTSUPP; default: - err("unkown bandwidth value."); + err("unknown bandwidth value."); return -EINVAL; } } diff --git a/drivers/media/dvb/frontends/lgdt330x.c b/drivers/media/dvb/frontends/lgdt330x.c index 056387b41a8f..43971e63baa7 100644 --- a/drivers/media/dvb/frontends/lgdt330x.c +++ b/drivers/media/dvb/frontends/lgdt330x.c @@ -479,7 +479,7 @@ static int lgdt3302_read_status(struct dvb_frontend* fe, fe_status_t* status) switch (state->current_modulation) { case QAM_256: case QAM_64: - /* Need to undestand why there are 3 lock levels here */ + /* Need to understand why there are 3 lock levels here */ if ((buf[0] & 0x07) == 0x07) *status |= FE_HAS_CARRIER; break; @@ -520,7 +520,7 @@ static int lgdt3303_read_status(struct dvb_frontend* fe, fe_status_t* status) switch (state->current_modulation) { case QAM_256: case QAM_64: - /* Need to undestand why there are 3 lock levels here */ + /* Need to understand why there are 3 lock levels here */ if ((buf[0] & 0x07) == 0x07) *status |= FE_HAS_CARRIER; else diff --git a/drivers/media/dvb/frontends/stb0899_drv.c b/drivers/media/dvb/frontends/stb0899_drv.c index a04c782fff8d..1570669837ea 100644 --- a/drivers/media/dvb/frontends/stb0899_drv.c +++ b/drivers/media/dvb/frontends/stb0899_drv.c @@ -1571,7 +1571,7 @@ static enum dvbfe_search stb0899_search(struct dvb_frontend *fe, struct dvb_fron * stb0899_track * periodically check the signal level against a specified * threshold level and perform derotator centering. - * called once we have a lock from a succesful search + * called once we have a lock from a successful search * event. * * Will be called periodically called to maintain the diff --git a/drivers/media/dvb/ttpci/av7110.c b/drivers/media/dvb/ttpci/av7110.c index 8d65c652ba50..baf3159a3aa6 100644 --- a/drivers/media/dvb/ttpci/av7110.c +++ b/drivers/media/dvb/ttpci/av7110.c @@ -2425,7 +2425,7 @@ static int __devinit av7110_attach(struct saa7146_dev* dev, * use 0x15 to track VPE interrupts - increase by 1 every vpeirq() is called */ saa7146_write(dev, EC1SSR, (0x03<<2) | 3 ); - /* set event counter 1 treshold to maximum allowed value (rEC p55) */ + /* set event counter 1 threshold to maximum allowed value (rEC p55) */ saa7146_write(dev, ECT1R, 0x3fff ); #endif /* Set RPS1 Address register to point to RPS code (r108 p42) */ @@ -2559,7 +2559,7 @@ static int __devinit av7110_attach(struct saa7146_dev* dev, * use 0x15 to track VPE interrupts - increase by 1 every vpeirq() is called */ saa7146_write(dev, EC1SSR, (0x03<<2) | 3 ); - /* set event counter 1 treshold to maximum allowed value (rEC p55) */ + /* set event counter 1 threshold to maximum allowed value (rEC p55) */ saa7146_write(dev, ECT1R, 0x3fff ); #endif /* Setup BUDGETPATCH MAIN RPS1 "program" (p35) */ diff --git a/drivers/media/dvb/ttpci/budget-patch.c b/drivers/media/dvb/ttpci/budget-patch.c index 60136688a9a4..9c92f9ddd223 100644 --- a/drivers/media/dvb/ttpci/budget-patch.c +++ b/drivers/media/dvb/ttpci/budget-patch.c @@ -456,7 +456,7 @@ static int budget_patch_attach (struct saa7146_dev* dev, struct saa7146_pci_exte // use 0x03 to track RPS1 interrupts - increase by 1 every gpio3 is toggled // use 0x15 to track VPE interrupts - increase by 1 every vpeirq() is called saa7146_write(dev, EC1SSR, (0x03<<2) | 3 ); - // set event counter 1 treshold to maximum allowed value (rEC p55) + // set event counter 1 threshold to maximum allowed value (rEC p55) saa7146_write(dev, ECT1R, 0x3fff ); #endif // Fix VSYNC level diff --git a/drivers/media/radio/radio-mr800.c b/drivers/media/radio/radio-mr800.c index a1239083472d..5f79acb56e48 100644 --- a/drivers/media/radio/radio-mr800.c +++ b/drivers/media/radio/radio-mr800.c @@ -28,7 +28,7 @@ * http://av-usbradio.sourceforge.net/index.php * http://sourceforge.net/projects/av-usbradio/ * Latest release of theirs project was in 2005. - * Probably, this driver could be improved trough using their + * Probably, this driver could be improved through using their * achievements (specifications given). * Also, Faidon Liambotis wrote nice driver for this radio * in 2007. He allowed to use his driver to improve current mr800 radio driver. diff --git a/drivers/media/video/cx231xx/cx231xx-avcore.c b/drivers/media/video/cx231xx/cx231xx-avcore.c index 28f48f41f218..c2174413ab29 100644 --- a/drivers/media/video/cx231xx/cx231xx-avcore.c +++ b/drivers/media/video/cx231xx/cx231xx-avcore.c @@ -2414,9 +2414,11 @@ int cx231xx_gpio_i2c_read_ack(struct cx231xx *dev) cx231xx_info("No ACK after %d msec -GPIO I2C failed!", nInit * 10); - /* readAck - throuth clock stretch ,slave has given a SCL signal, - so the SDA data can be directly read. */ + /* + * readAck + * through clock stretch, slave has given a SCL signal, + * so the SDA data can be directly read. + */ status = cx231xx_get_gpio_bit(dev, dev->gpio_dir, (u8 *)&dev->gpio_val); if ((dev->gpio_val & 1 << dev->board.tuner_sda_gpio) == 0) { diff --git a/drivers/media/video/cx23885/cx23885-dvb.c b/drivers/media/video/cx23885/cx23885-dvb.c index 45e13ee66dc7..16c6a921f40b 100644 --- a/drivers/media/video/cx23885/cx23885-dvb.c +++ b/drivers/media/video/cx23885/cx23885-dvb.c @@ -940,7 +940,7 @@ int cx23885_dvb_register(struct cx23885_tsport *port) int err, i; /* Here we need to allocate the correct number of frontends, - * as reflected in the cards struct. The reality is that currrently + * as reflected in the cards struct. The reality is that currently * no cx23885 boards support this - yet. But, if we don't modify this * code then the second frontend would never be allocated (later) * and fail with error before the attach in dvb_register(). diff --git a/drivers/media/video/cx88/cx88-core.c b/drivers/media/video/cx88/cx88-core.c index cf634606ba9a..b35411160f04 100644 --- a/drivers/media/video/cx88/cx88-core.c +++ b/drivers/media/video/cx88/cx88-core.c @@ -624,7 +624,7 @@ int cx88_reset(struct cx88_core *core) /* setup image format */ cx_andor(MO_COLOR_CTRL, 0x4000, 0x4000); - /* setup FIFO Threshholds */ + /* setup FIFO Thresholds */ cx_write(MO_PDMA_STHRSH, 0x0807); cx_write(MO_PDMA_DTHRSH, 0x0807); diff --git a/drivers/media/video/davinci/dm355_ccdc.c b/drivers/media/video/davinci/dm355_ccdc.c index 56fbefe036ae..314390016370 100644 --- a/drivers/media/video/davinci/dm355_ccdc.c +++ b/drivers/media/video/davinci/dm355_ccdc.c @@ -285,7 +285,7 @@ static int validate_ccdc_param(struct ccdc_config_params_raw *ccdcparam) if ((ccdcparam->med_filt_thres < 0) || (ccdcparam->med_filt_thres > CCDC_MED_FILT_THRESH)) { - dev_dbg(dev, "Invalid value of median filter thresold\n"); + dev_dbg(dev, "Invalid value of median filter threshold\n"); return -EINVAL; } diff --git a/drivers/media/video/davinci/vpss.c b/drivers/media/video/davinci/vpss.c index 6d709ca8cfb0..453236bd7559 100644 --- a/drivers/media/video/davinci/vpss.c +++ b/drivers/media/video/davinci/vpss.c @@ -53,7 +53,7 @@ struct vpss_hw_ops { int (*enable_clock)(enum vpss_clock_sel clock_sel, int en); /* select input to ccdc */ void (*select_ccdc_source)(enum vpss_ccdc_source_sel src_sel); - /* clear wbl overlflow bit */ + /* clear wbl overflow bit */ int (*clear_wbl_overflow)(enum vpss_wbl_sel wbl_sel); }; diff --git a/drivers/media/video/gspca/sonixb.c b/drivers/media/video/gspca/sonixb.c index cf3af8de6e97..e39efb45fa1c 100644 --- a/drivers/media/video/gspca/sonixb.c +++ b/drivers/media/video/gspca/sonixb.c @@ -304,7 +304,7 @@ static const __u8 initOv6650[] = { }; static const __u8 ov6650_sensor_init[][8] = { - /* Bright, contrast, etc are set througth SCBB interface. + /* Bright, contrast, etc are set through SCBB interface. * AVCAP on win2 do not send any data on this controls. */ /* Anyway, some registers appears to alter bright and constrat */ diff --git a/drivers/media/video/gspca/spca500.c b/drivers/media/video/gspca/spca500.c index fab7ef85a6c1..7dbd5eea6cc0 100644 --- a/drivers/media/video/gspca/spca500.c +++ b/drivers/media/video/gspca/spca500.c @@ -589,7 +589,7 @@ static void spca500_reinit(struct gspca_dev *gspca_dev) int err; __u8 Data; - /* some unknow command from Aiptek pocket dv and family300 */ + /* some unknown command from Aiptek pocket dv and family300 */ reg_w(gspca_dev, 0x00, 0x0d01, 0x01); reg_w(gspca_dev, 0x00, 0x0d03, 0x00); diff --git a/drivers/media/video/gspca/spca501.c b/drivers/media/video/gspca/spca501.c index b74a34218da0..66f9f0056146 100644 --- a/drivers/media/video/gspca/spca501.c +++ b/drivers/media/video/gspca/spca501.c @@ -1636,7 +1636,7 @@ static const __u16 spca501c_arowana_init_data[][3] = { {} }; -/* Unknow camera from Ori Usbid 0x0000:0x0000 */ +/* Unknown camera from Ori Usbid 0x0000:0x0000 */ /* Based on snoops from Ori Cohen */ static const __u16 spca501c_mysterious_open_data[][3] = { {0x02, 0x000f, 0x0005}, @@ -1945,7 +1945,7 @@ static int sd_init(struct gspca_dev *gspca_dev) goto error; break; case MystFromOriUnknownCamera: - /* UnKnow Ori CMOS Camera data */ + /* Unknown Ori CMOS Camera data */ if (write_vector(gspca_dev, spca501c_mysterious_open_data)) goto error; break; @@ -1978,7 +1978,7 @@ static int sd_start(struct gspca_dev *gspca_dev) write_vector(gspca_dev, spca501c_arowana_open_data); break; case MystFromOriUnknownCamera: - /* UnKnow CMOS Camera data */ + /* Unknown CMOS Camera data */ write_vector(gspca_dev, spca501c_mysterious_init_data); break; default: diff --git a/drivers/media/video/gspca/sunplus.c b/drivers/media/video/gspca/sunplus.c index aa8f995ce04e..1a9af2ebdbef 100644 --- a/drivers/media/video/gspca/sunplus.c +++ b/drivers/media/video/gspca/sunplus.c @@ -631,7 +631,7 @@ static void spca504A_acknowledged_command(struct gspca_dev *gspca_dev, count = 200; while (--count > 0) { msleep(10); - /* gsmart mini2 write a each wait setting 1 ms is enought */ + /* gsmart mini2 write a each wait setting 1 ms is enough */ /* reg_w_riv(dev, req, idx, val); */ status = reg_r_12(gspca_dev, 0x01, 0x0001, 1); if (status == endcode) { diff --git a/drivers/media/video/gspca/zc3xx.c b/drivers/media/video/gspca/zc3xx.c index cdf3357b4c9f..49c3c1226e0e 100644 --- a/drivers/media/video/gspca/zc3xx.c +++ b/drivers/media/video/gspca/zc3xx.c @@ -7065,7 +7065,7 @@ static int sd_config(struct gspca_dev *gspca_dev, break; default: PDEBUG(D_PROBE, - "Sensor UNKNOW_0 force Tas5130"); + "Sensor UNKNOWN_0 force Tas5130"); sd->sensor = SENSOR_TAS5130CXX; } break; diff --git a/drivers/media/video/pvrusb2/pvrusb2-hdw-internal.h b/drivers/media/video/pvrusb2/pvrusb2-hdw-internal.h index 5b152ff20bd0..5fcad28211d2 100644 --- a/drivers/media/video/pvrusb2/pvrusb2-hdw-internal.h +++ b/drivers/media/video/pvrusb2/pvrusb2-hdw-internal.h @@ -332,7 +332,7 @@ struct pvr2_hdw { /* Bit mask of PVR2_CVAL_INPUT choices which are valid for the hardware */ unsigned int input_avail_mask; - /* Bit mask of PVR2_CVAL_INPUT choices which are currenly allowed */ + /* Bit mask of PVR2_CVAL_INPUT choices which are currently allowed */ unsigned int input_allowed_mask; /* Location of eeprom or a negative number if none */ diff --git a/drivers/media/video/s2255drv.c b/drivers/media/video/s2255drv.c index 9e3262c0ba37..a4c84368eb10 100644 --- a/drivers/media/video/s2255drv.c +++ b/drivers/media/video/s2255drv.c @@ -1985,7 +1985,7 @@ static int save_frame(struct s2255_dev *dev, struct s2255_pipeinfo *pipe_info) wake_up(&dev->fw_data->wait_fw); break; default: - printk(KERN_INFO "s2255 unknwn resp\n"); + printk(KERN_INFO "s2255 unknown resp\n"); } default: pdata++; diff --git a/drivers/media/video/zoran/zoran.h b/drivers/media/video/zoran/zoran.h index d439c76b27e1..cb1de7ea197a 100644 --- a/drivers/media/video/zoran/zoran.h +++ b/drivers/media/video/zoran/zoran.h @@ -106,7 +106,7 @@ struct zoran_params { unsigned long jpeg_markers; /* Which markers should go into the JPEG output. * Unless you exactly know what you do, leave them untouched. * Inluding less markers will make the resulting code - * smaller, but there will be fewer aplications + * smaller, but there will be fewer applications * which can read it. * The presence of the APP and COM marker is * influenced by APP0_len and COM_len ONLY! */ diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c index d505b68cd372..e39986a78273 100644 --- a/drivers/message/i2o/i2o_block.c +++ b/drivers/message/i2o/i2o_block.c @@ -940,7 +940,7 @@ static const struct block_device_operations i2o_block_fops = { * Allocate memory for the i2o_block_device struct, gendisk and request * queue and initialize them as far as no additional information is needed. * - * Returns a pointer to the allocated I2O Block device on succes or a + * Returns a pointer to the allocated I2O Block device on success or a * negative error code on failure. */ static struct i2o_block_device *i2o_block_device_alloc(void) diff --git a/drivers/message/i2o/iop.c b/drivers/message/i2o/iop.c index 27cf4af0e13d..e5ab62141503 100644 --- a/drivers/message/i2o/iop.c +++ b/drivers/message/i2o/iop.c @@ -132,7 +132,7 @@ u32 i2o_cntxt_list_add(struct i2o_controller * c, void *ptr) * Removes a previously added pointer from the context list and returns * the matching context id. * - * Returns context id on succes or 0 on failure. + * Returns context id on success or 0 on failure. */ u32 i2o_cntxt_list_remove(struct i2o_controller * c, void *ptr) { @@ -198,7 +198,7 @@ void *i2o_cntxt_list_get(struct i2o_controller *c, u32 context) * @c: controller to which the context list belong * @ptr: pointer to which the context id should be fetched * - * Returns context id which matches to the pointer on succes or 0 on + * Returns context id which matches to the pointer on success or 0 on * failure. */ u32 i2o_cntxt_list_get_ptr(struct i2o_controller * c, void *ptr) diff --git a/drivers/misc/sgi-gru/grufile.c b/drivers/misc/sgi-gru/grufile.c index 41c8fe2a928c..ce5eda985ab0 100644 --- a/drivers/misc/sgi-gru/grufile.c +++ b/drivers/misc/sgi-gru/grufile.c @@ -92,7 +92,7 @@ static void gru_vma_close(struct vm_area_struct *vma) /* * gru_file_mmap * - * Called when mmaping the device. Initializes the vma with a fault handler + * Called when mmapping the device. Initializes the vma with a fault handler * and private data structure necessary to allocate, track, and free the * underlying pages. */ diff --git a/drivers/mmc/host/s3cmci.c b/drivers/mmc/host/s3cmci.c index 99b74a351020..941a4d35ef8d 100644 --- a/drivers/mmc/host/s3cmci.c +++ b/drivers/mmc/host/s3cmci.c @@ -1360,7 +1360,7 @@ static struct mmc_host_ops s3cmci_ops = { static struct s3c24xx_mci_pdata s3cmci_def_pdata = { /* This is currently here to avoid a number of if (host->pdata) - * checks. Any zero fields to ensure reaonable defaults are picked. */ + * checks. Any zero fields to ensure reasonable defaults are picked. */ }; #ifdef CONFIG_CPU_FREQ diff --git a/drivers/mtd/devices/slram.c b/drivers/mtd/devices/slram.c index 3aa05cd18ea1..592016a0668f 100644 --- a/drivers/mtd/devices/slram.c +++ b/drivers/mtd/devices/slram.c @@ -18,7 +18,7 @@ to specify the offset instead of the absolute address NOTE: - With slram it's only possible to map a contigous memory region. Therfore + With slram it's only possible to map a contiguous memory region. Therefore if there's a device mapped somewhere in the region specified slram will fail to load (see kernel log if modprobe fails). diff --git a/drivers/mtd/nand/diskonchip.c b/drivers/mtd/nand/diskonchip.c index e51c1ed7ac18..b126cf887476 100644 --- a/drivers/mtd/nand/diskonchip.c +++ b/drivers/mtd/nand/diskonchip.c @@ -1056,7 +1056,7 @@ static struct nand_ecclayout doc200x_oobinfo = { }; /* Find the (I)NFTL Media Header, and optionally also the mirror media header. - On sucessful return, buf will contain a copy of the media header for + On successful return, buf will contain a copy of the media header for further processing. id is the string to scan for, and will presumably be either "ANAND" or "BNAND". If findmirror=1, also look for the mirror media header. The page #s of the found media headers are placed in mh0_page and diff --git a/drivers/mtd/nand/nand_ecc.c b/drivers/mtd/nand/nand_ecc.c index db7ae9d6a296..92320a643275 100644 --- a/drivers/mtd/nand/nand_ecc.c +++ b/drivers/mtd/nand/nand_ecc.c @@ -475,7 +475,7 @@ int __nand_correct_data(unsigned char *buf, * * The b2 shift is there to get rid of the lowest two bits. * We could also do addressbits[b2] >> 1 but for the - * performace it does not make any difference + * performance it does not make any difference */ if (eccsize_mult == 1) byte_addr = (addressbits[b1] << 4) + addressbits[b0]; diff --git a/drivers/mtd/nand/s3c2410.c b/drivers/mtd/nand/s3c2410.c index 11dc7e69c4fb..68b5b3a486a9 100644 --- a/drivers/mtd/nand/s3c2410.c +++ b/drivers/mtd/nand/s3c2410.c @@ -875,7 +875,7 @@ static void s3c2410_nand_init_chip(struct s3c2410_nand_info *info, * @info: The controller instance. * @nmtd: The driver version of the MTD instance. * - * This routine is called after the chip probe has succesfully completed + * This routine is called after the chip probe has successfully completed * and the relevant per-chip information updated. This call ensure that * we update the internal state accordingly. * diff --git a/drivers/net/82596.c b/drivers/net/82596.c index ea6b139b812c..1663bc9e45de 100644 --- a/drivers/net/82596.c +++ b/drivers/net/82596.c @@ -19,7 +19,7 @@ TBD: * look at deferring rx frames rather than discarding (as per tulip) * handle tx ring full as per tulip - * performace test to tune rx_copybreak + * performance test to tune rx_copybreak Most of my modifications relate to the braindead big-endian implementation by Intel. When the i596 is operating in diff --git a/drivers/net/amd8111e.c b/drivers/net/amd8111e.c index 4e6359fff0e1..766aabfdfc75 100644 --- a/drivers/net/amd8111e.c +++ b/drivers/net/amd8111e.c @@ -1633,8 +1633,13 @@ static int amd8111e_enable_link_change(struct amd8111e_priv* lp) readl(lp->mmio + CMD7); return 0; } -/* This function is called when a packet transmission fails to complete within a resonable period, on the assumption that an interrupts have been failed or the interface is locked up. This function will reinitialize the hardware */ +/* + * This function is called when a packet transmission fails to complete + * within a reasonable period, on the assumption that an interrupt have + * failed or the interface is locked up. This function will reinitialize + * the hardware. + */ static void amd8111e_tx_timeout(struct net_device *dev) { struct amd8111e_priv* lp = netdev_priv(dev); diff --git a/drivers/net/appletalk/cops.c b/drivers/net/appletalk/cops.c index b5dc7f550725..9d828aed968a 100644 --- a/drivers/net/appletalk/cops.c +++ b/drivers/net/appletalk/cops.c @@ -120,7 +120,7 @@ static int irq = 5; /* Default IRQ */ * DAYNA driver mode: * Dayna DL2000/DaynaTalk PC (Half Length), COPS LT-95, * Farallon PhoneNET PC III, Farallon PhoneNET PC II - * Other cards possibly supported mode unkown though: + * Other cards possibly supported mode unknown though: * Dayna DL2000 (Full length), COPS LT/M (Micro-Channel) * * Cards NOT supported by this driver but supported by the ltpc.c diff --git a/drivers/net/ariadne.h b/drivers/net/ariadne.h index bb613f292e04..727be5cdd1ea 100644 --- a/drivers/net/ariadne.h +++ b/drivers/net/ariadne.h @@ -244,7 +244,7 @@ struct Am79C960 { #define DLNKTST 0x0010 /* Disable Link Status */ #define DAPC 0x0008 /* Disable Automatic Polarity Correction */ #define MENDECL 0x0004 /* MENDEC Loopback Mode */ -#define LRTTSEL 0x0002 /* Low Receive Treshold/Transmit Mode Select */ +#define LRTTSEL 0x0002 /* Low Receive Threshold/Transmit Mode Select */ #define PORTSEL1 0x0001 /* Port Select Bits */ #define PORTSEL2 0x8000 /* Port Select Bits */ #define INTL 0x4000 /* Internal Loopback */ diff --git a/drivers/net/atl1c/atl1c_main.c b/drivers/net/atl1c/atl1c_main.c index 1372e9a99f5b..96506eacc131 100644 --- a/drivers/net/atl1c/atl1c_main.c +++ b/drivers/net/atl1c/atl1c_main.c @@ -1543,7 +1543,7 @@ static irqreturn_t atl1c_intr(int irq, void *data) if (status & ISR_OVER) if (netif_msg_intr(adapter)) dev_warn(&pdev->dev, - "TX/RX over flow (status = 0x%x)\n", + "TX/RX overflow (status = 0x%x)\n", status & ISR_OVER); /* link event */ diff --git a/drivers/net/benet/be_cmds.h b/drivers/net/benet/be_cmds.h index 49953787e41c..f0bb62b5ca9e 100644 --- a/drivers/net/benet/be_cmds.h +++ b/drivers/net/benet/be_cmds.h @@ -435,7 +435,7 @@ enum be_if_flags { * filtering capabilities. */ struct be_cmd_req_if_create { struct be_cmd_req_hdr hdr; - u32 version; /* ignore currntly */ + u32 version; /* ignore currently */ u32 capability_flags; u32 enable_flags; u8 mac_addr[ETH_ALEN]; diff --git a/drivers/net/benet/be_main.c b/drivers/net/benet/be_main.c index 1f941f027718..02a0908707ed 100644 --- a/drivers/net/benet/be_main.c +++ b/drivers/net/benet/be_main.c @@ -1876,7 +1876,7 @@ int be_load_fw(struct be_adapter *adapter, u8 *func) goto fw_exit; } - dev_info(&adapter->pdev->dev, "Firmware flashed succesfully\n"); + dev_info(&adapter->pdev->dev, "Firmware flashed successfully\n"); fw_exit: release_firmware(fw); diff --git a/drivers/net/bnx2x_reg.h b/drivers/net/bnx2x_reg.h index aa76cbada5e2..732eafdeb0f2 100644 --- a/drivers/net/bnx2x_reg.h +++ b/drivers/net/bnx2x_reg.h @@ -2536,7 +2536,7 @@ /* [RC 1] A flag to indicate that overflow error occurred in one of the queues. */ #define QM_REG_OVFERROR 0x16805c -/* [RC 7] the Q were the qverflow occurs */ +/* [RC 7] the Q where the overflow occurs */ #define QM_REG_OVFQNUM 0x168058 /* [R 16] Pause state for physical queues 15-0 */ #define QM_REG_PAUSESTATE0 0x168410 diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c index f86612857a73..56ba872be9c1 100644 --- a/drivers/net/cxgb3/sge.c +++ b/drivers/net/cxgb3/sge.c @@ -1285,7 +1285,7 @@ netdev_tx_t t3_eth_xmit(struct sk_buff *skb, struct net_device *dev) /* * We do not use Tx completion interrupts to free DMAd Tx packets. - * This is good for performamce but means that we rely on new Tx + * This is good for performance but means that we rely on new Tx * packets arriving to run the destructors of completed packets, * which open up space in their sockets' send queues. Sometimes * we do not get such new packets causing Tx to stall. A single diff --git a/drivers/net/ehea/ehea_ethtool.c b/drivers/net/ehea/ehea_ethtool.c index d76885223366..75b099ce49c9 100644 --- a/drivers/net/ehea/ehea_ethtool.c +++ b/drivers/net/ehea/ehea_ethtool.c @@ -118,7 +118,7 @@ doit: ret = ehea_set_portspeed(port, sp); if (!ret) - ehea_info("%s: Port speed succesfully set: %dMbps " + ehea_info("%s: Port speed successfully set: %dMbps " "%s Duplex", port->netdev->name, port->port_speed, port->full_duplex == 1 ? "Full" : "Half"); @@ -134,7 +134,7 @@ static int ehea_nway_reset(struct net_device *dev) ret = ehea_set_portspeed(port, EHEA_SPEED_AUTONEG); if (!ret) - ehea_info("%s: Port speed succesfully set: %dMbps " + ehea_info("%s: Port speed successfully set: %dMbps " "%s Duplex", port->netdev->name, port->port_speed, port->full_duplex == 1 ? "Full" : "Half"); diff --git a/drivers/net/hamradio/baycom_ser_fdx.c b/drivers/net/hamradio/baycom_ser_fdx.c index ed60fd664273..0cab992b3d1a 100644 --- a/drivers/net/hamradio/baycom_ser_fdx.c +++ b/drivers/net/hamradio/baycom_ser_fdx.c @@ -35,7 +35,7 @@ * driver only supports standard serial hardware (8250, 16450, 16550A) * * This modem usually draws its supply current out of the otherwise unused - * TXD pin of the serial port. Thus a contignuous stream of 0x00-bytes + * TXD pin of the serial port. Thus a contiguous stream of 0x00-bytes * is transmitted to achieve a positive supply voltage. * * hsk: This is a 4800 baud FSK modem, designed for TNC use. It works fine diff --git a/drivers/net/iseries_veth.c b/drivers/net/iseries_veth.c index aa7286bc4364..8739ba850f82 100644 --- a/drivers/net/iseries_veth.c +++ b/drivers/net/iseries_veth.c @@ -1384,7 +1384,7 @@ static inline void veth_build_dma_list(struct dma_chunk *list, unsigned long done; int i = 1; - /* FIXME: skbs are continguous in real addresses. Do we + /* FIXME: skbs are contiguous in real addresses. Do we * really need to break it into PAGE_SIZE chunks, or can we do * it just at the granularity of iSeries real->absolute * mapping? Indeed, given the way the allocator works, can we diff --git a/drivers/net/lasi_82596.c b/drivers/net/lasi_82596.c index a0c578585a50..b77238dbafb8 100644 --- a/drivers/net/lasi_82596.c +++ b/drivers/net/lasi_82596.c @@ -47,7 +47,7 @@ TBD: * look at deferring rx frames rather than discarding (as per tulip) * handle tx ring full as per tulip - * performace test to tune rx_copybreak + * performance test to tune rx_copybreak Most of my modifications relate to the braindead big-endian implementation by Intel. When the i596 is operating in diff --git a/drivers/net/lib82596.c b/drivers/net/lib82596.c index 51e11c3e53e1..c0dbfc185b53 100644 --- a/drivers/net/lib82596.c +++ b/drivers/net/lib82596.c @@ -47,7 +47,7 @@ TBD: * look at deferring rx frames rather than discarding (as per tulip) * handle tx ring full as per tulip - * performace test to tune rx_copybreak + * performance test to tune rx_copybreak Most of my modifications relate to the braindead big-endian implementation by Intel. When the i596 is operating in diff --git a/drivers/net/mlx4/en_rx.c b/drivers/net/mlx4/en_rx.c index 03b781a7a182..829b9ec9ff67 100644 --- a/drivers/net/mlx4/en_rx.c +++ b/drivers/net/mlx4/en_rx.c @@ -204,7 +204,7 @@ static void mlx4_en_free_rx_desc(struct mlx4_en_priv *priv, en_dbg(DRV, priv, "Freeing fragment:%d\n", nr); dma = be64_to_cpu(rx_desc->data[nr].addr); - en_dbg(DRV, priv, "Unmaping buffer at dma:0x%llx\n", (u64) dma); + en_dbg(DRV, priv, "Unmapping buffer at dma:0x%llx\n", (u64) dma); pci_unmap_single(mdev->pdev, dma, skb_frags[nr].size, PCI_DMA_FROMDEVICE); put_page(skb_frags[nr].page); diff --git a/drivers/net/mlx4/en_tx.c b/drivers/net/mlx4/en_tx.c index 8c7279965b44..3d1396af9462 100644 --- a/drivers/net/mlx4/en_tx.c +++ b/drivers/net/mlx4/en_tx.c @@ -47,7 +47,7 @@ enum { static int inline_thold __read_mostly = MAX_INLINE; module_param_named(inline_thold, inline_thold, int, 0444); -MODULE_PARM_DESC(inline_thold, "treshold for using inline data"); +MODULE_PARM_DESC(inline_thold, "threshold for using inline data"); int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring, u32 size, diff --git a/drivers/net/mlx4/mlx4_en.h b/drivers/net/mlx4/mlx4_en.h index 4376147b0ea0..82c3ebc584e3 100644 --- a/drivers/net/mlx4/mlx4_en.h +++ b/drivers/net/mlx4/mlx4_en.h @@ -162,7 +162,7 @@ enum { #define MLX4_EN_DEF_RX_PAUSE 1 #define MLX4_EN_DEF_TX_PAUSE 1 -/* Interval between sucessive polls in the Tx routine when polling is used +/* Interval between successive polls in the Tx routine when polling is used instead of interrupts (in per-core Tx rings) - should be power of 2 */ #define MLX4_EN_TX_POLL_MODER 16 #define MLX4_EN_TX_POLL_TIMEOUT (HZ / 4) diff --git a/drivers/net/ps3_gelic_net.c b/drivers/net/ps3_gelic_net.c index b211613e9dbd..86fde1a90a5a 100644 --- a/drivers/net/ps3_gelic_net.c +++ b/drivers/net/ps3_gelic_net.c @@ -296,7 +296,7 @@ static void gelic_card_reset_chain(struct gelic_card *card, * @card: card structure * @descr: descriptor to re-init * - * return 0 on succes, <0 on failure + * return 0 on success, <0 on failure * * allocates a new rx skb, iommu-maps it and attaches it to the descriptor. * Activate the descriptor state-wise diff --git a/drivers/net/sis900.c b/drivers/net/sis900.c index c072f7f36acf..9d94a141555c 100644 --- a/drivers/net/sis900.c +++ b/drivers/net/sis900.c @@ -1760,7 +1760,7 @@ static int sis900_rx(struct net_device *net_dev) sis_priv->rx_ring[entry].bufptr, RX_BUF_SIZE, PCI_DMA_FROMDEVICE); - /* refill the Rx buffer, what if there is not enought + /* refill the Rx buffer, what if there is not enough * memory for new socket buffer ?? */ if ((skb = dev_alloc_skb(RX_BUF_SIZE)) == NULL) { /* @@ -1775,7 +1775,7 @@ static int sis900_rx(struct net_device *net_dev) } /* This situation should never happen, but due to - some unknow bugs, it is possible that + some unknown bugs, it is possible that we are working on NULL sk_buff :-( */ if (sis_priv->rx_skbuff[entry] == NULL) { if (netif_msg_rx_err(sis_priv)) diff --git a/drivers/net/skfp/h/smc.h b/drivers/net/skfp/h/smc.h index 1758d9548361..026a83b9f743 100644 --- a/drivers/net/skfp/h/smc.h +++ b/drivers/net/skfp/h/smc.h @@ -393,10 +393,10 @@ struct smt_config { */ u_long mac_d_max ; /* MAC : D_Max timer value */ - u_long lct_short ; /* LCT : error threshhold */ - u_long lct_medium ; /* LCT : error threshhold */ - u_long lct_long ; /* LCT : error threshhold */ - u_long lct_extended ; /* LCT : error threshhold */ + u_long lct_short ; /* LCT : error threshold */ + u_long lct_medium ; /* LCT : error threshold */ + u_long lct_long ; /* LCT : error threshold */ + u_long lct_extended ; /* LCT : error threshold */ } ; #ifdef DEBUG diff --git a/drivers/net/skfp/skfddi.c b/drivers/net/skfp/skfddi.c index b27156eaf267..db216a728503 100644 --- a/drivers/net/skfp/skfddi.c +++ b/drivers/net/skfp/skfddi.c @@ -1002,7 +1002,7 @@ static int skfp_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) } break; default: - printk("ioctl for %s: unknow cmd: %04x\n", dev->name, ioc.cmd); + printk("ioctl for %s: unknown cmd: %04x\n", dev->name, ioc.cmd); status = -EOPNOTSUPP; } // switch diff --git a/drivers/net/smsc911x.c b/drivers/net/smsc911x.c index ccdd196f5297..4a00940d0a54 100644 --- a/drivers/net/smsc911x.c +++ b/drivers/net/smsc911x.c @@ -816,7 +816,7 @@ static int smsc911x_mii_probe(struct net_device *dev) SMSC_TRACE(HW, "Passed Loop Back Test"); #endif /* USE_PHY_WORK_AROUND */ - SMSC_TRACE(HW, "phy initialised succesfully"); + SMSC_TRACE(HW, "phy initialised successfully"); return 0; } diff --git a/drivers/net/smsc911x.h b/drivers/net/smsc911x.h index b5716bd8a597..016360c65ce2 100644 --- a/drivers/net/smsc911x.h +++ b/drivers/net/smsc911x.h @@ -30,7 +30,7 @@ #define SMSC_NAPI_WEIGHT 16 /* implements a PHY loopback test at initialisation time, to ensure a packet - * can be succesfully looped back */ + * can be successfully looped back */ #define USE_PHY_WORK_AROUND #define DPRINTK(nlevel, klevel, fmt, args...) \ diff --git a/drivers/net/spider_net.c b/drivers/net/spider_net.c index 90e663f4515c..40b51e6bc77b 100644 --- a/drivers/net/spider_net.c +++ b/drivers/net/spider_net.c @@ -409,7 +409,7 @@ spider_net_free_rx_chain_contents(struct spider_net_card *card) * @card: card structure * @descr: descriptor to re-init * - * Return 0 on succes, <0 on failure. + * Return 0 on success, <0 on failure. * * Allocates a new rx skb, iommu-maps it and attaches it to the * descriptor. Mark the descriptor as activated, ready-to-use. diff --git a/drivers/net/stmmac/gmac.c b/drivers/net/stmmac/gmac.c index b624bb5bae0a..52586ee68953 100644 --- a/drivers/net/stmmac/gmac.c +++ b/drivers/net/stmmac/gmac.c @@ -112,7 +112,7 @@ static void gmac_dma_operation_mode(unsigned long ioaddr, int txmode, " (threshold = %d)\n", txmode); csr6 &= ~DMA_CONTROL_TSF; csr6 &= DMA_CONTROL_TC_TX_MASK; - /* Set the transmit threashold */ + /* Set the transmit threshold */ if (txmode <= 32) csr6 |= DMA_CONTROL_TTC_32; else if (txmode <= 64) diff --git a/drivers/net/stmmac/gmac.h b/drivers/net/stmmac/gmac.h index 684a363120a9..2e82d6c9a148 100644 --- a/drivers/net/stmmac/gmac.h +++ b/drivers/net/stmmac/gmac.h @@ -154,14 +154,14 @@ enum rx_tx_priority_ratio { #define DMA_CONTROL_DT 0x04000000 /* Disable Drop TCP/IP csum error */ #define DMA_CONTROL_RSF 0x02000000 /* Receive Store and Forward */ #define DMA_CONTROL_DFF 0x01000000 /* Disaable flushing */ -/* Theshold for Activating the FC */ +/* Threshold for Activating the FC */ enum rfa { act_full_minus_1 = 0x00800000, act_full_minus_2 = 0x00800200, act_full_minus_3 = 0x00800400, act_full_minus_4 = 0x00800600, }; -/* Theshold for Deactivating the FC */ +/* Threshold for Deactivating the FC */ enum rfd { deac_full_minus_1 = 0x00400000, deac_full_minus_2 = 0x00400800, diff --git a/drivers/net/tokenring/smctr.c b/drivers/net/tokenring/smctr.c index ebda61bc4c2f..78e12b5e3ac7 100644 --- a/drivers/net/tokenring/smctr.c +++ b/drivers/net/tokenring/smctr.c @@ -426,7 +426,7 @@ static int smctr_alloc_shared_memory(struct net_device *dev) smctr_malloc(dev, 1L); /* Allocate Non-MAC receive data buffers. - * To guarantee a minimum of 256 contigous memory to + * To guarantee a minimum of 256 contiguous memory to * UM_Receive_Packet's lookahead pointer, before a page * change or ring end is encountered, place each rx buffer on * a 256 byte boundary. diff --git a/drivers/net/ucc_geth.c b/drivers/net/ucc_geth.c index 4469f2451a6f..5e9adbaf6745 100644 --- a/drivers/net/ucc_geth.c +++ b/drivers/net/ucc_geth.c @@ -3798,7 +3798,7 @@ static int ucc_geth_probe(struct of_device* ofdev, const struct of_device_id *ma prop = of_get_property(np, "tx-clock", NULL); if (!prop) { printk(KERN_ERR - "ucc_geth: mising tx-clock-name property\n"); + "ucc_geth: missing tx-clock-name property\n"); return -EINVAL; } if ((*prop < QE_CLK_NONE) || (*prop > QE_CLK24)) { diff --git a/drivers/net/ucc_geth.h b/drivers/net/ucc_geth.h index 03a6ca016d5a..a007e2acf651 100644 --- a/drivers/net/ucc_geth.h +++ b/drivers/net/ucc_geth.h @@ -80,16 +80,16 @@ struct ucc_geth { frames) received that were between 128 (Including FCS length==4) and 255 octets */ u32 txok; /* Total number of octets residing in frames - that where involved in succesfull + that where involved in successfull transmission */ u16 txcf; /* Total number of PAUSE control frames transmitted by this MAC */ u8 res4[0x2]; u32 tmca; /* Total number of frames that were transmitted - succesfully with the group address bit set + successfully with the group address bit set that are not broadcast frames */ u32 tbca; /* Total number of frames transmitted - succesfully that had destination address + successfully that had destination address field equal to the broadcast address */ u32 rxfok; /* Total number of frames received OK */ u32 rxbok; /* Total number of octets received OK */ @@ -98,9 +98,9 @@ struct ucc_geth { HW because it includes octets in frames that never even reach the UCC */ u32 rmca; /* Total number of frames that were received - succesfully with the group address bit set + successfully with the group address bit set that are not broadcast frames */ - u32 rbca; /* Total number of frames received succesfully + u32 rbca; /* Total number of frames received successfully that had destination address equal to the broadcast address */ u32 scar; /* Statistics carry register */ @@ -759,15 +759,15 @@ struct ucc_geth_hardware_statistics { frames) received that were between 128 (Including FCS length==4) and 255 octets */ u32 txok; /* Total number of octets residing in frames - that where involved in succesfull + that where involved in successfull transmission */ u16 txcf; /* Total number of PAUSE control frames transmitted by this MAC */ u32 tmca; /* Total number of frames that were transmitted - succesfully with the group address bit set + successfully with the group address bit set that are not broadcast frames */ u32 tbca; /* Total number of frames transmitted - succesfully that had destination address + successfully that had destination address field equal to the broadcast address */ u32 rxfok; /* Total number of frames received OK */ u32 rxbok; /* Total number of octets received OK */ @@ -776,9 +776,9 @@ struct ucc_geth_hardware_statistics { HW because it includes octets in frames that never even reach the UCC */ u32 rmca; /* Total number of frames that were received - succesfully with the group address bit set + successfully with the group address bit set that are not broadcast frames */ - u32 rbca; /* Total number of frames received succesfully + u32 rbca; /* Total number of frames received successfully that had destination address equal to the broadcast address */ } __attribute__ ((packed)); diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index c6c922247d05..0c3c738d7419 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -748,7 +748,7 @@ static int smsc95xx_phy_initialize(struct usbnet *dev) mii_nway_restart(&dev->mii); if (netif_msg_ifup(dev)) - devdbg(dev, "phy initialised succesfully"); + devdbg(dev, "phy initialised successfully"); return 0; } diff --git a/drivers/net/wan/lmc/lmc_main.c b/drivers/net/wan/lmc/lmc_main.c index 7ea71b33d2e9..ee784e091f67 100644 --- a/drivers/net/wan/lmc/lmc_main.c +++ b/drivers/net/wan/lmc/lmc_main.c @@ -927,7 +927,7 @@ static int __devinit lmc_init_one(struct pci_dev *pdev, sc->lmc_media = &lmc_t1_media; break; default: - printk(KERN_WARNING "%s: LMC UNKOWN CARD!\n", dev->name); + printk(KERN_WARNING "%s: LMC UNKNOWN CARD!\n", dev->name); break; } diff --git a/drivers/net/wimax/i2400m/rx.c b/drivers/net/wimax/i2400m/rx.c index 07c32e68909f..99d27473ba3f 100644 --- a/drivers/net/wimax/i2400m/rx.c +++ b/drivers/net/wimax/i2400m/rx.c @@ -1114,7 +1114,7 @@ error: * device. See the file header for the format. Run all checks on the * buffer header, then run over each payload's descriptors, verify * their consistency and act on each payload's contents. If - * everything is succesful, update the device's statistics. + * everything is successful, update the device's statistics. * * Note: You need to set the skb to contain only the length of the * received buffer; for that, use skb_trim(skb, RECEIVED_SIZE). diff --git a/drivers/net/wireless/ath/ath5k/phy.c b/drivers/net/wireless/ath/ath5k/phy.c index 1a039f2bd732..6f04cc758dcc 100644 --- a/drivers/net/wireless/ath/ath5k/phy.c +++ b/drivers/net/wireless/ath/ath5k/phy.c @@ -117,7 +117,7 @@ static unsigned int ath5k_hw_rfb_op(struct ath5k_hw *ah, /* * This code is used to optimize rf gain on different environments - * (temprature mostly) based on feedback from a power detector. + * (temperature mostly) based on feedback from a power detector. * * It's only used on RF5111 and RF5112, later RF chips seem to have * auto adjustment on hw -notice they have a much smaller BANK 7 and @@ -2675,7 +2675,7 @@ ath5k_setup_channel_powertable(struct ath5k_hw *ah, /* Fill curves in reverse order * from lower power (max gain) * to higher power. Use curve -> idx - * backmaping we did on eeprom init */ + * backmapping we did on eeprom init */ u8 idx = pdg_curve_to_idx[pdg]; /* Grab the needed curves by index */ @@ -2777,7 +2777,7 @@ ath5k_setup_channel_powertable(struct ath5k_hw *ah, /* Now we have a set of curves for this * channel on tmpL (x range is table_max - table_min * and y values are tmpL[pdg][]) sorted in the same - * order as EEPROM (because we've used the backmaping). + * order as EEPROM (because we've used the backmapping). * So for RF5112 it's from higher power to lower power * and for RF2413 it's from lower power to higher power. * For RF5111 we only have one curve. */ diff --git a/drivers/net/wireless/ath/ath9k/rc.c b/drivers/net/wireless/ath/ath9k/rc.c index 1895d63aad0a..0a35ee62a02a 100644 --- a/drivers/net/wireless/ath/ath9k/rc.c +++ b/drivers/net/wireless/ath/ath9k/rc.c @@ -969,7 +969,7 @@ static bool ath_rc_update_per(struct ath_softc *sc, * Since this probe succeeded, we allow the next * probe twice as soon. This allows the maxRate * to move up faster if the probes are - * succesful. + * successful. */ ath_rc_priv->probe_time = now_msec - rate_table->probe_interval / 2; diff --git a/drivers/net/wireless/ipw2x00/ipw2100.c b/drivers/net/wireless/ipw2x00/ipw2100.c index a741d37fd96f..e1b330023200 100644 --- a/drivers/net/wireless/ipw2x00/ipw2100.c +++ b/drivers/net/wireless/ipw2x00/ipw2100.c @@ -551,7 +551,7 @@ static int ipw2100_get_ordinal(struct ipw2100_priv *priv, u32 ord, /* get number of entries */ field_count = *(((u16 *) & field_info) + 1); - /* abort if no enought memory */ + /* abort if no enough memory */ total_length = field_len * field_count; if (total_length > *len) { *len = total_length; @@ -3044,7 +3044,7 @@ static void ipw2100_tx_send_data(struct ipw2100_priv *priv) IPW_MAX_BDS)) { /* TODO: Support merging buffers if more than * IPW_MAX_BDS are used */ - IPW_DEBUG_INFO("%s: Maximum BD theshold exceeded. " + IPW_DEBUG_INFO("%s: Maximum BD threshold exceeded. " "Increase fragmentation level.\n", priv->net_dev->name); } @@ -6823,7 +6823,7 @@ static int ipw2100_wx_get_range(struct net_device *dev, range->max_qual.updated = 7; /* Updated all three */ range->avg_qual.qual = 70; /* > 8% missed beacons is 'bad' */ - /* TODO: Find real 'good' to 'bad' threshol value for RSSI */ + /* TODO: Find real 'good' to 'bad' threshold value for RSSI */ range->avg_qual.level = 20 + IPW2100_RSSI_TO_DBM; range->avg_qual.noise = 0; range->avg_qual.updated = 7; /* Updated all three */ diff --git a/drivers/net/wireless/ipw2x00/ipw2200.c b/drivers/net/wireless/ipw2x00/ipw2200.c index 9b0f2c0646e0..b2aa960b8346 100644 --- a/drivers/net/wireless/ipw2x00/ipw2200.c +++ b/drivers/net/wireless/ipw2x00/ipw2200.c @@ -787,7 +787,7 @@ static int ipw_get_ordinal(struct ipw_priv *priv, u32 ord, void *val, u32 * len) /* get number of entries */ field_count = *(((u16 *) & field_info) + 1); - /* abort if not enought memory */ + /* abort if not enough memory */ total_len = field_len * field_count; if (total_len > *len) { *len = total_len; @@ -7751,7 +7751,7 @@ static void ipw_rebuild_decrypted_skb(struct ipw_priv *priv, case SEC_LEVEL_0: break; default: - printk(KERN_ERR "Unknow security level %d\n", + printk(KERN_ERR "Unknown security level %d\n", priv->ieee->sec.level); break; } @@ -8917,7 +8917,7 @@ static int ipw_wx_get_range(struct net_device *dev, range->max_qual.updated = 7; /* Updated all three */ range->avg_qual.qual = 70; - /* TODO: Find real 'good' to 'bad' threshol value for RSSI */ + /* TODO: Find real 'good' to 'bad' threshold value for RSSI */ range->avg_qual.level = 0; /* FIXME to real average level */ range->avg_qual.noise = 0; range->avg_qual.updated = 7; /* Updated all three */ @@ -10290,7 +10290,7 @@ static int ipw_tx_skb(struct ipw_priv *priv, struct libipw_txb *txb, case SEC_LEVEL_0: break; default: - printk(KERN_ERR "Unknow security level %d\n", + printk(KERN_ERR "Unknown security level %d\n", priv->ieee->sec.level); break; } diff --git a/drivers/net/wireless/ipw2x00/libipw_module.c b/drivers/net/wireless/ipw2x00/libipw_module.c index be5b809ec97a..20b8a8a20644 100644 --- a/drivers/net/wireless/ipw2x00/libipw_module.c +++ b/drivers/net/wireless/ipw2x00/libipw_module.c @@ -199,7 +199,7 @@ struct net_device *alloc_ieee80211(int sizeof_priv, int monitor) ieee->host_decrypt = 1; ieee->host_mc_decrypt = 1; - /* Host fragementation in Open mode. Default is enabled. + /* Host fragmentation in Open mode. Default is enabled. * Note: host fragmentation is always enabled if host encryption * is enabled. For cards can do hardware encryption, they must do * hardware fragmentation as well. So we don't need a variable diff --git a/drivers/net/wireless/iwmc3200wifi/hal.c b/drivers/net/wireless/iwmc3200wifi/hal.c index c430418248b4..d13c8853ee82 100644 --- a/drivers/net/wireless/iwmc3200wifi/hal.c +++ b/drivers/net/wireless/iwmc3200wifi/hal.c @@ -411,7 +411,7 @@ static void iwm_build_lmac_hdr(struct iwm_priv *iwm, struct iwm_lmac_hdr *hdr, /* * iwm_hal_send_host_cmd(): sends commands to the UMAC or the LMAC. * Sending command to the LMAC is equivalent to sending a - * regular UMAC command with the LMAC passtrough or the LMAC + * regular UMAC command with the LMAC passthrough or the LMAC * wrapper UMAC command IDs. */ int iwm_hal_send_host_cmd(struct iwm_priv *iwm, diff --git a/drivers/net/wireless/iwmc3200wifi/rx.c b/drivers/net/wireless/iwmc3200wifi/rx.c index 771a301003c9..8ddb51a2a977 100644 --- a/drivers/net/wireless/iwmc3200wifi/rx.c +++ b/drivers/net/wireless/iwmc3200wifi/rx.c @@ -1448,7 +1448,7 @@ static void iwm_rx_process_packet(struct iwm_priv *iwm, kfree_skb(packet->skb); break; default: - IWM_ERR(iwm, "Unknow ticket action: %d\n", + IWM_ERR(iwm, "Unknown ticket action: %d\n", le16_to_cpu(ticket_node->ticket->action)); kfree_skb(packet->skb); } diff --git a/drivers/net/wireless/libertas/if_sdio.c b/drivers/net/wireless/libertas/if_sdio.c index 485a8d406525..afe6abecc044 100644 --- a/drivers/net/wireless/libertas/if_sdio.c +++ b/drivers/net/wireless/libertas/if_sdio.c @@ -934,7 +934,7 @@ static int if_sdio_probe(struct sdio_func *func, } if (i == ARRAY_SIZE(if_sdio_models)) { - lbs_pr_err("unkown card model 0x%x\n", card->model); + lbs_pr_err("unknown card model 0x%x\n", card->model); ret = -ENODEV; goto free; } diff --git a/drivers/net/wireless/prism54/isl_ioctl.c b/drivers/net/wireless/prism54/isl_ioctl.c index bc08464d8323..f7f5c793514b 100644 --- a/drivers/net/wireless/prism54/isl_ioctl.c +++ b/drivers/net/wireless/prism54/isl_ioctl.c @@ -1897,7 +1897,7 @@ prism54_get_mac(struct net_device *ndev, struct iw_request_info *info, return 0; } -/* Setting policy also clears the MAC acl, even if we don't change the defaut +/* Setting policy also clears the MAC acl, even if we don't change the default * policy */ @@ -2323,7 +2323,7 @@ prism54_process_trap_helper(islpci_private *priv, enum oid_num_t oid, case DOT11_OID_BEACON: send_formatted_event(priv, - "Received a beacon from an unkown AP", + "Received a beacon from an unknown AP", mlme, 0); break; diff --git a/drivers/net/wireless/rt2x00/rt2400pci.h b/drivers/net/wireless/rt2x00/rt2400pci.h index ccd644104ad1..aced05775693 100644 --- a/drivers/net/wireless/rt2x00/rt2400pci.h +++ b/drivers/net/wireless/rt2x00/rt2400pci.h @@ -35,7 +35,7 @@ /* * Signal information. - * Defaul offset is required for RSSI <-> dBm conversion. + * Default offset is required for RSSI <-> dBm conversion. */ #define DEFAULT_RSSI_OFFSET 100 diff --git a/drivers/net/wireless/rt2x00/rt2500pci.h b/drivers/net/wireless/rt2x00/rt2500pci.h index 54d37957883c..3db9041838a4 100644 --- a/drivers/net/wireless/rt2x00/rt2500pci.h +++ b/drivers/net/wireless/rt2x00/rt2500pci.h @@ -46,7 +46,7 @@ /* * Signal information. - * Defaul offset is required for RSSI <-> dBm conversion. + * Default offset is required for RSSI <-> dBm conversion. */ #define DEFAULT_RSSI_OFFSET 121 diff --git a/drivers/net/wireless/rt2x00/rt2500usb.h b/drivers/net/wireless/rt2x00/rt2500usb.h index b01edca42583..d3000827883a 100644 --- a/drivers/net/wireless/rt2x00/rt2500usb.h +++ b/drivers/net/wireless/rt2x00/rt2500usb.h @@ -46,7 +46,7 @@ /* * Signal information. - * Defaul offset is required for RSSI <-> dBm conversion. + * Default offset is required for RSSI <-> dBm conversion. */ #define DEFAULT_RSSI_OFFSET 120 diff --git a/drivers/net/wireless/rt2x00/rt61pci.h b/drivers/net/wireless/rt2x00/rt61pci.h index 93eb699165cc..77b5116f549b 100644 --- a/drivers/net/wireless/rt2x00/rt61pci.h +++ b/drivers/net/wireless/rt2x00/rt61pci.h @@ -37,7 +37,7 @@ /* * Signal information. - * Defaul offset is required for RSSI <-> dBm conversion. + * Default offset is required for RSSI <-> dBm conversion. */ #define DEFAULT_RSSI_OFFSET 120 diff --git a/drivers/net/wireless/rt2x00/rt73usb.h b/drivers/net/wireless/rt2x00/rt73usb.h index 81fe0be51c42..e194332dac5f 100644 --- a/drivers/net/wireless/rt2x00/rt73usb.h +++ b/drivers/net/wireless/rt2x00/rt73usb.h @@ -37,7 +37,7 @@ /* * Signal information. - * Defaul offset is required for RSSI <-> dBm conversion. + * Default offset is required for RSSI <-> dBm conversion. */ #define DEFAULT_RSSI_OFFSET 120 diff --git a/drivers/net/wireless/wavelan_cs.c b/drivers/net/wireless/wavelan_cs.c index 431a20ec6db6..b3b0b5b685c6 100644 --- a/drivers/net/wireless/wavelan_cs.c +++ b/drivers/net/wireless/wavelan_cs.c @@ -4011,7 +4011,7 @@ wavelan_interrupt(int irq, #endif /* Prevent reentrancy. We need to do that because we may have - * multiple interrupt handler running concurently. + * multiple interrupt handler running concurrently. * It is safe because interrupts are disabled before aquiring * the spinlock. */ spin_lock(&lp->spinlock); diff --git a/drivers/net/wireless/zd1211rw/zd_mac.c b/drivers/net/wireless/zd1211rw/zd_mac.c index 6d666359a42f..2b7f96594373 100644 --- a/drivers/net/wireless/zd1211rw/zd_mac.c +++ b/drivers/net/wireless/zd1211rw/zd_mac.c @@ -312,7 +312,7 @@ static void tx_status(struct ieee80211_hw *hw, struct sk_buff *skb, * zd_mac_tx_failed - callback for failed frames * @dev: the mac80211 wireless device * - * This function is called if a frame couldn't be succesfully be + * This function is called if a frame couldn't be successfully be * transferred. The first frame from the tx queue, will be selected and * reported as error to the upper layers. */ diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c index a6b4a5a53d40..f511e70d454c 100644 --- a/drivers/parisc/ccio-dma.c +++ b/drivers/parisc/ccio-dma.c @@ -650,7 +650,7 @@ ccio_clear_io_tlb(struct ioc *ioc, dma_addr_t iovp, size_t byte_cnt) * Mark the I/O Pdir entries invalid and blow away the corresponding I/O * TLB entries. * - * FIXME: at some threshhold it might be "cheaper" to just blow + * FIXME: at some threshold it might be "cheaper" to just blow * away the entire I/O TLB instead of individual entries. * * FIXME: Uturn has 256 TLB entries. We don't need to purge every diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index d93108d148fc..36db20a8f892 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -6533,7 +6533,7 @@ static struct ibm_struct volume_driver_data = { * The speeds are stored on handles * (FANA:FAN9), (FANC:FANB), (FANE:FAND). * - * There are three default speed sets, acessible as handles: + * There are three default speed sets, accessible as handles: * FS1L,FS1M,FS1H; FS2L,FS2M,FS2H; FS3L,FS3M,FS3H * * ACPI DSDT switches which set is in use depending on various diff --git a/drivers/pnp/pnpbios/rsparser.c b/drivers/pnp/pnpbios/rsparser.c index 87b4f49a5251..a5135ebe5f07 100644 --- a/drivers/pnp/pnpbios/rsparser.c +++ b/drivers/pnp/pnpbios/rsparser.c @@ -191,7 +191,7 @@ static unsigned char *pnpbios_parse_allocated_resource_data(struct pnp_dev *dev, return (unsigned char *)p; break; - default: /* an unkown tag */ + default: /* an unknown tag */ len_err: dev_err(&dev->dev, "unknown tag %#x length %d\n", tag, len); @@ -405,7 +405,7 @@ pnpbios_parse_resource_option_data(unsigned char *p, unsigned char *end, case SMALL_TAG_END: return p + 2; - default: /* an unkown tag */ + default: /* an unknown tag */ len_err: dev_err(&dev->dev, "unknown tag %#x length %d\n", tag, len); @@ -475,7 +475,7 @@ static unsigned char *pnpbios_parse_compatible_ids(unsigned char *p, return (unsigned char *)p; break; - default: /* an unkown tag */ + default: /* an unknown tag */ len_err: dev_err(&dev->dev, "unknown tag %#x length %d\n", tag, len); @@ -744,7 +744,7 @@ static unsigned char *pnpbios_encode_allocated_resource_data(struct pnp_dev return (unsigned char *)p; break; - default: /* an unkown tag */ + default: /* an unknown tag */ len_err: dev_err(&dev->dev, "unknown tag %#x length %d\n", tag, len); diff --git a/drivers/ps3/ps3-sys-manager.c b/drivers/ps3/ps3-sys-manager.c index 88cb74088611..3cbaf1811bd0 100644 --- a/drivers/ps3/ps3-sys-manager.c +++ b/drivers/ps3/ps3-sys-manager.c @@ -46,7 +46,7 @@ /** * struct ps3_sys_manager_header - System manager message header. * @version: Header version, currently 1. - * @size: Header size in bytes, curently 16. + * @size: Header size in bytes, currently 16. * @payload_size: Message payload size in bytes. * @service_id: Message type, one of enum ps3_sys_manager_service_id. * @request_tag: Unique number to identify reply. diff --git a/drivers/rtc/rtc-v3020.c b/drivers/rtc/rtc-v3020.c index ad164056feb6..434e92fdb966 100644 --- a/drivers/rtc/rtc-v3020.c +++ b/drivers/rtc/rtc-v3020.c @@ -335,7 +335,7 @@ static int rtc_probe(struct platform_device *pdev) goto err_io; } - /* Make sure frequency measurment mode, test modes, and lock + /* Make sure frequency measurement mode, test modes, and lock * are all disabled */ v3020_set_reg(chip, V3020_STATUS_0, 0x0); diff --git a/drivers/s390/char/fs3270.c b/drivers/s390/char/fs3270.c index 097d3846a828..f54b1eec6ddf 100644 --- a/drivers/s390/char/fs3270.c +++ b/drivers/s390/char/fs3270.c @@ -74,7 +74,7 @@ fs3270_do_io(struct raw3270_view *view, struct raw3270_request *rq) } rc = raw3270_start(view, rq); if (rc == 0) { - /* Started sucessfully. Now wait for completion. */ + /* Started successfully. Now wait for completion. */ wait_event(fp->wait, raw3270_request_final(rq)); } } while (rc == -EACCES); diff --git a/drivers/s390/cio/chp.c b/drivers/s390/cio/chp.c index 8ab51608da55..c268a2e5b7c3 100644 --- a/drivers/s390/cio/chp.c +++ b/drivers/s390/cio/chp.c @@ -65,7 +65,7 @@ static void set_chp_logically_online(struct chp_id chpid, int onoff) chpid_to_chp(chpid)->state = onoff; } -/* On succes return 0 if channel-path is varied offline, 1 if it is varied +/* On success return 0 if channel-path is varied offline, 1 if it is varied * online. Return -ENODEV if channel-path is not registered. */ int chp_get_status(struct chp_id chpid) { diff --git a/drivers/s390/cio/cmf.c b/drivers/s390/cio/cmf.c index 30f516111307..2985eb439485 100644 --- a/drivers/s390/cio/cmf.c +++ b/drivers/s390/cio/cmf.c @@ -462,7 +462,7 @@ static struct cmb_area cmb_area = { * block of memory, which can not be moved as long as any channel * is active. Therefore, a maximum number of subchannels needs to * be defined somewhere. This is a module parameter, defaulting to - * a resonable value of 1024, or 32 kb of memory. + * a reasonable value of 1024, or 32 kb of memory. * Current kernels don't allow kmalloc with more than 128kb, so the * maximum is 4096. */ diff --git a/drivers/sbus/char/envctrl.c b/drivers/sbus/char/envctrl.c index 58e583b61e60..aa2b60a868ba 100644 --- a/drivers/sbus/char/envctrl.c +++ b/drivers/sbus/char/envctrl.c @@ -92,11 +92,11 @@ #define ENVCTRL_CPUTEMP_MON 1 /* cpu temperature monitor */ #define ENVCTRL_CPUVOLTAGE_MON 2 /* voltage monitor */ #define ENVCTRL_FANSTAT_MON 3 /* fan status monitor */ -#define ENVCTRL_ETHERTEMP_MON 4 /* ethernet temperarture */ +#define ENVCTRL_ETHERTEMP_MON 4 /* ethernet temperature */ /* monitor */ #define ENVCTRL_VOLTAGESTAT_MON 5 /* voltage status monitor */ #define ENVCTRL_MTHRBDTEMP_MON 6 /* motherboard temperature */ -#define ENVCTRL_SCSITEMP_MON 7 /* scsi temperarture */ +#define ENVCTRL_SCSITEMP_MON 7 /* scsi temperature */ #define ENVCTRL_GLOBALADDR_MON 8 /* global address */ /* Child device type. diff --git a/drivers/scsi/53c700.c b/drivers/scsi/53c700.c index f5a9addb7050..07ce9bfcdf06 100644 --- a/drivers/scsi/53c700.c +++ b/drivers/scsi/53c700.c @@ -1491,7 +1491,7 @@ NCR_700_intr(int irq, void *dev_id) unsigned long flags; int handled = 0; - /* Use the host lock to serialise acess to the 53c700 + /* Use the host lock to serialise access to the 53c700 * hardware. Note: In future, we may need to take the queue * lock to enter the done routines. When that happens, we * need to ensure that for this driver, the host lock and the diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h index cdbdec9f4fb2..83986ed86556 100644 --- a/drivers/scsi/aacraid/aacraid.h +++ b/drivers/scsi/aacraid/aacraid.h @@ -526,10 +526,10 @@ struct aac_driver_ident /* * The adapter interface specs all queues to be located in the same - * physically contigous block. The host structure that defines the + * physically contiguous block. The host structure that defines the * commuication queues will assume they are each a separate physically - * contigous memory region that will support them all being one big - * contigous block. + * contiguous memory region that will support them all being one big + * contiguous block. * There is a command and response queue for each level and direction of * commuication. These regions are accessed by both the host and adapter. */ diff --git a/drivers/scsi/aacraid/comminit.c b/drivers/scsi/aacraid/comminit.c index d598eba630d0..666d5151d628 100644 --- a/drivers/scsi/aacraid/comminit.c +++ b/drivers/scsi/aacraid/comminit.c @@ -226,7 +226,7 @@ static int aac_comm_init(struct aac_dev * dev) spin_lock_init(&dev->fib_lock); /* - * Allocate the physically contigous space for the commuication + * Allocate the physically contiguous space for the commuication * queue headers. */ diff --git a/drivers/scsi/aic7xxx/aic79xx.seq b/drivers/scsi/aic7xxx/aic79xx.seq index 3b66b5ae3d9f..2fb78e35a9e5 100644 --- a/drivers/scsi/aic7xxx/aic79xx.seq +++ b/drivers/scsi/aic7xxx/aic79xx.seq @@ -217,7 +217,7 @@ BEGIN_CRITICAL; scbdma_tohost_done: test CCSCBCTL, CCARREN jz fill_qoutfifo_dmadone; /* - * An SCB has been succesfully uploaded to the host. + * An SCB has been successfully uploaded to the host. * If the SCB was uploaded for some reason other than * bad SCSI status (currently only for underruns), we * queue the SCB for normal completion. Otherwise, we diff --git a/drivers/scsi/aic7xxx/aic79xx_core.c b/drivers/scsi/aic7xxx/aic79xx_core.c index 63b521d615f2..4d419c155ce9 100644 --- a/drivers/scsi/aic7xxx/aic79xx_core.c +++ b/drivers/scsi/aic7xxx/aic79xx_core.c @@ -2487,7 +2487,7 @@ ahd_handle_scsiint(struct ahd_softc *ahd, u_int intstat) /* * Although the driver does not care about the * 'Selection in Progress' status bit, the busy - * LED does. SELINGO is only cleared by a sucessfull + * LED does. SELINGO is only cleared by a successfull * selection, so we must manually clear it to insure * the LED turns off just incase no future successful * selections occur (e.g. no devices on the bus). diff --git a/drivers/scsi/aic7xxx/aic7xxx_core.c b/drivers/scsi/aic7xxx/aic7xxx_core.c index 8dfb59d58992..45aa728a76b2 100644 --- a/drivers/scsi/aic7xxx/aic7xxx_core.c +++ b/drivers/scsi/aic7xxx/aic7xxx_core.c @@ -1733,7 +1733,7 @@ ahc_handle_scsiint(struct ahc_softc *ahc, u_int intstat) /* * Although the driver does not care about the * 'Selection in Progress' status bit, the busy - * LED does. SELINGO is only cleared by a sucessfull + * LED does. SELINGO is only cleared by a successfull * selection, so we must manually clear it to insure * the LED turns off just incase no future successful * selections occur (e.g. no devices on the bus). diff --git a/drivers/scsi/bfa/include/defs/bfa_defs_pport.h b/drivers/scsi/bfa/include/defs/bfa_defs_pport.h index a000bc4e2d4a..bf320412ee24 100644 --- a/drivers/scsi/bfa/include/defs/bfa_defs_pport.h +++ b/drivers/scsi/bfa/include/defs/bfa_defs_pport.h @@ -61,7 +61,7 @@ enum bfa_pport_speed { * Port operational type (in sync with SNIA port type). */ enum bfa_pport_type { - BFA_PPORT_TYPE_UNKNOWN = 1, /* port type is unkown */ + BFA_PPORT_TYPE_UNKNOWN = 1, /* port type is unknown */ BFA_PPORT_TYPE_TRUNKED = 2, /* Trunked mode */ BFA_PPORT_TYPE_NPORT = 5, /* P2P with switched fabric */ BFA_PPORT_TYPE_NLPORT = 6, /* public loop */ diff --git a/drivers/scsi/bfa/include/defs/bfa_defs_tsensor.h b/drivers/scsi/bfa/include/defs/bfa_defs_tsensor.h index 31881d218515..ade763dbc8ce 100644 --- a/drivers/scsi/bfa/include/defs/bfa_defs_tsensor.h +++ b/drivers/scsi/bfa/include/defs/bfa_defs_tsensor.h @@ -25,7 +25,7 @@ * Temperature sensor status values */ enum bfa_tsensor_status { - BFA_TSENSOR_STATUS_UNKNOWN = 1, /* unkown status */ + BFA_TSENSOR_STATUS_UNKNOWN = 1, /* unknown status */ BFA_TSENSOR_STATUS_FAULTY = 2, /* sensor is faulty */ BFA_TSENSOR_STATUS_BELOW_MIN = 3, /* temperature below mininum */ BFA_TSENSOR_STATUS_NOMINAL = 4, /* normal temperature */ diff --git a/drivers/scsi/hptiop.c b/drivers/scsi/hptiop.c index a0e7e711ff9d..5be67a6fca93 100644 --- a/drivers/scsi/hptiop.c +++ b/drivers/scsi/hptiop.c @@ -834,7 +834,7 @@ static int hptiop_reset_hba(struct hptiop_hba *hba) atomic_read(&hba->resetting) == 0, 60 * HZ); if (atomic_read(&hba->resetting)) { - /* IOP is in unkown state, abort reset */ + /* IOP is in unknown state, abort reset */ printk(KERN_ERR "scsi%d: reset failed\n", hba->host->host_no); return -1; } diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c index bd2f77197447..6486ae4591b8 100644 --- a/drivers/scsi/libfc/fc_lport.c +++ b/drivers/scsi/libfc/fc_lport.c @@ -56,7 +56,7 @@ * at the same time. * * When discovery succeeds or fails a callback is made to the lport as - * notification. Currently, succesful discovery causes the lport to take no + * notification. Currently, successful discovery causes the lport to take no * action. A failure will cause the lport to reset. There is likely a circular * locking problem with this implementation. */ diff --git a/drivers/scsi/libiscsi_tcp.c b/drivers/scsi/libiscsi_tcp.c index 2e0746d70303..ca25ee5190b0 100644 --- a/drivers/scsi/libiscsi_tcp.c +++ b/drivers/scsi/libiscsi_tcp.c @@ -1004,7 +1004,7 @@ static struct iscsi_r2t_info *iscsi_tcp_get_curr_r2t(struct iscsi_task *task) * iscsi_tcp_task_xmit - xmit normal PDU task * @task: iscsi command task * - * We're expected to return 0 when everything was transmitted succesfully, + * We're expected to return 0 when everything was transmitted successfully, * -EAGAIN if there's still data in the queue, or != 0 for any other kind * of error. */ diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index e1a30a16a9fa..9bd19aa14249 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -654,7 +654,7 @@ lpfc_selective_reset(struct lpfc_hba *phba) * Notes: * Assumes any error from lpfc_selective_reset() will be negative. * If lpfc_selective_reset() returns zero then the length of the buffer - * is returned which indicates succcess + * is returned which indicates success * * Returns: * -EINVAL if the buffer does not contain the string "selective" @@ -3147,7 +3147,7 @@ sysfs_ctlreg_write(struct kobject *kobj, struct bin_attribute *bin_attr, * sysfs_ctlreg_read - Read method for reading from ctlreg * @kobj: kernel kobject that contains the kernel class device. * @bin_attr: kernel attributes passed to us. - * @buf: if succesful contains the data from the adapter IOREG space. + * @buf: if successful contains the data from the adapter IOREG space. * @off: offset into buffer to beginning of data. * @count: bytes to transfer. * diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 45337cd23feb..a14ab4580d4e 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -802,7 +802,7 @@ lpfc_cmpl_els_flogi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, /* FLOGI completes successfully */ lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS, - "0101 FLOGI completes sucessfully " + "0101 FLOGI completes successfully " "Data: x%x x%x x%x x%x\n", irsp->un.ulpWord[4], sp->cmn.e_d_tov, sp->cmn.w2.r_a_tov, sp->cmn.edtovResolution); @@ -4133,7 +4133,7 @@ lpfc_els_rcv_rscn(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb, /* Indicate we are walking fc_rscn_id_list on this vport */ vport->fc_rscn_flush = 1; spin_unlock_irq(shost->host_lock); - /* Get the array count after sucessfully have the token */ + /* Get the array count after successfully have the token */ rscn_cnt = vport->fc_rscn_id_cnt; /* If we are already processing an RSCN, save the received * RSCN payload buffer, cmdiocb->context2 to process later. diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 562d8cee874b..82f8ab5c72cd 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -645,7 +645,7 @@ lpfc_hba_down_prep(struct lpfc_hba *phba) * down the SLI Layer. * * Return codes - * 0 - sucess. + * 0 - success. * Any other value - error. **/ static int @@ -700,7 +700,7 @@ lpfc_hba_down_post_s3(struct lpfc_hba *phba) * down the SLI Layer. * * Return codes - * 0 - sucess. + * 0 - success. * Any other value - error. **/ static int @@ -755,7 +755,7 @@ lpfc_hba_down_post_s4(struct lpfc_hba *phba) * uninitialization after the HBA is reset when bring down the SLI Layer. * * Return codes - * 0 - sucess. + * 0 - success. * Any other value - error. **/ int @@ -1254,7 +1254,7 @@ lpfc_handle_eratt_s4(struct lpfc_hba *phba) * routine from the API jump table function pointer from the lpfc_hba struct. * * Return codes - * 0 - sucess. + * 0 - success. * Any other value - error. **/ void @@ -3124,7 +3124,7 @@ static void lpfc_log_intr_mode(struct lpfc_hba *phba, uint32_t intr_mode) * PCI devices. * * Return codes - * 0 - sucessful + * 0 - successful * other values - error **/ static int @@ -3220,7 +3220,7 @@ lpfc_reset_hba(struct lpfc_hba *phba) * support the SLI-3 HBA device it attached to. * * Return codes - * 0 - sucessful + * 0 - successful * other values - error **/ static int @@ -3321,7 +3321,7 @@ lpfc_sli_driver_resource_unset(struct lpfc_hba *phba) * support the SLI-4 HBA device it attached to. * * Return codes - * 0 - sucessful + * 0 - successful * other values - error **/ static int @@ -3642,7 +3642,7 @@ lpfc_init_api_table_setup(struct lpfc_hba *phba, uint8_t dev_grp) * device specific resource setup to support the HBA device it attached to. * * Return codes - * 0 - sucessful + * 0 - successful * other values - error **/ static int @@ -3688,7 +3688,7 @@ lpfc_setup_driver_resource_phase1(struct lpfc_hba *phba) * device specific resource setup to support the HBA device it attached to. * * Return codes - * 0 - sucessful + * 0 - successful * other values - error **/ static int @@ -3753,7 +3753,7 @@ lpfc_free_iocb_list(struct lpfc_hba *phba) * list and set up the IOCB tag array accordingly. * * Return codes - * 0 - sucessful + * 0 - successful * other values - error **/ static int @@ -3872,7 +3872,7 @@ lpfc_free_active_sgl(struct lpfc_hba *phba) * list and set up the sgl xritag tag array accordingly. * * Return codes - * 0 - sucessful + * 0 - successful * other values - error **/ static int @@ -3986,7 +3986,7 @@ out_free_mem: * enabled and the driver is reinitializing the device. * * Return codes - * 0 - sucessful + * 0 - successful * ENOMEM - No availble memory * EIO - The mailbox failed to complete successfully. **/ @@ -4146,7 +4146,7 @@ lpfc_sli4_remove_rpi_hdrs(struct lpfc_hba *phba) * PCI device data structure is set. * * Return codes - * pointer to @phba - sucessful + * pointer to @phba - successful * NULL - error **/ static struct lpfc_hba * @@ -4202,7 +4202,7 @@ lpfc_hba_free(struct lpfc_hba *phba) * host with it. * * Return codes - * 0 - sucessful + * 0 - successful * other values - error **/ static int @@ -4365,7 +4365,7 @@ lpfc_post_init_setup(struct lpfc_hba *phba) * with SLI-3 interface spec. * * Return codes - * 0 - sucessful + * 0 - successful * other values - error **/ static int @@ -4662,7 +4662,7 @@ lpfc_sli4_bar2_register_memmap(struct lpfc_hba *phba, uint32_t vf) * this routine. * * Return codes - * 0 - sucessful + * 0 - successful * ENOMEM - could not allocated memory. **/ static int @@ -4761,7 +4761,7 @@ lpfc_destroy_bootstrap_mbox(struct lpfc_hba *phba) * allocation for the port. * * Return codes - * 0 - sucessful + * 0 - successful * ENOMEM - No availble memory * EIO - The mailbox failed to complete successfully. **/ @@ -4861,7 +4861,7 @@ lpfc_sli4_read_config(struct lpfc_hba *phba) * HBA consistent with the SLI-4 interface spec. * * Return codes - * 0 - sucessful + * 0 - successful * ENOMEM - No availble memory * EIO - The mailbox failed to complete successfully. **/ @@ -4910,7 +4910,7 @@ lpfc_setup_endian_order(struct lpfc_hba *phba) * we just use some constant number as place holder. * * Return codes - * 0 - sucessful + * 0 - successful * ENOMEM - No availble memory * EIO - The mailbox failed to complete successfully. **/ @@ -5218,7 +5218,7 @@ out_error: * operation. * * Return codes - * 0 - sucessful + * 0 - successful * ENOMEM - No availble memory * EIO - The mailbox failed to complete successfully. **/ @@ -5286,7 +5286,7 @@ lpfc_sli4_queue_destroy(struct lpfc_hba *phba) * operation. * * Return codes - * 0 - sucessful + * 0 - successful * ENOMEM - No availble memory * EIO - The mailbox failed to complete successfully. **/ @@ -5552,7 +5552,7 @@ out_error: * operation. * * Return codes - * 0 - sucessful + * 0 - successful * ENOMEM - No availble memory * EIO - The mailbox failed to complete successfully. **/ @@ -5599,7 +5599,7 @@ lpfc_sli4_queue_unset(struct lpfc_hba *phba) * Later, this can be used for all the slow-path events. * * Return codes - * 0 - sucessful + * 0 - successful * -ENOMEM - No availble memory **/ static int @@ -5760,7 +5760,7 @@ lpfc_sli4_cq_event_release_all(struct lpfc_hba *phba) * all resources assigned to the PCI function which originates this request. * * Return codes - * 0 - sucessful + * 0 - successful * ENOMEM - No availble memory * EIO - The mailbox failed to complete successfully. **/ @@ -5923,7 +5923,7 @@ lpfc_sli4_fcfi_unreg(struct lpfc_hba *phba, uint16_t fcfi) * with SLI-4 interface spec. * * Return codes - * 0 - sucessful + * 0 - successful * other values - error **/ static int @@ -6052,7 +6052,7 @@ lpfc_sli4_pci_mem_unset(struct lpfc_hba *phba) * will be left with MSI-X enabled and leaks its vectors. * * Return codes - * 0 - sucessful + * 0 - successful * other values - error **/ static int @@ -6184,7 +6184,7 @@ lpfc_sli_disable_msix(struct lpfc_hba *phba) * is done in this function. * * Return codes - * 0 - sucessful + * 0 - successful * other values - error */ static int @@ -6243,7 +6243,7 @@ lpfc_sli_disable_msi(struct lpfc_hba *phba) * MSI-X -> MSI -> IRQ. * * Return codes - * 0 - sucessful + * 0 - successful * other values - error **/ static uint32_t @@ -6333,7 +6333,7 @@ lpfc_sli_disable_intr(struct lpfc_hba *phba) * enabled and leaks its vectors. * * Return codes - * 0 - sucessful + * 0 - successful * other values - error **/ static int @@ -6443,7 +6443,7 @@ lpfc_sli4_disable_msix(struct lpfc_hba *phba) * which is done in this function. * * Return codes - * 0 - sucessful + * 0 - successful * other values - error **/ static int @@ -6508,7 +6508,7 @@ lpfc_sli4_disable_msi(struct lpfc_hba *phba) * MSI-X -> MSI -> IRQ. * * Return codes - * 0 - sucessful + * 0 - successful * other values - error **/ static uint32_t diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 43cbe336f1f8..42d4f3dae1d6 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -1794,7 +1794,7 @@ lpfc_sli_handle_mb_event(struct lpfc_hba *phba) */ if (lpfc_sli_chk_mbx_command(pmbox->mbxCommand) == MBX_SHUTDOWN) { - /* Unknow mailbox command compl */ + /* Unknown mailbox command compl */ lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI, "(%d):0323 Unknown Mailbox command " "x%x (x%x) Cmpl\n", @@ -4163,7 +4163,7 @@ lpfc_sli4_read_fcoe_params(struct lpfc_hba *phba, * addition, this routine gets the port vpd data. * * Return codes - * 0 - sucessful + * 0 - successful * ENOMEM - could not allocated memory. **/ static int @@ -11091,7 +11091,7 @@ lpfc_sli4_handle_received_buffer(struct lpfc_hba *phba) * sequential. * * Return codes - * 0 - sucessful + * 0 - successful * EIO - The mailbox failed to complete successfully. * When this error occurs, the driver is not guaranteed * to have any rpi regions posted to the device and @@ -11129,7 +11129,7 @@ lpfc_sli4_post_all_rpi_hdrs(struct lpfc_hba *phba) * maps up to 64 rpi context regions. * * Return codes - * 0 - sucessful + * 0 - successful * ENOMEM - No available memory * EIO - The mailbox failed to complete successfully. **/ @@ -11191,7 +11191,7 @@ lpfc_sli4_post_rpi_hdr(struct lpfc_hba *phba, struct lpfc_rpi_hdr *rpi_page) * PAGE_SIZE modulo 64 rpi context headers. * * Returns - * A nonzero rpi defined as rpi_base <= rpi < max_rpi if sucessful + * A nonzero rpi defined as rpi_base <= rpi < max_rpi if successful * LPFC_RPI_ALLOC_ERROR if no rpis are available. **/ int diff --git a/drivers/scsi/megaraid.h b/drivers/scsi/megaraid.h index 512c2cc1a33f..d310f49d077e 100644 --- a/drivers/scsi/megaraid.h +++ b/drivers/scsi/megaraid.h @@ -381,7 +381,7 @@ typedef struct { u8 battery_status; /* * BIT 0: battery module missing * BIT 1: VBAD - * BIT 2: temprature high + * BIT 2: temperature high * BIT 3: battery pack missing * BIT 4,5: * 00 - charge complete diff --git a/drivers/scsi/megaraid/mbox_defs.h b/drivers/scsi/megaraid/mbox_defs.h index b25b74764ec3..ce2487a888ed 100644 --- a/drivers/scsi/megaraid/mbox_defs.h +++ b/drivers/scsi/megaraid/mbox_defs.h @@ -497,7 +497,7 @@ typedef struct { * @inserted_drive : channel:Id of inserted drive * @battery_status : bit 0: battery module missing * bit 1: VBAD - * bit 2: temprature high + * bit 2: temperature high * bit 3: battery pack missing * bit 4,5: * 00 - charge complete diff --git a/drivers/scsi/megaraid/megaraid_mbox.c b/drivers/scsi/megaraid/megaraid_mbox.c index 234f0b7eb21c..f9ae8037a710 100644 --- a/drivers/scsi/megaraid/megaraid_mbox.c +++ b/drivers/scsi/megaraid/megaraid_mbox.c @@ -2704,7 +2704,7 @@ megaraid_reset_handler(struct scsi_cmnd *scp) } else { con_log(CL_ANN, (KERN_NOTICE - "megaraid mbox: reset sequence completed sucessfully\n")); + "megaraid mbox: reset sequence completed successfully\n")); } diff --git a/drivers/scsi/mpt2sas/mpt2sas_scsih.c b/drivers/scsi/mpt2sas/mpt2sas_scsih.c index 86ab32d7ab15..756e509d495c 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_scsih.c +++ b/drivers/scsi/mpt2sas/mpt2sas_scsih.c @@ -2894,7 +2894,7 @@ _scsih_normalize_sense(char *sense_buffer, struct sense_info *data) #ifdef CONFIG_SCSI_MPT2SAS_LOGGING /** - * _scsih_scsi_ioc_info - translated non-succesfull SCSI_IO request + * _scsih_scsi_ioc_info - translated non-successfull SCSI_IO request * @ioc: per adapter object * @scmd: pointer to scsi command object * @mpi_reply: reply mf payload returned from firmware diff --git a/drivers/scsi/ncr53c8xx.c b/drivers/scsi/ncr53c8xx.c index e3c482aa87b5..a2d569828308 100644 --- a/drivers/scsi/ncr53c8xx.c +++ b/drivers/scsi/ncr53c8xx.c @@ -6495,7 +6495,7 @@ static void ncr_int_ma (struct ncb *np) ** we force a SIR_NEGO_PROTO interrupt (it is a hack that avoids ** bloat for such a should_not_happen situation). ** In all other situation, we reset the BUS. - ** Are these assumptions reasonnable ? (Wait and see ...) + ** Are these assumptions reasonable ? (Wait and see ...) */ unexpected_phase: dsp -= 8; diff --git a/drivers/scsi/pmcraid.c b/drivers/scsi/pmcraid.c index f7c70e2a8224..d3d39f86fcf7 100644 --- a/drivers/scsi/pmcraid.c +++ b/drivers/scsi/pmcraid.c @@ -3342,7 +3342,7 @@ static int pmcraid_chr_fasync(int fd, struct file *filep, int mode) * @direction : data transfer direction * * Return value - * 0 on sucess, non-zero error code on failure + * 0 on success, non-zero error code on failure */ static int pmcraid_build_passthrough_ioadls( struct pmcraid_cmd *cmd, @@ -3401,7 +3401,7 @@ static int pmcraid_build_passthrough_ioadls( * @direction: data transfer direction * * Return value - * 0 on sucess, non-zero error code on failure + * 0 on success, non-zero error code on failure */ static void pmcraid_release_passthrough_ioadls( struct pmcraid_cmd *cmd, @@ -3429,7 +3429,7 @@ static void pmcraid_release_passthrough_ioadls( * @arg: pointer to pmcraid_passthrough_buffer user buffer * * Return value - * 0 on sucess, non-zero error code on failure + * 0 on success, non-zero error code on failure */ static long pmcraid_ioctl_passthrough( struct pmcraid_instance *pinstance, diff --git a/drivers/scsi/pmcraid.h b/drivers/scsi/pmcraid.h index 3441b3f90827..2752b56cad56 100644 --- a/drivers/scsi/pmcraid.h +++ b/drivers/scsi/pmcraid.h @@ -771,11 +771,11 @@ static struct pmcraid_ioasc_error pmcraid_ioasc_error_table[] = { {0x01180600, IOASC_LOG_LEVEL_MUST, "Recovered Error, soft media error, sector reassignment suggested"}, {0x015D0000, IOASC_LOG_LEVEL_MUST, - "Recovered Error, failure prediction thresold exceeded"}, + "Recovered Error, failure prediction threshold exceeded"}, {0x015D9200, IOASC_LOG_LEVEL_MUST, - "Recovered Error, soft Cache Card Battery error thresold"}, + "Recovered Error, soft Cache Card Battery error threshold"}, {0x015D9200, IOASC_LOG_LEVEL_MUST, - "Recovered Error, soft Cache Card Battery error thresold"}, + "Recovered Error, soft Cache Card Battery error threshold"}, {0x02048000, IOASC_LOG_LEVEL_MUST, "Not Ready, IOA Reset Required"}, {0x02408500, IOASC_LOG_LEVEL_MUST, diff --git a/drivers/scsi/scsi_netlink.c b/drivers/scsi/scsi_netlink.c index 723fdecd91bd..0fd6ae6911ad 100644 --- a/drivers/scsi/scsi_netlink.c +++ b/drivers/scsi/scsi_netlink.c @@ -613,7 +613,7 @@ EXPORT_SYMBOL_GPL(scsi_nl_send_transport_msg); * @data_buf: pointer to vendor unique data buffer * * Returns: - * 0 on succesful return + * 0 on successful return * otherwise, failing error code * * Notes: diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c index fd47cb1bee1b..f27e52d963d3 100644 --- a/drivers/scsi/scsi_transport_sas.c +++ b/drivers/scsi/scsi_transport_sas.c @@ -666,7 +666,7 @@ EXPORT_SYMBOL(sas_phy_add); * * Note: * This function must only be called on a PHY that has not - * sucessfully been added using sas_phy_add(). + * successfully been added using sas_phy_add(). */ void sas_phy_free(struct sas_phy *phy) { @@ -896,7 +896,7 @@ EXPORT_SYMBOL(sas_port_add); * * Note: * This function must only be called on a PORT that has not - * sucessfully been added using sas_port_add(). + * successfully been added using sas_port_add(). */ void sas_port_free(struct sas_port *port) { @@ -1476,7 +1476,7 @@ EXPORT_SYMBOL(sas_rphy_add); * * Note: * This function must only be called on a remote - * PHY that has not sucessfully been added using + * PHY that has not successfully been added using * sas_rphy_add() (or has been sas_rphy_remove()'d) */ void sas_rphy_free(struct sas_rphy *rphy) diff --git a/drivers/scsi/sym53c8xx_2/sym_glue.c b/drivers/scsi/sym53c8xx_2/sym_glue.c index 45374d66d26a..2b38f6ad6e11 100644 --- a/drivers/scsi/sym53c8xx_2/sym_glue.c +++ b/drivers/scsi/sym53c8xx_2/sym_glue.c @@ -1864,7 +1864,7 @@ static pci_ers_result_t sym2_io_slot_dump(struct pci_dev *pdev) * * This routine is similar to sym_set_workarounds(), except * that, at this point, we already know that the device was - * succesfully intialized at least once before, and so most + * successfully intialized at least once before, and so most * of the steps taken there are un-needed here. */ static void sym2_reset_workarounds(struct pci_dev *pdev) diff --git a/drivers/scsi/sym53c8xx_2/sym_hipd.c b/drivers/scsi/sym53c8xx_2/sym_hipd.c index 297deb817a5d..a7bc8b7b09ac 100644 --- a/drivers/scsi/sym53c8xx_2/sym_hipd.c +++ b/drivers/scsi/sym53c8xx_2/sym_hipd.c @@ -2692,7 +2692,7 @@ static void sym_int_ma (struct sym_hcb *np) * we force a SIR_NEGO_PROTO interrupt (it is a hack that avoids * bloat for such a should_not_happen situation). * In all other situation, we reset the BUS. - * Are these assumptions reasonnable ? (Wait and see ...) + * Are these assumptions reasonable ? (Wait and see ...) */ unexpected_phase: dsp -= 8; diff --git a/drivers/scsi/sym53c8xx_2/sym_hipd.h b/drivers/scsi/sym53c8xx_2/sym_hipd.h index 053e63c86822..5a80cbac3f92 100644 --- a/drivers/scsi/sym53c8xx_2/sym_hipd.h +++ b/drivers/scsi/sym53c8xx_2/sym_hipd.h @@ -54,7 +54,7 @@ * * SYM_OPT_LIMIT_COMMAND_REORDERING * When this option is set, the driver tries to limit tagged - * command reordering to some reasonnable value. + * command reordering to some reasonable value. * (set for Linux) */ #if 0 diff --git a/drivers/serial/8250_pnp.c b/drivers/serial/8250_pnp.c index d71dfe398940..36ede02ceacf 100644 --- a/drivers/serial/8250_pnp.c +++ b/drivers/serial/8250_pnp.c @@ -361,9 +361,9 @@ static const struct pnp_device_id pnp_dev_table[] = { { "LTS0001", 0 }, /* Rockwell's (PORALiNK) 33600 INT PNP */ { "WCI0003", 0 }, - /* Unkown PnP modems */ + /* Unknown PnP modems */ { "PNPCXXX", UNKNOWN_DEV }, - /* More unkown PnP modems */ + /* More unknown PnP modems */ { "PNPDXXX", UNKNOWN_DEV }, { "", 0 } }; diff --git a/drivers/serial/pmac_zilog.h b/drivers/serial/pmac_zilog.h index 570b0d925e83..f6e77f12acd5 100644 --- a/drivers/serial/pmac_zilog.h +++ b/drivers/serial/pmac_zilog.h @@ -73,7 +73,7 @@ static inline struct uart_pmac_port *pmz_get_port_A(struct uart_pmac_port *uap) } /* - * Register acessors. Note that we don't need to enforce a recovery + * Register accessors. Note that we don't need to enforce a recovery * delay on PCI PowerMac hardware, it's dealt in HW by the MacIO chip, * though if we try to use this driver on older machines, we might have * to add it back diff --git a/drivers/serial/ucc_uart.c b/drivers/serial/ucc_uart.c index 0c08f286a2ef..46de564aaea0 100644 --- a/drivers/serial/ucc_uart.c +++ b/drivers/serial/ucc_uart.c @@ -313,7 +313,7 @@ static void qe_uart_stop_tx(struct uart_port *port) * This function will attempt to stuff of all the characters from the * kernel's transmit buffer into TX BDs. * - * A return value of non-zero indicates that it sucessfully stuffed all + * A return value of non-zero indicates that it successfully stuffed all * characters from the kernel buffer. * * A return value of zero indicates that there are still characters in the diff --git a/drivers/telephony/ixj.c b/drivers/telephony/ixj.c index 40de151f2789..e89304c72568 100644 --- a/drivers/telephony/ixj.c +++ b/drivers/telephony/ixj.c @@ -4190,7 +4190,7 @@ static void ixj_aec_start(IXJ *j, int level) ixj_WriteDSPCommand(0x1224, j); ixj_WriteDSPCommand(0xE014, j); - ixj_WriteDSPCommand(0x0003, j); /* Lock threashold at 3dB */ + ixj_WriteDSPCommand(0x0003, j); /* Lock threshold at 3dB */ ixj_WriteDSPCommand(0xE338, j); /* Set Echo Suppresser Attenuation to 0dB */ @@ -4235,7 +4235,7 @@ static void ixj_aec_start(IXJ *j, int level) ixj_WriteDSPCommand(0x1224, j); ixj_WriteDSPCommand(0xE014, j); - ixj_WriteDSPCommand(0x0003, j); /* Lock threashold at 3dB */ + ixj_WriteDSPCommand(0x0003, j); /* Lock threshold at 3dB */ ixj_WriteDSPCommand(0xE338, j); /* Set Echo Suppresser Attenuation to 0dB */ diff --git a/drivers/usb/atm/ueagle-atm.c b/drivers/usb/atm/ueagle-atm.c index d171b563e94c..bba4d3eabe0f 100644 --- a/drivers/usb/atm/ueagle-atm.c +++ b/drivers/usb/atm/ueagle-atm.c @@ -1958,7 +1958,7 @@ static void uea_dispatch_cmv_e1(struct uea_softc *sc, struct intr_pkt *intr) goto bad1; /* FIXME : ADI930 reply wrong preambule (func = 2, sub = 2) to - * the first MEMACESS cmv. Ignore it... + * the first MEMACCESS cmv. Ignore it... */ if (cmv->bFunction != dsc->function) { if (UEA_CHIP_VERSION(sc) == ADI930 diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c index 2473cf0c6b1d..b4bd2411c666 100644 --- a/drivers/usb/class/usbtmc.c +++ b/drivers/usb/class/usbtmc.c @@ -1,5 +1,5 @@ /** - * drivers/usb/class/usbtmc.c - USB Test & Measurment class driver + * drivers/usb/class/usbtmc.c - USB Test & Measurement class driver * * Copyright (C) 2007 Stefan Kopp, Gechingen, Germany * Copyright (C) 2008 Novell, Inc. diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c index da718e84d58d..e80f1af438c8 100644 --- a/drivers/usb/core/message.c +++ b/drivers/usb/core/message.c @@ -1890,7 +1890,7 @@ static void cancel_async_set_config(struct usb_device *udev) * routine gets around the normal restrictions by using a work thread to * submit the change-config request. * - * Returns 0 if the request was succesfully queued, error code otherwise. + * Returns 0 if the request was successfully queued, error code otherwise. * The caller has no way to know whether the queued request will eventually * succeed. */ diff --git a/drivers/usb/gadget/f_acm.c b/drivers/usb/gadget/f_acm.c index 7953948bfe4a..4e3657808b0f 100644 --- a/drivers/usb/gadget/f_acm.c +++ b/drivers/usb/gadget/f_acm.c @@ -432,7 +432,7 @@ static void acm_disable(struct usb_function *f) * @length: size of data * Context: irqs blocked, acm->lock held, acm_notify_req non-null * - * Returns zero on sucess or a negative errno. + * Returns zero on success or a negative errno. * * See section 6.3.5 of the CDC 1.1 specification for information * about the only notification we issue: SerialState change. diff --git a/drivers/usb/gadget/pxa27x_udc.c b/drivers/usb/gadget/pxa27x_udc.c index 1937d8c7b433..adda1208a1ec 100644 --- a/drivers/usb/gadget/pxa27x_udc.c +++ b/drivers/usb/gadget/pxa27x_udc.c @@ -1524,7 +1524,7 @@ static int pxa_udc_get_frame(struct usb_gadget *_gadget) * pxa_udc_wakeup - Force udc device out of suspend * @_gadget: usb gadget * - * Returns 0 if succesfull, error code otherwise + * Returns 0 if successfull, error code otherwise */ static int pxa_udc_wakeup(struct usb_gadget *_gadget) { diff --git a/drivers/usb/host/fhci-sched.c b/drivers/usb/host/fhci-sched.c index 62a226b61670..00a29855d0c4 100644 --- a/drivers/usb/host/fhci-sched.c +++ b/drivers/usb/host/fhci-sched.c @@ -627,7 +627,7 @@ irqreturn_t fhci_irq(struct usb_hcd *hcd) /* - * Process normal completions(error or sucess) and clean the schedule. + * Process normal completions(error or success) and clean the schedule. * * This is the main path for handing urbs back to drivers. The only other patth * is process_del_list(),which unlinks URBs by scanning EDs,instead of scanning diff --git a/drivers/usb/wusbcore/crypto.c b/drivers/usb/wusbcore/crypto.c index 9ec7fd5da489..9579cf4c38bf 100644 --- a/drivers/usb/wusbcore/crypto.c +++ b/drivers/usb/wusbcore/crypto.c @@ -111,7 +111,7 @@ struct aes_ccm_b1 { * * CCM uses Ax blocks to generate a keystream with which the MIC and * the message's payload are encoded. A0 always encrypts/decrypts the - * MIC. Ax (x>0) are used for the sucesive payload blocks. + * MIC. Ax (x>0) are used for the successive payload blocks. * * The x is the counter, and is increased for each block. */ diff --git a/drivers/usb/wusbcore/wa-xfer.c b/drivers/usb/wusbcore/wa-xfer.c index 613a5fc490d3..489b47833e2c 100644 --- a/drivers/usb/wusbcore/wa-xfer.c +++ b/drivers/usb/wusbcore/wa-xfer.c @@ -558,7 +558,7 @@ static void wa_seg_dto_cb(struct urb *urb) /* * Callback for the segment request * - * If succesful transition state (unless already transitioned or + * If successful transition state (unless already transitioned or * outbound transfer); otherwise, take a note of the error, mark this * segment done and try completion. * @@ -1364,7 +1364,7 @@ segment_aborted: /* * Callback for the IN data phase * - * If succesful transition state; otherwise, take a note of the + * If successful transition state; otherwise, take a note of the * error, mark this segment done and try completion. * * Note we don't access until we are sure that the transfer hasn't diff --git a/drivers/uwb/i1480/dfu/usb.c b/drivers/uwb/i1480/dfu/usb.c index c7080d497311..0bb665a0c024 100644 --- a/drivers/uwb/i1480/dfu/usb.c +++ b/drivers/uwb/i1480/dfu/usb.c @@ -229,7 +229,7 @@ void i1480_usb_neep_cb(struct urb *urb) * will verify it. * * Set i1480->evt_result with the result of getting the event or its - * size (if succesful). + * size (if successful). * * Delivers the data directly to i1480->evt_buf */ diff --git a/drivers/uwb/wlp/txrx.c b/drivers/uwb/wlp/txrx.c index 86a853b84119..7350ed6909f8 100644 --- a/drivers/uwb/wlp/txrx.c +++ b/drivers/uwb/wlp/txrx.c @@ -282,7 +282,7 @@ EXPORT_SYMBOL_GPL(wlp_receive_frame); * and transmission will be done by the calling function. * @dst: On return this will contain the device address to which the * frame is destined. - * @returns: 0 on success no tx : WLP header sucessfully applied to skb buffer, + * @returns: 0 on success no tx : WLP header successfully applied to skb buffer, * calling function can proceed with tx * 1 on success with tx : WLP will take over transmission of this * frame diff --git a/drivers/video/aty/atyfb_base.c b/drivers/video/aty/atyfb_base.c index 913b4a47ae52..1ddeb4c34763 100644 --- a/drivers/video/aty/atyfb_base.c +++ b/drivers/video/aty/atyfb_base.c @@ -3276,7 +3276,7 @@ static void __devinit aty_init_lcd(struct atyfb_par *par, u32 bios_base) txtformat = "24 bit interface"; break; default: - txtformat = "unkown format"; + txtformat = "unknown format"; } } else { switch (format & 7) { @@ -3299,7 +3299,7 @@ static void __devinit aty_init_lcd(struct atyfb_par *par, u32 bios_base) txtformat = "262144 colours (FDPI-2 mode)"; break; default: - txtformat = "unkown format"; + txtformat = "unknown format"; } } PRINTKI("%s%s %s monitor detected: %s\n", diff --git a/drivers/video/backlight/atmel-pwm-bl.c b/drivers/video/backlight/atmel-pwm-bl.c index 505c0823a105..2cf7ba52f67c 100644 --- a/drivers/video/backlight/atmel-pwm-bl.c +++ b/drivers/video/backlight/atmel-pwm-bl.c @@ -158,7 +158,7 @@ static int atmel_pwm_bl_probe(struct platform_device *pdev) goto err_free_pwm; } - /* Turn display off by defatult. */ + /* Turn display off by default. */ retval = gpio_direction_output(pwmbl->gpio_on, 0 ^ pdata->on_active_low); if (retval) diff --git a/drivers/video/backlight/tosa_lcd.c b/drivers/video/backlight/tosa_lcd.c index 50ec17dfc517..fa32b94a4546 100644 --- a/drivers/video/backlight/tosa_lcd.c +++ b/drivers/video/backlight/tosa_lcd.c @@ -177,7 +177,7 @@ static int __devinit tosa_lcd_probe(struct spi_device *spi) if (!data) return -ENOMEM; - data->is_vga = true; /* defaut to VGA mode */ + data->is_vga = true; /* default to VGA mode */ /* * bits_per_word cannot be configured in platform data diff --git a/drivers/video/console/sticore.c b/drivers/video/console/sticore.c index 857b3668b3ba..6468a297e341 100644 --- a/drivers/video/console/sticore.c +++ b/drivers/video/console/sticore.c @@ -436,7 +436,7 @@ sti_init_glob_cfg(struct sti_struct *sti, (offs < PCI_BASE_ADDRESS_0 || offs > PCI_BASE_ADDRESS_5)) { printk (KERN_WARNING - "STI pci region maping for region %d (%02x) can't be mapped\n", + "STI pci region mapping for region %d (%02x) can't be mapped\n", i,sti->rm_entry[i]); continue; } diff --git a/drivers/video/gbefb.c b/drivers/video/gbefb.c index 1a83709f9611..492e6e64b653 100644 --- a/drivers/video/gbefb.c +++ b/drivers/video/gbefb.c @@ -701,7 +701,7 @@ static int gbefb_set_par(struct fb_info *info) blocks of 512x128, 256x128 or 128x128 pixels, respectively for 8bit, 16bit and 32 bit modes (64 kB). They cover the screen with partial tiles on the right and/or bottom of the screen if needed. - For exemple in 640x480 8 bit mode the mapping is: + For example in 640x480 8 bit mode the mapping is: <-------- 640 -----> <---- 512 ----><128|384 offscreen> diff --git a/drivers/video/stifb.c b/drivers/video/stifb.c index 6120f0c526fe..876648e15e9d 100644 --- a/drivers/video/stifb.c +++ b/drivers/video/stifb.c @@ -756,9 +756,9 @@ hyperResetPlanes(struct stifb_info *fb, int enable) if (fb->info.var.bits_per_pixel == 32) controlPlaneReg = 0x04000F00; else - controlPlaneReg = 0x00000F00; /* 0x00000800 should be enought, but lets clear all 4 bits */ + controlPlaneReg = 0x00000F00; /* 0x00000800 should be enough, but lets clear all 4 bits */ else - controlPlaneReg = 0x00000F00; /* 0x00000100 should be enought, but lets clear all 4 bits */ + controlPlaneReg = 0x00000F00; /* 0x00000100 should be enough, but lets clear all 4 bits */ switch (enable) { case ENABLE: diff --git a/drivers/video/tdfxfb.c b/drivers/video/tdfxfb.c index ff43c8885028..980548390048 100644 --- a/drivers/video/tdfxfb.c +++ b/drivers/video/tdfxfb.c @@ -52,7 +52,7 @@ * * 0.1.3 (released 1999-11-02) added Attila's panning support, code * reorg, hwcursor address page size alignment - * (for mmaping both frame buffer and regs), + * (for mmapping both frame buffer and regs), * and my changes to get rid of hardcoded * VGA i/o register locations (uses PCI * configuration info now) diff --git a/drivers/video/via/dvi.c b/drivers/video/via/dvi.c index c5c32b6b6e6c..67b36932212b 100644 --- a/drivers/video/via/dvi.c +++ b/drivers/video/via/dvi.c @@ -467,7 +467,7 @@ static int dvi_get_panel_size_from_DDCv1(void) default: viaparinfo->tmds_setting_info->dvi_panel_size = VIA_RES_1024X768; - DEBUG_MSG(KERN_INFO "Unknow panel size max resolution = %d !\ + DEBUG_MSG(KERN_INFO "Unknown panel size max resolution = %d !\ set default panel size.\n", max_h); break; } @@ -534,7 +534,7 @@ static int dvi_get_panel_size_from_DDCv2(void) default: viaparinfo->tmds_setting_info->dvi_panel_size = VIA_RES_1024X768; - DEBUG_MSG(KERN_INFO "Unknow panel size max resolution = %d!\ + DEBUG_MSG(KERN_INFO "Unknown panel size max resolution = %d!\ set default panel size.\n", HSize); break; } diff --git a/drivers/video/vt8623fb.c b/drivers/video/vt8623fb.c index 3df17dc8c3d7..65ccd215d496 100644 --- a/drivers/video/vt8623fb.c +++ b/drivers/video/vt8623fb.c @@ -446,7 +446,7 @@ static int vt8623fb_set_par(struct fb_info *info) svga_wseq_mask(0x1E, 0xF0, 0xF0); // DI/DVP bus svga_wseq_mask(0x2A, 0x0F, 0x0F); // DI/DVP bus - svga_wseq_mask(0x16, 0x08, 0xBF); // FIFO read treshold + svga_wseq_mask(0x16, 0x08, 0xBF); // FIFO read threshold vga_wseq(NULL, 0x17, 0x1F); // FIFO depth vga_wseq(NULL, 0x18, 0x4E); svga_wseq_mask(0x1A, 0x08, 0x08); // enable MMIO ? diff --git a/drivers/watchdog/coh901327_wdt.c b/drivers/watchdog/coh901327_wdt.c index 381026c0bd7b..923cc68dba26 100644 --- a/drivers/watchdog/coh901327_wdt.c +++ b/drivers/watchdog/coh901327_wdt.c @@ -508,7 +508,7 @@ void coh901327_watchdog_reset(void) * deactivating the watchdog before it is shut down by it. * * NOTE: on future versions of the watchdog, this restriction is - * gone: the watchdog will be reloaded with a defaul value (1 min) + * gone: the watchdog will be reloaded with a default value (1 min) * instead of last value, and you can conveniently set the watchdog * timeout to 10ms (value = 1) without any problems. */ diff --git a/drivers/watchdog/machzwd.c b/drivers/watchdog/machzwd.c index b6b3f59ab446..47d719717a3b 100644 --- a/drivers/watchdog/machzwd.c +++ b/drivers/watchdog/machzwd.c @@ -21,7 +21,7 @@ * wd#1 - 2 seconds; * wd#2 - 7.2 ms; * After the expiration of wd#1, it can generate a NMI, SCI, SMI, or - * a system RESET and it starts wd#2 that unconditionaly will RESET + * a system RESET and it starts wd#2 that unconditionally will RESET * the system when the counter reaches zero. * * 14-Dec-2001 Matt Domsch diff --git a/drivers/watchdog/wdrtas.c b/drivers/watchdog/wdrtas.c index 3bde56bce63a..5bfb1f2c5319 100644 --- a/drivers/watchdog/wdrtas.c +++ b/drivers/watchdog/wdrtas.c @@ -542,7 +542,7 @@ static struct notifier_block wdrtas_notifier = { /** * wdrtas_get_tokens - reads in RTAS tokens * - * returns 0 on succes, <0 on failure + * returns 0 on success, <0 on failure * * wdrtas_get_tokens reads in the tokens for the RTAS calls used in * this watchdog driver. It tolerates, if "get-sensor-state" and @@ -598,7 +598,7 @@ static void wdrtas_unregister_devs(void) /** * wdrtas_register_devs - registers the misc dev handlers * - * returns 0 on succes, <0 on failure + * returns 0 on success, <0 on failure * * wdrtas_register_devs registers the watchdog and temperature watchdog * misc devs @@ -630,7 +630,7 @@ static int wdrtas_register_devs(void) /** * wdrtas_init - init function of the watchdog driver * - * returns 0 on succes, <0 on failure + * returns 0 on success, <0 on failure * * registers the file handlers and the reboot notifier */ diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index b9b3bb51b1e4..d15ea1790bfb 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -767,7 +767,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) current->mm->start_stack = bprm->p; - /* Now we do a little grungy work by mmaping the ELF image into + /* Now we do a little grungy work by mmapping the ELF image into the correct location in memory. */ for(i = 0, elf_ppnt = elf_phdata; i < loc->elf_ex.e_phnum; i++, elf_ppnt++) { diff --git a/fs/bio.c b/fs/bio.c index 12da5db8682c..2bd671a08e3b 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -272,7 +272,7 @@ EXPORT_SYMBOL(bio_init); * for a &struct bio to become free. If a %NULL @bs is passed in, we will * fall back to just using @kmalloc to allocate the required memory. * - * Note that the caller must set ->bi_destructor on succesful return + * Note that the caller must set ->bi_destructor on successful return * of a bio, to do the appropriate freeing of the bio once the reference * count drops to zero. **/ diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 2c726b7b9faa..6815d2a84b94 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -256,7 +256,7 @@ out: * Insert @em into @tree or perform a simple forward/backward merge with * existing mappings. The extent_map struct passed in will be inserted * into the tree directly, with an additional reference taken, or a - * reference dropped if the merge attempt was sucessfull. + * reference dropped if the merge attempt was successfull. */ int add_extent_mapping(struct extent_map_tree *tree, struct extent_map *em) diff --git a/fs/cifs/README b/fs/cifs/README index 79c1a93400be..a727b7cb075f 100644 --- a/fs/cifs/README +++ b/fs/cifs/README @@ -423,7 +423,7 @@ A partial list of the supported mount options follows: source name to use to represent the client netbios machine name when doing the RFC1001 netbios session initialize. direct Do not do inode data caching on files opened on this mount. - This precludes mmaping files on this mount. In some cases + This precludes mmapping files on this mount. In some cases with fast networks and little or no caching benefits on the client (e.g. when the application is doing large sequential reads bigger than page size without rereading the same data) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 5d0fde18039c..4b35f7ec0583 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -39,7 +39,7 @@ /* * MAX_REQ is the maximum number of requests that WE will send - * on one socket concurently. It also matches the most common + * on one socket concurrently. It also matches the most common * value of max multiplex returned by servers. We may * eventually want to use the negotiated value (in case * future servers can handle more) when we are more confident that diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 5e2492535daa..83580213fcac 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -917,8 +917,8 @@ undo_setattr: /* * If dentry->d_inode is null (usually meaning the cached dentry * is a negative dentry) then we would attempt a standard SMB delete, but - * if that fails we can not attempt the fall back mechanisms on EACESS - * but will return the EACESS to the caller. Note that the VFS does not call + * if that fails we can not attempt the fall back mechanisms on EACCESS + * but will return the EACCESS to the caller. Note that the VFS does not call * unlink on negative dentries currently. */ int cifs_unlink(struct inode *dir, struct dentry *dentry) diff --git a/fs/cifs/smbdes.c b/fs/cifs/smbdes.c index 224a1f478966..b6b6dcb500bf 100644 --- a/fs/cifs/smbdes.c +++ b/fs/cifs/smbdes.c @@ -371,7 +371,7 @@ E_P24(unsigned char *p21, const unsigned char *c8, unsigned char *p24) smbhash(p24 + 16, c8, p21 + 14, 1); } -#if 0 /* currently unsued */ +#if 0 /* currently unused */ static void D_P16(unsigned char *p14, unsigned char *in, unsigned char *out) { diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c index 16f682e26c07..b540aa5d1f61 100644 --- a/fs/dlm/plock.c +++ b/fs/dlm/plock.c @@ -143,7 +143,7 @@ out: } EXPORT_SYMBOL_GPL(dlm_posix_lock); -/* Returns failure iff a succesful lock operation should be canceled */ +/* Returns failure iff a successful lock operation should be canceled */ static int dlm_plock_callback(struct plock_op *op) { struct file *file; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 618ca95cbb59..0282ec78cf8f 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2932,7 +2932,7 @@ retry: ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, &mpd); /* - * If we have a contigous extent of pages and we + * If we have a contiguous extent of pages and we * haven't done the I/O yet, map the blocks and submit * them for I/O. */ @@ -5370,7 +5370,7 @@ static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) * worse case, the indexs blocks spread over different block groups * * If datablocks are discontiguous, they are possible to spread over - * different block groups too. If they are contiugous, with flexbg, + * different block groups too. If they are contiuguous, with flexbg, * they could still across block group boundary. * * Also account for superblock, inode, quota and xattr blocks @@ -5446,7 +5446,7 @@ int ext4_writepage_trans_blocks(struct inode *inode) * Calculate the journal credits for a chunk of data modification. * * This is called from DIO, fallocate or whoever calling - * ext4_get_blocks() to map/allocate a chunk of contigous disk blocks. + * ext4_get_blocks() to map/allocate a chunk of contiguous disk blocks. * * journal buffers for data blocks are not included here, as DIO * and fallocate do no need to journal data buffers. diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index bba12824defa..74e495dabe09 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -142,7 +142,7 @@ * 2 blocks and the order of allocation is >= sbi->s_mb_order2_reqs. The * value of s_mb_order2_reqs can be tuned via * /sys/fs/ext4//mb_order2_req. If the request len is equal to - * stripe size (sbi->s_stripe), we try to search for contigous block in + * stripe size (sbi->s_stripe), we try to search for contiguous block in * stripe size. This should result in better allocation on RAID setups. If * not, we search in the specific group using bitmap for best extents. The * tunable min_to_scan and max_to_scan control the behaviour here. diff --git a/fs/jffs2/compr.c b/fs/jffs2/compr.c index f25e70c1b51c..f0294410868d 100644 --- a/fs/jffs2/compr.c +++ b/fs/jffs2/compr.c @@ -177,7 +177,7 @@ uint16_t jffs2_compress(struct jffs2_sb_info *c, struct jffs2_inode_info *f, spin_unlock(&jffs2_compressor_list_lock); break; default: - printk(KERN_ERR "JFFS2: unknow compression mode.\n"); + printk(KERN_ERR "JFFS2: unknown compression mode.\n"); } out: if (ret == JFFS2_COMPR_NONE) { diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c index 1a80301004b8..378991cfe40f 100644 --- a/fs/jffs2/readinode.c +++ b/fs/jffs2/readinode.c @@ -931,7 +931,7 @@ static inline int read_unknown(struct jffs2_sb_info *c, struct jffs2_raw_node_re * Helper function for jffs2_get_inode_nodes(). * The function detects whether more data should be read and reads it if yes. * - * Returns: 0 on succes; + * Returns: 0 on success; * negative error code on failure. */ static int read_more(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref, diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c index 082e844ab2db..4b107881acd5 100644 --- a/fs/jffs2/xattr.c +++ b/fs/jffs2/xattr.c @@ -31,7 +31,7 @@ * is used to release xattr name/value pair and detach from c->xattrindex. * reclaim_xattr_datum(c) * is used to reclaim xattr name/value pairs on the xattr name/value pair cache when - * memory usage by cache is over c->xdatum_mem_threshold. Currentry, this threshold + * memory usage by cache is over c->xdatum_mem_threshold. Currently, this threshold * is hard coded as 32KiB. * do_verify_xattr_datum(c, xd) * is used to load the xdatum informations without name/value pair from the medium. diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index 2bc7d8aa5740..d9b031cf69f5 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c @@ -755,7 +755,7 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results) * allocation group. */ if ((blkno & (bmp->db_agsize - 1)) == 0) - /* check if the AG is currenly being written to. + /* check if the AG is currently being written to. * if so, call dbNextAG() to find a non-busy * AG with sufficient free space. */ @@ -3337,7 +3337,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks) for (i = 0, n = 0; i < agno; n++) { bmp->db_agfree[n] = 0; /* init collection point */ - /* coalesce cotiguous k AGs; */ + /* coalesce contiguous k AGs; */ for (j = 0; j < k && i < agno; j++, i++) { /* merge AGi to AGn */ bmp->db_agfree[n] += bmp->db_agfree[i]; diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c index 0d58caf4a6e1..ec8f45f12e05 100644 --- a/fs/ncpfs/ioctl.c +++ b/fs/ncpfs/ioctl.c @@ -835,7 +835,7 @@ static int ncp_ioctl_need_write(unsigned int cmd) case NCP_IOC_SETROOT: return 0; default: - /* unkown IOCTL command, assume write */ + /* unknown IOCTL command, assume write */ return 1; } } diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c index 9669541d0119..08f7530e9341 100644 --- a/fs/ntfs/compress.c +++ b/fs/ntfs/compress.c @@ -927,7 +927,7 @@ lock_retry_remap: return 0; ntfs_debug("Failed. Returning error code %s.", err == -EOVERFLOW ? - "EOVERFLOW" : (!err ? "EIO" : "unkown error")); + "EOVERFLOW" : (!err ? "EIO" : "unknown error")); return err < 0 ? err : -EIO; read_err: diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 663c0e341f8b..43179ddd336f 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -399,7 +399,7 @@ static inline void ntfs_fault_in_pages_readable_iovec(const struct iovec *iov, * @cached_page: allocated but as yet unused page * @lru_pvec: lru-buffering pagevec of caller * - * Obtain @nr_pages locked page cache pages from the mapping @maping and + * Obtain @nr_pages locked page cache pages from the mapping @mapping and * starting at index @index. * * If a page is newly created, increment its refcount and add it to the @@ -1281,7 +1281,7 @@ rl_not_mapped_enoent: /* * Copy as much as we can into the pages and return the number of bytes which - * were sucessfully copied. If a fault is encountered then clear the pages + * were successfully copied. If a fault is encountered then clear the pages * out to (ofs + bytes) and return the number of bytes which were copied. */ static inline size_t ntfs_copy_from_user(struct page **pages, diff --git a/fs/ntfs/logfile.c b/fs/ntfs/logfile.c index 89b02985c054..4dadcdf3d451 100644 --- a/fs/ntfs/logfile.c +++ b/fs/ntfs/logfile.c @@ -338,7 +338,7 @@ err_out: * copy of the complete multi sector transfer deprotected page. On failure, * *@wrp is undefined. * - * Simillarly, if @lsn is not NULL, on succes *@lsn will be set to the current + * Simillarly, if @lsn is not NULL, on success *@lsn will be set to the current * logfile lsn according to this restart page. On failure, *@lsn is undefined. * * The following error codes are defined: diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 38a42f5d59ff..7c7198a5bc90 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -2398,7 +2398,7 @@ static int ocfs2_leftmost_rec_contains(struct ocfs2_extent_list *el, u32 cpos) * * The array is assumed to be large enough to hold an entire path (tree depth). * - * Upon succesful return from this function: + * Upon successful return from this function: * * - The 'right_path' array will contain a path to the leaf block * whose range contains e_cpos. diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 83bcaf266b35..03ccf9a7b1f4 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -2586,7 +2586,7 @@ fail: * is complete everywhere. if the target dies while this is * going on, some nodes could potentially see the target as the * master, so it is important that my recovery finds the migration - * mle and sets the master to UNKNONWN. */ + * mle and sets the master to UNKNOWN. */ /* wait for new node to assert master */ diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 0d38d67194cb..c5e4a49e3a12 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -1855,7 +1855,7 @@ int ocfs2_file_lock(struct file *file, int ex, int trylock) * outstanding lock request, so a cancel convert is * required. We intentionally overwrite 'ret' - if the * cancel fails and the lock was granted, it's easier - * to just bubble sucess back up to the user. + * to just bubble success back up to the user. */ ret = ocfs2_flock_handle_signal(lockres, level); } else if (!ret && (level > lockres->l_level)) { diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 54c16b66327e..bf34c491ae96 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -659,7 +659,7 @@ static int __ocfs2_journal_access(handle_t *handle, default: status = -EINVAL; - mlog(ML_ERROR, "Uknown access type!\n"); + mlog(ML_ERROR, "Unknown access type!\n"); } if (!status && ocfs2_meta_ecc(osb) && triggers) jbd2_journal_set_triggers(bh, &triggers->ot_triggers); diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 60287fc56bcb..d00c658ca150 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -2431,7 +2431,7 @@ out: * we gonna touch and whether we need to create new blocks. * * Normally the refcount blocks store these refcount should be - * continguous also, so that we can get the number easily. + * contiguous also, so that we can get the number easily. * As for meta_ac, we will at most add split 2 refcount record and * 2 more refcount block, so just check it in a rough way. * diff --git a/fs/omfs/bitmap.c b/fs/omfs/bitmap.c index e1c0ec0ae989..082234581d05 100644 --- a/fs/omfs/bitmap.c +++ b/fs/omfs/bitmap.c @@ -85,7 +85,7 @@ out: } /* - * Tries to allocate exactly one block. Returns true if sucessful. + * Tries to allocate exactly one block. Returns true if successful. */ int omfs_allocate_block(struct super_block *sb, u64 block) { diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index f94ddf7efba0..868a55ee080f 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c @@ -23,7 +23,7 @@ /* * This file implements functions needed to recover from unclean un-mounts. * When UBIFS is mounted, it checks a flag on the master node to determine if - * an un-mount was completed sucessfully. If not, the process of mounting + * an un-mount was completed successfully. If not, the process of mounting * incorparates additional checking and fixing of on-flash data structures. * UBIFS always cleans away all remnants of an unclean un-mount, so that * errors do not accumulate. However UBIFS defers recovery if it is mounted diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h index 6533ead9b889..a2c16bcee90b 100644 --- a/fs/xfs/quota/xfs_dquot.h +++ b/fs/xfs/quota/xfs_dquot.h @@ -98,7 +98,7 @@ typedef struct xfs_dquot { #define dq_flags q_lists.dqm_flags /* - * Lock hierachy for q_qlock: + * Lock hierarchy for q_qlock: * XFS_QLOCK_NORMAL is the implicit default, * XFS_QLOCK_NESTED is the dquot with the higher id in xfs_dqlock2 */ diff --git a/include/asm-generic/memory_model.h b/include/asm-generic/memory_model.h index 4c8d0afae711..fb2d63f13f4c 100644 --- a/include/asm-generic/memory_model.h +++ b/include/asm-generic/memory_model.h @@ -47,7 +47,7 @@ #elif defined(CONFIG_SPARSEMEM_VMEMMAP) -/* memmap is virtually contigious. */ +/* memmap is virtually contiguous. */ #define __pfn_to_page(pfn) (vmemmap + (pfn)) #define __page_to_pfn(page) (unsigned long)((page) - vmemmap) diff --git a/include/asm-generic/unistd.h b/include/asm-generic/unistd.h index d76b66acea95..7c38c147e5e6 100644 --- a/include/asm-generic/unistd.h +++ b/include/asm-generic/unistd.h @@ -631,7 +631,7 @@ __SYSCALL(__NR_perf_event_open, sys_perf_event_open) * these are provided for both review and as a porting * help for the C library version. * - * Last chance: are any of these important enought to + * Last chance: are any of these important enough to * enable by default? */ #ifdef __ARCH_WANT_SYSCALL_NO_AT diff --git a/include/linux/chio.h b/include/linux/chio.h index 519248d8b2b6..d9bac7f97282 100644 --- a/include/linux/chio.h +++ b/include/linux/chio.h @@ -21,7 +21,7 @@ * query vendor-specific element types * * accessing elements works by specifing type and unit of the element. - * for eample, storage elements are addressed with type = CHET_ST and + * for example, storage elements are addressed with type = CHET_ST and * unit = 0 .. cp_nslots-1 * */ diff --git a/include/linux/mfd/ezx-pcap.h b/include/linux/mfd/ezx-pcap.h index e5124ceea769..3402042ddc31 100644 --- a/include/linux/mfd/ezx-pcap.h +++ b/include/linux/mfd/ezx-pcap.h @@ -45,7 +45,7 @@ void pcap_set_ts_bits(struct pcap_chip *, u32); #define PCAP_CLEAR_INTERRUPT_REGISTER 0x01ffffff #define PCAP_MASK_ALL_INTERRUPT 0x01ffffff -/* registers acessible by both pcap ports */ +/* registers accessible by both pcap ports */ #define PCAP_REG_ISR 0x0 /* Interrupt Status */ #define PCAP_REG_MSR 0x1 /* Interrupt Mask */ #define PCAP_REG_PSTAT 0x2 /* Processor Status */ @@ -67,7 +67,7 @@ void pcap_set_ts_bits(struct pcap_chip *, u32); #define PCAP_REG_VENDOR_TEST1 0x1e #define PCAP_REG_VENDOR_TEST2 0x1f -/* registers acessible by pcap port 1 only (a1200, e2 & e6) */ +/* registers accessible by pcap port 1 only (a1200, e2 & e6) */ #define PCAP_REG_INT_SEL 0x3 /* Interrupt Select */ #define PCAP_REG_SWCTRL 0x4 /* Switching Regulator Control */ #define PCAP_REG_VREG1 0x5 /* Regulator Bank 1 Control */ diff --git a/include/linux/pktcdvd.h b/include/linux/pktcdvd.h index d745f5b6c7b0..76e5053e1fac 100644 --- a/include/linux/pktcdvd.h +++ b/include/linux/pktcdvd.h @@ -30,7 +30,7 @@ /* * use drive write caching -- we need deferred error handling to be - * able to sucessfully recover with this option (drive will return good + * able to successfully recover with this option (drive will return good * status as soon as the cdb is validated). */ #if defined(CONFIG_CDROM_PKTCDVD_WCACHE) diff --git a/include/linux/serial_reg.h b/include/linux/serial_reg.h index 850db2e80510..cf9327c051ad 100644 --- a/include/linux/serial_reg.h +++ b/include/linux/serial_reg.h @@ -216,10 +216,10 @@ #define UART_IIR_TOD 0x08 /* Character Timeout Indication Detected */ -#define UART_FCR_PXAR1 0x00 /* receive FIFO treshold = 1 */ -#define UART_FCR_PXAR8 0x40 /* receive FIFO treshold = 8 */ -#define UART_FCR_PXAR16 0x80 /* receive FIFO treshold = 16 */ -#define UART_FCR_PXAR32 0xc0 /* receive FIFO treshold = 32 */ +#define UART_FCR_PXAR1 0x00 /* receive FIFO threshold = 1 */ +#define UART_FCR_PXAR8 0x40 /* receive FIFO threshold = 8 */ +#define UART_FCR_PXAR16 0x80 /* receive FIFO threshold = 16 */ +#define UART_FCR_PXAR32 0xc0 /* receive FIFO threshold = 32 */ diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h index b59e78c57161..dfd4745a955f 100644 --- a/include/linux/videodev2.h +++ b/include/linux/videodev2.h @@ -490,7 +490,7 @@ struct v4l2_jpegcompression { * you do, leave them untouched. * Inluding less markers will make the * resulting code smaller, but there will - * be fewer aplications which can read it. + * be fewer applications which can read it. * The presence of the APP and COM marker * is influenced by APP_len and COM_len * ONLY, not by this property! */ diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 6e5f0e0c7967..ca4789e4f1e1 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -988,7 +988,7 @@ struct sctp_transport { int init_sent_count; /* state : The current state of this destination, - * : i.e. SCTP_ACTIVE, SCTP_INACTIVE, SCTP_UNKOWN. + * : i.e. SCTP_ACTIVE, SCTP_INACTIVE, SCTP_UNKNOWN. */ int state; diff --git a/include/net/tcp.h b/include/net/tcp.h index 03a49c703377..1827e7f217d1 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1260,7 +1260,7 @@ static inline struct sk_buff *tcp_write_queue_prev(struct sock *sk, struct sk_bu skb_queue_walk_from_safe(&(sk)->sk_write_queue, skb, tmp) /* This function calculates a "timeout" which is equivalent to the timeout of a - * TCP connection after "boundary" unsucessful, exponentially backed-off + * TCP connection after "boundary" unsuccessful, exponentially backed-off * retransmissions with an initial RTO of TCP_RTO_MIN. */ static inline bool retransmits_timed_out(const struct sock *sk, diff --git a/include/net/wimax.h b/include/net/wimax.h index 2af7bf839f23..3b07f6aad102 100644 --- a/include/net/wimax.h +++ b/include/net/wimax.h @@ -79,7 +79,7 @@ * drivers have to only report state changes due to external * conditions. * - * All API operations are 'atomic', serialized thorough a mutex in the + * All API operations are 'atomic', serialized through a mutex in the * `struct wimax_dev`. * * EXPORTING TO USER SPACE THROUGH GENERIC NETLINK diff --git a/include/sound/wm8993.h b/include/sound/wm8993.h index 9c661f2f8cda..eee19f63c0d8 100644 --- a/include/sound/wm8993.h +++ b/include/sound/wm8993.h @@ -36,7 +36,7 @@ struct wm8993_platform_data { unsigned int micbias1_lvl:1; unsigned int micbias2_lvl:1; - /* Jack detect threashold levels, see datasheet for values */ + /* Jack detect threshold levels, see datasheet for values */ unsigned int jd_scthr:2; unsigned int jd_thr:2; }; diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 7f29643c8985..759a629cc4bc 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -419,7 +419,7 @@ static void perf_event_remove_from_context(struct perf_event *event) if (!task) { /* * Per cpu events are removed via an smp call and - * the removal is always sucessful. + * the removal is always successful. */ smp_call_function_single(event->cpu, __perf_event_remove_from_context, @@ -827,7 +827,7 @@ perf_install_in_context(struct perf_event_context *ctx, if (!task) { /* * Per cpu events are installed via an smp call and - * the install is always sucessful. + * the install is always successful. */ smp_call_function_single(cpu, __perf_install_in_context, event, 1); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 234ceb10861f..456b2bc6b1ff 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -105,7 +105,7 @@ config DEBUG_SECTION_MISMATCH bool "Enable full Section mismatch analysis" depends on UNDEFINED # This option is on purpose disabled for now. - # It will be enabled when we are down to a resonable number + # It will be enabled when we are down to a reasonable number # of section mismatch warnings (< 10 for an allyesconfig build) help The section mismatch analysis checks if there are illegal diff --git a/lib/decompress_bunzip2.c b/lib/decompress_bunzip2.c index 600f473a5610..76074209f9a2 100644 --- a/lib/decompress_bunzip2.c +++ b/lib/decompress_bunzip2.c @@ -299,7 +299,7 @@ static int INIT get_next_block(struct bunzip_data *bd) again when using them (during symbol decoding).*/ base = hufGroup->base-1; limit = hufGroup->limit-1; - /* Calculate permute[]. Concurently, initialize + /* Calculate permute[]. Concurrently, initialize * temp[] and limit[]. */ pp = 0; for (i = minLen; i <= maxLen; i++) { diff --git a/lib/dma-debug.c b/lib/dma-debug.c index ce6b7eabf674..d9b08e0f7f55 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -259,7 +259,7 @@ static struct dma_debug_entry *hash_bucket_find(struct hash_bucket *bucket, * times. Without a hardware IOMMU this results in the * same device addresses being put into the dma-debug * hash multiple times too. This can result in false - * positives being reported. Therfore we implement a + * positives being reported. Therefore we implement a * best-fit algorithm here which returns the entry from * the hash which fits best to the reference value * instead of the first-fit. diff --git a/lib/swiotlb.c b/lib/swiotlb.c index ac25cd28e807..853907e45868 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -453,7 +453,7 @@ do_unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir) /* * Return the buffer to the free list by setting the corresponding - * entries to indicate the number of contigous entries available. + * entries to indicate the number of contiguous entries available. * While returning the entries to the free list, we merge the entries * with slots below and above the pool being returned. */ diff --git a/mm/filemap.c b/mm/filemap.c index ef169f37156d..c3d3506ecaba 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1844,7 +1844,7 @@ static size_t __iovec_copy_from_user_inatomic(char *vaddr, /* * Copy as much as we can into the page and return the number of bytes which - * were sucessfully copied. If a fault is encountered then return the number of + * were successfully copied. If a fault is encountered then return the number of * bytes which were copied. */ size_t iov_iter_copy_from_user_atomic(struct page *page, diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 7226e60e52af..c31a310aa146 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -209,7 +209,7 @@ struct mem_cgroup { int prev_priority; /* for recording reclaim priority */ /* - * While reclaiming in a hiearchy, we cache the last child we + * While reclaiming in a hierarchy, we cache the last child we * reclaimed from. */ int last_scanned_child; @@ -2466,7 +2466,7 @@ static int mem_cgroup_hierarchy_write(struct cgroup *cont, struct cftype *cft, cgroup_lock(); /* - * If parent's use_hiearchy is set, we can't make any modifications + * If parent's use_hierarchy is set, we can't make any modifications * in the child subtrees. If it is unset, then the change can * occur, provided the current cgroup has no children. * diff --git a/mm/memory-failure.c b/mm/memory-failure.c index dacc64183874..1ac49fef95ab 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -174,7 +174,7 @@ static void kill_procs_ao(struct list_head *to_kill, int doit, int trapno, list_for_each_entry_safe (tk, next, to_kill, nd) { if (doit) { /* - * In case something went wrong with munmaping + * In case something went wrong with munmapping * make sure the process doesn't catch the * signal and then access the memory. Just kill it. * the signal handlers diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index f7e2fa0974dc..16ad251c9725 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c @@ -50,7 +50,7 @@ set_ect_tcp(struct sk_buff *skb, const struct ipt_ECN_info *einfo) struct tcphdr _tcph, *tcph; __be16 oldval; - /* Not enought header? */ + /* Not enough header? */ tcph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph); if (!tcph) return false; diff --git a/net/irda/irlap.c b/net/irda/irlap.c index 356e65b1dc42..783c5f367d29 100644 --- a/net/irda/irlap.c +++ b/net/irda/irlap.c @@ -450,10 +450,10 @@ void irlap_disconnect_request(struct irlap_cb *self) /* Check if we are in the right state for disconnecting */ switch (self->state) { - case LAP_XMIT_P: /* FALLTROUGH */ - case LAP_XMIT_S: /* FALLTROUGH */ - case LAP_CONN: /* FALLTROUGH */ - case LAP_RESET_WAIT: /* FALLTROUGH */ + case LAP_XMIT_P: /* FALLTHROUGH */ + case LAP_XMIT_S: /* FALLTHROUGH */ + case LAP_CONN: /* FALLTHROUGH */ + case LAP_RESET_WAIT: /* FALLTHROUGH */ case LAP_RESET_CHECK: irlap_do_event(self, DISCONNECT_REQUEST, NULL, NULL); break; @@ -485,9 +485,9 @@ void irlap_disconnect_indication(struct irlap_cb *self, LAP_REASON reason) IRDA_DEBUG(1, "%s(), Sending reset request!\n", __func__); irlap_do_event(self, RESET_REQUEST, NULL, NULL); break; - case LAP_NO_RESPONSE: /* FALLTROUGH */ - case LAP_DISC_INDICATION: /* FALLTROUGH */ - case LAP_FOUND_NONE: /* FALLTROUGH */ + case LAP_NO_RESPONSE: /* FALLTHROUGH */ + case LAP_DISC_INDICATION: /* FALLTHROUGH */ + case LAP_FOUND_NONE: /* FALLTHROUGH */ case LAP_MEDIA_BUSY: irlmp_link_disconnect_indication(self->notify.instance, self, reason, NULL); diff --git a/net/irda/irlap_event.c b/net/irda/irlap_event.c index c5c51959e3ce..94a9884d7146 100644 --- a/net/irda/irlap_event.c +++ b/net/irda/irlap_event.c @@ -1741,7 +1741,7 @@ static int irlap_state_reset(struct irlap_cb *self, IRLAP_EVENT event, * Function irlap_state_xmit_s (event, skb, info) * * XMIT_S, The secondary station has been given the right to transmit, - * and we therefor do not expect to receive any transmissions from other + * and we therefore do not expect to receive any transmissions from other * stations. */ static int irlap_state_xmit_s(struct irlap_cb *self, IRLAP_EVENT event, diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c index 7bf5b913828b..0e7d8bde145d 100644 --- a/net/irda/irlmp.c +++ b/net/irda/irlmp.c @@ -105,7 +105,7 @@ int __init irlmp_init(void) init_timer(&irlmp->discovery_timer); - /* Do discovery every 3 seconds, conditionaly */ + /* Do discovery every 3 seconds, conditionally */ if (sysctl_discovery) irlmp_start_discovery_timer(irlmp, sysctl_discovery_timeout*HZ); @@ -1842,7 +1842,7 @@ LM_REASON irlmp_convert_lap_reason( LAP_REASON lap_reason) reason = LM_CONNECT_FAILURE; break; default: - IRDA_DEBUG(1, "%s(), Unknow IrLAP disconnect reason %d!\n", + IRDA_DEBUG(1, "%s(), Unknown IrLAP disconnect reason %d!\n", __func__, lap_reason); reason = LM_LAP_DISCONNECT; break; diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 751c4d0e2b36..719ddbc9e48c 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -244,7 +244,7 @@ struct mesh_path *mesh_path_lookup_by_idx(int idx, struct ieee80211_sub_if_data * @addr: destination address of the path (ETH_ALEN length) * @sdata: local subif * - * Returns: 0 on sucess + * Returns: 0 on success * * State: the initial state of the new path is set to 0 */ @@ -530,7 +530,7 @@ static void mesh_path_node_reclaim(struct rcu_head *rp) * @addr: dst address (ETH_ALEN length) * @sdata: local subif * - * Returns: 0 if succesful + * Returns: 0 if successful */ int mesh_path_del(u8 *addr, struct ieee80211_sub_if_data *sdata) { diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c index 7a10bbe02c13..c5d9f97ef217 100644 --- a/net/netlabel/netlabel_domainhash.c +++ b/net/netlabel/netlabel_domainhash.c @@ -682,7 +682,7 @@ struct netlbl_domaddr6_map *netlbl_domhsh_getentry_af6(const char *domain, * buckets and @skip_chain entries. For each entry in the table call * @callback, if @callback returns a negative value stop 'walking' through the * table and return. Updates the values in @skip_bkt and @skip_chain on - * return. Returns zero on succcess, negative values on failure. + * return. Returns zero on success, negative values on failure. * */ int netlbl_domhsh_walk(u32 *skip_bkt, diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 8674d4919556..29d8501bf156 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -719,7 +719,7 @@ static void sctp_cmd_new_state(sctp_cmd_seq_t *cmds, if (sctp_style(sk, TCP)) { /* Change the sk->sk_state of a TCP-style socket that has - * sucessfully completed a connect() call. + * successfully completed a connect() call. */ if (sctp_state(asoc, ESTABLISHED) && sctp_sstate(sk, CLOSED)) sk->sk_state = SCTP_SS_ESTABLISHED; diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index f11be72a1a80..b15e1ebb2bfa 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -54,7 +54,7 @@ * Assumptions: * - head[0] is physically contiguous. * - tail[0] is physically contiguous. - * - pages[] is not physically or virtually contigous and consists of + * - pages[] is not physically or virtually contiguous and consists of * PAGE_SIZE elements. * * Output: diff --git a/net/wimax/op-reset.c b/net/wimax/op-reset.c index ca269178c4d4..35f370091f4f 100644 --- a/net/wimax/op-reset.c +++ b/net/wimax/op-reset.c @@ -62,7 +62,7 @@ * Called when wanting to reset the device for any reason. Device is * taken back to power on status. * - * This call blocks; on succesful return, the device has completed the + * This call blocks; on successful return, the device has completed the * reset process and is ready to operate. */ int wimax_reset(struct wimax_dev *wimax_dev) diff --git a/scripts/kconfig/mconf.c b/scripts/kconfig/mconf.c index d82953573588..8413cf38ed27 100644 --- a/scripts/kconfig/mconf.c +++ b/scripts/kconfig/mconf.c @@ -213,7 +213,7 @@ load_config_help[] = N_( "to modify that configuration.\n" "\n" "If you are uncertain, then you have probably never used alternate\n" - "configuration files. You should therefor leave this blank to abort.\n"), + "configuration files. You should therefore leave this blank to abort.\n"), save_config_text[] = N_( "Enter a filename to which this configuration should be saved " "as an alternate. Leave blank to abort."), diff --git a/security/selinux/netlabel.c b/security/selinux/netlabel.c index e68823741ad5..2534400317c5 100644 --- a/security/selinux/netlabel.c +++ b/security/selinux/netlabel.c @@ -204,7 +204,7 @@ int selinux_netlbl_skbuff_getsid(struct sk_buff *skb, * * Description * Call the NetLabel mechanism to set the label of a packet using @sid. - * Returns zero on auccess, negative values on failure. + * Returns zero on success, negative values on failure. * */ int selinux_netlbl_skbuff_setsid(struct sk_buff *skb, diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index ff17820d35ec..5914eeb0b339 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -741,7 +741,7 @@ int security_bounded_transition(u32 old_sid, u32 new_sid) goto out; } - /* type/domain unchaned */ + /* type/domain unchanged */ if (old_context->type == new_context->type) { rc = 0; goto out; diff --git a/sound/Kconfig b/sound/Kconfig index 4b5365ad6b46..fcad760f5691 100644 --- a/sound/Kconfig +++ b/sound/Kconfig @@ -55,7 +55,7 @@ config SOUND_OSS_CORE_PRECLAIM Please read Documentation/feature-removal-schedule.txt for details. - If unusre, say Y. + If unsure, say Y. source "sound/oss/dmasound/Kconfig" diff --git a/sound/isa/cs423x/cs4236.c b/sound/isa/cs423x/cs4236.c index a076a6ce8071..a828baaab636 100644 --- a/sound/isa/cs423x/cs4236.c +++ b/sound/isa/cs423x/cs4236.c @@ -177,7 +177,7 @@ static struct pnp_card_device_id snd_cs423x_pnpids[] = { { .id = "CSC0437", .devs = { { "CSC0000" }, { "CSC0010" }, { "CSC0003" } } }, /* Digital PC 5000 Onboard - CS4236B */ { .id = "CSC0735", .devs = { { "CSC0000" }, { "CSC0010" } } }, - /* some uknown CS4236B */ + /* some unknown CS4236B */ { .id = "CSC0b35", .devs = { { "CSC0000" }, { "CSC0010" }, { "CSC0003" } } }, /* Intel PR440FX Onboard sound */ { .id = "CSC0b36", .devs = { { "CSC0000" }, { "CSC0010" }, { "CSC0003" } } }, diff --git a/sound/isa/opti9xx/miro.c b/sound/isa/opti9xx/miro.c index 02e30d7c6a93..ddad60ef3f37 100644 --- a/sound/isa/opti9xx/miro.c +++ b/sound/isa/opti9xx/miro.c @@ -137,7 +137,7 @@ struct snd_miro { static void snd_miro_proc_init(struct snd_miro * miro); static char * snd_opti9xx_names[] = { - "unkown", + "unknown", "82C928", "82C929", "82C924", "82C925", "82C930", "82C931", "82C933" diff --git a/sound/isa/opti9xx/opti92x-ad1848.c b/sound/isa/opti9xx/opti92x-ad1848.c index 5cd555325b9d..848007508ffd 100644 --- a/sound/isa/opti9xx/opti92x-ad1848.c +++ b/sound/isa/opti9xx/opti92x-ad1848.c @@ -185,7 +185,7 @@ MODULE_DEVICE_TABLE(pnp_card, snd_opti9xx_pnpids); #endif static char * snd_opti9xx_names[] = { - "unkown", + "unknown", "82C928", "82C929", "82C924", "82C925", "82C930", "82C931", "82C933" diff --git a/sound/oss/dmasound/dmasound_paula.c b/sound/oss/dmasound/dmasound_paula.c index 06e9e88e4c05..bb14e4c67e89 100644 --- a/sound/oss/dmasound/dmasound_paula.c +++ b/sound/oss/dmasound/dmasound_paula.c @@ -657,7 +657,7 @@ static int AmiStateInfo(char *buffer, size_t space) len += sprintf(buffer+len, "\tsound.volume_right = %d [0...64]\n", dmasound.volume_right); if (len >= space) { - printk(KERN_ERR "dmasound_paula: overlowed state buffer alloc.\n") ; + printk(KERN_ERR "dmasound_paula: overflowed state buffer alloc.\n") ; len = space ; } return len; diff --git a/sound/pci/ca0106/ca0106_proc.c b/sound/pci/ca0106/ca0106_proc.c index c62b7d10ec61..8d13092300da 100644 --- a/sound/pci/ca0106/ca0106_proc.c +++ b/sound/pci/ca0106/ca0106_proc.c @@ -233,7 +233,7 @@ static void snd_ca0106_proc_dump_iec958( struct snd_info_buffer *buffer, u32 val snd_iprintf(buffer, "user-defined\n"); break; default: - snd_iprintf(buffer, "unkown\n"); + snd_iprintf(buffer, "unknown\n"); break; } snd_iprintf(buffer, "Sample Bits: "); diff --git a/sound/pci/cs46xx/imgs/cwcdma.asp b/sound/pci/cs46xx/imgs/cwcdma.asp index 09d24c76f034..a65e1193c89a 100644 --- a/sound/pci/cs46xx/imgs/cwcdma.asp +++ b/sound/pci/cs46xx/imgs/cwcdma.asp @@ -26,10 +26,11 @@ // // // The purpose of this code is very simple: make it possible to tranfser -// the samples 'as they are' with no alteration from a PCMreader SCB (DMA from host) -// to any other SCB. This is useful for AC3 throug SPDIF. SRC (source rate converters) -// task always alters the samples in some how, however it's from 48khz -> 48khz. The -// alterations are not audible, but AC3 wont work. +// the samples 'as they are' with no alteration from a PCMreader +// SCB (DMA from host) to any other SCB. This is useful for AC3 through SPDIF. +// SRC (source rate converters) task always alters the samples in somehow, +// however it's from 48khz -> 48khz. +// The alterations are not audible, but AC3 wont work. // // ... // | diff --git a/sound/pci/emu10k1/emu10k1x.c b/sound/pci/emu10k1/emu10k1x.c index 36e08bd2b3cc..360e3809a60b 100644 --- a/sound/pci/emu10k1/emu10k1x.c +++ b/sound/pci/emu10k1/emu10k1x.c @@ -184,7 +184,7 @@ MODULE_PARM_DESC(enable, "Enable the EMU10K1X soundcard."); * The hardware has 3 channels for playback and 1 for capture. * - channel 0 is the front channel * - channel 1 is the rear channel - * - channel 2 is the center/lfe chanel + * - channel 2 is the center/lfe channel * Volume is controlled by the AC97 for the front and rear channels by * the PCM Playback Volume, Sigmatel Surround Playback Volume and * Surround Playback Volume. The Sigmatel 4-Speaker Stereo switch affects diff --git a/sound/pci/hda/patch_cmedia.c b/sound/pci/hda/patch_cmedia.c index 780e1a72114a..8917071d5b6a 100644 --- a/sound/pci/hda/patch_cmedia.c +++ b/sound/pci/hda/patch_cmedia.c @@ -66,7 +66,7 @@ struct cmi_spec { struct hda_pcm pcm_rec[2]; /* PCM information */ - /* pin deafault configuration */ + /* pin default configuration */ hda_nid_t pin_nid[NUM_PINS]; unsigned int def_conf[NUM_PINS]; unsigned int pin_def_confs; diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index ff20048504b6..872731eb49e8 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6619,7 +6619,7 @@ static struct hda_input_mux alc889A_mb31_capture_source = { /* Front Mic (0x01) unused */ { "Line", 0x2 }, /* Line 2 (0x03) unused */ - /* CD (0x04) unsused? */ + /* CD (0x04) unused? */ }, }; diff --git a/sound/pci/rme9652/hdspm.c b/sound/pci/rme9652/hdspm.c index 0dce331a2a3b..a1b10d1a384d 100644 --- a/sound/pci/rme9652/hdspm.c +++ b/sound/pci/rme9652/hdspm.c @@ -3017,7 +3017,7 @@ snd_hdspm_proc_read_madi(struct snd_info_entry * entry, insel = "Coaxial"; break; default: - insel = "Unkown"; + insel = "Unknown"; } switch (hdspm->control_register & HDSPM_SyncRefMask) { @@ -3028,7 +3028,7 @@ snd_hdspm_proc_read_madi(struct snd_info_entry * entry, syncref = "MADI"; break; default: - syncref = "Unkown"; + syncref = "Unknown"; } snd_iprintf(buffer, "Inputsel = %s, SyncRef = %s\n", insel, syncref); diff --git a/sound/soc/codecs/uda134x.c b/sound/soc/codecs/uda134x.c index c33b92edbded..8ce1c9b2e5b8 100644 --- a/sound/soc/codecs/uda134x.c +++ b/sound/soc/codecs/uda134x.c @@ -101,7 +101,7 @@ static int uda134x_write(struct snd_soc_codec *codec, unsigned int reg, pr_debug("%s reg: %02X, value:%02X\n", __func__, reg, value); if (reg >= UDA134X_REGS_NUM) { - printk(KERN_ERR "%s unkown register: reg: %u", + printk(KERN_ERR "%s unknown register: reg: %u", __func__, reg); return -EINVAL; } @@ -552,7 +552,7 @@ static int uda134x_soc_probe(struct platform_device *pdev) ARRAY_SIZE(uda1341_snd_controls)); break; default: - printk(KERN_ERR "%s unkown codec type: %d", + printk(KERN_ERR "%s unknown codec type: %d", __func__, pd->model); return -EINVAL; } diff --git a/sound/soc/codecs/wm8903.c b/sound/soc/codecs/wm8903.c index fe1307b500cf..d72347d90b70 100644 --- a/sound/soc/codecs/wm8903.c +++ b/sound/soc/codecs/wm8903.c @@ -607,7 +607,7 @@ SOC_SINGLE("Right Input PGA Common Mode Switch", WM8903_ANALOGUE_RIGHT_INPUT_1, SOC_SINGLE("DRC Switch", WM8903_DRC_0, 15, 1, 0), SOC_ENUM("DRC Compressor Slope R0", drc_slope_r0), SOC_ENUM("DRC Compressor Slope R1", drc_slope_r1), -SOC_SINGLE_TLV("DRC Compressor Threashold Volume", WM8903_DRC_3, 5, 124, 1, +SOC_SINGLE_TLV("DRC Compressor Threshold Volume", WM8903_DRC_3, 5, 124, 1, drc_tlv_thresh), SOC_SINGLE_TLV("DRC Volume", WM8903_DRC_3, 0, 30, 1, drc_tlv_amp), SOC_SINGLE_TLV("DRC Minimum Gain Volume", WM8903_DRC_1, 2, 3, 1, drc_tlv_min), @@ -617,11 +617,11 @@ SOC_ENUM("DRC Decay Rate", drc_decay), SOC_ENUM("DRC FF Delay", drc_ff_delay), SOC_SINGLE("DRC Anticlip Switch", WM8903_DRC_0, 1, 1, 0), SOC_SINGLE("DRC QR Switch", WM8903_DRC_0, 2, 1, 0), -SOC_SINGLE_TLV("DRC QR Threashold Volume", WM8903_DRC_0, 6, 3, 0, drc_tlv_max), +SOC_SINGLE_TLV("DRC QR Threshold Volume", WM8903_DRC_0, 6, 3, 0, drc_tlv_max), SOC_ENUM("DRC QR Decay Rate", drc_qr_decay), SOC_SINGLE("DRC Smoothing Switch", WM8903_DRC_0, 3, 1, 0), SOC_SINGLE("DRC Smoothing Hysteresis Switch", WM8903_DRC_0, 0, 1, 0), -SOC_ENUM("DRC Smoothing Threashold", drc_smoothing), +SOC_ENUM("DRC Smoothing Threshold", drc_smoothing), SOC_SINGLE_TLV("DRC Startup Volume", WM8903_DRC_0, 6, 18, 0, drc_tlv_startup), SOC_DOUBLE_R_TLV("Digital Capture Volume", WM8903_ADC_DIGITAL_VOLUME_LEFT, diff --git a/sound/soc/codecs/wm8993.c b/sound/soc/codecs/wm8993.c index d9987999e92c..bc033687b220 100644 --- a/sound/soc/codecs/wm8993.c +++ b/sound/soc/codecs/wm8993.c @@ -689,7 +689,7 @@ SOC_DOUBLE_TLV("Digital Sidetone Volume", WM8993_DIGITAL_SIDE_TONE, SOC_SINGLE("DRC Switch", WM8993_DRC_CONTROL_1, 15, 1, 0), SOC_ENUM("DRC Path", drc_path), -SOC_SINGLE_TLV("DRC Compressor Threashold Volume", WM8993_DRC_CONTROL_2, +SOC_SINGLE_TLV("DRC Compressor Threshold Volume", WM8993_DRC_CONTROL_2, 2, 60, 1, drc_comp_threash), SOC_SINGLE_TLV("DRC Compressor Amplitude Volume", WM8993_DRC_CONTROL_3, 11, 30, 1, drc_comp_amp), @@ -709,7 +709,7 @@ SOC_SINGLE_TLV("DRC Quick Release Volume", WM8993_DRC_CONTROL_3, 2, 3, 0, SOC_ENUM("DRC Quick Release Rate", drc_qr_rate), SOC_SINGLE("DRC Smoothing Switch", WM8993_DRC_CONTROL_1, 11, 1, 0), SOC_SINGLE("DRC Smoothing Hysteresis Switch", WM8993_DRC_CONTROL_1, 8, 1, 0), -SOC_ENUM("DRC Smoothing Hysteresis Threashold", drc_smooth), +SOC_ENUM("DRC Smoothing Hysteresis Threshold", drc_smooth), SOC_SINGLE_TLV("DRC Startup Volume", WM8993_DRC_CONTROL_4, 8, 18, 0, drc_startup_tlv), diff --git a/sound/soc/s3c24xx/s3c24xx_simtec.c b/sound/soc/s3c24xx/s3c24xx_simtec.c index 1966e0d5652d..3c7ccb78b6ab 100644 --- a/sound/soc/s3c24xx/s3c24xx_simtec.c +++ b/sound/soc/s3c24xx/s3c24xx_simtec.c @@ -270,7 +270,7 @@ static int attach_gpio_amp(struct device *dev, gpio_direction_output(pd->amp_gain[1], 0); } - /* note, curently we assume GPA0 isn't valid amp */ + /* note, currently we assume GPA0 isn't valid amp */ if (pdata->amp_gpio > 0) { ret = gpio_request(pd->amp_gpio, "gpio-amp"); if (ret) { diff --git a/sound/soc/s6000/s6000-pcm.c b/sound/soc/s6000/s6000-pcm.c index 83b8028e209d..81d6f983f51e 100644 --- a/sound/soc/s6000/s6000-pcm.c +++ b/sound/soc/s6000/s6000-pcm.c @@ -196,7 +196,7 @@ static int s6000_pcm_start(struct snd_pcm_substream *substream) 0 /* destination skip after chunk (impossible) */, 4 /* 16 byte burst size */, -1 /* don't conserve bandwidth */, - 0 /* low watermark irq descriptor theshold */, + 0 /* low watermark irq descriptor threshold */, 0 /* disable hardware timestamps */, 1 /* enable channel */); diff --git a/sound/sound_core.c b/sound/sound_core.c index 49c998186592..dbca7c909a31 100644 --- a/sound/sound_core.c +++ b/sound/sound_core.c @@ -353,7 +353,7 @@ static struct sound_unit *chains[SOUND_STEP]; * @dev: device pointer * * Allocate a special sound device by minor number from the sound - * subsystem. The allocated number is returned on succes. On failure + * subsystem. The allocated number is returned on success. On failure * a negative error code is returned. */ -- cgit v1.3-8-gc7d7 From bebd04cc4569844effbdae49c01a48e57fa77864 Mon Sep 17 00:00:00 2001 From: Krzysztof Halasa Date: Sun, 15 Nov 2009 18:57:24 +0100 Subject: doc: Fix IRQ chip docs This patch updates the IRQ docs to match reality. Signed-off-by: Krzysztof Halasa Signed-off-by: Jiri Kosina --- Documentation/DocBook/genericirq.tmpl | 4 ++-- include/linux/irq.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/Documentation/DocBook/genericirq.tmpl b/Documentation/DocBook/genericirq.tmpl index c671a0168096..1448b33fd222 100644 --- a/Documentation/DocBook/genericirq.tmpl +++ b/Documentation/DocBook/genericirq.tmpl @@ -417,8 +417,8 @@ desc->chip->end(); To make use of the split implementation, replace the call to - __do_IRQ by a call to desc->chip->handle_irq() and associate - the appropriate handler function to desc->chip->handle_irq(). + __do_IRQ by a call to desc->handle_irq() and associate + the appropriate handler function to desc->handle_irq(). In most cases the generic handler implementations should be sufficient. diff --git a/include/linux/irq.h b/include/linux/irq.h index ae9653dbcd78..a287cfc0b1a6 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -282,7 +282,7 @@ extern irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action); /* * Built-in IRQ handlers for various IRQ types, - * callable via desc->chip->handle_irq() + * callable via desc->handle_irq() */ extern void handle_level_irq(unsigned int irq, struct irq_desc *desc); extern void handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc); -- cgit v1.3-8-gc7d7 From b69f2292063d2caf37ca9aec7d63ded203701bf3 Mon Sep 17 00:00:00 2001 From: Louis Rilling Date: Fri, 4 Dec 2009 14:52:42 +0100 Subject: block: Fix io_context leak after failure of clone with CLONE_IO With CLONE_IO, parent's io_context->nr_tasks is incremented, but never decremented whenever copy_process() fails afterwards, which prevents exit_io_context() from calling IO schedulers exit functions. Give a task_struct to exit_io_context(), and call exit_io_context() instead of put_io_context() in copy_process() cleanup path. Signed-off-by: Louis Rilling Signed-off-by: Jens Axboe --- block/blk-ioc.c | 10 +++++----- include/linux/iocontext.h | 5 +++-- kernel/exit.c | 2 +- kernel/fork.c | 3 ++- 4 files changed, 11 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/block/blk-ioc.c b/block/blk-ioc.c index dcd041290b28..cbdabb0dd6d7 100644 --- a/block/blk-ioc.c +++ b/block/blk-ioc.c @@ -66,14 +66,14 @@ static void cfq_exit(struct io_context *ioc) } /* Called by the exitting task */ -void exit_io_context(void) +void exit_io_context(struct task_struct *task) { struct io_context *ioc; - task_lock(current); - ioc = current->io_context; - current->io_context = NULL; - task_unlock(current); + task_lock(task); + ioc = task->io_context; + task->io_context = NULL; + task_unlock(task); if (atomic_dec_and_test(&ioc->nr_tasks)) { if (ioc->aic && ioc->aic->exit) diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index d61b0b8b5cd1..a63235996309 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -98,14 +98,15 @@ static inline struct io_context *ioc_task_link(struct io_context *ioc) return NULL; } +struct task_struct; #ifdef CONFIG_BLOCK int put_io_context(struct io_context *ioc); -void exit_io_context(void); +void exit_io_context(struct task_struct *task); struct io_context *get_io_context(gfp_t gfp_flags, int node); struct io_context *alloc_io_context(gfp_t gfp_flags, int node); void copy_io_context(struct io_context **pdst, struct io_context **psrc); #else -static inline void exit_io_context(void) +static inline void exit_io_context(struct task_struct *task) { } diff --git a/kernel/exit.c b/kernel/exit.c index f7864ac2ecc1..2544000125d9 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1004,7 +1004,7 @@ NORET_TYPE void do_exit(long code) tsk->flags |= PF_EXITPIDONE; if (tsk->io_context) - exit_io_context(); + exit_io_context(tsk); if (tsk->splice_pipe) __free_pipe_info(tsk->splice_pipe); diff --git a/kernel/fork.c b/kernel/fork.c index 166b8c49257c..607353425bb0 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1310,7 +1310,8 @@ bad_fork_free_pid: if (pid != &init_struct_pid) free_pid(pid); bad_fork_cleanup_io: - put_io_context(p->io_context); + if (p->io_context) + exit_io_context(p); bad_fork_cleanup_namespaces: exit_task_namespaces(p); bad_fork_cleanup_mm: -- cgit v1.3-8-gc7d7 From f57e4502cea471c69782d4790c71d8414ab49a9d Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Thu, 15 Oct 2009 14:43:23 -0400 Subject: [SCSI] scsi: Add missing command definitions Add definitions for UNMAP, WRITE SAME{16,32} and GET LBA STATUS commands. Signed-off-by: Martin K. Petersen Signed-off-by: James Bottomley --- drivers/scsi/constants.c | 1 + include/scsi/scsi.h | 4 ++++ 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/drivers/scsi/constants.c b/drivers/scsi/constants.c index 63abb06c4edb..9129bcf117cf 100644 --- a/drivers/scsi/constants.c +++ b/drivers/scsi/constants.c @@ -141,6 +141,7 @@ static const struct value_name_pair serv_out12_arr[] = { static const struct value_name_pair serv_in16_arr[] = { {0x10, "Read capacity(16)"}, {0x11, "Read long(16)"}, + {0x12, "Get LBA status"}, }; #define SERV_IN16_SZ ARRAY_SIZE(serv_in16_arr) diff --git a/include/scsi/scsi.h b/include/scsi/scsi.h index 34c46ab5c31b..8b4deca996ad 100644 --- a/include/scsi/scsi.h +++ b/include/scsi/scsi.h @@ -94,6 +94,7 @@ struct scsi_cmnd; #define WRITE_LONG 0x3f #define CHANGE_DEFINITION 0x40 #define WRITE_SAME 0x41 +#define UNMAP 0x42 #define READ_TOC 0x43 #define LOG_SELECT 0x4c #define LOG_SENSE 0x4d @@ -122,9 +123,11 @@ struct scsi_cmnd; #define READ_16 0x88 #define WRITE_16 0x8a #define VERIFY_16 0x8f +#define WRITE_SAME_16 0x93 #define SERVICE_ACTION_IN 0x9e /* values for service action in */ #define SAI_READ_CAPACITY_16 0x10 +#define SAI_GET_LBA_STATUS 0x12 /* values for maintenance in */ #define MI_REPORT_TARGET_PGS 0x0a /* values for maintenance out */ @@ -132,6 +135,7 @@ struct scsi_cmnd; /* values for variable length command */ #define READ_32 0x09 #define WRITE_32 0x0b +#define WRITE_SAME_32 0x0d /* Values for T10/04-262r7 */ #define ATA_16 0x85 /* 16-byte pass-thru */ -- cgit v1.3-8-gc7d7 From e95147d8fa4e63bf6d8ff249f074d0047338fc61 Mon Sep 17 00:00:00 2001 From: Vasu Dev Date: Wed, 21 Oct 2009 16:27:39 -0700 Subject: [SCSI] libfc: removes unused disc_work and ex_list Reported-by: Alex Lyakas Signed-off-by: Vasu Dev Signed-off-by: Robert Love Signed-off-by: James Bottomley --- drivers/scsi/libfc/fc_exch.c | 1 - include/scsi/libfc.h | 1 - 2 files changed, 2 deletions(-) (limited to 'include') diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c index 8ce418296537..170cdf4bac97 100644 --- a/drivers/scsi/libfc/fc_exch.c +++ b/drivers/scsi/libfc/fc_exch.c @@ -75,7 +75,6 @@ struct fc_exch_mgr { struct kref kref; /* exchange mgr reference count */ u16 min_xid; /* min exchange ID */ u16 max_xid; /* max exchange ID */ - struct list_head ex_list; /* allocated exchanges list */ mempool_t *ep_pool; /* reserve ep's */ u16 pool_max_index; /* max exch array index in exch pool */ struct fc_exch_pool *pool; /* per cpu exch pool */ diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h index 65dc9aacbf70..4ff148580562 100644 --- a/include/scsi/libfc.h +++ b/include/scsi/libfc.h @@ -742,7 +742,6 @@ struct fc_lport { /* Miscellaneous */ struct delayed_work retry_work; - struct delayed_work disc_work; }; /* -- cgit v1.3-8-gc7d7 From d37322a43ebac79eef417149f5696390cf8872db Mon Sep 17 00:00:00 2001 From: Yi Zou Date: Wed, 21 Oct 2009 16:27:58 -0700 Subject: [SCSI] libfc: Fix frags in frame exceeding SKB_MAX_FRAGS in fc_fcp_send_data In case of sequence offload, in fc_fcp_send_data(), the skb_fill_page_info() called may end up adding more frags to the skb_shinfo(fp_skb(fp))->frags[], exceeding SKB_MAX_FRAGS, this eventually corrupts the memory. I am adding the FR_FRAME_SG_LEN back, but as SKB_MAX_FRAGS -1, leaving 1 for our fcoe_eof_crc page. And send will be broken into multiple large sends if the frame already contains more frags than skb handle. Signed-off-by: Yi Zou Signed-off-by: Robert Love Signed-off-by: James Bottomley --- drivers/scsi/libfc/fc_fcp.c | 3 ++- include/scsi/fc_frame.h | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/scsi/libfc/fc_fcp.c b/drivers/scsi/libfc/fc_fcp.c index 40ed7442d9df..28bfe1c2c50a 100644 --- a/drivers/scsi/libfc/fc_fcp.c +++ b/drivers/scsi/libfc/fc_fcp.c @@ -574,7 +574,8 @@ static int fc_fcp_send_data(struct fc_fcp_pkt *fsp, struct fc_seq *seq, tlen -= sg_bytes; remaining -= sg_bytes; - if (tlen) + if ((skb_shinfo(fp_skb(fp))->nr_frags < FC_FRAME_SG_LEN) && + (tlen)) continue; /* diff --git a/include/scsi/fc_frame.h b/include/scsi/fc_frame.h index c35d2383cc26..148126dcf9e9 100644 --- a/include/scsi/fc_frame.h +++ b/include/scsi/fc_frame.h @@ -37,6 +37,9 @@ #define FC_FRAME_HEADROOM 32 /* headroom for VLAN + FCoE headers */ #define FC_FRAME_TAILROOM 8 /* trailer space for FCoE */ +/* Max number of skb frags allowed, reserving one for fcoe_crc_eof page */ +#define FC_FRAME_SG_LEN (MAX_SKB_FRAGS - 1) + #define fp_skb(fp) (&((fp)->skb)) #define fr_hdr(fp) ((fp)->skb.data) #define fr_len(fp) ((fp)->skb.len) -- cgit v1.3-8-gc7d7 From b4a9c7ede96e90f7b1ec009ce7256059295e76df Mon Sep 17 00:00:00 2001 From: Joe Eykholt Date: Wed, 21 Oct 2009 16:28:30 -0700 Subject: [SCSI] libfc: fix free of fc_rport_priv with timer pending Timer crashes were caused by freeing a struct fc_rport_priv with a timer pending, causing the timer facility list to be corrupted. This was during FC uplink flap tests with a lot of targets. After discovery, we were doing an PLOGI on an rdata that was in DELETE state but not yet removed from the lookup list. This moved the rdata from DELETE state to PLOGI state. If the PLOGI exchange allocation failed and needed to be retried, the timer scheduling could race with the free being done by fc_rport_work(). When fc_rport_login() is called on a rport in DELETE state, move it to a new state RESTART. In fc_rport_work, when handling a LOGO, STOPPED or FAILED event, look for restart state. In the RESTART case, don't take the rdata off the list and after the transport remote port is deleted and exchanges are reset, re-login to the remote port. Note that the new RESTART state also corrects a problem we had when re-discovering a port that had moved to DELETE state. In that case, a new rdata was created, but the old rdata would do an exchange manager reset affecting the FC_ID for both the new rdata and old rdata. With the new state, the new port isn't logged into until after any old exchanges are reset. Signed-off-by: Joe Eykholt Signed-off-by: Robert Love Signed-off-by: James Bottomley --- drivers/scsi/libfc/fc_rport.c | 69 +++++++++++++++++++++++++++++++------------ include/scsi/libfc.h | 1 + 2 files changed, 51 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c index 49abb839a223..324e156b5d07 100644 --- a/drivers/scsi/libfc/fc_rport.c +++ b/drivers/scsi/libfc/fc_rport.c @@ -86,6 +86,7 @@ static const char *fc_rport_state_names[] = { [RPORT_ST_LOGO] = "LOGO", [RPORT_ST_ADISC] = "ADISC", [RPORT_ST_DELETE] = "Delete", + [RPORT_ST_RESTART] = "Restart", }; /** @@ -99,8 +100,7 @@ static struct fc_rport_priv *fc_rport_lookup(const struct fc_lport *lport, struct fc_rport_priv *rdata; list_for_each_entry(rdata, &lport->disc.rports, peers) - if (rdata->ids.port_id == port_id && - rdata->rp_state != RPORT_ST_DELETE) + if (rdata->ids.port_id == port_id) return rdata; return NULL; } @@ -235,6 +235,7 @@ static void fc_rport_work(struct work_struct *work) struct fc_rport_operations *rport_ops; struct fc_rport_identifiers ids; struct fc_rport *rport; + int restart = 0; mutex_lock(&rdata->rp_mutex); event = rdata->event; @@ -287,8 +288,19 @@ static void fc_rport_work(struct work_struct *work) mutex_unlock(&rdata->rp_mutex); if (port_id != FC_FID_DIR_SERV) { + /* + * We must drop rp_mutex before taking disc_mutex. + * Re-evaluate state to allow for restart. + * A transition to RESTART state must only happen + * while disc_mutex is held and rdata is on the list. + */ mutex_lock(&lport->disc.disc_mutex); - list_del(&rdata->peers); + mutex_lock(&rdata->rp_mutex); + if (rdata->rp_state == RPORT_ST_RESTART) + restart = 1; + else + list_del(&rdata->peers); + mutex_unlock(&rdata->rp_mutex); mutex_unlock(&lport->disc.disc_mutex); } @@ -312,7 +324,13 @@ static void fc_rport_work(struct work_struct *work) mutex_unlock(&rdata->rp_mutex); fc_remote_port_delete(rport); } - kref_put(&rdata->kref, lport->tt.rport_destroy); + if (restart) { + mutex_lock(&rdata->rp_mutex); + FC_RPORT_DBG(rdata, "work restart\n"); + fc_rport_enter_plogi(rdata); + mutex_unlock(&rdata->rp_mutex); + } else + kref_put(&rdata->kref, lport->tt.rport_destroy); break; default: @@ -342,6 +360,12 @@ int fc_rport_login(struct fc_rport_priv *rdata) FC_RPORT_DBG(rdata, "ADISC port\n"); fc_rport_enter_adisc(rdata); break; + case RPORT_ST_RESTART: + break; + case RPORT_ST_DELETE: + FC_RPORT_DBG(rdata, "Restart deleted port\n"); + fc_rport_state_enter(rdata, RPORT_ST_RESTART); + break; default: FC_RPORT_DBG(rdata, "Login to port\n"); fc_rport_enter_plogi(rdata); @@ -397,20 +421,21 @@ int fc_rport_logoff(struct fc_rport_priv *rdata) if (rdata->rp_state == RPORT_ST_DELETE) { FC_RPORT_DBG(rdata, "Port in Delete state, not removing\n"); - mutex_unlock(&rdata->rp_mutex); goto out; } - fc_rport_enter_logo(rdata); + if (rdata->rp_state == RPORT_ST_RESTART) + FC_RPORT_DBG(rdata, "Port in Restart state, deleting\n"); + else + fc_rport_enter_logo(rdata); /* * Change the state to Delete so that we discard * the response. */ fc_rport_enter_delete(rdata, RPORT_EV_STOP); - mutex_unlock(&rdata->rp_mutex); - out: + mutex_unlock(&rdata->rp_mutex); return 0; } @@ -466,6 +491,7 @@ static void fc_rport_timeout(struct work_struct *work) case RPORT_ST_READY: case RPORT_ST_INIT: case RPORT_ST_DELETE: + case RPORT_ST_RESTART: break; } @@ -499,6 +525,7 @@ static void fc_rport_error(struct fc_rport_priv *rdata, struct fc_frame *fp) fc_rport_enter_logo(rdata); break; case RPORT_ST_DELETE: + case RPORT_ST_RESTART: case RPORT_ST_READY: case RPORT_ST_INIT: break; @@ -1248,6 +1275,7 @@ static void fc_rport_recv_plogi_req(struct fc_lport *lport, } break; case RPORT_ST_PRLI: + case RPORT_ST_RTV: case RPORT_ST_READY: case RPORT_ST_ADISC: FC_RPORT_DBG(rdata, "Received PLOGI in logged-in state %d " @@ -1255,11 +1283,14 @@ static void fc_rport_recv_plogi_req(struct fc_lport *lport, /* XXX TBD - should reset */ break; case RPORT_ST_DELETE: - default: - FC_RPORT_DBG(rdata, "Received PLOGI in unexpected state %d\n", - rdata->rp_state); - fc_frame_free(rx_fp); - goto out; + case RPORT_ST_LOGO: + case RPORT_ST_RESTART: + FC_RPORT_DBG(rdata, "Received PLOGI in state %s - send busy\n", + fc_rport_state(rdata)); + mutex_unlock(&rdata->rp_mutex); + rjt_data.reason = ELS_RJT_BUSY; + rjt_data.explan = ELS_EXPL_NONE; + goto reject; } /* @@ -1510,14 +1541,14 @@ static void fc_rport_recv_logo_req(struct fc_lport *lport, FC_RPORT_DBG(rdata, "Received LOGO request while in state %s\n", fc_rport_state(rdata)); + fc_rport_enter_delete(rdata, RPORT_EV_LOGO); + /* - * If the remote port was created due to discovery, - * log back in. It may have seen a stale RSCN about us. + * If the remote port was created due to discovery, set state + * to log back in. It may have seen a stale RSCN about us. */ - if (rdata->rp_state != RPORT_ST_DELETE && rdata->disc_id) - fc_rport_enter_plogi(rdata); - else - fc_rport_enter_delete(rdata, RPORT_EV_LOGO); + if (rdata->disc_id) + fc_rport_state_enter(rdata, RPORT_ST_RESTART); mutex_unlock(&rdata->rp_mutex); } else FC_RPORT_ID_DBG(lport, sid, diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h index 4ff148580562..1662d73d85a7 100644 --- a/include/scsi/libfc.h +++ b/include/scsi/libfc.h @@ -145,6 +145,7 @@ enum fc_rport_state { RPORT_ST_LOGO, /* port logout sent */ RPORT_ST_ADISC, /* Discover Address sent */ RPORT_ST_DELETE, /* port being deleted */ + RPORT_ST_RESTART, /* remote port being deleted and will restart */ }; /** -- cgit v1.3-8-gc7d7 From dbf9bfe615717d1145f263c0049fe2328e6ed395 Mon Sep 17 00:00:00 2001 From: jack wang Date: Wed, 14 Oct 2009 16:19:21 +0800 Subject: [SCSI] pm8001: add SAS/SATA HBA driver This driver supports PMC-Sierra PCIe SAS/SATA 8x6G SPC 8001 chip based host adapters. Signed-off-by: Jack Wang Signed-off-by: Lindar Liu Signed-off-by: Tom Peng Signed-off-by: Kevin Ao Signed-off-by: James Bottomley --- MAINTAINERS | 7 + drivers/scsi/Kconfig | 8 + drivers/scsi/Makefile | 1 + drivers/scsi/pm8001/Makefile | 12 + drivers/scsi/pm8001/pm8001_chips.h | 89 + drivers/scsi/pm8001/pm8001_ctl.c | 573 +++++ drivers/scsi/pm8001/pm8001_ctl.h | 67 + drivers/scsi/pm8001/pm8001_defs.h | 112 + drivers/scsi/pm8001/pm8001_hwi.c | 4371 ++++++++++++++++++++++++++++++++++++ drivers/scsi/pm8001/pm8001_hwi.h | 1011 +++++++++ drivers/scsi/pm8001/pm8001_init.c | 888 ++++++++ drivers/scsi/pm8001/pm8001_sas.c | 1104 +++++++++ drivers/scsi/pm8001/pm8001_sas.h | 480 ++++ include/linux/pci_ids.h | 2 + 14 files changed, 8725 insertions(+) create mode 100644 drivers/scsi/pm8001/Makefile create mode 100644 drivers/scsi/pm8001/pm8001_chips.h create mode 100644 drivers/scsi/pm8001/pm8001_ctl.c create mode 100644 drivers/scsi/pm8001/pm8001_ctl.h create mode 100644 drivers/scsi/pm8001/pm8001_defs.h create mode 100644 drivers/scsi/pm8001/pm8001_hwi.c create mode 100644 drivers/scsi/pm8001/pm8001_hwi.h create mode 100644 drivers/scsi/pm8001/pm8001_init.c create mode 100644 drivers/scsi/pm8001/pm8001_sas.c create mode 100644 drivers/scsi/pm8001/pm8001_sas.h (limited to 'include') diff --git a/MAINTAINERS b/MAINTAINERS index a1a2aceca5bd..016411cadc9a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4116,6 +4116,13 @@ W: http://www.pmc-sierra.com/ S: Supported F: drivers/scsi/pmcraid.* +PMC SIERRA PM8001 DRIVER +M: jack_wang@usish.com +M: lindar_liu@usish.com +L: linux-scsi@vger.kernel.org +S: Supported +F: drivers/scsi/pm8001/ + POSIX CLOCKS and TIMERS M: Thomas Gleixner S: Supported diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index e11cca4c784c..2e4f7d0ee639 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -1818,6 +1818,14 @@ config SCSI_PMCRAID ---help--- This driver supports the PMC SIERRA MaxRAID adapters. +config SCSI_PM8001 + tristate "PMC-Sierra SPC 8001 SAS/SATA Based Host Adapter driver" + depends on PCI && SCSI + select SCSI_SAS_LIBSAS + help + This driver supports PMC-Sierra PCIE SAS/SATA 8x6G SPC 8001 chip + based host adapters. + config SCSI_SRP tristate "SCSI RDMA Protocol helper library" depends on SCSI && PCI diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile index 3ad61db5e3fa..53b1dac7e7d9 100644 --- a/drivers/scsi/Makefile +++ b/drivers/scsi/Makefile @@ -70,6 +70,7 @@ obj-$(CONFIG_SCSI_AIC79XX) += aic7xxx/ obj-$(CONFIG_SCSI_AACRAID) += aacraid/ obj-$(CONFIG_SCSI_AIC7XXX_OLD) += aic7xxx_old.o obj-$(CONFIG_SCSI_AIC94XX) += aic94xx/ +obj-$(CONFIG_SCSI_PM8001) += pm8001/ obj-$(CONFIG_SCSI_IPS) += ips.o obj-$(CONFIG_SCSI_FD_MCS) += fd_mcs.o obj-$(CONFIG_SCSI_FUTURE_DOMAIN)+= fdomain.o diff --git a/drivers/scsi/pm8001/Makefile b/drivers/scsi/pm8001/Makefile new file mode 100644 index 000000000000..52f04296171c --- /dev/null +++ b/drivers/scsi/pm8001/Makefile @@ -0,0 +1,12 @@ +# +# Kernel configuration file for the PM8001 SAS/SATA 8x6G based HBA driver +# +# Copyright (C) 2008-2009 USI Co., Ltd. + + +obj-$(CONFIG_SCSI_PM8001) += pm8001.o +pm8001-y += pm8001_init.o \ + pm8001_sas.o \ + pm8001_ctl.o \ + pm8001_hwi.o + diff --git a/drivers/scsi/pm8001/pm8001_chips.h b/drivers/scsi/pm8001/pm8001_chips.h new file mode 100644 index 000000000000..4efa4d0950e5 --- /dev/null +++ b/drivers/scsi/pm8001/pm8001_chips.h @@ -0,0 +1,89 @@ +/* + * PMC-Sierra SPC 8001 SAS/SATA based host adapters driver + * + * Copyright (c) 2008-2009 USI Co., Ltd. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. + * 3. Neither the names of the above-listed copyright holders nor the names + * of any contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. + * + */ + +#ifndef _PM8001_CHIPS_H_ +#define _PM8001_CHIPS_H_ + +static inline u32 pm8001_read_32(void *virt_addr) +{ + return *((u32 *)virt_addr); +} + +static inline void pm8001_write_32(void *addr, u32 offset, u32 val) +{ + *((u32 *)(addr + offset)) = val; +} + +static inline u32 pm8001_cr32(struct pm8001_hba_info *pm8001_ha, u32 bar, + u32 offset) +{ + return readl(pm8001_ha->io_mem[bar].memvirtaddr + offset); +} + +static inline void pm8001_cw32(struct pm8001_hba_info *pm8001_ha, u32 bar, + u32 addr, u32 val) +{ + writel(val, pm8001_ha->io_mem[bar].memvirtaddr + addr); +} +static inline u32 pm8001_mr32(void __iomem *addr, u32 offset) +{ + return readl(addr + offset); +} +static inline void pm8001_mw32(void __iomem *addr, u32 offset, u32 val) +{ + writel(val, addr + offset); +} +static inline u32 get_pci_bar_index(u32 pcibar) +{ + switch (pcibar) { + case 0x18: + case 0x1C: + return 1; + case 0x20: + return 2; + case 0x24: + return 3; + default: + return 0; + } +} + +#endif /* _PM8001_CHIPS_H_ */ + diff --git a/drivers/scsi/pm8001/pm8001_ctl.c b/drivers/scsi/pm8001/pm8001_ctl.c new file mode 100644 index 000000000000..14b13acae6dd --- /dev/null +++ b/drivers/scsi/pm8001/pm8001_ctl.c @@ -0,0 +1,573 @@ +/* + * PMC-Sierra SPC 8001 SAS/SATA based host adapters driver + * + * Copyright (c) 2008-2009 USI Co., Ltd. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. + * 3. Neither the names of the above-listed copyright holders nor the names + * of any contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. + * + */ +#include +#include "pm8001_sas.h" +#include "pm8001_ctl.h" + +/* scsi host attributes */ + +/** + * pm8001_ctl_mpi_interface_rev_show - MPI interface revision number + * @cdev: pointer to embedded class device + * @buf: the buffer returned + * + * A sysfs 'read-only' shost attribute. + */ +static ssize_t pm8001_ctl_mpi_interface_rev_show(struct device *cdev, + struct device_attribute *attr, char *buf) +{ + struct Scsi_Host *shost = class_to_shost(cdev); + struct sas_ha_struct *sha = SHOST_TO_SAS_HA(shost); + struct pm8001_hba_info *pm8001_ha = sha->lldd_ha; + + return snprintf(buf, PAGE_SIZE, "%d\n", + pm8001_ha->main_cfg_tbl.interface_rev); +} +static +DEVICE_ATTR(interface_rev, S_IRUGO, pm8001_ctl_mpi_interface_rev_show, NULL); + +/** + * pm8001_ctl_fw_version_show - firmware version + * @cdev: pointer to embedded class device + * @buf: the buffer returned + * + * A sysfs 'read-only' shost attribute. + */ +static ssize_t pm8001_ctl_fw_version_show(struct device *cdev, + struct device_attribute *attr, char *buf) +{ + struct Scsi_Host *shost = class_to_shost(cdev); + struct sas_ha_struct *sha = SHOST_TO_SAS_HA(shost); + struct pm8001_hba_info *pm8001_ha = sha->lldd_ha; + + return snprintf(buf, PAGE_SIZE, "%02x.%02x.%02x.%02x\n", + (u8)(pm8001_ha->main_cfg_tbl.firmware_rev >> 24), + (u8)(pm8001_ha->main_cfg_tbl.firmware_rev >> 16), + (u8)(pm8001_ha->main_cfg_tbl.firmware_rev >> 8), + (u8)(pm8001_ha->main_cfg_tbl.firmware_rev)); +} +static DEVICE_ATTR(fw_version, S_IRUGO, pm8001_ctl_fw_version_show, NULL); +/** + * pm8001_ctl_max_out_io_show - max outstanding io supported + * @cdev: pointer to embedded class device + * @buf: the buffer returned + * + * A sysfs 'read-only' shost attribute. + */ +static ssize_t pm8001_ctl_max_out_io_show(struct device *cdev, + struct device_attribute *attr, char *buf) +{ + struct Scsi_Host *shost = class_to_shost(cdev); + struct sas_ha_struct *sha = SHOST_TO_SAS_HA(shost); + struct pm8001_hba_info *pm8001_ha = sha->lldd_ha; + + return snprintf(buf, PAGE_SIZE, "%d\n", + pm8001_ha->main_cfg_tbl.max_out_io); +} +static DEVICE_ATTR(max_out_io, S_IRUGO, pm8001_ctl_max_out_io_show, NULL); +/** + * pm8001_ctl_max_devices_show - max devices support + * @cdev: pointer to embedded class device + * @buf: the buffer returned + * + * A sysfs 'read-only' shost attribute. + */ +static ssize_t pm8001_ctl_max_devices_show(struct device *cdev, + struct device_attribute *attr, char *buf) +{ + struct Scsi_Host *shost = class_to_shost(cdev); + struct sas_ha_struct *sha = SHOST_TO_SAS_HA(shost); + struct pm8001_hba_info *pm8001_ha = sha->lldd_ha; + + return snprintf(buf, PAGE_SIZE, "%04d\n", + (u16)(pm8001_ha->main_cfg_tbl.max_sgl >> 16)); +} +static DEVICE_ATTR(max_devices, S_IRUGO, pm8001_ctl_max_devices_show, NULL); +/** + * pm8001_ctl_max_sg_list_show - max sg list supported iff not 0.0 for no + * hardware limitation + * @cdev: pointer to embedded class device + * @buf: the buffer returned + * + * A sysfs 'read-only' shost attribute. + */ +static ssize_t pm8001_ctl_max_sg_list_show(struct device *cdev, + struct device_attribute *attr, char *buf) +{ + struct Scsi_Host *shost = class_to_shost(cdev); + struct sas_ha_struct *sha = SHOST_TO_SAS_HA(shost); + struct pm8001_hba_info *pm8001_ha = sha->lldd_ha; + + return snprintf(buf, PAGE_SIZE, "%04d\n", + pm8001_ha->main_cfg_tbl.max_sgl & 0x0000FFFF); +} +static DEVICE_ATTR(max_sg_list, S_IRUGO, pm8001_ctl_max_sg_list_show, NULL); + +#define SAS_1_0 0x1 +#define SAS_1_1 0x2 +#define SAS_2_0 0x4 + +static ssize_t +show_sas_spec_support_status(unsigned int mode, char *buf) +{ + ssize_t len = 0; + + if (mode & SAS_1_1) + len = sprintf(buf, "%s", "SAS1.1"); + if (mode & SAS_2_0) + len += sprintf(buf + len, "%s%s", len ? ", " : "", "SAS2.0"); + len += sprintf(buf + len, "\n"); + + return len; +} + +/** + * pm8001_ctl_sas_spec_support_show - sas spec supported + * @cdev: pointer to embedded class device + * @buf: the buffer returned + * + * A sysfs 'read-only' shost attribute. + */ +static ssize_t pm8001_ctl_sas_spec_support_show(struct device *cdev, + struct device_attribute *attr, char *buf) +{ + unsigned int mode; + struct Scsi_Host *shost = class_to_shost(cdev); + struct sas_ha_struct *sha = SHOST_TO_SAS_HA(shost); + struct pm8001_hba_info *pm8001_ha = sha->lldd_ha; + mode = (pm8001_ha->main_cfg_tbl.ctrl_cap_flag & 0xfe000000)>>25; + return show_sas_spec_support_status(mode, buf); +} +static DEVICE_ATTR(sas_spec_support, S_IRUGO, + pm8001_ctl_sas_spec_support_show, NULL); + +/** + * pm8001_ctl_sas_address_show - sas address + * @cdev: pointer to embedded class device + * @buf: the buffer returned + * + * This is the controller sas address + * + * A sysfs 'read-only' shost attribute. + */ +static ssize_t pm8001_ctl_host_sas_address_show(struct device *cdev, + struct device_attribute *attr, char *buf) +{ + struct Scsi_Host *shost = class_to_shost(cdev); + struct sas_ha_struct *sha = SHOST_TO_SAS_HA(shost); + struct pm8001_hba_info *pm8001_ha = sha->lldd_ha; + return snprintf(buf, PAGE_SIZE, "0x%016llx\n", + be64_to_cpu(*(__be64 *)pm8001_ha->sas_addr)); +} +static DEVICE_ATTR(host_sas_address, S_IRUGO, + pm8001_ctl_host_sas_address_show, NULL); + +/** + * pm8001_ctl_logging_level_show - logging level + * @cdev: pointer to embedded class device + * @buf: the buffer returned + * + * A sysfs 'read/write' shost attribute. + */ +static ssize_t pm8001_ctl_logging_level_show(struct device *cdev, + struct device_attribute *attr, char *buf) +{ + struct Scsi_Host *shost = class_to_shost(cdev); + struct sas_ha_struct *sha = SHOST_TO_SAS_HA(shost); + struct pm8001_hba_info *pm8001_ha = sha->lldd_ha; + + return snprintf(buf, PAGE_SIZE, "%08xh\n", pm8001_ha->logging_level); +} +static ssize_t pm8001_ctl_logging_level_store(struct device *cdev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct Scsi_Host *shost = class_to_shost(cdev); + struct sas_ha_struct *sha = SHOST_TO_SAS_HA(shost); + struct pm8001_hba_info *pm8001_ha = sha->lldd_ha; + int val = 0; + + if (sscanf(buf, "%x", &val) != 1) + return -EINVAL; + + pm8001_ha->logging_level = val; + return strlen(buf); +} + +static DEVICE_ATTR(logging_level, S_IRUGO | S_IWUSR, + pm8001_ctl_logging_level_show, pm8001_ctl_logging_level_store); +/** + * pm8001_ctl_aap_log_show - aap1 event log + * @cdev: pointer to embedded class device + * @buf: the buffer returned + * + * A sysfs 'read-only' shost attribute. + */ +static ssize_t pm8001_ctl_aap_log_show(struct device *cdev, + struct device_attribute *attr, char *buf) +{ + struct Scsi_Host *shost = class_to_shost(cdev); + struct sas_ha_struct *sha = SHOST_TO_SAS_HA(shost); + struct pm8001_hba_info *pm8001_ha = sha->lldd_ha; + int i; +#define AAP1_MEMMAP(r, c) \ + (*(u32 *)((u8*)pm8001_ha->memoryMap.region[AAP1].virt_ptr + (r) * 32 \ + + (c))) + + char *str = buf; + int max = 2; + for (i = 0; i < max; i++) { + str += sprintf(str, "0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x" + "0x%08x 0x%08x\n", + AAP1_MEMMAP(i, 0), + AAP1_MEMMAP(i, 4), + AAP1_MEMMAP(i, 8), + AAP1_MEMMAP(i, 12), + AAP1_MEMMAP(i, 16), + AAP1_MEMMAP(i, 20), + AAP1_MEMMAP(i, 24), + AAP1_MEMMAP(i, 28)); + } + + return str - buf; +} +static DEVICE_ATTR(aap_log, S_IRUGO, pm8001_ctl_aap_log_show, NULL); +/** + * pm8001_ctl_aap_log_show - IOP event log + * @cdev: pointer to embedded class device + * @buf: the buffer returned + * + * A sysfs 'read-only' shost attribute. + */ +static ssize_t pm8001_ctl_iop_log_show(struct device *cdev, + struct device_attribute *attr, char *buf) +{ + struct Scsi_Host *shost = class_to_shost(cdev); + struct sas_ha_struct *sha = SHOST_TO_SAS_HA(shost); + struct pm8001_hba_info *pm8001_ha = sha->lldd_ha; +#define IOP_MEMMAP(r, c) \ + (*(u32 *)((u8*)pm8001_ha->memoryMap.region[IOP].virt_ptr + (r) * 32 \ + + (c))) + int i; + char *str = buf; + int max = 2; + for (i = 0; i < max; i++) { + str += sprintf(str, "0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x" + "0x%08x 0x%08x\n", + IOP_MEMMAP(i, 0), + IOP_MEMMAP(i, 4), + IOP_MEMMAP(i, 8), + IOP_MEMMAP(i, 12), + IOP_MEMMAP(i, 16), + IOP_MEMMAP(i, 20), + IOP_MEMMAP(i, 24), + IOP_MEMMAP(i, 28)); + } + + return str - buf; +} +static DEVICE_ATTR(iop_log, S_IRUGO, pm8001_ctl_iop_log_show, NULL); + +#define FLASH_CMD_NONE 0x00 +#define FLASH_CMD_UPDATE 0x01 +#define FLASH_CMD_SET_NVMD 0x02 + +struct flash_command { + u8 command[8]; + int code; +}; + +static struct flash_command flash_command_table[] = +{ + {"set_nvmd", FLASH_CMD_SET_NVMD}, + {"update", FLASH_CMD_UPDATE}, + {"", FLASH_CMD_NONE} /* Last entry should be NULL. */ +}; + +struct error_fw { + char *reason; + int err_code; +}; + +static struct error_fw flash_error_table[] = +{ + {"Failed to open fw image file", FAIL_OPEN_BIOS_FILE}, + {"image header mismatch", FLASH_UPDATE_HDR_ERR}, + {"image offset mismatch", FLASH_UPDATE_OFFSET_ERR}, + {"image CRC Error", FLASH_UPDATE_CRC_ERR}, + {"image length Error.", FLASH_UPDATE_LENGTH_ERR}, + {"Failed to program flash chip", FLASH_UPDATE_HW_ERR}, + {"Flash chip not supported.", FLASH_UPDATE_DNLD_NOT_SUPPORTED}, + {"Flash update disabled.", FLASH_UPDATE_DISABLED}, + {"Flash in progress", FLASH_IN_PROGRESS}, + {"Image file size Error", FAIL_FILE_SIZE}, + {"Input parameter error", FAIL_PARAMETERS}, + {"Out of memory", FAIL_OUT_MEMORY}, + {"OK", 0} /* Last entry err_code = 0. */ +}; + +static int pm8001_set_nvmd(struct pm8001_hba_info *pm8001_ha) +{ + struct pm8001_ioctl_payload *payload; + DECLARE_COMPLETION_ONSTACK(completion); + u8 *ioctlbuffer = NULL; + u32 length = 0; + u32 ret = 0; + + length = 1024 * 5 + sizeof(*payload) - 1; + ioctlbuffer = kzalloc(length, GFP_KERNEL); + if (!ioctlbuffer) + return -ENOMEM; + if ((pm8001_ha->fw_image->size <= 0) || + (pm8001_ha->fw_image->size > 4096)) { + ret = FAIL_FILE_SIZE; + goto out; + } + payload = (struct pm8001_ioctl_payload *)ioctlbuffer; + memcpy((u8 *)payload->func_specific, (u8 *)pm8001_ha->fw_image->data, + pm8001_ha->fw_image->size); + payload->length = pm8001_ha->fw_image->size; + payload->id = 0; + pm8001_ha->nvmd_completion = &completion; + ret = PM8001_CHIP_DISP->set_nvmd_req(pm8001_ha, payload); + wait_for_completion(&completion); +out: + kfree(ioctlbuffer); + return ret; +} + +static int pm8001_update_flash(struct pm8001_hba_info *pm8001_ha) +{ + struct pm8001_ioctl_payload *payload; + DECLARE_COMPLETION_ONSTACK(completion); + u8 *ioctlbuffer = NULL; + u32 length = 0; + struct fw_control_info *fwControl; + u32 loopNumber, loopcount = 0; + u32 sizeRead = 0; + u32 partitionSize, partitionSizeTmp; + u32 ret = 0; + u32 partitionNumber = 0; + struct pm8001_fw_image_header *image_hdr; + + length = 1024 * 16 + sizeof(*payload) - 1; + ioctlbuffer = kzalloc(length, GFP_KERNEL); + image_hdr = (struct pm8001_fw_image_header *)pm8001_ha->fw_image->data; + if (!ioctlbuffer) + return -ENOMEM; + if (pm8001_ha->fw_image->size < 28) { + ret = FAIL_FILE_SIZE; + goto out; + } + + while (sizeRead < pm8001_ha->fw_image->size) { + partitionSizeTmp = + *(u32 *)((u8 *)&image_hdr->image_length + sizeRead); + partitionSize = be32_to_cpu(partitionSizeTmp); + loopcount = (partitionSize + HEADER_LEN)/IOCTL_BUF_SIZE; + if (loopcount % IOCTL_BUF_SIZE) + loopcount++; + if (loopcount == 0) + loopcount++; + for (loopNumber = 0; loopNumber < loopcount; loopNumber++) { + payload = (struct pm8001_ioctl_payload *)ioctlbuffer; + payload->length = 1024*16; + payload->id = 0; + fwControl = + (struct fw_control_info *)payload->func_specific; + fwControl->len = IOCTL_BUF_SIZE; /* IN */ + fwControl->size = partitionSize + HEADER_LEN;/* IN */ + fwControl->retcode = 0;/* OUT */ + fwControl->offset = loopNumber * IOCTL_BUF_SIZE;/*OUT */ + + /* for the last chunk of data in case file size is not even with + 4k, load only the rest*/ + if (((loopcount-loopNumber) == 1) && + ((partitionSize + HEADER_LEN) % IOCTL_BUF_SIZE)) { + fwControl->len = + (partitionSize + HEADER_LEN) % IOCTL_BUF_SIZE; + memcpy((u8 *)fwControl->buffer, + (u8 *)pm8001_ha->fw_image->data + sizeRead, + (partitionSize + HEADER_LEN) % IOCTL_BUF_SIZE); + sizeRead += + (partitionSize + HEADER_LEN) % IOCTL_BUF_SIZE; + } else { + memcpy((u8 *)fwControl->buffer, + (u8 *)pm8001_ha->fw_image->data + sizeRead, + IOCTL_BUF_SIZE); + sizeRead += IOCTL_BUF_SIZE; + } + + pm8001_ha->nvmd_completion = &completion; + ret = PM8001_CHIP_DISP->fw_flash_update_req(pm8001_ha, payload); + wait_for_completion(&completion); + if (ret || (fwControl->retcode > FLASH_UPDATE_IN_PROGRESS)) { + ret = fwControl->retcode; + kfree(ioctlbuffer); + ioctlbuffer = NULL; + break; + } + } + if (ret) + break; + partitionNumber++; +} +out: + kfree(ioctlbuffer); + return ret; +} +static ssize_t pm8001_store_update_fw(struct device *cdev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct Scsi_Host *shost = class_to_shost(cdev); + struct sas_ha_struct *sha = SHOST_TO_SAS_HA(shost); + struct pm8001_hba_info *pm8001_ha = sha->lldd_ha; + char *cmd_ptr, *filename_ptr; + int res, i; + int flash_command = FLASH_CMD_NONE; + int err = 0; + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + + cmd_ptr = kzalloc(count*2, GFP_KERNEL); + + if (!cmd_ptr) { + err = FAIL_OUT_MEMORY; + goto out; + } + + filename_ptr = cmd_ptr + count; + res = sscanf(buf, "%s %s", cmd_ptr, filename_ptr); + if (res != 2) { + err = FAIL_PARAMETERS; + goto out1; + } + + for (i = 0; flash_command_table[i].code != FLASH_CMD_NONE; i++) { + if (!memcmp(flash_command_table[i].command, + cmd_ptr, strlen(cmd_ptr))) { + flash_command = flash_command_table[i].code; + break; + } + } + if (flash_command == FLASH_CMD_NONE) { + err = FAIL_PARAMETERS; + goto out1; + } + + if (pm8001_ha->fw_status == FLASH_IN_PROGRESS) { + err = FLASH_IN_PROGRESS; + goto out1; + } + err = request_firmware(&pm8001_ha->fw_image, + filename_ptr, + pm8001_ha->dev); + + if (err) { + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk("Failed to load firmware image file %s," + " error %d\n", filename_ptr, err)); + err = FAIL_OPEN_BIOS_FILE; + goto out1; + } + + switch (flash_command) { + case FLASH_CMD_UPDATE: + pm8001_ha->fw_status = FLASH_IN_PROGRESS; + err = pm8001_update_flash(pm8001_ha); + break; + case FLASH_CMD_SET_NVMD: + pm8001_ha->fw_status = FLASH_IN_PROGRESS; + err = pm8001_set_nvmd(pm8001_ha); + break; + default: + pm8001_ha->fw_status = FAIL_PARAMETERS; + err = FAIL_PARAMETERS; + break; + } + release_firmware(pm8001_ha->fw_image); +out1: + kfree(cmd_ptr); +out: + pm8001_ha->fw_status = err; + + if (!err) + return count; + else + return -err; +} + +static ssize_t pm8001_show_update_fw(struct device *cdev, + struct device_attribute *attr, char *buf) +{ + int i; + struct Scsi_Host *shost = class_to_shost(cdev); + struct sas_ha_struct *sha = SHOST_TO_SAS_HA(shost); + struct pm8001_hba_info *pm8001_ha = sha->lldd_ha; + + for (i = 0; flash_error_table[i].err_code != 0; i++) { + if (flash_error_table[i].err_code == pm8001_ha->fw_status) + break; + } + if (pm8001_ha->fw_status != FLASH_IN_PROGRESS) + pm8001_ha->fw_status = FLASH_OK; + + return snprintf(buf, PAGE_SIZE, "status=%x %s\n", + flash_error_table[i].err_code, + flash_error_table[i].reason); +} + +static DEVICE_ATTR(update_fw, S_IRUGO|S_IWUGO, + pm8001_show_update_fw, pm8001_store_update_fw); +struct device_attribute *pm8001_host_attrs[] = { + &dev_attr_interface_rev, + &dev_attr_fw_version, + &dev_attr_update_fw, + &dev_attr_aap_log, + &dev_attr_iop_log, + &dev_attr_max_out_io, + &dev_attr_max_devices, + &dev_attr_max_sg_list, + &dev_attr_sas_spec_support, + &dev_attr_logging_level, + &dev_attr_host_sas_address, + NULL, +}; + diff --git a/drivers/scsi/pm8001/pm8001_ctl.h b/drivers/scsi/pm8001/pm8001_ctl.h new file mode 100644 index 000000000000..22644de26399 --- /dev/null +++ b/drivers/scsi/pm8001/pm8001_ctl.h @@ -0,0 +1,67 @@ + /* + * PMC-Sierra SPC 8001 SAS/SATA based host adapters driver + * + * Copyright (c) 2008-2009 USI Co., Ltd. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. + * 3. Neither the names of the above-listed copyright holders nor the names + * of any contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. + * + */ + +#ifndef PM8001_CTL_H_INCLUDED +#define PM8001_CTL_H_INCLUDED + +#define IOCTL_BUF_SIZE 4096 +#define HEADER_LEN 28 +#define SIZE_OFFSET 16 + +struct pm8001_ioctl_payload { + u32 signature; + u16 major_function; + u16 minor_function; + u16 length; + u16 status; + u16 offset; + u16 id; + u8 func_specific[1]; +}; + +#define FLASH_OK 0x000000 +#define FAIL_OPEN_BIOS_FILE 0x000100 +#define FAIL_FILE_SIZE 0x000a00 +#define FAIL_PARAMETERS 0x000b00 +#define FAIL_OUT_MEMORY 0x000c00 +#define FLASH_IN_PROGRESS 0x001000 + +#endif /* PM8001_CTL_H_INCLUDED */ + diff --git a/drivers/scsi/pm8001/pm8001_defs.h b/drivers/scsi/pm8001/pm8001_defs.h new file mode 100644 index 000000000000..944afada61ee --- /dev/null +++ b/drivers/scsi/pm8001/pm8001_defs.h @@ -0,0 +1,112 @@ +/* + * PMC-Sierra SPC 8001 SAS/SATA based host adapters driver + * + * Copyright (c) 2008-2009 USI Co., Ltd. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. + * 3. Neither the names of the above-listed copyright holders nor the names + * of any contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. + * + */ + +#ifndef _PM8001_DEFS_H_ +#define _PM8001_DEFS_H_ + +enum chip_flavors { + chip_8001, +}; +#define USI_MAX_MEMCNT 9 +#define PM8001_MAX_DMA_SG SG_ALL +enum phy_speed { + PHY_SPEED_15 = 0x01, + PHY_SPEED_30 = 0x02, + PHY_SPEED_60 = 0x04, +}; + +enum data_direction { + DATA_DIR_NONE = 0x0, /* NO TRANSFER */ + DATA_DIR_IN = 0x01, /* INBOUND */ + DATA_DIR_OUT = 0x02, /* OUTBOUND */ + DATA_DIR_BYRECIPIENT = 0x04, /* UNSPECIFIED */ +}; + +enum port_type { + PORT_TYPE_SAS = (1L << 1), + PORT_TYPE_SATA = (1L << 0), +}; + +/* driver compile-time configuration */ +#define PM8001_MAX_CCB 512 /* max ccbs supported */ +#define PM8001_MAX_INB_NUM 1 +#define PM8001_MAX_OUTB_NUM 1 +#define PM8001_CAN_QUEUE 128 /* SCSI Queue depth */ + +/* unchangeable hardware details */ +#define PM8001_MAX_PHYS 8 /* max. possible phys */ +#define PM8001_MAX_PORTS 8 /* max. possible ports */ +#define PM8001_MAX_DEVICES 1024 /* max supported device */ + +enum memory_region_num { + AAP1 = 0x0, /* application acceleration processor */ + IOP, /* IO processor */ + CI, /* consumer index */ + PI, /* producer index */ + IB, /* inbound queue */ + OB, /* outbound queue */ + NVMD, /* NVM device */ + DEV_MEM, /* memory for devices */ + CCB_MEM, /* memory for command control block */ +}; +#define PM8001_EVENT_LOG_SIZE (128 * 1024) + +/*error code*/ +enum mpi_err { + MPI_IO_STATUS_SUCCESS = 0x0, + MPI_IO_STATUS_BUSY = 0x01, + MPI_IO_STATUS_FAIL = 0x02, +}; + +/** + * Phy Control constants + */ +enum phy_control_type { + PHY_LINK_RESET = 0x01, + PHY_HARD_RESET = 0x02, + PHY_NOTIFY_ENABLE_SPINUP = 0x10, +}; + +enum pm8001_hba_info_flags { + PM8001F_INIT_TIME = (1U << 0), + PM8001F_RUN_TIME = (1U << 1), +}; + +#endif diff --git a/drivers/scsi/pm8001/pm8001_hwi.c b/drivers/scsi/pm8001/pm8001_hwi.c new file mode 100644 index 000000000000..aa5756fe0574 --- /dev/null +++ b/drivers/scsi/pm8001/pm8001_hwi.c @@ -0,0 +1,4371 @@ +/* + * PMC-Sierra SPC 8001 SAS/SATA based host adapters driver + * + * Copyright (c) 2008-2009 USI Co., Ltd. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. + * 3. Neither the names of the above-listed copyright holders nor the names + * of any contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. + * + */ + #include "pm8001_sas.h" + #include "pm8001_hwi.h" + #include "pm8001_chips.h" + #include "pm8001_ctl.h" + +/** + * read_main_config_table - read the configure table and save it. + * @pm8001_ha: our hba card information + */ +static void __devinit read_main_config_table(struct pm8001_hba_info *pm8001_ha) +{ + void __iomem *address = pm8001_ha->main_cfg_tbl_addr; + pm8001_ha->main_cfg_tbl.signature = pm8001_mr32(address, 0x00); + pm8001_ha->main_cfg_tbl.interface_rev = pm8001_mr32(address, 0x04); + pm8001_ha->main_cfg_tbl.firmware_rev = pm8001_mr32(address, 0x08); + pm8001_ha->main_cfg_tbl.max_out_io = pm8001_mr32(address, 0x0C); + pm8001_ha->main_cfg_tbl.max_sgl = pm8001_mr32(address, 0x10); + pm8001_ha->main_cfg_tbl.ctrl_cap_flag = pm8001_mr32(address, 0x14); + pm8001_ha->main_cfg_tbl.gst_offset = pm8001_mr32(address, 0x18); + pm8001_ha->main_cfg_tbl.inbound_queue_offset = + pm8001_mr32(address, 0x1C); + pm8001_ha->main_cfg_tbl.outbound_queue_offset = + pm8001_mr32(address, 0x20); + pm8001_ha->main_cfg_tbl.hda_mode_flag = + pm8001_mr32(address, MAIN_HDA_FLAGS_OFFSET); + + /* read analog Setting offset from the configuration table */ + pm8001_ha->main_cfg_tbl.anolog_setup_table_offset = + pm8001_mr32(address, MAIN_ANALOG_SETUP_OFFSET); + + /* read Error Dump Offset and Length */ + pm8001_ha->main_cfg_tbl.fatal_err_dump_offset0 = + pm8001_mr32(address, MAIN_FATAL_ERROR_RDUMP0_OFFSET); + pm8001_ha->main_cfg_tbl.fatal_err_dump_length0 = + pm8001_mr32(address, MAIN_FATAL_ERROR_RDUMP0_LENGTH); + pm8001_ha->main_cfg_tbl.fatal_err_dump_offset1 = + pm8001_mr32(address, MAIN_FATAL_ERROR_RDUMP1_OFFSET); + pm8001_ha->main_cfg_tbl.fatal_err_dump_length1 = + pm8001_mr32(address, MAIN_FATAL_ERROR_RDUMP1_LENGTH); +} + +/** + * read_general_status_table - read the general status table and save it. + * @pm8001_ha: our hba card information + */ +static void __devinit +read_general_status_table(struct pm8001_hba_info *pm8001_ha) +{ + void __iomem *address = pm8001_ha->general_stat_tbl_addr; + pm8001_ha->gs_tbl.gst_len_mpistate = pm8001_mr32(address, 0x00); + pm8001_ha->gs_tbl.iq_freeze_state0 = pm8001_mr32(address, 0x04); + pm8001_ha->gs_tbl.iq_freeze_state1 = pm8001_mr32(address, 0x08); + pm8001_ha->gs_tbl.msgu_tcnt = pm8001_mr32(address, 0x0C); + pm8001_ha->gs_tbl.iop_tcnt = pm8001_mr32(address, 0x10); + pm8001_ha->gs_tbl.reserved = pm8001_mr32(address, 0x14); + pm8001_ha->gs_tbl.phy_state[0] = pm8001_mr32(address, 0x18); + pm8001_ha->gs_tbl.phy_state[1] = pm8001_mr32(address, 0x1C); + pm8001_ha->gs_tbl.phy_state[2] = pm8001_mr32(address, 0x20); + pm8001_ha->gs_tbl.phy_state[3] = pm8001_mr32(address, 0x24); + pm8001_ha->gs_tbl.phy_state[4] = pm8001_mr32(address, 0x28); + pm8001_ha->gs_tbl.phy_state[5] = pm8001_mr32(address, 0x2C); + pm8001_ha->gs_tbl.phy_state[6] = pm8001_mr32(address, 0x30); + pm8001_ha->gs_tbl.phy_state[7] = pm8001_mr32(address, 0x34); + pm8001_ha->gs_tbl.reserved1 = pm8001_mr32(address, 0x38); + pm8001_ha->gs_tbl.reserved2 = pm8001_mr32(address, 0x3C); + pm8001_ha->gs_tbl.reserved3 = pm8001_mr32(address, 0x40); + pm8001_ha->gs_tbl.recover_err_info[0] = pm8001_mr32(address, 0x44); + pm8001_ha->gs_tbl.recover_err_info[1] = pm8001_mr32(address, 0x48); + pm8001_ha->gs_tbl.recover_err_info[2] = pm8001_mr32(address, 0x4C); + pm8001_ha->gs_tbl.recover_err_info[3] = pm8001_mr32(address, 0x50); + pm8001_ha->gs_tbl.recover_err_info[4] = pm8001_mr32(address, 0x54); + pm8001_ha->gs_tbl.recover_err_info[5] = pm8001_mr32(address, 0x58); + pm8001_ha->gs_tbl.recover_err_info[6] = pm8001_mr32(address, 0x5C); + pm8001_ha->gs_tbl.recover_err_info[7] = pm8001_mr32(address, 0x60); +} + +/** + * read_inbnd_queue_table - read the inbound queue table and save it. + * @pm8001_ha: our hba card information + */ +static void __devinit +read_inbnd_queue_table(struct pm8001_hba_info *pm8001_ha) +{ + int inbQ_num = 1; + int i; + void __iomem *address = pm8001_ha->inbnd_q_tbl_addr; + for (i = 0; i < inbQ_num; i++) { + u32 offset = i * 0x24; + pm8001_ha->inbnd_q_tbl[i].pi_pci_bar = + get_pci_bar_index(pm8001_mr32(address, (offset + 0x14))); + pm8001_ha->inbnd_q_tbl[i].pi_offset = + pm8001_mr32(address, (offset + 0x18)); + } +} + +/** + * read_outbnd_queue_table - read the outbound queue table and save it. + * @pm8001_ha: our hba card information + */ +static void __devinit +read_outbnd_queue_table(struct pm8001_hba_info *pm8001_ha) +{ + int outbQ_num = 1; + int i; + void __iomem *address = pm8001_ha->outbnd_q_tbl_addr; + for (i = 0; i < outbQ_num; i++) { + u32 offset = i * 0x24; + pm8001_ha->outbnd_q_tbl[i].ci_pci_bar = + get_pci_bar_index(pm8001_mr32(address, (offset + 0x14))); + pm8001_ha->outbnd_q_tbl[i].ci_offset = + pm8001_mr32(address, (offset + 0x18)); + } +} + +/** + * init_default_table_values - init the default table. + * @pm8001_ha: our hba card information + */ +static void __devinit +init_default_table_values(struct pm8001_hba_info *pm8001_ha) +{ + int qn = 1; + int i; + u32 offsetib, offsetob; + void __iomem *addressib = pm8001_ha->inbnd_q_tbl_addr; + void __iomem *addressob = pm8001_ha->outbnd_q_tbl_addr; + + pm8001_ha->main_cfg_tbl.inbound_q_nppd_hppd = 0; + pm8001_ha->main_cfg_tbl.outbound_hw_event_pid0_3 = 0; + pm8001_ha->main_cfg_tbl.outbound_hw_event_pid4_7 = 0; + pm8001_ha->main_cfg_tbl.outbound_ncq_event_pid0_3 = 0; + pm8001_ha->main_cfg_tbl.outbound_ncq_event_pid4_7 = 0; + pm8001_ha->main_cfg_tbl.outbound_tgt_ITNexus_event_pid0_3 = 0; + pm8001_ha->main_cfg_tbl.outbound_tgt_ITNexus_event_pid4_7 = 0; + pm8001_ha->main_cfg_tbl.outbound_tgt_ssp_event_pid0_3 = 0; + pm8001_ha->main_cfg_tbl.outbound_tgt_ssp_event_pid4_7 = 0; + pm8001_ha->main_cfg_tbl.outbound_tgt_smp_event_pid0_3 = 0; + pm8001_ha->main_cfg_tbl.outbound_tgt_smp_event_pid4_7 = 0; + + pm8001_ha->main_cfg_tbl.upper_event_log_addr = + pm8001_ha->memoryMap.region[AAP1].phys_addr_hi; + pm8001_ha->main_cfg_tbl.lower_event_log_addr = + pm8001_ha->memoryMap.region[AAP1].phys_addr_lo; + pm8001_ha->main_cfg_tbl.event_log_size = PM8001_EVENT_LOG_SIZE; + pm8001_ha->main_cfg_tbl.event_log_option = 0x01; + pm8001_ha->main_cfg_tbl.upper_iop_event_log_addr = + pm8001_ha->memoryMap.region[IOP].phys_addr_hi; + pm8001_ha->main_cfg_tbl.lower_iop_event_log_addr = + pm8001_ha->memoryMap.region[IOP].phys_addr_lo; + pm8001_ha->main_cfg_tbl.iop_event_log_size = PM8001_EVENT_LOG_SIZE; + pm8001_ha->main_cfg_tbl.iop_event_log_option = 0x01; + pm8001_ha->main_cfg_tbl.fatal_err_interrupt = 0x01; + for (i = 0; i < qn; i++) { + pm8001_ha->inbnd_q_tbl[i].element_pri_size_cnt = + 0x00000100 | (0x00000040 << 16) | (0x00<<30); + pm8001_ha->inbnd_q_tbl[i].upper_base_addr = + pm8001_ha->memoryMap.region[IB].phys_addr_hi; + pm8001_ha->inbnd_q_tbl[i].lower_base_addr = + pm8001_ha->memoryMap.region[IB].phys_addr_lo; + pm8001_ha->inbnd_q_tbl[i].base_virt = + (u8 *)pm8001_ha->memoryMap.region[IB].virt_ptr; + pm8001_ha->inbnd_q_tbl[i].total_length = + pm8001_ha->memoryMap.region[IB].total_len; + pm8001_ha->inbnd_q_tbl[i].ci_upper_base_addr = + pm8001_ha->memoryMap.region[CI].phys_addr_hi; + pm8001_ha->inbnd_q_tbl[i].ci_lower_base_addr = + pm8001_ha->memoryMap.region[CI].phys_addr_lo; + pm8001_ha->inbnd_q_tbl[i].ci_virt = + pm8001_ha->memoryMap.region[CI].virt_ptr; + offsetib = i * 0x20; + pm8001_ha->inbnd_q_tbl[i].pi_pci_bar = + get_pci_bar_index(pm8001_mr32(addressib, + (offsetib + 0x14))); + pm8001_ha->inbnd_q_tbl[i].pi_offset = + pm8001_mr32(addressib, (offsetib + 0x18)); + pm8001_ha->inbnd_q_tbl[i].producer_idx = 0; + pm8001_ha->inbnd_q_tbl[i].consumer_index = 0; + } + for (i = 0; i < qn; i++) { + pm8001_ha->outbnd_q_tbl[i].element_size_cnt = + 256 | (64 << 16) | (1<<30); + pm8001_ha->outbnd_q_tbl[i].upper_base_addr = + pm8001_ha->memoryMap.region[OB].phys_addr_hi; + pm8001_ha->outbnd_q_tbl[i].lower_base_addr = + pm8001_ha->memoryMap.region[OB].phys_addr_lo; + pm8001_ha->outbnd_q_tbl[i].base_virt = + (u8 *)pm8001_ha->memoryMap.region[OB].virt_ptr; + pm8001_ha->outbnd_q_tbl[i].total_length = + pm8001_ha->memoryMap.region[OB].total_len; + pm8001_ha->outbnd_q_tbl[i].pi_upper_base_addr = + pm8001_ha->memoryMap.region[PI].phys_addr_hi; + pm8001_ha->outbnd_q_tbl[i].pi_lower_base_addr = + pm8001_ha->memoryMap.region[PI].phys_addr_lo; + pm8001_ha->outbnd_q_tbl[i].interrup_vec_cnt_delay = + 0 | (0 << 16) | (0 << 24); + pm8001_ha->outbnd_q_tbl[i].pi_virt = + pm8001_ha->memoryMap.region[PI].virt_ptr; + offsetob = i * 0x24; + pm8001_ha->outbnd_q_tbl[i].ci_pci_bar = + get_pci_bar_index(pm8001_mr32(addressob, + offsetob + 0x14)); + pm8001_ha->outbnd_q_tbl[i].ci_offset = + pm8001_mr32(addressob, (offsetob + 0x18)); + pm8001_ha->outbnd_q_tbl[i].consumer_idx = 0; + pm8001_ha->outbnd_q_tbl[i].producer_index = 0; + } +} + +/** + * update_main_config_table - update the main default table to the HBA. + * @pm8001_ha: our hba card information + */ +static void __devinit +update_main_config_table(struct pm8001_hba_info *pm8001_ha) +{ + void __iomem *address = pm8001_ha->main_cfg_tbl_addr; + pm8001_mw32(address, 0x24, + pm8001_ha->main_cfg_tbl.inbound_q_nppd_hppd); + pm8001_mw32(address, 0x28, + pm8001_ha->main_cfg_tbl.outbound_hw_event_pid0_3); + pm8001_mw32(address, 0x2C, + pm8001_ha->main_cfg_tbl.outbound_hw_event_pid4_7); + pm8001_mw32(address, 0x30, + pm8001_ha->main_cfg_tbl.outbound_ncq_event_pid0_3); + pm8001_mw32(address, 0x34, + pm8001_ha->main_cfg_tbl.outbound_ncq_event_pid4_7); + pm8001_mw32(address, 0x38, + pm8001_ha->main_cfg_tbl.outbound_tgt_ITNexus_event_pid0_3); + pm8001_mw32(address, 0x3C, + pm8001_ha->main_cfg_tbl.outbound_tgt_ITNexus_event_pid4_7); + pm8001_mw32(address, 0x40, + pm8001_ha->main_cfg_tbl.outbound_tgt_ssp_event_pid0_3); + pm8001_mw32(address, 0x44, + pm8001_ha->main_cfg_tbl.outbound_tgt_ssp_event_pid4_7); + pm8001_mw32(address, 0x48, + pm8001_ha->main_cfg_tbl.outbound_tgt_smp_event_pid0_3); + pm8001_mw32(address, 0x4C, + pm8001_ha->main_cfg_tbl.outbound_tgt_smp_event_pid4_7); + pm8001_mw32(address, 0x50, + pm8001_ha->main_cfg_tbl.upper_event_log_addr); + pm8001_mw32(address, 0x54, + pm8001_ha->main_cfg_tbl.lower_event_log_addr); + pm8001_mw32(address, 0x58, pm8001_ha->main_cfg_tbl.event_log_size); + pm8001_mw32(address, 0x5C, pm8001_ha->main_cfg_tbl.event_log_option); + pm8001_mw32(address, 0x60, + pm8001_ha->main_cfg_tbl.upper_iop_event_log_addr); + pm8001_mw32(address, 0x64, + pm8001_ha->main_cfg_tbl.lower_iop_event_log_addr); + pm8001_mw32(address, 0x68, pm8001_ha->main_cfg_tbl.iop_event_log_size); + pm8001_mw32(address, 0x6C, + pm8001_ha->main_cfg_tbl.iop_event_log_option); + pm8001_mw32(address, 0x70, + pm8001_ha->main_cfg_tbl.fatal_err_interrupt); +} + +/** + * update_inbnd_queue_table - update the inbound queue table to the HBA. + * @pm8001_ha: our hba card information + */ +static void __devinit +update_inbnd_queue_table(struct pm8001_hba_info *pm8001_ha, int number) +{ + void __iomem *address = pm8001_ha->inbnd_q_tbl_addr; + u16 offset = number * 0x20; + pm8001_mw32(address, offset + 0x00, + pm8001_ha->inbnd_q_tbl[number].element_pri_size_cnt); + pm8001_mw32(address, offset + 0x04, + pm8001_ha->inbnd_q_tbl[number].upper_base_addr); + pm8001_mw32(address, offset + 0x08, + pm8001_ha->inbnd_q_tbl[number].lower_base_addr); + pm8001_mw32(address, offset + 0x0C, + pm8001_ha->inbnd_q_tbl[number].ci_upper_base_addr); + pm8001_mw32(address, offset + 0x10, + pm8001_ha->inbnd_q_tbl[number].ci_lower_base_addr); +} + +/** + * update_outbnd_queue_table - update the outbound queue table to the HBA. + * @pm8001_ha: our hba card information + */ +static void __devinit +update_outbnd_queue_table(struct pm8001_hba_info *pm8001_ha, int number) +{ + void __iomem *address = pm8001_ha->outbnd_q_tbl_addr; + u16 offset = number * 0x24; + pm8001_mw32(address, offset + 0x00, + pm8001_ha->outbnd_q_tbl[number].element_size_cnt); + pm8001_mw32(address, offset + 0x04, + pm8001_ha->outbnd_q_tbl[number].upper_base_addr); + pm8001_mw32(address, offset + 0x08, + pm8001_ha->outbnd_q_tbl[number].lower_base_addr); + pm8001_mw32(address, offset + 0x0C, + pm8001_ha->outbnd_q_tbl[number].pi_upper_base_addr); + pm8001_mw32(address, offset + 0x10, + pm8001_ha->outbnd_q_tbl[number].pi_lower_base_addr); + pm8001_mw32(address, offset + 0x1C, + pm8001_ha->outbnd_q_tbl[number].interrup_vec_cnt_delay); +} + +/** + * bar4_shift - function is called to shift BAR base address + * @pm8001_ha : our hba card infomation + * @shiftValue : shifting value in memory bar. + */ +static u32 bar4_shift(struct pm8001_hba_info *pm8001_ha, u32 shiftValue) +{ + u32 regVal; + u32 max_wait_count; + + /* program the inbound AXI translation Lower Address */ + pm8001_cw32(pm8001_ha, 1, SPC_IBW_AXI_TRANSLATION_LOW, shiftValue); + + /* confirm the setting is written */ + max_wait_count = 1 * 1000 * 1000; /* 1 sec */ + do { + udelay(1); + regVal = pm8001_cr32(pm8001_ha, 1, SPC_IBW_AXI_TRANSLATION_LOW); + } while ((regVal != shiftValue) && (--max_wait_count)); + + if (!max_wait_count) { + PM8001_INIT_DBG(pm8001_ha, + pm8001_printk("TIMEOUT:SPC_IBW_AXI_TRANSLATION_LOW" + " = 0x%x\n", regVal)); + return -1; + } + return 0; +} + +/** + * mpi_set_phys_g3_with_ssc + * @pm8001_ha: our hba card information + * @SSCbit: set SSCbit to 0 to disable all phys ssc; 1 to enable all phys ssc. + */ +static void __devinit +mpi_set_phys_g3_with_ssc(struct pm8001_hba_info *pm8001_ha, u32 SSCbit) +{ + u32 offset; + u32 value; + u32 i; + +#define SAS2_SETTINGS_LOCAL_PHY_0_3_SHIFT_ADDR 0x00030000 +#define SAS2_SETTINGS_LOCAL_PHY_4_7_SHIFT_ADDR 0x00040000 +#define SAS2_SETTINGS_LOCAL_PHY_0_3_OFFSET 0x1074 +#define SAS2_SETTINGS_LOCAL_PHY_4_7_OFFSET 0x1074 +#define PHY_SSC_BIT_SHIFT 13 + + /* + * Using shifted destination address 0x3_0000:0x1074 + 0x4000*N (N=0:3) + * Using shifted destination address 0x4_0000:0x1074 + 0x4000*(N-4) (N=4:7) + */ + if (-1 == bar4_shift(pm8001_ha, SAS2_SETTINGS_LOCAL_PHY_0_3_SHIFT_ADDR)) + return; + /* set SSC bit of PHY 0 - 3 */ + for (i = 0; i < 4; i++) { + offset = SAS2_SETTINGS_LOCAL_PHY_0_3_OFFSET + 0x4000 * i; + value = pm8001_cr32(pm8001_ha, 2, offset); + if (SSCbit) + value = value | (0x00000001 << PHY_SSC_BIT_SHIFT); + else + value = value & (~(0x00000001<general_stat_tbl_addr, + GST_GSTLEN_MPIS_OFFSET); + if (GST_MPI_STATE_INIT != (gst_len_mpistate & GST_MPI_STATE_MASK)) + return -1; + /* check MPI Initialization error */ + gst_len_mpistate = gst_len_mpistate >> 16; + if (0x0000 != gst_len_mpistate) + return -1; + return 0; +} + +/** + * check_fw_ready - The LLDD check if the FW is ready, if not, return error. + * @pm8001_ha: our hba card information + */ +static int check_fw_ready(struct pm8001_hba_info *pm8001_ha) +{ + u32 value, value1; + u32 max_wait_count; + /* check error state */ + value = pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_1); + value1 = pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_2); + /* check AAP error */ + if (SCRATCH_PAD1_ERR == (value & SCRATCH_PAD_STATE_MASK)) { + /* error state */ + value = pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_0); + return -1; + } + + /* check IOP error */ + if (SCRATCH_PAD2_ERR == (value1 & SCRATCH_PAD_STATE_MASK)) { + /* error state */ + value1 = pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_3); + return -1; + } + + /* bit 4-31 of scratch pad1 should be zeros if it is not + in error state*/ + if (value & SCRATCH_PAD1_STATE_MASK) { + /* error case */ + pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_0); + return -1; + } + + /* bit 2, 4-31 of scratch pad2 should be zeros if it is not + in error state */ + if (value1 & SCRATCH_PAD2_STATE_MASK) { + /* error case */ + return -1; + } + + max_wait_count = 1 * 1000 * 1000;/* 1 sec timeout */ + + /* wait until scratch pad 1 and 2 registers in ready state */ + do { + udelay(1); + value = pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_1) + & SCRATCH_PAD1_RDY; + value1 = pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_2) + & SCRATCH_PAD2_RDY; + if ((--max_wait_count) == 0) + return -1; + } while ((value != SCRATCH_PAD1_RDY) || (value1 != SCRATCH_PAD2_RDY)); + return 0; +} + +static void init_pci_device_addresses(struct pm8001_hba_info *pm8001_ha) +{ + void __iomem *base_addr; + u32 value; + u32 offset; + u32 pcibar; + u32 pcilogic; + + value = pm8001_cr32(pm8001_ha, 0, 0x44); + offset = value & 0x03FFFFFF; + PM8001_INIT_DBG(pm8001_ha, + pm8001_printk("Scratchpad 0 Offset: %x \n", offset)); + pcilogic = (value & 0xFC000000) >> 26; + pcibar = get_pci_bar_index(pcilogic); + PM8001_INIT_DBG(pm8001_ha, + pm8001_printk("Scratchpad 0 PCI BAR: %d \n", pcibar)); + pm8001_ha->main_cfg_tbl_addr = base_addr = + pm8001_ha->io_mem[pcibar].memvirtaddr + offset; + pm8001_ha->general_stat_tbl_addr = + base_addr + pm8001_cr32(pm8001_ha, pcibar, offset + 0x18); + pm8001_ha->inbnd_q_tbl_addr = + base_addr + pm8001_cr32(pm8001_ha, pcibar, offset + 0x1C); + pm8001_ha->outbnd_q_tbl_addr = + base_addr + pm8001_cr32(pm8001_ha, pcibar, offset + 0x20); +} + +/** + * pm8001_chip_init - the main init function that initialize whole PM8001 chip. + * @pm8001_ha: our hba card information + */ +static int __devinit pm8001_chip_init(struct pm8001_hba_info *pm8001_ha) +{ + /* check the firmware status */ + if (-1 == check_fw_ready(pm8001_ha)) { + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk("Firmware is not ready!\n")); + return -EBUSY; + } + + /* Initialize pci space address eg: mpi offset */ + init_pci_device_addresses(pm8001_ha); + init_default_table_values(pm8001_ha); + read_main_config_table(pm8001_ha); + read_general_status_table(pm8001_ha); + read_inbnd_queue_table(pm8001_ha); + read_outbnd_queue_table(pm8001_ha); + /* update main config table ,inbound table and outbound table */ + update_main_config_table(pm8001_ha); + update_inbnd_queue_table(pm8001_ha, 0); + update_outbnd_queue_table(pm8001_ha, 0); + mpi_set_phys_g3_with_ssc(pm8001_ha, 0); + mpi_set_open_retry_interval_reg(pm8001_ha, 7); + /* notify firmware update finished and check initialization status */ + if (0 == mpi_init_check(pm8001_ha)) { + PM8001_INIT_DBG(pm8001_ha, + pm8001_printk("MPI initialize successful!\n")); + } else + return -EBUSY; + /*This register is a 16-bit timer with a resolution of 1us. This is the + timer used for interrupt delay/coalescing in the PCIe Application Layer. + Zero is not a valid value. A value of 1 in the register will cause the + interrupts to be normal. A value greater than 1 will cause coalescing + delays.*/ + pm8001_cw32(pm8001_ha, 1, 0x0033c0, 0x1); + pm8001_cw32(pm8001_ha, 1, 0x0033c4, 0x0); + return 0; +} + +static int mpi_uninit_check(struct pm8001_hba_info *pm8001_ha) +{ + u32 max_wait_count; + u32 value; + u32 gst_len_mpistate; + init_pci_device_addresses(pm8001_ha); + /* Write bit1=1 to Inbound DoorBell Register to tell the SPC FW the + table is stop */ + pm8001_cw32(pm8001_ha, 0, MSGU_IBDB_SET, SPC_MSGU_CFG_TABLE_RESET); + + /* wait until Inbound DoorBell Clear Register toggled */ + max_wait_count = 1 * 1000 * 1000;/* 1 sec */ + do { + udelay(1); + value = pm8001_cr32(pm8001_ha, 0, MSGU_IBDB_SET); + value &= SPC_MSGU_CFG_TABLE_RESET; + } while ((value != 0) && (--max_wait_count)); + + if (!max_wait_count) { + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk("TIMEOUT:IBDB value/=0x%x\n", value)); + return -1; + } + + /* check the MPI-State for termination in progress */ + /* wait until Inbound DoorBell Clear Register toggled */ + max_wait_count = 1 * 1000 * 1000; /* 1 sec */ + do { + udelay(1); + gst_len_mpistate = + pm8001_mr32(pm8001_ha->general_stat_tbl_addr, + GST_GSTLEN_MPIS_OFFSET); + if (GST_MPI_STATE_UNINIT == + (gst_len_mpistate & GST_MPI_STATE_MASK)) + break; + } while (--max_wait_count); + if (!max_wait_count) { + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk(" TIME OUT MPI State = 0x%x\n", + gst_len_mpistate & GST_MPI_STATE_MASK)); + return -1; + } + return 0; +} + +/** + * soft_reset_ready_check - Function to check FW is ready for soft reset. + * @pm8001_ha: our hba card information + */ +static u32 soft_reset_ready_check(struct pm8001_hba_info *pm8001_ha) +{ + u32 regVal, regVal1, regVal2; + if (mpi_uninit_check(pm8001_ha) != 0) { + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk("MPI state is not ready\n")); + return -1; + } + /* read the scratch pad 2 register bit 2 */ + regVal = pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_2) + & SCRATCH_PAD2_FWRDY_RST; + if (regVal == SCRATCH_PAD2_FWRDY_RST) { + PM8001_INIT_DBG(pm8001_ha, + pm8001_printk("Firmware is ready for reset .\n")); + } else { + /* Trigger NMI twice via RB6 */ + if (-1 == bar4_shift(pm8001_ha, RB6_ACCESS_REG)) { + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk("Shift Bar4 to 0x%x failed\n", + RB6_ACCESS_REG)); + return -1; + } + pm8001_cw32(pm8001_ha, 2, SPC_RB6_OFFSET, + RB6_MAGIC_NUMBER_RST); + pm8001_cw32(pm8001_ha, 2, SPC_RB6_OFFSET, RB6_MAGIC_NUMBER_RST); + /* wait for 100 ms */ + mdelay(100); + regVal = pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_2) & + SCRATCH_PAD2_FWRDY_RST; + if (regVal != SCRATCH_PAD2_FWRDY_RST) { + regVal1 = pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_1); + regVal2 = pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_2); + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk("TIMEOUT:MSGU_SCRATCH_PAD1" + "=0x%x, MSGU_SCRATCH_PAD2=0x%x\n", + regVal1, regVal2)); + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk("SCRATCH_PAD0 value = 0x%x\n", + pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_0))); + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk("SCRATCH_PAD3 value = 0x%x\n", + pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_3))); + return -1; + } + } + return 0; +} + +/** + * pm8001_chip_soft_rst - soft reset the PM8001 chip, so that the clear all + * the FW register status to the originated status. + * @pm8001_ha: our hba card information + * @signature: signature in host scratch pad0 register. + */ +static int +pm8001_chip_soft_rst(struct pm8001_hba_info *pm8001_ha, u32 signature) +{ + u32 regVal, toggleVal; + u32 max_wait_count; + u32 regVal1, regVal2, regVal3; + + /* step1: Check FW is ready for soft reset */ + if (soft_reset_ready_check(pm8001_ha) != 0) { + PM8001_FAIL_DBG(pm8001_ha, pm8001_printk("FW is not ready\n")); + return -1; + } + + /* step 2: clear NMI status register on AAP1 and IOP, write the same + value to clear */ + /* map 0x60000 to BAR4(0x20), BAR2(win) */ + if (-1 == bar4_shift(pm8001_ha, MBIC_AAP1_ADDR_BASE)) { + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk("Shift Bar4 to 0x%x failed\n", + MBIC_AAP1_ADDR_BASE)); + return -1; + } + regVal = pm8001_cr32(pm8001_ha, 2, MBIC_NMI_ENABLE_VPE0_IOP); + PM8001_INIT_DBG(pm8001_ha, + pm8001_printk("MBIC - NMI Enable VPE0 (IOP)= 0x%x\n", regVal)); + pm8001_cw32(pm8001_ha, 2, MBIC_NMI_ENABLE_VPE0_IOP, 0x0); + /* map 0x70000 to BAR4(0x20), BAR2(win) */ + if (-1 == bar4_shift(pm8001_ha, MBIC_IOP_ADDR_BASE)) { + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk("Shift Bar4 to 0x%x failed\n", + MBIC_IOP_ADDR_BASE)); + return -1; + } + regVal = pm8001_cr32(pm8001_ha, 2, MBIC_NMI_ENABLE_VPE0_AAP1); + PM8001_INIT_DBG(pm8001_ha, + pm8001_printk("MBIC - NMI Enable VPE0 (AAP1)= 0x%x\n", regVal)); + pm8001_cw32(pm8001_ha, 2, MBIC_NMI_ENABLE_VPE0_AAP1, 0x0); + + regVal = pm8001_cr32(pm8001_ha, 1, PCIE_EVENT_INTERRUPT_ENABLE); + PM8001_INIT_DBG(pm8001_ha, + pm8001_printk("PCIE -Event Interrupt Enable = 0x%x\n", regVal)); + pm8001_cw32(pm8001_ha, 1, PCIE_EVENT_INTERRUPT_ENABLE, 0x0); + + regVal = pm8001_cr32(pm8001_ha, 1, PCIE_EVENT_INTERRUPT); + PM8001_INIT_DBG(pm8001_ha, + pm8001_printk("PCIE - Event Interrupt = 0x%x\n", regVal)); + pm8001_cw32(pm8001_ha, 1, PCIE_EVENT_INTERRUPT, regVal); + + regVal = pm8001_cr32(pm8001_ha, 1, PCIE_ERROR_INTERRUPT_ENABLE); + PM8001_INIT_DBG(pm8001_ha, + pm8001_printk("PCIE -Error Interrupt Enable = 0x%x\n", regVal)); + pm8001_cw32(pm8001_ha, 1, PCIE_ERROR_INTERRUPT_ENABLE, 0x0); + + regVal = pm8001_cr32(pm8001_ha, 1, PCIE_ERROR_INTERRUPT); + PM8001_INIT_DBG(pm8001_ha, + pm8001_printk("PCIE - Error Interrupt = 0x%x\n", regVal)); + pm8001_cw32(pm8001_ha, 1, PCIE_ERROR_INTERRUPT, regVal); + + /* read the scratch pad 1 register bit 2 */ + regVal = pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_1) + & SCRATCH_PAD1_RST; + toggleVal = regVal ^ SCRATCH_PAD1_RST; + + /* set signature in host scratch pad0 register to tell SPC that the + host performs the soft reset */ + pm8001_cw32(pm8001_ha, 0, MSGU_HOST_SCRATCH_PAD_0, signature); + + /* read required registers for confirmming */ + /* map 0x0700000 to BAR4(0x20), BAR2(win) */ + if (-1 == bar4_shift(pm8001_ha, GSM_ADDR_BASE)) { + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk("Shift Bar4 to 0x%x failed\n", + GSM_ADDR_BASE)); + return -1; + } + PM8001_INIT_DBG(pm8001_ha, + pm8001_printk("GSM 0x0(0x00007b88)-GSM Configuration and" + " Reset = 0x%x\n", + pm8001_cr32(pm8001_ha, 2, GSM_CONFIG_RESET))); + + /* step 3: host read GSM Configuration and Reset register */ + regVal = pm8001_cr32(pm8001_ha, 2, GSM_CONFIG_RESET); + /* Put those bits to low */ + /* GSM XCBI offset = 0x70 0000 + 0x00 Bit 13 COM_SLV_SW_RSTB 1 + 0x00 Bit 12 QSSP_SW_RSTB 1 + 0x00 Bit 11 RAAE_SW_RSTB 1 + 0x00 Bit 9 RB_1_SW_RSTB 1 + 0x00 Bit 8 SM_SW_RSTB 1 + */ + regVal &= ~(0x00003b00); + /* host write GSM Configuration and Reset register */ + pm8001_cw32(pm8001_ha, 2, GSM_CONFIG_RESET, regVal); + PM8001_INIT_DBG(pm8001_ha, + pm8001_printk("GSM 0x0 (0x00007b88 ==> 0x00004088) - GSM " + "Configuration and Reset is set to = 0x%x\n", + pm8001_cr32(pm8001_ha, 2, GSM_CONFIG_RESET))); + + /* step 4: */ + /* disable GSM - Read Address Parity Check */ + regVal1 = pm8001_cr32(pm8001_ha, 2, GSM_READ_ADDR_PARITY_CHECK); + PM8001_INIT_DBG(pm8001_ha, + pm8001_printk("GSM 0x700038 - Read Address Parity Check " + "Enable = 0x%x\n", regVal1)); + pm8001_cw32(pm8001_ha, 2, GSM_READ_ADDR_PARITY_CHECK, 0x0); + PM8001_INIT_DBG(pm8001_ha, + pm8001_printk("GSM 0x700038 - Read Address Parity Check Enable" + "is set to = 0x%x\n", + pm8001_cr32(pm8001_ha, 2, GSM_READ_ADDR_PARITY_CHECK))); + + /* disable GSM - Write Address Parity Check */ + regVal2 = pm8001_cr32(pm8001_ha, 2, GSM_WRITE_ADDR_PARITY_CHECK); + PM8001_INIT_DBG(pm8001_ha, + pm8001_printk("GSM 0x700040 - Write Address Parity Check" + " Enable = 0x%x\n", regVal2)); + pm8001_cw32(pm8001_ha, 2, GSM_WRITE_ADDR_PARITY_CHECK, 0x0); + PM8001_INIT_DBG(pm8001_ha, + pm8001_printk("GSM 0x700040 - Write Address Parity Check " + "Enable is set to = 0x%x\n", + pm8001_cr32(pm8001_ha, 2, GSM_WRITE_ADDR_PARITY_CHECK))); + + /* disable GSM - Write Data Parity Check */ + regVal3 = pm8001_cr32(pm8001_ha, 2, GSM_WRITE_DATA_PARITY_CHECK); + PM8001_INIT_DBG(pm8001_ha, + pm8001_printk("GSM 0x300048 - Write Data Parity Check" + " Enable = 0x%x\n", regVal3)); + pm8001_cw32(pm8001_ha, 2, GSM_WRITE_DATA_PARITY_CHECK, 0x0); + PM8001_INIT_DBG(pm8001_ha, + pm8001_printk("GSM 0x300048 - Write Data Parity Check Enable" + "is set to = 0x%x\n", + pm8001_cr32(pm8001_ha, 2, GSM_WRITE_DATA_PARITY_CHECK))); + + /* step 5: delay 10 usec */ + udelay(10); + /* step 5-b: set GPIO-0 output control to tristate anyway */ + if (-1 == bar4_shift(pm8001_ha, GPIO_ADDR_BASE)) { + PM8001_INIT_DBG(pm8001_ha, + pm8001_printk("Shift Bar4 to 0x%x failed\n", + GPIO_ADDR_BASE)); + return -1; + } + regVal = pm8001_cr32(pm8001_ha, 2, GPIO_GPIO_0_0UTPUT_CTL_OFFSET); + PM8001_INIT_DBG(pm8001_ha, + pm8001_printk("GPIO Output Control Register:" + " = 0x%x\n", regVal)); + /* set GPIO-0 output control to tri-state */ + regVal &= 0xFFFFFFFC; + pm8001_cw32(pm8001_ha, 2, GPIO_GPIO_0_0UTPUT_CTL_OFFSET, regVal); + + /* Step 6: Reset the IOP and AAP1 */ + /* map 0x00000 to BAR4(0x20), BAR2(win) */ + if (-1 == bar4_shift(pm8001_ha, SPC_TOP_LEVEL_ADDR_BASE)) { + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk("SPC Shift Bar4 to 0x%x failed\n", + SPC_TOP_LEVEL_ADDR_BASE)); + return -1; + } + regVal = pm8001_cr32(pm8001_ha, 2, SPC_REG_RESET); + PM8001_INIT_DBG(pm8001_ha, + pm8001_printk("Top Register before resetting IOP/AAP1" + ":= 0x%x\n", regVal)); + regVal &= ~(SPC_REG_RESET_PCS_IOP_SS | SPC_REG_RESET_PCS_AAP1_SS); + pm8001_cw32(pm8001_ha, 2, SPC_REG_RESET, regVal); + + /* step 7: Reset the BDMA/OSSP */ + regVal = pm8001_cr32(pm8001_ha, 2, SPC_REG_RESET); + PM8001_INIT_DBG(pm8001_ha, + pm8001_printk("Top Register before resetting BDMA/OSSP" + ": = 0x%x\n", regVal)); + regVal &= ~(SPC_REG_RESET_BDMA_CORE | SPC_REG_RESET_OSSP); + pm8001_cw32(pm8001_ha, 2, SPC_REG_RESET, regVal); + + /* step 8: delay 10 usec */ + udelay(10); + + /* step 9: bring the BDMA and OSSP out of reset */ + regVal = pm8001_cr32(pm8001_ha, 2, SPC_REG_RESET); + PM8001_INIT_DBG(pm8001_ha, + pm8001_printk("Top Register before bringing up BDMA/OSSP" + ":= 0x%x\n", regVal)); + regVal |= (SPC_REG_RESET_BDMA_CORE | SPC_REG_RESET_OSSP); + pm8001_cw32(pm8001_ha, 2, SPC_REG_RESET, regVal); + + /* step 10: delay 10 usec */ + udelay(10); + + /* step 11: reads and sets the GSM Configuration and Reset Register */ + /* map 0x0700000 to BAR4(0x20), BAR2(win) */ + if (-1 == bar4_shift(pm8001_ha, GSM_ADDR_BASE)) { + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk("SPC Shift Bar4 to 0x%x failed\n", + GSM_ADDR_BASE)); + return -1; + } + PM8001_INIT_DBG(pm8001_ha, + pm8001_printk("GSM 0x0 (0x00007b88)-GSM Configuration and " + "Reset = 0x%x\n", pm8001_cr32(pm8001_ha, 2, GSM_CONFIG_RESET))); + regVal = pm8001_cr32(pm8001_ha, 2, GSM_CONFIG_RESET); + /* Put those bits to high */ + /* GSM XCBI offset = 0x70 0000 + 0x00 Bit 13 COM_SLV_SW_RSTB 1 + 0x00 Bit 12 QSSP_SW_RSTB 1 + 0x00 Bit 11 RAAE_SW_RSTB 1 + 0x00 Bit 9 RB_1_SW_RSTB 1 + 0x00 Bit 8 SM_SW_RSTB 1 + */ + regVal |= (GSM_CONFIG_RESET_VALUE); + pm8001_cw32(pm8001_ha, 2, GSM_CONFIG_RESET, regVal); + PM8001_INIT_DBG(pm8001_ha, + pm8001_printk("GSM (0x00004088 ==> 0x00007b88) - GSM" + " Configuration and Reset is set to = 0x%x\n", + pm8001_cr32(pm8001_ha, 2, GSM_CONFIG_RESET))); + + /* step 12: Restore GSM - Read Address Parity Check */ + regVal = pm8001_cr32(pm8001_ha, 2, GSM_READ_ADDR_PARITY_CHECK); + /* just for debugging */ + PM8001_INIT_DBG(pm8001_ha, + pm8001_printk("GSM 0x700038 - Read Address Parity Check Enable" + " = 0x%x\n", regVal)); + pm8001_cw32(pm8001_ha, 2, GSM_READ_ADDR_PARITY_CHECK, regVal1); + PM8001_INIT_DBG(pm8001_ha, + pm8001_printk("GSM 0x700038 - Read Address Parity" + " Check Enable is set to = 0x%x\n", + pm8001_cr32(pm8001_ha, 2, GSM_READ_ADDR_PARITY_CHECK))); + /* Restore GSM - Write Address Parity Check */ + regVal = pm8001_cr32(pm8001_ha, 2, GSM_WRITE_ADDR_PARITY_CHECK); + pm8001_cw32(pm8001_ha, 2, GSM_WRITE_ADDR_PARITY_CHECK, regVal2); + PM8001_INIT_DBG(pm8001_ha, + pm8001_printk("GSM 0x700040 - Write Address Parity Check" + " Enable is set to = 0x%x\n", + pm8001_cr32(pm8001_ha, 2, GSM_WRITE_ADDR_PARITY_CHECK))); + /* Restore GSM - Write Data Parity Check */ + regVal = pm8001_cr32(pm8001_ha, 2, GSM_WRITE_DATA_PARITY_CHECK); + pm8001_cw32(pm8001_ha, 2, GSM_WRITE_DATA_PARITY_CHECK, regVal3); + PM8001_INIT_DBG(pm8001_ha, + pm8001_printk("GSM 0x700048 - Write Data Parity Check Enable" + "is set to = 0x%x\n", + pm8001_cr32(pm8001_ha, 2, GSM_WRITE_DATA_PARITY_CHECK))); + + /* step 13: bring the IOP and AAP1 out of reset */ + /* map 0x00000 to BAR4(0x20), BAR2(win) */ + if (-1 == bar4_shift(pm8001_ha, SPC_TOP_LEVEL_ADDR_BASE)) { + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk("Shift Bar4 to 0x%x failed\n", + SPC_TOP_LEVEL_ADDR_BASE)); + return -1; + } + regVal = pm8001_cr32(pm8001_ha, 2, SPC_REG_RESET); + regVal |= (SPC_REG_RESET_PCS_IOP_SS | SPC_REG_RESET_PCS_AAP1_SS); + pm8001_cw32(pm8001_ha, 2, SPC_REG_RESET, regVal); + + /* step 14: delay 10 usec - Normal Mode */ + udelay(10); + /* check Soft Reset Normal mode or Soft Reset HDA mode */ + if (signature == SPC_SOFT_RESET_SIGNATURE) { + /* step 15 (Normal Mode): wait until scratch pad1 register + bit 2 toggled */ + max_wait_count = 2 * 1000 * 1000;/* 2 sec */ + do { + udelay(1); + regVal = pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_1) & + SCRATCH_PAD1_RST; + } while ((regVal != toggleVal) && (--max_wait_count)); + + if (!max_wait_count) { + regVal = pm8001_cr32(pm8001_ha, 0, + MSGU_SCRATCH_PAD_1); + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk("TIMEOUT : ToggleVal 0x%x," + "MSGU_SCRATCH_PAD1 = 0x%x\n", + toggleVal, regVal)); + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk("SCRATCH_PAD0 value = 0x%x\n", + pm8001_cr32(pm8001_ha, 0, + MSGU_SCRATCH_PAD_0))); + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk("SCRATCH_PAD2 value = 0x%x\n", + pm8001_cr32(pm8001_ha, 0, + MSGU_SCRATCH_PAD_2))); + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk("SCRATCH_PAD3 value = 0x%x\n", + pm8001_cr32(pm8001_ha, 0, + MSGU_SCRATCH_PAD_3))); + return -1; + } + + /* step 16 (Normal) - Clear ODMR and ODCR */ + pm8001_cw32(pm8001_ha, 0, MSGU_ODCR, ODCR_CLEAR_ALL); + pm8001_cw32(pm8001_ha, 0, MSGU_ODMR, ODMR_CLEAR_ALL); + + /* step 17 (Normal Mode): wait for the FW and IOP to get + ready - 1 sec timeout */ + /* Wait for the SPC Configuration Table to be ready */ + if (check_fw_ready(pm8001_ha) == -1) { + regVal = pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_1); + /* return error if MPI Configuration Table not ready */ + PM8001_INIT_DBG(pm8001_ha, + pm8001_printk("FW not ready SCRATCH_PAD1" + " = 0x%x\n", regVal)); + regVal = pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_2); + /* return error if MPI Configuration Table not ready */ + PM8001_INIT_DBG(pm8001_ha, + pm8001_printk("FW not ready SCRATCH_PAD2" + " = 0x%x\n", regVal)); + PM8001_INIT_DBG(pm8001_ha, + pm8001_printk("SCRATCH_PAD0 value = 0x%x\n", + pm8001_cr32(pm8001_ha, 0, + MSGU_SCRATCH_PAD_0))); + PM8001_INIT_DBG(pm8001_ha, + pm8001_printk("SCRATCH_PAD3 value = 0x%x\n", + pm8001_cr32(pm8001_ha, 0, + MSGU_SCRATCH_PAD_3))); + return -1; + } + } + + PM8001_INIT_DBG(pm8001_ha, + pm8001_printk("SPC soft reset Complete\n")); + return 0; +} + +static void pm8001_hw_chip_rst(struct pm8001_hba_info *pm8001_ha) +{ + u32 i; + u32 regVal; + PM8001_INIT_DBG(pm8001_ha, + pm8001_printk("chip reset start\n")); + + /* do SPC chip reset. */ + regVal = pm8001_cr32(pm8001_ha, 1, SPC_REG_RESET); + regVal &= ~(SPC_REG_RESET_DEVICE); + pm8001_cw32(pm8001_ha, 1, SPC_REG_RESET, regVal); + + /* delay 10 usec */ + udelay(10); + + /* bring chip reset out of reset */ + regVal = pm8001_cr32(pm8001_ha, 1, SPC_REG_RESET); + regVal |= SPC_REG_RESET_DEVICE; + pm8001_cw32(pm8001_ha, 1, SPC_REG_RESET, regVal); + + /* delay 10 usec */ + udelay(10); + + /* wait for 20 msec until the firmware gets reloaded */ + i = 20; + do { + mdelay(1); + } while ((--i) != 0); + + PM8001_INIT_DBG(pm8001_ha, + pm8001_printk("chip reset finished\n")); +} + +/** + * pm8001_chip_iounmap - which maped when initilized. + * @pm8001_ha: our hba card information + */ +static void pm8001_chip_iounmap(struct pm8001_hba_info *pm8001_ha) +{ + s8 bar, logical = 0; + for (bar = 0; bar < 6; bar++) { + /* + ** logical BARs for SPC: + ** bar 0 and 1 - logical BAR0 + ** bar 2 and 3 - logical BAR1 + ** bar4 - logical BAR2 + ** bar5 - logical BAR3 + ** Skip the appropriate assignments: + */ + if ((bar == 1) || (bar == 3)) + continue; + if (pm8001_ha->io_mem[logical].memvirtaddr) { + iounmap(pm8001_ha->io_mem[logical].memvirtaddr); + logical++; + } + } +} + +/** + * pm8001_chip_interrupt_enable - enable PM8001 chip interrupt + * @pm8001_ha: our hba card information + */ +static void +pm8001_chip_intx_interrupt_enable(struct pm8001_hba_info *pm8001_ha) +{ + pm8001_cw32(pm8001_ha, 0, MSGU_ODMR, ODMR_CLEAR_ALL); + pm8001_cw32(pm8001_ha, 0, MSGU_ODCR, ODCR_CLEAR_ALL); +} + + /** + * pm8001_chip_intx_interrupt_disable- disable PM8001 chip interrupt + * @pm8001_ha: our hba card information + */ +static void +pm8001_chip_intx_interrupt_disable(struct pm8001_hba_info *pm8001_ha) +{ + pm8001_cw32(pm8001_ha, 0, MSGU_ODMR, ODMR_MASK_ALL); +} + +/** + * pm8001_chip_msix_interrupt_enable - enable PM8001 chip interrupt + * @pm8001_ha: our hba card information + */ +static void +pm8001_chip_msix_interrupt_enable(struct pm8001_hba_info *pm8001_ha, + u32 int_vec_idx) +{ + u32 msi_index; + u32 value; + msi_index = int_vec_idx * MSIX_TABLE_ELEMENT_SIZE; + msi_index += MSIX_TABLE_BASE; + pm8001_cw32(pm8001_ha, 0, msi_index, MSIX_INTERRUPT_ENABLE); + value = (1 << int_vec_idx); + pm8001_cw32(pm8001_ha, 0, MSGU_ODCR, value); + +} + +/** + * pm8001_chip_msix_interrupt_disable - disable PM8001 chip interrupt + * @pm8001_ha: our hba card information + */ +static void +pm8001_chip_msix_interrupt_disable(struct pm8001_hba_info *pm8001_ha, + u32 int_vec_idx) +{ + u32 msi_index; + msi_index = int_vec_idx * MSIX_TABLE_ELEMENT_SIZE; + msi_index += MSIX_TABLE_BASE; + pm8001_cw32(pm8001_ha, 0, msi_index, MSIX_INTERRUPT_DISABLE); + +} +/** + * pm8001_chip_interrupt_enable - enable PM8001 chip interrupt + * @pm8001_ha: our hba card information + */ +static void +pm8001_chip_interrupt_enable(struct pm8001_hba_info *pm8001_ha) +{ +#ifdef PM8001_USE_MSIX + pm8001_chip_msix_interrupt_enable(pm8001_ha, 0); + return; +#endif + pm8001_chip_intx_interrupt_enable(pm8001_ha); + +} + +/** + * pm8001_chip_intx_interrupt_disable- disable PM8001 chip interrupt + * @pm8001_ha: our hba card information + */ +static void +pm8001_chip_interrupt_disable(struct pm8001_hba_info *pm8001_ha) +{ +#ifdef PM8001_USE_MSIX + pm8001_chip_msix_interrupt_disable(pm8001_ha, 0); + return; +#endif + pm8001_chip_intx_interrupt_disable(pm8001_ha); + +} + +/** + * mpi_msg_free_get- get the free message buffer for transfer inbound queue. + * @circularQ: the inbound queue we want to transfer to HBA. + * @messageSize: the message size of this transfer, normally it is 64 bytes + * @messagePtr: the pointer to message. + */ +static u32 mpi_msg_free_get(struct inbound_queue_table *circularQ, + u16 messageSize, void **messagePtr) +{ + u32 offset, consumer_index; + struct mpi_msg_hdr *msgHeader; + u8 bcCount = 1; /* only support single buffer */ + + /* Checks is the requested message size can be allocated in this queue*/ + if (messageSize > 64) { + *messagePtr = NULL; + return -1; + } + + /* Stores the new consumer index */ + consumer_index = pm8001_read_32(circularQ->ci_virt); + circularQ->consumer_index = cpu_to_le32(consumer_index); + if (((circularQ->producer_idx + bcCount) % 256) == + circularQ->consumer_index) { + *messagePtr = NULL; + return -1; + } + /* get memory IOMB buffer address */ + offset = circularQ->producer_idx * 64; + /* increment to next bcCount element */ + circularQ->producer_idx = (circularQ->producer_idx + bcCount) % 256; + /* Adds that distance to the base of the region virtual address plus + the message header size*/ + msgHeader = (struct mpi_msg_hdr *)(circularQ->base_virt + offset); + *messagePtr = ((void *)msgHeader) + sizeof(struct mpi_msg_hdr); + return 0; +} + +/** + * mpi_build_cmd- build the message queue for transfer, update the PI to FW + * to tell the fw to get this message from IOMB. + * @pm8001_ha: our hba card information + * @circularQ: the inbound queue we want to transfer to HBA. + * @opCode: the operation code represents commands which LLDD and fw recognized. + * @payload: the command payload of each operation command. + */ +static u32 mpi_build_cmd(struct pm8001_hba_info *pm8001_ha, + struct inbound_queue_table *circularQ, + u32 opCode, void *payload) +{ + u32 Header = 0, hpriority = 0, bc = 1, category = 0x02; + u32 responseQueue = 0; + void *pMessage; + + if (mpi_msg_free_get(circularQ, 64, &pMessage) < 0) { + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("No free mpi buffer \n")); + return -1; + } + + /*Copy to the payload*/ + memcpy(pMessage, payload, (64 - sizeof(struct mpi_msg_hdr))); + + /*Build the header*/ + Header = ((1 << 31) | (hpriority << 30) | ((bc & 0x1f) << 24) + | ((responseQueue & 0x3F) << 16) + | ((category & 0xF) << 12) | (opCode & 0xFFF)); + + pm8001_write_32((pMessage - 4), 0, cpu_to_le32(Header)); + /*Update the PI to the firmware*/ + pm8001_cw32(pm8001_ha, circularQ->pi_pci_bar, + circularQ->pi_offset, circularQ->producer_idx); + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("after PI= %d CI= %d \n", circularQ->producer_idx, + circularQ->consumer_index)); + return 0; +} + +static u32 mpi_msg_free_set(struct pm8001_hba_info *pm8001_ha, + struct outbound_queue_table *circularQ, u8 bc) +{ + u32 producer_index; + /* free the circular queue buffer elements associated with the message*/ + circularQ->consumer_idx = (circularQ->consumer_idx + bc) % 256; + /* update the CI of outbound queue */ + pm8001_cw32(pm8001_ha, circularQ->ci_pci_bar, circularQ->ci_offset, + circularQ->consumer_idx); + /* Update the producer index from SPC*/ + producer_index = pm8001_read_32(circularQ->pi_virt); + circularQ->producer_index = cpu_to_le32(producer_index); + PM8001_IO_DBG(pm8001_ha, + pm8001_printk(" CI=%d PI=%d\n", circularQ->consumer_idx, + circularQ->producer_index)); + return 0; +} + +/** + * mpi_msg_consume- get the MPI message from outbound queue message table. + * @pm8001_ha: our hba card information + * @circularQ: the outbound queue table. + * @messagePtr1: the message contents of this outbound message. + * @pBC: the message size. + */ +static u32 mpi_msg_consume(struct pm8001_hba_info *pm8001_ha, + struct outbound_queue_table *circularQ, + void **messagePtr1, u8 *pBC) +{ + struct mpi_msg_hdr *msgHeader; + __le32 msgHeader_tmp; + u32 header_tmp; + do { + /* If there are not-yet-delivered messages ... */ + if (circularQ->producer_index != circularQ->consumer_idx) { + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("process an IOMB\n")); + /*Get the pointer to the circular queue buffer element*/ + msgHeader = (struct mpi_msg_hdr *) + (circularQ->base_virt + + circularQ->consumer_idx * 64); + /* read header */ + header_tmp = pm8001_read_32(msgHeader); + msgHeader_tmp = cpu_to_le32(header_tmp); + if (0 != (msgHeader_tmp & 0x80000000)) { + if (OPC_OUB_SKIP_ENTRY != + (msgHeader_tmp & 0xfff)) { + *messagePtr1 = + ((u8 *)msgHeader) + + sizeof(struct mpi_msg_hdr); + *pBC = (u8)((msgHeader_tmp >> 24) & + 0x1f); + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("mpi_msg_consume" + ": CI=%d PI=%d msgHeader=%x\n", + circularQ->consumer_idx, + circularQ->producer_index, + msgHeader_tmp)); + return MPI_IO_STATUS_SUCCESS; + } else { + u32 producer_index; + void *pi_virt = circularQ->pi_virt; + /* free the circular queue buffer + elements associated with the message*/ + circularQ->consumer_idx = + (circularQ->consumer_idx + + ((msgHeader_tmp >> 24) & 0x1f)) + % 256; + /* update the CI of outbound queue */ + pm8001_cw32(pm8001_ha, + circularQ->ci_pci_bar, + circularQ->ci_offset, + circularQ->consumer_idx); + /* Update the producer index from SPC */ + producer_index = + pm8001_read_32(pi_virt); + circularQ->producer_index = + cpu_to_le32(producer_index); + } + } else + return MPI_IO_STATUS_FAIL; + } + } while (circularQ->producer_index != circularQ->consumer_idx); + /* while we don't have any more not-yet-delivered message */ + /* report empty */ + return MPI_IO_STATUS_BUSY; +} + +static void pm8001_work_queue(struct work_struct *work) +{ + struct delayed_work *dw = container_of(work, struct delayed_work, work); + struct pm8001_wq *wq = container_of(dw, struct pm8001_wq, work_q); + struct pm8001_device *pm8001_dev; + struct domain_device *dev; + + switch (wq->handler) { + case IO_OPEN_CNX_ERROR_IT_NEXUS_LOSS: + pm8001_dev = wq->data; + dev = pm8001_dev->sas_device; + pm8001_I_T_nexus_reset(dev); + break; + case IO_OPEN_CNX_ERROR_STP_RESOURCES_BUSY: + pm8001_dev = wq->data; + dev = pm8001_dev->sas_device; + pm8001_I_T_nexus_reset(dev); + break; + case IO_DS_IN_ERROR: + pm8001_dev = wq->data; + dev = pm8001_dev->sas_device; + pm8001_I_T_nexus_reset(dev); + break; + case IO_DS_NON_OPERATIONAL: + pm8001_dev = wq->data; + dev = pm8001_dev->sas_device; + pm8001_I_T_nexus_reset(dev); + break; + } + list_del(&wq->entry); + kfree(wq); +} + +static int pm8001_handle_event(struct pm8001_hba_info *pm8001_ha, void *data, + int handler) +{ + struct pm8001_wq *wq; + int ret = 0; + + wq = kmalloc(sizeof(struct pm8001_wq), GFP_ATOMIC); + if (wq) { + wq->pm8001_ha = pm8001_ha; + wq->data = data; + wq->handler = handler; + INIT_DELAYED_WORK(&wq->work_q, pm8001_work_queue); + list_add_tail(&wq->entry, &pm8001_ha->wq_list); + schedule_delayed_work(&wq->work_q, 0); + } else + ret = -ENOMEM; + + return ret; +} + +/** + * mpi_ssp_completion- process the event that FW response to the SSP request. + * @pm8001_ha: our hba card information + * @piomb: the message contents of this outbound message. + * + * When FW has completed a ssp request for example a IO request, after it has + * filled the SG data with the data, it will trigger this event represent + * that he has finished the job,please check the coresponding buffer. + * So we will tell the caller who maybe waiting the result to tell upper layer + * that the task has been finished. + */ +static int +mpi_ssp_completion(struct pm8001_hba_info *pm8001_ha , void *piomb) +{ + struct sas_task *t; + struct pm8001_ccb_info *ccb; + unsigned long flags; + u32 status; + u32 param; + u32 tag; + struct ssp_completion_resp *psspPayload; + struct task_status_struct *ts; + struct ssp_response_iu *iu; + struct pm8001_device *pm8001_dev; + psspPayload = (struct ssp_completion_resp *)(piomb + 4); + status = le32_to_cpu(psspPayload->status); + tag = le32_to_cpu(psspPayload->tag); + ccb = &pm8001_ha->ccb_info[tag]; + pm8001_dev = ccb->device; + param = le32_to_cpu(psspPayload->param); + + PM8001_IO_DBG(pm8001_ha, pm8001_printk("OPC_OUB_SSP_COMP\n")); + t = ccb->task; + + if (status) + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk("sas IO status 0x%x\n", status)); + if (unlikely(!t || !t->lldd_task || !t->dev)) + return -1; + ts = &t->task_status; + switch (status) { + case IO_SUCCESS: + PM8001_IO_DBG(pm8001_ha, pm8001_printk("IO_SUCCESS" + ",param = %d \n", param)); + if (param == 0) { + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAM_GOOD; + } else { + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_PROTO_RESPONSE; + ts->residual = param; + iu = &psspPayload->ssp_resp_iu; + sas_ssp_task_response(pm8001_ha->dev, t, iu); + } + if (pm8001_dev) + pm8001_dev->running_req--; + break; + case IO_ABORTED: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_ABORTED IOMB Tag \n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_ABORTED_TASK; + break; + case IO_UNDERFLOW: + /* SSP Completion with error */ + PM8001_IO_DBG(pm8001_ha, pm8001_printk("IO_UNDERFLOW" + ",param = %d \n", param)); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_DATA_UNDERRUN; + ts->residual = param; + if (pm8001_dev) + pm8001_dev->running_req--; + break; + case IO_NO_DEVICE: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_NO_DEVICE\n")); + ts->resp = SAS_TASK_UNDELIVERED; + ts->stat = SAS_PHY_DOWN; + break; + case IO_XFER_ERROR_BREAK: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_XFER_ERROR_BREAK\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_REJECT; + break; + case IO_XFER_ERROR_PHY_NOT_READY: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_XFER_ERROR_PHY_NOT_READY\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_REJECT; + ts->open_rej_reason = SAS_OREJ_RSVD_RETRY; + break; + case IO_OPEN_CNX_ERROR_PROTOCOL_NOT_SUPPORTED: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_OPEN_CNX_ERROR_PROTOCOL_NOT_SUPPORTED\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_REJECT; + ts->open_rej_reason = SAS_OREJ_EPROTO; + break; + case IO_OPEN_CNX_ERROR_ZONE_VIOLATION: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_OPEN_CNX_ERROR_ZONE_VIOLATION\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_REJECT; + ts->open_rej_reason = SAS_OREJ_UNKNOWN; + break; + case IO_OPEN_CNX_ERROR_BREAK: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_OPEN_CNX_ERROR_BREAK\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_REJECT; + ts->open_rej_reason = SAS_OREJ_RSVD_CONT0; + break; + case IO_OPEN_CNX_ERROR_IT_NEXUS_LOSS: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_OPEN_CNX_ERROR_IT_NEXUS_LOSS\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_REJECT; + ts->open_rej_reason = SAS_OREJ_UNKNOWN; + if (!t->uldd_task) + pm8001_handle_event(pm8001_ha, + pm8001_dev, + IO_OPEN_CNX_ERROR_IT_NEXUS_LOSS); + break; + case IO_OPEN_CNX_ERROR_BAD_DESTINATION: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_OPEN_CNX_ERROR_BAD_DESTINATION\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_REJECT; + ts->open_rej_reason = SAS_OREJ_BAD_DEST; + break; + case IO_OPEN_CNX_ERROR_CONNECTION_RATE_NOT_SUPPORTED: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_OPEN_CNX_ERROR_CONNECTION_RATE_" + "NOT_SUPPORTED\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_REJECT; + ts->open_rej_reason = SAS_OREJ_CONN_RATE; + break; + case IO_OPEN_CNX_ERROR_WRONG_DESTINATION: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_OPEN_CNX_ERROR_WRONG_DESTINATION\n")); + ts->resp = SAS_TASK_UNDELIVERED; + ts->stat = SAS_OPEN_REJECT; + ts->open_rej_reason = SAS_OREJ_WRONG_DEST; + break; + case IO_XFER_ERROR_NAK_RECEIVED: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_XFER_ERROR_NAK_RECEIVED\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_REJECT; + break; + case IO_XFER_ERROR_ACK_NAK_TIMEOUT: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_XFER_ERROR_ACK_NAK_TIMEOUT\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_NAK_R_ERR; + break; + case IO_XFER_ERROR_DMA: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_XFER_ERROR_DMA\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_REJECT; + break; + case IO_XFER_OPEN_RETRY_TIMEOUT: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_XFER_OPEN_RETRY_TIMEOUT\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_REJECT; + ts->open_rej_reason = SAS_OREJ_RSVD_RETRY; + break; + case IO_XFER_ERROR_OFFSET_MISMATCH: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_XFER_ERROR_OFFSET_MISMATCH\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_REJECT; + break; + case IO_PORT_IN_RESET: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_PORT_IN_RESET\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_REJECT; + break; + case IO_DS_NON_OPERATIONAL: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_DS_NON_OPERATIONAL\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_REJECT; + if (!t->uldd_task) + pm8001_handle_event(pm8001_ha, + pm8001_dev, + IO_DS_NON_OPERATIONAL); + break; + case IO_DS_IN_RECOVERY: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_DS_IN_RECOVERY\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_REJECT; + break; + case IO_TM_TAG_NOT_FOUND: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_TM_TAG_NOT_FOUND\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_REJECT; + break; + case IO_SSP_EXT_IU_ZERO_LEN_ERROR: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_SSP_EXT_IU_ZERO_LEN_ERROR\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_REJECT; + break; + case IO_OPEN_CNX_ERROR_HW_RESOURCE_BUSY: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_OPEN_CNX_ERROR_HW_RESOURCE_BUSY\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_REJECT; + ts->open_rej_reason = SAS_OREJ_RSVD_RETRY; + default: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("Unknown status 0x%x\n", status)); + /* not allowed case. Therefore, return failed status */ + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_REJECT; + break; + } + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("scsi_satus = %x \n ", + psspPayload->ssp_resp_iu.status)); + spin_lock_irqsave(&t->task_state_lock, flags); + t->task_state_flags &= ~SAS_TASK_STATE_PENDING; + t->task_state_flags &= ~SAS_TASK_AT_INITIATOR; + t->task_state_flags |= SAS_TASK_STATE_DONE; + if (unlikely((t->task_state_flags & SAS_TASK_STATE_ABORTED))) { + spin_unlock_irqrestore(&t->task_state_lock, flags); + PM8001_FAIL_DBG(pm8001_ha, pm8001_printk("task 0x%p done with" + " io_status 0x%x resp 0x%x " + "stat 0x%x but aborted by upper layer!\n", + t, status, ts->resp, ts->stat)); + pm8001_ccb_task_free(pm8001_ha, t, ccb, tag); + } else { + spin_unlock_irqrestore(&t->task_state_lock, flags); + pm8001_ccb_task_free(pm8001_ha, t, ccb, tag); + mb();/* in order to force CPU ordering */ + t->task_done(t); + } + return 0; +} + +/*See the comments for mpi_ssp_completion */ +static int mpi_ssp_event(struct pm8001_hba_info *pm8001_ha , void *piomb) +{ + struct sas_task *t; + unsigned long flags; + struct task_status_struct *ts; + struct pm8001_ccb_info *ccb; + struct pm8001_device *pm8001_dev; + struct ssp_event_resp *psspPayload = + (struct ssp_event_resp *)(piomb + 4); + u32 event = le32_to_cpu(psspPayload->event); + u32 tag = le32_to_cpu(psspPayload->tag); + u32 port_id = le32_to_cpu(psspPayload->port_id); + u32 dev_id = le32_to_cpu(psspPayload->device_id); + + ccb = &pm8001_ha->ccb_info[tag]; + t = ccb->task; + pm8001_dev = ccb->device; + if (event) + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk("sas IO status 0x%x\n", event)); + if (unlikely(!t || !t->lldd_task || !t->dev)) + return -1; + ts = &t->task_status; + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("port_id = %x,device_id = %x\n", + port_id, dev_id)); + switch (event) { + case IO_OVERFLOW: + PM8001_IO_DBG(pm8001_ha, pm8001_printk("IO_UNDERFLOW\n");) + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_DATA_OVERRUN; + ts->residual = 0; + if (pm8001_dev) + pm8001_dev->running_req--; + break; + case IO_XFER_ERROR_BREAK: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_XFER_ERROR_BREAK\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_INTERRUPTED; + break; + case IO_XFER_ERROR_PHY_NOT_READY: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_XFER_ERROR_PHY_NOT_READY\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_REJECT; + ts->open_rej_reason = SAS_OREJ_RSVD_RETRY; + break; + case IO_OPEN_CNX_ERROR_PROTOCOL_NOT_SUPPORTED: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_OPEN_CNX_ERROR_PROTOCOL_NOT" + "_SUPPORTED\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_REJECT; + ts->open_rej_reason = SAS_OREJ_EPROTO; + break; + case IO_OPEN_CNX_ERROR_ZONE_VIOLATION: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_OPEN_CNX_ERROR_ZONE_VIOLATION\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_REJECT; + ts->open_rej_reason = SAS_OREJ_UNKNOWN; + break; + case IO_OPEN_CNX_ERROR_BREAK: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_OPEN_CNX_ERROR_BREAK\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_REJECT; + ts->open_rej_reason = SAS_OREJ_RSVD_CONT0; + break; + case IO_OPEN_CNX_ERROR_IT_NEXUS_LOSS: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_OPEN_CNX_ERROR_IT_NEXUS_LOSS\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_REJECT; + ts->open_rej_reason = SAS_OREJ_UNKNOWN; + if (!t->uldd_task) + pm8001_handle_event(pm8001_ha, + pm8001_dev, + IO_OPEN_CNX_ERROR_IT_NEXUS_LOSS); + break; + case IO_OPEN_CNX_ERROR_BAD_DESTINATION: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_OPEN_CNX_ERROR_BAD_DESTINATION\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_REJECT; + ts->open_rej_reason = SAS_OREJ_BAD_DEST; + break; + case IO_OPEN_CNX_ERROR_CONNECTION_RATE_NOT_SUPPORTED: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_OPEN_CNX_ERROR_CONNECTION_RATE_" + "NOT_SUPPORTED\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_REJECT; + ts->open_rej_reason = SAS_OREJ_CONN_RATE; + break; + case IO_OPEN_CNX_ERROR_WRONG_DESTINATION: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_OPEN_CNX_ERROR_WRONG_DESTINATION\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_REJECT; + ts->open_rej_reason = SAS_OREJ_WRONG_DEST; + break; + case IO_XFER_ERROR_NAK_RECEIVED: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_XFER_ERROR_NAK_RECEIVED\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_REJECT; + break; + case IO_XFER_ERROR_ACK_NAK_TIMEOUT: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_XFER_ERROR_ACK_NAK_TIMEOUT\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_NAK_R_ERR; + break; + case IO_XFER_OPEN_RETRY_TIMEOUT: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_XFER_OPEN_RETRY_TIMEOUT\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_REJECT; + ts->open_rej_reason = SAS_OREJ_RSVD_RETRY; + break; + case IO_XFER_ERROR_UNEXPECTED_PHASE: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_XFER_ERROR_UNEXPECTED_PHASE\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_DATA_OVERRUN; + break; + case IO_XFER_ERROR_XFER_RDY_OVERRUN: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_XFER_ERROR_XFER_RDY_OVERRUN\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_DATA_OVERRUN; + break; + case IO_XFER_ERROR_XFER_RDY_NOT_EXPECTED: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_XFER_ERROR_XFER_RDY_NOT_EXPECTED\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_DATA_OVERRUN; + break; + case IO_XFER_ERROR_CMD_ISSUE_ACK_NAK_TIMEOUT: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_XFER_ERROR_CMD_ISSUE_ACK_NAK_TIMEOUT\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_DATA_OVERRUN; + break; + case IO_XFER_ERROR_OFFSET_MISMATCH: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_XFER_ERROR_OFFSET_MISMATCH\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_DATA_OVERRUN; + break; + case IO_XFER_ERROR_XFER_ZERO_DATA_LEN: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_XFER_ERROR_XFER_ZERO_DATA_LEN\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_DATA_OVERRUN; + break; + case IO_XFER_CMD_FRAME_ISSUED: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk(" IO_XFER_CMD_FRAME_ISSUED\n")); + return 0; + default: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("Unknown status 0x%x\n", event)); + /* not allowed case. Therefore, return failed status */ + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_DATA_OVERRUN; + break; + } + spin_lock_irqsave(&t->task_state_lock, flags); + t->task_state_flags &= ~SAS_TASK_STATE_PENDING; + t->task_state_flags &= ~SAS_TASK_AT_INITIATOR; + t->task_state_flags |= SAS_TASK_STATE_DONE; + if (unlikely((t->task_state_flags & SAS_TASK_STATE_ABORTED))) { + spin_unlock_irqrestore(&t->task_state_lock, flags); + PM8001_FAIL_DBG(pm8001_ha, pm8001_printk("task 0x%p done with" + " event 0x%x resp 0x%x " + "stat 0x%x but aborted by upper layer!\n", + t, event, ts->resp, ts->stat)); + pm8001_ccb_task_free(pm8001_ha, t, ccb, tag); + } else { + spin_unlock_irqrestore(&t->task_state_lock, flags); + pm8001_ccb_task_free(pm8001_ha, t, ccb, tag); + mb();/* in order to force CPU ordering */ + t->task_done(t); + } + return 0; +} + +/*See the comments for mpi_ssp_completion */ +static int +mpi_sata_completion(struct pm8001_hba_info *pm8001_ha, void *piomb) +{ + struct sas_task *t; + struct pm8001_ccb_info *ccb; + unsigned long flags; + u32 param; + u32 status; + u32 tag; + struct sata_completion_resp *psataPayload; + struct task_status_struct *ts; + struct ata_task_resp *resp ; + u32 *sata_resp; + struct pm8001_device *pm8001_dev; + + psataPayload = (struct sata_completion_resp *)(piomb + 4); + status = le32_to_cpu(psataPayload->status); + tag = le32_to_cpu(psataPayload->tag); + + ccb = &pm8001_ha->ccb_info[tag]; + param = le32_to_cpu(psataPayload->param); + t = ccb->task; + ts = &t->task_status; + pm8001_dev = ccb->device; + if (status) + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk("sata IO status 0x%x\n", status)); + if (unlikely(!t || !t->lldd_task || !t->dev)) + return -1; + + switch (status) { + case IO_SUCCESS: + PM8001_IO_DBG(pm8001_ha, pm8001_printk("IO_SUCCESS\n")); + if (param == 0) { + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAM_GOOD; + } else { + u8 len; + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_PROTO_RESPONSE; + ts->residual = param; + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("SAS_PROTO_RESPONSE len = %d\n", + param)); + sata_resp = &psataPayload->sata_resp[0]; + resp = (struct ata_task_resp *)ts->buf; + if (t->ata_task.dma_xfer == 0 && + t->data_dir == PCI_DMA_FROMDEVICE) { + len = sizeof(struct pio_setup_fis); + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("PIO read len = %d\n", len)); + } else if (t->ata_task.use_ncq) { + len = sizeof(struct set_dev_bits_fis); + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("FPDMA len = %d\n", len)); + } else { + len = sizeof(struct dev_to_host_fis); + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("other len = %d\n", len)); + } + if (SAS_STATUS_BUF_SIZE >= sizeof(*resp)) { + resp->frame_len = len; + memcpy(&resp->ending_fis[0], sata_resp, len); + ts->buf_valid_size = sizeof(*resp); + } else + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("response to large \n")); + } + if (pm8001_dev) + pm8001_dev->running_req--; + break; + case IO_ABORTED: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_ABORTED IOMB Tag \n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_ABORTED_TASK; + if (pm8001_dev) + pm8001_dev->running_req--; + break; + /* following cases are to do cases */ + case IO_UNDERFLOW: + /* SATA Completion with error */ + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_UNDERFLOW param = %d\n", param)); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_DATA_UNDERRUN; + ts->residual = param; + if (pm8001_dev) + pm8001_dev->running_req--; + break; + case IO_NO_DEVICE: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_NO_DEVICE\n")); + ts->resp = SAS_TASK_UNDELIVERED; + ts->stat = SAS_PHY_DOWN; + break; + case IO_XFER_ERROR_BREAK: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_XFER_ERROR_BREAK\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_INTERRUPTED; + break; + case IO_XFER_ERROR_PHY_NOT_READY: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_XFER_ERROR_PHY_NOT_READY\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_REJECT; + ts->open_rej_reason = SAS_OREJ_RSVD_RETRY; + break; + case IO_OPEN_CNX_ERROR_PROTOCOL_NOT_SUPPORTED: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_OPEN_CNX_ERROR_PROTOCOL_NOT" + "_SUPPORTED\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_REJECT; + ts->open_rej_reason = SAS_OREJ_EPROTO; + break; + case IO_OPEN_CNX_ERROR_ZONE_VIOLATION: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_OPEN_CNX_ERROR_ZONE_VIOLATION\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_REJECT; + ts->open_rej_reason = SAS_OREJ_UNKNOWN; + break; + case IO_OPEN_CNX_ERROR_BREAK: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_OPEN_CNX_ERROR_BREAK\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_REJECT; + ts->open_rej_reason = SAS_OREJ_RSVD_CONT0; + break; + case IO_OPEN_CNX_ERROR_IT_NEXUS_LOSS: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_OPEN_CNX_ERROR_IT_NEXUS_LOSS\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_DEV_NO_RESPONSE; + if (!t->uldd_task) { + pm8001_handle_event(pm8001_ha, + pm8001_dev, + IO_OPEN_CNX_ERROR_IT_NEXUS_LOSS); + ts->resp = SAS_TASK_UNDELIVERED; + ts->stat = SAS_QUEUE_FULL; + pm8001_ccb_task_free(pm8001_ha, t, ccb, tag); + mb();/*in order to force CPU ordering*/ + t->task_done(t); + return 0; + } + break; + case IO_OPEN_CNX_ERROR_BAD_DESTINATION: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_OPEN_CNX_ERROR_BAD_DESTINATION\n")); + ts->resp = SAS_TASK_UNDELIVERED; + ts->stat = SAS_OPEN_REJECT; + ts->open_rej_reason = SAS_OREJ_BAD_DEST; + if (!t->uldd_task) { + pm8001_handle_event(pm8001_ha, + pm8001_dev, + IO_OPEN_CNX_ERROR_IT_NEXUS_LOSS); + ts->resp = SAS_TASK_UNDELIVERED; + ts->stat = SAS_QUEUE_FULL; + pm8001_ccb_task_free(pm8001_ha, t, ccb, tag); + mb();/*ditto*/ + t->task_done(t); + return 0; + } + break; + case IO_OPEN_CNX_ERROR_CONNECTION_RATE_NOT_SUPPORTED: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_OPEN_CNX_ERROR_CONNECTION_RATE_" + "NOT_SUPPORTED\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_REJECT; + ts->open_rej_reason = SAS_OREJ_CONN_RATE; + break; + case IO_OPEN_CNX_ERROR_STP_RESOURCES_BUSY: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_OPEN_CNX_ERROR_STP_RESOURCES" + "_BUSY\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_DEV_NO_RESPONSE; + if (!t->uldd_task) { + pm8001_handle_event(pm8001_ha, + pm8001_dev, + IO_OPEN_CNX_ERROR_STP_RESOURCES_BUSY); + ts->resp = SAS_TASK_UNDELIVERED; + ts->stat = SAS_QUEUE_FULL; + pm8001_ccb_task_free(pm8001_ha, t, ccb, tag); + mb();/* ditto*/ + t->task_done(t); + return 0; + } + break; + case IO_OPEN_CNX_ERROR_WRONG_DESTINATION: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_OPEN_CNX_ERROR_WRONG_DESTINATION\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_REJECT; + ts->open_rej_reason = SAS_OREJ_WRONG_DEST; + break; + case IO_XFER_ERROR_NAK_RECEIVED: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_XFER_ERROR_NAK_RECEIVED\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_NAK_R_ERR; + break; + case IO_XFER_ERROR_ACK_NAK_TIMEOUT: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_XFER_ERROR_ACK_NAK_TIMEOUT\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_NAK_R_ERR; + break; + case IO_XFER_ERROR_DMA: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_XFER_ERROR_DMA\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_ABORTED_TASK; + break; + case IO_XFER_ERROR_SATA_LINK_TIMEOUT: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_XFER_ERROR_SATA_LINK_TIMEOUT\n")); + ts->resp = SAS_TASK_UNDELIVERED; + ts->stat = SAS_DEV_NO_RESPONSE; + break; + case IO_XFER_ERROR_REJECTED_NCQ_MODE: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_XFER_ERROR_REJECTED_NCQ_MODE\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_DATA_UNDERRUN; + break; + case IO_XFER_OPEN_RETRY_TIMEOUT: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_XFER_OPEN_RETRY_TIMEOUT\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_TO; + break; + case IO_PORT_IN_RESET: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_PORT_IN_RESET\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_DEV_NO_RESPONSE; + break; + case IO_DS_NON_OPERATIONAL: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_DS_NON_OPERATIONAL\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_DEV_NO_RESPONSE; + if (!t->uldd_task) { + pm8001_handle_event(pm8001_ha, pm8001_dev, + IO_DS_NON_OPERATIONAL); + ts->resp = SAS_TASK_UNDELIVERED; + ts->stat = SAS_QUEUE_FULL; + pm8001_ccb_task_free(pm8001_ha, t, ccb, tag); + mb();/*ditto*/ + t->task_done(t); + return 0; + } + break; + case IO_DS_IN_RECOVERY: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk(" IO_DS_IN_RECOVERY\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_DEV_NO_RESPONSE; + break; + case IO_DS_IN_ERROR: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_DS_IN_ERROR\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_DEV_NO_RESPONSE; + if (!t->uldd_task) { + pm8001_handle_event(pm8001_ha, pm8001_dev, + IO_DS_IN_ERROR); + ts->resp = SAS_TASK_UNDELIVERED; + ts->stat = SAS_QUEUE_FULL; + pm8001_ccb_task_free(pm8001_ha, t, ccb, tag); + mb();/*ditto*/ + t->task_done(t); + return 0; + } + break; + case IO_OPEN_CNX_ERROR_HW_RESOURCE_BUSY: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_OPEN_CNX_ERROR_HW_RESOURCE_BUSY\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_REJECT; + ts->open_rej_reason = SAS_OREJ_RSVD_RETRY; + default: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("Unknown status 0x%x\n", status)); + /* not allowed case. Therefore, return failed status */ + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_DEV_NO_RESPONSE; + break; + } + spin_lock_irqsave(&t->task_state_lock, flags); + t->task_state_flags &= ~SAS_TASK_STATE_PENDING; + t->task_state_flags &= ~SAS_TASK_AT_INITIATOR; + t->task_state_flags |= SAS_TASK_STATE_DONE; + if (unlikely((t->task_state_flags & SAS_TASK_STATE_ABORTED))) { + spin_unlock_irqrestore(&t->task_state_lock, flags); + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk("task 0x%p done with io_status 0x%x" + " resp 0x%x stat 0x%x but aborted by upper layer!\n", + t, status, ts->resp, ts->stat)); + pm8001_ccb_task_free(pm8001_ha, t, ccb, tag); + } else { + spin_unlock_irqrestore(&t->task_state_lock, flags); + pm8001_ccb_task_free(pm8001_ha, t, ccb, tag); + mb();/* ditto */ + t->task_done(t); + } + return 0; +} + +/*See the comments for mpi_ssp_completion */ +static int mpi_sata_event(struct pm8001_hba_info *pm8001_ha , void *piomb) +{ + struct sas_task *t; + unsigned long flags; + struct task_status_struct *ts; + struct pm8001_ccb_info *ccb; + struct pm8001_device *pm8001_dev; + struct sata_event_resp *psataPayload = + (struct sata_event_resp *)(piomb + 4); + u32 event = le32_to_cpu(psataPayload->event); + u32 tag = le32_to_cpu(psataPayload->tag); + u32 port_id = le32_to_cpu(psataPayload->port_id); + u32 dev_id = le32_to_cpu(psataPayload->device_id); + + ccb = &pm8001_ha->ccb_info[tag]; + t = ccb->task; + pm8001_dev = ccb->device; + if (event) + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk("sata IO status 0x%x\n", event)); + if (unlikely(!t || !t->lldd_task || !t->dev)) + return -1; + ts = &t->task_status; + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("port_id = %x,device_id = %x\n", + port_id, dev_id)); + switch (event) { + case IO_OVERFLOW: + PM8001_IO_DBG(pm8001_ha, pm8001_printk("IO_UNDERFLOW\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_DATA_OVERRUN; + ts->residual = 0; + if (pm8001_dev) + pm8001_dev->running_req--; + break; + case IO_XFER_ERROR_BREAK: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_XFER_ERROR_BREAK\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_INTERRUPTED; + break; + case IO_XFER_ERROR_PHY_NOT_READY: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_XFER_ERROR_PHY_NOT_READY\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_REJECT; + ts->open_rej_reason = SAS_OREJ_RSVD_RETRY; + break; + case IO_OPEN_CNX_ERROR_PROTOCOL_NOT_SUPPORTED: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_OPEN_CNX_ERROR_PROTOCOL_NOT" + "_SUPPORTED\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_REJECT; + ts->open_rej_reason = SAS_OREJ_EPROTO; + break; + case IO_OPEN_CNX_ERROR_ZONE_VIOLATION: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_OPEN_CNX_ERROR_ZONE_VIOLATION\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_REJECT; + ts->open_rej_reason = SAS_OREJ_UNKNOWN; + break; + case IO_OPEN_CNX_ERROR_BREAK: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_OPEN_CNX_ERROR_BREAK\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_REJECT; + ts->open_rej_reason = SAS_OREJ_RSVD_CONT0; + break; + case IO_OPEN_CNX_ERROR_IT_NEXUS_LOSS: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_OPEN_CNX_ERROR_IT_NEXUS_LOSS\n")); + ts->resp = SAS_TASK_UNDELIVERED; + ts->stat = SAS_DEV_NO_RESPONSE; + if (!t->uldd_task) { + pm8001_handle_event(pm8001_ha, + pm8001_dev, + IO_OPEN_CNX_ERROR_IT_NEXUS_LOSS); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_QUEUE_FULL; + pm8001_ccb_task_free(pm8001_ha, t, ccb, tag); + mb();/*ditto*/ + t->task_done(t); + return 0; + } + break; + case IO_OPEN_CNX_ERROR_BAD_DESTINATION: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_OPEN_CNX_ERROR_BAD_DESTINATION\n")); + ts->resp = SAS_TASK_UNDELIVERED; + ts->stat = SAS_OPEN_REJECT; + ts->open_rej_reason = SAS_OREJ_BAD_DEST; + break; + case IO_OPEN_CNX_ERROR_CONNECTION_RATE_NOT_SUPPORTED: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_OPEN_CNX_ERROR_CONNECTION_RATE_" + "NOT_SUPPORTED\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_REJECT; + ts->open_rej_reason = SAS_OREJ_CONN_RATE; + break; + case IO_OPEN_CNX_ERROR_WRONG_DESTINATION: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_OPEN_CNX_ERROR_WRONG_DESTINATION\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_REJECT; + ts->open_rej_reason = SAS_OREJ_WRONG_DEST; + break; + case IO_XFER_ERROR_NAK_RECEIVED: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_XFER_ERROR_NAK_RECEIVED\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_NAK_R_ERR; + break; + case IO_XFER_ERROR_PEER_ABORTED: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_XFER_ERROR_PEER_ABORTED\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_NAK_R_ERR; + break; + case IO_XFER_ERROR_REJECTED_NCQ_MODE: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_XFER_ERROR_REJECTED_NCQ_MODE\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_DATA_UNDERRUN; + break; + case IO_XFER_OPEN_RETRY_TIMEOUT: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_XFER_OPEN_RETRY_TIMEOUT\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_TO; + break; + case IO_XFER_ERROR_UNEXPECTED_PHASE: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_XFER_ERROR_UNEXPECTED_PHASE\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_TO; + break; + case IO_XFER_ERROR_XFER_RDY_OVERRUN: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_XFER_ERROR_XFER_RDY_OVERRUN\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_TO; + break; + case IO_XFER_ERROR_XFER_RDY_NOT_EXPECTED: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_XFER_ERROR_XFER_RDY_NOT_EXPECTED\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_TO; + break; + case IO_XFER_ERROR_OFFSET_MISMATCH: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_XFER_ERROR_OFFSET_MISMATCH\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_TO; + break; + case IO_XFER_ERROR_XFER_ZERO_DATA_LEN: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_XFER_ERROR_XFER_ZERO_DATA_LEN\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_TO; + break; + case IO_XFER_CMD_FRAME_ISSUED: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_XFER_CMD_FRAME_ISSUED\n")); + break; + case IO_XFER_PIO_SETUP_ERROR: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_XFER_PIO_SETUP_ERROR\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_TO; + break; + default: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("Unknown status 0x%x\n", event)); + /* not allowed case. Therefore, return failed status */ + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_TO; + break; + } + spin_lock_irqsave(&t->task_state_lock, flags); + t->task_state_flags &= ~SAS_TASK_STATE_PENDING; + t->task_state_flags &= ~SAS_TASK_AT_INITIATOR; + t->task_state_flags |= SAS_TASK_STATE_DONE; + if (unlikely((t->task_state_flags & SAS_TASK_STATE_ABORTED))) { + spin_unlock_irqrestore(&t->task_state_lock, flags); + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk("task 0x%p done with io_status 0x%x" + " resp 0x%x stat 0x%x but aborted by upper layer!\n", + t, event, ts->resp, ts->stat)); + pm8001_ccb_task_free(pm8001_ha, t, ccb, tag); + } else { + spin_unlock_irqrestore(&t->task_state_lock, flags); + pm8001_ccb_task_free(pm8001_ha, t, ccb, tag); + mb();/* in order to force CPU ordering */ + t->task_done(t); + } + return 0; +} + +/*See the comments for mpi_ssp_completion */ +static int +mpi_smp_completion(struct pm8001_hba_info *pm8001_ha, void *piomb) +{ + u32 param; + struct sas_task *t; + struct pm8001_ccb_info *ccb; + unsigned long flags; + u32 status; + u32 tag; + struct smp_completion_resp *psmpPayload; + struct task_status_struct *ts; + struct pm8001_device *pm8001_dev; + + psmpPayload = (struct smp_completion_resp *)(piomb + 4); + status = le32_to_cpu(psmpPayload->status); + tag = le32_to_cpu(psmpPayload->tag); + + ccb = &pm8001_ha->ccb_info[tag]; + param = le32_to_cpu(psmpPayload->param); + t = ccb->task; + ts = &t->task_status; + pm8001_dev = ccb->device; + if (status) + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk("smp IO status 0x%x\n", status)); + if (unlikely(!t || !t->lldd_task || !t->dev)) + return -1; + + switch (status) { + case IO_SUCCESS: + PM8001_IO_DBG(pm8001_ha, pm8001_printk("IO_SUCCESS\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAM_GOOD; + if (pm8001_dev) + pm8001_dev->running_req--; + break; + case IO_ABORTED: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_ABORTED IOMB\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_ABORTED_TASK; + if (pm8001_dev) + pm8001_dev->running_req--; + break; + case IO_OVERFLOW: + PM8001_IO_DBG(pm8001_ha, pm8001_printk("IO_UNDERFLOW\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_DATA_OVERRUN; + ts->residual = 0; + if (pm8001_dev) + pm8001_dev->running_req--; + break; + case IO_NO_DEVICE: + PM8001_IO_DBG(pm8001_ha, pm8001_printk("IO_NO_DEVICE\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_PHY_DOWN; + break; + case IO_ERROR_HW_TIMEOUT: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_ERROR_HW_TIMEOUT\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAM_BUSY; + break; + case IO_XFER_ERROR_BREAK: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_XFER_ERROR_BREAK\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAM_BUSY; + break; + case IO_XFER_ERROR_PHY_NOT_READY: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_XFER_ERROR_PHY_NOT_READY\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAM_BUSY; + break; + case IO_OPEN_CNX_ERROR_PROTOCOL_NOT_SUPPORTED: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_OPEN_CNX_ERROR_PROTOCOL_NOT_SUPPORTED\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_REJECT; + ts->open_rej_reason = SAS_OREJ_UNKNOWN; + break; + case IO_OPEN_CNX_ERROR_ZONE_VIOLATION: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_OPEN_CNX_ERROR_ZONE_VIOLATION\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_REJECT; + ts->open_rej_reason = SAS_OREJ_UNKNOWN; + break; + case IO_OPEN_CNX_ERROR_BREAK: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_OPEN_CNX_ERROR_BREAK\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_REJECT; + ts->open_rej_reason = SAS_OREJ_RSVD_CONT0; + break; + case IO_OPEN_CNX_ERROR_IT_NEXUS_LOSS: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_OPEN_CNX_ERROR_IT_NEXUS_LOSS\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_REJECT; + ts->open_rej_reason = SAS_OREJ_UNKNOWN; + pm8001_handle_event(pm8001_ha, + pm8001_dev, + IO_OPEN_CNX_ERROR_IT_NEXUS_LOSS); + break; + case IO_OPEN_CNX_ERROR_BAD_DESTINATION: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_OPEN_CNX_ERROR_BAD_DESTINATION\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_REJECT; + ts->open_rej_reason = SAS_OREJ_BAD_DEST; + break; + case IO_OPEN_CNX_ERROR_CONNECTION_RATE_NOT_SUPPORTED: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_OPEN_CNX_ERROR_CONNECTION_RATE_" + "NOT_SUPPORTED\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_REJECT; + ts->open_rej_reason = SAS_OREJ_CONN_RATE; + break; + case IO_OPEN_CNX_ERROR_WRONG_DESTINATION: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_OPEN_CNX_ERROR_WRONG_DESTINATION\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_REJECT; + ts->open_rej_reason = SAS_OREJ_WRONG_DEST; + break; + case IO_XFER_ERROR_RX_FRAME: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_XFER_ERROR_RX_FRAME\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_DEV_NO_RESPONSE; + break; + case IO_XFER_OPEN_RETRY_TIMEOUT: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_XFER_OPEN_RETRY_TIMEOUT\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_REJECT; + ts->open_rej_reason = SAS_OREJ_RSVD_RETRY; + break; + case IO_ERROR_INTERNAL_SMP_RESOURCE: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_ERROR_INTERNAL_SMP_RESOURCE\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_QUEUE_FULL; + break; + case IO_PORT_IN_RESET: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_PORT_IN_RESET\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_REJECT; + ts->open_rej_reason = SAS_OREJ_RSVD_RETRY; + break; + case IO_DS_NON_OPERATIONAL: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_DS_NON_OPERATIONAL\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_DEV_NO_RESPONSE; + break; + case IO_DS_IN_RECOVERY: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_DS_IN_RECOVERY\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_REJECT; + ts->open_rej_reason = SAS_OREJ_RSVD_RETRY; + break; + case IO_OPEN_CNX_ERROR_HW_RESOURCE_BUSY: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("IO_OPEN_CNX_ERROR_HW_RESOURCE_BUSY\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_OPEN_REJECT; + ts->open_rej_reason = SAS_OREJ_RSVD_RETRY; + break; + default: + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("Unknown status 0x%x\n", status)); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_DEV_NO_RESPONSE; + /* not allowed case. Therefore, return failed status */ + break; + } + spin_lock_irqsave(&t->task_state_lock, flags); + t->task_state_flags &= ~SAS_TASK_STATE_PENDING; + t->task_state_flags &= ~SAS_TASK_AT_INITIATOR; + t->task_state_flags |= SAS_TASK_STATE_DONE; + if (unlikely((t->task_state_flags & SAS_TASK_STATE_ABORTED))) { + spin_unlock_irqrestore(&t->task_state_lock, flags); + PM8001_FAIL_DBG(pm8001_ha, pm8001_printk("task 0x%p done with" + " io_status 0x%x resp 0x%x " + "stat 0x%x but aborted by upper layer!\n", + t, status, ts->resp, ts->stat)); + pm8001_ccb_task_free(pm8001_ha, t, ccb, tag); + } else { + spin_unlock_irqrestore(&t->task_state_lock, flags); + pm8001_ccb_task_free(pm8001_ha, t, ccb, tag); + mb();/* in order to force CPU ordering */ + t->task_done(t); + } + return 0; +} + +static void +mpi_set_dev_state_resp(struct pm8001_hba_info *pm8001_ha, void *piomb) +{ + struct set_dev_state_resp *pPayload = + (struct set_dev_state_resp *)(piomb + 4); + u32 tag = le32_to_cpu(pPayload->tag); + struct pm8001_ccb_info *ccb = &pm8001_ha->ccb_info[tag]; + struct pm8001_device *pm8001_dev = ccb->device; + u32 status = le32_to_cpu(pPayload->status); + u32 device_id = le32_to_cpu(pPayload->device_id); + u8 pds = le32_to_cpu(pPayload->pds_nds) | PDS_BITS; + u8 nds = le32_to_cpu(pPayload->pds_nds) | NDS_BITS; + PM8001_MSG_DBG(pm8001_ha, pm8001_printk("Set device id = 0x%x state " + "from 0x%x to 0x%x status = 0x%x!\n", + device_id, pds, nds, status)); + complete(pm8001_dev->setds_completion); + ccb->task = NULL; + ccb->ccb_tag = 0xFFFFFFFF; + pm8001_ccb_free(pm8001_ha, tag); +} + +static void +mpi_set_nvmd_resp(struct pm8001_hba_info *pm8001_ha, void *piomb) +{ + struct get_nvm_data_resp *pPayload = + (struct get_nvm_data_resp *)(piomb + 4); + u32 tag = le32_to_cpu(pPayload->tag); + struct pm8001_ccb_info *ccb = &pm8001_ha->ccb_info[tag]; + u32 dlen_status = le32_to_cpu(pPayload->dlen_status); + complete(pm8001_ha->nvmd_completion); + PM8001_MSG_DBG(pm8001_ha, pm8001_printk("Set nvm data complete!\n")); + if ((dlen_status & NVMD_STAT) != 0) { + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk("Set nvm data error!\n")); + return; + } + ccb->task = NULL; + ccb->ccb_tag = 0xFFFFFFFF; + pm8001_ccb_free(pm8001_ha, tag); +} + +static void +mpi_get_nvmd_resp(struct pm8001_hba_info *pm8001_ha, void *piomb) +{ + struct fw_control_ex *fw_control_context; + struct get_nvm_data_resp *pPayload = + (struct get_nvm_data_resp *)(piomb + 4); + u32 tag = le32_to_cpu(pPayload->tag); + struct pm8001_ccb_info *ccb = &pm8001_ha->ccb_info[tag]; + u32 dlen_status = le32_to_cpu(pPayload->dlen_status); + u32 ir_tds_bn_dps_das_nvm = + le32_to_cpu(pPayload->ir_tda_bn_dps_das_nvm); + void *virt_addr = pm8001_ha->memoryMap.region[NVMD].virt_ptr; + fw_control_context = ccb->fw_control_context; + + PM8001_MSG_DBG(pm8001_ha, pm8001_printk("Get nvm data complete!\n")); + if ((dlen_status & NVMD_STAT) != 0) { + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk("Get nvm data error!\n")); + complete(pm8001_ha->nvmd_completion); + return; + } + + if (ir_tds_bn_dps_das_nvm & IPMode) { + /* indirect mode - IR bit set */ + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("Get NVMD success, IR=1\n")); + if ((ir_tds_bn_dps_das_nvm & NVMD_TYPE) == TWI_DEVICE) { + if (ir_tds_bn_dps_das_nvm == 0x80a80200) { + memcpy(pm8001_ha->sas_addr, + ((u8 *)virt_addr + 4), + SAS_ADDR_SIZE); + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("Get SAS address" + " from VPD successfully!\n")); + } + } else if (((ir_tds_bn_dps_das_nvm & NVMD_TYPE) == C_SEEPROM) + || ((ir_tds_bn_dps_das_nvm & NVMD_TYPE) == VPD_FLASH) || + ((ir_tds_bn_dps_das_nvm & NVMD_TYPE) == EXPAN_ROM)) { + ; + } else if (((ir_tds_bn_dps_das_nvm & NVMD_TYPE) == AAP1_RDUMP) + || ((ir_tds_bn_dps_das_nvm & NVMD_TYPE) == IOP_RDUMP)) { + ; + } else { + /* Should not be happened*/ + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("(IR=1)Wrong Device type 0x%x\n", + ir_tds_bn_dps_das_nvm)); + } + } else /* direct mode */{ + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("Get NVMD success, IR=0, dataLen=%d\n", + (dlen_status & NVMD_LEN) >> 24)); + } + memcpy((void *)(fw_control_context->usrAddr), + (void *)(pm8001_ha->memoryMap.region[NVMD].virt_ptr), + fw_control_context->len); + complete(pm8001_ha->nvmd_completion); + ccb->task = NULL; + ccb->ccb_tag = 0xFFFFFFFF; + pm8001_ccb_free(pm8001_ha, tag); +} + +static int mpi_local_phy_ctl(struct pm8001_hba_info *pm8001_ha, void *piomb) +{ + struct local_phy_ctl_resp *pPayload = + (struct local_phy_ctl_resp *)(piomb + 4); + u32 status = le32_to_cpu(pPayload->status); + u32 phy_id = le32_to_cpu(pPayload->phyop_phyid) & ID_BITS; + u32 phy_op = le32_to_cpu(pPayload->phyop_phyid) & OP_BITS; + if (status != 0) { + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("%x phy execute %x phy op failed! \n", + phy_id, phy_op)); + } else + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("%x phy execute %x phy op success! \n", + phy_id, phy_op)); + return 0; +} + +/** + * pm8001_bytes_dmaed - one of the interface function communication with libsas + * @pm8001_ha: our hba card information + * @i: which phy that received the event. + * + * when HBA driver received the identify done event or initiate FIS received + * event(for SATA), it will invoke this function to notify the sas layer that + * the sas toplogy has formed, please discover the the whole sas domain, + * while receive a broadcast(change) primitive just tell the sas + * layer to discover the changed domain rather than the whole domain. + */ +static void pm8001_bytes_dmaed(struct pm8001_hba_info *pm8001_ha, int i) +{ + struct pm8001_phy *phy = &pm8001_ha->phy[i]; + struct asd_sas_phy *sas_phy = &phy->sas_phy; + struct sas_ha_struct *sas_ha; + if (!phy->phy_attached) + return; + + sas_ha = pm8001_ha->sas; + if (sas_phy->phy) { + struct sas_phy *sphy = sas_phy->phy; + sphy->negotiated_linkrate = sas_phy->linkrate; + sphy->minimum_linkrate = phy->minimum_linkrate; + sphy->minimum_linkrate_hw = SAS_LINK_RATE_1_5_GBPS; + sphy->maximum_linkrate = phy->maximum_linkrate; + sphy->maximum_linkrate_hw = phy->maximum_linkrate; + } + + if (phy->phy_type & PORT_TYPE_SAS) { + struct sas_identify_frame *id; + id = (struct sas_identify_frame *)phy->frame_rcvd; + id->dev_type = phy->identify.device_type; + id->initiator_bits = SAS_PROTOCOL_ALL; + id->target_bits = phy->identify.target_port_protocols; + } else if (phy->phy_type & PORT_TYPE_SATA) { + /*Nothing*/ + } + PM8001_MSG_DBG(pm8001_ha, pm8001_printk("phy %d byte dmaded.\n", i)); + + sas_phy->frame_rcvd_size = phy->frame_rcvd_size; + pm8001_ha->sas->notify_port_event(sas_phy, PORTE_BYTES_DMAED); +} + +/* Get the link rate speed */ +static void get_lrate_mode(struct pm8001_phy *phy, u8 link_rate) +{ + struct sas_phy *sas_phy = phy->sas_phy.phy; + + switch (link_rate) { + case PHY_SPEED_60: + phy->sas_phy.linkrate = SAS_LINK_RATE_6_0_GBPS; + phy->sas_phy.phy->negotiated_linkrate = SAS_LINK_RATE_6_0_GBPS; + break; + case PHY_SPEED_30: + phy->sas_phy.linkrate = SAS_LINK_RATE_3_0_GBPS; + phy->sas_phy.phy->negotiated_linkrate = SAS_LINK_RATE_3_0_GBPS; + break; + case PHY_SPEED_15: + phy->sas_phy.linkrate = SAS_LINK_RATE_1_5_GBPS; + phy->sas_phy.phy->negotiated_linkrate = SAS_LINK_RATE_1_5_GBPS; + break; + } + sas_phy->negotiated_linkrate = phy->sas_phy.linkrate; + sas_phy->maximum_linkrate_hw = SAS_LINK_RATE_6_0_GBPS; + sas_phy->minimum_linkrate_hw = SAS_LINK_RATE_1_5_GBPS; + sas_phy->maximum_linkrate = SAS_LINK_RATE_6_0_GBPS; + sas_phy->minimum_linkrate = SAS_LINK_RATE_1_5_GBPS; +} + +/** + * asd_get_attached_sas_addr -- extract/generate attached SAS address + * @phy: pointer to asd_phy + * @sas_addr: pointer to buffer where the SAS address is to be written + * + * This function extracts the SAS address from an IDENTIFY frame + * received. If OOB is SATA, then a SAS address is generated from the + * HA tables. + * + * LOCKING: the frame_rcvd_lock needs to be held since this parses the frame + * buffer. + */ +static void pm8001_get_attached_sas_addr(struct pm8001_phy *phy, + u8 *sas_addr) +{ + if (phy->sas_phy.frame_rcvd[0] == 0x34 + && phy->sas_phy.oob_mode == SATA_OOB_MODE) { + struct pm8001_hba_info *pm8001_ha = phy->sas_phy.ha->lldd_ha; + /* FIS device-to-host */ + u64 addr = be64_to_cpu(*(__be64 *)pm8001_ha->sas_addr); + addr += phy->sas_phy.id; + *(__be64 *)sas_addr = cpu_to_be64(addr); + } else { + struct sas_identify_frame *idframe = + (void *) phy->sas_phy.frame_rcvd; + memcpy(sas_addr, idframe->sas_addr, SAS_ADDR_SIZE); + } +} + +/** + * pm8001_hw_event_ack_req- For PM8001,some events need to acknowage to FW. + * @pm8001_ha: our hba card information + * @Qnum: the outbound queue message number. + * @SEA: source of event to ack + * @port_id: port id. + * @phyId: phy id. + * @param0: parameter 0. + * @param1: parameter 1. + */ +static void pm8001_hw_event_ack_req(struct pm8001_hba_info *pm8001_ha, + u32 Qnum, u32 SEA, u32 port_id, u32 phyId, u32 param0, u32 param1) +{ + struct hw_event_ack_req payload; + u32 opc = OPC_INB_SAS_HW_EVENT_ACK; + + struct inbound_queue_table *circularQ; + + memset((u8 *)&payload, 0, sizeof(payload)); + circularQ = &pm8001_ha->inbnd_q_tbl[Qnum]; + payload.tag = 1; + payload.sea_phyid_portid = cpu_to_le32(((SEA & 0xFFFF) << 8) | + ((phyId & 0x0F) << 4) | (port_id & 0x0F)); + payload.param0 = cpu_to_le32(param0); + payload.param1 = cpu_to_le32(param1); + mpi_build_cmd(pm8001_ha, circularQ, opc, &payload); +} + +static int pm8001_chip_phy_ctl_req(struct pm8001_hba_info *pm8001_ha, + u32 phyId, u32 phy_op); + +/** + * hw_event_sas_phy_up -FW tells me a SAS phy up event. + * @pm8001_ha: our hba card information + * @piomb: IO message buffer + */ +static void +hw_event_sas_phy_up(struct pm8001_hba_info *pm8001_ha, void *piomb) +{ + struct hw_event_resp *pPayload = + (struct hw_event_resp *)(piomb + 4); + u32 lr_evt_status_phyid_portid = + le32_to_cpu(pPayload->lr_evt_status_phyid_portid); + u8 link_rate = + (u8)((lr_evt_status_phyid_portid & 0xF0000000) >> 28); + u8 phy_id = + (u8)((lr_evt_status_phyid_portid & 0x000000F0) >> 4); + struct sas_ha_struct *sas_ha = pm8001_ha->sas; + struct pm8001_phy *phy = &pm8001_ha->phy[phy_id]; + unsigned long flags; + u8 deviceType = pPayload->sas_identify.dev_type; + + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("HW_EVENT_SAS_PHY_UP \n")); + + switch (deviceType) { + case SAS_PHY_UNUSED: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("device type no device.\n")); + break; + case SAS_END_DEVICE: + PM8001_MSG_DBG(pm8001_ha, pm8001_printk("end device.\n")); + pm8001_chip_phy_ctl_req(pm8001_ha, phy_id, + PHY_NOTIFY_ENABLE_SPINUP); + get_lrate_mode(phy, link_rate); + break; + case SAS_EDGE_EXPANDER_DEVICE: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("expander device.\n")); + get_lrate_mode(phy, link_rate); + break; + case SAS_FANOUT_EXPANDER_DEVICE: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("fanout expander device.\n")); + get_lrate_mode(phy, link_rate); + break; + default: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("unkown device type(%x)\n", deviceType)); + break; + } + phy->phy_type |= PORT_TYPE_SAS; + phy->identify.device_type = deviceType; + phy->phy_attached = 1; + if (phy->identify.device_type == SAS_END_DEV) + phy->identify.target_port_protocols = SAS_PROTOCOL_SSP; + else if (phy->identify.device_type != NO_DEVICE) + phy->identify.target_port_protocols = SAS_PROTOCOL_SMP; + phy->sas_phy.oob_mode = SAS_OOB_MODE; + sas_ha->notify_phy_event(&phy->sas_phy, PHYE_OOB_DONE); + spin_lock_irqsave(&phy->sas_phy.frame_rcvd_lock, flags); + memcpy(phy->frame_rcvd, &pPayload->sas_identify, + sizeof(struct sas_identify_frame)-4); + phy->frame_rcvd_size = sizeof(struct sas_identify_frame) - 4; + pm8001_get_attached_sas_addr(phy, phy->sas_phy.attached_sas_addr); + spin_unlock_irqrestore(&phy->sas_phy.frame_rcvd_lock, flags); + if (pm8001_ha->flags == PM8001F_RUN_TIME) + mdelay(200);/*delay a moment to wait disk to spinup*/ + pm8001_bytes_dmaed(pm8001_ha, phy_id); +} + +/** + * hw_event_sata_phy_up -FW tells me a SATA phy up event. + * @pm8001_ha: our hba card information + * @piomb: IO message buffer + */ +static void +hw_event_sata_phy_up(struct pm8001_hba_info *pm8001_ha, void *piomb) +{ + struct hw_event_resp *pPayload = + (struct hw_event_resp *)(piomb + 4); + u32 lr_evt_status_phyid_portid = + le32_to_cpu(pPayload->lr_evt_status_phyid_portid); + u8 link_rate = + (u8)((lr_evt_status_phyid_portid & 0xF0000000) >> 28); + u8 phy_id = + (u8)((lr_evt_status_phyid_portid & 0x000000F0) >> 4); + struct sas_ha_struct *sas_ha = pm8001_ha->sas; + struct pm8001_phy *phy = &pm8001_ha->phy[phy_id]; + unsigned long flags; + get_lrate_mode(phy, link_rate); + phy->phy_type |= PORT_TYPE_SATA; + phy->phy_attached = 1; + phy->sas_phy.oob_mode = SATA_OOB_MODE; + sas_ha->notify_phy_event(&phy->sas_phy, PHYE_OOB_DONE); + spin_lock_irqsave(&phy->sas_phy.frame_rcvd_lock, flags); + memcpy(phy->frame_rcvd, ((u8 *)&pPayload->sata_fis - 4), + sizeof(struct dev_to_host_fis)); + phy->frame_rcvd_size = sizeof(struct dev_to_host_fis); + phy->identify.target_port_protocols = SAS_PROTOCOL_SATA; + phy->identify.device_type = SATA_DEV; + pm8001_get_attached_sas_addr(phy, phy->sas_phy.attached_sas_addr); + spin_unlock_irqrestore(&phy->sas_phy.frame_rcvd_lock, flags); + pm8001_bytes_dmaed(pm8001_ha, phy_id); +} + +/** + * hw_event_phy_down -we should notify the libsas the phy is down. + * @pm8001_ha: our hba card information + * @piomb: IO message buffer + */ +static void +hw_event_phy_down(struct pm8001_hba_info *pm8001_ha, void *piomb) +{ + struct hw_event_resp *pPayload = + (struct hw_event_resp *)(piomb + 4); + u32 lr_evt_status_phyid_portid = + le32_to_cpu(pPayload->lr_evt_status_phyid_portid); + u8 port_id = (u8)(lr_evt_status_phyid_portid & 0x0000000F); + u8 phy_id = + (u8)((lr_evt_status_phyid_portid & 0x000000F0) >> 4); + u32 npip_portstate = le32_to_cpu(pPayload->npip_portstate); + u8 portstate = (u8)(npip_portstate & 0x0000000F); + + switch (portstate) { + case PORT_VALID: + break; + case PORT_INVALID: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk(" PortInvalid portID %d \n", port_id)); + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk(" Last phy Down and port invalid\n")); + pm8001_hw_event_ack_req(pm8001_ha, 0, HW_EVENT_PHY_DOWN, + port_id, phy_id, 0, 0); + break; + case PORT_IN_RESET: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk(" PortInReset portID %d \n", port_id)); + break; + case PORT_NOT_ESTABLISHED: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk(" phy Down and PORT_NOT_ESTABLISHED\n")); + break; + case PORT_LOSTCOMM: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk(" phy Down and PORT_LOSTCOMM\n")); + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk(" Last phy Down and port invalid\n")); + pm8001_hw_event_ack_req(pm8001_ha, 0, HW_EVENT_PHY_DOWN, + port_id, phy_id, 0, 0); + break; + default: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk(" phy Down and(default) = %x\n", + portstate)); + break; + + } +} + +/** + * mpi_reg_resp -process register device ID response. + * @pm8001_ha: our hba card information + * @piomb: IO message buffer + * + * when sas layer find a device it will notify LLDD, then the driver register + * the domain device to FW, this event is the return device ID which the FW + * has assigned, from now,inter-communication with FW is no longer using the + * SAS address, use device ID which FW assigned. + */ +static int mpi_reg_resp(struct pm8001_hba_info *pm8001_ha, void *piomb) +{ + u32 status; + u32 device_id; + u32 htag; + struct pm8001_ccb_info *ccb; + struct pm8001_device *pm8001_dev; + struct dev_reg_resp *registerRespPayload = + (struct dev_reg_resp *)(piomb + 4); + + htag = le32_to_cpu(registerRespPayload->tag); + ccb = &pm8001_ha->ccb_info[registerRespPayload->tag]; + pm8001_dev = ccb->device; + status = le32_to_cpu(registerRespPayload->status); + device_id = le32_to_cpu(registerRespPayload->device_id); + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk(" register device is status = %d\n", status)); + switch (status) { + case DEVREG_SUCCESS: + PM8001_MSG_DBG(pm8001_ha, pm8001_printk("DEVREG_SUCCESS\n")); + pm8001_dev->device_id = device_id; + break; + case DEVREG_FAILURE_OUT_OF_RESOURCE: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("DEVREG_FAILURE_OUT_OF_RESOURCE\n")); + break; + case DEVREG_FAILURE_DEVICE_ALREADY_REGISTERED: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("DEVREG_FAILURE_DEVICE_ALREADY_REGISTERED\n")); + break; + case DEVREG_FAILURE_INVALID_PHY_ID: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("DEVREG_FAILURE_INVALID_PHY_ID\n")); + break; + case DEVREG_FAILURE_PHY_ID_ALREADY_REGISTERED: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("DEVREG_FAILURE_PHY_ID_ALREADY_REGISTERED\n")); + break; + case DEVREG_FAILURE_PORT_ID_OUT_OF_RANGE: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("DEVREG_FAILURE_PORT_ID_OUT_OF_RANGE\n")); + break; + case DEVREG_FAILURE_PORT_NOT_VALID_STATE: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("DEVREG_FAILURE_PORT_NOT_VALID_STATE\n")); + break; + case DEVREG_FAILURE_DEVICE_TYPE_NOT_VALID: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("DEVREG_FAILURE_DEVICE_TYPE_NOT_VALID\n")); + break; + default: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("DEVREG_FAILURE_DEVICE_TYPE_NOT_UNSORPORTED\n")); + break; + } + complete(pm8001_dev->dcompletion); + ccb->task = NULL; + ccb->ccb_tag = 0xFFFFFFFF; + pm8001_ccb_free(pm8001_ha, htag); + return 0; +} + +static int mpi_dereg_resp(struct pm8001_hba_info *pm8001_ha, void *piomb) +{ + u32 status; + u32 device_id; + struct dev_reg_resp *registerRespPayload = + (struct dev_reg_resp *)(piomb + 4); + + status = le32_to_cpu(registerRespPayload->status); + device_id = le32_to_cpu(registerRespPayload->device_id); + if (status != 0) + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk(" deregister device failed ,status = %x" + ", device_id = %x\n", status, device_id)); + return 0; +} + +static int +mpi_fw_flash_update_resp(struct pm8001_hba_info *pm8001_ha, void *piomb) +{ + u32 status; + struct fw_control_ex fw_control_context; + struct fw_flash_Update_resp *ppayload = + (struct fw_flash_Update_resp *)(piomb + 4); + u32 tag = le32_to_cpu(ppayload->tag); + struct pm8001_ccb_info *ccb = &pm8001_ha->ccb_info[tag]; + status = le32_to_cpu(ppayload->status); + memcpy(&fw_control_context, + ccb->fw_control_context, + sizeof(fw_control_context)); + switch (status) { + case FLASH_UPDATE_COMPLETE_PENDING_REBOOT: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk(": FLASH_UPDATE_COMPLETE_PENDING_REBOOT\n")); + break; + case FLASH_UPDATE_IN_PROGRESS: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk(": FLASH_UPDATE_IN_PROGRESS\n")); + break; + case FLASH_UPDATE_HDR_ERR: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk(": FLASH_UPDATE_HDR_ERR\n")); + break; + case FLASH_UPDATE_OFFSET_ERR: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk(": FLASH_UPDATE_OFFSET_ERR\n")); + break; + case FLASH_UPDATE_CRC_ERR: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk(": FLASH_UPDATE_CRC_ERR\n")); + break; + case FLASH_UPDATE_LENGTH_ERR: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk(": FLASH_UPDATE_LENGTH_ERR\n")); + break; + case FLASH_UPDATE_HW_ERR: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk(": FLASH_UPDATE_HW_ERR\n")); + break; + case FLASH_UPDATE_DNLD_NOT_SUPPORTED: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk(": FLASH_UPDATE_DNLD_NOT_SUPPORTED\n")); + break; + case FLASH_UPDATE_DISABLED: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk(": FLASH_UPDATE_DISABLED\n")); + break; + default: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("No matched status = %d\n", status)); + break; + } + ccb->fw_control_context->fw_control->retcode = status; + pci_free_consistent(pm8001_ha->pdev, + fw_control_context.len, + fw_control_context.virtAddr, + fw_control_context.phys_addr); + complete(pm8001_ha->nvmd_completion); + ccb->task = NULL; + ccb->ccb_tag = 0xFFFFFFFF; + pm8001_ccb_free(pm8001_ha, tag); + return 0; +} + +static int +mpi_general_event(struct pm8001_hba_info *pm8001_ha , void *piomb) +{ + u32 status; + int i; + struct general_event_resp *pPayload = + (struct general_event_resp *)(piomb + 4); + status = le32_to_cpu(pPayload->status); + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk(" status = 0x%x\n", status)); + for (i = 0; i < GENERAL_EVENT_PAYLOAD; i++) + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("inb_IOMB_payload[0x%x] 0x%x, \n", i, + pPayload->inb_IOMB_payload[i])); + return 0; +} + +static int +mpi_task_abort_resp(struct pm8001_hba_info *pm8001_ha, void *piomb) +{ + struct sas_task *t; + struct pm8001_ccb_info *ccb; + unsigned long flags; + u32 status ; + u32 tag, scp; + struct task_status_struct *ts; + + struct task_abort_resp *pPayload = + (struct task_abort_resp *)(piomb + 4); + ccb = &pm8001_ha->ccb_info[pPayload->tag]; + t = ccb->task; + ts = &t->task_status; + + if (t == NULL) + return -1; + + status = le32_to_cpu(pPayload->status); + tag = le32_to_cpu(pPayload->tag); + scp = le32_to_cpu(pPayload->scp); + PM8001_IO_DBG(pm8001_ha, + pm8001_printk(" status = 0x%x\n", status)); + if (status != 0) + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk("task abort failed tag = 0x%x," + " scp= 0x%x\n", tag, scp)); + switch (status) { + case IO_SUCCESS: + PM8001_IO_DBG(pm8001_ha, pm8001_printk("IO_SUCCESS\n")); + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAM_GOOD; + break; + case IO_NOT_VALID: + PM8001_IO_DBG(pm8001_ha, pm8001_printk("IO_NOT_VALID\n")); + ts->resp = TMF_RESP_FUNC_FAILED; + break; + } + spin_lock_irqsave(&t->task_state_lock, flags); + t->task_state_flags &= ~SAS_TASK_STATE_PENDING; + t->task_state_flags &= ~SAS_TASK_AT_INITIATOR; + t->task_state_flags |= SAS_TASK_STATE_DONE; + spin_unlock_irqrestore(&t->task_state_lock, flags); + pm8001_ccb_task_free(pm8001_ha, t, ccb, pPayload->tag); + mb(); + t->task_done(t); + return 0; +} + +/** + * mpi_hw_event -The hw event has come. + * @pm8001_ha: our hba card information + * @piomb: IO message buffer + */ +static int mpi_hw_event(struct pm8001_hba_info *pm8001_ha, void* piomb) +{ + unsigned long flags; + struct hw_event_resp *pPayload = + (struct hw_event_resp *)(piomb + 4); + u32 lr_evt_status_phyid_portid = + le32_to_cpu(pPayload->lr_evt_status_phyid_portid); + u8 port_id = (u8)(lr_evt_status_phyid_portid & 0x0000000F); + u8 phy_id = + (u8)((lr_evt_status_phyid_portid & 0x000000F0) >> 4); + u16 eventType = + (u16)((lr_evt_status_phyid_portid & 0x00FFFF00) >> 8); + u8 status = + (u8)((lr_evt_status_phyid_portid & 0x0F000000) >> 24); + struct sas_ha_struct *sas_ha = pm8001_ha->sas; + struct pm8001_phy *phy = &pm8001_ha->phy[phy_id]; + struct asd_sas_phy *sas_phy = sas_ha->sas_phy[phy_id]; + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("outbound queue HW event & event type : ")); + switch (eventType) { + case HW_EVENT_PHY_START_STATUS: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("HW_EVENT_PHY_START_STATUS" + " status = %x\n", status)); + if (status == 0) { + phy->phy_state = 1; + if (pm8001_ha->flags == PM8001F_RUN_TIME) + complete(phy->enable_completion); + } + break; + case HW_EVENT_SAS_PHY_UP: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("HW_EVENT_PHY_START_STATUS \n")); + hw_event_sas_phy_up(pm8001_ha, piomb); + break; + case HW_EVENT_SATA_PHY_UP: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("HW_EVENT_SATA_PHY_UP \n")); + hw_event_sata_phy_up(pm8001_ha, piomb); + break; + case HW_EVENT_PHY_STOP_STATUS: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("HW_EVENT_PHY_STOP_STATUS " + "status = %x\n", status)); + if (status == 0) + phy->phy_state = 0; + break; + case HW_EVENT_SATA_SPINUP_HOLD: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("HW_EVENT_SATA_SPINUP_HOLD \n")); + sas_ha->notify_phy_event(&phy->sas_phy, PHYE_SPINUP_HOLD); + break; + case HW_EVENT_PHY_DOWN: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("HW_EVENT_PHY_DOWN \n")); + sas_ha->notify_phy_event(&phy->sas_phy, PHYE_LOSS_OF_SIGNAL); + phy->phy_attached = 0; + phy->phy_state = 0; + hw_event_phy_down(pm8001_ha, piomb); + break; + case HW_EVENT_PORT_INVALID: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("HW_EVENT_PORT_INVALID\n")); + sas_phy_disconnected(sas_phy); + phy->phy_attached = 0; + sas_ha->notify_port_event(sas_phy, PORTE_LINK_RESET_ERR); + break; + /* the broadcast change primitive received, tell the LIBSAS this event + to revalidate the sas domain*/ + case HW_EVENT_BROADCAST_CHANGE: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("HW_EVENT_BROADCAST_CHANGE\n")); + pm8001_hw_event_ack_req(pm8001_ha, 0, HW_EVENT_BROADCAST_CHANGE, + port_id, phy_id, 1, 0); + spin_lock_irqsave(&sas_phy->sas_prim_lock, flags); + sas_phy->sas_prim = HW_EVENT_BROADCAST_CHANGE; + spin_unlock_irqrestore(&sas_phy->sas_prim_lock, flags); + sas_ha->notify_port_event(sas_phy, PORTE_BROADCAST_RCVD); + break; + case HW_EVENT_PHY_ERROR: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("HW_EVENT_PHY_ERROR\n")); + sas_phy_disconnected(&phy->sas_phy); + phy->phy_attached = 0; + sas_ha->notify_phy_event(&phy->sas_phy, PHYE_OOB_ERROR); + break; + case HW_EVENT_BROADCAST_EXP: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("HW_EVENT_BROADCAST_EXP\n")); + spin_lock_irqsave(&sas_phy->sas_prim_lock, flags); + sas_phy->sas_prim = HW_EVENT_BROADCAST_EXP; + spin_unlock_irqrestore(&sas_phy->sas_prim_lock, flags); + sas_ha->notify_port_event(sas_phy, PORTE_BROADCAST_RCVD); + break; + case HW_EVENT_LINK_ERR_INVALID_DWORD: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("HW_EVENT_LINK_ERR_INVALID_DWORD\n")); + pm8001_hw_event_ack_req(pm8001_ha, 0, + HW_EVENT_LINK_ERR_INVALID_DWORD, port_id, phy_id, 0, 0); + sas_phy_disconnected(sas_phy); + phy->phy_attached = 0; + sas_ha->notify_port_event(sas_phy, PORTE_LINK_RESET_ERR); + break; + case HW_EVENT_LINK_ERR_DISPARITY_ERROR: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("HW_EVENT_LINK_ERR_DISPARITY_ERROR\n")); + pm8001_hw_event_ack_req(pm8001_ha, 0, + HW_EVENT_LINK_ERR_DISPARITY_ERROR, + port_id, phy_id, 0, 0); + sas_phy_disconnected(sas_phy); + phy->phy_attached = 0; + sas_ha->notify_port_event(sas_phy, PORTE_LINK_RESET_ERR); + break; + case HW_EVENT_LINK_ERR_CODE_VIOLATION: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("HW_EVENT_LINK_ERR_CODE_VIOLATION\n")); + pm8001_hw_event_ack_req(pm8001_ha, 0, + HW_EVENT_LINK_ERR_CODE_VIOLATION, + port_id, phy_id, 0, 0); + sas_phy_disconnected(sas_phy); + phy->phy_attached = 0; + sas_ha->notify_port_event(sas_phy, PORTE_LINK_RESET_ERR); + break; + case HW_EVENT_LINK_ERR_LOSS_OF_DWORD_SYNCH: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("HW_EVENT_LINK_ERR_LOSS_OF_DWORD_SYNCH\n")); + pm8001_hw_event_ack_req(pm8001_ha, 0, + HW_EVENT_LINK_ERR_LOSS_OF_DWORD_SYNCH, + port_id, phy_id, 0, 0); + sas_phy_disconnected(sas_phy); + phy->phy_attached = 0; + sas_ha->notify_port_event(sas_phy, PORTE_LINK_RESET_ERR); + break; + case HW_EVENT_MALFUNCTION: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("HW_EVENT_MALFUNCTION\n")); + break; + case HW_EVENT_BROADCAST_SES: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("HW_EVENT_BROADCAST_SES\n")); + spin_lock_irqsave(&sas_phy->sas_prim_lock, flags); + sas_phy->sas_prim = HW_EVENT_BROADCAST_SES; + spin_unlock_irqrestore(&sas_phy->sas_prim_lock, flags); + sas_ha->notify_port_event(sas_phy, PORTE_BROADCAST_RCVD); + break; + case HW_EVENT_INBOUND_CRC_ERROR: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("HW_EVENT_INBOUND_CRC_ERROR\n")); + pm8001_hw_event_ack_req(pm8001_ha, 0, + HW_EVENT_INBOUND_CRC_ERROR, + port_id, phy_id, 0, 0); + break; + case HW_EVENT_HARD_RESET_RECEIVED: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("HW_EVENT_HARD_RESET_RECEIVED\n")); + sas_ha->notify_port_event(sas_phy, PORTE_HARD_RESET); + break; + case HW_EVENT_ID_FRAME_TIMEOUT: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("HW_EVENT_ID_FRAME_TIMEOUT\n")); + sas_phy_disconnected(sas_phy); + phy->phy_attached = 0; + sas_ha->notify_port_event(sas_phy, PORTE_LINK_RESET_ERR); + break; + case HW_EVENT_LINK_ERR_PHY_RESET_FAILED: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("HW_EVENT_LINK_ERR_PHY_RESET_FAILED \n")); + pm8001_hw_event_ack_req(pm8001_ha, 0, + HW_EVENT_LINK_ERR_PHY_RESET_FAILED, + port_id, phy_id, 0, 0); + sas_phy_disconnected(sas_phy); + phy->phy_attached = 0; + sas_ha->notify_port_event(sas_phy, PORTE_LINK_RESET_ERR); + break; + case HW_EVENT_PORT_RESET_TIMER_TMO: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("HW_EVENT_PORT_RESET_TIMER_TMO \n")); + sas_phy_disconnected(sas_phy); + phy->phy_attached = 0; + sas_ha->notify_port_event(sas_phy, PORTE_LINK_RESET_ERR); + break; + case HW_EVENT_PORT_RECOVERY_TIMER_TMO: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("HW_EVENT_PORT_RECOVERY_TIMER_TMO \n")); + sas_phy_disconnected(sas_phy); + phy->phy_attached = 0; + sas_ha->notify_port_event(sas_phy, PORTE_LINK_RESET_ERR); + break; + case HW_EVENT_PORT_RECOVER: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("HW_EVENT_PORT_RECOVER \n")); + break; + case HW_EVENT_PORT_RESET_COMPLETE: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("HW_EVENT_PORT_RESET_COMPLETE \n")); + break; + case EVENT_BROADCAST_ASYNCH_EVENT: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("EVENT_BROADCAST_ASYNCH_EVENT\n")); + break; + default: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("Unknown event type = %x\n", eventType)); + break; + } + return 0; +} + +/** + * process_one_iomb - process one outbound Queue memory block + * @pm8001_ha: our hba card information + * @piomb: IO message buffer + */ +static void process_one_iomb(struct pm8001_hba_info *pm8001_ha, void *piomb) +{ + u32 pHeader = (u32)*(u32 *)piomb; + u8 opc = (u8)((le32_to_cpu(pHeader)) & 0xFFF); + + PM8001_MSG_DBG(pm8001_ha, pm8001_printk("process_one_iomb:\n")); + + switch (opc) { + case OPC_OUB_ECHO: + PM8001_MSG_DBG(pm8001_ha, pm8001_printk("OPC_OUB_ECHO \n")); + break; + case OPC_OUB_HW_EVENT: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("OPC_OUB_HW_EVENT \n")); + mpi_hw_event(pm8001_ha, piomb); + break; + case OPC_OUB_SSP_COMP: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("OPC_OUB_SSP_COMP \n")); + mpi_ssp_completion(pm8001_ha, piomb); + break; + case OPC_OUB_SMP_COMP: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("OPC_OUB_SMP_COMP \n")); + mpi_smp_completion(pm8001_ha, piomb); + break; + case OPC_OUB_LOCAL_PHY_CNTRL: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("OPC_OUB_LOCAL_PHY_CNTRL\n")); + mpi_local_phy_ctl(pm8001_ha, piomb); + break; + case OPC_OUB_DEV_REGIST: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("OPC_OUB_DEV_REGIST \n")); + mpi_reg_resp(pm8001_ha, piomb); + break; + case OPC_OUB_DEREG_DEV: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("unresgister the deviece \n")); + mpi_dereg_resp(pm8001_ha, piomb); + break; + case OPC_OUB_GET_DEV_HANDLE: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("OPC_OUB_GET_DEV_HANDLE \n")); + break; + case OPC_OUB_SATA_COMP: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("OPC_OUB_SATA_COMP \n")); + mpi_sata_completion(pm8001_ha, piomb); + break; + case OPC_OUB_SATA_EVENT: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("OPC_OUB_SATA_EVENT \n")); + mpi_sata_event(pm8001_ha, piomb); + break; + case OPC_OUB_SSP_EVENT: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("OPC_OUB_SSP_EVENT\n")); + mpi_ssp_event(pm8001_ha, piomb); + break; + case OPC_OUB_DEV_HANDLE_ARRIV: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("OPC_OUB_DEV_HANDLE_ARRIV\n")); + /*This is for target*/ + break; + case OPC_OUB_SSP_RECV_EVENT: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("OPC_OUB_SSP_RECV_EVENT\n")); + /*This is for target*/ + break; + case OPC_OUB_DEV_INFO: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("OPC_OUB_DEV_INFO\n")); + break; + case OPC_OUB_FW_FLASH_UPDATE: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("OPC_OUB_FW_FLASH_UPDATE\n")); + mpi_fw_flash_update_resp(pm8001_ha, piomb); + break; + case OPC_OUB_GPIO_RESPONSE: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("OPC_OUB_GPIO_RESPONSE\n")); + break; + case OPC_OUB_GPIO_EVENT: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("OPC_OUB_GPIO_EVENT\n")); + break; + case OPC_OUB_GENERAL_EVENT: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("OPC_OUB_GENERAL_EVENT\n")); + mpi_general_event(pm8001_ha, piomb); + break; + case OPC_OUB_SSP_ABORT_RSP: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("OPC_OUB_SSP_ABORT_RSP\n")); + mpi_task_abort_resp(pm8001_ha, piomb); + break; + case OPC_OUB_SATA_ABORT_RSP: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("OPC_OUB_SATA_ABORT_RSP\n")); + mpi_task_abort_resp(pm8001_ha, piomb); + break; + case OPC_OUB_SAS_DIAG_MODE_START_END: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("OPC_OUB_SAS_DIAG_MODE_START_END\n")); + break; + case OPC_OUB_SAS_DIAG_EXECUTE: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("OPC_OUB_SAS_DIAG_EXECUTE\n")); + break; + case OPC_OUB_GET_TIME_STAMP: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("OPC_OUB_GET_TIME_STAMP\n")); + break; + case OPC_OUB_SAS_HW_EVENT_ACK: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("OPC_OUB_SAS_HW_EVENT_ACK\n")); + break; + case OPC_OUB_PORT_CONTROL: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("OPC_OUB_PORT_CONTROL\n")); + break; + case OPC_OUB_SMP_ABORT_RSP: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("OPC_OUB_SMP_ABORT_RSP\n")); + mpi_task_abort_resp(pm8001_ha, piomb); + break; + case OPC_OUB_GET_NVMD_DATA: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("OPC_OUB_GET_NVMD_DATA\n")); + mpi_get_nvmd_resp(pm8001_ha, piomb); + break; + case OPC_OUB_SET_NVMD_DATA: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("OPC_OUB_SET_NVMD_DATA\n")); + mpi_set_nvmd_resp(pm8001_ha, piomb); + break; + case OPC_OUB_DEVICE_HANDLE_REMOVAL: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("OPC_OUB_DEVICE_HANDLE_REMOVAL\n")); + break; + case OPC_OUB_SET_DEVICE_STATE: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("OPC_OUB_SET_DEVICE_STATE\n")); + mpi_set_dev_state_resp(pm8001_ha, piomb); + break; + case OPC_OUB_GET_DEVICE_STATE: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("OPC_OUB_GET_DEVICE_STATE\n")); + break; + case OPC_OUB_SET_DEV_INFO: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("OPC_OUB_SET_DEV_INFO\n")); + break; + case OPC_OUB_SAS_RE_INITIALIZE: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("OPC_OUB_SAS_RE_INITIALIZE\n")); + break; + default: + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("Unknown outbound Queue IOMB OPC = %x\n", + opc)); + break; + } +} + +static int process_oq(struct pm8001_hba_info *pm8001_ha) +{ + struct outbound_queue_table *circularQ; + void *pMsg1 = NULL; + u8 bc = 0; + u32 ret = MPI_IO_STATUS_FAIL, processedMsgCount = 0; + + circularQ = &pm8001_ha->outbnd_q_tbl[0]; + do { + ret = mpi_msg_consume(pm8001_ha, circularQ, &pMsg1, &bc); + if (MPI_IO_STATUS_SUCCESS == ret) { + /* process the outbound message */ + process_one_iomb(pm8001_ha, (void *)((u8 *)pMsg1 - 4)); + /* free the message from the outbound circular buffer */ + mpi_msg_free_set(pm8001_ha, circularQ, bc); + processedMsgCount++; + } + if (MPI_IO_STATUS_BUSY == ret) { + u32 producer_idx; + /* Update the producer index from SPC */ + producer_idx = pm8001_read_32(circularQ->pi_virt); + circularQ->producer_index = cpu_to_le32(producer_idx); + if (circularQ->producer_index == + circularQ->consumer_idx) + /* OQ is empty */ + break; + } + } while (100 > processedMsgCount);/*end message processing if hit the + count*/ + return ret; +} + +/* PCI_DMA_... to our direction translation. */ +static const u8 data_dir_flags[] = { + [PCI_DMA_BIDIRECTIONAL] = DATA_DIR_BYRECIPIENT,/* UNSPECIFIED */ + [PCI_DMA_TODEVICE] = DATA_DIR_OUT,/* OUTBOUND */ + [PCI_DMA_FROMDEVICE] = DATA_DIR_IN,/* INBOUND */ + [PCI_DMA_NONE] = DATA_DIR_NONE,/* NO TRANSFER */ +}; +static void +pm8001_chip_make_sg(struct scatterlist *scatter, int nr, void *prd) +{ + int i; + struct scatterlist *sg; + struct pm8001_prd *buf_prd = prd; + + for_each_sg(scatter, sg, nr, i) { + buf_prd->addr = cpu_to_le64(sg_dma_address(sg)); + buf_prd->im_len.len = cpu_to_le32(sg_dma_len(sg)); + buf_prd->im_len.e = 0; + buf_prd++; + } +} + +static void build_smp_cmd(u32 deviceID, u32 hTag, struct smp_req *psmp_cmd) +{ + psmp_cmd->tag = cpu_to_le32(hTag); + psmp_cmd->device_id = cpu_to_le32(deviceID); + psmp_cmd->len_ip_ir = cpu_to_le32(1|(1 << 1)); +} + +/** + * pm8001_chip_smp_req - send a SMP task to FW + * @pm8001_ha: our hba card information. + * @ccb: the ccb information this request used. + */ +static int pm8001_chip_smp_req(struct pm8001_hba_info *pm8001_ha, + struct pm8001_ccb_info *ccb) +{ + int elem, rc; + struct sas_task *task = ccb->task; + struct domain_device *dev = task->dev; + struct pm8001_device *pm8001_dev = dev->lldd_dev; + struct scatterlist *sg_req, *sg_resp; + u32 req_len, resp_len; + struct smp_req smp_cmd; + u32 opc; + struct inbound_queue_table *circularQ; + + memset(&smp_cmd, 0, sizeof(smp_cmd)); + /* + * DMA-map SMP request, response buffers + */ + sg_req = &task->smp_task.smp_req; + elem = dma_map_sg(pm8001_ha->dev, sg_req, 1, PCI_DMA_TODEVICE); + if (!elem) + return -ENOMEM; + req_len = sg_dma_len(sg_req); + + sg_resp = &task->smp_task.smp_resp; + elem = dma_map_sg(pm8001_ha->dev, sg_resp, 1, PCI_DMA_FROMDEVICE); + if (!elem) { + rc = -ENOMEM; + goto err_out; + } + resp_len = sg_dma_len(sg_resp); + /* must be in dwords */ + if ((req_len & 0x3) || (resp_len & 0x3)) { + rc = -EINVAL; + goto err_out_2; + } + + opc = OPC_INB_SMP_REQUEST; + circularQ = &pm8001_ha->inbnd_q_tbl[0]; + smp_cmd.tag = cpu_to_le32(ccb->ccb_tag); + smp_cmd.long_smp_req.long_req_addr = + cpu_to_le64((u64)sg_dma_address(&task->smp_task.smp_req)); + smp_cmd.long_smp_req.long_req_size = + cpu_to_le32((u32)sg_dma_len(&task->smp_task.smp_req)-4); + smp_cmd.long_smp_req.long_resp_addr = + cpu_to_le64((u64)sg_dma_address(&task->smp_task.smp_resp)); + smp_cmd.long_smp_req.long_resp_size = + cpu_to_le32((u32)sg_dma_len(&task->smp_task.smp_resp)-4); + build_smp_cmd(pm8001_dev->device_id, smp_cmd.tag, &smp_cmd); + mpi_build_cmd(pm8001_ha, circularQ, opc, (u32 *)&smp_cmd); + return 0; + +err_out_2: + dma_unmap_sg(pm8001_ha->dev, &ccb->task->smp_task.smp_resp, 1, + PCI_DMA_FROMDEVICE); +err_out: + dma_unmap_sg(pm8001_ha->dev, &ccb->task->smp_task.smp_req, 1, + PCI_DMA_TODEVICE); + return rc; +} + +/** + * pm8001_chip_ssp_io_req - send a SSP task to FW + * @pm8001_ha: our hba card information. + * @ccb: the ccb information this request used. + */ +static int pm8001_chip_ssp_io_req(struct pm8001_hba_info *pm8001_ha, + struct pm8001_ccb_info *ccb) +{ + struct sas_task *task = ccb->task; + struct domain_device *dev = task->dev; + struct pm8001_device *pm8001_dev = dev->lldd_dev; + struct ssp_ini_io_start_req ssp_cmd; + u32 tag = ccb->ccb_tag; + __le64 phys_addr; + struct inbound_queue_table *circularQ; + u32 opc = OPC_INB_SSPINIIOSTART; + memset(&ssp_cmd, 0, sizeof(ssp_cmd)); + memcpy(ssp_cmd.ssp_iu.lun, task->ssp_task.LUN, 8); + ssp_cmd.dir_m_tlr = data_dir_flags[task->data_dir] << 8 | 0x0;/*0 for + SAS 1.1 compatible TLR*/ + ssp_cmd.data_len = cpu_to_le32(task->total_xfer_len); + ssp_cmd.device_id = cpu_to_le32(pm8001_dev->device_id); + ssp_cmd.tag = cpu_to_le32(tag); + if (task->ssp_task.enable_first_burst) + ssp_cmd.ssp_iu.efb_prio_attr |= 0x80; + ssp_cmd.ssp_iu.efb_prio_attr |= (task->ssp_task.task_prio << 3); + ssp_cmd.ssp_iu.efb_prio_attr |= (task->ssp_task.task_attr & 7); + memcpy(ssp_cmd.ssp_iu.cdb, task->ssp_task.cdb, 16); + circularQ = &pm8001_ha->inbnd_q_tbl[0]; + + /* fill in PRD (scatter/gather) table, if any */ + if (task->num_scatter > 1) { + pm8001_chip_make_sg(task->scatter, ccb->n_elem, ccb->buf_prd); + phys_addr = cpu_to_le64(ccb->ccb_dma_handle + + offsetof(struct pm8001_ccb_info, buf_prd[0])); + ssp_cmd.addr_low = lower_32_bits(phys_addr); + ssp_cmd.addr_high = upper_32_bits(phys_addr); + ssp_cmd.esgl = cpu_to_le32(1<<31); + } else if (task->num_scatter == 1) { + __le64 dma_addr = cpu_to_le64(sg_dma_address(task->scatter)); + ssp_cmd.addr_low = lower_32_bits(dma_addr); + ssp_cmd.addr_high = upper_32_bits(dma_addr); + ssp_cmd.len = cpu_to_le32(task->total_xfer_len); + ssp_cmd.esgl = 0; + } else if (task->num_scatter == 0) { + ssp_cmd.addr_low = 0; + ssp_cmd.addr_high = 0; + ssp_cmd.len = cpu_to_le32(task->total_xfer_len); + ssp_cmd.esgl = 0; + } + mpi_build_cmd(pm8001_ha, circularQ, opc, &ssp_cmd); + return 0; +} + +static int pm8001_chip_sata_req(struct pm8001_hba_info *pm8001_ha, + struct pm8001_ccb_info *ccb) +{ + struct sas_task *task = ccb->task; + struct domain_device *dev = task->dev; + struct pm8001_device *pm8001_ha_dev = dev->lldd_dev; + u32 tag = ccb->ccb_tag; + struct sata_start_req sata_cmd; + u32 hdr_tag, ncg_tag = 0; + __le64 phys_addr; + u32 ATAP = 0x0; + u32 dir; + struct inbound_queue_table *circularQ; + u32 opc = OPC_INB_SATA_HOST_OPSTART; + memset(&sata_cmd, 0, sizeof(sata_cmd)); + circularQ = &pm8001_ha->inbnd_q_tbl[0]; + if (task->data_dir == PCI_DMA_NONE) { + ATAP = 0x04; /* no data*/ + PM8001_IO_DBG(pm8001_ha, pm8001_printk("no data \n")); + } else if (likely(!task->ata_task.device_control_reg_update)) { + if (task->ata_task.dma_xfer) { + ATAP = 0x06; /* DMA */ + PM8001_IO_DBG(pm8001_ha, pm8001_printk("DMA \n")); + } else { + ATAP = 0x05; /* PIO*/ + PM8001_IO_DBG(pm8001_ha, pm8001_printk("PIO \n")); + } + if (task->ata_task.use_ncq && + dev->sata_dev.command_set != ATAPI_COMMAND_SET) { + ATAP = 0x07; /* FPDMA */ + PM8001_IO_DBG(pm8001_ha, pm8001_printk("FPDMA \n")); + } + } + if (task->ata_task.use_ncq && pm8001_get_ncq_tag(task, &hdr_tag)) + ncg_tag = cpu_to_le32(hdr_tag); + dir = data_dir_flags[task->data_dir] << 8; + sata_cmd.tag = cpu_to_le32(tag); + sata_cmd.device_id = cpu_to_le32(pm8001_ha_dev->device_id); + sata_cmd.data_len = cpu_to_le32(task->total_xfer_len); + sata_cmd.ncqtag_atap_dir_m = + cpu_to_le32(((ncg_tag & 0xff)<<16)|((ATAP & 0x3f) << 10) | dir); + sata_cmd.sata_fis = task->ata_task.fis; + if (likely(!task->ata_task.device_control_reg_update)) + sata_cmd.sata_fis.flags |= 0x80;/* C=1: update ATA cmd reg */ + sata_cmd.sata_fis.flags &= 0xF0;/* PM_PORT field shall be 0 */ + /* fill in PRD (scatter/gather) table, if any */ + if (task->num_scatter > 1) { + pm8001_chip_make_sg(task->scatter, ccb->n_elem, ccb->buf_prd); + phys_addr = cpu_to_le64(ccb->ccb_dma_handle + + offsetof(struct pm8001_ccb_info, buf_prd[0])); + sata_cmd.addr_low = lower_32_bits(phys_addr); + sata_cmd.addr_high = upper_32_bits(phys_addr); + sata_cmd.esgl = cpu_to_le32(1 << 31); + } else if (task->num_scatter == 1) { + __le64 dma_addr = cpu_to_le64(sg_dma_address(task->scatter)); + sata_cmd.addr_low = lower_32_bits(dma_addr); + sata_cmd.addr_high = upper_32_bits(dma_addr); + sata_cmd.len = cpu_to_le32(task->total_xfer_len); + sata_cmd.esgl = 0; + } else if (task->num_scatter == 0) { + sata_cmd.addr_low = 0; + sata_cmd.addr_high = 0; + sata_cmd.len = cpu_to_le32(task->total_xfer_len); + sata_cmd.esgl = 0; + } + mpi_build_cmd(pm8001_ha, circularQ, opc, &sata_cmd); + return 0; +} + +/** + * pm8001_chip_phy_start_req - start phy via PHY_START COMMAND + * @pm8001_ha: our hba card information. + * @num: the inbound queue number + * @phy_id: the phy id which we wanted to start up. + */ +static int +pm8001_chip_phy_start_req(struct pm8001_hba_info *pm8001_ha, u8 phy_id) +{ + struct phy_start_req payload; + struct inbound_queue_table *circularQ; + u32 tag = 0x01; + u32 opcode = OPC_INB_PHYSTART; + circularQ = &pm8001_ha->inbnd_q_tbl[0]; + memset(&payload, 0, sizeof(payload)); + payload.tag = cpu_to_le32(tag); + /* + ** [0:7] PHY Identifier + ** [8:11] link rate 1.5G, 3G, 6G + ** [12:13] link mode 01b SAS mode; 10b SATA mode; 11b both + ** [14] 0b disable spin up hold; 1b enable spin up hold + */ + payload.ase_sh_lm_slr_phyid = cpu_to_le32(SPINHOLD_DISABLE | + LINKMODE_AUTO | LINKRATE_15 | + LINKRATE_30 | LINKRATE_60 | phy_id); + payload.sas_identify.dev_type = SAS_END_DEV; + payload.sas_identify.initiator_bits = SAS_PROTOCOL_ALL; + memcpy(payload.sas_identify.sas_addr, + pm8001_ha->sas_addr, SAS_ADDR_SIZE); + payload.sas_identify.phy_id = phy_id; + mpi_build_cmd(pm8001_ha, circularQ, opcode, &payload); + return 0; +} + +/** + * pm8001_chip_phy_stop_req - start phy via PHY_STOP COMMAND + * @pm8001_ha: our hba card information. + * @num: the inbound queue number + * @phy_id: the phy id which we wanted to start up. + */ +static int pm8001_chip_phy_stop_req(struct pm8001_hba_info *pm8001_ha, + u8 phy_id) +{ + struct phy_stop_req payload; + struct inbound_queue_table *circularQ; + u32 tag = 0x01; + u32 opcode = OPC_INB_PHYSTOP; + circularQ = &pm8001_ha->inbnd_q_tbl[0]; + memset(&payload, 0, sizeof(payload)); + payload.tag = cpu_to_le32(tag); + payload.phy_id = cpu_to_le32(phy_id); + mpi_build_cmd(pm8001_ha, circularQ, opcode, &payload); + return 0; +} + +/** + * see comments on mpi_reg_resp. + */ +static int pm8001_chip_reg_dev_req(struct pm8001_hba_info *pm8001_ha, + struct pm8001_device *pm8001_dev, u32 flag) +{ + struct reg_dev_req payload; + u32 opc; + u32 stp_sspsmp_sata = 0x4; + struct inbound_queue_table *circularQ; + u32 linkrate, phy_id; + u32 rc, tag = 0xdeadbeef; + struct pm8001_ccb_info *ccb; + u8 retryFlag = 0x1; + u16 firstBurstSize = 0; + u16 ITNT = 2000; + struct domain_device *dev = pm8001_dev->sas_device; + struct domain_device *parent_dev = dev->parent; + circularQ = &pm8001_ha->inbnd_q_tbl[0]; + + memset(&payload, 0, sizeof(payload)); + rc = pm8001_tag_alloc(pm8001_ha, &tag); + if (rc) + return rc; + ccb = &pm8001_ha->ccb_info[tag]; + ccb->device = pm8001_dev; + ccb->ccb_tag = tag; + payload.tag = cpu_to_le32(tag); + if (flag == 1) + stp_sspsmp_sata = 0x02; /*direct attached sata */ + else { + if (pm8001_dev->dev_type == SATA_DEV) + stp_sspsmp_sata = 0x00; /* stp*/ + else if (pm8001_dev->dev_type == SAS_END_DEV || + pm8001_dev->dev_type == EDGE_DEV || + pm8001_dev->dev_type == FANOUT_DEV) + stp_sspsmp_sata = 0x01; /*ssp or smp*/ + } + if (parent_dev && DEV_IS_EXPANDER(parent_dev->dev_type)) + phy_id = parent_dev->ex_dev.ex_phy->phy_id; + else + phy_id = pm8001_dev->attached_phy; + opc = OPC_INB_REG_DEV; + linkrate = (pm8001_dev->sas_device->linkrate < dev->port->linkrate) ? + pm8001_dev->sas_device->linkrate : dev->port->linkrate; + payload.phyid_portid = + cpu_to_le32(((pm8001_dev->sas_device->port->id) & 0x0F) | + ((phy_id & 0x0F) << 4)); + payload.dtype_dlr_retry = cpu_to_le32((retryFlag & 0x01) | + ((linkrate & 0x0F) * 0x1000000) | + ((stp_sspsmp_sata & 0x03) * 0x10000000)); + payload.firstburstsize_ITNexustimeout = + cpu_to_le32(ITNT | (firstBurstSize * 0x10000)); + memcpy(&payload.sas_addr_hi, pm8001_dev->sas_device->sas_addr, + SAS_ADDR_SIZE); + mpi_build_cmd(pm8001_ha, circularQ, opc, &payload); + return 0; +} + +/** + * see comments on mpi_reg_resp. + */ +static int pm8001_chip_dereg_dev_req(struct pm8001_hba_info *pm8001_ha, + u32 device_id) +{ + struct dereg_dev_req payload; + u32 opc = OPC_INB_DEREG_DEV_HANDLE; + struct inbound_queue_table *circularQ; + + circularQ = &pm8001_ha->inbnd_q_tbl[0]; + memset((u8 *)&payload, 0, sizeof(payload)); + payload.tag = 1; + payload.device_id = cpu_to_le32(device_id); + PM8001_MSG_DBG(pm8001_ha, + pm8001_printk("unregister device device_id = %d\n", device_id)); + mpi_build_cmd(pm8001_ha, circularQ, opc, &payload); + return 0; +} + +/** + * pm8001_chip_phy_ctl_req - support the local phy operation + * @pm8001_ha: our hba card information. + * @num: the inbound queue number + * @phy_id: the phy id which we wanted to operate + * @phy_op: + */ +static int pm8001_chip_phy_ctl_req(struct pm8001_hba_info *pm8001_ha, + u32 phyId, u32 phy_op) +{ + struct local_phy_ctl_req payload; + struct inbound_queue_table *circularQ; + u32 opc = OPC_INB_LOCAL_PHY_CONTROL; + memset((u8 *)&payload, 0, sizeof(payload)); + circularQ = &pm8001_ha->inbnd_q_tbl[0]; + payload.tag = 1; + payload.phyop_phyid = + cpu_to_le32(((phy_op & 0xff) << 8) | (phyId & 0x0F)); + mpi_build_cmd(pm8001_ha, circularQ, opc, &payload); + return 0; +} + +static u32 pm8001_chip_is_our_interupt(struct pm8001_hba_info *pm8001_ha) +{ + u32 value; +#ifdef PM8001_USE_MSIX + return 1; +#endif + value = pm8001_cr32(pm8001_ha, 0, MSGU_ODR); + if (value) + return 1; + return 0; + +} + +/** + * pm8001_chip_isr - PM8001 isr handler. + * @pm8001_ha: our hba card information. + * @irq: irq number. + * @stat: stat. + */ +static void +pm8001_chip_isr(struct pm8001_hba_info *pm8001_ha) +{ + pm8001_chip_interrupt_disable(pm8001_ha); + process_oq(pm8001_ha); + pm8001_chip_interrupt_enable(pm8001_ha); +} + +static int send_task_abort(struct pm8001_hba_info *pm8001_ha, u32 opc, + u32 dev_id, u8 flag, u32 task_tag, u32 cmd_tag) +{ + struct task_abort_req task_abort; + struct inbound_queue_table *circularQ; + + circularQ = &pm8001_ha->inbnd_q_tbl[0]; + memset(&task_abort, 0, sizeof(task_abort)); + if (ABORT_SINGLE == (flag & ABORT_MASK)) { + task_abort.abort_all = 0; + task_abort.device_id = cpu_to_le32(dev_id); + task_abort.tag_to_abort = cpu_to_le32(task_tag); + task_abort.tag = cpu_to_le32(cmd_tag); + } else if (ABORT_ALL == (flag & ABORT_MASK)) { + task_abort.abort_all = cpu_to_le32(1); + task_abort.device_id = cpu_to_le32(dev_id); + task_abort.tag = cpu_to_le32(cmd_tag); + } + mpi_build_cmd(pm8001_ha, circularQ, opc, &task_abort); + return 0; +} + +/** + * pm8001_chip_abort_task - SAS abort task when error or exception happened. + * @task: the task we wanted to aborted. + * @flag: the abort flag. + */ +static int pm8001_chip_abort_task(struct pm8001_hba_info *pm8001_ha, + struct pm8001_device *pm8001_dev, u8 flag, u32 task_tag, u32 cmd_tag) +{ + u32 opc, device_id; + int rc = TMF_RESP_FUNC_FAILED; + PM8001_IO_DBG(pm8001_ha, pm8001_printk("Abort tag[%x]", task_tag)); + if (pm8001_dev->dev_type == SAS_END_DEV) + opc = OPC_INB_SSP_ABORT; + else if (pm8001_dev->dev_type == SATA_DEV) + opc = OPC_INB_SATA_ABORT; + else + opc = OPC_INB_SMP_ABORT;/* SMP */ + device_id = pm8001_dev->device_id; + rc = send_task_abort(pm8001_ha, opc, device_id, flag, + task_tag, cmd_tag); + if (rc != TMF_RESP_FUNC_COMPLETE) + PM8001_IO_DBG(pm8001_ha, pm8001_printk("rc= %d\n", rc)); + return rc; +} + +/** + * pm8001_chip_ssp_tm_req - built the task managment command. + * @pm8001_ha: our hba card information. + * @ccb: the ccb information. + * @tmf: task management function. + */ +static int pm8001_chip_ssp_tm_req(struct pm8001_hba_info *pm8001_ha, + struct pm8001_ccb_info *ccb, struct pm8001_tmf_task *tmf) +{ + struct sas_task *task = ccb->task; + struct domain_device *dev = task->dev; + struct pm8001_device *pm8001_dev = dev->lldd_dev; + u32 opc = OPC_INB_SSPINITMSTART; + struct inbound_queue_table *circularQ; + struct ssp_ini_tm_start_req sspTMCmd; + + memset(&sspTMCmd, 0, sizeof(sspTMCmd)); + sspTMCmd.device_id = cpu_to_le32(pm8001_dev->device_id); + sspTMCmd.relate_tag = cpu_to_le32(tmf->tag_of_task_to_be_managed); + sspTMCmd.tmf = cpu_to_le32(tmf->tmf); + sspTMCmd.ds_ads_m = cpu_to_le32(1 << 2); + memcpy(sspTMCmd.lun, task->ssp_task.LUN, 8); + sspTMCmd.tag = cpu_to_le32(ccb->ccb_tag); + circularQ = &pm8001_ha->inbnd_q_tbl[0]; + mpi_build_cmd(pm8001_ha, circularQ, opc, &sspTMCmd); + return 0; +} + +static int pm8001_chip_get_nvmd_req(struct pm8001_hba_info *pm8001_ha, + void *payload) +{ + u32 opc = OPC_INB_GET_NVMD_DATA; + u32 nvmd_type; + u32 rc; + u32 tag; + struct pm8001_ccb_info *ccb; + struct inbound_queue_table *circularQ; + struct get_nvm_data_req nvmd_req; + struct fw_control_ex *fw_control_context; + struct pm8001_ioctl_payload *ioctl_payload = payload; + + nvmd_type = ioctl_payload->minor_function; + fw_control_context = kzalloc(sizeof(struct fw_control_ex), GFP_KERNEL); + fw_control_context->usrAddr = (u8 *)&ioctl_payload->func_specific[0]; + fw_control_context->len = ioctl_payload->length; + circularQ = &pm8001_ha->inbnd_q_tbl[0]; + memset(&nvmd_req, 0, sizeof(nvmd_req)); + rc = pm8001_tag_alloc(pm8001_ha, &tag); + if (rc) + return rc; + ccb = &pm8001_ha->ccb_info[tag]; + ccb->ccb_tag = tag; + ccb->fw_control_context = fw_control_context; + nvmd_req.tag = cpu_to_le32(tag); + + switch (nvmd_type) { + case TWI_DEVICE: { + u32 twi_addr, twi_page_size; + twi_addr = 0xa8; + twi_page_size = 2; + + nvmd_req.len_ir_vpdd = cpu_to_le32(IPMode | twi_addr << 16 | + twi_page_size << 8 | TWI_DEVICE); + nvmd_req.resp_len = cpu_to_le32(ioctl_payload->length); + nvmd_req.resp_addr_hi = + cpu_to_le32(pm8001_ha->memoryMap.region[NVMD].phys_addr_hi); + nvmd_req.resp_addr_lo = + cpu_to_le32(pm8001_ha->memoryMap.region[NVMD].phys_addr_lo); + break; + } + case C_SEEPROM: { + nvmd_req.len_ir_vpdd = cpu_to_le32(IPMode | C_SEEPROM); + nvmd_req.resp_len = cpu_to_le32(ioctl_payload->length); + nvmd_req.resp_addr_hi = + cpu_to_le32(pm8001_ha->memoryMap.region[NVMD].phys_addr_hi); + nvmd_req.resp_addr_lo = + cpu_to_le32(pm8001_ha->memoryMap.region[NVMD].phys_addr_lo); + break; + } + case VPD_FLASH: { + nvmd_req.len_ir_vpdd = cpu_to_le32(IPMode | VPD_FLASH); + nvmd_req.resp_len = cpu_to_le32(ioctl_payload->length); + nvmd_req.resp_addr_hi = + cpu_to_le32(pm8001_ha->memoryMap.region[NVMD].phys_addr_hi); + nvmd_req.resp_addr_lo = + cpu_to_le32(pm8001_ha->memoryMap.region[NVMD].phys_addr_lo); + break; + } + case EXPAN_ROM: { + nvmd_req.len_ir_vpdd = cpu_to_le32(IPMode | EXPAN_ROM); + nvmd_req.resp_len = cpu_to_le32(ioctl_payload->length); + nvmd_req.resp_addr_hi = + cpu_to_le32(pm8001_ha->memoryMap.region[NVMD].phys_addr_hi); + nvmd_req.resp_addr_lo = + cpu_to_le32(pm8001_ha->memoryMap.region[NVMD].phys_addr_lo); + break; + } + default: + break; + } + mpi_build_cmd(pm8001_ha, circularQ, opc, &nvmd_req); + return 0; +} + +static int pm8001_chip_set_nvmd_req(struct pm8001_hba_info *pm8001_ha, + void *payload) +{ + u32 opc = OPC_INB_SET_NVMD_DATA; + u32 nvmd_type; + u32 rc; + u32 tag; + struct pm8001_ccb_info *ccb; + struct inbound_queue_table *circularQ; + struct set_nvm_data_req nvmd_req; + struct fw_control_ex *fw_control_context; + struct pm8001_ioctl_payload *ioctl_payload = payload; + + nvmd_type = ioctl_payload->minor_function; + fw_control_context = kzalloc(sizeof(struct fw_control_ex), GFP_KERNEL); + circularQ = &pm8001_ha->inbnd_q_tbl[0]; + memcpy(pm8001_ha->memoryMap.region[NVMD].virt_ptr, + ioctl_payload->func_specific, + ioctl_payload->length); + memset(&nvmd_req, 0, sizeof(nvmd_req)); + rc = pm8001_tag_alloc(pm8001_ha, &tag); + if (rc) + return rc; + ccb = &pm8001_ha->ccb_info[tag]; + ccb->fw_control_context = fw_control_context; + ccb->ccb_tag = tag; + nvmd_req.tag = cpu_to_le32(tag); + switch (nvmd_type) { + case TWI_DEVICE: { + u32 twi_addr, twi_page_size; + twi_addr = 0xa8; + twi_page_size = 2; + nvmd_req.reserved[0] = cpu_to_le32(0xFEDCBA98); + nvmd_req.len_ir_vpdd = cpu_to_le32(IPMode | twi_addr << 16 | + twi_page_size << 8 | TWI_DEVICE); + nvmd_req.resp_len = cpu_to_le32(ioctl_payload->length); + nvmd_req.resp_addr_hi = + cpu_to_le32(pm8001_ha->memoryMap.region[NVMD].phys_addr_hi); + nvmd_req.resp_addr_lo = + cpu_to_le32(pm8001_ha->memoryMap.region[NVMD].phys_addr_lo); + break; + } + case C_SEEPROM: + nvmd_req.len_ir_vpdd = cpu_to_le32(IPMode | C_SEEPROM); + nvmd_req.resp_len = cpu_to_le32(ioctl_payload->length); + nvmd_req.reserved[0] = cpu_to_le32(0xFEDCBA98); + nvmd_req.resp_addr_hi = + cpu_to_le32(pm8001_ha->memoryMap.region[NVMD].phys_addr_hi); + nvmd_req.resp_addr_lo = + cpu_to_le32(pm8001_ha->memoryMap.region[NVMD].phys_addr_lo); + break; + case VPD_FLASH: + nvmd_req.len_ir_vpdd = cpu_to_le32(IPMode | VPD_FLASH); + nvmd_req.resp_len = cpu_to_le32(ioctl_payload->length); + nvmd_req.reserved[0] = cpu_to_le32(0xFEDCBA98); + nvmd_req.resp_addr_hi = + cpu_to_le32(pm8001_ha->memoryMap.region[NVMD].phys_addr_hi); + nvmd_req.resp_addr_lo = + cpu_to_le32(pm8001_ha->memoryMap.region[NVMD].phys_addr_lo); + break; + case EXPAN_ROM: + nvmd_req.len_ir_vpdd = cpu_to_le32(IPMode | EXPAN_ROM); + nvmd_req.resp_len = cpu_to_le32(ioctl_payload->length); + nvmd_req.reserved[0] = cpu_to_le32(0xFEDCBA98); + nvmd_req.resp_addr_hi = + cpu_to_le32(pm8001_ha->memoryMap.region[NVMD].phys_addr_hi); + nvmd_req.resp_addr_lo = + cpu_to_le32(pm8001_ha->memoryMap.region[NVMD].phys_addr_lo); + break; + default: + break; + } + mpi_build_cmd(pm8001_ha, circularQ, opc, &nvmd_req); + return 0; +} + +/** + * pm8001_chip_fw_flash_update_build - support the firmware update operation + * @pm8001_ha: our hba card information. + * @fw_flash_updata_info: firmware flash update param + */ +static int +pm8001_chip_fw_flash_update_build(struct pm8001_hba_info *pm8001_ha, + void *fw_flash_updata_info, u32 tag) +{ + struct fw_flash_Update_req payload; + struct fw_flash_updata_info *info; + struct inbound_queue_table *circularQ; + u32 opc = OPC_INB_FW_FLASH_UPDATE; + + memset((u8 *)&payload, 0, sizeof(struct fw_flash_Update_req)); + circularQ = &pm8001_ha->inbnd_q_tbl[0]; + info = fw_flash_updata_info; + payload.tag = cpu_to_le32(tag); + payload.cur_image_len = cpu_to_le32(info->cur_image_len); + payload.cur_image_offset = cpu_to_le32(info->cur_image_offset); + payload.total_image_len = cpu_to_le32(info->total_image_len); + payload.len = info->sgl.im_len.len ; + payload.sgl_addr_lo = lower_32_bits(info->sgl.addr); + payload.sgl_addr_hi = upper_32_bits(info->sgl.addr); + mpi_build_cmd(pm8001_ha, circularQ, opc, &payload); + return 0; +} + +static int +pm8001_chip_fw_flash_update_req(struct pm8001_hba_info *pm8001_ha, + void *payload) +{ + struct fw_flash_updata_info flash_update_info; + struct fw_control_info *fw_control; + struct fw_control_ex *fw_control_context; + u32 rc; + u32 tag; + struct pm8001_ccb_info *ccb; + void *buffer = NULL; + dma_addr_t phys_addr; + u32 phys_addr_hi; + u32 phys_addr_lo; + struct pm8001_ioctl_payload *ioctl_payload = payload; + + fw_control_context = kzalloc(sizeof(struct fw_control_ex), GFP_KERNEL); + fw_control = (struct fw_control_info *)&ioctl_payload->func_specific[0]; + if (fw_control->len != 0) { + if (pm8001_mem_alloc(pm8001_ha->pdev, + (void **)&buffer, + &phys_addr, + &phys_addr_hi, + &phys_addr_lo, + fw_control->len, 0) != 0) { + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk("Mem alloc failure\n")); + return -ENOMEM; + } + } + memset((void *)buffer, 0, fw_control->len); + memcpy((void *)buffer, fw_control->buffer, fw_control->len); + flash_update_info.sgl.addr = cpu_to_le64(phys_addr); + flash_update_info.sgl.im_len.len = cpu_to_le32(fw_control->len); + flash_update_info.sgl.im_len.e = 0; + flash_update_info.cur_image_offset = fw_control->offset; + flash_update_info.cur_image_len = fw_control->len; + flash_update_info.total_image_len = fw_control->size; + fw_control_context->fw_control = fw_control; + fw_control_context->virtAddr = buffer; + fw_control_context->len = fw_control->len; + rc = pm8001_tag_alloc(pm8001_ha, &tag); + if (rc) + return rc; + ccb = &pm8001_ha->ccb_info[tag]; + ccb->fw_control_context = fw_control_context; + ccb->ccb_tag = tag; + pm8001_chip_fw_flash_update_build(pm8001_ha, &flash_update_info, tag); + return 0; +} + +static int +pm8001_chip_set_dev_state_req(struct pm8001_hba_info *pm8001_ha, + struct pm8001_device *pm8001_dev, u32 state) +{ + struct set_dev_state_req payload; + struct inbound_queue_table *circularQ; + struct pm8001_ccb_info *ccb; + u32 rc; + u32 tag; + u32 opc = OPC_INB_SET_DEVICE_STATE; + memset((u8 *)&payload, 0, sizeof(payload)); + rc = pm8001_tag_alloc(pm8001_ha, &tag); + if (rc) + return -1; + ccb = &pm8001_ha->ccb_info[tag]; + ccb->ccb_tag = tag; + ccb->device = pm8001_dev; + circularQ = &pm8001_ha->inbnd_q_tbl[0]; + payload.tag = cpu_to_le32(tag); + payload.device_id = cpu_to_le32(pm8001_dev->device_id); + payload.nds = cpu_to_le32(state); + mpi_build_cmd(pm8001_ha, circularQ, opc, &payload); + return 0; + +} + +const struct pm8001_dispatch pm8001_8001_dispatch = { + .name = "pmc8001", + .chip_init = pm8001_chip_init, + .chip_soft_rst = pm8001_chip_soft_rst, + .chip_rst = pm8001_hw_chip_rst, + .chip_iounmap = pm8001_chip_iounmap, + .isr = pm8001_chip_isr, + .is_our_interupt = pm8001_chip_is_our_interupt, + .isr_process_oq = process_oq, + .interrupt_enable = pm8001_chip_interrupt_enable, + .interrupt_disable = pm8001_chip_interrupt_disable, + .make_prd = pm8001_chip_make_sg, + .smp_req = pm8001_chip_smp_req, + .ssp_io_req = pm8001_chip_ssp_io_req, + .sata_req = pm8001_chip_sata_req, + .phy_start_req = pm8001_chip_phy_start_req, + .phy_stop_req = pm8001_chip_phy_stop_req, + .reg_dev_req = pm8001_chip_reg_dev_req, + .dereg_dev_req = pm8001_chip_dereg_dev_req, + .phy_ctl_req = pm8001_chip_phy_ctl_req, + .task_abort = pm8001_chip_abort_task, + .ssp_tm_req = pm8001_chip_ssp_tm_req, + .get_nvmd_req = pm8001_chip_get_nvmd_req, + .set_nvmd_req = pm8001_chip_set_nvmd_req, + .fw_flash_update_req = pm8001_chip_fw_flash_update_req, + .set_dev_state_req = pm8001_chip_set_dev_state_req, +}; + diff --git a/drivers/scsi/pm8001/pm8001_hwi.h b/drivers/scsi/pm8001/pm8001_hwi.h new file mode 100644 index 000000000000..3690a2ba0eb2 --- /dev/null +++ b/drivers/scsi/pm8001/pm8001_hwi.h @@ -0,0 +1,1011 @@ +/* + * PMC-Sierra SPC 8001 SAS/SATA based host adapters driver + * + * Copyright (c) 2008-2009 USI Co., Ltd. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. + * 3. Neither the names of the above-listed copyright holders nor the names + * of any contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. + * + */ +#ifndef _PMC8001_REG_H_ +#define _PMC8001_REG_H_ + +#include +#include + + +/* for Request Opcode of IOMB */ +#define OPC_INB_ECHO 1 /* 0x000 */ +#define OPC_INB_PHYSTART 4 /* 0x004 */ +#define OPC_INB_PHYSTOP 5 /* 0x005 */ +#define OPC_INB_SSPINIIOSTART 6 /* 0x006 */ +#define OPC_INB_SSPINITMSTART 7 /* 0x007 */ +#define OPC_INB_SSPINIEXTIOSTART 8 /* 0x008 */ +#define OPC_INB_DEV_HANDLE_ACCEPT 9 /* 0x009 */ +#define OPC_INB_SSPTGTIOSTART 10 /* 0x00A */ +#define OPC_INB_SSPTGTRSPSTART 11 /* 0x00B */ +#define OPC_INB_SSPINIEDCIOSTART 12 /* 0x00C */ +#define OPC_INB_SSPINIEXTEDCIOSTART 13 /* 0x00D */ +#define OPC_INB_SSPTGTEDCIOSTART 14 /* 0x00E */ +#define OPC_INB_SSP_ABORT 15 /* 0x00F */ +#define OPC_INB_DEREG_DEV_HANDLE 16 /* 0x010 */ +#define OPC_INB_GET_DEV_HANDLE 17 /* 0x011 */ +#define OPC_INB_SMP_REQUEST 18 /* 0x012 */ +/* SMP_RESPONSE is removed */ +#define OPC_INB_SMP_RESPONSE 19 /* 0x013 */ +#define OPC_INB_SMP_ABORT 20 /* 0x014 */ +#define OPC_INB_REG_DEV 22 /* 0x016 */ +#define OPC_INB_SATA_HOST_OPSTART 23 /* 0x017 */ +#define OPC_INB_SATA_ABORT 24 /* 0x018 */ +#define OPC_INB_LOCAL_PHY_CONTROL 25 /* 0x019 */ +#define OPC_INB_GET_DEV_INFO 26 /* 0x01A */ +#define OPC_INB_FW_FLASH_UPDATE 32 /* 0x020 */ +#define OPC_INB_GPIO 34 /* 0x022 */ +#define OPC_INB_SAS_DIAG_MODE_START_END 35 /* 0x023 */ +#define OPC_INB_SAS_DIAG_EXECUTE 36 /* 0x024 */ +#define OPC_INB_SAS_HW_EVENT_ACK 37 /* 0x025 */ +#define OPC_INB_GET_TIME_STAMP 38 /* 0x026 */ +#define OPC_INB_PORT_CONTROL 39 /* 0x027 */ +#define OPC_INB_GET_NVMD_DATA 40 /* 0x028 */ +#define OPC_INB_SET_NVMD_DATA 41 /* 0x029 */ +#define OPC_INB_SET_DEVICE_STATE 42 /* 0x02A */ +#define OPC_INB_GET_DEVICE_STATE 43 /* 0x02B */ +#define OPC_INB_SET_DEV_INFO 44 /* 0x02C */ +#define OPC_INB_SAS_RE_INITIALIZE 45 /* 0x02D */ + +/* for Response Opcode of IOMB */ +#define OPC_OUB_ECHO 1 /* 0x001 */ +#define OPC_OUB_HW_EVENT 4 /* 0x004 */ +#define OPC_OUB_SSP_COMP 5 /* 0x005 */ +#define OPC_OUB_SMP_COMP 6 /* 0x006 */ +#define OPC_OUB_LOCAL_PHY_CNTRL 7 /* 0x007 */ +#define OPC_OUB_DEV_REGIST 10 /* 0x00A */ +#define OPC_OUB_DEREG_DEV 11 /* 0x00B */ +#define OPC_OUB_GET_DEV_HANDLE 12 /* 0x00C */ +#define OPC_OUB_SATA_COMP 13 /* 0x00D */ +#define OPC_OUB_SATA_EVENT 14 /* 0x00E */ +#define OPC_OUB_SSP_EVENT 15 /* 0x00F */ +#define OPC_OUB_DEV_HANDLE_ARRIV 16 /* 0x010 */ +/* SMP_RECEIVED Notification is removed */ +#define OPC_OUB_SMP_RECV_EVENT 17 /* 0x011 */ +#define OPC_OUB_SSP_RECV_EVENT 18 /* 0x012 */ +#define OPC_OUB_DEV_INFO 19 /* 0x013 */ +#define OPC_OUB_FW_FLASH_UPDATE 20 /* 0x014 */ +#define OPC_OUB_GPIO_RESPONSE 22 /* 0x016 */ +#define OPC_OUB_GPIO_EVENT 23 /* 0x017 */ +#define OPC_OUB_GENERAL_EVENT 24 /* 0x018 */ +#define OPC_OUB_SSP_ABORT_RSP 26 /* 0x01A */ +#define OPC_OUB_SATA_ABORT_RSP 27 /* 0x01B */ +#define OPC_OUB_SAS_DIAG_MODE_START_END 28 /* 0x01C */ +#define OPC_OUB_SAS_DIAG_EXECUTE 29 /* 0x01D */ +#define OPC_OUB_GET_TIME_STAMP 30 /* 0x01E */ +#define OPC_OUB_SAS_HW_EVENT_ACK 31 /* 0x01F */ +#define OPC_OUB_PORT_CONTROL 32 /* 0x020 */ +#define OPC_OUB_SKIP_ENTRY 33 /* 0x021 */ +#define OPC_OUB_SMP_ABORT_RSP 34 /* 0x022 */ +#define OPC_OUB_GET_NVMD_DATA 35 /* 0x023 */ +#define OPC_OUB_SET_NVMD_DATA 36 /* 0x024 */ +#define OPC_OUB_DEVICE_HANDLE_REMOVAL 37 /* 0x025 */ +#define OPC_OUB_SET_DEVICE_STATE 38 /* 0x026 */ +#define OPC_OUB_GET_DEVICE_STATE 39 /* 0x027 */ +#define OPC_OUB_SET_DEV_INFO 40 /* 0x028 */ +#define OPC_OUB_SAS_RE_INITIALIZE 41 /* 0x029 */ + +/* for phy start*/ +#define SPINHOLD_DISABLE (0x00 << 14) +#define SPINHOLD_ENABLE (0x01 << 14) +#define LINKMODE_SAS (0x01 << 12) +#define LINKMODE_DSATA (0x02 << 12) +#define LINKMODE_AUTO (0x03 << 12) +#define LINKRATE_15 (0x01 << 8) +#define LINKRATE_30 (0x02 << 8) +#define LINKRATE_60 (0x04 << 8) + +struct mpi_msg_hdr{ + __le32 header; /* Bits [11:0] - Message operation code */ + /* Bits [15:12] - Message Category */ + /* Bits [21:16] - Outboundqueue ID for the + operation completion message */ + /* Bits [23:22] - Reserved */ + /* Bits [28:24] - Buffer Count, indicates how + many buffer are allocated for the massage */ + /* Bits [30:29] - Reserved */ + /* Bits [31] - Message Valid bit */ +} __attribute__((packed, aligned(4))); + + +/* + * brief the data structure of PHY Start Command + * use to describe enable the phy (64 bytes) + */ +struct phy_start_req { + __le32 tag; + __le32 ase_sh_lm_slr_phyid; + struct sas_identify_frame sas_identify; + u32 reserved[5]; +} __attribute__((packed, aligned(4))); + + +/* + * brief the data structure of PHY Start Command + * use to disable the phy (64 bytes) + */ +struct phy_stop_req { + __le32 tag; + __le32 phy_id; + u32 reserved[13]; +} __attribute__((packed, aligned(4))); + + +/* set device bits fis - device to host */ +struct set_dev_bits_fis { + u8 fis_type; /* 0xA1*/ + u8 n_i_pmport; + /* b7 : n Bit. Notification bit. If set device needs attention. */ + /* b6 : i Bit. Interrupt Bit */ + /* b5-b4: reserved2 */ + /* b3-b0: PM Port */ + u8 status; + u8 error; + u32 _r_a; +} __attribute__ ((packed)); +/* PIO setup FIS - device to host */ +struct pio_setup_fis { + u8 fis_type; /* 0x5f */ + u8 i_d_pmPort; + /* b7 : reserved */ + /* b6 : i bit. Interrupt bit */ + /* b5 : d bit. data transfer direction. set to 1 for device to host + xfer */ + /* b4 : reserved */ + /* b3-b0: PM Port */ + u8 status; + u8 error; + u8 lbal; + u8 lbam; + u8 lbah; + u8 device; + u8 lbal_exp; + u8 lbam_exp; + u8 lbah_exp; + u8 _r_a; + u8 sector_count; + u8 sector_count_exp; + u8 _r_b; + u8 e_status; + u8 _r_c[2]; + u8 transfer_count; +} __attribute__ ((packed)); + +/* + * brief the data structure of SATA Completion Response + * use to discribe the sata task response (64 bytes) + */ +struct sata_completion_resp { + __le32 tag; + __le32 status; + __le32 param; + u32 sata_resp[12]; +} __attribute__((packed, aligned(4))); + + +/* + * brief the data structure of SAS HW Event Notification + * use to alert the host about the hardware event(64 bytes) + */ +struct hw_event_resp { + __le32 lr_evt_status_phyid_portid; + __le32 evt_param; + __le32 npip_portstate; + struct sas_identify_frame sas_identify; + struct dev_to_host_fis sata_fis; +} __attribute__((packed, aligned(4))); + + +/* + * brief the data structure of REGISTER DEVICE Command + * use to describe MPI REGISTER DEVICE Command (64 bytes) + */ + +struct reg_dev_req { + __le32 tag; + __le32 phyid_portid; + __le32 dtype_dlr_retry; + __le32 firstburstsize_ITNexustimeout; + u32 sas_addr_hi; + u32 sas_addr_low; + __le32 upper_device_id; + u32 reserved[8]; +} __attribute__((packed, aligned(4))); + + +/* + * brief the data structure of DEREGISTER DEVICE Command + * use to request spc to remove all internal resources associated + * with the device id (64 bytes) + */ + +struct dereg_dev_req { + __le32 tag; + __le32 device_id; + u32 reserved[13]; +} __attribute__((packed, aligned(4))); + + +/* + * brief the data structure of DEVICE_REGISTRATION Response + * use to notify the completion of the device registration (64 bytes) + */ + +struct dev_reg_resp { + __le32 tag; + __le32 status; + __le32 device_id; + u32 reserved[12]; +} __attribute__((packed, aligned(4))); + + +/* + * brief the data structure of Local PHY Control Command + * use to issue PHY CONTROL to local phy (64 bytes) + */ +struct local_phy_ctl_req { + __le32 tag; + __le32 phyop_phyid; + u32 reserved1[13]; +} __attribute__((packed, aligned(4))); + + +/** + * brief the data structure of Local Phy Control Response + * use to describe MPI Local Phy Control Response (64 bytes) + */ +struct local_phy_ctl_resp { + __le32 tag; + __le32 phyop_phyid; + __le32 status; + u32 reserved[12]; +} __attribute__((packed, aligned(4))); + + +#define OP_BITS 0x0000FF00 +#define ID_BITS 0x0000000F + +/* + * brief the data structure of PORT Control Command + * use to control port properties (64 bytes) + */ + +struct port_ctl_req { + __le32 tag; + __le32 portop_portid; + __le32 param0; + __le32 param1; + u32 reserved1[11]; +} __attribute__((packed, aligned(4))); + + +/* + * brief the data structure of HW Event Ack Command + * use to acknowledge receive HW event (64 bytes) + */ + +struct hw_event_ack_req { + __le32 tag; + __le32 sea_phyid_portid; + __le32 param0; + __le32 param1; + u32 reserved1[11]; +} __attribute__((packed, aligned(4))); + + +/* + * brief the data structure of SSP Completion Response + * use to indicate a SSP Completion (n bytes) + */ +struct ssp_completion_resp { + __le32 tag; + __le32 status; + __le32 param; + __le32 ssptag_rescv_rescpad; + struct ssp_response_iu ssp_resp_iu; + __le32 residual_count; +} __attribute__((packed, aligned(4))); + + +#define SSP_RESCV_BIT 0x00010000 + +/* + * brief the data structure of SATA EVNET esponse + * use to indicate a SATA Completion (64 bytes) + */ + +struct sata_event_resp { + __le32 tag; + __le32 event; + __le32 port_id; + __le32 device_id; + u32 reserved[11]; +} __attribute__((packed, aligned(4))); + +/* + * brief the data structure of SSP EVNET esponse + * use to indicate a SSP Completion (64 bytes) + */ + +struct ssp_event_resp { + __le32 tag; + __le32 event; + __le32 port_id; + __le32 device_id; + u32 reserved[11]; +} __attribute__((packed, aligned(4))); + +/** + * brief the data structure of General Event Notification Response + * use to describe MPI General Event Notification Response (64 bytes) + */ +struct general_event_resp { + __le32 status; + __le32 inb_IOMB_payload[14]; +} __attribute__((packed, aligned(4))); + + +#define GENERAL_EVENT_PAYLOAD 14 +#define OPCODE_BITS 0x00000fff + +/* + * brief the data structure of SMP Request Command + * use to describe MPI SMP REQUEST Command (64 bytes) + */ +struct smp_req { + __le32 tag; + __le32 device_id; + __le32 len_ip_ir; + /* Bits [0] - Indirect response */ + /* Bits [1] - Indirect Payload */ + /* Bits [15:2] - Reserved */ + /* Bits [23:16] - direct payload Len */ + /* Bits [31:24] - Reserved */ + u8 smp_req16[16]; + union { + u8 smp_req[32]; + struct { + __le64 long_req_addr;/* sg dma address, LE */ + __le32 long_req_size;/* LE */ + u32 _r_a; + __le64 long_resp_addr;/* sg dma address, LE */ + __le32 long_resp_size;/* LE */ + u32 _r_b; + } long_smp_req;/* sequencer extension */ + }; +} __attribute__((packed, aligned(4))); +/* + * brief the data structure of SMP Completion Response + * use to describe MPI SMP Completion Response (64 bytes) + */ +struct smp_completion_resp { + __le32 tag; + __le32 status; + __le32 param; + __le32 _r_a[12]; +} __attribute__((packed, aligned(4))); + +/* + *brief the data structure of SSP SMP SATA Abort Command + * use to describe MPI SSP SMP & SATA Abort Command (64 bytes) + */ +struct task_abort_req { + __le32 tag; + __le32 device_id; + __le32 tag_to_abort; + __le32 abort_all; + u32 reserved[11]; +} __attribute__((packed, aligned(4))); + +/* These flags used for SSP SMP & SATA Abort */ +#define ABORT_MASK 0x3 +#define ABORT_SINGLE 0x0 +#define ABORT_ALL 0x1 + +/** + * brief the data structure of SSP SATA SMP Abort Response + * use to describe SSP SMP & SATA Abort Response ( 64 bytes) + */ +struct task_abort_resp { + __le32 tag; + __le32 status; + __le32 scp; + u32 reserved[12]; +} __attribute__((packed, aligned(4))); + + +/** + * brief the data structure of SAS Diagnostic Start/End Command + * use to describe MPI SAS Diagnostic Start/End Command (64 bytes) + */ +struct sas_diag_start_end_req { + __le32 tag; + __le32 operation_phyid; + u32 reserved[13]; +} __attribute__((packed, aligned(4))); + + +/** + * brief the data structure of SAS Diagnostic Execute Command + * use to describe MPI SAS Diagnostic Execute Command (64 bytes) + */ +struct sas_diag_execute_req{ + __le32 tag; + __le32 cmdtype_cmddesc_phyid; + __le32 pat1_pat2; + __le32 threshold; + __le32 codepat_errmsk; + __le32 pmon; + __le32 pERF1CTL; + u32 reserved[8]; +} __attribute__((packed, aligned(4))); + + +#define SAS_DIAG_PARAM_BYTES 24 + +/* + * brief the data structure of Set Device State Command + * use to describe MPI Set Device State Command (64 bytes) + */ +struct set_dev_state_req { + __le32 tag; + __le32 device_id; + __le32 nds; + u32 reserved[12]; +} __attribute__((packed, aligned(4))); + + +/* + * brief the data structure of SATA Start Command + * use to describe MPI SATA IO Start Command (64 bytes) + */ + +struct sata_start_req { + __le32 tag; + __le32 device_id; + __le32 data_len; + __le32 ncqtag_atap_dir_m; + struct host_to_dev_fis sata_fis; + u32 reserved1; + u32 reserved2; + u32 addr_low; + u32 addr_high; + __le32 len; + __le32 esgl; +} __attribute__((packed, aligned(4))); + +/** + * brief the data structure of SSP INI TM Start Command + * use to describe MPI SSP INI TM Start Command (64 bytes) + */ +struct ssp_ini_tm_start_req { + __le32 tag; + __le32 device_id; + __le32 relate_tag; + __le32 tmf; + u8 lun[8]; + __le32 ds_ads_m; + u32 reserved[8]; +} __attribute__((packed, aligned(4))); + + +struct ssp_info_unit { + u8 lun[8];/* SCSI Logical Unit Number */ + u8 reserved1;/* reserved */ + u8 efb_prio_attr; + /* B7 : enabledFirstBurst */ + /* B6-3 : taskPriority */ + /* B2-0 : taskAttribute */ + u8 reserved2; /* reserved */ + u8 additional_cdb_len; + /* B7-2 : additional_cdb_len */ + /* B1-0 : reserved */ + u8 cdb[16];/* The SCSI CDB up to 16 bytes length */ +} __attribute__((packed, aligned(4))); + + +/** + * brief the data structure of SSP INI IO Start Command + * use to describe MPI SSP INI IO Start Command (64 bytes) + */ +struct ssp_ini_io_start_req { + __le32 tag; + __le32 device_id; + __le32 data_len; + __le32 dir_m_tlr; + struct ssp_info_unit ssp_iu; + __le32 addr_low; + __le32 addr_high; + __le32 len; + __le32 esgl; +} __attribute__((packed, aligned(4))); + + +/** + * brief the data structure of Firmware download + * use to describe MPI FW DOWNLOAD Command (64 bytes) + */ +struct fw_flash_Update_req { + __le32 tag; + __le32 cur_image_offset; + __le32 cur_image_len; + __le32 total_image_len; + u32 reserved0[7]; + __le32 sgl_addr_lo; + __le32 sgl_addr_hi; + __le32 len; + __le32 ext_reserved; +} __attribute__((packed, aligned(4))); + + +#define FWFLASH_IOMB_RESERVED_LEN 0x07 +/** + * brief the data structure of FW_FLASH_UPDATE Response + * use to describe MPI FW_FLASH_UPDATE Response (64 bytes) + * + */ +struct fw_flash_Update_resp { + dma_addr_t tag; + __le32 status; + u32 reserved[13]; +} __attribute__((packed, aligned(4))); + + +/** + * brief the data structure of Get NVM Data Command + * use to get data from NVM in HBA(64 bytes) + */ +struct get_nvm_data_req { + __le32 tag; + __le32 len_ir_vpdd; + __le32 vpd_offset; + u32 reserved[8]; + __le32 resp_addr_lo; + __le32 resp_addr_hi; + __le32 resp_len; + u32 reserved1; +} __attribute__((packed, aligned(4))); + + +struct set_nvm_data_req { + __le32 tag; + __le32 len_ir_vpdd; + __le32 vpd_offset; + u32 reserved[8]; + __le32 resp_addr_lo; + __le32 resp_addr_hi; + __le32 resp_len; + u32 reserved1; +} __attribute__((packed, aligned(4))); + + +#define TWI_DEVICE 0x0 +#define C_SEEPROM 0x1 +#define VPD_FLASH 0x4 +#define AAP1_RDUMP 0x5 +#define IOP_RDUMP 0x6 +#define EXPAN_ROM 0x7 + +#define IPMode 0x80000000 +#define NVMD_TYPE 0x0000000F +#define NVMD_STAT 0x0000FFFF +#define NVMD_LEN 0xFF000000 +/** + * brief the data structure of Get NVMD Data Response + * use to describe MPI Get NVMD Data Response (64 bytes) + */ +struct get_nvm_data_resp { + __le32 tag; + __le32 ir_tda_bn_dps_das_nvm; + __le32 dlen_status; + __le32 nvm_data[12]; +} __attribute__((packed, aligned(4))); + + +/** + * brief the data structure of SAS Diagnostic Start/End Response + * use to describe MPI SAS Diagnostic Start/End Response (64 bytes) + * + */ +struct sas_diag_start_end_resp { + __le32 tag; + __le32 status; + u32 reserved[13]; +} __attribute__((packed, aligned(4))); + + +/** + * brief the data structure of SAS Diagnostic Execute Response + * use to describe MPI SAS Diagnostic Execute Response (64 bytes) + * + */ +struct sas_diag_execute_resp { + __le32 tag; + __le32 cmdtype_cmddesc_phyid; + __le32 Status; + __le32 ReportData; + u32 reserved[11]; +} __attribute__((packed, aligned(4))); + + +/** + * brief the data structure of Set Device State Response + * use to describe MPI Set Device State Response (64 bytes) + * + */ +struct set_dev_state_resp { + __le32 tag; + __le32 status; + __le32 device_id; + __le32 pds_nds; + u32 reserved[11]; +} __attribute__((packed, aligned(4))); + + +#define NDS_BITS 0x0F +#define PDS_BITS 0xF0 + +/* + * HW Events type + */ + +#define HW_EVENT_RESET_START 0x01 +#define HW_EVENT_CHIP_RESET_COMPLETE 0x02 +#define HW_EVENT_PHY_STOP_STATUS 0x03 +#define HW_EVENT_SAS_PHY_UP 0x04 +#define HW_EVENT_SATA_PHY_UP 0x05 +#define HW_EVENT_SATA_SPINUP_HOLD 0x06 +#define HW_EVENT_PHY_DOWN 0x07 +#define HW_EVENT_PORT_INVALID 0x08 +#define HW_EVENT_BROADCAST_CHANGE 0x09 +#define HW_EVENT_PHY_ERROR 0x0A +#define HW_EVENT_BROADCAST_SES 0x0B +#define HW_EVENT_INBOUND_CRC_ERROR 0x0C +#define HW_EVENT_HARD_RESET_RECEIVED 0x0D +#define HW_EVENT_MALFUNCTION 0x0E +#define HW_EVENT_ID_FRAME_TIMEOUT 0x0F +#define HW_EVENT_BROADCAST_EXP 0x10 +#define HW_EVENT_PHY_START_STATUS 0x11 +#define HW_EVENT_LINK_ERR_INVALID_DWORD 0x12 +#define HW_EVENT_LINK_ERR_DISPARITY_ERROR 0x13 +#define HW_EVENT_LINK_ERR_CODE_VIOLATION 0x14 +#define HW_EVENT_LINK_ERR_LOSS_OF_DWORD_SYNCH 0x15 +#define HW_EVENT_LINK_ERR_PHY_RESET_FAILED 0x16 +#define HW_EVENT_PORT_RECOVERY_TIMER_TMO 0x17 +#define HW_EVENT_PORT_RECOVER 0x18 +#define HW_EVENT_PORT_RESET_TIMER_TMO 0x19 +#define HW_EVENT_PORT_RESET_COMPLETE 0x20 +#define EVENT_BROADCAST_ASYNCH_EVENT 0x21 + +/* port state */ +#define PORT_NOT_ESTABLISHED 0x00 +#define PORT_VALID 0x01 +#define PORT_LOSTCOMM 0x02 +#define PORT_IN_RESET 0x04 +#define PORT_INVALID 0x08 + +/* + * SSP/SMP/SATA IO Completion Status values + */ + +#define IO_SUCCESS 0x00 +#define IO_ABORTED 0x01 +#define IO_OVERFLOW 0x02 +#define IO_UNDERFLOW 0x03 +#define IO_FAILED 0x04 +#define IO_ABORT_RESET 0x05 +#define IO_NOT_VALID 0x06 +#define IO_NO_DEVICE 0x07 +#define IO_ILLEGAL_PARAMETER 0x08 +#define IO_LINK_FAILURE 0x09 +#define IO_PROG_ERROR 0x0A +#define IO_EDC_IN_ERROR 0x0B +#define IO_EDC_OUT_ERROR 0x0C +#define IO_ERROR_HW_TIMEOUT 0x0D +#define IO_XFER_ERROR_BREAK 0x0E +#define IO_XFER_ERROR_PHY_NOT_READY 0x0F +#define IO_OPEN_CNX_ERROR_PROTOCOL_NOT_SUPPORTED 0x10 +#define IO_OPEN_CNX_ERROR_ZONE_VIOLATION 0x11 +#define IO_OPEN_CNX_ERROR_BREAK 0x12 +#define IO_OPEN_CNX_ERROR_IT_NEXUS_LOSS 0x13 +#define IO_OPEN_CNX_ERROR_BAD_DESTINATION 0x14 +#define IO_OPEN_CNX_ERROR_CONNECTION_RATE_NOT_SUPPORTED 0x15 +#define IO_OPEN_CNX_ERROR_STP_RESOURCES_BUSY 0x16 +#define IO_OPEN_CNX_ERROR_WRONG_DESTINATION 0x17 +#define IO_OPEN_CNX_ERROR_UNKNOWN_ERROR 0x18 +#define IO_XFER_ERROR_NAK_RECEIVED 0x19 +#define IO_XFER_ERROR_ACK_NAK_TIMEOUT 0x1A +#define IO_XFER_ERROR_PEER_ABORTED 0x1B +#define IO_XFER_ERROR_RX_FRAME 0x1C +#define IO_XFER_ERROR_DMA 0x1D +#define IO_XFER_ERROR_CREDIT_TIMEOUT 0x1E +#define IO_XFER_ERROR_SATA_LINK_TIMEOUT 0x1F +#define IO_XFER_ERROR_SATA 0x20 +#define IO_XFER_ERROR_ABORTED_DUE_TO_SRST 0x22 +#define IO_XFER_ERROR_REJECTED_NCQ_MODE 0x21 +#define IO_XFER_ERROR_ABORTED_NCQ_MODE 0x23 +#define IO_XFER_OPEN_RETRY_TIMEOUT 0x24 +#define IO_XFER_SMP_RESP_CONNECTION_ERROR 0x25 +#define IO_XFER_ERROR_UNEXPECTED_PHASE 0x26 +#define IO_XFER_ERROR_XFER_RDY_OVERRUN 0x27 +#define IO_XFER_ERROR_XFER_RDY_NOT_EXPECTED 0x28 + +#define IO_XFER_ERROR_CMD_ISSUE_ACK_NAK_TIMEOUT 0x30 +#define IO_XFER_ERROR_CMD_ISSUE_BREAK_BEFORE_ACK_NAK 0x31 +#define IO_XFER_ERROR_CMD_ISSUE_PHY_DOWN_BEFORE_ACK_NAK 0x32 + +#define IO_XFER_ERROR_OFFSET_MISMATCH 0x34 +#define IO_XFER_ERROR_XFER_ZERO_DATA_LEN 0x35 +#define IO_XFER_CMD_FRAME_ISSUED 0x36 +#define IO_ERROR_INTERNAL_SMP_RESOURCE 0x37 +#define IO_PORT_IN_RESET 0x38 +#define IO_DS_NON_OPERATIONAL 0x39 +#define IO_DS_IN_RECOVERY 0x3A +#define IO_TM_TAG_NOT_FOUND 0x3B +#define IO_XFER_PIO_SETUP_ERROR 0x3C +#define IO_SSP_EXT_IU_ZERO_LEN_ERROR 0x3D +#define IO_DS_IN_ERROR 0x3E +#define IO_OPEN_CNX_ERROR_HW_RESOURCE_BUSY 0x3F +#define IO_ABORT_IN_PROGRESS 0x40 +#define IO_ABORT_DELAYED 0x41 +#define IO_INVALID_LENGTH 0x42 + +/* WARNING: This error code must always be the last number. + * If you add error code, modify this code also + * It is used as an index + */ +#define IO_ERROR_UNKNOWN_GENERIC 0x43 + +/* MSGU CONFIGURATION TABLE*/ + +#define SPC_MSGU_CFG_TABLE_UPDATE 0x01/* Inbound doorbell bit0 */ +#define SPC_MSGU_CFG_TABLE_RESET 0x02/* Inbound doorbell bit1 */ +#define SPC_MSGU_CFG_TABLE_FREEZE 0x04/* Inbound doorbell bit2 */ +#define SPC_MSGU_CFG_TABLE_UNFREEZE 0x08/* Inbound doorbell bit4 */ +#define MSGU_IBDB_SET 0x04 +#define MSGU_HOST_INT_STATUS 0x08 +#define MSGU_HOST_INT_MASK 0x0C +#define MSGU_IOPIB_INT_STATUS 0x18 +#define MSGU_IOPIB_INT_MASK 0x1C +#define MSGU_IBDB_CLEAR 0x20/* RevB - Host not use */ +#define MSGU_MSGU_CONTROL 0x24 +#define MSGU_ODR 0x3C/* RevB */ +#define MSGU_ODCR 0x40/* RevB */ +#define MSGU_SCRATCH_PAD_0 0x44 +#define MSGU_SCRATCH_PAD_1 0x48 +#define MSGU_SCRATCH_PAD_2 0x4C +#define MSGU_SCRATCH_PAD_3 0x50 +#define MSGU_HOST_SCRATCH_PAD_0 0x54 +#define MSGU_HOST_SCRATCH_PAD_1 0x58 +#define MSGU_HOST_SCRATCH_PAD_2 0x5C +#define MSGU_HOST_SCRATCH_PAD_3 0x60 +#define MSGU_HOST_SCRATCH_PAD_4 0x64 +#define MSGU_HOST_SCRATCH_PAD_5 0x68 +#define MSGU_HOST_SCRATCH_PAD_6 0x6C +#define MSGU_HOST_SCRATCH_PAD_7 0x70 +#define MSGU_ODMR 0x74/* RevB */ + +/* bit definition for ODMR register */ +#define ODMR_MASK_ALL 0xFFFFFFFF/* mask all + interrupt vector */ +#define ODMR_CLEAR_ALL 0/* clear all + interrupt vector */ +/* bit definition for ODCR register */ +#define ODCR_CLEAR_ALL 0xFFFFFFFF /* mask all + interrupt vector*/ +/* MSIX Interupts */ +#define MSIX_TABLE_OFFSET 0x2000 +#define MSIX_TABLE_ELEMENT_SIZE 0x10 +#define MSIX_INTERRUPT_CONTROL_OFFSET 0xC +#define MSIX_TABLE_BASE (MSIX_TABLE_OFFSET + MSIX_INTERRUPT_CONTROL_OFFSET) +#define MSIX_INTERRUPT_DISABLE 0x1 +#define MSIX_INTERRUPT_ENABLE 0x0 + + +/* state definition for Scratch Pad1 register */ +#define SCRATCH_PAD1_POR 0x00 /* power on reset state */ +#define SCRATCH_PAD1_SFR 0x01 /* soft reset state */ +#define SCRATCH_PAD1_ERR 0x02 /* error state */ +#define SCRATCH_PAD1_RDY 0x03 /* ready state */ +#define SCRATCH_PAD1_RST 0x04 /* soft reset toggle flag */ +#define SCRATCH_PAD1_AAP1RDY_RST 0x08 /* AAP1 ready for soft reset */ +#define SCRATCH_PAD1_STATE_MASK 0xFFFFFFF0 /* ScratchPad1 + Mask, bit1-0 State, bit2 Soft Reset, bit3 FW RDY for Soft Reset */ +#define SCRATCH_PAD1_RESERVED 0x000003F8 /* Scratch Pad1 + Reserved bit 3 to 9 */ + + /* state definition for Scratch Pad2 register */ +#define SCRATCH_PAD2_POR 0x00 /* power on state */ +#define SCRATCH_PAD2_SFR 0x01 /* soft reset state */ +#define SCRATCH_PAD2_ERR 0x02 /* error state */ +#define SCRATCH_PAD2_RDY 0x03 /* ready state */ +#define SCRATCH_PAD2_FWRDY_RST 0x04 /* FW ready for soft reset flag*/ +#define SCRATCH_PAD2_IOPRDY_RST 0x08 /* IOP ready for soft reset */ +#define SCRATCH_PAD2_STATE_MASK 0xFFFFFFF4 /* ScratchPad 2 + Mask, bit1-0 State */ +#define SCRATCH_PAD2_RESERVED 0x000003FC /* Scratch Pad1 + Reserved bit 2 to 9 */ + +#define SCRATCH_PAD_ERROR_MASK 0xFFFFFC00 /* Error mask bits */ +#define SCRATCH_PAD_STATE_MASK 0x00000003 /* State Mask bits */ + +/* main configuration offset - byte offset */ +#define MAIN_SIGNATURE_OFFSET 0x00/* DWORD 0x00 */ +#define MAIN_INTERFACE_REVISION 0x04/* DWORD 0x01 */ +#define MAIN_FW_REVISION 0x08/* DWORD 0x02 */ +#define MAIN_MAX_OUTSTANDING_IO_OFFSET 0x0C/* DWORD 0x03 */ +#define MAIN_MAX_SGL_OFFSET 0x10/* DWORD 0x04 */ +#define MAIN_CNTRL_CAP_OFFSET 0x14/* DWORD 0x05 */ +#define MAIN_GST_OFFSET 0x18/* DWORD 0x06 */ +#define MAIN_IBQ_OFFSET 0x1C/* DWORD 0x07 */ +#define MAIN_OBQ_OFFSET 0x20/* DWORD 0x08 */ +#define MAIN_IQNPPD_HPPD_OFFSET 0x24/* DWORD 0x09 */ +#define MAIN_OB_HW_EVENT_PID03_OFFSET 0x28/* DWORD 0x0A */ +#define MAIN_OB_HW_EVENT_PID47_OFFSET 0x2C/* DWORD 0x0B */ +#define MAIN_OB_NCQ_EVENT_PID03_OFFSET 0x30/* DWORD 0x0C */ +#define MAIN_OB_NCQ_EVENT_PID47_OFFSET 0x34/* DWORD 0x0D */ +#define MAIN_TITNX_EVENT_PID03_OFFSET 0x38/* DWORD 0x0E */ +#define MAIN_TITNX_EVENT_PID47_OFFSET 0x3C/* DWORD 0x0F */ +#define MAIN_OB_SSP_EVENT_PID03_OFFSET 0x40/* DWORD 0x10 */ +#define MAIN_OB_SSP_EVENT_PID47_OFFSET 0x44/* DWORD 0x11 */ +#define MAIN_OB_SMP_EVENT_PID03_OFFSET 0x48/* DWORD 0x12 */ +#define MAIN_OB_SMP_EVENT_PID47_OFFSET 0x4C/* DWORD 0x13 */ +#define MAIN_EVENT_LOG_ADDR_HI 0x50/* DWORD 0x14 */ +#define MAIN_EVENT_LOG_ADDR_LO 0x54/* DWORD 0x15 */ +#define MAIN_EVENT_LOG_BUFF_SIZE 0x58/* DWORD 0x16 */ +#define MAIN_EVENT_LOG_OPTION 0x5C/* DWORD 0x17 */ +#define MAIN_IOP_EVENT_LOG_ADDR_HI 0x60/* DWORD 0x18 */ +#define MAIN_IOP_EVENT_LOG_ADDR_LO 0x64/* DWORD 0x19 */ +#define MAIN_IOP_EVENT_LOG_BUFF_SIZE 0x68/* DWORD 0x1A */ +#define MAIN_IOP_EVENT_LOG_OPTION 0x6C/* DWORD 0x1B */ +#define MAIN_FATAL_ERROR_INTERRUPT 0x70/* DWORD 0x1C */ +#define MAIN_FATAL_ERROR_RDUMP0_OFFSET 0x74/* DWORD 0x1D */ +#define MAIN_FATAL_ERROR_RDUMP0_LENGTH 0x78/* DWORD 0x1E */ +#define MAIN_FATAL_ERROR_RDUMP1_OFFSET 0x7C/* DWORD 0x1F */ +#define MAIN_FATAL_ERROR_RDUMP1_LENGTH 0x80/* DWORD 0x20 */ +#define MAIN_HDA_FLAGS_OFFSET 0x84/* DWORD 0x21 */ +#define MAIN_ANALOG_SETUP_OFFSET 0x88/* DWORD 0x22 */ + +/* Gereral Status Table offset - byte offset */ +#define GST_GSTLEN_MPIS_OFFSET 0x00 +#define GST_IQ_FREEZE_STATE0_OFFSET 0x04 +#define GST_IQ_FREEZE_STATE1_OFFSET 0x08 +#define GST_MSGUTCNT_OFFSET 0x0C +#define GST_IOPTCNT_OFFSET 0x10 +#define GST_PHYSTATE_OFFSET 0x18 +#define GST_PHYSTATE0_OFFSET 0x18 +#define GST_PHYSTATE1_OFFSET 0x1C +#define GST_PHYSTATE2_OFFSET 0x20 +#define GST_PHYSTATE3_OFFSET 0x24 +#define GST_PHYSTATE4_OFFSET 0x28 +#define GST_PHYSTATE5_OFFSET 0x2C +#define GST_PHYSTATE6_OFFSET 0x30 +#define GST_PHYSTATE7_OFFSET 0x34 +#define GST_RERRINFO_OFFSET 0x44 + +/* General Status Table - MPI state */ +#define GST_MPI_STATE_UNINIT 0x00 +#define GST_MPI_STATE_INIT 0x01 +#define GST_MPI_STATE_TERMINATION 0x02 +#define GST_MPI_STATE_ERROR 0x03 +#define GST_MPI_STATE_MASK 0x07 + +#define MBIC_NMI_ENABLE_VPE0_IOP 0x000418 +#define MBIC_NMI_ENABLE_VPE0_AAP1 0x000418 +/* PCIE registers - BAR2(0x18), BAR1(win) 0x010000 */ +#define PCIE_EVENT_INTERRUPT_ENABLE 0x003040 +#define PCIE_EVENT_INTERRUPT 0x003044 +#define PCIE_ERROR_INTERRUPT_ENABLE 0x003048 +#define PCIE_ERROR_INTERRUPT 0x00304C +/* signature defintion for host scratch pad0 register */ +#define SPC_SOFT_RESET_SIGNATURE 0x252acbcd +/* Signature for Soft Reset */ + +/* SPC Reset register - BAR4(0x20), BAR2(win) (need dynamic mapping) */ +#define SPC_REG_RESET 0x000000/* reset register */ + +/* bit difination for SPC_RESET register */ +#define SPC_REG_RESET_OSSP 0x00000001 +#define SPC_REG_RESET_RAAE 0x00000002 +#define SPC_REG_RESET_PCS_SPBC 0x00000004 +#define SPC_REG_RESET_PCS_IOP_SS 0x00000008 +#define SPC_REG_RESET_PCS_AAP1_SS 0x00000010 +#define SPC_REG_RESET_PCS_AAP2_SS 0x00000020 +#define SPC_REG_RESET_PCS_LM 0x00000040 +#define SPC_REG_RESET_PCS 0x00000080 +#define SPC_REG_RESET_GSM 0x00000100 +#define SPC_REG_RESET_DDR2 0x00010000 +#define SPC_REG_RESET_BDMA_CORE 0x00020000 +#define SPC_REG_RESET_BDMA_SXCBI 0x00040000 +#define SPC_REG_RESET_PCIE_AL_SXCBI 0x00080000 +#define SPC_REG_RESET_PCIE_PWR 0x00100000 +#define SPC_REG_RESET_PCIE_SFT 0x00200000 +#define SPC_REG_RESET_PCS_SXCBI 0x00400000 +#define SPC_REG_RESET_LMS_SXCBI 0x00800000 +#define SPC_REG_RESET_PMIC_SXCBI 0x01000000 +#define SPC_REG_RESET_PMIC_CORE 0x02000000 +#define SPC_REG_RESET_PCIE_PC_SXCBI 0x04000000 +#define SPC_REG_RESET_DEVICE 0x80000000 + +/* registers for BAR Shifting - BAR2(0x18), BAR1(win) */ +#define SPC_IBW_AXI_TRANSLATION_LOW 0x003258 + +#define MBIC_AAP1_ADDR_BASE 0x060000 +#define MBIC_IOP_ADDR_BASE 0x070000 +#define GSM_ADDR_BASE 0x0700000 +/* Dynamic map through Bar4 - 0x00700000 */ +#define GSM_CONFIG_RESET 0x00000000 +#define RAM_ECC_DB_ERR 0x00000018 +#define GSM_READ_ADDR_PARITY_INDIC 0x00000058 +#define GSM_WRITE_ADDR_PARITY_INDIC 0x00000060 +#define GSM_WRITE_DATA_PARITY_INDIC 0x00000068 +#define GSM_READ_ADDR_PARITY_CHECK 0x00000038 +#define GSM_WRITE_ADDR_PARITY_CHECK 0x00000040 +#define GSM_WRITE_DATA_PARITY_CHECK 0x00000048 + +#define RB6_ACCESS_REG 0x6A0000 +#define HDAC_EXEC_CMD 0x0002 +#define HDA_C_PA 0xcb +#define HDA_SEQ_ID_BITS 0x00ff0000 +#define HDA_GSM_OFFSET_BITS 0x00FFFFFF +#define MBIC_AAP1_ADDR_BASE 0x060000 +#define MBIC_IOP_ADDR_BASE 0x070000 +#define GSM_ADDR_BASE 0x0700000 +#define SPC_TOP_LEVEL_ADDR_BASE 0x000000 +#define GSM_CONFIG_RESET_VALUE 0x00003b00 +#define GPIO_ADDR_BASE 0x00090000 +#define GPIO_GPIO_0_0UTPUT_CTL_OFFSET 0x0000010c + +/* RB6 offset */ +#define SPC_RB6_OFFSET 0x80C0 +/* Magic number of soft reset for RB6 */ +#define RB6_MAGIC_NUMBER_RST 0x1234 + +/* Device Register status */ +#define DEVREG_SUCCESS 0x00 +#define DEVREG_FAILURE_OUT_OF_RESOURCE 0x01 +#define DEVREG_FAILURE_DEVICE_ALREADY_REGISTERED 0x02 +#define DEVREG_FAILURE_INVALID_PHY_ID 0x03 +#define DEVREG_FAILURE_PHY_ID_ALREADY_REGISTERED 0x04 +#define DEVREG_FAILURE_PORT_ID_OUT_OF_RANGE 0x05 +#define DEVREG_FAILURE_PORT_NOT_VALID_STATE 0x06 +#define DEVREG_FAILURE_DEVICE_TYPE_NOT_VALID 0x07 + +#endif + diff --git a/drivers/scsi/pm8001/pm8001_init.c b/drivers/scsi/pm8001/pm8001_init.c new file mode 100644 index 000000000000..811b5d36d5f0 --- /dev/null +++ b/drivers/scsi/pm8001/pm8001_init.c @@ -0,0 +1,888 @@ +/* + * PMC-Sierra SPC 8001 SAS/SATA based host adapters driver + * + * Copyright (c) 2008-2009 USI Co., Ltd. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. + * 3. Neither the names of the above-listed copyright holders nor the names + * of any contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. + * + */ + +#include "pm8001_sas.h" +#include "pm8001_chips.h" + +static struct scsi_transport_template *pm8001_stt; + +static const struct pm8001_chip_info pm8001_chips[] = { + [chip_8001] = { 8, &pm8001_8001_dispatch,}, +}; +static int pm8001_id; + +LIST_HEAD(hba_list); + +/** + * The main structure which LLDD must register for scsi core. + */ +static struct scsi_host_template pm8001_sht = { + .module = THIS_MODULE, + .name = DRV_NAME, + .queuecommand = sas_queuecommand, + .target_alloc = sas_target_alloc, + .slave_configure = pm8001_slave_configure, + .slave_destroy = sas_slave_destroy, + .scan_finished = pm8001_scan_finished, + .scan_start = pm8001_scan_start, + .change_queue_depth = sas_change_queue_depth, + .change_queue_type = sas_change_queue_type, + .bios_param = sas_bios_param, + .can_queue = 1, + .cmd_per_lun = 1, + .this_id = -1, + .sg_tablesize = SG_ALL, + .max_sectors = SCSI_DEFAULT_MAX_SECTORS, + .use_clustering = ENABLE_CLUSTERING, + .eh_device_reset_handler = sas_eh_device_reset_handler, + .eh_bus_reset_handler = sas_eh_bus_reset_handler, + .slave_alloc = pm8001_slave_alloc, + .target_destroy = sas_target_destroy, + .ioctl = sas_ioctl, + .shost_attrs = pm8001_host_attrs, +}; + +/** + * Sas layer call this function to execute specific task. + */ +static struct sas_domain_function_template pm8001_transport_ops = { + .lldd_dev_found = pm8001_dev_found, + .lldd_dev_gone = pm8001_dev_gone, + + .lldd_execute_task = pm8001_queue_command, + .lldd_control_phy = pm8001_phy_control, + + .lldd_abort_task = pm8001_abort_task, + .lldd_abort_task_set = pm8001_abort_task_set, + .lldd_clear_aca = pm8001_clear_aca, + .lldd_clear_task_set = pm8001_clear_task_set, + .lldd_I_T_nexus_reset = pm8001_I_T_nexus_reset, + .lldd_lu_reset = pm8001_lu_reset, + .lldd_query_task = pm8001_query_task, +}; + +/** + *pm8001_phy_init - initiate our adapter phys + *@pm8001_ha: our hba structure. + *@phy_id: phy id. + */ +static void __devinit pm8001_phy_init(struct pm8001_hba_info *pm8001_ha, + int phy_id) +{ + struct pm8001_phy *phy = &pm8001_ha->phy[phy_id]; + struct asd_sas_phy *sas_phy = &phy->sas_phy; + phy->phy_state = 0; + phy->pm8001_ha = pm8001_ha; + sas_phy->enabled = (phy_id < pm8001_ha->chip->n_phy) ? 1 : 0; + sas_phy->class = SAS; + sas_phy->iproto = SAS_PROTOCOL_ALL; + sas_phy->tproto = 0; + sas_phy->type = PHY_TYPE_PHYSICAL; + sas_phy->role = PHY_ROLE_INITIATOR; + sas_phy->oob_mode = OOB_NOT_CONNECTED; + sas_phy->linkrate = SAS_LINK_RATE_UNKNOWN; + sas_phy->id = phy_id; + sas_phy->sas_addr = &pm8001_ha->sas_addr[0]; + sas_phy->frame_rcvd = &phy->frame_rcvd[0]; + sas_phy->ha = (struct sas_ha_struct *)pm8001_ha->shost->hostdata; + sas_phy->lldd_phy = phy; +} + +/** + *pm8001_free - free hba + *@pm8001_ha: our hba structure. + * + */ +static void pm8001_free(struct pm8001_hba_info *pm8001_ha) +{ + int i; + struct pm8001_wq *wq; + + if (!pm8001_ha) + return; + + for (i = 0; i < USI_MAX_MEMCNT; i++) { + if (pm8001_ha->memoryMap.region[i].virt_ptr != NULL) { + pci_free_consistent(pm8001_ha->pdev, + pm8001_ha->memoryMap.region[i].element_size, + pm8001_ha->memoryMap.region[i].virt_ptr, + pm8001_ha->memoryMap.region[i].phys_addr); + } + } + PM8001_CHIP_DISP->chip_iounmap(pm8001_ha); + if (pm8001_ha->shost) + scsi_host_put(pm8001_ha->shost); + list_for_each_entry(wq, &pm8001_ha->wq_list, entry) + cancel_delayed_work(&wq->work_q); + kfree(pm8001_ha->tags); + kfree(pm8001_ha); +} + +#ifdef PM8001_USE_TASKLET +static void pm8001_tasklet(unsigned long opaque) +{ + struct pm8001_hba_info *pm8001_ha; + pm8001_ha = (struct pm8001_hba_info *)opaque;; + if (unlikely(!pm8001_ha)) + BUG_ON(1); + PM8001_CHIP_DISP->isr(pm8001_ha); +} +#endif + + + /** + * pm8001_interrupt - when HBA originate a interrupt,we should invoke this + * dispatcher to handle each case. + * @irq: irq number. + * @opaque: the passed general host adapter struct + */ +static irqreturn_t pm8001_interrupt(int irq, void *opaque) +{ + struct pm8001_hba_info *pm8001_ha; + irqreturn_t ret = IRQ_HANDLED; + struct sas_ha_struct *sha = opaque; + pm8001_ha = sha->lldd_ha; + if (unlikely(!pm8001_ha)) + return IRQ_NONE; + if (!PM8001_CHIP_DISP->is_our_interupt(pm8001_ha)) + return IRQ_NONE; +#ifdef PM8001_USE_TASKLET + tasklet_schedule(&pm8001_ha->tasklet); +#else + ret = PM8001_CHIP_DISP->isr(pm8001_ha); +#endif + return ret; +} + +/** + * pm8001_alloc - initiate our hba structure and 6 DMAs area. + * @pm8001_ha:our hba structure. + * + */ +static int __devinit pm8001_alloc(struct pm8001_hba_info *pm8001_ha) +{ + int i; + spin_lock_init(&pm8001_ha->lock); + for (i = 0; i < pm8001_ha->chip->n_phy; i++) + pm8001_phy_init(pm8001_ha, i); + + pm8001_ha->tags = kmalloc(sizeof(*pm8001_ha->tags)*PM8001_MAX_DEVICES, + GFP_KERNEL); + + /* MPI Memory region 1 for AAP Event Log for fw */ + pm8001_ha->memoryMap.region[AAP1].num_elements = 1; + pm8001_ha->memoryMap.region[AAP1].element_size = PM8001_EVENT_LOG_SIZE; + pm8001_ha->memoryMap.region[AAP1].total_len = PM8001_EVENT_LOG_SIZE; + pm8001_ha->memoryMap.region[AAP1].alignment = 32; + + /* MPI Memory region 2 for IOP Event Log for fw */ + pm8001_ha->memoryMap.region[IOP].num_elements = 1; + pm8001_ha->memoryMap.region[IOP].element_size = PM8001_EVENT_LOG_SIZE; + pm8001_ha->memoryMap.region[IOP].total_len = PM8001_EVENT_LOG_SIZE; + pm8001_ha->memoryMap.region[IOP].alignment = 32; + + /* MPI Memory region 3 for consumer Index of inbound queues */ + pm8001_ha->memoryMap.region[CI].num_elements = 1; + pm8001_ha->memoryMap.region[CI].element_size = 4; + pm8001_ha->memoryMap.region[CI].total_len = 4; + pm8001_ha->memoryMap.region[CI].alignment = 4; + + /* MPI Memory region 4 for producer Index of outbound queues */ + pm8001_ha->memoryMap.region[PI].num_elements = 1; + pm8001_ha->memoryMap.region[PI].element_size = 4; + pm8001_ha->memoryMap.region[PI].total_len = 4; + pm8001_ha->memoryMap.region[PI].alignment = 4; + + /* MPI Memory region 5 inbound queues */ + pm8001_ha->memoryMap.region[IB].num_elements = 256; + pm8001_ha->memoryMap.region[IB].element_size = 64; + pm8001_ha->memoryMap.region[IB].total_len = 256 * 64; + pm8001_ha->memoryMap.region[IB].alignment = 64; + + /* MPI Memory region 6 inbound queues */ + pm8001_ha->memoryMap.region[OB].num_elements = 256; + pm8001_ha->memoryMap.region[OB].element_size = 64; + pm8001_ha->memoryMap.region[OB].total_len = 256 * 64; + pm8001_ha->memoryMap.region[OB].alignment = 64; + + /* Memory region write DMA*/ + pm8001_ha->memoryMap.region[NVMD].num_elements = 1; + pm8001_ha->memoryMap.region[NVMD].element_size = 4096; + pm8001_ha->memoryMap.region[NVMD].total_len = 4096; + /* Memory region for devices*/ + pm8001_ha->memoryMap.region[DEV_MEM].num_elements = 1; + pm8001_ha->memoryMap.region[DEV_MEM].element_size = PM8001_MAX_DEVICES * + sizeof(struct pm8001_device); + pm8001_ha->memoryMap.region[DEV_MEM].total_len = PM8001_MAX_DEVICES * + sizeof(struct pm8001_device); + + /* Memory region for ccb_info*/ + pm8001_ha->memoryMap.region[CCB_MEM].num_elements = 1; + pm8001_ha->memoryMap.region[CCB_MEM].element_size = PM8001_MAX_CCB * + sizeof(struct pm8001_ccb_info); + pm8001_ha->memoryMap.region[CCB_MEM].total_len = PM8001_MAX_CCB * + sizeof(struct pm8001_ccb_info); + + for (i = 0; i < USI_MAX_MEMCNT; i++) { + if (pm8001_mem_alloc(pm8001_ha->pdev, + &pm8001_ha->memoryMap.region[i].virt_ptr, + &pm8001_ha->memoryMap.region[i].phys_addr, + &pm8001_ha->memoryMap.region[i].phys_addr_hi, + &pm8001_ha->memoryMap.region[i].phys_addr_lo, + pm8001_ha->memoryMap.region[i].total_len, + pm8001_ha->memoryMap.region[i].alignment) != 0) { + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk("Mem%d alloc failed\n", + i)); + goto err_out; + } + } + + pm8001_ha->devices = pm8001_ha->memoryMap.region[DEV_MEM].virt_ptr; + for (i = 0; i < PM8001_MAX_DEVICES; i++) { + pm8001_ha->devices[i].dev_type = NO_DEVICE; + pm8001_ha->devices[i].id = i; + pm8001_ha->devices[i].device_id = PM8001_MAX_DEVICES; + pm8001_ha->devices[i].running_req = 0; + } + pm8001_ha->ccb_info = pm8001_ha->memoryMap.region[CCB_MEM].virt_ptr; + for (i = 0; i < PM8001_MAX_CCB; i++) { + pm8001_ha->ccb_info[i].ccb_dma_handle = + pm8001_ha->memoryMap.region[CCB_MEM].phys_addr + + i * sizeof(struct pm8001_ccb_info); + ++pm8001_ha->tags_num; + } + pm8001_ha->flags = PM8001F_INIT_TIME; + /* Initialize tags */ + pm8001_tag_init(pm8001_ha); + return 0; +err_out: + return 1; +} + +/** + * pm8001_ioremap - remap the pci high physical address to kernal virtual + * address so that we can access them. + * @pm8001_ha:our hba structure. + */ +static int pm8001_ioremap(struct pm8001_hba_info *pm8001_ha) +{ + u32 bar; + u32 logicalBar = 0; + struct pci_dev *pdev; + + pdev = pm8001_ha->pdev; + /* map pci mem (PMC pci base 0-3)*/ + for (bar = 0; bar < 6; bar++) { + /* + ** logical BARs for SPC: + ** bar 0 and 1 - logical BAR0 + ** bar 2 and 3 - logical BAR1 + ** bar4 - logical BAR2 + ** bar5 - logical BAR3 + ** Skip the appropriate assignments: + */ + if ((bar == 1) || (bar == 3)) + continue; + if (pci_resource_flags(pdev, bar) & IORESOURCE_MEM) { + pm8001_ha->io_mem[logicalBar].membase = + pci_resource_start(pdev, bar); + pm8001_ha->io_mem[logicalBar].membase &= + (u32)PCI_BASE_ADDRESS_MEM_MASK; + pm8001_ha->io_mem[logicalBar].memsize = + pci_resource_len(pdev, bar); + pm8001_ha->io_mem[logicalBar].memvirtaddr = + ioremap(pm8001_ha->io_mem[logicalBar].membase, + pm8001_ha->io_mem[logicalBar].memsize); + PM8001_INIT_DBG(pm8001_ha, + pm8001_printk("PCI: bar %d, logicalBar %d " + "virt_addr=%lx,len=%d\n", bar, logicalBar, + (unsigned long) + pm8001_ha->io_mem[logicalBar].memvirtaddr, + pm8001_ha->io_mem[logicalBar].memsize)); + } else { + pm8001_ha->io_mem[logicalBar].membase = 0; + pm8001_ha->io_mem[logicalBar].memsize = 0; + pm8001_ha->io_mem[logicalBar].memvirtaddr = 0; + } + logicalBar++; + } + return 0; +} + +/** + * pm8001_pci_alloc - initialize our ha card structure + * @pdev: pci device. + * @ent: ent + * @shost: scsi host struct which has been initialized before. + */ +static struct pm8001_hba_info *__devinit +pm8001_pci_alloc(struct pci_dev *pdev, u32 chip_id, struct Scsi_Host *shost) +{ + struct pm8001_hba_info *pm8001_ha; + struct sas_ha_struct *sha = SHOST_TO_SAS_HA(shost); + + + pm8001_ha = sha->lldd_ha; + if (!pm8001_ha) + return NULL; + + pm8001_ha->pdev = pdev; + pm8001_ha->dev = &pdev->dev; + pm8001_ha->chip_id = chip_id; + pm8001_ha->chip = &pm8001_chips[pm8001_ha->chip_id]; + pm8001_ha->irq = pdev->irq; + pm8001_ha->sas = sha; + pm8001_ha->shost = shost; + pm8001_ha->id = pm8001_id++; + INIT_LIST_HEAD(&pm8001_ha->wq_list); + pm8001_ha->logging_level = 0x01; + sprintf(pm8001_ha->name, "%s%d", DRV_NAME, pm8001_ha->id); +#ifdef PM8001_USE_TASKLET + tasklet_init(&pm8001_ha->tasklet, pm8001_tasklet, + (unsigned long)pm8001_ha); +#endif + pm8001_ioremap(pm8001_ha); + if (!pm8001_alloc(pm8001_ha)) + return pm8001_ha; + pm8001_free(pm8001_ha); + return NULL; +} + +/** + * pci_go_44 - pm8001 specified, its DMA is 44 bit rather than 64 bit + * @pdev: pci device. + */ +static int pci_go_44(struct pci_dev *pdev) +{ + int rc; + + if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(44))) { + rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(44)); + if (rc) { + rc = pci_set_consistent_dma_mask(pdev, + DMA_BIT_MASK(32)); + if (rc) { + dev_printk(KERN_ERR, &pdev->dev, + "44-bit DMA enable failed\n"); + return rc; + } + } + } else { + rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + if (rc) { + dev_printk(KERN_ERR, &pdev->dev, + "32-bit DMA enable failed\n"); + return rc; + } + rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); + if (rc) { + dev_printk(KERN_ERR, &pdev->dev, + "32-bit consistent DMA enable failed\n"); + return rc; + } + } + return rc; +} + +/** + * pm8001_prep_sas_ha_init - allocate memory in general hba struct && init them. + * @shost: scsi host which has been allocated outside. + * @chip_info: our ha struct. + */ +static int __devinit pm8001_prep_sas_ha_init(struct Scsi_Host * shost, + const struct pm8001_chip_info *chip_info) +{ + int phy_nr, port_nr; + struct asd_sas_phy **arr_phy; + struct asd_sas_port **arr_port; + struct sas_ha_struct *sha = SHOST_TO_SAS_HA(shost); + + phy_nr = chip_info->n_phy; + port_nr = phy_nr; + memset(sha, 0x00, sizeof(*sha)); + arr_phy = kcalloc(phy_nr, sizeof(void *), GFP_KERNEL); + if (!arr_phy) + goto exit; + arr_port = kcalloc(port_nr, sizeof(void *), GFP_KERNEL); + if (!arr_port) + goto exit_free2; + + sha->sas_phy = arr_phy; + sha->sas_port = arr_port; + sha->lldd_ha = kzalloc(sizeof(struct pm8001_hba_info), GFP_KERNEL); + if (!sha->lldd_ha) + goto exit_free1; + + shost->transportt = pm8001_stt; + shost->max_id = PM8001_MAX_DEVICES; + shost->max_lun = 8; + shost->max_channel = 0; + shost->unique_id = pm8001_id; + shost->max_cmd_len = 16; + shost->can_queue = PM8001_CAN_QUEUE; + shost->cmd_per_lun = 32; + return 0; +exit_free1: + kfree(arr_port); +exit_free2: + kfree(arr_phy); +exit: + return -1; +} + +/** + * pm8001_post_sas_ha_init - initialize general hba struct defined in libsas + * @shost: scsi host which has been allocated outside + * @chip_info: our ha struct. + */ +static void __devinit pm8001_post_sas_ha_init(struct Scsi_Host *shost, + const struct pm8001_chip_info *chip_info) +{ + int i = 0; + struct pm8001_hba_info *pm8001_ha; + struct sas_ha_struct *sha = SHOST_TO_SAS_HA(shost); + + pm8001_ha = sha->lldd_ha; + for (i = 0; i < chip_info->n_phy; i++) { + sha->sas_phy[i] = &pm8001_ha->phy[i].sas_phy; + sha->sas_port[i] = &pm8001_ha->port[i].sas_port; + } + sha->sas_ha_name = DRV_NAME; + sha->dev = pm8001_ha->dev; + + sha->lldd_module = THIS_MODULE; + sha->sas_addr = &pm8001_ha->sas_addr[0]; + sha->num_phys = chip_info->n_phy; + sha->lldd_max_execute_num = 1; + sha->lldd_queue_size = PM8001_CAN_QUEUE; + sha->core.shost = shost; +} + +/** + * pm8001_init_sas_add - initialize sas address + * @chip_info: our ha struct. + * + * Currently we just set the fixed SAS address to our HBA,for manufacture, + * it should read from the EEPROM + */ +static void pm8001_init_sas_add(struct pm8001_hba_info *pm8001_ha) +{ + u8 i; +#ifdef PM8001_READ_VPD + DECLARE_COMPLETION_ONSTACK(completion); + pm8001_ha->nvmd_completion = &completion; + PM8001_CHIP_DISP->get_nvmd_req(pm8001_ha, 0, 0); + wait_for_completion(&completion); + for (i = 0; i < pm8001_ha->chip->n_phy; i++) { + memcpy(&pm8001_ha->phy[i].dev_sas_addr, pm8001_ha->sas_addr, + SAS_ADDR_SIZE); + PM8001_INIT_DBG(pm8001_ha, + pm8001_printk("phy %d sas_addr = %x \n", i, + (u64)pm8001_ha->phy[i].dev_sas_addr)); + } +#else + for (i = 0; i < pm8001_ha->chip->n_phy; i++) { + pm8001_ha->phy[i].dev_sas_addr = 0x500e004010000004ULL; + pm8001_ha->phy[i].dev_sas_addr = + cpu_to_be64((u64) + (*(u64 *)&pm8001_ha->phy[i].dev_sas_addr)); + } + memcpy(pm8001_ha->sas_addr, &pm8001_ha->phy[0].dev_sas_addr, + SAS_ADDR_SIZE); +#endif +} + +#ifdef PM8001_USE_MSIX +/** + * pm8001_setup_msix - enable MSI-X interrupt + * @chip_info: our ha struct. + * @irq_handler: irq_handler + */ +static u32 pm8001_setup_msix(struct pm8001_hba_info *pm8001_ha, + irq_handler_t irq_handler) +{ + u32 i = 0, j = 0; + u32 number_of_intr = 1; + int flag = 0; + u32 max_entry; + int rc; + max_entry = sizeof(pm8001_ha->msix_entries) / + sizeof(pm8001_ha->msix_entries[0]); + flag |= IRQF_DISABLED; + for (i = 0; i < max_entry ; i++) + pm8001_ha->msix_entries[i].entry = i; + rc = pci_enable_msix(pm8001_ha->pdev, pm8001_ha->msix_entries, + number_of_intr); + pm8001_ha->number_of_intr = number_of_intr; + if (!rc) { + for (i = 0; i < number_of_intr; i++) { + if (request_irq(pm8001_ha->msix_entries[i].vector, + irq_handler, flag, DRV_NAME, + SHOST_TO_SAS_HA(pm8001_ha->shost))) { + for (j = 0; j < i; j++) + free_irq( + pm8001_ha->msix_entries[j].vector, + SHOST_TO_SAS_HA(pm8001_ha->shost)); + pci_disable_msix(pm8001_ha->pdev); + break; + } + } + } + return rc; +} +#endif + +/** + * pm8001_request_irq - register interrupt + * @chip_info: our ha struct. + */ +static u32 pm8001_request_irq(struct pm8001_hba_info *pm8001_ha) +{ + struct pci_dev *pdev; + irq_handler_t irq_handler = pm8001_interrupt; + u32 rc; + + pdev = pm8001_ha->pdev; + +#ifdef PM8001_USE_MSIX + if (pci_find_capability(pdev, PCI_CAP_ID_MSIX)) + return pm8001_setup_msix(pm8001_ha, irq_handler); + else + goto intx; +#endif + +intx: + /* intialize the INT-X interrupt */ + rc = request_irq(pdev->irq, irq_handler, IRQF_SHARED, DRV_NAME, + SHOST_TO_SAS_HA(pm8001_ha->shost)); + return rc; +} + +/** + * pm8001_pci_probe - probe supported device + * @pdev: pci device which kernel has been prepared for. + * @ent: pci device id + * + * This function is the main initialization function, when register a new + * pci driver it is invoked, all struct an hardware initilization should be done + * here, also, register interrupt + */ +static int __devinit pm8001_pci_probe(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + unsigned int rc; + u32 pci_reg; + struct pm8001_hba_info *pm8001_ha; + struct Scsi_Host *shost = NULL; + const struct pm8001_chip_info *chip; + + dev_printk(KERN_INFO, &pdev->dev, + "pm8001: driver version %s\n", DRV_VERSION); + rc = pci_enable_device(pdev); + if (rc) + goto err_out_enable; + pci_set_master(pdev); + /* + * Enable pci slot busmaster by setting pci command register. + * This is required by FW for Cyclone card. + */ + + pci_read_config_dword(pdev, PCI_COMMAND, &pci_reg); + pci_reg |= 0x157; + pci_write_config_dword(pdev, PCI_COMMAND, pci_reg); + rc = pci_request_regions(pdev, DRV_NAME); + if (rc) + goto err_out_disable; + rc = pci_go_44(pdev); + if (rc) + goto err_out_regions; + + shost = scsi_host_alloc(&pm8001_sht, sizeof(void *)); + if (!shost) { + rc = -ENOMEM; + goto err_out_regions; + } + chip = &pm8001_chips[ent->driver_data]; + SHOST_TO_SAS_HA(shost) = + kcalloc(1, sizeof(struct sas_ha_struct), GFP_KERNEL); + if (!SHOST_TO_SAS_HA(shost)) { + rc = -ENOMEM; + goto err_out_free_host; + } + + rc = pm8001_prep_sas_ha_init(shost, chip); + if (rc) { + rc = -ENOMEM; + goto err_out_free; + } + pci_set_drvdata(pdev, SHOST_TO_SAS_HA(shost)); + pm8001_ha = pm8001_pci_alloc(pdev, chip_8001, shost); + if (!pm8001_ha) { + rc = -ENOMEM; + goto err_out_free; + } + list_add_tail(&pm8001_ha->list, &hba_list); + PM8001_CHIP_DISP->chip_soft_rst(pm8001_ha, 0x252acbcd); + rc = PM8001_CHIP_DISP->chip_init(pm8001_ha); + if (rc) + goto err_out_ha_free; + + rc = scsi_add_host(shost, &pdev->dev); + if (rc) + goto err_out_ha_free; + rc = pm8001_request_irq(pm8001_ha); + if (rc) + goto err_out_shost; + + PM8001_CHIP_DISP->interrupt_enable(pm8001_ha); + pm8001_init_sas_add(pm8001_ha); + pm8001_post_sas_ha_init(shost, chip); + rc = sas_register_ha(SHOST_TO_SAS_HA(shost)); + if (rc) + goto err_out_shost; + scsi_scan_host(pm8001_ha->shost); + return 0; + +err_out_shost: + scsi_remove_host(pm8001_ha->shost); +err_out_ha_free: + pm8001_free(pm8001_ha); +err_out_free: + kfree(SHOST_TO_SAS_HA(shost)); +err_out_free_host: + kfree(shost); +err_out_regions: + pci_release_regions(pdev); +err_out_disable: + pci_disable_device(pdev); +err_out_enable: + return rc; +} + +static void __devexit pm8001_pci_remove(struct pci_dev *pdev) +{ + struct sas_ha_struct *sha = pci_get_drvdata(pdev); + struct pm8001_hba_info *pm8001_ha; + int i; + pm8001_ha = sha->lldd_ha; + pci_set_drvdata(pdev, NULL); + sas_unregister_ha(sha); + sas_remove_host(pm8001_ha->shost); + list_del(&pm8001_ha->list); + scsi_remove_host(pm8001_ha->shost); + PM8001_CHIP_DISP->interrupt_disable(pm8001_ha); + PM8001_CHIP_DISP->chip_soft_rst(pm8001_ha, 0x252acbcd); + +#ifdef PM8001_USE_MSIX + for (i = 0; i < pm8001_ha->number_of_intr; i++) + synchronize_irq(pm8001_ha->msix_entries[i].vector); + for (i = 0; i < pm8001_ha->number_of_intr; i++) + free_irq(pm8001_ha->msix_entries[i].vector, sha); + pci_disable_msix(pdev); +#else + free_irq(pm8001_ha->irq, sha); +#endif +#ifdef PM8001_USE_TASKLET + tasklet_kill(&pm8001_ha->tasklet); +#endif + pm8001_free(pm8001_ha); + kfree(sha->sas_phy); + kfree(sha->sas_port); + kfree(sha); + pci_release_regions(pdev); + pci_disable_device(pdev); +} + +/** + * pm8001_pci_suspend - power management suspend main entry point + * @pdev: PCI device struct + * @state: PM state change to (usually PCI_D3) + * + * Returns 0 success, anything else error. + */ +static int pm8001_pci_suspend(struct pci_dev *pdev, pm_message_t state) +{ + struct sas_ha_struct *sha = pci_get_drvdata(pdev); + struct pm8001_hba_info *pm8001_ha; + int i , pos; + u32 device_state; + pm8001_ha = sha->lldd_ha; + flush_scheduled_work(); + scsi_block_requests(pm8001_ha->shost); + pos = pci_find_capability(pdev, PCI_CAP_ID_PM); + if (pos == 0) { + printk(KERN_ERR " PCI PM not supported\n"); + return -ENODEV; + } + PM8001_CHIP_DISP->interrupt_disable(pm8001_ha); + PM8001_CHIP_DISP->chip_soft_rst(pm8001_ha, 0x252acbcd); +#ifdef PM8001_USE_MSIX + for (i = 0; i < pm8001_ha->number_of_intr; i++) + synchronize_irq(pm8001_ha->msix_entries[i].vector); + for (i = 0; i < pm8001_ha->number_of_intr; i++) + free_irq(pm8001_ha->msix_entries[i].vector, sha); + pci_disable_msix(pdev); +#else + free_irq(pm8001_ha->irq, sha); +#endif +#ifdef PM8001_USE_TASKLET + tasklet_kill(&pm8001_ha->tasklet); +#endif + device_state = pci_choose_state(pdev, state); + pm8001_printk("pdev=0x%p, slot=%s, entering " + "operating state [D%d]\n", pdev, + pm8001_ha->name, device_state); + pci_save_state(pdev); + pci_disable_device(pdev); + pci_set_power_state(pdev, device_state); + return 0; +} + +/** + * pm8001_pci_resume - power management resume main entry point + * @pdev: PCI device struct + * + * Returns 0 success, anything else error. + */ +static int pm8001_pci_resume(struct pci_dev *pdev) +{ + struct sas_ha_struct *sha = pci_get_drvdata(pdev); + struct pm8001_hba_info *pm8001_ha; + int rc; + u32 device_state; + pm8001_ha = sha->lldd_ha; + device_state = pdev->current_state; + + pm8001_printk("pdev=0x%p, slot=%s, resuming from previous " + "operating state [D%d]\n", pdev, pm8001_ha->name, device_state); + + pci_set_power_state(pdev, PCI_D0); + pci_enable_wake(pdev, PCI_D0, 0); + pci_restore_state(pdev); + rc = pci_enable_device(pdev); + if (rc) { + pm8001_printk("slot=%s Enable device failed during resume\n", + pm8001_ha->name); + goto err_out_enable; + } + + pci_set_master(pdev); + rc = pci_go_44(pdev); + if (rc) + goto err_out_disable; + + PM8001_CHIP_DISP->chip_soft_rst(pm8001_ha, 0x252acbcd); + rc = PM8001_CHIP_DISP->chip_init(pm8001_ha); + if (rc) + goto err_out_disable; + PM8001_CHIP_DISP->interrupt_disable(pm8001_ha); + rc = pm8001_request_irq(pm8001_ha); + if (rc) + goto err_out_disable; + #ifdef PM8001_USE_TASKLET + tasklet_init(&pm8001_ha->tasklet, pm8001_tasklet, + (unsigned long)pm8001_ha); + #endif + PM8001_CHIP_DISP->interrupt_enable(pm8001_ha); + scsi_unblock_requests(pm8001_ha->shost); + return 0; + +err_out_disable: + scsi_remove_host(pm8001_ha->shost); + pci_disable_device(pdev); +err_out_enable: + return rc; +} + +static struct pci_device_id __devinitdata pm8001_pci_table[] = { + { + PCI_VDEVICE(PMC_Sierra, 0x8001), chip_8001 + }, + { + PCI_DEVICE(0x117c, 0x0042), + .driver_data = chip_8001 + }, + {} /* terminate list */ +}; + +static struct pci_driver pm8001_pci_driver = { + .name = DRV_NAME, + .id_table = pm8001_pci_table, + .probe = pm8001_pci_probe, + .remove = __devexit_p(pm8001_pci_remove), + .suspend = pm8001_pci_suspend, + .resume = pm8001_pci_resume, +}; + +/** + * pm8001_init - initialize scsi transport template + */ +static int __init pm8001_init(void) +{ + int rc; + pm8001_id = 0; + pm8001_stt = sas_domain_attach_transport(&pm8001_transport_ops); + if (!pm8001_stt) + return -ENOMEM; + rc = pci_register_driver(&pm8001_pci_driver); + if (rc) + goto err_out; + return 0; +err_out: + sas_release_transport(pm8001_stt); + return rc; +} + +static void __exit pm8001_exit(void) +{ + pci_unregister_driver(&pm8001_pci_driver); + sas_release_transport(pm8001_stt); +} + +module_init(pm8001_init); +module_exit(pm8001_exit); + +MODULE_AUTHOR("Jack Wang "); +MODULE_DESCRIPTION("PMC-Sierra PM8001 SAS/SATA controller driver"); +MODULE_VERSION(DRV_VERSION); +MODULE_LICENSE("GPL"); +MODULE_DEVICE_TABLE(pci, pm8001_pci_table); + diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c new file mode 100644 index 000000000000..7bf30fa6963a --- /dev/null +++ b/drivers/scsi/pm8001/pm8001_sas.c @@ -0,0 +1,1104 @@ +/* + * PMC-Sierra SPC 8001 SAS/SATA based host adapters driver + * + * Copyright (c) 2008-2009 USI Co., Ltd. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. + * 3. Neither the names of the above-listed copyright holders nor the names + * of any contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. + * + */ + +#include "pm8001_sas.h" + +/** + * pm8001_find_tag - from sas task to find out tag that belongs to this task + * @task: the task sent to the LLDD + * @tag: the found tag associated with the task + */ +static int pm8001_find_tag(struct sas_task *task, u32 *tag) +{ + if (task->lldd_task) { + struct pm8001_ccb_info *ccb; + ccb = task->lldd_task; + *tag = ccb->ccb_tag; + return 1; + } + return 0; +} + +/** + * pm8001_tag_clear - clear the tags bitmap + * @pm8001_ha: our hba struct + * @tag: the found tag associated with the task + */ +static void pm8001_tag_clear(struct pm8001_hba_info *pm8001_ha, u32 tag) +{ + void *bitmap = pm8001_ha->tags; + clear_bit(tag, bitmap); +} + +static void pm8001_tag_free(struct pm8001_hba_info *pm8001_ha, u32 tag) +{ + pm8001_tag_clear(pm8001_ha, tag); +} + +static void pm8001_tag_set(struct pm8001_hba_info *pm8001_ha, u32 tag) +{ + void *bitmap = pm8001_ha->tags; + set_bit(tag, bitmap); +} + +/** + * pm8001_tag_alloc - allocate a empty tag for task used. + * @pm8001_ha: our hba struct + * @tag_out: the found empty tag . + */ +inline int pm8001_tag_alloc(struct pm8001_hba_info *pm8001_ha, u32 *tag_out) +{ + unsigned int index, tag; + void *bitmap = pm8001_ha->tags; + + index = find_first_zero_bit(bitmap, pm8001_ha->tags_num); + tag = index; + if (tag >= pm8001_ha->tags_num) + return -SAS_QUEUE_FULL; + pm8001_tag_set(pm8001_ha, tag); + *tag_out = tag; + return 0; +} + +void pm8001_tag_init(struct pm8001_hba_info *pm8001_ha) +{ + int i; + for (i = 0; i < pm8001_ha->tags_num; ++i) + pm8001_tag_clear(pm8001_ha, i); +} + + /** + * pm8001_mem_alloc - allocate memory for pm8001. + * @pdev: pci device. + * @virt_addr: the allocated virtual address + * @pphys_addr_hi: the physical address high byte address. + * @pphys_addr_lo: the physical address low byte address. + * @mem_size: memory size. + */ +int pm8001_mem_alloc(struct pci_dev *pdev, void **virt_addr, + dma_addr_t *pphys_addr, u32 *pphys_addr_hi, + u32 *pphys_addr_lo, u32 mem_size, u32 align) +{ + caddr_t mem_virt_alloc; + dma_addr_t mem_dma_handle; + u64 phys_align; + u64 align_offset = 0; + if (align) + align_offset = (dma_addr_t)align - 1; + mem_virt_alloc = + pci_alloc_consistent(pdev, mem_size + align, &mem_dma_handle); + if (!mem_virt_alloc) { + pm8001_printk("memory allocation error\n"); + return -1; + } + memset((void *)mem_virt_alloc, 0, mem_size+align); + *pphys_addr = mem_dma_handle; + phys_align = (*pphys_addr + align_offset) & ~align_offset; + *virt_addr = (void *)mem_virt_alloc + phys_align - *pphys_addr; + *pphys_addr_hi = upper_32_bits(phys_align); + *pphys_addr_lo = lower_32_bits(phys_align); + return 0; +} +/** + * pm8001_find_ha_by_dev - from domain device which come from sas layer to + * find out our hba struct. + * @dev: the domain device which from sas layer. + */ +static +struct pm8001_hba_info *pm8001_find_ha_by_dev(struct domain_device *dev) +{ + struct sas_ha_struct *sha = dev->port->ha; + struct pm8001_hba_info *pm8001_ha = sha->lldd_ha; + return pm8001_ha; +} + +/** + * pm8001_phy_control - this function should be registered to + * sas_domain_function_template to provide libsas used, note: this is just + * control the HBA phy rather than other expander phy if you want control + * other phy, you should use SMP command. + * @sas_phy: which phy in HBA phys. + * @func: the operation. + * @funcdata: always NULL. + */ +int pm8001_phy_control(struct asd_sas_phy *sas_phy, enum phy_func func, + void *funcdata) +{ + int rc = 0, phy_id = sas_phy->id; + struct pm8001_hba_info *pm8001_ha = NULL; + struct sas_phy_linkrates *rates; + DECLARE_COMPLETION_ONSTACK(completion); + pm8001_ha = sas_phy->ha->lldd_ha; + pm8001_ha->phy[phy_id].enable_completion = &completion; + switch (func) { + case PHY_FUNC_SET_LINK_RATE: + rates = funcdata; + if (rates->minimum_linkrate) { + pm8001_ha->phy[phy_id].minimum_linkrate = + rates->minimum_linkrate; + } + if (rates->maximum_linkrate) { + pm8001_ha->phy[phy_id].maximum_linkrate = + rates->maximum_linkrate; + } + if (pm8001_ha->phy[phy_id].phy_state == 0) { + PM8001_CHIP_DISP->phy_start_req(pm8001_ha, phy_id); + wait_for_completion(&completion); + } + PM8001_CHIP_DISP->phy_ctl_req(pm8001_ha, phy_id, + PHY_LINK_RESET); + break; + case PHY_FUNC_HARD_RESET: + if (pm8001_ha->phy[phy_id].phy_state == 0) { + PM8001_CHIP_DISP->phy_start_req(pm8001_ha, phy_id); + wait_for_completion(&completion); + } + PM8001_CHIP_DISP->phy_ctl_req(pm8001_ha, phy_id, + PHY_HARD_RESET); + break; + case PHY_FUNC_LINK_RESET: + if (pm8001_ha->phy[phy_id].phy_state == 0) { + PM8001_CHIP_DISP->phy_start_req(pm8001_ha, phy_id); + wait_for_completion(&completion); + } + PM8001_CHIP_DISP->phy_ctl_req(pm8001_ha, phy_id, + PHY_LINK_RESET); + break; + case PHY_FUNC_RELEASE_SPINUP_HOLD: + PM8001_CHIP_DISP->phy_ctl_req(pm8001_ha, phy_id, + PHY_LINK_RESET); + break; + case PHY_FUNC_DISABLE: + PM8001_CHIP_DISP->phy_stop_req(pm8001_ha, phy_id); + break; + default: + rc = -EOPNOTSUPP; + } + msleep(300); + return rc; +} + +int pm8001_slave_alloc(struct scsi_device *scsi_dev) +{ + struct domain_device *dev = sdev_to_domain_dev(scsi_dev); + if (dev_is_sata(dev)) { + /* We don't need to rescan targets + * if REPORT_LUNS request is failed + */ + if (scsi_dev->lun > 0) + return -ENXIO; + scsi_dev->tagged_supported = 1; + } + return sas_slave_alloc(scsi_dev); +} + +/** + * pm8001_scan_start - we should enable all HBA phys by sending the phy_start + * command to HBA. + * @shost: the scsi host data. + */ +void pm8001_scan_start(struct Scsi_Host *shost) +{ + int i; + struct pm8001_hba_info *pm8001_ha; + struct sas_ha_struct *sha = SHOST_TO_SAS_HA(shost); + pm8001_ha = sha->lldd_ha; + for (i = 0; i < pm8001_ha->chip->n_phy; ++i) + PM8001_CHIP_DISP->phy_start_req(pm8001_ha, i); +} + +int pm8001_scan_finished(struct Scsi_Host *shost, unsigned long time) +{ + /* give the phy enabling interrupt event time to come in (1s + * is empirically about all it takes) */ + if (time < HZ) + return 0; + /* Wait for discovery to finish */ + scsi_flush_work(shost); + return 1; +} + +/** + * pm8001_task_prep_smp - the dispatcher function, prepare data for smp task + * @pm8001_ha: our hba card information + * @ccb: the ccb which attached to smp task + */ +static int pm8001_task_prep_smp(struct pm8001_hba_info *pm8001_ha, + struct pm8001_ccb_info *ccb) +{ + return PM8001_CHIP_DISP->smp_req(pm8001_ha, ccb); +} + +u32 pm8001_get_ncq_tag(struct sas_task *task, u32 *tag) +{ + struct ata_queued_cmd *qc = task->uldd_task; + if (qc) { + if (qc->tf.command == ATA_CMD_FPDMA_WRITE || + qc->tf.command == ATA_CMD_FPDMA_READ) { + *tag = qc->tag; + return 1; + } + } + return 0; +} + +/** + * pm8001_task_prep_ata - the dispatcher function, prepare data for sata task + * @pm8001_ha: our hba card information + * @ccb: the ccb which attached to sata task + */ +static int pm8001_task_prep_ata(struct pm8001_hba_info *pm8001_ha, + struct pm8001_ccb_info *ccb) +{ + return PM8001_CHIP_DISP->sata_req(pm8001_ha, ccb); +} + +/** + * pm8001_task_prep_ssp_tm - the dispatcher function, prepare task management data + * @pm8001_ha: our hba card information + * @ccb: the ccb which attached to TM + * @tmf: the task management IU + */ +static int pm8001_task_prep_ssp_tm(struct pm8001_hba_info *pm8001_ha, + struct pm8001_ccb_info *ccb, struct pm8001_tmf_task *tmf) +{ + return PM8001_CHIP_DISP->ssp_tm_req(pm8001_ha, ccb, tmf); +} + +/** + * pm8001_task_prep_ssp - the dispatcher function,prepare ssp data for ssp task + * @pm8001_ha: our hba card information + * @ccb: the ccb which attached to ssp task + */ +static int pm8001_task_prep_ssp(struct pm8001_hba_info *pm8001_ha, + struct pm8001_ccb_info *ccb) +{ + return PM8001_CHIP_DISP->ssp_io_req(pm8001_ha, ccb); +} +int pm8001_slave_configure(struct scsi_device *sdev) +{ + struct domain_device *dev = sdev_to_domain_dev(sdev); + int ret = sas_slave_configure(sdev); + if (ret) + return ret; + if (dev_is_sata(dev)) { + #ifdef PM8001_DISABLE_NCQ + struct ata_port *ap = dev->sata_dev.ap; + struct ata_device *adev = ap->link.device; + adev->flags |= ATA_DFLAG_NCQ_OFF; + scsi_adjust_queue_depth(sdev, MSG_SIMPLE_TAG, 1); + #endif + } + return 0; +} +/** + * pm8001_task_exec -execute the task which come from upper level, send the + * command or data to DMA area and then increase CI,for queuecommand(ssp), + * it is from upper layer and for smp command,it is from libsas, + * for ata command it is from libata. + * @task: the task to be execute. + * @num: if can_queue great than 1, the task can be queued up. for SMP task, + * we always execute one one time. + * @gfp_flags: gfp_flags. + * @is tmf: if it is task management task. + * @tmf: the task management IU + */ +#define DEV_IS_GONE(pm8001_dev) \ + ((!pm8001_dev || (pm8001_dev->dev_type == NO_DEVICE))) +static int pm8001_task_exec(struct sas_task *task, const int num, + gfp_t gfp_flags, int is_tmf, struct pm8001_tmf_task *tmf) +{ + struct domain_device *dev = task->dev; + struct pm8001_hba_info *pm8001_ha; + struct pm8001_device *pm8001_dev; + struct sas_task *t = task; + struct pm8001_ccb_info *ccb; + u32 tag = 0xdeadbeef, rc, n_elem = 0; + u32 n = num; + unsigned long flags = 0; + + if (!dev->port) { + struct task_status_struct *tsm = &t->task_status; + tsm->resp = SAS_TASK_UNDELIVERED; + tsm->stat = SAS_PHY_DOWN; + if (dev->dev_type != SATA_DEV) + t->task_done(t); + return 0; + } + pm8001_ha = pm8001_find_ha_by_dev(task->dev); + PM8001_IO_DBG(pm8001_ha, pm8001_printk("pm8001_task_exec device \n ")); + spin_lock_irqsave(&pm8001_ha->lock, flags); + do { + dev = t->dev; + pm8001_dev = dev->lldd_dev; + if (DEV_IS_GONE(pm8001_dev)) { + if (pm8001_dev) { + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("device %d not ready.\n", + pm8001_dev->device_id)); + } else { + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("device %016llx not " + "ready.\n", SAS_ADDR(dev->sas_addr))); + } + rc = SAS_PHY_DOWN; + goto out_done; + } + rc = pm8001_tag_alloc(pm8001_ha, &tag); + if (rc) + goto err_out; + ccb = &pm8001_ha->ccb_info[tag]; + + if (!sas_protocol_ata(t->task_proto)) { + if (t->num_scatter) { + n_elem = dma_map_sg(pm8001_ha->dev, + t->scatter, + t->num_scatter, + t->data_dir); + if (!n_elem) { + rc = -ENOMEM; + goto err_out; + } + } + } else { + n_elem = t->num_scatter; + } + + t->lldd_task = NULL; + ccb->n_elem = n_elem; + ccb->ccb_tag = tag; + ccb->task = t; + switch (t->task_proto) { + case SAS_PROTOCOL_SMP: + rc = pm8001_task_prep_smp(pm8001_ha, ccb); + break; + case SAS_PROTOCOL_SSP: + if (is_tmf) + rc = pm8001_task_prep_ssp_tm(pm8001_ha, + ccb, tmf); + else + rc = pm8001_task_prep_ssp(pm8001_ha, ccb); + break; + case SAS_PROTOCOL_SATA: + case SAS_PROTOCOL_STP: + case SAS_PROTOCOL_SATA | SAS_PROTOCOL_STP: + rc = pm8001_task_prep_ata(pm8001_ha, ccb); + break; + default: + dev_printk(KERN_ERR, pm8001_ha->dev, + "unknown sas_task proto: 0x%x\n", + t->task_proto); + rc = -EINVAL; + break; + } + + if (rc) { + PM8001_IO_DBG(pm8001_ha, + pm8001_printk("rc is %x\n", rc)); + goto err_out_tag; + } + t->lldd_task = ccb; + /* TODO: select normal or high priority */ + spin_lock(&t->task_state_lock); + t->task_state_flags |= SAS_TASK_AT_INITIATOR; + spin_unlock(&t->task_state_lock); + pm8001_dev->running_req++; + if (n > 1) + t = list_entry(t->list.next, struct sas_task, list); + } while (--n); + rc = 0; + goto out_done; + +err_out_tag: + pm8001_tag_free(pm8001_ha, tag); +err_out: + dev_printk(KERN_ERR, pm8001_ha->dev, "pm8001 exec failed[%d]!\n", rc); + if (!sas_protocol_ata(t->task_proto)) + if (n_elem) + dma_unmap_sg(pm8001_ha->dev, t->scatter, n_elem, + t->data_dir); +out_done: + spin_unlock_irqrestore(&pm8001_ha->lock, flags); + return rc; +} + +/** + * pm8001_queue_command - register for upper layer used, all IO commands sent + * to HBA are from this interface. + * @task: the task to be execute. + * @num: if can_queue great than 1, the task can be queued up. for SMP task, + * we always execute one one time + * @gfp_flags: gfp_flags + */ +int pm8001_queue_command(struct sas_task *task, const int num, + gfp_t gfp_flags) +{ + return pm8001_task_exec(task, num, gfp_flags, 0, NULL); +} + +void pm8001_ccb_free(struct pm8001_hba_info *pm8001_ha, u32 ccb_idx) +{ + pm8001_tag_clear(pm8001_ha, ccb_idx); +} + +/** + * pm8001_ccb_task_free - free the sg for ssp and smp command, free the ccb. + * @pm8001_ha: our hba card information + * @ccb: the ccb which attached to ssp task + * @task: the task to be free. + * @ccb_idx: ccb index. + */ +void pm8001_ccb_task_free(struct pm8001_hba_info *pm8001_ha, + struct sas_task *task, struct pm8001_ccb_info *ccb, u32 ccb_idx) +{ + if (!ccb->task) + return; + if (!sas_protocol_ata(task->task_proto)) + if (ccb->n_elem) + dma_unmap_sg(pm8001_ha->dev, task->scatter, + task->num_scatter, task->data_dir); + + switch (task->task_proto) { + case SAS_PROTOCOL_SMP: + dma_unmap_sg(pm8001_ha->dev, &task->smp_task.smp_resp, 1, + PCI_DMA_FROMDEVICE); + dma_unmap_sg(pm8001_ha->dev, &task->smp_task.smp_req, 1, + PCI_DMA_TODEVICE); + break; + + case SAS_PROTOCOL_SATA: + case SAS_PROTOCOL_STP: + case SAS_PROTOCOL_SSP: + default: + /* do nothing */ + break; + } + task->lldd_task = NULL; + ccb->task = NULL; + ccb->ccb_tag = 0xFFFFFFFF; + pm8001_ccb_free(pm8001_ha, ccb_idx); +} + + /** + * pm8001_alloc_dev - find the empty pm8001_device structure, allocate and + * return it for use. + * @pm8001_ha: our hba card information + */ +struct pm8001_device *pm8001_alloc_dev(struct pm8001_hba_info *pm8001_ha) +{ + u32 dev; + for (dev = 0; dev < PM8001_MAX_DEVICES; dev++) { + if (pm8001_ha->devices[dev].dev_type == NO_DEVICE) { + pm8001_ha->devices[dev].id = dev; + return &pm8001_ha->devices[dev]; + } + } + if (dev == PM8001_MAX_DEVICES) { + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk("max support %d devices, ignore ..\n", + PM8001_MAX_DEVICES)); + } + return NULL; +} + +static void pm8001_free_dev(struct pm8001_device *pm8001_dev) +{ + u32 id = pm8001_dev->id; + memset(pm8001_dev, 0, sizeof(*pm8001_dev)); + pm8001_dev->id = id; + pm8001_dev->dev_type = NO_DEVICE; + pm8001_dev->device_id = PM8001_MAX_DEVICES; + pm8001_dev->sas_device = NULL; +} + +/** + * pm8001_dev_found_notify - when libsas find a sas domain device, it should + * tell the LLDD that device is found, and then LLDD register this device to + * HBA FW by the command "OPC_INB_REG_DEV", after that the HBA will assign + * a device ID(according to device's sas address) and returned it to LLDD.from + * now on, we communicate with HBA FW with the device ID which HBA assigned + * rather than sas address. it is the neccessary step for our HBA but it is + * the optional for other HBA driver. + * @dev: the device structure which sas layer used. + */ +static int pm8001_dev_found_notify(struct domain_device *dev) +{ + unsigned long flags = 0; + int res = 0; + struct pm8001_hba_info *pm8001_ha = NULL; + struct domain_device *parent_dev = dev->parent; + struct pm8001_device *pm8001_device; + DECLARE_COMPLETION_ONSTACK(completion); + u32 flag = 0; + pm8001_ha = pm8001_find_ha_by_dev(dev); + spin_lock_irqsave(&pm8001_ha->lock, flags); + + pm8001_device = pm8001_alloc_dev(pm8001_ha); + pm8001_device->sas_device = dev; + if (!pm8001_device) { + res = -1; + goto found_out; + } + dev->lldd_dev = pm8001_device; + pm8001_device->dev_type = dev->dev_type; + pm8001_device->dcompletion = &completion; + if (parent_dev && DEV_IS_EXPANDER(parent_dev->dev_type)) { + int phy_id; + struct ex_phy *phy; + for (phy_id = 0; phy_id < parent_dev->ex_dev.num_phys; + phy_id++) { + phy = &parent_dev->ex_dev.ex_phy[phy_id]; + if (SAS_ADDR(phy->attached_sas_addr) + == SAS_ADDR(dev->sas_addr)) { + pm8001_device->attached_phy = phy_id; + break; + } + } + if (phy_id == parent_dev->ex_dev.num_phys) { + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk("Error: no attached dev:%016llx" + " at ex:%016llx.\n", SAS_ADDR(dev->sas_addr), + SAS_ADDR(parent_dev->sas_addr))); + res = -1; + } + } else { + if (dev->dev_type == SATA_DEV) { + pm8001_device->attached_phy = + dev->rphy->identify.phy_identifier; + flag = 1; /* directly sata*/ + } + } /*register this device to HBA*/ + PM8001_DISC_DBG(pm8001_ha, pm8001_printk("Found device \n")); + PM8001_CHIP_DISP->reg_dev_req(pm8001_ha, pm8001_device, flag); + spin_unlock_irqrestore(&pm8001_ha->lock, flags); + wait_for_completion(&completion); + if (dev->dev_type == SAS_END_DEV) + msleep(50); + pm8001_ha->flags = PM8001F_RUN_TIME ; + return 0; +found_out: + spin_unlock_irqrestore(&pm8001_ha->lock, flags); + return res; +} + +int pm8001_dev_found(struct domain_device *dev) +{ + return pm8001_dev_found_notify(dev); +} + +/** + * pm8001_alloc_task - allocate a task structure for TMF + */ +static struct sas_task *pm8001_alloc_task(void) +{ + struct sas_task *task = kzalloc(sizeof(*task), GFP_KERNEL); + if (task) { + INIT_LIST_HEAD(&task->list); + spin_lock_init(&task->task_state_lock); + task->task_state_flags = SAS_TASK_STATE_PENDING; + init_timer(&task->timer); + init_completion(&task->completion); + } + return task; +} + +static void pm8001_free_task(struct sas_task *task) +{ + if (task) { + BUG_ON(!list_empty(&task->list)); + kfree(task); + } +} + +static void pm8001_task_done(struct sas_task *task) +{ + if (!del_timer(&task->timer)) + return; + complete(&task->completion); +} + +static void pm8001_tmf_timedout(unsigned long data) +{ + struct sas_task *task = (struct sas_task *)data; + + task->task_state_flags |= SAS_TASK_STATE_ABORTED; + complete(&task->completion); +} + +#define PM8001_TASK_TIMEOUT 20 +/** + * pm8001_exec_internal_tmf_task - when errors or exception happened, we may + * want to do something, for example abort issued task which result in this + * execption, this is by calling this function, note it is also with the task + * execute interface. + * @dev: the wanted device. + * @tmf: which task management wanted to be take. + * @para_len: para_len. + * @parameter: ssp task parameter. + */ +static int pm8001_exec_internal_tmf_task(struct domain_device *dev, + void *parameter, u32 para_len, struct pm8001_tmf_task *tmf) +{ + int res, retry; + struct sas_task *task = NULL; + struct pm8001_hba_info *pm8001_ha = pm8001_find_ha_by_dev(dev); + + for (retry = 0; retry < 3; retry++) { + task = pm8001_alloc_task(); + if (!task) + return -ENOMEM; + + task->dev = dev; + task->task_proto = dev->tproto; + memcpy(&task->ssp_task, parameter, para_len); + task->task_done = pm8001_task_done; + task->timer.data = (unsigned long)task; + task->timer.function = pm8001_tmf_timedout; + task->timer.expires = jiffies + PM8001_TASK_TIMEOUT*HZ; + add_timer(&task->timer); + + res = pm8001_task_exec(task, 1, GFP_KERNEL, 1, tmf); + + if (res) { + del_timer(&task->timer); + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk("Executing internal task " + "failed\n")); + goto ex_err; + } + wait_for_completion(&task->completion); + res = -TMF_RESP_FUNC_FAILED; + /* Even TMF timed out, return direct. */ + if ((task->task_state_flags & SAS_TASK_STATE_ABORTED)) { + if (!(task->task_state_flags & SAS_TASK_STATE_DONE)) { + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk("TMF task[%x]timeout.\n", + tmf->tmf)); + goto ex_err; + } + } + + if (task->task_status.resp == SAS_TASK_COMPLETE && + task->task_status.stat == SAM_GOOD) { + res = TMF_RESP_FUNC_COMPLETE; + break; + } + + if (task->task_status.resp == SAS_TASK_COMPLETE && + task->task_status.stat == SAS_DATA_UNDERRUN) { + /* no error, but return the number of bytes of + * underrun */ + res = task->task_status.residual; + break; + } + + if (task->task_status.resp == SAS_TASK_COMPLETE && + task->task_status.stat == SAS_DATA_OVERRUN) { + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk("Blocked task error.\n")); + res = -EMSGSIZE; + break; + } else { + PM8001_IO_DBG(pm8001_ha, + pm8001_printk(" Task to dev %016llx response: 0x%x" + "status 0x%x\n", + SAS_ADDR(dev->sas_addr), + task->task_status.resp, + task->task_status.stat)); + pm8001_free_task(task); + task = NULL; + } + } +ex_err: + BUG_ON(retry == 3 && task != NULL); + if (task != NULL) + pm8001_free_task(task); + return res; +} + +static int +pm8001_exec_internal_task_abort(struct pm8001_hba_info *pm8001_ha, + struct pm8001_device *pm8001_dev, struct domain_device *dev, u32 flag, + u32 task_tag) +{ + int res, retry; + u32 rc, ccb_tag; + struct pm8001_ccb_info *ccb; + struct sas_task *task = NULL; + + for (retry = 0; retry < 3; retry++) { + task = pm8001_alloc_task(); + if (!task) + return -ENOMEM; + + task->dev = dev; + task->task_proto = dev->tproto; + task->task_done = pm8001_task_done; + task->timer.data = (unsigned long)task; + task->timer.function = pm8001_tmf_timedout; + task->timer.expires = jiffies + PM8001_TASK_TIMEOUT*HZ; + add_timer(&task->timer); + + rc = pm8001_tag_alloc(pm8001_ha, &ccb_tag); + if (rc) + return rc; + ccb = &pm8001_ha->ccb_info[ccb_tag]; + ccb->device = pm8001_dev; + ccb->ccb_tag = ccb_tag; + ccb->task = task; + + res = PM8001_CHIP_DISP->task_abort(pm8001_ha, + pm8001_dev, flag, task_tag, ccb_tag); + + if (res) { + del_timer(&task->timer); + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk("Executing internal task " + "failed\n")); + goto ex_err; + } + wait_for_completion(&task->completion); + res = TMF_RESP_FUNC_FAILED; + /* Even TMF timed out, return direct. */ + if ((task->task_state_flags & SAS_TASK_STATE_ABORTED)) { + if (!(task->task_state_flags & SAS_TASK_STATE_DONE)) { + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk("TMF task timeout.\n")); + goto ex_err; + } + } + + if (task->task_status.resp == SAS_TASK_COMPLETE && + task->task_status.stat == SAM_GOOD) { + res = TMF_RESP_FUNC_COMPLETE; + break; + + } else { + PM8001_IO_DBG(pm8001_ha, + pm8001_printk(" Task to dev %016llx response: " + "0x%x status 0x%x\n", + SAS_ADDR(dev->sas_addr), + task->task_status.resp, + task->task_status.stat)); + pm8001_free_task(task); + task = NULL; + } + } +ex_err: + BUG_ON(retry == 3 && task != NULL); + if (task != NULL) + pm8001_free_task(task); + return res; +} + +/** + * pm8001_dev_gone_notify - see the comments for "pm8001_dev_found_notify" + * @dev: the device structure which sas layer used. + */ +static void pm8001_dev_gone_notify(struct domain_device *dev) +{ + unsigned long flags = 0; + u32 tag; + struct pm8001_hba_info *pm8001_ha; + struct pm8001_device *pm8001_dev = dev->lldd_dev; + u32 device_id = pm8001_dev->device_id; + pm8001_ha = pm8001_find_ha_by_dev(dev); + spin_lock_irqsave(&pm8001_ha->lock, flags); + pm8001_tag_alloc(pm8001_ha, &tag); + if (pm8001_dev) { + PM8001_DISC_DBG(pm8001_ha, + pm8001_printk("found dev[%d:%x] is gone.\n", + pm8001_dev->device_id, pm8001_dev->dev_type)); + if (pm8001_dev->running_req) { + spin_unlock_irqrestore(&pm8001_ha->lock, flags); + pm8001_exec_internal_task_abort(pm8001_ha, pm8001_dev , + dev, 1, 0); + spin_lock_irqsave(&pm8001_ha->lock, flags); + } + PM8001_CHIP_DISP->dereg_dev_req(pm8001_ha, device_id); + pm8001_free_dev(pm8001_dev); + } else { + PM8001_DISC_DBG(pm8001_ha, + pm8001_printk("Found dev has gone.\n")); + } + dev->lldd_dev = NULL; + spin_unlock_irqrestore(&pm8001_ha->lock, flags); +} + +void pm8001_dev_gone(struct domain_device *dev) +{ + pm8001_dev_gone_notify(dev); +} + +static int pm8001_issue_ssp_tmf(struct domain_device *dev, + u8 *lun, struct pm8001_tmf_task *tmf) +{ + struct sas_ssp_task ssp_task; + if (!(dev->tproto & SAS_PROTOCOL_SSP)) + return TMF_RESP_FUNC_ESUPP; + + strncpy((u8 *)&ssp_task.LUN, lun, 8); + return pm8001_exec_internal_tmf_task(dev, &ssp_task, sizeof(ssp_task), + tmf); +} + +/** + * Standard mandates link reset for ATA (type 0) and hard reset for + * SSP (type 1) , only for RECOVERY + */ +int pm8001_I_T_nexus_reset(struct domain_device *dev) +{ + int rc = TMF_RESP_FUNC_FAILED; + struct pm8001_device *pm8001_dev; + struct pm8001_hba_info *pm8001_ha; + struct sas_phy *phy; + if (!dev || !dev->lldd_dev) + return -1; + + pm8001_dev = dev->lldd_dev; + pm8001_ha = pm8001_find_ha_by_dev(dev); + phy = sas_find_local_phy(dev); + + if (dev_is_sata(dev)) { + DECLARE_COMPLETION_ONSTACK(completion_setstate); + rc = sas_phy_reset(phy, 1); + msleep(2000); + rc = pm8001_exec_internal_task_abort(pm8001_ha, pm8001_dev , + dev, 1, 0); + pm8001_dev->setds_completion = &completion_setstate; + rc = PM8001_CHIP_DISP->set_dev_state_req(pm8001_ha, + pm8001_dev, 0x01); + wait_for_completion(&completion_setstate); + } else{ + rc = sas_phy_reset(phy, 1); + msleep(2000); + } + PM8001_EH_DBG(pm8001_ha, pm8001_printk(" for device[%x]:rc=%d\n", + pm8001_dev->device_id, rc)); + return rc; +} + +/* mandatory SAM-3, the task reset the specified LUN*/ +int pm8001_lu_reset(struct domain_device *dev, u8 *lun) +{ + int rc = TMF_RESP_FUNC_FAILED; + struct pm8001_tmf_task tmf_task; + struct pm8001_device *pm8001_dev = dev->lldd_dev; + struct pm8001_hba_info *pm8001_ha = pm8001_find_ha_by_dev(dev); + if (dev_is_sata(dev)) { + struct sas_phy *phy = sas_find_local_phy(dev); + rc = pm8001_exec_internal_task_abort(pm8001_ha, pm8001_dev , + dev, 1, 0); + rc = sas_phy_reset(phy, 1); + rc = PM8001_CHIP_DISP->set_dev_state_req(pm8001_ha, + pm8001_dev, 0x01); + msleep(2000); + } else { + tmf_task.tmf = TMF_LU_RESET; + rc = pm8001_issue_ssp_tmf(dev, lun, &tmf_task); + } + /* If failed, fall-through I_T_Nexus reset */ + PM8001_EH_DBG(pm8001_ha, pm8001_printk("for device[%x]:rc=%d\n", + pm8001_dev->device_id, rc)); + return rc; +} + +/* optional SAM-3 */ +int pm8001_query_task(struct sas_task *task) +{ + u32 tag = 0xdeadbeef; + int i = 0; + struct scsi_lun lun; + struct pm8001_tmf_task tmf_task; + int rc = TMF_RESP_FUNC_FAILED; + if (unlikely(!task || !task->lldd_task || !task->dev)) + return rc; + + if (task->task_proto & SAS_PROTOCOL_SSP) { + struct scsi_cmnd *cmnd = task->uldd_task; + struct domain_device *dev = task->dev; + struct pm8001_hba_info *pm8001_ha = + pm8001_find_ha_by_dev(dev); + + int_to_scsilun(cmnd->device->lun, &lun); + rc = pm8001_find_tag(task, &tag); + if (rc == 0) { + rc = TMF_RESP_FUNC_FAILED; + return rc; + } + PM8001_EH_DBG(pm8001_ha, pm8001_printk("Query:[")); + for (i = 0; i < 16; i++) + printk(KERN_INFO "%02x ", cmnd->cmnd[i]); + printk(KERN_INFO "]\n"); + tmf_task.tmf = TMF_QUERY_TASK; + tmf_task.tag_of_task_to_be_managed = tag; + + rc = pm8001_issue_ssp_tmf(dev, lun.scsi_lun, &tmf_task); + switch (rc) { + /* The task is still in Lun, release it then */ + case TMF_RESP_FUNC_SUCC: + PM8001_EH_DBG(pm8001_ha, + pm8001_printk("The task is still in Lun \n")); + /* The task is not in Lun or failed, reset the phy */ + case TMF_RESP_FUNC_FAILED: + case TMF_RESP_FUNC_COMPLETE: + PM8001_EH_DBG(pm8001_ha, + pm8001_printk("The task is not in Lun or failed," + " reset the phy \n")); + break; + } + } + pm8001_printk(":rc= %d\n", rc); + return rc; +} + +/* mandatory SAM-3, still need free task/ccb info, abord the specified task */ +int pm8001_abort_task(struct sas_task *task) +{ + unsigned long flags; + u32 tag = 0xdeadbeef; + u32 device_id; + struct domain_device *dev ; + struct pm8001_hba_info *pm8001_ha = NULL; + struct pm8001_ccb_info *ccb; + struct scsi_lun lun; + struct pm8001_device *pm8001_dev; + struct pm8001_tmf_task tmf_task; + int rc = TMF_RESP_FUNC_FAILED; + if (unlikely(!task || !task->lldd_task || !task->dev)) + return rc; + spin_lock_irqsave(&task->task_state_lock, flags); + if (task->task_state_flags & SAS_TASK_STATE_DONE) { + spin_unlock_irqrestore(&task->task_state_lock, flags); + rc = TMF_RESP_FUNC_COMPLETE; + goto out; + } + spin_unlock_irqrestore(&task->task_state_lock, flags); + if (task->task_proto & SAS_PROTOCOL_SSP) { + struct scsi_cmnd *cmnd = task->uldd_task; + dev = task->dev; + ccb = task->lldd_task; + pm8001_dev = dev->lldd_dev; + pm8001_ha = pm8001_find_ha_by_dev(dev); + int_to_scsilun(cmnd->device->lun, &lun); + rc = pm8001_find_tag(task, &tag); + if (rc == 0) { + printk(KERN_INFO "No such tag in %s\n", __func__); + rc = TMF_RESP_FUNC_FAILED; + return rc; + } + device_id = pm8001_dev->device_id; + PM8001_EH_DBG(pm8001_ha, + pm8001_printk("abort io to device_id = %d\n", device_id)); + tmf_task.tmf = TMF_ABORT_TASK; + tmf_task.tag_of_task_to_be_managed = tag; + rc = pm8001_issue_ssp_tmf(dev, lun.scsi_lun, &tmf_task); + rc = pm8001_exec_internal_task_abort(pm8001_ha, pm8001_dev, + pm8001_dev->sas_device, 0, tag); + } else if (task->task_proto & SAS_PROTOCOL_SATA || + task->task_proto & SAS_PROTOCOL_STP) { + dev = task->dev; + pm8001_dev = dev->lldd_dev; + pm8001_ha = pm8001_find_ha_by_dev(dev); + rc = pm8001_find_tag(task, &tag); + if (rc == 0) { + printk(KERN_INFO "No such tag in %s\n", __func__); + rc = TMF_RESP_FUNC_FAILED; + return rc; + } + rc = pm8001_exec_internal_task_abort(pm8001_ha, pm8001_dev, + pm8001_dev->sas_device, 0, tag); + } else if (task->task_proto & SAS_PROTOCOL_SMP) { + /* SMP */ + dev = task->dev; + pm8001_dev = dev->lldd_dev; + pm8001_ha = pm8001_find_ha_by_dev(dev); + rc = pm8001_find_tag(task, &tag); + if (rc == 0) { + printk(KERN_INFO "No such tag in %s\n", __func__); + rc = TMF_RESP_FUNC_FAILED; + return rc; + } + rc = pm8001_exec_internal_task_abort(pm8001_ha, pm8001_dev, + pm8001_dev->sas_device, 0, tag); + + } +out: + if (rc != TMF_RESP_FUNC_COMPLETE) + pm8001_printk("rc= %d\n", rc); + return rc; +} + +int pm8001_abort_task_set(struct domain_device *dev, u8 *lun) +{ + int rc = TMF_RESP_FUNC_FAILED; + struct pm8001_tmf_task tmf_task; + + tmf_task.tmf = TMF_ABORT_TASK_SET; + rc = pm8001_issue_ssp_tmf(dev, lun, &tmf_task); + return rc; +} + +int pm8001_clear_aca(struct domain_device *dev, u8 *lun) +{ + int rc = TMF_RESP_FUNC_FAILED; + struct pm8001_tmf_task tmf_task; + + tmf_task.tmf = TMF_CLEAR_ACA; + rc = pm8001_issue_ssp_tmf(dev, lun, &tmf_task); + + return rc; +} + +int pm8001_clear_task_set(struct domain_device *dev, u8 *lun) +{ + int rc = TMF_RESP_FUNC_FAILED; + struct pm8001_tmf_task tmf_task; + struct pm8001_device *pm8001_dev = dev->lldd_dev; + struct pm8001_hba_info *pm8001_ha = pm8001_find_ha_by_dev(dev); + + PM8001_EH_DBG(pm8001_ha, + pm8001_printk("I_T_L_Q clear task set[%x]\n", + pm8001_dev->device_id)); + tmf_task.tmf = TMF_CLEAR_TASK_SET; + rc = pm8001_issue_ssp_tmf(dev, lun, &tmf_task); + return rc; +} + diff --git a/drivers/scsi/pm8001/pm8001_sas.h b/drivers/scsi/pm8001/pm8001_sas.h new file mode 100644 index 000000000000..14c676bbb533 --- /dev/null +++ b/drivers/scsi/pm8001/pm8001_sas.h @@ -0,0 +1,480 @@ +/* + * PMC-Sierra SPC 8001 SAS/SATA based host adapters driver + * + * Copyright (c) 2008-2009 USI Co., Ltd. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. + * 3. Neither the names of the above-listed copyright holders nor the names + * of any contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. + * + */ + +#ifndef _PM8001_SAS_H_ +#define _PM8001_SAS_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "pm8001_defs.h" + +#define DRV_NAME "pm8001" +#define DRV_VERSION "0.1.36" +#define PM8001_FAIL_LOGGING 0x01 /* libsas EH function logging */ +#define PM8001_INIT_LOGGING 0x02 /* driver init logging */ +#define PM8001_DISC_LOGGING 0x04 /* discovery layer logging */ +#define PM8001_IO_LOGGING 0x08 /* I/O path logging */ +#define PM8001_EH_LOGGING 0x10 /* Error message logging */ +#define PM8001_IOCTL_LOGGING 0x20 /* IOCTL message logging */ +#define PM8001_MSG_LOGGING 0x40 /* misc message logging */ +#define pm8001_printk(format, arg...) printk(KERN_INFO "%s %d:" format,\ + __func__, __LINE__, ## arg) +#define PM8001_CHECK_LOGGING(HBA, LEVEL, CMD) \ +do { \ + if (unlikely(HBA->logging_level & LEVEL)) \ + do { \ + CMD; \ + } while (0); \ +} while (0); + +#define PM8001_EH_DBG(HBA, CMD) \ + PM8001_CHECK_LOGGING(HBA, PM8001_EH_LOGGING, CMD) + +#define PM8001_INIT_DBG(HBA, CMD) \ + PM8001_CHECK_LOGGING(HBA, PM8001_INIT_LOGGING, CMD) + +#define PM8001_DISC_DBG(HBA, CMD) \ + PM8001_CHECK_LOGGING(HBA, PM8001_DISC_LOGGING, CMD) + +#define PM8001_IO_DBG(HBA, CMD) \ + PM8001_CHECK_LOGGING(HBA, PM8001_IO_LOGGING, CMD) + +#define PM8001_FAIL_DBG(HBA, CMD) \ + PM8001_CHECK_LOGGING(HBA, PM8001_FAIL_LOGGING, CMD) + +#define PM8001_IOCTL_DBG(HBA, CMD) \ + PM8001_CHECK_LOGGING(HBA, PM8001_IOCTL_LOGGING, CMD) + +#define PM8001_MSG_DBG(HBA, CMD) \ + PM8001_CHECK_LOGGING(HBA, PM8001_MSG_LOGGING, CMD) + + +#define PM8001_USE_TASKLET +#define PM8001_USE_MSIX + + +#define DEV_IS_EXPANDER(type) ((type == EDGE_DEV) || (type == FANOUT_DEV)) + +#define PM8001_NAME_LENGTH 32/* generic length of strings */ +extern struct list_head hba_list; +extern const struct pm8001_dispatch pm8001_8001_dispatch; + +struct pm8001_hba_info; +struct pm8001_ccb_info; +struct pm8001_device; +struct pm8001_tmf_task; +struct pm8001_dispatch { + char *name; + int (*chip_init)(struct pm8001_hba_info *pm8001_ha); + int (*chip_soft_rst)(struct pm8001_hba_info *pm8001_ha, u32 signature); + void (*chip_rst)(struct pm8001_hba_info *pm8001_ha); + int (*chip_ioremap)(struct pm8001_hba_info *pm8001_ha); + void (*chip_iounmap)(struct pm8001_hba_info *pm8001_ha); + void (*isr)(struct pm8001_hba_info *pm8001_ha); + u32 (*is_our_interupt)(struct pm8001_hba_info *pm8001_ha); + int (*isr_process_oq)(struct pm8001_hba_info *pm8001_ha); + void (*interrupt_enable)(struct pm8001_hba_info *pm8001_ha); + void (*interrupt_disable)(struct pm8001_hba_info *pm8001_ha); + void (*make_prd)(struct scatterlist *scatter, int nr, void *prd); + int (*smp_req)(struct pm8001_hba_info *pm8001_ha, + struct pm8001_ccb_info *ccb); + int (*ssp_io_req)(struct pm8001_hba_info *pm8001_ha, + struct pm8001_ccb_info *ccb); + int (*sata_req)(struct pm8001_hba_info *pm8001_ha, + struct pm8001_ccb_info *ccb); + int (*phy_start_req)(struct pm8001_hba_info *pm8001_ha, u8 phy_id); + int (*phy_stop_req)(struct pm8001_hba_info *pm8001_ha, u8 phy_id); + int (*reg_dev_req)(struct pm8001_hba_info *pm8001_ha, + struct pm8001_device *pm8001_dev, u32 flag); + int (*dereg_dev_req)(struct pm8001_hba_info *pm8001_ha, u32 device_id); + int (*phy_ctl_req)(struct pm8001_hba_info *pm8001_ha, + u32 phy_id, u32 phy_op); + int (*task_abort)(struct pm8001_hba_info *pm8001_ha, + struct pm8001_device *pm8001_dev, u8 flag, u32 task_tag, + u32 cmd_tag); + int (*ssp_tm_req)(struct pm8001_hba_info *pm8001_ha, + struct pm8001_ccb_info *ccb, struct pm8001_tmf_task *tmf); + int (*get_nvmd_req)(struct pm8001_hba_info *pm8001_ha, void *payload); + int (*set_nvmd_req)(struct pm8001_hba_info *pm8001_ha, void *payload); + int (*fw_flash_update_req)(struct pm8001_hba_info *pm8001_ha, + void *payload); + int (*set_dev_state_req)(struct pm8001_hba_info *pm8001_ha, + struct pm8001_device *pm8001_dev, u32 state); + int (*sas_diag_start_end_req)(struct pm8001_hba_info *pm8001_ha, + u32 state); + int (*sas_diag_execute_req)(struct pm8001_hba_info *pm8001_ha, + u32 state); +}; + +struct pm8001_chip_info { + u32 n_phy; + const struct pm8001_dispatch *dispatch; +}; +#define PM8001_CHIP_DISP (pm8001_ha->chip->dispatch) + +struct pm8001_port { + struct asd_sas_port sas_port; +}; + +struct pm8001_phy { + struct pm8001_hba_info *pm8001_ha; + struct pm8001_port *port; + struct asd_sas_phy sas_phy; + struct sas_identify identify; + struct scsi_device *sdev; + u64 dev_sas_addr; + u32 phy_type; + struct completion *enable_completion; + u32 frame_rcvd_size; + u8 frame_rcvd[32]; + u8 phy_attached; + u8 phy_state; + enum sas_linkrate minimum_linkrate; + enum sas_linkrate maximum_linkrate; +}; + +struct pm8001_device { + enum sas_dev_type dev_type; + struct domain_device *sas_device; + u32 attached_phy; + u32 id; + struct completion *dcompletion; + struct completion *setds_completion; + u32 device_id; + u32 running_req; +}; + +struct pm8001_prd_imt { + __le32 len; + __le32 e; +}; + +struct pm8001_prd { + __le64 addr; /* 64-bit buffer address */ + struct pm8001_prd_imt im_len; /* 64-bit length */ +} __attribute__ ((packed)); +/* + * CCB(Command Control Block) + */ +struct pm8001_ccb_info { + struct list_head entry; + struct sas_task *task; + u32 n_elem; + u32 ccb_tag; + dma_addr_t ccb_dma_handle; + struct pm8001_device *device; + struct pm8001_prd buf_prd[PM8001_MAX_DMA_SG]; + struct fw_control_ex *fw_control_context; +}; + +struct mpi_mem { + void *virt_ptr; + dma_addr_t phys_addr; + u32 phys_addr_hi; + u32 phys_addr_lo; + u32 total_len; + u32 num_elements; + u32 element_size; + u32 alignment; +}; + +struct mpi_mem_req { + /* The number of element in the mpiMemory array */ + u32 count; + /* The array of structures that define memroy regions*/ + struct mpi_mem region[USI_MAX_MEMCNT]; +}; + +struct main_cfg_table { + u32 signature; + u32 interface_rev; + u32 firmware_rev; + u32 max_out_io; + u32 max_sgl; + u32 ctrl_cap_flag; + u32 gst_offset; + u32 inbound_queue_offset; + u32 outbound_queue_offset; + u32 inbound_q_nppd_hppd; + u32 outbound_hw_event_pid0_3; + u32 outbound_hw_event_pid4_7; + u32 outbound_ncq_event_pid0_3; + u32 outbound_ncq_event_pid4_7; + u32 outbound_tgt_ITNexus_event_pid0_3; + u32 outbound_tgt_ITNexus_event_pid4_7; + u32 outbound_tgt_ssp_event_pid0_3; + u32 outbound_tgt_ssp_event_pid4_7; + u32 outbound_tgt_smp_event_pid0_3; + u32 outbound_tgt_smp_event_pid4_7; + u32 upper_event_log_addr; + u32 lower_event_log_addr; + u32 event_log_size; + u32 event_log_option; + u32 upper_iop_event_log_addr; + u32 lower_iop_event_log_addr; + u32 iop_event_log_size; + u32 iop_event_log_option; + u32 fatal_err_interrupt; + u32 fatal_err_dump_offset0; + u32 fatal_err_dump_length0; + u32 fatal_err_dump_offset1; + u32 fatal_err_dump_length1; + u32 hda_mode_flag; + u32 anolog_setup_table_offset; +}; +struct general_status_table { + u32 gst_len_mpistate; + u32 iq_freeze_state0; + u32 iq_freeze_state1; + u32 msgu_tcnt; + u32 iop_tcnt; + u32 reserved; + u32 phy_state[8]; + u32 reserved1; + u32 reserved2; + u32 reserved3; + u32 recover_err_info[8]; +}; +struct inbound_queue_table { + u32 element_pri_size_cnt; + u32 upper_base_addr; + u32 lower_base_addr; + u32 ci_upper_base_addr; + u32 ci_lower_base_addr; + u32 pi_pci_bar; + u32 pi_offset; + u32 total_length; + void *base_virt; + void *ci_virt; + u32 reserved; + __le32 consumer_index; + u32 producer_idx; +}; +struct outbound_queue_table { + u32 element_size_cnt; + u32 upper_base_addr; + u32 lower_base_addr; + void *base_virt; + u32 pi_upper_base_addr; + u32 pi_lower_base_addr; + u32 ci_pci_bar; + u32 ci_offset; + u32 total_length; + void *pi_virt; + u32 interrup_vec_cnt_delay; + u32 dinterrup_to_pci_offset; + __le32 producer_index; + u32 consumer_idx; +}; +struct pm8001_hba_memspace { + void __iomem *memvirtaddr; + u64 membase; + u32 memsize; +}; +struct pm8001_hba_info { + char name[PM8001_NAME_LENGTH]; + struct list_head list; + unsigned long flags; + spinlock_t lock;/* host-wide lock */ + struct pci_dev *pdev;/* our device */ + struct device *dev; + struct pm8001_hba_memspace io_mem[6]; + struct mpi_mem_req memoryMap; + void __iomem *msg_unit_tbl_addr;/*Message Unit Table Addr*/ + void __iomem *main_cfg_tbl_addr;/*Main Config Table Addr*/ + void __iomem *general_stat_tbl_addr;/*General Status Table Addr*/ + void __iomem *inbnd_q_tbl_addr;/*Inbound Queue Config Table Addr*/ + void __iomem *outbnd_q_tbl_addr;/*Outbound Queue Config Table Addr*/ + struct main_cfg_table main_cfg_tbl; + struct general_status_table gs_tbl; + struct inbound_queue_table inbnd_q_tbl[PM8001_MAX_INB_NUM]; + struct outbound_queue_table outbnd_q_tbl[PM8001_MAX_OUTB_NUM]; + u8 sas_addr[SAS_ADDR_SIZE]; + struct sas_ha_struct *sas;/* SCSI/SAS glue */ + struct Scsi_Host *shost; + u32 chip_id; + const struct pm8001_chip_info *chip; + struct completion *nvmd_completion; + int tags_num; + unsigned long *tags; + struct pm8001_phy phy[PM8001_MAX_PHYS]; + struct pm8001_port port[PM8001_MAX_PHYS]; + u32 id; + u32 irq; + struct pm8001_device *devices; + struct pm8001_ccb_info *ccb_info; +#ifdef PM8001_USE_MSIX + struct msix_entry msix_entries[16];/*for msi-x interrupt*/ + int number_of_intr;/*will be used in remove()*/ +#endif +#ifdef PM8001_USE_TASKLET + struct tasklet_struct tasklet; +#endif + struct list_head wq_list; + u32 logging_level; + u32 fw_status; + const struct firmware *fw_image; +}; + +struct pm8001_wq { + struct delayed_work work_q; + struct pm8001_hba_info *pm8001_ha; + void *data; + int handler; + struct list_head entry; +}; + +struct pm8001_fw_image_header { + u8 vender_id[8]; + u8 product_id; + u8 hardware_rev; + u8 dest_partition; + u8 reserved; + u8 fw_rev[4]; + __be32 image_length; + __be32 image_crc; + __be32 startup_entry; +} __attribute__((packed, aligned(4))); + +/* define task management IU */ +struct pm8001_tmf_task { + u8 tmf; + u32 tag_of_task_to_be_managed; +}; +/** + * FW Flash Update status values + */ +#define FLASH_UPDATE_COMPLETE_PENDING_REBOOT 0x00 +#define FLASH_UPDATE_IN_PROGRESS 0x01 +#define FLASH_UPDATE_HDR_ERR 0x02 +#define FLASH_UPDATE_OFFSET_ERR 0x03 +#define FLASH_UPDATE_CRC_ERR 0x04 +#define FLASH_UPDATE_LENGTH_ERR 0x05 +#define FLASH_UPDATE_HW_ERR 0x06 +#define FLASH_UPDATE_DNLD_NOT_SUPPORTED 0x10 +#define FLASH_UPDATE_DISABLED 0x11 + +/** + * brief param structure for firmware flash update. + */ +struct fw_flash_updata_info { + u32 cur_image_offset; + u32 cur_image_len; + u32 total_image_len; + struct pm8001_prd sgl; +}; + +struct fw_control_info { + u32 retcode;/*ret code (status)*/ + u32 phase;/*ret code phase*/ + u32 phaseCmplt;/*percent complete for the current + update phase */ + u32 version;/*Hex encoded firmware version number*/ + u32 offset;/*Used for downloading firmware */ + u32 len; /*len of buffer*/ + u32 size;/* Used in OS VPD and Trace get size + operations.*/ + u32 reserved;/* padding required for 64 bit + alignment */ + u8 buffer[1];/* Start of buffer */ +}; +struct fw_control_ex { + struct fw_control_info *fw_control; + void *buffer;/* keep buffer pointer to be + freed when the responce comes*/ + void *virtAddr;/* keep virtual address of the data */ + void *usrAddr;/* keep virtual address of the + user data */ + dma_addr_t phys_addr; + u32 len; /* len of buffer */ + void *payload; /* pointer to IOCTL Payload */ + u8 inProgress;/*if 1 - the IOCTL request is in + progress */ + void *param1; + void *param2; + void *param3; +}; + +/******************** function prototype *********************/ +int pm8001_tag_alloc(struct pm8001_hba_info *pm8001_ha, u32 *tag_out); +void pm8001_tag_init(struct pm8001_hba_info *pm8001_ha); +u32 pm8001_get_ncq_tag(struct sas_task *task, u32 *tag); +void pm8001_ccb_free(struct pm8001_hba_info *pm8001_ha, u32 ccb_idx); +void pm8001_ccb_task_free(struct pm8001_hba_info *pm8001_ha, + struct sas_task *task, struct pm8001_ccb_info *ccb, u32 ccb_idx); +int pm8001_phy_control(struct asd_sas_phy *sas_phy, enum phy_func func, + void *funcdata); +int pm8001_slave_alloc(struct scsi_device *scsi_dev); +int pm8001_slave_configure(struct scsi_device *sdev); +void pm8001_scan_start(struct Scsi_Host *shost); +int pm8001_scan_finished(struct Scsi_Host *shost, unsigned long time); +int pm8001_queue_command(struct sas_task *task, const int num, + gfp_t gfp_flags); +int pm8001_abort_task(struct sas_task *task); +int pm8001_abort_task_set(struct domain_device *dev, u8 *lun); +int pm8001_clear_aca(struct domain_device *dev, u8 *lun); +int pm8001_clear_task_set(struct domain_device *dev, u8 *lun); +int pm8001_dev_found(struct domain_device *dev); +void pm8001_dev_gone(struct domain_device *dev); +int pm8001_lu_reset(struct domain_device *dev, u8 *lun); +int pm8001_I_T_nexus_reset(struct domain_device *dev); +int pm8001_query_task(struct sas_task *task); +int pm8001_mem_alloc(struct pci_dev *pdev, void **virt_addr, + dma_addr_t *pphys_addr, u32 *pphys_addr_hi, u32 *pphys_addr_lo, + u32 mem_size, u32 align); + + +/* ctl shared API */ +extern struct device_attribute *pm8001_host_attrs[]; + +#endif + diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index b0f0f3851cd4..161fadb291d1 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1586,6 +1586,8 @@ #define PCI_VENDOR_ID_COMPEX 0x11f6 #define PCI_DEVICE_ID_COMPEX_ENET100VG4 0x0112 +#define PCI_VENDOR_ID_PMC_Sierra 0x11f8 + #define PCI_VENDOR_ID_RP 0x11fe #define PCI_DEVICE_ID_RP32INTF 0x0001 #define PCI_DEVICE_ID_RP8INTF 0x0002 -- cgit v1.3-8-gc7d7 From e881a172dac4d9ea3b2a1540041d872963c269bd Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 15 Oct 2009 17:46:39 -0700 Subject: [SCSI] modify change_queue_depth to take in reason why it is being called This patch modifies scsi_host_template->change_queue_depth so that it takes an argument indicating why it is being called. This will be used so that if a LLD needs to do some extra processing when handling queue fulls or later ramp ups, it can do so. This is a simple port of the drivers setting a change_queue_depth callback. In the patch I just have these LLDs adjust the queue depth if the user was requesting it. Signed-off-by: Mike Christie [Vasu.Dev: v2 Also converted pmcraid_change_queue_depth and then verified all modules compile using "make allmodconfig" for any new build warnings on X86_64. Updated original description after combing two original patches from Mike to make this patch git bisectable.] Signed-off-by: Vasu Dev [jejb: fixed up 53c700] Signed-off-by: James Bottomley --- drivers/ata/libata-scsi.c | 7 ++++++- drivers/ata/sata_nv.c | 2 +- drivers/message/fusion/mptscsih.c | 9 +++++++-- drivers/message/fusion/mptscsih.h | 3 ++- drivers/s390/scsi/zfcp_scsi.c | 6 +++++- drivers/scsi/3w-9xxx.c | 6 +++++- drivers/scsi/3w-xxxx.c | 6 +++++- drivers/scsi/53c700.c | 7 +++++-- drivers/scsi/aacraid/linit.c | 6 +++++- drivers/scsi/arcmsr/arcmsr_hba.c | 5 ++++- drivers/scsi/hptiop.c | 5 ++++- drivers/scsi/ibmvscsi/ibmvfc.c | 7 ++++++- drivers/scsi/ibmvscsi/ibmvscsi.c | 7 ++++++- drivers/scsi/ipr.c | 7 ++++++- drivers/scsi/libfc/fc_fcp.c | 5 ++++- drivers/scsi/libiscsi.c | 5 ++++- drivers/scsi/libsas/sas_scsi_host.c | 6 +++++- drivers/scsi/megaraid/megaraid_mbox.c | 7 ++++++- drivers/scsi/mpt2sas/mpt2sas_scsih.c | 10 +++++++--- drivers/scsi/pmcraid.c | 7 ++++++- drivers/scsi/qla2xxx/qla_os.c | 7 +++++-- drivers/scsi/scsi_sysfs.c | 3 ++- include/linux/libata.h | 2 +- include/scsi/libfc.h | 2 +- include/scsi/libiscsi.h | 3 ++- include/scsi/libsas.h | 3 ++- include/scsi/scsi_host.h | 8 +++++++- 27 files changed, 119 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index b4ee28dec521..5d52c2fcd076 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -1208,6 +1208,7 @@ void ata_scsi_slave_destroy(struct scsi_device *sdev) * ata_scsi_change_queue_depth - SCSI callback for queue depth config * @sdev: SCSI device to configure queue depth for * @queue_depth: new queue depth + * @reason: calling context * * This is libata standard hostt->change_queue_depth callback. * SCSI will call into this callback when user tries to set queue @@ -1219,12 +1220,16 @@ void ata_scsi_slave_destroy(struct scsi_device *sdev) * RETURNS: * Newly configured queue depth. */ -int ata_scsi_change_queue_depth(struct scsi_device *sdev, int queue_depth) +int ata_scsi_change_queue_depth(struct scsi_device *sdev, int queue_depth, + int reason) { struct ata_port *ap = ata_shost_to_port(sdev->host); struct ata_device *dev; unsigned long flags; + if (reason != SCSI_QDEPTH_DEFAULT) + return -EOPNOTSUPP; + if (queue_depth < 1 || queue_depth == sdev->queue_depth) return sdev->queue_depth; diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c index 1eb4e020eb5c..0c82d335c55d 100644 --- a/drivers/ata/sata_nv.c +++ b/drivers/ata/sata_nv.c @@ -1975,7 +1975,7 @@ static int nv_swncq_slave_config(struct scsi_device *sdev) ata_id_c_string(dev->id, model_num, ATA_ID_PROD, sizeof(model_num)); if (strncmp(model_num, "Maxtor", 6) == 0) { - ata_scsi_change_queue_depth(sdev, 1); + ata_scsi_change_queue_depth(sdev, 1, SCSI_QDEPTH_DEFAULT); ata_dev_printk(dev, KERN_NOTICE, "Disabling SWNCQ mode (depth %x)\n", sdev->queue_depth); } diff --git a/drivers/message/fusion/mptscsih.c b/drivers/message/fusion/mptscsih.c index f68ec48a881e..57752751712b 100644 --- a/drivers/message/fusion/mptscsih.c +++ b/drivers/message/fusion/mptscsih.c @@ -2351,11 +2351,12 @@ mptscsih_slave_destroy(struct scsi_device *sdev) * mptscsih_change_queue_depth - This function will set a devices queue depth * @sdev: per scsi_device pointer * @qdepth: requested queue depth + * @reason: calling context * * Adding support for new 'change_queue_depth' api. */ int -mptscsih_change_queue_depth(struct scsi_device *sdev, int qdepth) +mptscsih_change_queue_depth(struct scsi_device *sdev, int qdepth, int reason) { MPT_SCSI_HOST *hd = shost_priv(sdev->host); VirtTarget *vtarget; @@ -2367,6 +2368,9 @@ mptscsih_change_queue_depth(struct scsi_device *sdev, int qdepth) starget = scsi_target(sdev); vtarget = starget->hostdata; + if (reason != SCSI_QDEPTH_DEFAULT) + return -EOPNOTSUPP; + if (ioc->bus_type == SPI) { if (!(vtarget->tflags & MPT_TARGET_FLAGS_Q_YES)) max_depth = 1; @@ -2433,7 +2437,8 @@ mptscsih_slave_configure(struct scsi_device *sdev) ioc->name, vtarget->negoFlags, vtarget->maxOffset, vtarget->minSyncFactor)); - mptscsih_change_queue_depth(sdev, MPT_SCSI_CMD_PER_DEV_HIGH); + mptscsih_change_queue_depth(sdev, MPT_SCSI_CMD_PER_DEV_HIGH, + SCSI_QDEPTH_DEFAULT); dsprintk(ioc, printk(MYIOC_s_DEBUG_FMT "tagged %d, simple %d, ordered %d\n", ioc->name,sdev->tagged_supported, sdev->simple_tags, diff --git a/drivers/message/fusion/mptscsih.h b/drivers/message/fusion/mptscsih.h index e0b33e04a33b..45a5ff3eff61 100644 --- a/drivers/message/fusion/mptscsih.h +++ b/drivers/message/fusion/mptscsih.h @@ -128,7 +128,8 @@ extern int mptscsih_taskmgmt_complete(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf, MPT_F extern int mptscsih_scandv_complete(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf, MPT_FRAME_HDR *r); extern int mptscsih_event_process(MPT_ADAPTER *ioc, EventNotificationReply_t *pEvReply); extern int mptscsih_ioc_reset(MPT_ADAPTER *ioc, int post_reset); -extern int mptscsih_change_queue_depth(struct scsi_device *sdev, int qdepth); +extern int mptscsih_change_queue_depth(struct scsi_device *sdev, int qdepth, + int reason); extern u8 mptscsih_raid_id_to_num(MPT_ADAPTER *ioc, u8 channel, u8 id); extern int mptscsih_is_phys_disk(MPT_ADAPTER *ioc, u8 channel, u8 id); extern struct device_attribute *mptscsih_host_attrs[]; diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c index 0e1a34627a2e..ad1154701729 100644 --- a/drivers/s390/scsi/zfcp_scsi.c +++ b/drivers/s390/scsi/zfcp_scsi.c @@ -29,8 +29,12 @@ char *zfcp_get_fcp_sns_info_ptr(struct fcp_rsp_iu *fcp_rsp_iu) return fcp_sns_info_ptr; } -static int zfcp_scsi_change_queue_depth(struct scsi_device *sdev, int depth) +static int zfcp_scsi_change_queue_depth(struct scsi_device *sdev, int depth, + int reason) { + if (reason != SCSI_QDEPTH_DEFAULT) + return -EOPNOTSUPP; + scsi_adjust_queue_depth(sdev, scsi_get_tag_type(sdev), depth); return sdev->queue_depth; } diff --git a/drivers/scsi/3w-9xxx.c b/drivers/scsi/3w-9xxx.c index 36c21b19e5d7..2d16d49fd3cd 100644 --- a/drivers/scsi/3w-9xxx.c +++ b/drivers/scsi/3w-9xxx.c @@ -186,8 +186,12 @@ static ssize_t twa_show_stats(struct device *dev, } /* End twa_show_stats() */ /* This function will set a devices queue depth */ -static int twa_change_queue_depth(struct scsi_device *sdev, int queue_depth) +static int twa_change_queue_depth(struct scsi_device *sdev, int queue_depth, + int reason) { + if (reason != SCSI_QDEPTH_DEFAULT) + return -EOPNOTSUPP; + if (queue_depth > TW_Q_LENGTH-2) queue_depth = TW_Q_LENGTH-2; scsi_adjust_queue_depth(sdev, MSG_ORDERED_TAG, queue_depth); diff --git a/drivers/scsi/3w-xxxx.c b/drivers/scsi/3w-xxxx.c index faa0fcfed71e..d224294c38fb 100644 --- a/drivers/scsi/3w-xxxx.c +++ b/drivers/scsi/3w-xxxx.c @@ -521,8 +521,12 @@ static ssize_t tw_show_stats(struct device *dev, struct device_attribute *attr, } /* End tw_show_stats() */ /* This function will set a devices queue depth */ -static int tw_change_queue_depth(struct scsi_device *sdev, int queue_depth) +static int tw_change_queue_depth(struct scsi_device *sdev, int queue_depth, + int reason) { + if (reason != SCSI_QDEPTH_DEFAULT) + return -EOPNOTSUPP; + if (queue_depth > TW_Q_LENGTH-2) queue_depth = TW_Q_LENGTH-2; scsi_adjust_queue_depth(sdev, MSG_ORDERED_TAG, queue_depth); diff --git a/drivers/scsi/53c700.c b/drivers/scsi/53c700.c index f5a9addb7050..6c60a8060c58 100644 --- a/drivers/scsi/53c700.c +++ b/drivers/scsi/53c700.c @@ -175,7 +175,7 @@ STATIC void NCR_700_chip_reset(struct Scsi_Host *host); STATIC int NCR_700_slave_alloc(struct scsi_device *SDpnt); STATIC int NCR_700_slave_configure(struct scsi_device *SDpnt); STATIC void NCR_700_slave_destroy(struct scsi_device *SDpnt); -static int NCR_700_change_queue_depth(struct scsi_device *SDpnt, int depth); +static int NCR_700_change_queue_depth(struct scsi_device *SDpnt, int depth, int reason); static int NCR_700_change_queue_type(struct scsi_device *SDpnt, int depth); STATIC struct device_attribute *NCR_700_dev_attrs[]; @@ -2082,8 +2082,11 @@ NCR_700_slave_destroy(struct scsi_device *SDp) } static int -NCR_700_change_queue_depth(struct scsi_device *SDp, int depth) +NCR_700_change_queue_depth(struct scsi_device *SDp, int depth, int reason) { + if (reason != SCSI_QDEPTH_DEFAULT) + return -EOPNOTSUPP; + if (depth > NCR_700_MAX_TAGS) depth = NCR_700_MAX_TAGS; diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c index 9b97c3e016fe..e9373a2d14fa 100644 --- a/drivers/scsi/aacraid/linit.c +++ b/drivers/scsi/aacraid/linit.c @@ -472,8 +472,12 @@ static int aac_slave_configure(struct scsi_device *sdev) * total capacity and the queue depth supported by the target device. */ -static int aac_change_queue_depth(struct scsi_device *sdev, int depth) +static int aac_change_queue_depth(struct scsi_device *sdev, int depth, + int reason) { + if (reason != SCSI_QDEPTH_DEFAULT) + return -EOPNOTSUPP; + if (sdev->tagged_supported && (sdev->type == TYPE_DISK) && (sdev_channel(sdev) == CONTAINER_CHANNEL)) { struct scsi_device * dev; diff --git a/drivers/scsi/arcmsr/arcmsr_hba.c b/drivers/scsi/arcmsr/arcmsr_hba.c index 80aac01b5a6f..47d5d19f8c92 100644 --- a/drivers/scsi/arcmsr/arcmsr_hba.c +++ b/drivers/scsi/arcmsr/arcmsr_hba.c @@ -98,8 +98,11 @@ static void arcmsr_flush_hbb_cache(struct AdapterControlBlock *acb); static const char *arcmsr_info(struct Scsi_Host *); static irqreturn_t arcmsr_interrupt(struct AdapterControlBlock *acb); static int arcmsr_adjust_disk_queue_depth(struct scsi_device *sdev, - int queue_depth) + int queue_depth, int reason) { + if (reason != SCSI_QDEPTH_DEFAULT) + return -EOPNOTSUPP; + if (queue_depth > ARCMSR_MAX_CMD_PERLUN) queue_depth = ARCMSR_MAX_CMD_PERLUN; scsi_adjust_queue_depth(sdev, MSG_ORDERED_TAG, queue_depth); diff --git a/drivers/scsi/hptiop.c b/drivers/scsi/hptiop.c index a0e7e711ff9d..901a3daeb36b 100644 --- a/drivers/scsi/hptiop.c +++ b/drivers/scsi/hptiop.c @@ -861,10 +861,13 @@ static int hptiop_reset(struct scsi_cmnd *scp) } static int hptiop_adjust_disk_queue_depth(struct scsi_device *sdev, - int queue_depth) + int queue_depth, int reason) { struct hptiop_hba *hba = (struct hptiop_hba *)sdev->host->hostdata; + if (reason != SCSI_QDEPTH_DEFAULT) + return -EOPNOTSUPP; + if (queue_depth > hba->max_requests) queue_depth = hba->max_requests; scsi_adjust_queue_depth(sdev, MSG_ORDERED_TAG, queue_depth); diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index bc9beb8c587c..87b536a97cb4 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -2764,12 +2764,17 @@ static int ibmvfc_slave_configure(struct scsi_device *sdev) * ibmvfc_change_queue_depth - Change the device's queue depth * @sdev: scsi device struct * @qdepth: depth to set + * @reason: calling context * * Return value: * actual depth set **/ -static int ibmvfc_change_queue_depth(struct scsi_device *sdev, int qdepth) +static int ibmvfc_change_queue_depth(struct scsi_device *sdev, int qdepth, + int reason) { + if (reason != SCSI_QDEPTH_DEFAULT) + return -EOPNOTSUPP; + if (qdepth > IBMVFC_MAX_CMDS_PER_LUN) qdepth = IBMVFC_MAX_CMDS_PER_LUN; diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c index d9b0e9d31983..e475b7957c2d 100644 --- a/drivers/scsi/ibmvscsi/ibmvscsi.c +++ b/drivers/scsi/ibmvscsi/ibmvscsi.c @@ -1637,12 +1637,17 @@ static int ibmvscsi_slave_configure(struct scsi_device *sdev) * ibmvscsi_change_queue_depth - Change the device's queue depth * @sdev: scsi device struct * @qdepth: depth to set + * @reason: calling context * * Return value: * actual depth set **/ -static int ibmvscsi_change_queue_depth(struct scsi_device *sdev, int qdepth) +static int ibmvscsi_change_queue_depth(struct scsi_device *sdev, int qdepth, + int reason) { + if (reason != SCSI_QDEPTH_DEFAULT) + return -EOPNOTSUPP; + if (qdepth > IBMVSCSI_MAX_CMDS_PER_LUN) qdepth = IBMVSCSI_MAX_CMDS_PER_LUN; diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index 5f045505a1f4..d40d5c79fff1 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -3367,16 +3367,21 @@ static int ipr_free_dump(struct ipr_ioa_cfg *ioa_cfg) { return 0; }; * ipr_change_queue_depth - Change the device's queue depth * @sdev: scsi device struct * @qdepth: depth to set + * @reason: calling context * * Return value: * actual depth set **/ -static int ipr_change_queue_depth(struct scsi_device *sdev, int qdepth) +static int ipr_change_queue_depth(struct scsi_device *sdev, int qdepth, + int reason) { struct ipr_ioa_cfg *ioa_cfg = (struct ipr_ioa_cfg *)sdev->host->hostdata; struct ipr_resource_entry *res; unsigned long lock_flags = 0; + if (reason != SCSI_QDEPTH_DEFAULT) + return -EOPNOTSUPP; + spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); res = (struct ipr_resource_entry *)sdev->hostdata; diff --git a/drivers/scsi/libfc/fc_fcp.c b/drivers/scsi/libfc/fc_fcp.c index a67f53a5026c..beaab818d8de 100644 --- a/drivers/scsi/libfc/fc_fcp.c +++ b/drivers/scsi/libfc/fc_fcp.c @@ -2064,8 +2064,11 @@ int fc_slave_alloc(struct scsi_device *sdev) } EXPORT_SYMBOL(fc_slave_alloc); -int fc_change_queue_depth(struct scsi_device *sdev, int qdepth) +int fc_change_queue_depth(struct scsi_device *sdev, int qdepth, int reason) { + if (reason != SCSI_QDEPTH_DEFAULT) + return -EOPNOTSUPP; + scsi_adjust_queue_depth(sdev, scsi_get_tag_type(sdev), qdepth); return sdev->queue_depth; } diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index f1a4246f890c..67d0f3fc8ac0 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -1643,8 +1643,11 @@ fault: } EXPORT_SYMBOL_GPL(iscsi_queuecommand); -int iscsi_change_queue_depth(struct scsi_device *sdev, int depth) +int iscsi_change_queue_depth(struct scsi_device *sdev, int depth, int reason) { + if (reason != SCSI_QDEPTH_DEFAULT) + return -EOPNOTSUPP; + scsi_adjust_queue_depth(sdev, scsi_get_tag_type(sdev), depth); return sdev->queue_depth; } diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c index 1c558d3bce18..14b13196b22d 100644 --- a/drivers/scsi/libsas/sas_scsi_host.c +++ b/drivers/scsi/libsas/sas_scsi_host.c @@ -820,10 +820,14 @@ void sas_slave_destroy(struct scsi_device *scsi_dev) ata_port_disable(dev->sata_dev.ap); } -int sas_change_queue_depth(struct scsi_device *scsi_dev, int new_depth) +int sas_change_queue_depth(struct scsi_device *scsi_dev, int new_depth, + int reason) { int res = min(new_depth, SAS_MAX_QD); + if (reason != SCSI_QDEPTH_DEFAULT) + return -EOPNOTSUPP; + if (scsi_dev->tagged_supported) scsi_adjust_queue_depth(scsi_dev, scsi_get_tag_type(scsi_dev), res); diff --git a/drivers/scsi/megaraid/megaraid_mbox.c b/drivers/scsi/megaraid/megaraid_mbox.c index 234f0b7eb21c..fd181c2a8ae4 100644 --- a/drivers/scsi/megaraid/megaraid_mbox.c +++ b/drivers/scsi/megaraid/megaraid_mbox.c @@ -335,12 +335,17 @@ static struct device_attribute *megaraid_sdev_attrs[] = { * megaraid_change_queue_depth - Change the device's queue depth * @sdev: scsi device struct * @qdepth: depth to set + * @reason: calling context * * Return value: * actual depth set */ -static int megaraid_change_queue_depth(struct scsi_device *sdev, int qdepth) +static int megaraid_change_queue_depth(struct scsi_device *sdev, int qdepth, + int reason) { + if (reason != SCSI_QDEPTH_DEFAULT) + return -EOPNOTSUPP; + if (qdepth > MBOX_MAX_SCSI_CMDS) qdepth = MBOX_MAX_SCSI_CMDS; scsi_adjust_queue_depth(sdev, 0, qdepth); diff --git a/drivers/scsi/mpt2sas/mpt2sas_scsih.c b/drivers/scsi/mpt2sas/mpt2sas_scsih.c index 8dc682f00fd2..55ee014a7e08 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_scsih.c +++ b/drivers/scsi/mpt2sas/mpt2sas_scsih.c @@ -1099,11 +1099,12 @@ _scsih_build_scatter_gather(struct MPT2SAS_ADAPTER *ioc, * _scsih_change_queue_depth - setting device queue depth * @sdev: scsi device struct * @qdepth: requested queue depth + * @reason: calling context * * Returns queue depth. */ static int -_scsih_change_queue_depth(struct scsi_device *sdev, int qdepth) +_scsih_change_queue_depth(struct scsi_device *sdev, int qdepth, int reason) { struct Scsi_Host *shost = sdev->host; int max_depth; @@ -1114,6 +1115,9 @@ _scsih_change_queue_depth(struct scsi_device *sdev, int qdepth) struct _sas_device *sas_device; unsigned long flags; + if (reason != SCSI_QDEPTH_DEFAULT) + return -EOPNOTSUPP; + max_depth = shost->can_queue; /* limit max device queue for SATA to 32 */ @@ -1569,7 +1573,7 @@ _scsih_slave_configure(struct scsi_device *sdev) r_level, raid_device->handle, (unsigned long long)raid_device->wwid, raid_device->num_pds, ds); - _scsih_change_queue_depth(sdev, qdepth); + _scsih_change_queue_depth(sdev, qdepth, SCSI_QDEPTH_DEFAULT); return 0; } @@ -1615,7 +1619,7 @@ _scsih_slave_configure(struct scsi_device *sdev) _scsih_display_sata_capabilities(ioc, sas_device, sdev); } - _scsih_change_queue_depth(sdev, qdepth); + _scsih_change_queue_depth(sdev, qdepth, SCSI_QDEPTH_DEFAULT); if (ssp_target) sas_read_port_mode_page(sdev); diff --git a/drivers/scsi/pmcraid.c b/drivers/scsi/pmcraid.c index f7c70e2a8224..86d158ee3572 100644 --- a/drivers/scsi/pmcraid.c +++ b/drivers/scsi/pmcraid.c @@ -278,12 +278,17 @@ static void pmcraid_slave_destroy(struct scsi_device *scsi_dev) * pmcraid_change_queue_depth - Change the device's queue depth * @scsi_dev: scsi device struct * @depth: depth to set + * @reason: calling context * * Return value * actual depth set */ -static int pmcraid_change_queue_depth(struct scsi_device *scsi_dev, int depth) +static int pmcraid_change_queue_depth(struct scsi_device *scsi_dev, int depth, + int reason) { + if (reason != SCSI_QDEPTH_DEFAULT) + return -EOPNOTSUPP; + if (depth > PMCRAID_MAX_CMD_PER_LUN) depth = PMCRAID_MAX_CMD_PER_LUN; diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index ecf2a40d70be..d69744a62fe4 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -138,7 +138,7 @@ static int qla2xxx_eh_target_reset(struct scsi_cmnd *); static int qla2xxx_eh_bus_reset(struct scsi_cmnd *); static int qla2xxx_eh_host_reset(struct scsi_cmnd *); -static int qla2x00_change_queue_depth(struct scsi_device *, int); +static int qla2x00_change_queue_depth(struct scsi_device *, int, int); static int qla2x00_change_queue_type(struct scsi_device *, int); struct scsi_host_template qla2xxx_driver_template = { @@ -1235,8 +1235,11 @@ qla2xxx_slave_destroy(struct scsi_device *sdev) } static int -qla2x00_change_queue_depth(struct scsi_device *sdev, int qdepth) +qla2x00_change_queue_depth(struct scsi_device *sdev, int qdepth, int reason) { + if (reason != SCSI_QDEPTH_DEFAULT) + return -EOPNOTSUPP; + scsi_adjust_queue_depth(sdev, scsi_get_tag_type(sdev), qdepth); return sdev->queue_depth; } diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 5c7eb63a19d1..a48782866b22 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -766,7 +766,8 @@ sdev_store_queue_depth_rw(struct device *dev, struct device_attribute *attr, if (depth < 1) return -EINVAL; - retval = sht->change_queue_depth(sdev, depth); + retval = sht->change_queue_depth(sdev, depth, + SCSI_QDEPTH_DEFAULT); if (retval < 0) return retval; diff --git a/include/linux/libata.h b/include/linux/libata.h index 87698640c091..85df383fd4bd 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1023,7 +1023,7 @@ extern int ata_std_bios_param(struct scsi_device *sdev, extern int ata_scsi_slave_config(struct scsi_device *sdev); extern void ata_scsi_slave_destroy(struct scsi_device *sdev); extern int ata_scsi_change_queue_depth(struct scsi_device *sdev, - int queue_depth); + int queue_depth, int reason); extern struct ata_device *ata_dev_pair(struct ata_device *adev); extern int ata_do_set_mode(struct ata_link *link, struct ata_device **r_failed_dev); diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h index 1662d73d85a7..9617f9365e45 100644 --- a/include/scsi/libfc.h +++ b/include/scsi/libfc.h @@ -919,7 +919,7 @@ int fc_slave_alloc(struct scsi_device *sdev); /* * Adjust the queue depth. */ -int fc_change_queue_depth(struct scsi_device *sdev, int qdepth); +int fc_change_queue_depth(struct scsi_device *sdev, int qdepth, int reason); /* * Change the tag type. diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h index a72edd4eceec..2db2bc26b1e9 100644 --- a/include/scsi/libiscsi.h +++ b/include/scsi/libiscsi.h @@ -333,7 +333,8 @@ struct iscsi_host { /* * scsi host template */ -extern int iscsi_change_queue_depth(struct scsi_device *sdev, int depth); +extern int iscsi_change_queue_depth(struct scsi_device *sdev, int depth, + int reason); extern int iscsi_eh_abort(struct scsi_cmnd *sc); extern int iscsi_eh_target_reset(struct scsi_cmnd *sc); extern int iscsi_eh_device_reset(struct scsi_cmnd *sc); diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h index e78d3b62d8ec..9eaa3f05f954 100644 --- a/include/scsi/libsas.h +++ b/include/scsi/libsas.h @@ -634,7 +634,8 @@ extern int sas_target_alloc(struct scsi_target *); extern int sas_slave_alloc(struct scsi_device *); extern int sas_slave_configure(struct scsi_device *); extern void sas_slave_destroy(struct scsi_device *); -extern int sas_change_queue_depth(struct scsi_device *, int new_depth); +extern int sas_change_queue_depth(struct scsi_device *, int new_depth, + int reason); extern int sas_change_queue_type(struct scsi_device *, int qt); extern int sas_bios_param(struct scsi_device *, struct block_device *, diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index 6e728b176904..603054d8f40c 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -43,6 +43,12 @@ struct blk_queue_tags; #define DISABLE_CLUSTERING 0 #define ENABLE_CLUSTERING 1 +enum { + SCSI_QDEPTH_DEFAULT, /* default requested change, e.g. from sysfs */ + SCSI_QDEPTH_QFULL, /* scsi-ml requested due to queue full */ + SCSI_QDEPTH_RAMP_UP, /* scsi-ml requested due to threshhold event */ +}; + struct scsi_host_template { struct module *module; const char *name; @@ -294,7 +300,7 @@ struct scsi_host_template { * * Status: OPTIONAL */ - int (* change_queue_depth)(struct scsi_device *, int); + int (* change_queue_depth)(struct scsi_device *, int, int); /* * Fill in this function to allow the changing of tag types -- cgit v1.3-8-gc7d7 From 4a84067dbfce436b81779e585bf712b02ceee552 Mon Sep 17 00:00:00 2001 From: Vasu Dev Date: Thu, 22 Oct 2009 15:46:33 -0700 Subject: [SCSI] add queue_depth ramp up code Current FC HBA queue_depth ramp up code depends on last queue full time. The sdev already has last_queue_full_time field to track last queue full time but stored value is truncated by last four bits. So this patch updates last_queue_full_time without truncating last 4 bits to store full value and then updates its only current usages in scsi_track_queue_full to ignore last four bits to keep current usages same while also use this field in added ramp up code. Adds scsi_handle_queue_ramp_up to ramp up queue_depth on successful completion of IO. The scsi_handle_queue_ramp_up will do ramp up on all luns of a target, just same as ramp down done on all luns on a target. The ramp up is skipped in case the change_queue_depth is not supported by LLD or already reached to added max_queue_depth. Updates added max_queue_depth on every new update to default queue_depth value. The ramp up is also skipped if lapsed time since either last queue ramp up or down is less than LLD specified queue_ramp_up_period. Adds queue_ramp_up_period to sysfs but only if change_queue_depth is supported since ramp up and queue_ramp_up_period is needed only in case change_queue_depth is supported first. Initializes queue_ramp_up_period to 120HZ jiffies as initial default value, it is same as used in existing lpfc and qla2xxx. -v2 Combined all ramp code into this single patch. -v3 Moves max_queue_depth initialization after slave_configure is called from after slave_alloc calling done. Also adjusted max_queue_depth check to skip ramp up if current queue_depth is >= max_queue_depth. -v4 Changes sdev->queue_ramp_up_period unit to ms when using sysfs i/f to store or show its value. Signed-off-by: Vasu Dev Tested-by: Christof Schmitt Tested-by: Giridhar Malavali Signed-off-by: James Bottomley --- drivers/scsi/scsi.c | 10 ++++++++-- drivers/scsi/scsi_error.c | 38 ++++++++++++++++++++++++++++++++++++++ drivers/scsi/scsi_scan.c | 3 +++ drivers/scsi/scsi_sysfs.c | 41 +++++++++++++++++++++++++++++++++++++++-- include/scsi/scsi_device.h | 9 ++++++--- 5 files changed, 94 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index dd098cad337b..a60da5555577 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -940,10 +940,16 @@ EXPORT_SYMBOL(scsi_adjust_queue_depth); */ int scsi_track_queue_full(struct scsi_device *sdev, int depth) { - if ((jiffies >> 4) == sdev->last_queue_full_time) + + /* + * Don't let QUEUE_FULLs on the same + * jiffies count, they could all be from + * same event. + */ + if ((jiffies >> 4) == (sdev->last_queue_full_time >> 4)) return 0; - sdev->last_queue_full_time = (jiffies >> 4); + sdev->last_queue_full_time = jiffies; if (sdev->last_queue_full_depth != depth) { sdev->last_queue_full_count = 1; sdev->last_queue_full_depth = depth; diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 7b1e20fee906..08ed506e6059 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -331,6 +331,42 @@ static int scsi_check_sense(struct scsi_cmnd *scmd) } } +static void scsi_handle_queue_ramp_up(struct scsi_device *sdev) +{ + struct scsi_host_template *sht = sdev->host->hostt; + struct scsi_device *tmp_sdev; + + if (!sht->change_queue_depth || + sdev->queue_depth >= sdev->max_queue_depth) + return; + + if (time_before(jiffies, + sdev->last_queue_ramp_up + sdev->queue_ramp_up_period)) + return; + + if (time_before(jiffies, + sdev->last_queue_full_time + sdev->queue_ramp_up_period)) + return; + + /* + * Walk all devices of a target and do + * ramp up on them. + */ + shost_for_each_device(tmp_sdev, sdev->host) { + if (tmp_sdev->channel != sdev->channel || + tmp_sdev->id != sdev->id || + tmp_sdev->queue_depth == sdev->max_queue_depth) + continue; + /* + * call back into LLD to increase queue_depth by one + * with ramp up reason code. + */ + sht->change_queue_depth(tmp_sdev, tmp_sdev->queue_depth + 1, + SCSI_QDEPTH_RAMP_UP); + sdev->last_queue_ramp_up = jiffies; + } +} + static void scsi_handle_queue_full(struct scsi_device *sdev) { struct scsi_host_template *sht = sdev->host->hostt; @@ -393,6 +429,7 @@ static int scsi_eh_completed_normally(struct scsi_cmnd *scmd) */ switch (status_byte(scmd->result)) { case GOOD: + scsi_handle_queue_ramp_up(scmd->device); case COMMAND_TERMINATED: return SUCCESS; case CHECK_CONDITION: @@ -1425,6 +1462,7 @@ int scsi_decide_disposition(struct scsi_cmnd *scmd) */ return ADD_TO_MLQUEUE; case GOOD: + scsi_handle_queue_ramp_up(scmd->device); case COMMAND_TERMINATED: return SUCCESS; case TASK_ABORTED: diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 0547a7f44d42..50526fa207e5 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -251,6 +251,7 @@ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget, sdev->model = scsi_null_device_strs; sdev->rev = scsi_null_device_strs; sdev->host = shost; + sdev->queue_ramp_up_period = SCSI_DEFAULT_RAMP_UP_PERIOD; sdev->id = starget->id; sdev->lun = lun; sdev->channel = starget->channel; @@ -941,6 +942,8 @@ static int scsi_add_lun(struct scsi_device *sdev, unsigned char *inq_result, } } + sdev->max_queue_depth = sdev->queue_depth; + /* * Ok, the device is now all set up, we can * register it and tell the rest of the kernel diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index a48782866b22..758598ff3b90 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -771,6 +771,8 @@ sdev_store_queue_depth_rw(struct device *dev, struct device_attribute *attr, if (retval < 0) return retval; + sdev->max_queue_depth = sdev->queue_depth; + return count; } @@ -778,6 +780,37 @@ static struct device_attribute sdev_attr_queue_depth_rw = __ATTR(queue_depth, S_IRUGO | S_IWUSR, sdev_show_queue_depth, sdev_store_queue_depth_rw); +static ssize_t +sdev_show_queue_ramp_up_period(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct scsi_device *sdev; + sdev = to_scsi_device(dev); + return snprintf(buf, 20, "%u\n", + jiffies_to_msecs(sdev->queue_ramp_up_period)); +} + +static ssize_t +sdev_store_queue_ramp_up_period(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct scsi_device *sdev = to_scsi_device(dev); + unsigned long period; + + if (strict_strtoul(buf, 10, &period)) + return -EINVAL; + + sdev->queue_ramp_up_period = msecs_to_jiffies(period); + return period; +} + +static struct device_attribute sdev_attr_queue_ramp_up_period = + __ATTR(queue_ramp_up_period, S_IRUGO | S_IWUSR, + sdev_show_queue_ramp_up_period, + sdev_store_queue_ramp_up_period); + static ssize_t sdev_store_queue_type_rw(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) @@ -866,8 +899,12 @@ int scsi_sysfs_add_sdev(struct scsi_device *sdev) } /* create queue files, which may be writable, depending on the host */ - if (sdev->host->hostt->change_queue_depth) - error = device_create_file(&sdev->sdev_gendev, &sdev_attr_queue_depth_rw); + if (sdev->host->hostt->change_queue_depth) { + error = device_create_file(&sdev->sdev_gendev, + &sdev_attr_queue_depth_rw); + error = device_create_file(&sdev->sdev_gendev, + &sdev_attr_queue_ramp_up_period); + } else error = device_create_file(&sdev->sdev_gendev, &dev_attr_queue_depth); if (error) { diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 9af48cbf0036..92c4c3bd916d 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -81,11 +81,14 @@ struct scsi_device { struct list_head starved_entry; struct scsi_cmnd *current_cmnd; /* currently active command */ unsigned short queue_depth; /* How deep of a queue we want */ + unsigned short max_queue_depth; /* max queue depth */ unsigned short last_queue_full_depth; /* These two are used by */ unsigned short last_queue_full_count; /* scsi_track_queue_full() */ - unsigned long last_queue_full_time;/* don't let QUEUE_FULLs on the same - jiffie count on our counter, they - could all be from the same event. */ + unsigned long last_queue_full_time; /* last queue full time */ + unsigned long queue_ramp_up_period; /* ramp up period in jiffies */ +#define SCSI_DEFAULT_RAMP_UP_PERIOD (120 * HZ) + + unsigned long last_queue_ramp_up; /* last queue ramp up time */ unsigned int id, lun, channel; -- cgit v1.3-8-gc7d7 From 3ae31f6a7b6e442fc6a92f29330fbad230dc3992 Mon Sep 17 00:00:00 2001 From: Chandra Seetharaman Date: Wed, 21 Oct 2009 09:22:46 -0700 Subject: [SCSI] scsi_dh: Change the scsidh_activate interface to be asynchronous Make scsi_dh_activate() function asynchronous, by taking in two additional parameters, one is the callback function and the other is the data to call the callback function with. Signed-off-by: Chandra Seetharaman Signed-off-by: James Bottomley --- drivers/md/dm-mpath.c | 8 ++++---- drivers/scsi/device_handler/scsi_dh.c | 17 ++++++++++++----- drivers/scsi/device_handler/scsi_dh_alua.c | 7 +++++-- drivers/scsi/device_handler/scsi_dh_emc.c | 7 +++++-- drivers/scsi/device_handler/scsi_dh_hp_sw.c | 7 +++++-- drivers/scsi/device_handler/scsi_dh_rdac.c | 7 +++++-- include/scsi/scsi_device.h | 3 ++- include/scsi/scsi_dh.h | 6 ++++-- 8 files changed, 42 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 32d0b878eccc..dce971dbdfa3 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -1116,8 +1116,9 @@ static int pg_init_limit_reached(struct multipath *m, struct pgpath *pgpath) return limit_reached; } -static void pg_init_done(struct dm_path *path, int errors) +static void pg_init_done(void *data, int errors) { + struct dm_path *path = data; struct pgpath *pgpath = path_to_pgpath(path); struct priority_group *pg = pgpath->pg; struct multipath *m = pg->m; @@ -1183,12 +1184,11 @@ static void pg_init_done(struct dm_path *path, int errors) static void activate_path(struct work_struct *work) { - int ret; struct pgpath *pgpath = container_of(work, struct pgpath, activate_path); - ret = scsi_dh_activate(bdev_get_queue(pgpath->path.dev->bdev)); - pg_init_done(&pgpath->path, ret); + scsi_dh_activate(bdev_get_queue(pgpath->path.dev->bdev), + pg_init_done, &pgpath->path); } /* diff --git a/drivers/scsi/device_handler/scsi_dh.c b/drivers/scsi/device_handler/scsi_dh.c index 3ee1cbc89479..6f7f798910e8 100644 --- a/drivers/scsi/device_handler/scsi_dh.c +++ b/drivers/scsi/device_handler/scsi_dh.c @@ -226,7 +226,7 @@ store_dh_state(struct device *dev, struct device_attribute *attr, * Activate a device handler */ if (scsi_dh->activate) - err = scsi_dh->activate(sdev); + err = scsi_dh->activate(sdev, NULL, NULL); else err = 0; } @@ -423,10 +423,17 @@ EXPORT_SYMBOL_GPL(scsi_unregister_device_handler); /* * scsi_dh_activate - activate the path associated with the scsi_device * corresponding to the given request queue. - * @q - Request queue that is associated with the scsi_device to be - * activated. + * Returns immediately without waiting for activation to be completed. + * @q - Request queue that is associated with the scsi_device to be + * activated. + * @fn - Function to be called upon completion of the activation. + * Function fn is called with data (below) and the error code. + * Function fn may be called from the same calling context. So, + * do not hold the lock in the caller which may be needed in fn. + * @data - data passed to the function fn upon completion. + * */ -int scsi_dh_activate(struct request_queue *q) +int scsi_dh_activate(struct request_queue *q, activate_complete fn, void *data) { int err = 0; unsigned long flags; @@ -445,7 +452,7 @@ int scsi_dh_activate(struct request_queue *q) return err; if (scsi_dh->activate) - err = scsi_dh->activate(sdev); + err = scsi_dh->activate(sdev, fn, data); put_device(&sdev->sdev_gendev); return err; } diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c index b5cdefaf2608..e8a8928e58bc 100644 --- a/drivers/scsi/device_handler/scsi_dh_alua.c +++ b/drivers/scsi/device_handler/scsi_dh_alua.c @@ -652,7 +652,8 @@ out: * based on a certain policy. But until we actually encounter them it * should be okay. */ -static int alua_activate(struct scsi_device *sdev) +static int alua_activate(struct scsi_device *sdev, + activate_complete fn, void *data) { struct alua_dh_data *h = get_alua_data(sdev); int err = SCSI_DH_OK; @@ -667,7 +668,9 @@ static int alua_activate(struct scsi_device *sdev) err = alua_stpg(sdev, TPGS_STATE_OPTIMIZED, h); out: - return err; + if (fn) + fn(data, err); + return 0; } /* diff --git a/drivers/scsi/device_handler/scsi_dh_emc.c b/drivers/scsi/device_handler/scsi_dh_emc.c index 0cffe84976fe..61966750bd60 100644 --- a/drivers/scsi/device_handler/scsi_dh_emc.c +++ b/drivers/scsi/device_handler/scsi_dh_emc.c @@ -528,7 +528,8 @@ retry: return err; } -static int clariion_activate(struct scsi_device *sdev) +static int clariion_activate(struct scsi_device *sdev, + activate_complete fn, void *data) { struct clariion_dh_data *csdev = get_clariion_data(sdev); int result; @@ -559,7 +560,9 @@ done: csdev->port, lun_state[csdev->lun_state], csdev->default_sp + 'A'); - return result; + if (fn) + fn(data, result); + return 0; } /* * params - parameters in the following format diff --git a/drivers/scsi/device_handler/scsi_dh_hp_sw.c b/drivers/scsi/device_handler/scsi_dh_hp_sw.c index f7da7530875e..0aacafc96f21 100644 --- a/drivers/scsi/device_handler/scsi_dh_hp_sw.c +++ b/drivers/scsi/device_handler/scsi_dh_hp_sw.c @@ -268,7 +268,8 @@ static int hp_sw_prep_fn(struct scsi_device *sdev, struct request *req) * activate the passive path (and deactivate the * previously active one). */ -static int hp_sw_activate(struct scsi_device *sdev) +static int hp_sw_activate(struct scsi_device *sdev, + activate_complete fn, void *data) { int ret = SCSI_DH_OK; struct hp_sw_dh_data *h = get_hp_sw_data(sdev); @@ -283,7 +284,9 @@ static int hp_sw_activate(struct scsi_device *sdev) HP_SW_NAME); } - return ret; + if (fn) + fn(data, ret); + return 0; } static const struct scsi_dh_devlist hp_sw_dh_data_list[] = { diff --git a/drivers/scsi/device_handler/scsi_dh_rdac.c b/drivers/scsi/device_handler/scsi_dh_rdac.c index 268189d31d9c..be362adbd8e7 100644 --- a/drivers/scsi/device_handler/scsi_dh_rdac.c +++ b/drivers/scsi/device_handler/scsi_dh_rdac.c @@ -568,7 +568,8 @@ done: return err; } -static int rdac_activate(struct scsi_device *sdev) +static int rdac_activate(struct scsi_device *sdev, + activate_complete fn, void *data) { struct rdac_dh_data *h = get_rdac_data(sdev); int err = SCSI_DH_OK; @@ -580,7 +581,9 @@ static int rdac_activate(struct scsi_device *sdev) if (h->lun_state == RDAC_LUN_UNOWNED) err = send_mode_select(sdev, h); done: - return err; + if (fn) + fn(data, err); + return 0; } static int rdac_prep_fn(struct scsi_device *sdev, struct request *req) diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 92c4c3bd916d..68d185c79bae 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -177,6 +177,7 @@ struct scsi_dh_devlist { char *model; }; +typedef void (*activate_complete)(void *, int); struct scsi_device_handler { /* Used by the infrastructure */ struct list_head list; /* list of scsi_device_handlers */ @@ -188,7 +189,7 @@ struct scsi_device_handler { int (*check_sense)(struct scsi_device *, struct scsi_sense_hdr *); int (*attach)(struct scsi_device *); void (*detach)(struct scsi_device *); - int (*activate)(struct scsi_device *); + int (*activate)(struct scsi_device *, activate_complete, void *); int (*prep_fn)(struct scsi_device *, struct request *); int (*set_params)(struct scsi_device *, const char *); }; diff --git a/include/scsi/scsi_dh.h b/include/scsi/scsi_dh.h index ff2407405b42..e3f2db212ddc 100644 --- a/include/scsi/scsi_dh.h +++ b/include/scsi/scsi_dh.h @@ -56,14 +56,16 @@ enum { SCSI_DH_DRIVER_MAX, }; #if defined(CONFIG_SCSI_DH) || defined(CONFIG_SCSI_DH_MODULE) -extern int scsi_dh_activate(struct request_queue *); +extern int scsi_dh_activate(struct request_queue *, activate_complete, void *); extern int scsi_dh_handler_exist(const char *); extern int scsi_dh_attach(struct request_queue *, const char *); extern void scsi_dh_detach(struct request_queue *); extern int scsi_dh_set_params(struct request_queue *, const char *); #else -static inline int scsi_dh_activate(struct request_queue *req) +static inline int scsi_dh_activate(struct request_queue *req, + activate_complete fn, void *data) { + fn(data, 0); return 0; } static inline int scsi_dh_handler_exist(const char *name) -- cgit v1.3-8-gc7d7 From 65d430fa99cbd0e88d09a3343f697c51fc8a7009 Mon Sep 17 00:00:00 2001 From: Christof Schmitt Date: Fri, 30 Oct 2009 17:59:29 +0100 Subject: [SCSI] scsi_transport_fc: Introduce helper function for blocking scsi_eh Move the duplicated code from FC LLDs to SCSI FC transport class. Acked-by: James Smart Acked-by: Giridhar Malavali Acked-by: Abhijeet Joglekar Signed-off-by: Christof Schmitt Signed-off-by: James Bottomley --- drivers/scsi/fnic/fnic_scsi.c | 20 ++------------------ drivers/scsi/lpfc/lpfc_scsi.c | 30 ++++-------------------------- drivers/scsi/qla2xxx/qla_os.c | 25 ++++--------------------- drivers/scsi/scsi_transport_fc.c | 26 ++++++++++++++++++++++++++ include/scsi/scsi_transport_fc.h | 1 + 5 files changed, 37 insertions(+), 65 deletions(-) (limited to 'include') diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c index b5d17385939b..8d26d7a9f01b 100644 --- a/drivers/scsi/fnic/fnic_scsi.c +++ b/drivers/scsi/fnic/fnic_scsi.c @@ -1225,22 +1225,6 @@ void fnic_terminate_rport_io(struct fc_rport *rport) } -static void fnic_block_error_handler(struct scsi_cmnd *sc) -{ - struct Scsi_Host *shost = sc->device->host; - struct fc_rport *rport = starget_to_rport(scsi_target(sc->device)); - unsigned long flags; - - spin_lock_irqsave(shost->host_lock, flags); - while (rport->port_state == FC_PORTSTATE_BLOCKED) { - spin_unlock_irqrestore(shost->host_lock, flags); - msleep(1000); - spin_lock_irqsave(shost->host_lock, flags); - } - spin_unlock_irqrestore(shost->host_lock, flags); - -} - /* * This function is exported to SCSI for sending abort cmnds. * A SCSI IO is represented by a io_req in the driver. @@ -1260,7 +1244,7 @@ int fnic_abort_cmd(struct scsi_cmnd *sc) DECLARE_COMPLETION_ONSTACK(tm_done); /* Wait for rport to unblock */ - fnic_block_error_handler(sc); + fc_block_scsi_eh(sc); /* Get local-port, check ready and link up */ lp = shost_priv(sc->device->host); @@ -1542,7 +1526,7 @@ int fnic_device_reset(struct scsi_cmnd *sc) DECLARE_COMPLETION_ONSTACK(tm_done); /* Wait for rport to unblock */ - fnic_block_error_handler(sc); + fc_block_scsi_eh(sc); /* Get local-port, check ready and link up */ lp = shost_priv(sc->device->host); diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index c88f59f0ce30..e25179193a82 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -2916,28 +2916,6 @@ lpfc_queuecommand(struct scsi_cmnd *cmnd, void (*done) (struct scsi_cmnd *)) return 0; } -/** - * lpfc_block_error_handler - Routine to block error handler - * @cmnd: Pointer to scsi_cmnd data structure. - * - * This routine blocks execution till fc_rport state is not FC_PORSTAT_BLCOEKD. - **/ -static void -lpfc_block_error_handler(struct scsi_cmnd *cmnd) -{ - struct Scsi_Host *shost = cmnd->device->host; - struct fc_rport *rport = starget_to_rport(scsi_target(cmnd->device)); - - spin_lock_irq(shost->host_lock); - while (rport->port_state == FC_PORTSTATE_BLOCKED) { - spin_unlock_irq(shost->host_lock); - msleep(1000); - spin_lock_irq(shost->host_lock); - } - spin_unlock_irq(shost->host_lock); - return; -} - /** * lpfc_abort_handler - scsi_host_template eh_abort_handler entry point * @cmnd: Pointer to scsi_cmnd data structure. @@ -2961,7 +2939,7 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd) int ret = SUCCESS; DECLARE_WAIT_QUEUE_HEAD_ONSTACK(waitq); - lpfc_block_error_handler(cmnd); + fc_block_scsi_eh(cmnd); lpfc_cmd = (struct lpfc_scsi_buf *)cmnd->host_scribble; BUG_ON(!lpfc_cmd); @@ -3259,7 +3237,7 @@ lpfc_device_reset_handler(struct scsi_cmnd *cmnd) struct lpfc_scsi_event_header scsi_event; int status; - lpfc_block_error_handler(cmnd); + fc_block_scsi_eh(cmnd); status = lpfc_chk_tgt_mapped(vport, cmnd); if (status == FAILED) { @@ -3318,7 +3296,7 @@ lpfc_target_reset_handler(struct scsi_cmnd *cmnd) struct lpfc_scsi_event_header scsi_event; int status; - lpfc_block_error_handler(cmnd); + fc_block_scsi_eh(cmnd); status = lpfc_chk_tgt_mapped(vport, cmnd); if (status == FAILED) { @@ -3384,7 +3362,7 @@ lpfc_bus_reset_handler(struct scsi_cmnd *cmnd) fc_host_post_vendor_event(shost, fc_get_event_number(), sizeof(scsi_event), (char *)&scsi_event, LPFC_NL_VENDOR_ID); - lpfc_block_error_handler(cmnd); + fc_block_scsi_eh(cmnd); /* * Since the driver manages a single bus device, reset all diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index d69744a62fe4..41669357b186 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -728,23 +728,6 @@ qla2x00_abort_fcport_cmds(fc_port_t *fcport) spin_unlock_irqrestore(&ha->hardware_lock, flags); } -static void -qla2x00_block_error_handler(struct scsi_cmnd *cmnd) -{ - struct Scsi_Host *shost = cmnd->device->host; - struct fc_rport *rport = starget_to_rport(scsi_target(cmnd->device)); - unsigned long flags; - - spin_lock_irqsave(shost->host_lock, flags); - while (rport->port_state == FC_PORTSTATE_BLOCKED) { - spin_unlock_irqrestore(shost->host_lock, flags); - msleep(1000); - spin_lock_irqsave(shost->host_lock, flags); - } - spin_unlock_irqrestore(shost->host_lock, flags); - return; -} - /************************************************************************** * qla2xxx_eh_abort * @@ -774,7 +757,7 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd) struct req_que *req = vha->req; srb_t *spt; - qla2x00_block_error_handler(cmd); + fc_block_scsi_eh(cmd); if (!CMD_SP(cmd)) return SUCCESS; @@ -905,7 +888,7 @@ __qla2xxx_eh_generic_reset(char *name, enum nexus_wait_type type, fc_port_t *fcport = (struct fc_port *) cmd->device->hostdata; int err; - qla2x00_block_error_handler(cmd); + fc_block_scsi_eh(cmd); if (!fcport) return FAILED; @@ -985,7 +968,7 @@ qla2xxx_eh_bus_reset(struct scsi_cmnd *cmd) unsigned long serial; srb_t *sp = (srb_t *) CMD_SP(cmd); - qla2x00_block_error_handler(cmd); + fc_block_scsi_eh(cmd); id = cmd->device->id; lun = cmd->device->lun; @@ -1048,7 +1031,7 @@ qla2xxx_eh_host_reset(struct scsi_cmnd *cmd) srb_t *sp = (srb_t *) CMD_SP(cmd); scsi_qla_host_t *base_vha = pci_get_drvdata(ha->pdev); - qla2x00_block_error_handler(cmd); + fc_block_scsi_eh(cmd); id = cmd->device->id; lun = cmd->device->lun; diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c index f436e033adaf..3ce56b3b2cd7 100644 --- a/drivers/scsi/scsi_transport_fc.c +++ b/drivers/scsi/scsi_transport_fc.c @@ -27,6 +27,7 @@ */ #include #include +#include #include #include #include @@ -3144,6 +3145,31 @@ fc_scsi_scan_rport(struct work_struct *work) spin_unlock_irqrestore(shost->host_lock, flags); } +/** + * fc_block_scsi_eh - Block SCSI eh thread for blocked fc_rport + * @cmnd: SCSI command that scsi_eh is trying to recover + * + * This routine can be called from a FC LLD scsi_eh callback. It + * blocks the scsi_eh thread until the fc_rport leaves the + * FC_PORTSTATE_BLOCKED. This is necessary to avoid the scsi_eh + * failing recovery actions for blocked rports which would lead to + * offlined SCSI devices. + */ +void fc_block_scsi_eh(struct scsi_cmnd *cmnd) +{ + struct Scsi_Host *shost = cmnd->device->host; + struct fc_rport *rport = starget_to_rport(scsi_target(cmnd->device)); + unsigned long flags; + + spin_lock_irqsave(shost->host_lock, flags); + while (rport->port_state == FC_PORTSTATE_BLOCKED) { + spin_unlock_irqrestore(shost->host_lock, flags); + msleep(1000); + spin_lock_irqsave(shost->host_lock, flags); + } + spin_unlock_irqrestore(shost->host_lock, flags); +} +EXPORT_SYMBOL(fc_block_scsi_eh); /** * fc_vport_setup - allocates and creates a FC virtual port. diff --git a/include/scsi/scsi_transport_fc.h b/include/scsi/scsi_transport_fc.h index fc50bd64aa4e..8e86a94faf06 100644 --- a/include/scsi/scsi_transport_fc.h +++ b/include/scsi/scsi_transport_fc.h @@ -807,5 +807,6 @@ void fc_host_post_vendor_event(struct Scsi_Host *shost, u32 event_number, struct fc_vport *fc_vport_create(struct Scsi_Host *shost, int channel, struct fc_vport_identifiers *); int fc_vport_terminate(struct fc_vport *vport); +void fc_block_scsi_eh(struct scsi_cmnd *cmnd); #endif /* SCSI_TRANSPORT_FC_H */ -- cgit v1.3-8-gc7d7 From 1a7b75ae719754c77ccd4d18b0d258ae5db38a25 Mon Sep 17 00:00:00 2001 From: Robert Love Date: Tue, 3 Nov 2009 11:45:47 -0800 Subject: [SCSI] libfc: Move non-common routines and prototypes out of libfc.h This patch moves all non-common routines and function prototypes out of libfc.h and into the appropriate .c files. It makes these routines 'static' when necessary and removes any unnecessary EXPORT_SYMBOL statements. A result of moving the fc_exch_seq_send, fc_seq_els_rsp_send, fc_exch_alloc and fc_seq_start_next prototypes out of libfc.h is that they were no longer being imported into fc_exch.c when libfc.h was included. This caused errors where routines in fc_exch.c were looking for undefined symbols. To fix this this patch reorganizes fc_seq_alloc, fc_seq_start_next and fc_seq_start_next_locked. This move also made it so that fc_seq_start_next_locked did not need to be prototyped at the top of fc_exch.c. Signed-off-by: Robert Love Signed-off-by: James Bottomley --- drivers/scsi/libfc/fc_exch.c | 329 +++++++++++++++++++++++-------------------- include/scsi/libfc.h | 49 ------- 2 files changed, 177 insertions(+), 201 deletions(-) (limited to 'include') diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c index 170cdf4bac97..659bb05287f3 100644 --- a/drivers/scsi/libfc/fc_exch.c +++ b/drivers/scsi/libfc/fc_exch.c @@ -107,7 +107,6 @@ static void fc_seq_ls_rjt(struct fc_seq *, enum fc_els_rjt_reason, enum fc_els_rjt_explan); static void fc_exch_els_rec(struct fc_seq *, struct fc_frame *); static void fc_exch_els_rrq(struct fc_seq *, struct fc_frame *); -static struct fc_seq *fc_seq_start_next_locked(struct fc_seq *sp); /* * Internal implementation notes. @@ -272,7 +271,6 @@ static void fc_exch_setup_hdr(struct fc_exch *ep, struct fc_frame *fp, fh->fh_seq_cnt = htons(ep->seq.cnt); } - /* * Release a reference to an exchange. * If the refcnt goes to zero and the exchange is complete, it is freed. @@ -372,7 +370,104 @@ static void fc_exch_timer_set(struct fc_exch *ep, unsigned int timer_msec) spin_unlock_bh(&ep->ex_lock); } -int fc_seq_exch_abort(const struct fc_seq *req_sp, unsigned int timer_msec) +/** + * send a frame using existing sequence and exchange. + */ +static int fc_seq_send(struct fc_lport *lp, struct fc_seq *sp, + struct fc_frame *fp) +{ + struct fc_exch *ep; + struct fc_frame_header *fh = fc_frame_header_get(fp); + int error; + u32 f_ctl; + + ep = fc_seq_exch(sp); + WARN_ON((ep->esb_stat & ESB_ST_SEQ_INIT) != ESB_ST_SEQ_INIT); + + f_ctl = ntoh24(fh->fh_f_ctl); + fc_exch_setup_hdr(ep, fp, f_ctl); + + /* + * update sequence count if this frame is carrying + * multiple FC frames when sequence offload is enabled + * by LLD. + */ + if (fr_max_payload(fp)) + sp->cnt += DIV_ROUND_UP((fr_len(fp) - sizeof(*fh)), + fr_max_payload(fp)); + else + sp->cnt++; + + /* + * Send the frame. + */ + error = lp->tt.frame_send(lp, fp); + + /* + * Update the exchange and sequence flags, + * assuming all frames for the sequence have been sent. + * We can only be called to send once for each sequence. + */ + spin_lock_bh(&ep->ex_lock); + ep->f_ctl = f_ctl & ~FC_FC_FIRST_SEQ; /* not first seq */ + if (f_ctl & (FC_FC_END_SEQ | FC_FC_SEQ_INIT)) + ep->esb_stat &= ~ESB_ST_SEQ_INIT; + spin_unlock_bh(&ep->ex_lock); + return error; +} + +/** + * fc_seq_alloc() - Allocate a sequence. + * @ep: Exchange pointer + * @seq_id: Sequence ID to allocate a sequence for + * + * We don't support multiple originated sequences on the same exchange. + * By implication, any previously originated sequence on this exchange + * is complete, and we reallocate the same sequence. + */ +static struct fc_seq *fc_seq_alloc(struct fc_exch *ep, u8 seq_id) +{ + struct fc_seq *sp; + + sp = &ep->seq; + sp->ssb_stat = 0; + sp->cnt = 0; + sp->id = seq_id; + return sp; +} + +static struct fc_seq *fc_seq_start_next_locked(struct fc_seq *sp) +{ + struct fc_exch *ep = fc_seq_exch(sp); + + sp = fc_seq_alloc(ep, ep->seq_id++); + FC_EXCH_DBG(ep, "f_ctl %6x seq %2x\n", + ep->f_ctl, sp->id); + return sp; +} + +/** + * Allocate a new sequence on the same exchange as the supplied sequence. + * This will never return NULL. + */ +static struct fc_seq *fc_seq_start_next(struct fc_seq *sp) +{ + struct fc_exch *ep = fc_seq_exch(sp); + + spin_lock_bh(&ep->ex_lock); + sp = fc_seq_start_next_locked(sp); + spin_unlock_bh(&ep->ex_lock); + + return sp; +} + +/** + * This function is for seq_exch_abort function pointer in + * struct libfc_function_template, see comment block on + * seq_exch_abort for description of this function. + */ +static int fc_seq_exch_abort(const struct fc_seq *req_sp, + unsigned int timer_msec) { struct fc_seq *sp; struct fc_exch *ep; @@ -472,24 +567,6 @@ done: fc_exch_release(ep); } -/* - * Allocate a sequence. - * - * We don't support multiple originated sequences on the same exchange. - * By implication, any previously originated sequence on this exchange - * is complete, and we reallocate the same sequence. - */ -static struct fc_seq *fc_seq_alloc(struct fc_exch *ep, u8 seq_id) -{ - struct fc_seq *sp; - - sp = &ep->seq; - sp->ssb_stat = 0; - sp->cnt = 0; - sp->id = seq_id; - return sp; -} - /** * fc_exch_em_alloc() - allocate an exchange from a specified EM. * @lport: ptr to the local port @@ -570,7 +647,8 @@ err: * EM is selected having either a NULL match function pointer * or call to match function returning true. */ -struct fc_exch *fc_exch_alloc(struct fc_lport *lport, struct fc_frame *fp) +static struct fc_exch *fc_exch_alloc(struct fc_lport *lport, + struct fc_frame *fp) { struct fc_exch_mgr_anchor *ema; struct fc_exch *ep; @@ -584,7 +662,6 @@ struct fc_exch *fc_exch_alloc(struct fc_lport *lport, struct fc_frame *fp) } return NULL; } -EXPORT_SYMBOL(fc_exch_alloc); /* * Lookup and hold an exchange. @@ -607,7 +684,13 @@ static struct fc_exch *fc_exch_find(struct fc_exch_mgr *mp, u16 xid) return ep; } -void fc_exch_done(struct fc_seq *sp) + +/** + * fc_exch_done() - Indicate that an exchange/sequence tuple is complete and + * the memory allocated for the related objects may be freed. + * @sp: Sequence pointer + */ +static void fc_exch_done(struct fc_seq *sp) { struct fc_exch *ep = fc_seq_exch(sp); int rc; @@ -618,7 +701,6 @@ void fc_exch_done(struct fc_seq *sp) if (!rc) fc_exch_delete(ep); } -EXPORT_SYMBOL(fc_exch_done); /* * Allocate a new exchange as responder. @@ -821,76 +903,15 @@ static void fc_exch_set_addr(struct fc_exch *ep, } } -static struct fc_seq *fc_seq_start_next_locked(struct fc_seq *sp) -{ - struct fc_exch *ep = fc_seq_exch(sp); - - sp = fc_seq_alloc(ep, ep->seq_id++); - FC_EXCH_DBG(ep, "f_ctl %6x seq %2x\n", - ep->f_ctl, sp->id); - return sp; -} -/* - * Allocate a new sequence on the same exchange as the supplied sequence. - * This will never return NULL. +/** + * fc_seq_els_rsp_send() - Send ELS response using mainly infomation + * in exchange and sequence in EM layer. + * @sp: Sequence pointer + * @els_cmd: ELS command + * @els_data: ELS data */ -struct fc_seq *fc_seq_start_next(struct fc_seq *sp) -{ - struct fc_exch *ep = fc_seq_exch(sp); - - spin_lock_bh(&ep->ex_lock); - sp = fc_seq_start_next_locked(sp); - spin_unlock_bh(&ep->ex_lock); - - return sp; -} -EXPORT_SYMBOL(fc_seq_start_next); - -int fc_seq_send(struct fc_lport *lp, struct fc_seq *sp, struct fc_frame *fp) -{ - struct fc_exch *ep; - struct fc_frame_header *fh = fc_frame_header_get(fp); - int error; - u32 f_ctl; - - ep = fc_seq_exch(sp); - WARN_ON((ep->esb_stat & ESB_ST_SEQ_INIT) != ESB_ST_SEQ_INIT); - - f_ctl = ntoh24(fh->fh_f_ctl); - fc_exch_setup_hdr(ep, fp, f_ctl); - - /* - * update sequence count if this frame is carrying - * multiple FC frames when sequence offload is enabled - * by LLD. - */ - if (fr_max_payload(fp)) - sp->cnt += DIV_ROUND_UP((fr_len(fp) - sizeof(*fh)), - fr_max_payload(fp)); - else - sp->cnt++; - - /* - * Send the frame. - */ - error = lp->tt.frame_send(lp, fp); - - /* - * Update the exchange and sequence flags, - * assuming all frames for the sequence have been sent. - * We can only be called to send once for each sequence. - */ - spin_lock_bh(&ep->ex_lock); - ep->f_ctl = f_ctl & ~FC_FC_FIRST_SEQ; /* not first seq */ - if (f_ctl & (FC_FC_END_SEQ | FC_FC_SEQ_INIT)) - ep->esb_stat &= ~ESB_ST_SEQ_INIT; - spin_unlock_bh(&ep->ex_lock); - return error; -} -EXPORT_SYMBOL(fc_seq_send); - -void fc_seq_els_rsp_send(struct fc_seq *sp, enum fc_els_cmd els_cmd, - struct fc_seq_els_data *els_data) +static void fc_seq_els_rsp_send(struct fc_seq *sp, enum fc_els_cmd els_cmd, + struct fc_seq_els_data *els_data) { switch (els_cmd) { case ELS_LS_RJT: @@ -909,7 +930,6 @@ void fc_seq_els_rsp_send(struct fc_seq *sp, enum fc_els_cmd els_cmd, FC_EXCH_DBG(fc_seq_exch(sp), "Invalid ELS CMD:%x\n", els_cmd); } } -EXPORT_SYMBOL(fc_seq_els_rsp_send); /* * Send a sequence, which is also the last sequence in the exchange. @@ -1662,6 +1682,68 @@ cleanup: fc_exch_release(aborted_ep); } + +/** + * This function is for exch_seq_send function pointer in + * struct libfc_function_template, see comment block on + * exch_seq_send for description of this function. + */ +static struct fc_seq *fc_exch_seq_send(struct fc_lport *lp, + struct fc_frame *fp, + void (*resp)(struct fc_seq *, + struct fc_frame *fp, + void *arg), + void (*destructor)(struct fc_seq *, + void *), + void *arg, u32 timer_msec) +{ + struct fc_exch *ep; + struct fc_seq *sp = NULL; + struct fc_frame_header *fh; + int rc = 1; + + ep = fc_exch_alloc(lp, fp); + if (!ep) { + fc_frame_free(fp); + return NULL; + } + ep->esb_stat |= ESB_ST_SEQ_INIT; + fh = fc_frame_header_get(fp); + fc_exch_set_addr(ep, ntoh24(fh->fh_s_id), ntoh24(fh->fh_d_id)); + ep->resp = resp; + ep->destructor = destructor; + ep->arg = arg; + ep->r_a_tov = FC_DEF_R_A_TOV; + ep->lp = lp; + sp = &ep->seq; + + ep->fh_type = fh->fh_type; /* save for possbile timeout handling */ + ep->f_ctl = ntoh24(fh->fh_f_ctl); + fc_exch_setup_hdr(ep, fp, ep->f_ctl); + sp->cnt++; + + if (ep->xid <= lp->lro_xid) + fc_fcp_ddp_setup(fr_fsp(fp), ep->xid); + + if (unlikely(lp->tt.frame_send(lp, fp))) + goto err; + + if (timer_msec) + fc_exch_timer_set_locked(ep, timer_msec); + ep->f_ctl &= ~FC_FC_FIRST_SEQ; /* not first seq */ + + if (ep->f_ctl & FC_FC_SEQ_INIT) + ep->esb_stat &= ~ESB_ST_SEQ_INIT; + spin_unlock_bh(&ep->ex_lock); + return sp; +err: + rc = fc_exch_done_locked(ep); + spin_unlock_bh(&ep->ex_lock); + if (!rc) + fc_exch_delete(ep); + return NULL; +} + /* * Send ELS RRQ - Reinstate Recovery Qualifier. * This tells the remote port to stop blocking the use of @@ -1902,63 +1984,6 @@ void fc_exch_mgr_free(struct fc_lport *lport) } EXPORT_SYMBOL(fc_exch_mgr_free); - -struct fc_seq *fc_exch_seq_send(struct fc_lport *lp, - struct fc_frame *fp, - void (*resp)(struct fc_seq *, - struct fc_frame *fp, - void *arg), - void (*destructor)(struct fc_seq *, void *), - void *arg, u32 timer_msec) -{ - struct fc_exch *ep; - struct fc_seq *sp = NULL; - struct fc_frame_header *fh; - int rc = 1; - - ep = fc_exch_alloc(lp, fp); - if (!ep) { - fc_frame_free(fp); - return NULL; - } - ep->esb_stat |= ESB_ST_SEQ_INIT; - fh = fc_frame_header_get(fp); - fc_exch_set_addr(ep, ntoh24(fh->fh_s_id), ntoh24(fh->fh_d_id)); - ep->resp = resp; - ep->destructor = destructor; - ep->arg = arg; - ep->r_a_tov = FC_DEF_R_A_TOV; - ep->lp = lp; - sp = &ep->seq; - - ep->fh_type = fh->fh_type; /* save for possbile timeout handling */ - ep->f_ctl = ntoh24(fh->fh_f_ctl); - fc_exch_setup_hdr(ep, fp, ep->f_ctl); - sp->cnt++; - - if (ep->xid <= lp->lro_xid) - fc_fcp_ddp_setup(fr_fsp(fp), ep->xid); - - if (unlikely(lp->tt.frame_send(lp, fp))) - goto err; - - if (timer_msec) - fc_exch_timer_set_locked(ep, timer_msec); - ep->f_ctl &= ~FC_FC_FIRST_SEQ; /* not first seq */ - - if (ep->f_ctl & FC_FC_SEQ_INIT) - ep->esb_stat &= ~ESB_ST_SEQ_INIT; - spin_unlock_bh(&ep->ex_lock); - return sp; -err: - rc = fc_exch_done_locked(ep); - spin_unlock_bh(&ep->ex_lock); - if (!rc) - fc_exch_delete(ep); - return NULL; -} -EXPORT_SYMBOL(fc_exch_seq_send); - /* * Receive a frame */ diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h index 9617f9365e45..f207b6cac06f 100644 --- a/include/scsi/libfc.h +++ b/include/scsi/libfc.h @@ -1011,55 +1011,6 @@ void fc_exch_mgr_free(struct fc_lport *lport); */ void fc_exch_recv(struct fc_lport *lp, struct fc_frame *fp); -/* - * This function is for exch_seq_send function pointer in - * struct libfc_function_template, see comment block on - * exch_seq_send for description of this function. - */ -struct fc_seq *fc_exch_seq_send(struct fc_lport *lp, - struct fc_frame *fp, - void (*resp)(struct fc_seq *sp, - struct fc_frame *fp, - void *arg), - void (*destructor)(struct fc_seq *sp, - void *arg), - void *arg, u32 timer_msec); - -/* - * send a frame using existing sequence and exchange. - */ -int fc_seq_send(struct fc_lport *lp, struct fc_seq *sp, struct fc_frame *fp); - -/* - * Send ELS response using mainly infomation - * in exchange and sequence in EM layer. - */ -void fc_seq_els_rsp_send(struct fc_seq *sp, enum fc_els_cmd els_cmd, - struct fc_seq_els_data *els_data); - -/* - * This function is for seq_exch_abort function pointer in - * struct libfc_function_template, see comment block on - * seq_exch_abort for description of this function. - */ -int fc_seq_exch_abort(const struct fc_seq *req_sp, unsigned int timer_msec); - -/* - * Indicate that an exchange/sequence tuple is complete and the memory - * allocated for the related objects may be freed. - */ -void fc_exch_done(struct fc_seq *sp); - -/* - * Allocate a new exchange and sequence pair. - */ -struct fc_exch *fc_exch_alloc(struct fc_lport *lport, struct fc_frame *fp); -/* - * Start a new sequence on the same exchange as the supplied sequence. - */ -struct fc_seq *fc_seq_start_next(struct fc_seq *sp); - - /* * Reset all EMs of a lport, releasing its all sequences and * exchanges. If sid is non-zero, then reset only exchanges -- cgit v1.3-8-gc7d7 From 255f6386b816b2bc0c251af0ee4985ad5a8461b7 Mon Sep 17 00:00:00 2001 From: Robert Love Date: Tue, 3 Nov 2009 11:45:52 -0800 Subject: [SCSI] libfc: Remove fc_fcp_complete This function is never used, let's remove it. Signed-off-by: Robert Love Signed-off-by: James Bottomley --- drivers/scsi/libfc/fc_fcp.c | 17 ----------------- include/scsi/libfc.h | 8 -------- 2 files changed, 25 deletions(-) (limited to 'include') diff --git a/drivers/scsi/libfc/fc_fcp.c b/drivers/scsi/libfc/fc_fcp.c index 479af9352a42..3ab08f8dfb25 100644 --- a/drivers/scsi/libfc/fc_fcp.c +++ b/drivers/scsi/libfc/fc_fcp.c @@ -1874,23 +1874,6 @@ static void fc_io_compl(struct fc_fcp_pkt *fsp) fc_fcp_pkt_release(fsp); } -/** - * fc_fcp_complete() - complete processing of a fcp packet - * @fsp: fcp packet - * - * This function may sleep if a fsp timer is pending. - * The host lock must not be held by caller. - */ -void fc_fcp_complete(struct fc_fcp_pkt *fsp) -{ - if (fc_fcp_lock_pkt(fsp)) - return; - - fc_fcp_complete_locked(fsp); - fc_fcp_unlock_pkt(fsp); -} -EXPORT_SYMBOL(fc_fcp_complete); - /** * fc_eh_abort() - Abort a command * @sc_cmd: scsi command to abort diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h index f207b6cac06f..db2175da2da5 100644 --- a/include/scsi/libfc.h +++ b/include/scsi/libfc.h @@ -887,14 +887,6 @@ int fc_fcp_init(struct fc_lport *); int fc_queuecommand(struct scsi_cmnd *sc_cmd, void (*done)(struct scsi_cmnd *)); -/* - * complete processing of a fcp packet - * - * This function may sleep if a fsp timer is pending. - * The host lock must not be held by caller. - */ -void fc_fcp_complete(struct fc_fcp_pkt *fsp); - /* * Send an ABTS frame to the target device. The sc_cmd argument * is a pointer to the SCSI command to be aborted. -- cgit v1.3-8-gc7d7 From 8866a5d9075b7129194576f5f810e85a693c40ba Mon Sep 17 00:00:00 2001 From: Robert Love Date: Tue, 3 Nov 2009 11:45:58 -0800 Subject: [SCSI] libfc: Add libfc/fc_libfc.[ch] for libfc internal routines include/scsi/libfc.h is currently loaded with common code shared between libfc's sub-modules as well as shared between libfc and fcoe. Previous patches attempted to move out non-common code. This patch creates two files for common libfc routines that will not be shared with fcoe, fnic or any other LLDs. Signed-off-by: Robert Love Signed-off-by: James Bottomley --- drivers/scsi/libfc/Makefile | 1 + drivers/scsi/libfc/fc_disc.c | 2 + drivers/scsi/libfc/fc_exch.c | 2 + drivers/scsi/libfc/fc_fcp.c | 8 +--- drivers/scsi/libfc/fc_libfc.c | 35 +++++++++++++++ drivers/scsi/libfc/fc_libfc.h | 102 ++++++++++++++++++++++++++++++++++++++++++ drivers/scsi/libfc/fc_lport.c | 2 + drivers/scsi/libfc/fc_rport.c | 2 + include/scsi/libfc.h | 79 -------------------------------- 9 files changed, 147 insertions(+), 86 deletions(-) create mode 100644 drivers/scsi/libfc/fc_libfc.c create mode 100644 drivers/scsi/libfc/fc_libfc.h (limited to 'include') diff --git a/drivers/scsi/libfc/Makefile b/drivers/scsi/libfc/Makefile index 55f982de3a9a..2be549c1db77 100644 --- a/drivers/scsi/libfc/Makefile +++ b/drivers/scsi/libfc/Makefile @@ -3,6 +3,7 @@ obj-$(CONFIG_LIBFC) += libfc.o libfc-objs := \ + fc_libfc.o \ fc_disc.o \ fc_exch.o \ fc_elsct.o \ diff --git a/drivers/scsi/libfc/fc_disc.c b/drivers/scsi/libfc/fc_disc.c index d4cb3f9b1a0d..a4bdec28fef5 100644 --- a/drivers/scsi/libfc/fc_disc.c +++ b/drivers/scsi/libfc/fc_disc.c @@ -40,6 +40,8 @@ #include +#include "fc_libfc.h" + #define FC_DISC_RETRY_LIMIT 3 /* max retries */ #define FC_DISC_RETRY_DELAY 500UL /* (msecs) delay */ diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c index 659bb05287f3..ee6031e24c14 100644 --- a/drivers/scsi/libfc/fc_exch.c +++ b/drivers/scsi/libfc/fc_exch.c @@ -32,6 +32,8 @@ #include #include +#include "fc_libfc.h" + u16 fc_cpu_mask; /* cpu mask for possible cpus */ EXPORT_SYMBOL(fc_cpu_mask); static u16 fc_cpu_order; /* 2's power to represent total possible cpus */ diff --git a/drivers/scsi/libfc/fc_fcp.c b/drivers/scsi/libfc/fc_fcp.c index 3ab08f8dfb25..8a31ced98bd0 100644 --- a/drivers/scsi/libfc/fc_fcp.c +++ b/drivers/scsi/libfc/fc_fcp.c @@ -39,13 +39,7 @@ #include #include -MODULE_AUTHOR("Open-FCoE.org"); -MODULE_DESCRIPTION("libfc"); -MODULE_LICENSE("GPL v2"); - -unsigned int fc_debug_logging; -module_param_named(debug_logging, fc_debug_logging, int, S_IRUGO|S_IWUSR); -MODULE_PARM_DESC(debug_logging, "a bit mask of logging levels"); +#include "fc_libfc.h" static struct kmem_cache *scsi_pkt_cachep; diff --git a/drivers/scsi/libfc/fc_libfc.c b/drivers/scsi/libfc/fc_libfc.c new file mode 100644 index 000000000000..e64ea870a4c8 --- /dev/null +++ b/drivers/scsi/libfc/fc_libfc.c @@ -0,0 +1,35 @@ +/* + * Copyright(c) 2009 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + * Maintained at www.Open-FCoE.org + */ + +#include +#include +#include +#include + +#include + +#include "fc_libfc.h" + +MODULE_AUTHOR("Open-FCoE.org"); +MODULE_DESCRIPTION("libfc"); +MODULE_LICENSE("GPL v2"); + +unsigned int fc_debug_logging; +module_param_named(debug_logging, fc_debug_logging, int, S_IRUGO|S_IWUSR); +MODULE_PARM_DESC(debug_logging, "a bit mask of logging levels"); diff --git a/drivers/scsi/libfc/fc_libfc.h b/drivers/scsi/libfc/fc_libfc.h new file mode 100644 index 000000000000..388fae4364af --- /dev/null +++ b/drivers/scsi/libfc/fc_libfc.h @@ -0,0 +1,102 @@ +/* + * Copyright(c) 2009 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + * Maintained at www.Open-FCoE.org + */ + +#ifndef _FC_LIBFC_H_ +#define _FC_LIBFC_H_ + +#define FC_LIBFC_LOGGING 0x01 /* General logging, not categorized */ +#define FC_LPORT_LOGGING 0x02 /* lport layer logging */ +#define FC_DISC_LOGGING 0x04 /* discovery layer logging */ +#define FC_RPORT_LOGGING 0x08 /* rport layer logging */ +#define FC_FCP_LOGGING 0x10 /* I/O path logging */ +#define FC_EM_LOGGING 0x20 /* Exchange Manager logging */ +#define FC_EXCH_LOGGING 0x40 /* Exchange/Sequence logging */ +#define FC_SCSI_LOGGING 0x80 /* SCSI logging (mostly error handling) */ + +extern unsigned int fc_debug_logging; + +#define FC_CHECK_LOGGING(LEVEL, CMD) \ +do { \ + if (unlikely(fc_debug_logging & LEVEL)) \ + do { \ + CMD; \ + } while (0); \ +} while (0) + +#define FC_LIBFC_DBG(fmt, args...) \ + FC_CHECK_LOGGING(FC_LIBFC_LOGGING, \ + printk(KERN_INFO "libfc: " fmt, ##args)) + +#define FC_LPORT_DBG(lport, fmt, args...) \ + FC_CHECK_LOGGING(FC_LPORT_LOGGING, \ + printk(KERN_INFO "host%u: lport %6x: " fmt, \ + (lport)->host->host_no, \ + fc_host_port_id((lport)->host), ##args)) + +#define FC_DISC_DBG(disc, fmt, args...) \ + FC_CHECK_LOGGING(FC_DISC_LOGGING, \ + printk(KERN_INFO "host%u: disc: " fmt, \ + (disc)->lport->host->host_no, \ + ##args)) + +#define FC_RPORT_ID_DBG(lport, port_id, fmt, args...) \ + FC_CHECK_LOGGING(FC_RPORT_LOGGING, \ + printk(KERN_INFO "host%u: rport %6x: " fmt, \ + (lport)->host->host_no, \ + (port_id), ##args)) + +#define FC_RPORT_DBG(rdata, fmt, args...) \ + FC_RPORT_ID_DBG((rdata)->local_port, (rdata)->ids.port_id, fmt, ##args) + +#define FC_FCP_DBG(pkt, fmt, args...) \ + FC_CHECK_LOGGING(FC_FCP_LOGGING, \ + printk(KERN_INFO "host%u: fcp: %6x: " fmt, \ + (pkt)->lp->host->host_no, \ + pkt->rport->port_id, ##args)) + +#define FC_EXCH_DBG(exch, fmt, args...) \ + FC_CHECK_LOGGING(FC_EXCH_LOGGING, \ + printk(KERN_INFO "host%u: xid %4x: " fmt, \ + (exch)->lp->host->host_no, \ + exch->xid, ##args)) + +#define FC_SCSI_DBG(lport, fmt, args...) \ + FC_CHECK_LOGGING(FC_SCSI_LOGGING, \ + printk(KERN_INFO "host%u: scsi: " fmt, \ + (lport)->host->host_no, ##args)) + +/* + * Set up direct-data placement for this I/O request + */ +void fc_fcp_ddp_setup(struct fc_fcp_pkt *fsp, u16 xid); + +/* + * Module setup functions + */ +int fc_setup_exch_mgr(void); +void fc_destroy_exch_mgr(void); +int fc_setup_rport(void); +void fc_destroy_rport(void); + +/* + * Internal libfc functions + */ +const char *fc_els_resp_type(struct fc_frame *); + +#endif /* _FC_LIBFC_H_ */ diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c index 536492ae6a88..f7f20a46e494 100644 --- a/drivers/scsi/libfc/fc_lport.c +++ b/drivers/scsi/libfc/fc_lport.c @@ -95,6 +95,8 @@ #include #include +#include "fc_libfc.h" + /* Fabric IDs to use for point-to-point mode, chosen on whims. */ #define FC_LOCAL_PTP_FID_LO 0x010101 #define FC_LOCAL_PTP_FID_HI 0x010102 diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c index 324e156b5d07..622285c81fef 100644 --- a/drivers/scsi/libfc/fc_rport.c +++ b/drivers/scsi/libfc/fc_rport.c @@ -55,6 +55,8 @@ #include #include +#include "fc_libfc.h" + struct workqueue_struct *rport_event_queue; static void fc_rport_enter_plogi(struct fc_rport_priv *); diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h index db2175da2da5..690f8296e633 100644 --- a/include/scsi/libfc.h +++ b/include/scsi/libfc.h @@ -34,67 +34,6 @@ #include -#define FC_LIBFC_LOGGING 0x01 /* General logging, not categorized */ -#define FC_LPORT_LOGGING 0x02 /* lport layer logging */ -#define FC_DISC_LOGGING 0x04 /* discovery layer logging */ -#define FC_RPORT_LOGGING 0x08 /* rport layer logging */ -#define FC_FCP_LOGGING 0x10 /* I/O path logging */ -#define FC_EM_LOGGING 0x20 /* Exchange Manager logging */ -#define FC_EXCH_LOGGING 0x40 /* Exchange/Sequence logging */ -#define FC_SCSI_LOGGING 0x80 /* SCSI logging (mostly error handling) */ - -extern unsigned int fc_debug_logging; - -#define FC_CHECK_LOGGING(LEVEL, CMD) \ -do { \ - if (unlikely(fc_debug_logging & LEVEL)) \ - do { \ - CMD; \ - } while (0); \ -} while (0) - -#define FC_LIBFC_DBG(fmt, args...) \ - FC_CHECK_LOGGING(FC_LIBFC_LOGGING, \ - printk(KERN_INFO "libfc: " fmt, ##args)) - -#define FC_LPORT_DBG(lport, fmt, args...) \ - FC_CHECK_LOGGING(FC_LPORT_LOGGING, \ - printk(KERN_INFO "host%u: lport %6x: " fmt, \ - (lport)->host->host_no, \ - fc_host_port_id((lport)->host), ##args)) - -#define FC_DISC_DBG(disc, fmt, args...) \ - FC_CHECK_LOGGING(FC_DISC_LOGGING, \ - printk(KERN_INFO "host%u: disc: " fmt, \ - (disc)->lport->host->host_no, \ - ##args)) - -#define FC_RPORT_ID_DBG(lport, port_id, fmt, args...) \ - FC_CHECK_LOGGING(FC_RPORT_LOGGING, \ - printk(KERN_INFO "host%u: rport %6x: " fmt, \ - (lport)->host->host_no, \ - (port_id), ##args)) - -#define FC_RPORT_DBG(rdata, fmt, args...) \ - FC_RPORT_ID_DBG((rdata)->local_port, (rdata)->ids.port_id, fmt, ##args) - -#define FC_FCP_DBG(pkt, fmt, args...) \ - FC_CHECK_LOGGING(FC_FCP_LOGGING, \ - printk(KERN_INFO "host%u: fcp: %6x: " fmt, \ - (pkt)->lp->host->host_no, \ - pkt->rport->port_id, ##args)) - -#define FC_EXCH_DBG(exch, fmt, args...) \ - FC_CHECK_LOGGING(FC_EXCH_LOGGING, \ - printk(KERN_INFO "host%u: xid %4x: " fmt, \ - (exch)->lp->host->host_no, \ - exch->xid, ##args)) - -#define FC_SCSI_DBG(lport, fmt, args...) \ - FC_CHECK_LOGGING(FC_SCSI_LOGGING, \ - printk(KERN_INFO "host%u: scsi: " fmt, \ - (lport)->host->host_no, ##args)) - /* * libfc error codes */ @@ -923,11 +862,6 @@ int fc_change_queue_type(struct scsi_device *sdev, int tag_type); */ void fc_fcp_destroy(struct fc_lport *); -/* - * Set up direct-data placement for this I/O request - */ -void fc_fcp_ddp_setup(struct fc_fcp_pkt *fsp, u16 xid); - /* * ELS/CT interface *****************************/ @@ -1020,17 +954,4 @@ void fc_get_host_port_state(struct Scsi_Host *shost); void fc_set_rport_loss_tmo(struct fc_rport *rport, u32 timeout); struct fc_host_statistics *fc_get_host_stats(struct Scsi_Host *); -/* - * module setup functions. - */ -int fc_setup_exch_mgr(void); -void fc_destroy_exch_mgr(void); -int fc_setup_rport(void); -void fc_destroy_rport(void); - -/* - * Internal libfc functions. - */ -const char *fc_els_resp_type(struct fc_frame *); - #endif /* _LIBFC_H_ */ -- cgit v1.3-8-gc7d7 From 86221969e20a2f60ce104160dc836a964974673b Mon Sep 17 00:00:00 2001 From: Chris Leech Date: Tue, 3 Nov 2009 11:46:08 -0800 Subject: [SCSI] libfc: changes to libfc_host_alloc to consolidate initialization with allocation I'd like to keep basic initialization together with allocation, which means this can't just be a tail-call to scsi_host_alloc. This is needed to create a generic libfc host allocation routine for NPIV VN_Ports, which will share the exchange ID space (through sharing exchange manager structures) with the parent lport. In order to clone the exchange manager list when the lport is allocated, the list head must be initialized earlier. Also, update fnic to use the libfc_host_alloc so that later changes do not break it. (contribution by Joe Eykholt) Signed-off-by: Chris Leech Signed-off-by: Joe Eykholt Signed-off-by: Robert Love Signed-off-by: James Bottomley --- drivers/scsi/fcoe/fcoe.c | 8 +++----- drivers/scsi/fnic/fnic_main.c | 10 ++++------ drivers/scsi/libfc/fc_lport.c | 1 - include/scsi/libfc.h | 15 ++++++++++++--- 4 files changed, 19 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c index 4efbc17a7d7f..8ca488de492d 100644 --- a/drivers/scsi/fcoe/fcoe.c +++ b/drivers/scsi/fcoe/fcoe.c @@ -515,8 +515,6 @@ static int fcoe_shost_config(struct fc_lport *lp, struct Scsi_Host *shost, int rc = 0; /* lport scsi host config */ - lp->host = shost; - lp->host->max_lun = FCOE_MAX_LUN; lp->host->max_id = FCOE_MAX_FCP_TARGET; lp->host->max_channel = 0; @@ -734,14 +732,14 @@ static struct fc_lport *fcoe_if_create(struct fcoe_interface *fcoe, FCOE_NETDEV_DBG(netdev, "Create Interface\n"); - shost = libfc_host_alloc(&fcoe_shost_template, + lport = libfc_host_alloc(&fcoe_shost_template, sizeof(struct fcoe_port)); - if (!shost) { + if (!lport) { FCOE_NETDEV_DBG(netdev, "Could not allocate host structure\n"); rc = -ENOMEM; goto out; } - lport = shost_priv(shost); + shost = lport->host; port = lport_priv(lport); port->lport = lport; port->fcoe = fcoe; diff --git a/drivers/scsi/fnic/fnic_main.c b/drivers/scsi/fnic/fnic_main.c index fc61f17025ce..018cc427504a 100644 --- a/drivers/scsi/fnic/fnic_main.c +++ b/drivers/scsi/fnic/fnic_main.c @@ -424,15 +424,13 @@ static int __devinit fnic_probe(struct pci_dev *pdev, * Allocate SCSI Host and set up association between host, * local port, and fnic */ - host = scsi_host_alloc(&fnic_host_template, - sizeof(struct fc_lport) + sizeof(struct fnic)); - if (!host) { - printk(KERN_ERR PFX "Unable to alloc SCSI host\n"); + lp = libfc_host_alloc(&fnic_host_template, sizeof(struct fnic)); + if (!lp) { + printk(KERN_ERR PFX "Unable to alloc libfc local port\n"); err = -ENOMEM; goto err_out; } - lp = shost_priv(host); - lp->host = host; + host = lp->host; fnic = lport_priv(lp); fnic->lport = lp; diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c index f7f20a46e494..41650d336289 100644 --- a/drivers/scsi/libfc/fc_lport.c +++ b/drivers/scsi/libfc/fc_lport.c @@ -1505,7 +1505,6 @@ int fc_lport_init(struct fc_lport *lport) if (lport->link_supported_speeds & FC_PORTSPEED_10GBIT) fc_host_supported_speeds(lport->host) |= FC_PORTSPEED_10GBIT; - INIT_LIST_HEAD(&lport->ema_list); return 0; } EXPORT_SYMBOL(fc_lport_init); diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h index 690f8296e633..ed3057b4e78d 100644 --- a/include/scsi/libfc.h +++ b/include/scsi/libfc.h @@ -739,12 +739,21 @@ static inline void *lport_priv(const struct fc_lport *lp) * @sht: ptr to the scsi host templ * @priv_size: size of private data after fc_lport * - * Returns: ptr to Scsi_Host + * Returns: libfc lport */ -static inline struct Scsi_Host * +static inline struct fc_lport * libfc_host_alloc(struct scsi_host_template *sht, int priv_size) { - return scsi_host_alloc(sht, sizeof(struct fc_lport) + priv_size); + struct fc_lport *lport; + struct Scsi_Host *shost; + + shost = scsi_host_alloc(sht, sizeof(*lport) + priv_size); + if (!shost) + return NULL; + lport = shost_priv(shost); + lport->host = shost; + INIT_LIST_HEAD(&lport->ema_list); + return lport; } /* -- cgit v1.3-8-gc7d7 From 174e1ebffd30a7599b889900089f7acef944cc6b Mon Sep 17 00:00:00 2001 From: Chris Leech Date: Tue, 3 Nov 2009 11:46:14 -0800 Subject: [SCSI] libfc: add some generic NPIV support routines to libfc Adds a function to create a new VN_Port instances, which share the EM list with the N_Port, VN_Port lookup by fabric ID when responding to a new request (otherwise the exchange lookup from the N_Ports EM list is trusted to return an exchange with a cached lport value for the correct VN_Port), a pointer to a fc_vport structure for VN_Ports, and flags to indicate if an N_Port supports NPIV and if the switch/fabric allows it. Signed-off-by: Chris Leech Signed-off-by: Robert Love Signed-off-by: James Bottomley --- drivers/scsi/libfc/Makefile | 3 +- drivers/scsi/libfc/fc_exch.c | 29 +++++++++++++++ drivers/scsi/libfc/fc_npiv.c | 86 ++++++++++++++++++++++++++++++++++++++++++++ include/scsi/libfc.h | 20 +++++++++++ 4 files changed, 137 insertions(+), 1 deletion(-) create mode 100644 drivers/scsi/libfc/fc_npiv.c (limited to 'include') diff --git a/drivers/scsi/libfc/Makefile b/drivers/scsi/libfc/Makefile index 2be549c1db77..4bb23ac86a5c 100644 --- a/drivers/scsi/libfc/Makefile +++ b/drivers/scsi/libfc/Makefile @@ -10,4 +10,5 @@ libfc-objs := \ fc_frame.o \ fc_lport.o \ fc_rport.o \ - fc_fcp.o + fc_fcp.o \ + fc_npiv.o diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c index ee6031e24c14..751a485685d9 100644 --- a/drivers/scsi/libfc/fc_exch.c +++ b/drivers/scsi/libfc/fc_exch.c @@ -1134,6 +1134,15 @@ static void fc_exch_recv_req(struct fc_lport *lp, struct fc_exch_mgr *mp, u32 f_ctl; enum fc_pf_rjt_reason reject; + /* We can have the wrong fc_lport at this point with NPIV, which is a + * problem now that we know a new exchange needs to be allocated + */ + lp = fc_vport_id_lookup(lp, ntoh24(fh->fh_d_id)); + if (!lp) { + fc_frame_free(fp); + return; + } + fr_seq(fp) = NULL; reject = fc_seq_lookup_recip(lp, mp, fp); if (reject == FC_RJT_NONE) { @@ -1900,6 +1909,26 @@ void fc_exch_mgr_del(struct fc_exch_mgr_anchor *ema) } EXPORT_SYMBOL(fc_exch_mgr_del); +/** + * fc_exch_mgr_list_clone() - share all exchange manager objects + * @src: source lport to clone exchange managers from + * @dst: new lport that takes references to all the exchange managers + */ +int fc_exch_mgr_list_clone(struct fc_lport *src, struct fc_lport *dst) +{ + struct fc_exch_mgr_anchor *ema, *tmp; + + list_for_each_entry(ema, &src->ema_list, ema_list) { + if (!fc_exch_mgr_add(dst, ema->mp, ema->match)) + goto err; + } + return 0; +err: + list_for_each_entry_safe(ema, tmp, &dst->ema_list, ema_list) + fc_exch_mgr_del(ema); + return -ENOMEM; +} + struct fc_exch_mgr *fc_exch_mgr_alloc(struct fc_lport *lp, enum fc_class class, u16 min_xid, u16 max_xid, diff --git a/drivers/scsi/libfc/fc_npiv.c b/drivers/scsi/libfc/fc_npiv.c new file mode 100644 index 000000000000..39f02c09a8d9 --- /dev/null +++ b/drivers/scsi/libfc/fc_npiv.c @@ -0,0 +1,86 @@ +/* + * Copyright(c) 2009 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + * Maintained at www.Open-FCoE.org + */ + +/* + * NPIV VN_Port helper functions for libfc + */ + +#include + +/** + * fc_vport_create() - Create a new NPIV vport instance + * @vport: fc_vport structure from scsi_transport_fc + * @privsize: driver private data size to allocate along with the Scsi_Host + */ + +struct fc_lport *libfc_vport_create(struct fc_vport *vport, int privsize) +{ + struct Scsi_Host *shost = vport_to_shost(vport); + struct fc_lport *n_port = shost_priv(shost); + struct fc_lport *vn_port; + + vn_port = libfc_host_alloc(shost->hostt, privsize); + if (!vn_port) + goto err_out; + if (fc_exch_mgr_list_clone(n_port, vn_port)) + goto err_put; + + vn_port->vport = vport; + vport->dd_data = vn_port; + + mutex_lock(&n_port->lp_mutex); + list_add_tail(&vn_port->list, &n_port->vports); + mutex_unlock(&n_port->lp_mutex); + + return vn_port; + +err_put: + scsi_host_put(vn_port->host); +err_out: + return NULL; +} +EXPORT_SYMBOL(libfc_vport_create); + +/** + * fc_vport_id_lookup() - find NPIV lport that matches a given fabric ID + * @n_port: Top level N_Port which may have multiple NPIV VN_Ports + * @port_id: Fabric ID to find a match for + * + * Returns: matching lport pointer or NULL if there is no match + */ +struct fc_lport *fc_vport_id_lookup(struct fc_lport *n_port, u32 port_id) +{ + struct fc_lport *lport = NULL; + struct fc_lport *vn_port; + + if (fc_host_port_id(n_port->host) == port_id) + return n_port; + + mutex_lock(&n_port->lp_mutex); + list_for_each_entry(vn_port, &n_port->vports, list) { + if (fc_host_port_id(vn_port->host) == port_id) { + lport = vn_port; + break; + } + } + mutex_unlock(&n_port->lp_mutex); + + return lport; +} + diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h index ed3057b4e78d..2c6d55de8ccd 100644 --- a/include/scsi/libfc.h +++ b/include/scsi/libfc.h @@ -640,6 +640,8 @@ struct fc_lport { /* Associations */ struct Scsi_Host *host; struct list_head ema_list; + struct list_head vports; /* child vports if N_Port */ + struct fc_vport *vport; /* parent vport if VN_Port */ struct fc_rport_priv *dns_rp; struct fc_rport_priv *ptp_rp; void *scsi_priv; @@ -664,6 +666,8 @@ struct fc_lport { u32 seq_offload:1; /* seq offload supported */ u32 crc_offload:1; /* crc offload supported */ u32 lro_enabled:1; /* large receive offload */ + u32 does_npiv:1; /* supports multiple vports */ + u32 npiv_enabled:1; /* switch/fabric allows NPIV */ u32 mfs; /* max FC payload size */ unsigned int service_params; unsigned int e_d_tov; @@ -753,6 +757,7 @@ libfc_host_alloc(struct scsi_host_template *sht, int priv_size) lport = shost_priv(shost); lport->host = shost; INIT_LIST_HEAD(&lport->ema_list); + INIT_LIST_HEAD(&lport->vports); return lport; } @@ -805,6 +810,15 @@ int fc_lport_reset(struct fc_lport *); */ int fc_set_mfs(struct fc_lport *lp, u32 mfs); +/* + * Allocate a new lport struct for an NPIV VN_Port + */ +struct fc_lport *libfc_vport_create(struct fc_vport *vport, int privsize); + +/* + * Find an NPIV VN_Port by port ID + */ +struct fc_lport *fc_vport_id_lookup(struct fc_lport *n_port, u32 port_id); /* * REMOTE PORT LAYER @@ -911,6 +925,12 @@ struct fc_exch_mgr_anchor *fc_exch_mgr_add(struct fc_lport *lport, */ void fc_exch_mgr_del(struct fc_exch_mgr_anchor *ema); +/* + * Clone an exchange manager list, getting reference holds for each EM. + * This is for use with NPIV and sharing the X_ID space between VN_Ports. + */ +int fc_exch_mgr_list_clone(struct fc_lport *src, struct fc_lport *dst); + /* * Allocates an Exchange Manager (EM). * -- cgit v1.3-8-gc7d7 From 8faecddb212d502b1b77936498b9a82b13c4ff44 Mon Sep 17 00:00:00 2001 From: Chris Leech Date: Tue, 3 Nov 2009 11:46:19 -0800 Subject: [SCSI] libfc: vport link handling and fc_vport state managment NPIV vports are managed in libfc by changing their virtual link state when the parent N_Ports internal state changes. The vport link is only online when the N_Port is in a ready state (logged into the fabric). vport_state is updated as needed in this patch as well, currently the states LINKDOWN, INITIALIZING, ACTIVE, DSIABLED, and NO_FABRIC_SUPP are used. This also changes the fc_host port_state handling to differentiate between LINKDOWN and OFFLINE. Signed-off-by: Chris Leech Signed-off-by: Robert Love Signed-off-by: James Bottomley --- drivers/scsi/libfc/fc_lport.c | 72 ++++++++++++++++++++++++++++++++--------- drivers/scsi/libfc/fc_npiv.c | 75 +++++++++++++++++++++++++++++++++++++++++++ include/scsi/libfc.h | 8 +++++ 3 files changed, 140 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c index 41650d336289..46897cf23ea6 100644 --- a/drivers/scsi/libfc/fc_lport.c +++ b/drivers/scsi/libfc/fc_lport.c @@ -224,10 +224,18 @@ void fc_get_host_port_state(struct Scsi_Host *shost) { struct fc_lport *lp = shost_priv(shost); - if (lp->link_up) - fc_host_port_state(shost) = FC_PORTSTATE_ONLINE; + mutex_lock(&lp->lp_mutex); + if (!lp->link_up) + fc_host_port_state(shost) = FC_PORTSTATE_LINKDOWN; else - fc_host_port_state(shost) = FC_PORTSTATE_OFFLINE; + switch (lp->state) { + case LPORT_ST_READY: + fc_host_port_state(shost) = FC_PORTSTATE_ONLINE; + break; + default: + fc_host_port_state(shost) = FC_PORTSTATE_OFFLINE; + } + mutex_unlock(&lp->lp_mutex); } EXPORT_SYMBOL(fc_get_host_port_state); @@ -493,40 +501,62 @@ int fc_fabric_login(struct fc_lport *lport) EXPORT_SYMBOL(fc_fabric_login); /** - * fc_linkup() - Handler for transport linkup events + * __fc_linkup() - Handler for transport linkup events * @lport: The lport whose link is up + * + * Locking: must be called with the lp_mutex held */ -void fc_linkup(struct fc_lport *lport) +void __fc_linkup(struct fc_lport *lport) { - printk(KERN_INFO "libfc: Link up on port (%6x)\n", - fc_host_port_id(lport->host)); - - mutex_lock(&lport->lp_mutex); if (!lport->link_up) { lport->link_up = 1; if (lport->state == LPORT_ST_RESET) fc_lport_enter_flogi(lport); } +} + +/** + * fc_linkup() - Handler for transport linkup events + * @lport: The lport whose link is up + */ +void fc_linkup(struct fc_lport *lport) +{ + printk(KERN_INFO "libfc: Link up on port (%6x)\n", + fc_host_port_id(lport->host)); + + mutex_lock(&lport->lp_mutex); + __fc_linkup(lport); mutex_unlock(&lport->lp_mutex); } EXPORT_SYMBOL(fc_linkup); /** - * fc_linkdown() - Handler for transport linkdown events + * __fc_linkdown() - Handler for transport linkdown events * @lport: The lport whose link is down + * + * Locking: must be called with the lp_mutex held */ -void fc_linkdown(struct fc_lport *lport) +void __fc_linkdown(struct fc_lport *lport) { - mutex_lock(&lport->lp_mutex); - printk(KERN_INFO "libfc: Link down on port (%6x)\n", - fc_host_port_id(lport->host)); - if (lport->link_up) { lport->link_up = 0; fc_lport_enter_reset(lport); lport->tt.fcp_cleanup(lport); } +} + +/** + * fc_linkdown() - Handler for transport linkdown events + * @lport: The lport whose link is down + */ +void fc_linkdown(struct fc_lport *lport) +{ + printk(KERN_INFO "libfc: Link down on port (%6x)\n", + fc_host_port_id(lport->host)); + + mutex_lock(&lport->lp_mutex); + __fc_linkdown(lport); mutex_unlock(&lport->lp_mutex); } EXPORT_SYMBOL(fc_linkdown); @@ -654,6 +684,9 @@ static void fc_lport_enter_ready(struct fc_lport *lport) fc_lport_state(lport)); fc_lport_state_enter(lport, LPORT_ST_READY); + if (lport->vport) + fc_vport_set_state(lport->vport, FC_VPORT_ACTIVE); + fc_vports_linkchange(lport); if (!lport->ptp_rp) lport->tt.disc_start(fc_lport_disc_callback, lport); @@ -868,7 +901,14 @@ static void fc_lport_enter_reset(struct fc_lport *lport) FC_LPORT_DBG(lport, "Entered RESET state from %s state\n", fc_lport_state(lport)); + if (lport->vport) { + if (lport->link_up) + fc_vport_set_state(lport->vport, FC_VPORT_INITIALIZING); + else + fc_vport_set_state(lport->vport, FC_VPORT_LINKDOWN); + } fc_lport_state_enter(lport, LPORT_ST_RESET); + fc_vports_linkchange(lport); fc_lport_reset_locked(lport); if (lport->link_up) fc_lport_enter_flogi(lport); @@ -887,6 +927,7 @@ static void fc_lport_enter_disabled(struct fc_lport *lport) fc_lport_state(lport)); fc_lport_state_enter(lport, LPORT_ST_DISABLED); + fc_vports_linkchange(lport); fc_lport_reset_locked(lport); } @@ -1333,6 +1374,7 @@ static void fc_lport_enter_logo(struct fc_lport *lport) fc_lport_state(lport)); fc_lport_state_enter(lport, LPORT_ST_LOGO); + fc_vports_linkchange(lport); fp = fc_frame_alloc(lport, sizeof(*logo)); if (!fp) { diff --git a/drivers/scsi/libfc/fc_npiv.c b/drivers/scsi/libfc/fc_npiv.c index 39f02c09a8d9..c68f6c7341c2 100644 --- a/drivers/scsi/libfc/fc_npiv.c +++ b/drivers/scsi/libfc/fc_npiv.c @@ -84,3 +84,78 @@ struct fc_lport *fc_vport_id_lookup(struct fc_lport *n_port, u32 port_id) return lport; } +/* + * When setting the link state of vports during an lport state change, it's + * necessary to hold the lp_mutex of both the N_Port and the VN_Port. + * This tells the lockdep engine to treat the nested locking of the VN_Port + * as a different lock class. + */ +enum libfc_lport_mutex_class { + LPORT_MUTEX_NORMAL = 0, + LPORT_MUTEX_VN_PORT = 1, +}; + +/** + * __fc_vport_setlink() - update link and status on a VN_Port + * @n_port: parent N_Port + * @vn_port: VN_Port to update + * + * Locking: must be called with both the N_Port and VN_Port lp_mutex held + */ +static void __fc_vport_setlink(struct fc_lport *n_port, + struct fc_lport *vn_port) +{ + struct fc_vport *vport = vn_port->vport; + + if (vn_port->state == LPORT_ST_DISABLED) + return; + + if (n_port->state == LPORT_ST_READY) { + if (n_port->npiv_enabled) { + fc_vport_set_state(vport, FC_VPORT_INITIALIZING); + __fc_linkup(vn_port); + } else { + fc_vport_set_state(vport, FC_VPORT_NO_FABRIC_SUPP); + __fc_linkdown(vn_port); + } + } else { + fc_vport_set_state(vport, FC_VPORT_LINKDOWN); + __fc_linkdown(vn_port); + } +} + +/** + * fc_vport_setlink() - update link and status on a VN_Port + * @vn_port: virtual port to update + */ +void fc_vport_setlink(struct fc_lport *vn_port) +{ + struct fc_vport *vport = vn_port->vport; + struct Scsi_Host *shost = vport_to_shost(vport); + struct fc_lport *n_port = shost_priv(shost); + + mutex_lock(&n_port->lp_mutex); + mutex_lock_nested(&vn_port->lp_mutex, LPORT_MUTEX_VN_PORT); + __fc_vport_setlink(n_port, vn_port); + mutex_unlock(&vn_port->lp_mutex); + mutex_unlock(&n_port->lp_mutex); +} +EXPORT_SYMBOL(fc_vport_setlink); + +/** + * fc_vports_linkchange() - change the link state of all vports + * @n_port: Parent N_Port that has changed state + * + * Locking: called with the n_port lp_mutex held + */ +void fc_vports_linkchange(struct fc_lport *n_port) +{ + struct fc_lport *vn_port; + + list_for_each_entry(vn_port, &n_port->vports, list) { + mutex_lock_nested(&vn_port->lp_mutex, LPORT_MUTEX_VN_PORT); + __fc_vport_setlink(n_port, vn_port); + mutex_unlock(&vn_port->lp_mutex); + } +} + diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h index 2c6d55de8ccd..dfeb1ee4f03f 100644 --- a/include/scsi/libfc.h +++ b/include/scsi/libfc.h @@ -788,11 +788,13 @@ int fc_fabric_login(struct fc_lport *lp); /* * The link is up for the given local port. */ +void __fc_linkup(struct fc_lport *); void fc_linkup(struct fc_lport *); /* * Link is down for the given local port. */ +void __fc_linkdown(struct fc_lport *); void fc_linkdown(struct fc_lport *); /* @@ -820,6 +822,12 @@ struct fc_lport *libfc_vport_create(struct fc_vport *vport, int privsize); */ struct fc_lport *fc_vport_id_lookup(struct fc_lport *n_port, u32 port_id); +/* + * NPIV VN_Port link state management + */ +void fc_vport_setlink(struct fc_lport *vn_port); +void fc_vports_linkchange(struct fc_lport *n_port); + /* * REMOTE PORT LAYER *****************************/ -- cgit v1.3-8-gc7d7 From db36c06cc6802d03bcba08982377f7c03a3cda7f Mon Sep 17 00:00:00 2001 From: Chris Leech Date: Tue, 3 Nov 2009 11:46:24 -0800 Subject: [SCSI] libfc, libfcoe: FDISC ELS for NPIV Add FDISC ELS handling to libfc and libfcoe, treat it the same as FLOGI where appropriate. Add checking for NPIV support in the FLOGI LS_ACC service parameters. Signed-off-by: Chris Leech Signed-off-by: Robert Love Signed-off-by: James Bottomley --- drivers/scsi/fcoe/libfcoe.c | 6 +++--- drivers/scsi/libfc/fc_lport.c | 6 +++++- include/scsi/fc/fc_els.h | 4 +++- include/scsi/fc_encode.h | 29 +++++++++++++++++++++++++++++ 4 files changed, 40 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/scsi/fcoe/libfcoe.c b/drivers/scsi/fcoe/libfcoe.c index 11ae5c94608b..d8ea04a29199 100644 --- a/drivers/scsi/fcoe/libfcoe.c +++ b/drivers/scsi/fcoe/libfcoe.c @@ -449,7 +449,7 @@ static int fcoe_ctlr_encaps(struct fcoe_ctlr *fip, memset(mac, 0, sizeof(mac)); mac->fd_desc.fip_dtype = FIP_DT_MAC; mac->fd_desc.fip_dlen = sizeof(*mac) / FIP_BPW; - if (dtype != FIP_DT_FLOGI) + if (dtype != FIP_DT_FLOGI && dtype != FIP_DT_FDISC) memcpy(mac->fd_mac, fip->data_src_addr, ETH_ALEN); else if (fip->spma) memcpy(mac->fd_mac, fip->ctl_src_addr, ETH_ALEN); @@ -865,8 +865,8 @@ static void fcoe_ctlr_recv_els(struct fcoe_ctlr *fip, struct sk_buff *skb) goto drop; els_op = *(u8 *)(fh + 1); - if (els_dtype == FIP_DT_FLOGI && sub == FIP_SC_REP && - fip->flogi_oxid == ntohs(fh->fh_ox_id) && + if ((els_dtype == FIP_DT_FLOGI || els_dtype == FIP_DT_FDISC) && + sub == FIP_SC_REP && fip->flogi_oxid == ntohs(fh->fh_ox_id) && els_op == ELS_LS_ACC && is_valid_ether_addr(granted_mac)) { fip->flogi_oxid = FC_XID_UNKNOWN; fip->update_mac(fip, fip->data_src_addr, granted_mac); diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c index 46897cf23ea6..ccba67ca68a1 100644 --- a/drivers/scsi/libfc/fc_lport.c +++ b/drivers/scsi/libfc/fc_lport.c @@ -1449,6 +1449,9 @@ static void fc_lport_flogi_resp(struct fc_seq *sp, struct fc_frame *fp, e_d_tov = ntohl(flp->fl_csp.sp_e_d_tov); if (csp_flags & FC_SP_FT_EDTR) e_d_tov /= 1000000; + + lport->npiv_enabled = !!(csp_flags & FC_SP_FT_NPIV_ACC); + if ((csp_flags & FC_SP_FT_FPORT) == 0) { if (e_d_tov > lport->e_d_tov) lport->e_d_tov = e_d_tov; @@ -1498,7 +1501,8 @@ void fc_lport_enter_flogi(struct fc_lport *lport) if (!fp) return fc_lport_error(lport, fp); - if (!lport->tt.elsct_send(lport, FC_FID_FLOGI, fp, ELS_FLOGI, + if (!lport->tt.elsct_send(lport, FC_FID_FLOGI, fp, + lport->vport ? ELS_FDISC : ELS_FLOGI, fc_lport_flogi_resp, lport, lport->e_d_tov)) fc_lport_error(lport, NULL); } diff --git a/include/scsi/fc/fc_els.h b/include/scsi/fc/fc_els.h index 195ca014d3ce..b0872afe2d30 100644 --- a/include/scsi/fc/fc_els.h +++ b/include/scsi/fc/fc_els.h @@ -248,10 +248,12 @@ struct fc_els_csp { /* * sp_features */ -#define FC_SP_FT_CIRO 0x8000 /* continuously increasing rel. off. */ +#define FC_SP_FT_NPIV 0x8000 /* multiple N_Port_ID support (FLOGI) */ +#define FC_SP_FT_CIRO 0x8000 /* continuously increasing rel off (PLOGI) */ #define FC_SP_FT_CLAD 0x8000 /* clean address (in FLOGI LS_ACC) */ #define FC_SP_FT_RAND 0x4000 /* random relative offset */ #define FC_SP_FT_VAL 0x2000 /* valid vendor version level */ +#define FC_SP_FT_NPIV_ACC 0x2000 /* NPIV assignment (FLOGI LS_ACC) */ #define FC_SP_FT_FPORT 0x1000 /* F port (1) vs. N port (0) */ #define FC_SP_FT_ABB 0x0800 /* alternate BB_credit management */ #define FC_SP_FT_EDTR 0x0400 /* E_D_TOV Resolution is nanoseconds */ diff --git a/include/scsi/fc_encode.h b/include/scsi/fc_encode.h index 27dad703824f..c93ca3ece1a0 100644 --- a/include/scsi/fc_encode.h +++ b/include/scsi/fc_encode.h @@ -198,6 +198,31 @@ static inline void fc_flogi_fill(struct fc_lport *lport, struct fc_frame *fp) sp->sp_bb_data = htons((u16) lport->mfs); cp = &flogi->fl_cssp[3 - 1]; /* class 3 parameters */ cp->cp_class = htons(FC_CPC_VALID | FC_CPC_SEQ); + if (lport->does_npiv) + sp->sp_features = htons(FC_SP_FT_NPIV); +} + +/** + * fc_fdisc_fill - Fill in a fdisc request frame. + */ +static inline void fc_fdisc_fill(struct fc_lport *lport, struct fc_frame *fp) +{ + struct fc_els_csp *sp; + struct fc_els_cssp *cp; + struct fc_els_flogi *fdisc; + + fdisc = fc_frame_payload_get(fp, sizeof(*fdisc)); + memset(fdisc, 0, sizeof(*fdisc)); + fdisc->fl_cmd = (u8) ELS_FDISC; + put_unaligned_be64(lport->wwpn, &fdisc->fl_wwpn); + put_unaligned_be64(lport->wwnn, &fdisc->fl_wwnn); + sp = &fdisc->fl_csp; + sp->sp_hi_ver = 0x20; + sp->sp_lo_ver = 0x20; + sp->sp_bb_cred = htons(10); /* this gets set by gateway */ + sp->sp_bb_data = htons((u16) lport->mfs); + cp = &fdisc->fl_cssp[3 - 1]; /* class 3 parameters */ + cp->cp_class = htons(FC_CPC_VALID | FC_CPC_SEQ); } /** @@ -296,6 +321,10 @@ static inline int fc_els_fill(struct fc_lport *lport, fc_flogi_fill(lport, fp); break; + case ELS_FDISC: + fc_fdisc_fill(lport, fp); + break; + case ELS_LOGO: fc_logo_fill(lport, fp); break; -- cgit v1.3-8-gc7d7 From 11b561886643d4e23d0fd58c205d830a448dd0a2 Mon Sep 17 00:00:00 2001 From: Chris Leech Date: Tue, 3 Nov 2009 11:46:29 -0800 Subject: [SCSI] libfcoe, fcoe: libfcoe NPIV support The FIP code in libfcoe needed several changes to support NPIV 1) dst_src_addr needs to be managed per-n_port-ID for FPMA fabrics with NPIV enabled. Managing the MAC address is now handled in fcoe, with some slight changes to update_mac() and a new get_src_addr() function pointer. 2) The libfc elsct_send() hook is used to setup FCoE specific response handlers for FIP encapsulated ELS exchanges. This lets the FCoE specific handling know which VN_Port the exchange is for, and doesn't require tracking OX_IDs. It might be possible to roll back to the full FIP frame in these, but for now I've just stashed the contents of the MAC address descriptor in the skb context block for later use. Also, because fcoe_elsct_send() just passes control on to fc_elsct_send(), all transmits still come through the normal frame_send() path. 3) The NPIV changes added a mutex hold in the keep alive sending, the lport mutex is protecting the vport list. We can't take a mutex from a timer, so move the FIP keep alive logic to the link work struct. Signed-off-by: Chris Leech Signed-off-by: Robert Love Signed-off-by: James Bottomley --- drivers/scsi/fcoe/fcoe.c | 147 ++++++++++++++++++++++++++++++++++++------ drivers/scsi/fcoe/fcoe.h | 1 + drivers/scsi/fcoe/libfcoe.c | 82 +++++++++++++---------- drivers/scsi/libfc/fc_elsct.c | 3 +- drivers/scsi/libfc/fc_lport.c | 6 +- include/scsi/fc_frame.h | 3 + include/scsi/libfc.h | 10 +++ include/scsi/libfcoe.h | 15 +++-- 8 files changed, 207 insertions(+), 60 deletions(-) (limited to 'include') diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c index 8ca488de492d..a64c398c981e 100644 --- a/drivers/scsi/fcoe/fcoe.c +++ b/drivers/scsi/fcoe/fcoe.c @@ -226,7 +226,8 @@ static int fcoe_interface_setup(struct fcoe_interface *fcoe, } static void fcoe_fip_send(struct fcoe_ctlr *fip, struct sk_buff *skb); -static void fcoe_update_src_mac(struct fcoe_ctlr *fip, u8 *old, u8 *new); +static void fcoe_update_src_mac(struct fc_lport *lport, u8 *addr); +static u8 *fcoe_get_src_mac(struct fc_lport *lport); static void fcoe_destroy_work(struct work_struct *work); /** @@ -254,6 +255,7 @@ static struct fcoe_interface *fcoe_interface_create(struct net_device *netdev) fcoe_ctlr_init(&fcoe->ctlr); fcoe->ctlr.send = fcoe_fip_send; fcoe->ctlr.update_mac = fcoe_update_src_mac; + fcoe->ctlr.get_src_addr = fcoe_get_src_mac; fcoe_interface_setup(fcoe, netdev); @@ -286,8 +288,6 @@ void fcoe_interface_cleanup(struct fcoe_interface *fcoe) /* Delete secondary MAC addresses */ memcpy(flogi_maddr, (u8[6]) FC_FCOE_FLOGI_MAC, ETH_ALEN); dev_unicast_delete(netdev, flogi_maddr); - if (!is_zero_ether_addr(fip->data_src_addr)) - dev_unicast_delete(netdev, fip->data_src_addr); if (fip->spma) dev_unicast_delete(netdev, fip->ctl_src_addr); dev_mc_delete(netdev, FIP_ALL_ENODE_MACS, ETH_ALEN, 0); @@ -369,25 +369,37 @@ static void fcoe_fip_send(struct fcoe_ctlr *fip, struct sk_buff *skb) /** * fcoe_update_src_mac() - Update Ethernet MAC filters. - * @fip: FCoE controller. - * @old: Unicast MAC address to delete if the MAC is non-zero. - * @new: Unicast MAC address to add. + * @lport: libfc lport + * @addr: Unicast MAC address to add. * * Remove any previously-set unicast MAC filter. * Add secondary FCoE MAC address filter for our OUI. */ -static void fcoe_update_src_mac(struct fcoe_ctlr *fip, u8 *old, u8 *new) +static void fcoe_update_src_mac(struct fc_lport *lport, u8 *addr) { - struct fcoe_interface *fcoe; + struct fcoe_port *port = lport_priv(lport); + struct fcoe_interface *fcoe = port->fcoe; - fcoe = fcoe_from_ctlr(fip); rtnl_lock(); - if (!is_zero_ether_addr(old)) - dev_unicast_delete(fcoe->netdev, old); - dev_unicast_add(fcoe->netdev, new); + if (!is_zero_ether_addr(port->data_src_addr)) + dev_unicast_delete(fcoe->netdev, port->data_src_addr); + if (!is_zero_ether_addr(addr)) + dev_unicast_add(fcoe->netdev, addr); + memcpy(port->data_src_addr, addr, ETH_ALEN); rtnl_unlock(); } +/** + * fcoe_get_src_mac() - return the Ethernet source address for an lport + * @lport: libfc lport + */ +static u8 *fcoe_get_src_mac(struct fc_lport *lport) +{ + struct fcoe_port *port = lport_priv(lport); + + return port->data_src_addr; +} + /** * fcoe_lport_config() - sets up the fc_lport * @lp: ptr to the fc_lport @@ -650,6 +662,11 @@ static void fcoe_if_destroy(struct fc_lport *lport) /* Free existing transmit skbs */ fcoe_clean_pending_queue(lport); + rtnl_lock(); + if (!is_zero_ether_addr(port->data_src_addr)) + dev_unicast_delete(netdev, port->data_src_addr); + rtnl_unlock(); + /* receives may not be stopped until after this */ fcoe_interface_put(fcoe); @@ -706,10 +723,16 @@ static int fcoe_ddp_done(struct fc_lport *lp, u16 xid) return 0; } +static struct fc_seq *fcoe_elsct_send(struct fc_lport *lport, + u32 did, struct fc_frame *fp, unsigned int op, + void (*resp)(struct fc_seq *, struct fc_frame *, void *), + void *arg, u32 timeout); + static struct libfc_function_template fcoe_libfc_fcn_templ = { .frame_send = fcoe_xmit, .ddp_setup = fcoe_ddp_setup, .ddp_done = fcoe_ddp_done, + .elsct_send = fcoe_elsct_send, }; /** @@ -1226,7 +1249,7 @@ int fcoe_xmit(struct fc_lport *lp, struct fc_frame *fp) } if (unlikely(fh->fh_r_ctl == FC_RCTL_ELS_REQ) && - fcoe_ctlr_els_send(&fcoe->ctlr, skb)) + fcoe_ctlr_els_send(&fcoe->ctlr, lp, skb)) return 0; sof = fr_sof(fp); @@ -1291,7 +1314,7 @@ int fcoe_xmit(struct fc_lport *lp, struct fc_frame *fp) if (unlikely(fcoe->ctlr.flogi_oxid != FC_XID_UNKNOWN)) memcpy(eh->h_source, fcoe->ctlr.ctl_src_addr, ETH_ALEN); else - memcpy(eh->h_source, fcoe->ctlr.data_src_addr, ETH_ALEN); + memcpy(eh->h_source, port->data_src_addr, ETH_ALEN); hp = (struct fcoe_hdr *)(eh + 1); memset(hp, 0, sizeof(*hp)); @@ -1464,11 +1487,6 @@ int fcoe_percpu_receive_thread(void *arg) } fr_flags(fp) &= ~FCPHF_CRC_UNCHECKED; } - if (unlikely(port->fcoe->ctlr.flogi_oxid != FC_XID_UNKNOWN) && - fcoe_ctlr_recv_flogi(&port->fcoe->ctlr, fp, mac)) { - fc_frame_free(fp); - continue; - } fc_exch_recv(lp, fp); } return 0; @@ -2061,3 +2079,94 @@ static void __exit fcoe_exit(void) fcoe_if_exit(); } module_exit(fcoe_exit); + +/** + * fcoe_flogi_resp() - FCoE specific FLOGI and FDISC response handler + * @seq: active sequence in the FLOGI or FDISC exchange + * @fp: response frame, or error encoded in a pointer (timeout) + * @arg: pointer the the fcoe_ctlr structure + * + * This handles MAC address managment for FCoE, then passes control on to + * the libfc FLOGI response handler. + */ +static void fcoe_flogi_resp(struct fc_seq *seq, struct fc_frame *fp, void *arg) +{ + struct fcoe_ctlr *fip = arg; + struct fc_exch *exch = fc_seq_exch(seq); + struct fc_lport *lport = exch->lp; + u8 *mac; + + if (IS_ERR(fp)) + goto done; + + mac = fr_cb(fp)->granted_mac; + if (is_zero_ether_addr(mac)) { + /* pre-FIP */ + mac = eth_hdr(&fp->skb)->h_source; + if (fcoe_ctlr_recv_flogi(fip, lport, fp, mac)) { + fc_frame_free(fp); + return; + } + } else { + /* FIP, libfcoe has already seen it */ + fip->update_mac(lport, fr_cb(fp)->granted_mac); + } +done: + fc_lport_flogi_resp(seq, fp, lport); +} + +/** + * fcoe_logo_resp() - FCoE specific LOGO response handler + * @seq: active sequence in the LOGO exchange + * @fp: response frame, or error encoded in a pointer (timeout) + * @arg: pointer the the fcoe_ctlr structure + * + * This handles MAC address managment for FCoE, then passes control on to + * the libfc LOGO response handler. + */ +static void fcoe_logo_resp(struct fc_seq *seq, struct fc_frame *fp, void *arg) +{ + struct fcoe_ctlr *fip = arg; + struct fc_exch *exch = fc_seq_exch(seq); + struct fc_lport *lport = exch->lp; + static u8 zero_mac[ETH_ALEN] = { 0 }; + + if (!IS_ERR(fp)) + fip->update_mac(lport, zero_mac); + fc_lport_logo_resp(seq, fp, lport); +} + +/** + * fcoe_elsct_send - FCoE specific ELS handler + * + * This does special case handling of FIP encapsualted ELS exchanges for FCoE, + * using FCoE specific response handlers and passing the FIP controller as + * the argument (the lport is still available from the exchange). + * + * Most of the work here is just handed off to the libfc routine. + */ +static struct fc_seq *fcoe_elsct_send(struct fc_lport *lport, + u32 did, struct fc_frame *fp, unsigned int op, + void (*resp)(struct fc_seq *, struct fc_frame *, void *), + void *arg, u32 timeout) +{ + struct fcoe_port *port = lport_priv(lport); + struct fcoe_interface *fcoe = port->fcoe; + struct fcoe_ctlr *fip = &fcoe->ctlr; + struct fc_frame_header *fh = fc_frame_header_get(fp); + + switch (op) { + case ELS_FLOGI: + case ELS_FDISC: + return fc_elsct_send(lport, did, fp, op, fcoe_flogi_resp, + fip, timeout); + case ELS_LOGO: + /* only hook onto fabric logouts, not port logouts */ + if (ntoh24(fh->fh_d_id) != FC_FID_FLOGI) + break; + return fc_elsct_send(lport, did, fp, op, fcoe_logo_resp, + fip, timeout); + } + return fc_elsct_send(lport, did, fp, op, resp, arg, timeout); +} + diff --git a/drivers/scsi/fcoe/fcoe.h b/drivers/scsi/fcoe/fcoe.h index a123552847e5..99dfa7c2aeaa 100644 --- a/drivers/scsi/fcoe/fcoe.h +++ b/drivers/scsi/fcoe/fcoe.h @@ -104,6 +104,7 @@ struct fcoe_port { u8 fcoe_pending_queue_active; struct timer_list timer; /* queue timer */ struct work_struct destroy_work; /* to prevent rtnl deadlocks */ + u8 data_src_addr[ETH_ALEN]; }; #define fcoe_from_ctlr(fip) container_of(fip, struct fcoe_interface, ctlr) diff --git a/drivers/scsi/fcoe/libfcoe.c b/drivers/scsi/fcoe/libfcoe.c index d8ea04a29199..6a93ba96569f 100644 --- a/drivers/scsi/fcoe/libfcoe.c +++ b/drivers/scsi/fcoe/libfcoe.c @@ -322,6 +322,7 @@ EXPORT_SYMBOL(fcoe_ctlr_link_down); /** * fcoe_ctlr_send_keep_alive() - Send a keep-alive to the selected FCF. * @fip: FCoE controller. + * @lport: libfc fc_lport to send from * @ports: 0 for controller keep-alive, 1 for port keep-alive. * @sa: source MAC address. * @@ -332,7 +333,9 @@ EXPORT_SYMBOL(fcoe_ctlr_link_down); * The source MAC is the assigned mapped source address. * The destination is the FCF's F-port. */ -static void fcoe_ctlr_send_keep_alive(struct fcoe_ctlr *fip, int ports, u8 *sa) +static void fcoe_ctlr_send_keep_alive(struct fcoe_ctlr *fip, + struct fc_lport *lport, + int ports, u8 *sa) { struct sk_buff *skb; struct fip_kal { @@ -374,16 +377,14 @@ static void fcoe_ctlr_send_keep_alive(struct fcoe_ctlr *fip, int ports, u8 *sa) kal->mac.fd_desc.fip_dtype = FIP_DT_MAC; kal->mac.fd_desc.fip_dlen = sizeof(kal->mac) / FIP_BPW; memcpy(kal->mac.fd_mac, fip->ctl_src_addr, ETH_ALEN); - if (ports) { vn = (struct fip_vn_desc *)(kal + 1); vn->fd_desc.fip_dtype = FIP_DT_VN_ID; vn->fd_desc.fip_dlen = sizeof(*vn) / FIP_BPW; - memcpy(vn->fd_mac, fip->data_src_addr, ETH_ALEN); + memcpy(vn->fd_mac, fip->get_src_addr(lport), ETH_ALEN); hton24(vn->fd_fc_id, fc_host_port_id(lp->host)); put_unaligned_be64(lp->wwpn, &vn->fd_wwpn); } - skb_put(skb, len); skb->protocol = htons(ETH_P_FIP); skb_reset_mac_header(skb); @@ -394,6 +395,7 @@ static void fcoe_ctlr_send_keep_alive(struct fcoe_ctlr *fip, int ports, u8 *sa) /** * fcoe_ctlr_encaps() - Encapsulate an ELS frame for FIP, without sending it. * @fip: FCoE controller. + * @lport: libfc fc_lport to use for the source address * @dtype: FIP descriptor type for the frame. * @skb: FCoE ELS frame including FC header but no FCoE headers. * @@ -405,7 +407,7 @@ static void fcoe_ctlr_send_keep_alive(struct fcoe_ctlr *fip, int ports, u8 *sa) * Headroom includes the FIP encapsulation description, FIP header, and * Ethernet header. The tailroom is for the FIP MAC descriptor. */ -static int fcoe_ctlr_encaps(struct fcoe_ctlr *fip, +static int fcoe_ctlr_encaps(struct fcoe_ctlr *fip, struct fc_lport *lport, u8 dtype, struct sk_buff *skb) { struct fip_encaps_head { @@ -450,7 +452,7 @@ static int fcoe_ctlr_encaps(struct fcoe_ctlr *fip, mac->fd_desc.fip_dtype = FIP_DT_MAC; mac->fd_desc.fip_dlen = sizeof(*mac) / FIP_BPW; if (dtype != FIP_DT_FLOGI && dtype != FIP_DT_FDISC) - memcpy(mac->fd_mac, fip->data_src_addr, ETH_ALEN); + memcpy(mac->fd_mac, fip->get_src_addr(lport), ETH_ALEN); else if (fip->spma) memcpy(mac->fd_mac, fip->ctl_src_addr, ETH_ALEN); @@ -463,6 +465,7 @@ static int fcoe_ctlr_encaps(struct fcoe_ctlr *fip, /** * fcoe_ctlr_els_send() - Send an ELS frame encapsulated by FIP if appropriate. * @fip: FCoE controller. + * @lport: libfc fc_lport to send from * @skb: FCoE ELS frame including FC header but no FCoE headers. * * Returns a non-zero error code if the frame should not be sent. @@ -471,11 +474,13 @@ static int fcoe_ctlr_encaps(struct fcoe_ctlr *fip, * The caller must check that the length is a multiple of 4. * The SKB must have enough headroom (28 bytes) and tailroom (8 bytes). */ -int fcoe_ctlr_els_send(struct fcoe_ctlr *fip, struct sk_buff *skb) +int fcoe_ctlr_els_send(struct fcoe_ctlr *fip, struct fc_lport *lport, + struct sk_buff *skb) { struct fc_frame_header *fh; u16 old_xid; u8 op; + u8 mac[ETH_ALEN]; fh = (struct fc_frame_header *)skb->data; op = *(u8 *)(fh + 1); @@ -530,14 +535,15 @@ int fcoe_ctlr_els_send(struct fcoe_ctlr *fip, struct sk_buff *skb) * FLOGI. */ fip->flogi_oxid = FC_XID_UNKNOWN; - fc_fcoe_set_mac(fip->data_src_addr, fh->fh_s_id); + fc_fcoe_set_mac(mac, fh->fh_d_id); + fip->update_mac(lport, mac); return 0; default: if (fip->state != FIP_ST_ENABLED) goto drop; return 0; } - if (fcoe_ctlr_encaps(fip, op, skb)) + if (fcoe_ctlr_encaps(fip, lport, op, skb)) goto drop; fip->send(fip, skb); return -EINPROGRESS; @@ -796,7 +802,7 @@ static void fcoe_ctlr_recv_els(struct fcoe_ctlr *fip, struct sk_buff *skb) { struct fc_lport *lp = fip->lp; struct fip_header *fiph; - struct fc_frame *fp; + struct fc_frame *fp = (struct fc_frame *)skb; struct fc_frame_header *fh = NULL; struct fip_desc *desc; struct fip_encaps *els; @@ -835,6 +841,7 @@ static void fcoe_ctlr_recv_els(struct fcoe_ctlr *fip, struct sk_buff *skb) "in FIP ELS\n"); goto drop; } + memcpy(fr_cb(fp)->granted_mac, granted_mac, ETH_ALEN); break; case FIP_DT_FLOGI: case FIP_DT_FDISC: @@ -865,13 +872,10 @@ static void fcoe_ctlr_recv_els(struct fcoe_ctlr *fip, struct sk_buff *skb) goto drop; els_op = *(u8 *)(fh + 1); - if ((els_dtype == FIP_DT_FLOGI || els_dtype == FIP_DT_FDISC) && - sub == FIP_SC_REP && fip->flogi_oxid == ntohs(fh->fh_ox_id) && - els_op == ELS_LS_ACC && is_valid_ether_addr(granted_mac)) { + if (els_dtype == FIP_DT_FLOGI && sub == FIP_SC_REP && + fip->flogi_oxid == ntohs(fh->fh_ox_id) && + els_op == ELS_LS_ACC && is_valid_ether_addr(granted_mac)) fip->flogi_oxid = FC_XID_UNKNOWN; - fip->update_mac(fip, fip->data_src_addr, granted_mac); - memcpy(fip->data_src_addr, granted_mac, ETH_ALEN); - } /* * Convert skb into an fc_frame containing only the ELS. @@ -958,7 +962,7 @@ static void fcoe_ctlr_recv_clr_vlink(struct fcoe_ctlr *fip, if (dlen < sizeof(*vp)) return; if (compare_ether_addr(vp->fd_mac, - fip->data_src_addr) == 0 && + fip->get_src_addr(lp)) == 0 && get_unaligned_be64(&vp->fd_wwpn) == lp->wwpn && ntoh24(vp->fd_fc_id) == fc_host_port_id(lp->host)) desc_mask &= ~BIT(FIP_DT_VN_ID); @@ -1113,8 +1117,6 @@ static void fcoe_ctlr_timeout(unsigned long arg) struct fcoe_fcf *sel; struct fcoe_fcf *fcf; unsigned long next_timer = jiffies + msecs_to_jiffies(FIP_VN_KA_PERIOD); - u8 send_ctlr_ka; - u8 send_port_ka; spin_lock_bh(&fip->lock); if (fip->state == FIP_ST_DISABLED) { @@ -1153,12 +1155,10 @@ static void fcoe_ctlr_timeout(unsigned long arg) schedule_work(&fip->link_work); } - send_ctlr_ka = 0; - send_port_ka = 0; if (sel) { if (time_after_eq(jiffies, fip->ctlr_ka_time)) { fip->ctlr_ka_time = jiffies + sel->fka_period; - send_ctlr_ka = 1; + fip->send_ctlr_ka = 1; } if (time_after(next_timer, fip->ctlr_ka_time)) next_timer = fip->ctlr_ka_time; @@ -1166,7 +1166,7 @@ static void fcoe_ctlr_timeout(unsigned long arg) if (time_after_eq(jiffies, fip->port_ka_time)) { fip->port_ka_time += jiffies + msecs_to_jiffies(FIP_VN_KA_PERIOD); - send_port_ka = 1; + fip->send_port_ka = 1; } if (time_after(next_timer, fip->port_ka_time)) next_timer = fip->port_ka_time; @@ -1176,12 +1176,9 @@ static void fcoe_ctlr_timeout(unsigned long arg) msecs_to_jiffies(FCOE_CTLR_START_DELAY); mod_timer(&fip->timer, next_timer); } + if (fip->send_ctlr_ka || fip->send_port_ka) + schedule_work(&fip->link_work); spin_unlock_bh(&fip->lock); - - if (send_ctlr_ka) - fcoe_ctlr_send_keep_alive(fip, 0, fip->ctl_src_addr); - if (send_port_ka) - fcoe_ctlr_send_keep_alive(fip, 1, fip->data_src_addr); } /** @@ -1196,6 +1193,8 @@ static void fcoe_ctlr_timeout(unsigned long arg) static void fcoe_ctlr_link_work(struct work_struct *work) { struct fcoe_ctlr *fip; + struct fc_lport *vport; + u8 *mac; int link; int last_link; @@ -1212,6 +1211,22 @@ static void fcoe_ctlr_link_work(struct work_struct *work) else fcoe_ctlr_reset(fip, FIP_ST_LINK_WAIT); } + + if (fip->send_ctlr_ka) { + fip->send_ctlr_ka = 0; + fcoe_ctlr_send_keep_alive(fip, NULL, 0, fip->ctl_src_addr); + } + if (fip->send_port_ka) { + fip->send_port_ka = 0; + mutex_lock(&fip->lp->lp_mutex); + mac = fip->get_src_addr(fip->lp); + fcoe_ctlr_send_keep_alive(fip, fip->lp, 1, mac); + list_for_each_entry(vport, &fip->lp->vports, list) { + mac = fip->get_src_addr(vport); + fcoe_ctlr_send_keep_alive(fip, vport, 1, mac); + } + mutex_unlock(&fip->lp->lp_mutex); + } } /** @@ -1236,6 +1251,7 @@ static void fcoe_ctlr_recv_work(struct work_struct *recv_work) /** * fcoe_ctlr_recv_flogi() - snoop Pre-FIP receipt of FLOGI response or request. * @fip: FCoE controller. + * @lport: libfc fc_lport instance received on * @fp: FC frame. * @sa: Ethernet source MAC address from received FCoE frame. * @@ -1248,7 +1264,8 @@ static void fcoe_ctlr_recv_work(struct work_struct *recv_work) * * Return non-zero if the frame should not be delivered to libfc. */ -int fcoe_ctlr_recv_flogi(struct fcoe_ctlr *fip, struct fc_frame *fp, u8 *sa) +int fcoe_ctlr_recv_flogi(struct fcoe_ctlr *fip, struct fc_lport *lport, + struct fc_frame *fp, u8 *sa) { struct fc_frame_header *fh; u8 op; @@ -1283,11 +1300,9 @@ int fcoe_ctlr_recv_flogi(struct fcoe_ctlr *fip, struct fc_frame *fp, u8 *sa) fip->map_dest = 0; } fip->flogi_oxid = FC_XID_UNKNOWN; - memcpy(mac, fip->data_src_addr, ETH_ALEN); - fc_fcoe_set_mac(fip->data_src_addr, fh->fh_d_id); + fc_fcoe_set_mac(mac, fh->fh_d_id); + fip->update_mac(lport, mac); spin_unlock_bh(&fip->lock); - - fip->update_mac(fip, mac, fip->data_src_addr); } else if (op == ELS_FLOGI && fh->fh_r_ctl == FC_RCTL_ELS_REQ && sa) { /* * Save source MAC for point-to-point responses. @@ -1370,3 +1385,4 @@ int fcoe_libfc_config(struct fc_lport *lp, struct libfc_function_template *tt) return 0; } EXPORT_SYMBOL_GPL(fcoe_libfc_config); + diff --git a/drivers/scsi/libfc/fc_elsct.c b/drivers/scsi/libfc/fc_elsct.c index 92984587ff4d..aae54fe3b299 100644 --- a/drivers/scsi/libfc/fc_elsct.c +++ b/drivers/scsi/libfc/fc_elsct.c @@ -31,7 +31,7 @@ /* * fc_elsct_send - sends ELS/CT frame */ -static struct fc_seq *fc_elsct_send(struct fc_lport *lport, +struct fc_seq *fc_elsct_send(struct fc_lport *lport, u32 did, struct fc_frame *fp, unsigned int op, @@ -63,6 +63,7 @@ static struct fc_seq *fc_elsct_send(struct fc_lport *lport, return lport->tt.exch_seq_send(lport, fp, resp, NULL, arg, timer_msec); } +EXPORT_SYMBOL(fc_elsct_send); int fc_elsct_init(struct fc_lport *lport) { diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c index ccba67ca68a1..807f5b3e4efe 100644 --- a/drivers/scsi/libfc/fc_lport.c +++ b/drivers/scsi/libfc/fc_lport.c @@ -1320,7 +1320,7 @@ static void fc_lport_timeout(struct work_struct *work) * held, but it will lock, call an _enter_* function or fc_lport_error * and then unlock the lport. */ -static void fc_lport_logo_resp(struct fc_seq *sp, struct fc_frame *fp, +void fc_lport_logo_resp(struct fc_seq *sp, struct fc_frame *fp, void *lp_arg) { struct fc_lport *lport = lp_arg; @@ -1357,6 +1357,7 @@ out: err: mutex_unlock(&lport->lp_mutex); } +EXPORT_SYMBOL(fc_lport_logo_resp); /** * fc_rport_enter_logo() - Logout of the fabric @@ -1397,7 +1398,7 @@ static void fc_lport_enter_logo(struct fc_lport *lport) * held, but it will lock, call an _enter_* function or fc_lport_error * and then unlock the lport. */ -static void fc_lport_flogi_resp(struct fc_seq *sp, struct fc_frame *fp, +void fc_lport_flogi_resp(struct fc_seq *sp, struct fc_frame *fp, void *lp_arg) { struct fc_lport *lport = lp_arg; @@ -1480,6 +1481,7 @@ out: err: mutex_unlock(&lport->lp_mutex); } +EXPORT_SYMBOL(fc_lport_flogi_resp); /** * fc_rport_enter_flogi() - Send a FLOGI request to the fabric manager diff --git a/include/scsi/fc_frame.h b/include/scsi/fc_frame.h index 148126dcf9e9..ab2f8d41761b 100644 --- a/include/scsi/fc_frame.h +++ b/include/scsi/fc_frame.h @@ -28,6 +28,8 @@ #include #include +#include + /* * The fc_frame interface is used to pass frame data between functions. * The frame includes the data buffer, length, and SOF / EOF delimiter types. @@ -67,6 +69,7 @@ struct fcoe_rcv_info { enum fc_sof fr_sof; /* start of frame delimiter */ enum fc_eof fr_eof; /* end of frame delimiter */ u8 fr_flags; /* flags - see below */ + u8 granted_mac[ETH_ALEN]; /* FCoE MAC address */ }; diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h index dfeb1ee4f03f..dad66ce8673d 100644 --- a/include/scsi/libfc.h +++ b/include/scsi/libfc.h @@ -900,6 +900,16 @@ void fc_fcp_destroy(struct fc_lport *); * Initializes ELS/CT interface */ int fc_elsct_init(struct fc_lport *lp); +struct fc_seq *fc_elsct_send(struct fc_lport *lport, + u32 did, + struct fc_frame *fp, + unsigned int op, + void (*resp)(struct fc_seq *, + struct fc_frame *fp, + void *arg), + void *arg, u32 timer_msec); +void fc_lport_flogi_resp(struct fc_seq *, struct fc_frame *, void *); +void fc_lport_logo_resp(struct fc_seq *, struct fc_frame *, void *); /* diff --git a/include/scsi/libfcoe.h b/include/scsi/libfcoe.h index b2410605b740..8ef5e209c216 100644 --- a/include/scsi/libfcoe.h +++ b/include/scsi/libfcoe.h @@ -74,11 +74,13 @@ enum fip_state { * @last_link: last link state reported to libfc. * @map_dest: use the FC_MAP mode for destination MAC addresses. * @spma: supports SPMA server-provided MACs mode + * @send_ctlr_ka: need to send controller keep alive + * @send_port_ka: need to send port keep alives * @dest_addr: MAC address of the selected FC forwarder. * @ctl_src_addr: the native MAC address of our local port. - * @data_src_addr: the assigned MAC address for the local port after FLOGI. * @send: LLD-supplied function to handle sending of FIP Ethernet frames. * @update_mac: LLD-supplied function to handle changes to MAC addresses. + * @get_src_addr: LLD-supplied function to supply a source MAC address. * @lock: lock protecting this structure. * * This structure is used by all FCoE drivers. It contains information @@ -106,12 +108,14 @@ struct fcoe_ctlr { u8 last_link; u8 map_dest; u8 spma; + u8 send_ctlr_ka; + u8 send_port_ka; u8 dest_addr[ETH_ALEN]; u8 ctl_src_addr[ETH_ALEN]; - u8 data_src_addr[ETH_ALEN]; void (*send)(struct fcoe_ctlr *, struct sk_buff *); - void (*update_mac)(struct fcoe_ctlr *, u8 *old, u8 *new); + void (*update_mac)(struct fc_lport *, u8 *addr); + u8 * (*get_src_addr)(struct fc_lport *); spinlock_t lock; }; @@ -155,9 +159,10 @@ void fcoe_ctlr_init(struct fcoe_ctlr *); void fcoe_ctlr_destroy(struct fcoe_ctlr *); void fcoe_ctlr_link_up(struct fcoe_ctlr *); int fcoe_ctlr_link_down(struct fcoe_ctlr *); -int fcoe_ctlr_els_send(struct fcoe_ctlr *, struct sk_buff *); +int fcoe_ctlr_els_send(struct fcoe_ctlr *, struct fc_lport *, struct sk_buff *); void fcoe_ctlr_recv(struct fcoe_ctlr *, struct sk_buff *); -int fcoe_ctlr_recv_flogi(struct fcoe_ctlr *, struct fc_frame *fp, u8 *sa); +int fcoe_ctlr_recv_flogi(struct fcoe_ctlr *, struct fc_lport *lport, + struct fc_frame *fp, u8 *sa); /* libfcoe funcs */ u64 fcoe_wwn_from_mac(unsigned char mac[], unsigned int, unsigned int); -- cgit v1.3-8-gc7d7 From 28cc0e31d874af05244da421e05565f2ba72fd5c Mon Sep 17 00:00:00 2001 From: Chris Leech Date: Tue, 3 Nov 2009 11:46:46 -0800 Subject: [SCSI] libfc: RPN_ID is obsolete and unnecessary RPN_ID has been obsolete per FC-GS-5 for several years. The port name is registered implicitly as part of FLOGI, and it is undesirable for ports to change a registered port name using RPN_ID while logged into the fabric. Signed-off-by: Chris Leech Signed-off-by: Robert Love Signed-off-by: James Bottomley --- drivers/scsi/libfc/fc_lport.c | 95 ++----------------------------------------- include/scsi/libfc.h | 1 - 2 files changed, 3 insertions(+), 93 deletions(-) (limited to 'include') diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c index 807f5b3e4efe..47577e4a2e87 100644 --- a/drivers/scsi/libfc/fc_lport.c +++ b/drivers/scsi/libfc/fc_lport.c @@ -108,7 +108,6 @@ static void fc_lport_error(struct fc_lport *, struct fc_frame *); static void fc_lport_enter_reset(struct fc_lport *); static void fc_lport_enter_flogi(struct fc_lport *); static void fc_lport_enter_dns(struct fc_lport *); -static void fc_lport_enter_rpn_id(struct fc_lport *); static void fc_lport_enter_rft_id(struct fc_lport *); static void fc_lport_enter_scr(struct fc_lport *); static void fc_lport_enter_ready(struct fc_lport *); @@ -118,7 +117,6 @@ static const char *fc_lport_state_names[] = { [LPORT_ST_DISABLED] = "disabled", [LPORT_ST_FLOGI] = "FLOGI", [LPORT_ST_DNS] = "dNS", - [LPORT_ST_RPN_ID] = "RPN_ID", [LPORT_ST_RFT_ID] = "RFT_ID", [LPORT_ST_SCR] = "SCR", [LPORT_ST_READY] = "Ready", @@ -153,7 +151,7 @@ static void fc_lport_rport_callback(struct fc_lport *lport, case RPORT_EV_READY: if (lport->state == LPORT_ST_DNS) { lport->dns_rp = rdata; - fc_lport_enter_rpn_id(lport); + fc_lport_enter_rft_id(lport); } else { FC_LPORT_DBG(lport, "Received an READY event " "on port (%6x) for the directory " @@ -965,7 +963,6 @@ static void fc_lport_error(struct fc_lport *lport, struct fc_frame *fp) case LPORT_ST_DISABLED: case LPORT_ST_READY: case LPORT_ST_RESET: - case LPORT_ST_RPN_ID: case LPORT_ST_RFT_ID: case LPORT_ST_SCR: case LPORT_ST_DNS: @@ -980,8 +977,8 @@ static void fc_lport_error(struct fc_lport *lport, struct fc_frame *fp) /** * fc_lport_rft_id_resp() - Handle response to Register Fibre - * Channel Types by ID (RPN_ID) request - * @sp: current sequence in RPN_ID exchange + * Channel Types by ID (RFT_ID) request + * @sp: current sequence in RFT_ID exchange * @fp: response frame * @lp_arg: Fibre Channel host port instance * @@ -1032,60 +1029,6 @@ err: mutex_unlock(&lport->lp_mutex); } -/** - * fc_lport_rpn_id_resp() - Handle response to Register Port - * Name by ID (RPN_ID) request - * @sp: current sequence in RPN_ID exchange - * @fp: response frame - * @lp_arg: Fibre Channel host port instance - * - * Locking Note: This function will be called without the lport lock - * held, but it will lock, call an _enter_* function or fc_lport_error - * and then unlock the lport. - */ -static void fc_lport_rpn_id_resp(struct fc_seq *sp, struct fc_frame *fp, - void *lp_arg) -{ - struct fc_lport *lport = lp_arg; - struct fc_frame_header *fh; - struct fc_ct_hdr *ct; - - FC_LPORT_DBG(lport, "Received a RPN_ID %s\n", fc_els_resp_type(fp)); - - if (fp == ERR_PTR(-FC_EX_CLOSED)) - return; - - mutex_lock(&lport->lp_mutex); - - if (lport->state != LPORT_ST_RPN_ID) { - FC_LPORT_DBG(lport, "Received a RPN_ID response, but in state " - "%s\n", fc_lport_state(lport)); - if (IS_ERR(fp)) - goto err; - goto out; - } - - if (IS_ERR(fp)) { - fc_lport_error(lport, fp); - goto err; - } - - fh = fc_frame_header_get(fp); - ct = fc_frame_payload_get(fp, sizeof(*ct)); - if (fh && ct && fh->fh_type == FC_TYPE_CT && - ct->ct_fs_type == FC_FST_DIR && - ct->ct_fs_subtype == FC_NS_SUBTYPE && - ntohs(ct->ct_cmd) == FC_FS_ACC) - fc_lport_enter_rft_id(lport); - else - fc_lport_error(lport, fp); - -out: - fc_frame_free(fp); -err: - mutex_unlock(&lport->lp_mutex); -} - /** * fc_lport_scr_resp() - Handle response to State Change Register (SCR) request * @sp: current sequence in SCR exchange @@ -1203,35 +1146,6 @@ static void fc_lport_enter_rft_id(struct fc_lport *lport) fc_lport_error(lport, fp); } -/** - * fc_rport_enter_rft_id() - Register port name with the name server - * @lport: Fibre Channel local port to register - * - * Locking Note: The lport lock is expected to be held before calling - * this routine. - */ -static void fc_lport_enter_rpn_id(struct fc_lport *lport) -{ - struct fc_frame *fp; - - FC_LPORT_DBG(lport, "Entered RPN_ID state from %s state\n", - fc_lport_state(lport)); - - fc_lport_state_enter(lport, LPORT_ST_RPN_ID); - - fp = fc_frame_alloc(lport, sizeof(struct fc_ct_hdr) + - sizeof(struct fc_ns_rn_id)); - if (!fp) { - fc_lport_error(lport, fp); - return; - } - - if (!lport->tt.elsct_send(lport, FC_FID_DIR_SERV, fp, FC_NS_RPN_ID, - fc_lport_rpn_id_resp, - lport, lport->e_d_tov)) - fc_lport_error(lport, NULL); -} - static struct fc_rport_operations fc_lport_rport_ops = { .event_callback = fc_lport_rport_callback, }; @@ -1293,9 +1207,6 @@ static void fc_lport_timeout(struct work_struct *work) case LPORT_ST_DNS: fc_lport_enter_dns(lport); break; - case LPORT_ST_RPN_ID: - fc_lport_enter_rpn_id(lport); - break; case LPORT_ST_RFT_ID: fc_lport_enter_rft_id(lport); break; diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h index dad66ce8673d..75be713ea036 100644 --- a/include/scsi/libfc.h +++ b/include/scsi/libfc.h @@ -61,7 +61,6 @@ enum fc_lport_state { LPORT_ST_DISABLED = 0, LPORT_ST_FLOGI, LPORT_ST_DNS, - LPORT_ST_RPN_ID, LPORT_ST_RFT_ID, LPORT_ST_SCR, LPORT_ST_READY, -- cgit v1.3-8-gc7d7 From c9c7bd7a5e7321aa96289c9b48fdbcc828c105e6 Mon Sep 17 00:00:00 2001 From: Chris Leech Date: Tue, 3 Nov 2009 11:46:51 -0800 Subject: [SCSI] libfc: RNN_ID may be required before RSNN_NN with some switches One could interpret FC-GS-5 to say that an explicit RNN_ID is required before RSNN_NN is allowed to succeed, which is why RNN_ID was not obsoleted along with RPN_ID acording to this document: ftp://ftp.t11.org/t11/member/fc/gs-5/05-546v2.pdf Signed-off-by: Chris Leech Signed-off-by: Robert Love Signed-off-by: James Bottomley --- drivers/scsi/libfc/fc_lport.c | 91 ++++++++++++++++++++++++++++++++++++++++++- include/scsi/fc_encode.h | 4 +- include/scsi/libfc.h | 1 + 3 files changed, 93 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c index 47577e4a2e87..897b5a8487e2 100644 --- a/drivers/scsi/libfc/fc_lport.c +++ b/drivers/scsi/libfc/fc_lport.c @@ -108,6 +108,7 @@ static void fc_lport_error(struct fc_lport *, struct fc_frame *); static void fc_lport_enter_reset(struct fc_lport *); static void fc_lport_enter_flogi(struct fc_lport *); static void fc_lport_enter_dns(struct fc_lport *); +static void fc_lport_enter_rnn_id(struct fc_lport *); static void fc_lport_enter_rft_id(struct fc_lport *); static void fc_lport_enter_scr(struct fc_lport *); static void fc_lport_enter_ready(struct fc_lport *); @@ -117,6 +118,7 @@ static const char *fc_lport_state_names[] = { [LPORT_ST_DISABLED] = "disabled", [LPORT_ST_FLOGI] = "FLOGI", [LPORT_ST_DNS] = "dNS", + [LPORT_ST_RNN_ID] = "RNN_ID", [LPORT_ST_RFT_ID] = "RFT_ID", [LPORT_ST_SCR] = "SCR", [LPORT_ST_READY] = "Ready", @@ -151,7 +153,7 @@ static void fc_lport_rport_callback(struct fc_lport *lport, case RPORT_EV_READY: if (lport->state == LPORT_ST_DNS) { lport->dns_rp = rdata; - fc_lport_enter_rft_id(lport); + fc_lport_enter_rnn_id(lport); } else { FC_LPORT_DBG(lport, "Received an READY event " "on port (%6x) for the directory " @@ -963,6 +965,7 @@ static void fc_lport_error(struct fc_lport *lport, struct fc_frame *fp) case LPORT_ST_DISABLED: case LPORT_ST_READY: case LPORT_ST_RESET: + case LPORT_ST_RNN_ID: case LPORT_ST_RFT_ID: case LPORT_ST_SCR: case LPORT_ST_DNS: @@ -1029,6 +1032,60 @@ err: mutex_unlock(&lport->lp_mutex); } +/** + * fc_lport_rnn_id_resp() - Handle response to Register Node + * Name by ID (RNN_ID) request + * @sp: current sequence in RNN_ID exchange + * @fp: response frame + * @lp_arg: Fibre Channel host port instance + * + * Locking Note: This function will be called without the lport lock + * held, but it will lock, call an _enter_* function or fc_lport_error + * and then unlock the lport. + */ +static void fc_lport_rnn_id_resp(struct fc_seq *sp, struct fc_frame *fp, + void *lp_arg) +{ + struct fc_lport *lport = lp_arg; + struct fc_frame_header *fh; + struct fc_ct_hdr *ct; + + FC_LPORT_DBG(lport, "Received a RNN_ID %s\n", fc_els_resp_type(fp)); + + if (fp == ERR_PTR(-FC_EX_CLOSED)) + return; + + mutex_lock(&lport->lp_mutex); + + if (lport->state != LPORT_ST_RNN_ID) { + FC_LPORT_DBG(lport, "Received a RNN_ID response, but in state " + "%s\n", fc_lport_state(lport)); + if (IS_ERR(fp)) + goto err; + goto out; + } + + if (IS_ERR(fp)) { + fc_lport_error(lport, fp); + goto err; + } + + fh = fc_frame_header_get(fp); + ct = fc_frame_payload_get(fp, sizeof(*ct)); + if (fh && ct && fh->fh_type == FC_TYPE_CT && + ct->ct_fs_type == FC_FST_DIR && + ct->ct_fs_subtype == FC_NS_SUBTYPE && + ntohs(ct->ct_cmd) == FC_FS_ACC) + fc_lport_enter_rft_id(lport); + else + fc_lport_error(lport, fp); + +out: + fc_frame_free(fp); +err: + mutex_unlock(&lport->lp_mutex); +} + /** * fc_lport_scr_resp() - Handle response to State Change Register (SCR) request * @sp: current sequence in SCR exchange @@ -1146,6 +1203,35 @@ static void fc_lport_enter_rft_id(struct fc_lport *lport) fc_lport_error(lport, fp); } +/** + * fc_rport_enter_rnn_id() - Register node name with the name server + * @lport: Fibre Channel local port to register + * + * Locking Note: The lport lock is expected to be held before calling + * this routine. + */ +static void fc_lport_enter_rnn_id(struct fc_lport *lport) +{ + struct fc_frame *fp; + + FC_LPORT_DBG(lport, "Entered RNN_ID state from %s state\n", + fc_lport_state(lport)); + + fc_lport_state_enter(lport, LPORT_ST_RNN_ID); + + fp = fc_frame_alloc(lport, sizeof(struct fc_ct_hdr) + + sizeof(struct fc_ns_rn_id)); + if (!fp) { + fc_lport_error(lport, fp); + return; + } + + if (!lport->tt.elsct_send(lport, FC_FID_DIR_SERV, fp, FC_NS_RNN_ID, + fc_lport_rnn_id_resp, + lport, lport->e_d_tov)) + fc_lport_error(lport, fp); +} + static struct fc_rport_operations fc_lport_rport_ops = { .event_callback = fc_lport_rport_callback, }; @@ -1207,6 +1293,9 @@ static void fc_lport_timeout(struct work_struct *work) case LPORT_ST_DNS: fc_lport_enter_dns(lport); break; + case LPORT_ST_RNN_ID: + fc_lport_enter_rnn_id(lport); + break; case LPORT_ST_RFT_ID: fc_lport_enter_rft_id(lport); break; diff --git a/include/scsi/fc_encode.h b/include/scsi/fc_encode.h index c93ca3ece1a0..ad13cb1c3eec 100644 --- a/include/scsi/fc_encode.h +++ b/include/scsi/fc_encode.h @@ -128,12 +128,12 @@ static inline int fc_ct_fill(struct fc_lport *lport, ct->payload.rft.fts = lport->fcts; break; - case FC_NS_RPN_ID: + case FC_NS_RNN_ID: ct = fc_ct_hdr_fill(fp, op, sizeof(struct fc_ns_rn_id)); hton24(ct->payload.rn.fr_fid.fp_fid, fc_host_port_id(lport->host)); ct->payload.rft.fts = lport->fcts; - put_unaligned_be64(lport->wwpn, &ct->payload.rn.fr_wwn); + put_unaligned_be64(lport->wwnn, &ct->payload.rn.fr_wwn); break; default: diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h index 75be713ea036..3d22dfd67209 100644 --- a/include/scsi/libfc.h +++ b/include/scsi/libfc.h @@ -61,6 +61,7 @@ enum fc_lport_state { LPORT_ST_DISABLED = 0, LPORT_ST_FLOGI, LPORT_ST_DNS, + LPORT_ST_RNN_ID, LPORT_ST_RFT_ID, LPORT_ST_SCR, LPORT_ST_READY, -- cgit v1.3-8-gc7d7 From 5baa17c3e66fc2e414f501b2dd59b962dfc64919 Mon Sep 17 00:00:00 2001 From: Chris Leech Date: Tue, 3 Nov 2009 11:46:56 -0800 Subject: [SCSI] libfc: Register Symbolic Node Name (RSNN_NN) Register the fc_host symbolic name as the symbolic node name with the fabric name server. Signed-off-by: Chris Leech Signed-off-by: Robert Love Signed-off-by: James Bottomley --- drivers/scsi/fcoe/fcoe.c | 6 +-- drivers/scsi/libfc/fc_lport.c | 91 +++++++++++++++++++++++++++++++++++++++++++ include/scsi/fc/fc_ns.h | 10 +++++ include/scsi/fc_encode.h | 10 +++++ include/scsi/libfc.h | 1 + 5 files changed, 115 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c index f23cdb38d5c3..437eacf2732d 100644 --- a/drivers/scsi/fcoe/fcoe.c +++ b/drivers/scsi/fcoe/fcoe.c @@ -591,9 +591,9 @@ static int fcoe_shost_config(struct fc_lport *lp, struct Scsi_Host *shost, if (!lp->vport) fc_host_max_npiv_vports(lp->host) = USHORT_MAX; - sprintf(fc_host_symbolic_name(lp->host), "%s v%s over %s", - FCOE_NAME, FCOE_VERSION, - fcoe_netdev(lp)->name); + snprintf(fc_host_symbolic_name(lp->host), FC_SYMBOLIC_NAME_SIZE, + "%s v%s over %s", FCOE_NAME, FCOE_VERSION, + fcoe_netdev(lp)->name); return 0; } diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c index 897b5a8487e2..cc389c03f698 100644 --- a/drivers/scsi/libfc/fc_lport.c +++ b/drivers/scsi/libfc/fc_lport.c @@ -109,6 +109,7 @@ static void fc_lport_enter_reset(struct fc_lport *); static void fc_lport_enter_flogi(struct fc_lport *); static void fc_lport_enter_dns(struct fc_lport *); static void fc_lport_enter_rnn_id(struct fc_lport *); +static void fc_lport_enter_rsnn_nn(struct fc_lport *); static void fc_lport_enter_rft_id(struct fc_lport *); static void fc_lport_enter_scr(struct fc_lport *); static void fc_lport_enter_ready(struct fc_lport *); @@ -119,6 +120,7 @@ static const char *fc_lport_state_names[] = { [LPORT_ST_FLOGI] = "FLOGI", [LPORT_ST_DNS] = "dNS", [LPORT_ST_RNN_ID] = "RNN_ID", + [LPORT_ST_RSNN_NN] = "RSNN_NN", [LPORT_ST_RFT_ID] = "RFT_ID", [LPORT_ST_SCR] = "SCR", [LPORT_ST_READY] = "Ready", @@ -966,6 +968,7 @@ static void fc_lport_error(struct fc_lport *lport, struct fc_frame *fp) case LPORT_ST_READY: case LPORT_ST_RESET: case LPORT_ST_RNN_ID: + case LPORT_ST_RSNN_NN: case LPORT_ST_RFT_ID: case LPORT_ST_SCR: case LPORT_ST_DNS: @@ -1032,6 +1035,60 @@ err: mutex_unlock(&lport->lp_mutex); } +/** + * fc_lport_rsnn_nn_resp() - Handle response to Register Symbolic Node Name + * by Node Name (RSNN_NN) request + * @sp: current sequence in RSNN_NN exchange + * @fp: response frame + * @lp_arg: Fibre Channel host port instance + * + * Locking Note: This function will be called without the lport lock + * held, but it will lock, call an _enter_* function or fc_lport_error + * and then unlock the lport. + */ +static void fc_lport_rsnn_nn_resp(struct fc_seq *sp, struct fc_frame *fp, + void *lp_arg) +{ + struct fc_lport *lport = lp_arg; + struct fc_frame_header *fh; + struct fc_ct_hdr *ct; + + FC_LPORT_DBG(lport, "Received a RSNN_NN %s\n", fc_els_resp_type(fp)); + + if (fp == ERR_PTR(-FC_EX_CLOSED)) + return; + + mutex_lock(&lport->lp_mutex); + + if (lport->state != LPORT_ST_RSNN_NN) { + FC_LPORT_DBG(lport, "Received a RSNN_NN response, but in state " + "%s\n", fc_lport_state(lport)); + if (IS_ERR(fp)) + goto err; + goto out; + } + + if (IS_ERR(fp)) { + fc_lport_error(lport, fp); + goto err; + } + + fh = fc_frame_header_get(fp); + ct = fc_frame_payload_get(fp, sizeof(*ct)); + if (fh && ct && fh->fh_type == FC_TYPE_CT && + ct->ct_fs_type == FC_FST_DIR && + ct->ct_fs_subtype == FC_NS_SUBTYPE && + ntohs(ct->ct_cmd) == FC_FS_ACC) + fc_lport_enter_rsnn_nn(lport); + else + fc_lport_error(lport, fp); + +out: + fc_frame_free(fp); +err: + mutex_unlock(&lport->lp_mutex); +} + /** * fc_lport_rnn_id_resp() - Handle response to Register Node * Name by ID (RNN_ID) request @@ -1203,6 +1260,37 @@ static void fc_lport_enter_rft_id(struct fc_lport *lport) fc_lport_error(lport, fp); } +/** + * fc_rport_enter_rsnn_nn() - Register symbolic node name with the name server + * @lport: Fibre Channel local port to register + * + * Locking Note: The lport lock is expected to be held before calling + * this routine. + */ +static void fc_lport_enter_rsnn_nn(struct fc_lport *lport) +{ + struct fc_frame *fp; + size_t len; + + FC_LPORT_DBG(lport, "Entered RSNN_NN state from %s state\n", + fc_lport_state(lport)); + + fc_lport_state_enter(lport, LPORT_ST_RSNN_NN); + + len = strnlen(fc_host_symbolic_name(lport->host), 255); + fp = fc_frame_alloc(lport, sizeof(struct fc_ct_hdr) + + sizeof(struct fc_ns_rsnn) + len); + if (!fp) { + fc_lport_error(lport, fp); + return; + } + + if (!lport->tt.elsct_send(lport, FC_FID_DIR_SERV, fp, FC_NS_RSNN_NN, + fc_lport_rsnn_nn_resp, + lport, lport->e_d_tov)) + fc_lport_error(lport, fp); +} + /** * fc_rport_enter_rnn_id() - Register node name with the name server * @lport: Fibre Channel local port to register @@ -1296,6 +1384,9 @@ static void fc_lport_timeout(struct work_struct *work) case LPORT_ST_RNN_ID: fc_lport_enter_rnn_id(lport); break; + case LPORT_ST_RSNN_NN: + fc_lport_enter_rsnn_nn(lport); + break; case LPORT_ST_RFT_ID: fc_lport_enter_rft_id(lport); break; diff --git a/include/scsi/fc/fc_ns.h b/include/scsi/fc/fc_ns.h index 790d7b97d4bc..fa8283056325 100644 --- a/include/scsi/fc/fc_ns.h +++ b/include/scsi/fc/fc_ns.h @@ -47,6 +47,7 @@ enum fc_ns_req { FC_NS_RFT_ID = 0x0217, /* reg FC4 type for ID */ FC_NS_RPN_ID = 0x0212, /* reg port name for ID */ FC_NS_RNN_ID = 0x0213, /* reg node name for ID */ + FC_NS_RSNN_NN = 0x0239, /* reg symbolic node name */ }; /* @@ -156,4 +157,13 @@ struct fc_ns_rn_id { __be64 fr_wwn; /* node name or port name */ } __attribute__((__packed__)); +/* + * RSNN_NN request - register symbolic node name + */ +struct fc_ns_rsnn { + __be64 fr_wwn; /* node name */ + __u8 fr_name_len; + char fr_name[]; +} __attribute__((__packed__)); + #endif /* _FC_NS_H_ */ diff --git a/include/scsi/fc_encode.h b/include/scsi/fc_encode.h index ad13cb1c3eec..89981afba72d 100644 --- a/include/scsi/fc_encode.h +++ b/include/scsi/fc_encode.h @@ -33,6 +33,7 @@ struct fc_ct_req { struct fc_ns_rn_id rn; struct fc_ns_rft rft; struct fc_ns_fid fid; + struct fc_ns_rsnn snn; } payload; }; @@ -136,6 +137,15 @@ static inline int fc_ct_fill(struct fc_lport *lport, put_unaligned_be64(lport->wwnn, &ct->payload.rn.fr_wwn); break; + case FC_NS_RSNN_NN: + ct = fc_ct_hdr_fill(fp, op, sizeof(struct fc_ns_rsnn)); + put_unaligned_be64(lport->wwnn, &ct->payload.snn.fr_wwn); + strncpy(ct->payload.snn.fr_name, + fc_host_symbolic_name(lport->host), 255); + ct->payload.snn.fr_name_len = + strnlen(ct->payload.snn.fr_name, 255); + break; + default: return -EINVAL; } diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h index 3d22dfd67209..1a632069c402 100644 --- a/include/scsi/libfc.h +++ b/include/scsi/libfc.h @@ -62,6 +62,7 @@ enum fc_lport_state { LPORT_ST_FLOGI, LPORT_ST_DNS, LPORT_ST_RNN_ID, + LPORT_ST_RSNN_NN, LPORT_ST_RFT_ID, LPORT_ST_SCR, LPORT_ST_READY, -- cgit v1.3-8-gc7d7 From c9866a548024c33e30f35a14bbcb71ba78266383 Mon Sep 17 00:00:00 2001 From: Chris Leech Date: Tue, 3 Nov 2009 11:47:01 -0800 Subject: [SCSI] libfc: Register Symbolic Port Name (RSPN_ID) Register the fc_host symbolic name as the symbolic port name with the fabric name server. Signed-off-by: Chris Leech Signed-off-by: Robert Love Signed-off-by: James Bottomley --- drivers/scsi/libfc/fc_lport.c | 90 +++++++++++++++++++++++++++++++++++++++++++ include/scsi/fc/fc_ns.h | 10 +++++ include/scsi/fc_encode.h | 11 ++++++ include/scsi/libfc.h | 1 + 4 files changed, 112 insertions(+) (limited to 'include') diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c index cc389c03f698..28a35da1493b 100644 --- a/drivers/scsi/libfc/fc_lport.c +++ b/drivers/scsi/libfc/fc_lport.c @@ -110,6 +110,7 @@ static void fc_lport_enter_flogi(struct fc_lport *); static void fc_lport_enter_dns(struct fc_lport *); static void fc_lport_enter_rnn_id(struct fc_lport *); static void fc_lport_enter_rsnn_nn(struct fc_lport *); +static void fc_lport_enter_rspn_id(struct fc_lport *); static void fc_lport_enter_rft_id(struct fc_lport *); static void fc_lport_enter_scr(struct fc_lport *); static void fc_lport_enter_ready(struct fc_lport *); @@ -121,6 +122,7 @@ static const char *fc_lport_state_names[] = { [LPORT_ST_DNS] = "dNS", [LPORT_ST_RNN_ID] = "RNN_ID", [LPORT_ST_RSNN_NN] = "RSNN_NN", + [LPORT_ST_RSPN_ID] = "RSPN_ID", [LPORT_ST_RFT_ID] = "RFT_ID", [LPORT_ST_SCR] = "SCR", [LPORT_ST_READY] = "Ready", @@ -969,6 +971,7 @@ static void fc_lport_error(struct fc_lport *lport, struct fc_frame *fp) case LPORT_ST_RESET: case LPORT_ST_RNN_ID: case LPORT_ST_RSNN_NN: + case LPORT_ST_RSPN_ID: case LPORT_ST_RFT_ID: case LPORT_ST_SCR: case LPORT_ST_DNS: @@ -1035,6 +1038,59 @@ err: mutex_unlock(&lport->lp_mutex); } +/** + * fc_lport_rspn_id_resp() - Handle response to Register Symbolic Port Name + * by ID (RSPN_ID) request + * @sp: current sequence in RSPN_ID exchange + * @fp: response frame + * @lp_arg: Fibre Channel host port instance + * + * Locking Note: This function will be called without the lport lock + * held, but it will lock, call an _enter_* function or fc_lport_error + * and then unlock the lport. + */ +static void fc_lport_rspn_id_resp(struct fc_seq *sp, struct fc_frame *fp, + void *lp_arg) +{ + struct fc_lport *lport = lp_arg; + struct fc_frame_header *fh; + struct fc_ct_hdr *ct; + + FC_LPORT_DBG(lport, "Received a RSPN_ID %s\n", fc_els_resp_type(fp)); + + if (fp == ERR_PTR(-FC_EX_CLOSED)) + return; + + mutex_lock(&lport->lp_mutex); + + if (lport->state != LPORT_ST_RSPN_ID) { + FC_LPORT_DBG(lport, "Received a RSPN_ID response, but in state " + "%s\n", fc_lport_state(lport)); + if (IS_ERR(fp)) + goto err; + goto out; + } + + if (IS_ERR(fp)) { + fc_lport_error(lport, fp); + goto err; + } + + fh = fc_frame_header_get(fp); + ct = fc_frame_payload_get(fp, sizeof(*ct)); + if (fh && ct && fh->fh_type == FC_TYPE_CT && + ct->ct_fs_type == FC_FST_DIR && + ct->ct_fs_subtype == FC_NS_SUBTYPE && + ntohs(ct->ct_cmd) == FC_FS_ACC) + fc_lport_enter_rspn_id(lport); + else + fc_lport_error(lport, fp); + +out: + fc_frame_free(fp); +err: + mutex_unlock(&lport->lp_mutex); +} /** * fc_lport_rsnn_nn_resp() - Handle response to Register Symbolic Node Name * by Node Name (RSNN_NN) request @@ -1260,6 +1316,37 @@ static void fc_lport_enter_rft_id(struct fc_lport *lport) fc_lport_error(lport, fp); } +/** + * fc_rport_enter_rspn_id() - Register symbolic port name with the name server + * @lport: Fibre Channel local port to register + * + * Locking Note: The lport lock is expected to be held before calling + * this routine. + */ +static void fc_lport_enter_rspn_id(struct fc_lport *lport) +{ + struct fc_frame *fp; + size_t len; + + FC_LPORT_DBG(lport, "Entered RSPN_ID state from %s state\n", + fc_lport_state(lport)); + + fc_lport_state_enter(lport, LPORT_ST_RSPN_ID); + + len = strnlen(fc_host_symbolic_name(lport->host), 255); + fp = fc_frame_alloc(lport, sizeof(struct fc_ct_hdr) + + sizeof(struct fc_ns_rspn) + len); + if (!fp) { + fc_lport_error(lport, fp); + return; + } + + if (!lport->tt.elsct_send(lport, FC_FID_DIR_SERV, fp, FC_NS_RSPN_ID, + fc_lport_rspn_id_resp, + lport, lport->e_d_tov)) + fc_lport_error(lport, fp); +} + /** * fc_rport_enter_rsnn_nn() - Register symbolic node name with the name server * @lport: Fibre Channel local port to register @@ -1387,6 +1474,9 @@ static void fc_lport_timeout(struct work_struct *work) case LPORT_ST_RSNN_NN: fc_lport_enter_rsnn_nn(lport); break; + case LPORT_ST_RSPN_ID: + fc_lport_enter_rspn_id(lport); + break; case LPORT_ST_RFT_ID: fc_lport_enter_rft_id(lport); break; diff --git a/include/scsi/fc/fc_ns.h b/include/scsi/fc/fc_ns.h index fa8283056325..3fd59a2cb81f 100644 --- a/include/scsi/fc/fc_ns.h +++ b/include/scsi/fc/fc_ns.h @@ -47,6 +47,7 @@ enum fc_ns_req { FC_NS_RFT_ID = 0x0217, /* reg FC4 type for ID */ FC_NS_RPN_ID = 0x0212, /* reg port name for ID */ FC_NS_RNN_ID = 0x0213, /* reg node name for ID */ + FC_NS_RSPN_ID = 0x0218, /* reg symbolic port name */ FC_NS_RSNN_NN = 0x0239, /* reg symbolic node name */ }; @@ -166,4 +167,13 @@ struct fc_ns_rsnn { char fr_name[]; } __attribute__((__packed__)); +/* + * RSPN_ID request - register symbolic port name + */ +struct fc_ns_rspn { + struct fc_ns_fid fr_fid; /* port ID object */ + __u8 fr_name_len; + char fr_name[]; +} __attribute__((__packed__)); + #endif /* _FC_NS_H_ */ diff --git a/include/scsi/fc_encode.h b/include/scsi/fc_encode.h index 89981afba72d..9afcbb94ec30 100644 --- a/include/scsi/fc_encode.h +++ b/include/scsi/fc_encode.h @@ -34,6 +34,7 @@ struct fc_ct_req { struct fc_ns_rft rft; struct fc_ns_fid fid; struct fc_ns_rsnn snn; + struct fc_ns_rspn spn; } payload; }; @@ -137,6 +138,16 @@ static inline int fc_ct_fill(struct fc_lport *lport, put_unaligned_be64(lport->wwnn, &ct->payload.rn.fr_wwn); break; + case FC_NS_RSPN_ID: + ct = fc_ct_hdr_fill(fp, op, sizeof(struct fc_ns_rspn)); + hton24(ct->payload.spn.fr_fid.fp_fid, + fc_host_port_id(lport->host)); + strncpy(ct->payload.spn.fr_name, + fc_host_symbolic_name(lport->host), 255); + ct->payload.spn.fr_name_len = + strnlen(ct->payload.spn.fr_name, 255); + break; + case FC_NS_RSNN_NN: ct = fc_ct_hdr_fill(fp, op, sizeof(struct fc_ns_rsnn)); put_unaligned_be64(lport->wwnn, &ct->payload.snn.fr_wwn); diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h index 1a632069c402..8258edfa328c 100644 --- a/include/scsi/libfc.h +++ b/include/scsi/libfc.h @@ -63,6 +63,7 @@ enum fc_lport_state { LPORT_ST_DNS, LPORT_ST_RNN_ID, LPORT_ST_RSNN_NN, + LPORT_ST_RSPN_ID, LPORT_ST_RFT_ID, LPORT_ST_SCR, LPORT_ST_READY, -- cgit v1.3-8-gc7d7 From 07aac328342d6ca1725d901e1c5da8a1aa88f557 Mon Sep 17 00:00:00 2001 From: Robert Love Date: Tue, 3 Nov 2009 11:47:23 -0800 Subject: [SCSI] libfc: Export FC headers Export fc_els.h, fc_fs.h, fc_gs.h and fc_ns.h so that they may be used by applications. This will be needed for FC Passthrough applications like fcping, but could be used by other applications. Fix to include to exported files provided by Chris Leech . Signed-off-by: Robert Love Signed-off-by: James Bottomley --- include/scsi/Kbuild | 1 + include/scsi/fc/Kbuild | 4 ++++ include/scsi/fc/fc_els.h | 2 ++ include/scsi/fc/fc_fs.h | 2 ++ include/scsi/fc/fc_gs.h | 2 ++ include/scsi/fc/fc_ns.h | 2 ++ 6 files changed, 13 insertions(+) create mode 100644 include/scsi/fc/Kbuild (limited to 'include') diff --git a/include/scsi/Kbuild b/include/scsi/Kbuild index 33b2750e9283..b3a0ee6b2f1c 100644 --- a/include/scsi/Kbuild +++ b/include/scsi/Kbuild @@ -2,3 +2,4 @@ header-y += scsi.h header-y += scsi_netlink.h header-y += scsi_netlink_fc.h header-y += scsi_bsg_fc.h +header-y += fc/ diff --git a/include/scsi/fc/Kbuild b/include/scsi/fc/Kbuild new file mode 100644 index 000000000000..56603813c6cd --- /dev/null +++ b/include/scsi/fc/Kbuild @@ -0,0 +1,4 @@ +header-y += fc_els.h +header-y += fc_fs.h +header-y += fc_gs.h +header-y += fc_ns.h diff --git a/include/scsi/fc/fc_els.h b/include/scsi/fc/fc_els.h index b0872afe2d30..f94328132a26 100644 --- a/include/scsi/fc/fc_els.h +++ b/include/scsi/fc/fc_els.h @@ -20,6 +20,8 @@ #ifndef _FC_ELS_H_ #define _FC_ELS_H_ +#include + /* * Fibre Channel Switch - Enhanced Link Services definitions. * From T11 FC-LS Rev 1.2 June 7, 2005. diff --git a/include/scsi/fc/fc_fs.h b/include/scsi/fc/fc_fs.h index ac4cd38c860e..50f28b143451 100644 --- a/include/scsi/fc/fc_fs.h +++ b/include/scsi/fc/fc_fs.h @@ -20,6 +20,8 @@ #ifndef _FC_FS_H_ #define _FC_FS_H_ +#include + /* * Fibre Channel Framing and Signalling definitions. * From T11 FC-FS-2 Rev 0.90 - 9 August 2005. diff --git a/include/scsi/fc/fc_gs.h b/include/scsi/fc/fc_gs.h index 324dd0e3c622..a37346d47eb1 100644 --- a/include/scsi/fc/fc_gs.h +++ b/include/scsi/fc/fc_gs.h @@ -20,6 +20,8 @@ #ifndef _FC_GS_H_ #define _FC_GS_H_ +#include + /* * Fibre Channel Services - Common Transport. * From T11.org FC-GS-2 Rev 5.3 November 1998. diff --git a/include/scsi/fc/fc_ns.h b/include/scsi/fc/fc_ns.h index 3fd59a2cb81f..f4d354eb26b9 100644 --- a/include/scsi/fc/fc_ns.h +++ b/include/scsi/fc/fc_ns.h @@ -20,6 +20,8 @@ #ifndef _FC_NS_H_ #define _FC_NS_H_ +#include + /* * Fibre Channel Services - Name Service (dNS) * From T11.org FC-GS-2 Rev 5.3 November 1998. -- cgit v1.3-8-gc7d7 From a51ab39606042e76a483547620699530caa12c40 Mon Sep 17 00:00:00 2001 From: Steve Ma Date: Tue, 3 Nov 2009 11:47:34 -0800 Subject: [SCSI] libfc, fcoe: Add FC passthrough support This is the Open-FCoE implementation of the FC passthrough support via bsg interface. Passthrough support is added to both N_Ports and VN_Ports. Signed-off-by: Steve Ma Signed-off-by: Robert Love Signed-off-by: James Bottomley --- drivers/scsi/fcoe/fcoe.c | 4 + drivers/scsi/libfc/fc_lport.c | 267 ++++++++++++++++++++++++++++++++++++++++++ include/scsi/libfc.h | 7 ++ 3 files changed, 278 insertions(+) (limited to 'include') diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c index f1c126b798af..8f078d306a0a 100644 --- a/drivers/scsi/fcoe/fcoe.c +++ b/drivers/scsi/fcoe/fcoe.c @@ -134,6 +134,8 @@ struct fc_function_template fcoe_transport_function = { .vport_delete = fcoe_vport_destroy, .vport_disable = fcoe_vport_disable, .set_vport_symbolic_name = fcoe_set_vport_symbolic_name, + + .bsg_request = fc_lport_bsg_request, }; struct fc_function_template fcoe_vport_transport_function = { @@ -167,6 +169,8 @@ struct fc_function_template fcoe_vport_transport_function = { .issue_fc_host_lip = fcoe_reset, .terminate_rport_io = fc_rport_terminate_io, + + .bsg_request = fc_lport_bsg_request, }; static struct scsi_host_template fcoe_shost_template = { diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c index dfea6c572dfb..2162e6b0f43e 100644 --- a/drivers/scsi/libfc/fc_lport.c +++ b/drivers/scsi/libfc/fc_lport.c @@ -94,6 +94,7 @@ #include #include +#include #include "fc_libfc.h" @@ -127,6 +128,24 @@ static const char *fc_lport_state_names[] = { [LPORT_ST_RESET] = "reset", }; +/** + * struct fc_bsg_info - FC Passthrough managemet structure + * @job: The passthrough job + * @lport: The local port to pass through a command + * @rsp_code: The expected response code + * @sg: job->reply_payload.sg_list + * @nents: job->reply_payload.sg_cnt + * @offset: The offset into the response data + */ +struct fc_bsg_info { + struct fc_bsg_job *job; + struct fc_lport *lport; + u16 rsp_code; + struct scatterlist *sg; + u32 nents; + size_t offset; +}; + static int fc_frame_drop(struct fc_lport *lport, struct fc_frame *fp) { fc_frame_free(fp); @@ -1512,3 +1531,251 @@ int fc_lport_init(struct fc_lport *lport) return 0; } EXPORT_SYMBOL(fc_lport_init); + +/** + * fc_lport_bsg_resp() - The common response handler for fc pass-thru requests + * @sp: current sequence in the fc pass-thru request exchange + * @fp: received response frame + * @info_arg: pointer to struct fc_bsg_info + */ +static void fc_lport_bsg_resp(struct fc_seq *sp, struct fc_frame *fp, + void *info_arg) +{ + struct fc_bsg_info *info = info_arg; + struct fc_bsg_job *job = info->job; + struct fc_lport *lport = info->lport; + struct fc_frame_header *fh; + size_t len; + void *buf; + + if (IS_ERR(fp)) { + job->reply->result = (PTR_ERR(fp) == -FC_EX_CLOSED) ? + -ECONNABORTED : -ETIMEDOUT; + job->reply_len = sizeof(uint32_t); + job->state_flags |= FC_RQST_STATE_DONE; + job->job_done(job); + kfree(info); + return; + } + + mutex_lock(&lport->lp_mutex); + fh = fc_frame_header_get(fp); + len = fr_len(fp) - sizeof(*fh); + buf = fc_frame_payload_get(fp, 0); + + if (fr_sof(fp) == FC_SOF_I3 && !ntohs(fh->fh_seq_cnt)) { + /* Get the response code from the first frame payload */ + unsigned short cmd = (info->rsp_code == FC_FS_ACC) ? + ntohs(((struct fc_ct_hdr *)buf)->ct_cmd) : + (unsigned short)fc_frame_payload_op(fp); + + /* Save the reply status of the job */ + job->reply->reply_data.ctels_reply.status = + (cmd == info->rsp_code) ? + FC_CTELS_STATUS_OK : FC_CTELS_STATUS_REJECT; + } + + job->reply->reply_payload_rcv_len += + fc_copy_buffer_to_sglist(buf, len, info->sg, &info->nents, + &info->offset, KM_BIO_SRC_IRQ, NULL); + + if (fr_eof(fp) == FC_EOF_T && + (ntoh24(fh->fh_f_ctl) & (FC_FC_LAST_SEQ | FC_FC_END_SEQ)) == + (FC_FC_LAST_SEQ | FC_FC_END_SEQ)) { + if (job->reply->reply_payload_rcv_len > + job->reply_payload.payload_len) + job->reply->reply_payload_rcv_len = + job->reply_payload.payload_len; + job->reply->result = 0; + job->state_flags |= FC_RQST_STATE_DONE; + job->job_done(job); + kfree(info); + } + fc_frame_free(fp); + mutex_unlock(&lport->lp_mutex); +} + +/** + * fc_lport_els_request() - Send ELS pass-thru request + * @job: The bsg fc pass-thru job structure + * @lport: The local port sending the request + * @did: The destination port id. + * + * Locking Note: The lport lock is expected to be held before calling + * this routine. + */ +static int fc_lport_els_request(struct fc_bsg_job *job, + struct fc_lport *lport, + u32 did, u32 tov) +{ + struct fc_bsg_info *info; + struct fc_frame *fp; + struct fc_frame_header *fh; + char *pp; + int len; + + fp = fc_frame_alloc(lport, sizeof(struct fc_frame_header) + + job->request_payload.payload_len); + if (!fp) + return -ENOMEM; + + len = job->request_payload.payload_len; + pp = fc_frame_payload_get(fp, len); + + sg_copy_to_buffer(job->request_payload.sg_list, + job->request_payload.sg_cnt, + pp, len); + + fh = fc_frame_header_get(fp); + fh->fh_r_ctl = FC_RCTL_ELS_REQ; + hton24(fh->fh_d_id, did); + hton24(fh->fh_s_id, fc_host_port_id(lport->host)); + fh->fh_type = FC_TYPE_ELS; + hton24(fh->fh_f_ctl, FC_FC_FIRST_SEQ | + FC_FC_END_SEQ | FC_FC_SEQ_INIT); + fh->fh_cs_ctl = 0; + fh->fh_df_ctl = 0; + fh->fh_parm_offset = 0; + + info = kzalloc(sizeof(struct fc_bsg_info), GFP_KERNEL); + if (!info) { + fc_frame_free(fp); + return -ENOMEM; + } + + info->job = job; + info->lport = lport; + info->rsp_code = ELS_LS_ACC; + info->nents = job->reply_payload.sg_cnt; + info->sg = job->reply_payload.sg_list; + + if (!lport->tt.exch_seq_send(lport, fp, fc_lport_bsg_resp, + NULL, info, tov)) + return -ECOMM; + return 0; +} + +/** + * fc_lport_ct_request() - Send CT pass-thru request + * @job: The bsg fc pass-thru job structure + * @lport: The local port sending the request + * @did: The destination FC-ID + * @tov: The time to wait for a response + * + * Locking Note: The lport lock is expected to be held before calling + * this routine. + */ +static int fc_lport_ct_request(struct fc_bsg_job *job, + struct fc_lport *lport, u32 did, u32 tov) +{ + struct fc_bsg_info *info; + struct fc_frame *fp; + struct fc_frame_header *fh; + struct fc_ct_req *ct; + size_t len; + + fp = fc_frame_alloc(lport, sizeof(struct fc_ct_hdr) + + job->request_payload.payload_len); + if (!fp) + return -ENOMEM; + + len = job->request_payload.payload_len; + ct = fc_frame_payload_get(fp, len); + + sg_copy_to_buffer(job->request_payload.sg_list, + job->request_payload.sg_cnt, + ct, len); + + fh = fc_frame_header_get(fp); + fh->fh_r_ctl = FC_RCTL_DD_UNSOL_CTL; + hton24(fh->fh_d_id, did); + hton24(fh->fh_s_id, fc_host_port_id(lport->host)); + fh->fh_type = FC_TYPE_CT; + hton24(fh->fh_f_ctl, FC_FC_FIRST_SEQ | + FC_FC_END_SEQ | FC_FC_SEQ_INIT); + fh->fh_cs_ctl = 0; + fh->fh_df_ctl = 0; + fh->fh_parm_offset = 0; + + info = kzalloc(sizeof(struct fc_bsg_info), GFP_KERNEL); + if (!info) { + fc_frame_free(fp); + return -ENOMEM; + } + + info->job = job; + info->lport = lport; + info->rsp_code = FC_FS_ACC; + info->nents = job->reply_payload.sg_cnt; + info->sg = job->reply_payload.sg_list; + + if (!lport->tt.exch_seq_send(lport, fp, fc_lport_bsg_resp, + NULL, info, tov)) + return -ECOMM; + return 0; +} + +/** + * fc_lport_bsg_request() - The common entry point for sending + * fc pass-thru requests + * @job: The fc pass-thru job structure + */ +int fc_lport_bsg_request(struct fc_bsg_job *job) +{ + struct request *rsp = job->req->next_rq; + struct Scsi_Host *shost = job->shost; + struct fc_lport *lport = shost_priv(shost); + struct fc_rport *rport; + struct fc_rport_priv *rdata; + int rc = -EINVAL; + u32 did; + + job->reply->reply_payload_rcv_len = 0; + rsp->resid_len = job->reply_payload.payload_len; + + mutex_lock(&lport->lp_mutex); + + switch (job->request->msgcode) { + case FC_BSG_RPT_ELS: + rport = job->rport; + if (!rport) + break; + + rdata = rport->dd_data; + rc = fc_lport_els_request(job, lport, rport->port_id, + rdata->e_d_tov); + break; + + case FC_BSG_RPT_CT: + rport = job->rport; + if (!rport) + break; + + rdata = rport->dd_data; + rc = fc_lport_ct_request(job, lport, rport->port_id, + rdata->e_d_tov); + break; + + case FC_BSG_HST_CT: + did = ntoh24(job->request->rqst_data.h_ct.port_id); + if (did == FC_FID_DIR_SERV) + rdata = lport->dns_rp; + else + rdata = lport->tt.rport_lookup(lport, did); + + if (!rdata) + break; + + rc = fc_lport_ct_request(job, lport, did, rdata->e_d_tov); + break; + + case FC_BSG_HST_ELS_NOLOGIN: + did = ntoh24(job->request->rqst_data.h_els.port_id); + rc = fc_lport_els_request(job, lport, did, lport->e_d_tov); + break; + } + + mutex_unlock(&lport->lp_mutex); + return rc; +} +EXPORT_SYMBOL(fc_lport_bsg_request); diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h index 8258edfa328c..54df9fe00c14 100644 --- a/include/scsi/libfc.h +++ b/include/scsi/libfc.h @@ -26,6 +26,7 @@ #include #include +#include #include #include @@ -830,6 +831,12 @@ struct fc_lport *fc_vport_id_lookup(struct fc_lport *n_port, u32 port_id); void fc_vport_setlink(struct fc_lport *vn_port); void fc_vports_linkchange(struct fc_lport *n_port); +/* + * Issue fc pass-thru request via bsg interface + */ +int fc_lport_bsg_request(struct fc_bsg_job *job); + + /* * REMOTE PORT LAYER *****************************/ -- cgit v1.3-8-gc7d7 From 3a3b42bf89a9b90ae9ed2c57fdc378e5473a0ef9 Mon Sep 17 00:00:00 2001 From: Robert Love Date: Tue, 3 Nov 2009 11:47:39 -0800 Subject: [SCSI] libfc: Formatting cleanups across libfc This patch makes a variety of cleanup changes to all libfc files. This patch adds kernel-doc headers to all functions lacking them and attempts to better format existing headers. It also add kernel-doc headers to structures. This patch ensures that the current naming conventions for local ports, remote ports and remote port private data is upheld in the following manner. struct instance (i.e. variable name) -------------------------------------------------- fc_lport lport fc_rport rport fc_rport_libfc_priv rpriv fc_rport_priv rdata I also renamed dns_rp and ptp_rp to dns_rdata and ptp_rdata respectively. I used emacs 'indent-region' and 'tabify' on all libfc files to correct spacing alignments. I feel sorry for anyone attempting to review this patch. Signed-off-by: Robert Love Signed-off-by: James Bottomley --- drivers/scsi/libfc/fc_disc.c | 78 ++-- drivers/scsi/libfc/fc_elsct.c | 33 +- drivers/scsi/libfc/fc_exch.c | 587 +++++++++++++++++-------- drivers/scsi/libfc/fc_fcp.c | 667 ++++++++++++++++------------ drivers/scsi/libfc/fc_libfc.c | 4 +- drivers/scsi/libfc/fc_libfc.h | 34 +- drivers/scsi/libfc/fc_lport.c | 247 ++++++----- drivers/scsi/libfc/fc_rport.c | 235 +++++----- include/scsi/libfc.h | 994 +++++++++++++++++++++--------------------- 9 files changed, 1624 insertions(+), 1255 deletions(-) (limited to 'include') diff --git a/drivers/scsi/libfc/fc_disc.c b/drivers/scsi/libfc/fc_disc.c index a4bdec28fef5..7b790ad15a93 100644 --- a/drivers/scsi/libfc/fc_disc.c +++ b/drivers/scsi/libfc/fc_disc.c @@ -53,8 +53,8 @@ static int fc_disc_single(struct fc_lport *, struct fc_disc_port *); static void fc_disc_restart(struct fc_disc *); /** - * fc_disc_stop_rports() - delete all the remote ports associated with the lport - * @disc: The discovery job to stop rports on + * fc_disc_stop_rports() - Delete all the remote ports associated with the lport + * @disc: The discovery job to stop remote ports on * * Locking Note: This function expects that the lport mutex is locked before * calling it. @@ -74,9 +74,9 @@ void fc_disc_stop_rports(struct fc_disc *disc) /** * fc_disc_recv_rscn_req() - Handle Registered State Change Notification (RSCN) - * @sp: Current sequence of the RSCN exchange - * @fp: RSCN Frame - * @lport: Fibre Channel host port instance + * @sp: The sequence of the RSCN exchange + * @fp: The RSCN frame + * @lport: The local port that the request will be sent on * * Locking Note: This function expects that the disc_mutex is locked * before it is called. @@ -185,9 +185,9 @@ reject: /** * fc_disc_recv_req() - Handle incoming requests - * @sp: Current sequence of the request exchange - * @fp: The frame - * @lport: The FC local port + * @sp: The sequence of the request exchange + * @fp: The request frame + * @lport: The local port receiving the request * * Locking Note: This function is called from the EM and will lock * the disc_mutex before calling the handler for the @@ -215,7 +215,7 @@ static void fc_disc_recv_req(struct fc_seq *sp, struct fc_frame *fp, /** * fc_disc_restart() - Restart discovery - * @lport: FC discovery context + * @disc: The discovery object to be restarted * * Locking Note: This function expects that the disc mutex * is already locked. @@ -242,9 +242,9 @@ static void fc_disc_restart(struct fc_disc *disc) } /** - * fc_disc_start() - Fibre Channel Target discovery - * @lport: FC local port - * @disc_callback: function to be called when discovery is complete + * fc_disc_start() - Start discovery on a local port + * @lport: The local port to have discovery started on + * @disc_callback: Callback function to be called when discovery is complete */ static void fc_disc_start(void (*disc_callback)(struct fc_lport *, enum fc_disc_event), @@ -265,8 +265,8 @@ static void fc_disc_start(void (*disc_callback)(struct fc_lport *, /** * fc_disc_done() - Discovery has been completed - * @disc: FC discovery context - * @event: discovery completion status + * @disc: The discovery context + * @event: The discovery completion status * * Locking Note: This function expects that the disc mutex is locked before * it is called. The discovery callback is then made with the lock released, @@ -286,8 +286,8 @@ static void fc_disc_done(struct fc_disc *disc, enum fc_disc_event event) } /* - * Go through all remote ports. If they were found in the latest - * discovery, reverify or log them in. Otherwise, log them out. + * Go through all remote ports. If they were found in the latest + * discovery, reverify or log them in. Otherwise, log them out. * Skip ports which were never discovered. These are the dNS port * and ports which were created by PLOGI. */ @@ -307,8 +307,8 @@ static void fc_disc_done(struct fc_disc *disc, enum fc_disc_event event) /** * fc_disc_error() - Handle error on dNS request - * @disc: FC discovery context - * @fp: The frame pointer + * @disc: The discovery context + * @fp: The error code encoded as a frame pointer */ static void fc_disc_error(struct fc_disc *disc, struct fc_frame *fp) { @@ -344,7 +344,7 @@ static void fc_disc_error(struct fc_disc *disc, struct fc_frame *fp) /** * fc_disc_gpn_ft_req() - Send Get Port Names by FC-4 type (GPN_FT) request - * @lport: FC discovery context + * @lport: The discovery context * * Locking Note: This function expects that the disc_mutex is locked * before it is called. @@ -378,9 +378,9 @@ err: /** * fc_disc_gpn_ft_parse() - Parse the body of the dNS GPN_FT response. - * @lport: Fibre Channel host port instance - * @buf: GPN_FT response buffer - * @len: size of response buffer + * @lport: The local port the GPN_FT was received on + * @buf: The GPN_FT response buffer + * @len: The size of response buffer * * Goes through the list of IDs and names resulting from a request. */ @@ -479,10 +479,8 @@ static int fc_disc_gpn_ft_parse(struct fc_disc *disc, void *buf, size_t len) } /** - * fc_disc_timeout() - Retry handler for the disc component - * @work: Structure holding disc obj that needs retry discovery - * - * Handle retry of memory allocation for remote ports. + * fc_disc_timeout() - Handler for discovery timeouts + * @work: Structure holding discovery context that needs to retry discovery */ static void fc_disc_timeout(struct work_struct *work) { @@ -496,9 +494,9 @@ static void fc_disc_timeout(struct work_struct *work) /** * fc_disc_gpn_ft_resp() - Handle a response frame from Get Port Names (GPN_FT) - * @sp: Current sequence of GPN_FT exchange - * @fp: response frame - * @lp_arg: Fibre Channel host port instance + * @sp: The sequence that the GPN_FT response was received on + * @fp: The GPN_FT response frame + * @lp_arg: The discovery context * * Locking Note: This function is called without disc mutex held, and * should do all its processing with the mutex held @@ -569,9 +567,9 @@ static void fc_disc_gpn_ft_resp(struct fc_seq *sp, struct fc_frame *fp, /** * fc_disc_gpn_id_resp() - Handle a response frame from Get Port Names (GPN_ID) - * @sp: exchange sequence - * @fp: response frame - * @rdata_arg: remote port private data + * @sp: The sequence the GPN_ID is on + * @fp: The response frame + * @rdata_arg: The remote port that sent the GPN_ID response * * Locking Note: This function is called without disc mutex held. */ @@ -639,7 +637,7 @@ out: /** * fc_disc_gpn_id_req() - Send Get Port Names by ID (GPN_ID) request - * @lport: local port + * @lport: The local port to initiate discovery on * @rdata: remote port private data * * Locking Note: This function expects that the disc_mutex is locked @@ -656,7 +654,7 @@ static int fc_disc_gpn_id_req(struct fc_lport *lport, if (!fp) return -ENOMEM; if (!lport->tt.elsct_send(lport, rdata->ids.port_id, fp, FC_NS_GPN_ID, - fc_disc_gpn_id_resp, rdata, lport->e_d_tov)) + fc_disc_gpn_id_resp, rdata, lport->e_d_tov)) return -ENOMEM; kref_get(&rdata->kref); return 0; @@ -664,8 +662,8 @@ static int fc_disc_gpn_id_req(struct fc_lport *lport, /** * fc_disc_single() - Discover the directory information for a single target - * @lport: local port - * @dp: The port to rediscover + * @lport: The local port the remote port is associated with + * @dp: The port to rediscover * * Locking Note: This function expects that the disc_mutex is locked * before it is called. @@ -683,7 +681,7 @@ static int fc_disc_single(struct fc_lport *lport, struct fc_disc_port *dp) /** * fc_disc_stop() - Stop discovery for a given lport - * @lport: The lport that discovery should stop for + * @lport: The local port that discovery should stop on */ void fc_disc_stop(struct fc_lport *lport) { @@ -697,7 +695,7 @@ void fc_disc_stop(struct fc_lport *lport) /** * fc_disc_stop_final() - Stop discovery for a given lport - * @lport: The lport that discovery should stop for + * @lport: The lport that discovery should stop on * * This function will block until discovery has been * completely stopped and all rports have been deleted. @@ -709,8 +707,8 @@ void fc_disc_stop_final(struct fc_lport *lport) } /** - * fc_disc_init() - Initialize the discovery block - * @lport: FC local port + * fc_disc_init() - Initialize the discovery layer for a local port + * @lport: The local port that needs the discovery layer to be initialized */ int fc_disc_init(struct fc_lport *lport) { diff --git a/drivers/scsi/libfc/fc_elsct.c b/drivers/scsi/libfc/fc_elsct.c index aae54fe3b299..01be43f80f34 100644 --- a/drivers/scsi/libfc/fc_elsct.c +++ b/drivers/scsi/libfc/fc_elsct.c @@ -28,17 +28,22 @@ #include #include -/* - * fc_elsct_send - sends ELS/CT frame +/** + * fc_elsct_send() - Send an ELS or CT frame + * @lport: The local port to send the frame on + * @did: The destination ID for the frame + * @fp: The frame to be sent + * @op: The operational code + * @resp: The callback routine when the response is received + * @arg: The argument to pass to the response callback routine + * @timer_msec: The timeout period for the frame (in msecs) */ -struct fc_seq *fc_elsct_send(struct fc_lport *lport, - u32 did, - struct fc_frame *fp, - unsigned int op, - void (*resp)(struct fc_seq *, - struct fc_frame *fp, - void *arg), - void *arg, u32 timer_msec) +struct fc_seq *fc_elsct_send(struct fc_lport *lport, u32 did, + struct fc_frame *fp, unsigned int op, + void (*resp)(struct fc_seq *, + struct fc_frame *, + void *), + void *arg, u32 timer_msec) { enum fc_rctl r_ctl; enum fc_fh_type fh_type; @@ -65,6 +70,10 @@ struct fc_seq *fc_elsct_send(struct fc_lport *lport, } EXPORT_SYMBOL(fc_elsct_send); +/** + * fc_elsct_init() - Initialize the ELS/CT layer + * @lport: The local port to initialize the ELS/CT layer for + */ int fc_elsct_init(struct fc_lport *lport) { if (!lport->tt.elsct_send) @@ -75,8 +84,8 @@ int fc_elsct_init(struct fc_lport *lport) EXPORT_SYMBOL(fc_elsct_init); /** - * fc_els_resp_type() - return string describing ELS response for debug. - * @fp: frame pointer with possible error code. + * fc_els_resp_type() - Return a string describing the ELS response + * @fp: The frame pointer or possible error code */ const char *fc_els_resp_type(struct fc_frame *fp) { diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c index 751a485685d9..0f45bb8521f1 100644 --- a/drivers/scsi/libfc/fc_exch.c +++ b/drivers/scsi/libfc/fc_exch.c @@ -37,7 +37,7 @@ u16 fc_cpu_mask; /* cpu mask for possible cpus */ EXPORT_SYMBOL(fc_cpu_mask); static u16 fc_cpu_order; /* 2's power to represent total possible cpus */ -static struct kmem_cache *fc_em_cachep; /* cache for exchanges */ +static struct kmem_cache *fc_em_cachep; /* cache for exchanges */ /* * Structure and function definitions for managing Fibre Channel Exchanges @@ -52,34 +52,46 @@ static struct kmem_cache *fc_em_cachep; /* cache for exchanges */ * fc_seq holds the state for an individual sequence. */ -/* - * Per cpu exchange pool +/** + * struct fc_exch_pool - Per cpu exchange pool + * @next_index: Next possible free exchange index + * @total_exches: Total allocated exchanges + * @lock: Exch pool lock + * @ex_list: List of exchanges * * This structure manages per cpu exchanges in array of exchange pointers. * This array is allocated followed by struct fc_exch_pool memory for * assigned range of exchanges to per cpu pool. */ struct fc_exch_pool { - u16 next_index; /* next possible free exchange index */ - u16 total_exches; /* total allocated exchanges */ - spinlock_t lock; /* exch pool lock */ - struct list_head ex_list; /* allocated exchanges list */ + u16 next_index; + u16 total_exches; + spinlock_t lock; + struct list_head ex_list; }; -/* - * Exchange manager. +/** + * struct fc_exch_mgr - The Exchange Manager (EM). + * @class: Default class for new sequences + * @kref: Reference counter + * @min_xid: Minimum exchange ID + * @max_xid: Maximum exchange ID + * @ep_pool: Reserved exchange pointers + * @pool_max_index: Max exch array index in exch pool + * @pool: Per cpu exch pool + * @stats: Statistics structure * * This structure is the center for creating exchanges and sequences. * It manages the allocation of exchange IDs. */ struct fc_exch_mgr { - enum fc_class class; /* default class for sequences */ - struct kref kref; /* exchange mgr reference count */ - u16 min_xid; /* min exchange ID */ - u16 max_xid; /* max exchange ID */ - mempool_t *ep_pool; /* reserve ep's */ - u16 pool_max_index; /* max exch array index in exch pool */ - struct fc_exch_pool *pool; /* per cpu exch pool */ + enum fc_class class; + struct kref kref; + u16 min_xid; + u16 max_xid; + mempool_t *ep_pool; + u16 pool_max_index; + struct fc_exch_pool *pool; /* * currently exchange mgr stats are updated but not used. @@ -97,6 +109,18 @@ struct fc_exch_mgr { }; #define fc_seq_exch(sp) container_of(sp, struct fc_exch, seq) +/** + * struct fc_exch_mgr_anchor - primary structure for list of EMs + * @ema_list: Exchange Manager Anchor list + * @mp: Exchange Manager associated with this anchor + * @match: Routine to determine if this anchor's EM should be used + * + * When walking the list of anchors the match routine will be called + * for each anchor to determine if that EM should be used. The last + * anchor in the list will always match to handle any exchanges not + * handled by other EMs. The non-default EMs would be added to the + * anchor list by HW that provides FCoE offloads. + */ struct fc_exch_mgr_anchor { struct list_head ema_list; struct fc_exch_mgr *mp; @@ -196,6 +220,15 @@ static char *fc_exch_rctl_names[] = FC_RCTL_NAMES_INIT; #define FC_TABLE_SIZE(x) (sizeof(x) / sizeof(x[0])) +/** + * fc_exch_name_lookup() - Lookup name by opcode + * @op: Opcode to be looked up + * @table: Opcode/name table + * @max_index: Index not to be exceeded + * + * This routine is used to determine a human-readable string identifying + * a R_CTL opcode. + */ static inline const char *fc_exch_name_lookup(unsigned int op, char **table, unsigned int max_index) { @@ -208,25 +241,34 @@ static inline const char *fc_exch_name_lookup(unsigned int op, char **table, return name; } +/** + * fc_exch_rctl_name() - Wrapper routine for fc_exch_name_lookup() + * @op: The opcode to be looked up + */ static const char *fc_exch_rctl_name(unsigned int op) { return fc_exch_name_lookup(op, fc_exch_rctl_names, FC_TABLE_SIZE(fc_exch_rctl_names)); } -/* - * Hold an exchange - keep it from being freed. +/** + * fc_exch_hold() - Increment an exchange's reference count + * @ep: Echange to be held */ -static void fc_exch_hold(struct fc_exch *ep) +static inline void fc_exch_hold(struct fc_exch *ep) { atomic_inc(&ep->ex_refcnt); } -/* - * setup fc hdr by initializing few more FC header fields and sof/eof. - * Initialized fields by this func: - * - fh_ox_id, fh_rx_id, fh_seq_id, fh_seq_cnt - * - sof and eof +/** + * fc_exch_setup_hdr() - Initialize a FC header by initializing some fields + * and determine SOF and EOF. + * @ep: The exchange to that will use the header + * @fp: The frame whose header is to be modified + * @f_ctl: F_CTL bits that will be used for the frame header + * + * The fields initialized by this routine are: fh_ox_id, fh_rx_id, + * fh_seq_id, fh_seq_cnt and the SOF and EOF. */ static void fc_exch_setup_hdr(struct fc_exch *ep, struct fc_frame *fp, u32 f_ctl) @@ -243,7 +285,7 @@ static void fc_exch_setup_hdr(struct fc_exch *ep, struct fc_frame *fp, if (fc_sof_needs_ack(ep->class)) fr_eof(fp) = FC_EOF_N; /* - * Form f_ctl. + * From F_CTL. * The number of fill bytes to make the length a 4-byte * multiple is the low order 2-bits of the f_ctl. * The fill itself will have been cleared by the frame @@ -273,9 +315,12 @@ static void fc_exch_setup_hdr(struct fc_exch *ep, struct fc_frame *fp, fh->fh_seq_cnt = htons(ep->seq.cnt); } -/* - * Release a reference to an exchange. - * If the refcnt goes to zero and the exchange is complete, it is freed. +/** + * fc_exch_release() - Decrement an exchange's reference count + * @ep: Exchange to be released + * + * If the reference count reaches zero and the exchange is complete, + * it is freed. */ static void fc_exch_release(struct fc_exch *ep) { @@ -290,6 +335,10 @@ static void fc_exch_release(struct fc_exch *ep) } } +/** + * fc_exch_done_locked() - Complete an exchange with the exchange lock held + * @ep: The exchange that is complete + */ static int fc_exch_done_locked(struct fc_exch *ep) { int rc = 1; @@ -314,6 +363,15 @@ static int fc_exch_done_locked(struct fc_exch *ep) return rc; } +/** + * fc_exch_ptr_get() - Return an exchange from an exchange pool + * @pool: Exchange Pool to get an exchange from + * @index: Index of the exchange within the pool + * + * Use the index to get an exchange from within an exchange pool. exches + * will point to an array of exchange pointers. The index will select + * the exchange within the array. + */ static inline struct fc_exch *fc_exch_ptr_get(struct fc_exch_pool *pool, u16 index) { @@ -321,12 +379,22 @@ static inline struct fc_exch *fc_exch_ptr_get(struct fc_exch_pool *pool, return exches[index]; } +/** + * fc_exch_ptr_set() - Assign an exchange to a slot in an exchange pool + * @pool: The pool to assign the exchange to + * @index: The index in the pool where the exchange will be assigned + * @ep: The exchange to assign to the pool + */ static inline void fc_exch_ptr_set(struct fc_exch_pool *pool, u16 index, struct fc_exch *ep) { ((struct fc_exch **)(pool + 1))[index] = ep; } +/** + * fc_exch_delete() - Delete an exchange + * @ep: The exchange to be deleted + */ static void fc_exch_delete(struct fc_exch *ep) { struct fc_exch_pool *pool; @@ -342,8 +410,14 @@ static void fc_exch_delete(struct fc_exch *ep) fc_exch_release(ep); /* drop hold for exch in mp */ } -/* - * Internal version of fc_exch_timer_set - used with lock held. +/** + * fc_exch_timer_set_locked() - Start a timer for an exchange w/ the + * the exchange lock held + * @ep: The exchange whose timer will start + * @timer_msec: The timeout period + * + * Used for upper level protocols to time out the exchange. + * The timer is cancelled when it fires or when the exchange completes. */ static inline void fc_exch_timer_set_locked(struct fc_exch *ep, unsigned int timer_msec) @@ -358,12 +432,10 @@ static inline void fc_exch_timer_set_locked(struct fc_exch *ep, fc_exch_hold(ep); /* hold for timer */ } -/* - * Set timer for an exchange. - * The time is a minimum delay in milliseconds until the timer fires. - * Used for upper level protocols to time out the exchange. - * The timer is cancelled when it fires or when the exchange completes. - * Returns non-zero if a timer couldn't be allocated. +/** + * fc_exch_timer_set() - Lock the exchange and set the timer + * @ep: The exchange whose timer will start + * @timer_msec: The timeout period */ static void fc_exch_timer_set(struct fc_exch *ep, unsigned int timer_msec) { @@ -373,15 +445,18 @@ static void fc_exch_timer_set(struct fc_exch *ep, unsigned int timer_msec) } /** - * send a frame using existing sequence and exchange. + * fc_seq_send() - Send a frame using existing sequence/exchange pair + * @lport: The local port that the exchange will be sent on + * @sp: The sequence to be sent + * @fp: The frame to be sent on the exchange */ -static int fc_seq_send(struct fc_lport *lp, struct fc_seq *sp, +static int fc_seq_send(struct fc_lport *lport, struct fc_seq *sp, struct fc_frame *fp) { struct fc_exch *ep; struct fc_frame_header *fh = fc_frame_header_get(fp); int error; - u32 f_ctl; + u32 f_ctl; ep = fc_seq_exch(sp); WARN_ON((ep->esb_stat & ESB_ST_SEQ_INIT) != ESB_ST_SEQ_INIT); @@ -403,7 +478,7 @@ static int fc_seq_send(struct fc_lport *lp, struct fc_seq *sp, /* * Send the frame. */ - error = lp->tt.frame_send(lp, fp); + error = lport->tt.frame_send(lport, fp); /* * Update the exchange and sequence flags, @@ -419,9 +494,9 @@ static int fc_seq_send(struct fc_lport *lp, struct fc_seq *sp, } /** - * fc_seq_alloc() - Allocate a sequence. - * @ep: Exchange pointer - * @seq_id: Sequence ID to allocate a sequence for + * fc_seq_alloc() - Allocate a sequence for a given exchange + * @ep: The exchange to allocate a new sequence for + * @seq_id: The sequence ID to be used * * We don't support multiple originated sequences on the same exchange. * By implication, any previously originated sequence on this exchange @@ -438,6 +513,11 @@ static struct fc_seq *fc_seq_alloc(struct fc_exch *ep, u8 seq_id) return sp; } +/** + * fc_seq_start_next_locked() - Allocate a new sequence on the same + * exchange as the supplied sequence + * @sp: The sequence/exchange to get a new sequence for + */ static struct fc_seq *fc_seq_start_next_locked(struct fc_seq *sp) { struct fc_exch *ep = fc_seq_exch(sp); @@ -449,8 +529,9 @@ static struct fc_seq *fc_seq_start_next_locked(struct fc_seq *sp) } /** - * Allocate a new sequence on the same exchange as the supplied sequence. - * This will never return NULL. + * fc_seq_start_next() - Lock the exchange and get a new sequence + * for a given sequence/exchange pair + * @sp: The sequence/exchange to get a new exchange for */ static struct fc_seq *fc_seq_start_next(struct fc_seq *sp) { @@ -464,9 +545,11 @@ static struct fc_seq *fc_seq_start_next(struct fc_seq *sp) } /** - * This function is for seq_exch_abort function pointer in - * struct libfc_function_template, see comment block on - * seq_exch_abort for description of this function. + * fc_seq_exch_abort() - Abort an exchange and sequence + * @req_sp: The sequence to be aborted + * @timer_msec: The period of time to wait before aborting + * + * Generally called because of a timeout or an abort from the upper layer. */ static int fc_seq_exch_abort(const struct fc_seq *req_sp, unsigned int timer_msec) @@ -519,9 +602,9 @@ static int fc_seq_exch_abort(const struct fc_seq *req_sp, return error; } -/* - * Exchange timeout - handle exchange timer expiration. - * The timer will have been cancelled before this is called. +/** + * fc_exch_timeout() - Handle exchange timer expiration + * @work: The work_struct identifying the exchange that timed out */ static void fc_exch_timeout(struct work_struct *work) { @@ -570,9 +653,9 @@ done: } /** - * fc_exch_em_alloc() - allocate an exchange from a specified EM. - * @lport: ptr to the local port - * @mp: ptr to the exchange manager + * fc_exch_em_alloc() - Allocate an exchange from a specified EM. + * @lport: The local port that the exchange is for + * @mp: The exchange manager that will allocate the exchange * * Returns pointer to allocated fc_exch with exch lock held. */ @@ -640,14 +723,15 @@ err: } /** - * fc_exch_alloc() - allocate an exchange. - * @lport: ptr to the local port - * @fp: ptr to the FC frame + * fc_exch_alloc() - Allocate an exchange from an EM on a + * local port's list of EMs. + * @lport: The local port that will own the exchange + * @fp: The FC frame that the exchange will be for * - * This function walks the list of the exchange manager(EM) - * anchors to select a EM for new exchange allocation. The - * EM is selected having either a NULL match function pointer - * or call to match function returning true. + * This function walks the list of exchange manager(EM) + * anchors to select an EM for a new exchange allocation. The + * EM is selected when a NULL match function pointer is encountered + * or when a call to a match function returns true. */ static struct fc_exch *fc_exch_alloc(struct fc_lport *lport, struct fc_frame *fp) @@ -665,8 +749,10 @@ static struct fc_exch *fc_exch_alloc(struct fc_lport *lport, return NULL; } -/* - * Lookup and hold an exchange. +/** + * fc_exch_find() - Lookup and hold an exchange + * @mp: The exchange manager to lookup the exchange from + * @xid: The XID of the exchange to look up */ static struct fc_exch *fc_exch_find(struct fc_exch_mgr *mp, u16 xid) { @@ -689,8 +775,8 @@ static struct fc_exch *fc_exch_find(struct fc_exch_mgr *mp, u16 xid) /** * fc_exch_done() - Indicate that an exchange/sequence tuple is complete and - * the memory allocated for the related objects may be freed. - * @sp: Sequence pointer + * the memory allocated for the related objects may be freed. + * @sp: The sequence that has completed */ static void fc_exch_done(struct fc_seq *sp) { @@ -704,8 +790,12 @@ static void fc_exch_done(struct fc_seq *sp) fc_exch_delete(ep); } -/* - * Allocate a new exchange as responder. +/** + * fc_exch_resp() - Allocate a new exchange for a response frame + * @lport: The local port that the exchange was for + * @mp: The exchange manager to allocate the exchange from + * @fp: The response frame + * * Sets the responder ID in the frame header. */ static struct fc_exch *fc_exch_resp(struct fc_lport *lport, @@ -746,8 +836,13 @@ static struct fc_exch *fc_exch_resp(struct fc_lport *lport, return ep; } -/* - * Find a sequence for receive where the other end is originating the sequence. +/** + * fc_seq_lookup_recip() - Find a sequence where the other end + * originated the sequence + * @lport: The local port that the frame was sent to + * @mp: The Exchange Manager to lookup the exchange from + * @fp: The frame associated with the sequence we're looking for + * * If fc_pf_rjt_reason is FC_RJT_NONE then this function will have a hold * on the ep that should be released by the caller. */ @@ -853,10 +948,12 @@ rel: return reject; } -/* - * Find the sequence for a frame being received. - * We originated the sequence, so it should be found. - * We may or may not have originated the exchange. +/** + * fc_seq_lookup_orig() - Find a sequence where this end + * originated the sequence + * @mp: The Exchange Manager to lookup the exchange from + * @fp: The frame associated with the sequence we're looking for + * * Does not hold the sequence for the caller. */ static struct fc_seq *fc_seq_lookup_orig(struct fc_exch_mgr *mp, @@ -888,8 +985,12 @@ static struct fc_seq *fc_seq_lookup_orig(struct fc_exch_mgr *mp, return sp; } -/* - * Set addresses for an exchange. +/** + * fc_exch_set_addr() - Set the source and destination IDs for an exchange + * @ep: The exchange to set the addresses for + * @orig_id: The originator's ID + * @resp_id: The responder's ID + * * Note this must be done before the first sequence of the exchange is sent. */ static void fc_exch_set_addr(struct fc_exch *ep, @@ -906,11 +1007,11 @@ static void fc_exch_set_addr(struct fc_exch *ep, } /** - * fc_seq_els_rsp_send() - Send ELS response using mainly infomation - * in exchange and sequence in EM layer. - * @sp: Sequence pointer - * @els_cmd: ELS command - * @els_data: ELS data + * fc_seq_els_rsp_send() - Send an ELS response using infomation from + * the existing sequence/exchange. + * @sp: The sequence/exchange to get information from + * @els_cmd: The ELS command to be sent + * @els_data: The ELS data to be sent */ static void fc_seq_els_rsp_send(struct fc_seq *sp, enum fc_els_cmd els_cmd, struct fc_seq_els_data *els_data) @@ -933,8 +1034,12 @@ static void fc_seq_els_rsp_send(struct fc_seq *sp, enum fc_els_cmd els_cmd, } } -/* - * Send a sequence, which is also the last sequence in the exchange. +/** + * fc_seq_send_last() - Send a sequence that is the last in the exchange + * @sp: The sequence that is to be sent + * @fp: The frame that will be sent on the sequence + * @rctl: The R_CTL information to be sent + * @fh_type: The frame header type */ static void fc_seq_send_last(struct fc_seq *sp, struct fc_frame *fp, enum fc_rctl rctl, enum fc_fh_type fh_type) @@ -948,9 +1053,12 @@ static void fc_seq_send_last(struct fc_seq *sp, struct fc_frame *fp, fc_seq_send(ep->lp, sp, fp); } -/* +/** + * fc_seq_send_ack() - Send an acknowledgement that we've received a frame + * @sp: The sequence to send the ACK on + * @rx_fp: The received frame that is being acknoledged + * * Send ACK_1 (or equiv.) indicating we received something. - * The frame we're acking is supplied. */ static void fc_seq_send_ack(struct fc_seq *sp, const struct fc_frame *rx_fp) { @@ -958,14 +1066,14 @@ static void fc_seq_send_ack(struct fc_seq *sp, const struct fc_frame *rx_fp) struct fc_frame_header *rx_fh; struct fc_frame_header *fh; struct fc_exch *ep = fc_seq_exch(sp); - struct fc_lport *lp = ep->lp; + struct fc_lport *lport = ep->lp; unsigned int f_ctl; /* * Don't send ACKs for class 3. */ if (fc_sof_needs_ack(fr_sof(rx_fp))) { - fp = fc_frame_alloc(lp, 0); + fp = fc_frame_alloc(lport, 0); if (!fp) return; @@ -1000,12 +1108,16 @@ static void fc_seq_send_ack(struct fc_seq *sp, const struct fc_frame *rx_fp) else fr_eof(fp) = FC_EOF_N; - (void) lp->tt.frame_send(lp, fp); + lport->tt.frame_send(lport, fp); } } -/* - * Send BLS Reject. +/** + * fc_exch_send_ba_rjt() - Send BLS Reject + * @rx_fp: The frame being rejected + * @reason: The reason the frame is being rejected + * @explan: The explaination for the rejection + * * This is for rejecting BA_ABTS only. */ static void fc_exch_send_ba_rjt(struct fc_frame *rx_fp, @@ -1016,11 +1128,11 @@ static void fc_exch_send_ba_rjt(struct fc_frame *rx_fp, struct fc_frame_header *rx_fh; struct fc_frame_header *fh; struct fc_ba_rjt *rp; - struct fc_lport *lp; + struct fc_lport *lport; unsigned int f_ctl; - lp = fr_dev(rx_fp); - fp = fc_frame_alloc(lp, sizeof(*rp)); + lport = fr_dev(rx_fp); + fp = fc_frame_alloc(lport, sizeof(*rp)); if (!fp) return; fh = fc_frame_header_get(fp); @@ -1065,13 +1177,17 @@ static void fc_exch_send_ba_rjt(struct fc_frame *rx_fp, if (fc_sof_needs_ack(fr_sof(fp))) fr_eof(fp) = FC_EOF_N; - (void) lp->tt.frame_send(lp, fp); + lport->tt.frame_send(lport, fp); } -/* - * Handle an incoming ABTS. This would be for target mode usually, - * but could be due to lost FCP transfer ready, confirm or RRQ. - * We always handle this as an exchange abort, ignoring the parameter. +/** + * fc_exch_recv_abts() - Handle an incoming ABTS + * @ep: The exchange the abort was on + * @rx_fp: The ABTS frame + * + * This would be for target mode usually, but could be due to lost + * FCP transfer ready, confirm or RRQ. We always handle this as an + * exchange abort, ignoring the parameter. */ static void fc_exch_recv_abts(struct fc_exch *ep, struct fc_frame *rx_fp) { @@ -1120,10 +1236,14 @@ free: fc_frame_free(rx_fp); } -/* - * Handle receive where the other end is originating the sequence. +/** + * fc_exch_recv_req() - Handler for an incoming request where is other + * end is originating the sequence + * @lport: The local port that received the request + * @mp: The EM that the exchange is on + * @fp: The request frame */ -static void fc_exch_recv_req(struct fc_lport *lp, struct fc_exch_mgr *mp, +static void fc_exch_recv_req(struct fc_lport *lport, struct fc_exch_mgr *mp, struct fc_frame *fp) { struct fc_frame_header *fh = fc_frame_header_get(fp); @@ -1137,14 +1257,14 @@ static void fc_exch_recv_req(struct fc_lport *lp, struct fc_exch_mgr *mp, /* We can have the wrong fc_lport at this point with NPIV, which is a * problem now that we know a new exchange needs to be allocated */ - lp = fc_vport_id_lookup(lp, ntoh24(fh->fh_d_id)); - if (!lp) { + lport = fc_vport_id_lookup(lport, ntoh24(fh->fh_d_id)); + if (!lport) { fc_frame_free(fp); return; } fr_seq(fp) = NULL; - reject = fc_seq_lookup_recip(lp, mp, fp); + reject = fc_seq_lookup_recip(lport, mp, fp); if (reject == FC_RJT_NONE) { sp = fr_seq(fp); /* sequence will be held */ ep = fc_seq_exch(sp); @@ -1167,17 +1287,21 @@ static void fc_exch_recv_req(struct fc_lport *lp, struct fc_exch_mgr *mp, if (ep->resp) ep->resp(sp, fp, ep->arg); else - lp->tt.lport_recv(lp, sp, fp); + lport->tt.lport_recv(lport, sp, fp); fc_exch_release(ep); /* release from lookup */ } else { - FC_LPORT_DBG(lp, "exch/seq lookup failed: reject %x\n", reject); + FC_LPORT_DBG(lport, "exch/seq lookup failed: reject %x\n", + reject); fc_frame_free(fp); } } -/* - * Handle receive where the other end is originating the sequence in - * response to our exchange. +/** + * fc_exch_recv_seq_resp() - Handler for an incoming response where the other + * end is the originator of the sequence that is a + * response to our initial exchange + * @mp: The EM that the exchange is on + * @fp: The response frame */ static void fc_exch_recv_seq_resp(struct fc_exch_mgr *mp, struct fc_frame *fp) { @@ -1268,8 +1392,11 @@ out: fc_frame_free(fp); } -/* - * Handle receive for a sequence where other end is responding to our sequence. +/** + * fc_exch_recv_resp() - Handler for a sequence where other end is + * responding to our sequence + * @mp: The EM that the exchange is on + * @fp: The response frame */ static void fc_exch_recv_resp(struct fc_exch_mgr *mp, struct fc_frame *fp) { @@ -1285,9 +1412,13 @@ static void fc_exch_recv_resp(struct fc_exch_mgr *mp, struct fc_frame *fp) fc_frame_free(fp); } -/* - * Handle the response to an ABTS for exchange or sequence. - * This can be BA_ACC or BA_RJT. +/** + * fc_exch_abts_resp() - Handler for a response to an ABT + * @ep: The exchange that the frame is on + * @fp: The response frame + * + * This response would be to an ABTS cancelling an exchange or sequence. + * The response can be either BA_ACC or BA_RJT */ static void fc_exch_abts_resp(struct fc_exch *ep, struct fc_frame *fp) { @@ -1362,9 +1493,12 @@ static void fc_exch_abts_resp(struct fc_exch *ep, struct fc_frame *fp) } -/* - * Receive BLS sequence. - * This is always a sequence initiated by the remote side. +/** + * fc_exch_recv_bls() - Handler for a BLS sequence + * @mp: The EM that the exchange is on + * @fp: The request frame + * + * The BLS frame is always a sequence initiated by the remote side. * We may be either the originator or recipient of the exchange. */ static void fc_exch_recv_bls(struct fc_exch_mgr *mp, struct fc_frame *fp) @@ -1421,8 +1555,10 @@ static void fc_exch_recv_bls(struct fc_exch_mgr *mp, struct fc_frame *fp) fc_exch_release(ep); /* release hold taken by fc_exch_find */ } -/* - * Accept sequence with LS_ACC. +/** + * fc_seq_ls_acc() - Accept sequence with LS_ACC + * @req_sp: The request sequence + * * If this fails due to allocation or transmit congestion, assume the * originator will repeat the sequence. */ @@ -1442,8 +1578,12 @@ static void fc_seq_ls_acc(struct fc_seq *req_sp) } } -/* - * Reject sequence with ELS LS_RJT. +/** + * fc_seq_ls_rjt() - Reject a sequence with ELS LS_RJT + * @req_sp: The request sequence + * @reason: The reason the sequence is being rejected + * @explan: The explaination for the rejection + * * If this fails due to allocation or transmit congestion, assume the * originator will repeat the sequence. */ @@ -1466,6 +1606,10 @@ static void fc_seq_ls_rjt(struct fc_seq *req_sp, enum fc_els_rjt_reason reason, } } +/** + * fc_exch_reset() - Reset an exchange + * @ep: The exchange to be reset + */ static void fc_exch_reset(struct fc_exch *ep) { struct fc_seq *sp; @@ -1500,16 +1644,16 @@ static void fc_exch_reset(struct fc_exch *ep) } /** - * fc_exch_pool_reset() - Resets an per cpu exches pool. - * @lport: ptr to the local port - * @pool: ptr to the per cpu exches pool - * @sid: source FC ID - * @did: destination FC ID + * fc_exch_pool_reset() - Reset a per cpu exchange pool + * @lport: The local port that the exchange pool is on + * @pool: The exchange pool to be reset + * @sid: The source ID + * @did: The destination ID * - * Resets an per cpu exches pool, releasing its all sequences - * and exchanges. If sid is non-zero, then reset only exchanges - * we sourced from that FID. If did is non-zero, reset only - * exchanges destined to that FID. + * Resets a per cpu exches pool, releasing all of its sequences + * and exchanges. If sid is non-zero then reset only exchanges + * we sourced from the local port's FID. If did is non-zero then + * only reset exchanges destined for the local port's FID. */ static void fc_exch_pool_reset(struct fc_lport *lport, struct fc_exch_pool *pool, @@ -1543,15 +1687,15 @@ restart: } /** - * fc_exch_mgr_reset() - Resets all EMs of a lport - * @lport: ptr to the local port - * @sid: source FC ID - * @did: destination FC ID + * fc_exch_mgr_reset() - Reset all EMs of a local port + * @lport: The local port whose EMs are to be reset + * @sid: The source ID + * @did: The destination ID * - * Reset all EMs of a lport, releasing its all sequences and - * exchanges. If sid is non-zero, then reset only exchanges - * we sourced from that FID. If did is non-zero, reset only - * exchanges destined to that FID. + * Reset all EMs associated with a given local port. Release all + * sequences and exchanges. If sid is non-zero then reset only the + * exchanges sent from the local port's FID. If did is non-zero then + * reset only exchanges destined for the local port's FID. */ void fc_exch_mgr_reset(struct fc_lport *lport, u32 sid, u32 did) { @@ -1567,8 +1711,11 @@ void fc_exch_mgr_reset(struct fc_lport *lport, u32 sid, u32 did) } EXPORT_SYMBOL(fc_exch_mgr_reset); -/* - * Handle incoming ELS REC - Read Exchange Concise. +/** + * fc_exch_els_rec() - Handler for ELS REC (Read Exchange Concise) requests + * @sp: The sequence the REC is on + * @rfp: The REC frame + * * Note that the requesting port may be different than the S_ID in the request. */ static void fc_exch_els_rec(struct fc_seq *sp, struct fc_frame *rfp) @@ -1650,10 +1797,11 @@ reject: fc_frame_free(rfp); } -/* - * Handle response from RRQ. - * Not much to do here, really. - * Should report errors. +/** + * fc_exch_rrq_resp() - Handler for RRQ responses + * @sp: The sequence that the RRQ is on + * @fp: The RRQ frame + * @arg: The exchange that the RRQ is on * * TODO: fix error handler. */ @@ -1695,11 +1843,25 @@ cleanup: /** - * This function is for exch_seq_send function pointer in - * struct libfc_function_template, see comment block on - * exch_seq_send for description of this function. + * fc_exch_seq_send() - Send a frame using a new exchange and sequence + * @lport: The local port to send the frame on + * @fp: The frame to be sent + * @resp: The response handler for this request + * @destructor: The destructor for the exchange + * @arg: The argument to be passed to the response handler + * @timer_msec: The timeout period for the exchange + * + * The frame pointer with some of the header's fields must be + * filled before calling this routine, those fields are: + * + * - routing control + * - FC port did + * - FC port sid + * - FC header type + * - frame control + * - parameter or relative offset */ -static struct fc_seq *fc_exch_seq_send(struct fc_lport *lp, +static struct fc_seq *fc_exch_seq_send(struct fc_lport *lport, struct fc_frame *fp, void (*resp)(struct fc_seq *, struct fc_frame *fp, @@ -1713,7 +1875,7 @@ static struct fc_seq *fc_exch_seq_send(struct fc_lport *lp, struct fc_frame_header *fh; int rc = 1; - ep = fc_exch_alloc(lp, fp); + ep = fc_exch_alloc(lport, fp); if (!ep) { fc_frame_free(fp); return NULL; @@ -1725,7 +1887,7 @@ static struct fc_seq *fc_exch_seq_send(struct fc_lport *lp, ep->destructor = destructor; ep->arg = arg; ep->r_a_tov = FC_DEF_R_A_TOV; - ep->lp = lp; + ep->lp = lport; sp = &ep->seq; ep->fh_type = fh->fh_type; /* save for possbile timeout handling */ @@ -1733,10 +1895,10 @@ static struct fc_seq *fc_exch_seq_send(struct fc_lport *lp, fc_exch_setup_hdr(ep, fp, ep->f_ctl); sp->cnt++; - if (ep->xid <= lp->lro_xid) + if (ep->xid <= lport->lro_xid) fc_fcp_ddp_setup(fr_fsp(fp), ep->xid); - if (unlikely(lp->tt.frame_send(lp, fp))) + if (unlikely(lport->tt.frame_send(lport, fp))) goto err; if (timer_msec) @@ -1755,21 +1917,23 @@ err: return NULL; } -/* - * Send ELS RRQ - Reinstate Recovery Qualifier. +/** + * fc_exch_rrq() - Send an ELS RRQ (Reinstate Recovery Qualifier) command + * @ep: The exchange to send the RRQ on + * * This tells the remote port to stop blocking the use of * the exchange and the seq_cnt range. */ static void fc_exch_rrq(struct fc_exch *ep) { - struct fc_lport *lp; + struct fc_lport *lport; struct fc_els_rrq *rrq; struct fc_frame *fp; u32 did; - lp = ep->lp; + lport = ep->lp; - fp = fc_frame_alloc(lp, sizeof(*rrq)); + fp = fc_frame_alloc(lport, sizeof(*rrq)); if (!fp) goto retry; @@ -1785,10 +1949,11 @@ static void fc_exch_rrq(struct fc_exch *ep) did = ep->sid; fc_fill_fc_hdr(fp, FC_RCTL_ELS_REQ, did, - fc_host_port_id(lp->host), FC_TYPE_ELS, + fc_host_port_id(lport->host), FC_TYPE_ELS, FC_FC_FIRST_SEQ | FC_FC_END_SEQ | FC_FC_SEQ_INIT, 0); - if (fc_exch_seq_send(lp, fp, fc_exch_rrq_resp, NULL, ep, lp->e_d_tov)) + if (fc_exch_seq_send(lport, fp, fc_exch_rrq_resp, NULL, ep, + lport->e_d_tov)) return; retry: @@ -1805,8 +1970,10 @@ retry: } -/* - * Handle incoming ELS RRQ - Reset Recovery Qualifier. +/** + * fc_exch_els_rrq() - Handler for ELS RRQ (Reset Recovery Qualifier) requests + * @sp: The sequence that the RRQ is on + * @fp: The RRQ frame */ static void fc_exch_els_rrq(struct fc_seq *sp, struct fc_frame *fp) { @@ -1872,6 +2039,12 @@ out: fc_exch_release(ep); /* drop hold from fc_exch_find */ } +/** + * fc_exch_mgr_add() - Add an exchange manager to a local port's list of EMs + * @lport: The local port to add the exchange manager to + * @mp: The exchange manager to be added to the local port + * @match: The match routine that indicates when this EM should be used + */ struct fc_exch_mgr_anchor *fc_exch_mgr_add(struct fc_lport *lport, struct fc_exch_mgr *mp, bool (*match)(struct fc_frame *)) @@ -1891,6 +2064,10 @@ struct fc_exch_mgr_anchor *fc_exch_mgr_add(struct fc_lport *lport, } EXPORT_SYMBOL(fc_exch_mgr_add); +/** + * fc_exch_mgr_destroy() - Destroy an exchange manager + * @kref: The reference to the EM to be destroyed + */ static void fc_exch_mgr_destroy(struct kref *kref) { struct fc_exch_mgr *mp = container_of(kref, struct fc_exch_mgr, kref); @@ -1900,6 +2077,10 @@ static void fc_exch_mgr_destroy(struct kref *kref) kfree(mp); } +/** + * fc_exch_mgr_del() - Delete an EM from a local port's list + * @ema: The exchange manager anchor identifying the EM to be deleted + */ void fc_exch_mgr_del(struct fc_exch_mgr_anchor *ema) { /* remove EM anchor from EM anchors list */ @@ -1910,9 +2091,9 @@ void fc_exch_mgr_del(struct fc_exch_mgr_anchor *ema) EXPORT_SYMBOL(fc_exch_mgr_del); /** - * fc_exch_mgr_list_clone() - share all exchange manager objects - * @src: source lport to clone exchange managers from - * @dst: new lport that takes references to all the exchange managers + * fc_exch_mgr_list_clone() - Share all exchange manager objects + * @src: Source lport to clone exchange managers from + * @dst: New lport that takes references to all the exchange managers */ int fc_exch_mgr_list_clone(struct fc_lport *src, struct fc_lport *dst) { @@ -1929,7 +2110,15 @@ err: return -ENOMEM; } -struct fc_exch_mgr *fc_exch_mgr_alloc(struct fc_lport *lp, +/** + * fc_exch_mgr_alloc() - Allocate an exchange manager + * @lport: The local port that the new EM will be associated with + * @class: The default FC class for new exchanges + * @min_xid: The minimum XID for exchanges from the new EM + * @max_xid: The maximum XID for exchanges from the new EM + * @match: The match routine for the new EM + */ +struct fc_exch_mgr *fc_exch_mgr_alloc(struct fc_lport *lport, enum fc_class class, u16 min_xid, u16 max_xid, bool (*match)(struct fc_frame *)) @@ -1942,7 +2131,7 @@ struct fc_exch_mgr *fc_exch_mgr_alloc(struct fc_lport *lp, if (max_xid <= min_xid || max_xid == FC_XID_UNKNOWN || (min_xid & fc_cpu_mask) != 0) { - FC_LPORT_DBG(lp, "Invalid min_xid 0x:%x and max_xid 0x:%x\n", + FC_LPORT_DBG(lport, "Invalid min_xid 0x:%x and max_xid 0x:%x\n", min_xid, max_xid); return NULL; } @@ -1985,7 +2174,7 @@ struct fc_exch_mgr *fc_exch_mgr_alloc(struct fc_lport *lp, } kref_init(&mp->kref); - if (!fc_exch_mgr_add(lp, mp, match)) { + if (!fc_exch_mgr_add(lport, mp, match)) { free_percpu(mp->pool); goto free_mempool; } @@ -2006,6 +2195,10 @@ free_mp: } EXPORT_SYMBOL(fc_exch_mgr_alloc); +/** + * fc_exch_mgr_free() - Free all exchange managers on a local port + * @lport: The local port whose EMs are to be freed + */ void fc_exch_mgr_free(struct fc_lport *lport) { struct fc_exch_mgr_anchor *ema, *next; @@ -2015,10 +2208,12 @@ void fc_exch_mgr_free(struct fc_lport *lport) } EXPORT_SYMBOL(fc_exch_mgr_free); -/* - * Receive a frame +/** + * fc_exch_recv() - Handler for received frames + * @lport: The local port the frame was received on + * @fp: The received frame */ -void fc_exch_recv(struct fc_lport *lp, struct fc_frame *fp) +void fc_exch_recv(struct fc_lport *lport, struct fc_frame *fp) { struct fc_frame_header *fh = fc_frame_header_get(fp); struct fc_exch_mgr_anchor *ema; @@ -2026,8 +2221,8 @@ void fc_exch_recv(struct fc_lport *lp, struct fc_frame *fp) u16 oxid; /* lport lock ? */ - if (!lp || lp->state == LPORT_ST_DISABLED) { - FC_LPORT_DBG(lp, "Receiving frames for an lport that " + if (!lport || lport->state == LPORT_ST_DISABLED) { + FC_LPORT_DBG(lport, "Receiving frames for an lport that " "has not been initialized correctly\n"); fc_frame_free(fp); return; @@ -2036,7 +2231,7 @@ void fc_exch_recv(struct fc_lport *lp, struct fc_frame *fp) f_ctl = ntoh24(fh->fh_f_ctl); oxid = ntohs(fh->fh_ox_id); if (f_ctl & FC_FC_EX_CTX) { - list_for_each_entry(ema, &lp->ema_list, ema_list) { + list_for_each_entry(ema, &lport->ema_list, ema_list) { if ((oxid >= ema->mp->min_xid) && (oxid <= ema->mp->max_xid)) { found = 1; @@ -2045,13 +2240,13 @@ void fc_exch_recv(struct fc_lport *lp, struct fc_frame *fp) } if (!found) { - FC_LPORT_DBG(lp, "Received response for out " + FC_LPORT_DBG(lport, "Received response for out " "of range oxid:%hx\n", oxid); fc_frame_free(fp); return; } } else - ema = list_entry(lp->ema_list.prev, typeof(*ema), ema_list); + ema = list_entry(lport->ema_list.prev, typeof(*ema), ema_list); /* * If frame is marked invalid, just drop it. @@ -2070,37 +2265,42 @@ void fc_exch_recv(struct fc_lport *lp, struct fc_frame *fp) else if (f_ctl & FC_FC_SEQ_CTX) fc_exch_recv_resp(ema->mp, fp); else - fc_exch_recv_req(lp, ema->mp, fp); + fc_exch_recv_req(lport, ema->mp, fp); break; default: - FC_LPORT_DBG(lp, "dropping invalid frame (eof %x)", fr_eof(fp)); + FC_LPORT_DBG(lport, "dropping invalid frame (eof %x)", + fr_eof(fp)); fc_frame_free(fp); } } EXPORT_SYMBOL(fc_exch_recv); -int fc_exch_init(struct fc_lport *lp) +/** + * fc_exch_init() - Initialize the exchange layer for a local port + * @lport: The local port to initialize the exchange layer for + */ +int fc_exch_init(struct fc_lport *lport) { - if (!lp->tt.seq_start_next) - lp->tt.seq_start_next = fc_seq_start_next; + if (!lport->tt.seq_start_next) + lport->tt.seq_start_next = fc_seq_start_next; - if (!lp->tt.exch_seq_send) - lp->tt.exch_seq_send = fc_exch_seq_send; + if (!lport->tt.exch_seq_send) + lport->tt.exch_seq_send = fc_exch_seq_send; - if (!lp->tt.seq_send) - lp->tt.seq_send = fc_seq_send; + if (!lport->tt.seq_send) + lport->tt.seq_send = fc_seq_send; - if (!lp->tt.seq_els_rsp_send) - lp->tt.seq_els_rsp_send = fc_seq_els_rsp_send; + if (!lport->tt.seq_els_rsp_send) + lport->tt.seq_els_rsp_send = fc_seq_els_rsp_send; - if (!lp->tt.exch_done) - lp->tt.exch_done = fc_exch_done; + if (!lport->tt.exch_done) + lport->tt.exch_done = fc_exch_done; - if (!lp->tt.exch_mgr_reset) - lp->tt.exch_mgr_reset = fc_exch_mgr_reset; + if (!lport->tt.exch_mgr_reset) + lport->tt.exch_mgr_reset = fc_exch_mgr_reset; - if (!lp->tt.seq_exch_abort) - lp->tt.seq_exch_abort = fc_seq_exch_abort; + if (!lport->tt.seq_exch_abort) + lport->tt.seq_exch_abort = fc_seq_exch_abort; return 0; } @@ -2141,7 +2341,10 @@ int fc_setup_exch_mgr() return 0; } -void fc_destroy_exch_mgr(void) +/** + * fc_destroy_exch_mgr() - Destroy an exchange manager + */ +void fc_destroy_exch_mgr() { kmem_cache_destroy(fc_em_cachep); } diff --git a/drivers/scsi/libfc/fc_fcp.c b/drivers/scsi/libfc/fc_fcp.c index 98279fe0d0c7..970b54f653b7 100644 --- a/drivers/scsi/libfc/fc_fcp.c +++ b/drivers/scsi/libfc/fc_fcp.c @@ -67,10 +67,16 @@ struct kmem_cache *scsi_pkt_cachep; #define CMD_SCSI_STATUS(Cmnd) ((Cmnd)->SCp.Status) #define CMD_RESID_LEN(Cmnd) ((Cmnd)->SCp.buffers_residual) +/** + * struct fc_fcp_internal - FCP layer internal data + * @scsi_pkt_pool: Memory pool to draw FCP packets from + * @scsi_pkt_queue: Current FCP packets + * @throttled: The FCP packet queue is throttled + */ struct fc_fcp_internal { - mempool_t *scsi_pkt_pool; + mempool_t *scsi_pkt_pool; struct list_head scsi_pkt_queue; - u8 throttled; + u8 throttled; }; #define fc_get_scsi_internal(x) ((struct fc_fcp_internal *)(x)->scsi_priv) @@ -84,9 +90,9 @@ static void fc_fcp_recv(struct fc_seq *, struct fc_frame *, void *); static void fc_fcp_resp(struct fc_fcp_pkt *, struct fc_frame *); static void fc_fcp_complete_locked(struct fc_fcp_pkt *); static void fc_tm_done(struct fc_seq *, struct fc_frame *, void *); -static void fc_fcp_error(struct fc_fcp_pkt *fsp, struct fc_frame *fp); +static void fc_fcp_error(struct fc_fcp_pkt *, struct fc_frame *); static void fc_timeout_error(struct fc_fcp_pkt *); -static void fc_fcp_timeout(unsigned long data); +static void fc_fcp_timeout(unsigned long); static void fc_fcp_rec(struct fc_fcp_pkt *); static void fc_fcp_rec_error(struct fc_fcp_pkt *, struct fc_frame *); static void fc_fcp_rec_resp(struct fc_seq *, struct fc_frame *, void *); @@ -125,23 +131,22 @@ static void fc_fcp_srr_error(struct fc_fcp_pkt *, struct fc_frame *); #define FC_FCP_DFLT_QUEUE_DEPTH 32 /** - * fc_fcp_pkt_alloc - allocation routine for scsi_pkt packet - * @lp: fc lport struct - * @gfp: gfp flags for allocation + * fc_fcp_pkt_alloc() - Allocate a fcp_pkt + * @lport: The local port that the FCP packet is for + * @gfp: GFP flags for allocation * - * This is used by upper layer scsi driver. - * Return Value : scsi_pkt structure or null on allocation failure. - * Context : call from process context. no locking required. + * Return value: fcp_pkt structure or null on allocation failure. + * Context: Can be called from process context, no lock is required. */ -static struct fc_fcp_pkt *fc_fcp_pkt_alloc(struct fc_lport *lp, gfp_t gfp) +static struct fc_fcp_pkt *fc_fcp_pkt_alloc(struct fc_lport *lport, gfp_t gfp) { - struct fc_fcp_internal *si = fc_get_scsi_internal(lp); + struct fc_fcp_internal *si = fc_get_scsi_internal(lport); struct fc_fcp_pkt *fsp; fsp = mempool_alloc(si->scsi_pkt_pool, gfp); if (fsp) { memset(fsp, 0, sizeof(*fsp)); - fsp->lp = lp; + fsp->lp = lport; atomic_set(&fsp->ref_cnt, 1); init_timer(&fsp->timer); INIT_LIST_HEAD(&fsp->list); @@ -151,12 +156,11 @@ static struct fc_fcp_pkt *fc_fcp_pkt_alloc(struct fc_lport *lp, gfp_t gfp) } /** - * fc_fcp_pkt_release() - release hold on scsi_pkt packet - * @fsp: fcp packet struct + * fc_fcp_pkt_release() - Release hold on a fcp_pkt + * @fsp: The FCP packet to be released * - * This is used by upper layer scsi driver. - * Context : call from process and interrupt context. - * no locking required + * Context: Can be called from process or interrupt context, + * no lock is required. */ static void fc_fcp_pkt_release(struct fc_fcp_pkt *fsp) { @@ -167,20 +171,25 @@ static void fc_fcp_pkt_release(struct fc_fcp_pkt *fsp) } } +/** + * fc_fcp_pkt_hold() - Hold a fcp_pkt + * @fsp: The FCP packet to be held + */ static void fc_fcp_pkt_hold(struct fc_fcp_pkt *fsp) { atomic_inc(&fsp->ref_cnt); } /** - * fc_fcp_pkt_destory() - release hold on scsi_pkt packet - * @seq: exchange sequence - * @fsp: fcp packet struct + * fc_fcp_pkt_destory() - Release hold on a fcp_pkt + * @seq: The sequence that the FCP packet is on (required by destructor API) + * @fsp: The FCP packet to be released + * + * This routine is called by a destructor callback in the exch_seq_send() + * routine of the libfc Transport Template. The 'struct fc_seq' is a required + * argument even though it is not used by this routine. * - * Release hold on scsi_pkt packet set to keep scsi_pkt - * till EM layer exch resource is not freed. - * Context : called from from EM layer. - * no locking required + * Context: No locking required. */ static void fc_fcp_pkt_destroy(struct fc_seq *seq, void *fsp) { @@ -188,10 +197,10 @@ static void fc_fcp_pkt_destroy(struct fc_seq *seq, void *fsp) } /** - * fc_fcp_lock_pkt() - lock a packet and get a ref to it. - * @fsp: fcp packet + * fc_fcp_lock_pkt() - Lock a fcp_pkt and increase its reference count + * @fsp: The FCP packet to be locked and incremented * - * We should only return error if we return a command to scsi-ml before + * We should only return error if we return a command to SCSI-ml before * getting a response. This can happen in cases where we send a abort, but * do not wait for the response and the abort and command can be passing * each other on the wire/network-layer. @@ -216,18 +225,33 @@ static inline int fc_fcp_lock_pkt(struct fc_fcp_pkt *fsp) return 0; } +/** + * fc_fcp_unlock_pkt() - Release a fcp_pkt's lock and decrement its + * reference count + * @fsp: The FCP packet to be unlocked and decremented + */ static inline void fc_fcp_unlock_pkt(struct fc_fcp_pkt *fsp) { spin_unlock_bh(&fsp->scsi_pkt_lock); fc_fcp_pkt_release(fsp); } +/** + * fc_fcp_timer_set() - Start a timer for a fcp_pkt + * @fsp: The FCP packet to start a timer for + * @delay: The timeout period for the timer + */ static void fc_fcp_timer_set(struct fc_fcp_pkt *fsp, unsigned long delay) { if (!(fsp->state & FC_SRB_COMPL)) mod_timer(&fsp->timer, jiffies + delay); } +/** + * fc_fcp_send_abort() - Send an abort for exchanges associated with a + * fcp_pkt + * @fsp: The FCP packet to abort exchanges on + */ static int fc_fcp_send_abort(struct fc_fcp_pkt *fsp) { if (!fsp->seq_ptr) @@ -237,9 +261,14 @@ static int fc_fcp_send_abort(struct fc_fcp_pkt *fsp) return fsp->lp->tt.seq_exch_abort(fsp->seq_ptr, 0); } -/* - * Retry command. - * An abort isn't needed. +/** + * fc_fcp_retry_cmd() - Retry a fcp_pkt + * @fsp: The FCP packet to be retried + * + * Sets the status code to be FC_ERROR and then calls + * fc_fcp_complete_locked() which in turn calls fc_io_compl(). + * fc_io_compl() will notify the SCSI-ml that the I/O is done. + * The SCSI-ml will retry the command. */ static void fc_fcp_retry_cmd(struct fc_fcp_pkt *fsp) { @@ -254,43 +283,35 @@ static void fc_fcp_retry_cmd(struct fc_fcp_pkt *fsp) fc_fcp_complete_locked(fsp); } -/* - * fc_fcp_ddp_setup - calls to LLD's ddp_setup to set up DDP - * transfer for a read I/O indicated by the fc_fcp_pkt. - * @fsp: ptr to the fc_fcp_pkt - * - * This is called in exch_seq_send() when we have a newly allocated - * exchange with a valid exchange id to setup ddp. - * - * returns: none +/** + * fc_fcp_ddp_setup() - Calls a LLD's ddp_setup routine to set up DDP context + * @fsp: The FCP packet that will manage the DDP frames + * @xid: The XID that will be used for the DDP exchange */ void fc_fcp_ddp_setup(struct fc_fcp_pkt *fsp, u16 xid) { - struct fc_lport *lp; + struct fc_lport *lport; if (!fsp) return; - lp = fsp->lp; + lport = fsp->lp; if ((fsp->req_flags & FC_SRB_READ) && - (lp->lro_enabled) && (lp->tt.ddp_setup)) { - if (lp->tt.ddp_setup(lp, xid, scsi_sglist(fsp->cmd), - scsi_sg_count(fsp->cmd))) + (lport->lro_enabled) && (lport->tt.ddp_setup)) { + if (lport->tt.ddp_setup(lport, xid, scsi_sglist(fsp->cmd), + scsi_sg_count(fsp->cmd))) fsp->xfer_ddp = xid; } } -/* - * fc_fcp_ddp_done - calls to LLD's ddp_done to release any - * DDP related resources for this I/O if it is initialized - * as a ddp transfer - * @fsp: ptr to the fc_fcp_pkt - * - * returns: none +/** + * fc_fcp_ddp_done() - Calls a LLD's ddp_done routine to release any + * DDP related resources for a fcp_pkt + * @fsp: The FCP packet that DDP had been used on */ static void fc_fcp_ddp_done(struct fc_fcp_pkt *fsp) { - struct fc_lport *lp; + struct fc_lport *lport; if (!fsp) return; @@ -298,22 +319,22 @@ static void fc_fcp_ddp_done(struct fc_fcp_pkt *fsp) if (fsp->xfer_ddp == FC_XID_UNKNOWN) return; - lp = fsp->lp; - if (lp->tt.ddp_done) { - fsp->xfer_len = lp->tt.ddp_done(lp, fsp->xfer_ddp); + lport = fsp->lp; + if (lport->tt.ddp_done) { + fsp->xfer_len = lport->tt.ddp_done(lport, fsp->xfer_ddp); fsp->xfer_ddp = FC_XID_UNKNOWN; } } - -/* - * Receive SCSI data from target. - * Called after receiving solicited data. +/** + * fc_fcp_recv_data() - Handler for receiving SCSI-FCP data from a target + * @fsp: The FCP packet the data is on + * @fp: The data frame */ static void fc_fcp_recv_data(struct fc_fcp_pkt *fsp, struct fc_frame *fp) { struct scsi_cmnd *sc = fsp->cmd; - struct fc_lport *lp = fsp->lp; + struct fc_lport *lport = fsp->lp; struct fcoe_dev_stats *stats; struct fc_frame_header *fh; size_t start_offset; @@ -363,13 +384,13 @@ static void fc_fcp_recv_data(struct fc_fcp_pkt *fsp, struct fc_frame *fp) if (~crc != le32_to_cpu(fr_crc(fp))) { crc_err: - stats = fc_lport_get_stats(lp); + stats = fc_lport_get_stats(lport); stats->ErrorFrames++; /* FIXME - per cpu count, not total count! */ if (stats->InvalidCRCCount++ < 5) printk(KERN_WARNING "libfc: CRC error on data " "frame for port (%6x)\n", - fc_host_port_id(lp->host)); + fc_host_port_id(lport->host)); /* * Assume the frame is total garbage. * We may have copied it over the good part @@ -397,18 +418,17 @@ crc_err: } /** - * fc_fcp_send_data() - Send SCSI data to target. - * @fsp: ptr to fc_fcp_pkt - * @sp: ptr to this sequence - * @offset: starting offset for this data request - * @seq_blen: the burst length for this data request + * fc_fcp_send_data() - Send SCSI data to a target + * @fsp: The FCP packet the data is on + * @sp: The sequence the data is to be sent on + * @offset: The starting offset for this data request + * @seq_blen: The burst length for this data request * * Called after receiving a Transfer Ready data descriptor. - * if LLD is capable of seq offload then send down seq_blen - * size of data in single frame, otherwise send multiple FC - * frames of max FC frame payload supported by target port. - * - * Returns : 0 for success. + * If the LLD is capable of sequence offload then send down the + * seq_blen ammount of data in single frame, otherwise send + * multiple frames of the maximum frame payload supported by + * the target port. */ static int fc_fcp_send_data(struct fc_fcp_pkt *fsp, struct fc_seq *seq, size_t offset, size_t seq_blen) @@ -417,7 +437,7 @@ static int fc_fcp_send_data(struct fc_fcp_pkt *fsp, struct fc_seq *seq, struct scsi_cmnd *sc; struct scatterlist *sg; struct fc_frame *fp = NULL; - struct fc_lport *lp = fsp->lp; + struct fc_lport *lport = fsp->lp; size_t remaining; size_t t_blen; size_t tlen; @@ -426,7 +446,7 @@ static int fc_fcp_send_data(struct fc_fcp_pkt *fsp, struct fc_seq *seq, int error; void *data = NULL; void *page_addr; - int using_sg = lp->sg_supp; + int using_sg = lport->sg_supp; u32 f_ctl; WARN_ON(seq_blen <= 0); @@ -448,10 +468,10 @@ static int fc_fcp_send_data(struct fc_fcp_pkt *fsp, struct fc_seq *seq, * to max FC frame payload previously set in fsp->max_payload. */ t_blen = fsp->max_payload; - if (lp->seq_offload) { - t_blen = min(seq_blen, (size_t)lp->lso_max); + if (lport->seq_offload) { + t_blen = min(seq_blen, (size_t)lport->lso_max); FC_FCP_DBG(fsp, "fsp=%p:lso:blen=%zx lso_max=0x%x t_blen=%zx\n", - fsp, seq_blen, lp->lso_max, t_blen); + fsp, seq_blen, lport->lso_max, t_blen); } WARN_ON(t_blen < FC_MIN_MAX_PAYLOAD); @@ -463,7 +483,7 @@ static int fc_fcp_send_data(struct fc_fcp_pkt *fsp, struct fc_seq *seq, remaining = seq_blen; fh_parm_offset = frame_offset = offset; tlen = 0; - seq = lp->tt.seq_start_next(seq); + seq = lport->tt.seq_start_next(seq); f_ctl = FC_FC_REL_OFF; WARN_ON(!seq); @@ -486,11 +506,11 @@ static int fc_fcp_send_data(struct fc_fcp_pkt *fsp, struct fc_seq *seq, if (tlen % 4) using_sg = 0; if (using_sg) { - fp = _fc_frame_alloc(lp, 0); + fp = _fc_frame_alloc(lport, 0); if (!fp) return -ENOMEM; } else { - fp = fc_frame_alloc(lp, tlen); + fp = fc_frame_alloc(lport, tlen); if (!fp) return -ENOMEM; @@ -550,7 +570,7 @@ static int fc_fcp_send_data(struct fc_fcp_pkt *fsp, struct fc_seq *seq, /* * send fragment using for a sequence. */ - error = lp->tt.seq_send(lp, seq, fp); + error = lport->tt.seq_send(lport, seq, fp); if (error) { WARN_ON(1); /* send error should be rare */ fc_fcp_retry_cmd(fsp); @@ -562,6 +582,11 @@ static int fc_fcp_send_data(struct fc_fcp_pkt *fsp, struct fc_seq *seq, return 0; } +/** + * fc_fcp_abts_resp() - Send an ABTS response + * @fsp: The FCP packet that is being aborted + * @fp: The response frame + */ static void fc_fcp_abts_resp(struct fc_fcp_pkt *fsp, struct fc_frame *fp) { int ba_done = 1; @@ -598,8 +623,8 @@ static void fc_fcp_abts_resp(struct fc_fcp_pkt *fsp, struct fc_frame *fp) } /** - * fc_fcp_reduce_can_queue() - drop can_queue - * @lp: lport to drop queueing for + * fc_fcp_reduce_can_queue() - Reduce the can_queue value for a local port + * @lport: The local port to reduce can_queue on * * If we are getting memory allocation failures, then we may * be trying to execute too many commands. We let the running @@ -607,37 +632,36 @@ static void fc_fcp_abts_resp(struct fc_fcp_pkt *fsp, struct fc_frame *fp) * can_queue. Eventually we will hit the point where we run * on all reserved structs. */ -static void fc_fcp_reduce_can_queue(struct fc_lport *lp) +static void fc_fcp_reduce_can_queue(struct fc_lport *lport) { - struct fc_fcp_internal *si = fc_get_scsi_internal(lp); + struct fc_fcp_internal *si = fc_get_scsi_internal(lport); unsigned long flags; int can_queue; - spin_lock_irqsave(lp->host->host_lock, flags); + spin_lock_irqsave(lport->host->host_lock, flags); if (si->throttled) goto done; si->throttled = 1; - can_queue = lp->host->can_queue; + can_queue = lport->host->can_queue; can_queue >>= 1; if (!can_queue) can_queue = 1; - lp->host->can_queue = can_queue; - shost_printk(KERN_ERR, lp->host, "libfc: Could not allocate frame.\n" + lport->host->can_queue = can_queue; + shost_printk(KERN_ERR, lport->host, "libfc: Could not allocate frame.\n" "Reducing can_queue to %d.\n", can_queue); done: - spin_unlock_irqrestore(lp->host->host_lock, flags); + spin_unlock_irqrestore(lport->host->host_lock, flags); } /** - * fc_fcp_recv() - Reveive FCP frames + * fc_fcp_recv() - Reveive an FCP frame * @seq: The sequence the frame is on - * @fp: The FC frame + * @fp: The received frame * @arg: The related FCP packet * - * Return : None - * Context : called from Soft IRQ context - * can not called holding list lock + * Context: Called from Soft IRQ context. Can not be called + * holding the FCP packet list lock. */ static void fc_fcp_recv(struct fc_seq *seq, struct fc_frame *fp, void *arg) { @@ -710,6 +734,11 @@ errout: fc_fcp_reduce_can_queue(lport); } +/** + * fc_fcp_resp() - Handler for FCP responses + * @fsp: The FCP packet the response is for + * @fp: The response frame + */ static void fc_fcp_resp(struct fc_fcp_pkt *fsp, struct fc_frame *fp) { struct fc_frame_header *fh; @@ -823,15 +852,16 @@ err: } /** - * fc_fcp_complete_locked() - complete processing of a fcp packet - * @fsp: fcp packet + * fc_fcp_complete_locked() - Complete processing of a fcp_pkt with the + * fcp_pkt lock held + * @fsp: The FCP packet to be completed * * This function may sleep if a timer is pending. The packet lock must be * held, and the host lock must not be held. */ static void fc_fcp_complete_locked(struct fc_fcp_pkt *fsp) { - struct fc_lport *lp = fsp->lp; + struct fc_lport *lport = fsp->lp; struct fc_seq *seq; struct fc_exch *ep; u32 f_ctl; @@ -862,7 +892,7 @@ static void fc_fcp_complete_locked(struct fc_fcp_pkt *fsp) struct fc_frame *conf_frame; struct fc_seq *csp; - csp = lp->tt.seq_start_next(seq); + csp = lport->tt.seq_start_next(seq); conf_frame = fc_frame_alloc(fsp->lp, 0); if (conf_frame) { f_ctl = FC_FC_SEQ_INIT; @@ -871,43 +901,48 @@ static void fc_fcp_complete_locked(struct fc_fcp_pkt *fsp) fc_fill_fc_hdr(conf_frame, FC_RCTL_DD_SOL_CTL, ep->did, ep->sid, FC_TYPE_FCP, f_ctl, 0); - lp->tt.seq_send(lp, csp, conf_frame); + lport->tt.seq_send(lport, csp, conf_frame); } } - lp->tt.exch_done(seq); + lport->tt.exch_done(seq); } fc_io_compl(fsp); } +/** + * fc_fcp_cleanup_cmd() - Cancel the active exchange on a fcp_pkt + * @fsp: The FCP packet whose exchanges should be canceled + * @error: The reason for the cancellation + */ static void fc_fcp_cleanup_cmd(struct fc_fcp_pkt *fsp, int error) { - struct fc_lport *lp = fsp->lp; + struct fc_lport *lport = fsp->lp; if (fsp->seq_ptr) { - lp->tt.exch_done(fsp->seq_ptr); + lport->tt.exch_done(fsp->seq_ptr); fsp->seq_ptr = NULL; } fsp->status_code = error; } /** - * fc_fcp_cleanup_each_cmd() - Cleanup active commads - * @lp: logical port - * @id: target id - * @lun: lun - * @error: fsp status code + * fc_fcp_cleanup_each_cmd() - Cancel all exchanges on a local port + * @lport: The local port whose exchanges should be canceled + * @id: The target's ID + * @lun: The LUN + * @error: The reason for cancellation * * If lun or id is -1, they are ignored. */ -static void fc_fcp_cleanup_each_cmd(struct fc_lport *lp, unsigned int id, +static void fc_fcp_cleanup_each_cmd(struct fc_lport *lport, unsigned int id, unsigned int lun, int error) { - struct fc_fcp_internal *si = fc_get_scsi_internal(lp); + struct fc_fcp_internal *si = fc_get_scsi_internal(lport); struct fc_fcp_pkt *fsp; struct scsi_cmnd *sc_cmd; unsigned long flags; - spin_lock_irqsave(lp->host->host_lock, flags); + spin_lock_irqsave(lport->host->host_lock, flags); restart: list_for_each_entry(fsp, &si->scsi_pkt_queue, list) { sc_cmd = fsp->cmd; @@ -918,7 +953,7 @@ restart: continue; fc_fcp_pkt_hold(fsp); - spin_unlock_irqrestore(lp->host->host_lock, flags); + spin_unlock_irqrestore(lport->host->host_lock, flags); if (!fc_fcp_lock_pkt(fsp)) { fc_fcp_cleanup_cmd(fsp, error); @@ -927,35 +962,36 @@ restart: } fc_fcp_pkt_release(fsp); - spin_lock_irqsave(lp->host->host_lock, flags); + spin_lock_irqsave(lport->host->host_lock, flags); /* * while we dropped the lock multiple pkts could * have been released, so we have to start over. */ goto restart; } - spin_unlock_irqrestore(lp->host->host_lock, flags); + spin_unlock_irqrestore(lport->host->host_lock, flags); } -static void fc_fcp_abort_io(struct fc_lport *lp) +/** + * fc_fcp_abort_io() - Abort all FCP-SCSI exchanges on a local port + * @lport: The local port whose exchanges are to be aborted + */ +static void fc_fcp_abort_io(struct fc_lport *lport) { - fc_fcp_cleanup_each_cmd(lp, -1, -1, FC_HRD_ERROR); + fc_fcp_cleanup_each_cmd(lport, -1, -1, FC_HRD_ERROR); } /** - * fc_fcp_pkt_send() - send a fcp packet to the lower level. - * @lp: fc lport - * @fsp: fc packet. + * fc_fcp_pkt_send() - Send a fcp_pkt + * @lport: The local port to send the FCP packet on + * @fsp: The FCP packet to send * - * This is called by upper layer protocol. - * Return : zero for success and -1 for failure - * Context : called from queuecommand which can be called from process - * or scsi soft irq. - * Locks : called with the host lock and irqs disabled. + * Return: Zero for success and -1 for failure + * Locks: Called with the host lock and irqs disabled. */ -static int fc_fcp_pkt_send(struct fc_lport *lp, struct fc_fcp_pkt *fsp) +static int fc_fcp_pkt_send(struct fc_lport *lport, struct fc_fcp_pkt *fsp) { - struct fc_fcp_internal *si = fc_get_scsi_internal(lp); + struct fc_fcp_internal *si = fc_get_scsi_internal(lport); int rc; fsp->cmd->SCp.ptr = (char *)fsp; @@ -967,16 +1003,22 @@ static int fc_fcp_pkt_send(struct fc_lport *lp, struct fc_fcp_pkt *fsp) memcpy(fsp->cdb_cmd.fc_cdb, fsp->cmd->cmnd, fsp->cmd->cmd_len); list_add_tail(&fsp->list, &si->scsi_pkt_queue); - spin_unlock_irq(lp->host->host_lock); - rc = lp->tt.fcp_cmd_send(lp, fsp, fc_fcp_recv); - spin_lock_irq(lp->host->host_lock); + spin_unlock_irq(lport->host->host_lock); + rc = lport->tt.fcp_cmd_send(lport, fsp, fc_fcp_recv); + spin_lock_irq(lport->host->host_lock); if (rc) list_del(&fsp->list); return rc; } -static int fc_fcp_cmd_send(struct fc_lport *lp, struct fc_fcp_pkt *fsp, +/** + * fc_fcp_cmd_send() - Send a FCP command + * @lport: The local port to send the command on + * @fsp: The FCP packet the command is on + * @resp: The handler for the response + */ +static int fc_fcp_cmd_send(struct fc_lport *lport, struct fc_fcp_pkt *fsp, void (*resp)(struct fc_seq *, struct fc_frame *fp, void *arg)) @@ -984,14 +1026,14 @@ static int fc_fcp_cmd_send(struct fc_lport *lp, struct fc_fcp_pkt *fsp, struct fc_frame *fp; struct fc_seq *seq; struct fc_rport *rport; - struct fc_rport_libfc_priv *rp; + struct fc_rport_libfc_priv *rpriv; const size_t len = sizeof(fsp->cdb_cmd); int rc = 0; if (fc_fcp_lock_pkt(fsp)) return 0; - fp = fc_frame_alloc(lp, sizeof(fsp->cdb_cmd)); + fp = fc_frame_alloc(lport, sizeof(fsp->cdb_cmd)); if (!fp) { rc = -1; goto unlock; @@ -1001,13 +1043,14 @@ static int fc_fcp_cmd_send(struct fc_lport *lp, struct fc_fcp_pkt *fsp, fr_fsp(fp) = fsp; rport = fsp->rport; fsp->max_payload = rport->maxframe_size; - rp = rport->dd_data; + rpriv = rport->dd_data; fc_fill_fc_hdr(fp, FC_RCTL_DD_UNSOL_CMD, rport->port_id, - fc_host_port_id(rp->local_port->host), FC_TYPE_FCP, + fc_host_port_id(rpriv->local_port->host), FC_TYPE_FCP, FC_FC_FIRST_SEQ | FC_FC_END_SEQ | FC_FC_SEQ_INIT, 0); - seq = lp->tt.exch_seq_send(lp, fp, resp, fc_fcp_pkt_destroy, fsp, 0); + seq = lport->tt.exch_seq_send(lport, fp, resp, fc_fcp_pkt_destroy, + fsp, 0); if (!seq) { rc = -1; goto unlock; @@ -1025,8 +1068,10 @@ unlock: return rc; } -/* - * transport error handler +/** + * fc_fcp_error() - Handler for FCP layer errors + * @fsp: The FCP packet the error is on + * @fp: The frame that has errored */ static void fc_fcp_error(struct fc_fcp_pkt *fsp, struct fc_frame *fp) { @@ -1051,9 +1096,11 @@ unlock: fc_fcp_unlock_pkt(fsp); } -/* - * Scsi abort handler- calls to send an abort - * and then wait for abort completion +/** + * fc_fcp_pkt_abort() - Abort a fcp_pkt + * @fsp: The FCP packet to abort on + * + * Called to send an abort and then wait for abort completion */ static int fc_fcp_pkt_abort(struct fc_fcp_pkt *fsp) { @@ -1082,14 +1129,15 @@ static int fc_fcp_pkt_abort(struct fc_fcp_pkt *fsp) return rc; } -/* - * Retry LUN reset after resource allocation failed. +/** + * fc_lun_reset_send() - Send LUN reset command + * @data: The FCP packet that identifies the LUN to be reset */ static void fc_lun_reset_send(unsigned long data) { struct fc_fcp_pkt *fsp = (struct fc_fcp_pkt *)data; - struct fc_lport *lp = fsp->lp; - if (lp->tt.fcp_cmd_send(lp, fsp, fc_tm_done)) { + struct fc_lport *lport = fsp->lp; + if (lport->tt.fcp_cmd_send(lport, fsp, fc_tm_done)) { if (fsp->recov_retry++ >= FC_MAX_RECOV_RETRY) return; if (fc_fcp_lock_pkt(fsp)) @@ -1100,11 +1148,15 @@ static void fc_lun_reset_send(unsigned long data) } } -/* - * Scsi device reset handler- send a LUN RESET to the device - * and wait for reset reply +/** + * fc_lun_reset() - Send a LUN RESET command to a device + * and wait for the reply + * @lport: The local port to sent the comand on + * @fsp: The FCP packet that identifies the LUN to be reset + * @id: The SCSI command ID + * @lun: The LUN ID to be reset */ -static int fc_lun_reset(struct fc_lport *lp, struct fc_fcp_pkt *fsp, +static int fc_lun_reset(struct fc_lport *lport, struct fc_fcp_pkt *fsp, unsigned int id, unsigned int lun) { int rc; @@ -1132,14 +1184,14 @@ static int fc_lun_reset(struct fc_lport *lp, struct fc_fcp_pkt *fsp, spin_lock_bh(&fsp->scsi_pkt_lock); if (fsp->seq_ptr) { - lp->tt.exch_done(fsp->seq_ptr); + lport->tt.exch_done(fsp->seq_ptr); fsp->seq_ptr = NULL; } fsp->wait_for_comp = 0; spin_unlock_bh(&fsp->scsi_pkt_lock); if (!rc) { - FC_SCSI_DBG(lp, "lun reset failed\n"); + FC_SCSI_DBG(lport, "lun reset failed\n"); return FAILED; } @@ -1147,13 +1199,16 @@ static int fc_lun_reset(struct fc_lport *lp, struct fc_fcp_pkt *fsp, if (fsp->cdb_status != FCP_TMF_CMPL) return FAILED; - FC_SCSI_DBG(lp, "lun reset to lun %u completed\n", lun); - fc_fcp_cleanup_each_cmd(lp, id, lun, FC_CMD_ABORTED); + FC_SCSI_DBG(lport, "lun reset to lun %u completed\n", lun); + fc_fcp_cleanup_each_cmd(lport, id, lun, FC_CMD_ABORTED); return SUCCESS; } -/* - * Task Managment response handler +/** + * fc_tm_done() - Task Managment response handler + * @seq: The sequence that the response is on + * @fp: The response frame + * @arg: The FCP packet the response is for */ static void fc_tm_done(struct fc_seq *seq, struct fc_frame *fp, void *arg) { @@ -1190,34 +1245,31 @@ static void fc_tm_done(struct fc_seq *seq, struct fc_frame *fp, void *arg) fc_fcp_unlock_pkt(fsp); } -static void fc_fcp_cleanup(struct fc_lport *lp) +/** + * fc_fcp_cleanup() - Cleanup all FCP exchanges on a local port + * @lport: The local port to be cleaned up + */ +static void fc_fcp_cleanup(struct fc_lport *lport) { - fc_fcp_cleanup_each_cmd(lp, -1, -1, FC_ERROR); + fc_fcp_cleanup_each_cmd(lport, -1, -1, FC_ERROR); } -/* - * fc_fcp_timeout: called by OS timer function. - * - * The timer has been inactivated and must be reactivated if desired - * using fc_fcp_timer_set(). - * - * Algorithm: - * - * If REC is supported, just issue it, and return. The REC exchange will - * complete or time out, and recovery can continue at that point. - * - * Otherwise, if the response has been received without all the data, - * it has been ER_TIMEOUT since the response was received. +/** + * fc_fcp_timeout() - Handler for fcp_pkt timeouts + * @data: The FCP packet that has timed out * - * If the response has not been received, - * we see if data was received recently. If it has been, we continue waiting, - * otherwise, we abort the command. + * If REC is supported then just issue it and return. The REC exchange will + * complete or time out and recovery can continue at that point. Otherwise, + * if the response has been received without all the data it has been + * ER_TIMEOUT since the response was received. If the response has not been + * received we see if data was received recently. If it has been then we + * continue waiting, otherwise, we abort the command. */ static void fc_fcp_timeout(unsigned long data) { struct fc_fcp_pkt *fsp = (struct fc_fcp_pkt *)data; struct fc_rport *rport = fsp->rport; - struct fc_rport_libfc_priv *rp = rport->dd_data; + struct fc_rport_libfc_priv *rpriv = rport->dd_data; if (fc_fcp_lock_pkt(fsp)) return; @@ -1227,7 +1279,7 @@ static void fc_fcp_timeout(unsigned long data) fsp->state |= FC_SRB_FCP_PROCESSING_TMO; - if (rp->flags & FC_RP_FLAGS_REC_SUPPORTED) + if (rpriv->flags & FC_RP_FLAGS_REC_SUPPORTED) fc_fcp_rec(fsp); else if (time_after_eq(fsp->last_pkt_time + (FC_SCSI_ER_TIMEOUT / 2), jiffies)) @@ -1241,35 +1293,37 @@ unlock: fc_fcp_unlock_pkt(fsp); } -/* - * Send a REC ELS request +/** + * fc_fcp_rec() - Send a REC ELS request + * @fsp: The FCP packet to send the REC request on */ static void fc_fcp_rec(struct fc_fcp_pkt *fsp) { - struct fc_lport *lp; + struct fc_lport *lport; struct fc_frame *fp; struct fc_rport *rport; - struct fc_rport_libfc_priv *rp; + struct fc_rport_libfc_priv *rpriv; - lp = fsp->lp; + lport = fsp->lp; rport = fsp->rport; - rp = rport->dd_data; - if (!fsp->seq_ptr || rp->rp_state != RPORT_ST_READY) { + rpriv = rport->dd_data; + if (!fsp->seq_ptr || rpriv->rp_state != RPORT_ST_READY) { fsp->status_code = FC_HRD_ERROR; fsp->io_status = 0; fc_fcp_complete_locked(fsp); return; } - fp = fc_frame_alloc(lp, sizeof(struct fc_els_rec)); + fp = fc_frame_alloc(lport, sizeof(struct fc_els_rec)); if (!fp) goto retry; fr_seq(fp) = fsp->seq_ptr; fc_fill_fc_hdr(fp, FC_RCTL_ELS_REQ, rport->port_id, - fc_host_port_id(rp->local_port->host), FC_TYPE_ELS, + fc_host_port_id(rpriv->local_port->host), FC_TYPE_ELS, FC_FC_FIRST_SEQ | FC_FC_END_SEQ | FC_FC_SEQ_INIT, 0); - if (lp->tt.elsct_send(lp, rport->port_id, fp, ELS_REC, fc_fcp_rec_resp, - fsp, jiffies_to_msecs(FC_SCSI_REC_TOV))) { + if (lport->tt.elsct_send(lport, rport->port_id, fp, ELS_REC, + fc_fcp_rec_resp, fsp, + jiffies_to_msecs(FC_SCSI_REC_TOV))) { fc_fcp_pkt_hold(fsp); /* hold while REC outstanding */ return; } @@ -1280,12 +1334,16 @@ retry: fc_timeout_error(fsp); } -/* - * Receive handler for REC ELS frame - * if it is a reject then let the scsi layer to handle - * the timeout. if it is a LS_ACC then if the io was not completed - * then set the timeout and return otherwise complete the exchange - * and tell the scsi layer to restart the I/O. +/** + * fc_fcp_rec_resp() - Handler for REC ELS responses + * @seq: The sequence the response is on + * @fp: The response frame + * @arg: The FCP packet the response is on + * + * If the response is a reject then the scsi layer will handle + * the timeout. If the response is a LS_ACC then if the I/O was not completed + * set the timeout and return. If the I/O was completed then complete the + * exchange and tell the SCSI layer. */ static void fc_fcp_rec_resp(struct fc_seq *seq, struct fc_frame *fp, void *arg) { @@ -1297,7 +1355,7 @@ static void fc_fcp_rec_resp(struct fc_seq *seq, struct fc_frame *fp, void *arg) u32 offset; enum dma_data_direction data_dir; enum fc_rctl r_ctl; - struct fc_rport_libfc_priv *rp; + struct fc_rport_libfc_priv *rpriv; if (IS_ERR(fp)) { fc_fcp_rec_error(fsp, fp); @@ -1320,13 +1378,13 @@ static void fc_fcp_rec_resp(struct fc_seq *seq, struct fc_frame *fp, void *arg) /* fall through */ case ELS_RJT_UNSUP: FC_FCP_DBG(fsp, "device does not support REC\n"); - rp = fsp->rport->dd_data; + rpriv = fsp->rport->dd_data; /* * if we do not spport RECs or got some bogus * reason then resetup timer so we check for * making progress. */ - rp->flags &= ~FC_RP_FLAGS_REC_SUPPORTED; + rpriv->flags &= ~FC_RP_FLAGS_REC_SUPPORTED; fc_fcp_timer_set(fsp, FC_SCSI_ER_TIMEOUT); break; case ELS_RJT_LOGIC: @@ -1423,8 +1481,10 @@ out: fc_frame_free(fp); } -/* - * Handle error response or timeout for REC exchange. +/** + * fc_fcp_rec_error() - Handler for REC errors + * @fsp: The FCP packet the error is on + * @fp: The REC frame */ static void fc_fcp_rec_error(struct fc_fcp_pkt *fsp, struct fc_frame *fp) { @@ -1463,10 +1523,9 @@ out: fc_fcp_pkt_release(fsp); /* drop hold for outstanding REC */ } -/* - * Time out error routine: - * abort's the I/O close the exchange and - * send completion notification to scsi layer +/** + * fc_timeout_error() - Handler for fcp_pkt timeouts + * @fsp: The FCP packt that has timed out */ static void fc_timeout_error(struct fc_fcp_pkt *fsp) { @@ -1480,16 +1539,18 @@ static void fc_timeout_error(struct fc_fcp_pkt *fsp) fc_fcp_send_abort(fsp); } -/* - * Sequence retransmission request. +/** + * fc_fcp_srr() - Send a SRR request (Sequence Retransmission Request) + * @fsp: The FCP packet the SRR is to be sent on + * @r_ctl: The R_CTL field for the SRR request * This is called after receiving status but insufficient data, or * when expecting status but the request has timed out. */ static void fc_fcp_srr(struct fc_fcp_pkt *fsp, enum fc_rctl r_ctl, u32 offset) { - struct fc_lport *lp = fsp->lp; + struct fc_lport *lport = fsp->lp; struct fc_rport *rport; - struct fc_rport_libfc_priv *rp; + struct fc_rport_libfc_priv *rpriv; struct fc_exch *ep = fc_seq_exch(fsp->seq_ptr); struct fc_seq *seq; struct fcp_srr *srr; @@ -1497,12 +1558,13 @@ static void fc_fcp_srr(struct fc_fcp_pkt *fsp, enum fc_rctl r_ctl, u32 offset) u8 cdb_op; rport = fsp->rport; - rp = rport->dd_data; + rpriv = rport->dd_data; cdb_op = fsp->cdb_cmd.fc_cdb[0]; - if (!(rp->flags & FC_RP_FLAGS_RETRY) || rp->rp_state != RPORT_ST_READY) + if (!(rpriv->flags & FC_RP_FLAGS_RETRY) || + rpriv->rp_state != RPORT_ST_READY) goto retry; /* shouldn't happen */ - fp = fc_frame_alloc(lp, sizeof(*srr)); + fp = fc_frame_alloc(lport, sizeof(*srr)); if (!fp) goto retry; @@ -1515,11 +1577,11 @@ static void fc_fcp_srr(struct fc_fcp_pkt *fsp, enum fc_rctl r_ctl, u32 offset) srr->srr_rel_off = htonl(offset); fc_fill_fc_hdr(fp, FC_RCTL_ELS4_REQ, rport->port_id, - fc_host_port_id(rp->local_port->host), FC_TYPE_FCP, + fc_host_port_id(rpriv->local_port->host), FC_TYPE_FCP, FC_FC_FIRST_SEQ | FC_FC_END_SEQ | FC_FC_SEQ_INIT, 0); - seq = lp->tt.exch_seq_send(lp, fp, fc_fcp_srr_resp, NULL, - fsp, jiffies_to_msecs(FC_SCSI_REC_TOV)); + seq = lport->tt.exch_seq_send(lport, fp, fc_fcp_srr_resp, NULL, + fsp, jiffies_to_msecs(FC_SCSI_REC_TOV)); if (!seq) goto retry; @@ -1533,8 +1595,11 @@ retry: fc_fcp_retry_cmd(fsp); } -/* - * Handle response from SRR. +/** + * fc_fcp_srr_resp() - Handler for SRR response + * @seq: The sequence the SRR is on + * @fp: The SRR frame + * @arg: The FCP packet the SRR is on */ static void fc_fcp_srr_resp(struct fc_seq *seq, struct fc_frame *fp, void *arg) { @@ -1580,6 +1645,11 @@ out: fc_fcp_pkt_release(fsp); /* drop hold for outstanding SRR */ } +/** + * fc_fcp_srr_error() - Handler for SRR errors + * @fsp: The FCP packet that the SRR error is on + * @fp: The SRR frame + */ static void fc_fcp_srr_error(struct fc_fcp_pkt *fsp, struct fc_frame *fp) { if (fc_fcp_lock_pkt(fsp)) @@ -1604,31 +1674,36 @@ out: fc_fcp_pkt_release(fsp); /* drop hold for outstanding SRR */ } -static inline int fc_fcp_lport_queue_ready(struct fc_lport *lp) +/** + * fc_fcp_lport_queue_ready() - Determine if the lport and it's queue is ready + * @lport: The local port to be checked + */ +static inline int fc_fcp_lport_queue_ready(struct fc_lport *lport) { /* lock ? */ - return (lp->state == LPORT_ST_READY) && lp->link_up && !lp->qfull; + return (lport->state == LPORT_ST_READY) && + lport->link_up && !lport->qfull; } /** - * fc_queuecommand - The queuecommand function of the scsi template - * @cmd: struct scsi_cmnd to be executed - * @done: Callback function to be called when cmd is completed + * fc_queuecommand() - The queuecommand function of the SCSI template + * @cmd: The scsi_cmnd to be executed + * @done: The callback function to be called when the scsi_cmnd is complete * - * this is the i/o strategy routine, called by the scsi layer - * this routine is called with holding the host_lock. + * This is the i/o strategy routine, called by the SCSI layer. This routine + * is called with the host_lock held. */ int fc_queuecommand(struct scsi_cmnd *sc_cmd, void (*done)(struct scsi_cmnd *)) { - struct fc_lport *lp; + struct fc_lport *lport; struct fc_rport *rport = starget_to_rport(scsi_target(sc_cmd->device)); struct fc_fcp_pkt *fsp; - struct fc_rport_libfc_priv *rp; + struct fc_rport_libfc_priv *rpriv; int rval; int rc = 0; struct fcoe_dev_stats *stats; - lp = shost_priv(sc_cmd->device->host); + lport = shost_priv(sc_cmd->device->host); rval = fc_remote_port_chkready(rport); if (rval) { @@ -1647,14 +1722,14 @@ int fc_queuecommand(struct scsi_cmnd *sc_cmd, void (*done)(struct scsi_cmnd *)) goto out; } - rp = rport->dd_data; + rpriv = rport->dd_data; - if (!fc_fcp_lport_queue_ready(lp)) { + if (!fc_fcp_lport_queue_ready(lport)) { rc = SCSI_MLQUEUE_HOST_BUSY; goto out; } - fsp = fc_fcp_pkt_alloc(lp, GFP_ATOMIC); + fsp = fc_fcp_pkt_alloc(lport, GFP_ATOMIC); if (fsp == NULL) { rc = SCSI_MLQUEUE_HOST_BUSY; goto out; @@ -1664,7 +1739,7 @@ int fc_queuecommand(struct scsi_cmnd *sc_cmd, void (*done)(struct scsi_cmnd *)) * build the libfc request pkt */ fsp->cmd = sc_cmd; /* save the cmd */ - fsp->lp = lp; /* save the softc ptr */ + fsp->lp = lport; /* save the softc ptr */ fsp->rport = rport; /* set the remote port ptr */ fsp->xfer_ddp = FC_XID_UNKNOWN; sc_cmd->scsi_done = done; @@ -1678,7 +1753,7 @@ int fc_queuecommand(struct scsi_cmnd *sc_cmd, void (*done)(struct scsi_cmnd *)) /* * setup the data direction */ - stats = fc_lport_get_stats(lp); + stats = fc_lport_get_stats(lport); if (sc_cmd->sc_data_direction == DMA_FROM_DEVICE) { fsp->req_flags = FC_SRB_READ; stats->InputRequests++; @@ -1692,7 +1767,7 @@ int fc_queuecommand(struct scsi_cmnd *sc_cmd, void (*done)(struct scsi_cmnd *)) stats->ControlRequests++; } - fsp->tgt_flags = rp->flags; + fsp->tgt_flags = rpriv->flags; init_timer(&fsp->timer); fsp->timer.data = (unsigned long)fsp; @@ -1702,7 +1777,7 @@ int fc_queuecommand(struct scsi_cmnd *sc_cmd, void (*done)(struct scsi_cmnd *)) * if we get -1 return then put the request in the pending * queue. */ - rval = fc_fcp_pkt_send(lp, fsp); + rval = fc_fcp_pkt_send(lport, fsp); if (rval != 0) { fsp->state = FC_SRB_FREE; fc_fcp_pkt_release(fsp); @@ -1714,18 +1789,17 @@ out: EXPORT_SYMBOL(fc_queuecommand); /** - * fc_io_compl() - Handle responses for completed commands - * @fsp: scsi packet - * - * Translates a error to a Linux SCSI error. + * fc_io_compl() - Handle responses for completed commands + * @fsp: The FCP packet that is complete * + * Translates fcp_pkt errors to a Linux SCSI errors. * The fcp packet lock must be held when calling. */ static void fc_io_compl(struct fc_fcp_pkt *fsp) { struct fc_fcp_internal *si; struct scsi_cmnd *sc_cmd; - struct fc_lport *lp; + struct fc_lport *lport; unsigned long flags; /* release outstanding ddp context */ @@ -1738,11 +1812,11 @@ static void fc_io_compl(struct fc_fcp_pkt *fsp) spin_lock_bh(&fsp->scsi_pkt_lock); } - lp = fsp->lp; - si = fc_get_scsi_internal(lp); - spin_lock_irqsave(lp->host->host_lock, flags); + lport = fsp->lp; + si = fc_get_scsi_internal(lport); + spin_lock_irqsave(lport->host->host_lock, flags); if (!fsp->cmd) { - spin_unlock_irqrestore(lp->host->host_lock, flags); + spin_unlock_irqrestore(lport->host->host_lock, flags); return; } @@ -1759,7 +1833,7 @@ static void fc_io_compl(struct fc_fcp_pkt *fsp) fsp->cmd = NULL; if (!sc_cmd->SCp.ptr) { - spin_unlock_irqrestore(lp->host->host_lock, flags); + spin_unlock_irqrestore(lport->host->host_lock, flags); return; } @@ -1826,7 +1900,7 @@ static void fc_io_compl(struct fc_fcp_pkt *fsp) list_del(&fsp->list); sc_cmd->SCp.ptr = NULL; sc_cmd->scsi_done(sc_cmd); - spin_unlock_irqrestore(lp->host->host_lock, flags); + spin_unlock_irqrestore(lport->host->host_lock, flags); /* release ref from initial allocation in queue command */ fc_fcp_pkt_release(fsp); @@ -1834,35 +1908,34 @@ static void fc_io_compl(struct fc_fcp_pkt *fsp) /** * fc_eh_abort() - Abort a command - * @sc_cmd: scsi command to abort + * @sc_cmd: The SCSI command to abort * - * From scsi host template. - * send ABTS to the target device and wait for the response - * sc_cmd is the pointer to the command to be aborted. + * From SCSI host template. + * Send an ABTS to the target device and wait for the response. */ int fc_eh_abort(struct scsi_cmnd *sc_cmd) { struct fc_fcp_pkt *fsp; - struct fc_lport *lp; + struct fc_lport *lport; int rc = FAILED; unsigned long flags; - lp = shost_priv(sc_cmd->device->host); - if (lp->state != LPORT_ST_READY) + lport = shost_priv(sc_cmd->device->host); + if (lport->state != LPORT_ST_READY) return rc; - else if (!lp->link_up) + else if (!lport->link_up) return rc; - spin_lock_irqsave(lp->host->host_lock, flags); + spin_lock_irqsave(lport->host->host_lock, flags); fsp = CMD_SP(sc_cmd); if (!fsp) { /* command completed while scsi eh was setting up */ - spin_unlock_irqrestore(lp->host->host_lock, flags); + spin_unlock_irqrestore(lport->host->host_lock, flags); return SUCCESS; } /* grab a ref so the fsp and sc_cmd cannot be relased from under us */ fc_fcp_pkt_hold(fsp); - spin_unlock_irqrestore(lp->host->host_lock, flags); + spin_unlock_irqrestore(lport->host->host_lock, flags); if (fc_fcp_lock_pkt(fsp)) { /* completed while we were waiting for timer to be deleted */ @@ -1880,34 +1953,32 @@ release_pkt: EXPORT_SYMBOL(fc_eh_abort); /** - * fc_eh_device_reset() Reset a single LUN - * @sc_cmd: scsi command + * fc_eh_device_reset() - Reset a single LUN + * @sc_cmd: The SCSI command which identifies the device whose + * LUN is to be reset * - * Set from scsi host template to send tm cmd to the target and wait for the - * response. + * Set from SCSI host template. */ int fc_eh_device_reset(struct scsi_cmnd *sc_cmd) { - struct fc_lport *lp; + struct fc_lport *lport; struct fc_fcp_pkt *fsp; struct fc_rport *rport = starget_to_rport(scsi_target(sc_cmd->device)); int rc = FAILED; - struct fc_rport_libfc_priv *rp; int rval; rval = fc_remote_port_chkready(rport); if (rval) goto out; - rp = rport->dd_data; - lp = shost_priv(sc_cmd->device->host); + lport = shost_priv(sc_cmd->device->host); - if (lp->state != LPORT_ST_READY) + if (lport->state != LPORT_ST_READY) return rc; - FC_SCSI_DBG(lp, "Resetting rport (%6x)\n", rport->port_id); + FC_SCSI_DBG(lport, "Resetting rport (%6x)\n", rport->port_id); - fsp = fc_fcp_pkt_alloc(lp, GFP_NOIO); + fsp = fc_fcp_pkt_alloc(lport, GFP_NOIO); if (fsp == NULL) { printk(KERN_WARNING "libfc: could not allocate scsi_pkt\n"); sc_cmd->result = DID_NO_CONNECT << 16; @@ -1919,13 +1990,13 @@ int fc_eh_device_reset(struct scsi_cmnd *sc_cmd) * the sc passed in is not setup for execution like when sent * through the queuecommand callout. */ - fsp->lp = lp; /* save the softc ptr */ + fsp->lp = lport; /* save the softc ptr */ fsp->rport = rport; /* set the remote port ptr */ /* * flush outstanding commands */ - rc = fc_lun_reset(lp, fsp, scmd_id(sc_cmd), sc_cmd->device->lun); + rc = fc_lun_reset(lport, fsp, scmd_id(sc_cmd), sc_cmd->device->lun); fsp->state = FC_SRB_FREE; fc_fcp_pkt_release(fsp); @@ -1935,38 +2006,39 @@ out: EXPORT_SYMBOL(fc_eh_device_reset); /** - * fc_eh_host_reset() - The reset function will reset the ports on the host. - * @sc_cmd: scsi command + * fc_eh_host_reset() - Reset a Scsi_Host. + * @sc_cmd: The SCSI command that identifies the SCSI host to be reset */ int fc_eh_host_reset(struct scsi_cmnd *sc_cmd) { struct Scsi_Host *shost = sc_cmd->device->host; - struct fc_lport *lp = shost_priv(shost); + struct fc_lport *lport = shost_priv(shost); unsigned long wait_tmo; - FC_SCSI_DBG(lp, "Resetting host\n"); + FC_SCSI_DBG(lport, "Resetting host\n"); - lp->tt.lport_reset(lp); + lport->tt.lport_reset(lport); wait_tmo = jiffies + FC_HOST_RESET_TIMEOUT; - while (!fc_fcp_lport_queue_ready(lp) && time_before(jiffies, wait_tmo)) + while (!fc_fcp_lport_queue_ready(lport) && time_before(jiffies, + wait_tmo)) msleep(1000); - if (fc_fcp_lport_queue_ready(lp)) { + if (fc_fcp_lport_queue_ready(lport)) { shost_printk(KERN_INFO, shost, "libfc: Host reset succeeded " - "on port (%6x)\n", fc_host_port_id(lp->host)); + "on port (%6x)\n", fc_host_port_id(lport->host)); return SUCCESS; } else { shost_printk(KERN_INFO, shost, "libfc: Host reset failed, " "port (%6x) is not ready.\n", - fc_host_port_id(lp->host)); + fc_host_port_id(lport->host)); return FAILED; } } EXPORT_SYMBOL(fc_eh_host_reset); /** - * fc_slave_alloc() - configure queue depth - * @sdev: scsi device + * fc_slave_alloc() - Configure the queue depth of a Scsi_Host + * @sdev: The SCSI device that identifies the SCSI host * * Configures queue depth based on host's cmd_per_len. If not set * then we use the libfc default. @@ -1988,6 +2060,12 @@ int fc_slave_alloc(struct scsi_device *sdev) } EXPORT_SYMBOL(fc_slave_alloc); +/** + * fc_change_queue_depth() - Change a device's queue depth + * @sdev: The SCSI device whose queue depth is to change + * @qdepth: The new queue depth + * @reason: The resason for the change + */ int fc_change_queue_depth(struct scsi_device *sdev, int qdepth, int reason) { switch (reason) { @@ -2007,6 +2085,11 @@ int fc_change_queue_depth(struct scsi_device *sdev, int qdepth, int reason) } EXPORT_SYMBOL(fc_change_queue_depth); +/** + * fc_change_queue_type() - Change a device's queue type + * @sdev: The SCSI device whose queue depth is to change + * @tag_type: Identifier for queue type + */ int fc_change_queue_type(struct scsi_device *sdev, int tag_type) { if (sdev->tagged_supported) { @@ -2022,17 +2105,21 @@ int fc_change_queue_type(struct scsi_device *sdev, int tag_type) } EXPORT_SYMBOL(fc_change_queue_type); -void fc_fcp_destroy(struct fc_lport *lp) +/** + * fc_fcp_destory() - Tear down the FCP layer for a given local port + * @lport: The local port that no longer needs the FCP layer + */ +void fc_fcp_destroy(struct fc_lport *lport) { - struct fc_fcp_internal *si = fc_get_scsi_internal(lp); + struct fc_fcp_internal *si = fc_get_scsi_internal(lport); if (!list_empty(&si->scsi_pkt_queue)) printk(KERN_ERR "libfc: Leaked SCSI packets when destroying " - "port (%6x)\n", fc_host_port_id(lp->host)); + "port (%6x)\n", fc_host_port_id(lport->host)); mempool_destroy(si->scsi_pkt_pool); kfree(si); - lp->scsi_priv = NULL; + lport->scsi_priv = NULL; } EXPORT_SYMBOL(fc_fcp_destroy); @@ -2058,24 +2145,28 @@ void fc_destroy_fcp() kmem_cache_destroy(scsi_pkt_cachep); } -int fc_fcp_init(struct fc_lport *lp) +/** + * fc_fcp_init() - Initialize the FCP layer for a local port + * @lport: The local port to initialize the exchange layer for + */ +int fc_fcp_init(struct fc_lport *lport) { int rc; struct fc_fcp_internal *si; - if (!lp->tt.fcp_cmd_send) - lp->tt.fcp_cmd_send = fc_fcp_cmd_send; + if (!lport->tt.fcp_cmd_send) + lport->tt.fcp_cmd_send = fc_fcp_cmd_send; - if (!lp->tt.fcp_cleanup) - lp->tt.fcp_cleanup = fc_fcp_cleanup; + if (!lport->tt.fcp_cleanup) + lport->tt.fcp_cleanup = fc_fcp_cleanup; - if (!lp->tt.fcp_abort_io) - lp->tt.fcp_abort_io = fc_fcp_abort_io; + if (!lport->tt.fcp_abort_io) + lport->tt.fcp_abort_io = fc_fcp_abort_io; si = kzalloc(sizeof(struct fc_fcp_internal), GFP_KERNEL); if (!si) return -ENOMEM; - lp->scsi_priv = si; + lport->scsi_priv = si; INIT_LIST_HEAD(&si->scsi_pkt_queue); si->scsi_pkt_pool = mempool_create_slab_pool(2, scsi_pkt_cachep); diff --git a/drivers/scsi/libfc/fc_libfc.c b/drivers/scsi/libfc/fc_libfc.c index 295eafb0316f..39f4b6ab04b4 100644 --- a/drivers/scsi/libfc/fc_libfc.c +++ b/drivers/scsi/libfc/fc_libfc.c @@ -75,7 +75,7 @@ module_exit(libfc_exit); /** * fc_copy_buffer_to_sglist() - This routine copies the data of a buffer - * into a scatter-gather list (SG list). + * into a scatter-gather list (SG list). * * @buf: pointer to the data buffer. * @len: the byte-length of the data buffer. @@ -84,7 +84,7 @@ module_exit(libfc_exit); * @offset: pointer to the current offset in the SG list. * @km_type: dedicated page table slot type for kmap_atomic. * @crc: pointer to the 32-bit crc value. - * If crc is NULL, CRC is not calculated. + * If crc is NULL, CRC is not calculated. */ u32 fc_copy_buffer_to_sglist(void *buf, size_t len, struct scatterlist *sg, diff --git a/drivers/scsi/libfc/fc_libfc.h b/drivers/scsi/libfc/fc_libfc.h index e4b5e9280cb0..741fd5c72e13 100644 --- a/drivers/scsi/libfc/fc_libfc.h +++ b/drivers/scsi/libfc/fc_libfc.h @@ -22,22 +22,22 @@ #define FC_LIBFC_LOGGING 0x01 /* General logging, not categorized */ #define FC_LPORT_LOGGING 0x02 /* lport layer logging */ -#define FC_DISC_LOGGING 0x04 /* discovery layer logging */ +#define FC_DISC_LOGGING 0x04 /* discovery layer logging */ #define FC_RPORT_LOGGING 0x08 /* rport layer logging */ -#define FC_FCP_LOGGING 0x10 /* I/O path logging */ -#define FC_EM_LOGGING 0x20 /* Exchange Manager logging */ -#define FC_EXCH_LOGGING 0x40 /* Exchange/Sequence logging */ -#define FC_SCSI_LOGGING 0x80 /* SCSI logging (mostly error handling) */ +#define FC_FCP_LOGGING 0x10 /* I/O path logging */ +#define FC_EM_LOGGING 0x20 /* Exchange Manager logging */ +#define FC_EXCH_LOGGING 0x40 /* Exchange/Sequence logging */ +#define FC_SCSI_LOGGING 0x80 /* SCSI logging (mostly error handling) */ extern unsigned int fc_debug_logging; -#define FC_CHECK_LOGGING(LEVEL, CMD) \ -do { \ - if (unlikely(fc_debug_logging & LEVEL)) \ - do { \ - CMD; \ - } while (0); \ -} while (0) +#define FC_CHECK_LOGGING(LEVEL, CMD) \ + do { \ + if (unlikely(fc_debug_logging & LEVEL)) \ + do { \ + CMD; \ + } while (0); \ + } while (0) #define FC_LIBFC_DBG(fmt, args...) \ FC_CHECK_LOGGING(FC_LIBFC_LOGGING, \ @@ -49,10 +49,10 @@ do { \ (lport)->host->host_no, \ fc_host_port_id((lport)->host), ##args)) -#define FC_DISC_DBG(disc, fmt, args...) \ - FC_CHECK_LOGGING(FC_DISC_LOGGING, \ - printk(KERN_INFO "host%u: disc: " fmt, \ - (disc)->lport->host->host_no, \ +#define FC_DISC_DBG(disc, fmt, args...) \ + FC_CHECK_LOGGING(FC_DISC_LOGGING, \ + printk(KERN_INFO "host%u: disc: " fmt, \ + (disc)->lport->host->host_no, \ ##args)) #define FC_RPORT_ID_DBG(lport, port_id, fmt, args...) \ @@ -77,7 +77,7 @@ do { \ exch->xid, ##args)) #define FC_SCSI_DBG(lport, fmt, args...) \ - FC_CHECK_LOGGING(FC_SCSI_LOGGING, \ + FC_CHECK_LOGGING(FC_SCSI_LOGGING, \ printk(KERN_INFO "host%u: scsi: " fmt, \ (lport)->host->host_no, ##args)) diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c index 2162e6b0f43e..90930c435455 100644 --- a/drivers/scsi/libfc/fc_lport.c +++ b/drivers/scsi/libfc/fc_lport.c @@ -133,7 +133,7 @@ static const char *fc_lport_state_names[] = { * @job: The passthrough job * @lport: The local port to pass through a command * @rsp_code: The expected response code - * @sg: job->reply_payload.sg_list + * @sg: job->reply_payload.sg_list * @nents: job->reply_payload.sg_cnt * @offset: The offset into the response data */ @@ -146,6 +146,11 @@ struct fc_bsg_info { size_t offset; }; +/** + * fc_frame_drop() - Dummy frame handler + * @lport: The local port the frame was received on + * @fp: The received frame + */ static int fc_frame_drop(struct fc_lport *lport, struct fc_frame *fp) { fc_frame_free(fp); @@ -172,7 +177,7 @@ static void fc_lport_rport_callback(struct fc_lport *lport, switch (event) { case RPORT_EV_READY: if (lport->state == LPORT_ST_DNS) { - lport->dns_rp = rdata; + lport->dns_rdata = rdata; fc_lport_enter_ns(lport, LPORT_ST_RNN_ID); } else { FC_LPORT_DBG(lport, "Received an READY event " @@ -187,7 +192,7 @@ static void fc_lport_rport_callback(struct fc_lport *lport, case RPORT_EV_LOGO: case RPORT_EV_FAILED: case RPORT_EV_STOP: - lport->dns_rp = NULL; + lport->dns_rdata = NULL; break; case RPORT_EV_NONE: break; @@ -211,8 +216,8 @@ static const char *fc_lport_state(struct fc_lport *lport) /** * fc_lport_ptp_setup() - Create an rport for point-to-point mode - * @lport: The lport to attach the ptp rport to - * @fid: The FID of the ptp rport + * @lport: The lport to attach the ptp rport to + * @remote_fid: The FID of the ptp rport * @remote_wwpn: The WWPN of the ptp rport * @remote_wwnn: The WWNN of the ptp rport */ @@ -221,18 +226,22 @@ static void fc_lport_ptp_setup(struct fc_lport *lport, u64 remote_wwnn) { mutex_lock(&lport->disc.disc_mutex); - if (lport->ptp_rp) - lport->tt.rport_logoff(lport->ptp_rp); - lport->ptp_rp = lport->tt.rport_create(lport, remote_fid); - lport->ptp_rp->ids.port_name = remote_wwpn; - lport->ptp_rp->ids.node_name = remote_wwnn; + if (lport->ptp_rdata) + lport->tt.rport_logoff(lport->ptp_rdata); + lport->ptp_rdata = lport->tt.rport_create(lport, remote_fid); + lport->ptp_rdata->ids.port_name = remote_wwpn; + lport->ptp_rdata->ids.node_name = remote_wwnn; mutex_unlock(&lport->disc.disc_mutex); - lport->tt.rport_login(lport->ptp_rp); + lport->tt.rport_login(lport->ptp_rdata); fc_lport_enter_ready(lport); } +/** + * fc_get_host_port_type() - Return the port type of the given Scsi_Host + * @shost: The SCSI host whose port type is to be determined + */ void fc_get_host_port_type(struct Scsi_Host *shost) { /* TODO - currently just NPORT */ @@ -240,25 +249,33 @@ void fc_get_host_port_type(struct Scsi_Host *shost) } EXPORT_SYMBOL(fc_get_host_port_type); +/** + * fc_get_host_port_state() - Return the port state of the given Scsi_Host + * @shost: The SCSI host whose port state is to be determined + */ void fc_get_host_port_state(struct Scsi_Host *shost) { - struct fc_lport *lp = shost_priv(shost); + struct fc_lport *lport = shost_priv(shost); - mutex_lock(&lp->lp_mutex); - if (!lp->link_up) + mutex_lock(&lport->lp_mutex); + if (!lport->link_up) fc_host_port_state(shost) = FC_PORTSTATE_LINKDOWN; else - switch (lp->state) { + switch (lport->state) { case LPORT_ST_READY: fc_host_port_state(shost) = FC_PORTSTATE_ONLINE; break; default: fc_host_port_state(shost) = FC_PORTSTATE_OFFLINE; } - mutex_unlock(&lp->lp_mutex); + mutex_unlock(&lport->lp_mutex); } EXPORT_SYMBOL(fc_get_host_port_state); +/** + * fc_get_host_speed() - Return the speed of the given Scsi_Host + * @shost: The SCSI host whose port speed is to be determined + */ void fc_get_host_speed(struct Scsi_Host *shost) { struct fc_lport *lport = shost_priv(shost); @@ -267,24 +284,28 @@ void fc_get_host_speed(struct Scsi_Host *shost) } EXPORT_SYMBOL(fc_get_host_speed); +/** + * fc_get_host_stats() - Return the Scsi_Host's statistics + * @shost: The SCSI host whose statistics are to be returned + */ struct fc_host_statistics *fc_get_host_stats(struct Scsi_Host *shost) { struct fc_host_statistics *fcoe_stats; - struct fc_lport *lp = shost_priv(shost); + struct fc_lport *lport = shost_priv(shost); struct timespec v0, v1; unsigned int cpu; - fcoe_stats = &lp->host_stats; + fcoe_stats = &lport->host_stats; memset(fcoe_stats, 0, sizeof(struct fc_host_statistics)); jiffies_to_timespec(jiffies, &v0); - jiffies_to_timespec(lp->boot_time, &v1); + jiffies_to_timespec(lport->boot_time, &v1); fcoe_stats->seconds_since_last_reset = (v0.tv_sec - v1.tv_sec); for_each_possible_cpu(cpu) { struct fcoe_dev_stats *stats; - stats = per_cpu_ptr(lp->dev_stats, cpu); + stats = per_cpu_ptr(lport->dev_stats, cpu); fcoe_stats->tx_frames += stats->TxFrames; fcoe_stats->tx_words += stats->TxWords; @@ -309,12 +330,15 @@ struct fc_host_statistics *fc_get_host_stats(struct Scsi_Host *shost) } EXPORT_SYMBOL(fc_get_host_stats); -/* - * Fill in FLOGI command for request. +/** + * fc_lport_flogi_fill() - Fill in FLOGI command for request + * @lport: The local port the FLOGI is for + * @flogi: The FLOGI command + * @op: The opcode */ -static void -fc_lport_flogi_fill(struct fc_lport *lport, struct fc_els_flogi *flogi, - unsigned int op) +static void fc_lport_flogi_fill(struct fc_lport *lport, + struct fc_els_flogi *flogi, + unsigned int op) { struct fc_els_csp *sp; struct fc_els_cssp *cp; @@ -342,8 +366,10 @@ fc_lport_flogi_fill(struct fc_lport *lport, struct fc_els_flogi *flogi, } } -/* - * Add a supported FC-4 type. +/** + * fc_lport_add_fc4_type() - Add a supported FC-4 type to a local port + * @lport: The local port to add a new FC-4 type to + * @type: The new FC-4 type */ static void fc_lport_add_fc4_type(struct fc_lport *lport, enum fc_fh_type type) { @@ -355,9 +381,9 @@ static void fc_lport_add_fc4_type(struct fc_lport *lport, enum fc_fh_type type) /** * fc_lport_recv_rlir_req() - Handle received Registered Link Incident Report. + * @sp: The sequence in the RLIR exchange + * @fp: The RLIR request frame * @lport: Fibre Channel local port recieving the RLIR - * @sp: current sequence in the RLIR exchange - * @fp: RLIR request frame * * Locking Note: The lport lock is expected to be held before calling * this function. @@ -374,9 +400,9 @@ static void fc_lport_recv_rlir_req(struct fc_seq *sp, struct fc_frame *fp, /** * fc_lport_recv_echo_req() - Handle received ECHO request - * @lport: Fibre Channel local port recieving the ECHO - * @sp: current sequence in the ECHO exchange - * @fp: ECHO request frame + * @sp: The sequence in the ECHO exchange + * @fp: ECHO request frame + * @lport: The local port recieving the ECHO * * Locking Note: The lport lock is expected to be held before calling * this function. @@ -483,9 +509,9 @@ static void fc_lport_recv_rnid_req(struct fc_seq *sp, struct fc_frame *in_fp, /** * fc_lport_recv_logo_req() - Handle received fabric LOGO request - * @lport: Fibre Channel local port recieving the LOGO - * @sp: current sequence in the LOGO exchange - * @fp: LOGO request frame + * @sp: The sequence in the LOGO exchange + * @fp: The LOGO request frame + * @lport: The local port recieving the LOGO * * Locking Note: The lport lock is exected to be held before calling * this function. @@ -500,7 +526,7 @@ static void fc_lport_recv_logo_req(struct fc_seq *sp, struct fc_frame *fp, /** * fc_fabric_login() - Start the lport state machine - * @lport: The lport that should log into the fabric + * @lport: The local port that should log into the fabric * * Locking Note: This function should not be called * with the lport lock held. @@ -538,7 +564,7 @@ void __fc_linkup(struct fc_lport *lport) /** * fc_linkup() - Handler for transport linkup events - * @lport: The lport whose link is up + * @lport: The local port whose link is up */ void fc_linkup(struct fc_lport *lport) { @@ -568,7 +594,7 @@ void __fc_linkdown(struct fc_lport *lport) /** * fc_linkdown() - Handler for transport linkdown events - * @lport: The lport whose link is down + * @lport: The local port whose link is down */ void fc_linkdown(struct fc_lport *lport) { @@ -583,7 +609,7 @@ EXPORT_SYMBOL(fc_linkdown); /** * fc_fabric_logoff() - Logout of the fabric - * @lport: fc_lport pointer to logoff the fabric + * @lport: The local port to logoff the fabric * * Return value: * 0 for success, -1 for failure @@ -592,8 +618,8 @@ int fc_fabric_logoff(struct fc_lport *lport) { lport->tt.disc_stop_final(lport); mutex_lock(&lport->lp_mutex); - if (lport->dns_rp) - lport->tt.rport_logoff(lport->dns_rp); + if (lport->dns_rdata) + lport->tt.rport_logoff(lport->dns_rdata); mutex_unlock(&lport->lp_mutex); lport->tt.rport_flush_queue(); mutex_lock(&lport->lp_mutex); @@ -605,11 +631,9 @@ int fc_fabric_logoff(struct fc_lport *lport) EXPORT_SYMBOL(fc_fabric_logoff); /** - * fc_lport_destroy() - unregister a fc_lport - * @lport: fc_lport pointer to unregister + * fc_lport_destroy() - Unregister a fc_lport + * @lport: The local port to unregister * - * Return value: - * None * Note: * exit routine for fc_lport instance * clean-up all the allocated memory @@ -632,13 +656,9 @@ int fc_lport_destroy(struct fc_lport *lport) EXPORT_SYMBOL(fc_lport_destroy); /** - * fc_set_mfs() - sets up the mfs for the corresponding fc_lport - * @lport: fc_lport pointer to unregister - * @mfs: the new mfs for fc_lport - * - * Set mfs for the given fc_lport to the new mfs. - * - * Return: 0 for success + * fc_set_mfs() - Set the maximum frame size for a local port + * @lport: The local port to set the MFS for + * @mfs: The new MFS */ int fc_set_mfs(struct fc_lport *lport, u32 mfs) { @@ -669,7 +689,7 @@ EXPORT_SYMBOL(fc_set_mfs); /** * fc_lport_disc_callback() - Callback for discovery events - * @lport: FC local port + * @lport: The local port receiving the event * @event: The discovery event */ void fc_lport_disc_callback(struct fc_lport *lport, enum fc_disc_event event) @@ -693,7 +713,7 @@ void fc_lport_disc_callback(struct fc_lport *lport, enum fc_disc_event event) /** * fc_rport_enter_ready() - Enter the ready state and start discovery - * @lport: Fibre Channel local port that is ready + * @lport: The local port that is ready * * Locking Note: The lport lock is expected to be held before calling * this routine. @@ -708,15 +728,15 @@ static void fc_lport_enter_ready(struct fc_lport *lport) fc_vport_set_state(lport->vport, FC_VPORT_ACTIVE); fc_vports_linkchange(lport); - if (!lport->ptp_rp) + if (!lport->ptp_rdata) lport->tt.disc_start(fc_lport_disc_callback, lport); } /** * fc_lport_recv_flogi_req() - Receive a FLOGI request * @sp_in: The sequence the FLOGI is on - * @rx_fp: The frame the FLOGI is in - * @lport: The lport that recieved the request + * @rx_fp: The FLOGI frame + * @lport: The local port that recieved the request * * A received FLOGI request indicates a point-to-point connection. * Accept it with the common service parameters indicating our N port. @@ -802,9 +822,9 @@ out: /** * fc_lport_recv_req() - The generic lport request handler - * @lport: The lport that received the request - * @sp: The sequence the request is on - * @fp: The frame the request is in + * @lport: The local port that received the request + * @sp: The sequence the request is on + * @fp: The request frame * * This function will see if the lport handles the request or * if an rport should handle the request. @@ -872,8 +892,8 @@ static void fc_lport_recv_req(struct fc_lport *lport, struct fc_seq *sp, } /** - * fc_lport_reset() - Reset an lport - * @lport: The lport which should be reset + * fc_lport_reset() - Reset a local port + * @lport: The local port which should be reset * * Locking Note: This functions should not be called with the * lport lock held. @@ -889,18 +909,18 @@ int fc_lport_reset(struct fc_lport *lport) EXPORT_SYMBOL(fc_lport_reset); /** - * fc_lport_reset_locked() - Reset the local port - * @lport: Fibre Channel local port to be reset + * fc_lport_reset_locked() - Reset the local port w/ the lport lock held + * @lport: The local port to be reset * * Locking Note: The lport lock is expected to be held before calling * this routine. */ static void fc_lport_reset_locked(struct fc_lport *lport) { - if (lport->dns_rp) - lport->tt.rport_logoff(lport->dns_rp); + if (lport->dns_rdata) + lport->tt.rport_logoff(lport->dns_rdata); - lport->ptp_rp = NULL; + lport->ptp_rdata = NULL; lport->tt.disc_stop(lport); @@ -911,7 +931,7 @@ static void fc_lport_reset_locked(struct fc_lport *lport) /** * fc_lport_enter_reset() - Reset the local port - * @lport: Fibre Channel local port to be reset + * @lport: The local port to be reset * * Locking Note: The lport lock is expected to be held before calling * this routine. @@ -935,8 +955,8 @@ static void fc_lport_enter_reset(struct fc_lport *lport) } /** - * fc_lport_enter_disabled() - disable the local port - * @lport: Fibre Channel local port to be reset + * fc_lport_enter_disabled() - Disable the local port + * @lport: The local port to be reset * * Locking Note: The lport lock is expected to be held before calling * this routine. @@ -953,8 +973,8 @@ static void fc_lport_enter_disabled(struct fc_lport *lport) /** * fc_lport_error() - Handler for any errors - * @lport: The fc_lport object - * @fp: The frame pointer + * @lport: The local port that the error was on + * @fp: The error code encoded in a frame pointer * * If the error was caused by a resource allocation failure * then wait for half a second and retry, otherwise retry @@ -1002,13 +1022,13 @@ static void fc_lport_error(struct fc_lport *lport, struct fc_frame *fp) /** * fc_lport_ns_resp() - Handle response to a name server - * registration exchange - * @sp: current sequence in exchange - * @fp: response frame + * registration exchange + * @sp: current sequence in exchange + * @fp: response frame * @lp_arg: Fibre Channel host port instance * * Locking Note: This function will be called without the lport lock - * held, but it will lock, call an _enter_* function or fc_lport_error + * held, but it will lock, call an _enter_* function or fc_lport_error() * and then unlock the lport. */ static void fc_lport_ns_resp(struct fc_seq *sp, struct fc_frame *fp, @@ -1027,7 +1047,7 @@ static void fc_lport_ns_resp(struct fc_seq *sp, struct fc_frame *fp, if (lport->state < LPORT_ST_RNN_ID || lport->state > LPORT_ST_RFT_ID) { FC_LPORT_DBG(lport, "Received a name server response, " - "but in state %s\n", fc_lport_state(lport)); + "but in state %s\n", fc_lport_state(lport)); if (IS_ERR(fp)) goto err; goto out; @@ -1072,8 +1092,8 @@ err: /** * fc_lport_scr_resp() - Handle response to State Change Register (SCR) request - * @sp: current sequence in SCR exchange - * @fp: response frame + * @sp: current sequence in SCR exchange + * @fp: response frame * @lp_arg: Fibre Channel lport port instance that sent the registration request * * Locking Note: This function will be called without the lport lock @@ -1119,8 +1139,8 @@ err: } /** - * fc_lport_enter_scr() - Send a State Change Register (SCR) request - * @lport: Fibre Channel local port to register for state changes + * fc_lport_enter_scr() - Send a SCR (State Change Register) request + * @lport: The local port to register for state changes * * Locking Note: The lport lock is expected to be held before calling * this routine. @@ -1212,8 +1232,8 @@ static struct fc_rport_operations fc_lport_rport_ops = { }; /** - * fc_rport_enter_dns() - Create a rport to the name server - * @lport: Fibre Channel local port requesting a rport for the name server + * fc_rport_enter_dns() - Create a fc_rport for the name server + * @lport: The local port requesting a remote port for the name server * * Locking Note: The lport lock is expected to be held before calling * this routine. @@ -1242,8 +1262,8 @@ err: } /** - * fc_lport_timeout() - Handler for the retry_work timer. - * @work: The work struct of the fc_lport + * fc_lport_timeout() - Handler for the retry_work timer + * @work: The work struct of the local port */ static void fc_lport_timeout(struct work_struct *work) { @@ -1287,16 +1307,16 @@ static void fc_lport_timeout(struct work_struct *work) /** * fc_lport_logo_resp() - Handle response to LOGO request - * @sp: current sequence in LOGO exchange - * @fp: response frame - * @lp_arg: Fibre Channel lport port instance that sent the LOGO request + * @sp: The sequence that the LOGO was on + * @fp: The LOGO frame + * @lp_arg: The lport port that received the LOGO request * * Locking Note: This function will be called without the lport lock - * held, but it will lock, call an _enter_* function or fc_lport_error + * held, but it will lock, call an _enter_* function or fc_lport_error() * and then unlock the lport. */ void fc_lport_logo_resp(struct fc_seq *sp, struct fc_frame *fp, - void *lp_arg) + void *lp_arg) { struct fc_lport *lport = lp_arg; u8 op; @@ -1336,7 +1356,7 @@ EXPORT_SYMBOL(fc_lport_logo_resp); /** * fc_rport_enter_logo() - Logout of the fabric - * @lport: Fibre Channel local port to be logged out + * @lport: The local port to be logged out * * Locking Note: The lport lock is expected to be held before calling * this routine. @@ -1365,16 +1385,16 @@ static void fc_lport_enter_logo(struct fc_lport *lport) /** * fc_lport_flogi_resp() - Handle response to FLOGI request - * @sp: current sequence in FLOGI exchange - * @fp: response frame - * @lp_arg: Fibre Channel lport port instance that sent the FLOGI request + * @sp: The sequence that the FLOGI was on + * @fp: The FLOGI response frame + * @lp_arg: The lport port that received the FLOGI response * * Locking Note: This function will be called without the lport lock - * held, but it will lock, call an _enter_* function or fc_lport_error + * held, but it will lock, call an _enter_* function or fc_lport_error() * and then unlock the lport. */ void fc_lport_flogi_resp(struct fc_seq *sp, struct fc_frame *fp, - void *lp_arg) + void *lp_arg) { struct fc_lport *lport = lp_arg; struct fc_frame_header *fh; @@ -1484,7 +1504,10 @@ void fc_lport_enter_flogi(struct fc_lport *lport) fc_lport_error(lport, NULL); } -/* Configure a fc_lport */ +/** + * fc_lport_config() - Configure a fc_lport + * @lport: The local port to be configured + */ int fc_lport_config(struct fc_lport *lport) { INIT_DELAYED_WORK(&lport->retry_work, fc_lport_timeout); @@ -1499,6 +1522,10 @@ int fc_lport_config(struct fc_lport *lport) } EXPORT_SYMBOL(fc_lport_config); +/** + * fc_lport_init() - Initialize the lport layer for a local port + * @lport: The local port to initialize the exchange layer for + */ int fc_lport_init(struct fc_lport *lport) { if (!lport->tt.lport_recv) @@ -1533,10 +1560,10 @@ int fc_lport_init(struct fc_lport *lport) EXPORT_SYMBOL(fc_lport_init); /** - * fc_lport_bsg_resp() - The common response handler for fc pass-thru requests - * @sp: current sequence in the fc pass-thru request exchange - * @fp: received response frame - * @info_arg: pointer to struct fc_bsg_info + * fc_lport_bsg_resp() - The common response handler for FC Passthrough requests + * @sp: The sequence for the FC Passthrough response + * @fp: The response frame + * @info_arg: The BSG info that the response is for */ static void fc_lport_bsg_resp(struct fc_seq *sp, struct fc_frame *fp, void *info_arg) @@ -1596,10 +1623,10 @@ static void fc_lport_bsg_resp(struct fc_seq *sp, struct fc_frame *fp, } /** - * fc_lport_els_request() - Send ELS pass-thru request - * @job: The bsg fc pass-thru job structure + * fc_lport_els_request() - Send ELS passthrough request + * @job: The BSG Passthrough job * @lport: The local port sending the request - * @did: The destination port id. + * @did: The destination port id * * Locking Note: The lport lock is expected to be held before calling * this routine. @@ -1656,11 +1683,11 @@ static int fc_lport_els_request(struct fc_bsg_job *job, } /** - * fc_lport_ct_request() - Send CT pass-thru request - * @job: The bsg fc pass-thru job structure + * fc_lport_ct_request() - Send CT Passthrough request + * @job: The BSG Passthrough job * @lport: The local port sending the request * @did: The destination FC-ID - * @tov: The time to wait for a response + * @tov: The timeout period to wait for the response * * Locking Note: The lport lock is expected to be held before calling * this routine. @@ -1717,8 +1744,8 @@ static int fc_lport_ct_request(struct fc_bsg_job *job, /** * fc_lport_bsg_request() - The common entry point for sending - * fc pass-thru requests - * @job: The fc pass-thru job structure + * FC Passthrough requests + * @job: The BSG passthrough job */ int fc_lport_bsg_request(struct fc_bsg_job *job) { @@ -1759,7 +1786,7 @@ int fc_lport_bsg_request(struct fc_bsg_job *job) case FC_BSG_HST_CT: did = ntoh24(job->request->rqst_data.h_ct.port_id); if (did == FC_FID_DIR_SERV) - rdata = lport->dns_rp; + rdata = lport->dns_rdata; else rdata = lport->tt.rport_lookup(lport, did); diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c index 622285c81fef..6578968a753d 100644 --- a/drivers/scsi/libfc/fc_rport.c +++ b/drivers/scsi/libfc/fc_rport.c @@ -92,9 +92,9 @@ static const char *fc_rport_state_names[] = { }; /** - * fc_rport_lookup() - lookup a remote port by port_id - * @lport: Fibre Channel host port instance - * @port_id: remote port port_id to match + * fc_rport_lookup() - Lookup a remote port by port_id + * @lport: The local port to lookup the remote port on + * @port_id: The remote port ID to look up */ static struct fc_rport_priv *fc_rport_lookup(const struct fc_lport *lport, u32 port_id) @@ -109,8 +109,10 @@ static struct fc_rport_priv *fc_rport_lookup(const struct fc_lport *lport, /** * fc_rport_create() - Create a new remote port - * @lport: The local port that the new remote port is for - * @port_id: The port ID for the new remote port + * @lport: The local port this remote port will be associated with + * @ids: The identifiers for the new remote port + * + * The remote port will start in the INIT state. * * Locking note: must be called with the disc_mutex held. */ @@ -149,8 +151,8 @@ static struct fc_rport_priv *fc_rport_create(struct fc_lport *lport, } /** - * fc_rport_destroy() - free a remote port after last reference is released. - * @kref: pointer to kref inside struct fc_rport_priv + * fc_rport_destroy() - Free a remote port after last reference is released + * @kref: The remote port's kref */ static void fc_rport_destroy(struct kref *kref) { @@ -161,8 +163,8 @@ static void fc_rport_destroy(struct kref *kref) } /** - * fc_rport_state() - return a string for the state the rport is in - * @rdata: remote port private data + * fc_rport_state() - Return a string identifying the remote port's state + * @rdata: The remote port */ static const char *fc_rport_state(struct fc_rport_priv *rdata) { @@ -175,9 +177,9 @@ static const char *fc_rport_state(struct fc_rport_priv *rdata) } /** - * fc_set_rport_loss_tmo() - Set the remote port loss timeout in seconds. - * @rport: Pointer to Fibre Channel remote port structure - * @timeout: timeout in seconds + * fc_set_rport_loss_tmo() - Set the remote port loss timeout + * @rport: The remote port that gets a new timeout value + * @timeout: The new timeout value (in seconds) */ void fc_set_rport_loss_tmo(struct fc_rport *rport, u32 timeout) { @@ -189,9 +191,11 @@ void fc_set_rport_loss_tmo(struct fc_rport *rport, u32 timeout) EXPORT_SYMBOL(fc_set_rport_loss_tmo); /** - * fc_plogi_get_maxframe() - Get max payload from the common service parameters - * @flp: FLOGI payload structure - * @maxval: upper limit, may be less than what is in the service parameters + * fc_plogi_get_maxframe() - Get the maximum payload from the common service + * parameters in a FLOGI frame + * @flp: The FLOGI payload + * @maxval: The maximum frame size upper limit; this may be less than what + * is in the service parameters */ static unsigned int fc_plogi_get_maxframe(struct fc_els_flogi *flp, unsigned int maxval) @@ -212,9 +216,9 @@ static unsigned int fc_plogi_get_maxframe(struct fc_els_flogi *flp, } /** - * fc_rport_state_enter() - Change the rport's state - * @rdata: The rport whose state should change - * @new: The new state of the rport + * fc_rport_state_enter() - Change the state of a remote port + * @rdata: The remote port whose state should change + * @new: The new state * * Locking Note: Called with the rport lock held */ @@ -226,12 +230,16 @@ static void fc_rport_state_enter(struct fc_rport_priv *rdata, rdata->rp_state = new; } +/** + * fc_rport_work() - Handler for remote port events in the rport_event_queue + * @work: Handle to the remote port being dequeued + */ static void fc_rport_work(struct work_struct *work) { u32 port_id; struct fc_rport_priv *rdata = container_of(work, struct fc_rport_priv, event_work); - struct fc_rport_libfc_priv *rp; + struct fc_rport_libfc_priv *rpriv; enum fc_rport_event event; struct fc_lport *lport = rdata->local_port; struct fc_rport_operations *rport_ops; @@ -268,12 +276,12 @@ static void fc_rport_work(struct work_struct *work) rport->maxframe_size = rdata->maxframe_size; rport->supported_classes = rdata->supported_classes; - rp = rport->dd_data; - rp->local_port = lport; - rp->rp_state = rdata->rp_state; - rp->flags = rdata->flags; - rp->e_d_tov = rdata->e_d_tov; - rp->r_a_tov = rdata->r_a_tov; + rpriv = rport->dd_data; + rpriv->local_port = lport; + rpriv->rp_state = rdata->rp_state; + rpriv->flags = rdata->flags; + rpriv->e_d_tov = rdata->e_d_tov; + rpriv->r_a_tov = rdata->r_a_tov; mutex_unlock(&rdata->rp_mutex); if (rport_ops && rport_ops->event_callback) { @@ -319,8 +327,8 @@ static void fc_rport_work(struct work_struct *work) lport->tt.exch_mgr_reset(lport, port_id, 0); if (rport) { - rp = rport->dd_data; - rp->rp_state = RPORT_ST_DELETE; + rpriv = rport->dd_data; + rpriv->rp_state = RPORT_ST_DELETE; mutex_lock(&rdata->rp_mutex); rdata->rport = NULL; mutex_unlock(&rdata->rp_mutex); @@ -343,7 +351,7 @@ static void fc_rport_work(struct work_struct *work) /** * fc_rport_login() - Start the remote port login state machine - * @rdata: private remote port + * @rdata: The remote port to be logged in to * * Locking Note: Called without the rport lock held. This * function will hold the rport lock, call an _enter_* @@ -379,9 +387,9 @@ int fc_rport_login(struct fc_rport_priv *rdata) } /** - * fc_rport_enter_delete() - schedule a remote port to be deleted. - * @rdata: private remote port - * @event: event to report as the reason for deletion + * fc_rport_enter_delete() - Schedule a remote port to be deleted + * @rdata: The remote port to be deleted + * @event: The event to report as the reason for deletion * * Locking Note: Called with the rport lock held. * @@ -408,8 +416,8 @@ static void fc_rport_enter_delete(struct fc_rport_priv *rdata, } /** - * fc_rport_logoff() - Logoff and remove an rport - * @rdata: private remote port + * fc_rport_logoff() - Logoff and remove a remote port + * @rdata: The remote port to be logged off of * * Locking Note: Called without the rport lock held. This * function will hold the rport lock, call an _enter_* @@ -442,8 +450,8 @@ out: } /** - * fc_rport_enter_ready() - The rport is ready - * @rdata: private remote port + * fc_rport_enter_ready() - Transition to the RPORT_ST_READY state + * @rdata: The remote port that is ready * * Locking Note: The rport lock is expected to be held before calling * this routine. @@ -460,8 +468,8 @@ static void fc_rport_enter_ready(struct fc_rport_priv *rdata) } /** - * fc_rport_timeout() - Handler for the retry_work timer. - * @work: The work struct of the fc_rport_priv + * fc_rport_timeout() - Handler for the retry_work timer + * @work: Handle to the remote port that has timed out * * Locking Note: Called without the rport lock held. This * function will hold the rport lock, call an _enter_* @@ -502,8 +510,8 @@ static void fc_rport_timeout(struct work_struct *work) /** * fc_rport_error() - Error handler, called once retries have been exhausted - * @rdata: private remote port - * @fp: The frame pointer + * @rdata: The remote port the error is happened on + * @fp: The error code encapsulated in a frame pointer * * Locking Note: The rport lock is expected to be held before * calling this routine @@ -535,9 +543,9 @@ static void fc_rport_error(struct fc_rport_priv *rdata, struct fc_frame *fp) } /** - * fc_rport_error_retry() - Error handler when retries are desired - * @rdata: private remote port data - * @fp: The frame pointer + * fc_rport_error_retry() - Handler for remote port state retries + * @rdata: The remote port whose state is to be retried + * @fp: The error code encapsulated in a frame pointer * * If the error was an exchange timeout retry immediately, * otherwise wait for E_D_TOV. @@ -569,10 +577,10 @@ static void fc_rport_error_retry(struct fc_rport_priv *rdata, } /** - * fc_rport_plogi_recv_resp() - Handle incoming ELS PLOGI response - * @sp: current sequence in the PLOGI exchange - * @fp: response frame - * @rdata_arg: private remote port data + * fc_rport_plogi_recv_resp() - Handler for ELS PLOGI responses + * @sp: The sequence the PLOGI is on + * @fp: The PLOGI response frame + * @rdata_arg: The remote port that sent the PLOGI response * * Locking Note: This function will be called without the rport lock * held, but it will lock, call an _enter_* function or fc_rport_error @@ -635,8 +643,8 @@ err: } /** - * fc_rport_enter_plogi() - Send Port Login (PLOGI) request to peer - * @rdata: private remote port data + * fc_rport_enter_plogi() - Send Port Login (PLOGI) request + * @rdata: The remote port to send a PLOGI to * * Locking Note: The rport lock is expected to be held before calling * this routine. @@ -668,9 +676,9 @@ static void fc_rport_enter_plogi(struct fc_rport_priv *rdata) /** * fc_rport_prli_resp() - Process Login (PRLI) response handler - * @sp: current sequence in the PRLI exchange - * @fp: response frame - * @rdata_arg: private remote port data + * @sp: The sequence the PRLI response was on + * @fp: The PRLI response frame + * @rdata_arg: The remote port that sent the PRLI response * * Locking Note: This function will be called without the rport lock * held, but it will lock, call an _enter_* function or fc_rport_error @@ -739,10 +747,10 @@ err: } /** - * fc_rport_logo_resp() - Logout (LOGO) response handler - * @sp: current sequence in the LOGO exchange - * @fp: response frame - * @rdata_arg: private remote port data + * fc_rport_logo_resp() - Handler for logout (LOGO) responses + * @sp: The sequence the LOGO was on + * @fp: The LOGO response frame + * @rdata_arg: The remote port that sent the LOGO response * * Locking Note: This function will be called without the rport lock * held, but it will lock, call an _enter_* function or fc_rport_error @@ -785,8 +793,8 @@ err: } /** - * fc_rport_enter_prli() - Send Process Login (PRLI) request to peer - * @rdata: private remote port data + * fc_rport_enter_prli() - Send Process Login (PRLI) request + * @rdata: The remote port to send the PRLI request to * * Locking Note: The rport lock is expected to be held before calling * this routine. @@ -828,10 +836,10 @@ static void fc_rport_enter_prli(struct fc_rport_priv *rdata) } /** - * fc_rport_els_rtv_resp() - Request Timeout Value response handler - * @sp: current sequence in the RTV exchange - * @fp: response frame - * @rdata_arg: private remote port data + * fc_rport_els_rtv_resp() - Handler for Request Timeout Value (RTV) responses + * @sp: The sequence the RTV was on + * @fp: The RTV response frame + * @rdata_arg: The remote port that sent the RTV response * * Many targets don't seem to support this. * @@ -894,8 +902,8 @@ err: } /** - * fc_rport_enter_rtv() - Send Request Timeout Value (RTV) request to peer - * @rdata: private remote port data + * fc_rport_enter_rtv() - Send Request Timeout Value (RTV) request + * @rdata: The remote port to send the RTV request to * * Locking Note: The rport lock is expected to be held before calling * this routine. @@ -917,15 +925,15 @@ static void fc_rport_enter_rtv(struct fc_rport_priv *rdata) } if (!lport->tt.elsct_send(lport, rdata->ids.port_id, fp, ELS_RTV, - fc_rport_rtv_resp, rdata, lport->e_d_tov)) + fc_rport_rtv_resp, rdata, lport->e_d_tov)) fc_rport_error_retry(rdata, NULL); else kref_get(&rdata->kref); } /** - * fc_rport_enter_logo() - Send Logout (LOGO) request to peer - * @rdata: private remote port data + * fc_rport_enter_logo() - Send a logout (LOGO) request + * @rdata: The remote port to send the LOGO request to * * Locking Note: The rport lock is expected to be held before calling * this routine. @@ -954,17 +962,17 @@ static void fc_rport_enter_logo(struct fc_rport_priv *rdata) } /** - * fc_rport_els_adisc_resp() - Address Discovery response handler - * @sp: current sequence in the ADISC exchange - * @fp: response frame - * @rdata_arg: remote port private. + * fc_rport_els_adisc_resp() - Handler for Address Discovery (ADISC) responses + * @sp: The sequence the ADISC response was on + * @fp: The ADISC response frame + * @rdata_arg: The remote port that sent the ADISC response * * Locking Note: This function will be called without the rport lock * held, but it will lock, call an _enter_* function or fc_rport_error * and then unlock the rport. */ static void fc_rport_adisc_resp(struct fc_seq *sp, struct fc_frame *fp, - void *rdata_arg) + void *rdata_arg) { struct fc_rport_priv *rdata = rdata_arg; struct fc_els_adisc *adisc; @@ -1012,8 +1020,8 @@ err: } /** - * fc_rport_enter_adisc() - Send Address Discover (ADISC) request to peer - * @rdata: remote port private data + * fc_rport_enter_adisc() - Send Address Discover (ADISC) request + * @rdata: The remote port to send the ADISC request to * * Locking Note: The rport lock is expected to be held before calling * this routine. @@ -1041,10 +1049,10 @@ static void fc_rport_enter_adisc(struct fc_rport_priv *rdata) } /** - * fc_rport_recv_adisc_req() - Handle incoming Address Discovery (ADISC) Request - * @rdata: remote port private - * @sp: current sequence in the ADISC exchange - * @in_fp: ADISC request frame + * fc_rport_recv_adisc_req() - Handler for Address Discovery (ADISC) requests + * @rdata: The remote port that sent the ADISC request + * @sp: The sequence the ADISC request was on + * @in_fp: The ADISC request frame * * Locking Note: Called with the lport and rport locks held. */ @@ -1085,10 +1093,10 @@ drop: } /** - * fc_rport_recv_els_req() - handle a validated ELS request. - * @lport: Fibre Channel local port - * @sp: current sequence in the PLOGI exchange - * @fp: response frame + * fc_rport_recv_els_req() - Handler for validated ELS requests + * @lport: The local port that received the ELS request + * @sp: The sequence that the ELS request was on + * @fp: The ELS request frame * * Handle incoming ELS requests that require port login. * The ELS opcode has already been validated by the caller. @@ -1160,10 +1168,10 @@ reject: } /** - * fc_rport_recv_req() - Handle a received ELS request from a rport - * @sp: current sequence in the PLOGI exchange - * @fp: response frame - * @lport: Fibre Channel local port + * fc_rport_recv_req() - Handler for requests + * @sp: The sequence the request was on + * @fp: The request frame + * @lport: The local port that received the request * * Locking Note: Called with the lport lock held. */ @@ -1203,10 +1211,10 @@ void fc_rport_recv_req(struct fc_seq *sp, struct fc_frame *fp, } /** - * fc_rport_recv_plogi_req() - Handle incoming Port Login (PLOGI) request - * @lport: local port - * @sp: current sequence in the PLOGI exchange - * @fp: PLOGI request frame + * fc_rport_recv_plogi_req() - Handler for Port Login (PLOGI) requests + * @lport: The local port that received the PLOGI request + * @sp: The sequence that the PLOGI request was on + * @rx_fp: The PLOGI request frame * * Locking Note: The rport lock is held before calling this function. */ @@ -1328,10 +1336,10 @@ reject: } /** - * fc_rport_recv_prli_req() - Handle incoming Process Login (PRLI) request - * @rdata: private remote port data - * @sp: current sequence in the PRLI exchange - * @fp: PRLI request frame + * fc_rport_recv_prli_req() - Handler for process login (PRLI) requests + * @rdata: The remote port that sent the PRLI request + * @sp: The sequence that the PRLI was on + * @rx_fp: The PRLI request frame * * Locking Note: The rport lock is exected to be held before calling * this function. @@ -1485,10 +1493,10 @@ static void fc_rport_recv_prli_req(struct fc_rport_priv *rdata, } /** - * fc_rport_recv_prlo_req() - Handle incoming Process Logout (PRLO) request - * @rdata: private remote port data - * @sp: current sequence in the PRLO exchange - * @fp: PRLO request frame + * fc_rport_recv_prlo_req() - Handler for process logout (PRLO) requests + * @rdata: The remote port that sent the PRLO request + * @sp: The sequence that the PRLO was on + * @fp: The PRLO request frame * * Locking Note: The rport lock is exected to be held before calling * this function. @@ -1515,10 +1523,10 @@ static void fc_rport_recv_prlo_req(struct fc_rport_priv *rdata, } /** - * fc_rport_recv_logo_req() - Handle incoming Logout (LOGO) request - * @lport: local port. - * @sp: current sequence in the LOGO exchange - * @fp: LOGO request frame + * fc_rport_recv_logo_req() - Handler for logout (LOGO) requests + * @lport: The local port that received the LOGO request + * @sp: The sequence that the LOGO request was on + * @fp: The LOGO request frame * * Locking Note: The rport lock is exected to be held before calling * this function. @@ -1559,11 +1567,18 @@ static void fc_rport_recv_logo_req(struct fc_lport *lport, fc_frame_free(fp); } +/** + * fc_rport_flush_queue() - Flush the rport_event_queue + */ static void fc_rport_flush_queue(void) { flush_workqueue(rport_event_queue); } +/** + * fc_rport_init() - Initialize the remote port layer for a local port + * @lport: The local port to initialize the remote port layer for + */ int fc_rport_init(struct fc_lport *lport) { if (!lport->tt.rport_lookup) @@ -1591,7 +1606,10 @@ int fc_rport_init(struct fc_lport *lport) } EXPORT_SYMBOL(fc_rport_init); -int fc_setup_rport(void) +/** + * fc_setup_rport() - Initialize the rport_event_queue + */ +int fc_setup_rport() { rport_event_queue = create_singlethread_workqueue("fc_rport_eq"); if (!rport_event_queue) @@ -1599,15 +1617,22 @@ int fc_setup_rport(void) return 0; } -void fc_destroy_rport(void) +/** + * fc_destroy_rport() - Destroy the rport_event_queue + */ +void fc_destroy_rport() { destroy_workqueue(rport_event_queue); } +/** + * fc_rport_terminate_io() - Stop all outstanding I/O on a remote port + * @rport: The remote port whose I/O should be terminated + */ void fc_rport_terminate_io(struct fc_rport *rport) { - struct fc_rport_libfc_priv *rp = rport->dd_data; - struct fc_lport *lport = rp->local_port; + struct fc_rport_libfc_priv *rpriv = rport->dd_data; + struct fc_lport *lport = rpriv->local_port; lport->tt.exch_mgr_reset(lport, 0, rport->port_id); lport->tt.exch_mgr_reset(lport, rport->port_id, 0); diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h index 54df9fe00c14..310d8a22b726 100644 --- a/include/scsi/libfc.h +++ b/include/scsi/libfc.h @@ -55,8 +55,17 @@ p[2] = ((v) & 0xFF); \ } while (0) -/* - * FC HBA status +/** + * enum fc_lport_state - Local port states + * @LPORT_ST_DISABLED: Disabled + * @LPORT_ST_FLOGI: Fabric login (FLOGI) sent + * @LPORT_ST_DNS: Waiting for name server remote port to become ready + * @LPORT_ST_RPN_ID: Register port name by ID (RPN_ID) sent + * @LPORT_ST_RFT_ID: Register Fibre Channel types by ID (RFT_ID) sent + * @LPORT_ST_SCR: State Change Register (SCR) sent + * @LPORT_ST_READY: Ready for use + * @LPORT_ST_LOGO: Local port logout (LOGO) sent + * @LPORT_ST_RESET: Local port reset */ enum fc_lport_state { LPORT_ST_DISABLED = 0, @@ -78,16 +87,28 @@ enum fc_disc_event { DISC_EV_FAILED }; +/** + * enum fc_rport_state - Remote port states + * @RPORT_ST_INIT: Initialized + * @RPORT_ST_PLOGI: Waiting for PLOGI completion + * @RPORT_ST_PRLI: Waiting for PRLI completion + * @RPORT_ST_RTV: Waiting for RTV completion + * @RPORT_ST_READY: Ready for use + * @RPORT_ST_LOGO: Remote port logout (LOGO) sent + * @RPORT_ST_ADISC: Discover Address sent + * @RPORT_ST_DELETE: Remote port being deleted + * @RPORT_ST_RESTART: Remote port being deleted and will restart +*/ enum fc_rport_state { - RPORT_ST_INIT, /* initialized */ - RPORT_ST_PLOGI, /* waiting for PLOGI completion */ - RPORT_ST_PRLI, /* waiting for PRLI completion */ - RPORT_ST_RTV, /* waiting for RTV completion */ - RPORT_ST_READY, /* ready for use */ - RPORT_ST_LOGO, /* port logout sent */ - RPORT_ST_ADISC, /* Discover Address sent */ - RPORT_ST_DELETE, /* port being deleted */ - RPORT_ST_RESTART, /* remote port being deleted and will restart */ + RPORT_ST_INIT, + RPORT_ST_PLOGI, + RPORT_ST_PRLI, + RPORT_ST_RTV, + RPORT_ST_READY, + RPORT_ST_LOGO, + RPORT_ST_ADISC, + RPORT_ST_DELETE, + RPORT_ST_RESTART, }; /** @@ -98,12 +119,20 @@ enum fc_rport_state { * @port_id: Port ID of the discovered port */ struct fc_disc_port { - struct fc_lport *lp; - struct list_head peers; - struct work_struct rport_work; - u32 port_id; + struct fc_lport *lp; + struct list_head peers; + struct work_struct rport_work; + u32 port_id; }; +/** + * enum fc_rport_event - Remote port events + * @RPORT_EV_NONE: No event + * @RPORT_EV_READY: Remote port is ready for use + * @RPORT_EV_FAILED: State machine failed, remote port is not ready + * @RPORT_EV_STOP: Remote port has been stopped + * @RPORT_EV_LOGO: Remote port logout (LOGO) sent + */ enum fc_rport_event { RPORT_EV_NONE = 0, RPORT_EV_READY, @@ -114,6 +143,10 @@ enum fc_rport_event { struct fc_rport_priv; +/** + * struct fc_rport_operations - Operations for a remote port + * @event_callback: Function to be called for remote port events + */ struct fc_rport_operations { void (*event_callback)(struct fc_lport *, struct fc_rport_priv *, enum fc_rport_event); @@ -121,11 +154,11 @@ struct fc_rport_operations { /** * struct fc_rport_libfc_priv - libfc internal information about a remote port - * @local_port: Fibre Channel host port instance - * @rp_state: indicates READY for I/O or DELETE when blocked. - * @flags: REC and RETRY supported flags - * @e_d_tov: error detect timeout value (in msec) - * @r_a_tov: resource allocation timeout value (in msec) + * @local_port: The associated local port + * @rp_state: Indicates READY for I/O or DELETE when blocked + * @flags: REC and RETRY supported flags + * @e_d_tov: Error detect timeout value (in msec) + * @r_a_tov: Resource allocation timeout value (in msec) */ struct fc_rport_libfc_priv { struct fc_lport *local_port; @@ -138,47 +171,64 @@ struct fc_rport_libfc_priv { }; /** - * struct fc_rport_priv - libfc rport and discovery info about a remote port - * @local_port: Fibre Channel host port instance - * @rport: transport remote port - * @kref: reference counter - * @rp_state: state tracks progress of PLOGI, PRLI, and RTV exchanges - * @ids: remote port identifiers and roles - * @flags: REC and RETRY supported flags - * @max_seq: maximum number of concurrent sequences - * @disc_id: discovery identifier - * @maxframe_size: maximum frame size - * @retries: retry count in current state - * @e_d_tov: error detect timeout value (in msec) - * @r_a_tov: resource allocation timeout value (in msec) - * @rp_mutex: mutex protects rport - * @retry_work: - * @event_callback: Callback for rport READY, FAILED or LOGO + * struct fc_rport_priv - libfc remote port and discovery info + * @local_port: The associated local port + * @rport: The FC transport remote port + * @kref: Reference counter + * @rp_state: Enumeration that tracks progress of PLOGI, PRLI, + * and RTV exchanges + * @ids: The remote port identifiers and roles + * @flags: REC and RETRY supported flags + * @max_seq: Maximum number of concurrent sequences + * @disc_id: The discovery identifier + * @maxframe_size: The maximum frame size + * @retries: The retry count for the current state + * @e_d_tov: Error detect timeout value (in msec) + * @r_a_tov: Resource allocation timeout value (in msec) + * @rp_mutex: The mutex that protects the remote port + * @retry_work: Handle for retries + * @event_callback: Callback when READY, FAILED or LOGO states complete */ struct fc_rport_priv { - struct fc_lport *local_port; - struct fc_rport *rport; - struct kref kref; - enum fc_rport_state rp_state; + struct fc_lport *local_port; + struct fc_rport *rport; + struct kref kref; + enum fc_rport_state rp_state; struct fc_rport_identifiers ids; - u16 flags; - u16 max_seq; - u16 disc_id; - u16 maxframe_size; - unsigned int retries; - unsigned int e_d_tov; - unsigned int r_a_tov; - struct mutex rp_mutex; - struct delayed_work retry_work; - enum fc_rport_event event; - struct fc_rport_operations *ops; - struct list_head peers; - struct work_struct event_work; - u32 supported_classes; + u16 flags; + u16 max_seq; + u16 disc_id; + u16 maxframe_size; + unsigned int retries; + unsigned int e_d_tov; + unsigned int r_a_tov; + struct mutex rp_mutex; + struct delayed_work retry_work; + enum fc_rport_event event; + struct fc_rport_operations *ops; + struct list_head peers; + struct work_struct event_work; + u32 supported_classes; }; -/* - * fcoe stats structure +/** + * struct fcoe_dev_stats - fcoe stats structure + * @SecondsSinceLastReset: Seconds since the last reset + * @TxFrames: Number of transmitted frames + * @TxWords: Number of transmitted words + * @RxFrames: Number of received frames + * @RxWords: Number of received words + * @ErrorFrames: Number of received error frames + * @DumpedFrames: Number of dumped frames + * @LinkFailureCount: Number of link failures + * @LossOfSignalCount: Number for signal losses + * @InvalidTxWordCount: Number of invalid transmitted words + * @InvalidCRCCount: Number of invalid CRCs + * @InputRequests: Number of input requests + * @OutputRequests: Number of output requests + * @ControlRequests: Number of control requests + * @InputMegabytes: Number of received megabytes + * @OutputMegabytes: Number of transmitted megabytes */ struct fcoe_dev_stats { u64 SecondsSinceLastReset; @@ -199,10 +249,13 @@ struct fcoe_dev_stats { u64 OutputMegabytes; }; -/* - * els data is used for passing ELS respone specific - * data to send ELS response mainly using infomation - * in exchange and sequence in EM layer. +/** + * struct fc_seq_els_data - ELS data used for passing ELS specific responses + * @fp: The ELS frame + * @reason: The reason for rejection + * @explan: The explaination of the rejection + * + * Mainly used by the exchange manager layer. */ struct fc_seq_els_data { struct fc_frame *fp; @@ -210,77 +263,87 @@ struct fc_seq_els_data { enum fc_els_rjt_explan explan; }; -/* - * FCP request structure, one for each scsi cmd request +/** + * struct fc_fcp_pkt - FCP request structure (one for each scsi_cmnd request) + * @lp: The associated local port + * @state: The state of the I/O + * @tgt_flags: Target's flags + * @ref_cnt: Reference count + * @scsi_pkt_lock: Lock to protect the SCSI packet (must be taken before the + * host_lock if both are to be held at the same time) + * @cmd: The SCSI command (set and clear with the host_lock held) + * @list: Tracks queued commands (accessed with the host_lock held) + * @timer: The command timer + * @tm_done: Completion indicator + * @wait_for_comp: Indicator to wait for completion of the I/O (in jiffies) + * @start_time: Timestamp indicating the start of the I/O (in jiffies) + * @end_time: Timestamp indicating the end of the I/O (in jiffies) + * @last_pkt_time: Timestamp of the last frame received (in jiffies) + * @data_len: The length of the data + * @cdb_cmd: The CDB command + * @xfer_len: The transfer length + * @xfer_ddp: Indicates if this transfer used DDP (XID of the exchange + * will be set here if DDP was setup) + * @xfer_contig_end: The offset into the buffer if the buffer is contiguous + * (Tx and Rx) + * @max_payload: The maximum payload size (in bytes) + * @io_status: SCSI result (upper 24 bits) + * @cdb_status: CDB status + * @status_code: FCP I/O status + * @scsi_comp_flags: Completion flags (bit 3 Underrun bit 2: overrun) + * @req_flags: Request flags (bit 0: read bit:1 write) + * @scsi_resid: SCSI residule length + * @rport: The remote port that the SCSI command is targeted at + * @seq_ptr: The sequence that will carry the SCSI command + * @recov_retry: Number of recovery retries + * @recov_seq: The sequence for REC or SRR */ struct fc_fcp_pkt { - /* - * housekeeping stuff - */ - struct fc_lport *lp; /* handle to hba struct */ - u16 state; /* scsi_pkt state state */ - u16 tgt_flags; /* target flags */ - atomic_t ref_cnt; /* fcp pkt ref count */ - spinlock_t scsi_pkt_lock; /* Must be taken before the host lock - * if both are held at the same time */ - /* - * SCSI I/O related stuff - */ - struct scsi_cmnd *cmd; /* scsi command pointer. set/clear - * under host lock */ - struct list_head list; /* tracks queued commands. access under - * host lock */ - /* - * timeout related stuff - */ - struct timer_list timer; /* command timer */ + /* Housekeeping information */ + struct fc_lport *lp; + u16 state; + u16 tgt_flags; + atomic_t ref_cnt; + spinlock_t scsi_pkt_lock; + + /* SCSI I/O related information */ + struct scsi_cmnd *cmd; + struct list_head list; + + /* Timeout related information */ + struct timer_list timer; struct completion tm_done; - int wait_for_comp; - unsigned long start_time; /* start jiffie */ - unsigned long end_time; /* end jiffie */ - unsigned long last_pkt_time; /* jiffies of last frame received */ - - /* - * scsi cmd and data transfer information - */ - u32 data_len; - /* - * transport related veriables - */ - struct fcp_cmnd cdb_cmd; - size_t xfer_len; - u16 xfer_ddp; /* this xfer is ddped */ - u32 xfer_contig_end; /* offset of end of contiguous xfer */ - u16 max_payload; /* max payload size in bytes */ - - /* - * scsi/fcp return status - */ - u32 io_status; /* SCSI result upper 24 bits */ - u8 cdb_status; - u8 status_code; /* FCP I/O status */ - /* bit 3 Underrun bit 2: overrun */ - u8 scsi_comp_flags; - u32 req_flags; /* bit 0: read bit:1 write */ - u32 scsi_resid; /* residule length */ - - struct fc_rport *rport; /* remote port pointer */ - struct fc_seq *seq_ptr; /* current sequence pointer */ - /* - * Error Processing - */ - u8 recov_retry; /* count of recovery retries */ - struct fc_seq *recov_seq; /* sequence for REC or SRR */ + int wait_for_comp; + unsigned long start_time; + unsigned long end_time; + unsigned long last_pkt_time; + + /* SCSI command and data transfer information */ + u32 data_len; + + /* Transport related veriables */ + struct fcp_cmnd cdb_cmd; + size_t xfer_len; + u16 xfer_ddp; + u32 xfer_contig_end; + u16 max_payload; + + /* SCSI/FCP return status */ + u32 io_status; + u8 cdb_status; + u8 status_code; + u8 scsi_comp_flags; + u32 req_flags; + u32 scsi_resid; + + /* Associated structures */ + struct fc_rport *rport; + struct fc_seq *seq_ptr; + + /* Error Processing information */ + u8 recov_retry; + struct fc_seq *recov_seq; }; -/* - * FC_FCP HELPER FUNCTIONS - *****************************/ -static inline bool fc_fcp_is_read(const struct fc_fcp_pkt *fsp) -{ - if (fsp && fsp->cmd) - return fsp->cmd->sc_data_direction == DMA_FROM_DEVICE; - return false; -} /* * Structure and function definitions for managing Fibre Channel Exchanges @@ -293,23 +356,51 @@ static inline bool fc_fcp_is_read(const struct fc_fcp_pkt *fsp) struct fc_exch_mgr; struct fc_exch_mgr_anchor; -extern u16 fc_cpu_mask; /* cpu mask for possible cpus */ +extern u16 fc_cpu_mask; /* cpu mask for possible cpus */ -/* - * Sequence. +/** + * struct fc_seq - FC sequence + * @id: The sequence ID + * @ssb_stat: Status flags for the sequence status block (SSB) + * @cnt: Number of frames sent so far + * @rec_data: FC-4 value for REC */ struct fc_seq { - u8 id; /* seq ID */ - u16 ssb_stat; /* status flags for sequence status block */ - u16 cnt; /* frames sent so far on sequence */ - u32 rec_data; /* FC-4 value for REC */ + u8 id; + u16 ssb_stat; + u16 cnt; + u32 rec_data; }; #define FC_EX_DONE (1 << 0) /* ep is completed */ #define FC_EX_RST_CLEANUP (1 << 1) /* reset is forcing completion */ -/* - * Exchange. +/** + * struct fc_exch - Fibre Channel Exchange + * @em: Exchange manager + * @pool: Exchange pool + * @state: The exchange's state + * @xid: The exchange ID + * @ex_list: Handle used by the EM to track free exchanges + * @ex_lock: Lock that protects the exchange + * @ex_refcnt: Reference count + * @timeout_work: Handle for timeout handler + * @lp: The local port that this exchange is on + * @oxid: Originator's exchange ID + * @rxid: Responder's exchange ID + * @oid: Originator's FCID + * @sid: Source FCID + * @did: Destination FCID + * @esb_stat: ESB exchange status + * @r_a_tov: Resouce allocation time out value (in msecs) + * @seq_id: The next sequence ID to use + * @f_ctl: F_CTL flags for the sequence + * @fh_type: The frame type + * @class: The class of service + * @seq: The sequence in use on this exchange + * @resp: Callback for responses on this exchange + * @destructor: Called when destroying the exchange + * @arg: Passed as a void pointer to the resp() callback * * Locking notes: The ex_lock protects following items: * state, esb_stat, f_ctl, seq.ssb_stat @@ -317,76 +408,59 @@ struct fc_seq { * sequence allocation */ struct fc_exch { - struct fc_exch_mgr *em; /* exchange manager */ - struct fc_exch_pool *pool; /* per cpu exches pool */ - u32 state; /* internal driver state */ - u16 xid; /* our exchange ID */ - struct list_head ex_list; /* free or busy list linkage */ - spinlock_t ex_lock; /* lock covering exchange state */ - atomic_t ex_refcnt; /* reference counter */ - struct delayed_work timeout_work; /* timer for upper level protocols */ - struct fc_lport *lp; /* fc device instance */ - u16 oxid; /* originator's exchange ID */ - u16 rxid; /* responder's exchange ID */ - u32 oid; /* originator's FCID */ - u32 sid; /* source FCID */ - u32 did; /* destination FCID */ - u32 esb_stat; /* exchange status for ESB */ - u32 r_a_tov; /* r_a_tov from rport (msec) */ - u8 seq_id; /* next sequence ID to use */ - u32 f_ctl; /* F_CTL flags for sequences */ - u8 fh_type; /* frame type */ - enum fc_class class; /* class of service */ - struct fc_seq seq; /* single sequence */ - /* - * Handler for responses to this current exchange. - */ - void (*resp)(struct fc_seq *, struct fc_frame *, void *); - void (*destructor)(struct fc_seq *, void *); - /* - * arg is passed as void pointer to exchange - * resp and destructor handlers - */ - void *arg; + struct fc_exch_mgr *em; + struct fc_exch_pool *pool; + u32 state; + u16 xid; + struct list_head ex_list; + spinlock_t ex_lock; + atomic_t ex_refcnt; + struct delayed_work timeout_work; + struct fc_lport *lp; + u16 oxid; + u16 rxid; + u32 oid; + u32 sid; + u32 did; + u32 esb_stat; + u32 r_a_tov; + u8 seq_id; + u32 f_ctl; + u8 fh_type; + enum fc_class class; + struct fc_seq seq; + + void (*resp)(struct fc_seq *, struct fc_frame *, void *); + void *arg; + + void (*destructor)(struct fc_seq *, void *); + }; #define fc_seq_exch(sp) container_of(sp, struct fc_exch, seq) -struct libfc_function_template { +struct libfc_function_template { /* * Interface to send a FC frame * * STATUS: REQUIRED */ - int (*frame_send)(struct fc_lport *lp, struct fc_frame *fp); + int (*frame_send)(struct fc_lport *, struct fc_frame *); /* * Interface to send ELS/CT frames * * STATUS: OPTIONAL */ - struct fc_seq *(*elsct_send)(struct fc_lport *lport, - u32 did, - struct fc_frame *fp, - unsigned int op, + struct fc_seq *(*elsct_send)(struct fc_lport *, u32 did, + struct fc_frame *, unsigned int op, void (*resp)(struct fc_seq *, - struct fc_frame *fp, - void *arg), + struct fc_frame *, void *arg), void *arg, u32 timer_msec); /* * Send the FC frame payload using a new exchange and sequence. * - * The frame pointer with some of the header's fields must be - * filled before calling exch_seq_send(), those fields are, - * - * - routing control - * - FC port did - * - FC port sid - * - FC header type - * - frame control - * - parameter or relative offset - * * The exchange response handler is set in this routine to resp() * function pointer. It can be called in two scenarios: if a timeout * occurs or if a response frame is received for the exchange. The @@ -407,14 +481,13 @@ struct libfc_function_template { * * STATUS: OPTIONAL */ - struct fc_seq *(*exch_seq_send)(struct fc_lport *lp, - struct fc_frame *fp, - void (*resp)(struct fc_seq *sp, - struct fc_frame *fp, - void *arg), - void (*destructor)(struct fc_seq *sp, - void *arg), - void *arg, unsigned int timer_msec); + struct fc_seq *(*exch_seq_send)(struct fc_lport *, struct fc_frame *, + void (*resp)(struct fc_seq *, + struct fc_frame *, + void *), + void (*destructor)(struct fc_seq *, + void *), + void *, unsigned int timer_msec); /* * Sets up the DDP context for a given exchange id on the given @@ -422,22 +495,22 @@ struct libfc_function_template { * * STATUS: OPTIONAL */ - int (*ddp_setup)(struct fc_lport *lp, u16 xid, - struct scatterlist *sgl, unsigned int sgc); + int (*ddp_setup)(struct fc_lport *, u16, struct scatterlist *, + unsigned int); /* * Completes the DDP transfer and returns the length of data DDPed * for the given exchange id. * * STATUS: OPTIONAL */ - int (*ddp_done)(struct fc_lport *lp, u16 xid); + int (*ddp_done)(struct fc_lport *, u16); /* * Send a frame using an existing sequence and exchange. * * STATUS: OPTIONAL */ - int (*seq_send)(struct fc_lport *lp, struct fc_seq *sp, - struct fc_frame *fp); + int (*seq_send)(struct fc_lport *, struct fc_seq *, + struct fc_frame *); /* * Send an ELS response using infomation from a previous @@ -445,8 +518,8 @@ struct libfc_function_template { * * STATUS: OPTIONAL */ - void (*seq_els_rsp_send)(struct fc_seq *sp, enum fc_els_cmd els_cmd, - struct fc_seq_els_data *els_data); + void (*seq_els_rsp_send)(struct fc_seq *, enum fc_els_cmd, + struct fc_seq_els_data *); /* * Abort an exchange and sequence. Generally called because of a @@ -458,7 +531,7 @@ struct libfc_function_template { * * STATUS: OPTIONAL */ - int (*seq_exch_abort)(const struct fc_seq *req_sp, + int (*seq_exch_abort)(const struct fc_seq *, unsigned int timer_msec); /* @@ -467,14 +540,14 @@ struct libfc_function_template { * * STATUS: OPTIONAL */ - void (*exch_done)(struct fc_seq *sp); + void (*exch_done)(struct fc_seq *); /* * Start a new sequence on the same exchange/sequence tuple. * * STATUS: OPTIONAL */ - struct fc_seq *(*seq_start_next)(struct fc_seq *sp); + struct fc_seq *(*seq_start_next)(struct fc_seq *); /* * Reset an exchange manager, completing all sequences and exchanges. @@ -483,8 +556,7 @@ struct libfc_function_template { * * STATUS: OPTIONAL */ - void (*exch_mgr_reset)(struct fc_lport *, - u32 s_id, u32 d_id); + void (*exch_mgr_reset)(struct fc_lport *, u32 s_id, u32 d_id); /* * Flush the rport work queue. Generally used before shutdown. @@ -498,8 +570,8 @@ struct libfc_function_template { * * STATUS: OPTIONAL */ - void (*lport_recv)(struct fc_lport *lp, struct fc_seq *sp, - struct fc_frame *fp); + void (*lport_recv)(struct fc_lport *, struct fc_seq *, + struct fc_frame *); /* * Reset the local port. @@ -565,31 +637,31 @@ struct libfc_function_template { * * STATUS: OPTIONAL */ - int (*fcp_cmd_send)(struct fc_lport *lp, struct fc_fcp_pkt *fsp, - void (*resp)(struct fc_seq *, struct fc_frame *fp, - void *arg)); + int (*fcp_cmd_send)(struct fc_lport *, struct fc_fcp_pkt *, + void (*resp)(struct fc_seq *, struct fc_frame *, + void *)); /* * Cleanup the FCP layer, used durring link down and reset * * STATUS: OPTIONAL */ - void (*fcp_cleanup)(struct fc_lport *lp); + void (*fcp_cleanup)(struct fc_lport *); /* * Abort all I/O on a local port * * STATUS: OPTIONAL */ - void (*fcp_abort_io)(struct fc_lport *lp); + void (*fcp_abort_io)(struct fc_lport *); /* * Receive a request for the discovery layer. * * STATUS: OPTIONAL */ - void (*disc_recv_req)(struct fc_seq *, - struct fc_frame *, struct fc_lport *); + void (*disc_recv_req)(struct fc_seq *, struct fc_frame *, + struct fc_lport *); /* * Start discovery for a local port. @@ -618,133 +690,224 @@ struct libfc_function_template { void (*disc_stop_final) (struct fc_lport *); }; -/* information used by the discovery layer */ +/** + * struct fc_disc - Discovery context + * @retry_count: Number of retries + * @pending: 1 if discovery is pending, 0 if not + * @requesting: 1 if discovery has been requested, 0 if not + * @seq_count: Number of sequences used for discovery + * @buf_len: Length of the discovery buffer + * @disc_id: Discovery ID + * @rports: List of discovered remote ports + * @lport: The local port that discovery is for + * @disc_mutex: Mutex that protects the discovery context + * @partial_buf: Partial name buffer (if names are returned + * in multiple frames) + * @disc_work: handle for delayed work context + * @disc_callback: Callback routine called when discovery completes + */ struct fc_disc { - unsigned char retry_count; - unsigned char pending; - unsigned char requested; - unsigned short seq_count; - unsigned char buf_len; - u16 disc_id; + unsigned char retry_count; + unsigned char pending; + unsigned char requested; + unsigned short seq_count; + unsigned char buf_len; + u16 disc_id; + + struct list_head rports; + struct fc_lport *lport; + struct mutex disc_mutex; + struct fc_gpn_ft_resp partial_buf; + struct delayed_work disc_work; void (*disc_callback)(struct fc_lport *, enum fc_disc_event); - - struct list_head rports; - struct fc_lport *lport; - struct mutex disc_mutex; - struct fc_gpn_ft_resp partial_buf; /* partial name buffer */ - struct delayed_work disc_work; }; +/** + * struct fc_lport - Local port + * @host: The SCSI host associated with a local port + * @ema_list: Exchange manager anchor list + * @dns_rdata: The directory server remote port + * @ptp_rdata: Point to point remote port + * @scsi_priv: FCP layer internal data + * @disc: Discovery context + * @vports: Child vports if N_Port + * @vport: Parent vport if VN_Port + * @tt: Libfc function template + * @link_up: Link state (1 = link up, 0 = link down) + * @qfull: Queue state (1 queue is full, 0 queue is not full) + * @state: Identifies the state + * @boot_time: Timestamp indicating when the local port came online + * @host_stats: SCSI host statistics + * @dev_stats: FCoE device stats (TODO: libfc should not be + * FCoE aware) + * @retry_count: Number of retries in the current state + * @wwpn: World Wide Port Name + * @wwnn: World Wide Node Name + * @service_params: Common service parameters + * @e_d_tov: Error detection timeout value + * @r_a_tov: Resouce allocation timeout value + * @rnid_gen: RNID information + * @sg_supp: Indicates if scatter gather is supported + * @seq_offload: Indicates if sequence offload is supported + * @crc_offload: Indicates if CRC offload is supported + * @lro_enabled: Indicates if large receive offload is supported + * @does_npiv: Supports multiple vports + * @npiv_enabled: Switch/fabric allows NPIV + * @mfs: The maximum Fibre Channel payload size + * @max_retry_count: The maximum retry attempts + * @max_rport_retry_count: The maximum remote port retry attempts + * @lro_xid: The maximum XID for LRO + * @lso_max: The maximum large offload send size + * @fcts: FC-4 type mask + * @lp_mutex: Mutex to protect the local port + * @list: Handle for list of local ports + * @retry_work: Handle to local port for delayed retry context + */ struct fc_lport { - struct list_head list; - /* Associations */ - struct Scsi_Host *host; - struct list_head ema_list; - struct list_head vports; /* child vports if N_Port */ - struct fc_vport *vport; /* parent vport if VN_Port */ - struct fc_rport_priv *dns_rp; - struct fc_rport_priv *ptp_rp; - void *scsi_priv; - struct fc_disc disc; + struct Scsi_Host *host; + struct list_head ema_list; + struct fc_rport_priv *dns_rdata; + struct fc_rport_priv *ptp_rdata; + void *scsi_priv; + struct fc_disc disc; + + /* Virtual port information */ + struct list_head vports; + struct fc_vport *vport; /* Operational Information */ struct libfc_function_template tt; - u8 link_up; - u8 qfull; - enum fc_lport_state state; - unsigned long boot_time; - - struct fc_host_statistics host_stats; - struct fcoe_dev_stats *dev_stats; - - u64 wwpn; - u64 wwnn; - u8 retry_count; + u8 link_up; + u8 qfull; + enum fc_lport_state state; + unsigned long boot_time; + struct fc_host_statistics host_stats; + struct fcoe_dev_stats *dev_stats; + u8 retry_count; + + /* Fabric information */ + u64 wwpn; + u64 wwnn; + unsigned int service_params; + unsigned int e_d_tov; + unsigned int r_a_tov; + struct fc_els_rnid_gen rnid_gen; /* Capabilities */ - u32 sg_supp:1; /* scatter gather supported */ - u32 seq_offload:1; /* seq offload supported */ - u32 crc_offload:1; /* crc offload supported */ - u32 lro_enabled:1; /* large receive offload */ - u32 does_npiv:1; /* supports multiple vports */ - u32 npiv_enabled:1; /* switch/fabric allows NPIV */ - u32 mfs; /* max FC payload size */ - unsigned int service_params; - unsigned int e_d_tov; - unsigned int r_a_tov; - u8 max_retry_count; - u8 max_rport_retry_count; - u16 link_speed; - u16 link_supported_speeds; - u16 lro_xid; /* max xid for fcoe lro */ - unsigned int lso_max; /* max large send size */ - struct fc_ns_fts fcts; /* FC-4 type masks */ - struct fc_els_rnid_gen rnid_gen; /* RNID information */ - - /* Semaphores */ - struct mutex lp_mutex; + u32 sg_supp:1; + u32 seq_offload:1; + u32 crc_offload:1; + u32 lro_enabled:1; + u32 does_npiv:1; + u32 npiv_enabled:1; + u32 mfs; + u8 max_retry_count; + u8 max_rport_retry_count; + u16 link_speed; + u16 link_supported_speeds; + u16 lro_xid; + unsigned int lso_max; + struct fc_ns_fts fcts; /* Miscellaneous */ - struct delayed_work retry_work; + struct mutex lp_mutex; + struct list_head list; + struct delayed_work retry_work; }; /* * FC_LPORT HELPER FUNCTIONS *****************************/ -static inline int fc_lport_test_ready(struct fc_lport *lp) + +/** + * fc_lport_test_ready() - Determine if a local port is in the READY state + * @lport: The local port to test + */ +static inline int fc_lport_test_ready(struct fc_lport *lport) { - return lp->state == LPORT_ST_READY; + return lport->state == LPORT_ST_READY; } -static inline void fc_set_wwnn(struct fc_lport *lp, u64 wwnn) +/** + * fc_set_wwnn() - Set the World Wide Node Name of a local port + * @lport: The local port whose WWNN is to be set + * @wwnn: The new WWNN + */ +static inline void fc_set_wwnn(struct fc_lport *lport, u64 wwnn) { - lp->wwnn = wwnn; + lport->wwnn = wwnn; } -static inline void fc_set_wwpn(struct fc_lport *lp, u64 wwnn) +/** + * fc_set_wwpn() - Set the World Wide Port Name of a local port + * @lport: The local port whose WWPN is to be set + * @wwnn: The new WWPN + */ +static inline void fc_set_wwpn(struct fc_lport *lport, u64 wwnn) { - lp->wwpn = wwnn; + lport->wwpn = wwnn; } -static inline void fc_lport_state_enter(struct fc_lport *lp, +/** + * fc_lport_state_enter() - Change a local port's state + * @lport: The local port whose state is to change + * @state: The new state + */ +static inline void fc_lport_state_enter(struct fc_lport *lport, enum fc_lport_state state) { - if (state != lp->state) - lp->retry_count = 0; - lp->state = state; + if (state != lport->state) + lport->retry_count = 0; + lport->state = state; } -static inline int fc_lport_init_stats(struct fc_lport *lp) +/** + * fc_lport_init_stats() - Allocate per-CPU statistics for a local port + * @lport: The local port whose statistics are to be initialized + */ +static inline int fc_lport_init_stats(struct fc_lport *lport) { - /* allocate per cpu stats block */ - lp->dev_stats = alloc_percpu(struct fcoe_dev_stats); - if (!lp->dev_stats) + lport->dev_stats = alloc_percpu(struct fcoe_dev_stats); + if (!lport->dev_stats) return -ENOMEM; return 0; } -static inline void fc_lport_free_stats(struct fc_lport *lp) +/** + * fc_lport_free_stats() - Free memory for a local port's statistics + * @lport: The local port whose statistics are to be freed + */ +static inline void fc_lport_free_stats(struct fc_lport *lport) { - free_percpu(lp->dev_stats); + free_percpu(lport->dev_stats); } -static inline struct fcoe_dev_stats *fc_lport_get_stats(struct fc_lport *lp) +/** + * fc_lport_get_stats() - Get a local port's statistics + * @lport: The local port whose statistics are to be retreived + */ +static inline struct fcoe_dev_stats *fc_lport_get_stats(struct fc_lport *lport) { - return per_cpu_ptr(lp->dev_stats, smp_processor_id()); + return per_cpu_ptr(lport->dev_stats, smp_processor_id()); } -static inline void *lport_priv(const struct fc_lport *lp) +/** + * lport_priv() - Return the private data from a local port + * @lport: The local port whose private data is to be retreived + */ +static inline void *lport_priv(const struct fc_lport *lport) { - return (void *)(lp + 1); + return (void *)(lport + 1); } /** - * libfc_host_alloc() - Allocate a Scsi_Host with room for the fc_lport - * @sht: ptr to the scsi host templ - * @priv_size: size of private data after fc_lport + * libfc_host_alloc() - Allocate a Scsi_Host with room for a local port and + * LLD private data + * @sht: The SCSI host template + * @priv_size: Size of private data * * Returns: libfc lport */ @@ -765,156 +928,73 @@ libfc_host_alloc(struct scsi_host_template *sht, int priv_size) } /* - * LOCAL PORT LAYER + * FC_FCP HELPER FUNCTIONS *****************************/ -int fc_lport_init(struct fc_lport *lp); - -/* - * Destroy the specified local port by finding and freeing all - * fc_rports associated with it and then by freeing the fc_lport - * itself. - */ -int fc_lport_destroy(struct fc_lport *lp); - -/* - * Logout the specified local port from the fabric - */ -int fc_fabric_logoff(struct fc_lport *lp); - -/* - * Initiate the LP state machine. This handler will use fc_host_attr - * to store the FLOGI service parameters, so fc_host_attr must be - * initialized before calling this handler. - */ -int fc_fabric_login(struct fc_lport *lp); +static inline bool fc_fcp_is_read(const struct fc_fcp_pkt *fsp) +{ + if (fsp && fsp->cmd) + return fsp->cmd->sc_data_direction == DMA_FROM_DEVICE; + return false; +} /* - * The link is up for the given local port. - */ + * LOCAL PORT LAYER + *****************************/ +int fc_lport_init(struct fc_lport *); +int fc_lport_destroy(struct fc_lport *); +int fc_fabric_logoff(struct fc_lport *); +int fc_fabric_login(struct fc_lport *); void __fc_linkup(struct fc_lport *); void fc_linkup(struct fc_lport *); - -/* - * Link is down for the given local port. - */ void __fc_linkdown(struct fc_lport *); void fc_linkdown(struct fc_lport *); - -/* - * Configure the local port. - */ +void fc_vport_setlink(struct fc_lport *); +void fc_vports_linkchange(struct fc_lport *); int fc_lport_config(struct fc_lport *); - -/* - * Reset the local port. - */ int fc_lport_reset(struct fc_lport *); - -/* - * Set the mfs or reset - */ -int fc_set_mfs(struct fc_lport *lp, u32 mfs); - -/* - * Allocate a new lport struct for an NPIV VN_Port - */ -struct fc_lport *libfc_vport_create(struct fc_vport *vport, int privsize); - -/* - * Find an NPIV VN_Port by port ID - */ -struct fc_lport *fc_vport_id_lookup(struct fc_lport *n_port, u32 port_id); - -/* - * NPIV VN_Port link state management - */ -void fc_vport_setlink(struct fc_lport *vn_port); -void fc_vports_linkchange(struct fc_lport *n_port); - -/* - * Issue fc pass-thru request via bsg interface - */ -int fc_lport_bsg_request(struct fc_bsg_job *job); - +int fc_set_mfs(struct fc_lport *, u32 mfs); +struct fc_lport *libfc_vport_create(struct fc_vport *, int privsize); +struct fc_lport *fc_vport_id_lookup(struct fc_lport *, u32 port_id); +int fc_lport_bsg_request(struct fc_bsg_job *); /* * REMOTE PORT LAYER *****************************/ -int fc_rport_init(struct fc_lport *lp); -void fc_rport_terminate_io(struct fc_rport *rp); +int fc_rport_init(struct fc_lport *); +void fc_rport_terminate_io(struct fc_rport *); /* * DISCOVERY LAYER *****************************/ -int fc_disc_init(struct fc_lport *lp); - +int fc_disc_init(struct fc_lport *); /* - * SCSI LAYER + * FCP LAYER *****************************/ -/* - * Initialize the SCSI block of libfc - */ int fc_fcp_init(struct fc_lport *); +void fc_fcp_destroy(struct fc_lport *); /* - * This section provides an API which allows direct interaction - * with the SCSI-ml. Each of these functions satisfies a function - * pointer defined in Scsi_Host and therefore is always called - * directly from the SCSI-ml. - */ -int fc_queuecommand(struct scsi_cmnd *sc_cmd, + * SCSI INTERACTION LAYER + *****************************/ +int fc_queuecommand(struct scsi_cmnd *, void (*done)(struct scsi_cmnd *)); - -/* - * Send an ABTS frame to the target device. The sc_cmd argument - * is a pointer to the SCSI command to be aborted. - */ -int fc_eh_abort(struct scsi_cmnd *sc_cmd); - -/* - * Reset a LUN by sending send the tm cmd to the target. - */ -int fc_eh_device_reset(struct scsi_cmnd *sc_cmd); - -/* - * Reset the host adapter. - */ -int fc_eh_host_reset(struct scsi_cmnd *sc_cmd); - -/* - * Check rport status. - */ -int fc_slave_alloc(struct scsi_device *sdev); - -/* - * Adjust the queue depth. - */ -int fc_change_queue_depth(struct scsi_device *sdev, int qdepth, int reason); - -/* - * Change the tag type. - */ -int fc_change_queue_type(struct scsi_device *sdev, int tag_type); - -/* - * Free memory pools used by the FCP layer. - */ -void fc_fcp_destroy(struct fc_lport *); +int fc_eh_abort(struct scsi_cmnd *); +int fc_eh_device_reset(struct scsi_cmnd *); +int fc_eh_host_reset(struct scsi_cmnd *); +int fc_slave_alloc(struct scsi_device *); +int fc_change_queue_depth(struct scsi_device *, int qdepth, int reason); +int fc_change_queue_type(struct scsi_device *, int tag_type); /* * ELS/CT interface *****************************/ -/* - * Initializes ELS/CT interface - */ -int fc_elsct_init(struct fc_lport *lp); -struct fc_seq *fc_elsct_send(struct fc_lport *lport, - u32 did, - struct fc_frame *fp, +int fc_elsct_init(struct fc_lport *); +struct fc_seq *fc_elsct_send(struct fc_lport *, u32 did, + struct fc_frame *, unsigned int op, void (*resp)(struct fc_seq *, - struct fc_frame *fp, + struct fc_frame *, void *arg), void *arg, u32 timer_msec); void fc_lport_flogi_resp(struct fc_seq *, struct fc_frame *, void *); @@ -924,90 +1004,26 @@ void fc_lport_logo_resp(struct fc_seq *, struct fc_frame *, void *); /* * EXCHANGE MANAGER LAYER *****************************/ -/* - * Initializes Exchange Manager related - * function pointers in struct libfc_function_template. - */ -int fc_exch_init(struct fc_lport *lp); - -/* - * Adds Exchange Manager (EM) mp to lport. - * - * Adds specified mp to lport using struct fc_exch_mgr_anchor, - * the struct fc_exch_mgr_anchor allows same EM sharing by - * more than one lport with their specified match function, - * the match function is used in allocating exchange from - * added mp. - */ -struct fc_exch_mgr_anchor *fc_exch_mgr_add(struct fc_lport *lport, - struct fc_exch_mgr *mp, +int fc_exch_init(struct fc_lport *); +struct fc_exch_mgr_anchor *fc_exch_mgr_add(struct fc_lport *, + struct fc_exch_mgr *, bool (*match)(struct fc_frame *)); - -/* - * Deletes Exchange Manager (EM) from lport by removing - * its anchor ema from lport. - * - * If removed anchor ema was the last user of its associated EM - * then also destroys associated EM. - */ -void fc_exch_mgr_del(struct fc_exch_mgr_anchor *ema); - -/* - * Clone an exchange manager list, getting reference holds for each EM. - * This is for use with NPIV and sharing the X_ID space between VN_Ports. - */ +void fc_exch_mgr_del(struct fc_exch_mgr_anchor *); int fc_exch_mgr_list_clone(struct fc_lport *src, struct fc_lport *dst); - -/* - * Allocates an Exchange Manager (EM). - * - * The EM manages exchanges for their allocation and - * free, also allows exchange lookup for received - * frame. - * - * The class is used for initializing FC class of - * allocated exchange from EM. - * - * The min_xid and max_xid will limit new - * exchange ID (XID) within this range for - * a new exchange. - * The LLD may choose to have multiple EMs, - * e.g. one EM instance per CPU receive thread in LLD. - * - * Specified match function is used in allocating exchanges - * from newly allocated EM. - */ -struct fc_exch_mgr *fc_exch_mgr_alloc(struct fc_lport *lp, - enum fc_class class, - u16 min_xid, - u16 max_xid, +struct fc_exch_mgr *fc_exch_mgr_alloc(struct fc_lport *, enum fc_class class, + u16 min_xid, u16 max_xid, bool (*match)(struct fc_frame *)); - -/* - * Free all exchange managers of a lport. - */ -void fc_exch_mgr_free(struct fc_lport *lport); - -/* - * Receive a frame on specified local port and exchange manager. - */ -void fc_exch_recv(struct fc_lport *lp, struct fc_frame *fp); - -/* - * Reset all EMs of a lport, releasing its all sequences and - * exchanges. If sid is non-zero, then reset only exchanges - * we sourced from that FID. If did is non-zero, reset only - * exchanges destined to that FID. - */ +void fc_exch_mgr_free(struct fc_lport *); +void fc_exch_recv(struct fc_lport *, struct fc_frame *); void fc_exch_mgr_reset(struct fc_lport *, u32 s_id, u32 d_id); /* * Functions for fc_functions_template */ -void fc_get_host_speed(struct Scsi_Host *shost); -void fc_get_host_port_type(struct Scsi_Host *shost); -void fc_get_host_port_state(struct Scsi_Host *shost); -void fc_set_rport_loss_tmo(struct fc_rport *rport, u32 timeout); +void fc_get_host_speed(struct Scsi_Host *); +void fc_get_host_port_type(struct Scsi_Host *); +void fc_get_host_port_state(struct Scsi_Host *); +void fc_set_rport_loss_tmo(struct fc_rport *, u32 timeout); struct fc_host_statistics *fc_get_host_stats(struct Scsi_Host *); #endif /* _LIBFC_H_ */ -- cgit v1.3-8-gc7d7 From 70b51aabf3b03fbf8d61c14847ccce4c69fb0cdd Mon Sep 17 00:00:00 2001 From: Robert Love Date: Tue, 3 Nov 2009 11:47:45 -0800 Subject: [SCSI] libfcoe: formatting and comment cleanups Ensures that there are kernel-doc style comments for all routines and structures. There were also a few instances of fc_lport's named 'lp' which were switched to 'lport' as per the libfc/libfcoe/fcoe naming convention. Also, emacs 'indent-region' and 'tabify' were ran on libfcoe.c. Signed-off-by: Robert Love Signed-off-by: James Bottomley --- drivers/scsi/fcoe/libfcoe.c | 216 +++++++++++++++++++++++--------------------- include/scsi/libfcoe.h | 84 ++++++++--------- 2 files changed, 153 insertions(+), 147 deletions(-) (limited to 'include') diff --git a/drivers/scsi/fcoe/libfcoe.c b/drivers/scsi/fcoe/libfcoe.c index 6a93ba96569f..6b07a8400889 100644 --- a/drivers/scsi/fcoe/libfcoe.c +++ b/drivers/scsi/fcoe/libfcoe.c @@ -59,15 +59,15 @@ unsigned int libfcoe_debug_logging; module_param_named(debug_logging, libfcoe_debug_logging, int, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(debug_logging, "a bit mask of logging levels"); -#define LIBFCOE_LOGGING 0x01 /* General logging, not categorized */ +#define LIBFCOE_LOGGING 0x01 /* General logging, not categorized */ #define LIBFCOE_FIP_LOGGING 0x02 /* FIP logging */ -#define LIBFCOE_CHECK_LOGGING(LEVEL, CMD) \ -do { \ - if (unlikely(libfcoe_debug_logging & LEVEL)) \ - do { \ - CMD; \ - } while (0); \ +#define LIBFCOE_CHECK_LOGGING(LEVEL, CMD) \ +do { \ + if (unlikely(libfcoe_debug_logging & LEVEL)) \ + do { \ + CMD; \ + } while (0); \ } while (0) #define LIBFCOE_DBG(fmt, args...) \ @@ -78,7 +78,10 @@ do { \ LIBFCOE_CHECK_LOGGING(LIBFCOE_FIP_LOGGING, \ printk(KERN_INFO "fip: " fmt, ##args);) -/* +/** + * fcoe_ctlr_mtu_valid() - Check if a FCF's MTU is valid + * @fcf: The FCF to check + * * Return non-zero if FCF fcoe_size has been validated. */ static inline int fcoe_ctlr_mtu_valid(const struct fcoe_fcf *fcf) @@ -86,7 +89,10 @@ static inline int fcoe_ctlr_mtu_valid(const struct fcoe_fcf *fcf) return (fcf->flags & FIP_FL_SOL) != 0; } -/* +/** + * fcoe_ctlr_fcf_usable() - Check if a FCF is usable + * @fcf: The FCF to check + * * Return non-zero if the FCF is usable. */ static inline int fcoe_ctlr_fcf_usable(struct fcoe_fcf *fcf) @@ -97,8 +103,8 @@ static inline int fcoe_ctlr_fcf_usable(struct fcoe_fcf *fcf) } /** - * fcoe_ctlr_init() - Initialize the FCoE Controller instance. - * @fip: FCoE controller. + * fcoe_ctlr_init() - Initialize the FCoE Controller instance + * @fip: The FCoE controller to initialize */ void fcoe_ctlr_init(struct fcoe_ctlr *fip) { @@ -114,8 +120,8 @@ void fcoe_ctlr_init(struct fcoe_ctlr *fip) EXPORT_SYMBOL(fcoe_ctlr_init); /** - * fcoe_ctlr_reset_fcfs() - Reset and free all FCFs for a controller. - * @fip: FCoE controller. + * fcoe_ctlr_reset_fcfs() - Reset and free all FCFs for a controller + * @fip: The FCoE controller whose FCFs are to be reset * * Called with &fcoe_ctlr lock held. */ @@ -134,8 +140,8 @@ static void fcoe_ctlr_reset_fcfs(struct fcoe_ctlr *fip) } /** - * fcoe_ctlr_destroy() - Disable and tear-down the FCoE controller. - * @fip: FCoE controller. + * fcoe_ctlr_destroy() - Disable and tear down a FCoE controller + * @fip: The FCoE controller to tear down * * This is called by FCoE drivers before freeing the &fcoe_ctlr. * @@ -162,8 +168,8 @@ void fcoe_ctlr_destroy(struct fcoe_ctlr *fip) EXPORT_SYMBOL(fcoe_ctlr_destroy); /** - * fcoe_ctlr_fcoe_size() - Return the maximum FCoE size required for VN_Port. - * @fip: FCoE controller. + * fcoe_ctlr_fcoe_size() - Return the maximum FCoE size required for VN_Port + * @fip: The FCoE controller to get the maximum FCoE size from * * Returns the maximum packet size including the FCoE header and trailer, * but not including any Ethernet or VLAN headers. @@ -180,9 +186,9 @@ static inline u32 fcoe_ctlr_fcoe_size(struct fcoe_ctlr *fip) } /** - * fcoe_ctlr_solicit() - Send a solicitation. - * @fip: FCoE controller. - * @fcf: Destination FCF. If NULL, a multicast solicitation is sent. + * fcoe_ctlr_solicit() - Send a FIP solicitation + * @fip: The FCoE controller to send the solicitation on + * @fcf: The destination FCF (if NULL, a multicast solicitation is sent) */ static void fcoe_ctlr_solicit(struct fcoe_ctlr *fip, struct fcoe_fcf *fcf) { @@ -241,8 +247,8 @@ static void fcoe_ctlr_solicit(struct fcoe_ctlr *fip, struct fcoe_fcf *fcf) } /** - * fcoe_ctlr_link_up() - Start FCoE controller. - * @fip: FCoE controller. + * fcoe_ctlr_link_up() - Start FCoE controller + * @fip: The FCoE controller to start * * Called from the LLD when the network link is ready. */ @@ -268,15 +274,15 @@ void fcoe_ctlr_link_up(struct fcoe_ctlr *fip) EXPORT_SYMBOL(fcoe_ctlr_link_up); /** - * fcoe_ctlr_reset() - Reset FIP. - * @fip: FCoE controller. - * @new_state: FIP state to be entered. + * fcoe_ctlr_reset() - Reset a FCoE controller + * @fip: The FCoE controller to reset + * @new_state: The FIP state to be entered * * Returns non-zero if the link was up and now isn't. */ static int fcoe_ctlr_reset(struct fcoe_ctlr *fip, enum fip_state new_state) { - struct fc_lport *lp = fip->lp; + struct fc_lport *lport = fip->lp; int link_dropped; spin_lock_bh(&fip->lock); @@ -294,19 +300,19 @@ static int fcoe_ctlr_reset(struct fcoe_ctlr *fip, enum fip_state new_state) spin_unlock_bh(&fip->lock); if (link_dropped) - fc_linkdown(lp); + fc_linkdown(lport); if (new_state == FIP_ST_ENABLED) { fcoe_ctlr_solicit(fip, NULL); - fc_linkup(lp); + fc_linkup(lport); link_dropped = 0; } return link_dropped; } /** - * fcoe_ctlr_link_down() - Stop FCoE controller. - * @fip: FCoE controller. + * fcoe_ctlr_link_down() - Stop a FCoE controller + * @fip: The FCoE controller to be stopped * * Returns non-zero if the link was up and now isn't. * @@ -320,11 +326,11 @@ int fcoe_ctlr_link_down(struct fcoe_ctlr *fip) EXPORT_SYMBOL(fcoe_ctlr_link_down); /** - * fcoe_ctlr_send_keep_alive() - Send a keep-alive to the selected FCF. - * @fip: FCoE controller. - * @lport: libfc fc_lport to send from - * @ports: 0 for controller keep-alive, 1 for port keep-alive. - * @sa: source MAC address. + * fcoe_ctlr_send_keep_alive() - Send a keep-alive to the selected FCF + * @fip: The FCoE controller to send the FKA on + * @lport: libfc fc_lport to send from + * @ports: 0 for controller keep-alive, 1 for port keep-alive + * @sa: The source MAC address * * A controller keep-alive is sent every fka_period (typically 8 seconds). * The source MAC is the native MAC address. @@ -369,7 +375,7 @@ static void fcoe_ctlr_send_keep_alive(struct fcoe_ctlr *fip, kal->fip.fip_op = htons(FIP_OP_CTRL); kal->fip.fip_subcode = FIP_SC_KEEP_ALIVE; kal->fip.fip_dl_len = htons((sizeof(kal->mac) + - ports * sizeof(*vn)) / FIP_BPW); + ports * sizeof(*vn)) / FIP_BPW); kal->fip.fip_flags = htons(FIP_FL_FPMA); if (fip->spma) kal->fip.fip_flags |= htons(FIP_FL_SPMA); @@ -393,11 +399,10 @@ static void fcoe_ctlr_send_keep_alive(struct fcoe_ctlr *fip, } /** - * fcoe_ctlr_encaps() - Encapsulate an ELS frame for FIP, without sending it. - * @fip: FCoE controller. - * @lport: libfc fc_lport to use for the source address - * @dtype: FIP descriptor type for the frame. - * @skb: FCoE ELS frame including FC header but no FCoE headers. + * fcoe_ctlr_encaps() - Encapsulate an ELS frame for FIP, without sending it + * @fip: The FCoE controller for the ELS frame + * @dtype: The FIP descriptor type for the frame + * @skb: The FCoE ELS frame including FC header but no FCoE headers * * Returns non-zero error code on failure. * @@ -553,9 +558,9 @@ drop: } EXPORT_SYMBOL(fcoe_ctlr_els_send); -/* - * fcoe_ctlr_age_fcfs() - Reset and free all old FCFs for a controller. - * @fip: FCoE controller. +/** + * fcoe_ctlr_age_fcfs() - Reset and free all old FCFs for a controller + * @fip: The FCoE controller to free FCFs on * * Called with lock held. * @@ -596,9 +601,9 @@ static void fcoe_ctlr_age_fcfs(struct fcoe_ctlr *fip) } /** - * fcoe_ctlr_parse_adv() - Decode a FIP advertisement into a new FCF entry. - * @skb: received FIP advertisement frame - * @fcf: resulting FCF entry. + * fcoe_ctlr_parse_adv() - Decode a FIP advertisement into a new FCF entry + * @skb: The received FIP advertisement frame + * @fcf: The resulting FCF entry * * Returns zero on a valid parsed advertisement, * otherwise returns non zero value. @@ -699,9 +704,9 @@ len_err: } /** - * fcoe_ctlr_recv_adv() - Handle an incoming advertisement. - * @fip: FCoE controller. - * @skb: Received FIP packet. + * fcoe_ctlr_recv_adv() - Handle an incoming advertisement + * @fip: The FCoE controller receiving the advertisement + * @skb: The received FIP packet */ static void fcoe_ctlr_recv_adv(struct fcoe_ctlr *fip, struct sk_buff *skb) { @@ -784,7 +789,7 @@ static void fcoe_ctlr_recv_adv(struct fcoe_ctlr *fip, struct sk_buff *skb) */ if (mtu_valid && !fip->sel_time && fcoe_ctlr_fcf_usable(fcf)) { fip->sel_time = jiffies + - msecs_to_jiffies(FCOE_CTLR_START_DELAY); + msecs_to_jiffies(FCOE_CTLR_START_DELAY); if (!timer_pending(&fip->timer) || time_before(fip->sel_time, fip->timer.expires)) mod_timer(&fip->timer, fip->sel_time); @@ -794,13 +799,13 @@ out: } /** - * fcoe_ctlr_recv_els() - Handle an incoming FIP-encapsulated ELS frame. - * @fip: FCoE controller. - * @skb: Received FIP packet. + * fcoe_ctlr_recv_els() - Handle an incoming FIP encapsulated ELS frame + * @fip: The FCoE controller which received the packet + * @skb: The received FIP packet */ static void fcoe_ctlr_recv_els(struct fcoe_ctlr *fip, struct sk_buff *skb) { - struct fc_lport *lp = fip->lp; + struct fc_lport *lport = fip->lp; struct fip_header *fiph; struct fc_frame *fp = (struct fc_frame *)skb; struct fc_frame_header *fh = NULL; @@ -886,13 +891,13 @@ static void fcoe_ctlr_recv_els(struct fcoe_ctlr *fip, struct sk_buff *skb) fc_frame_init(fp); fr_sof(fp) = FC_SOF_I3; fr_eof(fp) = FC_EOF_T; - fr_dev(fp) = lp; + fr_dev(fp) = lport; - stats = fc_lport_get_stats(lp); + stats = fc_lport_get_stats(lport); stats->RxFrames++; stats->RxWords += skb->len / FIP_BPW; - fc_exch_recv(lp, fp); + fc_exch_recv(lport, fp); return; len_err: @@ -903,15 +908,15 @@ drop: } /** - * fcoe_ctlr_recv_els() - Handle an incoming link reset frame. - * @fip: FCoE controller. - * @fh: Received FIP header. + * fcoe_ctlr_recv_els() - Handle an incoming link reset frame + * @fip: The FCoE controller that received the frame + * @fh: The received FIP header * * There may be multiple VN_Port descriptors. * The overall length has already been checked. */ static void fcoe_ctlr_recv_clr_vlink(struct fcoe_ctlr *fip, - struct fip_header *fh) + struct fip_header *fh) { struct fip_desc *desc; struct fip_mac_desc *mp; @@ -920,13 +925,13 @@ static void fcoe_ctlr_recv_clr_vlink(struct fcoe_ctlr *fip, size_t rlen; size_t dlen; struct fcoe_fcf *fcf = fip->sel_fcf; - struct fc_lport *lp = fip->lp; + struct fc_lport *lport = fip->lp; u32 desc_mask; LIBFCOE_FIP_DBG("Clear Virtual Link received\n"); if (!fcf) return; - if (!fcf || !fc_host_port_id(lp->host)) + if (!fcf || !fc_host_port_id(lport->host)) return; /* @@ -962,9 +967,10 @@ static void fcoe_ctlr_recv_clr_vlink(struct fcoe_ctlr *fip, if (dlen < sizeof(*vp)) return; if (compare_ether_addr(vp->fd_mac, - fip->get_src_addr(lp)) == 0 && - get_unaligned_be64(&vp->fd_wwpn) == lp->wwpn && - ntoh24(vp->fd_fc_id) == fc_host_port_id(lp->host)) + fip->get_src_addr(lport)) == 0 && + get_unaligned_be64(&vp->fd_wwpn) == lport->wwpn && + ntoh24(vp->fd_fc_id) == + fc_host_port_id(lport->host)) desc_mask &= ~BIT(FIP_DT_VN_ID); break; default: @@ -989,9 +995,9 @@ static void fcoe_ctlr_recv_clr_vlink(struct fcoe_ctlr *fip, } /** - * fcoe_ctlr_recv() - Receive a FIP frame. - * @fip: FCoE controller. - * @skb: Received FIP packet. + * fcoe_ctlr_recv() - Receive a FIP packet + * @fip: The FCoE controller that received the packet + * @skb: The received FIP packet * * This is called from NET_RX_SOFTIRQ. */ @@ -1005,9 +1011,9 @@ void fcoe_ctlr_recv(struct fcoe_ctlr *fip, struct sk_buff *skb) EXPORT_SYMBOL(fcoe_ctlr_recv); /** - * fcoe_ctlr_recv_handler() - Receive a FIP frame. - * @fip: FCoE controller. - * @skb: Received FIP packet. + * fcoe_ctlr_recv_handler() - Receive a FIP frame + * @fip: The FCoE controller that received the frame + * @skb: The received FIP frame * * Returns non-zero if the frame is dropped. */ @@ -1064,8 +1070,8 @@ drop: } /** - * fcoe_ctlr_select() - Select the best FCF, if possible. - * @fip: FCoE controller. + * fcoe_ctlr_select() - Select the best FCF (if possible) + * @fip: The FCoE controller * * If there are conflicting advertisements, no FCF can be chosen. * @@ -1106,8 +1112,8 @@ static void fcoe_ctlr_select(struct fcoe_ctlr *fip) } /** - * fcoe_ctlr_timeout() - FIP timer function. - * @arg: &fcoe_ctlr pointer. + * fcoe_ctlr_timeout() - FIP timeout handler + * @arg: The FCoE controller that timed out * * Ages FCFs. Triggers FCF selection if possible. Sends keep-alives. */ @@ -1142,12 +1148,12 @@ static void fcoe_ctlr_timeout(unsigned long arg) fip->lp->host->host_no, sel->fcf_mac); memcpy(fip->dest_addr, sel->fcf_mac, ETH_ALEN); fip->port_ka_time = jiffies + - msecs_to_jiffies(FIP_VN_KA_PERIOD); + msecs_to_jiffies(FIP_VN_KA_PERIOD); fip->ctlr_ka_time = jiffies + sel->fka_period; fip->link = 1; } else { printk(KERN_NOTICE "libfcoe: host%d: " - "FIP Fibre-Channel Forwarder timed out. " + "FIP Fibre-Channel Forwarder timed out. " "Starting FCF discovery.\n", fip->lp->host->host_no); fip->link = 0; @@ -1165,7 +1171,7 @@ static void fcoe_ctlr_timeout(unsigned long arg) if (time_after_eq(jiffies, fip->port_ka_time)) { fip->port_ka_time += jiffies + - msecs_to_jiffies(FIP_VN_KA_PERIOD); + msecs_to_jiffies(FIP_VN_KA_PERIOD); fip->send_port_ka = 1; } if (time_after(next_timer, fip->port_ka_time)) @@ -1173,7 +1179,7 @@ static void fcoe_ctlr_timeout(unsigned long arg) mod_timer(&fip->timer, next_timer); } else if (fip->sel_time) { next_timer = fip->sel_time + - msecs_to_jiffies(FCOE_CTLR_START_DELAY); + msecs_to_jiffies(FCOE_CTLR_START_DELAY); mod_timer(&fip->timer, next_timer); } if (fip->send_ctlr_ka || fip->send_port_ka) @@ -1182,8 +1188,8 @@ static void fcoe_ctlr_timeout(unsigned long arg) } /** - * fcoe_ctlr_link_work() - worker thread function for link changes. - * @work: pointer to link_work member inside &fcoe_ctlr. + * fcoe_ctlr_link_work() - Worker thread function for link changes + * @work: Handle to a FCoE controller * * See if the link status has changed and if so, report it. * @@ -1230,8 +1236,8 @@ static void fcoe_ctlr_link_work(struct work_struct *work) } /** - * fcoe_ctlr_recv_work() - Worker thread function for receiving FIP frames. - * @recv_work: pointer to recv_work member inside &fcoe_ctlr. + * fcoe_ctlr_recv_work() - Worker thread function for receiving FIP frames + * @recv_work: Handle to a FCoE controller */ static void fcoe_ctlr_recv_work(struct work_struct *recv_work) { @@ -1249,11 +1255,10 @@ static void fcoe_ctlr_recv_work(struct work_struct *recv_work) } /** - * fcoe_ctlr_recv_flogi() - snoop Pre-FIP receipt of FLOGI response or request. - * @fip: FCoE controller. - * @lport: libfc fc_lport instance received on - * @fp: FC frame. - * @sa: Ethernet source MAC address from received FCoE frame. + * fcoe_ctlr_recv_flogi() - Snoop pre-FIP receipt of FLOGI response or request + * @fip: The FCoE controller + * @fp: The FC frame to snoop + * @sa: Ethernet source MAC address from received FCoE frame * * Snoop potential response to FLOGI or even incoming FLOGI. * @@ -1323,10 +1328,10 @@ int fcoe_ctlr_recv_flogi(struct fcoe_ctlr *fip, struct fc_lport *lport, EXPORT_SYMBOL(fcoe_ctlr_recv_flogi); /** - * fcoe_wwn_from_mac() - Converts 48-bit IEEE MAC address to 64-bit FC WWN. - * @mac: mac address - * @scheme: check port - * @port: port indicator for converting + * fcoe_wwn_from_mac() - Converts a 48-bit IEEE MAC address to a 64-bit FC WWN + * @mac: The MAC address to convert + * @scheme: The scheme to use when converting + * @port: The port indicator for converting * * Returns: u64 fc world wide name */ @@ -1364,23 +1369,24 @@ u64 fcoe_wwn_from_mac(unsigned char mac[MAX_ADDR_LEN], EXPORT_SYMBOL_GPL(fcoe_wwn_from_mac); /** - * fcoe_libfc_config() - sets up libfc related properties for lport - * @lp: ptr to the fc_lport - * @tt: libfc function template + * fcoe_libfc_config() - Sets up libfc related properties for local port + * @lp: The local port to configure libfc for + * @tt: The libfc function template * * Returns : 0 for success */ -int fcoe_libfc_config(struct fc_lport *lp, struct libfc_function_template *tt) +int fcoe_libfc_config(struct fc_lport *lport, + struct libfc_function_template *tt) { /* Set the function pointers set by the LLDD */ - memcpy(&lp->tt, tt, sizeof(*tt)); - if (fc_fcp_init(lp)) + memcpy(&lport->tt, tt, sizeof(*tt)); + if (fc_fcp_init(lport)) return -ENOMEM; - fc_exch_init(lp); - fc_elsct_init(lp); - fc_lport_init(lp); - fc_rport_init(lp); - fc_disc_init(lp); + fc_exch_init(lport); + fc_elsct_init(lport); + fc_lport_init(lport); + fc_rport_init(lport); + fc_disc_init(lport); return 0; } diff --git a/include/scsi/libfcoe.h b/include/scsi/libfcoe.h index 8ef5e209c216..76d08c9a7678 100644 --- a/include/scsi/libfcoe.h +++ b/include/scsi/libfcoe.h @@ -53,35 +53,35 @@ enum fip_state { }; /** - * struct fcoe_ctlr - FCoE Controller and FIP state. - * @state: internal FIP state for network link and FIP or non-FIP mode. - * @lp: &fc_lport: libfc local port. - * @sel_fcf: currently selected FCF, or NULL. - * @fcfs: list of discovered FCFs. - * @fcf_count: number of discovered FCF entries. - * @sol_time: time when a multicast solicitation was last sent. - * @sel_time: time after which to select an FCF. - * @port_ka_time: time of next port keep-alive. - * @ctlr_ka_time: time of next controller keep-alive. - * @timer: timer struct used for all delayed events. - * @link_work: &work_struct for doing FCF selection. - * @recv_work: &work_struct for receiving FIP frames. + * struct fcoe_ctlr - FCoE Controller and FIP state + * @state: internal FIP state for network link and FIP or non-FIP mode. + * @lp: &fc_lport: libfc local port. + * @sel_fcf: currently selected FCF, or NULL. + * @fcfs: list of discovered FCFs. + * @fcf_count: number of discovered FCF entries. + * @sol_time: time when a multicast solicitation was last sent. + * @sel_time: time after which to select an FCF. + * @port_ka_time: time of next port keep-alive. + * @ctlr_ka_time: time of next controller keep-alive. + * @timer: timer struct used for all delayed events. + * @link_work: &work_struct for doing FCF selection. + * @recv_work: &work_struct for receiving FIP frames. * @fip_recv_list: list of received FIP frames. - * @user_mfs: configured maximum FC frame size, including FC header. - * @flogi_oxid: exchange ID of most recent fabric login. - * @flogi_count: number of FLOGI attempts in AUTO mode. - * @link: current link status for libfc. - * @last_link: last link state reported to libfc. - * @map_dest: use the FC_MAP mode for destination MAC addresses. - * @spma: supports SPMA server-provided MACs mode - * @send_ctlr_ka: need to send controller keep alive - * @send_port_ka: need to send port keep alives - * @dest_addr: MAC address of the selected FC forwarder. - * @ctl_src_addr: the native MAC address of our local port. - * @send: LLD-supplied function to handle sending of FIP Ethernet frames. - * @update_mac: LLD-supplied function to handle changes to MAC addresses. - * @get_src_addr: LLD-supplied function to supply a source MAC address. - * @lock: lock protecting this structure. + * @user_mfs: configured maximum FC frame size, including FC header. + * @flogi_oxid: exchange ID of most recent fabric login. + * @flogi_count: number of FLOGI attempts in AUTO mode. + * @link: current link status for libfc. + * @last_link: last link state reported to libfc. + * @map_dest: use the FC_MAP mode for destination MAC addresses. + * @spma: supports SPMA server-provided MACs mode + * @send_ctlr_ka: need to send controller keep alive + * @send_port_ka: need to send port keep alives + * @dest_addr: MAC address of the selected FC forwarder. + * @ctl_src_addr: the native MAC address of our local port. + * @send: LLD-supplied function to handle sending FIP Ethernet frames + * @update_mac: LLD-supplied function to handle changes to MAC addresses. + * @get_src_addr: LLD-supplied function to supply a source MAC address. + * @lock: lock protecting this structure. * * This structure is used by all FCoE drivers. It contains information * needed by all FCoE low-level drivers (LLDs) as well as internal state @@ -119,18 +119,18 @@ struct fcoe_ctlr { spinlock_t lock; }; -/* - * struct fcoe_fcf - Fibre-Channel Forwarder. - * @list: list linkage. - * @time: system time (jiffies) when an advertisement was last received. - * @switch_name: WWN of switch from advertisement. - * @fabric_name: WWN of fabric from advertisement. - * @fc_map: FC_MAP value from advertisement. - * @fcf_mac: Ethernet address of the FCF. - * @vfid: virtual fabric ID. - * @pri: seletion priority, smaller values are better. - * @flags: flags received from advertisement. - * @fka_period: keep-alive period, in jiffies. +/** + * struct fcoe_fcf - Fibre-Channel Forwarder + * @list: list linkage + * @time: system time (jiffies) when an advertisement was last received + * @switch_name: WWN of switch from advertisement + * @fabric_name: WWN of fabric from advertisement + * @fc_map: FC_MAP value from advertisement + * @fcf_mac: Ethernet address of the FCF + * @vfid: virtual fabric ID + * @pri: selection priority, smaller values are better + * @flags: flags received from advertisement + * @fka_period: keep-alive period, in jiffies * * A Fibre-Channel Forwarder (FCF) is the entity on the Ethernet that * passes FCoE frames on to an FC fabric. This structure represents @@ -161,8 +161,8 @@ void fcoe_ctlr_link_up(struct fcoe_ctlr *); int fcoe_ctlr_link_down(struct fcoe_ctlr *); int fcoe_ctlr_els_send(struct fcoe_ctlr *, struct fc_lport *, struct sk_buff *); void fcoe_ctlr_recv(struct fcoe_ctlr *, struct sk_buff *); -int fcoe_ctlr_recv_flogi(struct fcoe_ctlr *, struct fc_lport *lport, - struct fc_frame *fp, u8 *sa); +int fcoe_ctlr_recv_flogi(struct fcoe_ctlr *, struct fc_lport *, + struct fc_frame *, u8 *); /* libfcoe funcs */ u64 fcoe_wwn_from_mac(unsigned char mac[], unsigned int, unsigned int); -- cgit v1.3-8-gc7d7 From a7bbc7f40aa01eefef3d367349e1e6e87881a305 Mon Sep 17 00:00:00 2001 From: Vasu Dev Date: Tue, 3 Nov 2009 11:47:55 -0800 Subject: [SCSI] fcoe, libfc: use single frame allocation API Cleans up frame allocation APIs to have just single fc_frame_alloc API. Removes _fc_frame_alloc, renames __fc_frame_alloc to _fc_frame_alloc. Modifies fc_fcp_send_data for removed _fc_frame_alloc, fc_fcp_send_data was the only user of removed _fc_frame_alloc. Also Adds check in fc_frame_alloc to do mod by 4 for only non-zero len value. This patch is prep work to fix can_queue reducing in next patch. Single fc_frame_alloc API helps in fixing can_queue reducing in next patch. Signed-off-by: Vasu Dev Signed-off-by: Robert Love Signed-off-by: James Bottomley --- drivers/scsi/libfc/fc_fcp.c | 15 ++++----------- drivers/scsi/libfc/fc_frame.c | 6 +++--- include/scsi/fc_frame.h | 16 +++------------- 3 files changed, 10 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/drivers/scsi/libfc/fc_fcp.c b/drivers/scsi/libfc/fc_fcp.c index 970b54f653b7..567eee7b8609 100644 --- a/drivers/scsi/libfc/fc_fcp.c +++ b/drivers/scsi/libfc/fc_fcp.c @@ -505,18 +505,11 @@ static int fc_fcp_send_data(struct fc_fcp_pkt *fsp, struct fc_seq *seq, */ if (tlen % 4) using_sg = 0; - if (using_sg) { - fp = _fc_frame_alloc(lport, 0); - if (!fp) - return -ENOMEM; - } else { - fp = fc_frame_alloc(lport, tlen); - if (!fp) - return -ENOMEM; + fp = fc_frame_alloc(lport, using_sg ? 0 : tlen); + if (!fp) + return -ENOMEM; - data = (void *)(fr_hdr(fp)) + - sizeof(struct fc_frame_header); - } + data = fc_frame_header_get(fp) + 1; fh_parm_offset = frame_offset; fr_max_payload(fp) = fsp->max_payload; } diff --git a/drivers/scsi/libfc/fc_frame.c b/drivers/scsi/libfc/fc_frame.c index 4fea369b58ee..79c956501bd9 100644 --- a/drivers/scsi/libfc/fc_frame.c +++ b/drivers/scsi/libfc/fc_frame.c @@ -51,7 +51,7 @@ EXPORT_SYMBOL(fc_frame_crc_check); * Allocate a frame intended to be sent via fcoe_xmit. * Get an sk_buff for the frame and set the length. */ -struct fc_frame *__fc_frame_alloc(size_t len) +struct fc_frame *_fc_frame_alloc(size_t len) { struct fc_frame *fp; struct sk_buff *skb; @@ -67,7 +67,7 @@ struct fc_frame *__fc_frame_alloc(size_t len) skb_put(skb, len); return fp; } -EXPORT_SYMBOL(__fc_frame_alloc); +EXPORT_SYMBOL(_fc_frame_alloc); struct fc_frame *fc_frame_alloc_fill(struct fc_lport *lp, size_t payload_len) { @@ -77,7 +77,7 @@ struct fc_frame *fc_frame_alloc_fill(struct fc_lport *lp, size_t payload_len) fill = payload_len % 4; if (fill != 0) fill = 4 - fill; - fp = __fc_frame_alloc(payload_len + fill); + fp = _fc_frame_alloc(payload_len + fill); if (fp) { memset((char *) fr_hdr(fp) + payload_len, 0, fill); /* trim is OK, we just allocated it so there are no fragments */ diff --git a/include/scsi/fc_frame.h b/include/scsi/fc_frame.h index ab2f8d41761b..4d3e9c7b7c57 100644 --- a/include/scsi/fc_frame.h +++ b/include/scsi/fc_frame.h @@ -100,17 +100,7 @@ static inline void fc_frame_init(struct fc_frame *fp) } struct fc_frame *fc_frame_alloc_fill(struct fc_lport *, size_t payload_len); - -struct fc_frame *__fc_frame_alloc(size_t payload_len); - -/* - * Get frame for sending via port. - */ -static inline struct fc_frame *_fc_frame_alloc(struct fc_lport *dev, - size_t payload_len) -{ - return __fc_frame_alloc(payload_len); -} +struct fc_frame *_fc_frame_alloc(size_t payload_len); /* * Allocate fc_frame structure and buffer. Set the initial length to @@ -124,10 +114,10 @@ static inline struct fc_frame *fc_frame_alloc(struct fc_lport *dev, size_t len) * Note: Since len will often be a constant multiple of 4, * this check will usually be evaluated and eliminated at compile time. */ - if ((len % 4) != 0) + if (len && len % 4) fp = fc_frame_alloc_fill(dev, len); else - fp = _fc_frame_alloc(dev, len); + fp = _fc_frame_alloc(len); return fp; } -- cgit v1.3-8-gc7d7 From 22bcd225bfe2107725228758137d2109befa942a Mon Sep 17 00:00:00 2001 From: Joe Eykholt Date: Tue, 3 Nov 2009 11:48:11 -0800 Subject: [SCSI] libfcoe: Allow FIP to be disabled by the driver Allow FIP to be disabled by the driver for devices that want to use libfcoe in non-FIP mode. The driver merely sets the fcoe_ctlr mode to the state which should be entered when the link comes up. The default is auto. No change is needed for fcoe.c which uses auto mode. Signed-off-by: Joe Eykholt Signed-off-by: Robert Love Signed-off-by: James Bottomley --- drivers/scsi/fcoe/libfcoe.c | 6 ++++-- include/scsi/libfcoe.h | 2 ++ 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/scsi/fcoe/libfcoe.c b/drivers/scsi/fcoe/libfcoe.c index 6b07a8400889..1ea17a3c8749 100644 --- a/drivers/scsi/fcoe/libfcoe.c +++ b/drivers/scsi/fcoe/libfcoe.c @@ -109,6 +109,7 @@ static inline int fcoe_ctlr_fcf_usable(struct fcoe_fcf *fcf) void fcoe_ctlr_init(struct fcoe_ctlr *fip) { fip->state = FIP_ST_LINK_WAIT; + fip->mode = FIP_ST_AUTO; INIT_LIST_HEAD(&fip->fcfs); spin_lock_init(&fip->lock); fip->flogi_oxid = FC_XID_UNKNOWN; @@ -261,11 +262,12 @@ void fcoe_ctlr_link_up(struct fcoe_ctlr *fip) spin_unlock_bh(&fip->lock); fc_linkup(fip->lp); } else if (fip->state == FIP_ST_LINK_WAIT) { - fip->state = FIP_ST_AUTO; + fip->state = fip->mode; fip->last_link = 1; fip->link = 1; spin_unlock_bh(&fip->lock); - LIBFCOE_FIP_DBG("%s", "setting AUTO mode.\n"); + if (fip->state == FIP_ST_AUTO) + LIBFCOE_FIP_DBG("%s", "setting AUTO mode.\n"); fc_linkup(fip->lp); fcoe_ctlr_solicit(fip, NULL); } else diff --git a/include/scsi/libfcoe.h b/include/scsi/libfcoe.h index 76d08c9a7678..2344a00e92ef 100644 --- a/include/scsi/libfcoe.h +++ b/include/scsi/libfcoe.h @@ -55,6 +55,7 @@ enum fip_state { /** * struct fcoe_ctlr - FCoE Controller and FIP state * @state: internal FIP state for network link and FIP or non-FIP mode. + * @mode: LLD-selected mode. * @lp: &fc_lport: libfc local port. * @sel_fcf: currently selected FCF, or NULL. * @fcfs: list of discovered FCFs. @@ -89,6 +90,7 @@ enum fip_state { */ struct fcoe_ctlr { enum fip_state state; + enum fip_state mode; struct fc_lport *lp; struct fcoe_fcf *sel_fcf; struct list_head fcfs; -- cgit v1.3-8-gc7d7 From dd42dac4ecd1799077c132aab35d3c36b26d4d8c Mon Sep 17 00:00:00 2001 From: Joe Eykholt Date: Tue, 3 Nov 2009 11:48:27 -0800 Subject: [SCSI] libfcoe: FIP should report link to libfc whether selected or not The fnic driver with FIP is reporting link up, even though it's down. When the interface is shut down by the switch, we receive a clear virtual link, and set the state reported to libfc as down, although we still report it up. Clearly wrong. That causes the subsequent link down event not to be reported, and /sys shows the host "Online". Currently, in FIP mode, if an FCF times out, then link to libfc is reported as down, to stop FLOGIs. That interferes with the LLD link down being reported. Users really need to know the physical link information, to diagnose cabling issues, so physical link status should be reported to libfc. If the selected FCF needs to be reported, that should be done separately, in a later patch. Signed-off-by: Joe Eykholt Signed-off-by: Robert Love Signed-off-by: James Bottomley --- drivers/scsi/fcoe/libfcoe.c | 60 ++++++++++++++++++++++----------------------- include/scsi/libfcoe.h | 1 + 2 files changed, 31 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/drivers/scsi/fcoe/libfcoe.c b/drivers/scsi/fcoe/libfcoe.c index 787e7225ddde..4d857c2aef6c 100644 --- a/drivers/scsi/fcoe/libfcoe.c +++ b/drivers/scsi/fcoe/libfcoe.c @@ -277,38 +277,16 @@ EXPORT_SYMBOL(fcoe_ctlr_link_up); /** * fcoe_ctlr_reset() - Reset a FCoE controller * @fip: The FCoE controller to reset - * @new_state: The FIP state to be entered - * - * Returns non-zero if the link was up and now isn't. */ -static int fcoe_ctlr_reset(struct fcoe_ctlr *fip, enum fip_state new_state) +static void fcoe_ctlr_reset(struct fcoe_ctlr *fip) { - struct fc_lport *lport = fip->lp; - int link_dropped; - - spin_lock_bh(&fip->lock); fcoe_ctlr_reset_fcfs(fip); del_timer(&fip->timer); - fip->state = new_state; fip->ctlr_ka_time = 0; fip->port_ka_time = 0; fip->sol_time = 0; fip->flogi_oxid = FC_XID_UNKNOWN; fip->map_dest = 0; - fip->last_link = 0; - link_dropped = fip->link; - fip->link = 0; - spin_unlock_bh(&fip->lock); - - if (link_dropped) - fc_linkdown(lport); - - if (new_state == FIP_ST_ENABLED) { - fcoe_ctlr_solicit(fip, NULL); - fc_linkup(lport); - link_dropped = 0; - } - return link_dropped; } /** @@ -322,7 +300,20 @@ static int fcoe_ctlr_reset(struct fcoe_ctlr *fip, enum fip_state new_state) */ int fcoe_ctlr_link_down(struct fcoe_ctlr *fip) { - return fcoe_ctlr_reset(fip, FIP_ST_LINK_WAIT); + int link_dropped; + + LIBFCOE_FIP_DBG(fip, "link down.\n"); + spin_lock_bh(&fip->lock); + fcoe_ctlr_reset(fip); + link_dropped = fip->link; + fip->link = 0; + fip->last_link = 0; + fip->state = FIP_ST_LINK_WAIT; + spin_unlock_bh(&fip->lock); + + if (link_dropped) + fc_linkdown(fip->lp); + return link_dropped; } EXPORT_SYMBOL(fcoe_ctlr_link_down); @@ -994,7 +985,13 @@ static void fcoe_ctlr_recv_clr_vlink(struct fcoe_ctlr *fip, desc_mask); } else { LIBFCOE_FIP_DBG(fip, "performing Clear Virtual Link\n"); - fcoe_ctlr_reset(fip, FIP_ST_ENABLED); + + spin_lock_bh(&fip->lock); + fcoe_ctlr_reset(fip); + spin_unlock_bh(&fip->lock); + + fc_lport_reset(fip->lp); + fcoe_ctlr_solicit(fip, NULL); } } @@ -1152,15 +1149,14 @@ static void fcoe_ctlr_timeout(unsigned long arg) fip->port_ka_time = jiffies + msecs_to_jiffies(FIP_VN_KA_PERIOD); fip->ctlr_ka_time = jiffies + sel->fka_period; - fip->link = 1; } else { printk(KERN_NOTICE "libfcoe: host%d: " "FIP Fibre-Channel Forwarder timed out. " "Starting FCF discovery.\n", fip->lp->host->host_no); - fip->link = 0; + fip->reset_req = 1; + schedule_work(&fip->link_work); } - schedule_work(&fip->link_work); } if (sel) { @@ -1205,20 +1201,24 @@ static void fcoe_ctlr_link_work(struct work_struct *work) u8 *mac; int link; int last_link; + int reset; fip = container_of(work, struct fcoe_ctlr, link_work); spin_lock_bh(&fip->lock); last_link = fip->last_link; link = fip->link; fip->last_link = link; + reset = fip->reset_req; + fip->reset_req = 0; spin_unlock_bh(&fip->lock); if (last_link != link) { if (link) fc_linkup(fip->lp); else - fcoe_ctlr_reset(fip, FIP_ST_LINK_WAIT); - } + fc_linkdown(fip->lp); + } else if (reset && link) + fc_lport_reset(fip->lp); if (fip->send_ctlr_ka) { fip->send_ctlr_ka = 0; diff --git a/include/scsi/libfcoe.h b/include/scsi/libfcoe.h index 2344a00e92ef..e38ffa05dc26 100644 --- a/include/scsi/libfcoe.h +++ b/include/scsi/libfcoe.h @@ -108,6 +108,7 @@ struct fcoe_ctlr { u8 flogi_count; u8 link; u8 last_link; + u8 reset_req; u8 map_dest; u8 spma; u8 send_ctlr_ka; -- cgit v1.3-8-gc7d7 From 6049d95a8a223e2dc3a476dea9f0fbc9b580f38f Mon Sep 17 00:00:00 2001 From: Joe Eykholt Date: Tue, 3 Nov 2009 11:48:50 -0800 Subject: [SCSI] libfc: fix RNN_ID smashing skb payload The code that filled in the name server RNN_ID (register node name) request had somehow gotten a line in it from the RFT_ID code which copies 32 bytes of data over the relatively short payload. This caused some corruption and hangs. Simply deleted the extraneous line. Signed-off-by: Joe Eykholt Signed-off-by: Robert Love Signed-off-by: James Bottomley --- include/scsi/fc_encode.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/scsi/fc_encode.h b/include/scsi/fc_encode.h index 9afcbb94ec30..c8968d31c610 100644 --- a/include/scsi/fc_encode.h +++ b/include/scsi/fc_encode.h @@ -134,7 +134,6 @@ static inline int fc_ct_fill(struct fc_lport *lport, ct = fc_ct_hdr_fill(fp, op, sizeof(struct fc_ns_rn_id)); hton24(ct->payload.rn.fr_fid.fp_fid, fc_host_port_id(lport->host)); - ct->payload.rft.fts = lport->fcts; put_unaligned_be64(lport->wwnn, &ct->payload.rn.fr_wwn); break; -- cgit v1.3-8-gc7d7 From 5f9a056db9c7973c46337ec8d034323aa72bf206 Mon Sep 17 00:00:00 2001 From: Joe Eykholt Date: Tue, 3 Nov 2009 11:48:55 -0800 Subject: [SCSI] libfc: fix symbolic name registrations smashing skb data The strncpy for RSPN_ID and RSNN_NN requests was padding past the allocated frame size. Get the string length before filling in the ct header. Signed-off-by: Joe Eykholt Signed-off-by: Robert Love Signed-off-by: James Bottomley --- include/scsi/fc_encode.h | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/scsi/fc_encode.h b/include/scsi/fc_encode.h index c8968d31c610..ab2260cb149c 100644 --- a/include/scsi/fc_encode.h +++ b/include/scsi/fc_encode.h @@ -111,6 +111,7 @@ static inline int fc_ct_fill(struct fc_lport *lport, enum fc_fh_type *fh_type) { struct fc_ct_req *ct; + size_t len; switch (op) { case FC_NS_GPN_FT: @@ -138,22 +139,22 @@ static inline int fc_ct_fill(struct fc_lport *lport, break; case FC_NS_RSPN_ID: - ct = fc_ct_hdr_fill(fp, op, sizeof(struct fc_ns_rspn)); + len = strnlen(fc_host_symbolic_name(lport->host), 255); + ct = fc_ct_hdr_fill(fp, op, sizeof(struct fc_ns_rspn) + len); hton24(ct->payload.spn.fr_fid.fp_fid, fc_host_port_id(lport->host)); strncpy(ct->payload.spn.fr_name, - fc_host_symbolic_name(lport->host), 255); - ct->payload.spn.fr_name_len = - strnlen(ct->payload.spn.fr_name, 255); + fc_host_symbolic_name(lport->host), len); + ct->payload.spn.fr_name_len = len; break; case FC_NS_RSNN_NN: - ct = fc_ct_hdr_fill(fp, op, sizeof(struct fc_ns_rsnn)); + len = strnlen(fc_host_symbolic_name(lport->host), 255); + ct = fc_ct_hdr_fill(fp, op, sizeof(struct fc_ns_rsnn) + len); put_unaligned_be64(lport->wwnn, &ct->payload.snn.fr_wwn); strncpy(ct->payload.snn.fr_name, - fc_host_symbolic_name(lport->host), 255); - ct->payload.snn.fr_name_len = - strnlen(ct->payload.snn.fr_name, 255); + fc_host_symbolic_name(lport->host), len); + ct->payload.snn.fr_name_len = len; break; default: -- cgit v1.3-8-gc7d7 From 093bb6a2d378ee83fc6ab886c772b6be86abb5a8 Mon Sep 17 00:00:00 2001 From: Joe Eykholt Date: Tue, 3 Nov 2009 11:49:05 -0800 Subject: [SCSI] libfc: add set_fid function to libfc template This is to notify the LLD when an FC_ID is assigned to the local port. The fnic driver needs to push the assigned FC_ID to firmware. It currently does this by intercepting the FLOGI responses, and in order to make that code more common with FIP and NPIV, it makes more sense to wait until the local port has completely handled the FLOGI or FDISC response. Also, when we fix point-to-point FC_ID assignment, we'll need this callback as well. Add a call to the libfc template, which is called whenever the local port FC_ID is being assigned. It defaults to fc_lport_set_fid(), supplied by libfc. As additional benefit of this function, the LLD may determine the MAC address that caused the change by looking at the received frame. We also print the assigned port ID as long as it isn't 0. Setting port ID to 0 happens often in reset while retrying FLOGI, and would be uninteresting. This replaces the previous message which didn't identify the host adapter instance. patch v2 note: changed one word in a comment. "intercepted" -> "provided". Signed-off-by: Joe Eykholt Signed-off-by: Robert Love Signed-off-by: James Bottomley --- drivers/scsi/libfc/fc_lport.c | 34 +++++++++++++++++++++++++++------- include/scsi/libfc.h | 20 ++++++++++++++++++++ 2 files changed, 47 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c index 90930c435455..653b52dd2ff7 100644 --- a/drivers/scsi/libfc/fc_lport.c +++ b/drivers/scsi/libfc/fc_lport.c @@ -732,6 +732,27 @@ static void fc_lport_enter_ready(struct fc_lport *lport) lport->tt.disc_start(fc_lport_disc_callback, lport); } +/** + * fc_lport_set_port_id() - set the local port Port ID + * @lport: The local port which will have its Port ID set. + * @port_id: The new port ID. + * @fp: The frame containing the incoming request, or NULL. + * + * Locking Note: The lport lock is expected to be held before calling + * this function. + */ +static void fc_lport_set_port_id(struct fc_lport *lport, u32 port_id, + struct fc_frame *fp) +{ + if (port_id) + printk(KERN_INFO "host%d: Assigned Port ID %6x\n", + lport->host->host_no, port_id); + + fc_host_port_id(lport->host) = port_id; + if (lport->tt.lport_set_port_id) + lport->tt.lport_set_port_id(lport, port_id, fp); +} + /** * fc_lport_recv_flogi_req() - Receive a FLOGI request * @sp_in: The sequence the FLOGI is on @@ -790,7 +811,7 @@ static void fc_lport_recv_flogi_req(struct fc_seq *sp_in, remote_fid = FC_LOCAL_PTP_FID_HI; } - fc_host_port_id(lport->host) = local_fid; + fc_lport_set_port_id(lport, local_fid, rx_fp); fp = fc_frame_alloc(lport, sizeof(*flp)); if (fp) { @@ -926,7 +947,9 @@ static void fc_lport_reset_locked(struct fc_lport *lport) lport->tt.exch_mgr_reset(lport, 0, 0); fc_host_fabric_name(lport->host) = 0; - fc_host_port_id(lport->host) = 0; + + if (fc_host_port_id(lport->host)) + fc_lport_set_port_id(lport, 0, NULL); } /** @@ -1428,11 +1451,6 @@ void fc_lport_flogi_resp(struct fc_seq *sp, struct fc_frame *fp, fh = fc_frame_header_get(fp); did = ntoh24(fh->fh_d_id); if (fc_frame_payload_op(fp) == ELS_LS_ACC && did != 0) { - - printk(KERN_INFO "libfc: Assigned FID (%6x) in FLOGI response\n", - did); - fc_host_port_id(lport->host) = did; - flp = fc_frame_payload_get(fp, sizeof(*flp)); if (flp) { mfs = ntohs(flp->fl_csp.sp_bb_data) & @@ -1452,6 +1470,7 @@ void fc_lport_flogi_resp(struct fc_seq *sp, struct fc_frame *fp, if (e_d_tov > lport->e_d_tov) lport->e_d_tov = e_d_tov; lport->r_a_tov = 2 * e_d_tov; + fc_lport_set_port_id(lport, did, fp); printk(KERN_INFO "libfc: Port (%6x) entered " "point to point mode\n", did); fc_lport_ptp_setup(lport, ntoh24(fh->fh_s_id), @@ -1464,6 +1483,7 @@ void fc_lport_flogi_resp(struct fc_seq *sp, struct fc_frame *fp, lport->r_a_tov = r_a_tov; fc_host_fabric_name(lport->host) = get_unaligned_be64(&flp->fl_wwnn); + fc_lport_set_port_id(lport, did, fp); fc_lport_enter_dns(lport); } } diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h index 310d8a22b726..67ce9fa1fee4 100644 --- a/include/scsi/libfc.h +++ b/include/scsi/libfc.h @@ -580,6 +580,26 @@ struct libfc_function_template { */ int (*lport_reset)(struct fc_lport *); + /* + * Set the local port FC_ID. + * + * This may be provided by the LLD to allow it to be + * notified when the local port is assigned a FC-ID. + * + * The frame, if non-NULL, is the incoming frame with the + * FLOGI LS_ACC or FLOGI, and may contain the granted MAC + * address for the LLD. The frame pointer may be NULL if + * no MAC is associated with this assignment (LOGO or PLOGI). + * + * If FC_ID is non-zero, r_a_tov and e_d_tov must be valid. + * + * Note: this is called with the local port mutex held. + * + * STATUS: OPTIONAL + */ + void (*lport_set_port_id)(struct fc_lport *, u32 port_id, + struct fc_frame *); + /* * Create a remote port with a given port ID * -- cgit v1.3-8-gc7d7 From 386309ce927a308d7742a6fb24a536d3383fbd49 Mon Sep 17 00:00:00 2001 From: Joe Eykholt Date: Tue, 3 Nov 2009 11:49:16 -0800 Subject: [SCSI] libfcoe: fcoe: simplify receive FLOGI response There was a locking problem where the fip->lock was held during the call to update_mac(). The rtnl_lock() must be taken before the fip->lock, not the other way around. This fixes that. Now that fcoe_ctlr_recv_flog() is called only from the response handler to a FLOGI request, some checking can be eliminated. Instead of calling update_mac(), just fill in the granted_mac address for the passed-in frame (skb). Eliminate the passed-in source MAC address since it is also in the skb. Also, in fcoe, call fcoe_set_src_mac() directly instead of going thru the fip function pointer. This will generate less code. Then, since fip isn't needed for LOGO response, use lport as the arg. Signed-off-by: Joe Eykholt Signed-off-by: Robert Love Signed-off-by: James Bottomley --- drivers/scsi/fcoe/fcoe.c | 15 +++++---------- drivers/scsi/fcoe/libfcoe.c | 12 ++++++------ include/scsi/libfcoe.h | 2 +- 3 files changed, 12 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c index b15ec996b477..343900ac0ece 100644 --- a/drivers/scsi/fcoe/fcoe.c +++ b/drivers/scsi/fcoe/fcoe.c @@ -2247,15 +2247,12 @@ static void fcoe_flogi_resp(struct fc_seq *seq, struct fc_frame *fp, void *arg) mac = fr_cb(fp)->granted_mac; if (is_zero_ether_addr(mac)) { /* pre-FIP */ - mac = eth_hdr(&fp->skb)->h_source; - if (fcoe_ctlr_recv_flogi(fip, lport, fp, mac)) { + if (fcoe_ctlr_recv_flogi(fip, lport, fp)) { fc_frame_free(fp); return; } - } else { - /* FIP, libfcoe has already seen it */ - fip->update_mac(lport, fr_cb(fp)->granted_mac); } + fcoe_update_src_mac(lport, mac); done: fc_lport_flogi_resp(seq, fp, lport); } @@ -2271,13 +2268,11 @@ done: */ static void fcoe_logo_resp(struct fc_seq *seq, struct fc_frame *fp, void *arg) { - struct fcoe_ctlr *fip = arg; - struct fc_exch *exch = fc_seq_exch(seq); - struct fc_lport *lport = exch->lp; + struct fc_lport *lport = arg; static u8 zero_mac[ETH_ALEN] = { 0 }; if (!IS_ERR(fp)) - fip->update_mac(lport, zero_mac); + fcoe_update_src_mac(lport, zero_mac); fc_lport_logo_resp(seq, fp, lport); } @@ -2312,7 +2307,7 @@ static struct fc_seq *fcoe_elsct_send(struct fc_lport *lport, u32 did, if (ntoh24(fh->fh_d_id) != FC_FID_FLOGI) break; return fc_elsct_send(lport, did, fp, op, fcoe_logo_resp, - fip, timeout); + lport, timeout); } return fc_elsct_send(lport, did, fp, op, resp, arg, timeout); } diff --git a/drivers/scsi/fcoe/libfcoe.c b/drivers/scsi/fcoe/libfcoe.c index 2aab97221c6c..2988b71d1e87 100644 --- a/drivers/scsi/fcoe/libfcoe.c +++ b/drivers/scsi/fcoe/libfcoe.c @@ -1254,10 +1254,9 @@ static void fcoe_ctlr_recv_work(struct work_struct *recv_work) } /** - * fcoe_ctlr_recv_flogi() - Snoop pre-FIP receipt of FLOGI response or request + * fcoe_ctlr_recv_flogi() - Snoop pre-FIP receipt of FLOGI response * @fip: The FCoE controller * @fp: The FC frame to snoop - * @sa: Ethernet source MAC address from received FCoE frame * * Snoop potential response to FLOGI or even incoming FLOGI. * @@ -1265,16 +1264,18 @@ static void fcoe_ctlr_recv_work(struct work_struct *recv_work) * by fip->flogi_oxid != FC_XID_UNKNOWN. * * The caller is responsible for freeing the frame. + * Fill in the granted_mac address. * * Return non-zero if the frame should not be delivered to libfc. */ int fcoe_ctlr_recv_flogi(struct fcoe_ctlr *fip, struct fc_lport *lport, - struct fc_frame *fp, u8 *sa) + struct fc_frame *fp) { struct fc_frame_header *fh; u8 op; - u8 mac[ETH_ALEN]; + u8 *sa; + sa = eth_hdr(&fp->skb)->h_source; fh = fc_frame_header_get(fp); if (fh->fh_type != FC_TYPE_ELS) return 0; @@ -1305,9 +1306,8 @@ int fcoe_ctlr_recv_flogi(struct fcoe_ctlr *fip, struct fc_lport *lport, fip->map_dest = 0; } fip->flogi_oxid = FC_XID_UNKNOWN; - fc_fcoe_set_mac(mac, fh->fh_d_id); - fip->update_mac(lport, mac); spin_unlock_bh(&fip->lock); + fc_fcoe_set_mac(fr_cb(fp)->granted_mac, fh->fh_d_id); } else if (op == ELS_FLOGI && fh->fh_r_ctl == FC_RCTL_ELS_REQ && sa) { /* * Save source MAC for point-to-point responses. diff --git a/include/scsi/libfcoe.h b/include/scsi/libfcoe.h index e38ffa05dc26..3837872f1965 100644 --- a/include/scsi/libfcoe.h +++ b/include/scsi/libfcoe.h @@ -165,7 +165,7 @@ int fcoe_ctlr_link_down(struct fcoe_ctlr *); int fcoe_ctlr_els_send(struct fcoe_ctlr *, struct fc_lport *, struct sk_buff *); void fcoe_ctlr_recv(struct fcoe_ctlr *, struct sk_buff *); int fcoe_ctlr_recv_flogi(struct fcoe_ctlr *, struct fc_lport *, - struct fc_frame *, u8 *); + struct fc_frame *); /* libfcoe funcs */ u64 fcoe_wwn_from_mac(unsigned char mac[], unsigned int, unsigned int); -- cgit v1.3-8-gc7d7 From ab593b187391bdd03ccad2968972a2e118a88cd4 Mon Sep 17 00:00:00 2001 From: Joe Eykholt Date: Tue, 3 Nov 2009 11:49:27 -0800 Subject: [SCSI] libfc: register FC4 features with the FC switch Customers and certification tests have pointed out that we don't show up on the switch management software as an initiator. On some MDS switches 'show fcns database' command shows libfc initiators as 'fcp' not 'fcp:init' like other initiators. On others switches, I think the switch gets the features by doing a PRLI, but it may be only certain models or under certain configurations. Fix this by registering our FC4 features with the RFF_ID CT request after local port login and after the RFT_ID. Signed-off-by: Joe Eykholt Signed-off-by: Robert Love Signed-off-by: James Bottomley --- drivers/scsi/libfc/fc_lport.c | 12 +++++++++++- include/scsi/fc/fc_fcp.h | 6 ++++++ include/scsi/fc/fc_ns.h | 13 ++++++++++++- include/scsi/fc_encode.h | 12 ++++++++++++ include/scsi/libfc.h | 2 ++ 5 files changed, 43 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c index d3aec1959394..1bcc5e11d2c0 100644 --- a/drivers/scsi/libfc/fc_lport.c +++ b/drivers/scsi/libfc/fc_lport.c @@ -122,6 +122,7 @@ static const char *fc_lport_state_names[] = { [LPORT_ST_RSNN_NN] = "RSNN_NN", [LPORT_ST_RSPN_ID] = "RSPN_ID", [LPORT_ST_RFT_ID] = "RFT_ID", + [LPORT_ST_RFF_ID] = "RFF_ID", [LPORT_ST_SCR] = "SCR", [LPORT_ST_READY] = "Ready", [LPORT_ST_LOGO] = "LOGO", @@ -1034,6 +1035,7 @@ static void fc_lport_error(struct fc_lport *lport, struct fc_frame *fp) case LPORT_ST_RSNN_NN: case LPORT_ST_RSPN_ID: case LPORT_ST_RFT_ID: + case LPORT_ST_RFF_ID: case LPORT_ST_SCR: case LPORT_ST_DNS: case LPORT_ST_FLOGI: @@ -1070,7 +1072,7 @@ static void fc_lport_ns_resp(struct fc_seq *sp, struct fc_frame *fp, mutex_lock(&lport->lp_mutex); - if (lport->state < LPORT_ST_RNN_ID || lport->state > LPORT_ST_RFT_ID) { + if (lport->state < LPORT_ST_RNN_ID || lport->state > LPORT_ST_RFF_ID) { FC_LPORT_DBG(lport, "Received a name server response, " "but in state %s\n", fc_lport_state(lport)); if (IS_ERR(fp)) @@ -1101,6 +1103,9 @@ static void fc_lport_ns_resp(struct fc_seq *sp, struct fc_frame *fp, fc_lport_enter_ns(lport, LPORT_ST_RFT_ID); break; case LPORT_ST_RFT_ID: + fc_lport_enter_ns(lport, LPORT_ST_RFF_ID); + break; + case LPORT_ST_RFF_ID: fc_lport_enter_scr(lport); break; default: @@ -1235,6 +1240,10 @@ static void fc_lport_enter_ns(struct fc_lport *lport, enum fc_lport_state state) cmd = FC_NS_RFT_ID; size += sizeof(struct fc_ns_rft); break; + case LPORT_ST_RFF_ID: + cmd = FC_NS_RFF_ID; + size += sizeof(struct fc_ns_rff_id); + break; default: fc_lport_error(lport, NULL); return; @@ -1317,6 +1326,7 @@ static void fc_lport_timeout(struct work_struct *work) case LPORT_ST_RSNN_NN: case LPORT_ST_RSPN_ID: case LPORT_ST_RFT_ID: + case LPORT_ST_RFF_ID: fc_lport_enter_ns(lport, lport->state); break; case LPORT_ST_SCR: diff --git a/include/scsi/fc/fc_fcp.h b/include/scsi/fc/fc_fcp.h index 5d38f1989f37..29ecb0b02b09 100644 --- a/include/scsi/fc/fc_fcp.h +++ b/include/scsi/fc/fc_fcp.h @@ -196,4 +196,10 @@ struct fcp_srr { __u8 srr_resvd2[3]; /* reserved */ }; +/* + * Feature bits in name server FC-4 Features object. + */ +#define FCP_FEAT_TARG (1 << 0) /* target function supported */ +#define FCP_FEAT_INIT (1 << 1) /* initiator function supported */ + #endif /* _FC_FCP_H_ */ diff --git a/include/scsi/fc/fc_ns.h b/include/scsi/fc/fc_ns.h index f4d354eb26b9..e7d3ac497d7d 100644 --- a/include/scsi/fc/fc_ns.h +++ b/include/scsi/fc/fc_ns.h @@ -46,10 +46,11 @@ enum fc_ns_req { FC_NS_GID_FT = 0x0171, /* get IDs by FC4 type */ FC_NS_GPN_FT = 0x0172, /* get port names by FC4 type */ FC_NS_GID_PT = 0x01a1, /* get IDs by port type */ - FC_NS_RFT_ID = 0x0217, /* reg FC4 type for ID */ FC_NS_RPN_ID = 0x0212, /* reg port name for ID */ FC_NS_RNN_ID = 0x0213, /* reg node name for ID */ + FC_NS_RFT_ID = 0x0217, /* reg FC4 type for ID */ FC_NS_RSPN_ID = 0x0218, /* reg symbolic port name */ + FC_NS_RFF_ID = 0x021f, /* reg FC4 Features for ID */ FC_NS_RSNN_NN = 0x0239, /* reg symbolic node name */ }; @@ -178,4 +179,14 @@ struct fc_ns_rspn { char fr_name[]; } __attribute__((__packed__)); +/* + * RFF_ID request - register FC-4 Features for ID. + */ +struct fc_ns_rff_id { + struct fc_ns_fid fr_fid; /* port ID object */ + __u8 fr_resvd[2]; + __u8 fr_feat; /* FC-4 Feature bits */ + __u8 fr_type; /* FC-4 type */ +} __attribute__((__packed__)); + #endif /* _FC_NS_H_ */ diff --git a/include/scsi/fc_encode.h b/include/scsi/fc_encode.h index ab2260cb149c..8eb0a0fc0a71 100644 --- a/include/scsi/fc_encode.h +++ b/include/scsi/fc_encode.h @@ -32,6 +32,7 @@ struct fc_ct_req { struct fc_ns_gid_ft gid; struct fc_ns_rn_id rn; struct fc_ns_rft rft; + struct fc_ns_rff_id rff; struct fc_ns_fid fid; struct fc_ns_rsnn snn; struct fc_ns_rspn spn; @@ -131,6 +132,17 @@ static inline int fc_ct_fill(struct fc_lport *lport, ct->payload.rft.fts = lport->fcts; break; + case FC_NS_RFF_ID: + ct = fc_ct_hdr_fill(fp, op, sizeof(struct fc_ns_rff_id)); + hton24(ct->payload.rff.fr_fid.fp_fid, + fc_host_port_id(lport->host)); + ct->payload.rff.fr_type = FC_TYPE_FCP; + if (lport->service_params & FCP_SPPF_INIT_FCN) + ct->payload.rff.fr_feat = FCP_FEAT_INIT; + if (lport->service_params & FCP_SPPF_TARG_FCN) + ct->payload.rff.fr_feat |= FCP_FEAT_TARG; + break; + case FC_NS_RNN_ID: ct = fc_ct_hdr_fill(fp, op, sizeof(struct fc_ns_rn_id)); hton24(ct->payload.rn.fr_fid.fp_fid, diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h index 67ce9fa1fee4..2936fbae41e4 100644 --- a/include/scsi/libfc.h +++ b/include/scsi/libfc.h @@ -62,6 +62,7 @@ * @LPORT_ST_DNS: Waiting for name server remote port to become ready * @LPORT_ST_RPN_ID: Register port name by ID (RPN_ID) sent * @LPORT_ST_RFT_ID: Register Fibre Channel types by ID (RFT_ID) sent + * @LPORT_ST_RFF_ID: Register FC-4 Features by ID (RFF_ID) sent * @LPORT_ST_SCR: State Change Register (SCR) sent * @LPORT_ST_READY: Ready for use * @LPORT_ST_LOGO: Local port logout (LOGO) sent @@ -75,6 +76,7 @@ enum fc_lport_state { LPORT_ST_RSNN_NN, LPORT_ST_RSPN_ID, LPORT_ST_RFT_ID, + LPORT_ST_RFF_ID, LPORT_ST_SCR, LPORT_ST_READY, LPORT_ST_LOGO, -- cgit v1.3-8-gc7d7 From d139b9bd0e52dda14fd13412e7096e68b56d0076 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Thu, 5 Nov 2009 13:33:12 -0600 Subject: [SCSI] scsi_lib_dma: fix bug with dma maps on nested scsi objects Some of our virtual SCSI hosts don't have a proper bus parent at the top, which can be a problem for doing DMA on them This patch makes the host device cache a pointer to the physical bus device and provides an extra API for setting it (the normal API picks it up from the parent). This patch also modifies the qla2xxx and lpfc vport logic to use the new DMA host setting API. Acked-By: James Smart Cc: Stable Tree Signed-off-by: James Bottomley --- drivers/scsi/hosts.c | 13 ++++++++++--- drivers/scsi/lpfc/lpfc_init.c | 2 +- drivers/scsi/qla2xxx/qla_attr.c | 3 ++- drivers/scsi/scsi_lib_dma.c | 4 ++-- include/scsi/scsi_host.h | 16 +++++++++++++++- 5 files changed, 30 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c index 5fd2da494d08..28a753d796f3 100644 --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -180,14 +180,20 @@ void scsi_remove_host(struct Scsi_Host *shost) EXPORT_SYMBOL(scsi_remove_host); /** - * scsi_add_host - add a scsi host + * scsi_add_host_with_dma - add a scsi host with dma device * @shost: scsi host pointer to add * @dev: a struct device of type scsi class + * @dma_dev: dma device for the host + * + * Note: You rarely need to worry about this unless you're in a + * virtualised host environments, so use the simpler scsi_add_host() + * function instead. * * Return value: * 0 on success / != 0 for error **/ -int scsi_add_host(struct Scsi_Host *shost, struct device *dev) +int scsi_add_host_with_dma(struct Scsi_Host *shost, struct device *dev, + struct device *dma_dev) { struct scsi_host_template *sht = shost->hostt; int error = -EINVAL; @@ -207,6 +213,7 @@ int scsi_add_host(struct Scsi_Host *shost, struct device *dev) if (!shost->shost_gendev.parent) shost->shost_gendev.parent = dev ? dev : &platform_bus; + shost->dma_dev = dma_dev; error = device_add(&shost->shost_gendev); if (error) @@ -262,7 +269,7 @@ int scsi_add_host(struct Scsi_Host *shost, struct device *dev) fail: return error; } -EXPORT_SYMBOL(scsi_add_host); +EXPORT_SYMBOL(scsi_add_host_with_dma); static void scsi_host_dev_release(struct device *dev) { diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 562d8cee874b..f913f1e93635 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -2408,7 +2408,7 @@ lpfc_create_port(struct lpfc_hba *phba, int instance, struct device *dev) vport->els_tmofunc.function = lpfc_els_timeout; vport->els_tmofunc.data = (unsigned long)vport; - error = scsi_add_host(shost, dev); + error = scsi_add_host_with_dma(shost, dev, &phba->pcidev->dev); if (error) goto out_put_shost; diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c index fbcb82a2f7f4..21e2bc4d7401 100644 --- a/drivers/scsi/qla2xxx/qla_attr.c +++ b/drivers/scsi/qla2xxx/qla_attr.c @@ -1654,7 +1654,8 @@ qla24xx_vport_create(struct fc_vport *fc_vport, bool disable) fc_vport_set_state(fc_vport, FC_VPORT_LINKDOWN); } - if (scsi_add_host(vha->host, &fc_vport->dev)) { + if (scsi_add_host_with_dma(vha->host, &fc_vport->dev, + &ha->pdev->dev)) { DEBUG15(printk("scsi(%ld): scsi_add_host failure for VP[%d].\n", vha->host_no, vha->vp_idx)); goto vport_create_failed_2; diff --git a/drivers/scsi/scsi_lib_dma.c b/drivers/scsi/scsi_lib_dma.c index ac6855cd2657..dcd128583b89 100644 --- a/drivers/scsi/scsi_lib_dma.c +++ b/drivers/scsi/scsi_lib_dma.c @@ -23,7 +23,7 @@ int scsi_dma_map(struct scsi_cmnd *cmd) int nseg = 0; if (scsi_sg_count(cmd)) { - struct device *dev = cmd->device->host->shost_gendev.parent; + struct device *dev = cmd->device->host->dma_dev; nseg = dma_map_sg(dev, scsi_sglist(cmd), scsi_sg_count(cmd), cmd->sc_data_direction); @@ -41,7 +41,7 @@ EXPORT_SYMBOL(scsi_dma_map); void scsi_dma_unmap(struct scsi_cmnd *cmd) { if (scsi_sg_count(cmd)) { - struct device *dev = cmd->device->host->shost_gendev.parent; + struct device *dev = cmd->device->host->dma_dev; dma_unmap_sg(dev, scsi_sglist(cmd), scsi_sg_count(cmd), cmd->sc_data_direction); diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index 603054d8f40c..6ff6bc18e294 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -682,6 +682,12 @@ struct Scsi_Host { */ void *shost_data; + /* + * Points to the physical bus device we'd use to do DMA + * Needed just in case we have virtual hosts. + */ + struct device *dma_dev; + /* * We should ensure that this is aligned, both for better performance * and also because some compilers (m68k) don't automatically force @@ -726,7 +732,9 @@ extern int scsi_queue_work(struct Scsi_Host *, struct work_struct *); extern void scsi_flush_work(struct Scsi_Host *); extern struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *, int); -extern int __must_check scsi_add_host(struct Scsi_Host *, struct device *); +extern int __must_check scsi_add_host_with_dma(struct Scsi_Host *, + struct device *, + struct device *); extern void scsi_scan_host(struct Scsi_Host *); extern void scsi_rescan_device(struct device *); extern void scsi_remove_host(struct Scsi_Host *); @@ -737,6 +745,12 @@ extern const char *scsi_host_state_name(enum scsi_host_state); extern u64 scsi_calculate_bounce_limit(struct Scsi_Host *); +static inline int __must_check scsi_add_host(struct Scsi_Host *host, + struct device *dev) +{ + return scsi_add_host_with_dma(host, dev, dev); +} + static inline struct device *scsi_get_device(struct Scsi_Host *shost) { return shost->shost_gendev.parent; -- cgit v1.3-8-gc7d7 From 5d12c05e29fc8715e3e32f57a8cced9290d87c55 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 11 Nov 2009 16:34:32 -0600 Subject: [SCSI] libiscsi: Check TMF state before sending PDU Patch and mail from both MikeC and HannesR: Before we're trying to send a PDU we have to check whether a TMF is active. If so and if the PDU will be affected by the TMF we should allow only Data-out PDUs to be sent. If fast_abort is set, no Data-out PDUs will be sent while a LUN reset is being processed for a affected LUN. fast_abort is now ingored during a ABORT TASK tmf. We will not send any Data-outs for a task if the task is being aborted. Signed-off-by: Mike Christie Signed-off-by: Hannes Reinecke Signed-off-by: James Bottomley --- drivers/scsi/libiscsi.c | 113 ++++++++++++++++++++++++++++++++++++++++----- include/scsi/iscsi_proto.h | 2 + 2 files changed, 103 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 8c29480fc02b..b6ffdc5512cd 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -265,6 +265,88 @@ static int iscsi_prep_bidi_ahs(struct iscsi_task *task) return 0; } +/** + * iscsi_check_tmf_restrictions - check if a task is affected by TMF + * @task: iscsi task + * @opcode: opcode to check for + * + * During TMF a task has to be checked if it's affected. + * All unrelated I/O can be passed through, but I/O to the + * affected LUN should be restricted. + * If 'fast_abort' is set we won't be sending any I/O to the + * affected LUN. + * Otherwise the target is waiting for all TTTs to be completed, + * so we have to send all outstanding Data-Out PDUs to the target. + */ +static int iscsi_check_tmf_restrictions(struct iscsi_task *task, int opcode) +{ + struct iscsi_conn *conn = task->conn; + struct iscsi_tm *tmf = &conn->tmhdr; + unsigned int hdr_lun; + + if (conn->tmf_state == TMF_INITIAL) + return 0; + + if ((tmf->opcode & ISCSI_OPCODE_MASK) != ISCSI_OP_SCSI_TMFUNC) + return 0; + + switch (ISCSI_TM_FUNC_VALUE(tmf)) { + case ISCSI_TM_FUNC_LOGICAL_UNIT_RESET: + /* + * Allow PDUs for unrelated LUNs + */ + hdr_lun = scsilun_to_int((struct scsi_lun *)tmf->lun); + if (hdr_lun != task->sc->device->lun) + return 0; + + /* + * Fail all SCSI cmd PDUs + */ + if (opcode != ISCSI_OP_SCSI_DATA_OUT) { + iscsi_conn_printk(KERN_INFO, conn, + "task [op %x/%x itt " + "0x%x/0x%x lun %u] " + "rejected.\n", + task->hdr->opcode, opcode, + task->itt, task->hdr_itt, hdr_lun); + return -EACCES; + } + /* + * And also all data-out PDUs in response to R2T + * if fast_abort is set. + */ + if (conn->session->fast_abort) { + iscsi_conn_printk(KERN_INFO, conn, + "task [op %x/%x itt " + "0x%x/0x%x lun %u] " + "fast abort.\n", + task->hdr->opcode, opcode, + task->itt, task->hdr_itt, hdr_lun); + return -EACCES; + } + break; + case ISCSI_TM_FUNC_ABORT_TASK: + /* + * the caller has already checked if the task + * they want to abort was in the pending queue so if + * we are here the cmd pdu has gone out already, and + * we will only hit this for data-outs + */ + if (opcode == ISCSI_OP_SCSI_DATA_OUT && + task->hdr_itt == tmf->rtt) { + ISCSI_DBG_SESSION(conn->session, + "Preventing task %x/%x from sending " + "data-out due to abort task in " + "progress\n", task->itt, + task->hdr_itt); + return -EACCES; + } + break; + } + + return 0; +} + /** * iscsi_prep_scsi_cmd_pdu - prep iscsi scsi cmd pdu * @task: iscsi task @@ -282,6 +364,10 @@ static int iscsi_prep_scsi_cmd_pdu(struct iscsi_task *task) itt_t itt; int rc; + rc = iscsi_check_tmf_restrictions(task, ISCSI_OP_SCSI_CMD); + if (rc) + return rc; + if (conn->session->tt->alloc_pdu) { rc = conn->session->tt->alloc_pdu(task, ISCSI_OP_SCSI_CMD); if (rc) @@ -1366,6 +1452,7 @@ EXPORT_SYMBOL_GPL(iscsi_requeue_task); **/ static int iscsi_data_xmit(struct iscsi_conn *conn) { + struct iscsi_task *task; int rc = 0; spin_lock_bh(&conn->session->lock); @@ -1403,11 +1490,8 @@ check_mgmt: /* process pending command queue */ while (!list_empty(&conn->cmdqueue)) { - if (conn->tmf_state == TMF_QUEUED) - break; - - conn->task = list_entry(conn->cmdqueue.next, - struct iscsi_task, running); + conn->task = list_entry(conn->cmdqueue.next, struct iscsi_task, + running); list_del_init(&conn->task->running); if (conn->session->state == ISCSI_STATE_LOGGING_OUT) { fail_scsi_task(conn->task, DID_IMM_RETRY); @@ -1415,7 +1499,7 @@ check_mgmt: } rc = iscsi_prep_scsi_cmd_pdu(conn->task); if (rc) { - if (rc == -ENOMEM) { + if (rc == -ENOMEM || rc == -EACCES) { list_add_tail(&conn->task->running, &conn->cmdqueue); conn->task = NULL; @@ -1437,17 +1521,18 @@ check_mgmt: } while (!list_empty(&conn->requeue)) { - if (conn->session->fast_abort && conn->tmf_state != TMF_INITIAL) - break; - /* * we always do fastlogout - conn stop code will clean up. */ if (conn->session->state == ISCSI_STATE_LOGGING_OUT) break; - conn->task = list_entry(conn->requeue.next, - struct iscsi_task, running); + task = list_entry(conn->requeue.next, struct iscsi_task, + running); + if (iscsi_check_tmf_restrictions(task, ISCSI_OP_SCSI_DATA_OUT)) + break; + + conn->task = task; list_del_init(&conn->task->running); conn->task->state = ISCSI_TASK_RUNNING; rc = iscsi_xmit_task(conn); @@ -1600,7 +1685,7 @@ int iscsi_queuecommand(struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *)) if (!ihost->workq) { reason = iscsi_prep_scsi_cmd_pdu(task); if (reason) { - if (reason == -ENOMEM) { + if (reason == -ENOMEM || reason == -EACCES) { reason = FAILURE_OOM; goto prepd_reject; } else { @@ -2120,6 +2205,7 @@ int iscsi_eh_abort(struct scsi_cmnd *sc) spin_lock_bh(&session->lock); fail_scsi_task(task, DID_ABORT); conn->tmf_state = TMF_INITIAL; + memset(hdr, 0, sizeof(*hdr)); spin_unlock_bh(&session->lock); iscsi_start_tx(conn); goto success_unlocked; @@ -2130,6 +2216,7 @@ int iscsi_eh_abort(struct scsi_cmnd *sc) case TMF_NOT_FOUND: if (!sc->SCp.ptr) { conn->tmf_state = TMF_INITIAL; + memset(hdr, 0, sizeof(*hdr)); /* task completed before tmf abort response */ ISCSI_DBG_EH(session, "sc completed while abort in " "progress\n"); @@ -2224,6 +2311,7 @@ int iscsi_eh_device_reset(struct scsi_cmnd *sc) iscsi_suspend_tx(conn); spin_lock_bh(&session->lock); + memset(hdr, 0, sizeof(*hdr)); fail_scsi_tasks(conn, sc->device->lun, DID_ERROR); conn->tmf_state = TMF_INITIAL; spin_unlock_bh(&session->lock); @@ -2868,6 +2956,7 @@ static void iscsi_start_session_recovery(struct iscsi_session *session, spin_lock_bh(&session->lock); fail_scsi_tasks(conn, -1, DID_TRANSPORT_DISRUPTED); fail_mgmt_tasks(session, conn); + memset(&conn->tmhdr, 0, sizeof(conn->tmhdr)); spin_unlock_bh(&session->lock); mutex_unlock(&session->eh_mutex); } diff --git a/include/scsi/iscsi_proto.h b/include/scsi/iscsi_proto.h index f2a2c1169486..dd0a52cea95a 100644 --- a/include/scsi/iscsi_proto.h +++ b/include/scsi/iscsi_proto.h @@ -279,6 +279,8 @@ struct iscsi_tm { #define ISCSI_TM_FUNC_TARGET_COLD_RESET 7 #define ISCSI_TM_FUNC_TASK_REASSIGN 8 +#define ISCSI_TM_FUNC_VALUE(hdr) ((hdr)->flags & ISCSI_FLAG_TM_FUNC_MASK) + /* SCSI Task Management Response Header */ struct iscsi_tm_rsp { uint8_t opcode; -- cgit v1.3-8-gc7d7 From 3fe5ae8b4c4d3a82c755074878da7ddb9dde381e Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 11 Nov 2009 16:34:33 -0600 Subject: [SCSI] libiscsi: add warm target reset tmf support This implements warm target reset tmf support for the scsi-ml target reset callback. Previously we would just drop the session in that callback. This patch will now try a target reset and if that fails drop the session. Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- drivers/scsi/be2iscsi/be_main.c | 2 +- drivers/scsi/bnx2i/bnx2i_iscsi.c | 2 +- drivers/scsi/cxgb3i/cxgb3i_iscsi.c | 2 +- drivers/scsi/iscsi_tcp.c | 2 +- drivers/scsi/libiscsi.c | 251 +++++++++++++++++++++++++----------- drivers/scsi/scsi_transport_iscsi.c | 4 +- include/scsi/iscsi_if.h | 3 + include/scsi/libiscsi.h | 1 + 8 files changed, 190 insertions(+), 77 deletions(-) (limited to 'include') diff --git a/drivers/scsi/be2iscsi/be_main.c b/drivers/scsi/be2iscsi/be_main.c index d15df07ba783..1a557fa77888 100644 --- a/drivers/scsi/be2iscsi/be_main.c +++ b/drivers/scsi/be2iscsi/be_main.c @@ -3859,7 +3859,7 @@ struct iscsi_transport beiscsi_iscsi_transport = { ISCSI_USERNAME | ISCSI_PASSWORD | ISCSI_USERNAME_IN | ISCSI_PASSWORD_IN | ISCSI_FAST_ABORT | ISCSI_ABORT_TMO | - ISCSI_LU_RESET_TMO | + ISCSI_LU_RESET_TMO | ISCSI_TGT_RESET_TMO | ISCSI_PING_TMO | ISCSI_RECV_TMO | ISCSI_IFACE_NAME | ISCSI_INITIATOR_NAME, .host_param_mask = ISCSI_HOST_HWADDRESS | ISCSI_HOST_IPADDRESS | diff --git a/drivers/scsi/bnx2i/bnx2i_iscsi.c b/drivers/scsi/bnx2i/bnx2i_iscsi.c index 89e84c302aa0..070118a8f184 100644 --- a/drivers/scsi/bnx2i/bnx2i_iscsi.c +++ b/drivers/scsi/bnx2i/bnx2i_iscsi.c @@ -2028,7 +2028,7 @@ struct iscsi_transport bnx2i_iscsi_transport = { ISCSI_USERNAME | ISCSI_PASSWORD | ISCSI_USERNAME_IN | ISCSI_PASSWORD_IN | ISCSI_FAST_ABORT | ISCSI_ABORT_TMO | - ISCSI_LU_RESET_TMO | + ISCSI_LU_RESET_TMO | ISCSI_TGT_RESET_TMO | ISCSI_PING_TMO | ISCSI_RECV_TMO | ISCSI_IFACE_NAME | ISCSI_INITIATOR_NAME, .host_param_mask = ISCSI_HOST_HWADDRESS | ISCSI_HOST_NETDEV_NAME, diff --git a/drivers/scsi/cxgb3i/cxgb3i_iscsi.c b/drivers/scsi/cxgb3i/cxgb3i_iscsi.c index 2631bddd255e..969c83162cc4 100644 --- a/drivers/scsi/cxgb3i/cxgb3i_iscsi.c +++ b/drivers/scsi/cxgb3i/cxgb3i_iscsi.c @@ -937,7 +937,7 @@ static struct iscsi_transport cxgb3i_iscsi_transport = { ISCSI_USERNAME | ISCSI_PASSWORD | ISCSI_USERNAME_IN | ISCSI_PASSWORD_IN | ISCSI_FAST_ABORT | ISCSI_ABORT_TMO | - ISCSI_LU_RESET_TMO | + ISCSI_LU_RESET_TMO | ISCSI_TGT_RESET_TMO | ISCSI_PING_TMO | ISCSI_RECV_TMO | ISCSI_IFACE_NAME | ISCSI_INITIATOR_NAME, .host_param_mask = ISCSI_HOST_HWADDRESS | ISCSI_HOST_IPADDRESS | diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c index edc49ca49cea..517da3fd89d3 100644 --- a/drivers/scsi/iscsi_tcp.c +++ b/drivers/scsi/iscsi_tcp.c @@ -903,7 +903,7 @@ static struct iscsi_transport iscsi_sw_tcp_transport = { ISCSI_USERNAME | ISCSI_PASSWORD | ISCSI_USERNAME_IN | ISCSI_PASSWORD_IN | ISCSI_FAST_ABORT | ISCSI_ABORT_TMO | - ISCSI_LU_RESET_TMO | + ISCSI_LU_RESET_TMO | ISCSI_TGT_RESET_TMO | ISCSI_PING_TMO | ISCSI_RECV_TMO | ISCSI_IFACE_NAME | ISCSI_INITIATOR_NAME, .host_param_mask = ISCSI_HOST_HWADDRESS | ISCSI_HOST_IPADDRESS | diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index b6ffdc5512cd..07ec997c5d4f 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -298,17 +298,18 @@ static int iscsi_check_tmf_restrictions(struct iscsi_task *task, int opcode) hdr_lun = scsilun_to_int((struct scsi_lun *)tmf->lun); if (hdr_lun != task->sc->device->lun) return 0; - + /* fall through */ + case ISCSI_TM_FUNC_TARGET_WARM_RESET: /* * Fail all SCSI cmd PDUs */ if (opcode != ISCSI_OP_SCSI_DATA_OUT) { iscsi_conn_printk(KERN_INFO, conn, "task [op %x/%x itt " - "0x%x/0x%x lun %u] " + "0x%x/0x%x] " "rejected.\n", task->hdr->opcode, opcode, - task->itt, task->hdr_itt, hdr_lun); + task->itt, task->hdr_itt); return -EACCES; } /* @@ -318,10 +319,9 @@ static int iscsi_check_tmf_restrictions(struct iscsi_task *task, int opcode) if (conn->session->fast_abort) { iscsi_conn_printk(KERN_INFO, conn, "task [op %x/%x itt " - "0x%x/0x%x lun %u] " - "fast abort.\n", + "0x%x/0x%x] fast abort.\n", task->hdr->opcode, opcode, - task->itt, task->hdr_itt, hdr_lun); + task->itt, task->hdr_itt); return -EACCES; } break; @@ -1757,72 +1757,6 @@ int iscsi_target_alloc(struct scsi_target *starget) } EXPORT_SYMBOL_GPL(iscsi_target_alloc); -void iscsi_session_recovery_timedout(struct iscsi_cls_session *cls_session) -{ - struct iscsi_session *session = cls_session->dd_data; - - spin_lock_bh(&session->lock); - if (session->state != ISCSI_STATE_LOGGED_IN) { - session->state = ISCSI_STATE_RECOVERY_FAILED; - if (session->leadconn) - wake_up(&session->leadconn->ehwait); - } - spin_unlock_bh(&session->lock); -} -EXPORT_SYMBOL_GPL(iscsi_session_recovery_timedout); - -int iscsi_eh_target_reset(struct scsi_cmnd *sc) -{ - struct iscsi_cls_session *cls_session; - struct iscsi_session *session; - struct iscsi_conn *conn; - - cls_session = starget_to_session(scsi_target(sc->device)); - session = cls_session->dd_data; - conn = session->leadconn; - - mutex_lock(&session->eh_mutex); - spin_lock_bh(&session->lock); - if (session->state == ISCSI_STATE_TERMINATE) { -failed: - ISCSI_DBG_EH(session, - "failing target reset: Could not log back into " - "target [age %d]\n", - session->age); - spin_unlock_bh(&session->lock); - mutex_unlock(&session->eh_mutex); - return FAILED; - } - - spin_unlock_bh(&session->lock); - mutex_unlock(&session->eh_mutex); - /* - * we drop the lock here but the leadconn cannot be destoyed while - * we are in the scsi eh - */ - iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED); - - ISCSI_DBG_EH(session, "wait for relogin\n"); - wait_event_interruptible(conn->ehwait, - session->state == ISCSI_STATE_TERMINATE || - session->state == ISCSI_STATE_LOGGED_IN || - session->state == ISCSI_STATE_RECOVERY_FAILED); - if (signal_pending(current)) - flush_signals(current); - - mutex_lock(&session->eh_mutex); - spin_lock_bh(&session->lock); - if (session->state == ISCSI_STATE_LOGGED_IN) { - ISCSI_DBG_EH(session, - "target reset succeeded\n"); - } else - goto failed; - spin_unlock_bh(&session->lock); - mutex_unlock(&session->eh_mutex); - return SUCCESS; -} -EXPORT_SYMBOL_GPL(iscsi_eh_target_reset); - static void iscsi_tmf_timedout(unsigned long data) { struct iscsi_conn *conn = (struct iscsi_conn *)data; @@ -2329,6 +2263,172 @@ done: } EXPORT_SYMBOL_GPL(iscsi_eh_device_reset); +void iscsi_session_recovery_timedout(struct iscsi_cls_session *cls_session) +{ + struct iscsi_session *session = cls_session->dd_data; + + spin_lock_bh(&session->lock); + if (session->state != ISCSI_STATE_LOGGED_IN) { + session->state = ISCSI_STATE_RECOVERY_FAILED; + if (session->leadconn) + wake_up(&session->leadconn->ehwait); + } + spin_unlock_bh(&session->lock); +} +EXPORT_SYMBOL_GPL(iscsi_session_recovery_timedout); + +/** + * iscsi_eh_session_reset - drop session and attempt relogin + * @sc: scsi command + * + * This function will wait for a relogin, session termination from + * userspace, or a recovery/replacement timeout. + */ +static int iscsi_eh_session_reset(struct scsi_cmnd *sc) +{ + struct iscsi_cls_session *cls_session; + struct iscsi_session *session; + struct iscsi_conn *conn; + + cls_session = starget_to_session(scsi_target(sc->device)); + session = cls_session->dd_data; + conn = session->leadconn; + + mutex_lock(&session->eh_mutex); + spin_lock_bh(&session->lock); + if (session->state == ISCSI_STATE_TERMINATE) { +failed: + ISCSI_DBG_EH(session, + "failing session reset: Could not log back into " + "%s, %s [age %d]\n", session->targetname, + conn->persistent_address, session->age); + spin_unlock_bh(&session->lock); + mutex_unlock(&session->eh_mutex); + return FAILED; + } + + spin_unlock_bh(&session->lock); + mutex_unlock(&session->eh_mutex); + /* + * we drop the lock here but the leadconn cannot be destoyed while + * we are in the scsi eh + */ + iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED); + + ISCSI_DBG_EH(session, "wait for relogin\n"); + wait_event_interruptible(conn->ehwait, + session->state == ISCSI_STATE_TERMINATE || + session->state == ISCSI_STATE_LOGGED_IN || + session->state == ISCSI_STATE_RECOVERY_FAILED); + if (signal_pending(current)) + flush_signals(current); + + mutex_lock(&session->eh_mutex); + spin_lock_bh(&session->lock); + if (session->state == ISCSI_STATE_LOGGED_IN) { + ISCSI_DBG_EH(session, + "session reset succeeded for %s,%s\n", + session->targetname, conn->persistent_address); + } else + goto failed; + spin_unlock_bh(&session->lock); + mutex_unlock(&session->eh_mutex); + return SUCCESS; +} + +static void iscsi_prep_tgt_reset_pdu(struct scsi_cmnd *sc, struct iscsi_tm *hdr) +{ + memset(hdr, 0, sizeof(*hdr)); + hdr->opcode = ISCSI_OP_SCSI_TMFUNC | ISCSI_OP_IMMEDIATE; + hdr->flags = ISCSI_TM_FUNC_TARGET_WARM_RESET & ISCSI_FLAG_TM_FUNC_MASK; + hdr->flags |= ISCSI_FLAG_CMD_FINAL; + hdr->rtt = RESERVED_ITT; +} + +/** + * iscsi_eh_target_reset - reset target + * @sc: scsi command + * + * This will attempt to send a warm target reset. If that fails + * then we will drop the session and attempt ERL0 recovery. + */ +int iscsi_eh_target_reset(struct scsi_cmnd *sc) +{ + struct iscsi_cls_session *cls_session; + struct iscsi_session *session; + struct iscsi_conn *conn; + struct iscsi_tm *hdr; + int rc = FAILED; + + cls_session = starget_to_session(scsi_target(sc->device)); + session = cls_session->dd_data; + + ISCSI_DBG_EH(session, "tgt Reset [sc %p tgt %s]\n", sc, + session->targetname); + + mutex_lock(&session->eh_mutex); + spin_lock_bh(&session->lock); + /* + * Just check if we are not logged in. We cannot check for + * the phase because the reset could come from a ioctl. + */ + if (!session->leadconn || session->state != ISCSI_STATE_LOGGED_IN) + goto unlock; + conn = session->leadconn; + + /* only have one tmf outstanding at a time */ + if (conn->tmf_state != TMF_INITIAL) + goto unlock; + conn->tmf_state = TMF_QUEUED; + + hdr = &conn->tmhdr; + iscsi_prep_tgt_reset_pdu(sc, hdr); + + if (iscsi_exec_task_mgmt_fn(conn, hdr, session->age, + session->tgt_reset_timeout)) { + rc = FAILED; + goto unlock; + } + + switch (conn->tmf_state) { + case TMF_SUCCESS: + break; + case TMF_TIMEDOUT: + spin_unlock_bh(&session->lock); + iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED); + goto done; + default: + conn->tmf_state = TMF_INITIAL; + goto unlock; + } + + rc = SUCCESS; + spin_unlock_bh(&session->lock); + + iscsi_suspend_tx(conn); + + spin_lock_bh(&session->lock); + memset(hdr, 0, sizeof(*hdr)); + fail_scsi_tasks(conn, -1, DID_ERROR); + conn->tmf_state = TMF_INITIAL; + spin_unlock_bh(&session->lock); + + iscsi_start_tx(conn); + goto done; + +unlock: + spin_unlock_bh(&session->lock); +done: + ISCSI_DBG_EH(session, "tgt %s reset result = %s\n", session->targetname, + rc == SUCCESS ? "SUCCESS" : "FAILED"); + mutex_unlock(&session->eh_mutex); + + if (rc == FAILED) + rc = iscsi_eh_session_reset(sc); + return rc; +} +EXPORT_SYMBOL_GPL(iscsi_eh_target_reset); + /* * Pre-allocate a pool of @max items of @item_size. By default, the pool * should be accessed via kfifo_{get,put} on q->queue. @@ -2595,6 +2695,7 @@ iscsi_session_setup(struct iscsi_transport *iscsit, struct Scsi_Host *shost, session->host = shost; session->state = ISCSI_STATE_FREE; session->fast_abort = 1; + session->tgt_reset_timeout = 30; session->lu_reset_timeout = 15; session->abort_timeout = 10; session->scsi_cmds_max = scsi_cmds; @@ -3033,6 +3134,9 @@ int iscsi_set_param(struct iscsi_cls_conn *cls_conn, case ISCSI_PARAM_LU_RESET_TMO: sscanf(buf, "%d", &session->lu_reset_timeout); break; + case ISCSI_PARAM_TGT_RESET_TMO: + sscanf(buf, "%d", &session->tgt_reset_timeout); + break; case ISCSI_PARAM_PING_TMO: sscanf(buf, "%d", &conn->ping_timeout); break; @@ -3132,6 +3236,9 @@ int iscsi_session_get_param(struct iscsi_cls_session *cls_session, case ISCSI_PARAM_LU_RESET_TMO: len = sprintf(buf, "%d\n", session->lu_reset_timeout); break; + case ISCSI_PARAM_TGT_RESET_TMO: + len = sprintf(buf, "%d\n", session->tgt_reset_timeout); + break; case ISCSI_PARAM_INITIAL_R2T_EN: len = sprintf(buf, "%d\n", session->initial_r2t_en); break; diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index ad897df36615..dc04ca124a69 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -30,7 +30,7 @@ #include #include -#define ISCSI_SESSION_ATTRS 21 +#define ISCSI_SESSION_ATTRS 22 #define ISCSI_CONN_ATTRS 13 #define ISCSI_HOST_ATTRS 4 @@ -1759,6 +1759,7 @@ iscsi_session_attr(password_in, ISCSI_PARAM_PASSWORD_IN, 1); iscsi_session_attr(fast_abort, ISCSI_PARAM_FAST_ABORT, 0); iscsi_session_attr(abort_tmo, ISCSI_PARAM_ABORT_TMO, 0); iscsi_session_attr(lu_reset_tmo, ISCSI_PARAM_LU_RESET_TMO, 0); +iscsi_session_attr(tgt_reset_tmo, ISCSI_PARAM_TGT_RESET_TMO, 0); iscsi_session_attr(ifacename, ISCSI_PARAM_IFACE_NAME, 0); iscsi_session_attr(initiatorname, ISCSI_PARAM_INITIATOR_NAME, 0) @@ -2000,6 +2001,7 @@ iscsi_register_transport(struct iscsi_transport *tt) SETUP_SESSION_RD_ATTR(fast_abort, ISCSI_FAST_ABORT); SETUP_SESSION_RD_ATTR(abort_tmo, ISCSI_ABORT_TMO); SETUP_SESSION_RD_ATTR(lu_reset_tmo,ISCSI_LU_RESET_TMO); + SETUP_SESSION_RD_ATTR(tgt_reset_tmo,ISCSI_TGT_RESET_TMO); SETUP_SESSION_RD_ATTR(ifacename, ISCSI_IFACE_NAME); SETUP_SESSION_RD_ATTR(initiatorname, ISCSI_INITIATOR_NAME); SETUP_PRIV_SESSION_RD_ATTR(recovery_tmo); diff --git a/include/scsi/iscsi_if.h b/include/scsi/iscsi_if.h index d67dda2b6aa0..66d377b9c72b 100644 --- a/include/scsi/iscsi_if.h +++ b/include/scsi/iscsi_if.h @@ -311,6 +311,8 @@ enum iscsi_param { ISCSI_PARAM_IFACE_NAME, ISCSI_PARAM_ISID, ISCSI_PARAM_INITIATOR_NAME, + + ISCSI_PARAM_TGT_RESET_TMO, /* must always be last */ ISCSI_PARAM_MAX, }; @@ -350,6 +352,7 @@ enum iscsi_param { #define ISCSI_IFACE_NAME (1ULL << ISCSI_PARAM_IFACE_NAME) #define ISCSI_ISID (1ULL << ISCSI_PARAM_ISID) #define ISCSI_INITIATOR_NAME (1ULL << ISCSI_PARAM_INITIATOR_NAME) +#define ISCSI_TGT_RESET_TMO (1ULL << ISCSI_PARAM_TGT_RESET_TMO) /* iSCSI HBA params */ enum iscsi_host_param { diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h index 2db2bc26b1e9..7394e3bc8f4b 100644 --- a/include/scsi/libiscsi.h +++ b/include/scsi/libiscsi.h @@ -267,6 +267,7 @@ struct iscsi_session { /* configuration */ int abort_timeout; int lu_reset_timeout; + int tgt_reset_timeout; int initial_r2t_en; unsigned max_r2t; int imm_data_en; -- cgit v1.3-8-gc7d7 From d531b37929f412de09e9ad711fdd5b04fa39aca1 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Mon, 16 Nov 2009 20:39:25 +0200 Subject: [SCSI] libosd: osd_dev_is_ver1 - Minor API cleanup define a new osd_dev_is_ver1 that operates on devices and the old osd_req_is_ver1 uses that new API. Signed-off-by: Boaz Harrosh Signed-off-by: James Bottomley --- include/scsi/osd_initiator.h | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/scsi/osd_initiator.h b/include/scsi/osd_initiator.h index 02bd9f716357..f787d24d3bab 100644 --- a/include/scsi/osd_initiator.h +++ b/include/scsi/osd_initiator.h @@ -84,6 +84,15 @@ static inline void osd_dev_set_ver(struct osd_dev *od, enum osd_std_version v) #endif } +static inline bool osd_dev_is_ver1(struct osd_dev *od) +{ +#ifdef OSD_VER1_SUPPORT + return od->version == OSD_VER1; +#else + return false; +#endif +} + struct osd_request; typedef void (osd_req_done_fn)(struct osd_request *or, void *private); @@ -120,14 +129,9 @@ struct osd_request { int async_error; }; -/* OSD Version control */ static inline bool osd_req_is_ver1(struct osd_request *or) { -#ifdef OSD_VER1_SUPPORT - return or->osd_dev->version == OSD_VER1; -#else - return false; -#endif + return osd_dev_is_ver1(or->osd_dev); } /* -- cgit v1.3-8-gc7d7 From c7d2dc2a204fa37bdf607d4d062dfd14e392aaf1 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Mon, 16 Nov 2009 20:41:03 +0200 Subject: [SCSI] libosd: osd_sense: OSD_CFO_PERMISSIONS Add one more important cdb_field_offset that can be returned with scsi_invalid_field_in_cdb. It is the offset of the permissions_bit_mask field in the capabilities structure. Interestingly, the offset is the same for V1/V2 Signed-off-by: Boaz Harrosh Signed-off-by: James Bottomley --- include/scsi/osd_sense.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/scsi/osd_sense.h b/include/scsi/osd_sense.h index ff9b33c773c7..91db543a5502 100644 --- a/include/scsi/osd_sense.h +++ b/include/scsi/osd_sense.h @@ -255,6 +255,9 @@ enum osdv2_cdb_field_offset { OSD_CFO_STARTING_BYTE = OSD_CDB_OFFSET(v2.start_address), OSD_CFO_PARTITION_ID = OSD_CDB_OFFSET(partition), OSD_CFO_OBJECT_ID = OSD_CDB_OFFSET(object), + OSD_CFO_PERMISSIONS = sizeof(struct osd_cdb_head) + + offsetof(struct osd_capability_head, + permissions_bit_mask), }; #endif /* ndef __OSD_SENSE_H__ */ -- cgit v1.3-8-gc7d7 From d6ae4333e648492721a098bdc329bbd82d25eb67 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Sun, 29 Nov 2009 16:25:26 +0200 Subject: [SCSI] osduld: Use device->release instead of internal kref The true logic of this patch will be clear in the next patch where we use the class_find_device() API. When doing so the use of an internal kref leaves us a narrow window where a find is started while the actual object can go away. Using the device's kobj reference solves this problem because now the same kref is used for both operations. (Remove and find) Core changes * Embed a struct device in uld_ structure and use device_register instead of devie_create. Set __remove to be the device release function. * __uld_get/put is just get_/put_device. Now every thing is accounted for on the device object. Internal kref is removed. * At __remove() we can safely de-allocate the uld_ structure. (The function has moved to avoid forward declaration) Some cleanups * Use class register/unregister is cleaner for this driver now. * cdev ref-counting games are no longer necessary I have incremented the device version string in case of new bugs. Note: Previous bugfix of taking the reference around fput() still applies. Signed-off-by: Boaz Harrosh Signed-off-by: James Bottomley --- drivers/scsi/osd/osd_uld.c | 162 ++++++++++++++++++++----------------------- include/scsi/osd_initiator.h | 1 - 2 files changed, 77 insertions(+), 86 deletions(-) (limited to 'include') diff --git a/drivers/scsi/osd/osd_uld.c b/drivers/scsi/osd/osd_uld.c index 1ea6447f9418..fc6fc1c4d4d1 100644 --- a/drivers/scsi/osd/osd_uld.c +++ b/drivers/scsi/osd/osd_uld.c @@ -71,8 +71,7 @@ #define SCSI_OSD_MAX_MINOR 64 static const char osd_name[] = "osd"; -static const char *osd_version_string = "open-osd 0.1.0"; -const char osd_symlink[] = "scsi_osd"; +static const char *osd_version_string = "open-osd 0.2.0"; MODULE_AUTHOR("Boaz Harrosh "); MODULE_DESCRIPTION("open-osd Upper-Layer-Driver osd.ko"); @@ -82,15 +81,24 @@ MODULE_ALIAS_SCSI_DEVICE(TYPE_OSD); struct osd_uld_device { int minor; - struct kref kref; + struct device class_dev; struct cdev cdev; struct osd_dev od; struct gendisk *disk; - struct device *class_member; }; -static void __uld_get(struct osd_uld_device *oud); -static void __uld_put(struct osd_uld_device *oud); +struct osd_dev_handle { + struct osd_dev od; + struct file *file; + struct osd_uld_device *oud; +} ; + +static DEFINE_IDA(osd_minor_ida); + +static struct class osd_uld_class = { + .owner = THIS_MODULE, + .name = "scsi_osd", +}; /* * Char Device operations @@ -101,7 +109,7 @@ static int osd_uld_open(struct inode *inode, struct file *file) struct osd_uld_device *oud = container_of(inode->i_cdev, struct osd_uld_device, cdev); - __uld_get(oud); + get_device(&oud->class_dev); /* cache osd_uld_device on file handle */ file->private_data = oud; OSD_DEBUG("osd_uld_open %p\n", oud); @@ -114,7 +122,7 @@ static int osd_uld_release(struct inode *inode, struct file *file) OSD_DEBUG("osd_uld_release %p\n", file->private_data); file->private_data = NULL; - __uld_put(oud); + put_device(&oud->class_dev); return 0; } @@ -177,7 +185,7 @@ static const struct file_operations osd_fops = { struct osd_dev *osduld_path_lookup(const char *name) { struct osd_uld_device *oud; - struct osd_dev *od; + struct osd_dev_handle *odh; struct file *file; int error; @@ -186,8 +194,8 @@ struct osd_dev *osduld_path_lookup(const char *name) return ERR_PTR(-EINVAL); } - od = kzalloc(sizeof(*od), GFP_KERNEL); - if (!od) + odh = kzalloc(sizeof(*odh), GFP_KERNEL); + if (unlikely(!odh)) return ERR_PTR(-ENOMEM); file = filp_open(name, O_RDWR, 0); @@ -203,37 +211,39 @@ struct osd_dev *osduld_path_lookup(const char *name) oud = file->private_data; - *od = oud->od; - od->file = file; + odh->od = oud->od; + odh->file = file; + odh->oud = oud; - return od; + return &odh->od; close_file: fput(file); free_od: - kfree(od); + kfree(odh); return ERR_PTR(error); } EXPORT_SYMBOL(osduld_path_lookup); void osduld_put_device(struct osd_dev *od) { - if (od && !IS_ERR(od)) { - struct osd_uld_device *oud = od->file->private_data; + struct osd_dev_handle *odh = + container_of(od, struct osd_dev_handle, od); + struct osd_uld_device *oud = odh->oud; BUG_ON(od->scsi_device != oud->od.scsi_device); /* If scsi has released the device (logout), and exofs has last * reference on oud it will be freed by above osd_uld_release * within fput below. But this will oops in cdev_release which - * is called after the fops->release. __uld_get/put pair makes + * is called after the fops->release. A get_/put_ pair makes * sure we have a cdev for the duration of fput */ - __uld_get(oud); - fput(od->file); - __uld_put(oud); - kfree(od); + get_device(&oud->class_dev); + fput(odh->file); + put_device(&oud->class_dev); + kfree(odh); } } EXPORT_SYMBOL(osduld_put_device); @@ -264,8 +274,27 @@ static int __detect_osd(struct osd_uld_device *oud) return 0; } -static struct class *osd_sysfs_class; -static DEFINE_IDA(osd_minor_ida); +static void __remove(struct device *dev) +{ + struct osd_uld_device *oud = container_of(dev, struct osd_uld_device, + class_dev); + struct scsi_device *scsi_device = oud->od.scsi_device; + + if (oud->cdev.owner) + cdev_del(&oud->cdev); + + osd_dev_fini(&oud->od); + scsi_device_put(scsi_device); + + OSD_INFO("osd_remove %s\n", + oud->disk ? oud->disk->disk_name : NULL); + + if (oud->disk) + put_disk(oud->disk); + ida_remove(&osd_minor_ida, oud->minor); + + kfree(oud); +} static int osd_probe(struct device *dev) { @@ -297,7 +326,6 @@ static int osd_probe(struct device *dev) if (NULL == oud) goto err_retract_minor; - kref_init(&oud->kref); dev_set_drvdata(dev, oud); oud->minor = minor; @@ -335,18 +363,25 @@ static int osd_probe(struct device *dev) OSD_ERR("cdev_add failed\n"); goto err_put_disk; } - kobject_get(&oud->cdev.kobj); /* 2nd ref see osd_remove() */ - - /* class_member */ - oud->class_member = device_create(osd_sysfs_class, dev, - MKDEV(SCSI_OSD_MAJOR, oud->minor), "%s", disk->disk_name); - if (IS_ERR(oud->class_member)) { - OSD_ERR("class_device_create failed\n"); - error = PTR_ERR(oud->class_member); + + /* class device member */ + oud->class_dev.devt = oud->cdev.dev; + oud->class_dev.class = &osd_uld_class; + oud->class_dev.parent = dev; + oud->class_dev.release = __remove; + error = dev_set_name(&oud->class_dev, disk->disk_name); + if (error) { + OSD_ERR("dev_set_name failed => %d\n", error); goto err_put_cdev; } - dev_set_drvdata(oud->class_member, oud); + error = device_register(&oud->class_dev); + if (error) { + OSD_ERR("device_register failed => %d\n", error); + goto err_put_cdev; + } + + get_device(&oud->class_dev); OSD_INFO("osd_probe %s\n", disk->disk_name); return 0; @@ -375,54 +410,12 @@ static int osd_remove(struct device *dev) scsi_device); } - if (oud->class_member) - device_destroy(osd_sysfs_class, - MKDEV(SCSI_OSD_MAJOR, oud->minor)); - - /* We have 2 references to the cdev. One is released here - * and also takes down the /dev/osdX mapping. The second - * Will be released in __remove() after all users have released - * the osd_uld_device. - */ - if (oud->cdev.owner) - cdev_del(&oud->cdev); + device_unregister(&oud->class_dev); - __uld_put(oud); + put_device(&oud->class_dev); return 0; } -static void __remove(struct kref *kref) -{ - struct osd_uld_device *oud = container_of(kref, - struct osd_uld_device, kref); - struct scsi_device *scsi_device = oud->od.scsi_device; - - /* now let delete the char_dev */ - kobject_put(&oud->cdev.kobj); - - osd_dev_fini(&oud->od); - scsi_device_put(scsi_device); - - OSD_INFO("osd_remove %s\n", - oud->disk ? oud->disk->disk_name : NULL); - - if (oud->disk) - put_disk(oud->disk); - - ida_remove(&osd_minor_ida, oud->minor); - kfree(oud); -} - -static void __uld_get(struct osd_uld_device *oud) -{ - kref_get(&oud->kref); -} - -static void __uld_put(struct osd_uld_device *oud) -{ - kref_put(&oud->kref, __remove); -} - /* * Global driver and scsi registration */ @@ -440,11 +433,10 @@ static int __init osd_uld_init(void) { int err; - osd_sysfs_class = class_create(THIS_MODULE, osd_symlink); - if (IS_ERR(osd_sysfs_class)) { - OSD_ERR("Unable to register sysfs class => %ld\n", - PTR_ERR(osd_sysfs_class)); - return PTR_ERR(osd_sysfs_class); + err = class_register(&osd_uld_class); + if (err) { + OSD_ERR("Unable to register sysfs class => %d\n", err); + return err; } err = register_chrdev_region(MKDEV(SCSI_OSD_MAJOR, 0), @@ -467,7 +459,7 @@ static int __init osd_uld_init(void) err_out_chrdev: unregister_chrdev_region(MKDEV(SCSI_OSD_MAJOR, 0), SCSI_OSD_MAX_MINOR); err_out: - class_destroy(osd_sysfs_class); + class_unregister(&osd_uld_class); return err; } @@ -475,7 +467,7 @@ static void __exit osd_uld_exit(void) { scsi_unregister_driver(&osd_driver.gendrv); unregister_chrdev_region(MKDEV(SCSI_OSD_MAJOR, 0), SCSI_OSD_MAX_MINOR); - class_destroy(osd_sysfs_class); + class_unregister(&osd_uld_class); OSD_INFO("UNLOADED %s\n", osd_version_string); } diff --git a/include/scsi/osd_initiator.h b/include/scsi/osd_initiator.h index f787d24d3bab..589e5f0d67b1 100644 --- a/include/scsi/osd_initiator.h +++ b/include/scsi/osd_initiator.h @@ -48,7 +48,6 @@ enum osd_std_version { */ struct osd_dev { struct scsi_device *scsi_device; - struct file *file; unsigned def_timeout; #ifdef OSD_VER1_SUPPORT -- cgit v1.3-8-gc7d7 From 2cdd6410e5a1665823f2a048fc7f8f6a8384be1d Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Sun, 29 Nov 2009 16:26:45 +0200 Subject: [SCSI] libosd: osd_dev_info: Unique Identification of an OSD device Define an osd_dev_info structure that Uniquely identifies an OSD device lun on the network. The identification is built from unique target attributes and is the same for all network/SAN machines. osduld_info_lookup() - NEW New API that will lookup an osd_dev by its osd_dev_info. This is used by pNFS-objects for cross network global device identification. And by exofs multy-device support, the device info is specified in the on-disk exofs device table. osduld_device_info() - NEW Given an osd_dev handle returns its associated osd_dev_info. The ULD fetches this information at startup and hangs it on each OSD device. (This is a fast operation that can be called at any condition) osduld_device_same() - NEW With a given osd_dev at one hand and an osd_dev_info at another, we would like to know if they are the same device. Two osd_dev handles can be checked by: osduld_device_same(od1, osduld_device_info(od2)); osd_auto_detect_ver() - REVISED Now returns an osd_dev_info structure. Is only called once by ULD as before. See added comments for how to use. Signed-off-by: Boaz Harrosh Signed-off-by: James Bottomley --- drivers/scsi/osd/osd_initiator.c | 26 +++++++--- drivers/scsi/osd/osd_uld.c | 100 +++++++++++++++++++++++++++++++++++++-- include/scsi/osd_initiator.h | 38 +++++++++++++-- 3 files changed, 151 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c index 7a117c18114c..60b7ca1e9bc0 100644 --- a/drivers/scsi/osd/osd_initiator.c +++ b/drivers/scsi/osd/osd_initiator.c @@ -73,7 +73,8 @@ static const char *_osd_ver_desc(struct osd_request *or) #define ATTR_DEF_RI(id, len) ATTR_DEF(OSD_APAGE_ROOT_INFORMATION, id, len) -static int _osd_print_system_info(struct osd_dev *od, void *caps) +static int _osd_get_print_system_info(struct osd_dev *od, + void *caps, struct osd_dev_info *odi) { struct osd_request *or; struct osd_attr get_attrs[] = { @@ -137,8 +138,12 @@ static int _osd_print_system_info(struct osd_dev *od, void *caps) OSD_INFO("PRODUCT_SERIAL_NUMBER [%s]\n", (char *)pFirst); - pFirst = get_attrs[a].val_ptr; - OSD_INFO("OSD_NAME [%s]\n", (char *)pFirst); + odi->osdname_len = get_attrs[a].len; + /* Avoid NULL for memcmp optimization 0-length is good enough */ + odi->osdname = kzalloc(odi->osdname_len + 1, GFP_KERNEL); + if (odi->osdname_len) + memcpy(odi->osdname, get_attrs[a].val_ptr, odi->osdname_len); + OSD_INFO("OSD_NAME [%s]\n", odi->osdname); a++; pFirst = get_attrs[a++].val_ptr; @@ -171,6 +176,14 @@ static int _osd_print_system_info(struct osd_dev *od, void *caps) sid_dump, sizeof(sid_dump), true); OSD_INFO("OSD_SYSTEM_ID(%d)\n" " [%s]\n", len, sid_dump); + + if (unlikely(len > sizeof(odi->systemid))) { + OSD_ERR("OSD Target error: OSD_SYSTEM_ID too long(%d). " + "device idetification might not work\n", len); + len = sizeof(odi->systemid); + } + odi->systemid_len = len; + memcpy(odi->systemid, get_attrs[a].val_ptr, len); a++; } out: @@ -178,16 +191,17 @@ out: return ret; } -int osd_auto_detect_ver(struct osd_dev *od, void *caps) +int osd_auto_detect_ver(struct osd_dev *od, + void *caps, struct osd_dev_info *odi) { int ret; /* Auto-detect the osd version */ - ret = _osd_print_system_info(od, caps); + ret = _osd_get_print_system_info(od, caps, odi); if (ret) { osd_dev_set_ver(od, OSD_VER1); OSD_DEBUG("converting to OSD1\n"); - ret = _osd_print_system_info(od, caps); + ret = _osd_get_print_system_info(od, caps, odi); } return ret; diff --git a/drivers/scsi/osd/osd_uld.c b/drivers/scsi/osd/osd_uld.c index fc6fc1c4d4d1..0a90702b3d71 100644 --- a/drivers/scsi/osd/osd_uld.c +++ b/drivers/scsi/osd/osd_uld.c @@ -84,6 +84,7 @@ struct osd_uld_device { struct device class_dev; struct cdev cdev; struct osd_dev od; + struct osd_dev_info odi; struct gendisk *disk; }; @@ -225,6 +226,72 @@ free_od: } EXPORT_SYMBOL(osduld_path_lookup); +static inline bool _the_same_or_null(const u8 *a1, unsigned a1_len, + const u8 *a2, unsigned a2_len) +{ + if (!a2_len) /* User string is Empty means don't care */ + return true; + + if (a1_len != a2_len) + return false; + + return 0 == memcmp(a1, a2, a1_len); +} + +struct find_oud_t { + const struct osd_dev_info *odi; + struct device *dev; + struct osd_uld_device *oud; +} ; + +int _mach_odi(struct device *dev, void *find_data) +{ + struct osd_uld_device *oud = container_of(dev, struct osd_uld_device, + class_dev); + struct find_oud_t *fot = find_data; + const struct osd_dev_info *odi = fot->odi; + + if (_the_same_or_null(oud->odi.systemid, oud->odi.systemid_len, + odi->systemid, odi->systemid_len) && + _the_same_or_null(oud->odi.osdname, oud->odi.osdname_len, + odi->osdname, odi->osdname_len)) { + OSD_DEBUG("found device sysid_len=%d osdname=%d\n", + odi->systemid_len, odi->osdname_len); + fot->oud = oud; + return 1; + } else { + return 0; + } +} + +/* osduld_info_lookup - Loop through all devices, return the requested osd_dev. + * + * if @odi->systemid_len and/or @odi->osdname_len are zero, they act as a don't + * care. .e.g if they're both zero /dev/osd0 is returned. + */ +struct osd_dev *osduld_info_lookup(const struct osd_dev_info *odi) +{ + struct find_oud_t find = {.odi = odi}; + + find.dev = class_find_device(&osd_uld_class, NULL, &find, _mach_odi); + if (likely(find.dev)) { + struct osd_dev_handle *odh = kzalloc(sizeof(*odh), GFP_KERNEL); + + if (unlikely(!odh)) { + put_device(find.dev); + return ERR_PTR(-ENOMEM); + } + + odh->od = find.oud->od; + odh->oud = find.oud; + + return &odh->od; + } + + return ERR_PTR(-ENODEV); +} +EXPORT_SYMBOL(osduld_info_lookup); + void osduld_put_device(struct osd_dev *od) { if (od && !IS_ERR(od)) { @@ -240,14 +307,39 @@ void osduld_put_device(struct osd_dev *od) * is called after the fops->release. A get_/put_ pair makes * sure we have a cdev for the duration of fput */ - get_device(&oud->class_dev); - fput(odh->file); + if (odh->file) { + get_device(&oud->class_dev); + fput(odh->file); + } put_device(&oud->class_dev); kfree(odh); } } EXPORT_SYMBOL(osduld_put_device); +const struct osd_dev_info *osduld_device_info(struct osd_dev *od) +{ + struct osd_dev_handle *odh = + container_of(od, struct osd_dev_handle, od); + return &odh->oud->odi; +} +EXPORT_SYMBOL(osduld_device_info); + +bool osduld_device_same(struct osd_dev *od, const struct osd_dev_info *odi) +{ + struct osd_dev_handle *odh = + container_of(od, struct osd_dev_handle, od); + struct osd_uld_device *oud = odh->oud; + + return (oud->odi.systemid_len == odi->systemid_len) && + _the_same_or_null(oud->odi.systemid, oud->odi.systemid_len, + odi->systemid, odi->systemid_len) && + (oud->odi.osdname_len == odi->osdname_len) && + _the_same_or_null(oud->odi.osdname, oud->odi.osdname_len, + odi->osdname, odi->osdname_len); +} +EXPORT_SYMBOL(osduld_device_same); + /* * Scsi Device operations */ @@ -268,7 +360,7 @@ static int __detect_osd(struct osd_uld_device *oud) OSD_ERR("warning: scsi_test_unit_ready failed\n"); osd_sec_init_nosec_doall_caps(caps, &osd_root_object, false, true); - if (osd_auto_detect_ver(&oud->od, caps)) + if (osd_auto_detect_ver(&oud->od, caps, &oud->odi)) return -ENODEV; return 0; @@ -280,6 +372,8 @@ static void __remove(struct device *dev) class_dev); struct scsi_device *scsi_device = oud->od.scsi_device; + kfree(oud->odi.osdname); + if (oud->cdev.owner) cdev_del(&oud->cdev); diff --git a/include/scsi/osd_initiator.h b/include/scsi/osd_initiator.h index 589e5f0d67b1..3ec346e15dda 100644 --- a/include/scsi/osd_initiator.h +++ b/include/scsi/osd_initiator.h @@ -55,10 +55,24 @@ struct osd_dev { #endif }; -/* Retrieve/return osd_dev(s) for use by Kernel clients */ -struct osd_dev *osduld_path_lookup(const char *dev_name); /*Use IS_ERR/ERR_PTR*/ +/* Unique Identification of an OSD device */ +struct osd_dev_info { + unsigned systemid_len; + u8 systemid[OSD_SYSTEMID_LEN]; + unsigned osdname_len; + u8 *osdname; +}; + +/* Retrieve/return osd_dev(s) for use by Kernel clients + * Use IS_ERR/ERR_PTR on returned "osd_dev *". + */ +struct osd_dev *osduld_path_lookup(const char *dev_name); +struct osd_dev *osduld_info_lookup(const struct osd_dev_info *odi); void osduld_put_device(struct osd_dev *od); +const struct osd_dev_info *osduld_device_info(struct osd_dev *od); +bool osduld_device_same(struct osd_dev *od, const struct osd_dev_info *odi); + /* Add/remove test ioctls from external modules */ typedef int (do_test_fn)(struct osd_dev *od, unsigned cmd, unsigned long arg); int osduld_register_test(unsigned ioctl, do_test_fn *do_test); @@ -68,8 +82,24 @@ void osduld_unregister_test(unsigned ioctl); void osd_dev_init(struct osd_dev *od, struct scsi_device *scsi_device); void osd_dev_fini(struct osd_dev *od); -/* some hi level device operations */ -int osd_auto_detect_ver(struct osd_dev *od, void *caps); /* GFP_KERNEL */ +/** + * osd_auto_detect_ver - Detect the OSD version, return Unique Identification + * + * @od: OSD target lun handle + * @caps: Capabilities authorizing OSD root read attributes access + * @odi: Retrieved information uniquely identifying the osd target lun + * Note: odi->osdname must be kfreed by caller. + * + * Auto detects the OSD version of the OSD target and sets the @od + * accordingly. Meanwhile also returns the "system id" and "osd name" root + * attributes which uniquely identify the OSD target. This member is usually + * called by the ULD. ULD users should call osduld_device_info(). + * This rutine allocates osd requests and memory at GFP_KERNEL level and might + * sleep. + */ +int osd_auto_detect_ver(struct osd_dev *od, + void *caps, struct osd_dev_info *odi); + static inline struct request_queue *osd_request_queue(struct osd_dev *od) { return od->scsi_device->request_queue; -- cgit v1.3-8-gc7d7 From aa9fffbe2c4db4557248c5c626a85bf3c7867044 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Mon, 16 Nov 2009 20:48:38 +0200 Subject: [SCSI] libosd: Error handling revamped Administer some love to the osd_req_decode_sense function * Fix a bad bug with osd_req_decode_sense(). If there was no scsi residual, .i.e the request never reached the target, then all the osd_sense_info members where garbage. * Add grossly missing in/out_resid to osd_sense_info and fill them in properly. * Define an osd_err_priority enum which divides the possible errors into 7 categories in ascending severity. Each category is also assigned a Linux return code translation. Analyze the different osd/scsi/block returned errors and set the proper osd_err_priority and Linux return code accordingly. * extra check a few situations so not to get stuck with inconsistent error view. Example an empty residual with an error code, and other places ... Lots of libosd's osd_req_decode_sense clients had this logic in some form or another. Consolidate all these into one place that should actually know about osd returns. Thous translating it to a more abstract error. Signed-off-by: Boaz Harrosh Signed-off-by: James Bottomley --- drivers/scsi/osd/osd_initiator.c | 85 ++++++++++++++++++++++++++++++++++------ include/scsi/osd_initiator.h | 26 +++++++++++- 2 files changed, 99 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c index ba25b1e58a6c..950202a70bcf 100644 --- a/drivers/scsi/osd/osd_initiator.c +++ b/drivers/scsi/osd/osd_initiator.c @@ -475,7 +475,8 @@ EXPORT_SYMBOL(osd_end_request); int osd_execute_request(struct osd_request *or) { - return blk_execute_rq(or->request->q, NULL, or->request, 0); + return or->async_error = + blk_execute_rq(or->request->q, NULL, or->request, 0); } EXPORT_SYMBOL(osd_execute_request); @@ -485,8 +486,12 @@ static void osd_request_async_done(struct request *req, int error) or->async_error = error; - if (error) - OSD_DEBUG("osd_request_async_done error recieved %d\n", error); + if (unlikely(error)) { + OSD_DEBUG("osd_request_async_done error recieved %d " + "errors 0x%x\n", error, req->errors); + if (!req->errors) /* don't miss out on this one */ + req->errors = error; + } if (or->async_done) or->async_done(or, or->async_private); @@ -1451,6 +1456,15 @@ int osd_finalize_request(struct osd_request *or, } EXPORT_SYMBOL(osd_finalize_request); +static bool _is_osd_security_code(int code) +{ + return (code == osd_security_audit_value_frozen) || + (code == osd_security_working_key_frozen) || + (code == osd_nonce_not_unique) || + (code == osd_nonce_timestamp_out_of_range) || + (code == osd_invalid_dataout_buffer_integrity_check_value); +} + #define OSD_SENSE_PRINT1(fmt, a...) \ do { \ if (__cur_sense_need_output) \ @@ -1473,9 +1487,16 @@ int osd_req_decode_sense_full(struct osd_request *or, #else bool __cur_sense_need_output = !silent; #endif + int ret; - if (!or->request->errors) + if (likely(!or->request->errors)) { + osi->out_resid = 0; + osi->in_resid = 0; return 0; + } + + osi = osi ? : &local_osi; + memset(osi, 0, sizeof(*osi)); ssdb = or->request->sense; sense_len = or->request->sense_len; @@ -1483,17 +1504,15 @@ int osd_req_decode_sense_full(struct osd_request *or, OSD_ERR("Block-layer returned error(0x%x) but " "sense_len(%u) || key(%d) is empty\n", or->request->errors, sense_len, ssdb->sense_key); - return -EIO; + goto analyze; } if ((ssdb->response_code != 0x72) && (ssdb->response_code != 0x73)) { OSD_ERR("Unrecognized scsi sense: rcode=%x length=%d\n", ssdb->response_code, sense_len); - return -EIO; + goto analyze; } - osi = osi ? : &local_osi; - memset(osi, 0, sizeof(*osi)); osi->key = ssdb->sense_key; osi->additional_code = be16_to_cpu(ssdb->additional_sense_code); original_sense_len = ssdb->additional_sense_length + 8; @@ -1503,9 +1522,10 @@ int osd_req_decode_sense_full(struct osd_request *or, __cur_sense_need_output = (osi->key > scsi_sk_recovered_error); #endif OSD_SENSE_PRINT1("Main Sense information key=0x%x length(%d, %d) " - "additional_code=0x%x\n", + "additional_code=0x%x async_error=%d errors=0x%x\n", osi->key, original_sense_len, sense_len, - osi->additional_code); + osi->additional_code, or->async_error, + or->request->errors); if (original_sense_len < sense_len) sense_len = original_sense_len; @@ -1637,7 +1657,50 @@ int osd_req_decode_sense_full(struct osd_request *or, cur_descriptor += cur_len; } - return (osi->key > scsi_sk_recovered_error) ? -EIO : 0; +analyze: + if (!osi->key) { + /* scsi sense is Empty, the request was never issued to target + * linux return code might tell us what happened. + */ + if (or->async_error == -ENOMEM) + osi->osd_err_pri = OSD_ERR_PRI_RESOURCE; + else + osi->osd_err_pri = OSD_ERR_PRI_UNREACHABLE; + ret = or->async_error; + } else if (osi->key <= scsi_sk_recovered_error) { + osi->osd_err_pri = 0; + ret = 0; + } else if (osi->additional_code == scsi_invalid_field_in_cdb) { + if (osi->cdb_field_offset == OSD_CFO_STARTING_BYTE) { + osi->osd_err_pri = OSD_ERR_PRI_CLEAR_PAGES; + ret = -EFAULT; /* caller should recover from this */ + } else if (osi->cdb_field_offset == OSD_CFO_OBJECT_ID) { + osi->osd_err_pri = OSD_ERR_PRI_NOT_FOUND; + ret = -ENOENT; + } else if (osi->cdb_field_offset == OSD_CFO_PERMISSIONS) { + osi->osd_err_pri = OSD_ERR_PRI_NO_ACCESS; + ret = -EACCES; + } else { + osi->osd_err_pri = OSD_ERR_PRI_BAD_CRED; + ret = -EINVAL; + } + } else if (osi->additional_code == osd_quota_error) { + osi->osd_err_pri = OSD_ERR_PRI_NO_SPACE; + ret = -ENOSPC; + } else if (_is_osd_security_code(osi->additional_code)) { + osi->osd_err_pri = OSD_ERR_PRI_BAD_CRED; + ret = -EINVAL; + } else { + osi->osd_err_pri = OSD_ERR_PRI_EIO; + ret = -EIO; + } + + if (or->out.req) + osi->out_resid = or->out.req->resid_len ?: or->out.total_bytes; + if (or->in.req) + osi->in_resid = or->in.req->resid_len ?: or->in.total_bytes; + + return ret; } EXPORT_SYMBOL(osd_req_decode_sense_full); diff --git a/include/scsi/osd_initiator.h b/include/scsi/osd_initiator.h index 3ec346e15dda..39d6d1097153 100644 --- a/include/scsi/osd_initiator.h +++ b/include/scsi/osd_initiator.h @@ -267,7 +267,7 @@ int osd_execute_request_async(struct osd_request *or, * @bad_attr_list - List of failing attributes (optional) * @max_attr - Size of @bad_attr_list. * - * After execution, sense + return code can be analyzed using this function. The + * After execution, osd_request results are analyzed using this function. The * return code is the final disposition on the error. So it is possible that a * CHECK_CONDITION was returned from target but this will return NO_ERROR, for * example on recovered errors. All parameters are optional if caller does @@ -276,7 +276,31 @@ int osd_execute_request_async(struct osd_request *or, * of the SCSI_OSD_DPRINT_SENSE Kconfig value. Set @silent if you know the * command would routinely fail, to not spam the dmsg file. */ + +/** + * osd_err_priority - osd categorized return codes in ascending severity. + * + * The categories are borrowed from the pnfs_osd_errno enum. + * See comments for translated Linux codes returned by osd_req_decode_sense. + */ +enum osd_err_priority { + OSD_ERR_PRI_NO_ERROR = 0, + /* Recoverable, caller should clear_highpage() all pages */ + OSD_ERR_PRI_CLEAR_PAGES = 1, /* -EFAULT */ + OSD_ERR_PRI_RESOURCE = 2, /* -ENOMEM */ + OSD_ERR_PRI_BAD_CRED = 3, /* -EINVAL */ + OSD_ERR_PRI_NO_ACCESS = 4, /* -EACCES */ + OSD_ERR_PRI_UNREACHABLE = 5, /* any other */ + OSD_ERR_PRI_NOT_FOUND = 6, /* -ENOENT */ + OSD_ERR_PRI_NO_SPACE = 7, /* -ENOSPC */ + OSD_ERR_PRI_EIO = 8, /* -EIO */ +}; + struct osd_sense_info { + u64 out_resid; /* Zero on success otherwise out residual */ + u64 in_resid; /* Zero on success otherwise in residual */ + enum osd_err_priority osd_err_pri; + int key; /* one of enum scsi_sense_keys */ int additional_code ; /* enum osd_additional_sense_codes */ union { /* Sense specific information */ -- cgit v1.3-8-gc7d7 From 0899638688f223fd9e9fee60d662665e11693d12 Mon Sep 17 00:00:00 2001 From: Martin Michlmayr Date: Mon, 16 Nov 2009 20:49:25 +0200 Subject: [SCSI] osd_protocol.h: Add missing #include include/scsi/osd_protocol.h uses ALIGN() without an #include , leading to: | include/scsi/osd_protocol.h:362: error: implicit declaration of function 'ALIGN' Signed-off-by: Martin Michlmayr Signed-off-by: Boaz Harrosh Cc: Stable Tree Signed-off-by: James Bottomley --- include/scsi/osd_protocol.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/scsi/osd_protocol.h b/include/scsi/osd_protocol.h index 2cc8e8b1cc19..685661283540 100644 --- a/include/scsi/osd_protocol.h +++ b/include/scsi/osd_protocol.h @@ -17,6 +17,7 @@ #define __OSD_PROTOCOL_H__ #include +#include #include #include -- cgit v1.3-8-gc7d7 From 6580bbd0afe6ba1be5d53b331e92a7690046c923 Mon Sep 17 00:00:00 2001 From: Yi Zou Date: Fri, 20 Nov 2009 14:54:52 -0800 Subject: [SCSI] libfc: add FC-BB-5 LESB counters to fcoe_dev_stats FC-BB-5 Rev2.0, Clause 7.10 extends the FC-LS-3 LESB for FC-BB_E. We are already tracking Link Failure Count so add the rest in this patch. For VLinkFailureCount and MissDiscAdvCount, they are part of the per-cpu fcoe_dev_stats. For SymbolErrorCount, ErroredBlockCount, and FCSErrorCount, they are defined in IEEE 802.3-2008 and are per LLD. They are expected to come from LLD. Signed-off-by: Yi Zou Signed-off-by: Robert Love Signed-off-by: James Bottomley --- include/scsi/libfc.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h index 2936fbae41e4..b97be2903cbc 100644 --- a/include/scsi/libfc.h +++ b/include/scsi/libfc.h @@ -231,6 +231,8 @@ struct fc_rport_priv { * @ControlRequests: Number of control requests * @InputMegabytes: Number of received megabytes * @OutputMegabytes: Number of transmitted megabytes + * @VLinkFailureCount: Number of virtual link failures + * @MissDiscAdvCount: Number of missing FIP discovery advertisement */ struct fcoe_dev_stats { u64 SecondsSinceLastReset; @@ -249,6 +251,8 @@ struct fcoe_dev_stats { u64 ControlRequests; u64 InputMegabytes; u64 OutputMegabytes; + u64 VLinkFailureCount; + u64 MissDiscAdvCount; }; /** -- cgit v1.3-8-gc7d7 From 8cdffdccd948ea4872b7b65280bc04f2fa93fc96 Mon Sep 17 00:00:00 2001 From: Yi Zou Date: Fri, 20 Nov 2009 14:54:57 -0800 Subject: [SCSI] libfcoe: add checking disable flag in FIP_FKA_ADV When the D bit is set if the FKA_ADV_Period of the FIP Discovery Advertisement, the ENode should not transmit period ENode FIP Keep Alive and VN_Port FIP Keep Alive (FC-BB-5 Rev2, 7.8.3.13). Note that fcf->flags is taken directly from the fip_header, I am claiming one bit for the purpose of the FIP_FKA_Period D bit as FIP_FL_FK_ADV_B, and use FIP_HEADER_FLAGS as bitmask for bits used in fip_header. Signed-off-by: Yi Zou Signed-off-by: Robert Love Signed-off-by: James Bottomley --- drivers/scsi/fcoe/libfcoe.c | 4 +++- include/scsi/fc/fc_fip.h | 12 +++++++++++- include/scsi/libfcoe.h | 1 + 3 files changed, 15 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/scsi/fcoe/libfcoe.c b/drivers/scsi/fcoe/libfcoe.c index 3c501d4973e3..9961fd7310b7 100644 --- a/drivers/scsi/fcoe/libfcoe.c +++ b/drivers/scsi/fcoe/libfcoe.c @@ -665,6 +665,8 @@ static int fcoe_ctlr_parse_adv(struct fcoe_ctlr *fip, if (dlen != sizeof(struct fip_fka_desc)) goto len_err; fka = (struct fip_fka_desc *)desc; + if (fka->fd_flags & FIP_FKA_ADV_D) + fcf->fd_flags = 1; t = ntohl(fka->fd_fka_period); if (t >= FCOE_CTLR_MIN_FKA) fcf->fka_period = msecs_to_jiffies(t); @@ -1160,7 +1162,7 @@ static void fcoe_ctlr_timeout(unsigned long arg) } } - if (sel) { + if (sel && !sel->fd_flags) { if (time_after_eq(jiffies, fip->ctlr_ka_time)) { fip->ctlr_ka_time = jiffies + sel->fka_period; fip->send_ctlr_ka = 1; diff --git a/include/scsi/fc/fc_fip.h b/include/scsi/fc/fc_fip.h index 3d138c1fcf8a..17baa19380f0 100644 --- a/include/scsi/fc/fc_fip.h +++ b/include/scsi/fc/fc_fip.h @@ -214,10 +214,20 @@ struct fip_vn_desc { */ struct fip_fka_desc { struct fip_desc fd_desc; - __u8 fd_resvd[2]; + __u8 fd_resvd; + __u8 fd_flags; /* bit0 is fka disable flag */ __be32 fd_fka_period; /* adv./keep-alive period in mS */ } __attribute__((packed)); +/* + * flags for fip_fka_desc.fd_flags + */ +enum fip_fka_flags { + FIP_FKA_ADV_D = 0x01, /* no need for FKA from ENode */ +}; + +/* FIP_DT_FKA flags */ + /* * FIP_DT_VENDOR descriptor. */ diff --git a/include/scsi/libfcoe.h b/include/scsi/libfcoe.h index 3837872f1965..c603f4a7e7fc 100644 --- a/include/scsi/libfcoe.h +++ b/include/scsi/libfcoe.h @@ -155,6 +155,7 @@ struct fcoe_fcf { u8 pri; u16 flags; u32 fka_period; + u8 fd_flags:1; }; /* FIP API functions */ -- cgit v1.3-8-gc7d7 From b21a0c397eea722ff84bbeaf5e6e732a06b69896 Mon Sep 17 00:00:00 2001 From: Yi Zou Date: Fri, 20 Nov 2009 14:55:14 -0800 Subject: [SCSI] libfc: add fcoe_fc_els_lesb to fc_fcoe.h for FC-BB-5 LESB definitions Add struct fcoe_fc_els_lesb as described in FC-BB-5 LESB for FCoE. It has the same size as LESB defined in FC-FS-3 (struct fc_els_lesb) but members have different meanings according to FC-BB-5. Signed-off-by: Yi Zou Signed-off-by: Robert Love Signed-off-by: James Bottomley --- include/scsi/fc/fc_fcoe.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/scsi/fc/fc_fcoe.h b/include/scsi/fc/fc_fcoe.h index ccb3dbe90463..e6ad3d2ae475 100644 --- a/include/scsi/fc/fc_fcoe.h +++ b/include/scsi/fc/fc_fcoe.h @@ -85,6 +85,18 @@ struct fcoe_crc_eof { */ #define FCOE_MIN_FRAME 46 +/* + * FCoE Link Error Status Block: T11 FC-BB-5 Rev2.0, Clause 7.10. + */ +struct fcoe_fc_els_lesb { + __be32 lesb_link_fail; /* link failure count */ + __be32 lesb_vlink_fail; /* virtual link failure count */ + __be32 lesb_miss_fka; /* missing FIP keep-alive count */ + __be32 lesb_symb_err; /* symbol error during carrier count */ + __be32 lesb_err_block; /* errored block count */ + __be32 lesb_fcs_error; /* frame check sequence error count */ +}; + /* * fc_fcoe_set_mac - Store OUI + DID into MAC address field. * @mac: mac address to be set -- cgit v1.3-8-gc7d7 From b84056bf68404a5fe06b452ea9790b9927e793a6 Mon Sep 17 00:00:00 2001 From: Yi Zou Date: Fri, 20 Nov 2009 14:55:19 -0800 Subject: [SCSI] fcoe, libfc: add get_lesb() to allow LLD to fill the link error status block (LESB) Add a member function pointer as get_lesb to libfc_function_template so LLD can fill the LESB based on its own statistics. For fcoe, it fills the LESB as a fcoe_fc_els_lesb struct according to FC-BB-5. Signed-off-by: Yi Zou Signed-off-by: Robert Love Signed-off-by: James Bottomley --- drivers/scsi/fcoe/fcoe.c | 34 ++++++++++++++++++++++++++++++++++ include/scsi/libfc.h | 6 ++++++ 2 files changed, 40 insertions(+) (limited to 'include') diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c index 32298ed60614..a30ffaa1222c 100644 --- a/drivers/scsi/fcoe/fcoe.c +++ b/drivers/scsi/fcoe/fcoe.c @@ -111,6 +111,8 @@ static struct fc_seq *fcoe_elsct_send(struct fc_lport *, void *, u32 timeout); static void fcoe_recv_frame(struct sk_buff *skb); +static void fcoe_get_lesb(struct fc_lport *, struct fc_els_lesb *); + module_param_call(create, fcoe_create, NULL, NULL, S_IWUSR); __MODULE_PARM_TYPE(create, "string"); MODULE_PARM_DESC(create, "Create fcoe fcoe using net device passed in."); @@ -141,6 +143,7 @@ static struct libfc_function_template fcoe_libfc_fcn_templ = { .ddp_setup = fcoe_ddp_setup, .ddp_done = fcoe_ddp_done, .elsct_send = fcoe_elsct_send, + .get_lesb = fcoe_get_lesb, }; struct fc_function_template fcoe_transport_function = { @@ -2455,3 +2458,34 @@ static void fcoe_set_vport_symbolic_name(struct fc_vport *vport) lport->tt.elsct_send(lport, FC_FID_DIR_SERV, fp, FC_NS_RSPN_ID, NULL, NULL, 3 * lport->r_a_tov); } + +/** + * fcoe_get_lesb() - Fill the FCoE Link Error Status Block + * @lport: the local port + * @fc_lesb: the link error status block + */ +static void fcoe_get_lesb(struct fc_lport *lport, + struct fc_els_lesb *fc_lesb) +{ + unsigned int cpu; + u32 lfc, vlfc, mdac; + struct fcoe_dev_stats *devst; + struct fcoe_fc_els_lesb *lesb; + struct net_device *netdev = fcoe_netdev(lport); + + lfc = 0; + vlfc = 0; + mdac = 0; + lesb = (struct fcoe_fc_els_lesb *)fc_lesb; + memset(lesb, 0, sizeof(*lesb)); + for_each_possible_cpu(cpu) { + devst = per_cpu_ptr(lport->dev_stats, cpu); + lfc += devst->LinkFailureCount; + vlfc += devst->VLinkFailureCount; + mdac += devst->MissDiscAdvCount; + } + lesb->lesb_link_fail = htonl(lfc); + lesb->lesb_vlink_fail = htonl(vlfc); + lesb->lesb_miss_fka = htonl(mdac); + lesb->lesb_fcs_error = htonl(dev_get_stats(netdev)->rx_crc_errors); +} diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h index b97be2903cbc..4b912eee33e5 100644 --- a/include/scsi/libfc.h +++ b/include/scsi/libfc.h @@ -510,6 +510,12 @@ struct libfc_function_template { * STATUS: OPTIONAL */ int (*ddp_done)(struct fc_lport *, u16); + /* + * Allow LLD to fill its own Link Error Status Block + * + * STATUS: OPTIONAL + */ + void (*get_lesb)(struct fc_lport *, struct fc_els_lesb *lesb); /* * Send a frame using an existing sequence and exchange. * -- cgit v1.3-8-gc7d7 From 0a55256d158c18e4821c248a295b7f8f4423660f Mon Sep 17 00:00:00 2001 From: Christof Schmitt Date: Tue, 24 Nov 2009 16:53:57 +0100 Subject: [SCSI] libfc: Add target reset flag to FCP header file While the target reset task management function has been deprecated in newer specs, it is still in use by SCSI FC drivers and there is no real replacement. Add the target reset flag to the FCP header file to allow usage of this definition in SCSI FC drivers. Reviewed-by: Swen Schillig Signed-off-by: Christof Schmitt Signed-off-by: James Bottomley --- include/scsi/fc/fc_fcp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/scsi/fc/fc_fcp.h b/include/scsi/fc/fc_fcp.h index 29ecb0b02b09..747e2c7d88d6 100644 --- a/include/scsi/fc/fc_fcp.h +++ b/include/scsi/fc/fc_fcp.h @@ -83,6 +83,8 @@ struct fcp_cmnd32 { * fc_tm_flags - task management flags field. */ #define FCP_TMF_CLR_ACA 0x40 /* clear ACA condition */ +#define FCP_TMF_TGT_RESET 0x20 /* target reset task management, + deprecated as of FCP-3 */ #define FCP_TMF_LUN_RESET 0x10 /* logical unit reset task management */ #define FCP_TMF_CLR_TASK_SET 0x04 /* clear task set */ #define FCP_TMF_ABT_TASK_SET 0x02 /* abort task set */ -- cgit v1.3-8-gc7d7 From 6070d81eb5f2d4943223c96e7609a53cdc984364 Mon Sep 17 00:00:00 2001 From: Adam Buchbinder Date: Fri, 4 Dec 2009 15:47:01 -0500 Subject: tree-wide: fix misspelling of "definition" in comments "Definition" is misspelled "defintion" in several comments; this patch fixes them. No code changes. Signed-off-by: Adam Buchbinder Signed-off-by: Jiri Kosina --- arch/arm/mach-bcmring/include/csp/reg.h | 2 +- arch/arm/mach-bcmring/include/mach/csp/mm_addr.h | 2 +- arch/arm/mach-u300/include/mach/u300-regs.h | 2 +- arch/sh/include/mach-common/mach/titan.h | 2 +- arch/x86/include/asm/sigcontext.h | 4 ++-- drivers/block/cciss_cmd.h | 2 +- drivers/gpu/drm/radeon/atombios.h | 16 ++++++++-------- drivers/media/video/cx18/cx18-av-core.h | 2 +- drivers/media/video/cx18/cx18-mailbox.h | 2 +- drivers/net/wireless/ath/ath5k/base.h | 2 +- drivers/s390/net/qeth_core_mpc.h | 2 +- drivers/scsi/advansys.c | 2 +- drivers/scsi/aic94xx/aic94xx_reg_def.h | 2 +- drivers/scsi/bfa/include/protocol/ct.h | 4 ++-- drivers/scsi/dmx3191d.c | 2 +- drivers/scsi/wd7000.c | 2 +- drivers/staging/otus/80211core/pub_zfi.h | 2 +- drivers/staging/otus/zdcompat.h | 2 +- drivers/staging/rtl8192su/r8192S_phyreg.h | 2 +- fs/cifs/cifspdu.h | 2 +- fs/compat_ioctl.c | 2 +- include/linux/cciss_ioctl.h | 2 +- include/linux/in6.h | 2 +- include/linux/usb/wusb.h | 2 +- 24 files changed, 33 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/arch/arm/mach-bcmring/include/csp/reg.h b/arch/arm/mach-bcmring/include/csp/reg.h index e5f60bf5a1f3..56654d23c3d7 100644 --- a/arch/arm/mach-bcmring/include/csp/reg.h +++ b/arch/arm/mach-bcmring/include/csp/reg.h @@ -16,7 +16,7 @@ /** * @file reg.h * -* @brief Generic register defintions used in CSP +* @brief Generic register definitions used in CSP */ /****************************************************************************/ diff --git a/arch/arm/mach-bcmring/include/mach/csp/mm_addr.h b/arch/arm/mach-bcmring/include/mach/csp/mm_addr.h index 86bb58d4f58c..ad58cf873377 100644 --- a/arch/arm/mach-bcmring/include/mach/csp/mm_addr.h +++ b/arch/arm/mach-bcmring/include/mach/csp/mm_addr.h @@ -16,7 +16,7 @@ /** * @file mm_addr.h * -* @brief Memory Map address defintions +* @brief Memory Map address definitions * * @note * None diff --git a/arch/arm/mach-u300/include/mach/u300-regs.h b/arch/arm/mach-u300/include/mach/u300-regs.h index 88333dfb19fc..56721a0cd2af 100644 --- a/arch/arm/mach-u300/include/mach/u300-regs.h +++ b/arch/arm/mach-u300/include/mach/u300-regs.h @@ -6,7 +6,7 @@ * Copyright (C) 2006-2009 ST-Ericsson AB * License terms: GNU General Public License (GPL) version 2 * Basic register address definitions in physical memory and - * some block defintions for core devices like the timer. + * some block definitions for core devices like the timer. * Author: Linus Walleij */ diff --git a/arch/sh/include/mach-common/mach/titan.h b/arch/sh/include/mach-common/mach/titan.h index 03f3583c8918..4a674d27cbb8 100644 --- a/arch/sh/include/mach-common/mach/titan.h +++ b/arch/sh/include/mach-common/mach/titan.h @@ -1,5 +1,5 @@ /* - * Platform defintions for Titan + * Platform definitions for Titan */ #ifndef _ASM_SH_TITAN_H #define _ASM_SH_TITAN_H diff --git a/arch/x86/include/asm/sigcontext.h b/arch/x86/include/asm/sigcontext.h index 72e5a4491661..04459d25e66e 100644 --- a/arch/x86/include/asm/sigcontext.h +++ b/arch/x86/include/asm/sigcontext.h @@ -124,7 +124,7 @@ struct sigcontext { * fpstate is really (struct _fpstate *) or (struct _xstate *) * depending on the FP_XSTATE_MAGIC1 encoded in the SW reserved * bytes of (struct _fpstate) and FP_XSTATE_MAGIC2 present at the end - * of extended memory layout. See comments at the defintion of + * of extended memory layout. See comments at the definition of * (struct _fpx_sw_bytes) */ void __user *fpstate; /* zero when no FPU/extended context */ @@ -219,7 +219,7 @@ struct sigcontext { * fpstate is really (struct _fpstate *) or (struct _xstate *) * depending on the FP_XSTATE_MAGIC1 encoded in the SW reserved * bytes of (struct _fpstate) and FP_XSTATE_MAGIC2 present at the end - * of extended memory layout. See comments at the defintion of + * of extended memory layout. See comments at the definition of * (struct _fpx_sw_bytes) */ void __user *fpstate; /* zero when no FPU/extended context */ diff --git a/drivers/block/cciss_cmd.h b/drivers/block/cciss_cmd.h index dbaed1ea0da3..8098fccdbec4 100644 --- a/drivers/block/cciss_cmd.h +++ b/drivers/block/cciss_cmd.h @@ -5,7 +5,7 @@ //########################################################################### #define CISS_VERSION "1.00" -//general boundary defintions +//general boundary definitions #define SENSEINFOBYTES 32//note that this value may vary between host implementations #define MAXSGENTRIES 31 #define MAXREPLYQS 256 diff --git a/drivers/gpu/drm/radeon/atombios.h b/drivers/gpu/drm/radeon/atombios.h index 5d402086bc47..811178907413 100644 --- a/drivers/gpu/drm/radeon/atombios.h +++ b/drivers/gpu/drm/radeon/atombios.h @@ -1141,7 +1141,7 @@ typedef struct _LVDS_ENCODER_CONTROL_PARAMETERS { /* ucTableFormatRevision=1,ucTableContentRevision=2 */ typedef struct _LVDS_ENCODER_CONTROL_PARAMETERS_V2 { USHORT usPixelClock; /* in 10KHz; for bios convenient */ - UCHAR ucMisc; /* see PANEL_ENCODER_MISC_xx defintions below */ + UCHAR ucMisc; /* see PANEL_ENCODER_MISC_xx definitions below */ UCHAR ucAction; /* 0: turn off encoder */ /* 1: setup and turn on encoder */ UCHAR ucTruncate; /* bit0=0: Disable truncate */ @@ -1424,7 +1424,7 @@ typedef struct _ATOM_MULTIMEDIA_CONFIG_INFO { /* Structures used in FirmwareInfoTable */ /****************************************************************************/ -/* usBIOSCapability Defintion: */ +/* usBIOSCapability Definition: */ /* Bit 0 = 0: Bios image is not Posted, =1:Bios image is Posted; */ /* Bit 1 = 0: Dual CRTC is not supported, =1: Dual CRTC is supported; */ /* Bit 2 = 0: Extended Desktop is not supported, =1: Extended Desktop is supported; */ @@ -2386,7 +2386,7 @@ typedef struct _ATOM_ANALOG_TV_INFO_V1_2 { } ATOM_ANALOG_TV_INFO_V1_2; /**************************************************************************/ -/* VRAM usage and their defintions */ +/* VRAM usage and their definitions */ /* One chunk of VRAM used by Bios are for HWICON surfaces,EDID data. */ /* Current Mode timing and Dail Timing and/or STD timing data EACH device. They can be broken down as below. */ @@ -3046,7 +3046,7 @@ typedef struct _ATOM_ASIC_INTERNAL_SS_INFO { #define ATOM_S0_SYSTEM_POWER_STATE_VALUE_DC 2 #define ATOM_S0_SYSTEM_POWER_STATE_VALUE_LITEAC 3 -/* Byte aligned defintion for BIOS usage */ +/* Byte aligned definition for BIOS usage */ #define ATOM_S0_CRT1_MONOb0 0x01 #define ATOM_S0_CRT1_COLORb0 0x02 #define ATOM_S0_CRT1_MASKb0 (ATOM_S0_CRT1_MONOb0+ATOM_S0_CRT1_COLORb0) @@ -3131,7 +3131,7 @@ typedef struct _ATOM_ASIC_INTERNAL_SS_INFO { #define ATOM_S2_DISPLAY_ROTATION_DEGREE_SHIFT 30 #define ATOM_S2_DISPLAY_ROTATION_ANGLE_MASK 0xC0000000L -/* Byte aligned defintion for BIOS usage */ +/* Byte aligned definition for BIOS usage */ #define ATOM_S2_TV1_STANDARD_MASKb0 0x0F #define ATOM_S2_CURRENT_BL_LEVEL_MASKb1 0xFF #define ATOM_S2_CRT1_DPMS_STATEb2 0x01 @@ -3190,7 +3190,7 @@ typedef struct _ATOM_ASIC_INTERNAL_SS_INFO { #define ATOM_S3_ALLOW_FAST_PWR_SWITCH 0x40000000L #define ATOM_S3_RQST_GPU_USE_MIN_PWR 0x80000000L -/* Byte aligned defintion for BIOS usage */ +/* Byte aligned definition for BIOS usage */ #define ATOM_S3_CRT1_ACTIVEb0 0x01 #define ATOM_S3_LCD1_ACTIVEb0 0x02 #define ATOM_S3_TV1_ACTIVEb0 0x04 @@ -3230,7 +3230,7 @@ typedef struct _ATOM_ASIC_INTERNAL_SS_INFO { #define ATOM_S4_LCD1_REFRESH_MASK 0x0000FF00L #define ATOM_S4_LCD1_REFRESH_SHIFT 8 -/* Byte aligned defintion for BIOS usage */ +/* Byte aligned definition for BIOS usage */ #define ATOM_S4_LCD1_PANEL_ID_MASKb0 0x0FF #define ATOM_S4_LCD1_REFRESH_MASKb1 ATOM_S4_LCD1_PANEL_ID_MASKb0 #define ATOM_S4_VRAM_INFO_MASKb2 ATOM_S4_LCD1_PANEL_ID_MASKb0 @@ -3310,7 +3310,7 @@ typedef struct _ATOM_ASIC_INTERNAL_SS_INFO { #define ATOM_S6_VRI_BRIGHTNESS_CHANGE 0x40000000L #define ATOM_S6_CONFIG_DISPLAY_CHANGE_MASK 0x80000000L -/* Byte aligned defintion for BIOS usage */ +/* Byte aligned definition for BIOS usage */ #define ATOM_S6_DEVICE_CHANGEb0 0x01 #define ATOM_S6_SCALER_CHANGEb0 0x02 #define ATOM_S6_LID_CHANGEb0 0x04 diff --git a/drivers/media/video/cx18/cx18-av-core.h b/drivers/media/video/cx18/cx18-av-core.h index 9b84a0c58e0e..cafb7e99b9a0 100644 --- a/drivers/media/video/cx18/cx18-av-core.h +++ b/drivers/media/video/cx18/cx18-av-core.h @@ -294,7 +294,7 @@ struct cx18_av_state { #define CXADEC_QAM_CONST_DEC 0x924 #define CXADEC_QAM_ROTATOR_FREQ 0x948 -/* Bit defintions / settings used in Mako Audio */ +/* Bit definitions / settings used in Mako Audio */ #define CXADEC_PREF_MODE_MONO_LANGA 0 #define CXADEC_PREF_MODE_MONO_LANGB 1 #define CXADEC_PREF_MODE_MONO_LANGC 2 diff --git a/drivers/media/video/cx18/cx18-mailbox.h b/drivers/media/video/cx18/cx18-mailbox.h index e23aaac5b280..522ad534034c 100644 --- a/drivers/media/video/cx18/cx18-mailbox.h +++ b/drivers/media/video/cx18/cx18-mailbox.h @@ -41,7 +41,7 @@ struct cx18; /* * This structure is used by CPU to provide completed buffers information * Its structure is dictrated by the layout of the SCB, required by the - * firmware, but its defintion needs to be here, instead of in cx18-scb.h, + * firmware, but its definition needs to be here, instead of in cx18-scb.h, * for mailbox work order scheduling */ struct cx18_mdl_ack { diff --git a/drivers/net/wireless/ath/ath5k/base.h b/drivers/net/wireless/ath/ath5k/base.h index c79a65044b67..4da029410467 100644 --- a/drivers/net/wireless/ath/ath5k/base.h +++ b/drivers/net/wireless/ath/ath5k/base.h @@ -36,7 +36,7 @@ */ /* - * Defintions for the Atheros Wireless LAN controller driver. + * Definitions for the Atheros Wireless LAN controller driver. */ #ifndef _DEV_ATH_ATHVAR_H #define _DEV_ATH_ATHVAR_H diff --git a/drivers/s390/net/qeth_core_mpc.h b/drivers/s390/net/qeth_core_mpc.h index eecb2ee62e85..de7bccef49f0 100644 --- a/drivers/s390/net/qeth_core_mpc.h +++ b/drivers/s390/net/qeth_core_mpc.h @@ -507,7 +507,7 @@ extern unsigned char ULP_ENABLE[]; (PDU_ENCAPSULATION(buffer) + 0x17) #define QETH_ULP_ENABLE_RESP_LINK_TYPE(buffer) \ (PDU_ENCAPSULATION(buffer) + 0x2b) -/* Layer 2 defintions */ +/* Layer 2 definitions */ #define QETH_PROT_LAYER2 0x08 #define QETH_PROT_TCPIP 0x03 #define QETH_PROT_OSN2 0x0a diff --git a/drivers/scsi/advansys.c b/drivers/scsi/advansys.c index b756041f0b26..22626abdb630 100644 --- a/drivers/scsi/advansys.c +++ b/drivers/scsi/advansys.c @@ -7969,7 +7969,7 @@ static int advansys_reset(struct scsi_cmnd *scp) ASC_DBG(1, "before AscInitAsc1000Driver()\n"); status = AscInitAsc1000Driver(asc_dvc); - /* Refer to ASC_IERR_* defintions for meaning of 'err_code'. */ + /* Refer to ASC_IERR_* definitions for meaning of 'err_code'. */ if (asc_dvc->err_code) { scmd_printk(KERN_INFO, scp, "SCSI bus reset error: " "0x%x\n", asc_dvc->err_code); diff --git a/drivers/scsi/aic94xx/aic94xx_reg_def.h b/drivers/scsi/aic94xx/aic94xx_reg_def.h index a43e8cdf4ee4..28aaf349c111 100644 --- a/drivers/scsi/aic94xx/aic94xx_reg_def.h +++ b/drivers/scsi/aic94xx/aic94xx_reg_def.h @@ -1,5 +1,5 @@ /* - * Aic94xx SAS/SATA driver hardware registers defintions. + * Aic94xx SAS/SATA driver hardware registers definitions. * * Copyright (C) 2004 Adaptec, Inc. All rights reserved. * Copyright (C) 2004 David Chaw diff --git a/drivers/scsi/bfa/include/protocol/ct.h b/drivers/scsi/bfa/include/protocol/ct.h index c59d6630b070..79d16a9ab281 100644 --- a/drivers/scsi/bfa/include/protocol/ct.h +++ b/drivers/scsi/bfa/include/protocol/ct.h @@ -82,7 +82,7 @@ enum { }; /* - * defintions for CT reason code + * definitions for CT reason code */ enum { CT_RSN_INV_CMD = 0x01, @@ -129,7 +129,7 @@ enum { }; /* - * defintions for the explanation code for all servers + * definitions for the explanation code for all servers */ enum { CT_EXP_AUTH_EXCEPTION = 0xF1, diff --git a/drivers/scsi/dmx3191d.c b/drivers/scsi/dmx3191d.c index fa738ec8692a..207352cc70cc 100644 --- a/drivers/scsi/dmx3191d.c +++ b/drivers/scsi/dmx3191d.c @@ -31,7 +31,7 @@ #include /* - * Defintions for the generic 5380 driver. + * Definitions for the generic 5380 driver. */ #define AUTOSENSE diff --git a/drivers/scsi/wd7000.c b/drivers/scsi/wd7000.c index 093610bcfcce..2f6e9d8eaf71 100644 --- a/drivers/scsi/wd7000.c +++ b/drivers/scsi/wd7000.c @@ -161,7 +161,7 @@ * * 2003/02/12 - Christoph Hellwig * - * Cleaned up host template defintion + * Cleaned up host template definition * Removed now obsolete wd7000.h */ diff --git a/drivers/staging/otus/80211core/pub_zfi.h b/drivers/staging/otus/80211core/pub_zfi.h index a35bd5d41d2c..60b7d1c56dee 100644 --- a/drivers/staging/otus/80211core/pub_zfi.h +++ b/drivers/staging/otus/80211core/pub_zfi.h @@ -20,7 +20,7 @@ #include "../oal_dt.h" /***** Section 1 : Tunable Parameters *****/ -/* The defintions in this section are tunabel parameters */ +/* The definitions in this section are tunabel parameters */ /* Maximum number of BSS that could be scaned */ #define ZM_MAX_BSS 128 diff --git a/drivers/staging/otus/zdcompat.h b/drivers/staging/otus/zdcompat.h index 84ac43356b77..cdcaef54afcd 100644 --- a/drivers/staging/otus/zdcompat.h +++ b/drivers/staging/otus/zdcompat.h @@ -17,7 +17,7 @@ /* Module Name : zdcompat.h */ /* */ /* Abstract */ -/* This module contains function defintion for compatibility. */ +/* This module contains function definition for compatibility. */ /* */ /* NOTES */ /* Platform dependent. */ diff --git a/drivers/staging/rtl8192su/r8192S_phyreg.h b/drivers/staging/rtl8192su/r8192S_phyreg.h index 96c7cfa92542..acf644f430aa 100644 --- a/drivers/staging/rtl8192su/r8192S_phyreg.h +++ b/drivers/staging/rtl8192su/r8192S_phyreg.h @@ -38,7 +38,7 @@ // 2. 0x800/0x900/0xA00/0xC00/0xD00/0xE00 // 3. RF register 0x00-2E // 4. Bit Mask for BB/RF register -// 5. Other defintion for BB/RF R/W +// 5. Other definition for BB/RF R/W // diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index 2d07f890a842..3877737f96a6 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h @@ -1227,7 +1227,7 @@ typedef struct smb_com_setattr_rsp { /* empty wct response to setattr */ /*******************************************************/ -/* NT Transact structure defintions follow */ +/* NT Transact structure definitions follow */ /* Currently only ioctl, acl (get security descriptor) */ /* and notify are implemented */ /*******************************************************/ diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index f91fd51b32e3..5fc8e52ee306 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -2655,7 +2655,7 @@ COMPATIBLE_IOCTL(TIOCSLTC) #endif #ifdef TIOCSTART /* - * For these two we have defintions in ioctls.h and/or termios.h on + * For these two we have definitions in ioctls.h and/or termios.h on * some architectures but no actual implemention. Some applications * like bash call them if they are defined in the headers, so we provide * entries here to avoid syslog message spew. diff --git a/include/linux/cciss_ioctl.h b/include/linux/cciss_ioctl.h index cb57c30081a8..eb130b4d8e72 100644 --- a/include/linux/cciss_ioctl.h +++ b/include/linux/cciss_ioctl.h @@ -39,7 +39,7 @@ typedef __u32 DriverVer_type; #ifndef CCISS_CMD_H // This defines are duplicated in cciss_cmd.h in the driver directory -//general boundary defintions +//general boundary definitions #define SENSEINFOBYTES 32//note that this value may vary between host implementations //Command Status value diff --git a/include/linux/in6.h b/include/linux/in6.h index 718bf21c5754..010290dd79bb 100644 --- a/include/linux/in6.h +++ b/include/linux/in6.h @@ -113,7 +113,7 @@ struct in6_flowlabel_req #define IPV6_FLOWINFO_FLOWLABEL 0x000fffff #define IPV6_FLOWINFO_PRIORITY 0x0ff00000 -/* These defintions are obsolete */ +/* These definitions are obsolete */ #define IPV6_PRIORITY_UNCHARACTERIZED 0x0000 #define IPV6_PRIORITY_FILLER 0x0100 #define IPV6_PRIORITY_UNATTENDED 0x0200 diff --git a/include/linux/usb/wusb.h b/include/linux/usb/wusb.h index 429c631d2aad..63ebdcc5dda6 100644 --- a/include/linux/usb/wusb.h +++ b/include/linux/usb/wusb.h @@ -74,7 +74,7 @@ enum { * WUSB defines that CHIDs, CDIDs and CKs are a 16 byte string of * data. In order to avoid confusion and enforce types, we wrap it. * - * Make it packed, as we use it in some hw defintions. + * Make it packed, as we use it in some hw definitions. */ struct wusb_ckhdid { u8 data[16]; -- cgit v1.3-8-gc7d7 From 38938c879eb0c39edf85d5164aa0cffe2874304c Mon Sep 17 00:00:00 2001 From: David Daney Date: Fri, 4 Dec 2009 17:44:50 -0800 Subject: Add support for GCC-4.5's __builtin_unreachable() to compiler.h (v2) Starting with version 4.5, GCC has a new built-in function __builtin_unreachable() that can be used in places like the kernel's BUG() where inline assembly is used to transfer control flow. This eliminated the need for an endless loop in these places. The patch adds a new macro 'unreachable()' that will expand to either __builtin_unreachable() or an endless loop depending on the compiler version. Change from v1: Simplify unreachable() for non-GCC 4.5 case. Signed-off-by: David Daney Acked-by: Ralf Baechle Signed-off-by: Linus Torvalds --- include/linux/compiler-gcc4.h | 14 ++++++++++++++ include/linux/compiler.h | 5 +++++ 2 files changed, 19 insertions(+) (limited to 'include') diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h index 450fa597c94d..ab3af40a53c6 100644 --- a/include/linux/compiler-gcc4.h +++ b/include/linux/compiler-gcc4.h @@ -36,4 +36,18 @@ the kernel context */ #define __cold __attribute__((__cold__)) + +#if __GNUC_MINOR__ >= 5 +/* + * Mark a position in code as unreachable. This can be used to + * suppress control flow warnings after asm blocks that transfer + * control elsewhere. + * + * Early snapshots of gcc 4.5 don't support this and we can't detect + * this in the preprocessor, but we can live with this because they're + * unreleased. Really, we need to have autoconf for the kernel. + */ +#define unreachable() __builtin_unreachable() +#endif + #endif diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 04fb5135b4e1..59f208926d13 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -144,6 +144,11 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); # define barrier() __memory_barrier() #endif +/* Unreachable code */ +#ifndef unreachable +# define unreachable() do { } while (1) +#endif + #ifndef RELOC_HIDE # define RELOC_HIDE(ptr, off) \ ({ unsigned long __ptr; \ -- cgit v1.3-8-gc7d7 From 8d64827172ae680d34d0611a1e865b546e6a5f08 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Wed, 23 Sep 2009 04:59:24 -0300 Subject: V4L/DVB (13040): V4L2: Add a v4l2-subdev (soc-camera) driver for OmniVision OV9640 sensor Signed-off-by: Marek Vasut Signed-off-by: Guennadi Liakhovetski Signed-off-by: Mauro Carvalho Chehab --- drivers/media/video/Kconfig | 6 + drivers/media/video/Makefile | 1 + drivers/media/video/ov9640.c | 801 ++++++++++++++++++++++++++++++++++++++++ drivers/media/video/ov9640.h | 209 +++++++++++ include/media/v4l2-chip-ident.h | 1 + 5 files changed, 1018 insertions(+) create mode 100644 drivers/media/video/ov9640.c create mode 100644 drivers/media/video/ov9640.h (limited to 'include') diff --git a/drivers/media/video/Kconfig b/drivers/media/video/Kconfig index e6186b338a12..c318676f2d29 100644 --- a/drivers/media/video/Kconfig +++ b/drivers/media/video/Kconfig @@ -865,6 +865,12 @@ config SOC_CAMERA_OV772X help This is a ov772x camera driver +config SOC_CAMERA_OV9640 + tristate "ov9640 camera support" + depends on SOC_CAMERA && I2C + help + This is a ov9640 camera driver + config MX1_VIDEO bool diff --git a/drivers/media/video/Makefile b/drivers/media/video/Makefile index e541932a789b..e706ceecef84 100644 --- a/drivers/media/video/Makefile +++ b/drivers/media/video/Makefile @@ -77,6 +77,7 @@ obj-$(CONFIG_SOC_CAMERA_MT9M111) += mt9m111.o obj-$(CONFIG_SOC_CAMERA_MT9T031) += mt9t031.o obj-$(CONFIG_SOC_CAMERA_MT9V022) += mt9v022.o obj-$(CONFIG_SOC_CAMERA_OV772X) += ov772x.o +obj-$(CONFIG_SOC_CAMERA_OV9640) += ov9640.o obj-$(CONFIG_SOC_CAMERA_TW9910) += tw9910.o # And now the v4l2 drivers: diff --git a/drivers/media/video/ov9640.c b/drivers/media/video/ov9640.c new file mode 100644 index 000000000000..c81ae2192887 --- /dev/null +++ b/drivers/media/video/ov9640.c @@ -0,0 +1,801 @@ +/* + * OmniVision OV96xx Camera Driver + * + * Copyright (C) 2009 Marek Vasut + * + * Based on ov772x camera driver: + * + * Copyright (C) 2008 Renesas Solutions Corp. + * Kuninori Morimoto + * + * Based on ov7670 and soc_camera_platform driver, + * + * Copyright 2006-7 Jonathan Corbet + * Copyright (C) 2008 Magnus Damm + * Copyright (C) 2008, Guennadi Liakhovetski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ov9640.h" + +/* default register setup */ +static const struct ov9640_reg ov9640_regs_dflt[] = { + { OV9640_COM5, OV9640_COM5_SYSCLK | OV9640_COM5_LONGEXP }, + { OV9640_COM6, OV9640_COM6_OPT_BLC | OV9640_COM6_ADBLC_BIAS | + OV9640_COM6_FMT_RST | OV9640_COM6_ADBLC_OPTEN }, + { OV9640_PSHFT, OV9640_PSHFT_VAL(0x01) }, + { OV9640_ACOM, OV9640_ACOM_2X_ANALOG | OV9640_ACOM_RSVD }, + { OV9640_TSLB, OV9640_TSLB_YUYV_UYVY }, + { OV9640_COM16, OV9640_COM16_RB_AVG }, + + /* Gamma curve P */ + { 0x6c, 0x40 }, { 0x6d, 0x30 }, { 0x6e, 0x4b }, { 0x6f, 0x60 }, + { 0x70, 0x70 }, { 0x71, 0x70 }, { 0x72, 0x70 }, { 0x73, 0x70 }, + { 0x74, 0x60 }, { 0x75, 0x60 }, { 0x76, 0x50 }, { 0x77, 0x48 }, + { 0x78, 0x3a }, { 0x79, 0x2e }, { 0x7a, 0x28 }, { 0x7b, 0x22 }, + + /* Gamma curve T */ + { 0x7c, 0x04 }, { 0x7d, 0x07 }, { 0x7e, 0x10 }, { 0x7f, 0x28 }, + { 0x80, 0x36 }, { 0x81, 0x44 }, { 0x82, 0x52 }, { 0x83, 0x60 }, + { 0x84, 0x6c }, { 0x85, 0x78 }, { 0x86, 0x8c }, { 0x87, 0x9e }, + { 0x88, 0xbb }, { 0x89, 0xd2 }, { 0x8a, 0xe6 }, +}; + +/* Configurations + * NOTE: for YUV, alter the following registers: + * COM12 |= OV9640_COM12_YUV_AVG + * + * for RGB, alter the following registers: + * COM7 |= OV9640_COM7_RGB + * COM13 |= OV9640_COM13_RGB_AVG + * COM15 |= proper RGB color encoding mode + */ +static const struct ov9640_reg ov9640_regs_qqcif[] = { + { OV9640_CLKRC, OV9640_CLKRC_DPLL_EN | OV9640_CLKRC_DIV(0x0f) }, + { OV9640_COM1, OV9640_COM1_QQFMT | OV9640_COM1_HREF_2SKIP }, + { OV9640_COM4, OV9640_COM4_QQ_VP | OV9640_COM4_RSVD }, + { OV9640_COM7, OV9640_COM7_QCIF }, + { OV9640_COM12, OV9640_COM12_RSVD }, + { OV9640_COM13, OV9640_COM13_GAMMA_RAW | OV9640_COM13_MATRIX_EN }, + { OV9640_COM15, OV9640_COM15_OR_10F0 }, +}; + +static const struct ov9640_reg ov9640_regs_qqvga[] = { + { OV9640_CLKRC, OV9640_CLKRC_DPLL_EN | OV9640_CLKRC_DIV(0x07) }, + { OV9640_COM1, OV9640_COM1_QQFMT | OV9640_COM1_HREF_2SKIP }, + { OV9640_COM4, OV9640_COM4_QQ_VP | OV9640_COM4_RSVD }, + { OV9640_COM7, OV9640_COM7_QVGA }, + { OV9640_COM12, OV9640_COM12_RSVD }, + { OV9640_COM13, OV9640_COM13_GAMMA_RAW | OV9640_COM13_MATRIX_EN }, + { OV9640_COM15, OV9640_COM15_OR_10F0 }, +}; + +static const struct ov9640_reg ov9640_regs_qcif[] = { + { OV9640_CLKRC, OV9640_CLKRC_DPLL_EN | OV9640_CLKRC_DIV(0x07) }, + { OV9640_COM4, OV9640_COM4_QQ_VP | OV9640_COM4_RSVD }, + { OV9640_COM7, OV9640_COM7_QCIF }, + { OV9640_COM12, OV9640_COM12_RSVD }, + { OV9640_COM13, OV9640_COM13_GAMMA_RAW | OV9640_COM13_MATRIX_EN }, + { OV9640_COM15, OV9640_COM15_OR_10F0 }, +}; + +static const struct ov9640_reg ov9640_regs_qvga[] = { + { OV9640_CLKRC, OV9640_CLKRC_DPLL_EN | OV9640_CLKRC_DIV(0x03) }, + { OV9640_COM4, OV9640_COM4_QQ_VP | OV9640_COM4_RSVD }, + { OV9640_COM7, OV9640_COM7_QVGA }, + { OV9640_COM12, OV9640_COM12_RSVD }, + { OV9640_COM13, OV9640_COM13_GAMMA_RAW | OV9640_COM13_MATRIX_EN }, + { OV9640_COM15, OV9640_COM15_OR_10F0 }, +}; + +static const struct ov9640_reg ov9640_regs_cif[] = { + { OV9640_CLKRC, OV9640_CLKRC_DPLL_EN | OV9640_CLKRC_DIV(0x03) }, + { OV9640_COM3, OV9640_COM3_VP }, + { OV9640_COM7, OV9640_COM7_CIF }, + { OV9640_COM12, OV9640_COM12_RSVD }, + { OV9640_COM13, OV9640_COM13_GAMMA_RAW | OV9640_COM13_MATRIX_EN }, + { OV9640_COM15, OV9640_COM15_OR_10F0 }, +}; + +static const struct ov9640_reg ov9640_regs_vga[] = { + { OV9640_CLKRC, OV9640_CLKRC_DPLL_EN | OV9640_CLKRC_DIV(0x01) }, + { OV9640_COM3, OV9640_COM3_VP }, + { OV9640_COM7, OV9640_COM7_VGA }, + { OV9640_COM12, OV9640_COM12_RSVD }, + { OV9640_COM13, OV9640_COM13_GAMMA_RAW | OV9640_COM13_MATRIX_EN }, + { OV9640_COM15, OV9640_COM15_OR_10F0 }, +}; + +static const struct ov9640_reg ov9640_regs_sxga[] = { + { OV9640_CLKRC, OV9640_CLKRC_DPLL_EN | OV9640_CLKRC_DIV(0x01) }, + { OV9640_COM3, OV9640_COM3_VP }, + { OV9640_COM7, 0 }, + { OV9640_COM12, OV9640_COM12_RSVD }, + { OV9640_COM13, OV9640_COM13_GAMMA_RAW | OV9640_COM13_MATRIX_EN }, + { OV9640_COM15, OV9640_COM15_OR_10F0 }, +}; + +static const struct ov9640_reg ov9640_regs_yuv[] = { + { OV9640_MTX1, 0x58 }, + { OV9640_MTX2, 0x48 }, + { OV9640_MTX3, 0x10 }, + { OV9640_MTX4, 0x28 }, + { OV9640_MTX5, 0x48 }, + { OV9640_MTX6, 0x70 }, + { OV9640_MTX7, 0x40 }, + { OV9640_MTX8, 0x40 }, + { OV9640_MTX9, 0x40 }, + { OV9640_MTXS, 0x0f }, +}; + +static const struct ov9640_reg ov9640_regs_rgb[] = { + { OV9640_MTX1, 0x71 }, + { OV9640_MTX2, 0x3e }, + { OV9640_MTX3, 0x0c }, + { OV9640_MTX4, 0x33 }, + { OV9640_MTX5, 0x72 }, + { OV9640_MTX6, 0x00 }, + { OV9640_MTX7, 0x2b }, + { OV9640_MTX8, 0x66 }, + { OV9640_MTX9, 0xd2 }, + { OV9640_MTXS, 0x65 }, +}; + +/* + * TODO: this sensor also supports RGB555 and RGB565 formats, but support for + * them has not yet been sufficiently tested and so it is not included with + * this version of the driver. To test and debug these formats add two entries + * to the below array, see ov722x.c for an example. + */ +static const struct soc_camera_data_format ov9640_fmt_lists[] = { + { + .name = "UYVY", + .fourcc = V4L2_PIX_FMT_UYVY, + .depth = 16, + .colorspace = V4L2_COLORSPACE_JPEG, + }, +}; + +static const struct v4l2_queryctrl ov9640_controls[] = { + { + .id = V4L2_CID_VFLIP, + .type = V4L2_CTRL_TYPE_BOOLEAN, + .name = "Flip Vertically", + .minimum = 0, + .maximum = 1, + .step = 1, + .default_value = 0, + }, + { + .id = V4L2_CID_HFLIP, + .type = V4L2_CTRL_TYPE_BOOLEAN, + .name = "Flip Horizontally", + .minimum = 0, + .maximum = 1, + .step = 1, + .default_value = 0, + }, +}; + +/* read a register */ +static int ov9640_reg_read(struct i2c_client *client, u8 reg, u8 *val) +{ + int ret; + u8 data = reg; + struct i2c_msg msg = { + .addr = client->addr, + .flags = 0, + .len = 1, + .buf = &data, + }; + + ret = i2c_transfer(client->adapter, &msg, 1); + if (ret < 0) + goto err; + + msg.flags = I2C_M_RD; + ret = i2c_transfer(client->adapter, &msg, 1); + if (ret < 0) + goto err; + + *val = data; + return 0; + +err: + dev_err(&client->dev, "Failed reading register 0x%02x!\n", reg); + return ret; +} + +/* write a register */ +static int ov9640_reg_write(struct i2c_client *client, u8 reg, u8 val) +{ + int ret; + u8 _val; + unsigned char data[2] = { reg, val }; + struct i2c_msg msg = { + .addr = client->addr, + .flags = 0, + .len = 2, + .buf = data, + }; + + ret = i2c_transfer(client->adapter, &msg, 1); + if (ret < 0) { + dev_err(&client->dev, "Failed writing register 0x%02x!\n", reg); + return ret; + } + + /* we have to read the register back ... no idea why, maybe HW bug */ + ret = ov9640_reg_read(client, reg, &_val); + if (ret) + dev_err(&client->dev, + "Failed reading back register 0x%02x!\n", reg); + + return 0; +} + + +/* Read a register, alter its bits, write it back */ +static int ov9640_reg_rmw(struct i2c_client *client, u8 reg, u8 set, u8 unset) +{ + u8 val; + int ret; + + ret = ov9640_reg_read(client, reg, &val); + if (ret) { + dev_err(&client->dev, + "[Read]-Modify-Write of register %02x failed!\n", reg); + return val; + } + + val |= set; + val &= ~unset; + + ret = ov9640_reg_write(client, reg, val); + if (ret) + dev_err(&client->dev, + "Read-Modify-[Write] of register %02x failed!\n", reg); + + return ret; +} + +/* Soft reset the camera. This has nothing to do with the RESET pin! */ +static int ov9640_reset(struct i2c_client *client) +{ + int ret; + + ret = ov9640_reg_write(client, OV9640_COM7, OV9640_COM7_SCCB_RESET); + if (ret) + dev_err(&client->dev, + "An error occured while entering soft reset!\n"); + + return ret; +} + +/* Start/Stop streaming from the device */ +static int ov9640_s_stream(struct v4l2_subdev *sd, int enable) +{ + return 0; +} + +/* Alter bus settings on camera side */ +static int ov9640_set_bus_param(struct soc_camera_device *icd, + unsigned long flags) +{ + return 0; +} + +/* Request bus settings on camera side */ +static unsigned long ov9640_query_bus_param(struct soc_camera_device *icd) +{ + struct soc_camera_link *icl = to_soc_camera_link(icd); + + /* + * REVISIT: the camera probably can do 10 bit transfers, but I don't + * have those pins connected on my hardware. + */ + unsigned long flags = SOCAM_PCLK_SAMPLE_RISING | SOCAM_MASTER | + SOCAM_VSYNC_ACTIVE_HIGH | SOCAM_HSYNC_ACTIVE_HIGH | + SOCAM_DATA_ACTIVE_HIGH | SOCAM_DATAWIDTH_8; + + return soc_camera_apply_sensor_flags(icl, flags); +} + +/* Get status of additional camera capabilities */ +static int ov9640_g_ctrl(struct v4l2_subdev *sd, struct v4l2_control *ctrl) +{ + struct i2c_client *client = sd->priv; + struct ov9640_priv *priv = container_of(i2c_get_clientdata(client), + struct ov9640_priv, subdev); + + switch (ctrl->id) { + case V4L2_CID_VFLIP: + ctrl->value = priv->flag_vflip; + break; + case V4L2_CID_HFLIP: + ctrl->value = priv->flag_hflip; + break; + } + return 0; +} + +/* Set status of additional camera capabilities */ +static int ov9640_s_ctrl(struct v4l2_subdev *sd, struct v4l2_control *ctrl) +{ + struct i2c_client *client = sd->priv; + struct ov9640_priv *priv = container_of(i2c_get_clientdata(client), + struct ov9640_priv, subdev); + + int ret = 0; + + switch (ctrl->id) { + case V4L2_CID_VFLIP: + priv->flag_vflip = ctrl->value; + if (ctrl->value) + ret = ov9640_reg_rmw(client, OV9640_MVFP, + OV9640_MVFP_V, 0); + else + ret = ov9640_reg_rmw(client, OV9640_MVFP, + 0, OV9640_MVFP_V); + break; + case V4L2_CID_HFLIP: + priv->flag_hflip = ctrl->value; + if (ctrl->value) + ret = ov9640_reg_rmw(client, OV9640_MVFP, + OV9640_MVFP_H, 0); + else + ret = ov9640_reg_rmw(client, OV9640_MVFP, + 0, OV9640_MVFP_H); + break; + } + + return ret; +} + +/* Get chip identification */ +static int ov9640_g_chip_ident(struct v4l2_subdev *sd, + struct v4l2_dbg_chip_ident *id) +{ + struct i2c_client *client = sd->priv; + struct ov9640_priv *priv = container_of(i2c_get_clientdata(client), + struct ov9640_priv, subdev); + + id->ident = priv->model; + id->revision = priv->revision; + + return 0; +} + +#ifdef CONFIG_VIDEO_ADV_DEBUG +static int ov9640_get_register(struct v4l2_subdev *sd, + struct v4l2_dbg_register *reg) +{ + struct i2c_client *client = sd->priv; + int ret; + u8 val; + + if (reg->reg & ~0xff) + return -EINVAL; + + reg->size = 1; + + ret = ov9640_reg_read(client, reg->reg, &val); + if (ret) + return ret; + + reg->val = (__u64)val; + + return 0; +} + +static int ov9640_set_register(struct v4l2_subdev *sd, + struct v4l2_dbg_register *reg) +{ + struct i2c_client *client = sd->priv; + + if (reg->reg & ~0xff || reg->val & ~0xff) + return -EINVAL; + + return ov9640_reg_write(client, reg->reg, reg->val); +} +#endif + +/* select nearest higher resolution for capture */ +static void ov9640_res_roundup(u32 *width, u32 *height) +{ + int i; + enum { QQCIF, QQVGA, QCIF, QVGA, CIF, VGA, SXGA }; + int res_x[] = { 88, 160, 176, 320, 352, 640, 1280 }; + int res_y[] = { 72, 120, 144, 240, 288, 480, 960 }; + + for (i = 0; i < ARRAY_SIZE(res_x); i++) { + if (res_x[i] >= *width && res_y[i] >= *height) { + *width = res_x[i]; + *height = res_y[i]; + return; + } + } + + *width = res_x[SXGA]; + *height = res_y[SXGA]; +} + +/* Prepare necessary register changes depending on color encoding */ +static void ov9640_alter_regs(u32 pixfmt, struct ov9640_reg_alt *alt) +{ + switch (pixfmt) { + case V4L2_PIX_FMT_UYVY: + alt->com12 = OV9640_COM12_YUV_AVG; + alt->com13 = OV9640_COM13_Y_DELAY_EN | + OV9640_COM13_YUV_DLY(0x01); + break; + case V4L2_PIX_FMT_RGB555: + alt->com7 = OV9640_COM7_RGB; + alt->com13 = OV9640_COM13_RGB_AVG; + alt->com15 = OV9640_COM15_RGB_555; + break; + case V4L2_PIX_FMT_RGB565: + alt->com7 = OV9640_COM7_RGB; + alt->com13 = OV9640_COM13_RGB_AVG; + alt->com15 = OV9640_COM15_RGB_565; + break; + }; +} + +/* Setup registers according to resolution and color encoding */ +static int ov9640_write_regs(struct i2c_client *client, + u32 width, u32 pixfmt, struct ov9640_reg_alt *alts) +{ + const struct ov9640_reg *ov9640_regs, *matrix_regs; + int ov9640_regs_len, matrix_regs_len; + int i, ret; + u8 val; + + /* select register configuration for given resolution */ + switch (width) { + case W_QQCIF: + ov9640_regs = ov9640_regs_qqcif; + ov9640_regs_len = ARRAY_SIZE(ov9640_regs_qqcif); + break; + case W_QQVGA: + ov9640_regs = ov9640_regs_qqvga; + ov9640_regs_len = ARRAY_SIZE(ov9640_regs_qqvga); + break; + case W_QCIF: + ov9640_regs = ov9640_regs_qcif; + ov9640_regs_len = ARRAY_SIZE(ov9640_regs_qcif); + break; + case W_QVGA: + ov9640_regs = ov9640_regs_qvga; + ov9640_regs_len = ARRAY_SIZE(ov9640_regs_qvga); + break; + case W_CIF: + ov9640_regs = ov9640_regs_cif; + ov9640_regs_len = ARRAY_SIZE(ov9640_regs_cif); + break; + case W_VGA: + ov9640_regs = ov9640_regs_vga; + ov9640_regs_len = ARRAY_SIZE(ov9640_regs_vga); + break; + case W_SXGA: + ov9640_regs = ov9640_regs_sxga; + ov9640_regs_len = ARRAY_SIZE(ov9640_regs_sxga); + break; + default: + dev_err(&client->dev, "Failed to select resolution!\n"); + return -EINVAL; + } + + /* select color matrix configuration for given color encoding */ + if (pixfmt == V4L2_PIX_FMT_UYVY) { + matrix_regs = ov9640_regs_yuv; + matrix_regs_len = ARRAY_SIZE(ov9640_regs_yuv); + } else { + matrix_regs = ov9640_regs_rgb; + matrix_regs_len = ARRAY_SIZE(ov9640_regs_rgb); + } + + /* write register settings into the module */ + for (i = 0; i < ov9640_regs_len; i++) { + val = ov9640_regs[i].val; + + switch (ov9640_regs[i].reg) { + case OV9640_COM7: + val |= alts->com7; + break; + case OV9640_COM12: + val |= alts->com12; + break; + case OV9640_COM13: + val |= alts->com13; + break; + case OV9640_COM15: + val |= alts->com15; + break; + } + + ret = ov9640_reg_write(client, ov9640_regs[i].reg, val); + if (ret) + return ret; + } + + /* write color matrix configuration into the module */ + for (i = 0; i < matrix_regs_len; i++) { + ret = ov9640_reg_write(client, matrix_regs[i].reg, + matrix_regs[i].val); + if (ret) + return ret; + } + + return 0; +} + +/* program default register values */ +static int ov9640_prog_dflt(struct i2c_client *client) +{ + int i, ret; + + for (i = 0; i < ARRAY_SIZE(ov9640_regs_dflt); i++) { + ret = ov9640_reg_write(client, ov9640_regs_dflt[i].reg, + ov9640_regs_dflt[i].val); + if (ret) + return ret; + } + + /* wait for the changes to actually happen, 140ms are not enough yet */ + mdelay(150); + + return 0; +} + +/* set the format we will capture in */ +static int ov9640_s_fmt(struct v4l2_subdev *sd, struct v4l2_format *f) +{ + struct i2c_client *client = sd->priv; + struct v4l2_pix_format *pix = &f->fmt.pix; + struct ov9640_reg_alt alts = {0}; + int ret; + + ov9640_res_roundup(&pix->width, &pix->height); + ov9640_alter_regs(pix->pixelformat, &alts); + + ov9640_reset(client); + + ret = ov9640_prog_dflt(client); + if (ret) + return ret; + + return ov9640_write_regs(client, pix->width, pix->pixelformat, &alts); +} + +static int ov9640_try_fmt(struct v4l2_subdev *sd, struct v4l2_format *f) +{ + struct v4l2_pix_format *pix = &f->fmt.pix; + + ov9640_res_roundup(&pix->width, &pix->height); + pix->field = V4L2_FIELD_NONE; + + return 0; +} + +static int ov9640_g_crop(struct v4l2_subdev *sd, struct v4l2_crop *a) +{ + a->c.left = 0; + a->c.top = 0; + a->c.width = W_SXGA; + a->c.height = H_SXGA; + a->type = V4L2_BUF_TYPE_VIDEO_CAPTURE; + + return 0; +} + +static int ov9640_cropcap(struct v4l2_subdev *sd, struct v4l2_cropcap *a) +{ + a->bounds.left = 0; + a->bounds.top = 0; + a->bounds.width = W_SXGA; + a->bounds.height = H_SXGA; + a->defrect = a->bounds; + a->type = V4L2_BUF_TYPE_VIDEO_CAPTURE; + a->pixelaspect.numerator = 1; + a->pixelaspect.denominator = 1; + + return 0; +} + + + +static int ov9640_video_probe(struct soc_camera_device *icd, + struct i2c_client *client) +{ + struct ov9640_priv *priv = i2c_get_clientdata(client); + u8 pid, ver, midh, midl; + const char *devname; + int ret = 0; + + /* + * We must have a parent by now. And it cannot be a wrong one. + * So this entire test is completely redundant. + */ + if (!icd->dev.parent || + to_soc_camera_host(icd->dev.parent)->nr != icd->iface) { + dev_err(&client->dev, "Parent missing or invalid!\n"); + ret = -ENODEV; + goto err; + } + + icd->formats = ov9640_fmt_lists; + icd->num_formats = ARRAY_SIZE(ov9640_fmt_lists); + + /* + * check and show product ID and manufacturer ID + */ + + ret = ov9640_reg_read(client, OV9640_PID, &pid); + if (ret) + goto err; + + ret = ov9640_reg_read(client, OV9640_VER, &ver); + if (ret) + goto err; + + ret = ov9640_reg_read(client, OV9640_MIDH, &midh); + if (ret) + goto err; + + ret = ov9640_reg_read(client, OV9640_MIDL, &midl); + if (ret) + goto err; + + switch (VERSION(pid, ver)) { + case OV9640_V2: + devname = "ov9640"; + priv->model = V4L2_IDENT_OV9640; + priv->revision = 2; + case OV9640_V3: + devname = "ov9640"; + priv->model = V4L2_IDENT_OV9640; + priv->revision = 3; + break; + default: + dev_err(&client->dev, "Product ID error %x:%x\n", pid, ver); + ret = -ENODEV; + goto err; + } + + dev_info(&client->dev, "%s Product ID %0x:%0x Manufacturer ID %x:%x\n", + devname, pid, ver, midh, midl); + +err: + return ret; +} + +static struct soc_camera_ops ov9640_ops = { + .set_bus_param = ov9640_set_bus_param, + .query_bus_param = ov9640_query_bus_param, + .controls = ov9640_controls, + .num_controls = ARRAY_SIZE(ov9640_controls), +}; + +static struct v4l2_subdev_core_ops ov9640_core_ops = { + .g_ctrl = ov9640_g_ctrl, + .s_ctrl = ov9640_s_ctrl, + .g_chip_ident = ov9640_g_chip_ident, +#ifdef CONFIG_VIDEO_ADV_DEBUG + .g_register = ov9640_get_register, + .s_register = ov9640_set_register, +#endif + +}; + +static struct v4l2_subdev_video_ops ov9640_video_ops = { + .s_stream = ov9640_s_stream, + .s_fmt = ov9640_s_fmt, + .try_fmt = ov9640_try_fmt, + .cropcap = ov9640_cropcap, + .g_crop = ov9640_g_crop, + +}; + +static struct v4l2_subdev_ops ov9640_subdev_ops = { + .core = &ov9640_core_ops, + .video = &ov9640_video_ops, +}; + +/* + * i2c_driver function + */ +static int ov9640_probe(struct i2c_client *client, + const struct i2c_device_id *did) +{ + struct ov9640_priv *priv; + struct soc_camera_device *icd = client->dev.platform_data; + struct soc_camera_link *icl; + int ret; + + if (!icd) { + dev_err(&client->dev, "Missing soc-camera data!\n"); + return -EINVAL; + } + + icl = to_soc_camera_link(icd); + if (!icl) { + dev_err(&client->dev, "Missing platform_data for driver\n"); + return -EINVAL; + } + + priv = kzalloc(sizeof(struct ov9640_priv), GFP_KERNEL); + if (!priv) { + dev_err(&client->dev, + "Failed to allocate memory for private data!\n"); + return -ENOMEM; + } + + v4l2_i2c_subdev_init(&priv->subdev, client, &ov9640_subdev_ops); + + icd->ops = &ov9640_ops; + + ret = ov9640_video_probe(icd, client); + + if (ret) { + icd->ops = NULL; + i2c_set_clientdata(client, NULL); + kfree(priv); + } + + return ret; +} + +static int ov9640_remove(struct i2c_client *client) +{ + struct ov9640_priv *priv = i2c_get_clientdata(client); + + i2c_set_clientdata(client, NULL); + kfree(priv); + return 0; +} + +static const struct i2c_device_id ov9640_id[] = { + { "ov9640", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, ov9640_id); + +static struct i2c_driver ov9640_i2c_driver = { + .driver = { + .name = "ov9640", + }, + .probe = ov9640_probe, + .remove = ov9640_remove, + .id_table = ov9640_id, +}; + +static int __init ov9640_module_init(void) +{ + return i2c_add_driver(&ov9640_i2c_driver); +} + +static void __exit ov9640_module_exit(void) +{ + i2c_del_driver(&ov9640_i2c_driver); +} + +module_init(ov9640_module_init); +module_exit(ov9640_module_exit); + +MODULE_DESCRIPTION("SoC Camera driver for OmniVision OV96xx"); +MODULE_AUTHOR("Marek Vasut "); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/media/video/ov9640.h b/drivers/media/video/ov9640.h new file mode 100644 index 000000000000..f8a51b70792e --- /dev/null +++ b/drivers/media/video/ov9640.h @@ -0,0 +1,209 @@ +/* + * OmniVision OV96xx Camera Header File + * + * Copyright (C) 2009 Marek Vasut + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __DRIVERS_MEDIA_VIDEO_OV9640_H__ +#define __DRIVERS_MEDIA_VIDEO_OV9640_H__ + +/* Register definitions */ +#define OV9640_GAIN 0x00 +#define OV9640_BLUE 0x01 +#define OV9640_RED 0x02 +#define OV9640_VFER 0x03 +#define OV9640_COM1 0x04 +#define OV9640_BAVE 0x05 +#define OV9640_GEAVE 0x06 +#define OV9640_RSID 0x07 +#define OV9640_RAVE 0x08 +#define OV9640_COM2 0x09 +#define OV9640_PID 0x0a +#define OV9640_VER 0x0b +#define OV9640_COM3 0x0c +#define OV9640_COM4 0x0d +#define OV9640_COM5 0x0e +#define OV9640_COM6 0x0f +#define OV9640_AECH 0x10 +#define OV9640_CLKRC 0x11 +#define OV9640_COM7 0x12 +#define OV9640_COM8 0x13 +#define OV9640_COM9 0x14 +#define OV9640_COM10 0x15 +/* 0x16 - RESERVED */ +#define OV9640_HSTART 0x17 +#define OV9640_HSTOP 0x18 +#define OV9640_VSTART 0x19 +#define OV9640_VSTOP 0x1a +#define OV9640_PSHFT 0x1b +#define OV9640_MIDH 0x1c +#define OV9640_MIDL 0x1d +#define OV9640_MVFP 0x1e +#define OV9640_LAEC 0x1f +#define OV9640_BOS 0x20 +#define OV9640_GBOS 0x21 +#define OV9640_GROS 0x22 +#define OV9640_ROS 0x23 +#define OV9640_AEW 0x24 +#define OV9640_AEB 0x25 +#define OV9640_VPT 0x26 +#define OV9640_BBIAS 0x27 +#define OV9640_GBBIAS 0x28 +/* 0x29 - RESERVED */ +#define OV9640_EXHCH 0x2a +#define OV9640_EXHCL 0x2b +#define OV9640_RBIAS 0x2c +#define OV9640_ADVFL 0x2d +#define OV9640_ADVFH 0x2e +#define OV9640_YAVE 0x2f +#define OV9640_HSYST 0x30 +#define OV9640_HSYEN 0x31 +#define OV9640_HREF 0x32 +#define OV9640_CHLF 0x33 +#define OV9640_ARBLM 0x34 +/* 0x35..0x36 - RESERVED */ +#define OV9640_ADC 0x37 +#define OV9640_ACOM 0x38 +#define OV9640_OFON 0x39 +#define OV9640_TSLB 0x3a +#define OV9640_COM11 0x3b +#define OV9640_COM12 0x3c +#define OV9640_COM13 0x3d +#define OV9640_COM14 0x3e +#define OV9640_EDGE 0x3f +#define OV9640_COM15 0x40 +#define OV9640_COM16 0x41 +#define OV9640_COM17 0x42 +/* 0x43..0x4e - RESERVED */ +#define OV9640_MTX1 0x4f +#define OV9640_MTX2 0x50 +#define OV9640_MTX3 0x51 +#define OV9640_MTX4 0x52 +#define OV9640_MTX5 0x53 +#define OV9640_MTX6 0x54 +#define OV9640_MTX7 0x55 +#define OV9640_MTX8 0x56 +#define OV9640_MTX9 0x57 +#define OV9640_MTXS 0x58 +/* 0x59..0x61 - RESERVED */ +#define OV9640_LCC1 0x62 +#define OV9640_LCC2 0x63 +#define OV9640_LCC3 0x64 +#define OV9640_LCC4 0x65 +#define OV9640_LCC5 0x66 +#define OV9640_MANU 0x67 +#define OV9640_MANV 0x68 +#define OV9640_HV 0x69 +#define OV9640_MBD 0x6a +#define OV9640_DBLV 0x6b +#define OV9640_GSP 0x6c /* ... till 0x7b */ +#define OV9640_GST 0x7c /* ... till 0x8a */ + +#define OV9640_CLKRC_DPLL_EN 0x80 +#define OV9640_CLKRC_DIRECT 0x40 +#define OV9640_CLKRC_DIV(x) ((x) & 0x3f) + +#define OV9640_PSHFT_VAL(x) ((x) & 0xff) + +#define OV9640_ACOM_2X_ANALOG 0x80 +#define OV9640_ACOM_RSVD 0x12 + +#define OV9640_MVFP_V 0x10 +#define OV9640_MVFP_H 0x20 + +#define OV9640_COM1_HREF_NOSKIP 0x00 +#define OV9640_COM1_HREF_2SKIP 0x04 +#define OV9640_COM1_HREF_3SKIP 0x08 +#define OV9640_COM1_QQFMT 0x20 + +#define OV9640_COM2_SSM 0x10 + +#define OV9640_COM3_VP 0x04 + +#define OV9640_COM4_QQ_VP 0x80 +#define OV9640_COM4_RSVD 0x40 + +#define OV9640_COM5_SYSCLK 0x80 +#define OV9640_COM5_LONGEXP 0x01 + +#define OV9640_COM6_OPT_BLC 0x40 +#define OV9640_COM6_ADBLC_BIAS 0x08 +#define OV9640_COM6_FMT_RST 0x82 +#define OV9640_COM6_ADBLC_OPTEN 0x01 + +#define OV9640_COM7_RAW_RGB 0x01 +#define OV9640_COM7_RGB 0x04 +#define OV9640_COM7_QCIF 0x08 +#define OV9640_COM7_QVGA 0x10 +#define OV9640_COM7_CIF 0x20 +#define OV9640_COM7_VGA 0x40 +#define OV9640_COM7_SCCB_RESET 0x80 + +#define OV9640_TSLB_YVYU_YUYV 0x04 +#define OV9640_TSLB_YUYV_UYVY 0x08 + +#define OV9640_COM12_YUV_AVG 0x04 +#define OV9640_COM12_RSVD 0x40 + +#define OV9640_COM13_GAMMA_NONE 0x00 +#define OV9640_COM13_GAMMA_Y 0x40 +#define OV9640_COM13_GAMMA_RAW 0x80 +#define OV9640_COM13_RGB_AVG 0x20 +#define OV9640_COM13_MATRIX_EN 0x10 +#define OV9640_COM13_Y_DELAY_EN 0x08 +#define OV9640_COM13_YUV_DLY(x) ((x) & 0x07) + +#define OV9640_COM15_OR_00FF 0x00 +#define OV9640_COM15_OR_01FE 0x40 +#define OV9640_COM15_OR_10F0 0xc0 +#define OV9640_COM15_RGB_NORM 0x00 +#define OV9640_COM15_RGB_565 0x10 +#define OV9640_COM15_RGB_555 0x30 + +#define OV9640_COM16_RB_AVG 0x01 + +/* IDs */ +#define OV9640_V2 0x9648 +#define OV9640_V3 0x9649 +#define VERSION(pid, ver) (((pid) << 8) | ((ver) & 0xFF)) + +/* supported resolutions */ +enum { + W_QQCIF = 88, + W_QQVGA = 160, + W_QCIF = 176, + W_QVGA = 320, + W_CIF = 352, + W_VGA = 640, + W_SXGA = 1280 +}; +#define H_SXGA 960 + +/* Misc. structures */ +struct ov9640_reg_alt { + u8 com7; + u8 com12; + u8 com13; + u8 com15; +}; + +struct ov9640_reg { + u8 reg; + u8 val; +}; + +struct ov9640_priv { + struct v4l2_subdev subdev; + + int model; + int revision; + + bool flag_vflip; + bool flag_hflip; +}; + +#endif /* __DRIVERS_MEDIA_VIDEO_OV9640_H__ */ diff --git a/include/media/v4l2-chip-ident.h b/include/media/v4l2-chip-ident.h index cf16689adba7..5ce56f90f315 100644 --- a/include/media/v4l2-chip-ident.h +++ b/include/media/v4l2-chip-ident.h @@ -64,6 +64,7 @@ enum { V4L2_IDENT_OV9650 = 254, V4L2_IDENT_OV9655 = 255, V4L2_IDENT_SOI968 = 256, + V4L2_IDENT_OV9640 = 257, /* module saa7146: reserved range 300-309 */ V4L2_IDENT_SAA7146 = 300, -- cgit v1.3-8-gc7d7 From f56db93cef5d368b4fa5db49b68bc4ab0b20c4fd Mon Sep 17 00:00:00 2001 From: Andy Walls Date: Sat, 26 Sep 2009 22:07:10 -0300 Subject: V4L/DVB (13084): v4l2-chip-ident: Add ID's needed for the cx23885 and cx25840 modules Add identifiers for CX2388[578] chips, CX2310[012] chips, integrated A/V decoders cores, integrated IR controller core, and the CX23417 MPEG encoder. The cx23885 module and cx25840 module will use these identifiers in upcoming changes. Signed-off-by: Andy Walls Signed-off-by: Mauro Carvalho Chehab --- include/media/v4l2-chip-ident.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/media/v4l2-chip-ident.h b/include/media/v4l2-chip-ident.h index 5ce56f90f315..a2d93da38450 100644 --- a/include/media/v4l2-chip-ident.h +++ b/include/media/v4l2-chip-ident.h @@ -73,6 +73,7 @@ enum { V4L2_IDENT_CX23418_843 = 403, /* Integrated A/V Decoder on the '418 */ V4L2_IDENT_CX23415 = 415, V4L2_IDENT_CX23416 = 416, + V4L2_IDENT_CX23417 = 417, V4L2_IDENT_CX23418 = 418, /* module au0828 */ @@ -166,12 +167,27 @@ enum { /* module mt9v011, just ident 8243 */ V4L2_IDENT_MT9V011 = 8243, + /* module cx23885 and cx25840 */ + V4L2_IDENT_CX23885 = 8850, + V4L2_IDENT_CX23885_AV = 8851, /* Integrated A/V decoder */ + V4L2_IDENT_CX23887 = 8870, + V4L2_IDENT_CX23887_AV = 8871, /* Integrated A/V decoder */ + V4L2_IDENT_CX23888 = 8880, + V4L2_IDENT_CX23888_AV = 8881, /* Integrated A/V decoder */ + V4L2_IDENT_CX23888_IR = 8882, /* Integrated infrared controller */ + /* module tw9910: just ident 9910 */ V4L2_IDENT_TW9910 = 9910, /* module sn9c20x: just ident 10000 */ V4L2_IDENT_SN9C20X = 10000, + /* module cx231xx and cx25840 */ + V4L2_IDENT_CX2310X_AV = 23099, /* Integrated A/V decoder; not in '100 */ + V4L2_IDENT_CX23100 = 23100, + V4L2_IDENT_CX23101 = 23101, + V4L2_IDENT_CX23102 = 23102, + /* module msp3400: reserved range 34000-34999 and 44000-44999 */ V4L2_IDENT_MSPX4XX = 34000, /* generic MSPX4XX identifier, only use internally (tveeprom.c). */ -- cgit v1.3-8-gc7d7 From 1d986add96a06f311cfef377b36602514db54507 Mon Sep 17 00:00:00 2001 From: Andy Walls Date: Sun, 27 Sep 2009 17:50:04 -0300 Subject: V4L/DVB (13096): v4l2-subdev: Add v4l2_subdev_ir_ops and IR notify defines for v4l2_device Add v4l2_subdev_ir_ops and IR notify defines for v4l2_device. This change is specifically needed at this time to support the integrated IR controller in the CX2388[58] chips. Signed-off-by: Andy Walls Signed-off-by: Mauro Carvalho Chehab --- include/media/v4l2-subdev.h | 94 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) (limited to 'include') diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h index d411345f244b..ecc2818938b3 100644 --- a/include/media/v4l2-subdev.h +++ b/include/media/v4l2-subdev.h @@ -23,6 +23,16 @@ #include +/* generic v4l2_device notify callback notification values */ +#define V4L2_SUBDEV_IR_RX_NOTIFY _IOW('v', 0, u32) +#define V4L2_SUBDEV_IR_RX_FIFO_SERVICE_REQ 0x00000001 +#define V4L2_SUBDEV_IR_RX_END_OF_RX_DETECTED 0x00000002 +#define V4L2_SUBDEV_IR_RX_HW_FIFO_OVERRUN 0x00000004 +#define V4L2_SUBDEV_IR_RX_SW_FIFO_OVERRUN 0x00000008 + +#define V4L2_SUBDEV_IR_TX_NOTIFY _IOW('v', 1, u32) +#define V4L2_SUBDEV_IR_TX_FIFO_SERVICE_REQ 0x00000001 + struct v4l2_device; struct v4l2_subdev; struct tuner_setup; @@ -231,11 +241,95 @@ struct v4l2_subdev_video_ops { int (*enum_frameintervals)(struct v4l2_subdev *sd, struct v4l2_frmivalenum *fival); }; +/* + interrupt_service_routine: Called by the bridge chip's interrupt service + handler, when an IR interrupt status has be raised due to this subdev, + so that this subdev can handle the details. It may schedule work to be + performed later. It must not sleep. *Called from an IRQ context*. + + [rt]x_g_parameters: Get the current operating parameters and state of the + the IR receiver or transmitter. + + [rt]x_s_parameters: Set the current operating parameters and state of the + the IR receiver or transmitter. It is recommended to call + [rt]x_g_parameters first to fill out the current state, and only change + the fields that need to be changed. Upon return, the actual device + operating parameters and state will be returned. Note that hardware + limitations may prevent the actual settings from matching the requested + settings - e.g. an actual carrier setting of 35,904 Hz when 36,000 Hz + was requested. An exception is when the shutdown parameter is true. + The last used operational parameters will be returned, but the actual + state of the hardware be different to minimize power consumption and + processing when shutdown is true. + + rx_read: Reads received codes or pulse width data. + The semantics are similar to a non-blocking read() call. + + tx_write: Writes codes or pulse width data for transmission. + The semantics are similar to a non-blocking write() call. + */ + +enum v4l2_subdev_ir_mode { + V4L2_SUBDEV_IR_MODE_PULSE_WIDTH, /* space & mark widths in nanosecs */ +}; + +/* Data format of data read or written for V4L2_SUBDEV_IR_MODE_PULSE_WIDTH */ +#define V4L2_SUBDEV_IR_PULSE_MAX_WIDTH_NS 0x7fffffff +#define V4L2_SUBDEV_IR_PULSE_LEVEL_MASK 0x80000000 +#define V4L2_SUBDEV_IR_PULSE_RX_SEQ_END 0xffffffff + +struct v4l2_subdev_ir_parameters { + /* Either Rx or Tx */ + unsigned int bytes_per_data_element; /* of data in read or write call */ + enum v4l2_subdev_ir_mode mode; + + bool enable; + bool interrupt_enable; + bool shutdown; /* true: set hardware to low/no power, false: normal */ + + bool modulation; /* true: uses carrier, false: baseband */ + u32 max_pulse_width; /* ns, valid only for baseband signal */ + unsigned int carrier_freq; /* Hz, valid only for modulated signal*/ + unsigned int duty_cycle; /* percent, valid only for modulated signal*/ + bool invert; /* logically invert sense of mark/space */ + + /* Rx only */ + u32 noise_filter_min_width; /* ns, min time of a valid pulse */ + unsigned int carrier_range_lower; /* Hz, valid only for modulated sig */ + unsigned int carrier_range_upper; /* Hz, valid only for modulated sig */ + u32 resolution; /* ns */ +}; + +struct v4l2_subdev_ir_ops { + /* Common to receiver and transmitter */ + int (*interrupt_service_routine)(struct v4l2_subdev *sd, + u32 status, bool *handled); + + /* Receiver */ + int (*rx_read)(struct v4l2_subdev *sd, u8 *buf, size_t count, + ssize_t *num); + + int (*rx_g_parameters)(struct v4l2_subdev *sd, + struct v4l2_subdev_ir_parameters *params); + int (*rx_s_parameters)(struct v4l2_subdev *sd, + struct v4l2_subdev_ir_parameters *params); + + /* Transmitter */ + int (*tx_write)(struct v4l2_subdev *sd, u8 *buf, size_t count, + ssize_t *num); + + int (*tx_g_parameters)(struct v4l2_subdev *sd, + struct v4l2_subdev_ir_parameters *params); + int (*tx_s_parameters)(struct v4l2_subdev *sd, + struct v4l2_subdev_ir_parameters *params); +}; + struct v4l2_subdev_ops { const struct v4l2_subdev_core_ops *core; const struct v4l2_subdev_tuner_ops *tuner; const struct v4l2_subdev_audio_ops *audio; const struct v4l2_subdev_video_ops *video; + const struct v4l2_subdev_ir_ops *ir; }; #define V4L2_SUBDEV_NAME_SIZE 32 -- cgit v1.3-8-gc7d7 From 1d23a002434802078d806ddc2937bd69bbbd6dc8 Mon Sep 17 00:00:00 2001 From: Andy Walls Date: Sun, 27 Sep 2009 20:05:23 -0300 Subject: V4L/DVB (13099): ir-functions: Export ir_rc5_decode() for use by the cx23885 module Signed-off-by: Andy Walls Signed-off-by: Mauro Carvalho Chehab --- drivers/media/common/ir-functions.c | 3 ++- include/media/ir-common.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/media/common/ir-functions.c b/drivers/media/common/ir-functions.c index abd4791acb0e..fc2d8941459f 100644 --- a/drivers/media/common/ir-functions.c +++ b/drivers/media/common/ir-functions.c @@ -275,7 +275,7 @@ EXPORT_SYMBOL_GPL(ir_decode_biphase); * saa7134 */ /* decode raw bit pattern to RC5 code */ -static u32 ir_rc5_decode(unsigned int code) +u32 ir_rc5_decode(unsigned int code) { unsigned int org_code = code; unsigned int pair; @@ -304,6 +304,7 @@ static u32 ir_rc5_decode(unsigned int code) RC5_TOGGLE(rc5), RC5_ADDR(rc5), RC5_INSTR(rc5)); return rc5; } +EXPORT_SYMBOL_GPL(ir_rc5_decode); void ir_rc5_timer_end(unsigned long data) { diff --git a/include/media/ir-common.h b/include/media/ir-common.h index 29f0e53cff94..af3257e6b808 100644 --- a/include/media/ir-common.h +++ b/include/media/ir-common.h @@ -111,6 +111,7 @@ u32 ir_extract_bits(u32 data, u32 mask); int ir_dump_samples(u32 *samples, int count); int ir_decode_biphase(u32 *samples, int count, int low, int high); int ir_decode_pulsedistance(u32 *samples, int count, int low, int high); +u32 ir_rc5_decode(unsigned int code); void ir_rc5_timer_end(unsigned long data); void ir_rc5_timer_keyup(unsigned long data); -- cgit v1.3-8-gc7d7 From 9133aee09e3689c116c526fa9011c33b872e65c1 Mon Sep 17 00:00:00 2001 From: Michael Krufky Date: Sat, 23 May 2009 18:00:59 -0300 Subject: V4L/DVB (13103): create a standard method for dvb adapter drivers to override frontend ioctls Signed-off-by: Michael Krufky Signed-off-by: Mauro Carvalho Chehab --- drivers/media/dvb/dvb-core/dvb_frontend.c | 20 +++++++++++++++++++- drivers/media/dvb/dvb-core/dvbdev.h | 28 ++++++++++++++++++++++++++++ drivers/media/video/cx23885/cx23885-dvb.c | 2 +- drivers/media/video/cx88/cx88-dvb.c | 2 +- drivers/media/video/saa7134/saa7134-dvb.c | 2 +- drivers/media/video/videobuf-dvb.c | 11 ++++++++--- include/media/videobuf-dvb.h | 4 +++- 7 files changed, 61 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/media/dvb/dvb-core/dvb_frontend.c b/drivers/media/dvb/dvb-core/dvb_frontend.c index 98082416aa52..e9ec8e911056 100644 --- a/drivers/media/dvb/dvb-core/dvb_frontend.c +++ b/drivers/media/dvb/dvb-core/dvb_frontend.c @@ -1712,7 +1712,18 @@ static int dvb_frontend_ioctl_legacy(struct inode *inode, struct file *file, struct dvb_device *dvbdev = file->private_data; struct dvb_frontend *fe = dvbdev->priv; struct dvb_frontend_private *fepriv = fe->frontend_priv; - int err = -EOPNOTSUPP; + int cb_err, err = -EOPNOTSUPP; + + if (fe->dvb->fe_ioctl_override) { + cb_err = fe->dvb->fe_ioctl_override(fe, cmd, parg, + DVB_FE_IOCTL_PRE); + if (cb_err < 0) + return cb_err; + if (cb_err > 0) + return 0; + /* fe_ioctl_override returning 0 allows + * dvb-core to continue handling the ioctl */ + } switch (cmd) { case FE_GET_INFO: { @@ -1978,6 +1989,13 @@ static int dvb_frontend_ioctl_legacy(struct inode *inode, struct file *file, break; }; + if (fe->dvb->fe_ioctl_override) { + cb_err = fe->dvb->fe_ioctl_override(fe, cmd, parg, + DVB_FE_IOCTL_POST); + if (cb_err < 0) + return cb_err; + } + return err; } diff --git a/drivers/media/dvb/dvb-core/dvbdev.h b/drivers/media/dvb/dvb-core/dvbdev.h index 01fc70484743..f7b499d4a3c0 100644 --- a/drivers/media/dvb/dvb-core/dvbdev.h +++ b/drivers/media/dvb/dvb-core/dvbdev.h @@ -54,6 +54,8 @@ module_param_array(adapter_nr, short, NULL, 0444); \ MODULE_PARM_DESC(adapter_nr, "DVB adapter numbers") +struct dvb_frontend; + struct dvb_adapter { int num; struct list_head list_head; @@ -69,6 +71,32 @@ struct dvb_adapter { int mfe_shared; /* indicates mutually exclusive frontends */ struct dvb_device *mfe_dvbdev; /* frontend device in use */ struct mutex mfe_lock; /* access lock for thread creation */ + + /* Allow the adapter/bridge driver to perform an action before and/or + * after the core handles an ioctl: + * + * DVB_FE_IOCTL_PRE indicates that the ioctl has not yet been handled. + * DVB_FE_IOCTL_POST indicates that the ioctl has been handled. + * + * When DVB_FE_IOCTL_PRE is passed to the callback as the stage arg: + * + * return 0 to allow dvb-core to handle the ioctl. + * return a positive int to prevent dvb-core from handling the ioctl, + * and exit without error. + * return a negative int to prevent dvb-core from handling the ioctl, + * and return that value as an error. + * + * When DVB_FE_IOCTL_POST is passed to the callback as the stage arg: + * + * return 0 to allow the dvb_frontend ioctl handler to exit normally. + * return a negative int to cause the dvb_frontend ioctl handler to + * return that value as an error. + */ +#define DVB_FE_IOCTL_PRE 0 +#define DVB_FE_IOCTL_POST 1 + int (*fe_ioctl_override)(struct dvb_frontend *fe, + unsigned int cmd, void *parg, + unsigned int stage); }; diff --git a/drivers/media/video/cx23885/cx23885-dvb.c b/drivers/media/video/cx23885/cx23885-dvb.c index 45e13ee66dc7..0a03e02b119b 100644 --- a/drivers/media/video/cx23885/cx23885-dvb.c +++ b/drivers/media/video/cx23885/cx23885-dvb.c @@ -904,7 +904,7 @@ static int dvb_register(struct cx23885_tsport *port) /* register everything */ ret = videobuf_dvb_register_bus(&port->frontends, THIS_MODULE, port, - &dev->pci->dev, adapter_nr, 0); + &dev->pci->dev, adapter_nr, 0, NULL); /* init CI & MAC */ switch (dev->board) { diff --git a/drivers/media/video/cx88/cx88-dvb.c b/drivers/media/video/cx88/cx88-dvb.c index 518bcfe18bcb..d9e402b25b56 100644 --- a/drivers/media/video/cx88/cx88-dvb.c +++ b/drivers/media/video/cx88/cx88-dvb.c @@ -1174,7 +1174,7 @@ static int dvb_register(struct cx8802_dev *dev) /* register everything */ return videobuf_dvb_register_bus(&dev->frontends, THIS_MODULE, dev, - &dev->pci->dev, adapter_nr, mfe_shared); + &dev->pci->dev, adapter_nr, mfe_shared, NULL); frontend_detach: core->gate_ctrl = NULL; diff --git a/drivers/media/video/saa7134/saa7134-dvb.c b/drivers/media/video/saa7134/saa7134-dvb.c index 058b56bf6717..96d3668f4691 100644 --- a/drivers/media/video/saa7134/saa7134-dvb.c +++ b/drivers/media/video/saa7134/saa7134-dvb.c @@ -1574,7 +1574,7 @@ static int dvb_init(struct saa7134_dev *dev) /* register everything else */ ret = videobuf_dvb_register_bus(&dev->frontends, THIS_MODULE, dev, - &dev->pci->dev, adapter_nr, 0); + &dev->pci->dev, adapter_nr, 0, NULL); /* this sequence is necessary to make the tda1004x load its firmware * and to enter analog mode of hybrid boards diff --git a/drivers/media/video/videobuf-dvb.c b/drivers/media/video/videobuf-dvb.c index 0e7dcba8e4ae..a56cf0d3a6d6 100644 --- a/drivers/media/video/videobuf-dvb.c +++ b/drivers/media/video/videobuf-dvb.c @@ -139,7 +139,9 @@ static int videobuf_dvb_register_adapter(struct videobuf_dvb_frontends *fe, struct device *device, char *adapter_name, short *adapter_nr, - int mfe_shared) + int mfe_shared, + int (*fe_ioctl_override)(struct dvb_frontend *, + unsigned int, void *, unsigned int)) { int result; @@ -154,6 +156,7 @@ static int videobuf_dvb_register_adapter(struct videobuf_dvb_frontends *fe, } fe->adapter.priv = adapter_priv; fe->adapter.mfe_shared = mfe_shared; + fe->adapter.fe_ioctl_override = fe_ioctl_override; return result; } @@ -253,7 +256,9 @@ int videobuf_dvb_register_bus(struct videobuf_dvb_frontends *f, void *adapter_priv, struct device *device, short *adapter_nr, - int mfe_shared) + int mfe_shared, + int (*fe_ioctl_override)(struct dvb_frontend *, + unsigned int, void *, unsigned int)) { struct list_head *list, *q; struct videobuf_dvb_frontend *fe; @@ -267,7 +272,7 @@ int videobuf_dvb_register_bus(struct videobuf_dvb_frontends *f, /* Bring up the adapter */ res = videobuf_dvb_register_adapter(f, module, adapter_priv, device, - fe->dvb.name, adapter_nr, mfe_shared); + fe->dvb.name, adapter_nr, mfe_shared, fe_ioctl_override); if (res < 0) { printk(KERN_WARNING "videobuf_dvb_register_adapter failed (errno = %d)\n", res); return res; diff --git a/include/media/videobuf-dvb.h b/include/media/videobuf-dvb.h index 6ba4f1271d23..07cf4b9d0a65 100644 --- a/include/media/videobuf-dvb.h +++ b/include/media/videobuf-dvb.h @@ -42,7 +42,9 @@ int videobuf_dvb_register_bus(struct videobuf_dvb_frontends *f, void *adapter_priv, struct device *device, short *adapter_nr, - int mfe_shared); + int mfe_shared, + int (*fe_ioctl_override)(struct dvb_frontend *, + unsigned int, void *, unsigned int)); void videobuf_dvb_unregister_bus(struct videobuf_dvb_frontends *f); -- cgit v1.3-8-gc7d7 From 8f37cf25badd0ba9de7cd05c3f1d5362607c1bf9 Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Mon, 5 Oct 2009 12:54:04 -0300 Subject: V4L/DVB (13130): soc-camera: add a new driver for the RJ54N1CB0C camera sensor from Sharp This adds an soc-camera / v4l2-subdev driver for the RJ54N1CB0C CMOS camera sensor from Sharp. The sensor is very picky about initialisation and configuration sequences. The driver limits artificially maximum window size by 800x600, although the sensor supports 1600x1200. Sizes above 800x600 don't seem to work correctly, besides, examples from the system integrator use sizes above 640x480 only for still photography. Unfortunately, I had to use "magic" register-value pairs for undocumented and "reserved" registers. This version of the driver also omits some functionality, like cropping, which hasn't been sufficiently tested yet and will be added later. create mode 100644 drivers/media/video/rj54n1cb0c.c Signed-off-by: Guennadi Liakhovetski Signed-off-by: Mauro Carvalho Chehab --- drivers/media/video/Kconfig | 6 + drivers/media/video/Makefile | 1 + drivers/media/video/rj54n1cb0c.c | 1219 ++++++++++++++++++++++++++++++++++++++ include/media/v4l2-chip-ident.h | 3 + 4 files changed, 1229 insertions(+) create mode 100644 drivers/media/video/rj54n1cb0c.c (limited to 'include') diff --git a/drivers/media/video/Kconfig b/drivers/media/video/Kconfig index c318676f2d29..0ab7ccd8729b 100644 --- a/drivers/media/video/Kconfig +++ b/drivers/media/video/Kconfig @@ -847,6 +847,12 @@ config SOC_CAMERA_MT9V022 help This driver supports MT9V022 cameras from Micron +config SOC_CAMERA_RJ54N1 + tristate "rj54n1cb0c support" + depends on SOC_CAMERA && I2C + help + This is a rj54n1cb0c video driver + config SOC_CAMERA_TW9910 tristate "tw9910 support" depends on SOC_CAMERA && I2C diff --git a/drivers/media/video/Makefile b/drivers/media/video/Makefile index e706ceecef84..7a2dcc34111c 100644 --- a/drivers/media/video/Makefile +++ b/drivers/media/video/Makefile @@ -78,6 +78,7 @@ obj-$(CONFIG_SOC_CAMERA_MT9T031) += mt9t031.o obj-$(CONFIG_SOC_CAMERA_MT9V022) += mt9v022.o obj-$(CONFIG_SOC_CAMERA_OV772X) += ov772x.o obj-$(CONFIG_SOC_CAMERA_OV9640) += ov9640.o +obj-$(CONFIG_SOC_CAMERA_RJ54N1) += rj54n1cb0c.o obj-$(CONFIG_SOC_CAMERA_TW9910) += tw9910.o # And now the v4l2 drivers: diff --git a/drivers/media/video/rj54n1cb0c.c b/drivers/media/video/rj54n1cb0c.c new file mode 100644 index 000000000000..373f2a30a677 --- /dev/null +++ b/drivers/media/video/rj54n1cb0c.c @@ -0,0 +1,1219 @@ +/* + * Driver for RJ54N1CB0C CMOS Image Sensor from Micron + * + * Copyright (C) 2009, Guennadi Liakhovetski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include + +#include +#include +#include + +#define RJ54N1_DEV_CODE 0x0400 +#define RJ54N1_DEV_CODE2 0x0401 +#define RJ54N1_OUT_SEL 0x0403 +#define RJ54N1_XY_OUTPUT_SIZE_S_H 0x0404 +#define RJ54N1_X_OUTPUT_SIZE_S_L 0x0405 +#define RJ54N1_Y_OUTPUT_SIZE_S_L 0x0406 +#define RJ54N1_XY_OUTPUT_SIZE_P_H 0x0407 +#define RJ54N1_X_OUTPUT_SIZE_P_L 0x0408 +#define RJ54N1_Y_OUTPUT_SIZE_P_L 0x0409 +#define RJ54N1_LINE_LENGTH_PCK_S_H 0x040a +#define RJ54N1_LINE_LENGTH_PCK_S_L 0x040b +#define RJ54N1_LINE_LENGTH_PCK_P_H 0x040c +#define RJ54N1_LINE_LENGTH_PCK_P_L 0x040d +#define RJ54N1_RESIZE_N 0x040e +#define RJ54N1_RESIZE_N_STEP 0x040f +#define RJ54N1_RESIZE_STEP 0x0410 +#define RJ54N1_RESIZE_HOLD_H 0x0411 +#define RJ54N1_RESIZE_HOLD_L 0x0412 +#define RJ54N1_H_OBEN_OFS 0x0413 +#define RJ54N1_V_OBEN_OFS 0x0414 +#define RJ54N1_RESIZE_CONTROL 0x0415 +#define RJ54N1_INC_USE_SEL_H 0x0425 +#define RJ54N1_INC_USE_SEL_L 0x0426 +#define RJ54N1_MIRROR_STILL_MODE 0x0427 +#define RJ54N1_INIT_START 0x0428 +#define RJ54N1_SCALE_1_2_LEV 0x0429 +#define RJ54N1_SCALE_4_LEV 0x042a +#define RJ54N1_Y_GAIN 0x04d8 +#define RJ54N1_APT_GAIN_UP 0x04fa +#define RJ54N1_RA_SEL_UL 0x0530 +#define RJ54N1_BYTE_SWAP 0x0531 +#define RJ54N1_OUT_SIGPO 0x053b +#define RJ54N1_FRAME_LENGTH_S_H 0x0595 +#define RJ54N1_FRAME_LENGTH_S_L 0x0596 +#define RJ54N1_FRAME_LENGTH_P_H 0x0597 +#define RJ54N1_FRAME_LENGTH_P_L 0x0598 +#define RJ54N1_IOC 0x05ef +#define RJ54N1_TG_BYPASS 0x0700 +#define RJ54N1_PLL_L 0x0701 +#define RJ54N1_PLL_N 0x0702 +#define RJ54N1_PLL_EN 0x0704 +#define RJ54N1_RATIO_TG 0x0706 +#define RJ54N1_RATIO_T 0x0707 +#define RJ54N1_RATIO_R 0x0708 +#define RJ54N1_RAMP_TGCLK_EN 0x0709 +#define RJ54N1_OCLK_DSP 0x0710 +#define RJ54N1_RATIO_OP 0x0711 +#define RJ54N1_RATIO_O 0x0712 +#define RJ54N1_OCLK_SEL_EN 0x0713 +#define RJ54N1_CLK_RST 0x0717 +#define RJ54N1_RESET_STANDBY 0x0718 + +#define E_EXCLK (1 << 7) +#define SOFT_STDBY (1 << 4) +#define SEN_RSTX (1 << 2) +#define TG_RSTX (1 << 1) +#define DSP_RSTX (1 << 0) + +#define RESIZE_HOLD_SEL (1 << 2) +#define RESIZE_GO (1 << 1) + +#define RJ54N1_COLUMN_SKIP 0 +#define RJ54N1_ROW_SKIP 0 +#define RJ54N1_MAX_WIDTH 1600 +#define RJ54N1_MAX_HEIGHT 1200 + +/* I2C addresses: 0x50, 0x51, 0x60, 0x61 */ + +static const struct soc_camera_data_format rj54n1_colour_formats[] = { + { + .name = "YUYV", + .depth = 16, + .fourcc = V4L2_PIX_FMT_YUYV, + .colorspace = V4L2_COLORSPACE_JPEG, + }, { + .name = "RGB565", + .depth = 16, + .fourcc = V4L2_PIX_FMT_RGB565, + .colorspace = V4L2_COLORSPACE_SRGB, + } +}; + +struct rj54n1_clock_div { + u8 ratio_tg; + u8 ratio_t; + u8 ratio_r; + u8 ratio_op; + u8 ratio_o; +}; + +struct rj54n1 { + struct v4l2_subdev subdev; + struct v4l2_rect rect; /* Sensor window */ + unsigned short width; /* Output window */ + unsigned short height; + unsigned short resize; /* Sensor * 1024 / resize = Output */ + struct rj54n1_clock_div clk_div; + u32 fourcc; + unsigned short scale; + u8 bank; +}; + +struct rj54n1_reg_val { + u16 reg; + u8 val; +}; + +const static struct rj54n1_reg_val bank_4[] = { + {0x417, 0}, + {0x42c, 0}, + {0x42d, 0xf0}, + {0x42e, 0}, + {0x42f, 0x50}, + {0x430, 0xf5}, + {0x431, 0x16}, + {0x432, 0x20}, + {0x433, 0}, + {0x434, 0xc8}, + {0x43c, 8}, + {0x43e, 0x90}, + {0x445, 0x83}, + {0x4ba, 0x58}, + {0x4bb, 4}, + {0x4bc, 0x20}, + {0x4db, 4}, + {0x4fe, 2}, +}; + +const static struct rj54n1_reg_val bank_5[] = { + {0x514, 0}, + {0x516, 0}, + {0x518, 0}, + {0x51a, 0}, + {0x51d, 0xff}, + {0x56f, 0x28}, + {0x575, 0x40}, + {0x5bc, 0x48}, + {0x5c1, 6}, + {0x5e5, 0x11}, + {0x5e6, 0x43}, + {0x5e7, 0x33}, + {0x5e8, 0x21}, + {0x5e9, 0x30}, + {0x5ea, 0x0}, + {0x5eb, 0xa5}, + {0x5ec, 0xff}, + {0x5fe, 2}, +}; + +const static struct rj54n1_reg_val bank_7[] = { + {0x70a, 0}, + {0x714, 0xff}, + {0x715, 0xff}, + {0x716, 0x1f}, + {0x7FE, 0x02}, +}; + +const static struct rj54n1_reg_val bank_8[] = { + {0x800, 0x00}, + {0x801, 0x01}, + {0x802, 0x61}, + {0x805, 0x00}, + {0x806, 0x00}, + {0x807, 0x00}, + {0x808, 0x00}, + {0x809, 0x01}, + {0x80A, 0x61}, + {0x80B, 0x00}, + {0x80C, 0x01}, + {0x80D, 0x00}, + {0x80E, 0x00}, + {0x80F, 0x00}, + {0x810, 0x00}, + {0x811, 0x01}, + {0x812, 0x61}, + {0x813, 0x00}, + {0x814, 0x11}, + {0x815, 0x00}, + {0x816, 0x41}, + {0x817, 0x00}, + {0x818, 0x51}, + {0x819, 0x01}, + {0x81A, 0x1F}, + {0x81B, 0x00}, + {0x81C, 0x01}, + {0x81D, 0x00}, + {0x81E, 0x11}, + {0x81F, 0x00}, + {0x820, 0x41}, + {0x821, 0x00}, + {0x822, 0x51}, + {0x823, 0x00}, + {0x824, 0x00}, + {0x825, 0x00}, + {0x826, 0x47}, + {0x827, 0x01}, + {0x828, 0x4F}, + {0x829, 0x00}, + {0x82A, 0x00}, + {0x82B, 0x00}, + {0x82C, 0x30}, + {0x82D, 0x00}, + {0x82E, 0x40}, + {0x82F, 0x00}, + {0x830, 0xB3}, + {0x831, 0x00}, + {0x832, 0xE3}, + {0x833, 0x00}, + {0x834, 0x00}, + {0x835, 0x00}, + {0x836, 0x00}, + {0x837, 0x00}, + {0x838, 0x00}, + {0x839, 0x01}, + {0x83A, 0x61}, + {0x83B, 0x00}, + {0x83C, 0x01}, + {0x83D, 0x00}, + {0x83E, 0x00}, + {0x83F, 0x00}, + {0x840, 0x00}, + {0x841, 0x01}, + {0x842, 0x61}, + {0x843, 0x00}, + {0x844, 0x1D}, + {0x845, 0x00}, + {0x846, 0x00}, + {0x847, 0x00}, + {0x848, 0x00}, + {0x849, 0x01}, + {0x84A, 0x1F}, + {0x84B, 0x00}, + {0x84C, 0x05}, + {0x84D, 0x00}, + {0x84E, 0x19}, + {0x84F, 0x01}, + {0x850, 0x21}, + {0x851, 0x01}, + {0x852, 0x5D}, + {0x853, 0x00}, + {0x854, 0x00}, + {0x855, 0x00}, + {0x856, 0x19}, + {0x857, 0x01}, + {0x858, 0x21}, + {0x859, 0x00}, + {0x85A, 0x00}, + {0x85B, 0x00}, + {0x85C, 0x00}, + {0x85D, 0x00}, + {0x85E, 0x00}, + {0x85F, 0x00}, + {0x860, 0xB3}, + {0x861, 0x00}, + {0x862, 0xE3}, + {0x863, 0x00}, + {0x864, 0x00}, + {0x865, 0x00}, + {0x866, 0x00}, + {0x867, 0x00}, + {0x868, 0x00}, + {0x869, 0xE2}, + {0x86A, 0x00}, + {0x86B, 0x01}, + {0x86C, 0x06}, + {0x86D, 0x00}, + {0x86E, 0x00}, + {0x86F, 0x00}, + {0x870, 0x60}, + {0x871, 0x8C}, + {0x872, 0x10}, + {0x873, 0x00}, + {0x874, 0xE0}, + {0x875, 0x00}, + {0x876, 0x27}, + {0x877, 0x01}, + {0x878, 0x00}, + {0x879, 0x00}, + {0x87A, 0x00}, + {0x87B, 0x03}, + {0x87C, 0x00}, + {0x87D, 0x00}, + {0x87E, 0x00}, + {0x87F, 0x00}, + {0x880, 0x00}, + {0x881, 0x00}, + {0x882, 0x00}, + {0x883, 0x00}, + {0x884, 0x00}, + {0x885, 0x00}, + {0x886, 0xF8}, + {0x887, 0x00}, + {0x888, 0x03}, + {0x889, 0x00}, + {0x88A, 0x64}, + {0x88B, 0x00}, + {0x88C, 0x03}, + {0x88D, 0x00}, + {0x88E, 0xB1}, + {0x88F, 0x00}, + {0x890, 0x03}, + {0x891, 0x01}, + {0x892, 0x1D}, + {0x893, 0x00}, + {0x894, 0x03}, + {0x895, 0x01}, + {0x896, 0x4B}, + {0x897, 0x00}, + {0x898, 0xE5}, + {0x899, 0x00}, + {0x89A, 0x01}, + {0x89B, 0x00}, + {0x89C, 0x01}, + {0x89D, 0x04}, + {0x89E, 0xC8}, + {0x89F, 0x00}, + {0x8A0, 0x01}, + {0x8A1, 0x01}, + {0x8A2, 0x61}, + {0x8A3, 0x00}, + {0x8A4, 0x01}, + {0x8A5, 0x00}, + {0x8A6, 0x00}, + {0x8A7, 0x00}, + {0x8A8, 0x00}, + {0x8A9, 0x00}, + {0x8AA, 0x7F}, + {0x8AB, 0x03}, + {0x8AC, 0x00}, + {0x8AD, 0x00}, + {0x8AE, 0x00}, + {0x8AF, 0x00}, + {0x8B0, 0x00}, + {0x8B1, 0x00}, + {0x8B6, 0x00}, + {0x8B7, 0x01}, + {0x8B8, 0x00}, + {0x8B9, 0x00}, + {0x8BA, 0x02}, + {0x8BB, 0x00}, + {0x8BC, 0xFF}, + {0x8BD, 0x00}, + {0x8FE, 0x02}, +}; + +const static struct rj54n1_reg_val bank_10[] = { + {0x10bf, 0x69} +}; + +/* Clock dividers - these are default register values, divider = register + 1 */ +const static struct rj54n1_clock_div clk_div = { + .ratio_tg = 3 /* default: 5 */, + .ratio_t = 4 /* default: 1 */, + .ratio_r = 4 /* default: 0 */, + .ratio_op = 1 /* default: 5 */, + .ratio_o = 9 /* default: 0 */, +}; + +static struct rj54n1 *to_rj54n1(const struct i2c_client *client) +{ + return container_of(i2c_get_clientdata(client), struct rj54n1, subdev); +} + +static int reg_read(struct i2c_client *client, const u16 reg) +{ + struct rj54n1 *rj54n1 = to_rj54n1(client); + int ret; + + /* set bank */ + if (rj54n1->bank != reg >> 8) { + dev_dbg(&client->dev, "[0x%x] = 0x%x\n", 0xff, reg >> 8); + ret = i2c_smbus_write_byte_data(client, 0xff, reg >> 8); + if (ret < 0) + return ret; + rj54n1->bank = reg >> 8; + } + return i2c_smbus_read_byte_data(client, reg & 0xff); +} + +static int reg_write(struct i2c_client *client, const u16 reg, + const u8 data) +{ + struct rj54n1 *rj54n1 = to_rj54n1(client); + int ret; + + /* set bank */ + if (rj54n1->bank != reg >> 8) { + dev_dbg(&client->dev, "[0x%x] = 0x%x\n", 0xff, reg >> 8); + ret = i2c_smbus_write_byte_data(client, 0xff, reg >> 8); + if (ret < 0) + return ret; + rj54n1->bank = reg >> 8; + } + dev_dbg(&client->dev, "[0x%x] = 0x%x\n", reg & 0xff, data); + return i2c_smbus_write_byte_data(client, reg & 0xff, data); +} + +static int reg_set(struct i2c_client *client, const u16 reg, + const u8 data, const u8 mask) +{ + int ret; + + ret = reg_read(client, reg); + if (ret < 0) + return ret; + return reg_write(client, reg, (ret & ~mask) | (data & mask)); +} + +static int reg_write_multiple(struct i2c_client *client, + const struct rj54n1_reg_val *rv, const int n) +{ + int i, ret; + + for (i = 0; i < n; i++) { + ret = reg_write(client, rv->reg, rv->val); + if (ret < 0) + return ret; + rv++; + } + + return 0; +} + +static int rj54n1_s_stream(struct v4l2_subdev *sd, int enable) +{ + /* TODO: start / stop streaming */ + return 0; +} + +static int rj54n1_set_bus_param(struct soc_camera_device *icd, + unsigned long flags) +{ + struct v4l2_subdev *sd = soc_camera_to_subdev(icd); + struct i2c_client *client = sd->priv; + /* Figures 2.5-1 to 2.5-3 - default falling pixclk edge */ + + if (flags & SOCAM_PCLK_SAMPLE_RISING) + return reg_write(client, RJ54N1_OUT_SIGPO, 1 << 4); + else + return reg_write(client, RJ54N1_OUT_SIGPO, 0); +} + +static unsigned long rj54n1_query_bus_param(struct soc_camera_device *icd) +{ + struct soc_camera_link *icl = to_soc_camera_link(icd); + const unsigned long flags = + SOCAM_PCLK_SAMPLE_RISING | SOCAM_PCLK_SAMPLE_FALLING | + SOCAM_MASTER | SOCAM_DATAWIDTH_8 | + SOCAM_HSYNC_ACTIVE_HIGH | SOCAM_VSYNC_ACTIVE_HIGH | + SOCAM_DATA_ACTIVE_HIGH; + + return soc_camera_apply_sensor_flags(icl, flags); +} + +static int rj54n1_set_rect(struct i2c_client *client, + u16 reg_x, u16 reg_y, u16 reg_xy, + u32 width, u32 height) +{ + int ret; + + ret = reg_write(client, reg_xy, + ((width >> 4) & 0x70) | + ((height >> 8) & 7)); + + if (!ret) + ret = reg_write(client, reg_x, width & 0xff); + if (!ret) + ret = reg_write(client, reg_y, height & 0xff); + + return ret; +} + +/* + * Some commands, specifically certain initialisation sequences, require + * a commit operation. + */ +static int rj54n1_commit(struct i2c_client *client) +{ + int ret = reg_write(client, RJ54N1_INIT_START, 1); + msleep(10); + if (!ret) + ret = reg_write(client, RJ54N1_INIT_START, 0); + return ret; +} + +static int rj54n1_g_crop(struct v4l2_subdev *sd, struct v4l2_crop *a) +{ + struct i2c_client *client = sd->priv; + struct rj54n1 *rj54n1 = to_rj54n1(client); + + a->c = rj54n1->rect; + a->type = V4L2_BUF_TYPE_VIDEO_CAPTURE; + + return 0; +} + +static int rj54n1_cropcap(struct v4l2_subdev *sd, struct v4l2_cropcap *a) +{ + a->bounds.left = RJ54N1_COLUMN_SKIP; + a->bounds.top = RJ54N1_ROW_SKIP; + a->bounds.width = RJ54N1_MAX_WIDTH; + a->bounds.height = RJ54N1_MAX_HEIGHT; + a->defrect = a->bounds; + a->type = V4L2_BUF_TYPE_VIDEO_CAPTURE; + a->pixelaspect.numerator = 1; + a->pixelaspect.denominator = 1; + + return 0; +} + +static int rj54n1_g_fmt(struct v4l2_subdev *sd, struct v4l2_format *f) +{ + struct i2c_client *client = sd->priv; + struct rj54n1 *rj54n1 = to_rj54n1(client); + struct v4l2_pix_format *pix = &f->fmt.pix; + + pix->pixelformat = rj54n1->fourcc; + pix->field = V4L2_FIELD_NONE; + pix->width = rj54n1->width; + pix->height = rj54n1->height; + + return 0; +} + +/* + * The actual geometry configuration routine. It scales the input window into + * the output one, updates the window sizes and returns an error or the resize + * coefficient on success. Note: we only use the "Fixed Scaling" on this camera. + */ +static int rj54n1_sensor_scale(struct v4l2_subdev *sd, u32 *in_w, u32 *in_h, + u32 *out_w, u32 *out_h) +{ + struct i2c_client *client = sd->priv; + unsigned int skip, resize, input_w = *in_w, input_h = *in_h, + output_w = *out_w, output_h = *out_h; + u16 inc_sel; + int ret; + + ret = rj54n1_set_rect(client, RJ54N1_X_OUTPUT_SIZE_S_L, + RJ54N1_Y_OUTPUT_SIZE_S_L, + RJ54N1_XY_OUTPUT_SIZE_S_H, output_w, output_h); + if (!ret) + ret = rj54n1_set_rect(client, RJ54N1_X_OUTPUT_SIZE_P_L, + RJ54N1_Y_OUTPUT_SIZE_P_L, + RJ54N1_XY_OUTPUT_SIZE_P_H, output_w, output_h); + + if (ret < 0) + return ret; + + if (output_w > input_w || output_h > input_h) { + input_w = output_w; + input_h = output_h; + + resize = 1024; + } else { + unsigned int resize_x, resize_y; + resize_x = input_w * 1024 / output_w; + resize_y = input_h * 1024 / output_h; + + resize = min(resize_x, resize_y); + + /* Prohibited value ranges */ + switch (resize) { + case 2040 ... 2047: + resize = 2039; + break; + case 4080 ... 4095: + resize = 4079; + break; + case 8160 ... 8191: + resize = 8159; + break; + case 16320 ... 16383: + resize = 16319; + } + + input_w = output_w * resize / 1024; + input_h = output_h * resize / 1024; + } + + /* Set scaling */ + ret = reg_write(client, RJ54N1_RESIZE_HOLD_L, resize & 0xff); + if (!ret) + ret = reg_write(client, RJ54N1_RESIZE_HOLD_H, resize >> 8); + + if (ret < 0) + return ret; + + /* + * Configure a skipping bitmask. The sensor will select a skipping value + * among set bits automatically. + */ + skip = min(resize / 1024, (unsigned)15); + inc_sel = 1 << skip; + + if (inc_sel <= 2) + inc_sel = 0xc; + else if (resize & 1023 && skip < 15) + inc_sel |= 1 << (skip + 1); + + ret = reg_write(client, RJ54N1_INC_USE_SEL_L, inc_sel & 0xfc); + if (!ret) + ret = reg_write(client, RJ54N1_INC_USE_SEL_H, inc_sel >> 8); + + /* Start resizing */ + if (!ret) + ret = reg_write(client, RJ54N1_RESIZE_CONTROL, + RESIZE_HOLD_SEL | RESIZE_GO | 1); + + if (ret < 0) + return ret; + + dev_dbg(&client->dev, "resize %u, skip %u\n", resize, skip); + + /* Constant taken from manufacturer's example */ + msleep(230); + + ret = reg_write(client, RJ54N1_RESIZE_CONTROL, RESIZE_HOLD_SEL | 1); + if (ret < 0) + return ret; + + *in_w = input_w; + *in_h = input_h; + *out_w = output_w; + *out_h = output_h; + + return resize; +} + +static int rj54n1_set_clock(struct i2c_client *client) +{ + struct rj54n1 *rj54n1 = to_rj54n1(client); + int ret; + + /* Enable external clock */ + ret = reg_write(client, RJ54N1_RESET_STANDBY, E_EXCLK | SOFT_STDBY); + /* Leave stand-by */ + if (!ret) + ret = reg_write(client, RJ54N1_RESET_STANDBY, E_EXCLK); + + if (!ret) + ret = reg_write(client, RJ54N1_PLL_L, 2); + if (!ret) + ret = reg_write(client, RJ54N1_PLL_N, 0x31); + + /* TGCLK dividers */ + if (!ret) + ret = reg_write(client, RJ54N1_RATIO_TG, + rj54n1->clk_div.ratio_tg); + if (!ret) + ret = reg_write(client, RJ54N1_RATIO_T, + rj54n1->clk_div.ratio_t); + if (!ret) + ret = reg_write(client, RJ54N1_RATIO_R, + rj54n1->clk_div.ratio_r); + + /* Enable TGCLK & RAMP */ + if (!ret) + ret = reg_write(client, RJ54N1_RAMP_TGCLK_EN, 3); + + /* Disable clock output */ + if (!ret) + ret = reg_write(client, RJ54N1_OCLK_DSP, 0); + + /* Set divisors */ + if (!ret) + ret = reg_write(client, RJ54N1_RATIO_OP, + rj54n1->clk_div.ratio_op); + if (!ret) + ret = reg_write(client, RJ54N1_RATIO_O, + rj54n1->clk_div.ratio_o); + + /* Enable OCLK */ + if (!ret) + ret = reg_write(client, RJ54N1_OCLK_SEL_EN, 1); + + /* Use PLL for Timing Generator, write 2 to reserved bits */ + if (!ret) + ret = reg_write(client, RJ54N1_TG_BYPASS, 2); + + /* Take sensor out of reset */ + if (!ret) + ret = reg_write(client, RJ54N1_RESET_STANDBY, + E_EXCLK | SEN_RSTX); + /* Enable PLL */ + if (!ret) + ret = reg_write(client, RJ54N1_PLL_EN, 1); + + /* Wait for PLL to stabilise */ + msleep(10); + + /* Enable clock to frequency divider */ + if (!ret) + ret = reg_write(client, RJ54N1_CLK_RST, 1); + + if (!ret) + ret = reg_read(client, RJ54N1_CLK_RST); + if (ret != 1) { + dev_err(&client->dev, + "Resetting RJ54N1CB0C clock failed: %d!\n", ret); + return -EIO; + } + /* Start the PLL */ + ret = reg_set(client, RJ54N1_OCLK_DSP, 1, 1); + + /* Enable OCLK */ + if (!ret) + ret = reg_write(client, RJ54N1_OCLK_SEL_EN, 1); + + return ret; +} + +static int rj54n1_reg_init(struct i2c_client *client) +{ + int ret = rj54n1_set_clock(client); + + if (!ret) + ret = reg_write_multiple(client, bank_7, ARRAY_SIZE(bank_7)); + if (!ret) + ret = reg_write_multiple(client, bank_10, ARRAY_SIZE(bank_10)); + + /* Set binning divisors */ + if (!ret) + ret = reg_write(client, RJ54N1_SCALE_1_2_LEV, 3 | (7 << 4)); + if (!ret) + ret = reg_write(client, RJ54N1_SCALE_4_LEV, 0xf); + + /* Switch to fixed resize mode */ + if (!ret) + ret = reg_write(client, RJ54N1_RESIZE_CONTROL, + RESIZE_HOLD_SEL | 1); + + /* Set gain */ + if (!ret) + ret = reg_write(client, RJ54N1_Y_GAIN, 0x84); + + /* Mirror the image back: default is upside down and left-to-right... */ + if (!ret) + ret = reg_set(client, RJ54N1_MIRROR_STILL_MODE, 3, 3); + + if (!ret) + ret = reg_write_multiple(client, bank_4, ARRAY_SIZE(bank_4)); + if (!ret) + ret = reg_write_multiple(client, bank_5, ARRAY_SIZE(bank_5)); + if (!ret) + ret = reg_write_multiple(client, bank_8, ARRAY_SIZE(bank_8)); + + if (!ret) + ret = reg_write(client, RJ54N1_RESET_STANDBY, + E_EXCLK | DSP_RSTX | SEN_RSTX); + + /* Commit init */ + if (!ret) + ret = rj54n1_commit(client); + + /* Take DSP, TG, sensor out of reset */ + if (!ret) + ret = reg_write(client, RJ54N1_RESET_STANDBY, + E_EXCLK | DSP_RSTX | TG_RSTX | SEN_RSTX); + + if (!ret) + ret = reg_write(client, 0x7fe, 2); + + /* Constant taken from manufacturer's example */ + msleep(700); + + return ret; +} + +/* FIXME: streaming output only up to 800x600 is functional */ +static int rj54n1_try_fmt(struct v4l2_subdev *sd, struct v4l2_format *f) +{ + struct v4l2_pix_format *pix = &f->fmt.pix; + + pix->field = V4L2_FIELD_NONE; + + if (pix->width > 800) + pix->width = 800; + if (pix->height > 600) + pix->height = 600; + + return 0; +} + +static int rj54n1_s_fmt(struct v4l2_subdev *sd, struct v4l2_format *f) +{ + struct i2c_client *client = sd->priv; + struct rj54n1 *rj54n1 = to_rj54n1(client); + struct v4l2_pix_format *pix = &f->fmt.pix; + unsigned int output_w, output_h, + input_w = rj54n1->rect.width, input_h = rj54n1->rect.height; + int ret; + + /* + * The host driver can call us without .try_fmt(), so, we have to take + * care ourseleves + */ + ret = rj54n1_try_fmt(sd, f); + + /* + * Verify if the sensor has just been powered on. TODO: replace this + * with proper PM, when a suitable API is available. + */ + if (!ret) + ret = reg_read(client, RJ54N1_RESET_STANDBY); + if (ret < 0) + return ret; + + if (!(ret & E_EXCLK)) { + ret = rj54n1_reg_init(client); + if (ret < 0) + return ret; + } + + /* RA_SEL_UL is only relevant for raw modes, ignored otherwise. */ + switch (pix->pixelformat) { + case V4L2_PIX_FMT_YUYV: + ret = reg_write(client, RJ54N1_OUT_SEL, 0); + if (!ret) + ret = reg_set(client, RJ54N1_BYTE_SWAP, 8, 8); + break; + case V4L2_PIX_FMT_RGB565: + ret = reg_write(client, RJ54N1_OUT_SEL, 0x11); + if (!ret) + ret = reg_set(client, RJ54N1_BYTE_SWAP, 8, 8); + break; + default: + ret = -EINVAL; + } + + if (ret < 0) + return ret; + + /* Supported scales 1:1 - 1:16 */ + if (pix->width < input_w / 16) + pix->width = input_w / 16; + if (pix->height < input_h / 16) + pix->height = input_h / 16; + + output_w = pix->width; + output_h = pix->height; + + ret = rj54n1_sensor_scale(sd, &input_w, &input_h, &output_w, &output_h); + if (ret < 0) + return ret; + + rj54n1->fourcc = pix->pixelformat; + rj54n1->resize = ret; + rj54n1->rect.width = input_w; + rj54n1->rect.height = input_h; + rj54n1->width = output_w; + rj54n1->height = output_h; + + pix->width = output_w; + pix->height = output_h; + pix->field = V4L2_FIELD_NONE; + + return ret; +} + +static int rj54n1_g_chip_ident(struct v4l2_subdev *sd, + struct v4l2_dbg_chip_ident *id) +{ + struct i2c_client *client = sd->priv; + + if (id->match.type != V4L2_CHIP_MATCH_I2C_ADDR) + return -EINVAL; + + if (id->match.addr != client->addr) + return -ENODEV; + + id->ident = V4L2_IDENT_RJ54N1CB0C; + id->revision = 0; + + return 0; +} + +#ifdef CONFIG_VIDEO_ADV_DEBUG +static int rj54n1_g_register(struct v4l2_subdev *sd, + struct v4l2_dbg_register *reg) +{ + struct i2c_client *client = sd->priv; + + if (reg->match.type != V4L2_CHIP_MATCH_I2C_ADDR || + reg->reg < 0x400 || reg->reg > 0x1fff) + /* Registers > 0x0800 are only available from Sharp support */ + return -EINVAL; + + if (reg->match.addr != client->addr) + return -ENODEV; + + reg->size = 1; + reg->val = reg_read(client, reg->reg); + + if (reg->val > 0xff) + return -EIO; + + return 0; +} + +static int rj54n1_s_register(struct v4l2_subdev *sd, + struct v4l2_dbg_register *reg) +{ + struct i2c_client *client = sd->priv; + + if (reg->match.type != V4L2_CHIP_MATCH_I2C_ADDR || + reg->reg < 0x400 || reg->reg > 0x1fff) + /* Registers >= 0x0800 are only available from Sharp support */ + return -EINVAL; + + if (reg->match.addr != client->addr) + return -ENODEV; + + if (reg_write(client, reg->reg, reg->val) < 0) + return -EIO; + + return 0; +} +#endif + +static const struct v4l2_queryctrl rj54n1_controls[] = { + { + .id = V4L2_CID_VFLIP, + .type = V4L2_CTRL_TYPE_BOOLEAN, + .name = "Flip Vertically", + .minimum = 0, + .maximum = 1, + .step = 1, + .default_value = 0, + }, { + .id = V4L2_CID_HFLIP, + .type = V4L2_CTRL_TYPE_BOOLEAN, + .name = "Flip Horizontally", + .minimum = 0, + .maximum = 1, + .step = 1, + .default_value = 0, + }, { + .id = V4L2_CID_GAIN, + .type = V4L2_CTRL_TYPE_INTEGER, + .name = "Gain", + .minimum = 0, + .maximum = 127, + .step = 1, + .default_value = 66, + .flags = V4L2_CTRL_FLAG_SLIDER, + }, +}; + +static struct soc_camera_ops rj54n1_ops = { + .set_bus_param = rj54n1_set_bus_param, + .query_bus_param = rj54n1_query_bus_param, + .controls = rj54n1_controls, + .num_controls = ARRAY_SIZE(rj54n1_controls), +}; + +static int rj54n1_g_ctrl(struct v4l2_subdev *sd, struct v4l2_control *ctrl) +{ + struct i2c_client *client = sd->priv; + int data; + + switch (ctrl->id) { + case V4L2_CID_VFLIP: + data = reg_read(client, RJ54N1_MIRROR_STILL_MODE); + if (data < 0) + return -EIO; + ctrl->value = !(data & 1); + break; + case V4L2_CID_HFLIP: + data = reg_read(client, RJ54N1_MIRROR_STILL_MODE); + if (data < 0) + return -EIO; + ctrl->value = !(data & 2); + break; + case V4L2_CID_GAIN: + data = reg_read(client, RJ54N1_Y_GAIN); + if (data < 0) + return -EIO; + + ctrl->value = data / 2; + break; + } + + return 0; +} + +static int rj54n1_s_ctrl(struct v4l2_subdev *sd, struct v4l2_control *ctrl) +{ + int data; + struct i2c_client *client = sd->priv; + const struct v4l2_queryctrl *qctrl; + + qctrl = soc_camera_find_qctrl(&rj54n1_ops, ctrl->id); + if (!qctrl) + return -EINVAL; + + switch (ctrl->id) { + case V4L2_CID_VFLIP: + if (ctrl->value) + data = reg_set(client, RJ54N1_MIRROR_STILL_MODE, 0, 1); + else + data = reg_set(client, RJ54N1_MIRROR_STILL_MODE, 1, 1); + if (data < 0) + return -EIO; + break; + case V4L2_CID_HFLIP: + if (ctrl->value) + data = reg_set(client, RJ54N1_MIRROR_STILL_MODE, 0, 2); + else + data = reg_set(client, RJ54N1_MIRROR_STILL_MODE, 2, 2); + if (data < 0) + return -EIO; + break; + case V4L2_CID_GAIN: + if (ctrl->value > qctrl->maximum || + ctrl->value < qctrl->minimum) + return -EINVAL; + else if (reg_write(client, RJ54N1_Y_GAIN, ctrl->value * 2) < 0) + return -EIO; + break; + } + + return 0; +} + +static struct v4l2_subdev_core_ops rj54n1_subdev_core_ops = { + .g_ctrl = rj54n1_g_ctrl, + .s_ctrl = rj54n1_s_ctrl, + .g_chip_ident = rj54n1_g_chip_ident, +#ifdef CONFIG_VIDEO_ADV_DEBUG + .g_register = rj54n1_g_register, + .s_register = rj54n1_s_register, +#endif +}; + +static struct v4l2_subdev_video_ops rj54n1_subdev_video_ops = { + .s_stream = rj54n1_s_stream, + .s_fmt = rj54n1_s_fmt, + .g_fmt = rj54n1_g_fmt, + .try_fmt = rj54n1_try_fmt, + .g_crop = rj54n1_g_crop, + .cropcap = rj54n1_cropcap, +}; + +static struct v4l2_subdev_ops rj54n1_subdev_ops = { + .core = &rj54n1_subdev_core_ops, + .video = &rj54n1_subdev_video_ops, +}; + +static int rj54n1_pin_config(struct i2c_client *client) +{ + /* + * Experimentally found out IOCTRL wired to 0. TODO: add to platform + * data: 0 or 1 << 7. + */ + return reg_write(client, RJ54N1_IOC, 0); +} + +/* + * Interface active, can use i2c. If it fails, it can indeed mean, that + * this wasn't our capture interface, so, we wait for the right one + */ +static int rj54n1_video_probe(struct soc_camera_device *icd, + struct i2c_client *client) +{ + int data1, data2; + int ret; + + /* This could be a BUG_ON() or a WARN_ON(), or remove it completely */ + if (!icd->dev.parent || + to_soc_camera_host(icd->dev.parent)->nr != icd->iface) + return -ENODEV; + + /* Read out the chip version register */ + data1 = reg_read(client, RJ54N1_DEV_CODE); + data2 = reg_read(client, RJ54N1_DEV_CODE2); + + if (data1 != 0x51 || data2 != 0x10) { + ret = -ENODEV; + dev_info(&client->dev, "No RJ54N1CB0C found, read 0x%x:0x%x\n", + data1, data2); + goto ei2c; + } + + ret = rj54n1_pin_config(client); + if (ret < 0) + goto ei2c; + + dev_info(&client->dev, "Detected a RJ54N1CB0C chip ID 0x%x:0x%x\n", + data1, data2); + +ei2c: + return ret; +} + +static int rj54n1_probe(struct i2c_client *client, + const struct i2c_device_id *did) +{ + struct rj54n1 *rj54n1; + struct soc_camera_device *icd = client->dev.platform_data; + struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent); + struct soc_camera_link *icl; + int ret; + + if (!icd) { + dev_err(&client->dev, "RJ54N1CB0C: missing soc-camera data!\n"); + return -EINVAL; + } + + icl = to_soc_camera_link(icd); + if (!icl) { + dev_err(&client->dev, "RJ54N1CB0C: missing platform data!\n"); + return -EINVAL; + } + + if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA)) { + dev_warn(&adapter->dev, + "I2C-Adapter doesn't support I2C_FUNC_SMBUS_BYTE\n"); + return -EIO; + } + + rj54n1 = kzalloc(sizeof(struct rj54n1), GFP_KERNEL); + if (!rj54n1) + return -ENOMEM; + + v4l2_i2c_subdev_init(&rj54n1->subdev, client, &rj54n1_subdev_ops); + + icd->ops = &rj54n1_ops; + + rj54n1->clk_div = clk_div; + rj54n1->rect.left = RJ54N1_COLUMN_SKIP; + rj54n1->rect.top = RJ54N1_ROW_SKIP; + rj54n1->rect.width = RJ54N1_MAX_WIDTH; + rj54n1->rect.height = RJ54N1_MAX_HEIGHT; + rj54n1->width = RJ54N1_MAX_WIDTH; + rj54n1->height = RJ54N1_MAX_HEIGHT; + rj54n1->fourcc = V4L2_PIX_FMT_YUYV; + rj54n1->resize = 1024; + + ret = rj54n1_video_probe(icd, client); + if (ret < 0) { + icd->ops = NULL; + i2c_set_clientdata(client, NULL); + kfree(rj54n1); + return ret; + } + + icd->formats = rj54n1_colour_formats; + icd->num_formats = ARRAY_SIZE(rj54n1_colour_formats); + + return ret; +} + +static int rj54n1_remove(struct i2c_client *client) +{ + struct rj54n1 *rj54n1 = to_rj54n1(client); + struct soc_camera_device *icd = client->dev.platform_data; + struct soc_camera_link *icl = to_soc_camera_link(icd); + + icd->ops = NULL; + if (icl->free_bus) + icl->free_bus(icl); + i2c_set_clientdata(client, NULL); + client->driver = NULL; + kfree(rj54n1); + + return 0; +} + +static const struct i2c_device_id rj54n1_id[] = { + { "rj54n1cb0c", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, rj54n1_id); + +static struct i2c_driver rj54n1_i2c_driver = { + .driver = { + .name = "rj54n1cb0c", + }, + .probe = rj54n1_probe, + .remove = rj54n1_remove, + .id_table = rj54n1_id, +}; + +static int __init rj54n1_mod_init(void) +{ + return i2c_add_driver(&rj54n1_i2c_driver); +} + +static void __exit rj54n1_mod_exit(void) +{ + i2c_del_driver(&rj54n1_i2c_driver); +} + +module_init(rj54n1_mod_init); +module_exit(rj54n1_mod_exit); + +MODULE_DESCRIPTION("Sharp RJ54N1CB0C Camera driver"); +MODULE_AUTHOR("Guennadi Liakhovetski "); +MODULE_LICENSE("GPL v2"); diff --git a/include/media/v4l2-chip-ident.h b/include/media/v4l2-chip-ident.h index a2d93da38450..43cf26ea6734 100644 --- a/include/media/v4l2-chip-ident.h +++ b/include/media/v4l2-chip-ident.h @@ -281,6 +281,9 @@ enum { /* module m52790: just ident 52790 */ V4L2_IDENT_M52790 = 52790, + + /* Sharp RJ54N1CB0C, 0xCB0C = 51980 */ + V4L2_IDENT_RJ54N1CB0C = 51980, }; #endif -- cgit v1.3-8-gc7d7 From d8d8622552088ca94fab4e4997f948514d0bdc27 Mon Sep 17 00:00:00 2001 From: "Igor M. Liplianin" Date: Sat, 19 Sep 2009 09:51:12 -0300 Subject: V4L/DVB (13134): Add support for TBS-likes remotes The patch brings infrared remote support for some cx88 based cards. Such as: TeVii S460,S420; Omicom SS4; SatTrade ST4200; TBS 8920,8910; Prof 7300,6200. Signed-off-by: Igor M. Liplianin Signed-off-by: Mauro Carvalho Chehab --- drivers/media/common/ir-keymaps.c | 40 +++++++++++++++++++++++++++++++++++ drivers/media/video/cx88/cx88-input.c | 24 +++++++++++++++++++++ include/media/ir-common.h | 1 + 3 files changed, 65 insertions(+) (limited to 'include') diff --git a/drivers/media/common/ir-keymaps.c b/drivers/media/common/ir-keymaps.c index f6790172736a..a2c80a7a3ae4 100644 --- a/drivers/media/common/ir-keymaps.c +++ b/drivers/media/common/ir-keymaps.c @@ -2964,6 +2964,46 @@ struct ir_scancode_table ir_codes_dm1105_nec_table = { }; EXPORT_SYMBOL_GPL(ir_codes_dm1105_nec_table); +static struct ir_scancode ir_codes_tbs_nec[] = { + { 0x04, KEY_POWER2}, /*power*/ + { 0x14, KEY_MUTE}, /*mute*/ + { 0x07, KEY_1}, + { 0x06, KEY_2}, + { 0x05, KEY_3}, + { 0x0b, KEY_4}, + { 0x0a, KEY_5}, + { 0x09, KEY_6}, + { 0x0f, KEY_7}, + { 0x0e, KEY_8}, + { 0x0d, KEY_9}, + { 0x12, KEY_0}, + { 0x16, KEY_CHANNELUP}, /*ch+*/ + { 0x11, KEY_CHANNELDOWN},/*ch-*/ + { 0x13, KEY_VOLUMEUP}, /*vol+*/ + { 0x0c, KEY_VOLUMEDOWN},/*vol-*/ + { 0x03, KEY_RECORD}, /*rec*/ + { 0x18, KEY_PAUSE}, /*pause*/ + { 0x19, KEY_OK}, /*ok*/ + { 0x1a, KEY_CAMERA}, /* snapshot */ + { 0x01, KEY_UP}, + { 0x10, KEY_LEFT}, + { 0x02, KEY_RIGHT}, + { 0x08, KEY_DOWN}, + { 0x15, KEY_FAVORITES}, + { 0x17, KEY_SUBTITLE}, + { 0x1d, KEY_ZOOM}, + { 0x1f, KEY_EXIT}, + { 0x1e, KEY_MENU}, + { 0x1c, KEY_EPG}, + { 0x00, KEY_PREVIOUS}, + { 0x1b, KEY_MODE}, +}; +struct ir_scancode_table ir_codes_tbs_nec_table = { + .scan = ir_codes_tbs_nec, + .size = ARRAY_SIZE(ir_codes_tbs_nec), +}; +EXPORT_SYMBOL_GPL(ir_codes_tbs_nec_table); + /* Terratec Cinergy Hybrid T USB XS Devin Heitmueller */ diff --git a/drivers/media/video/cx88/cx88-input.c b/drivers/media/video/cx88/cx88-input.c index 78b3635178af..33437e403144 100644 --- a/drivers/media/video/cx88/cx88-input.c +++ b/drivers/media/video/cx88/cx88-input.c @@ -303,6 +303,22 @@ int cx88_ir_init(struct cx88_core *core, struct pci_dev *pci) ir->mask_keydown = 0x02; ir->polling = 50; /* ms */ break; + case CX88_BOARD_OMICOM_SS4_PCI: + case CX88_BOARD_SATTRADE_ST4200: + case CX88_BOARD_TBS_8920: + case CX88_BOARD_TBS_8910: + case CX88_BOARD_PROF_7300: + case CX88_BOARD_PROF_6200: + ir_codes = &ir_codes_tbs_nec_table; + ir_type = IR_TYPE_PD; + ir->sampling = 0xff00; /* address */ + break; + case CX88_BOARD_TEVII_S460: + case CX88_BOARD_TEVII_S420: + ir_codes = &ir_codes_dm1105_nec_table; + ir_type = IR_TYPE_PD; + ir->sampling = 0xff00; /* address */ + break; case CX88_BOARD_DNTV_LIVE_DVB_T_PRO: ir_codes = &ir_codes_dntv_live_dvbt_pro_table; ir_type = IR_TYPE_PD; @@ -432,8 +448,16 @@ void cx88_ir_irq(struct cx88_core *core) /* decode it */ switch (core->boardnr) { + case CX88_BOARD_TEVII_S460: + case CX88_BOARD_TEVII_S420: case CX88_BOARD_TERRATEC_CINERGY_1400_DVB_T1: case CX88_BOARD_DNTV_LIVE_DVB_T_PRO: + case CX88_BOARD_OMICOM_SS4_PCI: + case CX88_BOARD_SATTRADE_ST4200: + case CX88_BOARD_TBS_8920: + case CX88_BOARD_TBS_8910: + case CX88_BOARD_PROF_7300: + case CX88_BOARD_PROF_6200: ircode = ir_decode_pulsedistance(ir->samples, ir->scount, 1, 4); if (ircode == 0xffffffff) { /* decoding error */ diff --git a/include/media/ir-common.h b/include/media/ir-common.h index af3257e6b808..c2d515dd94d2 100644 --- a/include/media/ir-common.h +++ b/include/media/ir-common.h @@ -173,6 +173,7 @@ extern struct ir_scancode_table ir_codes_ati_tv_wonder_hd_600_table; extern struct ir_scancode_table ir_codes_kworld_plus_tv_analog_table; extern struct ir_scancode_table ir_codes_kaiomy_table; extern struct ir_scancode_table ir_codes_dm1105_nec_table; +extern struct ir_scancode_table ir_codes_tbs_nec_table; extern struct ir_scancode_table ir_codes_evga_indtube_table; extern struct ir_scancode_table ir_codes_terratec_cinergy_xs_table; extern struct ir_scancode_table ir_codes_videomate_s350_table; -- cgit v1.3-8-gc7d7 From 9d2ba7ad802300d6a1830df9268d8ba478c66a18 Mon Sep 17 00:00:00 2001 From: "Igor M. Liplianin" Date: Wed, 23 Sep 2009 14:44:12 -0300 Subject: V4L/DVB (13135): Add support for TeVii remotes The patch brings infrared remote support for some cx88 based cards. Such as TeVii S460,S420. Signed-off-by: Igor M. Liplianin Signed-off-by: Mauro Carvalho Chehab --- drivers/media/common/ir-keymaps.c | 55 +++++++++++++++++++++++++++++++++++ drivers/media/video/cx88/cx88-input.c | 2 +- include/media/ir-common.h | 1 + 3 files changed, 57 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/media/common/ir-keymaps.c b/drivers/media/common/ir-keymaps.c index a2c80a7a3ae4..32f765e6e5fd 100644 --- a/drivers/media/common/ir-keymaps.c +++ b/drivers/media/common/ir-keymaps.c @@ -2964,6 +2964,61 @@ struct ir_scancode_table ir_codes_dm1105_nec_table = { }; EXPORT_SYMBOL_GPL(ir_codes_dm1105_nec_table); +static struct ir_scancode ir_codes_tevii_nec[] = { + { 0x0a, KEY_POWER2}, + { 0x0c, KEY_MUTE}, + { 0x11, KEY_1}, + { 0x12, KEY_2}, + { 0x13, KEY_3}, + { 0x14, KEY_4}, + { 0x15, KEY_5}, + { 0x16, KEY_6}, + { 0x17, KEY_7}, + { 0x18, KEY_8}, + { 0x19, KEY_9}, + { 0x10, KEY_0}, + { 0x1c, KEY_MENU}, + { 0x0f, KEY_VOLUMEDOWN}, + { 0x1a, KEY_LAST}, + { 0x0e, KEY_OPEN}, + { 0x04, KEY_RECORD}, + { 0x09, KEY_VOLUMEUP}, + { 0x08, KEY_CHANNELUP}, + { 0x07, KEY_PVR}, + { 0x0b, KEY_TIME}, + { 0x02, KEY_RIGHT}, + { 0x03, KEY_LEFT}, + { 0x00, KEY_UP}, + { 0x1f, KEY_OK}, + { 0x01, KEY_DOWN}, + { 0x05, KEY_TUNER}, + { 0x06, KEY_CHANNELDOWN}, + { 0x40, KEY_PLAYPAUSE}, + { 0x1e, KEY_REWIND}, + { 0x1b, KEY_FAVORITES}, + { 0x1d, KEY_BACK}, + { 0x4d, KEY_FASTFORWARD}, + { 0x44, KEY_EPG}, + { 0x4c, KEY_INFO}, + { 0x41, KEY_AB}, + { 0x43, KEY_AUDIO}, + { 0x45, KEY_SUBTITLE}, + { 0x4a, KEY_LIST}, + { 0x46, KEY_F1}, + { 0x47, KEY_F2}, + { 0x5e, KEY_F3}, + { 0x5c, KEY_F4}, + { 0x52, KEY_F5}, + { 0x5a, KEY_F6}, + { 0x56, KEY_MODE}, + { 0x58, KEY_SWITCHVIDEOMODE}, +}; +struct ir_scancode_table ir_codes_tevii_nec_table = { + .scan = ir_codes_tevii_nec, + .size = ARRAY_SIZE(ir_codes_tevii_nec), +}; +EXPORT_SYMBOL_GPL(ir_codes_tevii_nec_table); + static struct ir_scancode ir_codes_tbs_nec[] = { { 0x04, KEY_POWER2}, /*power*/ { 0x14, KEY_MUTE}, /*mute*/ diff --git a/drivers/media/video/cx88/cx88-input.c b/drivers/media/video/cx88/cx88-input.c index 33437e403144..c0047c1960cc 100644 --- a/drivers/media/video/cx88/cx88-input.c +++ b/drivers/media/video/cx88/cx88-input.c @@ -315,7 +315,7 @@ int cx88_ir_init(struct cx88_core *core, struct pci_dev *pci) break; case CX88_BOARD_TEVII_S460: case CX88_BOARD_TEVII_S420: - ir_codes = &ir_codes_dm1105_nec_table; + ir_codes = &ir_codes_tevii_nec_table; ir_type = IR_TYPE_PD; ir->sampling = 0xff00; /* address */ break; diff --git a/include/media/ir-common.h b/include/media/ir-common.h index c2d515dd94d2..5921776929e7 100644 --- a/include/media/ir-common.h +++ b/include/media/ir-common.h @@ -173,6 +173,7 @@ extern struct ir_scancode_table ir_codes_ati_tv_wonder_hd_600_table; extern struct ir_scancode_table ir_codes_kworld_plus_tv_analog_table; extern struct ir_scancode_table ir_codes_kaiomy_table; extern struct ir_scancode_table ir_codes_dm1105_nec_table; +extern struct ir_scancode_table ir_codes_tevii_nec_table; extern struct ir_scancode_table ir_codes_tbs_nec_table; extern struct ir_scancode_table ir_codes_evga_indtube_table; extern struct ir_scancode_table ir_codes_terratec_cinergy_xs_table; -- cgit v1.3-8-gc7d7 From eea85b0a629970d462481a80e1d45f4d71fe797f Mon Sep 17 00:00:00 2001 From: Richard Röjfors Date: Tue, 22 Sep 2009 10:14:39 -0300 Subject: V4L/DVB (13177): radio: Add support for TEF6862 tuner MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds support for TEF6862 Car Radio Enhanced Selectivity Tuner. It's implemented as a subdev, supporting checking signal strength and setting and getting frequency. Signed-off-by: Richard Röjfors Signed-off-by: Mauro Carvalho Chehab --- drivers/media/radio/Kconfig | 12 +++ drivers/media/radio/Makefile | 1 + drivers/media/radio/tef6862.c | 232 ++++++++++++++++++++++++++++++++++++++++ include/media/v4l2-chip-ident.h | 3 + 4 files changed, 248 insertions(+) create mode 100644 drivers/media/radio/tef6862.c (limited to 'include') diff --git a/drivers/media/radio/Kconfig b/drivers/media/radio/Kconfig index a87a477c87f2..931b8b370b50 100644 --- a/drivers/media/radio/Kconfig +++ b/drivers/media/radio/Kconfig @@ -401,4 +401,16 @@ config RADIO_TEA5764_XTAL Say Y here if TEA5764 have a 32768 Hz crystal in circuit, say N here if TEA5764 reference frequency is connected in FREQIN. +config RADIO_TEF6862 + tristate "TEF6862 Car Radio Enhanced Selectivity Tuner" + depends on I2C && VIDEO_V4L2 + ---help--- + Say Y here if you want to use the TEF6862 Car Radio Enhanced + Selectivity Tuner, found for instance on the Russellville development + board. On the russellville the device is connected to internal + timberdale I2C bus. + + To compile this driver as a module, choose M here: the + module will be called TEF6862. + endif # RADIO_ADAPTERS diff --git a/drivers/media/radio/Makefile b/drivers/media/radio/Makefile index 2a1be3bf4f7c..a4bdca08a94d 100644 --- a/drivers/media/radio/Makefile +++ b/drivers/media/radio/Makefile @@ -22,5 +22,6 @@ obj-$(CONFIG_USB_DSBR) += dsbr100.o obj-$(CONFIG_RADIO_SI470X) += si470x/ obj-$(CONFIG_USB_MR800) += radio-mr800.o obj-$(CONFIG_RADIO_TEA5764) += radio-tea5764.o +obj-$(CONFIG_RADIO_TEF6862) += tef6862.o EXTRA_CFLAGS += -Isound diff --git a/drivers/media/radio/tef6862.c b/drivers/media/radio/tef6862.c new file mode 100644 index 000000000000..6e607ff0c169 --- /dev/null +++ b/drivers/media/radio/tef6862.c @@ -0,0 +1,232 @@ +/* + * tef6862.c Philips TEF6862 Car Radio Enhanced Selectivity Tuner + * Copyright (c) 2009 Intel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRIVER_NAME "tef6862" + +#define FREQ_MUL 16000 + +#define TEF6862_LO_FREQ (875 * FREQ_MUL / 10) +#define TEF6862_HI_FREQ (108 * FREQ_MUL) + +/* Write mode sub addresses */ +#define WM_SUB_BANDWIDTH 0x0 +#define WM_SUB_PLLM 0x1 +#define WM_SUB_PLLL 0x2 +#define WM_SUB_DAA 0x3 +#define WM_SUB_AGC 0x4 +#define WM_SUB_BAND 0x5 +#define WM_SUB_CONTROL 0x6 +#define WM_SUB_LEVEL 0x7 +#define WM_SUB_IFCF 0x8 +#define WM_SUB_IFCAP 0x9 +#define WM_SUB_ACD 0xA +#define WM_SUB_TEST 0xF + +/* Different modes of the MSA register */ +#define MODE_BUFFER 0x0 +#define MODE_PRESET 0x1 +#define MODE_SEARCH 0x2 +#define MODE_AF_UPDATE 0x3 +#define MODE_JUMP 0x4 +#define MODE_CHECK 0x5 +#define MODE_LOAD 0x6 +#define MODE_END 0x7 +#define MODE_SHIFT 5 + +struct tef6862_state { + struct v4l2_subdev sd; + unsigned long freq; +}; + +static inline struct tef6862_state *to_state(struct v4l2_subdev *sd) +{ + return container_of(sd, struct tef6862_state, sd); +} + +static u16 tef6862_sigstr(struct i2c_client *client) +{ + u8 buf[4]; + int err = i2c_master_recv(client, buf, sizeof(buf)); + if (err == sizeof(buf)) + return buf[3] << 8; + return 0; +} + +static int tef6862_g_tuner(struct v4l2_subdev *sd, struct v4l2_tuner *v) +{ + if (v->index > 0) + return -EINVAL; + + /* only support FM for now */ + strlcpy(v->name, "FM", sizeof(v->name)); + v->type = V4L2_TUNER_RADIO; + v->rangelow = TEF6862_LO_FREQ; + v->rangehigh = TEF6862_HI_FREQ; + v->rxsubchans = V4L2_TUNER_SUB_MONO; + v->capability = V4L2_TUNER_CAP_LOW; + v->audmode = V4L2_TUNER_MODE_STEREO; + v->signal = tef6862_sigstr(v4l2_get_subdevdata(sd)); + + return 0; +} + +static int tef6862_s_tuner(struct v4l2_subdev *sd, struct v4l2_tuner *v) +{ + return v->index ? -EINVAL : 0; +} + +static int tef6862_s_frequency(struct v4l2_subdev *sd, struct v4l2_frequency *f) +{ + struct tef6862_state *state = to_state(sd); + struct i2c_client *client = v4l2_get_subdevdata(sd); + u16 pll; + u8 i2cmsg[3]; + int err; + + if (f->tuner != 0) + return -EINVAL; + + pll = 1964 + ((f->frequency - TEF6862_LO_FREQ) * 20) / FREQ_MUL; + i2cmsg[0] = (MODE_PRESET << MODE_SHIFT) | WM_SUB_PLLM; + i2cmsg[1] = (pll >> 8) & 0xff; + i2cmsg[2] = pll & 0xff; + + err = i2c_master_send(client, i2cmsg, sizeof(i2cmsg)); + if (!err) + state->freq = f->frequency; + return err; +} + +static int tef6862_g_frequency(struct v4l2_subdev *sd, struct v4l2_frequency *f) +{ + struct tef6862_state *state = to_state(sd); + + if (f->tuner != 0) + return -EINVAL; + f->type = V4L2_TUNER_RADIO; + f->frequency = state->freq; + return 0; +} + +static int tef6862_g_chip_ident(struct v4l2_subdev *sd, + struct v4l2_dbg_chip_ident *chip) +{ + struct i2c_client *client = v4l2_get_subdevdata(sd); + + return v4l2_chip_ident_i2c_client(client, chip, V4L2_IDENT_TEF6862, 0); +} + +static const struct v4l2_subdev_tuner_ops tef6862_tuner_ops = { + .g_tuner = tef6862_g_tuner, + .s_tuner = tef6862_s_tuner, + .s_frequency = tef6862_s_frequency, + .g_frequency = tef6862_g_frequency, +}; + +static const struct v4l2_subdev_core_ops tef6862_core_ops = { + .g_chip_ident = tef6862_g_chip_ident, +}; + +static const struct v4l2_subdev_ops tef6862_ops = { + .core = &tef6862_core_ops, + .tuner = &tef6862_tuner_ops, +}; + +/* + * Generic i2c probe + * concerning the addresses: i2c wants 7 bit (without the r/w bit), so '>>1' + */ + +static int __devinit tef6862_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct tef6862_state *state; + struct v4l2_subdev *sd; + + /* Check if the adapter supports the needed features */ + if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_BYTE_DATA)) + return -EIO; + + v4l_info(client, "chip found @ 0x%02x (%s)\n", + client->addr << 1, client->adapter->name); + + state = kmalloc(sizeof(struct tef6862_state), GFP_KERNEL); + if (state == NULL) + return -ENOMEM; + state->freq = TEF6862_LO_FREQ; + + sd = &state->sd; + v4l2_i2c_subdev_init(sd, client, &tef6862_ops); + + return 0; +} + +static int __devexit tef6862_remove(struct i2c_client *client) +{ + struct v4l2_subdev *sd = i2c_get_clientdata(client); + + v4l2_device_unregister_subdev(sd); + kfree(to_state(sd)); + return 0; +} + +static const struct i2c_device_id tef6862_id[] = { + {DRIVER_NAME, 0}, + {}, +}; + +MODULE_DEVICE_TABLE(i2c, tef6862_id); + +static struct i2c_driver tef6862_driver = { + .driver = { + .owner = THIS_MODULE, + .name = DRIVER_NAME, + }, + .probe = tef6862_probe, + .remove = tef6862_remove, + .id_table = tef6862_id, +}; + +static __init int tef6862_init(void) +{ + return i2c_add_driver(&tef6862_driver); +} + +static __exit void tef6862_exit(void) +{ + i2c_del_driver(&tef6862_driver); +} + +module_init(tef6862_init); +module_exit(tef6862_exit); + +MODULE_DESCRIPTION("TEF6862 Car Radio Enhanced Selectivity Tuner"); +MODULE_AUTHOR("Mocean Laboratories"); +MODULE_LICENSE("GPL v2"); + diff --git a/include/media/v4l2-chip-ident.h b/include/media/v4l2-chip-ident.h index 43cf26ea6734..91942dbe64e3 100644 --- a/include/media/v4l2-chip-ident.h +++ b/include/media/v4l2-chip-ident.h @@ -131,6 +131,9 @@ enum { V4L2_IDENT_SAA6752HS = 6752, V4L2_IDENT_SAA6752HS_AC3 = 6753, + /* modules tef6862: just ident 6862 */ + V4L2_IDENT_TEF6862 = 6862, + /* module adv7170: just ident 7170 */ V4L2_IDENT_ADV7170 = 7170, -- cgit v1.3-8-gc7d7 From 21f1b932dbcc5ed18444e6995aeb856e583804ae Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 23 Oct 2009 06:50:12 -0300 Subject: V4L/DVB (13183): gspca: add stv0680 subdriver This patch adds a new subdriver to gspca supporting cams with the stv0680 bridge (replacing the old in kernel v4l1 driver). Many thanks to Hans Verkuil for providing me with one of the 2 cams used in testing this new sub driver. Signed-off-by: Hans de Goede Signed-off-by: Mauro Carvalho Chehab --- drivers/media/video/Kconfig | 6 +- drivers/media/video/gspca/Kconfig | 9 + drivers/media/video/gspca/Makefile | 2 + drivers/media/video/gspca/stv0680.c | 365 ++++++++++++++++++++++++++++++++++++ include/linux/videodev2.h | 1 + 5 files changed, 382 insertions(+), 1 deletion(-) create mode 100644 drivers/media/video/gspca/stv0680.c (limited to 'include') diff --git a/drivers/media/video/Kconfig b/drivers/media/video/Kconfig index 7ecae636e6ad..82ae85c975f5 100644 --- a/drivers/media/video/Kconfig +++ b/drivers/media/video/Kconfig @@ -1016,9 +1016,13 @@ config USB_SE401 source "drivers/media/video/sn9c102/Kconfig" config USB_STV680 - tristate "USB STV680 (Pencam) Camera support" + tristate "USB STV680 (Pencam) Camera support (DEPRECATED)" depends on VIDEO_V4L1 ---help--- + This driver is DEPRECATED please use the gspca stv0680 module + instead. Note that for the gspca stv0680 module you need + atleast version 0.6.3 of libv4l. + Say Y here if you want to connect this type of camera to your computer's USB port. This includes the Pencam line of cameras. See for more information diff --git a/drivers/media/video/gspca/Kconfig b/drivers/media/video/gspca/Kconfig index cbc2367719f0..568edbbb8948 100644 --- a/drivers/media/video/gspca/Kconfig +++ b/drivers/media/video/gspca/Kconfig @@ -230,6 +230,15 @@ config USB_GSPCA_STK014 To compile this driver as a module, choose M here: the module will be called gspca_stk014. +config USB_GSPCA_STV0680 + tristate "STV0680 USB Camera Driver" + depends on VIDEO_V4L2 && USB_GSPCA + help + Say Y here if you want support for cameras based on the STV0680 chip. + + To compile this driver as a module, choose M here: the + module will be called gspca_stv0680. + config USB_GSPCA_SUNPLUS tristate "SUNPLUS USB Camera Driver" depends on VIDEO_V4L2 && USB_GSPCA diff --git a/drivers/media/video/gspca/Makefile b/drivers/media/video/gspca/Makefile index b7420818037e..770b01387e99 100644 --- a/drivers/media/video/gspca/Makefile +++ b/drivers/media/video/gspca/Makefile @@ -22,6 +22,7 @@ obj-$(CONFIG_USB_GSPCA_SQ905) += gspca_sq905.o obj-$(CONFIG_USB_GSPCA_SQ905C) += gspca_sq905c.o obj-$(CONFIG_USB_GSPCA_SUNPLUS) += gspca_sunplus.o obj-$(CONFIG_USB_GSPCA_STK014) += gspca_stk014.o +obj-$(CONFIG_USB_GSPCA_STV0680) += gspca_stv0680.o obj-$(CONFIG_USB_GSPCA_T613) += gspca_t613.o obj-$(CONFIG_USB_GSPCA_TV8532) += gspca_tv8532.o obj-$(CONFIG_USB_GSPCA_VC032X) += gspca_vc032x.o @@ -50,6 +51,7 @@ gspca_spca561-objs := spca561.o gspca_sq905-objs := sq905.o gspca_sq905c-objs := sq905c.o gspca_stk014-objs := stk014.o +gspca_stv0680-objs := stv0680.o gspca_sunplus-objs := sunplus.o gspca_t613-objs := t613.o gspca_tv8532-objs := tv8532.o diff --git a/drivers/media/video/gspca/stv0680.c b/drivers/media/video/gspca/stv0680.c new file mode 100644 index 000000000000..0981ce14235d --- /dev/null +++ b/drivers/media/video/gspca/stv0680.c @@ -0,0 +1,365 @@ +/* + * STV0680 USB Camera Driver + * + * Copyright (C) 2009 Hans de Goede + * + * This module is adapted from the in kernel v4l1 stv680 driver: + * + * STV0680 USB Camera Driver, by Kevin Sisson (kjsisson@bellsouth.net) + * + * Thanks to STMicroelectronics for information on the usb commands, and + * to Steve Miller at STM for his help and encouragement while I was + * writing this driver. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#define MODULE_NAME "stv0680" + +#include "gspca.h" + +MODULE_AUTHOR("Hans de Goede "); +MODULE_DESCRIPTION("STV0680 USB Camera Driver"); +MODULE_LICENSE("GPL"); + +/* specific webcam descriptor */ +struct sd { + struct gspca_dev gspca_dev; /* !! must be the first item */ + struct v4l2_pix_format mode; + u8 orig_mode; + u8 video_mode; + u8 current_mode; +}; + +/* V4L2 controls supported by the driver */ +static struct ctrl sd_ctrls[] = { +}; + +static int stv_sndctrl(struct gspca_dev *gspca_dev, int set, u8 req, u16 val, + int size) +{ + int ret = -1; + u8 req_type = 0; + + switch (set) { + case 0: /* 0xc1 */ + req_type = USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_ENDPOINT; + break; + case 1: /* 0x41 */ + req_type = USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_ENDPOINT; + break; + case 2: /* 0x80 */ + req_type = USB_DIR_IN | USB_RECIP_DEVICE; + break; + case 3: /* 0x40 */ + req_type = USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE; + break; + } + + ret = usb_control_msg(gspca_dev->dev, + usb_rcvctrlpipe(gspca_dev->dev, 0), + req, req_type, + val, 0, gspca_dev->usb_buf, size, 500); + + if ((ret < 0) && (req != 0x0a)) + PDEBUG(D_ERR, + "usb_control_msg error %i, request = 0x%x, error = %i", + set, req, ret); + + return ret; +} + +static int stv0680_handle_error(struct gspca_dev *gspca_dev, int ret) +{ + stv_sndctrl(gspca_dev, 0, 0x80, 0, 0x02); /* Get Last Error */ + PDEBUG(D_ERR, "last error: %i, command = 0x%x", + gspca_dev->usb_buf[0], gspca_dev->usb_buf[1]); + return ret; +} + +static int stv0680_get_video_mode(struct gspca_dev *gspca_dev) +{ + /* Note not sure if this init of usb_buf is really necessary */ + memset(gspca_dev->usb_buf, 0, 8); + gspca_dev->usb_buf[0] = 0x0f; + + if (stv_sndctrl(gspca_dev, 0, 0x87, 0, 0x08) != 0x08) { + PDEBUG(D_ERR, "Get_Camera_Mode failed"); + return stv0680_handle_error(gspca_dev, -EIO); + } + + return gspca_dev->usb_buf[0]; /* 01 = VGA, 03 = QVGA, 00 = CIF */ +} + +static int stv0680_set_video_mode(struct gspca_dev *gspca_dev, u8 mode) +{ + struct sd *sd = (struct sd *) gspca_dev; + + if (sd->current_mode == mode) + return 0; + + memset(gspca_dev->usb_buf, 0, 8); + gspca_dev->usb_buf[0] = mode; + + if (stv_sndctrl(gspca_dev, 3, 0x07, 0x0100, 0x08) != 0x08) { + PDEBUG(D_ERR, "Set_Camera_Mode failed"); + return stv0680_handle_error(gspca_dev, -EIO); + } + + /* Verify we got what we've asked for */ + if (stv0680_get_video_mode(gspca_dev) != mode) { + PDEBUG(D_ERR, "Error setting camera video mode!"); + return -EIO; + } + + sd->current_mode = mode; + + return 0; +} + +/* this function is called at probe time */ +static int sd_config(struct gspca_dev *gspca_dev, + const struct usb_device_id *id) +{ + int ret; + struct sd *sd = (struct sd *) gspca_dev; + struct cam *cam = &gspca_dev->cam; + + /* ping camera to be sure STV0680 is present */ + if (stv_sndctrl(gspca_dev, 0, 0x88, 0x5678, 0x02) != 0x02 || + gspca_dev->usb_buf[0] != 0x56 || gspca_dev->usb_buf[1] != 0x78) { + PDEBUG(D_ERR, "STV(e): camera ping failed!!"); + return stv0680_handle_error(gspca_dev, -ENODEV); + } + + /* get camera descriptor */ + if (stv_sndctrl(gspca_dev, 2, 0x06, 0x0200, 0x09) != 0x09) + return stv0680_handle_error(gspca_dev, -ENODEV); + + if (stv_sndctrl(gspca_dev, 2, 0x06, 0x0200, 0x22) != 0x22 || + gspca_dev->usb_buf[7] != 0xa0 || gspca_dev->usb_buf[8] != 0x23) { + PDEBUG(D_ERR, "Could not get descriptor 0200."); + return stv0680_handle_error(gspca_dev, -ENODEV); + } + if (stv_sndctrl(gspca_dev, 0, 0x8a, 0, 0x02) != 0x02) + return stv0680_handle_error(gspca_dev, -ENODEV); + if (stv_sndctrl(gspca_dev, 0, 0x8b, 0, 0x24) != 0x24) + return stv0680_handle_error(gspca_dev, -ENODEV); + if (stv_sndctrl(gspca_dev, 0, 0x85, 0, 0x10) != 0x10) + return stv0680_handle_error(gspca_dev, -ENODEV); + + if (!(gspca_dev->usb_buf[7] & 0x09)) { + PDEBUG(D_ERR, "Camera supports neither CIF nor QVGA mode"); + return -ENODEV; + } + if (gspca_dev->usb_buf[7] & 0x01) + PDEBUG(D_PROBE, "Camera supports CIF mode"); + if (gspca_dev->usb_buf[7] & 0x02) + PDEBUG(D_PROBE, "Camera supports VGA mode"); + if (gspca_dev->usb_buf[7] & 0x08) + PDEBUG(D_PROBE, "Camera supports QVGA mode"); + + if (gspca_dev->usb_buf[7] & 0x01) + sd->video_mode = 0x00; /* CIF */ + else + sd->video_mode = 0x03; /* QVGA */ + + /* FW rev, ASIC rev, sensor ID */ + PDEBUG(D_PROBE, "Firmware rev is %i.%i", + gspca_dev->usb_buf[0], gspca_dev->usb_buf[1]); + PDEBUG(D_PROBE, "ASIC rev is %i.%i", + gspca_dev->usb_buf[2], gspca_dev->usb_buf[3]); + PDEBUG(D_PROBE, "Sensor ID is %i", + (gspca_dev->usb_buf[4]*16) + (gspca_dev->usb_buf[5]>>4)); + + + ret = stv0680_get_video_mode(gspca_dev); + if (ret < 0) + return ret; + sd->current_mode = sd->orig_mode = ret; + + ret = stv0680_set_video_mode(gspca_dev, sd->video_mode); + if (ret < 0) + return ret; + + /* Get mode details */ + if (stv_sndctrl(gspca_dev, 0, 0x8f, 0, 0x10) != 0x10) + return stv0680_handle_error(gspca_dev, -EIO); + + cam->bulk = 1; + cam->bulk_nurbs = 1; /* The cam cannot handle more */ + cam->bulk_size = (gspca_dev->usb_buf[0] << 24) | + (gspca_dev->usb_buf[1] << 16) | + (gspca_dev->usb_buf[2] << 8) | + (gspca_dev->usb_buf[3]); + sd->mode.width = (gspca_dev->usb_buf[4] << 8) | + (gspca_dev->usb_buf[5]); /* 322, 356, 644 */ + sd->mode.height = (gspca_dev->usb_buf[6] << 8) | + (gspca_dev->usb_buf[7]); /* 242, 292, 484 */ + sd->mode.pixelformat = V4L2_PIX_FMT_STV0680; + sd->mode.field = V4L2_FIELD_NONE; + sd->mode.bytesperline = sd->mode.width; + sd->mode.sizeimage = cam->bulk_size; + sd->mode.colorspace = V4L2_COLORSPACE_SRGB; + + /* origGain = gspca_dev->usb_buf[12]; */ + + cam->cam_mode = &sd->mode; + cam->nmodes = 1; + + + ret = stv0680_set_video_mode(gspca_dev, sd->orig_mode); + if (ret < 0) + return ret; + + if (stv_sndctrl(gspca_dev, 2, 0x06, 0x0100, 0x12) != 0x12 || + gspca_dev->usb_buf[8] != 0x53 || gspca_dev->usb_buf[9] != 0x05) { + PDEBUG(D_ERR, "Could not get descriptor 0100."); + return stv0680_handle_error(gspca_dev, -EIO); + } + + return 0; +} + +/* this function is called at probe and resume time */ +static int sd_init(struct gspca_dev *gspca_dev) +{ + return 0; +} + +/* -- start the camera -- */ +static int sd_start(struct gspca_dev *gspca_dev) +{ + int ret; + struct sd *sd = (struct sd *) gspca_dev; + + ret = stv0680_set_video_mode(gspca_dev, sd->video_mode); + if (ret < 0) + return ret; + + if (stv_sndctrl(gspca_dev, 0, 0x85, 0, 0x10) != 0x10) + return stv0680_handle_error(gspca_dev, -EIO); + + /* Start stream at: + 0x0000 = CIF (352x288) + 0x0100 = VGA (640x480) + 0x0300 = QVGA (320x240) */ + if (stv_sndctrl(gspca_dev, 1, 0x09, sd->video_mode << 8, 0x0) != 0x0) + return stv0680_handle_error(gspca_dev, -EIO); + + return 0; +} + +static void sd_stopN(struct gspca_dev *gspca_dev) +{ + /* This is a high priority command; it stops all lower order cmds */ + if (stv_sndctrl(gspca_dev, 1, 0x04, 0x0000, 0x0) != 0x0) + stv0680_handle_error(gspca_dev, -EIO); +} + +static void sd_stop0(struct gspca_dev *gspca_dev) +{ + struct sd *sd = (struct sd *) gspca_dev; + + if (!sd->gspca_dev.present) + return; + + stv0680_set_video_mode(gspca_dev, sd->orig_mode); +} + +static void sd_pkt_scan(struct gspca_dev *gspca_dev, + struct gspca_frame *frame, + __u8 *data, + int len) +{ + struct sd *sd = (struct sd *) gspca_dev; + + /* Every now and then the camera sends a 16 byte packet, no idea + what it contains, but it is not image data, when this + happens the frame received before this packet is corrupt, + so discard it. */ + if (len != sd->mode.sizeimage) { + gspca_dev->last_packet_type = DISCARD_PACKET; + return; + } + + /* Finish the previous frame, we do this upon reception of the next + packet, even though it is already complete so that the strange 16 + byte packets send after a corrupt frame can discard it. */ + frame = gspca_frame_add(gspca_dev, LAST_PACKET, frame, NULL, 0); + + /* Store the just received frame */ + gspca_frame_add(gspca_dev, FIRST_PACKET, frame, data, len); +} + +/* sub-driver description */ +static const struct sd_desc sd_desc = { + .name = MODULE_NAME, + .ctrls = sd_ctrls, + .nctrls = ARRAY_SIZE(sd_ctrls), + .config = sd_config, + .init = sd_init, + .start = sd_start, + .stopN = sd_stopN, + .stop0 = sd_stop0, + .pkt_scan = sd_pkt_scan, +}; + +/* -- module initialisation -- */ +static const __devinitdata struct usb_device_id device_table[] = { + {USB_DEVICE(0x0553, 0x0202)}, + {USB_DEVICE(0x041e, 0x4007)}, + {} +}; +MODULE_DEVICE_TABLE(usb, device_table); + +/* -- device connect -- */ +static int sd_probe(struct usb_interface *intf, + const struct usb_device_id *id) +{ + return gspca_dev_probe(intf, id, &sd_desc, sizeof(struct sd), + THIS_MODULE); +} + +static struct usb_driver sd_driver = { + .name = MODULE_NAME, + .id_table = device_table, + .probe = sd_probe, + .disconnect = gspca_disconnect, +#ifdef CONFIG_PM + .suspend = gspca_suspend, + .resume = gspca_resume, +#endif +}; + +/* -- module insert / remove -- */ +static int __init sd_mod_init(void) +{ + int ret; + ret = usb_register(&sd_driver); + if (ret < 0) + return ret; + PDEBUG(D_PROBE, "registered"); + return 0; +} +static void __exit sd_mod_exit(void) +{ + usb_deregister(&sd_driver); + PDEBUG(D_PROBE, "deregistered"); +} + +module_init(sd_mod_init); +module_exit(sd_mod_exit); diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h index b59e78c57161..b9a799a35763 100644 --- a/include/linux/videodev2.h +++ b/include/linux/videodev2.h @@ -361,6 +361,7 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_PJPG v4l2_fourcc('P', 'J', 'P', 'G') /* Pixart 73xx JPEG */ #define V4L2_PIX_FMT_OV511 v4l2_fourcc('O', '5', '1', '1') /* ov511 JPEG */ #define V4L2_PIX_FMT_OV518 v4l2_fourcc('O', '5', '1', '8') /* ov518 JPEG */ +#define V4L2_PIX_FMT_STV0680 v4l2_fourcc('S', '6', '8', '0') /* stv0680 bayer */ /* * F O R M A T E N U M E R A T I O N -- cgit v1.3-8-gc7d7 From fb29ab96982baba57b03636e2a894c0d0acd197e Mon Sep 17 00:00:00 2001 From: "David T.L. Wong" Date: Tue, 20 Oct 2009 12:13:39 -0300 Subject: V4L/DVB (13206): cx25840: add component support Signed-off-by: David T.L. Wong Signed-off-by: Mauro Carvalho Chehab --- drivers/media/video/cx25840/cx25840-core.c | 36 ++++++++++++++++++++---------- include/media/cx25840.h | 1 + 2 files changed, 25 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/drivers/media/video/cx25840/cx25840-core.c b/drivers/media/video/cx25840/cx25840-core.c index 904e9a5a9065..9702a9334b4a 100644 --- a/drivers/media/video/cx25840/cx25840-core.c +++ b/drivers/media/video/cx25840/cx25840-core.c @@ -703,6 +703,10 @@ static int set_input(struct i2c_client *client, enum cx25840_video_input vid_inp struct cx25840_state *state = to_state(i2c_get_clientdata(client)); u8 is_composite = (vid_input >= CX25840_COMPOSITE1 && vid_input <= CX25840_COMPOSITE8); + u8 is_component = (vid_input & CX25840_COMPONENT_ON) == + CX25840_COMPONENT_ON; + int luma = vid_input & 0xf0; + int chroma = vid_input & 0xf00; u8 reg; v4l_dbg(1, cx25840_debug, client, @@ -715,18 +719,14 @@ static int set_input(struct i2c_client *client, enum cx25840_video_input vid_inp reg = vid_input & 0xff; if ((vid_input & CX25840_SVIDEO_ON) == CX25840_SVIDEO_ON) is_composite = 0; - else + else if ((vid_input & CX25840_COMPONENT_ON) == 0) is_composite = 1; v4l_dbg(1, cx25840_debug, client, "mux cfg 0x%x comp=%d\n", reg, is_composite); - } else - if (is_composite) { + } else if (is_composite) { reg = 0xf0 + (vid_input - CX25840_COMPOSITE1); } else { - int luma = vid_input & 0xf0; - int chroma = vid_input & 0xf00; - if ((vid_input & ~0xff0) || luma < CX25840_SVIDEO_LUMA1 || luma > CX25840_SVIDEO_LUMA8 || chroma < CX25840_SVIDEO_CHROMA4 || chroma > CX25840_SVIDEO_CHROMA8) { @@ -768,8 +768,11 @@ static int set_input(struct i2c_client *client, enum cx25840_video_input vid_inp cx25840_write(client, 0x103, reg); - /* Set INPUT_MODE to Composite (0) or S-Video (1) */ - cx25840_and_or(client, 0x401, ~0x6, is_composite ? 0 : 0x02); + /* Set INPUT_MODE to Composite, S-Video or Component */ + if (is_component) + cx25840_and_or(client, 0x401, ~0x6, 0x6); + else + cx25840_and_or(client, 0x401, ~0x6, is_composite ? 0 : 0x02); if (!is_cx2388x(state) && !is_cx231xx(state)) { /* Set CH_SEL_ADC2 to 1 if input comes from CH3 */ @@ -780,12 +783,21 @@ static int set_input(struct i2c_client *client, enum cx25840_video_input vid_inp else cx25840_and_or(client, 0x102, ~0x4, 0); } else { - if (is_composite) + /* Set DUAL_MODE_ADC2 to 1 if component*/ + cx25840_and_or(client, 0x102, ~0x4, is_component ? 0x4 : 0x0); + if (is_composite) { /* ADC2 input select channel 2 */ cx25840_and_or(client, 0x102, ~0x2, 0); - else - /* ADC2 input select channel 3 */ - cx25840_and_or(client, 0x102, ~0x2, 2); + } else if (!is_component) { + /* S-Video */ + if (chroma >= CX25840_SVIDEO_CHROMA7) { + /* ADC2 input select channel 3 */ + cx25840_and_or(client, 0x102, ~0x2, 2); + } else { + /* ADC2 input select channel 2 */ + cx25840_and_or(client, 0x102, ~0x2, 0); + } + } } state->vid_input = vid_input; diff --git a/include/media/cx25840.h b/include/media/cx25840.h index 2c3fbaa33f74..0b0cb1776796 100644 --- a/include/media/cx25840.h +++ b/include/media/cx25840.h @@ -84,6 +84,7 @@ enum cx25840_video_input { CX25840_NONE0_CH3 = 0x80000080, CX25840_NONE1_CH3 = 0x800000c0, CX25840_SVIDEO_ON = 0x80000100, + CX25840_COMPONENT_ON = 0x80000200, }; enum cx25840_audio_input { -- cgit v1.3-8-gc7d7 From 622b828ab795580903e79acb33fb44f5c9ce7b0f Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Mon, 5 Oct 2009 10:48:17 -0300 Subject: V4L/DVB (13238): v4l2_subdev: rename tuner s_standby operation to core s_power Upcoming I2C v4l2_subdev drivers need a way to control the subdevice power state from the core. This use case is already partially covered by the tuner s_standby operation, but no way to explicitly come back from the standby state is available. Rename the tuner s_standby operation to core s_power, and fix tuner drivers accordingly. The tuner core will call s_power(0) instead of s_standby(). No explicit call to s_power(1) is required for tuners as they are supposed to wake up from standby automatically. [mchehab@redhat.com: CodingStyle fix] Signed-off-by: Laurent Pinchart Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/video/au0828/au0828-video.c | 2 +- drivers/media/video/cx231xx/cx231xx-video.c | 2 +- drivers/media/video/cx23885/cx23885-core.c | 2 +- drivers/media/video/cx23885/cx23885-dvb.c | 2 +- drivers/media/video/cx88/cx88-cards.c | 2 +- drivers/media/video/cx88/cx88-dvb.c | 2 +- drivers/media/video/cx88/cx88-video.c | 2 +- drivers/media/video/em28xx/em28xx-cards.c | 2 +- drivers/media/video/em28xx/em28xx-video.c | 2 +- drivers/media/video/saa7134/saa7134-core.c | 2 +- drivers/media/video/saa7134/saa7134-video.c | 2 +- drivers/media/video/tuner-core.c | 9 ++++++--- include/media/v4l2-subdev.h | 7 ++++--- 13 files changed, 21 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/drivers/media/video/au0828/au0828-video.c b/drivers/media/video/au0828/au0828-video.c index 51527d7b55a7..1485aee18d58 100644 --- a/drivers/media/video/au0828/au0828-video.c +++ b/drivers/media/video/au0828/au0828-video.c @@ -830,7 +830,7 @@ static int au0828_v4l2_close(struct file *filp) au0828_uninit_isoc(dev); /* Save some power by putting tuner to sleep */ - v4l2_device_call_all(&dev->v4l2_dev, 0, tuner, s_standby); + v4l2_device_call_all(&dev->v4l2_dev, 0, core, s_power, 0); /* When close the device, set the usb intf0 into alt0 to free USB bandwidth */ diff --git a/drivers/media/video/cx231xx/cx231xx-video.c b/drivers/media/video/cx231xx/cx231xx-video.c index 36503725d973..d095aa0d6d19 100644 --- a/drivers/media/video/cx231xx/cx231xx-video.c +++ b/drivers/media/video/cx231xx/cx231xx-video.c @@ -2106,7 +2106,7 @@ static int cx231xx_v4l2_close(struct file *filp) } /* Save some power by putting tuner to sleep */ - call_all(dev, tuner, s_standby); + call_all(dev, core, s_power, 0); /* do this before setting alternate! */ cx231xx_uninit_isoc(dev); diff --git a/drivers/media/video/cx23885/cx23885-core.c b/drivers/media/video/cx23885/cx23885-core.c index d6e41db500ef..e34a4ec4a2cd 100644 --- a/drivers/media/video/cx23885/cx23885-core.c +++ b/drivers/media/video/cx23885/cx23885-core.c @@ -919,7 +919,7 @@ static int cx23885_dev_setup(struct cx23885_dev *dev) cx23885_i2c_register(&dev->i2c_bus[1]); cx23885_i2c_register(&dev->i2c_bus[2]); cx23885_card_setup(dev); - call_all(dev, tuner, s_standby); + call_all(dev, core, s_power, 0); cx23885_ir_init(dev); if (cx23885_boards[dev->board].porta == CX23885_ANALOG_VIDEO) { diff --git a/drivers/media/video/cx23885/cx23885-dvb.c b/drivers/media/video/cx23885/cx23885-dvb.c index 4d439f2c4abc..f4f046cd81a5 100644 --- a/drivers/media/video/cx23885/cx23885-dvb.c +++ b/drivers/media/video/cx23885/cx23885-dvb.c @@ -920,7 +920,7 @@ static int dvb_register(struct cx23885_tsport *port) fe0->dvb.frontend->callback = cx23885_tuner_callback; /* Put the analog decoder in standby to keep it quiet */ - call_all(dev, tuner, s_standby); + call_all(dev, core, s_power, 0); if (fe0->dvb.frontend->ops.analog_ops.standby) fe0->dvb.frontend->ops.analog_ops.standby(fe0->dvb.frontend); diff --git a/drivers/media/video/cx88/cx88-cards.c b/drivers/media/video/cx88/cx88-cards.c index 33be6369871a..7330a2d70439 100644 --- a/drivers/media/video/cx88/cx88-cards.c +++ b/drivers/media/video/cx88/cx88-cards.c @@ -3267,7 +3267,7 @@ static void cx88_card_setup(struct cx88_core *core) ctl.fname); call_all(core, tuner, s_config, &xc2028_cfg); } - call_all(core, tuner, s_standby); + call_all(core, core, s_power, 0); } /* ------------------------------------------------------------------ */ diff --git a/drivers/media/video/cx88/cx88-dvb.c b/drivers/media/video/cx88/cx88-dvb.c index d9e402b25b56..9df71d0244a8 100644 --- a/drivers/media/video/cx88/cx88-dvb.c +++ b/drivers/media/video/cx88/cx88-dvb.c @@ -1170,7 +1170,7 @@ static int dvb_register(struct cx8802_dev *dev) fe1->dvb.frontend->ops.ts_bus_ctrl = cx88_dvb_bus_ctrl; /* Put the analog decoder in standby to keep it quiet */ - call_all(core, tuner, s_standby); + call_all(core, core, s_power, 0); /* register everything */ return videobuf_dvb_register_bus(&dev->frontends, THIS_MODULE, dev, diff --git a/drivers/media/video/cx88/cx88-video.c b/drivers/media/video/cx88/cx88-video.c index 57e6b1241090..d7e8fcee559c 100644 --- a/drivers/media/video/cx88/cx88-video.c +++ b/drivers/media/video/cx88/cx88-video.c @@ -935,7 +935,7 @@ static int video_release(struct file *file) mutex_lock(&dev->core->lock); if(atomic_dec_and_test(&dev->core->users)) - call_all(dev->core, tuner, s_standby); + call_all(dev->core, core, s_power, 0); mutex_unlock(&dev->core->lock); return 0; diff --git a/drivers/media/video/em28xx/em28xx-cards.c b/drivers/media/video/em28xx/em28xx-cards.c index 6c7cbbc59e1c..1e1937070738 100644 --- a/drivers/media/video/em28xx/em28xx-cards.c +++ b/drivers/media/video/em28xx/em28xx-cards.c @@ -2658,7 +2658,7 @@ static int em28xx_init_dev(struct em28xx **devhandle, struct usb_device *udev, em28xx_init_extension(dev); /* Save some power by putting tuner to sleep */ - v4l2_device_call_all(&dev->v4l2_dev, 0, tuner, s_standby); + v4l2_device_call_all(&dev->v4l2_dev, 0, core, s_power, 0); return 0; diff --git a/drivers/media/video/em28xx/em28xx-video.c b/drivers/media/video/em28xx/em28xx-video.c index 3a1dfb7726f8..fbe536f092f9 100644 --- a/drivers/media/video/em28xx/em28xx-video.c +++ b/drivers/media/video/em28xx/em28xx-video.c @@ -2225,7 +2225,7 @@ static int em28xx_v4l2_close(struct file *filp) } /* Save some power by putting tuner to sleep */ - v4l2_device_call_all(&dev->v4l2_dev, 0, tuner, s_standby); + v4l2_device_call_all(&dev->v4l2_dev, 0, core, s_power, 0); /* do this before setting alternate! */ em28xx_uninit_isoc(dev); diff --git a/drivers/media/video/saa7134/saa7134-core.c b/drivers/media/video/saa7134/saa7134-core.c index f87757fccc72..9a5df930885b 100644 --- a/drivers/media/video/saa7134/saa7134-core.c +++ b/drivers/media/video/saa7134/saa7134-core.c @@ -1032,7 +1032,7 @@ static int __devinit saa7134_initdev(struct pci_dev *pci_dev, saa7134_irq_video_signalchange(dev); if (TUNER_ABSENT != dev->tuner_type) - saa_call_all(dev, tuner, s_standby); + saa_call_all(dev, core, s_power, 0); /* register v4l devices */ if (saa7134_no_overlay > 0) diff --git a/drivers/media/video/saa7134/saa7134-video.c b/drivers/media/video/saa7134/saa7134-video.c index da26f476a302..35f8daa3a359 100644 --- a/drivers/media/video/saa7134/saa7134-video.c +++ b/drivers/media/video/saa7134/saa7134-video.c @@ -1499,7 +1499,7 @@ static int video_release(struct file *file) saa_andorb(SAA7134_OFMT_DATA_A, 0x1f, 0); saa_andorb(SAA7134_OFMT_DATA_B, 0x1f, 0); - saa_call_all(dev, tuner, s_standby); + saa_call_all(dev, core, s_power, 0); if (fh->radio) saa_call_all(dev, core, ioctl, RDS_CMD_CLOSE, &cmd); diff --git a/drivers/media/video/tuner-core.c b/drivers/media/video/tuner-core.c index aba92e2313d8..50f6db5cd50b 100644 --- a/drivers/media/video/tuner-core.c +++ b/drivers/media/video/tuner-core.c @@ -752,14 +752,17 @@ static int tuner_s_radio(struct v4l2_subdev *sd) return 0; } -static int tuner_s_standby(struct v4l2_subdev *sd) +static int tuner_s_power(struct v4l2_subdev *sd, int on) { struct tuner *t = to_tuner(sd); struct analog_demod_ops *analog_ops = &t->fe.ops.analog_ops; + if (on) + return 0; + tuner_dbg("Putting tuner to sleep\n"); - if (check_mode(t, "s_standby") == -EINVAL) + if (check_mode(t, "s_power") == -EINVAL) return 0; t->mode = T_STANDBY; if (analog_ops->standby) @@ -961,6 +964,7 @@ static int tuner_command(struct i2c_client *client, unsigned cmd, void *arg) static const struct v4l2_subdev_core_ops tuner_core_ops = { .log_status = tuner_log_status, .s_std = tuner_s_std, + .s_power = tuner_s_power, }; static const struct v4l2_subdev_tuner_ops tuner_tuner_ops = { @@ -971,7 +975,6 @@ static const struct v4l2_subdev_tuner_ops tuner_tuner_ops = { .g_frequency = tuner_g_frequency, .s_type_addr = tuner_s_type_addr, .s_config = tuner_s_config, - .s_standby = tuner_s_standby, }; static const struct v4l2_subdev_ops tuner_ops = { diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h index ecc2818938b3..88c13d6f7912 100644 --- a/include/media/v4l2-subdev.h +++ b/include/media/v4l2-subdev.h @@ -106,6 +106,9 @@ struct v4l2_decode_vbi_line { s_gpio: set GPIO pins. Very simple right now, might need to be extended with a direction argument if needed. + + s_power: puts subdevice in power saving mode (on == 0) or normal operation + mode (on == 1). */ struct v4l2_subdev_core_ops { int (*g_chip_ident)(struct v4l2_subdev *sd, struct v4l2_dbg_chip_ident *chip); @@ -128,6 +131,7 @@ struct v4l2_subdev_core_ops { int (*g_register)(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg); int (*s_register)(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg); #endif + int (*s_power)(struct v4l2_subdev *sd, int on); }; /* s_mode: switch the tuner to a specific tuner mode. Replacement of s_radio. @@ -137,8 +141,6 @@ struct v4l2_subdev_core_ops { s_type_addr: sets tuner type and its I2C addr. s_config: sets tda9887 specific stuff, like port1, port2 and qss - - s_standby: puts tuner on powersaving state, disabling it, except for i2c. */ struct v4l2_subdev_tuner_ops { int (*s_mode)(struct v4l2_subdev *sd, enum v4l2_tuner_type); @@ -151,7 +153,6 @@ struct v4l2_subdev_tuner_ops { int (*s_modulator)(struct v4l2_subdev *sd, struct v4l2_modulator *vm); int (*s_type_addr)(struct v4l2_subdev *sd, struct tuner_setup *type); int (*s_config)(struct v4l2_subdev *sd, const struct v4l2_priv_tun_config *config); - int (*s_standby)(struct v4l2_subdev *sd); }; /* s_clock_freq: set the frequency (in Hz) of the audio clock output. -- cgit v1.3-8-gc7d7 From 38a54f35a0a90c0b62b111dd4de24248b22616b9 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Tue, 17 Nov 2009 19:43:41 -0300 Subject: V4L/DVB (13377): make struct videobuf_queue_ops constant The videobuf_queue_ops function vector is not declared constant, but there's no need for the videobuf layer to ever change it. Make it const so that videobuf users can make their operations const without warnings. Signed-off-by: Jonathan Corbet Signed-off-by: Andrew Morton Signed-off-by: Mauro Carvalho Chehab --- drivers/media/video/videobuf-core.c | 2 +- drivers/media/video/videobuf-dma-contig.c | 2 +- drivers/media/video/videobuf-dma-sg.c | 2 +- drivers/media/video/videobuf-vmalloc.c | 2 +- include/media/videobuf-core.h | 4 ++-- include/media/videobuf-dma-contig.h | 2 +- include/media/videobuf-dma-sg.h | 2 +- include/media/videobuf-vmalloc.h | 2 +- 8 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/drivers/media/video/videobuf-core.c b/drivers/media/video/videobuf-core.c index a96b08d3df5a..136bc3496c87 100644 --- a/drivers/media/video/videobuf-core.c +++ b/drivers/media/video/videobuf-core.c @@ -110,7 +110,7 @@ EXPORT_SYMBOL_GPL(videobuf_queue_to_vmalloc); void videobuf_queue_core_init(struct videobuf_queue *q, - struct videobuf_queue_ops *ops, + const struct videobuf_queue_ops *ops, struct device *dev, spinlock_t *irqlock, enum v4l2_buf_type type, diff --git a/drivers/media/video/videobuf-dma-contig.c b/drivers/media/video/videobuf-dma-contig.c index c3065c4bcba9..d25f28461da1 100644 --- a/drivers/media/video/videobuf-dma-contig.c +++ b/drivers/media/video/videobuf-dma-contig.c @@ -429,7 +429,7 @@ static struct videobuf_qtype_ops qops = { }; void videobuf_queue_dma_contig_init(struct videobuf_queue *q, - struct videobuf_queue_ops *ops, + const struct videobuf_queue_ops *ops, struct device *dev, spinlock_t *irqlock, enum v4l2_buf_type type, diff --git a/drivers/media/video/videobuf-dma-sg.c b/drivers/media/video/videobuf-dma-sg.c index a583d394696e..fa78555b118b 100644 --- a/drivers/media/video/videobuf-dma-sg.c +++ b/drivers/media/video/videobuf-dma-sg.c @@ -702,7 +702,7 @@ void *videobuf_sg_alloc(size_t size) } void videobuf_queue_sg_init(struct videobuf_queue* q, - struct videobuf_queue_ops *ops, + const struct videobuf_queue_ops *ops, struct device *dev, spinlock_t *irqlock, enum v4l2_buf_type type, diff --git a/drivers/media/video/videobuf-vmalloc.c b/drivers/media/video/videobuf-vmalloc.c index 35f3900c5633..99d646ed0248 100644 --- a/drivers/media/video/videobuf-vmalloc.c +++ b/drivers/media/video/videobuf-vmalloc.c @@ -391,7 +391,7 @@ static struct videobuf_qtype_ops qops = { }; void videobuf_queue_vmalloc_init(struct videobuf_queue* q, - struct videobuf_queue_ops *ops, + const struct videobuf_queue_ops *ops, void *dev, spinlock_t *irqlock, enum v4l2_buf_type type, diff --git a/include/media/videobuf-core.h b/include/media/videobuf-core.h index 1c5946c44758..316fdccdcaa0 100644 --- a/include/media/videobuf-core.h +++ b/include/media/videobuf-core.h @@ -166,7 +166,7 @@ struct videobuf_queue { enum v4l2_field field; enum v4l2_field last; /* for field=V4L2_FIELD_ALTERNATE */ struct videobuf_buffer *bufs[VIDEO_MAX_FRAME]; - struct videobuf_queue_ops *ops; + const struct videobuf_queue_ops *ops; struct videobuf_qtype_ops *int_ops; unsigned int streaming:1; @@ -195,7 +195,7 @@ void *videobuf_queue_to_vmalloc (struct videobuf_queue* q, struct videobuf_buffer *buf); void videobuf_queue_core_init(struct videobuf_queue *q, - struct videobuf_queue_ops *ops, + const struct videobuf_queue_ops *ops, struct device *dev, spinlock_t *irqlock, enum v4l2_buf_type type, diff --git a/include/media/videobuf-dma-contig.h b/include/media/videobuf-dma-contig.h index 549386681aab..ebaa9bc1ee8d 100644 --- a/include/media/videobuf-dma-contig.h +++ b/include/media/videobuf-dma-contig.h @@ -17,7 +17,7 @@ #include void videobuf_queue_dma_contig_init(struct videobuf_queue *q, - struct videobuf_queue_ops *ops, + const struct videobuf_queue_ops *ops, struct device *dev, spinlock_t *irqlock, enum v4l2_buf_type type, diff --git a/include/media/videobuf-dma-sg.h b/include/media/videobuf-dma-sg.h index dda47f0082e9..53e72f787175 100644 --- a/include/media/videobuf-dma-sg.h +++ b/include/media/videobuf-dma-sg.h @@ -103,7 +103,7 @@ struct videobuf_dmabuf *videobuf_to_dma (struct videobuf_buffer *buf); void *videobuf_sg_alloc(size_t size); void videobuf_queue_sg_init(struct videobuf_queue* q, - struct videobuf_queue_ops *ops, + const struct videobuf_queue_ops *ops, struct device *dev, spinlock_t *irqlock, enum v4l2_buf_type type, diff --git a/include/media/videobuf-vmalloc.h b/include/media/videobuf-vmalloc.h index e87222c6a125..1ffdb6624436 100644 --- a/include/media/videobuf-vmalloc.h +++ b/include/media/videobuf-vmalloc.h @@ -30,7 +30,7 @@ struct videobuf_vmalloc_memory }; void videobuf_queue_vmalloc_init(struct videobuf_queue* q, - struct videobuf_queue_ops *ops, + const struct videobuf_queue_ops *ops, void *dev, spinlock_t *irqlock, enum v4l2_buf_type type, -- cgit v1.3-8-gc7d7 From f8b0bca1a7ea8479490bcc06835ccbf590ba2c4e Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Mon, 23 Nov 2009 14:29:35 -0300 Subject: V4L/DVB (13417): Fix videobuf_queue_vmalloc_init() prototype For whatever reason, the device structure pointer to videobuf_queue_vmalloc_init is typed "void *", even though it's passed right through to videobuf_queue_core_init(), which expects a struct device pointer. The other videobuf implementations use struct device *; I think vmalloc should too. Signed-off-by: Jonathan Corbet Signed-off-by: Mauro Carvalho Chehab --- drivers/media/video/videobuf-vmalloc.c | 2 +- include/media/videobuf-vmalloc.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/media/video/videobuf-vmalloc.c b/drivers/media/video/videobuf-vmalloc.c index 99d646ed0248..d6e6a28fb6b8 100644 --- a/drivers/media/video/videobuf-vmalloc.c +++ b/drivers/media/video/videobuf-vmalloc.c @@ -392,7 +392,7 @@ static struct videobuf_qtype_ops qops = { void videobuf_queue_vmalloc_init(struct videobuf_queue* q, const struct videobuf_queue_ops *ops, - void *dev, + struct device *dev, spinlock_t *irqlock, enum v4l2_buf_type type, enum v4l2_field field, diff --git a/include/media/videobuf-vmalloc.h b/include/media/videobuf-vmalloc.h index 1ffdb6624436..4b419a257a7d 100644 --- a/include/media/videobuf-vmalloc.h +++ b/include/media/videobuf-vmalloc.h @@ -31,7 +31,7 @@ struct videobuf_vmalloc_memory void videobuf_queue_vmalloc_init(struct videobuf_queue* q, const struct videobuf_queue_ops *ops, - void *dev, + struct device *dev, spinlock_t *irqlock, enum v4l2_buf_type type, enum v4l2_field field, -- cgit v1.3-8-gc7d7 From e53a70b4725f0a5e10e659c8352696548b9b9478 Mon Sep 17 00:00:00 2001 From: Vaibhav Hiremath Date: Mon, 9 Nov 2009 09:13:20 -0300 Subject: V4L/DVB (13464): Davinci VPFE Capture: add i2c adapter id in platform data The I2C adapter ID is actually depends on Board and may vary, Davinci uses id=1, but in case of AM3517 id=3. Signed-off-by: Vaibhav Hiremath Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/media/davinci/vpfe_capture.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/media/davinci/vpfe_capture.h b/include/media/davinci/vpfe_capture.h index 71d8982e13ff..d863e5e8426d 100644 --- a/include/media/davinci/vpfe_capture.h +++ b/include/media/davinci/vpfe_capture.h @@ -83,6 +83,8 @@ struct vpfe_subdev_info { struct vpfe_config { /* Number of sub devices connected to vpfe */ int num_subdevs; + /* i2c bus adapter no */ + int i2c_adapter_id; /* information about each subdev */ struct vpfe_subdev_info *sub_devs; /* evm card info */ -- cgit v1.3-8-gc7d7 From 85213630731605503c8fd4df9bf06beefb2cc7c4 Mon Sep 17 00:00:00 2001 From: Vaibhav Hiremath Date: Tue, 10 Nov 2009 13:32:53 -0300 Subject: V4L/DVB (13467): V4L2: Added CID's V4L2_CID_ROTATE/BG_COLOR Signed-off-by: Vaibhav Hiremath Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- Documentation/DocBook/v4l/videodev2.h.xml | 4 +++- drivers/media/video/v4l2-common.c | 9 +++++++++ include/linux/videodev2.h | 4 +++- 3 files changed, 15 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/Documentation/DocBook/v4l/videodev2.h.xml b/Documentation/DocBook/v4l/videodev2.h.xml index bb6b07f9ea14..7162b9ffc2b3 100644 --- a/Documentation/DocBook/v4l/videodev2.h.xml +++ b/Documentation/DocBook/v4l/videodev2.h.xml @@ -915,8 +915,10 @@ enum v4l2_colorfx { #define V4L2_CID_AUTOBRIGHTNESS (V4L2_CID_BASE+32) #define V4L2_CID_BAND_STOP_FILTER (V4L2_CID_BASE+33) +#define V4L2_CID_ROTATE (V4L2_CID_BASE+34) +#define V4L2_CID_BG_COLOR (V4L2_CID_BASE+35) /* last CID + 1 */ -#define V4L2_CID_LASTP1 (V4L2_CID_BASE+34) +#define V4L2_CID_LASTP1 (V4L2_CID_BASE+36) /* MPEG-class control IDs defined by V4L2 */ #define V4L2_CID_MPEG_BASE (V4L2_CTRL_CLASS_MPEG | 0x900) diff --git a/drivers/media/video/v4l2-common.c b/drivers/media/video/v4l2-common.c index f5a93ae3cdf9..e8e5affbabce 100644 --- a/drivers/media/video/v4l2-common.c +++ b/drivers/media/video/v4l2-common.c @@ -431,6 +431,8 @@ const char *v4l2_ctrl_get_name(u32 id) case V4L2_CID_CHROMA_AGC: return "Chroma AGC"; case V4L2_CID_COLOR_KILLER: return "Color Killer"; case V4L2_CID_COLORFX: return "Color Effects"; + case V4L2_CID_ROTATE: return "Rotate"; + case V4L2_CID_BG_COLOR: return "Background color"; /* MPEG controls */ case V4L2_CID_MPEG_CLASS: return "MPEG Encoder Controls"; @@ -587,6 +589,13 @@ int v4l2_ctrl_query_fill(struct v4l2_queryctrl *qctrl, s32 min, s32 max, s32 ste qctrl->flags |= V4L2_CTRL_FLAG_READ_ONLY; min = max = step = def = 0; break; + case V4L2_CID_BG_COLOR: + qctrl->type = V4L2_CTRL_TYPE_INTEGER; + step = 1; + min = 0; + /* Max is calculated as RGB888 that is 2^24 */ + max = 0xFFFFFF; + break; default: qctrl->type = V4L2_CTRL_TYPE_INTEGER; break; diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h index b9a799a35763..54395460a414 100644 --- a/include/linux/videodev2.h +++ b/include/linux/videodev2.h @@ -913,8 +913,10 @@ enum v4l2_colorfx { #define V4L2_CID_AUTOBRIGHTNESS (V4L2_CID_BASE+32) #define V4L2_CID_BAND_STOP_FILTER (V4L2_CID_BASE+33) +#define V4L2_CID_ROTATE (V4L2_CID_BASE+34) +#define V4L2_CID_BG_COLOR (V4L2_CID_BASE+35) /* last CID + 1 */ -#define V4L2_CID_LASTP1 (V4L2_CID_BASE+34) +#define V4L2_CID_LASTP1 (V4L2_CID_BASE+36) /* MPEG-class control IDs defined by V4L2 */ #define V4L2_CID_MPEG_BASE (V4L2_CTRL_CLASS_MPEG | 0x900) -- cgit v1.3-8-gc7d7 From d8008562379b927758ca08eded1508c68d9beb4e Mon Sep 17 00:00:00 2001 From: Vaibhav Hiremath Date: Tue, 10 Nov 2009 13:46:36 -0300 Subject: V4L/DVB (13470): V4L2: Add Capability and Flag field for Chroma Key Signed-off-by: Vaibhav Hiremath Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- Documentation/DocBook/v4l/videodev2.h.xml | 2 ++ include/linux/videodev2.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/Documentation/DocBook/v4l/videodev2.h.xml b/Documentation/DocBook/v4l/videodev2.h.xml index 7162b9ffc2b3..228277bf9f10 100644 --- a/Documentation/DocBook/v4l/videodev2.h.xml +++ b/Documentation/DocBook/v4l/videodev2.h.xml @@ -566,6 +566,7 @@ struct v4l2_framebuffer { #define V4L2_FBUF_CAP_LOCAL_ALPHA 0x0010 #define V4L2_FBUF_CAP_GLOBAL_ALPHA 0x0020 #define V4L2_FBUF_CAP_LOCAL_INV_ALPHA 0x0040 +#define V4L2_FBUF_CAP_SRC_CHROMAKEY 0x0080 /* Flags for the 'flags' field. */ #define V4L2_FBUF_FLAG_PRIMARY 0x0001 #define V4L2_FBUF_FLAG_OVERLAY 0x0002 @@ -573,6 +574,7 @@ struct v4l2_framebuffer { #define V4L2_FBUF_FLAG_LOCAL_ALPHA 0x0008 #define V4L2_FBUF_FLAG_GLOBAL_ALPHA 0x0010 #define V4L2_FBUF_FLAG_LOCAL_INV_ALPHA 0x0020 +#define V4L2_FBUF_FLAG_SRC_CHROMAKEY 0x0040 struct v4l2_clip { struct v4l2_rect c; diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h index 54395460a414..e4eb403a3b00 100644 --- a/include/linux/videodev2.h +++ b/include/linux/videodev2.h @@ -564,6 +564,7 @@ struct v4l2_framebuffer { #define V4L2_FBUF_CAP_LOCAL_ALPHA 0x0010 #define V4L2_FBUF_CAP_GLOBAL_ALPHA 0x0020 #define V4L2_FBUF_CAP_LOCAL_INV_ALPHA 0x0040 +#define V4L2_FBUF_CAP_SRC_CHROMAKEY 0x0080 /* Flags for the 'flags' field. */ #define V4L2_FBUF_FLAG_PRIMARY 0x0001 #define V4L2_FBUF_FLAG_OVERLAY 0x0002 @@ -571,6 +572,7 @@ struct v4l2_framebuffer { #define V4L2_FBUF_FLAG_LOCAL_ALPHA 0x0008 #define V4L2_FBUF_FLAG_GLOBAL_ALPHA 0x0010 #define V4L2_FBUF_FLAG_LOCAL_INV_ALPHA 0x0020 +#define V4L2_FBUF_FLAG_SRC_CHROMAKEY 0x0040 struct v4l2_clip { struct v4l2_rect c; -- cgit v1.3-8-gc7d7 From dbc8e34a3265e7ec6b2a07c4337c60a947768891 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Tue, 9 Jun 2009 17:34:01 -0300 Subject: V4L/DVB (13477): v4l2-subdev: remove unnecessary check Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/media/v4l2-subdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h index 88c13d6f7912..00bf17608453 100644 --- a/include/media/v4l2-subdev.h +++ b/include/media/v4l2-subdev.h @@ -385,7 +385,7 @@ static inline void v4l2_subdev_init(struct v4l2_subdev *sd, Example: err = v4l2_subdev_call(sd, core, g_chip_ident, &chip); */ #define v4l2_subdev_call(sd, o, f, args...) \ - (!(sd) ? -ENODEV : (((sd) && (sd)->ops->o && (sd)->ops->o->f) ? \ + (!(sd) ? -ENODEV : (((sd)->ops->o && (sd)->ops->o->f) ? \ (sd)->ops->o->f((sd) , ##args) : -ENOIOCTLCMD)) /* Send a notification to v4l2_device. */ -- cgit v1.3-8-gc7d7 From 4e89217b943cfb26f88f04920d44f2077931f0e7 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 27 Nov 2009 21:54:41 -0300 Subject: V4L/DVB (13531): ir-common: rename the debug routine to allow exporting it As newer IR common code will be added on other files, we need a global debug var inside the module. Signed-off-by: Mauro Carvalho Chehab --- drivers/media/common/ir-functions.c | 23 ++++++++++------------- include/media/ir-common.h | 4 ++++ 2 files changed, 14 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/drivers/media/common/ir-functions.c b/drivers/media/common/ir-functions.c index fc2d8941459f..b111a0d9409b 100644 --- a/drivers/media/common/ir-functions.c +++ b/drivers/media/common/ir-functions.c @@ -34,11 +34,8 @@ static int repeat = 1; module_param(repeat, int, 0444); MODULE_PARM_DESC(repeat,"auto-repeat for IR keys (default: on)"); -static int debug; /* debug level (0,1,2) */ -module_param(debug, int, 0644); - -#define dprintk(level, fmt, arg...) if (debug >= level) \ - printk(KERN_DEBUG fmt , ## arg) +int media_ir_debug; /* media_ir_debug level (0,1,2) */ +module_param_named(debug, media_ir_debug, int, 0644); /* -------------------------------------------------------------------------- */ @@ -49,7 +46,7 @@ static void ir_input_key_event(struct input_dev *dev, struct ir_input_state *ir) dev->name,ir->ir_key,ir->ir_raw,ir->keypressed); return; } - dprintk(1,"%s: key event code=%d down=%d\n", + IR_dprintk(1,"%s: key event code=%d down=%d\n", dev->name,ir->keycode,ir->keypressed); input_report_key(dev,ir->keycode,ir->keypressed); input_sync(dev); @@ -295,11 +292,11 @@ u32 ir_rc5_decode(unsigned int code) rc5 |= 1; break; case 3: - dprintk(1, "ir-common: ir_rc5_decode(%x) bad code\n", org_code); + IR_dprintk(1, "ir-common: ir_rc5_decode(%x) bad code\n", org_code); return 0; } } - dprintk(1, "ir-common: code=%x, rc5=%x, start=%x, toggle=%x, address=%x, " + IR_dprintk(1, "ir-common: code=%x, rc5=%x, start=%x, toggle=%x, address=%x, " "instr=%x\n", rc5, org_code, RC5_START(rc5), RC5_TOGGLE(rc5), RC5_ADDR(rc5), RC5_INSTR(rc5)); return rc5; @@ -331,20 +328,20 @@ void ir_rc5_timer_end(unsigned long data) /* Allow some timer jitter (RC5 is ~24ms anyway so this is ok) */ if (gap < 28000) { - dprintk(1, "ir-common: spurious timer_end\n"); + IR_dprintk(1, "ir-common: spurious timer_end\n"); return; } if (ir->last_bit < 20) { /* ignore spurious codes (caused by light/other remotes) */ - dprintk(1, "ir-common: short code: %x\n", ir->code); + IR_dprintk(1, "ir-common: short code: %x\n", ir->code); } else { ir->code = (ir->code << ir->shift_by) | 1; rc5 = ir_rc5_decode(ir->code); /* two start bits? */ if (RC5_START(rc5) != ir->start) { - dprintk(1, "ir-common: rc5 start bits invalid: %u\n", RC5_START(rc5)); + IR_dprintk(1, "ir-common: rc5 start bits invalid: %u\n", RC5_START(rc5)); /* right address? */ } else if (RC5_ADDR(rc5) == ir->addr) { @@ -354,7 +351,7 @@ void ir_rc5_timer_end(unsigned long data) /* Good code, decide if repeat/repress */ if (toggle != RC5_TOGGLE(ir->last_rc5) || instr != RC5_INSTR(ir->last_rc5)) { - dprintk(1, "ir-common: instruction %x, toggle %x\n", instr, + IR_dprintk(1, "ir-common: instruction %x, toggle %x\n", instr, toggle); ir_input_nokey(ir->dev, &ir->ir); ir_input_keydown(ir->dev, &ir->ir, instr, @@ -377,7 +374,7 @@ void ir_rc5_timer_keyup(unsigned long data) { struct card_ir *ir = (struct card_ir *)data; - dprintk(1, "ir-common: key released\n"); + IR_dprintk(1, "ir-common: key released\n"); ir_input_nokey(ir->dev, &ir->ir); } EXPORT_SYMBOL_GPL(ir_rc5_timer_keyup); diff --git a/include/media/ir-common.h b/include/media/ir-common.h index 5921776929e7..5964145d65e6 100644 --- a/include/media/ir-common.h +++ b/include/media/ir-common.h @@ -27,6 +27,10 @@ #include #include +extern int media_ir_debug; /* media_ir_debug level (0,1,2) */ +#define IR_dprintk(level, fmt, arg...) if (media_ir_debug >= level) \ + printk(KERN_DEBUG fmt , ## arg) + #define IR_TYPE_RC5 1 #define IR_TYPE_PD 2 /* Pulse distance encoded IR */ #define IR_TYPE_OTHER 99 -- cgit v1.3-8-gc7d7 From ef53a1159dfcdc1fecf5adb5b8d26803f194c09b Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 27 Nov 2009 22:01:23 -0300 Subject: V4L/DVB (13532): ir-common: Add infrastructure to use a dynamic keycode table V4L drivers use an static keycode vector with 128 entries, where the scancode indexes the keycode. While this works, it limits the scancodes to have only 7 bits, not allowing for example full RC5 codes. Instead of implementing the same code on every V4L driver, provide a common infrastructure to handle the bigger tables, minimizing the changes inside each driver. Signed-off-by: Mauro Carvalho Chehab --- drivers/media/common/Makefile | 2 +- drivers/media/common/ir-keytable.c | 156 +++++++++++++++++++++++++++++++++++++ include/media/ir-common.h | 12 ++- 3 files changed, 168 insertions(+), 2 deletions(-) create mode 100644 drivers/media/common/ir-keytable.c (limited to 'include') diff --git a/drivers/media/common/Makefile b/drivers/media/common/Makefile index 351b98b9b302..169b337b7c9d 100644 --- a/drivers/media/common/Makefile +++ b/drivers/media/common/Makefile @@ -1,6 +1,6 @@ saa7146-objs := saa7146_i2c.o saa7146_core.o saa7146_vv-objs := saa7146_fops.o saa7146_video.o saa7146_hlp.o saa7146_vbi.o -ir-common-objs := ir-functions.o ir-keymaps.o +ir-common-objs := ir-functions.o ir-keymaps.o ir-keytable.o obj-y += tuners/ obj-$(CONFIG_VIDEO_SAA7146) += saa7146.o diff --git a/drivers/media/common/ir-keytable.c b/drivers/media/common/ir-keytable.c new file mode 100644 index 000000000000..2de7caeef437 --- /dev/null +++ b/drivers/media/common/ir-keytable.c @@ -0,0 +1,156 @@ +/* ir-register.c - handle IR scancode->keycode tables + * + * Copyright (C) 2009 by Mauro Carvalho Chehab + */ + +#include + +#include + +/** + * ir_getkeycode() - get a keycode at the evdev scancode ->keycode table + * @dev: the struct input_dev device descriptor + * @scancode: the desired scancode + * @keycode: the keycode to be retorned. + * + * This routine is used to handle evdev EVIOCGKEY ioctl. + * If the key is not found, returns -EINVAL, otherwise, returns 0. + */ +static int ir_getkeycode(struct input_dev *dev, + int scancode, int *keycode) +{ + int i; + struct ir_scancode_table *rc_tab = input_get_drvdata(dev); + struct ir_scancode *keymap = rc_tab->scan; + + /* See if we can match the raw key code. */ + for (i = 0; i < rc_tab->size; i++) + if (keymap[i].scancode == scancode) { + *keycode = keymap[i].keycode; + return 0; + } + + /* + * If is there extra space, returns KEY_RESERVED, + * otherwise, input core won't let ir_setkeycode + * to work + */ + for (i = 0; i < rc_tab->size; i++) + if (keymap[i].keycode == KEY_RESERVED || + keymap[i].keycode == KEY_UNKNOWN) { + *keycode = KEY_RESERVED; + return 0; + } + + return -EINVAL; +} + +/** + * ir_setkeycode() - set a keycode at the evdev scancode ->keycode table + * @dev: the struct input_dev device descriptor + * @scancode: the desired scancode + * @keycode: the keycode to be retorned. + * + * This routine is used to handle evdev EVIOCSKEY ioctl. + * There's one caveat here: how can we increase the size of the table? + * If the key is not found, returns -EINVAL, otherwise, returns 0. + */ +static int ir_setkeycode(struct input_dev *dev, + int scancode, int keycode) +{ + int i; + struct ir_scancode_table *rc_tab = input_get_drvdata(dev); + struct ir_scancode *keymap = rc_tab->scan; + + /* Search if it is replacing an existing keycode */ + for (i = 0; i < rc_tab->size; i++) + if (keymap[i].scancode == scancode) { + keymap[i].keycode = keycode; + return 0; + } + + /* Search if is there a clean entry. If so, use it */ + for (i = 0; i < rc_tab->size; i++) + if (keymap[i].keycode == KEY_RESERVED || + keymap[i].keycode == KEY_UNKNOWN) { + keymap[i].scancode = scancode; + keymap[i].keycode = keycode; + return 0; + } + + /* + * FIXME: Currently, it is not possible to increase the size of + * scancode table. For it to happen, one possibility + * would be to allocate a table with key_map_size + 1, + * copying data, appending the new key on it, and freeing + * the old one - or maybe just allocating some spare space + */ + + return -EINVAL; +} + +/** + * ir_g_keycode_from_table() - gets the keycode that corresponds to a scancode + * @rc_tab: the ir_scancode_table with the keymap to be used + * @scancode: the scancode that we're seeking + * + * This routine is used by the input routines when a key is pressed at the + * IR. The scancode is received and needs to be converted into a keycode. + * If the key is not found, it returns KEY_UNKNOWN. Otherwise, returns the + * corresponding keycode from the table. + */ +u32 ir_g_keycode_from_table(struct input_dev *dev, u32 scancode) +{ + int i; + struct ir_scancode_table *rc_tab = input_get_drvdata(dev); + struct ir_scancode *keymap = rc_tab->scan; + + for (i = 0; i < rc_tab->size; i++) + if (keymap[i].scancode == scancode) { + IR_dprintk(1, "%s: scancode 0x%04x keycode 0x%02x\n", + dev->name, scancode, keymap[i].keycode); + + return keymap[i].keycode; + } + + printk(KERN_INFO "%s: unknown key for scancode 0x%04x\n", + dev->name, scancode); + + return KEY_UNKNOWN; +} +EXPORT_SYMBOL_GPL(ir_g_keycode_from_table); + +/** + * ir_set_keycode_table() - sets the IR keycode table and add the handlers + * for keymap table get/set + * @input_dev: the struct input_dev descriptor of the device + * @rc_tab: the struct ir_scancode_table table of scancode/keymap + * + * This routine is used to initialize the input infrastructure to work with + * an IR. It requires that the caller initializes the input_dev struct with + * some fields: name, + */ +int ir_set_keycode_table(struct input_dev *input_dev, + struct ir_scancode_table *rc_tab) +{ + struct ir_scancode *keymap = rc_tab->scan; + int i; + + if (rc_tab->scan == NULL || !rc_tab->size) + return -EINVAL; + + /* set the bits for the keys */ + IR_dprintk(1, "key map size: %d\n", rc_tab->size); + for (i = 0; i < rc_tab->size; i++) { + IR_dprintk(1, "#%d: setting bit for keycode 0x%04x\n", + i, keymap[i].keycode); + set_bit(keymap[i].keycode, input_dev->keybit); + } + + input_dev->getkeycode = ir_getkeycode; + input_dev->setkeycode = ir_setkeycode; + input_set_drvdata(input_dev, rc_tab); + + return 0; +} +EXPORT_SYMBOL_GPL(ir_set_keycode_table); diff --git a/include/media/ir-common.h b/include/media/ir-common.h index 5964145d65e6..805f1e09770f 100644 --- a/include/media/ir-common.h +++ b/include/media/ir-common.h @@ -106,6 +106,8 @@ struct card_ir { struct tasklet_struct tlet; }; +/* Routines from ir-functions.c */ + void ir_input_init(struct input_dev *dev, struct ir_input_state *ir, int ir_type, struct ir_scancode_table *ir_codes); void ir_input_nokey(struct input_dev *dev, struct ir_input_state *ir); @@ -120,7 +122,15 @@ u32 ir_rc5_decode(unsigned int code); void ir_rc5_timer_end(unsigned long data); void ir_rc5_timer_keyup(unsigned long data); -/* Keymaps to be used by other modules */ +/* Routines from ir-keytable.c */ + +u32 ir_g_keycode_from_table(struct input_dev *input_dev, + u32 scancode); + +int ir_set_keycode_table(struct input_dev *input_dev, + struct ir_scancode_table *rc_tab); + +/* scancode->keycode map tables from ir-keymaps.c */ extern struct ir_scancode_table ir_codes_empty_table; extern struct ir_scancode_table ir_codes_avermedia_table; -- cgit v1.3-8-gc7d7 From 8573b74af25c279de3e309beddcba984bee9ec15 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 27 Nov 2009 22:40:22 -0300 Subject: V4L/DVB (13533): ir: use dynamic tables, instead of static ones Signed-off-by: Mauro Carvalho Chehab --- drivers/media/common/ir-functions.c | 32 ++++++----------------------- drivers/media/common/ir-keytable.c | 2 -- drivers/media/dvb/dm1105/dm1105.c | 2 +- drivers/media/dvb/ttpci/budget-ci.c | 2 +- drivers/media/video/bt8xx/bttv-input.c | 8 ++++---- drivers/media/video/cx231xx/cx231xx-input.c | 3 +-- drivers/media/video/cx23885/cx23885-input.c | 3 +-- drivers/media/video/cx88/cx88-input.c | 14 ++++++------- drivers/media/video/em28xx/em28xx-input.c | 3 +-- drivers/media/video/ir-kbd-i2c.c | 2 +- drivers/media/video/saa7134/saa7134-input.c | 8 ++++---- include/media/ir-common.h | 5 ++--- 12 files changed, 29 insertions(+), 55 deletions(-) (limited to 'include') diff --git a/drivers/media/common/ir-functions.c b/drivers/media/common/ir-functions.c index b111a0d9409b..29885c2893d2 100644 --- a/drivers/media/common/ir-functions.c +++ b/drivers/media/common/ir-functions.c @@ -42,8 +42,8 @@ module_param_named(debug, media_ir_debug, int, 0644); static void ir_input_key_event(struct input_dev *dev, struct ir_input_state *ir) { if (KEY_RESERVED == ir->keycode) { - printk(KERN_INFO "%s: unknown key: key=0x%02x raw=0x%02x down=%d\n", - dev->name,ir->ir_key,ir->ir_raw,ir->keypressed); + printk(KERN_INFO "%s: unknown key: key=0x%02x down=%d\n", + dev->name, ir->ir_key, ir->keypressed); return; } IR_dprintk(1,"%s: key event code=%d down=%d\n", @@ -57,28 +57,10 @@ static void ir_input_key_event(struct input_dev *dev, struct ir_input_state *ir) void ir_input_init(struct input_dev *dev, struct ir_input_state *ir, int ir_type, struct ir_scancode_table *ir_codes) { - int i; - ir->ir_type = ir_type; - memset(ir->ir_codes, 0, sizeof(ir->ir_codes)); - - /* - * FIXME: This is a temporary workaround to use the new IR tables - * with the old approach. Later patches will replace this to a - * proper method - */ - - if (ir_codes) - for (i = 0; i < ir_codes->size; i++) - if (ir_codes->scan[i].scancode < IR_KEYTAB_SIZE) - ir->ir_codes[ir_codes->scan[i].scancode] = ir_codes->scan[i].keycode; + ir_set_keycode_table(dev, ir_codes); - dev->keycode = ir->ir_codes; - dev->keycodesize = sizeof(IR_KEYTAB_TYPE); - dev->keycodemax = IR_KEYTAB_SIZE; - for (i = 0; i < IR_KEYTAB_SIZE; i++) - set_bit(ir->ir_codes[i], dev->keybit); clear_bit(0, dev->keybit); set_bit(EV_KEY, dev->evbit); @@ -97,9 +79,9 @@ void ir_input_nokey(struct input_dev *dev, struct ir_input_state *ir) EXPORT_SYMBOL_GPL(ir_input_nokey); void ir_input_keydown(struct input_dev *dev, struct ir_input_state *ir, - u32 ir_key, u32 ir_raw) + u32 ir_key) { - u32 keycode = IR_KEYCODE(ir->ir_codes, ir_key); + u32 keycode = ir_g_keycode_from_table(dev, ir_key); if (ir->keypressed && ir->keycode != keycode) { ir->keypressed = 0; @@ -107,7 +89,6 @@ void ir_input_keydown(struct input_dev *dev, struct ir_input_state *ir, } if (!ir->keypressed) { ir->ir_key = ir_key; - ir->ir_raw = ir_raw; ir->keycode = keycode; ir->keypressed = 1; ir_input_key_event(dev,ir); @@ -354,8 +335,7 @@ void ir_rc5_timer_end(unsigned long data) IR_dprintk(1, "ir-common: instruction %x, toggle %x\n", instr, toggle); ir_input_nokey(ir->dev, &ir->ir); - ir_input_keydown(ir->dev, &ir->ir, instr, - instr); + ir_input_keydown(ir->dev, &ir->ir, instr); } /* Set/reset key-up timer */ diff --git a/drivers/media/common/ir-keytable.c b/drivers/media/common/ir-keytable.c index 2de7caeef437..65e352389b2d 100644 --- a/drivers/media/common/ir-keytable.c +++ b/drivers/media/common/ir-keytable.c @@ -118,7 +118,6 @@ u32 ir_g_keycode_from_table(struct input_dev *dev, u32 scancode) return KEY_UNKNOWN; } -EXPORT_SYMBOL_GPL(ir_g_keycode_from_table); /** * ir_set_keycode_table() - sets the IR keycode table and add the handlers @@ -153,4 +152,3 @@ int ir_set_keycode_table(struct input_dev *input_dev, return 0; } -EXPORT_SYMBOL_GPL(ir_set_keycode_table); diff --git a/drivers/media/dvb/dm1105/dm1105.c b/drivers/media/dvb/dm1105/dm1105.c index 2d099e271751..4c28632f94c4 100644 --- a/drivers/media/dvb/dm1105/dm1105.c +++ b/drivers/media/dvb/dm1105/dm1105.c @@ -510,7 +510,7 @@ static void dm1105_emit_key(struct work_struct *work) data = (ircom >> 8) & 0x7f; - ir_input_keydown(ir->input_dev, &ir->ir, data, data); + ir_input_keydown(ir->input_dev, &ir->ir, data); ir_input_nokey(ir->input_dev, &ir->ir); } diff --git a/drivers/media/dvb/ttpci/budget-ci.c b/drivers/media/dvb/ttpci/budget-ci.c index 88a0a5670ccf..adc4b954e5ad 100644 --- a/drivers/media/dvb/ttpci/budget-ci.c +++ b/drivers/media/dvb/ttpci/budget-ci.c @@ -178,7 +178,7 @@ static void msp430_ir_interrupt(unsigned long data) if (budget_ci->ir.last_raw != raw || !timer_pending(&budget_ci->ir.timer_keyup)) { ir_input_nokey(dev, &budget_ci->ir.state); ir_input_keydown(dev, &budget_ci->ir.state, - budget_ci->ir.ir_key, raw); + budget_ci->ir.ir_key); budget_ci->ir.last_raw = raw; } diff --git a/drivers/media/video/bt8xx/bttv-input.c b/drivers/media/video/bt8xx/bttv-input.c index ebd51afe8761..62408ccf34c8 100644 --- a/drivers/media/video/bt8xx/bttv-input.c +++ b/drivers/media/video/bt8xx/bttv-input.c @@ -73,12 +73,12 @@ static void ir_handle_key(struct bttv *btv) if ((ir->mask_keydown && (0 != (gpio & ir->mask_keydown))) || (ir->mask_keyup && (0 == (gpio & ir->mask_keyup)))) { - ir_input_keydown(ir->dev,&ir->ir,data,data); + ir_input_keydown(ir->dev, &ir->ir, data); } else { /* HACK: Probably, ir->mask_keydown is missing for this board */ if (btv->c.type == BTTV_BOARD_WINFAST2000) - ir_input_keydown(ir->dev, &ir->ir, data, data); + ir_input_keydown(ir->dev, &ir->ir, data); ir_input_nokey(ir->dev,&ir->ir); } @@ -104,7 +104,7 @@ static void ir_enltv_handle_key(struct bttv *btv) gpio, data, (gpio & ir->mask_keyup) ? " up" : "up/down"); - ir_input_keydown(ir->dev, &ir->ir, data, data); + ir_input_keydown(ir->dev, &ir->ir, data); if (keyup) ir_input_nokey(ir->dev, &ir->ir); } else { @@ -118,7 +118,7 @@ static void ir_enltv_handle_key(struct bttv *btv) if (keyup) ir_input_nokey(ir->dev, &ir->ir); else - ir_input_keydown(ir->dev, &ir->ir, data, data); + ir_input_keydown(ir->dev, &ir->ir, data); } ir->last_gpio = data | keyup; diff --git a/drivers/media/video/cx231xx/cx231xx-input.c b/drivers/media/video/cx231xx/cx231xx-input.c index 48f22fa38e6c..58dd39bc6787 100644 --- a/drivers/media/video/cx231xx/cx231xx-input.c +++ b/drivers/media/video/cx231xx/cx231xx-input.c @@ -126,8 +126,7 @@ static void cx231xx_ir_handle_key(struct cx231xx_IR *ir) if (do_sendkey) { dprintk("sending keypress\n"); - ir_input_keydown(ir->input, &ir->ir, poll_result.rc_data[0], - poll_result.rc_data[0]); + ir_input_keydown(ir->input, &ir->ir, poll_result.rc_data[0]); ir_input_nokey(ir->input, &ir->ir); } diff --git a/drivers/media/video/cx23885/cx23885-input.c b/drivers/media/video/cx23885/cx23885-input.c index f000ed787d4e..fea882d1fbcb 100644 --- a/drivers/media/video/cx23885/cx23885-input.c +++ b/drivers/media/video/cx23885/cx23885-input.c @@ -94,8 +94,7 @@ static void cx23885_input_process_raw_rc5(struct cx23885_dev *dev) RC5_START(ir_input->last_rc5) == 0) { /* This keypress is differnet: not an auto repeat */ ir_input_nokey(ir_input->dev, &ir_input->ir); - ir_input_keydown(ir_input->dev, &ir_input->ir, - command, ir_input->code); + ir_input_keydown(ir_input->dev, &ir_input->ir, command); } ir_input->last_rc5 = rc5; diff --git a/drivers/media/video/cx88/cx88-input.c b/drivers/media/video/cx88/cx88-input.c index 47c03019357d..7b2066415d7e 100644 --- a/drivers/media/video/cx88/cx88-input.c +++ b/drivers/media/video/cx88/cx88-input.c @@ -118,13 +118,13 @@ static void cx88_ir_handle_key(struct cx88_IR *ir) data = (data << 4) | ((gpio_key & 0xf0) >> 4); - ir_input_keydown(ir->input, &ir->ir, data, data); + ir_input_keydown(ir->input, &ir->ir, data); ir_input_nokey(ir->input, &ir->ir); } else if (ir->mask_keydown) { /* bit set on keydown */ if (gpio & ir->mask_keydown) { - ir_input_keydown(ir->input, &ir->ir, data, data); + ir_input_keydown(ir->input, &ir->ir, data); } else { ir_input_nokey(ir->input, &ir->ir); } @@ -132,14 +132,14 @@ static void cx88_ir_handle_key(struct cx88_IR *ir) } else if (ir->mask_keyup) { /* bit cleared on keydown */ if (0 == (gpio & ir->mask_keyup)) { - ir_input_keydown(ir->input, &ir->ir, data, data); + ir_input_keydown(ir->input, &ir->ir, data); } else { ir_input_nokey(ir->input, &ir->ir); } } else { /* can't distinguish keydown/up :-/ */ - ir_input_keydown(ir->input, &ir->ir, data, data); + ir_input_keydown(ir->input, &ir->ir, data); ir_input_nokey(ir->input, &ir->ir); } } @@ -487,7 +487,7 @@ void cx88_ir_irq(struct cx88_core *core) ir_dprintk("Key Code: %x\n", (ircode >> 16) & 0x7f); - ir_input_keydown(ir->input, &ir->ir, (ircode >> 16) & 0x7f, (ircode >> 16) & 0xff); + ir_input_keydown(ir->input, &ir->ir, (ircode >> 16) & 0x7f); ir->release = jiffies + msecs_to_jiffies(120); break; case CX88_BOARD_HAUPPAUGE: @@ -524,7 +524,7 @@ void cx88_ir_irq(struct cx88_core *core) if ( dev != 0x1e && dev != 0x1f ) /* not a hauppauge remote */ break; - ir_input_keydown(ir->input, &ir->ir, code, ircode); + ir_input_keydown(ir->input, &ir->ir, code); ir->release = jiffies + msecs_to_jiffies(120); break; case CX88_BOARD_PINNACLE_PCTV_HD_800i: @@ -532,7 +532,7 @@ void cx88_ir_irq(struct cx88_core *core) ir_dprintk("biphase decoded: %x\n", ircode); if ((ircode & 0xfffff000) != 0x3000) break; - ir_input_keydown(ir->input, &ir->ir, ircode & 0x3f, ircode); + ir_input_keydown(ir->input, &ir->ir, ircode & 0x3f); ir->release = jiffies + msecs_to_jiffies(120); break; } diff --git a/drivers/media/video/em28xx/em28xx-input.c b/drivers/media/video/em28xx/em28xx-input.c index 7a0fe3816e3d..990ee04bbd5e 100644 --- a/drivers/media/video/em28xx/em28xx-input.c +++ b/drivers/media/video/em28xx/em28xx-input.c @@ -282,8 +282,7 @@ static void em28xx_ir_handle_key(struct em28xx_IR *ir) if (do_sendkey) { dprintk("sending keypress\n"); - ir_input_keydown(ir->input, &ir->ir, poll_result.rc_data[0], - poll_result.rc_data[0]); + ir_input_keydown(ir->input, &ir->ir, poll_result.rc_data[0]); ir_input_nokey(ir->input, &ir->ir); } diff --git a/drivers/media/video/ir-kbd-i2c.c b/drivers/media/video/ir-kbd-i2c.c index aec36660987d..9c6d0ae58b1f 100644 --- a/drivers/media/video/ir-kbd-i2c.c +++ b/drivers/media/video/ir-kbd-i2c.c @@ -275,7 +275,7 @@ static void ir_key_poll(struct IR_i2c *ir) if (0 == rc) { ir_input_nokey(ir->input, &ir->ir); } else { - ir_input_keydown(ir->input, &ir->ir, ir_key, ir_raw); + ir_input_keydown(ir->input, &ir->ir, ir_key); } } diff --git a/drivers/media/video/saa7134/saa7134-input.c b/drivers/media/video/saa7134/saa7134-input.c index f658f2513992..439f3d54d009 100644 --- a/drivers/media/video/saa7134/saa7134-input.c +++ b/drivers/media/video/saa7134/saa7134-input.c @@ -102,14 +102,14 @@ static int build_key(struct saa7134_dev *dev) if (data == ir->mask_keycode) ir_input_nokey(ir->dev, &ir->ir); else - ir_input_keydown(ir->dev, &ir->ir, data, data); + ir_input_keydown(ir->dev, &ir->ir, data); return 0; } if (ir->polling) { if ((ir->mask_keydown && (0 != (gpio & ir->mask_keydown))) || (ir->mask_keyup && (0 == (gpio & ir->mask_keyup)))) { - ir_input_keydown(ir->dev, &ir->ir, data, data); + ir_input_keydown(ir->dev, &ir->ir, data); } else { ir_input_nokey(ir->dev, &ir->ir); } @@ -117,7 +117,7 @@ static int build_key(struct saa7134_dev *dev) else { /* IRQ driven mode - handle key press and release in one go */ if ((ir->mask_keydown && (0 != (gpio & ir->mask_keydown))) || (ir->mask_keyup && (0 == (gpio & ir->mask_keyup)))) { - ir_input_keydown(ir->dev, &ir->ir, data, data); + ir_input_keydown(ir->dev, &ir->ir, data); ir_input_nokey(ir->dev, &ir->ir); } } @@ -938,7 +938,7 @@ static void nec_task(unsigned long data) dprintk("scancode = 0x%02x (code = 0x%02x, notcode= 0x%02x)\n", ir->code, ircode, not_code); - ir_input_keydown(ir->dev, &ir->ir, ir->code, ir->code); + ir_input_keydown(ir->dev, &ir->ir, ir->code); } else dprintk("Repeat last key\n"); diff --git a/include/media/ir-common.h b/include/media/ir-common.h index 805f1e09770f..262347b61506 100644 --- a/include/media/ir-common.h +++ b/include/media/ir-common.h @@ -62,8 +62,7 @@ struct ir_input_state { IR_KEYTAB_TYPE ir_codes[IR_KEYTAB_SIZE]; /* key info */ - u32 ir_raw; /* raw data */ - u32 ir_key; /* ir key code */ + u32 ir_key; /* ir scancode */ u32 keycode; /* linux key code */ int keypressed; /* current state */ }; @@ -112,7 +111,7 @@ void ir_input_init(struct input_dev *dev, struct ir_input_state *ir, int ir_type, struct ir_scancode_table *ir_codes); void ir_input_nokey(struct input_dev *dev, struct ir_input_state *ir); void ir_input_keydown(struct input_dev *dev, struct ir_input_state *ir, - u32 ir_key, u32 ir_raw); + u32 ir_key); u32 ir_extract_bits(u32 data, u32 mask); int ir_dump_samples(u32 *samples, int count); int ir_decode_biphase(u32 *samples, int count, int low, int high); -- cgit v1.3-8-gc7d7 From 6d691237e61ed68a04b14e3c89364e481421d6e8 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 27 Nov 2009 22:51:56 -0300 Subject: V4L/DVB (13534): ir-common: Remove some unused fields/structs Now that the IR conversion to dynamic tables has finished, we can get rid of some fields and definitions that aren't used anymore. Signed-off-by: Mauro Carvalho Chehab --- include/media/ir-common.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/media/ir-common.h b/include/media/ir-common.h index 262347b61506..1a619a4ec5cc 100644 --- a/include/media/ir-common.h +++ b/include/media/ir-common.h @@ -35,9 +35,6 @@ extern int media_ir_debug; /* media_ir_debug level (0,1,2) */ #define IR_TYPE_PD 2 /* Pulse distance encoded IR */ #define IR_TYPE_OTHER 99 -#define IR_KEYTAB_TYPE u32 -#define IR_KEYTAB_SIZE 128 /* enougth for rc5, probably need more some day */ - struct ir_scancode { u16 scancode; u32 keycode; @@ -48,9 +45,6 @@ struct ir_scancode_table { int size; }; -#define IR_KEYCODE(tab,code) (((unsigned)code < IR_KEYTAB_SIZE) \ - ? tab[code] : KEY_RESERVED) - #define RC5_START(x) (((x)>>12)&3) #define RC5_TOGGLE(x) (((x)>>11)&1) #define RC5_ADDR(x) (((x)>>6)&31) @@ -59,7 +53,6 @@ struct ir_scancode_table { struct ir_input_state { /* configuration */ int ir_type; - IR_KEYTAB_TYPE ir_codes[IR_KEYTAB_SIZE]; /* key info */ u32 ir_key; /* ir scancode */ -- cgit v1.3-8-gc7d7 From 35d1988c6e19db3d4240e2a60c71b3a13abf0781 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 27 Nov 2009 23:25:13 -0300 Subject: V4L/DVB (13535): ir-common: Add a hauppauge new table with the complete RC5 code Now that V4L drivers can support more than 7 bits for scan code, let's add a modified version for the Hauppauge Grey IR containing the full IR scancode. Signed-off-by: Mauro Carvalho Chehab --- drivers/media/common/ir-keymaps.c | 71 +++++++++++++++++++++++++++++++++++++++ include/media/ir-common.h | 1 + 2 files changed, 72 insertions(+) (limited to 'include') diff --git a/drivers/media/common/ir-keymaps.c b/drivers/media/common/ir-keymaps.c index e353dffe696a..328c973a0838 100644 --- a/drivers/media/common/ir-keymaps.c +++ b/drivers/media/common/ir-keymaps.c @@ -1847,6 +1847,76 @@ struct ir_scancode_table ir_codes_hauppauge_new_table = { }; EXPORT_SYMBOL_GPL(ir_codes_hauppauge_new_table); +/* + * Hauppauge:the newer, gray remotes (seems there are multiple + * slightly different versions), shipped with cx88+ivtv cards. + * + * This table contains the complete RC5 code, instead of just the data part + */ +static struct ir_scancode ir_codes_rc5_hauppauge_new[] = { + /* Keys 0 to 9 */ + { 0x1e00, KEY_0 }, + { 0x1e01, KEY_1 }, + { 0x1e02, KEY_2 }, + { 0x1e03, KEY_3 }, + { 0x1e04, KEY_4 }, + { 0x1e05, KEY_5 }, + { 0x1e06, KEY_6 }, + { 0x1e07, KEY_7 }, + { 0x1e08, KEY_8 }, + { 0x1e09, KEY_9 }, + + { 0x1e0a, KEY_TEXT }, /* keypad asterisk as well */ + { 0x1e0b, KEY_RED }, /* red button */ + { 0x1e0c, KEY_RADIO }, + { 0x1e0d, KEY_MENU }, + { 0x1e0e, KEY_SUBTITLE }, /* also the # key */ + { 0x1e0f, KEY_MUTE }, + { 0x1e10, KEY_VOLUMEUP }, + { 0x1e11, KEY_VOLUMEDOWN }, + { 0x1e12, KEY_PREVIOUS }, /* previous channel */ + { 0x1e14, KEY_UP }, + { 0x1e15, KEY_DOWN }, + { 0x1e16, KEY_LEFT }, + { 0x1e17, KEY_RIGHT }, + { 0x1e18, KEY_VIDEO }, /* Videos */ + { 0x1e19, KEY_AUDIO }, /* Music */ + /* 0x1e1a: Pictures - presume this means + "Multimedia Home Platform" - + no "PICTURES" key in input.h + */ + { 0x1e1a, KEY_MHP }, + + { 0x1e1b, KEY_EPG }, /* Guide */ + { 0x1e1c, KEY_TV }, + { 0x1e1e, KEY_NEXTSONG }, /* skip >| */ + { 0x1e1f, KEY_EXIT }, /* back/exit */ + { 0x1e20, KEY_CHANNELUP }, /* channel / program + */ + { 0x1e21, KEY_CHANNELDOWN }, /* channel / program - */ + { 0x1e22, KEY_CHANNEL }, /* source (old black remote) */ + { 0x1e24, KEY_PREVIOUSSONG }, /* replay |< */ + { 0x1e25, KEY_ENTER }, /* OK */ + { 0x1e26, KEY_SLEEP }, /* minimize (old black remote) */ + { 0x1e29, KEY_BLUE }, /* blue key */ + { 0x1e2e, KEY_GREEN }, /* green button */ + { 0x1e30, KEY_PAUSE }, /* pause */ + { 0x1e32, KEY_REWIND }, /* backward << */ + { 0x1e34, KEY_FASTFORWARD }, /* forward >> */ + { 0x1e35, KEY_PLAY }, + { 0x1e36, KEY_STOP }, + { 0x1e37, KEY_RECORD }, /* recording */ + { 0x1e38, KEY_YELLOW }, /* yellow key */ + { 0x1e3b, KEY_SELECT }, /* top right button */ + { 0x1e3c, KEY_ZOOM }, /* full */ + { 0x1e3d, KEY_POWER }, /* system power (green button) */ +}; + +struct ir_scancode_table ir_codes_rc5_hauppauge_new_table = { + .scan = ir_codes_rc5_hauppauge_new, + .size = ARRAY_SIZE(ir_codes_rc5_hauppauge_new), +}; +EXPORT_SYMBOL_GPL(ir_codes_rc5_hauppauge_new_table); + static struct ir_scancode ir_codes_npgtech[] = { { 0x1d, KEY_SWITCHVIDEOMODE }, /* switch inputs */ { 0x2a, KEY_FRONT }, @@ -3243,3 +3313,4 @@ struct ir_scancode_table ir_codes_gadmei_rm008z_table = { .size = ARRAY_SIZE(ir_codes_gadmei_rm008z), }; EXPORT_SYMBOL_GPL(ir_codes_gadmei_rm008z_table); + diff --git a/include/media/ir-common.h b/include/media/ir-common.h index 1a619a4ec5cc..cd21cb55d5d5 100644 --- a/include/media/ir-common.h +++ b/include/media/ir-common.h @@ -157,6 +157,7 @@ extern struct ir_scancode_table ir_codes_rc5_tv_table; extern struct ir_scancode_table ir_codes_winfast_table; extern struct ir_scancode_table ir_codes_pinnacle_color_table; extern struct ir_scancode_table ir_codes_hauppauge_new_table; +extern struct ir_scancode_table ir_codes_rc5_hauppauge_new_table; extern struct ir_scancode_table ir_codes_npgtech_table; extern struct ir_scancode_table ir_codes_norwood_table; extern struct ir_scancode_table ir_codes_proteus_2309_table; -- cgit v1.3-8-gc7d7 From 055cd55601f948675006ca90362fc2bfaae90a86 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Sun, 29 Nov 2009 08:19:59 -0300 Subject: V4L/DVB (13537): ir: Prepare the code for dynamic keycode table allocation Currently, the IR table is initialized by calling ir_input_init(). However, this function doesn't return any error code, nor has a function to be called when de-initializing the IR's. Change the return argment to integer and make sure that each driver will handle the error code. Also adds a function to free any resources that may be allocating there: ir_input_free(). Signed-off-by: Mauro Carvalho Chehab --- drivers/media/common/ir-functions.c | 12 +++++++++++- drivers/media/dvb/dm1105/dm1105.c | 10 ++++++++-- drivers/media/dvb/ttpci/budget-ci.c | 14 +++++++++++--- drivers/media/video/bt8xx/bttv-input.c | 7 ++++++- drivers/media/video/cx231xx/cx231xx-input.c | 8 +++++++- drivers/media/video/cx23885/cx23885-input.c | 7 ++++++- drivers/media/video/cx88/cx88-input.c | 7 ++++++- drivers/media/video/em28xx/em28xx-input.c | 8 +++++++- drivers/media/video/ir-kbd-i2c.c | 7 ++++++- drivers/media/video/saa7134/saa7134-input.c | 7 ++++++- include/media/ir-common.h | 3 ++- 11 files changed, 76 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/drivers/media/common/ir-functions.c b/drivers/media/common/ir-functions.c index 29885c2893d2..b31bd27da374 100644 --- a/drivers/media/common/ir-functions.c +++ b/drivers/media/common/ir-functions.c @@ -54,11 +54,13 @@ static void ir_input_key_event(struct input_dev *dev, struct ir_input_state *ir) /* -------------------------------------------------------------------------- */ -void ir_input_init(struct input_dev *dev, struct ir_input_state *ir, +int ir_input_init(struct input_dev *dev, struct ir_input_state *ir, int ir_type, struct ir_scancode_table *ir_codes) { ir->ir_type = ir_type; + /* FIXME: Add the proper code to dynamically allocate IR table */ + ir_set_keycode_table(dev, ir_codes); clear_bit(0, dev->keybit); @@ -66,9 +68,17 @@ void ir_input_init(struct input_dev *dev, struct ir_input_state *ir, set_bit(EV_KEY, dev->evbit); if (repeat) set_bit(EV_REP, dev->evbit); + + return 0; } EXPORT_SYMBOL_GPL(ir_input_init); +void ir_input_free(struct input_dev *input_dev) +{ + /* FIXME: Add the proper code to free allocated resources */ +} +EXPORT_SYMBOL_GPL(ir_input_free); + void ir_input_nokey(struct input_dev *dev, struct ir_input_state *ir) { if (ir->keypressed) { diff --git a/drivers/media/dvb/dm1105/dm1105.c b/drivers/media/dvb/dm1105/dm1105.c index 4c28632f94c4..53e3f2a7d31a 100644 --- a/drivers/media/dvb/dm1105/dm1105.c +++ b/drivers/media/dvb/dm1105/dm1105.c @@ -589,7 +589,12 @@ int __devinit dm1105_ir_init(struct dm1105dvb *dm1105) snprintf(dm1105->ir.input_phys, sizeof(dm1105->ir.input_phys), "pci-%s/ir0", pci_name(dm1105->pdev)); - ir_input_init(input_dev, &dm1105->ir.ir, ir_type, ir_codes); + err = ir_input_init(input_dev, &dm1105->ir.ir, ir_type, ir_codes); + if (err < 0) { + input_free_device(input_dev); + return err; + } + input_dev->name = "DVB on-card IR receiver"; input_dev->phys = dm1105->ir.input_phys; input_dev->id.bustype = BUS_PCI; @@ -608,6 +613,7 @@ int __devinit dm1105_ir_init(struct dm1105dvb *dm1105) err = input_register_device(input_dev); if (err) { + ir_input_free(input_dev); input_free_device(input_dev); return err; } @@ -617,8 +623,8 @@ int __devinit dm1105_ir_init(struct dm1105dvb *dm1105) void __devexit dm1105_ir_exit(struct dm1105dvb *dm1105) { + ir_input_free(dm1105->ir.input_dev); input_unregister_device(dm1105->ir.input_dev); - } static int __devinit dm1105dvb_hw_init(struct dm1105dvb *dm1105dvb) diff --git a/drivers/media/dvb/ttpci/budget-ci.c b/drivers/media/dvb/ttpci/budget-ci.c index adc4b954e5ad..7d193ebc0aea 100644 --- a/drivers/media/dvb/ttpci/budget-ci.c +++ b/drivers/media/dvb/ttpci/budget-ci.c @@ -224,8 +224,10 @@ static int msp430_ir_init(struct budget_ci *budget_ci) case 0x1011: case 0x1012: /* The hauppauge keymap is a superset of these remotes */ - ir_input_init(input_dev, &budget_ci->ir.state, + error = ir_input_init(input_dev, &budget_ci->ir.state, IR_TYPE_RC5, &ir_codes_hauppauge_new_table); + if (error < 0) + goto out2; if (rc5_device < 0) budget_ci->ir.rc5_device = 0x1f; @@ -236,8 +238,10 @@ static int msp430_ir_init(struct budget_ci *budget_ci) case 0x1017: case 0x101a: /* for the Technotrend 1500 bundled remote */ - ir_input_init(input_dev, &budget_ci->ir.state, + error = ir_input_init(input_dev, &budget_ci->ir.state, IR_TYPE_RC5, &ir_codes_tt_1500_table); + if (error < 0) + goto out2; if (rc5_device < 0) budget_ci->ir.rc5_device = IR_DEVICE_ANY; @@ -246,8 +250,10 @@ static int msp430_ir_init(struct budget_ci *budget_ci) break; default: /* unknown remote */ - ir_input_init(input_dev, &budget_ci->ir.state, + error = ir_input_init(input_dev, &budget_ci->ir.state, IR_TYPE_RC5, &ir_codes_budget_ci_old_table); + if (error < 0) + goto out2; if (rc5_device < 0) budget_ci->ir.rc5_device = IR_DEVICE_ANY; @@ -280,6 +286,7 @@ static int msp430_ir_init(struct budget_ci *budget_ci) return 0; out2: + ir_input_free(input_dev); input_free_device(input_dev); out1: return error; @@ -297,6 +304,7 @@ static void msp430_ir_deinit(struct budget_ci *budget_ci) del_timer_sync(&dev->timer); ir_input_nokey(dev, &budget_ci->ir.state); + ir_input_free(dev); input_unregister_device(dev); } diff --git a/drivers/media/video/bt8xx/bttv-input.c b/drivers/media/video/bt8xx/bttv-input.c index 62408ccf34c8..84a957e52c4b 100644 --- a/drivers/media/video/bt8xx/bttv-input.c +++ b/drivers/media/video/bt8xx/bttv-input.c @@ -368,7 +368,10 @@ int bttv_input_init(struct bttv *btv) snprintf(ir->phys, sizeof(ir->phys), "pci-%s/ir0", pci_name(btv->c.pci)); - ir_input_init(input_dev, &ir->ir, ir_type, ir_codes); + err = ir_input_init(input_dev, &ir->ir, ir_type, ir_codes); + if (err < 0) + goto err_out_free; + input_dev->name = ir->name; input_dev->phys = ir->phys; input_dev->id.bustype = BUS_PCI; @@ -400,6 +403,7 @@ int bttv_input_init(struct bttv *btv) bttv_ir_stop(btv); btv->remote = NULL; err_out_free: + ir_input_free(input_dev); input_free_device(input_dev); kfree(ir); return err; @@ -411,6 +415,7 @@ void bttv_input_fini(struct bttv *btv) return; bttv_ir_stop(btv); + ir_input_free(btv->remote->dev); input_unregister_device(btv->remote->dev); kfree(btv->remote); btv->remote = NULL; diff --git a/drivers/media/video/cx231xx/cx231xx-input.c b/drivers/media/video/cx231xx/cx231xx-input.c index 58dd39bc6787..cd135f01b9c1 100644 --- a/drivers/media/video/cx231xx/cx231xx-input.c +++ b/drivers/media/video/cx231xx/cx231xx-input.c @@ -197,7 +197,11 @@ int cx231xx_ir_init(struct cx231xx *dev) usb_make_path(dev->udev, ir->phys, sizeof(ir->phys)); strlcat(ir->phys, "/input0", sizeof(ir->phys)); - ir_input_init(input_dev, &ir->ir, IR_TYPE_OTHER, dev->board.ir_codes); + err = ir_input_init(input_dev, &ir->ir, IR_TYPE_OTHER, + dev->board.ir_codes); + if (err < 0) + goto err_out_free; + input_dev->name = ir->name; input_dev->phys = ir->phys; input_dev->id.bustype = BUS_USB; @@ -222,6 +226,7 @@ err_out_stop: cx231xx_ir_stop(ir); dev->ir = NULL; err_out_free: + ir_input_free(input_dev); input_free_device(input_dev); kfree(ir); return err; @@ -236,6 +241,7 @@ int cx231xx_ir_fini(struct cx231xx *dev) return 0; cx231xx_ir_stop(ir); + ir_input_free(ir->input); input_unregister_device(ir->input); kfree(ir); diff --git a/drivers/media/video/cx23885/cx23885-input.c b/drivers/media/video/cx23885/cx23885-input.c index fea882d1fbcb..469e083dd5f8 100644 --- a/drivers/media/video/cx23885/cx23885-input.c +++ b/drivers/media/video/cx23885/cx23885-input.c @@ -377,7 +377,10 @@ int cx23885_input_init(struct cx23885_dev *dev) cx23885_boards[dev->board].name); snprintf(ir->phys, sizeof(ir->phys), "pci-%s/ir0", pci_name(dev->pci)); - ir_input_init(input_dev, &ir->ir, ir_type, ir_codes); + ret = ir_input_init(input_dev, &ir->ir, ir_type, ir_codes); + if (ret < 0) + goto err_out_free; + input_dev->name = ir->name; input_dev->phys = ir->phys; input_dev->id.bustype = BUS_PCI; @@ -404,6 +407,7 @@ err_out_stop: cx23885_input_ir_stop(dev); dev->ir_input = NULL; err_out_free: + ir_input_free(input_dev); input_free_device(input_dev); kfree(ir); return ret; @@ -416,6 +420,7 @@ void cx23885_input_fini(struct cx23885_dev *dev) if (dev->ir_input == NULL) return; + ir_input_free(dev->ir_input->dev); input_unregister_device(dev->ir_input->dev); kfree(dev->ir_input); dev->ir_input = NULL; diff --git a/drivers/media/video/cx88/cx88-input.c b/drivers/media/video/cx88/cx88-input.c index 7b2066415d7e..92b8cdf9fb81 100644 --- a/drivers/media/video/cx88/cx88-input.c +++ b/drivers/media/video/cx88/cx88-input.c @@ -360,7 +360,10 @@ int cx88_ir_init(struct cx88_core *core, struct pci_dev *pci) snprintf(ir->name, sizeof(ir->name), "cx88 IR (%s)", core->board.name); snprintf(ir->phys, sizeof(ir->phys), "pci-%s/ir0", pci_name(pci)); - ir_input_init(input_dev, &ir->ir, ir_type, ir_codes); + err = ir_input_init(input_dev, &ir->ir, ir_type, ir_codes); + if (err < 0) + goto err_out_free; + input_dev->name = ir->name; input_dev->phys = ir->phys; input_dev->id.bustype = BUS_PCI; @@ -390,6 +393,7 @@ int cx88_ir_init(struct cx88_core *core, struct pci_dev *pci) cx88_ir_stop(core, ir); core->ir = NULL; err_out_free: + ir_input_free(input_dev); input_free_device(input_dev); kfree(ir); return err; @@ -404,6 +408,7 @@ int cx88_ir_fini(struct cx88_core *core) return 0; cx88_ir_stop(core, ir); + ir_input_free(ir->input); input_unregister_device(ir->input); kfree(ir); diff --git a/drivers/media/video/em28xx/em28xx-input.c b/drivers/media/video/em28xx/em28xx-input.c index 5550de9c669f..d96ec7c09dca 100644 --- a/drivers/media/video/em28xx/em28xx-input.c +++ b/drivers/media/video/em28xx/em28xx-input.c @@ -367,7 +367,11 @@ int em28xx_ir_init(struct em28xx *dev) usb_make_path(dev->udev, ir->phys, sizeof(ir->phys)); strlcat(ir->phys, "/input0", sizeof(ir->phys)); - ir_input_init(input_dev, &ir->ir, IR_TYPE_OTHER, dev->board.ir_codes); + err = ir_input_init(input_dev, &ir->ir, IR_TYPE_OTHER, + dev->board.ir_codes); + if (err < 0) + goto err_out_free; + input_dev->name = ir->name; input_dev->phys = ir->phys; input_dev->id.bustype = BUS_USB; @@ -392,6 +396,7 @@ int em28xx_ir_init(struct em28xx *dev) em28xx_ir_stop(ir); dev->ir = NULL; err_out_free: + ir_input_free(input_dev); input_free_device(input_dev); kfree(ir); return err; @@ -406,6 +411,7 @@ int em28xx_ir_fini(struct em28xx *dev) return 0; em28xx_ir_stop(ir); + ir_input_free(ir->input); input_unregister_device(ir->input); kfree(ir); diff --git a/drivers/media/video/ir-kbd-i2c.c b/drivers/media/video/ir-kbd-i2c.c index 9c6d0ae58b1f..64360d26b32d 100644 --- a/drivers/media/video/ir-kbd-i2c.c +++ b/drivers/media/video/ir-kbd-i2c.c @@ -437,7 +437,10 @@ static int ir_probe(struct i2c_client *client, const struct i2c_device_id *id) dev_name(&client->dev)); /* init + register input device */ - ir_input_init(input_dev, &ir->ir, ir_type, ir->ir_codes); + err = ir_input_init(input_dev, &ir->ir, ir_type, ir->ir_codes); + if (err < 0) + goto err_out_free; + input_dev->id.bustype = BUS_I2C; input_dev->name = ir->name; input_dev->phys = ir->phys; @@ -456,6 +459,7 @@ static int ir_probe(struct i2c_client *client, const struct i2c_device_id *id) return 0; err_out_free: + ir_input_free(input_dev); input_free_device(input_dev); kfree(ir); return err; @@ -469,6 +473,7 @@ static int ir_remove(struct i2c_client *client) cancel_delayed_work_sync(&ir->work); /* unregister device */ + ir_input_free(ir->input); input_unregister_device(ir->input); /* free memory */ diff --git a/drivers/media/video/saa7134/saa7134-input.c b/drivers/media/video/saa7134/saa7134-input.c index 439f3d54d009..744918b1cd47 100644 --- a/drivers/media/video/saa7134/saa7134-input.c +++ b/drivers/media/video/saa7134/saa7134-input.c @@ -652,7 +652,10 @@ int saa7134_input_init1(struct saa7134_dev *dev) snprintf(ir->phys, sizeof(ir->phys), "pci-%s/ir0", pci_name(dev->pci)); - ir_input_init(input_dev, &ir->ir, ir_type, ir_codes); + err = ir_input_init(input_dev, &ir->ir, ir_type, ir_codes); + if (err < 0) + goto err_out_free; + input_dev->name = ir->name; input_dev->phys = ir->phys; input_dev->id.bustype = BUS_PCI; @@ -683,6 +686,7 @@ int saa7134_input_init1(struct saa7134_dev *dev) saa7134_ir_stop(dev); dev->remote = NULL; err_out_free: + ir_input_free(input_dev); input_free_device(input_dev); kfree(ir); return err; @@ -694,6 +698,7 @@ void saa7134_input_fini(struct saa7134_dev *dev) return; saa7134_ir_stop(dev); + ir_input_free(dev->remote->dev); input_unregister_device(dev->remote->dev); kfree(dev->remote); dev->remote = NULL; diff --git a/include/media/ir-common.h b/include/media/ir-common.h index cd21cb55d5d5..16b8f17bcfec 100644 --- a/include/media/ir-common.h +++ b/include/media/ir-common.h @@ -100,7 +100,7 @@ struct card_ir { /* Routines from ir-functions.c */ -void ir_input_init(struct input_dev *dev, struct ir_input_state *ir, +int ir_input_init(struct input_dev *dev, struct ir_input_state *ir, int ir_type, struct ir_scancode_table *ir_codes); void ir_input_nokey(struct input_dev *dev, struct ir_input_state *ir); void ir_input_keydown(struct input_dev *dev, struct ir_input_state *ir, @@ -121,6 +121,7 @@ u32 ir_g_keycode_from_table(struct input_dev *input_dev, int ir_set_keycode_table(struct input_dev *input_dev, struct ir_scancode_table *rc_tab); +void ir_input_free(struct input_dev *input_dev); /* scancode->keycode map tables from ir-keymaps.c */ -- cgit v1.3-8-gc7d7 From f6fc50494027e913ff0159e43c593cd75f35ec7a Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Sun, 29 Nov 2009 11:08:02 -0300 Subject: V4L/DVB (13538): ir-common: Use a dynamic keycode table Signed-off-by: Mauro Carvalho Chehab --- drivers/media/common/ir-functions.c | 19 ++++++---- drivers/media/common/ir-keytable.c | 75 +++++++++++++++++++++++++++++++++++++ include/media/ir-common.h | 6 +++ 3 files changed, 92 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/media/common/ir-functions.c b/drivers/media/common/ir-functions.c index b31bd27da374..e616f624ceaa 100644 --- a/drivers/media/common/ir-functions.c +++ b/drivers/media/common/ir-functions.c @@ -59,12 +59,20 @@ int ir_input_init(struct input_dev *dev, struct ir_input_state *ir, { ir->ir_type = ir_type; - /* FIXME: Add the proper code to dynamically allocate IR table */ + ir->keytable.size = ir_roundup_tablesize(ir_codes->size); + ir->keytable.scan = kzalloc(ir->keytable.size * + sizeof(struct ir_scancode), GFP_KERNEL); + if (!ir->keytable.scan) + return -ENOMEM; - ir_set_keycode_table(dev, ir_codes); + IR_dprintk(1, "Allocated space for %d keycode entries (%zd bytes)\n", + ir->keytable.size, + ir->keytable.size * sizeof(ir->keytable.scan)); - clear_bit(0, dev->keybit); + ir_copy_table(&ir->keytable, ir_codes); + ir_set_keycode_table(dev, &ir->keytable); + clear_bit(0, dev->keybit); set_bit(EV_KEY, dev->evbit); if (repeat) set_bit(EV_REP, dev->evbit); @@ -73,11 +81,6 @@ int ir_input_init(struct input_dev *dev, struct ir_input_state *ir, } EXPORT_SYMBOL_GPL(ir_input_init); -void ir_input_free(struct input_dev *input_dev) -{ - /* FIXME: Add the proper code to free allocated resources */ -} -EXPORT_SYMBOL_GPL(ir_input_free); void ir_input_nokey(struct input_dev *dev, struct ir_input_state *ir) { diff --git a/drivers/media/common/ir-keytable.c b/drivers/media/common/ir-keytable.c index 65e352389b2d..6e3cc78c33b2 100644 --- a/drivers/media/common/ir-keytable.c +++ b/drivers/media/common/ir-keytable.c @@ -7,6 +7,68 @@ #include +#define IR_TAB_MIN_SIZE 32 + +/** + * ir_roundup_tablesize() - gets an optimum value for the table size + * @n_elems: minimum number of entries to store keycodes + * + * This routine is used to choose the keycode table size. + * + * In order to have some empty space for new keycodes, + * and knowing in advance that kmalloc allocates only power of two + * segments, it optimizes the allocated space to have some spare space + * for those new keycodes by using the maximum number of entries that + * will be effectively be allocated by kmalloc. + * In order to reduce the quantity of table resizes, it has a minimum + * table size of IR_TAB_MIN_SIZE. + */ +int ir_roundup_tablesize(int n_elems) +{ + size_t size; + + if (n_elems < IR_TAB_MIN_SIZE) + n_elems = IR_TAB_MIN_SIZE; + + /* + * As kmalloc only allocates sizes of power of two, get as + * much entries as possible for the allocated memory segment + */ + size = roundup_pow_of_two(n_elems * sizeof(struct ir_scancode)); + n_elems = size / sizeof(struct ir_scancode); + + return n_elems; +} + +/** + * ir_copy_table() - copies a keytable, discarding the unused entries + * @destin: destin table + * @origin: origin table + * + * Copies all entries where the keycode is not KEY_UNKNOWN/KEY_RESERVED + */ + +int ir_copy_table(struct ir_scancode_table *destin, + const struct ir_scancode_table *origin) +{ + int i, j = 0; + + for (i = 0; i < origin->size; i++) { + if (origin->scan[i].keycode != KEY_UNKNOWN && + origin->scan[i].keycode != KEY_RESERVED) { + memcpy(&destin->scan[j], &origin->scan[i], + sizeof(struct ir_scancode)); + j++; + } + } + destin->size = j; + + IR_dprintk(1, "Copied %d scancodes to the new keycode table\n", j); + + return 0; +} + + /** * ir_getkeycode() - get a keycode at the evdev scancode ->keycode table * @dev: the struct input_dev device descriptor @@ -152,3 +214,16 @@ int ir_set_keycode_table(struct input_dev *input_dev, return 0; } + +void ir_input_free(struct input_dev *dev) +{ + struct ir_scancode_table *rc_tab = input_get_drvdata(dev); + + IR_dprintk(1, "Freed keycode table\n"); + + rc_tab->size = 0; + kfree(rc_tab->scan); + rc_tab->scan = NULL; +} +EXPORT_SYMBOL_GPL(ir_input_free); + diff --git a/include/media/ir-common.h b/include/media/ir-common.h index 16b8f17bcfec..72df0467d2b9 100644 --- a/include/media/ir-common.h +++ b/include/media/ir-common.h @@ -54,6 +54,8 @@ struct ir_input_state { /* configuration */ int ir_type; + struct ir_scancode_table keytable; + /* key info */ u32 ir_key; /* ir scancode */ u32 keycode; /* linux key code */ @@ -121,6 +123,10 @@ u32 ir_g_keycode_from_table(struct input_dev *input_dev, int ir_set_keycode_table(struct input_dev *input_dev, struct ir_scancode_table *rc_tab); + +int ir_roundup_tablesize(int n_elems); +int ir_copy_table(struct ir_scancode_table *destin, + const struct ir_scancode_table *origin); void ir_input_free(struct input_dev *input_dev); /* scancode->keycode map tables from ir-keymaps.c */ -- cgit v1.3-8-gc7d7 From a53e21257171af42c9fa6aee417f7891744d6ebf Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 2 Dec 2009 15:44:30 -0300 Subject: V4L/DVB (13539): ir-common: add __func__ for debug messages Signed-off-by: Mauro Carvalho Chehab --- include/media/ir-common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/media/ir-common.h b/include/media/ir-common.h index 72df0467d2b9..452f6e86084d 100644 --- a/include/media/ir-common.h +++ b/include/media/ir-common.h @@ -29,7 +29,7 @@ extern int media_ir_debug; /* media_ir_debug level (0,1,2) */ #define IR_dprintk(level, fmt, arg...) if (media_ir_debug >= level) \ - printk(KERN_DEBUG fmt , ## arg) + printk(KERN_DEBUG "%s: " fmt , __func__, ## arg) #define IR_TYPE_RC5 1 #define IR_TYPE_PD 2 /* Pulse distance encoded IR */ -- cgit v1.3-8-gc7d7 From 7fee03e487e87a196deb5602ee3c7676511995c9 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 2 Dec 2009 15:56:47 -0300 Subject: V4L/DVB (13540): ir-common: Cleanup get key evdev code The same loop to seek for a key were used on different places. Also, no spinlock were protecting it to avoid the risk of replacing a keycode while seeking for a new code. This cleanup does: - create an unique function to seek for a code; - adds an spinlock to protect the table lookup; - remove some unused code; - simplifies to code to make it easier to understand. Basically no change in behavior should be noticed after this patch. Signed-off-by: Mauro Carvalho Chehab --- drivers/media/common/ir-keytable.c | 137 ++++++++++++++++++++----------------- include/media/ir-common.h | 2 + 2 files changed, 77 insertions(+), 62 deletions(-) (limited to 'include') diff --git a/drivers/media/common/ir-keytable.c b/drivers/media/common/ir-keytable.c index 6e3cc78c33b2..b06f023ca760 100644 --- a/drivers/media/common/ir-keytable.c +++ b/drivers/media/common/ir-keytable.c @@ -9,6 +9,38 @@ #define IR_TAB_MIN_SIZE 32 +/** + * ir_seek_table() - returns the element order on the table + * @rc_tab: the ir_scancode_table with the keymap to be used + * @scancode: the scancode that we're seeking + * + * This routine is used by the input routines when a key is pressed at the + * IR. The scancode is received and needs to be converted into a keycode. + * If the key is not found, it returns KEY_UNKNOWN. Otherwise, returns the + * corresponding keycode from the table. + */ +static int ir_seek_table(struct ir_scancode_table *rc_tab, u32 scancode) +{ + int rc; + unsigned long flags; + struct ir_scancode *keymap = rc_tab->scan; + + spin_lock_irqsave(&rc_tab->lock, flags); + + /* FIXME: replace it by a binary search */ + + for (rc = 0; rc < rc_tab->size; rc++) + if (keymap[rc].scancode == scancode) + goto exit; + + /* Not found */ + rc = -EINVAL; + +exit: + spin_unlock_irqrestore(&rc_tab->lock, flags); + return rc; +} + /** * ir_roundup_tablesize() - gets an optimum value for the table size * @n_elems: minimum number of entries to store keycodes @@ -54,21 +86,20 @@ int ir_copy_table(struct ir_scancode_table *destin, int i, j = 0; for (i = 0; i < origin->size; i++) { - if (origin->scan[i].keycode != KEY_UNKNOWN && - origin->scan[i].keycode != KEY_RESERVED) { - memcpy(&destin->scan[j], &origin->scan[i], - sizeof(struct ir_scancode)); - j++; - } + if (origin->scan[i].keycode == KEY_UNKNOWN || + origin->scan[i].keycode == KEY_RESERVED) + continue; + + memcpy(&destin->scan[j], &origin->scan[i], sizeof(struct ir_scancode)); + j++; } destin->size = j; - IR_dprintk(1, "Copied %d scancodes to the new keycode table\n", j); + IR_dprintk(1, "Copied %d scancodes to the new keycode table\n", destin->size); return 0; } - /** * ir_getkeycode() - get a keycode at the evdev scancode ->keycode table * @dev: the struct input_dev device descriptor @@ -81,28 +112,14 @@ int ir_copy_table(struct ir_scancode_table *destin, static int ir_getkeycode(struct input_dev *dev, int scancode, int *keycode) { - int i; + int elem; struct ir_scancode_table *rc_tab = input_get_drvdata(dev); - struct ir_scancode *keymap = rc_tab->scan; - - /* See if we can match the raw key code. */ - for (i = 0; i < rc_tab->size; i++) - if (keymap[i].scancode == scancode) { - *keycode = keymap[i].keycode; - return 0; - } - /* - * If is there extra space, returns KEY_RESERVED, - * otherwise, input core won't let ir_setkeycode - * to work - */ - for (i = 0; i < rc_tab->size; i++) - if (keymap[i].keycode == KEY_RESERVED || - keymap[i].keycode == KEY_UNKNOWN) { - *keycode = KEY_RESERVED; - return 0; - } + elem = ir_seek_table(rc_tab, scancode); + if (elem >= 0) { + *keycode = rc_tab->scan[elem].keycode; + return 0; + } return -EINVAL; } @@ -120,40 +137,33 @@ static int ir_getkeycode(struct input_dev *dev, static int ir_setkeycode(struct input_dev *dev, int scancode, int keycode) { - int i; + int rc = 0; struct ir_scancode_table *rc_tab = input_get_drvdata(dev); struct ir_scancode *keymap = rc_tab->scan; + unsigned long flags; /* Search if it is replacing an existing keycode */ - for (i = 0; i < rc_tab->size; i++) - if (keymap[i].scancode == scancode) { - keymap[i].keycode = keycode; - return 0; - } - - /* Search if is there a clean entry. If so, use it */ - for (i = 0; i < rc_tab->size; i++) - if (keymap[i].keycode == KEY_RESERVED || - keymap[i].keycode == KEY_UNKNOWN) { - keymap[i].scancode = scancode; - keymap[i].keycode = keycode; - return 0; - } + rc = ir_seek_table(rc_tab, scancode); + if (rc <0) + return rc; - /* - * FIXME: Currently, it is not possible to increase the size of - * scancode table. For it to happen, one possibility - * would be to allocate a table with key_map_size + 1, - * copying data, appending the new key on it, and freeing - * the old one - or maybe just allocating some spare space - */ + IR_dprintk(1, "#%d: Replacing scan 0x%04x with key 0x%04x\n", + rc, scancode, keycode); - return -EINVAL; + clear_bit(keymap[rc].keycode, dev->keybit); + + spin_lock_irqsave(&rc_tab->lock, flags); + keymap[rc].keycode = keycode; + spin_unlock_irqrestore(&rc_tab->lock, flags); + + set_bit(keycode, dev->keybit); + + return 0; } /** * ir_g_keycode_from_table() - gets the keycode that corresponds to a scancode - * @rc_tab: the ir_scancode_table with the keymap to be used + * @input_dev: the struct input_dev descriptor of the device * @scancode: the scancode that we're seeking * * This routine is used by the input routines when a key is pressed at the @@ -163,22 +173,23 @@ static int ir_setkeycode(struct input_dev *dev, */ u32 ir_g_keycode_from_table(struct input_dev *dev, u32 scancode) { - int i; struct ir_scancode_table *rc_tab = input_get_drvdata(dev); struct ir_scancode *keymap = rc_tab->scan; + int elem; - for (i = 0; i < rc_tab->size; i++) - if (keymap[i].scancode == scancode) { - IR_dprintk(1, "%s: scancode 0x%04x keycode 0x%02x\n", - dev->name, scancode, keymap[i].keycode); + elem = ir_seek_table(rc_tab, scancode); + if (elem >= 0) { + IR_dprintk(1, "%s: scancode 0x%04x keycode 0x%02x\n", + dev->name, scancode, keymap[elem].keycode); - return keymap[i].keycode; - } + return rc_tab->scan[elem].keycode; + } printk(KERN_INFO "%s: unknown key for scancode 0x%04x\n", dev->name, scancode); - return KEY_UNKNOWN; + /* Reports userspace that an unknown keycode were got */ + return KEY_RESERVED; } /** @@ -188,8 +199,8 @@ u32 ir_g_keycode_from_table(struct input_dev *dev, u32 scancode) * @rc_tab: the struct ir_scancode_table table of scancode/keymap * * This routine is used to initialize the input infrastructure to work with - * an IR. It requires that the caller initializes the input_dev struct with - * some fields: name, + * an IR. + * It should be called before registering the IR device. */ int ir_set_keycode_table(struct input_dev *input_dev, struct ir_scancode_table *rc_tab) @@ -197,6 +208,8 @@ int ir_set_keycode_table(struct input_dev *input_dev, struct ir_scancode *keymap = rc_tab->scan; int i; + spin_lock_init(&rc_tab->lock); + if (rc_tab->scan == NULL || !rc_tab->size) return -EINVAL; diff --git a/include/media/ir-common.h b/include/media/ir-common.h index 452f6e86084d..e41a99ee353e 100644 --- a/include/media/ir-common.h +++ b/include/media/ir-common.h @@ -26,6 +26,7 @@ #include #include #include +#include extern int media_ir_debug; /* media_ir_debug level (0,1,2) */ #define IR_dprintk(level, fmt, arg...) if (media_ir_debug >= level) \ @@ -43,6 +44,7 @@ struct ir_scancode { struct ir_scancode_table { struct ir_scancode *scan; int size; + spinlock_t lock; }; #define RC5_START(x) (((x)>>12)&3) -- cgit v1.3-8-gc7d7 From 7a1a8eb58a2c6cd819d17332c5a2c369203635d5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 3 Dec 2009 21:19:18 +0100 Subject: PM: Add flag for devices capable of generating run-time wake-up events Apparently, there are devices that can wake up the system from sleep states and yet are incapable of generating wake-up events at run time. Thus, introduce a flag indicating if given device is capable of generating run-time wake-up events. Signed-off-by: Rafael J. Wysocki --- Documentation/power/runtime_pm.txt | 7 +++++-- include/linux/pm.h | 8 +++++--- include/linux/pm_runtime.h | 12 ++++++++++++ 3 files changed, 22 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt index 6bb25cb24da9..4a3109b28847 100644 --- a/Documentation/power/runtime_pm.txt +++ b/Documentation/power/runtime_pm.txt @@ -71,9 +71,9 @@ what to do to handle the device). purpose). In particular, if the driver requires remote wakeup capability for proper -functioning and device_may_wakeup() returns 'false' for the device, then +functioning and device_run_wake() returns 'false' for the device, then ->runtime_suspend() should return -EBUSY. On the other hand, if -device_may_wakeup() returns 'true' for the device and the device is put +device_run_wake() returns 'true' for the device and the device is put into a low power state during the execution of its bus type's ->runtime_suspend(), it is expected that remote wake-up (i.e. hardware mechanism allowing the device to request a change of its power state, such as PCI PME) @@ -215,6 +215,9 @@ defined in include/linux/pm.h: being executed for that device and it is not practical to wait for the suspend to complete; means "start a resume as soon as you've suspended" + unsigned int run_wake; + - set if the device is capable of generating run-time wake-up events + enum rpm_status runtime_status; - the run-time PM status of the device; this field's initial value is RPM_SUSPENDED, which means that each device is initially regarded by the diff --git a/include/linux/pm.h b/include/linux/pm.h index 3b7e04b95bd2..0d65934246af 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -178,9 +178,10 @@ typedef struct pm_message { * This need not mean that the device should be put into a low power state. * For example, if the device is behind a link which is about to be turned * off, the device may remain at full power. If the device does go to low - * power and if device_may_wakeup(dev) is true, remote wake-up (i.e., a - * hardware mechanism allowing the device to request a change of its power - * state, such as PCI PME) should be enabled for it. + * power and is capable of generating run-time wake-up events, remote + * wake-up (i.e., a hardware mechanism allowing the device to request a + * change of its power state via a wake-up event, such as PCI PME) should + * be enabled for it. * * @runtime_resume: Put the device into the fully active state in response to a * wake-up event generated by hardware or at the request of software. If @@ -428,6 +429,7 @@ struct dev_pm_info { unsigned int idle_notification:1; unsigned int request_pending:1; unsigned int deferred_resume:1; + unsigned int run_wake:1; enum rpm_request request; enum rpm_status runtime_status; int runtime_error; diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 44087044910f..370ce0a6fe4a 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -50,6 +50,16 @@ static inline void pm_runtime_put_noidle(struct device *dev) atomic_add_unless(&dev->power.usage_count, -1, 0); } +static inline bool device_run_wake(struct device *dev) +{ + return dev->power.run_wake; +} + +static inline void device_set_run_wake(struct device *dev, bool enable) +{ + dev->power.run_wake = enable; +} + #else /* !CONFIG_PM_RUNTIME */ static inline int pm_runtime_idle(struct device *dev) { return -ENOSYS; } @@ -73,6 +83,8 @@ static inline bool pm_children_suspended(struct device *dev) { return false; } static inline void pm_suspend_ignore_children(struct device *dev, bool en) {} static inline void pm_runtime_get_noresume(struct device *dev) {} static inline void pm_runtime_put_noidle(struct device *dev) {} +static inline bool device_run_wake(struct device *dev) { return false; } +static inline void device_set_run_wake(struct device *dev, bool enable) {} #endif /* !CONFIG_PM_RUNTIME */ -- cgit v1.3-8-gc7d7 From 194684e596af4bdaebb424166d94a8aa528edfda Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Sun, 6 Dec 2009 17:06:22 +0100 Subject: i2c: Prevent priority inversion on top of bus lock Low priority thread holding the i2c bus mutex could block higher priority threads to access the bus resulting in unacceptable latencies. Change the mutex type to rt_mutex preventing priority inversion. Tested-by: Peter Ujfalusi Signed-off-by: Mika Kuoppala Signed-off-by: Jean Delvare --- drivers/i2c/Kconfig | 1 + drivers/i2c/i2c-core.c | 12 ++++++------ include/linux/i2c.h | 7 +++---- 3 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/i2c/Kconfig b/drivers/i2c/Kconfig index d7ece131b4f4..8d8a00e5a30e 100644 --- a/drivers/i2c/Kconfig +++ b/drivers/i2c/Kconfig @@ -5,6 +5,7 @@ menuconfig I2C tristate "I2C support" depends on HAS_IOMEM + select RT_MUTEXES ---help--- I2C (pronounce: I-square-C) is a slow serial bus protocol used in many micro controller applications and developed by Philips. SMBus, diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index 296504355142..d664b4a97a31 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -584,7 +584,7 @@ static int i2c_register_adapter(struct i2c_adapter *adap) goto out_list; } - mutex_init(&adap->bus_lock); + rt_mutex_init(&adap->bus_lock); /* Set default timeout to 1 second if not already set */ if (adap->timeout == 0) @@ -1092,12 +1092,12 @@ int i2c_transfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num) #endif if (in_atomic() || irqs_disabled()) { - ret = mutex_trylock(&adap->bus_lock); + ret = rt_mutex_trylock(&adap->bus_lock); if (!ret) /* I2C activity is ongoing. */ return -EAGAIN; } else { - mutex_lock_nested(&adap->bus_lock, adap->level); + rt_mutex_lock(&adap->bus_lock); } /* Retry automatically on arbitration loss */ @@ -1109,7 +1109,7 @@ int i2c_transfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num) if (time_after(jiffies, orig_jiffies + adap->timeout)) break; } - mutex_unlock(&adap->bus_lock); + rt_mutex_unlock(&adap->bus_lock); return ret; } else { @@ -1913,7 +1913,7 @@ s32 i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr, unsigned short flags, flags &= I2C_M_TEN | I2C_CLIENT_PEC; if (adapter->algo->smbus_xfer) { - mutex_lock(&adapter->bus_lock); + rt_mutex_lock(&adapter->bus_lock); /* Retry automatically on arbitration loss */ orig_jiffies = jiffies; @@ -1927,7 +1927,7 @@ s32 i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr, unsigned short flags, orig_jiffies + adapter->timeout)) break; } - mutex_unlock(&adapter->bus_lock); + rt_mutex_unlock(&adapter->bus_lock); } else res = i2c_smbus_xfer_emulated(adapter,addr,flags,read_write, command, protocol, data); diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 7b40cda57a70..52317fb5917e 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -338,8 +338,7 @@ struct i2c_adapter { void *algo_data; /* data fields that are valid for all devices */ - u8 level; /* nesting level for lockdep */ - struct mutex bus_lock; + struct rt_mutex bus_lock; int timeout; /* in jiffies */ int retries; @@ -367,7 +366,7 @@ static inline void i2c_set_adapdata(struct i2c_adapter *dev, void *data) */ static inline void i2c_lock_adapter(struct i2c_adapter *adapter) { - mutex_lock(&adapter->bus_lock); + rt_mutex_lock(&adapter->bus_lock); } /** @@ -376,7 +375,7 @@ static inline void i2c_lock_adapter(struct i2c_adapter *adapter) */ static inline void i2c_unlock_adapter(struct i2c_adapter *adapter) { - mutex_unlock(&adapter->bus_lock); + rt_mutex_unlock(&adapter->bus_lock); } /*flags for the client struct: */ -- cgit v1.3-8-gc7d7 From c7b25a9e96dc89954ae8d8f473f56fae62030f84 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Sun, 6 Dec 2009 17:06:24 +0100 Subject: i2c: Drop probe, ignore and force module parameters The legacy probe and force module parameters are obsolete now, the same can be achieved using the new_device sysfs interface, which is both more flexible and cheaper (it is implemented by i2c-core rather than replicated in every driver module.) The legacy ignore module parameters can be dropped as well. Ignoring can be done by instantiating a "dummy" device at the problematic address. This is the first step of a huge cleanup to i2c-core's i2c_detect function, i2c.h's I2C_CLIENT_INSMOD* macros, and all drivers that made use of them. Signed-off-by: Jean Delvare --- Documentation/i2c/old-module-parameters | 44 ++++++++++++++++ drivers/i2c/i2c-core.c | 65 +---------------------- include/linux/i2c.h | 91 +-------------------------------- 3 files changed, 46 insertions(+), 154 deletions(-) create mode 100644 Documentation/i2c/old-module-parameters (limited to 'include') diff --git a/Documentation/i2c/old-module-parameters b/Documentation/i2c/old-module-parameters new file mode 100644 index 000000000000..8e2b629d533c --- /dev/null +++ b/Documentation/i2c/old-module-parameters @@ -0,0 +1,44 @@ +I2C device driver binding control from user-space +================================================= + +Up to kernel 2.6.32, many i2c drivers used helper macros provided by + which created standard module parameters to let the user +control how the driver would probe i2c buses and attach to devices. These +parameters were known as "probe" (to let the driver probe for an extra +address), "force" (to forcibly attach the driver to a given device) and +"ignore" (to prevent a driver from probing a given address). + +With the conversion of the i2c subsystem to the standard device driver +binding model, it became clear that these per-module parameters were no +longer needed, and that a centralized implementation was possible. The new, +sysfs-based interface is described in the documentation file +"instantiating-devices", section "Method 4: Instantiate from user-space". + +Below is a mapping from the old module parameters to the new interface. + +Attaching a driver to an I2C device +----------------------------------- + +Old method (module parameters): +# modprobe probe=1,0x2d +# modprobe force=1,0x2d +# modprobe force_=1,0x2d + +New method (sysfs interface): +# echo 0x2d > /sys/bus/i2c/devices/i2c-1/new_device + +Preventing a driver from attaching to an I2C device +--------------------------------------------------- + +Old method (module parameters): +# modprobe ignore=1,0x2f + +New method (sysfs interface): +# echo dummy 0x2f > /sys/bus/i2c/devices/i2c-1/new_device +# modprobe + +Of course, it is important to instantiate the "dummy" device before loading +the driver. The dummy device will be handled by i2c-core itself, preventing +other drivers from binding to it later on. If there is a real device at the +problematic address, and you want another driver to bind to it, then simply +pass the name of the device in question instead of "dummy". diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index d664b4a97a31..fdfaebdf3bfe 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -1259,40 +1259,13 @@ static int i2c_detect(struct i2c_adapter *adapter, struct i2c_driver *driver) return -ENOMEM; temp_client->adapter = adapter; - /* Force entries are done first, and are not affected by ignore - entries */ - if (address_data->forces) { - const unsigned short * const *forces = address_data->forces; - int kind; - - for (kind = 0; forces[kind]; kind++) { - for (i = 0; forces[kind][i] != I2C_CLIENT_END; - i += 2) { - if (forces[kind][i] == adap_id - || forces[kind][i] == ANY_I2C_BUS) { - dev_dbg(&adapter->dev, "found force " - "parameter for adapter %d, " - "addr 0x%02x, kind %d\n", - adap_id, forces[kind][i + 1], - kind); - temp_client->addr = forces[kind][i + 1]; - err = i2c_detect_address(temp_client, - kind, driver); - if (err) - goto exit_free; - } - } - } - } - /* Stop here if the classes do not match */ if (!(adapter->class & driver->class)) goto exit_free; /* Stop here if we can't use SMBUS_QUICK */ if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_QUICK)) { - if (address_data->probe[0] == I2C_CLIENT_END - && address_data->normal_i2c[0] == I2C_CLIENT_END) + if (address_data->normal_i2c[0] == I2C_CLIENT_END) goto exit_free; dev_warn(&adapter->dev, "SMBus Quick command not supported, " @@ -1301,43 +1274,7 @@ static int i2c_detect(struct i2c_adapter *adapter, struct i2c_driver *driver) goto exit_free; } - /* Probe entries are done second, and are not affected by ignore - entries either */ - for (i = 0; address_data->probe[i] != I2C_CLIENT_END; i += 2) { - if (address_data->probe[i] == adap_id - || address_data->probe[i] == ANY_I2C_BUS) { - dev_dbg(&adapter->dev, "found probe parameter for " - "adapter %d, addr 0x%02x\n", adap_id, - address_data->probe[i + 1]); - temp_client->addr = address_data->probe[i + 1]; - err = i2c_detect_address(temp_client, -1, driver); - if (err) - goto exit_free; - } - } - - /* Normal entries are done last, unless shadowed by an ignore entry */ for (i = 0; address_data->normal_i2c[i] != I2C_CLIENT_END; i += 1) { - int j, ignore; - - ignore = 0; - for (j = 0; address_data->ignore[j] != I2C_CLIENT_END; - j += 2) { - if ((address_data->ignore[j] == adap_id || - address_data->ignore[j] == ANY_I2C_BUS) - && address_data->ignore[j + 1] - == address_data->normal_i2c[i]) { - dev_dbg(&adapter->dev, "found ignore " - "parameter for adapter %d, " - "addr 0x%02x\n", adap_id, - address_data->ignore[j + 1]); - ignore = 1; - break; - } - } - if (ignore) - continue; - dev_dbg(&adapter->dev, "found normal entry for adapter %d, " "addr 0x%02x\n", adap_id, address_data->normal_i2c[i]); diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 52317fb5917e..419ab546b266 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -110,7 +110,7 @@ extern s32 i2c_smbus_write_i2c_block_data(struct i2c_client *client, * @driver: Device driver model driver * @id_table: List of I2C devices supported by this driver * @detect: Callback for device detection - * @address_data: The I2C addresses to probe, ignore or force (for detect) + * @address_data: The I2C addresses to probe (for detect) * @clients: List of detected clients we created (for i2c-core use only) * * The driver.owner field should be set to the module owner of this driver. @@ -397,9 +397,6 @@ static inline void i2c_unlock_adapter(struct i2c_adapter *adapter) */ struct i2c_client_address_data { const unsigned short *normal_i2c; - const unsigned short *probe; - const unsigned short *ignore; - const unsigned short * const *forces; }; /* Internal numbers to terminate lists */ @@ -613,134 +610,48 @@ union i2c_smbus_data { module_param_array(var, short, &var##_num, 0); \ MODULE_PARM_DESC(var, desc) -#define I2C_CLIENT_MODULE_PARM_FORCE(name) \ -I2C_CLIENT_MODULE_PARM(force_##name, \ - "List of adapter,address pairs which are " \ - "unquestionably assumed to contain a `" \ - # name "' chip") - - #define I2C_CLIENT_INSMOD_COMMON \ -I2C_CLIENT_MODULE_PARM(probe, "List of adapter,address pairs to scan " \ - "additionally"); \ -I2C_CLIENT_MODULE_PARM(ignore, "List of adapter,address pairs not to " \ - "scan"); \ static const struct i2c_client_address_data addr_data = { \ .normal_i2c = normal_i2c, \ - .probe = probe, \ - .ignore = ignore, \ - .forces = forces, \ } -#define I2C_CLIENT_FORCE_TEXT \ - "List of adapter,address pairs to boldly assume to be present" - /* These are the ones you want to use in your own drivers. Pick the one which matches the number of devices the driver differenciates between. */ #define I2C_CLIENT_INSMOD \ -I2C_CLIENT_MODULE_PARM(force, I2C_CLIENT_FORCE_TEXT); \ -static const unsigned short * const forces[] = { force, NULL }; \ I2C_CLIENT_INSMOD_COMMON #define I2C_CLIENT_INSMOD_1(chip1) \ enum chips { any_chip, chip1 }; \ -I2C_CLIENT_MODULE_PARM(force, I2C_CLIENT_FORCE_TEXT); \ -I2C_CLIENT_MODULE_PARM_FORCE(chip1); \ -static const unsigned short * const forces[] = { force, \ - force_##chip1, NULL }; \ I2C_CLIENT_INSMOD_COMMON #define I2C_CLIENT_INSMOD_2(chip1, chip2) \ enum chips { any_chip, chip1, chip2 }; \ -I2C_CLIENT_MODULE_PARM(force, I2C_CLIENT_FORCE_TEXT); \ -I2C_CLIENT_MODULE_PARM_FORCE(chip1); \ -I2C_CLIENT_MODULE_PARM_FORCE(chip2); \ -static const unsigned short * const forces[] = { force, \ - force_##chip1, force_##chip2, NULL }; \ I2C_CLIENT_INSMOD_COMMON #define I2C_CLIENT_INSMOD_3(chip1, chip2, chip3) \ enum chips { any_chip, chip1, chip2, chip3 }; \ -I2C_CLIENT_MODULE_PARM(force, I2C_CLIENT_FORCE_TEXT); \ -I2C_CLIENT_MODULE_PARM_FORCE(chip1); \ -I2C_CLIENT_MODULE_PARM_FORCE(chip2); \ -I2C_CLIENT_MODULE_PARM_FORCE(chip3); \ -static const unsigned short * const forces[] = { force, \ - force_##chip1, force_##chip2, force_##chip3, NULL }; \ I2C_CLIENT_INSMOD_COMMON #define I2C_CLIENT_INSMOD_4(chip1, chip2, chip3, chip4) \ enum chips { any_chip, chip1, chip2, chip3, chip4 }; \ -I2C_CLIENT_MODULE_PARM(force, I2C_CLIENT_FORCE_TEXT); \ -I2C_CLIENT_MODULE_PARM_FORCE(chip1); \ -I2C_CLIENT_MODULE_PARM_FORCE(chip2); \ -I2C_CLIENT_MODULE_PARM_FORCE(chip3); \ -I2C_CLIENT_MODULE_PARM_FORCE(chip4); \ -static const unsigned short * const forces[] = { force, \ - force_##chip1, force_##chip2, force_##chip3, \ - force_##chip4, NULL}; \ I2C_CLIENT_INSMOD_COMMON #define I2C_CLIENT_INSMOD_5(chip1, chip2, chip3, chip4, chip5) \ enum chips { any_chip, chip1, chip2, chip3, chip4, chip5 }; \ -I2C_CLIENT_MODULE_PARM(force, I2C_CLIENT_FORCE_TEXT); \ -I2C_CLIENT_MODULE_PARM_FORCE(chip1); \ -I2C_CLIENT_MODULE_PARM_FORCE(chip2); \ -I2C_CLIENT_MODULE_PARM_FORCE(chip3); \ -I2C_CLIENT_MODULE_PARM_FORCE(chip4); \ -I2C_CLIENT_MODULE_PARM_FORCE(chip5); \ -static const unsigned short * const forces[] = { force, \ - force_##chip1, force_##chip2, force_##chip3, \ - force_##chip4, force_##chip5, NULL }; \ I2C_CLIENT_INSMOD_COMMON #define I2C_CLIENT_INSMOD_6(chip1, chip2, chip3, chip4, chip5, chip6) \ enum chips { any_chip, chip1, chip2, chip3, chip4, chip5, chip6 }; \ -I2C_CLIENT_MODULE_PARM(force, I2C_CLIENT_FORCE_TEXT); \ -I2C_CLIENT_MODULE_PARM_FORCE(chip1); \ -I2C_CLIENT_MODULE_PARM_FORCE(chip2); \ -I2C_CLIENT_MODULE_PARM_FORCE(chip3); \ -I2C_CLIENT_MODULE_PARM_FORCE(chip4); \ -I2C_CLIENT_MODULE_PARM_FORCE(chip5); \ -I2C_CLIENT_MODULE_PARM_FORCE(chip6); \ -static const unsigned short * const forces[] = { force, \ - force_##chip1, force_##chip2, force_##chip3, \ - force_##chip4, force_##chip5, force_##chip6, NULL }; \ I2C_CLIENT_INSMOD_COMMON #define I2C_CLIENT_INSMOD_7(chip1, chip2, chip3, chip4, chip5, chip6, chip7) \ enum chips { any_chip, chip1, chip2, chip3, chip4, chip5, chip6, \ chip7 }; \ -I2C_CLIENT_MODULE_PARM(force, I2C_CLIENT_FORCE_TEXT); \ -I2C_CLIENT_MODULE_PARM_FORCE(chip1); \ -I2C_CLIENT_MODULE_PARM_FORCE(chip2); \ -I2C_CLIENT_MODULE_PARM_FORCE(chip3); \ -I2C_CLIENT_MODULE_PARM_FORCE(chip4); \ -I2C_CLIENT_MODULE_PARM_FORCE(chip5); \ -I2C_CLIENT_MODULE_PARM_FORCE(chip6); \ -I2C_CLIENT_MODULE_PARM_FORCE(chip7); \ -static const unsigned short * const forces[] = { force, \ - force_##chip1, force_##chip2, force_##chip3, \ - force_##chip4, force_##chip5, force_##chip6, \ - force_##chip7, NULL }; \ I2C_CLIENT_INSMOD_COMMON #define I2C_CLIENT_INSMOD_8(chip1, chip2, chip3, chip4, chip5, chip6, chip7, chip8) \ enum chips { any_chip, chip1, chip2, chip3, chip4, chip5, chip6, \ chip7, chip8 }; \ -I2C_CLIENT_MODULE_PARM(force, I2C_CLIENT_FORCE_TEXT); \ -I2C_CLIENT_MODULE_PARM_FORCE(chip1); \ -I2C_CLIENT_MODULE_PARM_FORCE(chip2); \ -I2C_CLIENT_MODULE_PARM_FORCE(chip3); \ -I2C_CLIENT_MODULE_PARM_FORCE(chip4); \ -I2C_CLIENT_MODULE_PARM_FORCE(chip5); \ -I2C_CLIENT_MODULE_PARM_FORCE(chip6); \ -I2C_CLIENT_MODULE_PARM_FORCE(chip7); \ -I2C_CLIENT_MODULE_PARM_FORCE(chip8); \ -static const unsigned short * const forces[] = { force, \ - force_##chip1, force_##chip2, force_##chip3, \ - force_##chip4, force_##chip5, force_##chip6, \ - force_##chip7, force_##chip8, NULL }; \ I2C_CLIENT_INSMOD_COMMON #endif /* __KERNEL__ */ #endif /* _LINUX_I2C_H */ -- cgit v1.3-8-gc7d7 From 36203c4f3d091b5f6c082663bd1f74273798043a Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 4 Dec 2009 10:22:23 -0800 Subject: Input: add generic support for sparse keymaps More and more devices choose to reimplement support for sparse keymaps first introduced by wistron driver. Move it into a library module so it can be easily used by interested parties. Reviewed-by: Anisse Astier Signed-off-by: Dmitry Torokhov --- Documentation/DocBook/device-drivers.tmpl | 4 + drivers/input/Kconfig | 28 +++- drivers/input/Makefile | 1 + drivers/input/input-polldev.c | 14 +- drivers/input/sparse-keymap.c | 250 ++++++++++++++++++++++++++++++ include/linux/input/sparse-keymap.h | 62 ++++++++ 6 files changed, 347 insertions(+), 12 deletions(-) create mode 100644 drivers/input/sparse-keymap.c create mode 100644 include/linux/input/sparse-keymap.h (limited to 'include') diff --git a/Documentation/DocBook/device-drivers.tmpl b/Documentation/DocBook/device-drivers.tmpl index e994d1d9fbe6..f9a6e2c75f12 100644 --- a/Documentation/DocBook/device-drivers.tmpl +++ b/Documentation/DocBook/device-drivers.tmpl @@ -306,6 +306,10 @@ X!Idrivers/video/console/fonts.c Matrix keyboars/keypads !Iinclude/linux/input/matrix_keypad.h + Sparse keymap support +!Iinclude/linux/input/sparse-keymap.h +!Edrivers/input/sparse-keymap.c + diff --git a/drivers/input/Kconfig b/drivers/input/Kconfig index cd50c00ab20f..50af91ebd075 100644 --- a/drivers/input/Kconfig +++ b/drivers/input/Kconfig @@ -8,7 +8,7 @@ menu "Input device support" config INPUT tristate "Generic input layer (needed for keyboard, mouse, ...)" if EMBEDDED default y - ---help--- + help Say Y here if you have any input device (mouse, keyboard, tablet, joystick, steering wheel ...) connected to your system and want it to be available to applications. This includes standard PS/2 @@ -27,8 +27,7 @@ if INPUT config INPUT_FF_MEMLESS tristate "Support for memoryless force-feedback devices" - default n - ---help--- + help Say Y here if you have memoryless force-feedback input device such as Logitech WingMan Force 3D, ThrustMaster FireStorm Dual Power 2, or similar. You will also need to enable hardware-specific @@ -52,12 +51,25 @@ config INPUT_POLLDEV To compile this driver as a module, choose M here: the module will be called input-polldev. +config INPUT_SPARSEKMAP + tristate "Sparse keymap support library" + help + Say Y here if you are using a driver for an input + device that uses sparse keymap. This option is only + useful for out-of-tree drivers since in-tree drivers + select it automatically. + + If unsure, say N. + + To compile this driver as a module, choose M here: the + module will be called sparse-keymap. + comment "Userland interfaces" config INPUT_MOUSEDEV tristate "Mouse interface" if EMBEDDED default y - ---help--- + help Say Y here if you want your mouse to be accessible as char devices 13:32+ - /dev/input/mouseX and 13:63 - /dev/input/mice as an emulated IntelliMouse Explorer PS/2 mouse. That way, all user space @@ -73,7 +85,7 @@ config INPUT_MOUSEDEV_PSAUX bool "Provide legacy /dev/psaux device" default y depends on INPUT_MOUSEDEV - ---help--- + help Say Y here if you want your mouse also be accessible as char device 10:1 - /dev/psaux. The data available through /dev/psaux is exactly the same as the data from /dev/input/mice. @@ -103,7 +115,7 @@ config INPUT_MOUSEDEV_SCREEN_Y config INPUT_JOYDEV tristate "Joystick interface" - ---help--- + help Say Y here if you want your joystick or gamepad to be accessible as char device 13:0+ - /dev/input/jsX device. @@ -125,7 +137,7 @@ config INPUT_EVDEV config INPUT_EVBUG tristate "Event debugging" - ---help--- + help Say Y here if you have a problem with the input subsystem and want all events (keypresses, mouse movements), to be output to the system log. While this is useful for debugging, it's also @@ -140,7 +152,7 @@ config INPUT_EVBUG config INPUT_APMPOWER tristate "Input Power Event -> APM Bridge" if EMBEDDED depends on INPUT && APM_EMULATION - ---help--- + help Say Y here if you want suspend key events to trigger a user requested suspend through APM. This is useful on embedded systems where such behaviour is desired without userspace diff --git a/drivers/input/Makefile b/drivers/input/Makefile index 4c9c745a7020..7ad212d31f99 100644 --- a/drivers/input/Makefile +++ b/drivers/input/Makefile @@ -9,6 +9,7 @@ input-core-objs := input.o input-compat.o ff-core.o obj-$(CONFIG_INPUT_FF_MEMLESS) += ff-memless.o obj-$(CONFIG_INPUT_POLLDEV) += input-polldev.o +obj-$(CONFIG_INPUT_SPARSEKMAP) += sparse-keymap.o obj-$(CONFIG_INPUT_MOUSEDEV) += mousedev.o obj-$(CONFIG_INPUT_JOYDEV) += joydev.o diff --git a/drivers/input/input-polldev.c b/drivers/input/input-polldev.c index 6a2eb399b988..aa6713b4a988 100644 --- a/drivers/input/input-polldev.c +++ b/drivers/input/input-polldev.c @@ -212,7 +212,7 @@ EXPORT_SYMBOL(input_allocate_polled_device); * @dev: device to free * * The function frees memory allocated for polling device and drops - * reference to the associated input device (if present). + * reference to the associated input device. */ void input_free_polled_device(struct input_polled_dev *dev) { @@ -258,6 +258,15 @@ int input_register_polled_device(struct input_polled_dev *dev) return error; } + /* + * Take extra reference to the underlying input device so + * that it survives call to input_unregister_polled_device() + * and is deleted only after input_free_polled_device() + * has been invoked. This is needed to ease task of freeing + * sparse keymaps. + */ + input_get_device(input); + return 0; } EXPORT_SYMBOL(input_register_polled_device); @@ -269,8 +278,6 @@ EXPORT_SYMBOL(input_register_polled_device); * The function unregisters previously registered polled input * device from input layer. Polling is stopped and device is * ready to be freed with call to input_free_polled_device(). - * Callers should not attempt to access dev->input pointer - * after calling this function. */ void input_unregister_polled_device(struct input_polled_dev *dev) { @@ -278,7 +285,6 @@ void input_unregister_polled_device(struct input_polled_dev *dev) &input_polldev_attribute_group); input_unregister_device(dev->input); - dev->input = NULL; } EXPORT_SYMBOL(input_unregister_polled_device); diff --git a/drivers/input/sparse-keymap.c b/drivers/input/sparse-keymap.c new file mode 100644 index 000000000000..fbd3987af57f --- /dev/null +++ b/drivers/input/sparse-keymap.c @@ -0,0 +1,250 @@ +/* + * Generic support for sparse keymaps + * + * Copyright (c) 2009 Dmitry Torokhov + * + * Derived from wistron button driver: + * Copyright (C) 2005 Miloslav Trmac + * Copyright (C) 2005 Bernhard Rosenkraenzer + * Copyright (C) 2005 Dmitry Torokhov + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + */ + +#include +#include + +MODULE_AUTHOR("Dmitry Torokhov "); +MODULE_DESCRIPTION("Generic support for sparse keymaps"); +MODULE_LICENSE("GPL v2"); +MODULE_VERSION("0.1"); + +/** + * sparse_keymap_entry_from_scancode - perform sparse keymap lookup + * @dev: Input device using sparse keymap + * @code: Scan code + * + * This function is used to perform &struct key_entry lookup in an + * input device using sparse keymap. + */ +struct key_entry *sparse_keymap_entry_from_scancode(struct input_dev *dev, + unsigned int code) +{ + struct key_entry *key; + + for (key = dev->keycode; key->type != KE_END; key++) + if (code == key->code) + return key; + + return NULL; +} +EXPORT_SYMBOL(sparse_keymap_entry_from_scancode); + +/** + * sparse_keymap_entry_from_keycode - perform sparse keymap lookup + * @dev: Input device using sparse keymap + * @keycode: Key code + * + * This function is used to perform &struct key_entry lookup in an + * input device using sparse keymap. + */ +struct key_entry *sparse_keymap_entry_from_keycode(struct input_dev *dev, + unsigned int keycode) +{ + struct key_entry *key; + + for (key = dev->keycode; key->type != KE_END; key++) + if (key->type == KE_KEY && keycode == key->keycode) + return key; + + return NULL; +} +EXPORT_SYMBOL(sparse_keymap_entry_from_keycode); + +static int sparse_keymap_getkeycode(struct input_dev *dev, + int scancode, int *keycode) +{ + const struct key_entry *key = + sparse_keymap_entry_from_scancode(dev, scancode); + + if (key && key->type == KE_KEY) { + *keycode = key->keycode; + return 0; + } + + return -EINVAL; +} + +static int sparse_keymap_setkeycode(struct input_dev *dev, + int scancode, int keycode) +{ + struct key_entry *key; + int old_keycode; + + if (keycode < 0 || keycode > KEY_MAX) + return -EINVAL; + + key = sparse_keymap_entry_from_scancode(dev, scancode); + if (key && key->type == KE_KEY) { + old_keycode = key->keycode; + key->keycode = keycode; + set_bit(keycode, dev->keybit); + if (!sparse_keymap_entry_from_keycode(dev, old_keycode)) + clear_bit(old_keycode, dev->keybit); + return 0; + } + + return -EINVAL; +} + +/** + * sparse_keymap_setup - set up sparse keymap for an input device + * @dev: Input device + * @keymap: Keymap in form of array of &key_entry structures ending + * with %KE_END type entry + * @setup: Function that can be used to adjust keymap entries + * depending on device's deeds, may be %NULL + * + * The function calculates size and allocates copy of the original + * keymap after which sets up input device event bits appropriately. + * Before destroying input device allocated keymap should be freed + * with a call to sparse_keymap_free(). + */ +int sparse_keymap_setup(struct input_dev *dev, + const struct key_entry *keymap, + int (*setup)(struct input_dev *, struct key_entry *)) +{ + size_t map_size = 1; /* to account for the last KE_END entry */ + const struct key_entry *e; + struct key_entry *map, *entry; + int i; + int error; + + for (e = keymap; e->type != KE_END; e++) + map_size++; + + map = kcalloc(map_size, sizeof (struct key_entry), GFP_KERNEL); + if (!map) + return -ENOMEM; + + memcpy(map, keymap, map_size * sizeof (struct key_entry)); + + for (i = 0; i < map_size; i++) { + entry = &map[i]; + + if (setup) { + error = setup(dev, entry); + if (error) + goto err_out; + } + + switch (entry->type) { + case KE_KEY: + __set_bit(EV_KEY, dev->evbit); + __set_bit(entry->keycode, dev->keybit); + break; + + case KE_SW: + __set_bit(EV_SW, dev->evbit); + __set_bit(entry->sw.code, dev->swbit); + break; + } + } + + dev->keycode = map; + dev->keycodemax = map_size; + dev->getkeycode = sparse_keymap_getkeycode; + dev->setkeycode = sparse_keymap_setkeycode; + + return 0; + + err_out: + kfree(keymap); + return error; + +} +EXPORT_SYMBOL(sparse_keymap_setup); + +/** + * sparse_keymap_free - free memory allocated for sparse keymap + * @dev: Input device using sparse keymap + * + * This function is used to free memory allocated by sparse keymap + * in an input device that was set up by sparse_keymap_setup(). + */ +void sparse_keymap_free(struct input_dev *dev) +{ + kfree(dev->keycode); + dev->keycode = NULL; + dev->keycodemax = 0; + dev->getkeycode = NULL; + dev->setkeycode = NULL; +} +EXPORT_SYMBOL(sparse_keymap_free); + +/** + * sparse_keymap_report_entry - report event corresponding to given key entry + * @dev: Input device for which event should be reported + * @ke: key entry describing event + * @value: Value that should be reported (ignored by %KE_SW entries) + * @autorelease: Signals whether release event should be emitted for %KE_KEY + * entries right after reporting press event, ignored by all other + * entries + * + * This function is used to report input event described by given + * &struct key_entry. + */ +void sparse_keymap_report_entry(struct input_dev *dev, const struct key_entry *ke, + unsigned int value, bool autorelease) +{ + switch (ke->type) { + case KE_KEY: + input_report_key(dev, ke->keycode, value); + input_sync(dev); + if (value && autorelease) { + input_report_key(dev, ke->keycode, 0); + input_sync(dev); + } + break; + + case KE_SW: + value = ke->sw.value; + /* fall through */ + + case KE_VSW: + input_report_switch(dev, ke->sw.code, value); + break; + } +} +EXPORT_SYMBOL(sparse_keymap_report_entry); + +/** + * sparse_keymap_report_event - report event corresponding to given scancode + * @dev: Input device using sparse keymap + * @code: Scan code + * @value: Value that should be reported (ignored by %KE_SW entries) + * @autorelease: Signals whether release event should be emitted for %KE_KEY + * entries right after reporting press event, ignored by all other + * entries + * + * This function is used to perform lookup in an input device using sparse + * keymap and report corresponding event. Returns %true if lookup was + * successful and %false otherwise. + */ +bool sparse_keymap_report_event(struct input_dev *dev, unsigned int code, + unsigned int value, bool autorelease) +{ + const struct key_entry *ke = + sparse_keymap_entry_from_scancode(dev, code); + + if (ke) { + sparse_keymap_report_entry(dev, ke, value, autorelease); + return true; + } + + return false; +} +EXPORT_SYMBOL(sparse_keymap_report_event); + diff --git a/include/linux/input/sparse-keymap.h b/include/linux/input/sparse-keymap.h new file mode 100644 index 000000000000..52db62064c6e --- /dev/null +++ b/include/linux/input/sparse-keymap.h @@ -0,0 +1,62 @@ +#ifndef _SPARSE_KEYMAP_H +#define _SPARSE_KEYMAP_H + +/* + * Copyright (c) 2009 Dmitry Torokhov + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + */ + +#define KE_END 0 /* Indicates end of keymap */ +#define KE_KEY 1 /* Ordinary key/button */ +#define KE_SW 2 /* Switch (predetermined value) */ +#define KE_VSW 3 /* Switch (value supplied at runtime) */ +#define KE_IGNORE 4 /* Known entry that should be ignored */ +#define KE_LAST KE_IGNORE + +/** + * struct key_entry - keymap entry for use in sparse keymap + * @type: Type of the key entry (KE_KEY, KE_SW, KE_VSW, KE_END); + * drivers are allowed to extend the list with their own + * private definitions. + * @code: Device-specific data identifying the button/switch + * @keycode: KEY_* code assigned to a key/button + * @sw.code: SW_* code assigned to a switch + * @sw.value: Value that should be sent in an input even when KE_SW + * switch is toggled. KE_VSW switches ignore this field and + * expect driver to supply value for the event. + * + * This structure defines an entry in a sparse keymap used by some + * input devices for which traditional table-based approach is not + * suitable. + */ +struct key_entry { + int type; /* See KE_* above */ + u32 code; + union { + u16 keycode; /* For KE_KEY */ + struct { /* For KE_SW, KE_VSW */ + u8 code; + u8 value; /* For KE_SW, ignored by KE_VSW */ + } sw; + }; +}; + +struct key_entry *sparse_keymap_entry_from_scancode(struct input_dev *dev, + unsigned int code); +struct key_entry *sparse_keymap_entry_from_keycode(struct input_dev *dev, + unsigned int code); +int sparse_keymap_setup(struct input_dev *dev, + const struct key_entry *keymap, + int (*setup)(struct input_dev *, struct key_entry *)); +void sparse_keymap_free(struct input_dev *dev); + +void sparse_keymap_report_entry(struct input_dev *dev, const struct key_entry *ke, + unsigned int value, bool autorelease); + +bool sparse_keymap_report_event(struct input_dev *dev, unsigned int code, + unsigned int value, bool autorelease); + +#endif /* _SPARSE_KEYMAP_H */ -- cgit v1.3-8-gc7d7