From ee17e5d6201c66492a0e8053190fca2ed2b8457d Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 12 Jan 2019 11:48:20 -0600 Subject: signal: Make siginmask safe when passed a signal of 0 Eric Biggers reported: > The following commit, which went into v4.20, introduced undefined behavior when > sys_rt_sigqueueinfo() is called with sig=0: > > commit 4ce5f9c9e7546915c559ffae594e6d73f918db00 > Author: Eric W. Biederman > Date: Tue Sep 25 12:59:31 2018 +0200 > > signal: Use a smaller struct siginfo in the kernel > > In sig_specific_sicodes(), used from known_siginfo_layout(), the expression > '1ULL << ((sig)-1)' is undefined as it evaluates to 1ULL << 4294967295. > > Reproducer: > > #include > #include > #include > > int main(void) > { > siginfo_t si = { .si_code = 1 }; > syscall(__NR_rt_sigqueueinfo, 0, 0, &si); > } > > UBSAN report for v5.0-rc1: > > UBSAN: Undefined behaviour in kernel/signal.c:2946:7 > shift exponent 4294967295 is too large for 64-bit type 'long unsigned int' > CPU: 2 PID: 346 Comm: syz_signal Not tainted 5.0.0-rc1 #25 > Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014 > Call Trace: > __dump_stack lib/dump_stack.c:77 [inline] > dump_stack+0x70/0xa5 lib/dump_stack.c:113 > ubsan_epilogue+0xd/0x40 lib/ubsan.c:159 > __ubsan_handle_shift_out_of_bounds+0x12c/0x170 lib/ubsan.c:425 > known_siginfo_layout+0xae/0xe0 kernel/signal.c:2946 > post_copy_siginfo_from_user kernel/signal.c:3009 [inline] > __copy_siginfo_from_user+0x35/0x60 kernel/signal.c:3035 > __do_sys_rt_sigqueueinfo kernel/signal.c:3553 [inline] > __se_sys_rt_sigqueueinfo kernel/signal.c:3549 [inline] > __x64_sys_rt_sigqueueinfo+0x31/0x70 kernel/signal.c:3549 > do_syscall_64+0x4c/0x1b0 arch/x86/entry/common.c:290 > entry_SYSCALL_64_after_hwframe+0x49/0xbe > RIP: 0033:0x433639 > Code: c4 18 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 7b 27 00 00 c3 66 2e 0f 1f 84 00 00 00 00 > RSP: 002b:00007fffcb289fc8 EFLAGS: 00000246 ORIG_RAX: 0000000000000081 > RAX: ffffffffffffffda RBX: 00000000004002e0 RCX: 0000000000433639 > RDX: 00007fffcb289fd0 RSI: 0000000000000000 RDI: 0000000000000000 > RBP: 00000000006b2018 R08: 000000000000004d R09: 0000000000000000 > R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000401560 > R13: 00000000004015f0 R14: 0000000000000000 R15: 0000000000000000 I have looked at the other callers of siginmask and they all appear to in locations where sig can not be zero. I have looked at the code generation of adding an extra test against zero and gcc was able with a simple decrement instruction to combine the two tests together. So the at most adding this test cost a single cpu cycle. In practice that decrement instruction was already present as part of the mask comparison, so the only change was when the instruction was executed. So given that it is cheap, and obviously correct to update siginmask to verify the signal is not zero. Fix this issue there to avoid any future problems. Reported-by: Eric Biggers Fixes: 4ce5f9c9e754 ("signal: Use a smaller struct siginfo in the kernel") Signed-off-by: "Eric W. Biederman" --- include/linux/signal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/signal.h b/include/linux/signal.h index cc7e2c1cd444..9702016734b1 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -392,7 +392,7 @@ extern bool unhandled_signal(struct task_struct *tsk, int sig); #endif #define siginmask(sig, mask) \ - ((sig) < SIGRTMIN && (rt_sigmask(sig) & (mask))) + ((sig) > 0 && (sig) < SIGRTMIN && (rt_sigmask(sig) & (mask))) #define SIG_KERNEL_ONLY_MASK (\ rt_sigmask(SIGKILL) | rt_sigmask(SIGSTOP)) -- cgit v1.3-8-gc7d7 From 1278cf66cf4b1c3d30e311200b50c45457c92baa Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Tue, 15 Jan 2019 15:18:56 +1100 Subject: nvram: Replace nvram_* function exports with static functions Replace nvram_* functions with static functions in nvram.h. These will become wrappers for struct nvram_ops method calls. This patch effectively disables existing NVRAM functionality so as to allow the rest of the series to be bisected without build failures. That functionality is gradually re-implemented in subsequent patches. Replace the sole validate-checksum-and-read-byte sequence with a call to nvram_read() which will gain the same semantics in subsequent patches. Remove unused exports. Acked-by: Geert Uytterhoeven Signed-off-by: Finn Thain Signed-off-by: Greg Kroah-Hartman --- arch/m68k/atari/nvram.c | 39 +++------------------------------------ drivers/char/nvram.c | 27 +++++---------------------- drivers/scsi/atari_scsi.c | 8 +++++--- include/linux/nvram.h | 32 +++++++++++++++++++++++++------- 4 files changed, 38 insertions(+), 68 deletions(-) (limited to 'include/linux') diff --git a/arch/m68k/atari/nvram.c b/arch/m68k/atari/nvram.c index a8c457e40b0b..1d767847ffa6 100644 --- a/arch/m68k/atari/nvram.c +++ b/arch/m68k/atari/nvram.c @@ -34,38 +34,17 @@ * periodic 11 min sync from kernel/time/ntp.c vs. this driver.) */ -unsigned char __nvram_read_byte(int i) +static unsigned char __nvram_read_byte(int i) { return CMOS_READ(NVRAM_FIRST_BYTE + i); } -unsigned char nvram_read_byte(int i) -{ - unsigned long flags; - unsigned char c; - - spin_lock_irqsave(&rtc_lock, flags); - c = __nvram_read_byte(i); - spin_unlock_irqrestore(&rtc_lock, flags); - return c; -} -EXPORT_SYMBOL(nvram_read_byte); - /* This races nicely with trying to read with checksum checking */ -void __nvram_write_byte(unsigned char c, int i) +static void __nvram_write_byte(unsigned char c, int i) { CMOS_WRITE(c, NVRAM_FIRST_BYTE + i); } -void nvram_write_byte(unsigned char c, int i) -{ - unsigned long flags; - - spin_lock_irqsave(&rtc_lock, flags); - __nvram_write_byte(c, i); - spin_unlock_irqrestore(&rtc_lock, flags); -} - /* On Ataris, the checksum is over all bytes except the checksum bytes * themselves; these are at the very end. */ @@ -73,7 +52,7 @@ void nvram_write_byte(unsigned char c, int i) #define ATARI_CKS_RANGE_END 47 #define ATARI_CKS_LOC 48 -int __nvram_check_checksum(void) +static int __nvram_check_checksum(void) { int i; unsigned char sum = 0; @@ -84,18 +63,6 @@ int __nvram_check_checksum(void) (__nvram_read_byte(ATARI_CKS_LOC + 1) == (sum & 0xff)); } -int nvram_check_checksum(void) -{ - unsigned long flags; - int rv; - - spin_lock_irqsave(&rtc_lock, flags); - rv = __nvram_check_checksum(); - spin_unlock_irqrestore(&rtc_lock, flags); - return rv; -} -EXPORT_SYMBOL(nvram_check_checksum); - static void __nvram_set_checksum(void) { int i; diff --git a/drivers/char/nvram.c b/drivers/char/nvram.c index c660cff9faf4..c98775bfd896 100644 --- a/drivers/char/nvram.c +++ b/drivers/char/nvram.c @@ -74,13 +74,12 @@ static int nvram_open_mode; /* special open modes */ * periodic 11 min sync from kernel/time/ntp.c vs. this driver.) */ -unsigned char __nvram_read_byte(int i) +static unsigned char __nvram_read_byte(int i) { return CMOS_READ(NVRAM_FIRST_BYTE + i); } -EXPORT_SYMBOL(__nvram_read_byte); -unsigned char nvram_read_byte(int i) +static unsigned char pc_nvram_read_byte(int i) { unsigned long flags; unsigned char c; @@ -90,16 +89,14 @@ unsigned char nvram_read_byte(int i) spin_unlock_irqrestore(&rtc_lock, flags); return c; } -EXPORT_SYMBOL(nvram_read_byte); /* This races nicely with trying to read with checksum checking (nvram_read) */ -void __nvram_write_byte(unsigned char c, int i) +static void __nvram_write_byte(unsigned char c, int i) { CMOS_WRITE(c, NVRAM_FIRST_BYTE + i); } -EXPORT_SYMBOL(__nvram_write_byte); -void nvram_write_byte(unsigned char c, int i) +static void pc_nvram_write_byte(unsigned char c, int i) { unsigned long flags; @@ -107,14 +104,13 @@ void nvram_write_byte(unsigned char c, int i) __nvram_write_byte(c, i); spin_unlock_irqrestore(&rtc_lock, flags); } -EXPORT_SYMBOL(nvram_write_byte); /* On PCs, the checksum is built only over bytes 2..31 */ #define PC_CKS_RANGE_START 2 #define PC_CKS_RANGE_END 31 #define PC_CKS_LOC 32 -int __nvram_check_checksum(void) +static int __nvram_check_checksum(void) { int i; unsigned short sum = 0; @@ -126,19 +122,6 @@ int __nvram_check_checksum(void) __nvram_read_byte(PC_CKS_LOC+1); return (sum & 0xffff) == expect; } -EXPORT_SYMBOL(__nvram_check_checksum); - -int nvram_check_checksum(void) -{ - unsigned long flags; - int rv; - - spin_lock_irqsave(&rtc_lock, flags); - rv = __nvram_check_checksum(); - spin_unlock_irqrestore(&rtc_lock, flags); - return rv; -} -EXPORT_SYMBOL(nvram_check_checksum); static void __nvram_set_checksum(void) { diff --git a/drivers/scsi/atari_scsi.c b/drivers/scsi/atari_scsi.c index 78b43200c99e..e809493d0d06 100644 --- a/drivers/scsi/atari_scsi.c +++ b/drivers/scsi/atari_scsi.c @@ -759,13 +759,15 @@ static int __init atari_scsi_probe(struct platform_device *pdev) atari_scsi_template.this_id = setup_hostid & 7; } else if (IS_REACHABLE(CONFIG_NVRAM)) { /* Test if a host id is set in the NVRam */ - if (ATARIHW_PRESENT(TT_CLK) && nvram_check_checksum()) { - unsigned char b = nvram_read_byte(16); + if (ATARIHW_PRESENT(TT_CLK)) { + unsigned char b; + loff_t offset = 16; + ssize_t count = nvram_read(&b, 1, &offset); /* Arbitration enabled? (for TOS) * If yes, use configured host ID */ - if (b & 0x80) + if ((count == 1) && (b & 0x80)) atari_scsi_template.this_id = b & 7; } } diff --git a/include/linux/nvram.h b/include/linux/nvram.h index 28bfb9ab94ca..eb5b52a9a747 100644 --- a/include/linux/nvram.h +++ b/include/linux/nvram.h @@ -2,13 +2,31 @@ #ifndef _LINUX_NVRAM_H #define _LINUX_NVRAM_H +#include #include -/* __foo is foo without grabbing the rtc_lock - get it yourself */ -extern unsigned char __nvram_read_byte(int i); -extern unsigned char nvram_read_byte(int i); -extern void __nvram_write_byte(unsigned char c, int i); -extern void nvram_write_byte(unsigned char c, int i); -extern int __nvram_check_checksum(void); -extern int nvram_check_checksum(void); +static inline ssize_t nvram_get_size(void) +{ + return -ENODEV; +} + +static inline unsigned char nvram_read_byte(int addr) +{ + return 0xFF; +} + +static inline void nvram_write_byte(unsigned char val, int addr) +{ +} + +static inline ssize_t nvram_read(char *buf, size_t count, loff_t *ppos) +{ + return -ENODEV; +} + +static inline ssize_t nvram_write(char *buf, size_t count, loff_t *ppos) +{ + return -ENODEV; +} + #endif /* _LINUX_NVRAM_H */ -- cgit v1.3-8-gc7d7 From a084dbf6592c22468eb946014b2e731fb42da7a9 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Tue, 15 Jan 2019 15:18:56 +1100 Subject: m68k/atari: Implement arch_nvram_ops struct By implementing an arch_nvram_ops struct, a platform can re-use the drivers/char/nvram.c module without needing any arch-specific code in that module. Atari does so here. Acked-by: Geert Uytterhoeven Signed-off-by: Finn Thain Signed-off-by: Greg Kroah-Hartman --- arch/m68k/atari/nvram.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/nvram.h | 14 ++++++++++++++ 2 files changed, 63 insertions(+) (limited to 'include/linux') diff --git a/arch/m68k/atari/nvram.c b/arch/m68k/atari/nvram.c index 1d767847ffa6..e75adebe6e7d 100644 --- a/arch/m68k/atari/nvram.c +++ b/arch/m68k/atari/nvram.c @@ -74,6 +74,55 @@ static void __nvram_set_checksum(void) __nvram_write_byte(sum, ATARI_CKS_LOC + 1); } +static ssize_t atari_nvram_read(char *buf, size_t count, loff_t *ppos) +{ + char *p = buf; + loff_t i; + + spin_lock_irq(&rtc_lock); + if (!__nvram_check_checksum()) { + spin_unlock_irq(&rtc_lock); + return -EIO; + } + for (i = *ppos; count > 0 && i < NVRAM_BYTES; --count, ++i, ++p) + *p = __nvram_read_byte(i); + spin_unlock_irq(&rtc_lock); + + *ppos = i; + return p - buf; +} + +static ssize_t atari_nvram_write(char *buf, size_t count, loff_t *ppos) +{ + char *p = buf; + loff_t i; + + spin_lock_irq(&rtc_lock); + if (!__nvram_check_checksum()) { + spin_unlock_irq(&rtc_lock); + return -EIO; + } + for (i = *ppos; count > 0 && i < NVRAM_BYTES; --count, ++i, ++p) + __nvram_write_byte(*p, i); + __nvram_set_checksum(); + spin_unlock_irq(&rtc_lock); + + *ppos = i; + return p - buf; +} + +static ssize_t atari_nvram_get_size(void) +{ + return NVRAM_BYTES; +} + +const struct nvram_ops arch_nvram_ops = { + .read = atari_nvram_read, + .write = atari_nvram_write, + .get_size = atari_nvram_get_size, +}; +EXPORT_SYMBOL(arch_nvram_ops); + #ifdef CONFIG_PROC_FS static struct { unsigned char val; diff --git a/include/linux/nvram.h b/include/linux/nvram.h index eb5b52a9a747..a1e01dc89759 100644 --- a/include/linux/nvram.h +++ b/include/linux/nvram.h @@ -5,8 +5,18 @@ #include #include +struct nvram_ops { + ssize_t (*get_size)(void); + ssize_t (*read)(char *, size_t, loff_t *); + ssize_t (*write)(char *, size_t, loff_t *); +}; + +extern const struct nvram_ops arch_nvram_ops; + static inline ssize_t nvram_get_size(void) { + if (arch_nvram_ops.get_size) + return arch_nvram_ops.get_size(); return -ENODEV; } @@ -21,11 +31,15 @@ static inline void nvram_write_byte(unsigned char val, int addr) static inline ssize_t nvram_read(char *buf, size_t count, loff_t *ppos) { + if (arch_nvram_ops.read) + return arch_nvram_ops.read(buf, count, ppos); return -ENODEV; } static inline ssize_t nvram_write(char *buf, size_t count, loff_t *ppos) { + if (arch_nvram_ops.write) + return arch_nvram_ops.write(buf, count, ppos); return -ENODEV; } -- cgit v1.3-8-gc7d7 From a156c7ba669c65b55c7afcc3994e1199cc0cad47 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Tue, 15 Jan 2019 15:18:56 +1100 Subject: powerpc: Replace nvram_* extern declarations with standard header Remove the nvram_read_byte() and nvram_write_byte() declarations in powerpc/include/asm/nvram.h and use the cross-platform static functions in linux/nvram.h instead. Tested-by: Stan Johnson Signed-off-by: Finn Thain Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/nvram.h | 6 ------ arch/powerpc/kernel/setup_32.c | 25 +------------------------ drivers/char/generic_nvram.c | 1 + drivers/video/fbdev/matrox/matroxfb_base.c | 2 +- include/linux/nvram.h | 3 +++ 5 files changed, 6 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/include/asm/nvram.h b/arch/powerpc/include/asm/nvram.h index 09a518bb7c03..56a388da9c4f 100644 --- a/arch/powerpc/include/asm/nvram.h +++ b/arch/powerpc/include/asm/nvram.h @@ -98,10 +98,4 @@ extern int nvram_write_os_partition(struct nvram_os_partition *part, unsigned int err_type, unsigned int error_log_cnt); -/* Determine NVRAM size */ -extern ssize_t nvram_get_size(void); - -/* Normal access to NVRAM */ -extern unsigned char nvram_read_byte(int i); -extern void nvram_write_byte(unsigned char c, int i); #endif /* _ASM_POWERPC_NVRAM_H */ diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 947f904688b0..f5107796e2d7 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -149,30 +150,6 @@ __setup("l3cr=", ppc_setup_l3cr); #ifdef CONFIG_GENERIC_NVRAM -/* Generic nvram hooks used by drivers/char/gen_nvram.c */ -unsigned char nvram_read_byte(int addr) -{ - if (ppc_md.nvram_read_val) - return ppc_md.nvram_read_val(addr); - return 0xff; -} -EXPORT_SYMBOL(nvram_read_byte); - -void nvram_write_byte(unsigned char val, int addr) -{ - if (ppc_md.nvram_write_val) - ppc_md.nvram_write_val(addr, val); -} -EXPORT_SYMBOL(nvram_write_byte); - -ssize_t nvram_get_size(void) -{ - if (ppc_md.nvram_size) - return ppc_md.nvram_size(); - return -1; -} -EXPORT_SYMBOL(nvram_get_size); - void nvram_sync(void) { if (ppc_md.nvram_sync) diff --git a/drivers/char/generic_nvram.c b/drivers/char/generic_nvram.c index ff5394f47587..0c22b9503e84 100644 --- a/drivers/char/generic_nvram.c +++ b/drivers/char/generic_nvram.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/video/fbdev/matrox/matroxfb_base.c b/drivers/video/fbdev/matrox/matroxfb_base.c index 838869c6490c..0a4e5bad33f4 100644 --- a/drivers/video/fbdev/matrox/matroxfb_base.c +++ b/drivers/video/fbdev/matrox/matroxfb_base.c @@ -111,12 +111,12 @@ #include "matroxfb_g450.h" #include #include +#include #include #include #ifdef CONFIG_PPC_PMAC #include -unsigned char nvram_read_byte(int); static int default_vmode = VMODE_NVRAM; static int default_cmode = CMODE_NVRAM; #endif diff --git a/include/linux/nvram.h b/include/linux/nvram.h index a1e01dc89759..79431dab87a1 100644 --- a/include/linux/nvram.h +++ b/include/linux/nvram.h @@ -15,8 +15,11 @@ extern const struct nvram_ops arch_nvram_ops; static inline ssize_t nvram_get_size(void) { +#ifdef CONFIG_PPC +#else if (arch_nvram_ops.get_size) return arch_nvram_ops.get_size(); +#endif return -ENODEV; } -- cgit v1.3-8-gc7d7 From d5bbb5021ce8d9ff561c7469f5b4589ccb3bc4a6 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Tue, 15 Jan 2019 15:18:56 +1100 Subject: char/nvram: Adopt arch_nvram_ops NVRAMs on different platforms and architectures have different attributes and access methods. E.g. some platforms have byte-at-a-time accessor functions while others have byte-range accessor functions. Some have checksum functionality while others do not. By calling ops struct methods via the common wrapper functions, the nvram module and other drivers can make use of the available NVRAM functionality in a portable way. Signed-off-by: Finn Thain Signed-off-by: Greg Kroah-Hartman --- drivers/char/nvram.c | 30 ++++++++++++++++++++++++------ include/linux/nvram.h | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/nvram.c b/drivers/char/nvram.c index c98775bfd896..2df391f78986 100644 --- a/drivers/char/nvram.c +++ b/drivers/char/nvram.c @@ -52,9 +52,11 @@ static DEFINE_MUTEX(nvram_mutex); static DEFINE_SPINLOCK(nvram_state_lock); static int nvram_open_cnt; /* #times opened */ static int nvram_open_mode; /* special open modes */ +static ssize_t nvram_size; #define NVRAM_WRITE 1 /* opened for writing (exclusive) */ #define NVRAM_EXCL 2 /* opened with O_EXCL */ +#ifdef CONFIG_X86 /* * These functions are provided to be called internally or by other parts of * the kernel. It's up to the caller to ensure correct checksum before reading @@ -145,6 +147,19 @@ void nvram_set_checksum(void) } #endif /* 0 */ +static ssize_t pc_nvram_get_size(void) +{ + return NVRAM_BYTES; +} + +const struct nvram_ops arch_nvram_ops = { + .read_byte = pc_nvram_read_byte, + .write_byte = pc_nvram_write_byte, + .get_size = pc_nvram_get_size, +}; +EXPORT_SYMBOL(arch_nvram_ops); +#endif /* CONFIG_X86 */ + /* * The are the file operation function for user access to /dev/nvram */ @@ -152,7 +167,7 @@ void nvram_set_checksum(void) static loff_t nvram_misc_llseek(struct file *file, loff_t offset, int origin) { return generic_file_llseek_size(file, offset, origin, MAX_LFS_FILESIZE, - NVRAM_BYTES); + nvram_size); } static ssize_t nvram_misc_read(struct file *file, char __user *buf, @@ -303,8 +318,7 @@ static int nvram_misc_release(struct inode *inode, struct file *file) return 0; } -#ifdef CONFIG_PROC_FS - +#if defined(CONFIG_X86) && defined(CONFIG_PROC_FS) static const char * const floppy_types[] = { "none", "5.25'' 360k", "5.25'' 1.2M", "3.5'' 720k", "3.5'' 1.44M", "3.5'' 2.88M", "3.5'' 2.88M" @@ -394,7 +408,7 @@ static int nvram_proc_read(struct seq_file *seq, void *offset) return 0; } -#endif /* CONFIG_PROC_FS */ +#endif /* CONFIG_X86 && CONFIG_PROC_FS */ static const struct file_operations nvram_misc_fops = { .owner = THIS_MODULE, @@ -416,13 +430,17 @@ static int __init nvram_module_init(void) { int ret; + nvram_size = nvram_get_size(); + if (nvram_size < 0) + return nvram_size; + ret = misc_register(&nvram_misc); if (ret) { pr_err("nvram: can't misc_register on minor=%d\n", NVRAM_MINOR); return ret; } -#ifdef CONFIG_PROC_FS +#if defined(CONFIG_X86) && defined(CONFIG_PROC_FS) if (!proc_create_single("driver/nvram", 0, NULL, nvram_proc_read)) { pr_err("nvram: can't create /proc/driver/nvram\n"); misc_deregister(&nvram_misc); @@ -436,7 +454,7 @@ static int __init nvram_module_init(void) static void __exit nvram_module_exit(void) { -#ifdef CONFIG_PROC_FS +#if defined(CONFIG_X86) && defined(CONFIG_PROC_FS) remove_proc_entry("driver/nvram", NULL); #endif misc_deregister(&nvram_misc); diff --git a/include/linux/nvram.h b/include/linux/nvram.h index 79431dab87a1..bb4ea8cc6ea6 100644 --- a/include/linux/nvram.h +++ b/include/linux/nvram.h @@ -5,8 +5,30 @@ #include #include +/** + * struct nvram_ops - NVRAM functionality made available to drivers + * @read: validate checksum (if any) then load a range of bytes from NVRAM + * @write: store a range of bytes to NVRAM then update checksum (if any) + * @read_byte: load a single byte from NVRAM + * @write_byte: store a single byte to NVRAM + * @get_size: return the fixed number of bytes in the NVRAM + * + * Architectures which provide an nvram ops struct need not implement all + * of these methods. If the NVRAM hardware can be accessed only one byte + * at a time then it may be sufficient to provide .read_byte and .write_byte. + * If the NVRAM has a checksum (and it is to be checked) the .read and + * .write methods can be used to implement that efficiently. + * + * Portable drivers may use the wrapper functions defined here. + * The nvram_read() and nvram_write() functions call the .read and .write + * methods when available and fall back on the .read_byte and .write_byte + * methods otherwise. + */ + struct nvram_ops { ssize_t (*get_size)(void); + unsigned char (*read_byte)(int); + void (*write_byte)(unsigned char, int); ssize_t (*read)(char *, size_t, loff_t *); ssize_t (*write)(char *, size_t, loff_t *); }; @@ -25,11 +47,21 @@ static inline ssize_t nvram_get_size(void) static inline unsigned char nvram_read_byte(int addr) { +#ifdef CONFIG_PPC +#else + if (arch_nvram_ops.read_byte) + return arch_nvram_ops.read_byte(addr); +#endif return 0xFF; } static inline void nvram_write_byte(unsigned char val, int addr) { +#ifdef CONFIG_PPC +#else + if (arch_nvram_ops.write_byte) + arch_nvram_ops.write_byte(val, addr); +#endif } static inline ssize_t nvram_read(char *buf, size_t count, loff_t *ppos) -- cgit v1.3-8-gc7d7 From 2d58636e0af724f38acad25246c1625efec36122 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Tue, 15 Jan 2019 15:18:56 +1100 Subject: char/nvram: Allow the set_checksum and initialize ioctls to be omitted The drivers/char/nvram.c module has previously supported only RTC "CMOS" NVRAM, for which it provides appropriate checksum ioctls. Make these ioctls optional so the module can be re-used with other kinds of NVRAM. The ops struct methods that implement the ioctls now return error codes so that a multi-platform kernel binary can do the right thing when running on hardware without a suitable NVRAM. Signed-off-by: Finn Thain Signed-off-by: Greg Kroah-Hartman --- drivers/char/nvram.c | 70 +++++++++++++++++++++++++++++---------------------- include/linux/nvram.h | 2 ++ 2 files changed, 42 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/nvram.c b/drivers/char/nvram.c index 2df391f78986..f88ef41d0598 100644 --- a/drivers/char/nvram.c +++ b/drivers/char/nvram.c @@ -136,16 +136,25 @@ static void __nvram_set_checksum(void) __nvram_write_byte(sum & 0xff, PC_CKS_LOC + 1); } -#if 0 -void nvram_set_checksum(void) +static long pc_nvram_set_checksum(void) { - unsigned long flags; + spin_lock_irq(&rtc_lock); + __nvram_set_checksum(); + spin_unlock_irq(&rtc_lock); + return 0; +} - spin_lock_irqsave(&rtc_lock, flags); +static long pc_nvram_initialize(void) +{ + ssize_t i; + + spin_lock_irq(&rtc_lock); + for (i = 0; i < NVRAM_BYTES; ++i) + __nvram_write_byte(0, i); __nvram_set_checksum(); - spin_unlock_irqrestore(&rtc_lock, flags); + spin_unlock_irq(&rtc_lock); + return 0; } -#endif /* 0 */ static ssize_t pc_nvram_get_size(void) { @@ -156,6 +165,8 @@ const struct nvram_ops arch_nvram_ops = { .read_byte = pc_nvram_read_byte, .write_byte = pc_nvram_write_byte, .get_size = pc_nvram_get_size, + .set_checksum = pc_nvram_set_checksum, + .initialize = pc_nvram_initialize, }; EXPORT_SYMBOL(arch_nvram_ops); #endif /* CONFIG_X86 */ @@ -241,51 +252,50 @@ checksum_err: static long nvram_misc_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - int i; + long ret = -ENOTTY; switch (cmd) { - case NVRAM_INIT: /* initialize NVRAM contents and checksum */ if (!capable(CAP_SYS_ADMIN)) return -EACCES; - mutex_lock(&nvram_mutex); - spin_lock_irq(&rtc_lock); - - for (i = 0; i < NVRAM_BYTES; ++i) - __nvram_write_byte(0, i); - __nvram_set_checksum(); - - spin_unlock_irq(&rtc_lock); - mutex_unlock(&nvram_mutex); - return 0; - + if (arch_nvram_ops.initialize != NULL) { + mutex_lock(&nvram_mutex); + ret = arch_nvram_ops.initialize(); + mutex_unlock(&nvram_mutex); + } + break; case NVRAM_SETCKS: /* just set checksum, contents unchanged (maybe useful after * checksum garbaged somehow...) */ if (!capable(CAP_SYS_ADMIN)) return -EACCES; - mutex_lock(&nvram_mutex); - spin_lock_irq(&rtc_lock); - __nvram_set_checksum(); - spin_unlock_irq(&rtc_lock); - mutex_unlock(&nvram_mutex); - return 0; - - default: - return -ENOTTY; + if (arch_nvram_ops.set_checksum != NULL) { + mutex_lock(&nvram_mutex); + ret = arch_nvram_ops.set_checksum(); + mutex_unlock(&nvram_mutex); + } + break; } + return ret; } static int nvram_misc_open(struct inode *inode, struct file *file) { spin_lock(&nvram_state_lock); + /* Prevent multiple readers/writers if desired. */ if ((nvram_open_cnt && (file->f_flags & O_EXCL)) || - (nvram_open_mode & NVRAM_EXCL) || - ((file->f_mode & FMODE_WRITE) && (nvram_open_mode & NVRAM_WRITE))) { + (nvram_open_mode & NVRAM_EXCL)) { + spin_unlock(&nvram_state_lock); + return -EBUSY; + } + + /* Prevent multiple writers if the set_checksum ioctl is implemented. */ + if ((arch_nvram_ops.set_checksum != NULL) && + (file->f_mode & FMODE_WRITE) && (nvram_open_mode & NVRAM_WRITE)) { spin_unlock(&nvram_state_lock); return -EBUSY; } diff --git a/include/linux/nvram.h b/include/linux/nvram.h index bb4ea8cc6ea6..31c763087746 100644 --- a/include/linux/nvram.h +++ b/include/linux/nvram.h @@ -31,6 +31,8 @@ struct nvram_ops { void (*write_byte)(unsigned char, int); ssize_t (*read)(char *, size_t, loff_t *); ssize_t (*write)(char *, size_t, loff_t *); + long (*initialize)(void); + long (*set_checksum)(void); }; extern const struct nvram_ops arch_nvram_ops; -- cgit v1.3-8-gc7d7 From 109b3a89a7c48405d61a05d7a1720581a4f1574c Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Tue, 15 Jan 2019 15:18:56 +1100 Subject: char/nvram: Implement NVRAM read/write methods Refactor the RTC "CMOS" NVRAM functions so that they can be used as arch_nvram_ops methods. Checksumming logic is moved from the misc device operations to the nvram read/write operations. This makes the misc device implementation more generic. This preserves the locking mechanism such that "read if checksum valid" and "write and update checksum" remain atomic operations. Some platforms implement byte-range read/write methods which are similar to file_operations struct methods. Other platforms provide only byte-at-a-time methods. The former are more efficient but may be unavailable so fall back on the latter methods when necessary. Tested-by: Stan Johnson Signed-off-by: Finn Thain Signed-off-by: Greg Kroah-Hartman --- drivers/char/nvram.c | 120 +++++++++++++++++++++++++++++++------------------- include/linux/nvram.h | 32 +++++++++++++- 2 files changed, 104 insertions(+), 48 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/nvram.c b/drivers/char/nvram.c index f88ef41d0598..adcc213c331e 100644 --- a/drivers/char/nvram.c +++ b/drivers/char/nvram.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -161,7 +162,46 @@ static ssize_t pc_nvram_get_size(void) return NVRAM_BYTES; } +static ssize_t pc_nvram_read(char *buf, size_t count, loff_t *ppos) +{ + char *p = buf; + loff_t i; + + spin_lock_irq(&rtc_lock); + if (!__nvram_check_checksum()) { + spin_unlock_irq(&rtc_lock); + return -EIO; + } + for (i = *ppos; count > 0 && i < NVRAM_BYTES; --count, ++i, ++p) + *p = __nvram_read_byte(i); + spin_unlock_irq(&rtc_lock); + + *ppos = i; + return p - buf; +} + +static ssize_t pc_nvram_write(char *buf, size_t count, loff_t *ppos) +{ + char *p = buf; + loff_t i; + + spin_lock_irq(&rtc_lock); + if (!__nvram_check_checksum()) { + spin_unlock_irq(&rtc_lock); + return -EIO; + } + for (i = *ppos; count > 0 && i < NVRAM_BYTES; --count, ++i, ++p) + __nvram_write_byte(*p, i); + __nvram_set_checksum(); + spin_unlock_irq(&rtc_lock); + + *ppos = i; + return p - buf; +} + const struct nvram_ops arch_nvram_ops = { + .read = pc_nvram_read, + .write = pc_nvram_write, .read_byte = pc_nvram_read_byte, .write_byte = pc_nvram_write_byte, .get_size = pc_nvram_get_size, @@ -184,69 +224,57 @@ static loff_t nvram_misc_llseek(struct file *file, loff_t offset, int origin) static ssize_t nvram_misc_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - unsigned char contents[NVRAM_BYTES]; - unsigned i = *ppos; - unsigned char *tmp; - - spin_lock_irq(&rtc_lock); + char *tmp; + ssize_t ret; - if (!__nvram_check_checksum()) - goto checksum_err; - for (tmp = contents; count-- > 0 && i < NVRAM_BYTES; ++i, ++tmp) - *tmp = __nvram_read_byte(i); + if (!access_ok(buf, count)) + return -EFAULT; + if (*ppos >= nvram_size) + return 0; - spin_unlock_irq(&rtc_lock); + count = min_t(size_t, count, nvram_size - *ppos); + count = min_t(size_t, count, PAGE_SIZE); - if (copy_to_user(buf, contents, tmp - contents)) - return -EFAULT; + tmp = kmalloc(count, GFP_KERNEL); + if (!tmp) + return -ENOMEM; - *ppos = i; + ret = nvram_read(tmp, count, ppos); + if (ret <= 0) + goto out; - return tmp - contents; + if (copy_to_user(buf, tmp, ret)) { + *ppos -= ret; + ret = -EFAULT; + } -checksum_err: - spin_unlock_irq(&rtc_lock); - return -EIO; +out: + kfree(tmp); + return ret; } static ssize_t nvram_misc_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - unsigned char contents[NVRAM_BYTES]; - unsigned i = *ppos; - unsigned char *tmp; - - if (i >= NVRAM_BYTES) - return 0; /* Past EOF */ - - if (count > NVRAM_BYTES - i) - count = NVRAM_BYTES - i; - if (count > NVRAM_BYTES) - return -EFAULT; /* Can't happen, but prove it to gcc */ + char *tmp; + ssize_t ret; - if (copy_from_user(contents, buf, count)) + if (!access_ok(buf, count)) return -EFAULT; + if (*ppos >= nvram_size) + return 0; - spin_lock_irq(&rtc_lock); - - if (!__nvram_check_checksum()) - goto checksum_err; - - for (tmp = contents; count--; ++i, ++tmp) - __nvram_write_byte(*tmp, i); + count = min_t(size_t, count, nvram_size - *ppos); + count = min_t(size_t, count, PAGE_SIZE); - __nvram_set_checksum(); - - spin_unlock_irq(&rtc_lock); + tmp = memdup_user(buf, count); + if (IS_ERR(tmp)) + return PTR_ERR(tmp); - *ppos = i; - - return tmp - contents; - -checksum_err: - spin_unlock_irq(&rtc_lock); - return -EIO; + ret = nvram_write(tmp, count, ppos); + kfree(tmp); + return ret; } static long nvram_misc_ioctl(struct file *file, unsigned int cmd, diff --git a/include/linux/nvram.h b/include/linux/nvram.h index 31c763087746..9df85703735c 100644 --- a/include/linux/nvram.h +++ b/include/linux/nvram.h @@ -66,18 +66,46 @@ static inline void nvram_write_byte(unsigned char val, int addr) #endif } +static inline ssize_t nvram_read_bytes(char *buf, size_t count, loff_t *ppos) +{ + ssize_t nvram_size = nvram_get_size(); + loff_t i; + char *p = buf; + + if (nvram_size < 0) + return nvram_size; + for (i = *ppos; count > 0 && i < nvram_size; ++i, ++p, --count) + *p = nvram_read_byte(i); + *ppos = i; + return p - buf; +} + +static inline ssize_t nvram_write_bytes(char *buf, size_t count, loff_t *ppos) +{ + ssize_t nvram_size = nvram_get_size(); + loff_t i; + char *p = buf; + + if (nvram_size < 0) + return nvram_size; + for (i = *ppos; count > 0 && i < nvram_size; ++i, ++p, --count) + nvram_write_byte(*p, i); + *ppos = i; + return p - buf; +} + static inline ssize_t nvram_read(char *buf, size_t count, loff_t *ppos) { if (arch_nvram_ops.read) return arch_nvram_ops.read(buf, count, ppos); - return -ENODEV; + return nvram_read_bytes(buf, count, ppos); } static inline ssize_t nvram_write(char *buf, size_t count, loff_t *ppos) { if (arch_nvram_ops.write) return arch_nvram_ops.write(buf, count, ppos); - return -ENODEV; + return nvram_write_bytes(buf, count, ppos); } #endif /* _LINUX_NVRAM_H */ -- cgit v1.3-8-gc7d7 From 95ac14b8a32817dcd1f13ae4787891484966d2d5 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Tue, 15 Jan 2019 15:18:56 +1100 Subject: powerpc: Implement nvram ioctls Add the powerpc-specific ioctls to the nvram module. This allows the nvram module to replace the generic_nvram module. Tested-by: Stan Johnson Signed-off-by: Finn Thain Signed-off-by: Greg Kroah-Hartman --- drivers/char/nvram.c | 38 ++++++++++++++++++++++++++++++++++++++ include/linux/nvram.h | 2 ++ 2 files changed, 40 insertions(+) (limited to 'include/linux') diff --git a/drivers/char/nvram.c b/drivers/char/nvram.c index c9e295d73dc5..944f05fddacd 100644 --- a/drivers/char/nvram.c +++ b/drivers/char/nvram.c @@ -48,6 +48,9 @@ #include #include +#ifdef CONFIG_PPC +#include +#endif static DEFINE_MUTEX(nvram_mutex); static DEFINE_SPINLOCK(nvram_state_lock); @@ -283,6 +286,38 @@ static long nvram_misc_ioctl(struct file *file, unsigned int cmd, long ret = -ENOTTY; switch (cmd) { +#ifdef CONFIG_PPC + case OBSOLETE_PMAC_NVRAM_GET_OFFSET: + pr_warn("nvram: Using obsolete PMAC_NVRAM_GET_OFFSET ioctl\n"); + /* fall through */ + case IOC_NVRAM_GET_OFFSET: + ret = -EINVAL; +#ifdef CONFIG_PPC_PMAC + if (machine_is(powermac)) { + int part, offset; + + if (copy_from_user(&part, (void __user *)arg, + sizeof(part)) != 0) + return -EFAULT; + if (part < pmac_nvram_OF || part > pmac_nvram_NR) + return -EINVAL; + offset = pmac_get_partition(part); + if (copy_to_user((void __user *)arg, + &offset, sizeof(offset)) != 0) + return -EFAULT; + ret = 0; + } +#endif + break; + case IOC_NVRAM_SYNC: + if (ppc_md.nvram_sync != NULL) { + mutex_lock(&nvram_mutex); + ppc_md.nvram_sync(); + mutex_unlock(&nvram_mutex); + } + ret = 0; + break; +#elif defined(CONFIG_X86) || defined(CONFIG_M68K) case NVRAM_INIT: /* initialize NVRAM contents and checksum */ if (!capable(CAP_SYS_ADMIN)) @@ -306,6 +341,7 @@ static long nvram_misc_ioctl(struct file *file, unsigned int cmd, mutex_unlock(&nvram_mutex); } break; +#endif /* CONFIG_X86 || CONFIG_M68K */ } return ret; } @@ -321,12 +357,14 @@ static int nvram_misc_open(struct inode *inode, struct file *file) return -EBUSY; } +#if defined(CONFIG_X86) || defined(CONFIG_M68K) /* Prevent multiple writers if the set_checksum ioctl is implemented. */ if ((arch_nvram_ops.set_checksum != NULL) && (file->f_mode & FMODE_WRITE) && (nvram_open_mode & NVRAM_WRITE)) { spin_unlock(&nvram_state_lock); return -EBUSY; } +#endif if (file->f_flags & O_EXCL) nvram_open_mode |= NVRAM_EXCL; diff --git a/include/linux/nvram.h b/include/linux/nvram.h index 9df85703735c..9e3a957c8f1f 100644 --- a/include/linux/nvram.h +++ b/include/linux/nvram.h @@ -31,8 +31,10 @@ struct nvram_ops { void (*write_byte)(unsigned char, int); ssize_t (*read)(char *, size_t, loff_t *); ssize_t (*write)(char *, size_t, loff_t *); +#if defined(CONFIG_X86) || defined(CONFIG_M68K) long (*initialize)(void); long (*set_checksum)(void); +#endif }; extern const struct nvram_ops arch_nvram_ops; -- cgit v1.3-8-gc7d7 From f9c3a570f5fc584f2ca2dd222d1b8c8537fc55f6 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Tue, 15 Jan 2019 15:18:56 +1100 Subject: powerpc: Enable HAVE_ARCH_NVRAM_OPS and disable GENERIC_NVRAM Switch PPC32 kernels from the generic_nvram module to the nvram module. Also fix a theoretical bug where CHRP omits the chrp_nvram_init() call when CONFIG_NVRAM_MODULE=m. Tested-by: Stan Johnson Signed-off-by: Finn Thain Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/Kconfig | 6 +----- arch/powerpc/include/asm/nvram.h | 3 --- arch/powerpc/kernel/setup_32.c | 11 ----------- arch/powerpc/platforms/chrp/Makefile | 2 +- arch/powerpc/platforms/chrp/setup.c | 2 +- arch/powerpc/platforms/powermac/setup.c | 3 +-- drivers/char/Kconfig | 19 +++++++++---------- include/linux/nvram.h | 20 ++++++++++++++++++++ 8 files changed, 33 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 2890d36eb531..f62e6a3f9c4e 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -178,6 +178,7 @@ config PPC select HAVE_ARCH_KGDB select HAVE_ARCH_MMAP_RND_BITS select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT + select HAVE_ARCH_NVRAM_OPS if PPC32 select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select HAVE_CBPF_JIT if !PPC64 @@ -274,11 +275,6 @@ config SYSVIPC_COMPAT depends on COMPAT && SYSVIPC default y -# All PPC32s use generic nvram driver through ppc_md -config GENERIC_NVRAM - bool - default y if PPC32 - config SCHED_OMIT_FRAME_POINTER bool default y diff --git a/arch/powerpc/include/asm/nvram.h b/arch/powerpc/include/asm/nvram.h index 56a388da9c4f..629a5cdcc865 100644 --- a/arch/powerpc/include/asm/nvram.h +++ b/arch/powerpc/include/asm/nvram.h @@ -78,9 +78,6 @@ extern int pmac_get_partition(int partition); extern u8 pmac_xpram_read(int xpaddr); extern void pmac_xpram_write(int xpaddr, u8 data); -/* Synchronize NVRAM */ -extern void nvram_sync(void); - /* Initialize NVRAM OS partition */ extern int __init nvram_init_os_partition(struct nvram_os_partition *part); diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index f5107796e2d7..c31082233a25 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -148,17 +148,6 @@ static int __init ppc_setup_l3cr(char *str) } __setup("l3cr=", ppc_setup_l3cr); -#ifdef CONFIG_GENERIC_NVRAM - -void nvram_sync(void) -{ - if (ppc_md.nvram_sync) - ppc_md.nvram_sync(); -} -EXPORT_SYMBOL(nvram_sync); - -#endif /* CONFIG_NVRAM */ - static int __init ppc_init(void) { /* clear the progress line */ diff --git a/arch/powerpc/platforms/chrp/Makefile b/arch/powerpc/platforms/chrp/Makefile index 4b3bfadc70fa..dc3465cc8bc6 100644 --- a/arch/powerpc/platforms/chrp/Makefile +++ b/arch/powerpc/platforms/chrp/Makefile @@ -1,3 +1,3 @@ obj-y += setup.o time.o pegasos_eth.o pci.o obj-$(CONFIG_SMP) += smp.o -obj-$(CONFIG_NVRAM) += nvram.o +obj-$(CONFIG_NVRAM:m=y) += nvram.o diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c index e66644e0fb40..e8e804289c8e 100644 --- a/arch/powerpc/platforms/chrp/setup.c +++ b/arch/powerpc/platforms/chrp/setup.c @@ -550,7 +550,7 @@ static void __init chrp_init_IRQ(void) static void __init chrp_init2(void) { -#ifdef CONFIG_NVRAM +#if IS_ENABLED(CONFIG_NVRAM) chrp_nvram_init(); #endif diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c index 2e8221e20ee8..b47f49cf9c4d 100644 --- a/arch/powerpc/platforms/powermac/setup.c +++ b/arch/powerpc/platforms/powermac/setup.c @@ -316,8 +316,7 @@ static void __init pmac_setup_arch(void) find_via_pmu(); smu_init(); -#if defined(CONFIG_NVRAM) || defined(CONFIG_NVRAM_MODULE) || \ - defined(CONFIG_PPC64) +#if IS_ENABLED(CONFIG_NVRAM) || defined(CONFIG_PPC64) pmac_nvram_init(); #endif #ifdef CONFIG_PPC32 diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index ce9979529cf3..72866a004f07 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -244,25 +244,24 @@ source "drivers/char/hw_random/Kconfig" config NVRAM tristate "/dev/nvram support" - depends on X86 || GENERIC_NVRAM || HAVE_ARCH_NVRAM_OPS - default M68K + depends on X86 || HAVE_ARCH_NVRAM_OPS + default M68K || PPC ---help--- If you say Y here and create a character special file /dev/nvram with major number 10 and minor number 144 using mknod ("man mknod"), - you get read and write access to the extra bytes of non-volatile - memory in the real time clock (RTC), which is contained in every PC - and most Ataris. The actual number of bytes varies, depending on the - nvram in the system, but is usually 114 (128-14 for the RTC). - - This memory is conventionally called "CMOS RAM" on PCs and "NVRAM" - on Ataris. /dev/nvram may be used to view settings there, or to - change them (with some utility). It could also be used to frequently + you get read and write access to the non-volatile memory. + + /dev/nvram may be used to view settings in NVRAM or to change them + (with some utility). It could also be used to frequently save a few bits of very important data that may not be lost over power-off and for which writing to disk is too insecure. Note however that most NVRAM space in a PC belongs to the BIOS and you should NEVER idly tamper with it. See Ralf Brown's interrupt list for a guide to the use of CMOS bytes by your BIOS. + This memory is conventionally called "NVRAM" on PowerPC machines, + "CMOS RAM" on PCs, "NVRAM" on Ataris and "PRAM" on Macintoshes. + To compile this driver as a module, choose M here: the module will be called nvram. diff --git a/include/linux/nvram.h b/include/linux/nvram.h index 9e3a957c8f1f..d29d9c93a927 100644 --- a/include/linux/nvram.h +++ b/include/linux/nvram.h @@ -5,6 +5,10 @@ #include #include +#ifdef CONFIG_PPC +#include +#endif + /** * struct nvram_ops - NVRAM functionality made available to drivers * @read: validate checksum (if any) then load a range of bytes from NVRAM @@ -42,6 +46,8 @@ extern const struct nvram_ops arch_nvram_ops; static inline ssize_t nvram_get_size(void) { #ifdef CONFIG_PPC + if (ppc_md.nvram_size) + return ppc_md.nvram_size(); #else if (arch_nvram_ops.get_size) return arch_nvram_ops.get_size(); @@ -52,6 +58,8 @@ static inline ssize_t nvram_get_size(void) static inline unsigned char nvram_read_byte(int addr) { #ifdef CONFIG_PPC + if (ppc_md.nvram_read_val) + return ppc_md.nvram_read_val(addr); #else if (arch_nvram_ops.read_byte) return arch_nvram_ops.read_byte(addr); @@ -62,6 +70,8 @@ static inline unsigned char nvram_read_byte(int addr) static inline void nvram_write_byte(unsigned char val, int addr) { #ifdef CONFIG_PPC + if (ppc_md.nvram_write_val) + ppc_md.nvram_write_val(addr, val); #else if (arch_nvram_ops.write_byte) arch_nvram_ops.write_byte(val, addr); @@ -98,15 +108,25 @@ static inline ssize_t nvram_write_bytes(char *buf, size_t count, loff_t *ppos) static inline ssize_t nvram_read(char *buf, size_t count, loff_t *ppos) { +#ifdef CONFIG_PPC + if (ppc_md.nvram_read) + return ppc_md.nvram_read(buf, count, ppos); +#else if (arch_nvram_ops.read) return arch_nvram_ops.read(buf, count, ppos); +#endif return nvram_read_bytes(buf, count, ppos); } static inline ssize_t nvram_write(char *buf, size_t count, loff_t *ppos) { +#ifdef CONFIG_PPC + if (ppc_md.nvram_write) + return ppc_md.nvram_write(buf, count, ppos); +#else if (arch_nvram_ops.write) return arch_nvram_ops.write(buf, count, ppos); +#endif return nvram_write_bytes(buf, count, ppos); } -- cgit v1.3-8-gc7d7 From 11f1ceca7031deefc1a34236ab7b94360016b71d Mon Sep 17 00:00:00 2001 From: Georgi Djakov Date: Wed, 16 Jan 2019 18:10:56 +0200 Subject: interconnect: Add generic on-chip interconnect API This patch introduces a new API to get requirements and configure the interconnect buses across the entire chipset to fit with the current demand. The API is using a consumer/provider-based model, where the providers are the interconnect buses and the consumers could be various drivers. The consumers request interconnect resources (path) between endpoints and set the desired constraints on this data flow path. The providers receive requests from consumers and aggregate these requests for all master-slave pairs on that path. Then the providers configure each node along the path to support a bandwidth that satisfies all bandwidth requests that cross through that node. The topology could be complicated and multi-tiered and is SoC specific. Reviewed-by: Evan Green Signed-off-by: Georgi Djakov Signed-off-by: Greg Kroah-Hartman --- Documentation/interconnect/interconnect.rst | 94 +++++ drivers/Kconfig | 2 + drivers/Makefile | 1 + drivers/interconnect/Kconfig | 10 + drivers/interconnect/Makefile | 5 + drivers/interconnect/core.c | 567 ++++++++++++++++++++++++++++ include/linux/interconnect-provider.h | 125 ++++++ include/linux/interconnect.h | 52 +++ 8 files changed, 856 insertions(+) create mode 100644 Documentation/interconnect/interconnect.rst create mode 100644 drivers/interconnect/Kconfig create mode 100644 drivers/interconnect/Makefile create mode 100644 drivers/interconnect/core.c create mode 100644 include/linux/interconnect-provider.h create mode 100644 include/linux/interconnect.h (limited to 'include/linux') diff --git a/Documentation/interconnect/interconnect.rst b/Documentation/interconnect/interconnect.rst new file mode 100644 index 000000000000..b8107dcc4cd3 --- /dev/null +++ b/Documentation/interconnect/interconnect.rst @@ -0,0 +1,94 @@ +.. SPDX-License-Identifier: GPL-2.0 + +===================================== +GENERIC SYSTEM INTERCONNECT SUBSYSTEM +===================================== + +Introduction +------------ + +This framework is designed to provide a standard kernel interface to control +the settings of the interconnects on an SoC. These settings can be throughput, +latency and priority between multiple interconnected devices or functional +blocks. This can be controlled dynamically in order to save power or provide +maximum performance. + +The interconnect bus is hardware with configurable parameters, which can be +set on a data path according to the requests received from various drivers. +An example of interconnect buses are the interconnects between various +components or functional blocks in chipsets. There can be multiple interconnects +on an SoC that can be multi-tiered. + +Below is a simplified diagram of a real-world SoC interconnect bus topology. + +:: + + +----------------+ +----------------+ + | HW Accelerator |--->| M NoC |<---------------+ + +----------------+ +----------------+ | + | | +------------+ + +-----+ +-------------+ V +------+ | | + | DDR | | +--------+ | PCIe | | | + +-----+ | | Slaves | +------+ | | + ^ ^ | +--------+ | | C NoC | + | | V V | | + +------------------+ +------------------------+ | | +-----+ + | |-->| |-->| |-->| CPU | + | |-->| |<--| | +-----+ + | Mem NoC | | S NoC | +------------+ + | |<--| |---------+ | + | |<--| |<------+ | | +--------+ + +------------------+ +------------------------+ | | +-->| Slaves | + ^ ^ ^ ^ ^ | | +--------+ + | | | | | | V + +------+ | +-----+ +-----+ +---------+ +----------------+ +--------+ + | CPUs | | | GPU | | DSP | | Masters |-->| P NoC |-->| Slaves | + +------+ | +-----+ +-----+ +---------+ +----------------+ +--------+ + | + +-------+ + | Modem | + +-------+ + +Terminology +----------- + +Interconnect provider is the software definition of the interconnect hardware. +The interconnect providers on the above diagram are M NoC, S NoC, C NoC, P NoC +and Mem NoC. + +Interconnect node is the software definition of the interconnect hardware +port. Each interconnect provider consists of multiple interconnect nodes, +which are connected to other SoC components including other interconnect +providers. The point on the diagram where the CPUs connect to the memory is +called an interconnect node, which belongs to the Mem NoC interconnect provider. + +Interconnect endpoints are the first or the last element of the path. Every +endpoint is a node, but not every node is an endpoint. + +Interconnect path is everything between two endpoints including all the nodes +that have to be traversed to reach from a source to destination node. It may +include multiple master-slave pairs across several interconnect providers. + +Interconnect consumers are the entities which make use of the data paths exposed +by the providers. The consumers send requests to providers requesting various +throughput, latency and priority. Usually the consumers are device drivers, that +send request based on their needs. An example for a consumer is a video decoder +that supports various formats and image sizes. + +Interconnect providers +---------------------- + +Interconnect provider is an entity that implements methods to initialize and +configure interconnect bus hardware. The interconnect provider drivers should +be registered with the interconnect provider core. + +.. kernel-doc:: include/linux/interconnect-provider.h + +Interconnect consumers +---------------------- + +Interconnect consumers are the clients which use the interconnect APIs to +get paths between endpoints and set their bandwidth/latency/QoS requirements +for these interconnect paths. + +.. kernel-doc:: include/linux/interconnect.h diff --git a/drivers/Kconfig b/drivers/Kconfig index 4f9f99057ff8..45f9decb9848 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -228,4 +228,6 @@ source "drivers/siox/Kconfig" source "drivers/slimbus/Kconfig" +source "drivers/interconnect/Kconfig" + endmenu diff --git a/drivers/Makefile b/drivers/Makefile index e1ce029d28fd..bb15b9d0e793 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -186,3 +186,4 @@ obj-$(CONFIG_MULTIPLEXER) += mux/ obj-$(CONFIG_UNISYS_VISORBUS) += visorbus/ obj-$(CONFIG_SIOX) += siox/ obj-$(CONFIG_GNSS) += gnss/ +obj-$(CONFIG_INTERCONNECT) += interconnect/ diff --git a/drivers/interconnect/Kconfig b/drivers/interconnect/Kconfig new file mode 100644 index 000000000000..a261c7d41deb --- /dev/null +++ b/drivers/interconnect/Kconfig @@ -0,0 +1,10 @@ +menuconfig INTERCONNECT + tristate "On-Chip Interconnect management support" + help + Support for management of the on-chip interconnects. + + This framework is designed to provide a generic interface for + managing the interconnects in a SoC. + + If unsure, say no. + diff --git a/drivers/interconnect/Makefile b/drivers/interconnect/Makefile new file mode 100644 index 000000000000..7a01f33b5593 --- /dev/null +++ b/drivers/interconnect/Makefile @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0 + +icc-core-objs := core.o + +obj-$(CONFIG_INTERCONNECT) += icc-core.o diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c new file mode 100644 index 000000000000..2b937b4f43c4 --- /dev/null +++ b/drivers/interconnect/core.c @@ -0,0 +1,567 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Interconnect framework core driver + * + * Copyright (c) 2017-2019, Linaro Ltd. + * Author: Georgi Djakov + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static DEFINE_IDR(icc_idr); +static LIST_HEAD(icc_providers); +static DEFINE_MUTEX(icc_lock); + +/** + * struct icc_req - constraints that are attached to each node + * @req_node: entry in list of requests for the particular @node + * @node: the interconnect node to which this constraint applies + * @dev: reference to the device that sets the constraints + * @avg_bw: an integer describing the average bandwidth in kBps + * @peak_bw: an integer describing the peak bandwidth in kBps + */ +struct icc_req { + struct hlist_node req_node; + struct icc_node *node; + struct device *dev; + u32 avg_bw; + u32 peak_bw; +}; + +/** + * struct icc_path - interconnect path structure + * @num_nodes: number of hops (nodes) + * @reqs: array of the requests applicable to this path of nodes + */ +struct icc_path { + size_t num_nodes; + struct icc_req reqs[]; +}; + +static struct icc_node *node_find(const int id) +{ + return idr_find(&icc_idr, id); +} + +static struct icc_path *path_init(struct device *dev, struct icc_node *dst, + ssize_t num_nodes) +{ + struct icc_node *node = dst; + struct icc_path *path; + int i; + + path = kzalloc(struct_size(path, reqs, num_nodes), GFP_KERNEL); + if (!path) + return ERR_PTR(-ENOMEM); + + path->num_nodes = num_nodes; + + for (i = num_nodes - 1; i >= 0; i--) { + node->provider->users++; + hlist_add_head(&path->reqs[i].req_node, &node->req_list); + path->reqs[i].node = node; + path->reqs[i].dev = dev; + /* reference to previous node was saved during path traversal */ + node = node->reverse; + } + + return path; +} + +static struct icc_path *path_find(struct device *dev, struct icc_node *src, + struct icc_node *dst) +{ + struct icc_path *path = ERR_PTR(-EPROBE_DEFER); + struct icc_node *n, *node = NULL; + struct list_head traverse_list; + struct list_head edge_list; + struct list_head visited_list; + size_t i, depth = 1; + bool found = false; + + INIT_LIST_HEAD(&traverse_list); + INIT_LIST_HEAD(&edge_list); + INIT_LIST_HEAD(&visited_list); + + list_add(&src->search_list, &traverse_list); + src->reverse = NULL; + + do { + list_for_each_entry_safe(node, n, &traverse_list, search_list) { + if (node == dst) { + found = true; + list_splice_init(&edge_list, &visited_list); + list_splice_init(&traverse_list, &visited_list); + break; + } + for (i = 0; i < node->num_links; i++) { + struct icc_node *tmp = node->links[i]; + + if (!tmp) { + path = ERR_PTR(-ENOENT); + goto out; + } + + if (tmp->is_traversed) + continue; + + tmp->is_traversed = true; + tmp->reverse = node; + list_add_tail(&tmp->search_list, &edge_list); + } + } + + if (found) + break; + + list_splice_init(&traverse_list, &visited_list); + list_splice_init(&edge_list, &traverse_list); + + /* count the hops including the source */ + depth++; + + } while (!list_empty(&traverse_list)); + +out: + + /* reset the traversed state */ + list_for_each_entry_reverse(n, &visited_list, search_list) + n->is_traversed = false; + + if (found) + path = path_init(dev, dst, depth); + + return path; +} + +/* + * We want the path to honor all bandwidth requests, so the average and peak + * bandwidth requirements from each consumer are aggregated at each node. + * The aggregation is platform specific, so each platform can customize it by + * implementing its own aggregate() function. + */ + +static int aggregate_requests(struct icc_node *node) +{ + struct icc_provider *p = node->provider; + struct icc_req *r; + + node->avg_bw = 0; + node->peak_bw = 0; + + hlist_for_each_entry(r, &node->req_list, req_node) + p->aggregate(node, r->avg_bw, r->peak_bw, + &node->avg_bw, &node->peak_bw); + + return 0; +} + +static int apply_constraints(struct icc_path *path) +{ + struct icc_node *next, *prev = NULL; + int ret = -EINVAL; + int i; + + for (i = 0; i < path->num_nodes; i++) { + next = path->reqs[i].node; + + /* + * Both endpoints should be valid master-slave pairs of the + * same interconnect provider that will be configured. + */ + if (!prev || next->provider != prev->provider) { + prev = next; + continue; + } + + /* set the constraints */ + ret = next->provider->set(prev, next); + if (ret) + goto out; + + prev = next; + } +out: + return ret; +} + +/** + * icc_set_bw() - set bandwidth constraints on an interconnect path + * @path: reference to the path returned by icc_get() + * @avg_bw: average bandwidth in kilobytes per second + * @peak_bw: peak bandwidth in kilobytes per second + * + * This function is used by an interconnect consumer to express its own needs + * in terms of bandwidth for a previously requested path between two endpoints. + * The requests are aggregated and each node is updated accordingly. The entire + * path is locked by a mutex to ensure that the set() is completed. + * The @path can be NULL when the "interconnects" DT properties is missing, + * which will mean that no constraints will be set. + * + * Returns 0 on success, or an appropriate error code otherwise. + */ +int icc_set_bw(struct icc_path *path, u32 avg_bw, u32 peak_bw) +{ + struct icc_node *node; + size_t i; + int ret; + + if (!path) + return 0; + + mutex_lock(&icc_lock); + + for (i = 0; i < path->num_nodes; i++) { + node = path->reqs[i].node; + + /* update the consumer request for this path */ + path->reqs[i].avg_bw = avg_bw; + path->reqs[i].peak_bw = peak_bw; + + /* aggregate requests for this node */ + aggregate_requests(node); + } + + ret = apply_constraints(path); + if (ret) + pr_debug("interconnect: error applying constraints (%d)\n", + ret); + + mutex_unlock(&icc_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(icc_set_bw); + +/** + * icc_get() - return a handle for path between two endpoints + * @dev: the device requesting the path + * @src_id: source device port id + * @dst_id: destination device port id + * + * This function will search for a path between two endpoints and return an + * icc_path handle on success. Use icc_put() to release + * constraints when they are not needed anymore. + * If the interconnect API is disabled, NULL is returned and the consumer + * drivers will still build. Drivers are free to handle this specifically, + * but they don't have to. + * + * Return: icc_path pointer on success, ERR_PTR() on error or NULL if the + * interconnect API is disabled. + */ +struct icc_path *icc_get(struct device *dev, const int src_id, const int dst_id) +{ + struct icc_node *src, *dst; + struct icc_path *path = ERR_PTR(-EPROBE_DEFER); + + mutex_lock(&icc_lock); + + src = node_find(src_id); + if (!src) + goto out; + + dst = node_find(dst_id); + if (!dst) + goto out; + + path = path_find(dev, src, dst); + if (IS_ERR(path)) + dev_err(dev, "%s: invalid path=%ld\n", __func__, PTR_ERR(path)); + +out: + mutex_unlock(&icc_lock); + return path; +} +EXPORT_SYMBOL_GPL(icc_get); + +/** + * icc_put() - release the reference to the icc_path + * @path: interconnect path + * + * Use this function to release the constraints on a path when the path is + * no longer needed. The constraints will be re-aggregated. + */ +void icc_put(struct icc_path *path) +{ + struct icc_node *node; + size_t i; + int ret; + + if (!path || WARN_ON(IS_ERR(path))) + return; + + ret = icc_set_bw(path, 0, 0); + if (ret) + pr_err("%s: error (%d)\n", __func__, ret); + + mutex_lock(&icc_lock); + for (i = 0; i < path->num_nodes; i++) { + node = path->reqs[i].node; + hlist_del(&path->reqs[i].req_node); + if (!WARN_ON(!node->provider->users)) + node->provider->users--; + } + mutex_unlock(&icc_lock); + + kfree(path); +} +EXPORT_SYMBOL_GPL(icc_put); + +static struct icc_node *icc_node_create_nolock(int id) +{ + struct icc_node *node; + + /* check if node already exists */ + node = node_find(id); + if (node) + return node; + + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return ERR_PTR(-ENOMEM); + + id = idr_alloc(&icc_idr, node, id, id + 1, GFP_KERNEL); + if (id < 0) { + WARN(1, "%s: couldn't get idr\n", __func__); + kfree(node); + return ERR_PTR(id); + } + + node->id = id; + + return node; +} + +/** + * icc_node_create() - create a node + * @id: node id + * + * Return: icc_node pointer on success, or ERR_PTR() on error + */ +struct icc_node *icc_node_create(int id) +{ + struct icc_node *node; + + mutex_lock(&icc_lock); + + node = icc_node_create_nolock(id); + + mutex_unlock(&icc_lock); + + return node; +} +EXPORT_SYMBOL_GPL(icc_node_create); + +/** + * icc_node_destroy() - destroy a node + * @id: node id + */ +void icc_node_destroy(int id) +{ + struct icc_node *node; + + mutex_lock(&icc_lock); + + node = node_find(id); + if (node) { + idr_remove(&icc_idr, node->id); + WARN_ON(!hlist_empty(&node->req_list)); + } + + mutex_unlock(&icc_lock); + + kfree(node); +} +EXPORT_SYMBOL_GPL(icc_node_destroy); + +/** + * icc_link_create() - create a link between two nodes + * @node: source node id + * @dst_id: destination node id + * + * Create a link between two nodes. The nodes might belong to different + * interconnect providers and the @dst_id node might not exist (if the + * provider driver has not probed yet). So just create the @dst_id node + * and when the actual provider driver is probed, the rest of the node + * data is filled. + * + * Return: 0 on success, or an error code otherwise + */ +int icc_link_create(struct icc_node *node, const int dst_id) +{ + struct icc_node *dst; + struct icc_node **new; + int ret = 0; + + if (!node->provider) + return -EINVAL; + + mutex_lock(&icc_lock); + + dst = node_find(dst_id); + if (!dst) { + dst = icc_node_create_nolock(dst_id); + + if (IS_ERR(dst)) { + ret = PTR_ERR(dst); + goto out; + } + } + + new = krealloc(node->links, + (node->num_links + 1) * sizeof(*node->links), + GFP_KERNEL); + if (!new) { + ret = -ENOMEM; + goto out; + } + + node->links = new; + node->links[node->num_links++] = dst; + +out: + mutex_unlock(&icc_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(icc_link_create); + +/** + * icc_link_destroy() - destroy a link between two nodes + * @src: pointer to source node + * @dst: pointer to destination node + * + * Return: 0 on success, or an error code otherwise + */ +int icc_link_destroy(struct icc_node *src, struct icc_node *dst) +{ + struct icc_node **new; + size_t slot; + int ret = 0; + + if (IS_ERR_OR_NULL(src)) + return -EINVAL; + + if (IS_ERR_OR_NULL(dst)) + return -EINVAL; + + mutex_lock(&icc_lock); + + for (slot = 0; slot < src->num_links; slot++) + if (src->links[slot] == dst) + break; + + if (WARN_ON(slot == src->num_links)) { + ret = -ENXIO; + goto out; + } + + src->links[slot] = src->links[--src->num_links]; + + new = krealloc(src->links, src->num_links * sizeof(*src->links), + GFP_KERNEL); + if (new) + src->links = new; + +out: + mutex_unlock(&icc_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(icc_link_destroy); + +/** + * icc_node_add() - add interconnect node to interconnect provider + * @node: pointer to the interconnect node + * @provider: pointer to the interconnect provider + */ +void icc_node_add(struct icc_node *node, struct icc_provider *provider) +{ + mutex_lock(&icc_lock); + + node->provider = provider; + list_add_tail(&node->node_list, &provider->nodes); + + mutex_unlock(&icc_lock); +} +EXPORT_SYMBOL_GPL(icc_node_add); + +/** + * icc_node_del() - delete interconnect node from interconnect provider + * @node: pointer to the interconnect node + */ +void icc_node_del(struct icc_node *node) +{ + mutex_lock(&icc_lock); + + list_del(&node->node_list); + + mutex_unlock(&icc_lock); +} +EXPORT_SYMBOL_GPL(icc_node_del); + +/** + * icc_provider_add() - add a new interconnect provider + * @provider: the interconnect provider that will be added into topology + * + * Return: 0 on success, or an error code otherwise + */ +int icc_provider_add(struct icc_provider *provider) +{ + if (WARN_ON(!provider->set)) + return -EINVAL; + + mutex_lock(&icc_lock); + + INIT_LIST_HEAD(&provider->nodes); + list_add_tail(&provider->provider_list, &icc_providers); + + mutex_unlock(&icc_lock); + + dev_dbg(provider->dev, "interconnect provider added to topology\n"); + + return 0; +} +EXPORT_SYMBOL_GPL(icc_provider_add); + +/** + * icc_provider_del() - delete previously added interconnect provider + * @provider: the interconnect provider that will be removed from topology + * + * Return: 0 on success, or an error code otherwise + */ +int icc_provider_del(struct icc_provider *provider) +{ + mutex_lock(&icc_lock); + if (provider->users) { + pr_warn("interconnect provider still has %d users\n", + provider->users); + mutex_unlock(&icc_lock); + return -EBUSY; + } + + if (!list_empty(&provider->nodes)) { + pr_warn("interconnect provider still has nodes\n"); + mutex_unlock(&icc_lock); + return -EBUSY; + } + + list_del(&provider->provider_list); + mutex_unlock(&icc_lock); + + return 0; +} +EXPORT_SYMBOL_GPL(icc_provider_del); + +MODULE_AUTHOR("Georgi Djakov "); +MODULE_DESCRIPTION("Interconnect Driver Core"); +MODULE_LICENSE("GPL v2"); diff --git a/include/linux/interconnect-provider.h b/include/linux/interconnect-provider.h new file mode 100644 index 000000000000..78208a754181 --- /dev/null +++ b/include/linux/interconnect-provider.h @@ -0,0 +1,125 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2018, Linaro Ltd. + * Author: Georgi Djakov + */ + +#ifndef __LINUX_INTERCONNECT_PROVIDER_H +#define __LINUX_INTERCONNECT_PROVIDER_H + +#include + +#define icc_units_to_bps(bw) ((bw) * 1000ULL) + +struct icc_node; + +/** + * struct icc_provider - interconnect provider (controller) entity that might + * provide multiple interconnect controls + * + * @provider_list: list of the registered interconnect providers + * @nodes: internal list of the interconnect provider nodes + * @set: pointer to device specific set operation function + * @aggregate: pointer to device specific aggregate operation function + * @dev: the device this interconnect provider belongs to + * @users: count of active users + * @data: pointer to private data + */ +struct icc_provider { + struct list_head provider_list; + struct list_head nodes; + int (*set)(struct icc_node *src, struct icc_node *dst); + int (*aggregate)(struct icc_node *node, u32 avg_bw, u32 peak_bw, + u32 *agg_avg, u32 *agg_peak); + struct device *dev; + int users; + void *data; +}; + +/** + * struct icc_node - entity that is part of the interconnect topology + * + * @id: platform specific node id + * @name: node name used in debugfs + * @links: a list of targets pointing to where we can go next when traversing + * @num_links: number of links to other interconnect nodes + * @provider: points to the interconnect provider of this node + * @node_list: the list entry in the parent provider's "nodes" list + * @search_list: list used when walking the nodes graph + * @reverse: pointer to previous node when walking the nodes graph + * @is_traversed: flag that is used when walking the nodes graph + * @req_list: a list of QoS constraint requests associated with this node + * @avg_bw: aggregated value of average bandwidth requests from all consumers + * @peak_bw: aggregated value of peak bandwidth requests from all consumers + * @data: pointer to private data + */ +struct icc_node { + int id; + const char *name; + struct icc_node **links; + size_t num_links; + + struct icc_provider *provider; + struct list_head node_list; + struct list_head search_list; + struct icc_node *reverse; + u8 is_traversed:1; + struct hlist_head req_list; + u32 avg_bw; + u32 peak_bw; + void *data; +}; + +#if IS_ENABLED(CONFIG_INTERCONNECT) + +struct icc_node *icc_node_create(int id); +void icc_node_destroy(int id); +int icc_link_create(struct icc_node *node, const int dst_id); +int icc_link_destroy(struct icc_node *src, struct icc_node *dst); +void icc_node_add(struct icc_node *node, struct icc_provider *provider); +void icc_node_del(struct icc_node *node); +int icc_provider_add(struct icc_provider *provider); +int icc_provider_del(struct icc_provider *provider); + +#else + +static inline struct icc_node *icc_node_create(int id) +{ + return ERR_PTR(-ENOTSUPP); +} + +void icc_node_destroy(int id) +{ +} + +static inline int icc_link_create(struct icc_node *node, const int dst_id) +{ + return -ENOTSUPP; +} + +int icc_link_destroy(struct icc_node *src, struct icc_node *dst) +{ + return -ENOTSUPP; +} + +void icc_node_add(struct icc_node *node, struct icc_provider *provider) +{ +} + +void icc_node_del(struct icc_node *node) +{ +} + +static inline int icc_provider_add(struct icc_provider *provider) +{ + return -ENOTSUPP; +} + +static inline int icc_provider_del(struct icc_provider *provider) +{ + return -ENOTSUPP; +} + +#endif /* CONFIG_INTERCONNECT */ + +#endif /* __LINUX_INTERCONNECT_PROVIDER_H */ diff --git a/include/linux/interconnect.h b/include/linux/interconnect.h new file mode 100644 index 000000000000..c331afb3a2c8 --- /dev/null +++ b/include/linux/interconnect.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2018-2019, Linaro Ltd. + * Author: Georgi Djakov + */ + +#ifndef __LINUX_INTERCONNECT_H +#define __LINUX_INTERCONNECT_H + +#include +#include + +/* macros for converting to icc units */ +#define Bps_to_icc(x) ((x) / 1000) +#define kBps_to_icc(x) (x) +#define MBps_to_icc(x) ((x) * 1000) +#define GBps_to_icc(x) ((x) * 1000 * 1000) +#define bps_to_icc(x) (1) +#define kbps_to_icc(x) ((x) / 8 + ((x) % 8 ? 1 : 0)) +#define Mbps_to_icc(x) ((x) * 1000 / 8) +#define Gbps_to_icc(x) ((x) * 1000 * 1000 / 8) + +struct icc_path; +struct device; + +#if IS_ENABLED(CONFIG_INTERCONNECT) + +struct icc_path *icc_get(struct device *dev, const int src_id, + const int dst_id); +void icc_put(struct icc_path *path); +int icc_set_bw(struct icc_path *path, u32 avg_bw, u32 peak_bw); + +#else + +static inline struct icc_path *icc_get(struct device *dev, const int src_id, + const int dst_id) +{ + return NULL; +} + +static inline void icc_put(struct icc_path *path) +{ +} + +static inline int icc_set_bw(struct icc_path *path, u32 avg_bw, u32 peak_bw) +{ + return 0; +} + +#endif /* CONFIG_INTERCONNECT */ + +#endif /* __LINUX_INTERCONNECT_H */ -- cgit v1.3-8-gc7d7 From 87e3031b6fbd83ea83adf1bf9602bcce313ee787 Mon Sep 17 00:00:00 2001 From: Georgi Djakov Date: Wed, 16 Jan 2019 18:10:58 +0200 Subject: interconnect: Allow endpoints translation via DT Currently we support only platform data for specifying the interconnect endpoints. As now the endpoints are hard-coded into the consumer driver this may lead to complications when a single driver is used by multiple SoCs, which may have different interconnect topology. To avoid cluttering the consumer drivers, introduce a translation function to help us get the board specific interconnect data from device-tree. Reviewed-by: Evan Green Signed-off-by: Georgi Djakov Signed-off-by: Greg Kroah-Hartman --- drivers/interconnect/core.c | 149 ++++++++++++++++++++++++++++++++++ include/linux/interconnect-provider.h | 17 ++++ include/linux/interconnect.h | 7 ++ 3 files changed, 173 insertions(+) (limited to 'include/linux') diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c index 2b937b4f43c4..a8c2bd35197f 100644 --- a/drivers/interconnect/core.c +++ b/drivers/interconnect/core.c @@ -15,6 +15,7 @@ #include #include #include +#include #include static DEFINE_IDR(icc_idr); @@ -194,6 +195,152 @@ out: return ret; } +/* of_icc_xlate_onecell() - Translate function using a single index. + * @spec: OF phandle args to map into an interconnect node. + * @data: private data (pointer to struct icc_onecell_data) + * + * This is a generic translate function that can be used to model simple + * interconnect providers that have one device tree node and provide + * multiple interconnect nodes. A single cell is used as an index into + * an array of icc nodes specified in the icc_onecell_data struct when + * registering the provider. + */ +struct icc_node *of_icc_xlate_onecell(struct of_phandle_args *spec, + void *data) +{ + struct icc_onecell_data *icc_data = data; + unsigned int idx = spec->args[0]; + + if (idx >= icc_data->num_nodes) { + pr_err("%s: invalid index %u\n", __func__, idx); + return ERR_PTR(-EINVAL); + } + + return icc_data->nodes[idx]; +} +EXPORT_SYMBOL_GPL(of_icc_xlate_onecell); + +/** + * of_icc_get_from_provider() - Look-up interconnect node + * @spec: OF phandle args to use for look-up + * + * Looks for interconnect provider under the node specified by @spec and if + * found, uses xlate function of the provider to map phandle args to node. + * + * Returns a valid pointer to struct icc_node on success or ERR_PTR() + * on failure. + */ +static struct icc_node *of_icc_get_from_provider(struct of_phandle_args *spec) +{ + struct icc_node *node = ERR_PTR(-EPROBE_DEFER); + struct icc_provider *provider; + + if (!spec || spec->args_count != 1) + return ERR_PTR(-EINVAL); + + mutex_lock(&icc_lock); + list_for_each_entry(provider, &icc_providers, provider_list) { + if (provider->dev->of_node == spec->np) + node = provider->xlate(spec, provider->data); + if (!IS_ERR(node)) + break; + } + mutex_unlock(&icc_lock); + + return node; +} + +/** + * of_icc_get() - get a path handle from a DT node based on name + * @dev: device pointer for the consumer device + * @name: interconnect path name + * + * This function will search for a path between two endpoints and return an + * icc_path handle on success. Use icc_put() to release constraints when they + * are not needed anymore. + * If the interconnect API is disabled, NULL is returned and the consumer + * drivers will still build. Drivers are free to handle this specifically, + * but they don't have to. + * + * Return: icc_path pointer on success or ERR_PTR() on error. NULL is returned + * when the API is disabled or the "interconnects" DT property is missing. + */ +struct icc_path *of_icc_get(struct device *dev, const char *name) +{ + struct icc_path *path = ERR_PTR(-EPROBE_DEFER); + struct icc_node *src_node, *dst_node; + struct device_node *np = NULL; + struct of_phandle_args src_args, dst_args; + int idx = 0; + int ret; + + if (!dev || !dev->of_node) + return ERR_PTR(-ENODEV); + + np = dev->of_node; + + /* + * When the consumer DT node do not have "interconnects" property + * return a NULL path to skip setting constraints. + */ + if (!of_find_property(np, "interconnects", NULL)) + return NULL; + + /* + * We use a combination of phandle and specifier for endpoint. For now + * lets support only global ids and extend this in the future if needed + * without breaking DT compatibility. + */ + if (name) { + idx = of_property_match_string(np, "interconnect-names", name); + if (idx < 0) + return ERR_PTR(idx); + } + + ret = of_parse_phandle_with_args(np, "interconnects", + "#interconnect-cells", idx * 2, + &src_args); + if (ret) + return ERR_PTR(ret); + + of_node_put(src_args.np); + + ret = of_parse_phandle_with_args(np, "interconnects", + "#interconnect-cells", idx * 2 + 1, + &dst_args); + if (ret) + return ERR_PTR(ret); + + of_node_put(dst_args.np); + + src_node = of_icc_get_from_provider(&src_args); + + if (IS_ERR(src_node)) { + if (PTR_ERR(src_node) != -EPROBE_DEFER) + dev_err(dev, "error finding src node: %ld\n", + PTR_ERR(src_node)); + return ERR_CAST(src_node); + } + + dst_node = of_icc_get_from_provider(&dst_args); + + if (IS_ERR(dst_node)) { + if (PTR_ERR(dst_node) != -EPROBE_DEFER) + dev_err(dev, "error finding dst node: %ld\n", + PTR_ERR(dst_node)); + return ERR_CAST(dst_node); + } + + mutex_lock(&icc_lock); + path = path_find(dev, src_node, dst_node); + if (IS_ERR(path)) + dev_err(dev, "%s: invalid path=%ld\n", __func__, PTR_ERR(path)); + mutex_unlock(&icc_lock); + + return path; +} +EXPORT_SYMBOL_GPL(of_icc_get); + /** * icc_set_bw() - set bandwidth constraints on an interconnect path * @path: reference to the path returned by icc_get() @@ -519,6 +666,8 @@ int icc_provider_add(struct icc_provider *provider) { if (WARN_ON(!provider->set)) return -EINVAL; + if (WARN_ON(!provider->xlate)) + return -EINVAL; mutex_lock(&icc_lock); diff --git a/include/linux/interconnect-provider.h b/include/linux/interconnect-provider.h index 78208a754181..63caccadc2db 100644 --- a/include/linux/interconnect-provider.h +++ b/include/linux/interconnect-provider.h @@ -12,6 +12,21 @@ #define icc_units_to_bps(bw) ((bw) * 1000ULL) struct icc_node; +struct of_phandle_args; + +/** + * struct icc_onecell_data - driver data for onecell interconnect providers + * + * @num_nodes: number of nodes in this device + * @nodes: array of pointers to the nodes in this device + */ +struct icc_onecell_data { + unsigned int num_nodes; + struct icc_node *nodes[]; +}; + +struct icc_node *of_icc_xlate_onecell(struct of_phandle_args *spec, + void *data); /** * struct icc_provider - interconnect provider (controller) entity that might @@ -21,6 +36,7 @@ struct icc_node; * @nodes: internal list of the interconnect provider nodes * @set: pointer to device specific set operation function * @aggregate: pointer to device specific aggregate operation function + * @xlate: provider-specific callback for mapping nodes from phandle arguments * @dev: the device this interconnect provider belongs to * @users: count of active users * @data: pointer to private data @@ -31,6 +47,7 @@ struct icc_provider { int (*set)(struct icc_node *src, struct icc_node *dst); int (*aggregate)(struct icc_node *node, u32 avg_bw, u32 peak_bw, u32 *agg_avg, u32 *agg_peak); + struct icc_node* (*xlate)(struct of_phandle_args *spec, void *data); struct device *dev; int users; void *data; diff --git a/include/linux/interconnect.h b/include/linux/interconnect.h index c331afb3a2c8..dc25864755ba 100644 --- a/include/linux/interconnect.h +++ b/include/linux/interconnect.h @@ -27,6 +27,7 @@ struct device; struct icc_path *icc_get(struct device *dev, const int src_id, const int dst_id); +struct icc_path *of_icc_get(struct device *dev, const char *name); void icc_put(struct icc_path *path); int icc_set_bw(struct icc_path *path, u32 avg_bw, u32 peak_bw); @@ -38,6 +39,12 @@ static inline struct icc_path *icc_get(struct device *dev, const int src_id, return NULL; } +static inline struct icc_path *of_icc_get(struct device *dev, + const char *name) +{ + return NULL; +} + static inline void icc_put(struct icc_path *path) { } -- cgit v1.3-8-gc7d7 From c81d64d3dc1f2decf8f3a9354416b7496b5c389b Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Wed, 16 Jan 2019 11:25:21 -0700 Subject: io-64-nonatomic: add io{read|write}64[be]{_lo_hi|_hi_lo} macros This patch adds generic io{read|write}64[be]{_lo_hi|_hi_lo} macros if they are not already defined by the architecture. (As they are provided by the generic iomap library). The patch also points io{read|write}64[be] to the variant specified by the header name. This is because new drivers are encouraged to use ioreadXX, et al instead of readX[1], et al -- and mixing ioreadXX with readq is pretty ugly. [1] LDD3: section 9.4.2 Signed-off-by: Logan Gunthorpe Reviewed-by: Andy Shevchenko Cc: Christoph Hellwig Cc: Arnd Bergmann Cc: Alan Cox Cc: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- include/linux/io-64-nonatomic-hi-lo.h | 64 +++++++++++++++++++++++++++++++++++ include/linux/io-64-nonatomic-lo-hi.h | 64 +++++++++++++++++++++++++++++++++++ 2 files changed, 128 insertions(+) (limited to 'include/linux') diff --git a/include/linux/io-64-nonatomic-hi-lo.h b/include/linux/io-64-nonatomic-hi-lo.h index 862d786a904f..ae21b72cce85 100644 --- a/include/linux/io-64-nonatomic-hi-lo.h +++ b/include/linux/io-64-nonatomic-hi-lo.h @@ -55,4 +55,68 @@ static inline void hi_lo_writeq_relaxed(__u64 val, volatile void __iomem *addr) #define writeq_relaxed hi_lo_writeq_relaxed #endif +#ifndef ioread64_hi_lo +#define ioread64_hi_lo ioread64_hi_lo +static inline u64 ioread64_hi_lo(void __iomem *addr) +{ + u32 low, high; + + high = ioread32(addr + sizeof(u32)); + low = ioread32(addr); + + return low + ((u64)high << 32); +} +#endif + +#ifndef iowrite64_hi_lo +#define iowrite64_hi_lo iowrite64_hi_lo +static inline void iowrite64_hi_lo(u64 val, void __iomem *addr) +{ + iowrite32(val >> 32, addr + sizeof(u32)); + iowrite32(val, addr); +} +#endif + +#ifndef ioread64be_hi_lo +#define ioread64be_hi_lo ioread64be_hi_lo +static inline u64 ioread64be_hi_lo(void __iomem *addr) +{ + u32 low, high; + + high = ioread32be(addr); + low = ioread32be(addr + sizeof(u32)); + + return low + ((u64)high << 32); +} +#endif + +#ifndef iowrite64be_hi_lo +#define iowrite64be_hi_lo iowrite64be_hi_lo +static inline void iowrite64be_hi_lo(u64 val, void __iomem *addr) +{ + iowrite32be(val >> 32, addr); + iowrite32be(val, addr + sizeof(u32)); +} +#endif + +#ifndef ioread64 +#define ioread64_is_nonatomic +#define ioread64 ioread64_hi_lo +#endif + +#ifndef iowrite64 +#define iowrite64_is_nonatomic +#define iowrite64 iowrite64_hi_lo +#endif + +#ifndef ioread64be +#define ioread64be_is_nonatomic +#define ioread64be ioread64be_hi_lo +#endif + +#ifndef iowrite64be +#define iowrite64be_is_nonatomic +#define iowrite64be iowrite64be_hi_lo +#endif + #endif /* _LINUX_IO_64_NONATOMIC_HI_LO_H_ */ diff --git a/include/linux/io-64-nonatomic-lo-hi.h b/include/linux/io-64-nonatomic-lo-hi.h index d042e7bb5adb..faaa842dbdb9 100644 --- a/include/linux/io-64-nonatomic-lo-hi.h +++ b/include/linux/io-64-nonatomic-lo-hi.h @@ -55,4 +55,68 @@ static inline void lo_hi_writeq_relaxed(__u64 val, volatile void __iomem *addr) #define writeq_relaxed lo_hi_writeq_relaxed #endif +#ifndef ioread64_lo_hi +#define ioread64_lo_hi ioread64_lo_hi +static inline u64 ioread64_lo_hi(void __iomem *addr) +{ + u32 low, high; + + low = ioread32(addr); + high = ioread32(addr + sizeof(u32)); + + return low + ((u64)high << 32); +} +#endif + +#ifndef iowrite64_lo_hi +#define iowrite64_lo_hi iowrite64_lo_hi +static inline void iowrite64_lo_hi(u64 val, void __iomem *addr) +{ + iowrite32(val, addr); + iowrite32(val >> 32, addr + sizeof(u32)); +} +#endif + +#ifndef ioread64be_lo_hi +#define ioread64be_lo_hi ioread64be_lo_hi +static inline u64 ioread64be_lo_hi(void __iomem *addr) +{ + u32 low, high; + + low = ioread32be(addr + sizeof(u32)); + high = ioread32be(addr); + + return low + ((u64)high << 32); +} +#endif + +#ifndef iowrite64be_lo_hi +#define iowrite64be_lo_hi iowrite64be_lo_hi +static inline void iowrite64be_lo_hi(u64 val, void __iomem *addr) +{ + iowrite32be(val, addr + sizeof(u32)); + iowrite32be(val >> 32, addr); +} +#endif + +#ifndef ioread64 +#define ioread64_is_nonatomic +#define ioread64 ioread64_lo_hi +#endif + +#ifndef iowrite64 +#define iowrite64_is_nonatomic +#define iowrite64 iowrite64_lo_hi +#endif + +#ifndef ioread64be +#define ioread64be_is_nonatomic +#define ioread64be ioread64be_lo_hi +#endif + +#ifndef iowrite64be +#define iowrite64be_is_nonatomic +#define iowrite64be iowrite64be_lo_hi +#endif + #endif /* _LINUX_IO_64_NONATOMIC_LO_HI_H_ */ -- cgit v1.3-8-gc7d7 From 13054abbaa4f1fd4e6f3b4b63439ec033b4c8035 Mon Sep 17 00:00:00 2001 From: Vladis Dronov Date: Tue, 29 Jan 2019 11:58:35 +0100 Subject: HID: debug: fix the ring buffer implementation Ring buffer implementation in hid_debug_event() and hid_debug_events_read() is strange allowing lost or corrupted data. After commit 717adfdaf147 ("HID: debug: check length before copy_to_user()") it is possible to enter an infinite loop in hid_debug_events_read() by providing 0 as count, this locks up a system. Fix this by rewriting the ring buffer implementation with kfifo and simplify the code. This fixes CVE-2019-3819. v2: fix an execution logic and add a comment v3: use __set_current_state() instead of set_current_state() Link: https://bugzilla.redhat.com/show_bug.cgi?id=1669187 Cc: stable@vger.kernel.org # v4.18+ Fixes: cd667ce24796 ("HID: use debugfs for events/reports dumping") Fixes: 717adfdaf147 ("HID: debug: check length before copy_to_user()") Signed-off-by: Vladis Dronov Reviewed-by: Oleg Nesterov Signed-off-by: Benjamin Tissoires --- drivers/hid/hid-debug.c | 120 ++++++++++++++++++---------------------------- include/linux/hid-debug.h | 9 ++-- 2 files changed, 51 insertions(+), 78 deletions(-) (limited to 'include/linux') diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c index c530476edba6..ac9fda1b5a72 100644 --- a/drivers/hid/hid-debug.c +++ b/drivers/hid/hid-debug.c @@ -30,6 +30,7 @@ #include #include +#include #include #include #include @@ -661,17 +662,12 @@ EXPORT_SYMBOL_GPL(hid_dump_device); /* enqueue string to 'events' ring buffer */ void hid_debug_event(struct hid_device *hdev, char *buf) { - unsigned i; struct hid_debug_list *list; unsigned long flags; spin_lock_irqsave(&hdev->debug_list_lock, flags); - list_for_each_entry(list, &hdev->debug_list, node) { - for (i = 0; buf[i]; i++) - list->hid_debug_buf[(list->tail + i) % HID_DEBUG_BUFSIZE] = - buf[i]; - list->tail = (list->tail + i) % HID_DEBUG_BUFSIZE; - } + list_for_each_entry(list, &hdev->debug_list, node) + kfifo_in(&list->hid_debug_fifo, buf, strlen(buf)); spin_unlock_irqrestore(&hdev->debug_list_lock, flags); wake_up_interruptible(&hdev->debug_wait); @@ -722,8 +718,7 @@ void hid_dump_input(struct hid_device *hdev, struct hid_usage *usage, __s32 valu hid_debug_event(hdev, buf); kfree(buf); - wake_up_interruptible(&hdev->debug_wait); - + wake_up_interruptible(&hdev->debug_wait); } EXPORT_SYMBOL_GPL(hid_dump_input); @@ -1083,8 +1078,8 @@ static int hid_debug_events_open(struct inode *inode, struct file *file) goto out; } - if (!(list->hid_debug_buf = kzalloc(HID_DEBUG_BUFSIZE, GFP_KERNEL))) { - err = -ENOMEM; + err = kfifo_alloc(&list->hid_debug_fifo, HID_DEBUG_FIFOSIZE, GFP_KERNEL); + if (err) { kfree(list); goto out; } @@ -1104,77 +1099,57 @@ static ssize_t hid_debug_events_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos) { struct hid_debug_list *list = file->private_data; - int ret = 0, len; + int ret = 0, copied; DECLARE_WAITQUEUE(wait, current); mutex_lock(&list->read_mutex); - while (ret == 0) { - if (list->head == list->tail) { - add_wait_queue(&list->hdev->debug_wait, &wait); - set_current_state(TASK_INTERRUPTIBLE); - - while (list->head == list->tail) { - if (file->f_flags & O_NONBLOCK) { - ret = -EAGAIN; - break; - } - if (signal_pending(current)) { - ret = -ERESTARTSYS; - break; - } + if (kfifo_is_empty(&list->hid_debug_fifo)) { + add_wait_queue(&list->hdev->debug_wait, &wait); + set_current_state(TASK_INTERRUPTIBLE); + + while (kfifo_is_empty(&list->hid_debug_fifo)) { + if (file->f_flags & O_NONBLOCK) { + ret = -EAGAIN; + break; + } - if (!list->hdev || !list->hdev->debug) { - ret = -EIO; - set_current_state(TASK_RUNNING); - goto out; - } + if (signal_pending(current)) { + ret = -ERESTARTSYS; + break; + } - /* allow O_NONBLOCK from other threads */ - mutex_unlock(&list->read_mutex); - schedule(); - mutex_lock(&list->read_mutex); - set_current_state(TASK_INTERRUPTIBLE); + /* if list->hdev is NULL we cannot remove_wait_queue(). + * if list->hdev->debug is 0 then hid_debug_unregister() + * was already called and list->hdev is being destroyed. + * if we add remove_wait_queue() here we can hit a race. + */ + if (!list->hdev || !list->hdev->debug) { + ret = -EIO; + set_current_state(TASK_RUNNING); + goto out; } - set_current_state(TASK_RUNNING); - remove_wait_queue(&list->hdev->debug_wait, &wait); + /* allow O_NONBLOCK from other threads */ + mutex_unlock(&list->read_mutex); + schedule(); + mutex_lock(&list->read_mutex); + set_current_state(TASK_INTERRUPTIBLE); } - if (ret) - goto out; + __set_current_state(TASK_RUNNING); + remove_wait_queue(&list->hdev->debug_wait, &wait); - /* pass the ringbuffer contents to userspace */ -copy_rest: - if (list->tail == list->head) + if (ret) goto out; - if (list->tail > list->head) { - len = list->tail - list->head; - if (len > count) - len = count; - - if (copy_to_user(buffer + ret, &list->hid_debug_buf[list->head], len)) { - ret = -EFAULT; - goto out; - } - ret += len; - list->head += len; - } else { - len = HID_DEBUG_BUFSIZE - list->head; - if (len > count) - len = count; - - if (copy_to_user(buffer, &list->hid_debug_buf[list->head], len)) { - ret = -EFAULT; - goto out; - } - list->head = 0; - ret += len; - count -= len; - if (count > 0) - goto copy_rest; - } - } + + /* pass the fifo content to userspace, locking is not needed with only + * one concurrent reader and one concurrent writer + */ + ret = kfifo_to_user(&list->hid_debug_fifo, buffer, count, &copied); + if (ret) + goto out; + ret = copied; out: mutex_unlock(&list->read_mutex); return ret; @@ -1185,7 +1160,7 @@ static __poll_t hid_debug_events_poll(struct file *file, poll_table *wait) struct hid_debug_list *list = file->private_data; poll_wait(file, &list->hdev->debug_wait, wait); - if (list->head != list->tail) + if (!kfifo_is_empty(&list->hid_debug_fifo)) return EPOLLIN | EPOLLRDNORM; if (!list->hdev->debug) return EPOLLERR | EPOLLHUP; @@ -1200,7 +1175,7 @@ static int hid_debug_events_release(struct inode *inode, struct file *file) spin_lock_irqsave(&list->hdev->debug_list_lock, flags); list_del(&list->node); spin_unlock_irqrestore(&list->hdev->debug_list_lock, flags); - kfree(list->hid_debug_buf); + kfifo_free(&list->hid_debug_fifo); kfree(list); return 0; @@ -1246,4 +1221,3 @@ void hid_debug_exit(void) { debugfs_remove_recursive(hid_debug_root); } - diff --git a/include/linux/hid-debug.h b/include/linux/hid-debug.h index 8663f216c563..2d6100edf204 100644 --- a/include/linux/hid-debug.h +++ b/include/linux/hid-debug.h @@ -24,7 +24,10 @@ #ifdef CONFIG_DEBUG_FS +#include + #define HID_DEBUG_BUFSIZE 512 +#define HID_DEBUG_FIFOSIZE 512 void hid_dump_input(struct hid_device *, struct hid_usage *, __s32); void hid_dump_report(struct hid_device *, int , u8 *, int); @@ -37,11 +40,8 @@ void hid_debug_init(void); void hid_debug_exit(void); void hid_debug_event(struct hid_device *, char *); - struct hid_debug_list { - char *hid_debug_buf; - int head; - int tail; + DECLARE_KFIFO_PTR(hid_debug_fifo, char); struct fasync_struct *fasync; struct hid_device *hdev; struct list_head node; @@ -64,4 +64,3 @@ struct hid_debug_list { #endif #endif - -- cgit v1.3-8-gc7d7 From d5256083f62e2720f75bb3c5a928a0afe47d6bc3 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 30 Jan 2019 12:49:48 +0100 Subject: ipvlan, l3mdev: fix broken l3s mode wrt local routes While implementing ipvlan l3 and l3s mode for kubernetes CNI plugin, I ran into the issue that while l3 mode is working fine, l3s mode does not have any connectivity to kube-apiserver and hence all pods end up in Error state as well. The ipvlan master device sits on top of a bond device and hostns traffic to kube-apiserver (also running in hostns) is DNATed from 10.152.183.1:443 to 139.178.29.207:37573 where the latter is the address of the bond0. While in l3 mode, a curl to https://10.152.183.1:443 or to https://139.178.29.207:37573 works fine from hostns, neither of them do in case of l3s. In the latter only a curl to https://127.0.0.1:37573 appeared to work where for local addresses of bond0 I saw kernel suddenly starting to emit ARP requests to query HW address of bond0 which remained unanswered and neighbor entries in INCOMPLETE state. These ARP requests only happen while in l3s. Debugging this further, I found the issue is that l3s mode is piggy- backing on l3 master device, and in this case local routes are using l3mdev_master_dev_rcu(dev) instead of net->loopback_dev as per commit f5a0aab84b74 ("net: ipv4: dst for local input routes should use l3mdev if relevant") and 5f02ce24c269 ("net: l3mdev: Allow the l3mdev to be a loopback"). I found that reverting them back into using the net->loopback_dev fixed ipvlan l3s connectivity and got everything working for the CNI. Now judging from 4fbae7d83c98 ("ipvlan: Introduce l3s mode") and the l3mdev paper in [0] the only sole reason why ipvlan l3s is relying on l3 master device is to get the l3mdev_ip_rcv() receive hook for setting the dst entry of the input route without adding its own ipvlan specific hacks into the receive path, however, any l3 domain semantics beyond just that are breaking l3s operation. Note that ipvlan also has the ability to dynamically switch its internal operation from l3 to l3s for all ports via ipvlan_set_port_mode() at runtime. In any case, l3 vs l3s soley distinguishes itself by 'de-confusing' netfilter through switching skb->dev to ipvlan slave device late in NF_INET_LOCAL_IN before handing the skb to L4. Minimal fix taken here is to add a IFF_L3MDEV_RX_HANDLER flag which, if set from ipvlan setup, gets us only the wanted l3mdev_l3_rcv() hook without any additional l3mdev semantics on top. This should also have minimal impact since dev->priv_flags is already hot in cache. With this set, l3s mode is working fine and I also get things like masquerading pod traffic on the ipvlan master properly working. [0] https://netdevconf.org/1.2/papers/ahern-what-is-l3mdev-paper.pdf Fixes: f5a0aab84b74 ("net: ipv4: dst for local input routes should use l3mdev if relevant") Fixes: 5f02ce24c269 ("net: l3mdev: Allow the l3mdev to be a loopback") Fixes: 4fbae7d83c98 ("ipvlan: Introduce l3s mode") Signed-off-by: Daniel Borkmann Cc: Mahesh Bandewar Cc: David Ahern Cc: Florian Westphal Cc: Martynas Pumputis Acked-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/ipvlan/ipvlan_main.c | 6 +++--- include/linux/netdevice.h | 8 ++++++++ include/net/l3mdev.h | 3 ++- 3 files changed, 13 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 19bdde60680c..7cdac77d0c68 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -100,12 +100,12 @@ static int ipvlan_set_port_mode(struct ipvl_port *port, u16 nval, err = ipvlan_register_nf_hook(read_pnet(&port->pnet)); if (!err) { mdev->l3mdev_ops = &ipvl_l3mdev_ops; - mdev->priv_flags |= IFF_L3MDEV_MASTER; + mdev->priv_flags |= IFF_L3MDEV_RX_HANDLER; } else goto fail; } else if (port->mode == IPVLAN_MODE_L3S) { /* Old mode was L3S */ - mdev->priv_flags &= ~IFF_L3MDEV_MASTER; + mdev->priv_flags &= ~IFF_L3MDEV_RX_HANDLER; ipvlan_unregister_nf_hook(read_pnet(&port->pnet)); mdev->l3mdev_ops = NULL; } @@ -167,7 +167,7 @@ static void ipvlan_port_destroy(struct net_device *dev) struct sk_buff *skb; if (port->mode == IPVLAN_MODE_L3S) { - dev->priv_flags &= ~IFF_L3MDEV_MASTER; + dev->priv_flags &= ~IFF_L3MDEV_RX_HANDLER; ipvlan_unregister_nf_hook(dev_net(dev)); dev->l3mdev_ops = NULL; } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1377d085ef99..86dbb3e29139 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1483,6 +1483,7 @@ struct net_device_ops { * @IFF_NO_RX_HANDLER: device doesn't support the rx_handler hook * @IFF_FAILOVER: device is a failover master device * @IFF_FAILOVER_SLAVE: device is lower dev of a failover master device + * @IFF_L3MDEV_RX_HANDLER: only invoke the rx handler of L3 master device */ enum netdev_priv_flags { IFF_802_1Q_VLAN = 1<<0, @@ -1514,6 +1515,7 @@ enum netdev_priv_flags { IFF_NO_RX_HANDLER = 1<<26, IFF_FAILOVER = 1<<27, IFF_FAILOVER_SLAVE = 1<<28, + IFF_L3MDEV_RX_HANDLER = 1<<29, }; #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN @@ -1544,6 +1546,7 @@ enum netdev_priv_flags { #define IFF_NO_RX_HANDLER IFF_NO_RX_HANDLER #define IFF_FAILOVER IFF_FAILOVER #define IFF_FAILOVER_SLAVE IFF_FAILOVER_SLAVE +#define IFF_L3MDEV_RX_HANDLER IFF_L3MDEV_RX_HANDLER /** * struct net_device - The DEVICE structure. @@ -4549,6 +4552,11 @@ static inline bool netif_supports_nofcs(struct net_device *dev) return dev->priv_flags & IFF_SUPP_NOFCS; } +static inline bool netif_has_l3_rx_handler(const struct net_device *dev) +{ + return dev->priv_flags & IFF_L3MDEV_RX_HANDLER; +} + static inline bool netif_is_l3_master(const struct net_device *dev) { return dev->priv_flags & IFF_L3MDEV_MASTER; diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index 78fa0ac4613c..5175fd63cd82 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -153,7 +153,8 @@ struct sk_buff *l3mdev_l3_rcv(struct sk_buff *skb, u16 proto) if (netif_is_l3_slave(skb->dev)) master = netdev_master_upper_dev_get_rcu(skb->dev); - else if (netif_is_l3_master(skb->dev)) + else if (netif_is_l3_master(skb->dev) || + netif_has_l3_rx_handler(skb->dev)) master = skb->dev; if (master && master->l3mdev_ops->l3mdev_l3_rcv) -- cgit v1.3-8-gc7d7 From 4ec5302fa906ec9d86597b236f62315bacdb9622 Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Wed, 30 Jan 2019 15:54:19 +0100 Subject: net: stmmac: Fallback to Platform Data clock in Watchdog conversion If we don't have DT then stmmac_clk will not be available. Let's add a new Platform Data field so that we can specify the refclk by this mean. This way we can still use the coalesce command in PCI based setups. Signed-off-by: Jose Abreu Cc: Joao Pinto Cc: David S. Miller Cc: Giuseppe Cavallaro Cc: Alexandre Torgue Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c | 14 ++++++++++---- include/linux/stmmac.h | 1 + 2 files changed, 11 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index d1f61c25d82b..5d85742a2be0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -721,8 +721,11 @@ static u32 stmmac_usec2riwt(u32 usec, struct stmmac_priv *priv) { unsigned long clk = clk_get_rate(priv->plat->stmmac_clk); - if (!clk) - return 0; + if (!clk) { + clk = priv->plat->clk_ref_rate; + if (!clk) + return 0; + } return (usec * (clk / 1000000)) / 256; } @@ -731,8 +734,11 @@ static u32 stmmac_riwt2usec(u32 riwt, struct stmmac_priv *priv) { unsigned long clk = clk_get_rate(priv->plat->stmmac_clk); - if (!clk) - return 0; + if (!clk) { + clk = priv->plat->clk_ref_rate; + if (!clk) + return 0; + } return (riwt * 256) / (clk / 1000000); } diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 7ddfc65586b0..4335bd771ce5 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -184,6 +184,7 @@ struct plat_stmmacenet_data { struct clk *pclk; struct clk *clk_ptp_ref; unsigned int clk_ptp_rate; + unsigned int clk_ref_rate; struct reset_control *stmmac_rst; struct stmmac_axi *axi; int has_gmac4; -- cgit v1.3-8-gc7d7 From 56841070ccc87b463ac037d2d1f2beb8e5e35f0c Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Thu, 31 Jan 2019 11:19:43 +0000 Subject: irqchip/gic-v3-its: Fix ITT_entry_size accessor According to ARM IHI 0069C (ID070116), we should use GITS_TYPER's bits [7:4] as ITT_entry_size instead of [8:4]. Although this is pretty annoying, it only results in a potential over-allocation of memory, and nothing bad happens. Fixes: 3dfa576bfb45 ("irqchip/gic-v3-its: Add probing for VLPI properties") Signed-off-by: Zenghui Yu [maz: massaged subject and commit message] Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v3.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 071b4cbdf010..c848a7cc502e 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -319,7 +319,7 @@ #define GITS_TYPER_PLPIS (1UL << 0) #define GITS_TYPER_VLPIS (1UL << 1) #define GITS_TYPER_ITT_ENTRY_SIZE_SHIFT 4 -#define GITS_TYPER_ITT_ENTRY_SIZE(r) ((((r) >> GITS_TYPER_ITT_ENTRY_SIZE_SHIFT) & 0x1f) + 1) +#define GITS_TYPER_ITT_ENTRY_SIZE(r) ((((r) >> GITS_TYPER_ITT_ENTRY_SIZE_SHIFT) & 0xf) + 1) #define GITS_TYPER_IDBITS_SHIFT 8 #define GITS_TYPER_DEVBITS_SHIFT 13 #define GITS_TYPER_DEVBITS(r) ((((r) >> GITS_TYPER_DEVBITS_SHIFT) & 0x1f) + 1) -- cgit v1.3-8-gc7d7 From 6cab5e90ab2bd323c9f3811b6c70a4687df51e27 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 28 Jan 2019 18:43:34 -0800 Subject: bpf: run bpf programs with preemption disabled Disabled preemption is necessary for proper access to per-cpu maps from BPF programs. But the sender side of socket filters didn't have preemption disabled: unix_dgram_sendmsg->sk_filter->sk_filter_trim_cap->bpf_prog_run_save_cb->BPF_PROG_RUN and a combination of af_packet with tun device didn't disable either: tpacket_snd->packet_direct_xmit->packet_pick_tx_queue->ndo_select_queue-> tun_select_queue->tun_ebpf_select_queue->bpf_prog_run_clear_cb->BPF_PROG_RUN Disable preemption before executing BPF programs (both classic and extended). Reported-by: Jann Horn Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Signed-off-by: Daniel Borkmann --- include/linux/filter.h | 21 ++++++++++++++++++--- kernel/bpf/cgroup.c | 2 +- 2 files changed, 19 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index ad106d845b22..e532fcc6e4b5 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -591,8 +591,8 @@ static inline u8 *bpf_skb_cb(struct sk_buff *skb) return qdisc_skb_cb(skb)->data; } -static inline u32 bpf_prog_run_save_cb(const struct bpf_prog *prog, - struct sk_buff *skb) +static inline u32 __bpf_prog_run_save_cb(const struct bpf_prog *prog, + struct sk_buff *skb) { u8 *cb_data = bpf_skb_cb(skb); u8 cb_saved[BPF_SKB_CB_LEN]; @@ -611,15 +611,30 @@ static inline u32 bpf_prog_run_save_cb(const struct bpf_prog *prog, return res; } +static inline u32 bpf_prog_run_save_cb(const struct bpf_prog *prog, + struct sk_buff *skb) +{ + u32 res; + + preempt_disable(); + res = __bpf_prog_run_save_cb(prog, skb); + preempt_enable(); + return res; +} + static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog, struct sk_buff *skb) { u8 *cb_data = bpf_skb_cb(skb); + u32 res; if (unlikely(prog->cb_access)) memset(cb_data, 0, BPF_SKB_CB_LEN); - return BPF_PROG_RUN(prog, skb); + preempt_disable(); + res = BPF_PROG_RUN(prog, skb); + preempt_enable(); + return res; } static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog, diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index ab612fe9862f..d17d05570a3f 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -572,7 +572,7 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk, bpf_compute_and_save_data_end(skb, &saved_data_end); ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb, - bpf_prog_run_save_cb); + __bpf_prog_run_save_cb); bpf_restore_data_end(skb, saved_data_end); __skb_pull(skb, offset); skb->sk = save_sk; -- cgit v1.3-8-gc7d7 From 0803de78049fe1b0baf44bcddc727b036fb9139b Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 7 Feb 2019 11:55:39 +0100 Subject: blktrace: Show requests without sector Currently, blktrace will not show requests that don't have any data as rq->__sector is initialized to -1 which is out of device range and thus discarded by act_log_check(). This is most notably the case for cache flush requests sent to the device. Fix the problem by making blk_rq_trace_sector() return 0 for requests without initialized sector. Reviewed-by: Johannes Thumshirn Signed-off-by: Jan Kara Signed-off-by: Jens Axboe --- include/linux/blktrace_api.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 8804753805ac..7bb2d8de9f30 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -116,7 +116,13 @@ extern void blk_fill_rwbs(char *rwbs, unsigned int op, int bytes); static inline sector_t blk_rq_trace_sector(struct request *rq) { - return blk_rq_is_passthrough(rq) ? 0 : blk_rq_pos(rq); + /* + * Tracing should ignore starting sector for passthrough requests and + * requests where starting sector didn't get set. + */ + if (blk_rq_is_passthrough(rq) || blk_rq_pos(rq) == (sector_t)-1) + return 0; + return blk_rq_pos(rq); } static inline unsigned int blk_rq_trace_nr_sectors(struct request *rq) -- cgit v1.3-8-gc7d7 From e11a5795cb7cd1e25bbd1697baa109943938c0f6 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Tue, 5 Feb 2019 16:24:56 -0700 Subject: perf/aux: Make perf_event accessible to setup_aux() When pmu::setup_aux() is called the coresight PMU needs to know which sink to use for the session by looking up the information in the event's attr::config2 field. As such simply replace the cpu information by the complete perf_event structure and change all affected customers. Signed-off-by: Mathieu Poirier Reviewed-by: Suzuki K Poulose Acked-by: Peter Zijlstra (Intel) Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/perf_cpum_sf.c | 6 +++--- arch/x86/events/intel/bts.c | 4 +++- arch/x86/events/intel/pt.c | 5 +++-- drivers/hwtracing/coresight/coresight-etm-perf.c | 6 +++--- drivers/perf/arm_spe_pmu.c | 6 +++--- include/linux/perf_event.h | 2 +- kernel/events/ring_buffer.c | 2 +- 7 files changed, 17 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index bfabeb1889cc..1266194afb02 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -1600,7 +1600,7 @@ static void aux_sdb_init(unsigned long sdb) /* * aux_buffer_setup() - Setup AUX buffer for diagnostic mode sampling - * @cpu: On which to allocate, -1 means current + * @event: Event the buffer is setup for, event->cpu == -1 means current * @pages: Array of pointers to buffer pages passed from perf core * @nr_pages: Total pages * @snapshot: Flag for snapshot mode @@ -1612,8 +1612,8 @@ static void aux_sdb_init(unsigned long sdb) * * Return the private AUX buffer structure if success or NULL if fails. */ -static void *aux_buffer_setup(int cpu, void **pages, int nr_pages, - bool snapshot) +static void *aux_buffer_setup(struct perf_event *event, void **pages, + int nr_pages, bool snapshot) { struct sf_buffer *sfb; struct aux_buffer *aux; diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index a01ef1b0f883..7cdd7b13bbda 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -77,10 +77,12 @@ static size_t buf_size(struct page *page) } static void * -bts_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool overwrite) +bts_buffer_setup_aux(struct perf_event *event, void **pages, + int nr_pages, bool overwrite) { struct bts_buffer *buf; struct page *page; + int cpu = event->cpu; int node = (cpu == -1) ? cpu : cpu_to_node(cpu); unsigned long offset; size_t size = nr_pages << PAGE_SHIFT; diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index 9494ca68fd9d..c0e86ff21f81 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -1114,10 +1114,11 @@ static int pt_buffer_init_topa(struct pt_buffer *buf, unsigned long nr_pages, * Return: Our private PT buffer structure. */ static void * -pt_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool snapshot) +pt_buffer_setup_aux(struct perf_event *event, void **pages, + int nr_pages, bool snapshot) { struct pt_buffer *buf; - int node, ret; + int node, ret, cpu = event->cpu; if (!nr_pages) return NULL; diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c index abe8249b893b..f21eb28b6782 100644 --- a/drivers/hwtracing/coresight/coresight-etm-perf.c +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c @@ -177,15 +177,15 @@ static void etm_free_aux(void *data) schedule_work(&event_data->work); } -static void *etm_setup_aux(int event_cpu, void **pages, +static void *etm_setup_aux(struct perf_event *event, void **pages, int nr_pages, bool overwrite) { - int cpu; + int cpu = event->cpu; cpumask_t *mask; struct coresight_device *sink; struct etm_event_data *event_data = NULL; - event_data = alloc_event_data(event_cpu); + event_data = alloc_event_data(cpu); if (!event_data) return NULL; INIT_WORK(&event_data->work, free_event_data); diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c index 8e46a9dad2fa..7cb766dafe85 100644 --- a/drivers/perf/arm_spe_pmu.c +++ b/drivers/perf/arm_spe_pmu.c @@ -824,10 +824,10 @@ static void arm_spe_pmu_read(struct perf_event *event) { } -static void *arm_spe_pmu_setup_aux(int cpu, void **pages, int nr_pages, - bool snapshot) +static void *arm_spe_pmu_setup_aux(struct perf_event *event, void **pages, + int nr_pages, bool snapshot) { - int i; + int i, cpu = event->cpu; struct page **pglist; struct arm_spe_pmu_buf *buf; diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 1d5c551a5add..3e49b2144808 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -409,7 +409,7 @@ struct pmu { /* * Set up pmu-private data structures for an AUX area */ - void *(*setup_aux) (int cpu, void **pages, + void *(*setup_aux) (struct perf_event *event, void **pages, int nr_pages, bool overwrite); /* optional */ diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 4a9937076331..857308295f63 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -658,7 +658,7 @@ int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event, goto out; } - rb->aux_priv = event->pmu->setup_aux(event->cpu, rb->aux_pages, nr_pages, + rb->aux_priv = event->pmu->setup_aux(event, rb->aux_pages, nr_pages, overwrite); if (!rb->aux_priv) goto out; -- cgit v1.3-8-gc7d7 From 988036f9d322cbd787d8f6a776dbe903d05bae22 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Tue, 5 Feb 2019 16:24:57 -0700 Subject: coresight: perf: Add "sinks" group to PMU directory Add a "sinks" directory entry so that users can see all the sinks available in the system in a single place. Individual sink are added as they are registered with the coresight bus. Signed-off-by: Mathieu Poirier Acked-by: Peter Zijlstra (Intel) Reviewed-by: Suzuki K Poulose Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/coresight/coresight-etm-perf.c | 82 ++++++++++++++++++++++++ drivers/hwtracing/coresight/coresight-etm-perf.h | 6 +- drivers/hwtracing/coresight/coresight.c | 18 ++++++ include/linux/coresight.h | 7 +- 4 files changed, 110 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c index f21eb28b6782..cdbdb28dc175 100644 --- a/drivers/hwtracing/coresight/coresight-etm-perf.c +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -43,8 +44,18 @@ static const struct attribute_group etm_pmu_format_group = { .attrs = etm_config_formats_attr, }; +static struct attribute *etm_config_sinks_attr[] = { + NULL, +}; + +static const struct attribute_group etm_pmu_sinks_group = { + .name = "sinks", + .attrs = etm_config_sinks_attr, +}; + static const struct attribute_group *etm_pmu_attr_groups[] = { &etm_pmu_format_group, + &etm_pmu_sinks_group, NULL, }; @@ -479,6 +490,77 @@ int etm_perf_symlink(struct coresight_device *csdev, bool link) return 0; } +static ssize_t etm_perf_sink_name_show(struct device *dev, + struct device_attribute *dattr, + char *buf) +{ + struct dev_ext_attribute *ea; + + ea = container_of(dattr, struct dev_ext_attribute, attr); + return scnprintf(buf, PAGE_SIZE, "0x%lx\n", (unsigned long)(ea->var)); +} + +int etm_perf_add_symlink_sink(struct coresight_device *csdev) +{ + int ret; + unsigned long hash; + const char *name; + struct device *pmu_dev = etm_pmu.dev; + struct device *pdev = csdev->dev.parent; + struct dev_ext_attribute *ea; + + if (csdev->type != CORESIGHT_DEV_TYPE_SINK && + csdev->type != CORESIGHT_DEV_TYPE_LINKSINK) + return -EINVAL; + + if (csdev->ea != NULL) + return -EINVAL; + + if (!etm_perf_up) + return -EPROBE_DEFER; + + ea = devm_kzalloc(pdev, sizeof(*ea), GFP_KERNEL); + if (!ea) + return -ENOMEM; + + name = dev_name(pdev); + /* See function coresight_get_sink_by_id() to know where this is used */ + hash = hashlen_hash(hashlen_string(NULL, name)); + + ea->attr.attr.name = devm_kstrdup(pdev, name, GFP_KERNEL); + if (!ea->attr.attr.name) + return -ENOMEM; + + ea->attr.attr.mode = 0444; + ea->attr.show = etm_perf_sink_name_show; + ea->var = (unsigned long *)hash; + + ret = sysfs_add_file_to_group(&pmu_dev->kobj, + &ea->attr.attr, "sinks"); + + if (!ret) + csdev->ea = ea; + + return ret; +} + +void etm_perf_del_symlink_sink(struct coresight_device *csdev) +{ + struct device *pmu_dev = etm_pmu.dev; + struct dev_ext_attribute *ea = csdev->ea; + + if (csdev->type != CORESIGHT_DEV_TYPE_SINK && + csdev->type != CORESIGHT_DEV_TYPE_LINKSINK) + return; + + if (!ea) + return; + + sysfs_remove_file_from_group(&pmu_dev->kobj, + &ea->attr.attr, "sinks"); + csdev->ea = NULL; +} + static int __init etm_perf_init(void) { int ret; diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.h b/drivers/hwtracing/coresight/coresight-etm-perf.h index da7d9336a15c..015213abe00a 100644 --- a/drivers/hwtracing/coresight/coresight-etm-perf.h +++ b/drivers/hwtracing/coresight/coresight-etm-perf.h @@ -59,6 +59,8 @@ struct etm_event_data { #ifdef CONFIG_CORESIGHT int etm_perf_symlink(struct coresight_device *csdev, bool link); +int etm_perf_add_symlink_sink(struct coresight_device *csdev); +void etm_perf_del_symlink_sink(struct coresight_device *csdev); static inline void *etm_perf_sink_config(struct perf_output_handle *handle) { struct etm_event_data *data = perf_get_aux(handle); @@ -70,7 +72,9 @@ static inline void *etm_perf_sink_config(struct perf_output_handle *handle) #else static inline int etm_perf_symlink(struct coresight_device *csdev, bool link) { return -EINVAL; } - +int etm_perf_add_symlink_sink(struct coresight_device *csdev) +{ return -EINVAL; } +void etm_perf_del_symlink_sink(struct coresight_device *csdev) {} static inline void *etm_perf_sink_config(struct perf_output_handle *handle) { return NULL; diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c index 2b0df1a0a8df..d7fa90be6f42 100644 --- a/drivers/hwtracing/coresight/coresight.c +++ b/drivers/hwtracing/coresight/coresight.c @@ -18,6 +18,7 @@ #include #include +#include "coresight-etm-perf.h" #include "coresight-priv.h" static DEFINE_MUTEX(coresight_mutex); @@ -1167,6 +1168,22 @@ struct coresight_device *coresight_register(struct coresight_desc *desc) goto err_out; } + if (csdev->type == CORESIGHT_DEV_TYPE_SINK || + csdev->type == CORESIGHT_DEV_TYPE_LINKSINK) { + ret = etm_perf_add_symlink_sink(csdev); + + if (ret) { + device_unregister(&csdev->dev); + /* + * As with the above, all resources are free'd + * explicitly via coresight_device_release() triggered + * from put_device(), which is in turn called from + * function device_unregister(). + */ + goto err_out; + } + } + mutex_lock(&coresight_mutex); coresight_fixup_device_conns(csdev); @@ -1185,6 +1202,7 @@ EXPORT_SYMBOL_GPL(coresight_register); void coresight_unregister(struct coresight_device *csdev) { + etm_perf_del_symlink_sink(csdev); /* Remove references of that device in the topology */ coresight_remove_conns(csdev); device_unregister(&csdev->dev); diff --git a/include/linux/coresight.h b/include/linux/coresight.h index 46c67a764877..7b87965f7a65 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -154,8 +154,9 @@ struct coresight_connection { * @orphan: true if the component has connections that haven't been linked. * @enable: 'true' if component is currently part of an active path. * @activated: 'true' only if a _sink_ has been activated. A sink can be - activated but not yet enabled. Enabling for a _sink_ - happens when a source has been selected for that it. + * activated but not yet enabled. Enabling for a _sink_ + * appens when a source has been selected for that it. + * @ea: Device attribute for sink representation under PMU directory. */ struct coresight_device { struct coresight_connection *conns; @@ -168,7 +169,9 @@ struct coresight_device { atomic_t *refcnt; bool orphan; bool enable; /* true only if configured as part of a path */ + /* sink specific fields */ bool activated; /* true only if a sink is part of a path */ + struct dev_ext_attribute *ea; }; #define to_coresight_device(d) container_of(d, struct coresight_device, dev) -- cgit v1.3-8-gc7d7 From 32ea33a044842ae6c5fc7e33426e0a7bd50f8801 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Sat, 9 Feb 2019 18:42:05 +0200 Subject: mei: bus: export to_mei_cl_device for mei client devices drivers Export to_mei_cl_device macro, as it is needed also in the mei client drivers. Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/bus.c | 1 - include/linux/mei_cl_bus.h | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c index fc3872fe7b25..e5456faf00e6 100644 --- a/drivers/misc/mei/bus.c +++ b/drivers/misc/mei/bus.c @@ -28,7 +28,6 @@ #include "client.h" #define to_mei_cl_driver(d) container_of(d, struct mei_cl_driver, driver) -#define to_mei_cl_device(d) container_of(d, struct mei_cl_device, dev) /** * __mei_cl_send - internal client send (write) diff --git a/include/linux/mei_cl_bus.h b/include/linux/mei_cl_bus.h index 7fde40e17c8b..03b6ba2a63f8 100644 --- a/include/linux/mei_cl_bus.h +++ b/include/linux/mei_cl_bus.h @@ -55,6 +55,8 @@ struct mei_cl_device { void *priv_data; }; +#define to_mei_cl_device(d) container_of(d, struct mei_cl_device, dev) + struct mei_cl_driver { struct device_driver driver; const char *name; -- cgit v1.3-8-gc7d7