diff options
207 files changed, 4626 insertions, 2855 deletions
@@ -69,6 +69,7 @@ Jean Tourrilhes <jt@hpl.hp.com> Jeff Garzik <jgarzik@pretzel.yyz.us> Jens Axboe <axboe@suse.de> Jens Osterkamp <Jens.Osterkamp@de.ibm.com> +John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de> John Stultz <johnstul@us.ibm.com> <josh@joshtriplett.org> <josh@freedesktop.org> <josh@joshtriplett.org> <josh@kernel.org> diff --git a/Documentation/devicetree/bindings/ata/ahci-platform.txt b/Documentation/devicetree/bindings/ata/ahci-platform.txt index 30df832a6f2f..87adfb227ca9 100644 --- a/Documentation/devicetree/bindings/ata/ahci-platform.txt +++ b/Documentation/devicetree/bindings/ata/ahci-platform.txt @@ -32,6 +32,10 @@ Optional properties: - target-supply : regulator for SATA target power - phys : reference to the SATA PHY node - phy-names : must be "sata-phy" +- ports-implemented : Mask that indicates which ports that the HBA supports + are available for software to use. Useful if PORTS_IMPL + is not programmed by the BIOS, which is true with + some embedded SOC's. Required properties when using sub-nodes: - #address-cells : number of cells to encode an address diff --git a/Documentation/networking/checksum-offloads.txt b/Documentation/networking/checksum-offloads.txt index de2a327766a7..56e36861245f 100644 --- a/Documentation/networking/checksum-offloads.txt +++ b/Documentation/networking/checksum-offloads.txt @@ -69,18 +69,18 @@ LCO: Local Checksum Offload LCO is a technique for efficiently computing the outer checksum of an encapsulated datagram when the inner checksum is due to be offloaded. The ones-complement sum of a correctly checksummed TCP or UDP packet is - equal to the sum of the pseudo header, because everything else gets - 'cancelled out' by the checksum field. This is because the sum was + equal to the complement of the sum of the pseudo header, because everything + else gets 'cancelled out' by the checksum field. This is because the sum was complemented before being written to the checksum field. 
More generally, this holds in any case where the 'IP-style' ones complement checksum is used, and thus any checksum that TX Checksum Offload supports. That is, if we have set up TX Checksum Offload with a start/offset pair, we - know that _after the device has filled in that checksum_, the ones + know that after the device has filled in that checksum, the ones complement sum from csum_start to the end of the packet will be equal to - _whatever value we put in the checksum field beforehand_. This allows us - to compute the outer checksum without looking at the payload: we simply - stop summing when we get to csum_start, then add the 16-bit word at - (csum_start + csum_offset). + the complement of whatever value we put in the checksum field beforehand. + This allows us to compute the outer checksum without looking at the payload: + we simply stop summing when we get to csum_start, then add the complement of + the 16-bit word at (csum_start + csum_offset). Then, when the true inner checksum is filled in (either by hardware or by skb_checksum_help()), the outer checksum will become correct by virtue of the arithmetic. 
diff --git a/MAINTAINERS b/MAINTAINERS index e425912ff933..b57df66532d2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -872,9 +872,9 @@ F: drivers/perf/arm_pmu.c F: include/linux/perf/arm_pmu.h ARM PORT -M: Russell King <linux@arm.linux.org.uk> +M: Russell King <linux@armlinux.org.uk> L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) -W: http://www.arm.linux.org.uk/ +W: http://www.armlinux.org.uk/ S: Maintained F: arch/arm/ @@ -886,35 +886,35 @@ F: arch/arm/plat-*/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc.git ARM PRIMECELL AACI PL041 DRIVER -M: Russell King <linux@arm.linux.org.uk> +M: Russell King <linux@armlinux.org.uk> S: Maintained F: sound/arm/aaci.* ARM PRIMECELL CLCD PL110 DRIVER -M: Russell King <linux@arm.linux.org.uk> +M: Russell King <linux@armlinux.org.uk> S: Maintained F: drivers/video/fbdev/amba-clcd.* ARM PRIMECELL KMI PL050 DRIVER -M: Russell King <linux@arm.linux.org.uk> +M: Russell King <linux@armlinux.org.uk> S: Maintained F: drivers/input/serio/ambakmi.* F: include/linux/amba/kmi.h ARM PRIMECELL MMCI PL180/1 DRIVER -M: Russell King <linux@arm.linux.org.uk> +M: Russell King <linux@armlinux.org.uk> S: Maintained F: drivers/mmc/host/mmci.* F: include/linux/amba/mmci.h ARM PRIMECELL UART PL010 AND PL011 DRIVERS -M: Russell King <linux@arm.linux.org.uk> +M: Russell King <linux@armlinux.org.uk> S: Maintained F: drivers/tty/serial/amba-pl01*.c F: include/linux/amba/serial.h ARM PRIMECELL BUS SUPPORT -M: Russell King <linux@arm.linux.org.uk> +M: Russell King <linux@armlinux.org.uk> S: Maintained F: drivers/amba/ F: include/linux/amba/bus.h @@ -1036,7 +1036,7 @@ L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained ARM/CLKDEV SUPPORT -M: Russell King <linux@arm.linux.org.uk> +M: Russell King <linux@armlinux.org.uk> L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained F: arch/arm/include/asm/clkdev.h @@ -1093,9 +1093,9 @@ F: 
arch/arm/boot/dts/cx92755* N: digicolor ARM/EBSA110 MACHINE SUPPORT -M: Russell King <linux@arm.linux.org.uk> +M: Russell King <linux@armlinux.org.uk> L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) -W: http://www.arm.linux.org.uk/ +W: http://www.armlinux.org.uk/ S: Maintained F: arch/arm/mach-ebsa110/ F: drivers/net/ethernet/amd/am79c961a.* @@ -1124,9 +1124,9 @@ T: git git://git.berlios.de/gemini-board F: arch/arm/mm/*-fa* ARM/FOOTBRIDGE ARCHITECTURE -M: Russell King <linux@arm.linux.org.uk> +M: Russell King <linux@armlinux.org.uk> L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) -W: http://www.arm.linux.org.uk/ +W: http://www.armlinux.org.uk/ S: Maintained F: arch/arm/include/asm/hardware/dec21285.h F: arch/arm/mach-footbridge/ @@ -1457,7 +1457,7 @@ S: Maintained ARM/PT DIGITAL BOARD PORT M: Stefan Eletzhofer <stefan.eletzhofer@eletztrick.de> L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) -W: http://www.arm.linux.org.uk/ +W: http://www.armlinux.org.uk/ S: Maintained ARM/QUALCOMM SUPPORT @@ -1493,9 +1493,9 @@ S: Supported F: arch/arm64/boot/dts/renesas/ ARM/RISCPC ARCHITECTURE -M: Russell King <linux@arm.linux.org.uk> +M: Russell King <linux@armlinux.org.uk> L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) -W: http://www.arm.linux.org.uk/ +W: http://www.armlinux.org.uk/ S: Maintained F: arch/arm/include/asm/hardware/entry-macro-iomd.S F: arch/arm/include/asm/hardware/ioc.h @@ -1773,9 +1773,9 @@ F: drivers/clk/versatile/clk-vexpress-osc.c F: drivers/clocksource/versatile.c ARM/VFP SUPPORT -M: Russell King <linux@arm.linux.org.uk> +M: Russell King <linux@armlinux.org.uk> L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) -W: http://www.arm.linux.org.uk/ +W: http://www.armlinux.org.uk/ S: Maintained F: arch/arm/vfp/ @@ -2924,7 +2924,7 @@ F: mm/cleancache.c F: include/linux/cleancache.h CLK API -M: Russell King <linux@arm.linux.org.uk> +M: Russell King 
<linux@armlinux.org.uk> L: linux-clk@vger.kernel.org S: Maintained F: include/linux/clk.h @@ -3358,9 +3358,9 @@ S: Supported F: drivers/net/ethernet/stmicro/stmmac/ CYBERPRO FB DRIVER -M: Russell King <linux@arm.linux.org.uk> +M: Russell King <linux@armlinux.org.uk> L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) -W: http://www.arm.linux.org.uk/ +W: http://www.armlinux.org.uk/ S: Maintained F: drivers/video/fbdev/cyber2000fb.* @@ -3885,7 +3885,7 @@ F: Documentation/devicetree/bindings/display/st,stih4xx.txt DRM DRIVERS FOR VIVANTE GPU IP M: Lucas Stach <l.stach@pengutronix.de> -R: Russell King <linux+etnaviv@arm.linux.org.uk> +R: Russell King <linux+etnaviv@armlinux.org.uk> R: Christian Gmeiner <christian.gmeiner@gmail.com> L: dri-devel@lists.freedesktop.org S: Maintained @@ -4227,8 +4227,8 @@ F: Documentation/efi-stub.txt F: arch/ia64/kernel/efi.c F: arch/x86/boot/compressed/eboot.[ch] F: arch/x86/include/asm/efi.h -F: arch/x86/platform/efi/* -F: drivers/firmware/efi/* +F: arch/x86/platform/efi/ +F: drivers/firmware/efi/ F: include/linux/efi*.h EFI VARIABLE FILESYSTEM @@ -6902,7 +6902,7 @@ L: linux-man@vger.kernel.org S: Maintained MARVELL ARMADA DRM SUPPORT -M: Russell King <rmk+kernel@arm.linux.org.uk> +M: Russell King <rmk+kernel@armlinux.org.uk> S: Maintained F: drivers/gpu/drm/armada/ @@ -7902,7 +7902,7 @@ S: Supported F: drivers/nfc/nxp-nci NXP TDA998X DRM DRIVER -M: Russell King <rmk+kernel@arm.linux.org.uk> +M: Russell King <rmk+kernel@armlinux.org.uk> S: Supported F: drivers/gpu/drm/i2c/tda998x_drv.c F: include/drm/i2c/tda998x.h @@ -7975,7 +7975,7 @@ F: arch/arm/*omap*/*pm* F: drivers/cpufreq/omap-cpufreq.c OMAP POWERDOMAIN SOC ADAPTATION LAYER SUPPORT -M: Rajendra Nayak <rnayak@ti.com> +M: Rajendra Nayak <rnayak@codeaurora.org> M: Paul Walmsley <paul@pwsan.com> L: linux-omap@vger.kernel.org S: Maintained @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 6 SUBLEVEL = 0 -EXTRAVERSION = -rc6 +EXTRAVERSION = -rc7 NAME = Charred Weasel # 
*DOCUMENTATION* diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index ec4791ea6911..a8767430df7d 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -58,6 +58,9 @@ config GENERIC_CSUM config RWSEM_GENERIC_SPINLOCK def_bool y +config ARCH_DISCONTIGMEM_ENABLE + def_bool y + config ARCH_FLATMEM_ENABLE def_bool y @@ -347,6 +350,15 @@ config ARC_HUGEPAGE_16M endchoice +config NODES_SHIFT + int "Maximum NUMA Nodes (as a power of 2)" + default "1" if !DISCONTIGMEM + default "2" if DISCONTIGMEM + depends on NEED_MULTIPLE_NODES + ---help--- + Accessing memory beyond 1GB (with or w/o PAE) requires 2 memory + zones. + if ISA_ARCOMPACT config ARC_COMPACT_IRQ_LEVELS @@ -455,6 +467,7 @@ config LINUX_LINK_BASE config HIGHMEM bool "High Memory Support" + select DISCONTIGMEM help With ARC 2G:2G address split, only upper 2G is directly addressable by kernel. Enable this to potentially allow access to rest of 2G and PAE diff --git a/arch/arc/include/asm/io.h b/arch/arc/include/asm/io.h index 17f85c9c73cf..c22b181e8206 100644 --- a/arch/arc/include/asm/io.h +++ b/arch/arc/include/asm/io.h @@ -13,6 +13,15 @@ #include <asm/byteorder.h> #include <asm/page.h> +#ifdef CONFIG_ISA_ARCV2 +#include <asm/barrier.h> +#define __iormb() rmb() +#define __iowmb() wmb() +#else +#define __iormb() do { } while (0) +#define __iowmb() do { } while (0) +#endif + extern void __iomem *ioremap(phys_addr_t paddr, unsigned long size); extern void __iomem *ioremap_prot(phys_addr_t paddr, unsigned long size, unsigned long flags); @@ -31,6 +40,15 @@ extern void iounmap(const void __iomem *addr); #define ioremap_wc(phy, sz) ioremap(phy, sz) #define ioremap_wt(phy, sz) ioremap(phy, sz) +/* + * io{read,write}{16,32}be() macros + */ +#define ioread16be(p) ({ u16 __v = be16_to_cpu((__force __be16)__raw_readw(p)); __iormb(); __v; }) +#define ioread32be(p) ({ u32 __v = be32_to_cpu((__force __be32)__raw_readl(p)); __iormb(); __v; }) + +#define iowrite16be(v,p) ({ __iowmb(); __raw_writew((__force u16)cpu_to_be16(v), 
p); }) +#define iowrite32be(v,p) ({ __iowmb(); __raw_writel((__force u32)cpu_to_be32(v), p); }) + /* Change struct page to physical address */ #define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT) @@ -108,15 +126,6 @@ static inline void __raw_writel(u32 w, volatile void __iomem *addr) } -#ifdef CONFIG_ISA_ARCV2 -#include <asm/barrier.h> -#define __iormb() rmb() -#define __iowmb() wmb() -#else -#define __iormb() do { } while (0) -#define __iowmb() do { } while (0) -#endif - /* * MMIO can also get buffered/optimized in micro-arch, so barriers needed * Based on ARM model for the typical use case diff --git a/arch/arc/include/asm/mmzone.h b/arch/arc/include/asm/mmzone.h new file mode 100644 index 000000000000..8e97136413d9 --- /dev/null +++ b/arch/arc/include/asm/mmzone.h @@ -0,0 +1,43 @@ +/* + * Copyright (C) 2016 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _ASM_ARC_MMZONE_H +#define _ASM_ARC_MMZONE_H + +#ifdef CONFIG_DISCONTIGMEM + +extern struct pglist_data node_data[]; +#define NODE_DATA(nid) (&node_data[nid]) + +static inline int pfn_to_nid(unsigned long pfn) +{ + int is_end_low = 1; + + if (IS_ENABLED(CONFIG_ARC_HAS_PAE40)) + is_end_low = pfn <= virt_to_pfn(0xFFFFFFFFUL); + + /* + * node 0: lowmem: 0x8000_0000 to 0xFFFF_FFFF + * node 1: HIGHMEM w/o PAE40: 0x0 to 0x7FFF_FFFF + * HIGHMEM with PAE40: 0x1_0000_0000 to ... 
+ */ + if (pfn >= ARCH_PFN_OFFSET && is_end_low) + return 0; + + return 1; +} + +static inline int pfn_valid(unsigned long pfn) +{ + int nid = pfn_to_nid(pfn); + + return (pfn <= node_end_pfn(nid)); +} +#endif /* CONFIG_DISCONTIGMEM */ + +#endif diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h index 36da89e2c853..0d53854884d0 100644 --- a/arch/arc/include/asm/page.h +++ b/arch/arc/include/asm/page.h @@ -72,11 +72,20 @@ typedef unsigned long pgprot_t; typedef pte_t * pgtable_t; +/* + * Use virt_to_pfn with caution: + * If used in pte or paddr related macros, it could cause truncation + * in PAE40 builds + * As a rule of thumb, only use it in helpers starting with virt_ + * You have been warned ! + */ #define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT) #define ARCH_PFN_OFFSET virt_to_pfn(CONFIG_LINUX_LINK_BASE) +#ifdef CONFIG_FLATMEM #define pfn_valid(pfn) (((pfn) - ARCH_PFN_OFFSET) < max_mapnr) +#endif /* * __pa, __va, virt_to_page (ALERT: deprecated, don't use them) @@ -85,12 +94,10 @@ typedef pte_t * pgtable_t; * virt here means link-address/program-address as embedded in object code. 
* And for ARC, link-addr = physical address */ -#define __pa(vaddr) ((unsigned long)vaddr) +#define __pa(vaddr) ((unsigned long)(vaddr)) #define __va(paddr) ((void *)((unsigned long)(paddr))) -#define virt_to_page(kaddr) \ - (mem_map + virt_to_pfn((kaddr) - CONFIG_LINUX_LINK_BASE)) - +#define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr)) #define virt_addr_valid(kaddr) pfn_valid(virt_to_pfn(kaddr)) /* Default Permissions for stack/heaps pages (Non Executable) */ diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index 7d6c93e63adf..10d4b8b8e545 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -278,14 +278,13 @@ static inline void pmd_set(pmd_t *pmdp, pte_t *ptep) #define pmd_present(x) (pmd_val(x)) #define pmd_clear(xp) do { pmd_val(*(xp)) = 0; } while (0) -#define pte_page(pte) \ - (mem_map + virt_to_pfn(pte_val(pte) - CONFIG_LINUX_LINK_BASE)) - +#define pte_page(pte) pfn_to_page(pte_pfn(pte)) #define mk_pte(page, prot) pfn_pte(page_to_pfn(page), prot) -#define pte_pfn(pte) virt_to_pfn(pte_val(pte)) -#define pfn_pte(pfn, prot) (__pte(((pte_t)(pfn) << PAGE_SHIFT) | \ - pgprot_val(prot))) -#define __pte_index(addr) (virt_to_pfn(addr) & (PTRS_PER_PTE - 1)) +#define pfn_pte(pfn, prot) (__pte(((pte_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))) + +/* Don't use virt_to_pfn for macros below: could cause truncations for PAE40*/ +#define pte_pfn(pte) (pte_val(pte) >> PAGE_SHIFT) +#define __pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) /* * pte_offset gets a @ptr to PMD entry (PGD in our 2-tier paging system) diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c index 5487d0b97400..8be930394750 100644 --- a/arch/arc/mm/init.c +++ b/arch/arc/mm/init.c @@ -30,11 +30,16 @@ static const unsigned long low_mem_start = CONFIG_LINUX_LINK_BASE; static unsigned long low_mem_sz; #ifdef CONFIG_HIGHMEM -static unsigned long min_high_pfn; +static unsigned long min_high_pfn, max_high_pfn; static u64 
high_mem_start; static u64 high_mem_sz; #endif +#ifdef CONFIG_DISCONTIGMEM +struct pglist_data node_data[MAX_NUMNODES] __read_mostly; +EXPORT_SYMBOL(node_data); +#endif + /* User can over-ride above with "mem=nnn[KkMm]" in cmdline */ static int __init setup_mem_sz(char *str) { @@ -109,13 +114,11 @@ void __init setup_arch_memory(void) /* Last usable page of low mem */ max_low_pfn = max_pfn = PFN_DOWN(low_mem_start + low_mem_sz); -#ifdef CONFIG_HIGHMEM - min_high_pfn = PFN_DOWN(high_mem_start); - max_pfn = PFN_DOWN(high_mem_start + high_mem_sz); +#ifdef CONFIG_FLATMEM + /* pfn_valid() uses this */ + max_mapnr = max_low_pfn - min_low_pfn; #endif - max_mapnr = max_pfn - min_low_pfn; - /*------------- bootmem allocator setup -----------------------*/ /* @@ -129,7 +132,7 @@ void __init setup_arch_memory(void) * the crash */ - memblock_add(low_mem_start, low_mem_sz); + memblock_add_node(low_mem_start, low_mem_sz, 0); memblock_reserve(low_mem_start, __pa(_end) - low_mem_start); #ifdef CONFIG_BLK_DEV_INITRD @@ -149,13 +152,6 @@ void __init setup_arch_memory(void) zones_size[ZONE_NORMAL] = max_low_pfn - min_low_pfn; zones_holes[ZONE_NORMAL] = 0; -#ifdef CONFIG_HIGHMEM - zones_size[ZONE_HIGHMEM] = max_pfn - max_low_pfn; - - /* This handles the peripheral address space hole */ - zones_holes[ZONE_HIGHMEM] = min_high_pfn - max_low_pfn; -#endif - /* * We can't use the helper free_area_init(zones[]) because it uses * PAGE_OFFSET to compute the @min_low_pfn which would be wrong @@ -168,6 +164,34 @@ void __init setup_arch_memory(void) zones_holes); /* holes */ #ifdef CONFIG_HIGHMEM + /* + * Populate a new node with highmem + * + * On ARC (w/o PAE) HIGHMEM addresses are actually smaller (0 based) + * than addresses in normal ala low memory (0x8000_0000 based). + * Even with PAE, the huge peripheral space hole would waste a lot of + * mem with single mem_map[]. This warrants a mem_map per region design. + * Thus HIGHMEM on ARC is imlemented with DISCONTIGMEM. 
+ * + * DISCONTIGMEM in turns requires multiple nodes. node 0 above is + * populated with normal memory zone while node 1 only has highmem + */ + node_set_online(1); + + min_high_pfn = PFN_DOWN(high_mem_start); + max_high_pfn = PFN_DOWN(high_mem_start + high_mem_sz); + + zones_size[ZONE_NORMAL] = 0; + zones_holes[ZONE_NORMAL] = 0; + + zones_size[ZONE_HIGHMEM] = max_high_pfn - min_high_pfn; + zones_holes[ZONE_HIGHMEM] = 0; + + free_area_init_node(1, /* node-id */ + zones_size, /* num pages per zone */ + min_high_pfn, /* first pfn of node */ + zones_holes); /* holes */ + high_memory = (void *)(min_high_pfn << PAGE_SHIFT); kmap_init(); #endif @@ -185,7 +209,7 @@ void __init mem_init(void) unsigned long tmp; reset_all_zones_managed_pages(); - for (tmp = min_high_pfn; tmp < max_pfn; tmp++) + for (tmp = min_high_pfn; tmp < max_high_pfn; tmp++) free_highmem_page(pfn_to_page(tmp)); #endif diff --git a/arch/arm/boot/dts/omap3-n900.dts b/arch/arm/boot/dts/omap3-n900.dts index b3c26a96a726..d9e2d9c6e999 100644 --- a/arch/arm/boot/dts/omap3-n900.dts +++ b/arch/arm/boot/dts/omap3-n900.dts @@ -329,6 +329,7 @@ regulator-name = "V28"; regulator-min-microvolt = <2800000>; regulator-max-microvolt = <2800000>; + regulator-initial-mode = <0x0e>; /* RES_STATE_ACTIVE */ regulator-always-on; /* due to battery cover sensor */ }; @@ -336,30 +337,35 @@ regulator-name = "VCSI"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; + regulator-initial-mode = <0x0e>; /* RES_STATE_ACTIVE */ }; &vaux3 { regulator-name = "VMMC2_30"; regulator-min-microvolt = <2800000>; regulator-max-microvolt = <3000000>; + regulator-initial-mode = <0x0e>; /* RES_STATE_ACTIVE */ }; &vaux4 { regulator-name = "VCAM_ANA_28"; regulator-min-microvolt = <2800000>; regulator-max-microvolt = <2800000>; + regulator-initial-mode = <0x0e>; /* RES_STATE_ACTIVE */ }; &vmmc1 { regulator-name = "VMMC1"; regulator-min-microvolt = <1850000>; regulator-max-microvolt = <3150000>; + regulator-initial-mode = 
<0x0e>; /* RES_STATE_ACTIVE */ }; &vmmc2 { regulator-name = "V28_A"; regulator-min-microvolt = <2800000>; regulator-max-microvolt = <3000000>; + regulator-initial-mode = <0x0e>; /* RES_STATE_ACTIVE */ regulator-always-on; /* due VIO leak to AIC34 VDDs */ }; @@ -367,6 +373,7 @@ regulator-name = "VPLL"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; + regulator-initial-mode = <0x0e>; /* RES_STATE_ACTIVE */ regulator-always-on; }; @@ -374,6 +381,7 @@ regulator-name = "VSDI_CSI"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; + regulator-initial-mode = <0x0e>; /* RES_STATE_ACTIVE */ regulator-always-on; }; @@ -381,6 +389,7 @@ regulator-name = "VMMC2_IO_18"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; + regulator-initial-mode = <0x0e>; /* RES_STATE_ACTIVE */ }; &vio { diff --git a/arch/arm/boot/dts/omap34xx.dtsi b/arch/arm/boot/dts/omap34xx.dtsi index 387dc31822fe..96f8ce7bd2af 100644 --- a/arch/arm/boot/dts/omap34xx.dtsi +++ b/arch/arm/boot/dts/omap34xx.dtsi @@ -46,7 +46,7 @@ 0x480bd800 0x017c>; interrupts = <24>; iommus = <&mmu_isp>; - syscon = <&scm_conf 0xdc>; + syscon = <&scm_conf 0x6c>; ti,phy-type = <OMAP3ISP_PHY_TYPE_COMPLEX_IO>; #clock-cells = <1>; ports { diff --git a/arch/arm/boot/dts/omap5-board-common.dtsi b/arch/arm/boot/dts/omap5-board-common.dtsi index 902657d6713b..914bf4c47404 100644 --- a/arch/arm/boot/dts/omap5-board-common.dtsi +++ b/arch/arm/boot/dts/omap5-board-common.dtsi @@ -472,7 +472,7 @@ ldo1_reg: ldo1 { /* VDDAPHY_CAM: vdda_csiport */ regulator-name = "ldo1"; - regulator-min-microvolt = <1500000>; + regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; }; @@ -498,7 +498,7 @@ ldo4_reg: ldo4 { /* VDDAPHY_DISP: vdda_dsiport/hdmi */ regulator-name = "ldo4"; - regulator-min-microvolt = <1500000>; + regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; }; diff --git a/arch/arm/boot/dts/omap5-cm-t54.dts 
b/arch/arm/boot/dts/omap5-cm-t54.dts index ecc591dc0778..4d87d9c6c86d 100644 --- a/arch/arm/boot/dts/omap5-cm-t54.dts +++ b/arch/arm/boot/dts/omap5-cm-t54.dts @@ -513,7 +513,7 @@ ldo1_reg: ldo1 { /* VDDAPHY_CAM: vdda_csiport */ regulator-name = "ldo1"; - regulator-min-microvolt = <1500000>; + regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; }; @@ -537,7 +537,7 @@ ldo4_reg: ldo4 { /* VDDAPHY_DISP: vdda_dsiport/hdmi */ regulator-name = "ldo4"; - regulator-min-microvolt = <1500000>; + regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; }; diff --git a/arch/arm/boot/dts/omap5.dtsi b/arch/arm/boot/dts/omap5.dtsi index 38805ebbe2ba..120b6b80cd39 100644 --- a/arch/arm/boot/dts/omap5.dtsi +++ b/arch/arm/boot/dts/omap5.dtsi @@ -269,7 +269,7 @@ omap5_pmx_wkup: pinmux@c840 { compatible = "ti,omap5-padconf", "pinctrl-single"; - reg = <0xc840 0x0038>; + reg = <0xc840 0x003c>; #address-cells = <1>; #size-cells = <0>; #interrupt-cells = <1>; diff --git a/arch/arm/boot/dts/qcom-apq8064.dtsi b/arch/arm/boot/dts/qcom-apq8064.dtsi index 65d0e8d98259..04f541bffbdd 100644 --- a/arch/arm/boot/dts/qcom-apq8064.dtsi +++ b/arch/arm/boot/dts/qcom-apq8064.dtsi @@ -666,7 +666,7 @@ }; sata0: sata@29000000 { - compatible = "generic-ahci"; + compatible = "qcom,apq8064-ahci", "generic-ahci"; status = "disabled"; reg = <0x29000000 0x180>; interrupts = <GIC_SPI 209 IRQ_TYPE_NONE>; @@ -688,6 +688,7 @@ phys = <&sata_phy0>; phy-names = "sata-phy"; + ports-implemented = <0x1>; }; /* Temporary fixed regulator */ diff --git a/arch/arm/boot/dts/sun8i-q8-common.dtsi b/arch/arm/boot/dts/sun8i-q8-common.dtsi index 9d2b7e2f5975..346a49d805a7 100644 --- a/arch/arm/boot/dts/sun8i-q8-common.dtsi +++ b/arch/arm/boot/dts/sun8i-q8-common.dtsi @@ -125,8 +125,6 @@ }; &reg_dc1sw { - regulator-min-microvolt = <3000000>; - regulator-max-microvolt = <3000000>; regulator-name = "vcc-lcd"; }; diff --git a/arch/arm/configs/multi_v5_defconfig b/arch/arm/configs/multi_v5_defconfig
index e11d99d529ee..690352d3ba4c 100644 --- a/arch/arm/configs/multi_v5_defconfig +++ b/arch/arm/configs/multi_v5_defconfig @@ -91,10 +91,7 @@ CONFIG_SATA_AHCI=y CONFIG_SATA_MV=y CONFIG_NETDEVICES=y CONFIG_NET_DSA_MV88E6060=y -CONFIG_NET_DSA_MV88E6131=y -CONFIG_NET_DSA_MV88E6123=y -CONFIG_NET_DSA_MV88E6171=y -CONFIG_NET_DSA_MV88E6352=y +CONFIG_NET_DSA_MV88E6XXX=y CONFIG_MV643XX_ETH=y CONFIG_R8169=y CONFIG_MARVELL_PHY=y diff --git a/arch/arm/configs/mvebu_v7_defconfig b/arch/arm/configs/mvebu_v7_defconfig index dc5797a2efab..6492407efd7e 100644 --- a/arch/arm/configs/mvebu_v7_defconfig +++ b/arch/arm/configs/mvebu_v7_defconfig @@ -66,7 +66,7 @@ CONFIG_SATA_AHCI=y CONFIG_AHCI_MVEBU=y CONFIG_SATA_MV=y CONFIG_NETDEVICES=y -CONFIG_NET_DSA_MV88E6171=y +CONFIG_NET_DSA_MV88E6XXX=y CONFIG_MV643XX_ETH=y CONFIG_MVNETA=y CONFIG_MVPP2=y diff --git a/arch/arm/configs/orion5x_defconfig b/arch/arm/configs/orion5x_defconfig index 6a5bc27538f1..27a70a7a50f6 100644 --- a/arch/arm/configs/orion5x_defconfig +++ b/arch/arm/configs/orion5x_defconfig @@ -85,8 +85,7 @@ CONFIG_ATA=y CONFIG_SATA_MV=y CONFIG_NETDEVICES=y CONFIG_MII=y -CONFIG_NET_DSA_MV88E6131=y -CONFIG_NET_DSA_MV88E6123=y +CONFIG_NET_DSA_MV88E6XXX=y CONFIG_MV643XX_ETH=y CONFIG_MARVELL_PHY=y # CONFIG_INPUT_MOUSEDEV is not set diff --git a/arch/arm/include/asm/domain.h b/arch/arm/include/asm/domain.h index fc8ba1663601..99d9f630d6b6 100644 --- a/arch/arm/include/asm/domain.h +++ b/arch/arm/include/asm/domain.h @@ -84,6 +84,7 @@ #ifndef __ASSEMBLY__ +#ifdef CONFIG_CPU_CP15_MMU static inline unsigned int get_domain(void) { unsigned int domain; @@ -103,6 +104,16 @@ static inline void set_domain(unsigned val) : : "r" (val) : "memory"); isb(); } +#else +static inline unsigned int get_domain(void) +{ + return 0; +} + +static inline void set_domain(unsigned val) +{ +} +#endif #ifdef CONFIG_CPU_USE_DOMAINS #define modify_domain(dom,type) \ diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S index 
9b8c5a113434..fb1a69eb49c1 100644 --- a/arch/arm/kernel/head-nommu.S +++ b/arch/arm/kernel/head-nommu.S @@ -236,7 +236,7 @@ ENTRY(__setup_mpu) mov r0, #CONFIG_VECTORS_BASE @ Cover from VECTORS_BASE ldr r5,=(MPU_AP_PL1RW_PL0NA | MPU_RGN_NORMAL) /* Writing N to bits 5:1 (RSR_SZ) --> region size 2^N+1 */ - mov r6, #(((PAGE_SHIFT - 1) << MPU_RSR_SZ) | 1 << MPU_RSR_EN) + mov r6, #(((2 * PAGE_SHIFT - 1) << MPU_RSR_SZ) | 1 << MPU_RSR_EN) setup_region r0, r5, r6, MPU_DATA_SIDE @ VECTORS_BASE, PL0 NA, enabled beq 3f @ Memory-map not unified diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 58dbd5c439df..d6d4191e68f2 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -1004,7 +1004,7 @@ static bool transparent_hugepage_adjust(kvm_pfn_t *pfnp, phys_addr_t *ipap) kvm_pfn_t pfn = *pfnp; gfn_t gfn = *ipap >> PAGE_SHIFT; - if (PageTransCompound(pfn_to_page(pfn))) { + if (PageTransCompoundMap(pfn_to_page(pfn))) { unsigned long mask; /* * The address we faulted on is backed by a transparent huge diff --git a/arch/arm/mach-davinci/board-mityomapl138.c b/arch/arm/mach-davinci/board-mityomapl138.c index d97c588550ad..bc4e63fa9808 100644 --- a/arch/arm/mach-davinci/board-mityomapl138.c +++ b/arch/arm/mach-davinci/board-mityomapl138.c @@ -121,6 +121,11 @@ static void read_factory_config(struct nvmem_device *nvmem, void *context) const char *partnum = NULL; struct davinci_soc_info *soc_info = &davinci_soc_info; + if (!IS_BUILTIN(CONFIG_NVMEM)) { + pr_warn("Factory Config not available without CONFIG_NVMEM\n"); + goto bad_config; + } + ret = nvmem_device_read(nvmem, 0, sizeof(factory_config), &factory_config); if (ret != sizeof(struct factory_config)) { diff --git a/arch/arm/mach-davinci/common.c b/arch/arm/mach-davinci/common.c index f55ef2ef2f92..742133b7266a 100644 --- a/arch/arm/mach-davinci/common.c +++ b/arch/arm/mach-davinci/common.c @@ -33,6 +33,11 @@ void davinci_get_mac_addr(struct nvmem_device *nvmem, void *context) char *mac_addr = 
davinci_soc_info.emac_pdata->mac_addr; off_t offset = (off_t)context; + if (!IS_BUILTIN(CONFIG_NVMEM)) { + pr_warn("Cannot read MAC addr from EEPROM without CONFIG_NVMEM\n"); + return; + } + /* Read MAC addr from EEPROM */ if (nvmem_device_read(nvmem, offset, ETH_ALEN, mac_addr) == ETH_ALEN) pr_info("Read MAC addr from EEPROM: %pM\n", mac_addr); diff --git a/arch/arm/mach-exynos/pm_domains.c b/arch/arm/mach-exynos/pm_domains.c index 7c21760f590f..875a2bab64f6 100644 --- a/arch/arm/mach-exynos/pm_domains.c +++ b/arch/arm/mach-exynos/pm_domains.c @@ -92,7 +92,7 @@ static int exynos_pd_power(struct generic_pm_domain *domain, bool power_on) if (IS_ERR(pd->clk[i])) break; - if (IS_ERR(pd->clk[i])) + if (IS_ERR(pd->pclk[i])) continue; /* Skip on first power up */ if (clk_set_parent(pd->clk[i], pd->pclk[i])) pr_err("%s: error setting parent to clock%d\n", diff --git a/arch/arm/mach-socfpga/headsmp.S b/arch/arm/mach-socfpga/headsmp.S index 5d94b7a2fb10..c160fa3007e9 100644 --- a/arch/arm/mach-socfpga/headsmp.S +++ b/arch/arm/mach-socfpga/headsmp.S @@ -13,6 +13,7 @@ #include <asm/assembler.h> .arch armv7-a + .arm ENTRY(secondary_trampoline) /* CPU1 will always fetch from 0x0 when it is brought out of reset. 
diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c index 1dd10936d68d..d5805e4bf2fc 100644 --- a/arch/arm/mm/nommu.c +++ b/arch/arm/mm/nommu.c @@ -87,7 +87,6 @@ static unsigned long irbar_read(void) /* MPU initialisation functions */ void __init sanity_check_meminfo_mpu(void) { - int i; phys_addr_t phys_offset = PHYS_OFFSET; phys_addr_t aligned_region_size, specified_mem_size, rounded_mem_size; struct memblock_region *reg; @@ -110,11 +109,13 @@ void __init sanity_check_meminfo_mpu(void) } else { /* * memblock auto merges contiguous blocks, remove - * all blocks afterwards + * all blocks afterwards in one go (we can't remove + * blocks separately while iterating) */ pr_notice("Ignoring RAM after %pa, memory at %pa ignored\n", - &mem_start, &reg->base); - memblock_remove(reg->base, reg->size); + &mem_end, &reg->base); + memblock_remove(reg->base, 0 - reg->base); + break; } } @@ -144,7 +145,7 @@ void __init sanity_check_meminfo_mpu(void) pr_warn("Truncating memory from %pa to %pa (MPU region constraints)", &specified_mem_size, &aligned_region_size); memblock_remove(mem_start + aligned_region_size, - specified_mem_size - aligned_round_size); + specified_mem_size - aligned_region_size); mem_end = mem_start + aligned_region_size; } @@ -261,7 +262,7 @@ void __init mpu_setup(void) return; region_err = mpu_setup_region(MPU_RAM_REGION, PHYS_OFFSET, - ilog2(meminfo.bank[0].size), + ilog2(memblock.memory.regions[0].size), MPU_AP_PL1RW_PL0RW | MPU_RGN_NORMAL); if (region_err) { panic("MPU region initialization failure! %d", region_err); @@ -285,7 +286,7 @@ void __init arm_mm_memblock_reserve(void) * some architectures which the DRAM is the exception vector to trap, * alloc_page breaks with error, although it is not NULL, but "0." */ - memblock_reserve(CONFIG_VECTORS_BASE, PAGE_SIZE); + memblock_reserve(CONFIG_VECTORS_BASE, 2 * PAGE_SIZE); #else /* ifndef CONFIG_CPU_V7M */ /* * There is no dedicated vector page on V7-M.
So nothing needs to be diff --git a/arch/arm64/boot/dts/renesas/r8a7795.dtsi b/arch/arm64/boot/dts/renesas/r8a7795.dtsi index a7315ebe3883..706d2426024f 100644 --- a/arch/arm64/boot/dts/renesas/r8a7795.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a7795.dtsi @@ -120,7 +120,6 @@ compatible = "fixed-clock"; #clock-cells = <0>; clock-frequency = <0>; - status = "disabled"; }; soc { diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S index c976ebfe2269..57b4836b7ecd 100644 --- a/arch/parisc/kernel/syscall.S +++ b/arch/parisc/kernel/syscall.S @@ -344,7 +344,7 @@ tracesys_next: #endif cmpib,COND(=),n -1,%r20,tracesys_exit /* seccomp may have returned -1 */ - comiclr,>>= __NR_Linux_syscalls, %r20, %r0 + comiclr,>> __NR_Linux_syscalls, %r20, %r0 b,n .Ltracesys_nosys LDREGX %r20(%r19), %r19 diff --git a/arch/powerpc/include/asm/word-at-a-time.h b/arch/powerpc/include/asm/word-at-a-time.h index e4396a7d0f7c..4afe66aa1400 100644 --- a/arch/powerpc/include/asm/word-at-a-time.h +++ b/arch/powerpc/include/asm/word-at-a-time.h @@ -82,7 +82,7 @@ static inline unsigned long create_zero_mask(unsigned long bits) "andc %1,%1,%2\n\t" "popcntd %0,%1" : "=r" (leading_zero_bits), "=&r" (trailing_zero_bit_mask) - : "r" (bits)); + : "b" (bits)); return leading_zero_bits; } diff --git a/arch/tile/configs/tilegx_defconfig b/arch/tile/configs/tilegx_defconfig index 3f3dfb8b150a..718905557f7e 100644 --- a/arch/tile/configs/tilegx_defconfig +++ b/arch/tile/configs/tilegx_defconfig @@ -221,8 +221,7 @@ CONFIG_NETCONSOLE_DYNAMIC=y CONFIG_TUN=y CONFIG_VETH=m CONFIG_NET_DSA_MV88E6060=y -CONFIG_NET_DSA_MV88E6131=y -CONFIG_NET_DSA_MV88E6123=y +CONFIG_NET_DSA_MV88E6XXX=y CONFIG_SKY2=y CONFIG_PTP_1588_CLOCK_TILEGX=y # CONFIG_WLAN is not set diff --git a/arch/tile/configs/tilepro_defconfig b/arch/tile/configs/tilepro_defconfig index ef9e27eb2f50..dc85468afd5e 100644 --- a/arch/tile/configs/tilepro_defconfig +++ b/arch/tile/configs/tilepro_defconfig @@ -340,8 +340,7 @@ 
CONFIG_NETCONSOLE_DYNAMIC=y CONFIG_TUN=y CONFIG_VETH=m CONFIG_NET_DSA_MV88E6060=y -CONFIG_NET_DSA_MV88E6131=y -CONFIG_NET_DSA_MV88E6123=y +CONFIG_NET_DSA_MV88E6XXX=y # CONFIG_NET_VENDOR_3COM is not set CONFIG_E1000E=y # CONFIG_WLAN is not set diff --git a/arch/x86/events/amd/iommu.c b/arch/x86/events/amd/iommu.c index 40625ca7a190..6011a573dd64 100644 --- a/arch/x86/events/amd/iommu.c +++ b/arch/x86/events/amd/iommu.c @@ -474,6 +474,7 @@ static __init int _init_perf_amd_iommu( static struct perf_amd_iommu __perf_iommu = { .pmu = { + .task_ctx_nr = perf_invalid_context, .event_init = perf_iommu_event_init, .add = perf_iommu_add, .del = perf_iommu_del, diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index aff79884e17d..a6fd4dbcf820 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3637,6 +3637,8 @@ __init int intel_pmu_init(void) pr_cont("Knights Landing events, "); break; + case 142: /* 14nm Kabylake Mobile */ + case 158: /* 14nm Kabylake Desktop */ case 78: /* 14nm Skylake Mobile */ case 94: /* 14nm Skylake Desktop */ case 85: /* 14nm Skylake Server */ diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 8f4942e2bcbb..d7ce96a7daca 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -891,9 +891,7 @@ void __init uv_system_init(void) } pr_info("UV: Found %s hub\n", hub); - /* We now only need to map the MMRs on UV1 */ - if (is_uv1_hub()) - map_low_mmrs(); + map_low_mmrs(); m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR ); m_val = m_n_config.s.m_skt; diff --git a/arch/x86/kernel/sysfb_efi.c b/arch/x86/kernel/sysfb_efi.c index b285d4e8c68e..5da924bbf0a0 100644 --- a/arch/x86/kernel/sysfb_efi.c +++ b/arch/x86/kernel/sysfb_efi.c @@ -106,14 +106,24 @@ static int __init efifb_set_system(const struct dmi_system_id *id) continue; for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { resource_size_t start, end; + unsigned long flags; + + flags = 
pci_resource_flags(dev, i); + if (!(flags & IORESOURCE_MEM)) + continue; + + if (flags & IORESOURCE_UNSET) + continue; + + if (pci_resource_len(dev, i) == 0) + continue; start = pci_resource_start(dev, i); - if (start == 0) - break; end = pci_resource_end(dev, i); if (screen_info.lfb_base >= start && screen_info.lfb_base < end) { found_bar = 1; + break; } } } diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c index 92ae6acac8a7..6aa0f4d9eea6 100644 --- a/arch/x86/kernel/tsc_msr.c +++ b/arch/x86/kernel/tsc_msr.c @@ -92,7 +92,7 @@ unsigned long try_msr_calibrate_tsc(void) if (freq_desc_tables[cpu_index].msr_plat) { rdmsr(MSR_PLATFORM_INFO, lo, hi); - ratio = (lo >> 8) & 0x1f; + ratio = (lo >> 8) & 0xff; } else { rdmsr(MSR_IA32_PERF_STATUS, lo, hi); ratio = (hi >> 8) & 0x1f; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 1ff4dbb73fb7..b6f50e8b0a39 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2823,7 +2823,7 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu, */ if (!is_error_noslot_pfn(pfn) && !kvm_is_reserved_pfn(pfn) && level == PT_PAGE_TABLE_LEVEL && - PageTransCompound(pfn_to_page(pfn)) && + PageTransCompoundMap(pfn_to_page(pfn)) && !mmu_gfn_lpage_is_disallowed(vcpu, gfn, PT_DIRECTORY_LEVEL)) { unsigned long mask; /* @@ -4785,7 +4785,7 @@ restart: */ if (sp->role.direct && !kvm_is_reserved_pfn(pfn) && - PageTransCompound(pfn_to_page(pfn))) { + PageTransCompoundMap(pfn_to_page(pfn))) { drop_spte(kvm, sptep); need_tlb_flush = 1; goto restart; diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c index a2433817c987..6a2f5691b1ab 100644 --- a/arch/x86/platform/efi/efi-bgrt.c +++ b/arch/x86/platform/efi/efi-bgrt.c @@ -43,40 +43,40 @@ void __init efi_bgrt_init(void) return; if (bgrt_tab->header.length < sizeof(*bgrt_tab)) { - pr_err("Ignoring BGRT: invalid length %u (expected %zu)\n", + pr_notice("Ignoring BGRT: invalid length %u (expected %zu)\n", bgrt_tab->header.length, 
sizeof(*bgrt_tab)); return; } if (bgrt_tab->version != 1) { - pr_err("Ignoring BGRT: invalid version %u (expected 1)\n", + pr_notice("Ignoring BGRT: invalid version %u (expected 1)\n", bgrt_tab->version); return; } if (bgrt_tab->status & 0xfe) { - pr_err("Ignoring BGRT: reserved status bits are non-zero %u\n", + pr_notice("Ignoring BGRT: reserved status bits are non-zero %u\n", bgrt_tab->status); return; } if (bgrt_tab->image_type != 0) { - pr_err("Ignoring BGRT: invalid image type %u (expected 0)\n", + pr_notice("Ignoring BGRT: invalid image type %u (expected 0)\n", bgrt_tab->image_type); return; } if (!bgrt_tab->image_address) { - pr_err("Ignoring BGRT: null image address\n"); + pr_notice("Ignoring BGRT: null image address\n"); return; } image = memremap(bgrt_tab->image_address, sizeof(bmp_header), MEMREMAP_WB); if (!image) { - pr_err("Ignoring BGRT: failed to map image header memory\n"); + pr_notice("Ignoring BGRT: failed to map image header memory\n"); return; } memcpy(&bmp_header, image, sizeof(bmp_header)); memunmap(image); if (bmp_header.id != 0x4d42) { - pr_err("Ignoring BGRT: Incorrect BMP magic number 0x%x (expected 0x4d42)\n", + pr_notice("Ignoring BGRT: Incorrect BMP magic number 0x%x (expected 0x4d42)\n", bmp_header.id); return; } @@ -84,14 +84,14 @@ void __init efi_bgrt_init(void) bgrt_image = kmalloc(bgrt_image_size, GFP_KERNEL | __GFP_NOWARN); if (!bgrt_image) { - pr_err("Ignoring BGRT: failed to allocate memory for image (wanted %zu bytes)\n", + pr_notice("Ignoring BGRT: failed to allocate memory for image (wanted %zu bytes)\n", bgrt_image_size); return; } image = memremap(bgrt_tab->image_address, bmp_header.size, MEMREMAP_WB); if (!image) { - pr_err("Ignoring BGRT: failed to map image memory\n"); + pr_notice("Ignoring BGRT: failed to map image memory\n"); kfree(bgrt_image); bgrt_image = NULL; return; diff --git a/crypto/Kconfig b/crypto/Kconfig index 93a1fdc1feee..1d33beb6a1ae 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -96,6 +96,7 @@ 
config CRYPTO_AKCIPHER config CRYPTO_RSA tristate "RSA algorithm" select CRYPTO_AKCIPHER + select CRYPTO_MANAGER select MPILIB select ASN1 help diff --git a/crypto/ahash.c b/crypto/ahash.c index 5fc1f172963d..3887a98abcc3 100644 --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -69,8 +69,9 @@ static int hash_walk_new_entry(struct crypto_hash_walk *walk) struct scatterlist *sg; sg = walk->sg; - walk->pg = sg_page(sg); walk->offset = sg->offset; + walk->pg = sg_page(walk->sg) + (walk->offset >> PAGE_SHIFT); + walk->offset = offset_in_page(walk->offset); walk->entrylen = sg->length; if (walk->entrylen > walk->total) diff --git a/drivers/acpi/acpica/dsmethod.c b/drivers/acpi/acpica/dsmethod.c index 1982310e6d83..da198b864107 100644 --- a/drivers/acpi/acpica/dsmethod.c +++ b/drivers/acpi/acpica/dsmethod.c @@ -428,6 +428,9 @@ acpi_ds_begin_method_execution(struct acpi_namespace_node *method_node, obj_desc->method.mutex->mutex. original_sync_level = obj_desc->method.mutex->mutex.sync_level; + + obj_desc->method.mutex->mutex.thread_id = + acpi_os_get_thread_id(); } } diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c index d0f35e63640b..63cc9dbe4f3b 100644 --- a/drivers/acpi/nfit.c +++ b/drivers/acpi/nfit.c @@ -287,8 +287,11 @@ static int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, offset); rc = -ENXIO; } - } else + } else { rc = 0; + if (cmd_rc) + *cmd_rc = xlat_status(buf, cmd); + } out: ACPI_FREE(out_obj); diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index 5083f85efea7..cfa936a32513 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -202,6 +202,14 @@ config SATA_FSL If unsure, say N. +config SATA_AHCI_SEATTLE + tristate "AMD Seattle 6.0Gbps AHCI SATA host controller support" + depends on ARCH_SEATTLE + help + This option enables support for AMD Seattle SATA host controller. 
+ + If unsure, say N + config SATA_INIC162X tristate "Initio 162x SATA support (Very Experimental)" depends on PCI diff --git a/drivers/ata/Makefile b/drivers/ata/Makefile index 18579521464e..0b2afb7e5f35 100644 --- a/drivers/ata/Makefile +++ b/drivers/ata/Makefile @@ -4,6 +4,7 @@ obj-$(CONFIG_ATA) += libata.o # non-SFF interface obj-$(CONFIG_SATA_AHCI) += ahci.o libahci.o obj-$(CONFIG_SATA_ACARD_AHCI) += acard-ahci.o libahci.o +obj-$(CONFIG_SATA_AHCI_SEATTLE) += ahci_seattle.o libahci.o libahci_platform.o obj-$(CONFIG_SATA_AHCI_PLATFORM) += ahci_platform.o libahci.o libahci_platform.o obj-$(CONFIG_SATA_FSL) += sata_fsl.o obj-$(CONFIG_SATA_INIC162X) += sata_inic162x.o diff --git a/drivers/ata/ahci_platform.c b/drivers/ata/ahci_platform.c index 40442332bfa7..62a04c8fb5c9 100644 --- a/drivers/ata/ahci_platform.c +++ b/drivers/ata/ahci_platform.c @@ -51,6 +51,9 @@ static int ahci_probe(struct platform_device *pdev) if (rc) return rc; + of_property_read_u32(dev->of_node, + "ports-implemented", &hpriv->force_port_map); + if (of_device_is_compatible(dev->of_node, "hisilicon,hisi-ahci")) hpriv->flags |= AHCI_HFLAG_NO_FBS | AHCI_HFLAG_NO_NCQ; diff --git a/drivers/ata/ahci_seattle.c b/drivers/ata/ahci_seattle.c new file mode 100644 index 000000000000..6e702ab57220 --- /dev/null +++ b/drivers/ata/ahci_seattle.c @@ -0,0 +1,210 @@ +/* + * AMD Seattle AHCI SATA driver + * + * Copyright (c) 2015, Advanced Micro Devices + * Author: Brijesh Singh <brijesh.singh@amd.com> + * + * based on the AHCI SATA platform driver by Jeff Garzik and Anton Vorontsov + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/pm.h> +#include <linux/device.h> +#include <linux/of_device.h> +#include <linux/platform_device.h> +#include <linux/libata.h> +#include <linux/ahci_platform.h> +#include <linux/acpi.h> +#include <linux/pci_ids.h> +#include "ahci.h" + +/* SGPIO Control Register definition + * + * Bit Type Description + * 31 RW OD7.2 (activity) + * 30 RW OD7.1 (locate) + * 29 RW OD7.0 (fault) + * 28...8 RW OD6.2...OD0.0 (3bits per port, 1 bit per LED) + * 7 RO SGPIO feature flag + * 6:4 RO Reserved + * 3:0 RO Number of ports (0 means no port supported) + */ +#define ACTIVITY_BIT_POS(x) (8 + (3 * x)) +#define LOCATE_BIT_POS(x) (ACTIVITY_BIT_POS(x) + 1) +#define FAULT_BIT_POS(x) (LOCATE_BIT_POS(x) + 1) + +#define ACTIVITY_MASK 0x00010000 +#define LOCATE_MASK 0x00080000 +#define FAULT_MASK 0x00400000 + +#define DRV_NAME "ahci-seattle" + +static ssize_t seattle_transmit_led_message(struct ata_port *ap, u32 state, + ssize_t size); + +struct seattle_plat_data { + void __iomem *sgpio_ctrl; +}; + +static struct ata_port_operations ahci_port_ops = { + .inherits = &ahci_ops, +}; + +static const struct ata_port_info ahci_port_info = { + .flags = AHCI_FLAG_COMMON, + .pio_mask = ATA_PIO4, + .udma_mask = ATA_UDMA6, + .port_ops = &ahci_port_ops, +}; + +static struct ata_port_operations ahci_seattle_ops = { + .inherits = &ahci_ops, + .transmit_led_message = seattle_transmit_led_message, +}; + +static const struct ata_port_info ahci_port_seattle_info = { + .flags = AHCI_FLAG_COMMON | ATA_FLAG_EM | ATA_FLAG_SW_ACTIVITY, + .link_flags = ATA_LFLAG_SW_ACTIVITY, + .pio_mask = ATA_PIO4, + .udma_mask = ATA_UDMA6, + .port_ops = &ahci_seattle_ops, +}; + +static struct scsi_host_template ahci_platform_sht = { + AHCI_SHT(DRV_NAME), +}; + +static ssize_t seattle_transmit_led_message(struct ata_port *ap, u32 state, + ssize_t size) +{ + struct ahci_host_priv *hpriv = 
ap->host->private_data; + struct ahci_port_priv *pp = ap->private_data; + struct seattle_plat_data *plat_data = hpriv->plat_data; + unsigned long flags; + int pmp; + struct ahci_em_priv *emp; + u32 val; + + /* get the slot number from the message */ + pmp = (state & EM_MSG_LED_PMP_SLOT) >> 8; + if (pmp >= EM_MAX_SLOTS) + return -EINVAL; + emp = &pp->em_priv[pmp]; + + val = ioread32(plat_data->sgpio_ctrl); + if (state & ACTIVITY_MASK) + val |= 1 << ACTIVITY_BIT_POS((ap->port_no)); + else + val &= ~(1 << ACTIVITY_BIT_POS((ap->port_no))); + + if (state & LOCATE_MASK) + val |= 1 << LOCATE_BIT_POS((ap->port_no)); + else + val &= ~(1 << LOCATE_BIT_POS((ap->port_no))); + + if (state & FAULT_MASK) + val |= 1 << FAULT_BIT_POS((ap->port_no)); + else + val &= ~(1 << FAULT_BIT_POS((ap->port_no))); + + iowrite32(val, plat_data->sgpio_ctrl); + + spin_lock_irqsave(ap->lock, flags); + + /* save off new led state for port/slot */ + emp->led_state = state; + + spin_unlock_irqrestore(ap->lock, flags); + + return size; +} + +static const struct ata_port_info *ahci_seattle_get_port_info( + struct platform_device *pdev, struct ahci_host_priv *hpriv) +{ + struct device *dev = &pdev->dev; + struct seattle_plat_data *plat_data; + u32 val; + + plat_data = devm_kzalloc(dev, sizeof(*plat_data), GFP_KERNEL); + if (IS_ERR(plat_data)) + return &ahci_port_info; + + plat_data->sgpio_ctrl = devm_ioremap_resource(dev, + platform_get_resource(pdev, IORESOURCE_MEM, 1)); + if (IS_ERR(plat_data->sgpio_ctrl)) + return &ahci_port_info; + + val = ioread32(plat_data->sgpio_ctrl); + + if (!(val & 0xf)) + return &ahci_port_info; + + hpriv->em_loc = 0; + hpriv->em_buf_sz = 4; + hpriv->em_msg_type = EM_MSG_TYPE_LED; + hpriv->plat_data = plat_data; + + dev_info(dev, "SGPIO LED control is enabled.\n"); + return &ahci_port_seattle_info; +} + +static int ahci_seattle_probe(struct platform_device *pdev) +{ + int rc; + struct ahci_host_priv *hpriv; + + hpriv = ahci_platform_get_resources(pdev); + if (IS_ERR(hpriv)) + 
return PTR_ERR(hpriv); + + rc = ahci_platform_enable_resources(hpriv); + if (rc) + return rc; + + rc = ahci_platform_init_host(pdev, hpriv, + ahci_seattle_get_port_info(pdev, hpriv), + &ahci_platform_sht); + if (rc) + goto disable_resources; + + return 0; +disable_resources: + ahci_platform_disable_resources(hpriv); + return rc; +} + +static SIMPLE_DEV_PM_OPS(ahci_pm_ops, ahci_platform_suspend, + ahci_platform_resume); + +static const struct acpi_device_id ahci_acpi_match[] = { + { "AMDI0600", 0 }, + {} +}; +MODULE_DEVICE_TABLE(acpi, ahci_acpi_match); + +static struct platform_driver ahci_seattle_driver = { + .probe = ahci_seattle_probe, + .remove = ata_platform_remove_one, + .driver = { + .name = DRV_NAME, + .acpi_match_table = ahci_acpi_match, + .pm = &ahci_pm_ops, + }, +}; +module_platform_driver(ahci_seattle_driver); + +MODULE_DESCRIPTION("Seattle AHCI SATA platform driver"); +MODULE_AUTHOR("Brijesh Singh <brijesh.singh@amd.com>"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:" DRV_NAME); diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index 3982054060b8..a5d7c1c2a05e 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -507,6 +507,7 @@ void ahci_save_initial_config(struct device *dev, struct ahci_host_priv *hpriv) dev_info(dev, "forcing port_map 0x%x -> 0x%x\n", port_map, hpriv->force_port_map); port_map = hpriv->force_port_map; + hpriv->saved_port_map = port_map; } if (hpriv->mask_port_map) { diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c index 433b60092972..d8f4cc22856c 100644 --- a/drivers/base/power/opp/core.c +++ b/drivers/base/power/opp/core.c @@ -259,9 +259,6 @@ unsigned long dev_pm_opp_get_max_volt_latency(struct device *dev) reg = opp_table->regulator; if (IS_ERR(reg)) { /* Regulator may not be required for device */ - if (reg) - dev_err(dev, "%s: Invalid regulator (%ld)\n", __func__, - PTR_ERR(reg)); rcu_read_unlock(); return 0; } diff --git a/drivers/base/property.c b/drivers/base/property.c 
index 9b1a65debd49..7f692accdc90 100644 --- a/drivers/base/property.c +++ b/drivers/base/property.c @@ -21,7 +21,7 @@ static inline bool is_pset_node(struct fwnode_handle *fwnode) { - return fwnode && fwnode->type == FWNODE_PDATA; + return !IS_ERR_OR_NULL(fwnode) && fwnode->type == FWNODE_PDATA; } static inline struct property_set *to_pset_node(struct fwnode_handle *fwnode) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index e93405f0eac4..c4acfc5273b3 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1557,21 +1557,25 @@ void cpufreq_suspend(void) if (!cpufreq_driver) return; - if (!has_target()) + if (!has_target() && !cpufreq_driver->suspend) goto suspend; pr_debug("%s: Suspending Governors\n", __func__); for_each_active_policy(policy) { - down_write(&policy->rwsem); - ret = cpufreq_governor(policy, CPUFREQ_GOV_STOP); - up_write(&policy->rwsem); + if (has_target()) { + down_write(&policy->rwsem); + ret = cpufreq_governor(policy, CPUFREQ_GOV_STOP); + up_write(&policy->rwsem); - if (ret) - pr_err("%s: Failed to stop governor for policy: %p\n", - __func__, policy); - else if (cpufreq_driver->suspend - && cpufreq_driver->suspend(policy)) + if (ret) { + pr_err("%s: Failed to stop governor for policy: %p\n", + __func__, policy); + continue; + } + } + + if (cpufreq_driver->suspend && cpufreq_driver->suspend(policy)) pr_err("%s: Failed to suspend driver: %p\n", __func__, policy); } @@ -1596,7 +1600,7 @@ void cpufreq_resume(void) cpufreq_suspended = false; - if (!has_target()) + if (!has_target() && !cpufreq_driver->resume) return; pr_debug("%s: Resuming Governors\n", __func__); @@ -1605,7 +1609,7 @@ void cpufreq_resume(void) if (cpufreq_driver->resume && cpufreq_driver->resume(policy)) { pr_err("%s: Failed to resume driver: %p\n", __func__, policy); - } else { + } else if (has_target()) { down_write(&policy->rwsem); ret = cpufreq_start_governor(policy); up_write(&policy->rwsem); diff --git a/drivers/cpufreq/intel_pstate.c 
b/drivers/cpufreq/intel_pstate.c index f502d5b90c25..b230ebaae66c 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -453,6 +453,14 @@ static void intel_pstate_hwp_set(const struct cpumask *cpumask) } } +static int intel_pstate_hwp_set_policy(struct cpufreq_policy *policy) +{ + if (hwp_active) + intel_pstate_hwp_set(policy->cpus); + + return 0; +} + static void intel_pstate_hwp_set_online_cpus(void) { get_online_cpus(); @@ -1062,8 +1070,9 @@ static inline bool intel_pstate_sample(struct cpudata *cpu, u64 time) static inline int32_t get_avg_frequency(struct cpudata *cpu) { - return div64_u64(cpu->pstate.max_pstate_physical * cpu->sample.aperf * - cpu->pstate.scaling, cpu->sample.mperf); + return fp_toint(mul_fp(cpu->sample.core_pct_busy, + int_tofp(cpu->pstate.max_pstate_physical * + cpu->pstate.scaling / 100))); } static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu) @@ -1106,8 +1115,6 @@ static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu) int32_t core_busy, max_pstate, current_pstate, sample_ratio; u64 duration_ns; - intel_pstate_calc_busy(cpu); - /* * core_busy is the ratio of actual performance to max * max_pstate is the max non turbo pstate available @@ -1191,8 +1198,11 @@ static void intel_pstate_update_util(struct update_util_data *data, u64 time, if ((s64)delta_ns >= pid_params.sample_rate_ns) { bool sample_taken = intel_pstate_sample(cpu, time); - if (sample_taken && !hwp_active) - intel_pstate_adjust_busy_pstate(cpu); + if (sample_taken) { + intel_pstate_calc_busy(cpu); + if (!hwp_active) + intel_pstate_adjust_busy_pstate(cpu); + } } } @@ -1346,8 +1356,7 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) out: intel_pstate_set_update_util_hook(policy->cpu); - if (hwp_active) - intel_pstate_hwp_set(policy->cpus); + intel_pstate_hwp_set_policy(policy); return 0; } @@ -1411,6 +1420,7 @@ static struct cpufreq_driver intel_pstate_driver = { .flags = 
CPUFREQ_CONST_LOOPS, .verify = intel_pstate_verify_policy, .setpolicy = intel_pstate_set_policy, + .resume = intel_pstate_hwp_set_policy, .get = intel_pstate_get, .init = intel_pstate_cpu_init, .stop_cpu = intel_pstate_stop_cpu, diff --git a/drivers/cpufreq/sti-cpufreq.c b/drivers/cpufreq/sti-cpufreq.c index a9c659f58974..04042038ec4b 100644 --- a/drivers/cpufreq/sti-cpufreq.c +++ b/drivers/cpufreq/sti-cpufreq.c @@ -259,6 +259,10 @@ static int sti_cpufreq_init(void) { int ret; + if ((!of_machine_is_compatible("st,stih407")) && + (!of_machine_is_compatible("st,stih410"))) + return -ENODEV; + ddata.cpu = get_cpu_device(0); if (!ddata.cpu) { dev_err(ddata.cpu, "Failed to get device for CPU0\n"); diff --git a/drivers/cpuidle/cpuidle-arm.c b/drivers/cpuidle/cpuidle-arm.c index 545069d5fdfb..e342565e8715 100644 --- a/drivers/cpuidle/cpuidle-arm.c +++ b/drivers/cpuidle/cpuidle-arm.c @@ -50,7 +50,7 @@ static int arm_enter_idle_state(struct cpuidle_device *dev, * call the CPU ops suspend protocol with idle index as a * parameter. 
*/ - arm_cpuidle_suspend(idx); + ret = arm_cpuidle_suspend(idx); cpu_pm_exit(); } diff --git a/drivers/crypto/qat/qat_common/adf_common_drv.h b/drivers/crypto/qat/qat_common/adf_common_drv.h index 0e82ce3c383e..976b01e58afb 100644 --- a/drivers/crypto/qat/qat_common/adf_common_drv.h +++ b/drivers/crypto/qat/qat_common/adf_common_drv.h @@ -236,6 +236,8 @@ void adf_enable_vf2pf_interrupts(struct adf_accel_dev *accel_dev, uint32_t vf_mask); void adf_enable_pf2vf_interrupts(struct adf_accel_dev *accel_dev); void adf_disable_pf2vf_interrupts(struct adf_accel_dev *accel_dev); +int adf_init_pf_wq(void); +void adf_exit_pf_wq(void); #else static inline int adf_sriov_configure(struct pci_dev *pdev, int numvfs) { @@ -253,5 +255,14 @@ static inline void adf_enable_pf2vf_interrupts(struct adf_accel_dev *accel_dev) static inline void adf_disable_pf2vf_interrupts(struct adf_accel_dev *accel_dev) { } + +static inline int adf_init_pf_wq(void) +{ + return 0; +} + +static inline void adf_exit_pf_wq(void) +{ +} #endif #endif diff --git a/drivers/crypto/qat/qat_common/adf_ctl_drv.c b/drivers/crypto/qat/qat_common/adf_ctl_drv.c index 5c897e6e7994..3c3f948290ca 100644 --- a/drivers/crypto/qat/qat_common/adf_ctl_drv.c +++ b/drivers/crypto/qat/qat_common/adf_ctl_drv.c @@ -462,12 +462,17 @@ static int __init adf_register_ctl_device_driver(void) if (adf_init_aer()) goto err_aer; + if (adf_init_pf_wq()) + goto err_pf_wq; + if (qat_crypto_register()) goto err_crypto_register; return 0; err_crypto_register: + adf_exit_pf_wq(); +err_pf_wq: adf_exit_aer(); err_aer: adf_chr_drv_destroy(); @@ -480,6 +485,7 @@ static void __exit adf_unregister_ctl_device_driver(void) { adf_chr_drv_destroy(); adf_exit_aer(); + adf_exit_pf_wq(); qat_crypto_unregister(); adf_clean_vf_map(false); mutex_destroy(&adf_ctl_lock); diff --git a/drivers/crypto/qat/qat_common/adf_sriov.c b/drivers/crypto/qat/qat_common/adf_sriov.c index 1117a8b58280..38a0415e767d 100644 --- a/drivers/crypto/qat/qat_common/adf_sriov.c +++ 
b/drivers/crypto/qat/qat_common/adf_sriov.c @@ -119,11 +119,6 @@ static int adf_enable_sriov(struct adf_accel_dev *accel_dev) int i; u32 reg; - /* Workqueue for PF2VF responses */ - pf2vf_resp_wq = create_workqueue("qat_pf2vf_resp_wq"); - if (!pf2vf_resp_wq) - return -ENOMEM; - for (i = 0, vf_info = accel_dev->pf.vf_info; i < totalvfs; i++, vf_info++) { /* This ptr will be populated when VFs will be created */ @@ -216,11 +211,6 @@ void adf_disable_sriov(struct adf_accel_dev *accel_dev) kfree(accel_dev->pf.vf_info); accel_dev->pf.vf_info = NULL; - - if (pf2vf_resp_wq) { - destroy_workqueue(pf2vf_resp_wq); - pf2vf_resp_wq = NULL; - } } EXPORT_SYMBOL_GPL(adf_disable_sriov); @@ -304,3 +294,19 @@ int adf_sriov_configure(struct pci_dev *pdev, int numvfs) return numvfs; } EXPORT_SYMBOL_GPL(adf_sriov_configure); + +int __init adf_init_pf_wq(void) +{ + /* Workqueue for PF2VF responses */ + pf2vf_resp_wq = create_workqueue("qat_pf2vf_resp_wq"); + + return !pf2vf_resp_wq ? -ENOMEM : 0; +} + +void adf_exit_pf_wq(void) +{ + if (pf2vf_resp_wq) { + destroy_workqueue(pf2vf_resp_wq); + pf2vf_resp_wq = NULL; + } +} diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c index 815c4a5cae54..1b95475b6aef 100644 --- a/drivers/firmware/qemu_fw_cfg.c +++ b/drivers/firmware/qemu_fw_cfg.c @@ -77,7 +77,7 @@ static inline u16 fw_cfg_sel_endianness(u16 key) static inline void fw_cfg_read_blob(u16 key, void *buf, loff_t pos, size_t count) { - u32 glk; + u32 glk = -1U; acpi_status status; /* If we have ACPI, ensure mutual exclusion against any potential diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index e557fc1f17c8..7ecea83ce453 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -541,6 +541,7 @@ int amdgpu_bo_set_metadata (struct amdgpu_bo *bo, void *metadata, if (!metadata_size) { if (bo->metadata_size) { kfree(bo->metadata); + bo->metadata = NULL; 
bo->metadata_size = 0; } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c index 1e0bba29e167..1cd6de575305 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c @@ -298,6 +298,10 @@ bool amdgpu_atombios_encoder_mode_fixup(struct drm_encoder *encoder, && (mode->crtc_vsync_start < (mode->crtc_vdisplay + 2))) adjusted_mode->crtc_vsync_start = adjusted_mode->crtc_vdisplay + 2; + /* vertical FP must be at least 1 */ + if (mode->crtc_vsync_start == mode->crtc_vdisplay) + adjusted_mode->crtc_vsync_start++; + /* get the native mode for scaling */ if (amdgpu_encoder->active_device & (ATOM_DEVICE_LCD_SUPPORT)) amdgpu_panel_mode_fixup(encoder, adjusted_mode); diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 30798cbc6fc0..6d2fb3f4ac62 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -792,7 +792,7 @@ static int i915_drm_resume(struct drm_device *dev) static int i915_drm_resume_early(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - int ret = 0; + int ret; /* * We have a resume ordering issue with the snd-hda driver also @@ -803,6 +803,36 @@ static int i915_drm_resume_early(struct drm_device *dev) * FIXME: This should be solved with a special hdmi sink device or * similar so that power domains can be employed. */ + + /* + * Note that we need to set the power state explicitly, since we + * powered off the device during freeze and the PCI core won't power + * it back up for us during thaw. Powering off the device during + * freeze is not a hard requirement though, and during the + * suspend/resume phases the PCI core makes sure we get here with the + * device powered on. So in case we change our freeze logic and keep + * the device powered we can also remove the following set power state + * call. 
+ */ + ret = pci_set_power_state(dev->pdev, PCI_D0); + if (ret) { + DRM_ERROR("failed to set PCI D0 power state (%d)\n", ret); + goto out; + } + + /* + * Note that pci_enable_device() first enables any parent bridge + * device and only then sets the power state for this device. The + * bridge enabling is a nop though, since bridge devices are resumed + * first. The order of enabling power and enabling the device is + * imposed by the PCI core as described above, so here we preserve the + * same order for the freeze/thaw phases. + * + * TODO: eventually we should remove pci_disable_device() / + * pci_enable_device() from suspend/resume. Due to how they + * depend on the device enable refcount we can't anyway depend on them + * disabling/enabling the device. + */ if (pci_enable_device(dev->pdev)) { ret = -EIO; goto out; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index f76cbf3e5d1e..fffdac801d3b 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2907,7 +2907,14 @@ enum skl_disp_power_wells { #define GEN6_RP_STATE_CAP _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5998) #define BXT_RP_STATE_CAP _MMIO(0x138170) -#define INTERVAL_1_28_US(us) (((us) * 100) >> 7) +/* + * Make these a multiple of magic 25 to avoid SNB (eg. Dell XPS + * 8300) freezing up around GPU hangs. Looks as if even + * scheduling/timer interrupts start misbehaving if the RPS + * EI/thresholds are "bad", leading to a very sluggish or even + * frozen machine. + */ +#define INTERVAL_1_28_US(us) roundup(((us) * 100) >> 7, 25) #define INTERVAL_1_33_US(us) (((us) * 3) >> 2) #define INTERVAL_0_833_US(us) (((us) * 6) / 5) #define GT_INTERVAL_FROM_US(dev_priv, us) (IS_GEN9(dev_priv) ? 
\ diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 62de9f4bce09..3b57bf06abe8 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -443,9 +443,17 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder) } else if (IS_BROADWELL(dev_priv)) { ddi_translations_fdi = bdw_ddi_translations_fdi; ddi_translations_dp = bdw_ddi_translations_dp; - ddi_translations_edp = bdw_ddi_translations_edp; + + if (dev_priv->edp_low_vswing) { + ddi_translations_edp = bdw_ddi_translations_edp; + n_edp_entries = ARRAY_SIZE(bdw_ddi_translations_edp); + } else { + ddi_translations_edp = bdw_ddi_translations_dp; + n_edp_entries = ARRAY_SIZE(bdw_ddi_translations_dp); + } + ddi_translations_hdmi = bdw_ddi_translations_hdmi; - n_edp_entries = ARRAY_SIZE(bdw_ddi_translations_edp); + n_dp_entries = ARRAY_SIZE(bdw_ddi_translations_dp); n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi); hdmi_default_entry = 7; @@ -3201,12 +3209,6 @@ void intel_ddi_get_config(struct intel_encoder *encoder, intel_ddi_clock_get(encoder, pipe_config); } -static void intel_ddi_destroy(struct drm_encoder *encoder) -{ - /* HDMI has nothing special to destroy, so we can go with this. 
*/ - intel_dp_encoder_destroy(encoder); -} - static bool intel_ddi_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config) { @@ -3225,7 +3227,8 @@ static bool intel_ddi_compute_config(struct intel_encoder *encoder, } static const struct drm_encoder_funcs intel_ddi_funcs = { - .destroy = intel_ddi_destroy, + .reset = intel_dp_encoder_reset, + .destroy = intel_dp_encoder_destroy, }; static struct intel_connector * @@ -3324,6 +3327,7 @@ void intel_ddi_init(struct drm_device *dev, enum port port) intel_encoder->post_disable = intel_ddi_post_disable; intel_encoder->get_hw_state = intel_ddi_get_hw_state; intel_encoder->get_config = intel_ddi_get_config; + intel_encoder->suspend = intel_dp_encoder_suspend; intel_dig_port->port = port; intel_dig_port->saved_port_bits = I915_READ(DDI_BUF_CTL(port)) & diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 6e0d8283daa6..182f84937345 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -13351,6 +13351,9 @@ static int intel_atomic_prepare_commit(struct drm_device *dev, } for_each_crtc_in_state(state, crtc, crtc_state, i) { + if (state->legacy_cursor_update) + continue; + ret = intel_crtc_wait_for_pending_flips(crtc); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index f069a82deb57..412a34c39522 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -4898,7 +4898,7 @@ void intel_dp_encoder_destroy(struct drm_encoder *encoder) kfree(intel_dig_port); } -static void intel_dp_encoder_suspend(struct intel_encoder *intel_encoder) +void intel_dp_encoder_suspend(struct intel_encoder *intel_encoder) { struct intel_dp *intel_dp = enc_to_intel_dp(&intel_encoder->base); @@ -4940,7 +4940,7 @@ static void intel_edp_panel_vdd_sanitize(struct intel_dp *intel_dp) edp_panel_vdd_schedule_off(intel_dp); } -static void intel_dp_encoder_reset(struct 
drm_encoder *encoder) +void intel_dp_encoder_reset(struct drm_encoder *encoder) { struct intel_dp *intel_dp; diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 4c027d69fac9..7d3af3a72abe 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1238,6 +1238,8 @@ void intel_dp_set_link_params(struct intel_dp *intel_dp, void intel_dp_start_link_train(struct intel_dp *intel_dp); void intel_dp_stop_link_train(struct intel_dp *intel_dp); void intel_dp_sink_dpms(struct intel_dp *intel_dp, int mode); +void intel_dp_encoder_reset(struct drm_encoder *encoder); +void intel_dp_encoder_suspend(struct intel_encoder *intel_encoder); void intel_dp_encoder_destroy(struct drm_encoder *encoder); int intel_dp_sink_crc(struct intel_dp *intel_dp, u8 *crc); bool intel_dp_compute_config(struct intel_encoder *encoder, diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index a0d8daed2470..1ab6f687f640 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -1415,8 +1415,16 @@ intel_hdmi_detect(struct drm_connector *connector, bool force) hdmi_to_dig_port(intel_hdmi)); } - if (!live_status) - DRM_DEBUG_KMS("Live status not up!"); + if (!live_status) { + DRM_DEBUG_KMS("HDMI live status down\n"); + /* + * Live status register is not reliable on all intel platforms. + * So consider live_status only for certain platforms, for + * others, read EDID to determine presence of sink. 
+ */ + if (INTEL_INFO(dev_priv)->gen < 7 || IS_IVYBRIDGE(dev_priv)) + live_status = true; + } intel_hdmi_unset_edid(connector); diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c index edd05cdb0cd8..587cae4e73c9 100644 --- a/drivers/gpu/drm/radeon/atombios_encoders.c +++ b/drivers/gpu/drm/radeon/atombios_encoders.c @@ -310,6 +310,10 @@ static bool radeon_atom_mode_fixup(struct drm_encoder *encoder, && (mode->crtc_vsync_start < (mode->crtc_vdisplay + 2))) adjusted_mode->crtc_vsync_start = adjusted_mode->crtc_vdisplay + 2; + /* vertical FP must be at least 1 */ + if (mode->crtc_vsync_start == mode->crtc_vdisplay) + adjusted_mode->crtc_vsync_start++; + /* get the native mode for scaling */ if (radeon_encoder->active_device & (ATOM_DEVICE_LCD_SUPPORT)) { radeon_panel_mode_fixup(encoder, adjusted_mode); diff --git a/drivers/gpu/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c index e00db3f510dd..abb98c77bad2 100644 --- a/drivers/gpu/ipu-v3/ipu-common.c +++ b/drivers/gpu/ipu-v3/ipu-common.c @@ -1068,7 +1068,6 @@ static int ipu_add_client_devices(struct ipu_soc *ipu, unsigned long ipu_base) goto err_register; } - pdev->dev.of_node = of_node; pdev->dev.parent = dev; ret = platform_device_add_data(pdev, &reg->pdata, @@ -1079,6 +1078,12 @@ static int ipu_add_client_devices(struct ipu_soc *ipu, unsigned long ipu_base) platform_device_put(pdev); goto err_register; } + + /* + * Set of_node only after calling platform_device_add. Otherwise + * the platform:imx-ipuv3-crtc modalias won't be used. + */ + pdev->dev.of_node = of_node; } return 0; diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c index 5613e2b5cff7..a40a73a7b71d 100644 --- a/drivers/hv/ring_buffer.c +++ b/drivers/hv/ring_buffer.c @@ -103,15 +103,29 @@ static bool hv_need_to_signal(u32 old_write, struct hv_ring_buffer_info *rbi) * there is room for the producer to send the pending packet. 
*/ -static bool hv_need_to_signal_on_read(u32 prev_write_sz, - struct hv_ring_buffer_info *rbi) +static bool hv_need_to_signal_on_read(struct hv_ring_buffer_info *rbi) { u32 cur_write_sz; u32 r_size; - u32 write_loc = rbi->ring_buffer->write_index; + u32 write_loc; u32 read_loc = rbi->ring_buffer->read_index; - u32 pending_sz = rbi->ring_buffer->pending_send_sz; + u32 pending_sz; + /* + * Issue a full memory barrier before making the signaling decision. + * Here is the reason for having this barrier: + * If the reading of the pend_sz (in this function) + * were to be reordered and read before we commit the new read + * index (in the calling function) we could + * have a problem. If the host were to set the pending_sz after we + * have sampled pending_sz and go to sleep before we commit the + * read index, we could miss sending the interrupt. Issue a full + * memory barrier to address this. + */ + mb(); + + pending_sz = rbi->ring_buffer->pending_send_sz; + write_loc = rbi->ring_buffer->write_index; /* If the other end is not blocked on write don't bother. */ if (pending_sz == 0) return false; @@ -120,7 +134,7 @@ static bool hv_need_to_signal_on_read(u32 prev_write_sz, cur_write_sz = write_loc >= read_loc ? 
r_size - (write_loc - read_loc) : read_loc - write_loc; - if ((prev_write_sz < pending_sz) && (cur_write_sz >= pending_sz)) + if (cur_write_sz >= pending_sz) return true; return false; @@ -455,7 +469,7 @@ int hv_ringbuffer_read(struct hv_ring_buffer_info *inring_info, /* Update the read index */ hv_set_next_read_location(inring_info, next_read_location); - *signal = hv_need_to_signal_on_read(bytes_avail_towrite, inring_info); + *signal = hv_need_to_signal_on_read(inring_info); return ret; } diff --git a/drivers/iio/adc/at91-sama5d2_adc.c b/drivers/iio/adc/at91-sama5d2_adc.c index dbee13ad33a3..2e154cb51685 100644 --- a/drivers/iio/adc/at91-sama5d2_adc.c +++ b/drivers/iio/adc/at91-sama5d2_adc.c @@ -451,6 +451,8 @@ static int at91_adc_probe(struct platform_device *pdev) if (ret) goto vref_disable; + platform_set_drvdata(pdev, indio_dev); + ret = iio_device_register(indio_dev); if (ret < 0) goto per_clk_disable_unprepare; diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_i2c.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_i2c.c index f581256d9d4c..5ee4e0dc093e 100644 --- a/drivers/iio/imu/inv_mpu6050/inv_mpu_i2c.c +++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_i2c.c @@ -104,6 +104,19 @@ static int inv_mpu6050_deselect_bypass(struct i2c_adapter *adap, return 0; } +static const char *inv_mpu_match_acpi_device(struct device *dev, int *chip_id) +{ + const struct acpi_device_id *id; + + id = acpi_match_device(dev->driver->acpi_match_table, dev); + if (!id) + return NULL; + + *chip_id = (int)id->driver_data; + + return dev_name(dev); +} + /** * inv_mpu_probe() - probe function. * @client: i2c client. @@ -115,14 +128,25 @@ static int inv_mpu_probe(struct i2c_client *client, const struct i2c_device_id *id) { struct inv_mpu6050_state *st; - int result; - const char *name = id ? 
id->name : NULL; + int result, chip_type; struct regmap *regmap; + const char *name; if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_I2C_BLOCK)) return -EOPNOTSUPP; + if (id) { + chip_type = (int)id->driver_data; + name = id->name; + } else if (ACPI_HANDLE(&client->dev)) { + name = inv_mpu_match_acpi_device(&client->dev, &chip_type); + if (!name) + return -ENODEV; + } else { + return -ENOSYS; + } + regmap = devm_regmap_init_i2c(client, &inv_mpu_regmap_config); if (IS_ERR(regmap)) { dev_err(&client->dev, "Failed to register i2c regmap %d\n", @@ -131,7 +155,7 @@ static int inv_mpu_probe(struct i2c_client *client, } result = inv_mpu_core_probe(regmap, client->irq, name, - NULL, id->driver_data); + NULL, chip_type); if (result < 0) return result; diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_spi.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_spi.c index dea6c4361de0..7bcb8d839f05 100644 --- a/drivers/iio/imu/inv_mpu6050/inv_mpu_spi.c +++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_spi.c @@ -46,6 +46,7 @@ static int inv_mpu_probe(struct spi_device *spi) struct regmap *regmap; const struct spi_device_id *id = spi_get_device_id(spi); const char *name = id ? id->name : NULL; + const int chip_type = id ? 
id->driver_data : 0; regmap = devm_regmap_init_spi(spi, &inv_mpu_regmap_config); if (IS_ERR(regmap)) { @@ -55,7 +56,7 @@ static int inv_mpu_probe(struct spi_device *spi) } return inv_mpu_core_probe(regmap, spi->irq, name, - inv_mpu_i2c_disable, id->driver_data); + inv_mpu_i2c_disable, chip_type); } static int inv_mpu_remove(struct spi_device *spi) diff --git a/drivers/iio/magnetometer/ak8975.c b/drivers/iio/magnetometer/ak8975.c index 9c5c9ef3f1da..0e931a9a1669 100644 --- a/drivers/iio/magnetometer/ak8975.c +++ b/drivers/iio/magnetometer/ak8975.c @@ -462,6 +462,8 @@ static int ak8975_setup_irq(struct ak8975_data *data) int rc; int irq; + init_waitqueue_head(&data->data_ready_queue); + clear_bit(0, &data->flags); if (client->irq) irq = client->irq; else @@ -477,8 +479,6 @@ static int ak8975_setup_irq(struct ak8975_data *data) return rc; } - init_waitqueue_head(&data->data_ready_queue); - clear_bit(0, &data->flags); data->eoc_irq = irq; return rc; @@ -732,7 +732,7 @@ static int ak8975_probe(struct i2c_client *client, int eoc_gpio; int err; const char *name = NULL; - enum asahi_compass_chipset chipset; + enum asahi_compass_chipset chipset = AK_MAX_TYPE; /* Grab and set up the supplied GPIO. 
*/ if (client->dev.platform_data) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 80b6bedc172f..64b3d11dcf1e 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -612,6 +612,7 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, struct Scsi_Host *shost; struct iser_conn *iser_conn = NULL; struct ib_conn *ib_conn; + u32 max_fr_sectors; u16 max_cmds; shost = iscsi_host_alloc(&iscsi_iser_sht, 0, 0); @@ -632,7 +633,6 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, iser_conn = ep->dd_data; max_cmds = iser_conn->max_cmds; shost->sg_tablesize = iser_conn->scsi_sg_tablesize; - shost->max_sectors = iser_conn->scsi_max_sectors; mutex_lock(&iser_conn->state_mutex); if (iser_conn->state != ISER_CONN_UP) { @@ -657,8 +657,6 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, */ shost->sg_tablesize = min_t(unsigned short, shost->sg_tablesize, ib_conn->device->ib_device->attrs.max_fast_reg_page_list_len); - shost->max_sectors = min_t(unsigned int, - 1024, (shost->sg_tablesize * PAGE_SIZE) >> 9); if (iscsi_host_add(shost, ib_conn->device->ib_device->dma_device)) { @@ -672,6 +670,15 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, goto free_host; } + /* + * FRs or FMRs can only map up to a (device) page per entry, but if the + * first entry is misaligned we'll end up using using two entries + * (head and tail) for a single page worth data, so we have to drop + * one segment from the calculation. 
+ */ + max_fr_sectors = ((shost->sg_tablesize - 1) * PAGE_SIZE) >> 9; + shost->max_sectors = min(iser_max_sectors, max_fr_sectors); + if (cmds_max > max_cmds) { iser_info("cmds_max changed from %u to %u\n", cmds_max, max_cmds); @@ -989,7 +996,6 @@ static struct scsi_host_template iscsi_iser_sht = { .queuecommand = iscsi_queuecommand, .change_queue_depth = scsi_change_queue_depth, .sg_tablesize = ISCSI_ISER_DEF_SG_TABLESIZE, - .max_sectors = ISER_DEF_MAX_SECTORS, .cmd_per_lun = ISER_DEF_CMD_PER_LUN, .eh_abort_handler = iscsi_eh_abort, .eh_device_reset_handler= iscsi_eh_device_reset, diff --git a/drivers/input/misc/twl6040-vibra.c b/drivers/input/misc/twl6040-vibra.c index 53e33fab3f7a..df3581f60628 100644 --- a/drivers/input/misc/twl6040-vibra.c +++ b/drivers/input/misc/twl6040-vibra.c @@ -181,6 +181,14 @@ static void vibra_play_work(struct work_struct *work) { struct vibra_info *info = container_of(work, struct vibra_info, play_work); + int ret; + + /* Do not allow effect, while the routing is set to use audio */ + ret = twl6040_get_vibralr_status(info->twl6040); + if (ret & TWL6040_VIBSEL) { + dev_info(info->dev, "Vibra is configured for audio\n"); + return; + } mutex_lock(&info->mutex); @@ -199,14 +207,6 @@ static int vibra_play(struct input_dev *input, void *data, struct ff_effect *effect) { struct vibra_info *info = input_get_drvdata(input); - int ret; - - /* Do not allow effect, while the routing is set to use audio */ - ret = twl6040_get_vibralr_status(info->twl6040); - if (ret & TWL6040_VIBSEL) { - dev_info(&input->dev, "Vibra is configured for audio\n"); - return -EBUSY; - } info->weak_speed = effect->u.rumble.weak_magnitude; info->strong_speed = effect->u.rumble.strong_magnitude; diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c index 2160512e861a..5af7907d0af4 100644 --- a/drivers/input/touchscreen/atmel_mxt_ts.c +++ b/drivers/input/touchscreen/atmel_mxt_ts.c @@ -1093,6 +1093,19 @@ static int 
mxt_t6_command(struct mxt_data *data, u16 cmd_offset, return 0; } +static int mxt_acquire_irq(struct mxt_data *data) +{ + int error; + + enable_irq(data->irq); + + error = mxt_process_messages_until_invalid(data); + if (error) + return error; + + return 0; +} + static int mxt_soft_reset(struct mxt_data *data) { struct device *dev = &data->client->dev; @@ -1111,7 +1124,7 @@ static int mxt_soft_reset(struct mxt_data *data) /* Ignore CHG line for 100ms after reset */ msleep(100); - enable_irq(data->irq); + mxt_acquire_irq(data); ret = mxt_wait_for_completion(data, &data->reset_completion, MXT_RESET_TIMEOUT); @@ -1466,19 +1479,6 @@ release_mem: return ret; } -static int mxt_acquire_irq(struct mxt_data *data) -{ - int error; - - enable_irq(data->irq); - - error = mxt_process_messages_until_invalid(data); - if (error) - return error; - - return 0; -} - static int mxt_get_info(struct mxt_data *data) { struct i2c_client *client = data->client; diff --git a/drivers/input/touchscreen/zforce_ts.c b/drivers/input/touchscreen/zforce_ts.c index 9bbadaaf6bc3..7b3845aa5983 100644 --- a/drivers/input/touchscreen/zforce_ts.c +++ b/drivers/input/touchscreen/zforce_ts.c @@ -370,8 +370,8 @@ static int zforce_touch_event(struct zforce_ts *ts, u8 *payload) point.coord_x = point.coord_y = 0; } - point.state = payload[9 * i + 5] & 0x03; - point.id = (payload[9 * i + 5] & 0xfc) >> 2; + point.state = payload[9 * i + 5] & 0x0f; + point.id = (payload[9 * i + 5] & 0xf0) >> 4; /* determine touch major, minor and orientation */ point.area_major = max(payload[9 * i + 6], diff --git a/drivers/media/media-device.c b/drivers/media/media-device.c index 6e43c95629ea..3cfd7af8c5ca 100644 --- a/drivers/media/media-device.c +++ b/drivers/media/media-device.c @@ -846,11 +846,11 @@ struct media_device *media_device_find_devres(struct device *dev) } EXPORT_SYMBOL_GPL(media_device_find_devres); +#if IS_ENABLED(CONFIG_PCI) void media_device_pci_init(struct media_device *mdev, struct pci_dev *pci_dev, const 
char *name) { -#ifdef CONFIG_PCI mdev->dev = &pci_dev->dev; if (name) @@ -866,16 +866,16 @@ void media_device_pci_init(struct media_device *mdev, mdev->driver_version = LINUX_VERSION_CODE; media_device_init(mdev); -#endif } EXPORT_SYMBOL_GPL(media_device_pci_init); +#endif +#if IS_ENABLED(CONFIG_USB) void __media_device_usb_init(struct media_device *mdev, struct usb_device *udev, const char *board_name, const char *driver_name) { -#ifdef CONFIG_USB mdev->dev = &udev->dev; if (driver_name) @@ -895,9 +895,9 @@ void __media_device_usb_init(struct media_device *mdev, mdev->driver_version = LINUX_VERSION_CODE; media_device_init(mdev); -#endif } EXPORT_SYMBOL_GPL(__media_device_usb_init); +#endif #endif /* CONFIG_MEDIA_CONTROLLER */ diff --git a/drivers/media/platform/exynos4-is/media-dev.c b/drivers/media/platform/exynos4-is/media-dev.c index feb521f28e14..4f494acd8150 100644 --- a/drivers/media/platform/exynos4-is/media-dev.c +++ b/drivers/media/platform/exynos4-is/media-dev.c @@ -1446,22 +1446,13 @@ static int fimc_md_probe(struct platform_device *pdev) platform_set_drvdata(pdev, fmd); - /* Protect the media graph while we're registering entities */ - mutex_lock(&fmd->media_dev.graph_mutex); - ret = fimc_md_register_platform_entities(fmd, dev->of_node); - if (ret) { - mutex_unlock(&fmd->media_dev.graph_mutex); + if (ret) goto err_clk; - } ret = fimc_md_register_sensor_entities(fmd); - if (ret) { - mutex_unlock(&fmd->media_dev.graph_mutex); + if (ret) goto err_m_ent; - } - - mutex_unlock(&fmd->media_dev.graph_mutex); ret = device_create_file(&pdev->dev, &dev_attr_subdev_conf_mode); if (ret) diff --git a/drivers/media/platform/s3c-camif/camif-core.c b/drivers/media/platform/s3c-camif/camif-core.c index 0b44b9accf50..af237af204e2 100644 --- a/drivers/media/platform/s3c-camif/camif-core.c +++ b/drivers/media/platform/s3c-camif/camif-core.c @@ -493,21 +493,17 @@ static int s3c_camif_probe(struct platform_device *pdev) if (ret < 0) goto err_sens; - 
mutex_lock(&camif->media_dev.graph_mutex); - ret = v4l2_device_register_subdev_nodes(&camif->v4l2_dev); if (ret < 0) - goto err_unlock; + goto err_sens; ret = camif_register_video_nodes(camif); if (ret < 0) - goto err_unlock; + goto err_sens; ret = camif_create_media_links(camif); if (ret < 0) - goto err_unlock; - - mutex_unlock(&camif->media_dev.graph_mutex); + goto err_sens; ret = media_device_register(&camif->media_dev); if (ret < 0) @@ -516,8 +512,6 @@ static int s3c_camif_probe(struct platform_device *pdev) pm_runtime_put(dev); return 0; -err_unlock: - mutex_unlock(&camif->media_dev.graph_mutex); err_sens: v4l2_device_unregister(&camif->v4l2_dev); media_device_unregister(&camif->media_dev); diff --git a/drivers/misc/mic/vop/vop_vringh.c b/drivers/misc/mic/vop/vop_vringh.c index e94c7fb6712a..88e45234d527 100644 --- a/drivers/misc/mic/vop/vop_vringh.c +++ b/drivers/misc/mic/vop/vop_vringh.c @@ -945,6 +945,11 @@ static long vop_ioctl(struct file *f, unsigned int cmd, unsigned long arg) ret = -EFAULT; goto free_ret; } + /* Ensure desc has not changed between the two reads */ + if (memcmp(&dd, dd_config, sizeof(dd))) { + ret = -EINVAL; + goto free_ret; + } mutex_lock(&vdev->vdev_mutex); mutex_lock(&vi->vop_mutex); ret = vop_virtio_add_device(vdev, dd_config); diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index befd67df08e1..0c5415b05ea9 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -192,6 +192,23 @@ config GENEVE To compile this driver as a module, choose M here: the module will be called geneve. +config GTP + tristate "GPRS Tunneling Protocol datapath (GTP-U)" + depends on INET && NET_UDP_TUNNEL + select NET_IP_TUNNEL + ---help--- + This allows one to create gtp virtual interfaces that provide + the GPRS Tunneling Protocol datapath (GTP-U). This tunneling protocol + is used to prevent subscribers from accessing mobile carrier core + network infrastructure. 
This driver requires a userspace software that + implements the signaling protocol (GTP-C) to update its PDP context + base, such as OpenGGSN <http://git.osmocom.org/openggsn/). This + tunneling protocol is implemented according to the GSM TS 09.60 and + 3GPP TS 29.060 standards. + + To compile this drivers as a module, choose M here: the module + wil be called gtp. + config MACSEC tristate "IEEE 802.1AE MAC-level encryption (MACsec)" select CRYPTO diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 1aa7cb845663..7336cbd3ef5d 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -25,6 +25,7 @@ obj-$(CONFIG_VETH) += veth.o obj-$(CONFIG_VIRTIO_NET) += virtio_net.o obj-$(CONFIG_VXLAN) += vxlan.o obj-$(CONFIG_GENEVE) += geneve.o +obj-$(CONFIG_GTP) += gtp.o obj-$(CONFIG_NLMON) += nlmon.o obj-$(CONFIG_NET_VRF) += vrf.o diff --git a/drivers/net/dsa/Kconfig b/drivers/net/dsa/Kconfig index 90ba003d8fdf..200663c43ce9 100644 --- a/drivers/net/dsa/Kconfig +++ b/drivers/net/dsa/Kconfig @@ -1,10 +1,6 @@ menu "Distributed Switch Architecture drivers" depends on HAVE_NET_DSA -config NET_DSA_MV88E6XXX - tristate - default n - config NET_DSA_MV88E6060 tristate "Marvell 88E6060 ethernet switch chip support" depends on NET_DSA @@ -13,46 +9,13 @@ config NET_DSA_MV88E6060 This enables support for the Marvell 88E6060 ethernet switch chip. -config NET_DSA_MV88E6XXX_NEED_PPU - bool - default n - -config NET_DSA_MV88E6131 - tristate "Marvell 88E6085/6095/6095F/6131 ethernet switch chip support" - depends on NET_DSA - select NET_DSA_MV88E6XXX - select NET_DSA_MV88E6XXX_NEED_PPU - select NET_DSA_TAG_DSA - ---help--- - This enables support for the Marvell 88E6085/6095/6095F/6131 - ethernet switch chips. - -config NET_DSA_MV88E6123 - tristate "Marvell 88E6123/6161/6165 ethernet switch chip support" - depends on NET_DSA - select NET_DSA_MV88E6XXX - select NET_DSA_TAG_EDSA - ---help--- - This enables support for the Marvell 88E6123/6161/6165 - ethernet switch chips. 
- -config NET_DSA_MV88E6171 - tristate "Marvell 88E6171/6175/6350/6351 ethernet switch chip support" - depends on NET_DSA - select NET_DSA_MV88E6XXX - select NET_DSA_TAG_EDSA - ---help--- - This enables support for the Marvell 88E6171/6175/6350/6351 - ethernet switches chips. - -config NET_DSA_MV88E6352 - tristate "Marvell 88E6172/6176/6320/6321/6352 ethernet switch chip support" +config NET_DSA_MV88E6XXX + tristate "Marvell 88E6xxx Ethernet switch chip support" depends on NET_DSA - select NET_DSA_MV88E6XXX select NET_DSA_TAG_EDSA ---help--- - This enables support for the Marvell 88E6172, 88E6176, 88E6320, - 88E6321 and 88E6352 ethernet switch chips. + This enables support for most of the Marvell 88E6xxx models of + Ethernet switch chips, except 88E6060. config NET_DSA_BCM_SF2 tristate "Broadcom Starfighter 2 Ethernet switch support" diff --git a/drivers/net/dsa/Makefile b/drivers/net/dsa/Makefile index a6e09939be65..76b751dd9efd 100644 --- a/drivers/net/dsa/Makefile +++ b/drivers/net/dsa/Makefile @@ -1,16 +1,3 @@ obj-$(CONFIG_NET_DSA_MV88E6060) += mv88e6060.o -obj-$(CONFIG_NET_DSA_MV88E6XXX) += mv88e6xxx_drv.o -mv88e6xxx_drv-y += mv88e6xxx.o -ifdef CONFIG_NET_DSA_MV88E6123 -mv88e6xxx_drv-y += mv88e6123.o -endif -ifdef CONFIG_NET_DSA_MV88E6131 -mv88e6xxx_drv-y += mv88e6131.o -endif -ifdef CONFIG_NET_DSA_MV88E6352 -mv88e6xxx_drv-y += mv88e6352.o -endif -ifdef CONFIG_NET_DSA_MV88E6171 -mv88e6xxx_drv-y += mv88e6171.o -endif +obj-$(CONFIG_NET_DSA_MV88E6XXX) += mv88e6xxx.o obj-$(CONFIG_NET_DSA_BCM_SF2) += bcm_sf2.o diff --git a/drivers/net/dsa/mv88e6123.c b/drivers/net/dsa/mv88e6123.c deleted file mode 100644 index 5535a42a6113..000000000000 --- a/drivers/net/dsa/mv88e6123.c +++ /dev/null @@ -1,130 +0,0 @@ -/* - * net/dsa/mv88e6123_61_65.c - Marvell 88e6123/6161/6165 switch chip support - * Copyright (c) 2008-2009 Marvell Semiconductor - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as 
published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ - -#include <linux/delay.h> -#include <linux/jiffies.h> -#include <linux/list.h> -#include <linux/module.h> -#include <linux/netdevice.h> -#include <linux/phy.h> -#include <net/dsa.h> -#include "mv88e6xxx.h" - -static const struct mv88e6xxx_info mv88e6123_table[] = { - { - .prod_num = PORT_SWITCH_ID_PROD_NUM_6123, - .family = MV88E6XXX_FAMILY_6165, - .name = "Marvell 88E6123", - .num_databases = 4096, - .num_ports = 3, - }, { - .prod_num = PORT_SWITCH_ID_PROD_NUM_6161, - .family = MV88E6XXX_FAMILY_6165, - .name = "Marvell 88E6161", - .num_databases = 4096, - .num_ports = 6, - }, { - .prod_num = PORT_SWITCH_ID_PROD_NUM_6165, - .family = MV88E6XXX_FAMILY_6165, - .name = "Marvell 88E6165", - .num_databases = 4096, - .num_ports = 6, - } -}; - -static const char *mv88e6123_drv_probe(struct device *dsa_dev, - struct device *host_dev, int sw_addr, - void **priv) -{ - return mv88e6xxx_drv_probe(dsa_dev, host_dev, sw_addr, priv, - mv88e6123_table, - ARRAY_SIZE(mv88e6123_table)); -} - -static int mv88e6123_setup_global(struct dsa_switch *ds) -{ - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - u32 upstream_port = dsa_upstream_port(ds); - int ret; - u32 reg; - - ret = mv88e6xxx_setup_global(ds); - if (ret) - return ret; - - /* Disable the PHY polling unit (since there won't be any - * external PHYs to poll), don't discard packets with - * excessive collisions, and mask all interrupt sources. - */ - ret = mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_CONTROL, 0x0000); - if (ret) - return ret; - - /* Configure the upstream port, and configure the upstream - * port as the port to which ingress and egress monitor frames - * are to be sent. 
- */ - reg = upstream_port << GLOBAL_MONITOR_CONTROL_INGRESS_SHIFT | - upstream_port << GLOBAL_MONITOR_CONTROL_EGRESS_SHIFT | - upstream_port << GLOBAL_MONITOR_CONTROL_ARP_SHIFT; - ret = mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_MONITOR_CONTROL, reg); - if (ret) - return ret; - - /* Disable remote management for now, and set the switch's - * DSA device number. - */ - return mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_CONTROL_2, - ds->index & 0x1f); -} - -static int mv88e6123_setup(struct dsa_switch *ds) -{ - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - int ret; - - ps->ds = ds; - - ret = mv88e6xxx_setup_common(ps); - if (ret < 0) - return ret; - - ret = mv88e6xxx_switch_reset(ps, false); - if (ret < 0) - return ret; - - ret = mv88e6123_setup_global(ds); - if (ret < 0) - return ret; - - return mv88e6xxx_setup_ports(ds); -} - -struct dsa_switch_driver mv88e6123_switch_driver = { - .tag_protocol = DSA_TAG_PROTO_EDSA, - .probe = mv88e6123_drv_probe, - .setup = mv88e6123_setup, - .set_addr = mv88e6xxx_set_addr_indirect, - .phy_read = mv88e6xxx_phy_read, - .phy_write = mv88e6xxx_phy_write, - .get_strings = mv88e6xxx_get_strings, - .get_ethtool_stats = mv88e6xxx_get_ethtool_stats, - .get_sset_count = mv88e6xxx_get_sset_count, - .adjust_link = mv88e6xxx_adjust_link, -#ifdef CONFIG_NET_DSA_HWMON - .get_temp = mv88e6xxx_get_temp, -#endif - .get_regs_len = mv88e6xxx_get_regs_len, - .get_regs = mv88e6xxx_get_regs, -}; - -MODULE_ALIAS("platform:mv88e6123"); -MODULE_ALIAS("platform:mv88e6161"); -MODULE_ALIAS("platform:mv88e6165"); diff --git a/drivers/net/dsa/mv88e6131.c b/drivers/net/dsa/mv88e6131.c deleted file mode 100644 index 357ab794d720..000000000000 --- a/drivers/net/dsa/mv88e6131.c +++ /dev/null @@ -1,204 +0,0 @@ -/* - * net/dsa/mv88e6131.c - Marvell 88e6095/6095f/6131 switch chip support - * Copyright (c) 2008-2009 Marvell Semiconductor - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public 
License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ - -#include <linux/delay.h> -#include <linux/jiffies.h> -#include <linux/list.h> -#include <linux/module.h> -#include <linux/netdevice.h> -#include <linux/phy.h> -#include <net/dsa.h> -#include "mv88e6xxx.h" - -static const struct mv88e6xxx_info mv88e6131_table[] = { - { - .prod_num = PORT_SWITCH_ID_PROD_NUM_6095, - .family = MV88E6XXX_FAMILY_6095, - .name = "Marvell 88E6095/88E6095F", - .num_databases = 256, - .num_ports = 11, - }, { - .prod_num = PORT_SWITCH_ID_PROD_NUM_6085, - .family = MV88E6XXX_FAMILY_6097, - .name = "Marvell 88E6085", - .num_databases = 4096, - .num_ports = 10, - }, { - .prod_num = PORT_SWITCH_ID_PROD_NUM_6131, - .family = MV88E6XXX_FAMILY_6185, - .name = "Marvell 88E6131", - .num_databases = 256, - .num_ports = 8, - }, { - .prod_num = PORT_SWITCH_ID_PROD_NUM_6185, - .family = MV88E6XXX_FAMILY_6185, - .name = "Marvell 88E6185", - .num_databases = 256, - .num_ports = 10, - } -}; - -static const char *mv88e6131_drv_probe(struct device *dsa_dev, - struct device *host_dev, int sw_addr, - void **priv) -{ - return mv88e6xxx_drv_probe(dsa_dev, host_dev, sw_addr, priv, - mv88e6131_table, - ARRAY_SIZE(mv88e6131_table)); -} - -static int mv88e6131_setup_global(struct dsa_switch *ds) -{ - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - u32 upstream_port = dsa_upstream_port(ds); - int ret; - u32 reg; - - ret = mv88e6xxx_setup_global(ds); - if (ret) - return ret; - - /* Enable the PHY polling unit, don't discard packets with - * excessive collisions, use a weighted fair queueing scheme - * to arbitrate between packet queues, set the maximum frame - * size to 1632, and mask all interrupt sources. - */ - ret = mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_CONTROL, - GLOBAL_CONTROL_PPU_ENABLE | - GLOBAL_CONTROL_MAX_FRAME_1632); - if (ret) - return ret; - - /* Set the VLAN ethertype to 0x8100. 
*/ - ret = mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_CORE_TAG_TYPE, 0x8100); - if (ret) - return ret; - - /* Disable ARP mirroring, and configure the upstream port as - * the port to which ingress and egress monitor frames are to - * be sent. - */ - reg = upstream_port << GLOBAL_MONITOR_CONTROL_INGRESS_SHIFT | - upstream_port << GLOBAL_MONITOR_CONTROL_EGRESS_SHIFT | - GLOBAL_MONITOR_CONTROL_ARP_DISABLED; - ret = mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_MONITOR_CONTROL, reg); - if (ret) - return ret; - - /* Disable cascade port functionality unless this device - * is used in a cascade configuration, and set the switch's - * DSA device number. - */ - if (ds->dst->pd->nr_chips > 1) - ret = mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_CONTROL_2, - GLOBAL_CONTROL_2_MULTIPLE_CASCADE | - (ds->index & 0x1f)); - else - ret = mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_CONTROL_2, - GLOBAL_CONTROL_2_NO_CASCADE | - (ds->index & 0x1f)); - if (ret) - return ret; - - /* Force the priority of IGMP/MLD snoop frames and ARP frames - * to the highest setting. 
- */ - return mv88e6xxx_reg_write(ps, REG_GLOBAL2, GLOBAL2_PRIO_OVERRIDE, - GLOBAL2_PRIO_OVERRIDE_FORCE_SNOOP | - 7 << GLOBAL2_PRIO_OVERRIDE_SNOOP_SHIFT | - GLOBAL2_PRIO_OVERRIDE_FORCE_ARP | - 7 << GLOBAL2_PRIO_OVERRIDE_ARP_SHIFT); -} - -static int mv88e6131_setup(struct dsa_switch *ds) -{ - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - int ret; - - ps->ds = ds; - - ret = mv88e6xxx_setup_common(ps); - if (ret < 0) - return ret; - - mv88e6xxx_ppu_state_init(ps); - - ret = mv88e6xxx_switch_reset(ps, false); - if (ret < 0) - return ret; - - ret = mv88e6131_setup_global(ds); - if (ret < 0) - return ret; - - return mv88e6xxx_setup_ports(ds); -} - -static int mv88e6131_port_to_phy_addr(struct dsa_switch *ds, int port) -{ - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - - if (port >= 0 && port < ps->info->num_ports) - return port; - - return -EINVAL; -} - -static int -mv88e6131_phy_read(struct dsa_switch *ds, int port, int regnum) -{ - int addr = mv88e6131_port_to_phy_addr(ds, port); - - if (addr < 0) - return addr; - - return mv88e6xxx_phy_read_ppu(ds, addr, regnum); -} - -static int -mv88e6131_phy_write(struct dsa_switch *ds, - int port, int regnum, u16 val) -{ - int addr = mv88e6131_port_to_phy_addr(ds, port); - - if (addr < 0) - return addr; - - return mv88e6xxx_phy_write_ppu(ds, addr, regnum, val); -} - -struct dsa_switch_driver mv88e6131_switch_driver = { - .tag_protocol = DSA_TAG_PROTO_DSA, - .probe = mv88e6131_drv_probe, - .setup = mv88e6131_setup, - .set_addr = mv88e6xxx_set_addr_direct, - .phy_read = mv88e6131_phy_read, - .phy_write = mv88e6131_phy_write, - .get_strings = mv88e6xxx_get_strings, - .get_ethtool_stats = mv88e6xxx_get_ethtool_stats, - .get_sset_count = mv88e6xxx_get_sset_count, - .adjust_link = mv88e6xxx_adjust_link, - .port_bridge_join = mv88e6xxx_port_bridge_join, - .port_bridge_leave = mv88e6xxx_port_bridge_leave, - .port_vlan_filtering = mv88e6xxx_port_vlan_filtering, - .port_vlan_prepare = mv88e6xxx_port_vlan_prepare, - 
.port_vlan_add = mv88e6xxx_port_vlan_add, - .port_vlan_del = mv88e6xxx_port_vlan_del, - .port_vlan_dump = mv88e6xxx_port_vlan_dump, - .port_fdb_prepare = mv88e6xxx_port_fdb_prepare, - .port_fdb_add = mv88e6xxx_port_fdb_add, - .port_fdb_del = mv88e6xxx_port_fdb_del, - .port_fdb_dump = mv88e6xxx_port_fdb_dump, -}; - -MODULE_ALIAS("platform:mv88e6085"); -MODULE_ALIAS("platform:mv88e6095"); -MODULE_ALIAS("platform:mv88e6095f"); -MODULE_ALIAS("platform:mv88e6131"); diff --git a/drivers/net/dsa/mv88e6171.c b/drivers/net/dsa/mv88e6171.c deleted file mode 100644 index f75164dc3bd6..000000000000 --- a/drivers/net/dsa/mv88e6171.c +++ /dev/null @@ -1,151 +0,0 @@ -/* net/dsa/mv88e6171.c - Marvell 88e6171 switch chip support - * Copyright (c) 2008-2009 Marvell Semiconductor - * Copyright (c) 2014 Claudio Leite <leitec@staticky.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- */ - -#include <linux/delay.h> -#include <linux/jiffies.h> -#include <linux/list.h> -#include <linux/module.h> -#include <linux/netdevice.h> -#include <linux/phy.h> -#include <net/dsa.h> -#include "mv88e6xxx.h" - -static const struct mv88e6xxx_info mv88e6171_table[] = { - { - .prod_num = PORT_SWITCH_ID_PROD_NUM_6171, - .family = MV88E6XXX_FAMILY_6351, - .name = "Marvell 88E6171", - .num_databases = 4096, - .num_ports = 7, - }, { - .prod_num = PORT_SWITCH_ID_PROD_NUM_6175, - .family = MV88E6XXX_FAMILY_6351, - .name = "Marvell 88E6175", - .num_databases = 4096, - .num_ports = 7, - }, { - .prod_num = PORT_SWITCH_ID_PROD_NUM_6350, - .family = MV88E6XXX_FAMILY_6351, - .name = "Marvell 88E6350", - .num_databases = 4096, - .num_ports = 7, - }, { - .prod_num = PORT_SWITCH_ID_PROD_NUM_6351, - .family = MV88E6XXX_FAMILY_6351, - .name = "Marvell 88E6351", - .num_databases = 4096, - .num_ports = 7, - } -}; - -static const char *mv88e6171_drv_probe(struct device *dsa_dev, - struct device *host_dev, int sw_addr, - void **priv) -{ - return mv88e6xxx_drv_probe(dsa_dev, host_dev, sw_addr, priv, - mv88e6171_table, - ARRAY_SIZE(mv88e6171_table)); -} - -static int mv88e6171_setup_global(struct dsa_switch *ds) -{ - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - u32 upstream_port = dsa_upstream_port(ds); - int ret; - u32 reg; - - ret = mv88e6xxx_setup_global(ds); - if (ret) - return ret; - - /* Discard packets with excessive collisions, mask all - * interrupt sources, enable PPU. - */ - ret = mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_CONTROL, - GLOBAL_CONTROL_PPU_ENABLE | - GLOBAL_CONTROL_DISCARD_EXCESS); - if (ret) - return ret; - - /* Configure the upstream port, and configure the upstream - * port as the port to which ingress and egress monitor frames - * are to be sent. 
- */ - reg = upstream_port << GLOBAL_MONITOR_CONTROL_INGRESS_SHIFT | - upstream_port << GLOBAL_MONITOR_CONTROL_EGRESS_SHIFT | - upstream_port << GLOBAL_MONITOR_CONTROL_ARP_SHIFT | - upstream_port << GLOBAL_MONITOR_CONTROL_MIRROR_SHIFT; - ret = mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_MONITOR_CONTROL, reg); - if (ret) - return ret; - - /* Disable remote management for now, and set the switch's - * DSA device number. - */ - return mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_CONTROL_2, - ds->index & 0x1f); -} - -static int mv88e6171_setup(struct dsa_switch *ds) -{ - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - int ret; - - ps->ds = ds; - - ret = mv88e6xxx_setup_common(ps); - if (ret < 0) - return ret; - - ret = mv88e6xxx_switch_reset(ps, true); - if (ret < 0) - return ret; - - ret = mv88e6171_setup_global(ds); - if (ret < 0) - return ret; - - return mv88e6xxx_setup_ports(ds); -} - -struct dsa_switch_driver mv88e6171_switch_driver = { - .tag_protocol = DSA_TAG_PROTO_EDSA, - .probe = mv88e6171_drv_probe, - .setup = mv88e6171_setup, - .set_addr = mv88e6xxx_set_addr_indirect, - .phy_read = mv88e6xxx_phy_read_indirect, - .phy_write = mv88e6xxx_phy_write_indirect, - .get_strings = mv88e6xxx_get_strings, - .get_ethtool_stats = mv88e6xxx_get_ethtool_stats, - .get_sset_count = mv88e6xxx_get_sset_count, - .adjust_link = mv88e6xxx_adjust_link, -#ifdef CONFIG_NET_DSA_HWMON - .get_temp = mv88e6xxx_get_temp, -#endif - .get_regs_len = mv88e6xxx_get_regs_len, - .get_regs = mv88e6xxx_get_regs, - .port_bridge_join = mv88e6xxx_port_bridge_join, - .port_bridge_leave = mv88e6xxx_port_bridge_leave, - .port_stp_state_set = mv88e6xxx_port_stp_state_set, - .port_vlan_filtering = mv88e6xxx_port_vlan_filtering, - .port_vlan_prepare = mv88e6xxx_port_vlan_prepare, - .port_vlan_add = mv88e6xxx_port_vlan_add, - .port_vlan_del = mv88e6xxx_port_vlan_del, - .port_vlan_dump = mv88e6xxx_port_vlan_dump, - .port_fdb_prepare = mv88e6xxx_port_fdb_prepare, - .port_fdb_add = mv88e6xxx_port_fdb_add, - 
.port_fdb_del = mv88e6xxx_port_fdb_del, - .port_fdb_dump = mv88e6xxx_port_fdb_dump, -}; - -MODULE_ALIAS("platform:mv88e6171"); -MODULE_ALIAS("platform:mv88e6175"); -MODULE_ALIAS("platform:mv88e6350"); -MODULE_ALIAS("platform:mv88e6351"); diff --git a/drivers/net/dsa/mv88e6352.c b/drivers/net/dsa/mv88e6352.c deleted file mode 100644 index c622a1d58480..000000000000 --- a/drivers/net/dsa/mv88e6352.c +++ /dev/null @@ -1,377 +0,0 @@ -/* - * net/dsa/mv88e6352.c - Marvell 88e6352 switch chip support - * - * Copyright (c) 2014 Guenter Roeck - * - * Derived from mv88e6123_61_65.c - * Copyright (c) 2008-2009 Marvell Semiconductor - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ - -#include <linux/delay.h> -#include <linux/jiffies.h> -#include <linux/list.h> -#include <linux/module.h> -#include <linux/netdevice.h> -#include <linux/platform_device.h> -#include <linux/phy.h> -#include <net/dsa.h> -#include "mv88e6xxx.h" - -static const struct mv88e6xxx_info mv88e6352_table[] = { - { - .prod_num = PORT_SWITCH_ID_PROD_NUM_6320, - .family = MV88E6XXX_FAMILY_6320, - .name = "Marvell 88E6320", - .num_databases = 4096, - .num_ports = 7, - }, { - .prod_num = PORT_SWITCH_ID_PROD_NUM_6321, - .family = MV88E6XXX_FAMILY_6320, - .name = "Marvell 88E6321", - .num_databases = 4096, - .num_ports = 7, - }, { - .prod_num = PORT_SWITCH_ID_PROD_NUM_6172, - .family = MV88E6XXX_FAMILY_6352, - .name = "Marvell 88E6172", - .num_databases = 4096, - .num_ports = 7, - }, { - .prod_num = PORT_SWITCH_ID_PROD_NUM_6176, - .family = MV88E6XXX_FAMILY_6352, - .name = "Marvell 88E6176", - .num_databases = 4096, - .num_ports = 7, - }, { - .prod_num = PORT_SWITCH_ID_PROD_NUM_6240, - .family = MV88E6XXX_FAMILY_6352, - .name = "Marvell 88E6240", - .num_databases = 4096, - .num_ports = 7, - }, { - 
.prod_num = PORT_SWITCH_ID_PROD_NUM_6352, - .family = MV88E6XXX_FAMILY_6352, - .name = "Marvell 88E6352", - .num_databases = 4096, - .num_ports = 7, - } -}; - -static const char *mv88e6352_drv_probe(struct device *dsa_dev, - struct device *host_dev, int sw_addr, - void **priv) -{ - return mv88e6xxx_drv_probe(dsa_dev, host_dev, sw_addr, priv, - mv88e6352_table, - ARRAY_SIZE(mv88e6352_table)); -} - -static int mv88e6352_setup_global(struct dsa_switch *ds) -{ - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - u32 upstream_port = dsa_upstream_port(ds); - int ret; - u32 reg; - - ret = mv88e6xxx_setup_global(ds); - if (ret) - return ret; - - /* Discard packets with excessive collisions, - * mask all interrupt sources, enable PPU (bit 14, undocumented). - */ - ret = mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_CONTROL, - GLOBAL_CONTROL_PPU_ENABLE | - GLOBAL_CONTROL_DISCARD_EXCESS); - if (ret) - return ret; - - /* Configure the upstream port, and configure the upstream - * port as the port to which ingress and egress monitor frames - * are to be sent. - */ - reg = upstream_port << GLOBAL_MONITOR_CONTROL_INGRESS_SHIFT | - upstream_port << GLOBAL_MONITOR_CONTROL_EGRESS_SHIFT | - upstream_port << GLOBAL_MONITOR_CONTROL_ARP_SHIFT; - ret = mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_MONITOR_CONTROL, reg); - if (ret) - return ret; - - /* Disable remote management for now, and set the switch's - * DSA device number. 
- */ - return mv88e6xxx_reg_write(ps, REG_GLOBAL, 0x1c, ds->index & 0x1f); -} - -static int mv88e6352_setup(struct dsa_switch *ds) -{ - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - int ret; - - ps->ds = ds; - - ret = mv88e6xxx_setup_common(ps); - if (ret < 0) - return ret; - - mutex_init(&ps->eeprom_mutex); - - ret = mv88e6xxx_switch_reset(ps, true); - if (ret < 0) - return ret; - - ret = mv88e6352_setup_global(ds); - if (ret < 0) - return ret; - - return mv88e6xxx_setup_ports(ds); -} - -static int mv88e6352_read_eeprom_word(struct dsa_switch *ds, int addr) -{ - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - int ret; - - mutex_lock(&ps->eeprom_mutex); - - ret = mv88e6xxx_reg_write(ps, REG_GLOBAL2, GLOBAL2_EEPROM_OP, - GLOBAL2_EEPROM_OP_READ | - (addr & GLOBAL2_EEPROM_OP_ADDR_MASK)); - if (ret < 0) - goto error; - - ret = mv88e6xxx_eeprom_busy_wait(ds); - if (ret < 0) - goto error; - - ret = mv88e6xxx_reg_read(ps, REG_GLOBAL2, GLOBAL2_EEPROM_DATA); -error: - mutex_unlock(&ps->eeprom_mutex); - return ret; -} - -static int mv88e6352_get_eeprom(struct dsa_switch *ds, - struct ethtool_eeprom *eeprom, u8 *data) -{ - int offset; - int len; - int ret; - - offset = eeprom->offset; - len = eeprom->len; - eeprom->len = 0; - - eeprom->magic = 0xc3ec4951; - - ret = mv88e6xxx_eeprom_load_wait(ds); - if (ret < 0) - return ret; - - if (offset & 1) { - int word; - - word = mv88e6352_read_eeprom_word(ds, offset >> 1); - if (word < 0) - return word; - - *data++ = (word >> 8) & 0xff; - - offset++; - len--; - eeprom->len++; - } - - while (len >= 2) { - int word; - - word = mv88e6352_read_eeprom_word(ds, offset >> 1); - if (word < 0) - return word; - - *data++ = word & 0xff; - *data++ = (word >> 8) & 0xff; - - offset += 2; - len -= 2; - eeprom->len += 2; - } - - if (len) { - int word; - - word = mv88e6352_read_eeprom_word(ds, offset >> 1); - if (word < 0) - return word; - - *data++ = word & 0xff; - - offset++; - len--; - eeprom->len++; - } - - return 0; -} - -static int 
mv88e6352_eeprom_is_readonly(struct dsa_switch *ds) -{ - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - int ret; - - ret = mv88e6xxx_reg_read(ps, REG_GLOBAL2, GLOBAL2_EEPROM_OP); - if (ret < 0) - return ret; - - if (!(ret & GLOBAL2_EEPROM_OP_WRITE_EN)) - return -EROFS; - - return 0; -} - -static int mv88e6352_write_eeprom_word(struct dsa_switch *ds, int addr, - u16 data) -{ - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - int ret; - - mutex_lock(&ps->eeprom_mutex); - - ret = mv88e6xxx_reg_write(ps, REG_GLOBAL2, GLOBAL2_EEPROM_DATA, data); - if (ret < 0) - goto error; - - ret = mv88e6xxx_reg_write(ps, REG_GLOBAL2, GLOBAL2_EEPROM_OP, - GLOBAL2_EEPROM_OP_WRITE | - (addr & GLOBAL2_EEPROM_OP_ADDR_MASK)); - if (ret < 0) - goto error; - - ret = mv88e6xxx_eeprom_busy_wait(ds); -error: - mutex_unlock(&ps->eeprom_mutex); - return ret; -} - -static int mv88e6352_set_eeprom(struct dsa_switch *ds, - struct ethtool_eeprom *eeprom, u8 *data) -{ - int offset; - int ret; - int len; - - if (eeprom->magic != 0xc3ec4951) - return -EINVAL; - - ret = mv88e6352_eeprom_is_readonly(ds); - if (ret) - return ret; - - offset = eeprom->offset; - len = eeprom->len; - eeprom->len = 0; - - ret = mv88e6xxx_eeprom_load_wait(ds); - if (ret < 0) - return ret; - - if (offset & 1) { - int word; - - word = mv88e6352_read_eeprom_word(ds, offset >> 1); - if (word < 0) - return word; - - word = (*data++ << 8) | (word & 0xff); - - ret = mv88e6352_write_eeprom_word(ds, offset >> 1, word); - if (ret < 0) - return ret; - - offset++; - len--; - eeprom->len++; - } - - while (len >= 2) { - int word; - - word = *data++; - word |= *data++ << 8; - - ret = mv88e6352_write_eeprom_word(ds, offset >> 1, word); - if (ret < 0) - return ret; - - offset += 2; - len -= 2; - eeprom->len += 2; - } - - if (len) { - int word; - - word = mv88e6352_read_eeprom_word(ds, offset >> 1); - if (word < 0) - return word; - - word = (word & 0xff00) | *data++; - - ret = mv88e6352_write_eeprom_word(ds, offset >> 1, word); - if (ret 
< 0) - return ret; - - offset++; - len--; - eeprom->len++; - } - - return 0; -} - -struct dsa_switch_driver mv88e6352_switch_driver = { - .tag_protocol = DSA_TAG_PROTO_EDSA, - .probe = mv88e6352_drv_probe, - .setup = mv88e6352_setup, - .set_addr = mv88e6xxx_set_addr_indirect, - .phy_read = mv88e6xxx_phy_read_indirect, - .phy_write = mv88e6xxx_phy_write_indirect, - .get_strings = mv88e6xxx_get_strings, - .get_ethtool_stats = mv88e6xxx_get_ethtool_stats, - .get_sset_count = mv88e6xxx_get_sset_count, - .adjust_link = mv88e6xxx_adjust_link, - .set_eee = mv88e6xxx_set_eee, - .get_eee = mv88e6xxx_get_eee, -#ifdef CONFIG_NET_DSA_HWMON - .get_temp = mv88e6xxx_get_temp, - .get_temp_limit = mv88e6xxx_get_temp_limit, - .set_temp_limit = mv88e6xxx_set_temp_limit, - .get_temp_alarm = mv88e6xxx_get_temp_alarm, -#endif - .get_eeprom = mv88e6352_get_eeprom, - .set_eeprom = mv88e6352_set_eeprom, - .get_regs_len = mv88e6xxx_get_regs_len, - .get_regs = mv88e6xxx_get_regs, - .port_bridge_join = mv88e6xxx_port_bridge_join, - .port_bridge_leave = mv88e6xxx_port_bridge_leave, - .port_stp_state_set = mv88e6xxx_port_stp_state_set, - .port_vlan_filtering = mv88e6xxx_port_vlan_filtering, - .port_vlan_prepare = mv88e6xxx_port_vlan_prepare, - .port_vlan_add = mv88e6xxx_port_vlan_add, - .port_vlan_del = mv88e6xxx_port_vlan_del, - .port_vlan_dump = mv88e6xxx_port_vlan_dump, - .port_fdb_prepare = mv88e6xxx_port_fdb_prepare, - .port_fdb_add = mv88e6xxx_port_fdb_add, - .port_fdb_del = mv88e6xxx_port_fdb_del, - .port_fdb_dump = mv88e6xxx_port_fdb_dump, -}; - -MODULE_ALIAS("platform:mv88e6172"); -MODULE_ALIAS("platform:mv88e6176"); -MODULE_ALIAS("platform:mv88e6320"); -MODULE_ALIAS("platform:mv88e6321"); -MODULE_ALIAS("platform:mv88e6352"); diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c index 470cfc783baa..1e5ca8e0f48e 100644 --- a/drivers/net/dsa/mv88e6xxx.c +++ b/drivers/net/dsa/mv88e6xxx.c @@ -173,7 +173,7 @@ int mv88e6xxx_reg_write(struct mv88e6xxx_priv_state *ps, int 
addr, return ret; } -int mv88e6xxx_set_addr_direct(struct dsa_switch *ds, u8 *addr) +static int mv88e6xxx_set_addr_direct(struct dsa_switch *ds, u8 *addr) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); int err; @@ -192,7 +192,7 @@ int mv88e6xxx_set_addr_direct(struct dsa_switch *ds, u8 *addr) (addr[4] << 8) | addr[5]); } -int mv88e6xxx_set_addr_indirect(struct dsa_switch *ds, u8 *addr) +static int mv88e6xxx_set_addr_indirect(struct dsa_switch *ds, u8 *addr) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); int ret; @@ -225,6 +225,16 @@ int mv88e6xxx_set_addr_indirect(struct dsa_switch *ds, u8 *addr) return 0; } +int mv88e6xxx_set_addr(struct dsa_switch *ds, u8 *addr) +{ + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + + if (mv88e6xxx_has(ps, MV88E6XXX_FLAG_SWITCH_MAC)) + return mv88e6xxx_set_addr_indirect(ds, addr); + else + return mv88e6xxx_set_addr_direct(ds, addr); +} + static int _mv88e6xxx_phy_read(struct mv88e6xxx_priv_state *ps, int addr, int regnum) { @@ -241,24 +251,23 @@ static int _mv88e6xxx_phy_write(struct mv88e6xxx_priv_state *ps, int addr, return 0; } -#ifdef CONFIG_NET_DSA_MV88E6XXX_NEED_PPU static int mv88e6xxx_ppu_disable(struct mv88e6xxx_priv_state *ps) { int ret; unsigned long timeout; - ret = mv88e6xxx_reg_read(ps, REG_GLOBAL, GLOBAL_CONTROL); + ret = _mv88e6xxx_reg_read(ps, REG_GLOBAL, GLOBAL_CONTROL); if (ret < 0) return ret; - ret = mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_CONTROL, - ret & ~GLOBAL_CONTROL_PPU_ENABLE); + ret = _mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_CONTROL, + ret & ~GLOBAL_CONTROL_PPU_ENABLE); if (ret) return ret; timeout = jiffies + 1 * HZ; while (time_before(jiffies, timeout)) { - ret = mv88e6xxx_reg_read(ps, REG_GLOBAL, GLOBAL_STATUS); + ret = _mv88e6xxx_reg_read(ps, REG_GLOBAL, GLOBAL_STATUS); if (ret < 0) return ret; @@ -361,35 +370,33 @@ void mv88e6xxx_ppu_state_init(struct mv88e6xxx_priv_state *ps) ps->ppu_timer.function = mv88e6xxx_ppu_reenable_timer; } -int mv88e6xxx_phy_read_ppu(struct dsa_switch 
*ds, int addr, int regnum) +static int mv88e6xxx_phy_read_ppu(struct mv88e6xxx_priv_state *ps, int addr, + int regnum) { - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); int ret; ret = mv88e6xxx_ppu_access_get(ps); if (ret >= 0) { - ret = mv88e6xxx_reg_read(ps, addr, regnum); + ret = _mv88e6xxx_reg_read(ps, addr, regnum); mv88e6xxx_ppu_access_put(ps); } return ret; } -int mv88e6xxx_phy_write_ppu(struct dsa_switch *ds, int addr, - int regnum, u16 val) +static int mv88e6xxx_phy_write_ppu(struct mv88e6xxx_priv_state *ps, int addr, + int regnum, u16 val) { - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); int ret; ret = mv88e6xxx_ppu_access_get(ps); if (ret >= 0) { - ret = mv88e6xxx_reg_write(ps, addr, regnum, val); + ret = _mv88e6xxx_reg_write(ps, addr, regnum, val); mv88e6xxx_ppu_access_put(ps); } return ret; } -#endif static bool mv88e6xxx_6065_family(struct mv88e6xxx_priv_state *ps) { @@ -460,8 +467,8 @@ static bool mv88e6xxx_has_stu(struct mv88e6xxx_priv_state *ps) * phy. However, in the case of a fixed link phy, we force the port * settings from the fixed link settings. 
*/ -void mv88e6xxx_adjust_link(struct dsa_switch *ds, int port, - struct phy_device *phydev) +static void mv88e6xxx_adjust_link(struct dsa_switch *ds, int port, + struct phy_device *phydev) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); u32 reg; @@ -707,7 +714,8 @@ static uint64_t _mv88e6xxx_get_ethtool_stat(struct mv88e6xxx_priv_state *ps, return value; } -void mv88e6xxx_get_strings(struct dsa_switch *ds, int port, uint8_t *data) +static void mv88e6xxx_get_strings(struct dsa_switch *ds, int port, + uint8_t *data) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); struct mv88e6xxx_hw_stat *stat; @@ -723,7 +731,7 @@ void mv88e6xxx_get_strings(struct dsa_switch *ds, int port, uint8_t *data) } } -int mv88e6xxx_get_sset_count(struct dsa_switch *ds) +static int mv88e6xxx_get_sset_count(struct dsa_switch *ds) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); struct mv88e6xxx_hw_stat *stat; @@ -737,9 +745,8 @@ int mv88e6xxx_get_sset_count(struct dsa_switch *ds) return j; } -void -mv88e6xxx_get_ethtool_stats(struct dsa_switch *ds, - int port, uint64_t *data) +static void mv88e6xxx_get_ethtool_stats(struct dsa_switch *ds, int port, + uint64_t *data) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); struct mv88e6xxx_hw_stat *stat; @@ -764,13 +771,13 @@ mv88e6xxx_get_ethtool_stats(struct dsa_switch *ds, mutex_unlock(&ps->smi_mutex); } -int mv88e6xxx_get_regs_len(struct dsa_switch *ds, int port) +static int mv88e6xxx_get_regs_len(struct dsa_switch *ds, int port) { return 32 * sizeof(u16); } -void mv88e6xxx_get_regs(struct dsa_switch *ds, int port, - struct ethtool_regs *regs, void *_p) +static void mv88e6xxx_get_regs(struct dsa_switch *ds, int port, + struct ethtool_regs *regs, void *_p) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); u16 *p = _p; @@ -780,13 +787,17 @@ void mv88e6xxx_get_regs(struct dsa_switch *ds, int port, memset(p, 0xff, 32 * sizeof(u16)); + mutex_lock(&ps->smi_mutex); + for (i = 0; i < 32; i++) { int ret; - ret = mv88e6xxx_reg_read(ps, 
REG_PORT(port), i); + ret = _mv88e6xxx_reg_read(ps, REG_PORT(port), i); if (ret >= 0) p[i] = ret; } + + mutex_unlock(&ps->smi_mutex); } static int _mv88e6xxx_wait(struct mv88e6xxx_priv_state *ps, int reg, int offset, @@ -826,7 +837,7 @@ static int _mv88e6xxx_phy_wait(struct mv88e6xxx_priv_state *ps) GLOBAL2_SMI_OP_BUSY); } -int mv88e6xxx_eeprom_load_wait(struct dsa_switch *ds) +static int mv88e6xxx_eeprom_load_wait(struct dsa_switch *ds) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); @@ -834,7 +845,7 @@ int mv88e6xxx_eeprom_load_wait(struct dsa_switch *ds) GLOBAL2_EEPROM_OP_LOAD); } -int mv88e6xxx_eeprom_busy_wait(struct dsa_switch *ds) +static int mv88e6xxx_eeprom_busy_wait(struct dsa_switch *ds) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); @@ -842,6 +853,215 @@ int mv88e6xxx_eeprom_busy_wait(struct dsa_switch *ds) GLOBAL2_EEPROM_OP_BUSY); } +static int mv88e6xxx_read_eeprom_word(struct dsa_switch *ds, int addr) +{ + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + int ret; + + mutex_lock(&ps->eeprom_mutex); + + ret = mv88e6xxx_reg_write(ps, REG_GLOBAL2, GLOBAL2_EEPROM_OP, + GLOBAL2_EEPROM_OP_READ | + (addr & GLOBAL2_EEPROM_OP_ADDR_MASK)); + if (ret < 0) + goto error; + + ret = mv88e6xxx_eeprom_busy_wait(ds); + if (ret < 0) + goto error; + + ret = mv88e6xxx_reg_read(ps, REG_GLOBAL2, GLOBAL2_EEPROM_DATA); +error: + mutex_unlock(&ps->eeprom_mutex); + return ret; +} + +static int mv88e6xxx_get_eeprom(struct dsa_switch *ds, + struct ethtool_eeprom *eeprom, u8 *data) +{ + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + int offset; + int len; + int ret; + + if (!mv88e6xxx_has(ps, MV88E6XXX_FLAG_EEPROM)) + return -EOPNOTSUPP; + + offset = eeprom->offset; + len = eeprom->len; + eeprom->len = 0; + + eeprom->magic = 0xc3ec4951; + + ret = mv88e6xxx_eeprom_load_wait(ds); + if (ret < 0) + return ret; + + if (offset & 1) { + int word; + + word = mv88e6xxx_read_eeprom_word(ds, offset >> 1); + if (word < 0) + return word; + + *data++ = (word >> 8) & 0xff; + + 
offset++; + len--; + eeprom->len++; + } + + while (len >= 2) { + int word; + + word = mv88e6xxx_read_eeprom_word(ds, offset >> 1); + if (word < 0) + return word; + + *data++ = word & 0xff; + *data++ = (word >> 8) & 0xff; + + offset += 2; + len -= 2; + eeprom->len += 2; + } + + if (len) { + int word; + + word = mv88e6xxx_read_eeprom_word(ds, offset >> 1); + if (word < 0) + return word; + + *data++ = word & 0xff; + + offset++; + len--; + eeprom->len++; + } + + return 0; +} + +static int mv88e6xxx_eeprom_is_readonly(struct dsa_switch *ds) +{ + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + int ret; + + ret = mv88e6xxx_reg_read(ps, REG_GLOBAL2, GLOBAL2_EEPROM_OP); + if (ret < 0) + return ret; + + if (!(ret & GLOBAL2_EEPROM_OP_WRITE_EN)) + return -EROFS; + + return 0; +} + +static int mv88e6xxx_write_eeprom_word(struct dsa_switch *ds, int addr, + u16 data) +{ + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + int ret; + + mutex_lock(&ps->eeprom_mutex); + + ret = mv88e6xxx_reg_write(ps, REG_GLOBAL2, GLOBAL2_EEPROM_DATA, data); + if (ret < 0) + goto error; + + ret = mv88e6xxx_reg_write(ps, REG_GLOBAL2, GLOBAL2_EEPROM_OP, + GLOBAL2_EEPROM_OP_WRITE | + (addr & GLOBAL2_EEPROM_OP_ADDR_MASK)); + if (ret < 0) + goto error; + + ret = mv88e6xxx_eeprom_busy_wait(ds); +error: + mutex_unlock(&ps->eeprom_mutex); + return ret; +} + +static int mv88e6xxx_set_eeprom(struct dsa_switch *ds, + struct ethtool_eeprom *eeprom, u8 *data) +{ + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + int offset; + int ret; + int len; + + if (!mv88e6xxx_has(ps, MV88E6XXX_FLAG_EEPROM)) + return -EOPNOTSUPP; + + if (eeprom->magic != 0xc3ec4951) + return -EINVAL; + + ret = mv88e6xxx_eeprom_is_readonly(ds); + if (ret) + return ret; + + offset = eeprom->offset; + len = eeprom->len; + eeprom->len = 0; + + ret = mv88e6xxx_eeprom_load_wait(ds); + if (ret < 0) + return ret; + + if (offset & 1) { + int word; + + word = mv88e6xxx_read_eeprom_word(ds, offset >> 1); + if (word < 0) + return word; + + word 
= (*data++ << 8) | (word & 0xff); + + ret = mv88e6xxx_write_eeprom_word(ds, offset >> 1, word); + if (ret < 0) + return ret; + + offset++; + len--; + eeprom->len++; + } + + while (len >= 2) { + int word; + + word = *data++; + word |= *data++ << 8; + + ret = mv88e6xxx_write_eeprom_word(ds, offset >> 1, word); + if (ret < 0) + return ret; + + offset += 2; + len -= 2; + eeprom->len += 2; + } + + if (len) { + int word; + + word = mv88e6xxx_read_eeprom_word(ds, offset >> 1); + if (word < 0) + return word; + + word = (word & 0xff00) | *data++; + + ret = mv88e6xxx_write_eeprom_word(ds, offset >> 1, word); + if (ret < 0) + return ret; + + offset++; + len--; + eeprom->len++; + } + + return 0; +} + static int _mv88e6xxx_atu_wait(struct mv88e6xxx_priv_state *ps) { return _mv88e6xxx_wait(ps, REG_GLOBAL, GLOBAL_ATU_OP, @@ -884,11 +1104,15 @@ static int _mv88e6xxx_phy_write_indirect(struct mv88e6xxx_priv_state *ps, return _mv88e6xxx_phy_wait(ps); } -int mv88e6xxx_get_eee(struct dsa_switch *ds, int port, struct ethtool_eee *e) +static int mv88e6xxx_get_eee(struct dsa_switch *ds, int port, + struct ethtool_eee *e) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); int reg; + if (!mv88e6xxx_has(ps, MV88E6XXX_FLAG_EEE)) + return -EOPNOTSUPP; + mutex_lock(&ps->smi_mutex); reg = _mv88e6xxx_phy_read_indirect(ps, port, 16); @@ -910,13 +1134,16 @@ out: return reg; } -int mv88e6xxx_set_eee(struct dsa_switch *ds, int port, - struct phy_device *phydev, struct ethtool_eee *e) +static int mv88e6xxx_set_eee(struct dsa_switch *ds, int port, + struct phy_device *phydev, struct ethtool_eee *e) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); int reg; int ret; + if (!mv88e6xxx_has(ps, MV88E6XXX_FLAG_EEE)) + return -EOPNOTSUPP; + mutex_lock(&ps->smi_mutex); ret = _mv88e6xxx_phy_read_indirect(ps, port, 16); @@ -1138,11 +1365,15 @@ static int _mv88e6xxx_port_based_vlan_map(struct mv88e6xxx_priv_state *ps, return _mv88e6xxx_reg_write(ps, REG_PORT(port), PORT_BASE_VLAN, reg); } -void 
mv88e6xxx_port_stp_state_set(struct dsa_switch *ds, int port, u8 state) +static void mv88e6xxx_port_stp_state_set(struct dsa_switch *ds, int port, + u8 state) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); int stp_state; + if (!mv88e6xxx_has(ps, MV88E6XXX_FLAG_PORTSTATE)) + return; + switch (state) { case BR_STATE_DISABLED: stp_state = PORT_CONTROL_STATE_DISABLED; @@ -1358,15 +1589,18 @@ static int _mv88e6xxx_vtu_getnext(struct mv88e6xxx_priv_state *ps, return 0; } -int mv88e6xxx_port_vlan_dump(struct dsa_switch *ds, int port, - struct switchdev_obj_port_vlan *vlan, - int (*cb)(struct switchdev_obj *obj)) +static int mv88e6xxx_port_vlan_dump(struct dsa_switch *ds, int port, + struct switchdev_obj_port_vlan *vlan, + int (*cb)(struct switchdev_obj *obj)) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); struct mv88e6xxx_vtu_stu_entry next; u16 pvid; int err; + if (!mv88e6xxx_has(ps, MV88E6XXX_FLAG_VTU)) + return -EOPNOTSUPP; + mutex_lock(&ps->smi_mutex); err = _mv88e6xxx_port_pvid_get(ps, port, &pvid); @@ -1782,14 +2016,17 @@ static const char * const mv88e6xxx_port_8021q_mode_names[] = { [PORT_CONTROL_2_8021Q_SECURE] = "Secure", }; -int mv88e6xxx_port_vlan_filtering(struct dsa_switch *ds, int port, - bool vlan_filtering) +static int mv88e6xxx_port_vlan_filtering(struct dsa_switch *ds, int port, + bool vlan_filtering) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); u16 old, new = vlan_filtering ? 
PORT_CONTROL_2_8021Q_SECURE : PORT_CONTROL_2_8021Q_DISABLED; int ret; + if (!mv88e6xxx_has(ps, MV88E6XXX_FLAG_VTU)) + return -EOPNOTSUPP; + mutex_lock(&ps->smi_mutex); ret = _mv88e6xxx_reg_read(ps, REG_PORT(port), PORT_CONTROL_2); @@ -1819,12 +2056,16 @@ unlock: return ret; } -int mv88e6xxx_port_vlan_prepare(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan, - struct switchdev_trans *trans) +static int mv88e6xxx_port_vlan_prepare(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_vlan *vlan, + struct switchdev_trans *trans) { + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); int err; + if (!mv88e6xxx_has(ps, MV88E6XXX_FLAG_VTU)) + return -EOPNOTSUPP; + /* If the requested port doesn't belong to the same bridge as the VLAN * members, do not support it (yet) and fallback to software VLAN. */ @@ -1856,15 +2097,18 @@ static int _mv88e6xxx_port_vlan_add(struct mv88e6xxx_priv_state *ps, int port, return _mv88e6xxx_vtu_loadpurge(ps, &vlan); } -void mv88e6xxx_port_vlan_add(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan, - struct switchdev_trans *trans) +static void mv88e6xxx_port_vlan_add(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_vlan *vlan, + struct switchdev_trans *trans) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID; u16 vid; + if (!mv88e6xxx_has(ps, MV88E6XXX_FLAG_VTU)) + return; + mutex_lock(&ps->smi_mutex); for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) @@ -1915,13 +2159,16 @@ static int _mv88e6xxx_port_vlan_del(struct mv88e6xxx_priv_state *ps, return _mv88e6xxx_atu_remove(ps, vlan.fid, port, false); } -int mv88e6xxx_port_vlan_del(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan) +static int mv88e6xxx_port_vlan_del(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_vlan *vlan) { struct 
mv88e6xxx_priv_state *ps = ds_to_priv(ds); u16 pvid, vid; int err = 0; + if (!mv88e6xxx_has(ps, MV88E6XXX_FLAG_VTU)) + return -EOPNOTSUPP; + mutex_lock(&ps->smi_mutex); err = _mv88e6xxx_port_pvid_get(ps, port, &pvid); @@ -2026,37 +2273,48 @@ static int _mv88e6xxx_port_fdb_load(struct mv88e6xxx_priv_state *ps, int port, return _mv88e6xxx_atu_load(ps, &entry); } -int mv88e6xxx_port_fdb_prepare(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_fdb *fdb, - struct switchdev_trans *trans) +static int mv88e6xxx_port_fdb_prepare(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_fdb *fdb, + struct switchdev_trans *trans) { + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + + if (!mv88e6xxx_has(ps, MV88E6XXX_FLAG_ATU)) + return -EOPNOTSUPP; + /* We don't need any dynamic resource from the kernel (yet), * so skip the prepare phase. */ return 0; } -void mv88e6xxx_port_fdb_add(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_fdb *fdb, - struct switchdev_trans *trans) +static void mv88e6xxx_port_fdb_add(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_fdb *fdb, + struct switchdev_trans *trans) { int state = is_multicast_ether_addr(fdb->addr) ? 
GLOBAL_ATU_DATA_STATE_MC_STATIC : GLOBAL_ATU_DATA_STATE_UC_STATIC; struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + if (!mv88e6xxx_has(ps, MV88E6XXX_FLAG_ATU)) + return; + mutex_lock(&ps->smi_mutex); if (_mv88e6xxx_port_fdb_load(ps, port, fdb->addr, fdb->vid, state)) netdev_err(ds->ports[port], "failed to load MAC address\n"); mutex_unlock(&ps->smi_mutex); } -int mv88e6xxx_port_fdb_del(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_fdb *fdb) +static int mv88e6xxx_port_fdb_del(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_fdb *fdb) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); int ret; + if (!mv88e6xxx_has(ps, MV88E6XXX_FLAG_ATU)) + return -EOPNOTSUPP; + mutex_lock(&ps->smi_mutex); ret = _mv88e6xxx_port_fdb_load(ps, port, fdb->addr, fdb->vid, GLOBAL_ATU_DATA_STATE_UNUSED); @@ -2151,9 +2409,9 @@ static int _mv88e6xxx_port_fdb_dump_one(struct mv88e6xxx_priv_state *ps, return err; } -int mv88e6xxx_port_fdb_dump(struct dsa_switch *ds, int port, - struct switchdev_obj_port_fdb *fdb, - int (*cb)(struct switchdev_obj *obj)) +static int mv88e6xxx_port_fdb_dump(struct dsa_switch *ds, int port, + struct switchdev_obj_port_fdb *fdb, + int (*cb)(struct switchdev_obj *obj)) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); struct mv88e6xxx_vtu_stu_entry vlan = { @@ -2162,6 +2420,9 @@ int mv88e6xxx_port_fdb_dump(struct dsa_switch *ds, int port, u16 fid; int err; + if (!mv88e6xxx_has(ps, MV88E6XXX_FLAG_ATU)) + return -EOPNOTSUPP; + mutex_lock(&ps->smi_mutex); /* Dump port's default Filtering Information Database (VLAN ID 0) */ @@ -2198,12 +2459,15 @@ unlock: return err; } -int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, int port, - struct net_device *bridge) +static int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, int port, + struct net_device *bridge) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); int i, err = 0; + if (!mv88e6xxx_has(ps, MV88E6XXX_FLAG_VLANTABLE)) + return -EOPNOTSUPP; + 
mutex_lock(&ps->smi_mutex); /* Assign the bridge and remap each port's VLANTable */ @@ -2222,12 +2486,15 @@ int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, int port, return err; } -void mv88e6xxx_port_bridge_leave(struct dsa_switch *ds, int port) +static void mv88e6xxx_port_bridge_leave(struct dsa_switch *ds, int port) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); struct net_device *bridge = ps->ports[port].bridge_dev; int i; + if (!mv88e6xxx_has(ps, MV88E6XXX_FLAG_VLANTABLE)) + return; + mutex_lock(&ps->smi_mutex); /* Unassign the bridge and remap each port's VLANTable */ @@ -2294,6 +2561,68 @@ restore_page_0: return ret; } +static int mv88e6xxx_switch_reset(struct mv88e6xxx_priv_state *ps) +{ + bool ppu_active = mv88e6xxx_has(ps, MV88E6XXX_FLAG_PPU_ACTIVE); + u16 is_reset = (ppu_active ? 0x8800 : 0xc800); + struct gpio_desc *gpiod = ps->ds->pd->reset; + unsigned long timeout; + int ret; + int i; + + /* Set all ports to the disabled state. */ + for (i = 0; i < ps->info->num_ports; i++) { + ret = _mv88e6xxx_reg_read(ps, REG_PORT(i), PORT_CONTROL); + if (ret < 0) + return ret; + + ret = _mv88e6xxx_reg_write(ps, REG_PORT(i), PORT_CONTROL, + ret & 0xfffc); + if (ret) + return ret; + } + + /* Wait for transmit queues to drain. */ + usleep_range(2000, 4000); + + /* If there is a gpio connected to the reset pin, toggle it */ + if (gpiod) { + gpiod_set_value_cansleep(gpiod, 1); + usleep_range(10000, 20000); + gpiod_set_value_cansleep(gpiod, 0); + usleep_range(10000, 20000); + } + + /* Reset the switch. Keep the PPU active if requested. The PPU + * needs to be active to support indirect phy register access + * through global registers 0x18 and 0x19. + */ + if (ppu_active) + ret = _mv88e6xxx_reg_write(ps, REG_GLOBAL, 0x04, 0xc000); + else + ret = _mv88e6xxx_reg_write(ps, REG_GLOBAL, 0x04, 0xc400); + if (ret) + return ret; + + /* Wait up to one second for reset to complete. 
*/ + timeout = jiffies + 1 * HZ; + while (time_before(jiffies, timeout)) { + ret = _mv88e6xxx_reg_read(ps, REG_GLOBAL, 0x00); + if (ret < 0) + return ret; + + if ((ret & is_reset) == is_reset) + break; + usleep_range(1000, 2000); + } + if (time_after(jiffies, timeout)) + ret = -ETIMEDOUT; + else + ret = 0; + + return ret; +} + static int mv88e6xxx_power_on_serdes(struct mv88e6xxx_priv_state *ps) { int ret; @@ -2313,14 +2642,12 @@ static int mv88e6xxx_power_on_serdes(struct mv88e6xxx_priv_state *ps) return ret; } -static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port) +static int mv88e6xxx_setup_port(struct mv88e6xxx_priv_state *ps, int port) { - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + struct dsa_switch *ds = ps->ds; int ret; u16 reg; - mutex_lock(&ps->smi_mutex); - if (mv88e6xxx_6352_family(ps) || mv88e6xxx_6351_family(ps) || mv88e6xxx_6165_family(ps) || mv88e6xxx_6097_family(ps) || mv88e6xxx_6185_family(ps) || mv88e6xxx_6095_family(ps) || @@ -2349,7 +2676,7 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port) ret = _mv88e6xxx_reg_write(ps, REG_PORT(port), PORT_PCS_CTRL, reg); if (ret) - goto abort; + return ret; } /* Port Control: disable Drop-on-Unlock, disable Drop-on-Lock, @@ -2413,7 +2740,7 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port) ret = _mv88e6xxx_reg_write(ps, REG_PORT(port), PORT_CONTROL, reg); if (ret) - goto abort; + return ret; } /* If this port is connected to a SerDes, make sure the SerDes is not @@ -2422,14 +2749,14 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port) if (mv88e6xxx_6352_family(ps)) { ret = _mv88e6xxx_reg_read(ps, REG_PORT(port), PORT_STATUS); if (ret < 0) - goto abort; + return ret; ret &= PORT_STATUS_CMODE_MASK; if ((ret == PORT_STATUS_CMODE_100BASE_X) || (ret == PORT_STATUS_CMODE_1000BASE_X) || (ret == PORT_STATUS_CMODE_SGMII)) { ret = mv88e6xxx_power_on_serdes(ps); if (ret < 0) - goto abort; + return ret; } } @@ -2466,7 +2793,7 @@ static int 
mv88e6xxx_setup_port(struct dsa_switch *ds, int port) ret = _mv88e6xxx_reg_write(ps, REG_PORT(port), PORT_CONTROL_2, reg); if (ret) - goto abort; + return ret; } /* Port Association Vector: when learning source addresses @@ -2481,13 +2808,13 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port) ret = _mv88e6xxx_reg_write(ps, REG_PORT(port), PORT_ASSOC_VECTOR, reg); if (ret) - goto abort; + return ret; /* Egress rate control 2: disable egress rate control. */ ret = _mv88e6xxx_reg_write(ps, REG_PORT(port), PORT_RATE_CONTROL_2, 0x0000); if (ret) - goto abort; + return ret; if (mv88e6xxx_6352_family(ps) || mv88e6xxx_6351_family(ps) || mv88e6xxx_6165_family(ps) || mv88e6xxx_6097_family(ps) || @@ -2499,7 +2826,7 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port) ret = _mv88e6xxx_reg_write(ps, REG_PORT(port), PORT_PAUSE_CTRL, 0x0000); if (ret) - goto abort; + return ret; /* Port ATU control: disable limiting the number of * address database entries that this port is allowed @@ -2513,7 +2840,7 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port) ret = _mv88e6xxx_reg_write(ps, REG_PORT(port), PORT_PRI_OVERRIDE, 0x0000); if (ret) - goto abort; + return ret; /* Port Ethertype: use the Ethertype DSA Ethertype * value. @@ -2521,14 +2848,14 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port) ret = _mv88e6xxx_reg_write(ps, REG_PORT(port), PORT_ETH_TYPE, ETH_P_EDSA); if (ret) - goto abort; + return ret; /* Tag Remap: use an identity 802.1p prio -> switch * prio mapping. */ ret = _mv88e6xxx_reg_write(ps, REG_PORT(port), PORT_TAG_REGMAP_0123, 0x3210); if (ret) - goto abort; + return ret; /* Tag Remap 2: use an identity 802.1p prio -> switch * prio mapping. 
@@ -2536,7 +2863,7 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port) ret = _mv88e6xxx_reg_write(ps, REG_PORT(port), PORT_TAG_REGMAP_4567, 0x7654); if (ret) - goto abort; + return ret; } if (mv88e6xxx_6352_family(ps) || mv88e6xxx_6351_family(ps) || @@ -2547,7 +2874,7 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port) ret = _mv88e6xxx_reg_write(ps, REG_PORT(port), PORT_RATE_CONTROL, 0x0001); if (ret) - goto abort; + return ret; } /* Port Control 1: disable trunking, disable sending @@ -2555,7 +2882,7 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port) */ ret = _mv88e6xxx_reg_write(ps, REG_PORT(port), PORT_CONTROL_1, 0x0000); if (ret) - goto abort; + return ret; /* Port based VLAN map: give each port the same default address * database, and allow bidirectional communication between the @@ -2563,52 +2890,60 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port) */ ret = _mv88e6xxx_port_fid_set(ps, port, 0); if (ret) - goto abort; + return ret; ret = _mv88e6xxx_port_based_vlan_map(ps, port); if (ret) - goto abort; + return ret; /* Default VLAN ID and priority: don't set a default VLAN * ID, and set the default packet priority to zero. 
*/ ret = _mv88e6xxx_reg_write(ps, REG_PORT(port), PORT_DEFAULT_VLAN, 0x0000); -abort: - mutex_unlock(&ps->smi_mutex); - return ret; -} - -int mv88e6xxx_setup_ports(struct dsa_switch *ds) -{ - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - int ret; - int i; + if (ret) + return ret; - for (i = 0; i < ps->info->num_ports; i++) { - ret = mv88e6xxx_setup_port(ds, i); - if (ret < 0) - return ret; - } return 0; } -int mv88e6xxx_setup_common(struct mv88e6xxx_priv_state *ps) +static int mv88e6xxx_setup_global(struct mv88e6xxx_priv_state *ps) { - mutex_init(&ps->smi_mutex); + struct dsa_switch *ds = ps->ds; + u32 upstream_port = dsa_upstream_port(ds); + u16 reg; + int err; + int i; - INIT_WORK(&ps->bridge_work, mv88e6xxx_bridge_work); + /* Enable the PHY Polling Unit if present, don't discard any packets, + * and mask all interrupt sources. + */ + reg = 0; + if (mv88e6xxx_has(ps, MV88E6XXX_FLAG_PPU) || + mv88e6xxx_has(ps, MV88E6XXX_FLAG_PPU_ACTIVE)) + reg |= GLOBAL_CONTROL_PPU_ENABLE; - return 0; -} + err = _mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_CONTROL, reg); + if (err) + return err; -int mv88e6xxx_setup_global(struct dsa_switch *ds) -{ - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - int err; - int i; + /* Configure the upstream port, and configure it as the port to which + * ingress and egress and ARP monitor frames are to be sent. + */ + reg = upstream_port << GLOBAL_MONITOR_CONTROL_INGRESS_SHIFT | + upstream_port << GLOBAL_MONITOR_CONTROL_EGRESS_SHIFT | + upstream_port << GLOBAL_MONITOR_CONTROL_ARP_SHIFT; + err = _mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_MONITOR_CONTROL, reg); + if (err) + return err; + + /* Disable remote management, and set the switch's DSA device number. 
*/ + err = _mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_CONTROL_2, + GLOBAL_CONTROL_2_MULTIPLE_CASCADE | + (ds->index & 0x1f)); + if (err) + return err; - mutex_lock(&ps->smi_mutex); /* Set the default address aging time to 5 minutes, and * enable address learn messages to be sent to all message * ports. @@ -2616,45 +2951,45 @@ int mv88e6xxx_setup_global(struct dsa_switch *ds) err = _mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_ATU_CONTROL, 0x0140 | GLOBAL_ATU_CONTROL_LEARN2ALL); if (err) - goto unlock; + return err; /* Configure the IP ToS mapping registers. */ err = _mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_IP_PRI_0, 0x0000); if (err) - goto unlock; + return err; err = _mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_IP_PRI_1, 0x0000); if (err) - goto unlock; + return err; err = _mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_IP_PRI_2, 0x5555); if (err) - goto unlock; + return err; err = _mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_IP_PRI_3, 0x5555); if (err) - goto unlock; + return err; err = _mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_IP_PRI_4, 0xaaaa); if (err) - goto unlock; + return err; err = _mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_IP_PRI_5, 0xaaaa); if (err) - goto unlock; + return err; err = _mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_IP_PRI_6, 0xffff); if (err) - goto unlock; + return err; err = _mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_IP_PRI_7, 0xffff); if (err) - goto unlock; + return err; /* Configure the IEEE 802.1p priority mapping register. */ err = _mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_IEEE_PRI, 0xfa41); if (err) - goto unlock; + return err; /* Send all frames with destination addresses matching * 01:80:c2:00:00:0x to the CPU port. 
*/ err = _mv88e6xxx_reg_write(ps, REG_GLOBAL2, GLOBAL2_MGMT_EN_0X, 0xffff); if (err) - goto unlock; + return err; /* Ignore removed tag data on doubly tagged packets, disable * flow control messages, force flow control priority to the @@ -2665,15 +3000,15 @@ int mv88e6xxx_setup_global(struct dsa_switch *ds) 0x7 | GLOBAL2_SWITCH_MGMT_RSVD2CPU | 0x70 | GLOBAL2_SWITCH_MGMT_FORCE_FLOW_CTRL_PRI); if (err) - goto unlock; + return err; /* Program the DSA routing table. */ for (i = 0; i < 32; i++) { int nexthop = 0x1f; - if (ds->pd->rtable && - i != ds->index && i < ds->dst->pd->nr_chips) - nexthop = ds->pd->rtable[i] & 0x1f; + if (ps->ds->pd->rtable && + i != ps->ds->index && i < ps->ds->dst->pd->nr_chips) + nexthop = ps->ds->pd->rtable[i] & 0x1f; err = _mv88e6xxx_reg_write( ps, REG_GLOBAL2, @@ -2681,7 +3016,7 @@ int mv88e6xxx_setup_global(struct dsa_switch *ds) GLOBAL2_DEVICE_MAPPING_UPDATE | (i << GLOBAL2_DEVICE_MAPPING_TARGET_SHIFT) | nexthop); if (err) - goto unlock; + return err; } /* Clear all trunk masks. */ @@ -2691,7 +3026,7 @@ int mv88e6xxx_setup_global(struct dsa_switch *ds) (i << GLOBAL2_TRUNK_MASK_NUM_SHIFT) | ((1 << ps->info->num_ports) - 1)); if (err) - goto unlock; + return err; } /* Clear all trunk mappings. */ @@ -2702,7 +3037,7 @@ int mv88e6xxx_setup_global(struct dsa_switch *ds) GLOBAL2_TRUNK_MAPPING_UPDATE | (i << GLOBAL2_TRUNK_MAPPING_ID_SHIFT)); if (err) - goto unlock; + return err; } if (mv88e6xxx_6352_family(ps) || mv88e6xxx_6351_family(ps) || @@ -2714,7 +3049,7 @@ int mv88e6xxx_setup_global(struct dsa_switch *ds) err = _mv88e6xxx_reg_write(ps, REG_GLOBAL2, GLOBAL2_MGMT_EN_2X, 0xffff); if (err) - goto unlock; + return err; /* Initialise cross-chip port VLAN table to reset * defaults. @@ -2722,7 +3057,7 @@ int mv88e6xxx_setup_global(struct dsa_switch *ds) err = _mv88e6xxx_reg_write(ps, REG_GLOBAL2, GLOBAL2_PVT_ADDR, 0x9000); if (err) - goto unlock; + return err; /* Clear the priority override table. 
*/ for (i = 0; i < 16; i++) { @@ -2730,7 +3065,7 @@ int mv88e6xxx_setup_global(struct dsa_switch *ds) GLOBAL2_PRIO_OVERRIDE, 0x8000 | (i << 8)); if (err) - goto unlock; + return err; } } @@ -2747,7 +3082,7 @@ int mv88e6xxx_setup_global(struct dsa_switch *ds) GLOBAL2_INGRESS_OP, 0x9000 | (i << 8)); if (err) - goto unlock; + return err; } } @@ -2755,89 +3090,64 @@ int mv88e6xxx_setup_global(struct dsa_switch *ds) err = _mv88e6xxx_reg_write(ps, REG_GLOBAL, GLOBAL_STATS_OP, GLOBAL_STATS_OP_FLUSH_ALL); if (err) - goto unlock; + return err; /* Wait for the flush to complete. */ err = _mv88e6xxx_stats_wait(ps); - if (err < 0) - goto unlock; + if (err) + return err; /* Clear all ATU entries */ err = _mv88e6xxx_atu_flush(ps, 0, true); - if (err < 0) - goto unlock; + if (err) + return err; /* Clear all the VTU and STU entries */ err = _mv88e6xxx_vtu_stu_flush(ps); -unlock: - mutex_unlock(&ps->smi_mutex); + if (err < 0) + return err; return err; } -int mv88e6xxx_switch_reset(struct mv88e6xxx_priv_state *ps, bool ppu_active) +static int mv88e6xxx_setup(struct dsa_switch *ds) { - u16 is_reset = (ppu_active ? 0x8800 : 0xc800); - struct gpio_desc *gpiod = ps->ds->pd->reset; - unsigned long timeout; - int ret; + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + int err; int i; - mutex_lock(&ps->smi_mutex); + ps->ds = ds; - /* Set all ports to the disabled state. */ - for (i = 0; i < ps->info->num_ports; i++) { - ret = _mv88e6xxx_reg_read(ps, REG_PORT(i), PORT_CONTROL); - if (ret < 0) - goto unlock; + mutex_init(&ps->smi_mutex); - ret = _mv88e6xxx_reg_write(ps, REG_PORT(i), PORT_CONTROL, - ret & 0xfffc); - if (ret) - goto unlock; - } + INIT_WORK(&ps->bridge_work, mv88e6xxx_bridge_work); - /* Wait for transmit queues to drain. 
*/ - usleep_range(2000, 4000); + if (mv88e6xxx_has(ps, MV88E6XXX_FLAG_EEPROM)) + mutex_init(&ps->eeprom_mutex); - /* If there is a gpio connected to the reset pin, toggle it */ - if (gpiod) { - gpiod_set_value_cansleep(gpiod, 1); - usleep_range(10000, 20000); - gpiod_set_value_cansleep(gpiod, 0); - usleep_range(10000, 20000); - } + if (mv88e6xxx_has(ps, MV88E6XXX_FLAG_PPU)) + mv88e6xxx_ppu_state_init(ps); - /* Reset the switch. Keep the PPU active if requested. The PPU - * needs to be active to support indirect phy register access - * through global registers 0x18 and 0x19. - */ - if (ppu_active) - ret = _mv88e6xxx_reg_write(ps, REG_GLOBAL, 0x04, 0xc000); - else - ret = _mv88e6xxx_reg_write(ps, REG_GLOBAL, 0x04, 0xc400); - if (ret) + mutex_lock(&ps->smi_mutex); + + err = mv88e6xxx_switch_reset(ps); + if (err) goto unlock; - /* Wait up to one second for reset to complete. */ - timeout = jiffies + 1 * HZ; - while (time_before(jiffies, timeout)) { - ret = _mv88e6xxx_reg_read(ps, REG_GLOBAL, 0x00); - if (ret < 0) - goto unlock; + err = mv88e6xxx_setup_global(ps); + if (err) + goto unlock; - if ((ret & is_reset) == is_reset) - break; - usleep_range(1000, 2000); + for (i = 0; i < ps->info->num_ports; i++) { + err = mv88e6xxx_setup_port(ps, i); + if (err) + goto unlock; } - if (time_after(jiffies, timeout)) - ret = -ETIMEDOUT; - else - ret = 0; + unlock: mutex_unlock(&ps->smi_mutex); - return ret; + return err; } int mv88e6xxx_phy_page_read(struct dsa_switch *ds, int port, int page, int reg) @@ -2873,8 +3183,7 @@ static int mv88e6xxx_port_to_phy_addr(struct mv88e6xxx_priv_state *ps, return -EINVAL; } -int -mv88e6xxx_phy_read(struct dsa_switch *ds, int port, int regnum) +static int mv88e6xxx_phy_read(struct dsa_switch *ds, int port, int regnum) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); int addr = mv88e6xxx_port_to_phy_addr(ps, port); @@ -2884,29 +3193,20 @@ mv88e6xxx_phy_read(struct dsa_switch *ds, int port, int regnum) return 0xffff; mutex_lock(&ps->smi_mutex); 
- ret = _mv88e6xxx_phy_read(ps, addr, regnum); - mutex_unlock(&ps->smi_mutex); - return ret; -} - -int -mv88e6xxx_phy_write(struct dsa_switch *ds, int port, int regnum, u16 val) -{ - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - int addr = mv88e6xxx_port_to_phy_addr(ps, port); - int ret; - if (addr < 0) - return 0xffff; + if (mv88e6xxx_has(ps, MV88E6XXX_FLAG_PPU)) + ret = mv88e6xxx_phy_read_ppu(ps, addr, regnum); + else if (mv88e6xxx_has(ps, MV88E6XXX_FLAG_SMI_PHY)) + ret = _mv88e6xxx_phy_read_indirect(ps, addr, regnum); + else + ret = _mv88e6xxx_phy_read(ps, addr, regnum); - mutex_lock(&ps->smi_mutex); - ret = _mv88e6xxx_phy_write(ps, addr, regnum, val); mutex_unlock(&ps->smi_mutex); return ret; } -int -mv88e6xxx_phy_read_indirect(struct dsa_switch *ds, int port, int regnum) +static int mv88e6xxx_phy_write(struct dsa_switch *ds, int port, int regnum, + u16 val) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); int addr = mv88e6xxx_port_to_phy_addr(ps, port); @@ -2916,24 +3216,14 @@ mv88e6xxx_phy_read_indirect(struct dsa_switch *ds, int port, int regnum) return 0xffff; mutex_lock(&ps->smi_mutex); - ret = _mv88e6xxx_phy_read_indirect(ps, addr, regnum); - mutex_unlock(&ps->smi_mutex); - return ret; -} -int -mv88e6xxx_phy_write_indirect(struct dsa_switch *ds, int port, int regnum, - u16 val) -{ - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - int addr = mv88e6xxx_port_to_phy_addr(ps, port); - int ret; - - if (addr < 0) - return addr; + if (mv88e6xxx_has(ps, MV88E6XXX_FLAG_PPU)) + ret = mv88e6xxx_phy_write_ppu(ps, addr, regnum, val); + else if (mv88e6xxx_has(ps, MV88E6XXX_FLAG_SMI_PHY)) + ret = _mv88e6xxx_phy_write_indirect(ps, addr, regnum, val); + else + ret = _mv88e6xxx_phy_write(ps, addr, regnum, val); - mutex_lock(&ps->smi_mutex); - ret = _mv88e6xxx_phy_write_indirect(ps, addr, regnum, val); mutex_unlock(&ps->smi_mutex); return ret; } @@ -3002,23 +3292,26 @@ static int mv88e63xx_get_temp(struct dsa_switch *ds, int *temp) return 0; } -int 
mv88e6xxx_get_temp(struct dsa_switch *ds, int *temp) +static int mv88e6xxx_get_temp(struct dsa_switch *ds, int *temp) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + if (!mv88e6xxx_has(ps, MV88E6XXX_FLAG_TEMP)) + return -EOPNOTSUPP; + if (mv88e6xxx_6320_family(ps) || mv88e6xxx_6352_family(ps)) return mv88e63xx_get_temp(ds, temp); return mv88e61xx_get_temp(ds, temp); } -int mv88e6xxx_get_temp_limit(struct dsa_switch *ds, int *temp) +static int mv88e6xxx_get_temp_limit(struct dsa_switch *ds, int *temp) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); int phy = mv88e6xxx_6320_family(ps) ? 3 : 0; int ret; - if (!mv88e6xxx_6320_family(ps) && !mv88e6xxx_6352_family(ps)) + if (!mv88e6xxx_has(ps, MV88E6XXX_FLAG_TEMP_LIMIT)) return -EOPNOTSUPP; *temp = 0; @@ -3032,13 +3325,13 @@ int mv88e6xxx_get_temp_limit(struct dsa_switch *ds, int *temp) return 0; } -int mv88e6xxx_set_temp_limit(struct dsa_switch *ds, int temp) +static int mv88e6xxx_set_temp_limit(struct dsa_switch *ds, int temp) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); int phy = mv88e6xxx_6320_family(ps) ? 3 : 0; int ret; - if (!mv88e6xxx_6320_family(ps) && !mv88e6xxx_6352_family(ps)) + if (!mv88e6xxx_has(ps, MV88E6XXX_FLAG_TEMP_LIMIT)) return -EOPNOTSUPP; ret = mv88e6xxx_phy_page_read(ds, phy, 6, 26); @@ -3049,13 +3342,13 @@ int mv88e6xxx_set_temp_limit(struct dsa_switch *ds, int temp) (ret & 0xe0ff) | (temp << 8)); } -int mv88e6xxx_get_temp_alarm(struct dsa_switch *ds, bool *alarm) +static int mv88e6xxx_get_temp_alarm(struct dsa_switch *ds, bool *alarm) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); int phy = mv88e6xxx_6320_family(ps) ? 
3 : 0; int ret; - if (!mv88e6xxx_6320_family(ps) && !mv88e6xxx_6352_family(ps)) + if (!mv88e6xxx_has(ps, MV88E6XXX_FLAG_TEMP_LIMIT)) return -EOPNOTSUPP; *alarm = false; @@ -3070,6 +3363,161 @@ int mv88e6xxx_get_temp_alarm(struct dsa_switch *ds, bool *alarm) } #endif /* CONFIG_NET_DSA_HWMON */ +static const struct mv88e6xxx_info mv88e6xxx_table[] = { + [MV88E6085] = { + .prod_num = PORT_SWITCH_ID_PROD_NUM_6085, + .family = MV88E6XXX_FAMILY_6097, + .name = "Marvell 88E6085", + .num_databases = 4096, + .num_ports = 10, + .flags = MV88E6XXX_FLAGS_FAMILY_6097, + }, + + [MV88E6095] = { + .prod_num = PORT_SWITCH_ID_PROD_NUM_6095, + .family = MV88E6XXX_FAMILY_6095, + .name = "Marvell 88E6095/88E6095F", + .num_databases = 256, + .num_ports = 11, + .flags = MV88E6XXX_FLAGS_FAMILY_6095, + }, + + [MV88E6123] = { + .prod_num = PORT_SWITCH_ID_PROD_NUM_6123, + .family = MV88E6XXX_FAMILY_6165, + .name = "Marvell 88E6123", + .num_databases = 4096, + .num_ports = 3, + .flags = MV88E6XXX_FLAGS_FAMILY_6165, + }, + + [MV88E6131] = { + .prod_num = PORT_SWITCH_ID_PROD_NUM_6131, + .family = MV88E6XXX_FAMILY_6185, + .name = "Marvell 88E6131", + .num_databases = 256, + .num_ports = 8, + .flags = MV88E6XXX_FLAGS_FAMILY_6185, + }, + + [MV88E6161] = { + .prod_num = PORT_SWITCH_ID_PROD_NUM_6161, + .family = MV88E6XXX_FAMILY_6165, + .name = "Marvell 88E6161", + .num_databases = 4096, + .num_ports = 6, + .flags = MV88E6XXX_FLAGS_FAMILY_6165, + }, + + [MV88E6165] = { + .prod_num = PORT_SWITCH_ID_PROD_NUM_6165, + .family = MV88E6XXX_FAMILY_6165, + .name = "Marvell 88E6165", + .num_databases = 4096, + .num_ports = 6, + .flags = MV88E6XXX_FLAGS_FAMILY_6165, + }, + + [MV88E6171] = { + .prod_num = PORT_SWITCH_ID_PROD_NUM_6171, + .family = MV88E6XXX_FAMILY_6351, + .name = "Marvell 88E6171", + .num_databases = 4096, + .num_ports = 7, + .flags = MV88E6XXX_FLAGS_FAMILY_6351, + }, + + [MV88E6172] = { + .prod_num = PORT_SWITCH_ID_PROD_NUM_6172, + .family = MV88E6XXX_FAMILY_6352, + .name = "Marvell 88E6172", 
+ .num_databases = 4096, + .num_ports = 7, + .flags = MV88E6XXX_FLAGS_FAMILY_6352, + }, + + [MV88E6175] = { + .prod_num = PORT_SWITCH_ID_PROD_NUM_6175, + .family = MV88E6XXX_FAMILY_6351, + .name = "Marvell 88E6175", + .num_databases = 4096, + .num_ports = 7, + .flags = MV88E6XXX_FLAGS_FAMILY_6351, + }, + + [MV88E6176] = { + .prod_num = PORT_SWITCH_ID_PROD_NUM_6176, + .family = MV88E6XXX_FAMILY_6352, + .name = "Marvell 88E6176", + .num_databases = 4096, + .num_ports = 7, + .flags = MV88E6XXX_FLAGS_FAMILY_6352, + }, + + [MV88E6185] = { + .prod_num = PORT_SWITCH_ID_PROD_NUM_6185, + .family = MV88E6XXX_FAMILY_6185, + .name = "Marvell 88E6185", + .num_databases = 256, + .num_ports = 10, + .flags = MV88E6XXX_FLAGS_FAMILY_6185, + }, + + [MV88E6240] = { + .prod_num = PORT_SWITCH_ID_PROD_NUM_6240, + .family = MV88E6XXX_FAMILY_6352, + .name = "Marvell 88E6240", + .num_databases = 4096, + .num_ports = 7, + .flags = MV88E6XXX_FLAGS_FAMILY_6352, + }, + + [MV88E6320] = { + .prod_num = PORT_SWITCH_ID_PROD_NUM_6320, + .family = MV88E6XXX_FAMILY_6320, + .name = "Marvell 88E6320", + .num_databases = 4096, + .num_ports = 7, + .flags = MV88E6XXX_FLAGS_FAMILY_6320, + }, + + [MV88E6321] = { + .prod_num = PORT_SWITCH_ID_PROD_NUM_6321, + .family = MV88E6XXX_FAMILY_6320, + .name = "Marvell 88E6321", + .num_databases = 4096, + .num_ports = 7, + .flags = MV88E6XXX_FLAGS_FAMILY_6320, + }, + + [MV88E6350] = { + .prod_num = PORT_SWITCH_ID_PROD_NUM_6350, + .family = MV88E6XXX_FAMILY_6351, + .name = "Marvell 88E6350", + .num_databases = 4096, + .num_ports = 7, + .flags = MV88E6XXX_FLAGS_FAMILY_6351, + }, + + [MV88E6351] = { + .prod_num = PORT_SWITCH_ID_PROD_NUM_6351, + .family = MV88E6XXX_FAMILY_6351, + .name = "Marvell 88E6351", + .num_databases = 4096, + .num_ports = 7, + .flags = MV88E6XXX_FLAGS_FAMILY_6351, + }, + + [MV88E6352] = { + .prod_num = PORT_SWITCH_ID_PROD_NUM_6352, + .family = MV88E6XXX_FAMILY_6352, + .name = "Marvell 88E6352", + .num_databases = 4096, + .num_ports = 7, + .flags = 
MV88E6XXX_FLAGS_FAMILY_6352, + }, +}; + static const struct mv88e6xxx_info * mv88e6xxx_lookup_info(unsigned int prod_num, const struct mv88e6xxx_info *table, unsigned int num) @@ -3083,10 +3531,9 @@ mv88e6xxx_lookup_info(unsigned int prod_num, const struct mv88e6xxx_info *table, return NULL; } -const char *mv88e6xxx_drv_probe(struct device *dsa_dev, struct device *host_dev, - int sw_addr, void **priv, - const struct mv88e6xxx_info *table, - unsigned int num) +static const char *mv88e6xxx_probe(struct device *dsa_dev, + struct device *host_dev, int sw_addr, + void **priv) { const struct mv88e6xxx_info *info; struct mv88e6xxx_priv_state *ps; @@ -3105,7 +3552,8 @@ const char *mv88e6xxx_drv_probe(struct device *dsa_dev, struct device *host_dev, prod_num = (id & 0xfff0) >> 4; rev = id & 0x000f; - info = mv88e6xxx_lookup_info(prod_num, table, num); + info = mv88e6xxx_lookup_info(prod_num, mv88e6xxx_table, + ARRAY_SIZE(mv88e6xxx_table)); if (!info) return NULL; @@ -3127,41 +3575,73 @@ const char *mv88e6xxx_drv_probe(struct device *dsa_dev, struct device *host_dev, return name; } +struct dsa_switch_driver mv88e6xxx_switch_driver = { + .tag_protocol = DSA_TAG_PROTO_EDSA, + .probe = mv88e6xxx_probe, + .setup = mv88e6xxx_setup, + .set_addr = mv88e6xxx_set_addr, + .phy_read = mv88e6xxx_phy_read, + .phy_write = mv88e6xxx_phy_write, + .adjust_link = mv88e6xxx_adjust_link, + .get_strings = mv88e6xxx_get_strings, + .get_ethtool_stats = mv88e6xxx_get_ethtool_stats, + .get_sset_count = mv88e6xxx_get_sset_count, + .set_eee = mv88e6xxx_set_eee, + .get_eee = mv88e6xxx_get_eee, +#ifdef CONFIG_NET_DSA_HWMON + .get_temp = mv88e6xxx_get_temp, + .get_temp_limit = mv88e6xxx_get_temp_limit, + .set_temp_limit = mv88e6xxx_set_temp_limit, + .get_temp_alarm = mv88e6xxx_get_temp_alarm, +#endif + .get_eeprom = mv88e6xxx_get_eeprom, + .set_eeprom = mv88e6xxx_set_eeprom, + .get_regs_len = mv88e6xxx_get_regs_len, + .get_regs = mv88e6xxx_get_regs, + .port_bridge_join = mv88e6xxx_port_bridge_join, + 
.port_bridge_leave = mv88e6xxx_port_bridge_leave, + .port_stp_state_set = mv88e6xxx_port_stp_state_set, + .port_vlan_filtering = mv88e6xxx_port_vlan_filtering, + .port_vlan_prepare = mv88e6xxx_port_vlan_prepare, + .port_vlan_add = mv88e6xxx_port_vlan_add, + .port_vlan_del = mv88e6xxx_port_vlan_del, + .port_vlan_dump = mv88e6xxx_port_vlan_dump, + .port_fdb_prepare = mv88e6xxx_port_fdb_prepare, + .port_fdb_add = mv88e6xxx_port_fdb_add, + .port_fdb_del = mv88e6xxx_port_fdb_del, + .port_fdb_dump = mv88e6xxx_port_fdb_dump, +}; + static int __init mv88e6xxx_init(void) { -#if IS_ENABLED(CONFIG_NET_DSA_MV88E6131) - register_switch_driver(&mv88e6131_switch_driver); -#endif -#if IS_ENABLED(CONFIG_NET_DSA_MV88E6123) - register_switch_driver(&mv88e6123_switch_driver); -#endif -#if IS_ENABLED(CONFIG_NET_DSA_MV88E6352) - register_switch_driver(&mv88e6352_switch_driver); -#endif -#if IS_ENABLED(CONFIG_NET_DSA_MV88E6171) - register_switch_driver(&mv88e6171_switch_driver); -#endif + register_switch_driver(&mv88e6xxx_switch_driver); + return 0; } module_init(mv88e6xxx_init); static void __exit mv88e6xxx_cleanup(void) { -#if IS_ENABLED(CONFIG_NET_DSA_MV88E6171) - unregister_switch_driver(&mv88e6171_switch_driver); -#endif -#if IS_ENABLED(CONFIG_NET_DSA_MV88E6352) - unregister_switch_driver(&mv88e6352_switch_driver); -#endif -#if IS_ENABLED(CONFIG_NET_DSA_MV88E6123) - unregister_switch_driver(&mv88e6123_switch_driver); -#endif -#if IS_ENABLED(CONFIG_NET_DSA_MV88E6131) - unregister_switch_driver(&mv88e6131_switch_driver); -#endif + unregister_switch_driver(&mv88e6xxx_switch_driver); } module_exit(mv88e6xxx_cleanup); +MODULE_ALIAS("platform:mv88e6085"); +MODULE_ALIAS("platform:mv88e6095"); +MODULE_ALIAS("platform:mv88e6095f"); +MODULE_ALIAS("platform:mv88e6123"); +MODULE_ALIAS("platform:mv88e6131"); +MODULE_ALIAS("platform:mv88e6161"); +MODULE_ALIAS("platform:mv88e6165"); +MODULE_ALIAS("platform:mv88e6171"); +MODULE_ALIAS("platform:mv88e6172"); +MODULE_ALIAS("platform:mv88e6175"); 
+MODULE_ALIAS("platform:mv88e6176"); +MODULE_ALIAS("platform:mv88e6320"); +MODULE_ALIAS("platform:mv88e6321"); +MODULE_ALIAS("platform:mv88e6350"); +MODULE_ALIAS("platform:mv88e6351"); +MODULE_ALIAS("platform:mv88e6352"); MODULE_AUTHOR("Lennert Buytenhek <buytenh@wantstofly.org>"); MODULE_DESCRIPTION("Driver for Marvell 88E6XXX ethernet switch chips"); MODULE_LICENSE("GPL"); diff --git a/drivers/net/dsa/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx.h index 4f455d219859..ca69a93a42a0 100644 --- a/drivers/net/dsa/mv88e6xxx.h +++ b/drivers/net/dsa/mv88e6xxx.h @@ -338,6 +338,27 @@ #define MV88E6XXX_N_FID 4096 +/* List of supported models */ +enum mv88e6xxx_model { + MV88E6085, + MV88E6095, + MV88E6123, + MV88E6131, + MV88E6161, + MV88E6165, + MV88E6171, + MV88E6172, + MV88E6175, + MV88E6176, + MV88E6185, + MV88E6240, + MV88E6320, + MV88E6321, + MV88E6350, + MV88E6351, + MV88E6352, +}; + enum mv88e6xxx_family { MV88E6XXX_FAMILY_NONE, MV88E6XXX_FAMILY_6065, /* 6031 6035 6061 6065 */ @@ -350,12 +371,142 @@ enum mv88e6xxx_family { MV88E6XXX_FAMILY_6352, /* 6172 6176 6240 6352 */ }; +enum mv88e6xxx_cap { + /* Address Translation Unit. + * The ATU is used to lookup and learn MAC addresses. See GLOBAL_ATU_OP. + */ + MV88E6XXX_CAP_ATU, + + /* Energy Efficient Ethernet. + */ + MV88E6XXX_CAP_EEE, + + /* EEPROM Command and Data registers. + * See GLOBAL2_EEPROM_OP and GLOBAL2_EEPROM_DATA. + */ + MV88E6XXX_CAP_EEPROM, + + /* Port State Filtering for 802.1D Spanning Tree. + * See PORT_CONTROL_STATE_* values in the PORT_CONTROL register. + */ + MV88E6XXX_CAP_PORTSTATE, + + /* PHY Polling Unit. + * See GLOBAL_CONTROL_PPU_ENABLE and GLOBAL_STATUS_PPU_POLLING. + */ + MV88E6XXX_CAP_PPU, + MV88E6XXX_CAP_PPU_ACTIVE, + + /* SMI PHY Command and Data registers. + * This requires an indirect access to PHY registers through + * GLOBAL2_SMI_OP, otherwise direct access to PHY registers is done. + */ + MV88E6XXX_CAP_SMI_PHY, + + /* Switch MAC/WoL/WoF register. 
+ * This requires an indirect access to set the switch MAC address + * through GLOBAL2_SWITCH_MAC, otherwise GLOBAL_MAC_01, GLOBAL_MAC_23, + * and GLOBAL_MAC_45 are used with a direct access. + */ + MV88E6XXX_CAP_SWITCH_MAC_WOL_WOF, + + /* Internal temperature sensor. + * Available from any enabled port's PHY register 26, page 6. + */ + MV88E6XXX_CAP_TEMP, + MV88E6XXX_CAP_TEMP_LIMIT, + + /* In-chip Port Based VLANs. + * Each port VLANTable register (see PORT_BASE_VLAN) is used to restrict + * the output (or egress) ports to which it is allowed to send frames. + */ + MV88E6XXX_CAP_VLANTABLE, + + /* VLAN Table Unit. + * The VTU is used to program 802.1Q VLANs. See GLOBAL_VTU_OP. + */ + MV88E6XXX_CAP_VTU, +}; + +/* Bitmask of capabilities */ +#define MV88E6XXX_FLAG_ATU BIT(MV88E6XXX_CAP_ATU) +#define MV88E6XXX_FLAG_EEE BIT(MV88E6XXX_CAP_EEE) +#define MV88E6XXX_FLAG_EEPROM BIT(MV88E6XXX_CAP_EEPROM) +#define MV88E6XXX_FLAG_PORTSTATE BIT(MV88E6XXX_CAP_PORTSTATE) +#define MV88E6XXX_FLAG_PPU BIT(MV88E6XXX_CAP_PPU) +#define MV88E6XXX_FLAG_PPU_ACTIVE BIT(MV88E6XXX_CAP_PPU_ACTIVE) +#define MV88E6XXX_FLAG_SMI_PHY BIT(MV88E6XXX_CAP_SMI_PHY) +#define MV88E6XXX_FLAG_SWITCH_MAC BIT(MV88E6XXX_CAP_SWITCH_MAC_WOL_WOF) +#define MV88E6XXX_FLAG_TEMP BIT(MV88E6XXX_CAP_TEMP) +#define MV88E6XXX_FLAG_TEMP_LIMIT BIT(MV88E6XXX_CAP_TEMP_LIMIT) +#define MV88E6XXX_FLAG_VLANTABLE BIT(MV88E6XXX_CAP_VLANTABLE) +#define MV88E6XXX_FLAG_VTU BIT(MV88E6XXX_CAP_VTU) + +#define MV88E6XXX_FLAGS_FAMILY_6095 \ + (MV88E6XXX_FLAG_ATU | \ + MV88E6XXX_FLAG_PPU | \ + MV88E6XXX_FLAG_VLANTABLE | \ + MV88E6XXX_FLAG_VTU) + +#define MV88E6XXX_FLAGS_FAMILY_6097 \ + (MV88E6XXX_FLAG_ATU | \ + MV88E6XXX_FLAG_PPU | \ + MV88E6XXX_FLAG_VLANTABLE | \ + MV88E6XXX_FLAG_VTU) + +#define MV88E6XXX_FLAGS_FAMILY_6165 \ + (MV88E6XXX_FLAG_SWITCH_MAC | \ + MV88E6XXX_FLAG_TEMP) + +#define MV88E6XXX_FLAGS_FAMILY_6185 \ + (MV88E6XXX_FLAG_ATU | \ + MV88E6XXX_FLAG_PPU | \ + MV88E6XXX_FLAG_VLANTABLE | \ + MV88E6XXX_FLAG_VTU) + +#define 
MV88E6XXX_FLAGS_FAMILY_6320 \ + (MV88E6XXX_FLAG_ATU | \ + MV88E6XXX_FLAG_EEE | \ + MV88E6XXX_FLAG_EEPROM | \ + MV88E6XXX_FLAG_PORTSTATE | \ + MV88E6XXX_FLAG_PPU_ACTIVE | \ + MV88E6XXX_FLAG_SMI_PHY | \ + MV88E6XXX_FLAG_SWITCH_MAC | \ + MV88E6XXX_FLAG_TEMP | \ + MV88E6XXX_FLAG_TEMP_LIMIT | \ + MV88E6XXX_FLAG_VLANTABLE | \ + MV88E6XXX_FLAG_VTU) + +#define MV88E6XXX_FLAGS_FAMILY_6351 \ + (MV88E6XXX_FLAG_ATU | \ + MV88E6XXX_FLAG_PORTSTATE | \ + MV88E6XXX_FLAG_PPU_ACTIVE | \ + MV88E6XXX_FLAG_SMI_PHY | \ + MV88E6XXX_FLAG_SWITCH_MAC | \ + MV88E6XXX_FLAG_TEMP | \ + MV88E6XXX_FLAG_VLANTABLE | \ + MV88E6XXX_FLAG_VTU) + +#define MV88E6XXX_FLAGS_FAMILY_6352 \ + (MV88E6XXX_FLAG_ATU | \ + MV88E6XXX_FLAG_EEE | \ + MV88E6XXX_FLAG_EEPROM | \ + MV88E6XXX_FLAG_PORTSTATE | \ + MV88E6XXX_FLAG_PPU_ACTIVE | \ + MV88E6XXX_FLAG_SMI_PHY | \ + MV88E6XXX_FLAG_SWITCH_MAC | \ + MV88E6XXX_FLAG_TEMP | \ + MV88E6XXX_FLAG_TEMP_LIMIT | \ + MV88E6XXX_FLAG_VLANTABLE | \ + MV88E6XXX_FLAG_VTU) + struct mv88e6xxx_info { enum mv88e6xxx_family family; u16 prod_num; const char *name; unsigned int num_databases; unsigned int num_ports; + unsigned long flags; }; struct mv88e6xxx_atu_entry { @@ -403,7 +554,6 @@ struct mv88e6xxx_priv_state { struct mii_bus *bus; int sw_addr; -#ifdef CONFIG_NET_DSA_MV88E6XXX_NEED_PPU /* Handles automatic disabling and re-enabling of the PHY * polling unit. */ @@ -411,7 +561,6 @@ struct mv88e6xxx_priv_state { int ppu_disabled; struct work_struct ppu_work; struct timer_list ppu_timer; -#endif /* This mutex serialises access to the statistics unit. * Hold this mutex over snapshot + dump sequences. 
@@ -449,86 +598,10 @@ struct mv88e6xxx_hw_stat { enum stat_type type; }; -int mv88e6xxx_switch_reset(struct mv88e6xxx_priv_state *ps, bool ppu_active); -const char *mv88e6xxx_drv_probe(struct device *dsa_dev, struct device *host_dev, - int sw_addr, void **priv, - const struct mv88e6xxx_info *table, - unsigned int num); - -int mv88e6xxx_setup_ports(struct dsa_switch *ds); -int mv88e6xxx_setup_common(struct mv88e6xxx_priv_state *ps); -int mv88e6xxx_setup_global(struct dsa_switch *ds); -int mv88e6xxx_reg_read(struct mv88e6xxx_priv_state *ps, int addr, int reg); -int mv88e6xxx_reg_write(struct mv88e6xxx_priv_state *ps, int addr, - int reg, u16 val); -int mv88e6xxx_set_addr_direct(struct dsa_switch *ds, u8 *addr); -int mv88e6xxx_set_addr_indirect(struct dsa_switch *ds, u8 *addr); -int mv88e6xxx_phy_read(struct dsa_switch *ds, int port, int regnum); -int mv88e6xxx_phy_write(struct dsa_switch *ds, int port, int regnum, u16 val); -int mv88e6xxx_phy_read_indirect(struct dsa_switch *ds, int port, int regnum); -int mv88e6xxx_phy_write_indirect(struct dsa_switch *ds, int port, int regnum, - u16 val); -void mv88e6xxx_ppu_state_init(struct mv88e6xxx_priv_state *ps); -int mv88e6xxx_phy_read_ppu(struct dsa_switch *ds, int addr, int regnum); -int mv88e6xxx_phy_write_ppu(struct dsa_switch *ds, int addr, - int regnum, u16 val); -void mv88e6xxx_get_strings(struct dsa_switch *ds, int port, uint8_t *data); -void mv88e6xxx_get_ethtool_stats(struct dsa_switch *ds, int port, - uint64_t *data); -int mv88e6xxx_get_sset_count(struct dsa_switch *ds); -int mv88e6xxx_get_sset_count_basic(struct dsa_switch *ds); -void mv88e6xxx_adjust_link(struct dsa_switch *ds, int port, - struct phy_device *phydev); -int mv88e6xxx_get_regs_len(struct dsa_switch *ds, int port); -void mv88e6xxx_get_regs(struct dsa_switch *ds, int port, - struct ethtool_regs *regs, void *_p); -int mv88e6xxx_get_temp(struct dsa_switch *ds, int *temp); -int mv88e6xxx_get_temp_limit(struct dsa_switch *ds, int *temp); -int 
mv88e6xxx_set_temp_limit(struct dsa_switch *ds, int temp); -int mv88e6xxx_get_temp_alarm(struct dsa_switch *ds, bool *alarm); -int mv88e6xxx_eeprom_load_wait(struct dsa_switch *ds); -int mv88e6xxx_eeprom_busy_wait(struct dsa_switch *ds); -int mv88e6xxx_phy_read_indirect(struct dsa_switch *ds, int addr, int regnum); -int mv88e6xxx_phy_write_indirect(struct dsa_switch *ds, int addr, int regnum, - u16 val); -int mv88e6xxx_get_eee(struct dsa_switch *ds, int port, struct ethtool_eee *e); -int mv88e6xxx_set_eee(struct dsa_switch *ds, int port, - struct phy_device *phydev, struct ethtool_eee *e); -int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, int port, - struct net_device *bridge); -void mv88e6xxx_port_bridge_leave(struct dsa_switch *ds, int port); -void mv88e6xxx_port_stp_state_set(struct dsa_switch *ds, int port, u8 state); -int mv88e6xxx_port_vlan_filtering(struct dsa_switch *ds, int port, - bool vlan_filtering); -int mv88e6xxx_port_vlan_prepare(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan, - struct switchdev_trans *trans); -void mv88e6xxx_port_vlan_add(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan, - struct switchdev_trans *trans); -int mv88e6xxx_port_vlan_del(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan); -int mv88e6xxx_port_vlan_dump(struct dsa_switch *ds, int port, - struct switchdev_obj_port_vlan *vlan, - int (*cb)(struct switchdev_obj *obj)); -int mv88e6xxx_port_fdb_prepare(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_fdb *fdb, - struct switchdev_trans *trans); -void mv88e6xxx_port_fdb_add(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_fdb *fdb, - struct switchdev_trans *trans); -int mv88e6xxx_port_fdb_del(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_fdb *fdb); -int mv88e6xxx_port_fdb_dump(struct dsa_switch *ds, int port, - struct switchdev_obj_port_fdb *fdb, - int (*cb)(struct switchdev_obj 
*obj)); -int mv88e6xxx_phy_page_read(struct dsa_switch *ds, int port, int page, int reg); -int mv88e6xxx_phy_page_write(struct dsa_switch *ds, int port, int page, - int reg, int val); - -extern struct dsa_switch_driver mv88e6131_switch_driver; -extern struct dsa_switch_driver mv88e6123_switch_driver; -extern struct dsa_switch_driver mv88e6352_switch_driver; -extern struct dsa_switch_driver mv88e6171_switch_driver; +static inline bool mv88e6xxx_has(struct mv88e6xxx_priv_state *ps, + unsigned long flags) +{ + return (ps->info->flags & flags) == flags; +} #endif diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index 409152b21191..aa87049c353d 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -1608,21 +1608,22 @@ static int xgene_enet_probe(struct platform_device *pdev) ret = xgene_enet_init_hw(pdata); if (ret) - goto err; + goto err_netdev; mac_ops = pdata->mac_ops; if (pdata->phy_mode == PHY_INTERFACE_MODE_RGMII) { ret = xgene_enet_mdio_config(pdata); if (ret) - goto err; + goto err_netdev; } else { INIT_DELAYED_WORK(&pdata->link_work, mac_ops->link_state); } xgene_enet_napi_add(pdata); return 0; -err: +err_netdev: unregister_netdev(ndev); +err: free_netdev(ndev); return ret; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index fd85b6dd4a6e..6a5a71710fa9 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1439,6 +1439,10 @@ static int bnxt_poll_work(struct bnxt *bp, struct bnxt_napi *bnapi, int budget) if (!TX_CMP_VALID(txcmp, raw_cons)) break; + /* The valid test of the entry must be done first before + * reading any further. + */ + rmb(); if (TX_CMP_TYPE(txcmp) == CMP_TYPE_TX_L2_CMP) { tx_pkts++; /* return full budget so NAPI will complete. 
*/ @@ -4096,9 +4100,11 @@ static int bnxt_alloc_rfs_vnics(struct bnxt *bp) } static int bnxt_cfg_rx_mode(struct bnxt *); +static bool bnxt_mc_list_updated(struct bnxt *, u32 *); static int bnxt_init_chip(struct bnxt *bp, bool irq_re_init) { + struct bnxt_vnic_info *vnic = &bp->vnic_info[0]; int rc = 0; if (irq_re_init) { @@ -4154,13 +4160,22 @@ static int bnxt_init_chip(struct bnxt *bp, bool irq_re_init) netdev_err(bp->dev, "HWRM vnic filter failure rc: %x\n", rc); goto err_out; } - bp->vnic_info[0].uc_filter_count = 1; + vnic->uc_filter_count = 1; - bp->vnic_info[0].rx_mask = CFA_L2_SET_RX_MASK_REQ_MASK_BCAST; + vnic->rx_mask = CFA_L2_SET_RX_MASK_REQ_MASK_BCAST; if ((bp->dev->flags & IFF_PROMISC) && BNXT_PF(bp)) - bp->vnic_info[0].rx_mask |= - CFA_L2_SET_RX_MASK_REQ_MASK_PROMISCUOUS; + vnic->rx_mask |= CFA_L2_SET_RX_MASK_REQ_MASK_PROMISCUOUS; + + if (bp->dev->flags & IFF_ALLMULTI) { + vnic->rx_mask |= CFA_L2_SET_RX_MASK_REQ_MASK_ALL_MCAST; + vnic->mc_list_count = 0; + } else { + u32 mask = 0; + + bnxt_mc_list_updated(bp, &mask); + vnic->rx_mask |= mask; + } rc = bnxt_cfg_rx_mode(bp); if (rc) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index bfa10c3da35f..c9f77c324535 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1521,9 +1521,15 @@ fec_enet_rx(struct net_device *ndev, int budget) struct fec_enet_private *fep = netdev_priv(ndev); for_each_set_bit(queue_id, &fep->work_rx, FEC_ENET_MAX_RX_QS) { - clear_bit(queue_id, &fep->work_rx); - pkt_received += fec_enet_rx_queue(ndev, + int ret; + + ret = fec_enet_rx_queue(ndev, budget - pkt_received, queue_id); + + if (ret < budget - pkt_received) + clear_bit(queue_id, &fep->work_rx); + + pkt_received += ret; } return pkt_received; } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 8ef6875b6cf9..c1b3a9c8cf3b 100644 --- 
a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -698,7 +698,7 @@ static int get_fixed_ipv6_csum(__wsum hw_checksum, struct sk_buff *skb, if (ipv6h->nexthdr == IPPROTO_FRAGMENT || ipv6h->nexthdr == IPPROTO_HOPOPTS) return -1; - hw_checksum = csum_add(hw_checksum, (__force __wsum)(ipv6h->nexthdr << 8)); + hw_checksum = csum_add(hw_checksum, (__force __wsum)htons(ipv6h->nexthdr)); csum_pseudo_hdr = csum_partial(&ipv6h->saddr, sizeof(ipv6h->saddr) + sizeof(ipv6h->daddr), 0); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index 559d11a443bc..1cf722eba607 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -14,7 +14,6 @@ config MLX5_CORE_EN bool "Mellanox Technologies ConnectX-4 Ethernet support" depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE select PTP_1588_CLOCK - select VXLAN if MLX5_CORE=y default n ---help--- Ethernet support in Mellanox Technologies ConnectX-4 NIC. 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h index 129f3527aa14..5def12c048e3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h @@ -53,9 +53,10 @@ static inline bool mlx5e_vxlan_allowed(struct mlx5_core_dev *mdev) } void mlx5e_vxlan_init(struct mlx5e_priv *priv); +void mlx5e_vxlan_cleanup(struct mlx5e_priv *priv); + void mlx5e_vxlan_queue_work(struct mlx5e_priv *priv, sa_family_t sa_family, u16 port, int add); struct mlx5e_vxlan *mlx5e_vxlan_lookup_port(struct mlx5e_priv *priv, u16 port); -void mlx5e_vxlan_cleanup(struct mlx5e_priv *priv); #endif /* __MLX5_VXLAN_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 79cdd81d55ab..4a7273771028 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -2843,11 +2843,11 @@ static int mlxsw_sp_port_lag_join(struct mlxsw_sp_port *mlxsw_sp_port, lag->ref_count++; return 0; +err_col_port_enable: + mlxsw_sp_lag_col_port_remove(mlxsw_sp_port, lag_id); err_col_port_add: if (!lag->ref_count) mlxsw_sp_lag_destroy(mlxsw_sp, lag_id); -err_col_port_enable: - mlxsw_sp_lag_col_port_remove(mlxsw_sp_port, lag_id); return err; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index fb9efb84f13b..3710f19ed6bb 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -214,7 +214,15 @@ static int __mlxsw_sp_port_flood_set(struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_reg_sftr_pack(sftr_pl, MLXSW_SP_FLOOD_TABLE_BM, idx_begin, table_type, range, local_port, set); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sftr), sftr_pl); + if (err) + goto err_flood_bm_set; + else + goto buffer_out; +err_flood_bm_set: + 
mlxsw_reg_sftr_pack(sftr_pl, MLXSW_SP_FLOOD_TABLE_UC, idx_begin, + table_type, range, local_port, !set); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sftr), sftr_pl); buffer_out: kfree(sftr_pl); return err; diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c index db80eb1c6d4f..2b10f1bcd151 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c @@ -1015,20 +1015,24 @@ static int netxen_get_flash_block(struct netxen_adapter *adapter, int base, { int i, v, addr; __le32 *ptr32; + int ret; addr = base; ptr32 = buf; for (i = 0; i < size / sizeof(u32); i++) { - if (netxen_rom_fast_read(adapter, addr, &v) == -1) - return -1; + ret = netxen_rom_fast_read(adapter, addr, &v); + if (ret) + return ret; + *ptr32 = cpu_to_le32(v); ptr32++; addr += sizeof(u32); } if ((char *)buf + size > (char *)ptr32) { __le32 local; - if (netxen_rom_fast_read(adapter, addr, &v) == -1) - return -1; + ret = netxen_rom_fast_read(adapter, addr, &v); + if (ret) + return ret; local = cpu_to_le32(v); memcpy(ptr32, &local, (char *)buf + size - (char *)ptr32); } @@ -1940,7 +1944,7 @@ void netxen_nic_set_link_parameters(struct netxen_adapter *adapter) if (adapter->phy_read && adapter->phy_read(adapter, NETXEN_NIU_GB_MII_MGMT_ADDR_AUTONEG, - &autoneg) != 0) + &autoneg) == 0) adapter->link_autoneg = autoneg; } else goto link_down; diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c index cad37af1517d..7a0281a36c28 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c @@ -852,7 +852,8 @@ netxen_check_options(struct netxen_adapter *adapter) ptr32 = (__le32 *)&serial_num; offset = NX_FW_SERIAL_NUM_OFFSET; for (i = 0; i < 8; i++) { - if (netxen_rom_fast_read(adapter, offset, &val) == -1) { + err = netxen_rom_fast_read(adapter, offset, &val); + 
if (err) { dev_err(&pdev->dev, "error reading board info\n"); adapter->driver_mismatch = 1; return; diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 82d85ccc9ed1..075faa52eb48 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -429,7 +429,7 @@ netdev_tx_t qede_start_xmit(struct sk_buff *skb, u8 xmit_type; u16 idx; u16 hlen; - bool data_split; + bool data_split = false; /* Get tx-queue context and netdev index */ txq_index = skb_get_queue_mapping(skb); @@ -2094,8 +2094,6 @@ static struct qede_dev *qede_alloc_etherdev(struct qed_dev *cdev, edev->q_num_rx_buffers = NUM_RX_BDS_DEF; edev->q_num_tx_buffers = NUM_TX_BDS_DEF; - DP_INFO(edev, "Allocated netdev with 64 tx queues and 64 rx queues\n"); - SET_NETDEV_DEV(ndev, &pdev->dev); memset(&edev->stats, 0, sizeof(edev->stats)); @@ -2274,9 +2272,9 @@ static void qede_update_pf_params(struct qed_dev *cdev) { struct qed_pf_params pf_params; - /* 16 rx + 16 tx */ + /* 64 rx + 64 tx */ memset(&pf_params, 0, sizeof(struct qed_pf_params)); - pf_params.eth_pf_params.num_cons = 32; + pf_params.eth_pf_params.num_cons = 128; qed_ops->common->update_pf_params(cdev, &pf_params); } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c index cd9764a6a36f..f13499fa1f58 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c @@ -271,7 +271,8 @@ static int socfpga_dwmac_resume(struct device *dev) } #endif /* CONFIG_PM_SLEEP */ -SIMPLE_DEV_PM_OPS(socfpga_dwmac_pm_ops, stmmac_suspend, socfpga_dwmac_resume); +static SIMPLE_DEV_PM_OPS(socfpga_dwmac_pm_ops, stmmac_suspend, + socfpga_dwmac_resume); static const struct of_device_id socfpga_dwmac_match[] = { { .compatible = "altr,socfpga-stmmac" }, diff --git a/drivers/net/fjes/fjes_main.c b/drivers/net/fjes/fjes_main.c index 
f4e69261a3ce..86c331bb5eb3 100644 --- a/drivers/net/fjes/fjes_main.c +++ b/drivers/net/fjes/fjes_main.c @@ -819,7 +819,6 @@ static int fjes_change_mtu(struct net_device *netdev, int new_mtu) netdev->mtu = new_mtu; if (running) { - spin_lock_irqsave(&hw->rx_status_lock, flags); for (epidx = 0; epidx < hw->max_epid; epidx++) { if (epidx == hw->my_epid) continue; diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 9c40b88fabd5..a6dc11ce497f 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -495,8 +495,6 @@ static int geneve_gro_complete(struct sock *sk, struct sk_buff *skb, int gh_len; int err = -ENOSYS; - udp_tunnel_gro_complete(skb, nhoff); - gh = (struct genevehdr *)(skb->data + nhoff); gh_len = geneve_hlen(gh); type = gh->proto_type; @@ -507,6 +505,9 @@ static int geneve_gro_complete(struct sock *sk, struct sk_buff *skb, err = ptype->callbacks.gro_complete(skb, nhoff + gh_len); rcu_read_unlock(); + + skb_set_inner_mac_header(skb, nhoff + gh_len); + return err; } diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c new file mode 100644 index 000000000000..8ce1104e4fdb --- /dev/null +++ b/drivers/net/gtp.c @@ -0,0 +1,1364 @@ +/* GTP according to GSM TS 09.60 / 3GPP TS 29.060 + * + * (C) 2012-2014 by sysmocom - s.f.m.c. GmbH + * (C) 2016 by Pablo Neira Ayuso <pablo@netfilter.org> + * + * Author: Harald Welte <hwelte@sysmocom.de> + * Pablo Neira Ayuso <pablo@netfilter.org> + * Andreas Schultz <aschultz@travelping.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/version.h>
+#include <linux/skbuff.h>
+#include <linux/udp.h>
+#include <linux/rculist.h>
+#include <linux/jhash.h>
+#include <linux/if_tunnel.h>
+#include <linux/net.h>
+#include <linux/file.h>
+#include <linux/gtp.h>
+
+#include <net/net_namespace.h>
+#include <net/protocol.h>
+#include <net/ip.h>
+#include <net/udp.h>
+#include <net/udp_tunnel.h>
+#include <net/icmp.h>
+#include <net/xfrm.h>
+#include <net/genetlink.h>
+#include <net/netns/generic.h>
+#include <net/gtp.h>
+
+/* An active session for the subscriber.
+ *
+ * Each context is linked into two hash tables of the owning gtp_dev:
+ * tid_hash (through hlist_tid) for lookups by tunnel identifier, and
+ * addr_hash (through hlist_addr) for lookups by the MS IPv4 address.
+ */
+struct pdp_ctx {
+	struct hlist_node hlist_tid;	/* linkage in gtp_dev->tid_hash */
+	struct hlist_node hlist_addr;	/* linkage in gtp_dev->addr_hash */
+
+	union {
+		u64 tid;
+		struct {
+			u64 tid;	/* matched on rx, written to the GTPv0 header on tx */
+			u16 flow;	/* copied to the GTPv0 header "flow" field on tx */
+		} v0;
+		struct {
+			u32 i_tei;	/* matched against the TID of received GTPv1 packets */
+			u32 o_tei;	/* written to the TID of transmitted GTPv1 packets */
+		} v1;
+	} u;
+	u8 gtp_version;	/* compared against GTP_V0 / GTP_V1 to select the union member */
+	u16 af;	/* address family; the IPv4 lookup only accepts AF_INET */
+
+	struct in_addr ms_addr_ip4;	/* MS address, key for addr_hash lookups */
+	struct in_addr sgsn_addr_ip4;	/* peer address used as route destination on tx */
+
+	atomic_t tx_seq;	/* source of the GTPv0 sequence number on tx */
+	struct rcu_head rcu_head;	/* handed to kfree_rcu() when the context is freed */
+};
+
+/* One instance of the GTP device.
+ *
+ * This is the netdev private area (see netdev_priv() users in this file).
+ */
+struct gtp_dev {
+	struct list_head list;	/* entry in gtp_net->gtp_dev_list */
+
+	struct socket *sock0;	/* UDP socket configured with UDP_ENCAP_GTP0 */
+	struct socket *sock1u;	/* UDP socket configured with UDP_ENCAP_GTP1U */
+
+	struct net *net;	/* src_net handed to gtp_encap_enable() */
+	struct net_device *dev;	/* back-pointer, set in gtp_dev_init() */
+
+	unsigned int hash_size;	/* number of buckets in each of the two tables below */
+	struct hlist_head *tid_hash;	/* contexts hashed by GTPv0 TID / GTPv1 i_tei */
+	struct hlist_head *addr_hash;	/* contexts hashed by MS IPv4 address */
+};
+
+static int gtp_net_id __read_mostly;	/* id passed to net_generic() to get struct gtp_net */
+
+struct gtp_net {	/* looked up with net_generic(net, gtp_net_id) */
+	struct list_head gtp_dev_list;	/* gtp_dev instances, linked through gtp_dev->list */
+};
+
+static u32 gtp_h_initval;	/* initval passed to every jhash call below */
+
+static inline u32 gtp0_hashfn(u64 tid)
+{
+	u32 *tid32 = (u32 *) &tid;	/* view the 64-bit TID as two 32-bit words */
+	return jhash_2words(tid32[0], tid32[1], gtp_h_initval);
+}
+
+static inline u32 gtp1u_hashfn(u32 tid)
+{
+	return jhash_1word(tid, gtp_h_initval);
+}
+
+static inline u32 ipv4_hashfn(__be32 ip)
+{
+	return jhash_1word((__force u32)ip, gtp_h_initval);	/* hashes the raw big-endian value */
+}
+
+/* Resolve a PDP context structure based on the 64bit TID.
*/
+static struct pdp_ctx *gtp0_pdp_find(struct gtp_dev *gtp, u64 tid)
+{
+	struct hlist_head *head;
+	struct pdp_ctx *pdp;
+
+	head = &gtp->tid_hash[gtp0_hashfn(tid) % gtp->hash_size];
+
+	/* tid_hash is shared by GTPv0 and GTPv1 contexts, so the version has
+	 * to match as well as the identifier.  The bucket is walked with the
+	 * RCU list iterator; the callers in this file hold rcu_read_lock().
+	 */
+	hlist_for_each_entry_rcu(pdp, head, hlist_tid) {
+		if (pdp->gtp_version == GTP_V0 &&
+		    pdp->u.v0.tid == tid)
+			return pdp;
+	}
+	return NULL;
+}
+
+/* Resolve a PDP context structure based on the 32bit TEI.
+ *
+ * The TEI is compared against the context's i_tei.  Returns NULL when no
+ * GTPv1 context in the bucket matches.
+ */
+static struct pdp_ctx *gtp1_pdp_find(struct gtp_dev *gtp, u32 tid)
+{
+	struct hlist_head *head;
+	struct pdp_ctx *pdp;
+
+	head = &gtp->tid_hash[gtp1u_hashfn(tid) % gtp->hash_size];
+
+	hlist_for_each_entry_rcu(pdp, head, hlist_tid) {
+		if (pdp->gtp_version == GTP_V1 &&
+		    pdp->u.v1.i_tei == tid)
+			return pdp;
+	}
+	return NULL;
+}
+
+/* Resolve a PDP context based on IPv4 address of MS.
+ *
+ * ms_addr is in network byte order.  Returns NULL when no AF_INET context
+ * with that MS address is found in the bucket.
+ */
+static struct pdp_ctx *ipv4_pdp_find(struct gtp_dev *gtp, __be32 ms_addr)
+{
+	struct hlist_head *head;
+	struct pdp_ctx *pdp;
+
+	head = &gtp->addr_hash[ipv4_hashfn(ms_addr) % gtp->hash_size];
+
+	hlist_for_each_entry_rcu(pdp, head, hlist_addr) {
+		if (pdp->af == AF_INET &&
+		    pdp->ms_addr_ip4.s_addr == ms_addr)
+			return pdp;
+	}
+
+	return NULL;
+}
+
+/* Check that the inner IPv4 source address of a received packet is the MS
+ * address recorded in the PDP context.
+ *
+ * hdrlen is the number of bytes (UDP + GTP headers) that precede the inner
+ * IP header at skb->data.  Returns true when the addresses match, false when
+ * they differ or when the inner IP header cannot be pulled.
+ */
+static bool gtp_check_src_ms_ipv4(struct sk_buff *skb, struct pdp_ctx *pctx,
+				  unsigned int hdrlen)
+{
+	struct iphdr *iph;
+
+	if (!pskb_may_pull(skb, hdrlen + sizeof(struct iphdr)))
+		return false;
+
+	/* The inner IP header starts right after the UDP + GTP headers; only
+	 * hdrlen + sizeof(struct iphdr) bytes were validated above, so it
+	 * must not be read any further in.
+	 */
+	iph = (struct iphdr *)(skb->data + hdrlen);
+
+	/* Callers drop the packet when this returns false, so a match has to
+	 * yield true.
+	 */
+	return iph->saddr == pctx->ms_addr_ip4.s_addr;
+}
+
+/* Check if the inner IP source address in this packet is assigned to any
+ * existing mobile subscriber.
+ *
+ * Only ETH_P_IP is handled; any other protocol is reported as no match.
+ */
+static bool gtp_check_src_ms(struct sk_buff *skb, struct pdp_ctx *pctx,
+			     unsigned int hdrlen)
+{
+	switch (ntohs(skb->protocol)) {
+	case ETH_P_IP:
+		return gtp_check_src_ms_ipv4(skb, pctx, hdrlen);
+	}
+	return false;
+}
+
+/* 1 means pass up to the stack, -1 means drop and 0 means decapsulated.
*/ +static int gtp0_udp_encap_recv(struct gtp_dev *gtp, struct sk_buff *skb, + bool xnet) +{ + unsigned int hdrlen = sizeof(struct udphdr) + + sizeof(struct gtp0_header); + struct gtp0_header *gtp0; + struct pdp_ctx *pctx; + int ret = 0; + + if (!pskb_may_pull(skb, hdrlen)) + return -1; + + gtp0 = (struct gtp0_header *)(skb->data + sizeof(struct udphdr)); + + if ((gtp0->flags >> 5) != GTP_V0) + return 1; + + if (gtp0->type != GTP_TPDU) + return 1; + + rcu_read_lock(); + pctx = gtp0_pdp_find(gtp, be64_to_cpu(gtp0->tid)); + if (!pctx) { + netdev_dbg(gtp->dev, "No PDP ctx to decap skb=%p\n", skb); + ret = -1; + goto out_rcu; + } + + if (!gtp_check_src_ms(skb, pctx, hdrlen)) { + netdev_dbg(gtp->dev, "No PDP ctx for this MS\n"); + ret = -1; + goto out_rcu; + } + rcu_read_unlock(); + + /* Get rid of the GTP + UDP headers. */ + return iptunnel_pull_header(skb, hdrlen, skb->protocol, xnet); +out_rcu: + rcu_read_unlock(); + return ret; +} + +static int gtp1u_udp_encap_recv(struct gtp_dev *gtp, struct sk_buff *skb, + bool xnet) +{ + unsigned int hdrlen = sizeof(struct udphdr) + + sizeof(struct gtp1_header); + struct gtp1_header *gtp1; + struct pdp_ctx *pctx; + int ret = 0; + + if (!pskb_may_pull(skb, hdrlen)) + return -1; + + gtp1 = (struct gtp1_header *)(skb->data + sizeof(struct udphdr)); + + if ((gtp1->flags >> 5) != GTP_V1) + return 1; + + if (gtp1->type != GTP_TPDU) + return 1; + + /* From 29.060: "This field shall be present if and only if any one or + * more of the S, PN and E flags are set.". + * + * If any of the bit is set, then the remaining ones also have to be + * set. + */ + if (gtp1->flags & GTP1_F_MASK) + hdrlen += 4; + + /* Make sure the header is larger enough, including extensions. 
*/ + if (!pskb_may_pull(skb, hdrlen)) + return -1; + + rcu_read_lock(); + pctx = gtp1_pdp_find(gtp, ntohl(gtp1->tid)); + if (!pctx) { + netdev_dbg(gtp->dev, "No PDP ctx to decap skb=%p\n", skb); + ret = -1; + goto out_rcu; + } + + if (!gtp_check_src_ms(skb, pctx, hdrlen)) { + netdev_dbg(gtp->dev, "No PDP ctx for this MS\n"); + ret = -1; + goto out_rcu; + } + rcu_read_unlock(); + + /* Get rid of the GTP + UDP headers. */ + return iptunnel_pull_header(skb, hdrlen, skb->protocol, xnet); +out_rcu: + rcu_read_unlock(); + return ret; +} + +static void gtp_encap_disable(struct gtp_dev *gtp) +{ + if (gtp->sock0 && gtp->sock0->sk) { + udp_sk(gtp->sock0->sk)->encap_type = 0; + rcu_assign_sk_user_data(gtp->sock0->sk, NULL); + } + if (gtp->sock1u && gtp->sock1u->sk) { + udp_sk(gtp->sock1u->sk)->encap_type = 0; + rcu_assign_sk_user_data(gtp->sock1u->sk, NULL); + } + + gtp->sock0 = NULL; + gtp->sock1u = NULL; +} + +static void gtp_encap_destroy(struct sock *sk) +{ + struct gtp_dev *gtp; + + gtp = rcu_dereference_sk_user_data(sk); + if (gtp) + gtp_encap_disable(gtp); +} + +/* UDP encapsulation receive handler. See net/ipv4/udp.c. + * Return codes: 0: success, <0: error, >0: pass up to userspace UDP socket. + */ +static int gtp_encap_recv(struct sock *sk, struct sk_buff *skb) +{ + struct pcpu_sw_netstats *stats; + struct gtp_dev *gtp; + bool xnet; + int ret; + + gtp = rcu_dereference_sk_user_data(sk); + if (!gtp) + return 1; + + netdev_dbg(gtp->dev, "encap_recv sk=%p\n", sk); + + xnet = !net_eq(gtp->net, dev_net(gtp->dev)); + + switch (udp_sk(sk)->encap_type) { + case UDP_ENCAP_GTP0: + netdev_dbg(gtp->dev, "received GTP0 packet\n"); + ret = gtp0_udp_encap_recv(gtp, skb, xnet); + break; + case UDP_ENCAP_GTP1U: + netdev_dbg(gtp->dev, "received GTP1U packet\n"); + ret = gtp1u_udp_encap_recv(gtp, skb, xnet); + break; + default: + ret = -1; /* Shouldn't happen. 
*/ + } + + switch (ret) { + case 1: + netdev_dbg(gtp->dev, "pass up to the process\n"); + return 1; + case 0: + netdev_dbg(gtp->dev, "forwarding packet from GGSN to uplink\n"); + break; + case -1: + netdev_dbg(gtp->dev, "GTP packet has been dropped\n"); + kfree_skb(skb); + return 0; + } + + /* Now that the UDP and the GTP header have been removed, set up the + * new network header. This is required by the upper layer to + * calculate the transport header. + */ + skb_reset_network_header(skb); + + skb->dev = gtp->dev; + + stats = this_cpu_ptr(gtp->dev->tstats); + u64_stats_update_begin(&stats->syncp); + stats->rx_packets++; + stats->rx_bytes += skb->len; + u64_stats_update_end(&stats->syncp); + + netif_rx(skb); + + return 0; +} + +static int gtp_dev_init(struct net_device *dev) +{ + struct gtp_dev *gtp = netdev_priv(dev); + + gtp->dev = dev; + + dev->tstats = alloc_percpu(struct pcpu_sw_netstats); + if (!dev->tstats) + return -ENOMEM; + + return 0; +} + +static void gtp_dev_uninit(struct net_device *dev) +{ + struct gtp_dev *gtp = netdev_priv(dev); + + gtp_encap_disable(gtp); + free_percpu(dev->tstats); +} + +static struct rtable *ip4_route_output_gtp(struct net *net, struct flowi4 *fl4, + const struct sock *sk, __be32 daddr) +{ + memset(fl4, 0, sizeof(*fl4)); + fl4->flowi4_oif = sk->sk_bound_dev_if; + fl4->daddr = daddr; + fl4->saddr = inet_sk(sk)->inet_saddr; + fl4->flowi4_tos = RT_CONN_FLAGS(sk); + fl4->flowi4_proto = sk->sk_protocol; + + return ip_route_output_key(net, fl4); +} + +static inline void gtp0_push_header(struct sk_buff *skb, struct pdp_ctx *pctx) +{ + int payload_len = skb->len; + struct gtp0_header *gtp0; + + gtp0 = (struct gtp0_header *) skb_push(skb, sizeof(*gtp0)); + + gtp0->flags = 0x1e; /* v0, GTP-non-prime. 
*/ + gtp0->type = GTP_TPDU; + gtp0->length = htons(payload_len); + gtp0->seq = htons((atomic_inc_return(&pctx->tx_seq) - 1) % 0xffff); + gtp0->flow = htons(pctx->u.v0.flow); + gtp0->number = 0xff; + gtp0->spare[0] = gtp0->spare[1] = gtp0->spare[2] = 0xff; + gtp0->tid = cpu_to_be64(pctx->u.v0.tid); +} + +static inline void gtp1_push_header(struct sk_buff *skb, struct pdp_ctx *pctx) +{ + int payload_len = skb->len; + struct gtp1_header *gtp1; + + gtp1 = (struct gtp1_header *) skb_push(skb, sizeof(*gtp1)); + + /* Bits 8 7 6 5 4 3 2 1 + * +--+--+--+--+--+--+--+--+ + * |version |PT| 1| E| S|PN| + * +--+--+--+--+--+--+--+--+ + * 0 0 1 1 1 0 0 0 + */ + gtp1->flags = 0x38; /* v1, GTP-non-prime. */ + gtp1->type = GTP_TPDU; + gtp1->length = htons(payload_len); + gtp1->tid = htonl(pctx->u.v1.o_tei); + + /* TODO: Suppport for extension header, sequence number and N-PDU. + * Update the length field if any of them is available. + */ +} + +struct gtp_pktinfo { + struct sock *sk; + struct iphdr *iph; + struct flowi4 fl4; + struct rtable *rt; + struct pdp_ctx *pctx; + struct net_device *dev; + __be16 gtph_port; +}; + +static void gtp_push_header(struct sk_buff *skb, struct gtp_pktinfo *pktinfo) +{ + switch (pktinfo->pctx->gtp_version) { + case GTP_V0: + pktinfo->gtph_port = htons(GTP0_PORT); + gtp0_push_header(skb, pktinfo->pctx); + break; + case GTP_V1: + pktinfo->gtph_port = htons(GTP1U_PORT); + gtp1_push_header(skb, pktinfo->pctx); + break; + } +} + +static inline void gtp_set_pktinfo_ipv4(struct gtp_pktinfo *pktinfo, + struct sock *sk, struct iphdr *iph, + struct pdp_ctx *pctx, struct rtable *rt, + struct flowi4 *fl4, + struct net_device *dev) +{ + pktinfo->sk = sk; + pktinfo->iph = iph; + pktinfo->pctx = pctx; + pktinfo->rt = rt; + pktinfo->fl4 = *fl4; + pktinfo->dev = dev; +} + +static int gtp_build_skb_ip4(struct sk_buff *skb, struct net_device *dev, + struct gtp_pktinfo *pktinfo) +{ + struct gtp_dev *gtp = netdev_priv(dev); + struct pdp_ctx *pctx; + struct rtable *rt; + 
struct flowi4 fl4; + struct iphdr *iph; + struct sock *sk; + __be16 df; + int mtu; + + /* Read the IP destination address and resolve the PDP context. + * Prepend PDP header with TEI/TID from PDP ctx. + */ + iph = ip_hdr(skb); + pctx = ipv4_pdp_find(gtp, iph->daddr); + if (!pctx) { + netdev_dbg(dev, "no PDP ctx found for %pI4, skip\n", + &iph->daddr); + return -ENOENT; + } + netdev_dbg(dev, "found PDP context %p\n", pctx); + + switch (pctx->gtp_version) { + case GTP_V0: + if (gtp->sock0) + sk = gtp->sock0->sk; + else + sk = NULL; + break; + case GTP_V1: + if (gtp->sock1u) + sk = gtp->sock1u->sk; + else + sk = NULL; + break; + default: + return -ENOENT; + } + + if (!sk) { + netdev_dbg(dev, "no userspace socket is available, skip\n"); + return -ENOENT; + } + + rt = ip4_route_output_gtp(sock_net(sk), &fl4, gtp->sock0->sk, + pctx->sgsn_addr_ip4.s_addr); + if (IS_ERR(rt)) { + netdev_dbg(dev, "no route to SSGN %pI4\n", + &pctx->sgsn_addr_ip4.s_addr); + dev->stats.tx_carrier_errors++; + goto err; + } + + if (rt->dst.dev == dev) { + netdev_dbg(dev, "circular route to SSGN %pI4\n", + &pctx->sgsn_addr_ip4.s_addr); + dev->stats.collisions++; + goto err_rt; + } + + skb_dst_drop(skb); + + /* This is similar to tnl_update_pmtu(). 
*/ + df = iph->frag_off; + if (df) { + mtu = dst_mtu(&rt->dst) - dev->hard_header_len - + sizeof(struct iphdr) - sizeof(struct udphdr); + switch (pctx->gtp_version) { + case GTP_V0: + mtu -= sizeof(struct gtp0_header); + break; + case GTP_V1: + mtu -= sizeof(struct gtp1_header); + break; + } + } else { + mtu = dst_mtu(&rt->dst); + } + + rt->dst.ops->update_pmtu(&rt->dst, NULL, skb, mtu); + + if (!skb_is_gso(skb) && (iph->frag_off & htons(IP_DF)) && + mtu < ntohs(iph->tot_len)) { + netdev_dbg(dev, "packet too big, fragmentation needed\n"); + memset(IPCB(skb), 0, sizeof(*IPCB(skb))); + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, + htonl(mtu)); + goto err_rt; + } + + gtp_set_pktinfo_ipv4(pktinfo, sk, iph, pctx, rt, &fl4, dev); + gtp_push_header(skb, pktinfo); + + return 0; +err_rt: + ip_rt_put(rt); +err: + return -EBADMSG; +} + +static netdev_tx_t gtp_dev_xmit(struct sk_buff *skb, struct net_device *dev) +{ + unsigned int proto = ntohs(skb->protocol); + struct gtp_pktinfo pktinfo; + int err; + + /* Ensure there is sufficient headroom. */ + if (skb_cow_head(skb, dev->needed_headroom)) + goto tx_err; + + skb_reset_inner_headers(skb); + + /* PDP context lookups in gtp_build_skb_*() need rcu read-side lock. 
*/ + rcu_read_lock(); + switch (proto) { + case ETH_P_IP: + err = gtp_build_skb_ip4(skb, dev, &pktinfo); + break; + default: + err = -EOPNOTSUPP; + break; + } + rcu_read_unlock(); + + if (err < 0) + goto tx_err; + + switch (proto) { + case ETH_P_IP: + netdev_dbg(pktinfo.dev, "gtp -> IP src: %pI4 dst: %pI4\n", + &pktinfo.iph->saddr, &pktinfo.iph->daddr); + udp_tunnel_xmit_skb(pktinfo.rt, pktinfo.sk, skb, + pktinfo.fl4.saddr, pktinfo.fl4.daddr, + pktinfo.iph->tos, + ip4_dst_hoplimit(&pktinfo.rt->dst), + htons(IP_DF), + pktinfo.gtph_port, pktinfo.gtph_port, + true, false); + break; + } + + return NETDEV_TX_OK; +tx_err: + dev->stats.tx_errors++; + dev_kfree_skb(skb); + return NETDEV_TX_OK; +} + +static const struct net_device_ops gtp_netdev_ops = { + .ndo_init = gtp_dev_init, + .ndo_uninit = gtp_dev_uninit, + .ndo_start_xmit = gtp_dev_xmit, + .ndo_get_stats64 = ip_tunnel_get_stats64, +}; + +static void gtp_link_setup(struct net_device *dev) +{ + dev->netdev_ops = >p_netdev_ops; + dev->destructor = free_netdev; + + dev->hard_header_len = 0; + dev->addr_len = 0; + + /* Zero header length. */ + dev->type = ARPHRD_NONE; + dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST; + + dev->priv_flags |= IFF_NO_QUEUE; + dev->features |= NETIF_F_LLTX; + netif_keep_dst(dev); + + /* Assume largest header, ie. GTPv0. 
*/ + dev->needed_headroom = LL_MAX_HEADER + + sizeof(struct iphdr) + + sizeof(struct udphdr) + + sizeof(struct gtp0_header); +} + +static int gtp_hashtable_new(struct gtp_dev *gtp, int hsize); +static void gtp_hashtable_free(struct gtp_dev *gtp); +static int gtp_encap_enable(struct net_device *dev, struct gtp_dev *gtp, + int fd_gtp0, int fd_gtp1, struct net *src_net); + +static int gtp_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) +{ + int hashsize, err, fd0, fd1; + struct gtp_dev *gtp; + struct gtp_net *gn; + + if (!data[IFLA_GTP_FD0] || !data[IFLA_GTP_FD1]) + return -EINVAL; + + gtp = netdev_priv(dev); + + fd0 = nla_get_u32(data[IFLA_GTP_FD0]); + fd1 = nla_get_u32(data[IFLA_GTP_FD1]); + + err = gtp_encap_enable(dev, gtp, fd0, fd1, src_net); + if (err < 0) + goto out_err; + + if (!data[IFLA_GTP_PDP_HASHSIZE]) + hashsize = 1024; + else + hashsize = nla_get_u32(data[IFLA_GTP_PDP_HASHSIZE]); + + err = gtp_hashtable_new(gtp, hashsize); + if (err < 0) + goto out_encap; + + err = register_netdevice(dev); + if (err < 0) { + netdev_dbg(dev, "failed to register new netdev %d\n", err); + goto out_hashtable; + } + + gn = net_generic(dev_net(dev), gtp_net_id); + list_add_rcu(>p->list, &gn->gtp_dev_list); + + netdev_dbg(dev, "registered new GTP interface\n"); + + return 0; + +out_hashtable: + gtp_hashtable_free(gtp); +out_encap: + gtp_encap_disable(gtp); +out_err: + return err; +} + +static void gtp_dellink(struct net_device *dev, struct list_head *head) +{ + struct gtp_dev *gtp = netdev_priv(dev); + + gtp_encap_disable(gtp); + gtp_hashtable_free(gtp); + list_del_rcu(>p->list); + unregister_netdevice_queue(dev, head); +} + +static const struct nla_policy gtp_policy[IFLA_GTP_MAX + 1] = { + [IFLA_GTP_FD0] = { .type = NLA_U32 }, + [IFLA_GTP_FD1] = { .type = NLA_U32 }, + [IFLA_GTP_PDP_HASHSIZE] = { .type = NLA_U32 }, +}; + +static int gtp_validate(struct nlattr *tb[], struct nlattr *data[]) +{ + if (!data) + return -EINVAL; + + 
return 0; +} + +static size_t gtp_get_size(const struct net_device *dev) +{ + return nla_total_size(sizeof(__u32)); /* IFLA_GTP_PDP_HASHSIZE */ +} + +static int gtp_fill_info(struct sk_buff *skb, const struct net_device *dev) +{ + struct gtp_dev *gtp = netdev_priv(dev); + + if (nla_put_u32(skb, IFLA_GTP_PDP_HASHSIZE, gtp->hash_size)) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +static struct rtnl_link_ops gtp_link_ops __read_mostly = { + .kind = "gtp", + .maxtype = IFLA_GTP_MAX, + .policy = gtp_policy, + .priv_size = sizeof(struct gtp_dev), + .setup = gtp_link_setup, + .validate = gtp_validate, + .newlink = gtp_newlink, + .dellink = gtp_dellink, + .get_size = gtp_get_size, + .fill_info = gtp_fill_info, +}; + +static struct net *gtp_genl_get_net(struct net *src_net, struct nlattr *tb[]) +{ + struct net *net; + + /* Examine the link attributes and figure out which network namespace + * we are talking about. + */ + if (tb[GTPA_NET_NS_FD]) + net = get_net_ns_by_fd(nla_get_u32(tb[GTPA_NET_NS_FD])); + else + net = get_net(src_net); + + return net; +} + +static int gtp_hashtable_new(struct gtp_dev *gtp, int hsize) +{ + int i; + + gtp->addr_hash = kmalloc(sizeof(struct hlist_head) * hsize, GFP_KERNEL); + if (gtp->addr_hash == NULL) + return -ENOMEM; + + gtp->tid_hash = kmalloc(sizeof(struct hlist_head) * hsize, GFP_KERNEL); + if (gtp->tid_hash == NULL) + goto err1; + + gtp->hash_size = hsize; + + for (i = 0; i < hsize; i++) { + INIT_HLIST_HEAD(>p->addr_hash[i]); + INIT_HLIST_HEAD(>p->tid_hash[i]); + } + return 0; +err1: + kfree(gtp->addr_hash); + return -ENOMEM; +} + +static void gtp_hashtable_free(struct gtp_dev *gtp) +{ + struct pdp_ctx *pctx; + int i; + + for (i = 0; i < gtp->hash_size; i++) { + hlist_for_each_entry_rcu(pctx, >p->tid_hash[i], hlist_tid) { + hlist_del_rcu(&pctx->hlist_tid); + hlist_del_rcu(&pctx->hlist_addr); + kfree_rcu(pctx, rcu_head); + } + } + synchronize_rcu(); + kfree(gtp->addr_hash); + kfree(gtp->tid_hash); 
+} + +static int gtp_encap_enable(struct net_device *dev, struct gtp_dev *gtp, + int fd_gtp0, int fd_gtp1, struct net *src_net) +{ + struct udp_tunnel_sock_cfg tuncfg = {NULL}; + struct socket *sock0, *sock1u; + int err; + + netdev_dbg(dev, "enable gtp on %d, %d\n", fd_gtp0, fd_gtp1); + + sock0 = sockfd_lookup(fd_gtp0, &err); + if (sock0 == NULL) { + netdev_dbg(dev, "socket fd=%d not found (gtp0)\n", fd_gtp0); + return -ENOENT; + } + + if (sock0->sk->sk_protocol != IPPROTO_UDP) { + netdev_dbg(dev, "socket fd=%d not UDP\n", fd_gtp0); + err = -EINVAL; + goto err1; + } + + sock1u = sockfd_lookup(fd_gtp1, &err); + if (sock1u == NULL) { + netdev_dbg(dev, "socket fd=%d not found (gtp1u)\n", fd_gtp1); + err = -ENOENT; + goto err1; + } + + if (sock1u->sk->sk_protocol != IPPROTO_UDP) { + netdev_dbg(dev, "socket fd=%d not UDP\n", fd_gtp1); + err = -EINVAL; + goto err2; + } + + netdev_dbg(dev, "enable gtp on %p, %p\n", sock0, sock1u); + + gtp->sock0 = sock0; + gtp->sock1u = sock1u; + gtp->net = src_net; + + tuncfg.sk_user_data = gtp; + tuncfg.encap_rcv = gtp_encap_recv; + tuncfg.encap_destroy = gtp_encap_destroy; + + tuncfg.encap_type = UDP_ENCAP_GTP0; + setup_udp_tunnel_sock(sock_net(gtp->sock0->sk), gtp->sock0, &tuncfg); + + tuncfg.encap_type = UDP_ENCAP_GTP1U; + setup_udp_tunnel_sock(sock_net(gtp->sock1u->sk), gtp->sock1u, &tuncfg); + + err = 0; +err2: + sockfd_put(sock1u); +err1: + sockfd_put(sock0); + return err; +} + +static struct net_device *gtp_find_dev(struct net *net, int ifindex) +{ + struct gtp_net *gn = net_generic(net, gtp_net_id); + struct gtp_dev *gtp; + + list_for_each_entry_rcu(gtp, &gn->gtp_dev_list, list) { + if (ifindex == gtp->dev->ifindex) + return gtp->dev; + } + return NULL; +} + +static void ipv4_pdp_fill(struct pdp_ctx *pctx, struct genl_info *info) +{ + pctx->gtp_version = nla_get_u32(info->attrs[GTPA_VERSION]); + pctx->af = AF_INET; + pctx->sgsn_addr_ip4.s_addr = + nla_get_be32(info->attrs[GTPA_SGSN_ADDRESS]); + pctx->ms_addr_ip4.s_addr = + 
nla_get_be32(info->attrs[GTPA_MS_ADDRESS]); + + switch (pctx->gtp_version) { + case GTP_V0: + /* According to TS 09.60, sections 7.5.1 and 7.5.2, the flow + * label needs to be the same for uplink and downlink packets, + * so let's annotate this. + */ + pctx->u.v0.tid = nla_get_u64(info->attrs[GTPA_TID]); + pctx->u.v0.flow = nla_get_u16(info->attrs[GTPA_FLOW]); + break; + case GTP_V1: + pctx->u.v1.i_tei = nla_get_u32(info->attrs[GTPA_I_TEI]); + pctx->u.v1.o_tei = nla_get_u32(info->attrs[GTPA_O_TEI]); + break; + default: + break; + } +} + +static int ipv4_pdp_add(struct net_device *dev, struct genl_info *info) +{ + struct gtp_dev *gtp = netdev_priv(dev); + u32 hash_ms, hash_tid = 0; + struct pdp_ctx *pctx; + bool found = false; + __be32 ms_addr; + + ms_addr = nla_get_be32(info->attrs[GTPA_MS_ADDRESS]); + hash_ms = ipv4_hashfn(ms_addr) % gtp->hash_size; + + hlist_for_each_entry_rcu(pctx, >p->addr_hash[hash_ms], hlist_addr) { + if (pctx->ms_addr_ip4.s_addr == ms_addr) { + found = true; + break; + } + } + + if (found) { + if (info->nlhdr->nlmsg_flags & NLM_F_EXCL) + return -EEXIST; + if (info->nlhdr->nlmsg_flags & NLM_F_REPLACE) + return -EOPNOTSUPP; + + ipv4_pdp_fill(pctx, info); + + if (pctx->gtp_version == GTP_V0) + netdev_dbg(dev, "GTPv0-U: update tunnel id = %llx (pdp %p)\n", + pctx->u.v0.tid, pctx); + else if (pctx->gtp_version == GTP_V1) + netdev_dbg(dev, "GTPv1-U: update tunnel id = %x/%x (pdp %p)\n", + pctx->u.v1.i_tei, pctx->u.v1.o_tei, pctx); + + return 0; + + } + + pctx = kmalloc(sizeof(struct pdp_ctx), GFP_KERNEL); + if (pctx == NULL) + return -ENOMEM; + + ipv4_pdp_fill(pctx, info); + atomic_set(&pctx->tx_seq, 0); + + switch (pctx->gtp_version) { + case GTP_V0: + /* TS 09.60: "The flow label identifies unambiguously a GTP + * flow.". We use the tid for this instead, I cannot find a + * situation in which this doesn't unambiguosly identify the + * PDP context. 
+ */ + hash_tid = gtp0_hashfn(pctx->u.v0.tid) % gtp->hash_size; + break; + case GTP_V1: + hash_tid = gtp1u_hashfn(pctx->u.v1.i_tei) % gtp->hash_size; + break; + } + + hlist_add_head_rcu(&pctx->hlist_addr, >p->addr_hash[hash_ms]); + hlist_add_head_rcu(&pctx->hlist_tid, >p->tid_hash[hash_tid]); + + switch (pctx->gtp_version) { + case GTP_V0: + netdev_dbg(dev, "GTPv0-U: new PDP ctx id=%llx ssgn=%pI4 ms=%pI4 (pdp=%p)\n", + pctx->u.v0.tid, &pctx->sgsn_addr_ip4, + &pctx->ms_addr_ip4, pctx); + break; + case GTP_V1: + netdev_dbg(dev, "GTPv1-U: new PDP ctx id=%x/%x ssgn=%pI4 ms=%pI4 (pdp=%p)\n", + pctx->u.v1.i_tei, pctx->u.v1.o_tei, + &pctx->sgsn_addr_ip4, &pctx->ms_addr_ip4, pctx); + break; + } + + return 0; +} + +static int gtp_genl_new_pdp(struct sk_buff *skb, struct genl_info *info) +{ + struct net_device *dev; + struct net *net; + + if (!info->attrs[GTPA_VERSION] || + !info->attrs[GTPA_LINK] || + !info->attrs[GTPA_SGSN_ADDRESS] || + !info->attrs[GTPA_MS_ADDRESS]) + return -EINVAL; + + switch (nla_get_u32(info->attrs[GTPA_VERSION])) { + case GTP_V0: + if (!info->attrs[GTPA_TID] || + !info->attrs[GTPA_FLOW]) + return -EINVAL; + break; + case GTP_V1: + if (!info->attrs[GTPA_I_TEI] || + !info->attrs[GTPA_O_TEI]) + return -EINVAL; + break; + + default: + return -EINVAL; + } + + net = gtp_genl_get_net(sock_net(skb->sk), info->attrs); + if (IS_ERR(net)) + return PTR_ERR(net); + + /* Check if there's an existing gtpX device to configure */ + dev = gtp_find_dev(net, nla_get_u32(info->attrs[GTPA_LINK])); + if (dev == NULL) + return -ENODEV; + + return ipv4_pdp_add(dev, info); +} + +static int gtp_genl_del_pdp(struct sk_buff *skb, struct genl_info *info) +{ + struct net_device *dev; + struct pdp_ctx *pctx; + struct gtp_dev *gtp; + struct net *net; + + if (!info->attrs[GTPA_VERSION] || + !info->attrs[GTPA_LINK]) + return -EINVAL; + + net = gtp_genl_get_net(sock_net(skb->sk), info->attrs); + if (IS_ERR(net)) + return PTR_ERR(net); + + /* Check if there's an existing gtpX device to 
configure */ + dev = gtp_find_dev(net, nla_get_u32(info->attrs[GTPA_LINK])); + if (dev == NULL) + return -ENODEV; + + gtp = netdev_priv(dev); + + switch (nla_get_u32(info->attrs[GTPA_VERSION])) { + case GTP_V0: + if (!info->attrs[GTPA_TID]) + return -EINVAL; + pctx = gtp0_pdp_find(gtp, nla_get_u64(info->attrs[GTPA_TID])); + break; + case GTP_V1: + if (!info->attrs[GTPA_I_TEI]) + return -EINVAL; + pctx = gtp1_pdp_find(gtp, nla_get_u64(info->attrs[GTPA_I_TEI])); + break; + + default: + return -EINVAL; + } + + if (pctx == NULL) + return -ENOENT; + + if (pctx->gtp_version == GTP_V0) + netdev_dbg(dev, "GTPv0-U: deleting tunnel id = %llx (pdp %p)\n", + pctx->u.v0.tid, pctx); + else if (pctx->gtp_version == GTP_V1) + netdev_dbg(dev, "GTPv1-U: deleting tunnel id = %x/%x (pdp %p)\n", + pctx->u.v1.i_tei, pctx->u.v1.o_tei, pctx); + + hlist_del_rcu(&pctx->hlist_tid); + hlist_del_rcu(&pctx->hlist_addr); + kfree_rcu(pctx, rcu_head); + + return 0; +} + +static struct genl_family gtp_genl_family = { + .id = GENL_ID_GENERATE, + .name = "gtp", + .version = 0, + .hdrsize = 0, + .maxattr = GTPA_MAX, + .netnsok = true, +}; + +static int gtp_genl_fill_info(struct sk_buff *skb, u32 snd_portid, u32 snd_seq, + u32 type, struct pdp_ctx *pctx) +{ + void *genlh; + + genlh = genlmsg_put(skb, snd_portid, snd_seq, >p_genl_family, 0, + type); + if (genlh == NULL) + goto nlmsg_failure; + + if (nla_put_u32(skb, GTPA_VERSION, pctx->gtp_version) || + nla_put_be32(skb, GTPA_SGSN_ADDRESS, pctx->sgsn_addr_ip4.s_addr) || + nla_put_be32(skb, GTPA_MS_ADDRESS, pctx->ms_addr_ip4.s_addr)) + goto nla_put_failure; + + switch (pctx->gtp_version) { + case GTP_V0: + if (nla_put_u64_64bit(skb, GTPA_TID, pctx->u.v0.tid, GTPA_PAD) || + nla_put_u16(skb, GTPA_FLOW, pctx->u.v0.flow)) + goto nla_put_failure; + break; + case GTP_V1: + if (nla_put_u32(skb, GTPA_I_TEI, pctx->u.v1.i_tei) || + nla_put_u32(skb, GTPA_O_TEI, pctx->u.v1.o_tei)) + goto nla_put_failure; + break; + } + genlmsg_end(skb, genlh); + return 0; + 
+nlmsg_failure: +nla_put_failure: + genlmsg_cancel(skb, genlh); + return -EMSGSIZE; +} + +static int gtp_genl_get_pdp(struct sk_buff *skb, struct genl_info *info) +{ + struct pdp_ctx *pctx = NULL; + struct net_device *dev; + struct sk_buff *skb2; + struct gtp_dev *gtp; + u32 gtp_version; + struct net *net; + int err; + + if (!info->attrs[GTPA_VERSION] || + !info->attrs[GTPA_LINK]) + return -EINVAL; + + gtp_version = nla_get_u32(info->attrs[GTPA_VERSION]); + switch (gtp_version) { + case GTP_V0: + case GTP_V1: + break; + default: + return -EINVAL; + } + + net = gtp_genl_get_net(sock_net(skb->sk), info->attrs); + if (IS_ERR(net)) + return PTR_ERR(net); + + /* Check if there's an existing gtpX device to configure */ + dev = gtp_find_dev(net, nla_get_u32(info->attrs[GTPA_LINK])); + if (dev == NULL) + return -ENODEV; + + gtp = netdev_priv(dev); + + rcu_read_lock(); + if (gtp_version == GTP_V0 && + info->attrs[GTPA_TID]) { + u64 tid = nla_get_u64(info->attrs[GTPA_TID]); + + pctx = gtp0_pdp_find(gtp, tid); + } else if (gtp_version == GTP_V1 && + info->attrs[GTPA_I_TEI]) { + u32 tid = nla_get_u32(info->attrs[GTPA_I_TEI]); + + pctx = gtp1_pdp_find(gtp, tid); + } else if (info->attrs[GTPA_MS_ADDRESS]) { + __be32 ip = nla_get_be32(info->attrs[GTPA_MS_ADDRESS]); + + pctx = ipv4_pdp_find(gtp, ip); + } + + if (pctx == NULL) { + err = -ENOENT; + goto err_unlock; + } + + skb2 = genlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC); + if (skb2 == NULL) { + err = -ENOMEM; + goto err_unlock; + } + + err = gtp_genl_fill_info(skb2, NETLINK_CB(skb).portid, + info->snd_seq, info->nlhdr->nlmsg_type, pctx); + if (err < 0) + goto err_unlock_free; + + rcu_read_unlock(); + return genlmsg_unicast(genl_info_net(info), skb2, info->snd_portid); + +err_unlock_free: + kfree_skb(skb2); +err_unlock: + rcu_read_unlock(); + return err; +} + +static int gtp_genl_dump_pdp(struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct gtp_dev *last_gtp = (struct gtp_dev *)cb->args[2], *gtp; + struct net *net = 
sock_net(skb->sk); + struct gtp_net *gn = net_generic(net, gtp_net_id); + unsigned long tid = cb->args[1]; + int i, k = cb->args[0], ret; + struct pdp_ctx *pctx; + + if (cb->args[4]) + return 0; + + list_for_each_entry_rcu(gtp, &gn->gtp_dev_list, list) { + if (last_gtp && last_gtp != gtp) + continue; + else + last_gtp = NULL; + + for (i = k; i < gtp->hash_size; i++) { + hlist_for_each_entry_rcu(pctx, >p->tid_hash[i], hlist_tid) { + if (tid && tid != pctx->u.tid) + continue; + else + tid = 0; + + ret = gtp_genl_fill_info(skb, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + cb->nlh->nlmsg_type, pctx); + if (ret < 0) { + cb->args[0] = i; + cb->args[1] = pctx->u.tid; + cb->args[2] = (unsigned long)gtp; + goto out; + } + } + } + } + cb->args[4] = 1; +out: + return skb->len; +} + +static struct nla_policy gtp_genl_policy[GTPA_MAX + 1] = { + [GTPA_LINK] = { .type = NLA_U32, }, + [GTPA_VERSION] = { .type = NLA_U32, }, + [GTPA_TID] = { .type = NLA_U64, }, + [GTPA_SGSN_ADDRESS] = { .type = NLA_U32, }, + [GTPA_MS_ADDRESS] = { .type = NLA_U32, }, + [GTPA_FLOW] = { .type = NLA_U16, }, + [GTPA_NET_NS_FD] = { .type = NLA_U32, }, + [GTPA_I_TEI] = { .type = NLA_U32, }, + [GTPA_O_TEI] = { .type = NLA_U32, }, +}; + +static const struct genl_ops gtp_genl_ops[] = { + { + .cmd = GTP_CMD_NEWPDP, + .doit = gtp_genl_new_pdp, + .policy = gtp_genl_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = GTP_CMD_DELPDP, + .doit = gtp_genl_del_pdp, + .policy = gtp_genl_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = GTP_CMD_GETPDP, + .doit = gtp_genl_get_pdp, + .dumpit = gtp_genl_dump_pdp, + .policy = gtp_genl_policy, + .flags = GENL_ADMIN_PERM, + }, +}; + +static int __net_init gtp_net_init(struct net *net) +{ + struct gtp_net *gn = net_generic(net, gtp_net_id); + + INIT_LIST_HEAD(&gn->gtp_dev_list); + return 0; +} + +static void __net_exit gtp_net_exit(struct net *net) +{ + struct gtp_net *gn = net_generic(net, gtp_net_id); + struct gtp_dev *gtp; + LIST_HEAD(list); + + rtnl_lock(); 
+ list_for_each_entry(gtp, &gn->gtp_dev_list, list) + gtp_dellink(gtp->dev, &list); + + unregister_netdevice_many(&list); + rtnl_unlock(); +} + +static struct pernet_operations gtp_net_ops = { + .init = gtp_net_init, + .exit = gtp_net_exit, + .id = >p_net_id, + .size = sizeof(struct gtp_net), +}; + +static int __init gtp_init(void) +{ + int err; + + get_random_bytes(>p_h_initval, sizeof(gtp_h_initval)); + + err = rtnl_link_register(>p_link_ops); + if (err < 0) + goto error_out; + + err = genl_register_family_with_ops(>p_genl_family, gtp_genl_ops); + if (err < 0) + goto unreg_rtnl_link; + + err = register_pernet_subsys(>p_net_ops); + if (err < 0) + goto unreg_genl_family; + + pr_info("GTP module loaded (pdp ctx size %Zd bytes)\n", + sizeof(struct pdp_ctx)); + return 0; + +unreg_genl_family: + genl_unregister_family(>p_genl_family); +unreg_rtnl_link: + rtnl_link_unregister(>p_link_ops); +error_out: + pr_err("error loading GTP module loaded\n"); + return err; +} +late_initcall(gtp_init); + +static void __exit gtp_fini(void) +{ + unregister_pernet_subsys(>p_net_ops); + genl_unregister_family(>p_genl_family); + rtnl_link_unregister(>p_link_ops); + + pr_info("GTP module unloaded\n"); +} +module_exit(gtp_fini); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Harald Welte <hwelte@sysmocom.de>"); +MODULE_DESCRIPTION("Interface driver for GTP encapsulated traffic"); +MODULE_ALIAS_RTNL_LINK("gtp"); diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 3add2c4aac21..460740ccc238 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -85,7 +85,7 @@ struct gcm_iv { * @tfm: crypto struct, key storage */ struct macsec_key { - u64 id; + u8 id[MACSEC_KEYID_LEN]; struct crypto_aead *tfm; }; @@ -1530,7 +1530,8 @@ static const struct nla_policy macsec_genl_sa_policy[NUM_MACSEC_SA_ATTR] = { [MACSEC_SA_ATTR_AN] = { .type = NLA_U8 }, [MACSEC_SA_ATTR_ACTIVE] = { .type = NLA_U8 }, [MACSEC_SA_ATTR_PN] = { .type = NLA_U32 }, - [MACSEC_SA_ATTR_KEYID] = { .type = NLA_U64 }, + 
[MACSEC_SA_ATTR_KEYID] = { .type = NLA_BINARY, + .len = MACSEC_KEYID_LEN, }, [MACSEC_SA_ATTR_KEY] = { .type = NLA_BINARY, .len = MACSEC_MAX_KEY_LEN, }, }; @@ -1577,6 +1578,9 @@ static bool validate_add_rxsa(struct nlattr **attrs) return false; } + if (nla_len(attrs[MACSEC_SA_ATTR_KEYID]) != MACSEC_KEYID_LEN) + return false; + return true; } @@ -1642,7 +1646,7 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info) if (tb_sa[MACSEC_SA_ATTR_ACTIVE]) rx_sa->active = !!nla_get_u8(tb_sa[MACSEC_SA_ATTR_ACTIVE]); - rx_sa->key.id = nla_get_u64(tb_sa[MACSEC_SA_ATTR_KEYID]); + nla_memcpy(rx_sa->key.id, tb_sa[MACSEC_SA_ATTR_KEY], MACSEC_KEYID_LEN); rx_sa->sc = rx_sc; rcu_assign_pointer(rx_sc->sa[assoc_num], rx_sa); @@ -1723,6 +1727,9 @@ static bool validate_add_txsa(struct nlattr **attrs) return false; } + if (nla_len(attrs[MACSEC_SA_ATTR_KEYID]) != MACSEC_KEYID_LEN) + return false; + return true; } @@ -1778,7 +1785,7 @@ static int macsec_add_txsa(struct sk_buff *skb, struct genl_info *info) return -ENOMEM; } - tx_sa->key.id = nla_get_u64(tb_sa[MACSEC_SA_ATTR_KEYID]); + nla_memcpy(tx_sa->key.id, tb_sa[MACSEC_SA_ATTR_KEY], MACSEC_KEYID_LEN); spin_lock_bh(&tx_sa->lock); tx_sa->next_pn = nla_get_u32(tb_sa[MACSEC_SA_ATTR_PN]); @@ -2365,9 +2372,7 @@ static int dump_secy(struct macsec_secy *secy, struct net_device *dev, if (nla_put_u8(skb, MACSEC_SA_ATTR_AN, i) || nla_put_u32(skb, MACSEC_SA_ATTR_PN, tx_sa->next_pn) || - nla_put_u64_64bit(skb, MACSEC_SA_ATTR_KEYID, - tx_sa->key.id, - MACSEC_SA_ATTR_PAD) || + nla_put(skb, MACSEC_SA_ATTR_KEYID, MACSEC_KEYID_LEN, tx_sa->key.id) || nla_put_u8(skb, MACSEC_SA_ATTR_ACTIVE, tx_sa->active)) { nla_nest_cancel(skb, txsa_nest); nla_nest_cancel(skb, txsa_list); @@ -2469,9 +2474,7 @@ static int dump_secy(struct macsec_secy *secy, struct net_device *dev, if (nla_put_u8(skb, MACSEC_SA_ATTR_AN, i) || nla_put_u32(skb, MACSEC_SA_ATTR_PN, rx_sa->next_pn) || - nla_put_u64_64bit(skb, MACSEC_SA_ATTR_KEYID, - rx_sa->key.id, - 
MACSEC_SA_ATTR_PAD) || + nla_put(skb, MACSEC_SA_ATTR_KEYID, MACSEC_KEYID_LEN, rx_sa->key.id) || nla_put_u8(skb, MACSEC_SA_ATTR_ACTIVE, rx_sa->active)) { nla_nest_cancel(skb, rxsa_nest); nla_nest_cancel(skb, rxsc_nest); diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 22b85b097cbc..bd6720962b1f 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -384,7 +384,7 @@ static rx_handler_result_t macvtap_handle_frame(struct sk_buff **pskb) goto wake_up; } - kfree_skb(skb); + consume_skb(skb); while (segs) { struct sk_buff *nskb = segs->next; diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 4b2461ae5d3b..c8db55aa8280 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -648,6 +648,8 @@ static int vrf_get_saddr(struct net_device *dev, struct flowi4 *fl4) fl4->flowi4_flags |= FLOWI_FLAG_SKIP_NH_OIF; fl4->flowi4_iif = LOOPBACK_IFINDEX; + /* make sure oif is set to VRF device for lookup */ + fl4->flowi4_oif = dev->ifindex; fl4->flowi4_tos = tos & IPTOS_RT_MASK; fl4->flowi4_scope = ((tos & RTO_ONLINK) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE); diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 2668e528dee4..2f29d20aa08f 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -613,8 +613,9 @@ out: static int vxlan_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff) { - udp_tunnel_gro_complete(skb, nhoff); - + /* Sets 'skb->inner_mac_header' since we are always called with + * 'skb->encapsulation' set. 
+ */ return eth_gro_complete(skb, nhoff + sizeof(struct vxlanhdr)); } diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index f798899338ed..5101f3ab4f29 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -397,10 +397,17 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) */ start += start_pad; npfns = (pmem->size - start_pad - end_trunc - SZ_8K) / SZ_4K; - if (nd_pfn->mode == PFN_MODE_PMEM) - offset = ALIGN(start + SZ_8K + 64 * npfns, nd_pfn->align) + if (nd_pfn->mode == PFN_MODE_PMEM) { + unsigned long memmap_size; + + /* + * vmemmap_populate_hugepages() allocates the memmap array in + * HPAGE_SIZE chunks. + */ + memmap_size = ALIGN(64 * npfns, HPAGE_SIZE); + offset = ALIGN(start + SZ_8K + memmap_size, nd_pfn->align) - start; - else if (nd_pfn->mode == PFN_MODE_RAM) + } else if (nd_pfn->mode == PFN_MODE_RAM) offset = ALIGN(start + SZ_8K, nd_pfn->align) - start; else goto err; diff --git a/drivers/nvmem/mxs-ocotp.c b/drivers/nvmem/mxs-ocotp.c index 8ba19bba3156..2bb3c5799ac4 100644 --- a/drivers/nvmem/mxs-ocotp.c +++ b/drivers/nvmem/mxs-ocotp.c @@ -94,7 +94,7 @@ static int mxs_ocotp_read(void *context, const void *reg, size_t reg_size, if (ret) goto close_banks; - while (val_size) { + while (val_size >= reg_size) { if ((offset < OCOTP_DATA_OFFSET) || (offset % 16)) { /* fill up non-data register */ *buf = 0; @@ -103,7 +103,7 @@ static int mxs_ocotp_read(void *context, const void *reg, size_t reg_size, } buf++; - val_size--; + val_size -= reg_size; offset += reg_size; } diff --git a/drivers/rapidio/devices/rio_mport_cdev.c b/drivers/rapidio/devices/rio_mport_cdev.c index 96168b819044..e165b7ce29d7 100644 --- a/drivers/rapidio/devices/rio_mport_cdev.c +++ b/drivers/rapidio/devices/rio_mport_cdev.c @@ -126,7 +126,7 @@ struct rio_mport_mapping { struct list_head node; struct mport_dev *md; enum rio_mport_map_dir dir; - u32 rioid; + u16 rioid; u64 rio_addr; dma_addr_t phys_addr; /* for mmap */ void *virt_addr; /* kernel address, for 
dma_free_coherent */ @@ -137,7 +137,7 @@ struct rio_mport_mapping { struct rio_mport_dma_map { int valid; - uint64_t length; + u64 length; void *vaddr; dma_addr_t paddr; }; @@ -208,7 +208,7 @@ struct mport_cdev_priv { struct kfifo event_fifo; wait_queue_head_t event_rx_wait; spinlock_t fifo_lock; - unsigned int event_mask; /* RIO_DOORBELL, RIO_PORTWRITE */ + u32 event_mask; /* RIO_DOORBELL, RIO_PORTWRITE */ #ifdef CONFIG_RAPIDIO_DMA_ENGINE struct dma_chan *dmach; struct list_head async_list; @@ -276,7 +276,8 @@ static int rio_mport_maint_rd(struct mport_cdev_priv *priv, void __user *arg, return -EFAULT; if ((maint_io.offset % 4) || - (maint_io.length == 0) || (maint_io.length % 4)) + (maint_io.length == 0) || (maint_io.length % 4) || + (maint_io.length + maint_io.offset) > RIO_MAINT_SPACE_SZ) return -EINVAL; buffer = vmalloc(maint_io.length); @@ -298,7 +299,8 @@ static int rio_mport_maint_rd(struct mport_cdev_priv *priv, void __user *arg, offset += 4; } - if (unlikely(copy_to_user(maint_io.buffer, buffer, maint_io.length))) + if (unlikely(copy_to_user((void __user *)(uintptr_t)maint_io.buffer, + buffer, maint_io.length))) ret = -EFAULT; out: vfree(buffer); @@ -319,7 +321,8 @@ static int rio_mport_maint_wr(struct mport_cdev_priv *priv, void __user *arg, return -EFAULT; if ((maint_io.offset % 4) || - (maint_io.length == 0) || (maint_io.length % 4)) + (maint_io.length == 0) || (maint_io.length % 4) || + (maint_io.length + maint_io.offset) > RIO_MAINT_SPACE_SZ) return -EINVAL; buffer = vmalloc(maint_io.length); @@ -327,7 +330,8 @@ static int rio_mport_maint_wr(struct mport_cdev_priv *priv, void __user *arg, return -ENOMEM; length = maint_io.length; - if (unlikely(copy_from_user(buffer, maint_io.buffer, length))) { + if (unlikely(copy_from_user(buffer, + (void __user *)(uintptr_t)maint_io.buffer, length))) { ret = -EFAULT; goto out; } @@ -360,7 +364,7 @@ out: */ static int rio_mport_create_outbound_mapping(struct mport_dev *md, struct file *filp, - u32 rioid, u64 raddr, 
u32 size, + u16 rioid, u64 raddr, u32 size, dma_addr_t *paddr) { struct rio_mport *mport = md->mport; @@ -369,7 +373,7 @@ rio_mport_create_outbound_mapping(struct mport_dev *md, struct file *filp, rmcd_debug(OBW, "did=%d ra=0x%llx sz=0x%x", rioid, raddr, size); - map = kzalloc(sizeof(struct rio_mport_mapping), GFP_KERNEL); + map = kzalloc(sizeof(*map), GFP_KERNEL); if (map == NULL) return -ENOMEM; @@ -394,7 +398,7 @@ err_map_outb: static int rio_mport_get_outbound_mapping(struct mport_dev *md, struct file *filp, - u32 rioid, u64 raddr, u32 size, + u16 rioid, u64 raddr, u32 size, dma_addr_t *paddr) { struct rio_mport_mapping *map; @@ -433,7 +437,7 @@ static int rio_mport_obw_map(struct file *filp, void __user *arg) dma_addr_t paddr; int ret; - if (unlikely(copy_from_user(&map, arg, sizeof(struct rio_mmap)))) + if (unlikely(copy_from_user(&map, arg, sizeof(map)))) return -EFAULT; rmcd_debug(OBW, "did=%d ra=0x%llx sz=0x%llx", @@ -448,7 +452,7 @@ static int rio_mport_obw_map(struct file *filp, void __user *arg) map.handle = paddr; - if (unlikely(copy_to_user(arg, &map, sizeof(struct rio_mmap)))) + if (unlikely(copy_to_user(arg, &map, sizeof(map)))) return -EFAULT; return 0; } @@ -469,7 +473,7 @@ static int rio_mport_obw_free(struct file *filp, void __user *arg) if (!md->mport->ops->unmap_outb) return -EPROTONOSUPPORT; - if (copy_from_user(&handle, arg, sizeof(u64))) + if (copy_from_user(&handle, arg, sizeof(handle))) return -EFAULT; rmcd_debug(OBW, "h=0x%llx", handle); @@ -498,9 +502,9 @@ static int rio_mport_obw_free(struct file *filp, void __user *arg) static int maint_hdid_set(struct mport_cdev_priv *priv, void __user *arg) { struct mport_dev *md = priv->md; - uint16_t hdid; + u16 hdid; - if (copy_from_user(&hdid, arg, sizeof(uint16_t))) + if (copy_from_user(&hdid, arg, sizeof(hdid))) return -EFAULT; md->mport->host_deviceid = hdid; @@ -520,9 +524,9 @@ static int maint_hdid_set(struct mport_cdev_priv *priv, void __user *arg) static int maint_comptag_set(struct 
mport_cdev_priv *priv, void __user *arg) { struct mport_dev *md = priv->md; - uint32_t comptag; + u32 comptag; - if (copy_from_user(&comptag, arg, sizeof(uint32_t))) + if (copy_from_user(&comptag, arg, sizeof(comptag))) return -EFAULT; rio_local_write_config_32(md->mport, RIO_COMPONENT_TAG_CSR, comptag); @@ -837,7 +841,7 @@ err_out: * @xfer: data transfer descriptor structure */ static int -rio_dma_transfer(struct file *filp, uint32_t transfer_mode, +rio_dma_transfer(struct file *filp, u32 transfer_mode, enum rio_transfer_sync sync, enum dma_data_direction dir, struct rio_transfer_io *xfer) { @@ -875,7 +879,7 @@ rio_dma_transfer(struct file *filp, uint32_t transfer_mode, unsigned long offset; long pinned; - offset = (unsigned long)xfer->loc_addr & ~PAGE_MASK; + offset = (unsigned long)(uintptr_t)xfer->loc_addr & ~PAGE_MASK; nr_pages = PAGE_ALIGN(xfer->length + offset) >> PAGE_SHIFT; page_list = kmalloc_array(nr_pages, @@ -1015,19 +1019,20 @@ static int rio_mport_transfer_ioctl(struct file *filp, void __user *arg) if (unlikely(copy_from_user(&transaction, arg, sizeof(transaction)))) return -EFAULT; - if (transaction.count != 1) + if (transaction.count != 1) /* only single transfer for now */ return -EINVAL; if ((transaction.transfer_mode & priv->md->properties.transfer_mode) == 0) return -ENODEV; - transfer = vmalloc(transaction.count * sizeof(struct rio_transfer_io)); + transfer = vmalloc(transaction.count * sizeof(*transfer)); if (!transfer) return -ENOMEM; - if (unlikely(copy_from_user(transfer, transaction.block, - transaction.count * sizeof(struct rio_transfer_io)))) { + if (unlikely(copy_from_user(transfer, + (void __user *)(uintptr_t)transaction.block, + transaction.count * sizeof(*transfer)))) { ret = -EFAULT; goto out_free; } @@ -1038,8 +1043,9 @@ static int rio_mport_transfer_ioctl(struct file *filp, void __user *arg) ret = rio_dma_transfer(filp, transaction.transfer_mode, transaction.sync, dir, &transfer[i]); - if (unlikely(copy_to_user(transaction.block, 
transfer, - transaction.count * sizeof(struct rio_transfer_io)))) + if (unlikely(copy_to_user((void __user *)(uintptr_t)transaction.block, + transfer, + transaction.count * sizeof(*transfer)))) ret = -EFAULT; out_free: @@ -1129,11 +1135,11 @@ err_tmo: } static int rio_mport_create_dma_mapping(struct mport_dev *md, struct file *filp, - uint64_t size, struct rio_mport_mapping **mapping) + u64 size, struct rio_mport_mapping **mapping) { struct rio_mport_mapping *map; - map = kzalloc(sizeof(struct rio_mport_mapping), GFP_KERNEL); + map = kzalloc(sizeof(*map), GFP_KERNEL); if (map == NULL) return -ENOMEM; @@ -1165,7 +1171,7 @@ static int rio_mport_alloc_dma(struct file *filp, void __user *arg) struct rio_mport_mapping *mapping = NULL; int ret; - if (unlikely(copy_from_user(&map, arg, sizeof(struct rio_dma_mem)))) + if (unlikely(copy_from_user(&map, arg, sizeof(map)))) return -EFAULT; ret = rio_mport_create_dma_mapping(md, filp, map.length, &mapping); @@ -1174,7 +1180,7 @@ static int rio_mport_alloc_dma(struct file *filp, void __user *arg) map.dma_handle = mapping->phys_addr; - if (unlikely(copy_to_user(arg, &map, sizeof(struct rio_dma_mem)))) { + if (unlikely(copy_to_user(arg, &map, sizeof(map)))) { mutex_lock(&md->buf_mutex); kref_put(&mapping->ref, mport_release_mapping); mutex_unlock(&md->buf_mutex); @@ -1192,7 +1198,7 @@ static int rio_mport_free_dma(struct file *filp, void __user *arg) int ret = -EFAULT; struct rio_mport_mapping *map, *_map; - if (copy_from_user(&handle, arg, sizeof(u64))) + if (copy_from_user(&handle, arg, sizeof(handle))) return -EFAULT; rmcd_debug(EXIT, "filp=%p", filp); @@ -1242,14 +1248,18 @@ static int rio_mport_free_dma(struct file *filp, void __user *arg) static int rio_mport_create_inbound_mapping(struct mport_dev *md, struct file *filp, - u64 raddr, u32 size, + u64 raddr, u64 size, struct rio_mport_mapping **mapping) { struct rio_mport *mport = md->mport; struct rio_mport_mapping *map; int ret; - map = kzalloc(sizeof(struct 
rio_mport_mapping), GFP_KERNEL); + /* rio_map_inb_region() accepts u32 size */ + if (size > 0xffffffff) + return -EINVAL; + + map = kzalloc(sizeof(*map), GFP_KERNEL); if (map == NULL) return -ENOMEM; @@ -1262,7 +1272,7 @@ rio_mport_create_inbound_mapping(struct mport_dev *md, struct file *filp, if (raddr == RIO_MAP_ANY_ADDR) raddr = map->phys_addr; - ret = rio_map_inb_region(mport, map->phys_addr, raddr, size, 0); + ret = rio_map_inb_region(mport, map->phys_addr, raddr, (u32)size, 0); if (ret < 0) goto err_map_inb; @@ -1288,7 +1298,7 @@ err_dma_alloc: static int rio_mport_get_inbound_mapping(struct mport_dev *md, struct file *filp, - u64 raddr, u32 size, + u64 raddr, u64 size, struct rio_mport_mapping **mapping) { struct rio_mport_mapping *map; @@ -1331,7 +1341,7 @@ static int rio_mport_map_inbound(struct file *filp, void __user *arg) if (!md->mport->ops->map_inb) return -EPROTONOSUPPORT; - if (unlikely(copy_from_user(&map, arg, sizeof(struct rio_mmap)))) + if (unlikely(copy_from_user(&map, arg, sizeof(map)))) return -EFAULT; rmcd_debug(IBW, "%s filp=%p", dev_name(&priv->md->dev), filp); @@ -1344,7 +1354,7 @@ static int rio_mport_map_inbound(struct file *filp, void __user *arg) map.handle = mapping->phys_addr; map.rio_addr = mapping->rio_addr; - if (unlikely(copy_to_user(arg, &map, sizeof(struct rio_mmap)))) { + if (unlikely(copy_to_user(arg, &map, sizeof(map)))) { /* Delete mapping if it was created by this request */ if (ret == 0 && mapping->filp == filp) { mutex_lock(&md->buf_mutex); @@ -1375,7 +1385,7 @@ static int rio_mport_inbound_free(struct file *filp, void __user *arg) if (!md->mport->ops->unmap_inb) return -EPROTONOSUPPORT; - if (copy_from_user(&handle, arg, sizeof(u64))) + if (copy_from_user(&handle, arg, sizeof(handle))) return -EFAULT; mutex_lock(&md->buf_mutex); @@ -1401,7 +1411,7 @@ static int rio_mport_inbound_free(struct file *filp, void __user *arg) static int maint_port_idx_get(struct mport_cdev_priv *priv, void __user *arg) { struct mport_dev 
*md = priv->md; - uint32_t port_idx = md->mport->index; + u32 port_idx = md->mport->index; rmcd_debug(MPORT, "port_index=%d", port_idx); @@ -1451,7 +1461,7 @@ static void rio_mport_doorbell_handler(struct rio_mport *mport, void *dev_id, handled = 0; spin_lock(&data->db_lock); list_for_each_entry(db_filter, &data->doorbells, data_node) { - if (((db_filter->filter.rioid == 0xffffffff || + if (((db_filter->filter.rioid == RIO_INVALID_DESTID || db_filter->filter.rioid == src)) && info >= db_filter->filter.low && info <= db_filter->filter.high) { @@ -1525,6 +1535,9 @@ static int rio_mport_remove_db_filter(struct mport_cdev_priv *priv, if (copy_from_user(&filter, arg, sizeof(filter))) return -EFAULT; + if (filter.low > filter.high) + return -EINVAL; + spin_lock_irqsave(&priv->md->db_lock, flags); list_for_each_entry(db_filter, &priv->db_filters, priv_node) { if (db_filter->filter.rioid == filter.rioid && @@ -1737,10 +1750,10 @@ static int rio_mport_add_riodev(struct mport_cdev_priv *priv, return -EEXIST; } - size = sizeof(struct rio_dev); + size = sizeof(*rdev); mport = md->mport; - destid = (u16)dev_info.destid; - hopcount = (u8)dev_info.hopcount; + destid = dev_info.destid; + hopcount = dev_info.hopcount; if (rio_mport_read_config_32(mport, destid, hopcount, RIO_PEF_CAR, &rval)) @@ -1872,8 +1885,8 @@ static int rio_mport_del_riodev(struct mport_cdev_priv *priv, void __user *arg) do { rdev = rio_get_comptag(dev_info.comptag, rdev); if (rdev && rdev->dev.parent == &mport->net->dev && - rdev->destid == (u16)dev_info.destid && - rdev->hopcount == (u8)dev_info.hopcount) + rdev->destid == dev_info.destid && + rdev->hopcount == dev_info.hopcount) break; } while (rdev); } @@ -2146,8 +2159,8 @@ static long mport_cdev_ioctl(struct file *filp, return maint_port_idx_get(data, (void __user *)arg); case RIO_MPORT_GET_PROPERTIES: md->properties.hdid = md->mport->host_deviceid; - if (copy_to_user((void __user *)arg, &(data->md->properties), - sizeof(data->md->properties))) + if 
(copy_to_user((void __user *)arg, &(md->properties), + sizeof(md->properties))) return -EFAULT; return 0; case RIO_ENABLE_DOORBELL_RANGE: @@ -2159,11 +2172,11 @@ static long mport_cdev_ioctl(struct file *filp, case RIO_DISABLE_PORTWRITE_RANGE: return rio_mport_remove_pw_filter(data, (void __user *)arg); case RIO_SET_EVENT_MASK: - data->event_mask = arg; + data->event_mask = (u32)arg; return 0; case RIO_GET_EVENT_MASK: if (copy_to_user((void __user *)arg, &data->event_mask, - sizeof(data->event_mask))) + sizeof(u32))) return -EFAULT; return 0; case RIO_MAP_OUTBOUND: @@ -2374,7 +2387,7 @@ static ssize_t mport_write(struct file *filp, const char __user *buf, return -EINVAL; ret = rio_mport_send_doorbell(mport, - (u16)event.u.doorbell.rioid, + event.u.doorbell.rioid, event.u.doorbell.payload); if (ret < 0) return ret; @@ -2421,7 +2434,7 @@ static struct mport_dev *mport_cdev_add(struct rio_mport *mport) struct mport_dev *md; struct rio_mport_attr attr; - md = kzalloc(sizeof(struct mport_dev), GFP_KERNEL); + md = kzalloc(sizeof(*md), GFP_KERNEL); if (!md) { rmcd_error("Unable allocate a device object"); return NULL; @@ -2470,7 +2483,7 @@ static struct mport_dev *mport_cdev_add(struct rio_mport *mport) /* The transfer_mode property will be returned through mport query * interface */ -#ifdef CONFIG_PPC /* for now: only on Freescale's SoCs */ +#ifdef CONFIG_FSL_RIO /* for now: only on Freescale's SoCs */ md->properties.transfer_mode |= RIO_TRANSFER_MODE_MAPPED; #else md->properties.transfer_mode |= RIO_TRANSFER_MODE_TRANSFER; diff --git a/drivers/usb/core/port.c b/drivers/usb/core/port.c index 14718a9ffcfb..460c855be0d0 100644 --- a/drivers/usb/core/port.c +++ b/drivers/usb/core/port.c @@ -249,18 +249,12 @@ static int usb_port_runtime_suspend(struct device *dev) return retval; } - -static int usb_port_prepare(struct device *dev) -{ - return 1; -} #endif static const struct dev_pm_ops usb_port_pm_ops = { #ifdef CONFIG_PM .runtime_suspend = usb_port_runtime_suspend, 
.runtime_resume = usb_port_runtime_resume, - .prepare = usb_port_prepare, #endif }; diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c index dcb85e3cd5a7..479187c32571 100644 --- a/drivers/usb/core/usb.c +++ b/drivers/usb/core/usb.c @@ -312,13 +312,7 @@ static int usb_dev_uevent(struct device *dev, struct kobj_uevent_env *env) static int usb_dev_prepare(struct device *dev) { - struct usb_device *udev = to_usb_device(dev); - - /* Return 0 if the current wakeup setting is wrong, otherwise 1 */ - if (udev->do_remote_wakeup != device_may_wakeup(dev)) - return 0; - - return 1; + return 0; /* Implement eventually? */ } static void usb_dev_complete(struct device *dev) diff --git a/drivers/usb/musb/jz4740.c b/drivers/usb/musb/jz4740.c index 5e5a8fa005f8..bc8889956d17 100644 --- a/drivers/usb/musb/jz4740.c +++ b/drivers/usb/musb/jz4740.c @@ -83,9 +83,9 @@ static int jz4740_musb_init(struct musb *musb) { usb_phy_generic_register(); musb->xceiv = usb_get_phy(USB_PHY_TYPE_USB2); - if (!musb->xceiv) { + if (IS_ERR(musb->xceiv)) { pr_err("HS UDC: no transceiver configured\n"); - return -ENODEV; + return PTR_ERR(musb->xceiv); } /* Silicon does not implement ConfigData register. 
diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c index 87bd578799a8..152865b36522 100644 --- a/drivers/usb/musb/musb_gadget.c +++ b/drivers/usb/musb/musb_gadget.c @@ -1164,12 +1164,12 @@ static int musb_gadget_disable(struct usb_ep *ep) musb_writew(epio, MUSB_RXMAXP, 0); } - musb_ep->desc = NULL; - musb_ep->end_point.desc = NULL; - /* abort all pending DMA and requests */ nuke(musb_ep, -ESHUTDOWN); + musb_ep->desc = NULL; + musb_ep->end_point.desc = NULL; + schedule_work(&musb->irq_work); spin_unlock_irqrestore(&(musb->lock), flags); diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c index 58487a473521..2f8ad7f1f482 100644 --- a/drivers/usb/musb/musb_host.c +++ b/drivers/usb/musb/musb_host.c @@ -2735,7 +2735,7 @@ static const struct hc_driver musb_hc_driver = { .description = "musb-hcd", .product_desc = "MUSB HDRC host driver", .hcd_priv_size = sizeof(struct musb *), - .flags = HCD_USB2 | HCD_MEMORY | HCD_BH, + .flags = HCD_USB2 | HCD_MEMORY, /* not using irq handler or reset hooks from usbcore, since * those must be shared with peripheral code for OTG configs diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index dd47823bb014..7c9f25e9c422 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -109,6 +109,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x826B) }, /* Cygnal Integrated Products, Inc., Fasttrax GPS demonstration module */ { USB_DEVICE(0x10C4, 0x8281) }, /* Nanotec Plug & Drive */ { USB_DEVICE(0x10C4, 0x8293) }, /* Telegesis ETRX2USB */ + { USB_DEVICE(0x10C4, 0x82F4) }, /* Starizona MicroTouch */ { USB_DEVICE(0x10C4, 0x82F9) }, /* Procyon AVS */ { USB_DEVICE(0x10C4, 0x8341) }, /* Siemens MC35PU GPRS Modem */ { USB_DEVICE(0x10C4, 0x8382) }, /* Cygnal Integrated Products, Inc. 
*/ @@ -118,6 +119,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x8418) }, /* IRZ Automation Teleport SG-10 GSM/GPRS Modem */ { USB_DEVICE(0x10C4, 0x846E) }, /* BEI USB Sensor Interface (VCP) */ { USB_DEVICE(0x10C4, 0x8477) }, /* Balluff RFID */ + { USB_DEVICE(0x10C4, 0x84B6) }, /* Starizona Hyperion */ { USB_DEVICE(0x10C4, 0x85EA) }, /* AC-Services IBUS-IF */ { USB_DEVICE(0x10C4, 0x85EB) }, /* AC-Services CIS-IBUS */ { USB_DEVICE(0x10C4, 0x85F8) }, /* Virtenio Preon32 */ @@ -141,6 +143,8 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0xF004) }, /* Elan Digital Systems USBcount50 */ { USB_DEVICE(0x10C5, 0xEA61) }, /* Silicon Labs MobiData GPRS USB Modem */ { USB_DEVICE(0x10CE, 0xEA6A) }, /* Silicon Labs MobiData GPRS USB Modem 100EU */ + { USB_DEVICE(0x12B8, 0xEC60) }, /* Link G4 ECU */ + { USB_DEVICE(0x12B8, 0xEC62) }, /* Link G4+ ECU */ { USB_DEVICE(0x13AD, 0x9999) }, /* Baltech card reader */ { USB_DEVICE(0x1555, 0x0004) }, /* Owen AC4 USB-RS485 Converter */ { USB_DEVICE(0x166A, 0x0201) }, /* Clipsal 5500PACA C-Bus Pascal Automation Controller */ diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 5c802d47892c..ca6bfddaacad 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -1006,7 +1006,7 @@ struct virtqueue *vring_create_virtqueue( const char *name) { struct virtqueue *vq; - void *queue; + void *queue = NULL; dma_addr_t dma_addr; size_t queue_size_in_bytes; struct vring vring; diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 9781e0dd59d6..d46839f51e73 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -151,6 +151,8 @@ static DECLARE_WAIT_QUEUE_HEAD(balloon_wq); static void balloon_process(struct work_struct *work); static DECLARE_DELAYED_WORK(balloon_worker, balloon_process); +static void release_memory_resource(struct resource *resource); + /* When ballooning out (allocating memory to return to Xen) we don't really want 
the kernel to try too hard since that can trigger the oom killer. */ #define GFP_BALLOON \ @@ -267,6 +269,20 @@ static struct resource *additional_memory_resource(phys_addr_t size) return NULL; } +#ifdef CONFIG_SPARSEMEM + { + unsigned long limit = 1UL << (MAX_PHYSMEM_BITS - PAGE_SHIFT); + unsigned long pfn = res->start >> PAGE_SHIFT; + + if (pfn > limit) { + pr_err("New System RAM resource outside addressable RAM (%lu > %lu)\n", + pfn, limit); + release_memory_resource(res); + return NULL; + } + } +#endif + return res; } diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c index 38272ad24551..f4edd6df3df2 100644 --- a/drivers/xen/evtchn.c +++ b/drivers/xen/evtchn.c @@ -316,7 +316,6 @@ static int evtchn_resize_ring(struct per_user_data *u) { unsigned int new_size; evtchn_port_t *new_ring, *old_ring; - unsigned int p, c; /* * Ensure the ring is large enough to capture all possible @@ -346,20 +345,17 @@ static int evtchn_resize_ring(struct per_user_data *u) /* * Copy the old ring contents to the new ring. * - * If the ring contents crosses the end of the current ring, - * it needs to be copied in two chunks. + * To take care of wrapping, a full ring, and the new index + * pointing into the second half, simply copy the old contents + * twice. 
* * +---------+ +------------------+ - * |34567 12| -> | 1234567 | - * +-----p-c-+ +------------------+ + * |34567 12| -> |34567 1234567 12| + * +-----p-c-+ +-------c------p---+ */ - p = evtchn_ring_offset(u, u->ring_prod); - c = evtchn_ring_offset(u, u->ring_cons); - if (p < c) { - memcpy(new_ring + c, u->ring + c, (u->ring_size - c) * sizeof(*u->ring)); - memcpy(new_ring + u->ring_size, u->ring, p * sizeof(*u->ring)); - } else - memcpy(new_ring + c, u->ring + c, (p - c) * sizeof(*u->ring)); + memcpy(new_ring, old_ring, u->ring_size * sizeof(*u->ring)); + memcpy(new_ring + u->ring_size, old_ring, + u->ring_size * sizeof(*u->ring)); u->ring = new_ring; u->ring_size = new_size; diff --git a/fs/pnode.c b/fs/pnode.c index c524fdddc7fb..99899705b105 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -198,7 +198,7 @@ static struct mount *next_group(struct mount *m, struct mount *origin) /* all accesses are serialized by namespace_sem */ static struct user_namespace *user_ns; -static struct mount *last_dest, *last_source, *dest_master; +static struct mount *last_dest, *first_source, *last_source, *dest_master; static struct mountpoint *mp; static struct hlist_head *list; @@ -221,20 +221,22 @@ static int propagate_one(struct mount *m) type = CL_MAKE_SHARED; } else { struct mount *n, *p; + bool done; for (n = m; ; n = p) { p = n->mnt_master; - if (p == dest_master || IS_MNT_MARKED(p)) { - while (last_dest->mnt_master != p) { - last_source = last_source->mnt_master; - last_dest = last_source->mnt_parent; - } - if (!peers(n, last_dest)) { - last_source = last_source->mnt_master; - last_dest = last_source->mnt_parent; - } + if (p == dest_master || IS_MNT_MARKED(p)) break; - } } + do { + struct mount *parent = last_source->mnt_parent; + if (last_source == first_source) + break; + done = parent->mnt_master == p; + if (done && peers(n, parent)) + break; + last_source = last_source->mnt_master; + } while (!done); + type = CL_SLAVE; /* beginning of peer group among the slaves? 
*/ if (IS_MNT_SHARED(m)) @@ -286,6 +288,7 @@ int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp, */ user_ns = current->nsproxy->mnt_ns->user_ns; last_dest = dest_mnt; + first_source = source_mnt; last_source = source_mnt; mp = dest_mp; list = tree_list; diff --git a/fs/proc/base.c b/fs/proc/base.c index b1755b23893e..92e37e224cd2 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -955,7 +955,8 @@ static ssize_t environ_read(struct file *file, char __user *buf, struct mm_struct *mm = file->private_data; unsigned long env_start, env_end; - if (!mm) + /* Ensure the process spawned far enough to have an environment. */ + if (!mm || !mm->env_end) return 0; page = (char *)__get_free_page(GFP_TEMPORARY); diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 14362a84c78e..3a932501d690 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -394,13 +394,13 @@ struct acpi_data_node { static inline bool is_acpi_node(struct fwnode_handle *fwnode) { - return fwnode && (fwnode->type == FWNODE_ACPI + return !IS_ERR_OR_NULL(fwnode) && (fwnode->type == FWNODE_ACPI || fwnode->type == FWNODE_ACPI_DATA); } static inline bool is_acpi_device_node(struct fwnode_handle *fwnode) { - return fwnode && fwnode->type == FWNODE_ACPI; + return !IS_ERR_OR_NULL(fwnode) && fwnode->type == FWNODE_ACPI; } static inline struct acpi_device *to_acpi_device_node(struct fwnode_handle *fwnode) diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index eeae401a2412..3d5202eda22f 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -246,7 +246,7 @@ #define __HAVE_BUILTIN_BSWAP32__ #define __HAVE_BUILTIN_BSWAP64__ #endif -#if GCC_VERSION >= 40800 || (defined(__powerpc__) && GCC_VERSION >= 40600) +#if GCC_VERSION >= 40800 #define __HAVE_BUILTIN_BSWAP16__ #endif #endif /* CONFIG_ARCH_USE_BUILTIN_BSWAP */ diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 
4dd9306c9d56..dc4f58a3cdcc 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -380,16 +380,16 @@ static inline unsigned long ifname_compare_aligned(const char *_a, * allows us to return 0 for single core systems without forcing * callers to deal with SMP vs. NONSMP issues. */ -static inline u64 xt_percpu_counter_alloc(void) +static inline unsigned long xt_percpu_counter_alloc(void) { if (nr_cpu_ids > 1) { void __percpu *res = __alloc_percpu(sizeof(struct xt_counters), sizeof(struct xt_counters)); if (res == NULL) - return (u64) -ENOMEM; + return -ENOMEM; - return (u64) (__force unsigned long) res; + return (__force unsigned long) res; } return 0; diff --git a/include/linux/of.h b/include/linux/of.h index 7fcb681baadf..31758036787c 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -133,7 +133,7 @@ void of_core_init(void); static inline bool is_of_node(struct fwnode_handle *fwnode) { - return fwnode && fwnode->type == FWNODE_OF; + return !IS_ERR_OR_NULL(fwnode) && fwnode->type == FWNODE_OF; } static inline struct device_node *to_of_node(struct fwnode_handle *fwnode) diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index f4ed4f1b0c77..6b052aa7b5b7 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -517,6 +517,27 @@ static inline int PageTransCompound(struct page *page) } /* + * PageTransCompoundMap is the same as PageTransCompound, but it also + * guarantees the primary MMU has the entire compound page mapped + * through pmd_trans_huge, which in turn guarantees the secondary MMUs + * can also map the entire compound page. This allows the secondary + * MMUs to call get_user_pages() only once for each compound page and + * to immediately map the entire compound page with a single secondary + * MMU fault. If there will be a pmd split later, the secondary MMUs + * will get an update through the MMU notifier invalidation through + * split_huge_pmd(). 
+ * + * Unlike PageTransCompound, this is safe to be called only while + * split_huge_pmd() cannot run from under us, like if protected by the + * MMU notifier, otherwise it may result in page->_mapcount < 0 false + * positives. + */ +static inline int PageTransCompoundMap(struct page *page) +{ + return PageTransCompound(page) && atomic_read(&page->_mapcount) < 0; +} + +/* * PageTransTail returns true for both transparent huge pages * and hugetlbfs pages, so it should only be called when it's known * that hugetlbfs pages aren't involved. @@ -559,6 +580,7 @@ static inline int TestClearPageDoubleMap(struct page *page) #else TESTPAGEFLAG_FALSE(TransHuge) TESTPAGEFLAG_FALSE(TransCompound) +TESTPAGEFLAG_FALSE(TransCompoundMap) TESTPAGEFLAG_FALSE(TransTail) TESTPAGEFLAG_FALSE(DoubleMap) TESTSETFLAG_FALSE(DoubleMap) diff --git a/include/linux/swap.h b/include/linux/swap.h index 2b83359c19ca..0a4cd4703f40 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -533,6 +533,10 @@ static inline swp_entry_t get_swap_page(void) #ifdef CONFIG_MEMCG static inline int mem_cgroup_swappiness(struct mem_cgroup *memcg) { + /* Cgroup2 doesn't have per-cgroup swappiness */ + if (cgroup_subsys_on_dfl(memory_cgrp_subsys)) + return vm_swappiness; + /* root ? */ if (mem_cgroup_disabled() || !memcg->css.parent) return vm_swappiness; diff --git a/include/net/gtp.h b/include/net/gtp.h new file mode 100644 index 000000000000..894a37b87d63 --- /dev/null +++ b/include/net/gtp.h @@ -0,0 +1,34 @@ +#ifndef _GTP_H_ +#define _GTP_H_ + +/* General GTP protocol related definitions. */ + +#define GTP0_PORT 3386 +#define GTP1U_PORT 2152 + +#define GTP_TPDU 255 + +struct gtp0_header { /* According to GSM TS 09.60. */ + __u8 flags; + __u8 type; + __be16 length; + __be16 seq; + __be16 flow; + __u8 number; + __u8 spare[3]; + __be64 tid; +} __attribute__ ((packed)); + +struct gtp1_header { /* According to 3GPP TS 29.060.
*/ + __u8 flags; + __u8 type; + __be16 length; + __be32 tid; +} __attribute__ ((packed)); + +#define GTP1_F_NPDU 0x01 +#define GTP1_F_SEQ 0x02 +#define GTP1_F_EXTHDR 0x04 +#define GTP1_F_MASK 0x07 + +#endif diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index a6cc576fd467..af4c10ebb241 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -731,6 +731,12 @@ struct ip_vs_pe { u32 (*hashkey_raw)(const struct ip_vs_conn_param *p, u32 initval, bool inverse); int (*show_pe_data)(const struct ip_vs_conn *cp, char *buf); + /* create connections for real-server outgoing packets */ + struct ip_vs_conn* (*conn_out)(struct ip_vs_service *svc, + struct ip_vs_dest *dest, + struct sk_buff *skb, + const struct ip_vs_iphdr *iph, + __be16 dport, __be16 cport); }; /* The application module object (a.k.a. app incarnation) */ @@ -874,6 +880,7 @@ struct netns_ipvs { /* Service counters */ atomic_t ftpsvc_counter; atomic_t nullsvc_counter; + atomic_t conn_out_counter; #ifdef CONFIG_SYSCTL /* 1/rate drop and drop-entry variables */ @@ -1147,6 +1154,12 @@ static inline int sysctl_cache_bypass(struct netns_ipvs *ipvs) */ const char *ip_vs_proto_name(unsigned int proto); void ip_vs_init_hash_table(struct list_head *table, int rows); +struct ip_vs_conn *ip_vs_new_conn_out(struct ip_vs_service *svc, + struct ip_vs_dest *dest, + struct sk_buff *skb, + const struct ip_vs_iphdr *iph, + __be16 dport, + __be16 cport); #define IP_VS_INIT_HASH_TABLE(t) ip_vs_init_hash_table((t), ARRAY_SIZE((t))) #define IP_VS_APP_TYPE_FTP 1 @@ -1378,6 +1391,10 @@ ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u32 fwmark, __u16 protocol bool ip_vs_has_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol, const union nf_inet_addr *daddr, __be16 dport); +struct ip_vs_dest * +ip_vs_find_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol, + const union nf_inet_addr *daddr, __be16 dport); + int ip_vs_use_count_inc(void); void ip_vs_use_count_dec(void); int 
ip_vs_register_nl_ioctl(void); diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index c43a9c73de5e..78872bd1dc2c 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -130,52 +130,9 @@ static inline bool netif_index_is_l3_master(struct net *net, int ifindex) return rc; } -static inline int l3mdev_get_saddr(struct net *net, int ifindex, - struct flowi4 *fl4) -{ - struct net_device *dev; - int rc = 0; - - if (ifindex) { - - rcu_read_lock(); - - dev = dev_get_by_index_rcu(net, ifindex); - if (dev && netif_is_l3_master(dev) && - dev->l3mdev_ops->l3mdev_get_saddr) { - rc = dev->l3mdev_ops->l3mdev_get_saddr(dev, fl4); - } - - rcu_read_unlock(); - } - - return rc; -} +int l3mdev_get_saddr(struct net *net, int ifindex, struct flowi4 *fl4); -static inline struct dst_entry *l3mdev_get_rt6_dst(const struct net_device *dev, - const struct flowi6 *fl6) -{ - if (netif_is_l3_master(dev) && dev->l3mdev_ops->l3mdev_get_rt6_dst) - return dev->l3mdev_ops->l3mdev_get_rt6_dst(dev, fl6); - - return NULL; -} - -static inline -struct dst_entry *l3mdev_rt6_dst_by_oif(struct net *net, - const struct flowi6 *fl6) -{ - struct dst_entry *dst = NULL; - struct net_device *dev; - - dev = dev_get_by_index(net, fl6->flowi6_oif); - if (dev) { - dst = l3mdev_get_rt6_dst(dev, fl6); - dev_put(dev); - } - - return dst; -} +struct dst_entry *l3mdev_get_rt6_dst(struct net *net, const struct flowi6 *fl6); #else @@ -233,14 +190,7 @@ static inline int l3mdev_get_saddr(struct net *net, int ifindex, } static inline -struct dst_entry *l3mdev_get_rt6_dst(const struct net_device *dev, - const struct flowi6 *fl6) -{ - return NULL; -} -static inline -struct dst_entry *l3mdev_rt6_dst_by_oif(struct net *net, - const struct flowi6 *fl6) +struct dst_entry *l3mdev_get_rt6_dst(struct net *net, const struct flowi6 *fl6) { return NULL; } diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index fde4068eec0b..dd78bea227c8 100644 --- 
a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -289,8 +289,6 @@ struct kernel_param; int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp); extern unsigned int nf_conntrack_htable_size; extern unsigned int nf_conntrack_max; -extern unsigned int nf_conntrack_hash_rnd; -void init_nf_conntrack_hash_rnd(void); struct nf_conn *nf_ct_tmpl_alloc(struct net *net, const struct nf_conntrack_zone *zone, diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 62e17d1319ff..3e2f3328945c 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -81,6 +81,7 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, #define CONNTRACK_LOCKS 1024 +extern struct hlist_nulls_head *nf_conntrack_hash; extern spinlock_t nf_conntrack_locks[CONNTRACK_LOCKS]; void nf_conntrack_lock(spinlock_t *lock); diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index dce56f09ac9a..5ed33ea4718e 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -10,6 +10,7 @@ extern unsigned int nf_ct_expect_hsize; extern unsigned int nf_ct_expect_max; +extern struct hlist_head *nf_ct_expect_hash; struct nf_conntrack_expect { /* Conntrack expectation list member */ diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 956d8a6ac069..1a5fb36f165f 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -23,6 +23,9 @@ struct nf_conntrack_l4proto { /* L4 Protocol number. */ u_int8_t l4proto; + /* Resolve clashes on insertion races. */ + bool allow_clash; + /* Try to fill in the third arg: dataoff is offset past network protocol hdr. Return true if possible. 
*/ bool (*pkt_to_tuple)(const struct sk_buff *skb, unsigned int dataoff, diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index f6b1daf2e698..092235458691 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -303,7 +303,7 @@ void nft_unregister_set(struct nft_set_ops *ops); struct nft_set { struct list_head list; struct list_head bindings; - char name[IFNAMSIZ]; + char name[NFT_SET_MAXNAMELEN]; u32 ktype; u32 dtype; u32 size; diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 723b61c82b3f..38b1a80517f0 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -84,7 +84,6 @@ struct netns_ct { struct ctl_table_header *event_sysctl_header; struct ctl_table_header *helper_sysctl_header; #endif - char *slabname; unsigned int sysctl_log_invalid; /* Log invalid packets */ int sysctl_events; int sysctl_acct; @@ -93,11 +92,6 @@ struct netns_ct { int sysctl_tstamp; int sysctl_checksum; - unsigned int htable_size; - seqcount_t generation; - struct kmem_cache *nf_conntrack_cachep; - struct hlist_nulls_head *hash; - struct hlist_head *expect_hash; struct ct_pcpu __percpu *pcpu_lists; struct ip_conntrack_stat __percpu *stat; struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb; @@ -107,9 +101,5 @@ struct netns_ct { unsigned int labels_used; u8 label_words; #endif -#ifdef CONFIG_NF_NAT_NEEDED - struct hlist_head *nat_bysource; - unsigned int nat_htable_size; -#endif }; #endif diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 730d82ad6ee5..24cd3949a9a4 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -80,6 +80,7 @@ struct netns_xfrm { struct flow_cache flow_cache_global; atomic_t flow_cache_genid; struct list_head flow_cache_gc_list; + atomic_t flow_cache_gc_count; spinlock_t flow_cache_gc_lock; struct work_struct flow_cache_gc_work; struct work_struct flow_cache_flush_work; diff --git 
a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index 4f543262dd81..9d14f707e534 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -112,15 +112,6 @@ static inline int udp_tunnel_handle_offloads(struct sk_buff *skb, bool udp_csum) return iptunnel_handle_offloads(skb, type); } -static inline void udp_tunnel_gro_complete(struct sk_buff *skb, int nhoff) -{ - struct udphdr *uh; - - uh = (struct udphdr *)(skb->data + nhoff - sizeof(struct udphdr)); - skb_shinfo(skb)->gso_type |= uh->check ? - SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; -} - static inline void udp_tunnel_encap_enable(struct socket *sock) { #if IS_ENABLED(CONFIG_IPV6) diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 6e0f5f01734c..c51afb71bfab 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -718,9 +718,9 @@ __SYSCALL(__NR_mlock2, sys_mlock2) #define __NR_copy_file_range 285 __SYSCALL(__NR_copy_file_range, sys_copy_file_range) #define __NR_preadv2 286 -__SYSCALL(__NR_preadv2, sys_preadv2) +__SC_COMP(__NR_preadv2, sys_preadv2, compat_sys_preadv2) #define __NR_pwritev2 287 -__SYSCALL(__NR_pwritev2, sys_pwritev2) +__SC_COMP(__NR_pwritev2, sys_pwritev2, compat_sys_pwritev2) #undef __NR_syscalls #define __NR_syscalls 288 diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index 813ffb2e22c9..8bdae34d1f9a 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -141,6 +141,7 @@ header-y += gfs2_ondisk.h header-y += gigaset_dev.h header-y += gpio.h header-y += gsmmux.h +header-y += gtp.h header-y += hdlcdrv.h header-y += hdlc.h header-y += hdreg.h diff --git a/include/uapi/linux/gtp.h b/include/uapi/linux/gtp.h new file mode 100644 index 000000000000..ca1054dd8249 --- /dev/null +++ b/include/uapi/linux/gtp.h @@ -0,0 +1,33 @@ +#ifndef _UAPI_LINUX_GTP_H_ +#define _UAPI_LINUX_GTP_H_ + +enum gtp_genl_cmds { + GTP_CMD_NEWPDP, + GTP_CMD_DELPDP, + GTP_CMD_GETPDP, + + GTP_CMD_MAX,
+}; + +enum gtp_version { + GTP_V0 = 0, + GTP_V1, +}; + +enum gtp_attrs { + GTPA_UNSPEC = 0, + GTPA_LINK, + GTPA_VERSION, + GTPA_TID, /* for GTPv0 only */ + GTPA_SGSN_ADDRESS, + GTPA_MS_ADDRESS, + GTPA_FLOW, + GTPA_NET_NS_FD, + GTPA_I_TEI, /* for GTPv1 only */ + GTPA_O_TEI, /* for GTPv1 only */ + GTPA_PAD, + __GTPA_MAX, +}; +#define GTPA_MAX (__GTPA_MAX - 1) + +#endif /* _UAPI_LINUX_GTP_H_ */ diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index d2d7fd4ba5f5..bb36bd5675a7 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -529,6 +529,16 @@ enum { }; #define IFLA_PPP_MAX (__IFLA_PPP_MAX - 1) +/* GTP section */ +enum { + IFLA_GTP_UNSPEC, + IFLA_GTP_FD0, + IFLA_GTP_FD1, + IFLA_GTP_PDP_HASHSIZE, + __IFLA_GTP_MAX, +}; +#define IFLA_GTP_MAX (__IFLA_GTP_MAX - 1) + /* Bonding section */ enum { diff --git a/include/uapi/linux/if_macsec.h b/include/uapi/linux/if_macsec.h index 897a94942245..f7d4831a2cc7 100644 --- a/include/uapi/linux/if_macsec.h +++ b/include/uapi/linux/if_macsec.h @@ -19,6 +19,8 @@ #define MACSEC_MAX_KEY_LEN 128 +#define MACSEC_KEYID_LEN 16 + #define MACSEC_DEFAULT_CIPHER_ID 0x0080020001000001ULL #define MACSEC_DEFAULT_CIPHER_ALT 0x0080C20001000001ULL @@ -79,7 +81,7 @@ enum macsec_sa_attrs { MACSEC_SA_ATTR_ACTIVE, /* config/dump, u8 0..1 */ MACSEC_SA_ATTR_PN, /* config/dump, u32 */ MACSEC_SA_ATTR_KEY, /* config, data */ - MACSEC_SA_ATTR_KEYID, /* config/dump, u64 */ + MACSEC_SA_ATTR_KEYID, /* config/dump, 128-bit */ MACSEC_SA_ATTR_STATS, /* dump, nested, macsec_sa_stats_attr */ MACSEC_SA_ATTR_PAD, __MACSEC_SA_ATTR_END, diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 660231363bb5..6a4dbe04f09e 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -3,6 +3,7 @@ #define NFT_TABLE_MAXNAMELEN 32 #define NFT_CHAIN_MAXNAMELEN 32 +#define NFT_SET_MAXNAMELEN 32 #define NFT_USERDATA_MAXLEN 256 /** diff --git
a/include/linux/rio_mport_cdev.h b/include/uapi/linux/rio_mport_cdev.h index b65d19df76d2..5796bf1d06ad 100644 --- a/include/linux/rio_mport_cdev.h +++ b/include/uapi/linux/rio_mport_cdev.h @@ -39,16 +39,16 @@ #ifndef _RIO_MPORT_CDEV_H_ #define _RIO_MPORT_CDEV_H_ -#ifndef __user -#define __user -#endif +#include <linux/ioctl.h> +#include <linux/types.h> struct rio_mport_maint_io { - uint32_t rioid; /* destID of remote device */ - uint32_t hopcount; /* hopcount to remote device */ - uint32_t offset; /* offset in register space */ - size_t length; /* length in bytes */ - void __user *buffer; /* data buffer */ + __u16 rioid; /* destID of remote device */ + __u8 hopcount; /* hopcount to remote device */ + __u8 pad0[5]; + __u32 offset; /* offset in register space */ + __u32 length; /* length in bytes */ + __u64 buffer; /* pointer to data buffer */ }; /* @@ -66,22 +66,23 @@ struct rio_mport_maint_io { #define RIO_CAP_MAP_INB (1 << 7) struct rio_mport_properties { - uint16_t hdid; - uint8_t id; /* Physical port ID */ - uint8_t index; - uint32_t flags; - uint32_t sys_size; /* Default addressing size */ - uint8_t port_ok; - uint8_t link_speed; - uint8_t link_width; - uint32_t dma_max_sge; - uint32_t dma_max_size; - uint32_t dma_align; - uint32_t transfer_mode; /* Default transfer mode */ - uint32_t cap_sys_size; /* Capable system sizes */ - uint32_t cap_addr_size; /* Capable addressing sizes */ - uint32_t cap_transfer_mode; /* Capable transfer modes */ - uint32_t cap_mport; /* Mport capabilities */ + __u16 hdid; + __u8 id; /* Physical port ID */ + __u8 index; + __u32 flags; + __u32 sys_size; /* Default addressing size */ + __u8 port_ok; + __u8 link_speed; + __u8 link_width; + __u8 pad0; + __u32 dma_max_sge; + __u32 dma_max_size; + __u32 dma_align; + __u32 transfer_mode; /* Default transfer mode */ + __u32 cap_sys_size; /* Capable system sizes */ + __u32 cap_addr_size; /* Capable addressing sizes */ + __u32 cap_transfer_mode; /* Capable transfer modes */ + __u32 cap_mport; 
/* Mport capabilities */ }; /* @@ -93,54 +94,57 @@ struct rio_mport_properties { #define RIO_PORTWRITE (1 << 1) struct rio_doorbell { - uint32_t rioid; - uint16_t payload; + __u16 rioid; + __u16 payload; }; struct rio_doorbell_filter { - uint32_t rioid; /* 0xffffffff to match all ids */ - uint16_t low; - uint16_t high; + __u16 rioid; /* Use RIO_INVALID_DESTID to match all ids */ + __u16 low; + __u16 high; + __u16 pad0; }; struct rio_portwrite { - uint32_t payload[16]; + __u32 payload[16]; }; struct rio_pw_filter { - uint32_t mask; - uint32_t low; - uint32_t high; + __u32 mask; + __u32 low; + __u32 high; + __u32 pad0; }; /* RapidIO base address for inbound requests set to value defined below * indicates that no specific RIO-to-local address translation is requested * and driver should use direct (one-to-one) address mapping. */ -#define RIO_MAP_ANY_ADDR (uint64_t)(~((uint64_t) 0)) +#define RIO_MAP_ANY_ADDR (__u64)(~((__u64) 0)) struct rio_mmap { - uint32_t rioid; - uint64_t rio_addr; - uint64_t length; - uint64_t handle; - void *address; + __u16 rioid; + __u16 pad0[3]; + __u64 rio_addr; + __u64 length; + __u64 handle; + __u64 address; }; struct rio_dma_mem { - uint64_t length; /* length of DMA memory */ - uint64_t dma_handle; /* handle associated with this memory */ - void *buffer; /* pointer to this memory */ + __u64 length; /* length of DMA memory */ + __u64 dma_handle; /* handle associated with this memory */ + __u64 address; }; - struct rio_event { - unsigned int header; /* event type RIO_DOORBELL or RIO_PORTWRITE */ + __u32 header; /* event type RIO_DOORBELL or RIO_PORTWRITE */ union { struct rio_doorbell doorbell; /* header for RIO_DOORBELL */ struct rio_portwrite portwrite; /* header for RIO_PORTWRITE */ } u; + __u32 pad0; }; enum rio_transfer_sync { @@ -184,35 +188,37 @@ enum rio_exchange { }; struct rio_transfer_io { - uint32_t rioid; /* Target destID */ - uint64_t rio_addr; /* Address in target's RIO mem space */ - enum rio_exchange method; /* Data 
exchange method */ - void __user *loc_addr; - uint64_t handle; - uint64_t offset; /* Offset in buffer */ - uint64_t length; /* Length in bytes */ - uint32_t completion_code; /* Completion code for this transfer */ + __u64 rio_addr; /* Address in target's RIO mem space */ + __u64 loc_addr; + __u64 handle; + __u64 offset; /* Offset in buffer */ + __u64 length; /* Length in bytes */ + __u16 rioid; /* Target destID */ + __u16 method; /* Data exchange method, one of rio_exchange enum */ + __u32 completion_code; /* Completion code for this transfer */ }; struct rio_transaction { - uint32_t transfer_mode; /* Data transfer mode */ - enum rio_transfer_sync sync; /* Synchronization method */ - enum rio_transfer_dir dir; /* Transfer direction */ - size_t count; /* Number of transfers */ - struct rio_transfer_io __user *block; /* Array of <count> transfers */ + __u64 block; /* Pointer to array of <count> transfers */ + __u32 count; /* Number of transfers */ + __u32 transfer_mode; /* Data transfer mode */ + __u16 sync; /* Synch method, one of rio_transfer_sync enum */ + __u16 dir; /* Transfer direction, one of rio_transfer_dir enum */ + __u32 pad0; }; struct rio_async_tx_wait { - uint32_t token; /* DMA transaction ID token */ - uint32_t timeout; /* Wait timeout in msec, if 0 use default TO */ + __u32 token; /* DMA transaction ID token */ + __u32 timeout; /* Wait timeout in msec, if 0 use default TO */ }; #define RIO_MAX_DEVNAME_SZ 20 struct rio_rdev_info { - uint32_t destid; - uint8_t hopcount; - uint32_t comptag; + __u16 destid; + __u8 hopcount; + __u8 pad0; + __u32 comptag; char name[RIO_MAX_DEVNAME_SZ + 1]; }; @@ -220,11 +226,11 @@ struct rio_rdev_info { #define RIO_MPORT_DRV_MAGIC 'm' #define RIO_MPORT_MAINT_HDID_SET \ - _IOW(RIO_MPORT_DRV_MAGIC, 1, uint16_t) + _IOW(RIO_MPORT_DRV_MAGIC, 1, __u16) #define RIO_MPORT_MAINT_COMPTAG_SET \ - _IOW(RIO_MPORT_DRV_MAGIC, 2, uint32_t) + _IOW(RIO_MPORT_DRV_MAGIC, 2, __u32) #define RIO_MPORT_MAINT_PORT_IDX_GET \ - 
_IOR(RIO_MPORT_DRV_MAGIC, 3, uint32_t) + _IOR(RIO_MPORT_DRV_MAGIC, 3, __u32) #define RIO_MPORT_GET_PROPERTIES \ _IOR(RIO_MPORT_DRV_MAGIC, 4, struct rio_mport_properties) #define RIO_MPORT_MAINT_READ_LOCAL \ @@ -244,9 +250,9 @@ struct rio_rdev_info { #define RIO_DISABLE_PORTWRITE_RANGE \ _IOW(RIO_MPORT_DRV_MAGIC, 12, struct rio_pw_filter) #define RIO_SET_EVENT_MASK \ - _IOW(RIO_MPORT_DRV_MAGIC, 13, unsigned int) + _IOW(RIO_MPORT_DRV_MAGIC, 13, __u32) #define RIO_GET_EVENT_MASK \ - _IOR(RIO_MPORT_DRV_MAGIC, 14, unsigned int) + _IOR(RIO_MPORT_DRV_MAGIC, 14, __u32) #define RIO_MAP_OUTBOUND \ _IOWR(RIO_MPORT_DRV_MAGIC, 15, struct rio_mmap) #define RIO_UNMAP_OUTBOUND \ @@ -254,11 +260,11 @@ struct rio_rdev_info { #define RIO_MAP_INBOUND \ _IOWR(RIO_MPORT_DRV_MAGIC, 17, struct rio_mmap) #define RIO_UNMAP_INBOUND \ - _IOW(RIO_MPORT_DRV_MAGIC, 18, uint64_t) + _IOW(RIO_MPORT_DRV_MAGIC, 18, __u64) #define RIO_ALLOC_DMA \ _IOWR(RIO_MPORT_DRV_MAGIC, 19, struct rio_dma_mem) #define RIO_FREE_DMA \ - _IOW(RIO_MPORT_DRV_MAGIC, 20, uint64_t) + _IOW(RIO_MPORT_DRV_MAGIC, 20, __u64) #define RIO_TRANSFER \ _IOWR(RIO_MPORT_DRV_MAGIC, 21, struct rio_transaction) #define RIO_WAIT_FOR_ASYNC \ diff --git a/include/uapi/linux/swab.h b/include/uapi/linux/swab.h index 3f10e5317b46..8f3a8f606fd9 100644 --- a/include/uapi/linux/swab.h +++ b/include/uapi/linux/swab.h @@ -45,9 +45,7 @@ static inline __attribute_const__ __u16 __fswab16(__u16 val) { -#ifdef __HAVE_BUILTIN_BSWAP16__ - return __builtin_bswap16(val); -#elif defined (__arch_swab16) +#if defined (__arch_swab16) return __arch_swab16(val); #else return ___constant_swab16(val); @@ -56,9 +54,7 @@ static inline __attribute_const__ __u16 __fswab16(__u16 val) static inline __attribute_const__ __u32 __fswab32(__u32 val) { -#ifdef __HAVE_BUILTIN_BSWAP32__ - return __builtin_bswap32(val); -#elif defined(__arch_swab32) +#if defined(__arch_swab32) return __arch_swab32(val); #else return ___constant_swab32(val); @@ -67,9 +63,7 @@ static inline 
__attribute_const__ __u32 __fswab32(__u32 val) static inline __attribute_const__ __u64 __fswab64(__u64 val) { -#ifdef __HAVE_BUILTIN_BSWAP64__ - return __builtin_bswap64(val); -#elif defined (__arch_swab64) +#if defined (__arch_swab64) return __arch_swab64(val); #elif defined(__SWAB_64_THRU_32__) __u32 h = val >> 32; @@ -102,28 +96,40 @@ static inline __attribute_const__ __u32 __fswahb32(__u32 val) * __swab16 - return a byteswapped 16-bit value * @x: value to byteswap */ +#ifdef __HAVE_BUILTIN_BSWAP16__ +#define __swab16(x) (__u16)__builtin_bswap16((__u16)(x)) +#else #define __swab16(x) \ (__builtin_constant_p((__u16)(x)) ? \ ___constant_swab16(x) : \ __fswab16(x)) +#endif /** * __swab32 - return a byteswapped 32-bit value * @x: value to byteswap */ +#ifdef __HAVE_BUILTIN_BSWAP32__ +#define __swab32(x) (__u32)__builtin_bswap32((__u32)(x)) +#else #define __swab32(x) \ (__builtin_constant_p((__u32)(x)) ? \ ___constant_swab32(x) : \ __fswab32(x)) +#endif /** * __swab64 - return a byteswapped 64-bit value * @x: value to byteswap */ +#ifdef __HAVE_BUILTIN_BSWAP64__ +#define __swab64(x) (__u64)__builtin_bswap64((__u64)(x)) +#else #define __swab64(x) \ (__builtin_constant_p((__u64)(x)) ? 
\ ___constant_swab64(x) : \ __fswab64(x)) +#endif /** * __swahw32 - return a word-swapped 32-bit value diff --git a/include/uapi/linux/udp.h b/include/uapi/linux/udp.h index 16574ea18f0c..2c8180f9156f 100644 --- a/include/uapi/linux/udp.h +++ b/include/uapi/linux/udp.h @@ -36,6 +36,7 @@ struct udphdr { #define UDP_ENCAP_ESPINUDP_NON_IKE 1 /* draft-ietf-ipsec-nat-t-ike-00/01 */ #define UDP_ENCAP_ESPINUDP 2 /* draft-ietf-ipsec-udp-encaps-06 */ #define UDP_ENCAP_L2TPINUDP 3 /* rfc2661 */ - +#define UDP_ENCAP_GTP0 4 /* GSM TS 09.60 */ +#define UDP_ENCAP_GTP1U 5 /* 3GPP TS 29.060 */ #endif /* _UAPI_LINUX_UDP_H */ diff --git a/include/xen/page.h b/include/xen/page.h index 96294ac93755..9dc46cb8a0fd 100644 --- a/include/xen/page.h +++ b/include/xen/page.h @@ -15,9 +15,9 @@ */ #define xen_pfn_to_page(xen_pfn) \ - ((pfn_to_page(((unsigned long)(xen_pfn) << XEN_PAGE_SHIFT) >> PAGE_SHIFT))) + (pfn_to_page((unsigned long)(xen_pfn) >> (PAGE_SHIFT - XEN_PAGE_SHIFT))) #define page_to_xen_pfn(page) \ - (((page_to_pfn(page)) << PAGE_SHIFT) >> XEN_PAGE_SHIFT) + ((page_to_pfn(page)) << (PAGE_SHIFT - XEN_PAGE_SHIFT)) #define XEN_PFN_PER_PAGE (PAGE_SIZE / XEN_PAGE_SIZE) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 8b489fcac37b..d1f7149f8704 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -596,17 +596,8 @@ bool sched_can_stop_tick(struct rq *rq) return false; /* - * FIFO realtime policy runs the highest priority task (after DEADLINE). - * Other runnable tasks are of a lower priority. The scheduler tick - * isn't needed. - */ - fifo_nr_running = rq->rt.rt_nr_running - rq->rt.rr_nr_running; - if (fifo_nr_running) - return true; - - /* - * Round-robin realtime tasks time slice with other tasks at the same - * realtime priority. + * If there are more than one RR tasks, we need the tick to effect the + * actual RR behaviour. 
*/ if (rq->rt.rr_nr_running) { if (rq->rt.rr_nr_running == 1) @@ -615,8 +606,20 @@ bool sched_can_stop_tick(struct rq *rq) return false; } - /* Normal multitasking need periodic preemption checks */ - if (rq->cfs.nr_running > 1) + /* + * If there's no RR tasks, but FIFO tasks, we can skip the tick, no + * forced preemption between FIFO tasks. + */ + fifo_nr_running = rq->rt.rt_nr_running - rq->rt.rr_nr_running; + if (fifo_nr_running) + return true; + + /* + * If there are no DL,RR/FIFO tasks, there must only be CFS tasks left; + * if there's more than one we need the tick for involuntary + * preemption. + */ + if (rq->nr_running > 1) return false; return true; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index ced963049e0a..b7b0760ba6ee 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -2113,8 +2113,13 @@ event_create_dir(struct dentry *parent, struct trace_event_file *file) trace_create_file("filter", 0644, file->dir, file, &ftrace_event_filter_fops); - trace_create_file("trigger", 0644, file->dir, file, - &event_trigger_fops); + /* + * Only event directories that can be enabled should have + * triggers. 
+ */ + if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)) + trace_create_file("trigger", 0644, file->dir, file, + &event_trigger_fops); trace_create_file("format", 0444, file->dir, call, &ftrace_event_format_fops); diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 9e0b0315a724..53ad6c0831ae 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -42,12 +42,14 @@ #define DEPOT_STACK_BITS (sizeof(depot_stack_handle_t) * 8) +#define STACK_ALLOC_NULL_PROTECTION_BITS 1 #define STACK_ALLOC_ORDER 2 /* 'Slab' size order for stack depot, 4 pages */ #define STACK_ALLOC_SIZE (1LL << (PAGE_SHIFT + STACK_ALLOC_ORDER)) #define STACK_ALLOC_ALIGN 4 #define STACK_ALLOC_OFFSET_BITS (STACK_ALLOC_ORDER + PAGE_SHIFT - \ STACK_ALLOC_ALIGN) -#define STACK_ALLOC_INDEX_BITS (DEPOT_STACK_BITS - STACK_ALLOC_OFFSET_BITS) +#define STACK_ALLOC_INDEX_BITS (DEPOT_STACK_BITS - \ + STACK_ALLOC_NULL_PROTECTION_BITS - STACK_ALLOC_OFFSET_BITS) #define STACK_ALLOC_SLABS_CAP 1024 #define STACK_ALLOC_MAX_SLABS \ (((1LL << (STACK_ALLOC_INDEX_BITS)) < STACK_ALLOC_SLABS_CAP) ? 
\ @@ -59,6 +61,7 @@ union handle_parts { struct { u32 slabindex : STACK_ALLOC_INDEX_BITS; u32 offset : STACK_ALLOC_OFFSET_BITS; + u32 valid : STACK_ALLOC_NULL_PROTECTION_BITS; }; }; @@ -136,6 +139,7 @@ static struct stack_record *depot_alloc_stack(unsigned long *entries, int size, stack->size = size; stack->handle.slabindex = depot_index; stack->handle.offset = depot_offset >> STACK_ALLOC_ALIGN; + stack->handle.valid = 1; memcpy(stack->entries, entries, size * sizeof(unsigned long)); depot_offset += required_size; diff --git a/mm/compaction.c b/mm/compaction.c index ccf97b02b85f..8fa254043801 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -852,16 +852,8 @@ isolate_migratepages_range(struct compact_control *cc, unsigned long start_pfn, pfn = isolate_migratepages_block(cc, pfn, block_end_pfn, ISOLATE_UNEVICTABLE); - /* - * In case of fatal failure, release everything that might - * have been isolated in the previous iteration, and signal - * the failure back to caller. - */ - if (!pfn) { - putback_movable_pages(&cc->migratepages); - cc->nr_migratepages = 0; + if (!pfn) break; - } if (cc->nr_migratepages == COMPACT_CLUSTER_MAX) break; @@ -1741,7 +1733,7 @@ void compaction_unregister_node(struct node *node) static inline bool kcompactd_work_requested(pg_data_t *pgdat) { - return pgdat->kcompactd_max_order > 0; + return pgdat->kcompactd_max_order > 0 || kthread_should_stop(); } static bool kcompactd_node_suitable(pg_data_t *pgdat) @@ -1805,6 +1797,8 @@ static void kcompactd_do_work(pg_data_t *pgdat) INIT_LIST_HEAD(&cc.freepages); INIT_LIST_HEAD(&cc.migratepages); + if (kthread_should_stop()) + return; status = compact_zone(zone, &cc); if (zone_watermark_ok(zone, cc.order, low_wmark_pages(zone), diff --git a/mm/huge_memory.c b/mm/huge_memory.c index df67b53ae3c5..f7daa7de8f48 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -3452,7 +3452,7 @@ next: } } - pr_info("%lu of %lu THP split", split, total); + pr_info("%lu of %lu THP split\n", split, total); return 
0; } @@ -3463,7 +3463,7 @@ static int __init split_huge_pages_debugfs(void) { void *ret; - ret = debugfs_create_file("split_huge_pages", 0644, NULL, NULL, + ret = debugfs_create_file("split_huge_pages", 0200, NULL, NULL, &split_huge_pages_fops); if (!ret) pr_warn("Failed to create split_huge_pages in debugfs"); diff --git a/mm/memory.c b/mm/memory.c index 305537fc8640..52c218e2b724 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1222,15 +1222,8 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb, next = pmd_addr_end(addr, end); if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) { if (next - addr != HPAGE_PMD_SIZE) { -#ifdef CONFIG_DEBUG_VM - if (!rwsem_is_locked(&tlb->mm->mmap_sem)) { - pr_err("%s: mmap_sem is unlocked! addr=0x%lx end=0x%lx vma->vm_start=0x%lx vma->vm_end=0x%lx\n", - __func__, addr, end, - vma->vm_start, - vma->vm_end); - BUG(); - } -#endif + VM_BUG_ON_VMA(vma_is_anonymous(vma) && + !rwsem_is_locked(&tlb->mm->mmap_sem), vma); split_huge_pmd(vma, pmd, addr); } else if (zap_huge_pmd(tlb, vma, pmd, addr)) goto next; diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 999792d35ccc..bc5149d5ec38 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -1910,7 +1910,8 @@ bool wb_over_bg_thresh(struct bdi_writeback *wb) if (gdtc->dirty > gdtc->bg_thresh) return true; - if (wb_stat(wb, WB_RECLAIMABLE) > __wb_calc_thresh(gdtc)) + if (wb_stat(wb, WB_RECLAIMABLE) > + wb_calc_thresh(gdtc->wb, gdtc->bg_thresh)) return true; if (mdtc) { @@ -1924,7 +1925,8 @@ bool wb_over_bg_thresh(struct bdi_writeback *wb) if (mdtc->dirty > mdtc->bg_thresh) return true; - if (wb_stat(wb, WB_RECLAIMABLE) > __wb_calc_thresh(mdtc)) + if (wb_stat(wb, WB_RECLAIMABLE) > + wb_calc_thresh(mdtc->wb, mdtc->bg_thresh)) return true; } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 59de90d5d3a3..c1069efcc4d7 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -6485,7 +6485,7 @@ int __meminit init_per_zone_wmark_min(void) setup_per_zone_inactive_ratio(); return 
0; } -module_init(init_per_zone_wmark_min) +core_initcall(init_per_zone_wmark_min) /* * min_free_kbytes_sysctl_handler - just a wrapper around proc_dointvec() so diff --git a/mm/zswap.c b/mm/zswap.c index 91dad80d068b..de0f119b1780 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -170,6 +170,8 @@ static struct zswap_tree *zswap_trees[MAX_SWAPFILES]; static LIST_HEAD(zswap_pools); /* protects zswap_pools list modification */ static DEFINE_SPINLOCK(zswap_pools_lock); +/* pool counter to provide unique names to zpool */ +static atomic_t zswap_pools_count = ATOMIC_INIT(0); /* used by param callback function */ static bool zswap_init_started; @@ -565,6 +567,7 @@ static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor) static struct zswap_pool *zswap_pool_create(char *type, char *compressor) { struct zswap_pool *pool; + char name[38]; /* 'zswap' + 32 char (max) num + \0 */ gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM; pool = kzalloc(sizeof(*pool), GFP_KERNEL); @@ -573,7 +576,10 @@ static struct zswap_pool *zswap_pool_create(char *type, char *compressor) return NULL; } - pool->zpool = zpool_create_pool(type, "zswap", gfp, &zswap_zpool_ops); + /* unique name for each pool specifically required by zsmalloc */ + snprintf(name, 38, "zswap%x", atomic_inc_return(&zswap_pools_count)); + + pool->zpool = zpool_create_pool(type, name, gfp, &zswap_zpool_ops); if (!pool->zpool) { pr_err("%s zpool not available\n", type); goto error; diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c index f8fc6241469a..d99b2009771a 100644 --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -21,18 +21,19 @@ #include <asm/uaccess.h> #include "br_private.h" -/* called with RTNL */ static int get_bridge_ifindices(struct net *net, int *indices, int num) { struct net_device *dev; int i = 0; - for_each_netdev(net, dev) { + rcu_read_lock(); + for_each_netdev_rcu(net, dev) { if (i >= num) break; if (dev->priv_flags & IFF_EBRIDGE) indices[i++] = dev->ifindex; } + 
rcu_read_unlock(); return i; } diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 191ea66e4d92..6852f3c7009c 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1279,6 +1279,7 @@ static int br_ip4_multicast_query(struct net_bridge *br, struct br_ip saddr; unsigned long max_delay; unsigned long now = jiffies; + unsigned int offset = skb_transport_offset(skb); __be32 group; int err = 0; @@ -1289,14 +1290,14 @@ static int br_ip4_multicast_query(struct net_bridge *br, group = ih->group; - if (skb->len == sizeof(*ih)) { + if (skb->len == offset + sizeof(*ih)) { max_delay = ih->code * (HZ / IGMP_TIMER_SCALE); if (!max_delay) { max_delay = 10 * HZ; group = 0; } - } else if (skb->len >= sizeof(*ih3)) { + } else if (skb->len >= offset + sizeof(*ih3)) { ih3 = igmpv3_query_hdr(skb); if (ih3->nsrcs) goto out; @@ -1357,6 +1358,7 @@ static int br_ip6_multicast_query(struct net_bridge *br, struct br_ip saddr; unsigned long max_delay; unsigned long now = jiffies; + unsigned int offset = skb_transport_offset(skb); const struct in6_addr *group = NULL; bool is_general_query; int err = 0; @@ -1366,8 +1368,8 @@ static int br_ip6_multicast_query(struct net_bridge *br, (port && port->state == BR_STATE_DISABLED)) goto out; - if (skb->len == sizeof(*mld)) { - if (!pskb_may_pull(skb, sizeof(*mld))) { + if (skb->len == offset + sizeof(*mld)) { + if (!pskb_may_pull(skb, offset + sizeof(*mld))) { err = -EINVAL; goto out; } @@ -1376,7 +1378,7 @@ static int br_ip6_multicast_query(struct net_bridge *br, if (max_delay) group = &mld->mld_mca; } else { - if (!pskb_may_pull(skb, sizeof(*mld2q))) { + if (!pskb_may_pull(skb, offset + sizeof(*mld2q))) { err = -EINVAL; goto out; } diff --git a/net/core/flow.c b/net/core/flow.c index 1033725be40b..3937b1b68d5b 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -92,8 +92,11 @@ static void flow_cache_gc_task(struct work_struct *work) list_splice_tail_init(&xfrm->flow_cache_gc_list, &gc_list); 
spin_unlock_bh(&xfrm->flow_cache_gc_lock); - list_for_each_entry_safe(fce, n, &gc_list, u.gc_list) + list_for_each_entry_safe(fce, n, &gc_list, u.gc_list) { flow_entry_kill(fce, xfrm); + atomic_dec(&xfrm->flow_cache_gc_count); + WARN_ON(atomic_read(&xfrm->flow_cache_gc_count) < 0); + } } static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp, @@ -101,6 +104,7 @@ static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp, struct netns_xfrm *xfrm) { if (deleted) { + atomic_add(deleted, &xfrm->flow_cache_gc_count); fcp->hash_count -= deleted; spin_lock_bh(&xfrm->flow_cache_gc_lock); list_splice_tail(gc_list, &xfrm->flow_cache_gc_list); @@ -232,6 +236,13 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, if (fcp->hash_count > fc->high_watermark) flow_cache_shrink(fc, fcp); + if (fcp->hash_count > 2 * fc->high_watermark || + atomic_read(&net->xfrm.flow_cache_gc_count) > fc->high_watermark) { + atomic_inc(&net->xfrm.flow_cache_genid); + flo = ERR_PTR(-ENOBUFS); + goto ret_object; + } + fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC); if (fle) { fle->net = net; @@ -446,6 +457,7 @@ int flow_cache_init(struct net *net) INIT_WORK(&net->xfrm.flow_cache_gc_work, flow_cache_gc_task); INIT_WORK(&net->xfrm.flow_cache_flush_work, flow_cache_flush_task); mutex_init(&net->xfrm.flow_flush_sem); + atomic_set(&net->xfrm.flow_cache_gc_count, 0); fc->hash_shift = 10; fc->low_watermark = 2 * flow_cache_hash_size(fc); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index d471f097c739..d69c4644f8f2 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1173,14 +1173,16 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev) { - struct rtnl_link_ifmap map = { - .mem_start = dev->mem_start, - .mem_end = dev->mem_end, - .base_addr = dev->base_addr, - .irq = dev->irq, - .dma = dev->dma, - .port = dev->if_port, - }; + struct 
rtnl_link_ifmap map; + + memset(&map, 0, sizeof(map)); + map.mem_start = dev->mem_start; + map.mem_end = dev->mem_end; + map.base_addr = dev->base_addr; + map.irq = dev->irq; + map.dma = dev->dma; + map.port = dev->if_port; + if (nla_put_64bit(skb, IFLA_MAP, sizeof(map), &map, IFLA_PAD)) return -EMSGSIZE; diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 7ac5ec87b004..eeec7d60e5fd 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -227,8 +227,6 @@ static int fou_gro_complete(struct sock *sk, struct sk_buff *skb, int err = -ENOSYS; const struct net_offload **offloads; - udp_tunnel_gro_complete(skb, nhoff); - rcu_read_lock(); offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads; ops = rcu_dereference(offloads[proto]); @@ -237,6 +235,8 @@ static int fou_gro_complete(struct sock *sk, struct sk_buff *skb, err = ops->callbacks.gro_complete(skb, nhoff); + skb_set_inner_mac_header(skb, nhoff); + out_unlock: rcu_read_unlock(); @@ -412,6 +412,8 @@ static int gue_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff) err = ops->callbacks.gro_complete(skb, nhoff + guehlen); + skb_set_inner_mac_header(skb, nhoff + guehlen); + out_unlock: rcu_read_unlock(); return err; diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 5cf10b777b7e..a917903d5e97 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -156,6 +156,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev, struct dst_entry *dst = skb_dst(skb); struct net_device *tdev; /* Device to other host */ int err; + int mtu; if (!dst) { dev->stats.tx_carrier_errors++; @@ -192,6 +193,23 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev, tunnel->err_count = 0; } + mtu = dst_mtu(dst); + if (skb->len > mtu) { + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); + if (skb->protocol == htons(ETH_P_IP)) { + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, + htonl(mtu)); + } else { + if (mtu < IPV6_MIN_MTU) + mtu = IPV6_MIN_MTU; + + 
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + } + + dst_release(dst); + goto tx_error; + } + skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev))); skb_dst_set(skb, dst); skb->dev = skb_dst(skb)->dev; diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 60f5161abcb4..2033f929aa66 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -34,27 +34,6 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("David S. Miller <davem@redhat.com>"); MODULE_DESCRIPTION("arptables core"); -/*#define DEBUG_ARP_TABLES*/ -/*#define DEBUG_ARP_TABLES_USER*/ - -#ifdef DEBUG_ARP_TABLES -#define dprintf(format, args...) pr_debug(format, ## args) -#else -#define dprintf(format, args...) -#endif - -#ifdef DEBUG_ARP_TABLES_USER -#define duprintf(format, args...) pr_debug(format, ## args) -#else -#define duprintf(format, args...) -#endif - -#ifdef CONFIG_NETFILTER_DEBUG -#define ARP_NF_ASSERT(x) WARN_ON(!(x)) -#else -#define ARP_NF_ASSERT(x) -#endif - void *arpt_alloc_initial_table(const struct xt_table *info) { return xt_alloc_initial_table(arpt, ARPT); @@ -113,36 +92,20 @@ static inline int arp_packet_match(const struct arphdr *arphdr, #define FWINV(bool, invflg) ((bool) ^ !!(arpinfo->invflags & (invflg))) if (FWINV((arphdr->ar_op & arpinfo->arpop_mask) != arpinfo->arpop, - ARPT_INV_ARPOP)) { - dprintf("ARP operation field mismatch.\n"); - dprintf("ar_op: %04x info->arpop: %04x info->arpop_mask: %04x\n", - arphdr->ar_op, arpinfo->arpop, arpinfo->arpop_mask); + ARPT_INV_ARPOP)) return 0; - } if (FWINV((arphdr->ar_hrd & arpinfo->arhrd_mask) != arpinfo->arhrd, - ARPT_INV_ARPHRD)) { - dprintf("ARP hardware address format mismatch.\n"); - dprintf("ar_hrd: %04x info->arhrd: %04x info->arhrd_mask: %04x\n", - arphdr->ar_hrd, arpinfo->arhrd, arpinfo->arhrd_mask); + ARPT_INV_ARPHRD)) return 0; - } if (FWINV((arphdr->ar_pro & arpinfo->arpro_mask) != arpinfo->arpro, - ARPT_INV_ARPPRO)) { - dprintf("ARP protocol address format mismatch.\n"); - 
dprintf("ar_pro: %04x info->arpro: %04x info->arpro_mask: %04x\n", - arphdr->ar_pro, arpinfo->arpro, arpinfo->arpro_mask); + ARPT_INV_ARPPRO)) return 0; - } if (FWINV((arphdr->ar_hln & arpinfo->arhln_mask) != arpinfo->arhln, - ARPT_INV_ARPHLN)) { - dprintf("ARP hardware address length mismatch.\n"); - dprintf("ar_hln: %02x info->arhln: %02x info->arhln_mask: %02x\n", - arphdr->ar_hln, arpinfo->arhln, arpinfo->arhln_mask); + ARPT_INV_ARPHLN)) return 0; - } src_devaddr = arpptr; arpptr += dev->addr_len; @@ -155,49 +118,25 @@ static inline int arp_packet_match(const struct arphdr *arphdr, if (FWINV(arp_devaddr_compare(&arpinfo->src_devaddr, src_devaddr, dev->addr_len), ARPT_INV_SRCDEVADDR) || FWINV(arp_devaddr_compare(&arpinfo->tgt_devaddr, tgt_devaddr, dev->addr_len), - ARPT_INV_TGTDEVADDR)) { - dprintf("Source or target device address mismatch.\n"); - + ARPT_INV_TGTDEVADDR)) return 0; - } if (FWINV((src_ipaddr & arpinfo->smsk.s_addr) != arpinfo->src.s_addr, ARPT_INV_SRCIP) || FWINV(((tgt_ipaddr & arpinfo->tmsk.s_addr) != arpinfo->tgt.s_addr), - ARPT_INV_TGTIP)) { - dprintf("Source or target IP address mismatch.\n"); - - dprintf("SRC: %pI4. Mask: %pI4. Target: %pI4.%s\n", - &src_ipaddr, - &arpinfo->smsk.s_addr, - &arpinfo->src.s_addr, - arpinfo->invflags & ARPT_INV_SRCIP ? " (INV)" : ""); - dprintf("TGT: %pI4 Mask: %pI4 Target: %pI4.%s\n", - &tgt_ipaddr, - &arpinfo->tmsk.s_addr, - &arpinfo->tgt.s_addr, - arpinfo->invflags & ARPT_INV_TGTIP ? " (INV)" : ""); + ARPT_INV_TGTIP)) return 0; - } /* Look for ifname matches. */ ret = ifname_compare(indev, arpinfo->iniface, arpinfo->iniface_mask); - if (FWINV(ret != 0, ARPT_INV_VIA_IN)) { - dprintf("VIA in mismatch (%s vs %s).%s\n", - indev, arpinfo->iniface, - arpinfo->invflags & ARPT_INV_VIA_IN ? 
" (INV)" : ""); + if (FWINV(ret != 0, ARPT_INV_VIA_IN)) return 0; - } ret = ifname_compare(outdev, arpinfo->outiface, arpinfo->outiface_mask); - if (FWINV(ret != 0, ARPT_INV_VIA_OUT)) { - dprintf("VIA out mismatch (%s vs %s).%s\n", - outdev, arpinfo->outiface, - arpinfo->invflags & ARPT_INV_VIA_OUT ? " (INV)" : ""); + if (FWINV(ret != 0, ARPT_INV_VIA_OUT)) return 0; - } return 1; #undef FWINV @@ -205,16 +144,10 @@ static inline int arp_packet_match(const struct arphdr *arphdr, static inline int arp_checkentry(const struct arpt_arp *arp) { - if (arp->flags & ~ARPT_F_MASK) { - duprintf("Unknown flag bits set: %08X\n", - arp->flags & ~ARPT_F_MASK); + if (arp->flags & ~ARPT_F_MASK) return 0; - } - if (arp->invflags & ~ARPT_INV_MASK) { - duprintf("Unknown invflag bits set: %08X\n", - arp->invflags & ~ARPT_INV_MASK); + if (arp->invflags & ~ARPT_INV_MASK) return 0; - } return 1; } @@ -406,11 +339,9 @@ static int mark_source_chains(const struct xt_table_info *newinfo, = (void *)arpt_get_target_c(e); int visited = e->comefrom & (1 << hook); - if (e->comefrom & (1 << NF_ARP_NUMHOOKS)) { - pr_notice("arptables: loop hook %u pos %u %08X.\n", - hook, pos, e->comefrom); + if (e->comefrom & (1 << NF_ARP_NUMHOOKS)) return 0; - } + e->comefrom |= ((1 << hook) | (1 << NF_ARP_NUMHOOKS)); @@ -423,12 +354,8 @@ static int mark_source_chains(const struct xt_table_info *newinfo, if ((strcmp(t->target.u.user.name, XT_STANDARD_TARGET) == 0) && - t->verdict < -NF_MAX_VERDICT - 1) { - duprintf("mark_source_chains: bad " - "negative verdict (%i)\n", - t->verdict); + t->verdict < -NF_MAX_VERDICT - 1) return 0; - } /* Return: backtrack through the last * big jump. @@ -462,8 +389,6 @@ static int mark_source_chains(const struct xt_table_info *newinfo, XT_STANDARD_TARGET) == 0 && newpos >= 0) { /* This a jump; chase it. 
*/ - duprintf("Jump rule %u -> %u\n", - pos, newpos); e = (struct arpt_entry *) (entry0 + newpos); if (!find_jump_target(newinfo, e)) @@ -480,8 +405,7 @@ static int mark_source_chains(const struct xt_table_info *newinfo, pos = newpos; } } -next: - duprintf("Finished chain %u\n", hook); +next: ; } return 1; } @@ -489,7 +413,6 @@ next: static inline int check_target(struct arpt_entry *e, const char *name) { struct xt_entry_target *t = arpt_get_target(e); - int ret; struct xt_tgchk_param par = { .table = name, .entryinfo = e, @@ -499,13 +422,7 @@ static inline int check_target(struct arpt_entry *e, const char *name) .family = NFPROTO_ARP, }; - ret = xt_check_target(&par, t->u.target_size - sizeof(*t), 0, false); - if (ret < 0) { - duprintf("arp_tables: check failed for `%s'.\n", - t->u.kernel.target->name); - return ret; - } - return 0; + return xt_check_target(&par, t->u.target_size - sizeof(*t), 0, false); } static inline int @@ -513,17 +430,18 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size) { struct xt_entry_target *t; struct xt_target *target; + unsigned long pcnt; int ret; - e->counters.pcnt = xt_percpu_counter_alloc(); - if (IS_ERR_VALUE(e->counters.pcnt)) + pcnt = xt_percpu_counter_alloc(); + if (IS_ERR_VALUE(pcnt)) return -ENOMEM; + e->counters.pcnt = pcnt; t = arpt_get_target(e); target = xt_request_find_target(NFPROTO_ARP, t->u.user.name, t->u.user.revision); if (IS_ERR(target)) { - duprintf("find_check_entry: `%s' not found\n", t->u.user.name); ret = PTR_ERR(target); goto out; } @@ -569,17 +487,12 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e, if ((unsigned long)e % __alignof__(struct arpt_entry) != 0 || (unsigned char *)e + sizeof(struct arpt_entry) >= limit || - (unsigned char *)e + e->next_offset > limit) { - duprintf("Bad offset %p\n", e); + (unsigned char *)e + e->next_offset > limit) return -EINVAL; - } if (e->next_offset - < sizeof(struct arpt_entry) + sizeof(struct xt_entry_target)) { - 
duprintf("checking: element %p size %u\n", - e, e->next_offset); + < sizeof(struct arpt_entry) + sizeof(struct xt_entry_target)) return -EINVAL; - } if (!arp_checkentry(&e->arp)) return -EINVAL; @@ -596,12 +509,9 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e, if ((unsigned char *)e - base == hook_entries[h]) newinfo->hook_entry[h] = hook_entries[h]; if ((unsigned char *)e - base == underflows[h]) { - if (!check_underflow(e)) { - pr_debug("Underflows must be unconditional and " - "use the STANDARD target with " - "ACCEPT/DROP\n"); + if (!check_underflow(e)) return -EINVAL; - } + newinfo->underflow[h] = underflows[h]; } } @@ -646,7 +556,6 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0, newinfo->underflow[i] = 0xFFFFFFFF; } - duprintf("translate_table: size %u\n", newinfo->size); i = 0; /* Walk through entries, checking offsets. */ @@ -663,31 +572,21 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0, XT_ERROR_TARGET) == 0) ++newinfo->stacksize; } - duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret); if (ret != 0) return ret; - if (i != repl->num_entries) { - duprintf("translate_table: %u not %u entries\n", - i, repl->num_entries); + if (i != repl->num_entries) return -EINVAL; - } /* Check hooks all assigned */ for (i = 0; i < NF_ARP_NUMHOOKS; i++) { /* Only hooks which are valid */ if (!(repl->valid_hooks & (1 << i))) continue; - if (newinfo->hook_entry[i] == 0xFFFFFFFF) { - duprintf("Invalid hook entry %u %u\n", - i, repl->hook_entry[i]); + if (newinfo->hook_entry[i] == 0xFFFFFFFF) return -EINVAL; - } - if (newinfo->underflow[i] == 0xFFFFFFFF) { - duprintf("Invalid underflow %u %u\n", - i, repl->underflow[i]); + if (newinfo->underflow[i] == 0xFFFFFFFF) return -EINVAL; - } } if (!mark_source_chains(newinfo, repl->valid_hooks, entry0)) @@ -895,11 +794,8 @@ static int get_info(struct net *net, void __user *user, struct xt_table *t; int ret; - if (*len != sizeof(struct arpt_getinfo)) { 
- duprintf("length %u != %Zu\n", *len, - sizeof(struct arpt_getinfo)); + if (*len != sizeof(struct arpt_getinfo)) return -EINVAL; - } if (copy_from_user(name, user, sizeof(name)) != 0) return -EFAULT; @@ -955,33 +851,25 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr, struct arpt_get_entries get; struct xt_table *t; - if (*len < sizeof(get)) { - duprintf("get_entries: %u < %Zu\n", *len, sizeof(get)); + if (*len < sizeof(get)) return -EINVAL; - } if (copy_from_user(&get, uptr, sizeof(get)) != 0) return -EFAULT; - if (*len != sizeof(struct arpt_get_entries) + get.size) { - duprintf("get_entries: %u != %Zu\n", *len, - sizeof(struct arpt_get_entries) + get.size); + if (*len != sizeof(struct arpt_get_entries) + get.size) return -EINVAL; - } + get.name[sizeof(get.name) - 1] = '\0'; t = xt_find_table_lock(net, NFPROTO_ARP, get.name); if (!IS_ERR_OR_NULL(t)) { const struct xt_table_info *private = t->private; - duprintf("t->private->number = %u\n", - private->number); if (get.size == private->size) ret = copy_entries_to_user(private->size, t, uptr->entrytable); - else { - duprintf("get_entries: I've got %u not %u!\n", - private->size, get.size); + else ret = -EAGAIN; - } + module_put(t->me); xt_table_unlock(t); } else @@ -1019,8 +907,6 @@ static int __do_replace(struct net *net, const char *name, /* You lied! 
*/ if (valid_hooks != t->valid_hooks) { - duprintf("Valid hook crap: %08X vs %08X\n", - valid_hooks, t->valid_hooks); ret = -EINVAL; goto put_module; } @@ -1030,8 +916,6 @@ static int __do_replace(struct net *net, const char *name, goto put_module; /* Update module usage count based on number of rules */ - duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n", - oldinfo->number, oldinfo->initial_entries, newinfo->number); if ((oldinfo->number > oldinfo->initial_entries) || (newinfo->number <= oldinfo->initial_entries)) module_put(t->me); @@ -1101,8 +985,6 @@ static int do_replace(struct net *net, const void __user *user, if (ret != 0) goto free_newinfo; - duprintf("arp_tables: Translated table\n"); - ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, tmp.num_counters, tmp.counters); if (ret) @@ -1200,20 +1082,14 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, unsigned int entry_offset; int ret, off; - duprintf("check_compat_entry_size_and_hooks %p\n", e); if ((unsigned long)e % __alignof__(struct compat_arpt_entry) != 0 || (unsigned char *)e + sizeof(struct compat_arpt_entry) >= limit || - (unsigned char *)e + e->next_offset > limit) { - duprintf("Bad offset %p, limit = %p\n", e, limit); + (unsigned char *)e + e->next_offset > limit) return -EINVAL; - } if (e->next_offset < sizeof(struct compat_arpt_entry) + - sizeof(struct compat_xt_entry_target)) { - duprintf("checking: element %p size %u\n", - e, e->next_offset); + sizeof(struct compat_xt_entry_target)) return -EINVAL; - } if (!arp_checkentry(&e->arp)) return -EINVAL; @@ -1230,8 +1106,6 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, target = xt_request_find_target(NFPROTO_ARP, t->u.user.name, t->u.user.revision); if (IS_ERR(target)) { - duprintf("check_compat_entry_size_and_hooks: `%s' not found\n", - t->u.user.name); ret = PTR_ERR(target); goto out; } @@ -1301,7 +1175,6 @@ static int translate_compat_table(struct xt_table_info **pinfo, size = compatr->size; 
info->number = compatr->num_entries; - duprintf("translate_compat_table: size %u\n", info->size); j = 0; xt_compat_lock(NFPROTO_ARP); xt_compat_init_offsets(NFPROTO_ARP, compatr->num_entries); @@ -1316,11 +1189,8 @@ static int translate_compat_table(struct xt_table_info **pinfo, } ret = -EINVAL; - if (j != compatr->num_entries) { - duprintf("translate_compat_table: %u not %u entries\n", - j, compatr->num_entries); + if (j != compatr->num_entries) goto out_unlock; - } ret = -ENOMEM; newinfo = xt_alloc_table_info(size); @@ -1411,8 +1281,6 @@ static int compat_do_replace(struct net *net, void __user *user, if (ret != 0) goto free_newinfo; - duprintf("compat_do_replace: Translated table\n"); - ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, tmp.num_counters, compat_ptr(tmp.counters)); if (ret) @@ -1445,7 +1313,6 @@ static int compat_do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, break; default: - duprintf("do_arpt_set_ctl: unknown request %i\n", cmd); ret = -EINVAL; } @@ -1528,17 +1395,13 @@ static int compat_get_entries(struct net *net, struct compat_arpt_get_entries get; struct xt_table *t; - if (*len < sizeof(get)) { - duprintf("compat_get_entries: %u < %zu\n", *len, sizeof(get)); + if (*len < sizeof(get)) return -EINVAL; - } if (copy_from_user(&get, uptr, sizeof(get)) != 0) return -EFAULT; - if (*len != sizeof(struct compat_arpt_get_entries) + get.size) { - duprintf("compat_get_entries: %u != %zu\n", - *len, sizeof(get) + get.size); + if (*len != sizeof(struct compat_arpt_get_entries) + get.size) return -EINVAL; - } + get.name[sizeof(get.name) - 1] = '\0'; xt_compat_lock(NFPROTO_ARP); @@ -1547,16 +1410,13 @@ static int compat_get_entries(struct net *net, const struct xt_table_info *private = t->private; struct xt_table_info info; - duprintf("t->private->number = %u\n", private->number); ret = compat_table_info(private, &info); if (!ret && get.size == info.size) { ret = compat_copy_entries_to_user(private->size, t, uptr->entrytable); - } 
else if (!ret) { - duprintf("compat_get_entries: I've got %u not %u!\n", - private->size, get.size); + } else if (!ret) ret = -EAGAIN; - } + xt_compat_flush_offsets(NFPROTO_ARP); module_put(t->me); xt_table_unlock(t); @@ -1608,7 +1468,6 @@ static int do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned break; default: - duprintf("do_arpt_set_ctl: unknown request %i\n", cmd); ret = -EINVAL; } @@ -1651,7 +1510,6 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len } default: - duprintf("do_arpt_get_ctl: unknown request %i\n", cmd); ret = -EINVAL; } @@ -1696,7 +1554,6 @@ int arpt_register_table(struct net *net, memcpy(loc_cpu_entry, repl->entries, repl->size); ret = translate_table(newinfo, loc_cpu_entry, repl); - duprintf("arpt_register_table: translate table gives %d\n", ret); if (ret != 0) goto out_free; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 735d1ee8c1ab..54906e0e8e0c 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -35,34 +35,12 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); MODULE_DESCRIPTION("IPv4 packet filter"); -/*#define DEBUG_IP_FIREWALL*/ -/*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */ -/*#define DEBUG_IP_FIREWALL_USER*/ - -#ifdef DEBUG_IP_FIREWALL -#define dprintf(format, args...) pr_info(format , ## args) -#else -#define dprintf(format, args...) -#endif - -#ifdef DEBUG_IP_FIREWALL_USER -#define duprintf(format, args...) pr_info(format , ## args) -#else -#define duprintf(format, args...) -#endif - #ifdef CONFIG_NETFILTER_DEBUG #define IP_NF_ASSERT(x) WARN_ON(!(x)) #else #define IP_NF_ASSERT(x) #endif -#if 0 -/* All the better to debug you with... 
*/ -#define static -#define inline -#endif - void *ipt_alloc_initial_table(const struct xt_table *info) { return xt_alloc_initial_table(ipt, IPT); @@ -85,52 +63,28 @@ ip_packet_match(const struct iphdr *ip, if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr, IPT_INV_SRCIP) || FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr, - IPT_INV_DSTIP)) { - dprintf("Source or dest mismatch.\n"); - - dprintf("SRC: %pI4. Mask: %pI4. Target: %pI4.%s\n", - &ip->saddr, &ipinfo->smsk.s_addr, &ipinfo->src.s_addr, - ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : ""); - dprintf("DST: %pI4 Mask: %pI4 Target: %pI4.%s\n", - &ip->daddr, &ipinfo->dmsk.s_addr, &ipinfo->dst.s_addr, - ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : ""); + IPT_INV_DSTIP)) return false; - } ret = ifname_compare_aligned(indev, ipinfo->iniface, ipinfo->iniface_mask); - if (FWINV(ret != 0, IPT_INV_VIA_IN)) { - dprintf("VIA in mismatch (%s vs %s).%s\n", - indev, ipinfo->iniface, - ipinfo->invflags & IPT_INV_VIA_IN ? " (INV)" : ""); + if (FWINV(ret != 0, IPT_INV_VIA_IN)) return false; - } ret = ifname_compare_aligned(outdev, ipinfo->outiface, ipinfo->outiface_mask); - if (FWINV(ret != 0, IPT_INV_VIA_OUT)) { - dprintf("VIA out mismatch (%s vs %s).%s\n", - outdev, ipinfo->outiface, - ipinfo->invflags & IPT_INV_VIA_OUT ? " (INV)" : ""); + if (FWINV(ret != 0, IPT_INV_VIA_OUT)) return false; - } /* Check specific protocol */ if (ipinfo->proto && - FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) { - dprintf("Packet protocol %hi does not match %hi.%s\n", - ip->protocol, ipinfo->proto, - ipinfo->invflags & IPT_INV_PROTO ? " (INV)" : ""); + FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) return false; - } /* If we have a fragment rule but the packet is not a fragment * then we return zero */ - if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) { - dprintf("Fragment rule but not fragment.%s\n", - ipinfo->invflags & IPT_INV_FRAG ? 
" (INV)" : ""); + if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) return false; - } return true; } @@ -138,16 +92,10 @@ ip_packet_match(const struct iphdr *ip, static bool ip_checkentry(const struct ipt_ip *ip) { - if (ip->flags & ~IPT_F_MASK) { - duprintf("Unknown flag bits set: %08X\n", - ip->flags & ~IPT_F_MASK); + if (ip->flags & ~IPT_F_MASK) return false; - } - if (ip->invflags & ~IPT_INV_MASK) { - duprintf("Unknown invflag bits set: %08X\n", - ip->invflags & ~IPT_INV_MASK); + if (ip->invflags & ~IPT_INV_MASK) return false; - } return true; } @@ -346,10 +294,6 @@ ipt_do_table(struct sk_buff *skb, e = get_entry(table_base, private->hook_entry[hook]); - pr_debug("Entering %s(hook %u), UF %p\n", - table->name, hook, - get_entry(table_base, private->underflow[hook])); - do { const struct xt_entry_target *t; const struct xt_entry_match *ematch; @@ -396,22 +340,15 @@ ipt_do_table(struct sk_buff *skb, if (stackidx == 0) { e = get_entry(table_base, private->underflow[hook]); - pr_debug("Underflow (this is normal) " - "to %p\n", e); } else { e = jumpstack[--stackidx]; - pr_debug("Pulled %p out from pos %u\n", - e, stackidx); e = ipt_next_entry(e); } continue; } if (table_base + v != ipt_next_entry(e) && - !(e->ip.flags & IPT_F_GOTO)) { + !(e->ip.flags & IPT_F_GOTO)) jumpstack[stackidx++] = e; - pr_debug("Pushed %p into pos %u\n", - e, stackidx - 1); - } e = get_entry(table_base, v); continue; @@ -429,18 +366,13 @@ ipt_do_table(struct sk_buff *skb, /* Verdict */ break; } while (!acpar.hotdrop); - pr_debug("Exiting %s; sp at %u\n", __func__, stackidx); xt_write_recseq_end(addend); local_bh_enable(); -#ifdef DEBUG_ALLOW_ALL - return NF_ACCEPT; -#else if (acpar.hotdrop) return NF_DROP; else return verdict; -#endif } static bool find_jump_target(const struct xt_table_info *t, @@ -480,11 +412,9 @@ mark_source_chains(const struct xt_table_info *newinfo, = (void *)ipt_get_target_c(e); int visited = e->comefrom & (1 << hook); - if (e->comefrom & (1 << 
NF_INET_NUMHOOKS)) { - pr_err("iptables: loop hook %u pos %u %08X.\n", - hook, pos, e->comefrom); + if (e->comefrom & (1 << NF_INET_NUMHOOKS)) return 0; - } + e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS)); /* Unconditional return/END. */ @@ -496,26 +426,13 @@ mark_source_chains(const struct xt_table_info *newinfo, if ((strcmp(t->target.u.user.name, XT_STANDARD_TARGET) == 0) && - t->verdict < -NF_MAX_VERDICT - 1) { - duprintf("mark_source_chains: bad " - "negative verdict (%i)\n", - t->verdict); + t->verdict < -NF_MAX_VERDICT - 1) return 0; - } /* Return: backtrack through the last big jump. */ do { e->comefrom ^= (1<<NF_INET_NUMHOOKS); -#ifdef DEBUG_IP_FIREWALL_USER - if (e->comefrom - & (1 << NF_INET_NUMHOOKS)) { - duprintf("Back unset " - "on hook %u " - "rule %u\n", - hook, pos); - } -#endif oldpos = pos; pos = e->counters.pcnt; e->counters.pcnt = 0; @@ -543,8 +460,6 @@ mark_source_chains(const struct xt_table_info *newinfo, XT_STANDARD_TARGET) == 0 && newpos >= 0) { /* This a jump; chase it. 
*/ - duprintf("Jump rule %u -> %u\n", - pos, newpos); e = (struct ipt_entry *) (entry0 + newpos); if (!find_jump_target(newinfo, e)) @@ -561,8 +476,7 @@ mark_source_chains(const struct xt_table_info *newinfo, pos = newpos; } } -next: - duprintf("Finished chain %u\n", hook); +next: ; } return 1; } @@ -584,18 +498,12 @@ static int check_match(struct xt_entry_match *m, struct xt_mtchk_param *par) { const struct ipt_ip *ip = par->entryinfo; - int ret; par->match = m->u.kernel.match; par->matchinfo = m->data; - ret = xt_check_match(par, m->u.match_size - sizeof(*m), - ip->proto, ip->invflags & IPT_INV_PROTO); - if (ret < 0) { - duprintf("check failed for `%s'.\n", par->match->name); - return ret; - } - return 0; + return xt_check_match(par, m->u.match_size - sizeof(*m), + ip->proto, ip->invflags & IPT_INV_PROTO); } static int @@ -606,10 +514,8 @@ find_check_match(struct xt_entry_match *m, struct xt_mtchk_param *par) match = xt_request_find_match(NFPROTO_IPV4, m->u.user.name, m->u.user.revision); - if (IS_ERR(match)) { - duprintf("find_check_match: `%s' not found\n", m->u.user.name); + if (IS_ERR(match)) return PTR_ERR(match); - } m->u.kernel.match = match; ret = check_match(m, par); @@ -634,16 +540,9 @@ static int check_target(struct ipt_entry *e, struct net *net, const char *name) .hook_mask = e->comefrom, .family = NFPROTO_IPV4, }; - int ret; - ret = xt_check_target(&par, t->u.target_size - sizeof(*t), - e->ip.proto, e->ip.invflags & IPT_INV_PROTO); - if (ret < 0) { - duprintf("check failed for `%s'.\n", - t->u.kernel.target->name); - return ret; - } - return 0; + return xt_check_target(&par, t->u.target_size - sizeof(*t), + e->ip.proto, e->ip.invflags & IPT_INV_PROTO); } static int @@ -656,10 +555,12 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name, unsigned int j; struct xt_mtchk_param mtpar; struct xt_entry_match *ematch; + unsigned long pcnt; - e->counters.pcnt = xt_percpu_counter_alloc(); - if (IS_ERR_VALUE(e->counters.pcnt)) + pcnt = 
xt_percpu_counter_alloc(); + if (IS_ERR_VALUE(pcnt)) return -ENOMEM; + e->counters.pcnt = pcnt; j = 0; mtpar.net = net; @@ -678,7 +579,6 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name, target = xt_request_find_target(NFPROTO_IPV4, t->u.user.name, t->u.user.revision); if (IS_ERR(target)) { - duprintf("find_check_entry: `%s' not found\n", t->u.user.name); ret = PTR_ERR(target); goto cleanup_matches; } @@ -732,17 +632,12 @@ check_entry_size_and_hooks(struct ipt_entry *e, if ((unsigned long)e % __alignof__(struct ipt_entry) != 0 || (unsigned char *)e + sizeof(struct ipt_entry) >= limit || - (unsigned char *)e + e->next_offset > limit) { - duprintf("Bad offset %p\n", e); + (unsigned char *)e + e->next_offset > limit) return -EINVAL; - } if (e->next_offset - < sizeof(struct ipt_entry) + sizeof(struct xt_entry_target)) { - duprintf("checking: element %p size %u\n", - e, e->next_offset); + < sizeof(struct ipt_entry) + sizeof(struct xt_entry_target)) return -EINVAL; - } if (!ip_checkentry(&e->ip)) return -EINVAL; @@ -759,12 +654,9 @@ check_entry_size_and_hooks(struct ipt_entry *e, if ((unsigned char *)e - base == hook_entries[h]) newinfo->hook_entry[h] = hook_entries[h]; if ((unsigned char *)e - base == underflows[h]) { - if (!check_underflow(e)) { - pr_debug("Underflows must be unconditional and " - "use the STANDARD target with " - "ACCEPT/DROP\n"); + if (!check_underflow(e)) return -EINVAL; - } + newinfo->underflow[h] = underflows[h]; } } @@ -816,7 +708,6 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, newinfo->underflow[i] = 0xFFFFFFFF; } - duprintf("translate_table: size %u\n", newinfo->size); i = 0; /* Walk through entries, checking offsets. 
*/ xt_entry_foreach(iter, entry0, newinfo->size) { @@ -833,27 +724,18 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, ++newinfo->stacksize; } - if (i != repl->num_entries) { - duprintf("translate_table: %u not %u entries\n", - i, repl->num_entries); + if (i != repl->num_entries) return -EINVAL; - } /* Check hooks all assigned */ for (i = 0; i < NF_INET_NUMHOOKS; i++) { /* Only hooks which are valid */ if (!(repl->valid_hooks & (1 << i))) continue; - if (newinfo->hook_entry[i] == 0xFFFFFFFF) { - duprintf("Invalid hook entry %u %u\n", - i, repl->hook_entry[i]); + if (newinfo->hook_entry[i] == 0xFFFFFFFF) return -EINVAL; - } - if (newinfo->underflow[i] == 0xFFFFFFFF) { - duprintf("Invalid underflow %u %u\n", - i, repl->underflow[i]); + if (newinfo->underflow[i] == 0xFFFFFFFF) return -EINVAL; - } } if (!mark_source_chains(newinfo, repl->valid_hooks, entry0)) @@ -1081,11 +963,8 @@ static int get_info(struct net *net, void __user *user, struct xt_table *t; int ret; - if (*len != sizeof(struct ipt_getinfo)) { - duprintf("length %u != %zu\n", *len, - sizeof(struct ipt_getinfo)); + if (*len != sizeof(struct ipt_getinfo)) return -EINVAL; - } if (copy_from_user(name, user, sizeof(name)) != 0) return -EFAULT; @@ -1143,31 +1022,23 @@ get_entries(struct net *net, struct ipt_get_entries __user *uptr, struct ipt_get_entries get; struct xt_table *t; - if (*len < sizeof(get)) { - duprintf("get_entries: %u < %zu\n", *len, sizeof(get)); + if (*len < sizeof(get)) return -EINVAL; - } if (copy_from_user(&get, uptr, sizeof(get)) != 0) return -EFAULT; - if (*len != sizeof(struct ipt_get_entries) + get.size) { - duprintf("get_entries: %u != %zu\n", - *len, sizeof(get) + get.size); + if (*len != sizeof(struct ipt_get_entries) + get.size) return -EINVAL; - } get.name[sizeof(get.name) - 1] = '\0'; t = xt_find_table_lock(net, AF_INET, get.name); if (!IS_ERR_OR_NULL(t)) { const struct xt_table_info *private = t->private; - duprintf("t->private->number = %u\n", 
private->number); if (get.size == private->size) ret = copy_entries_to_user(private->size, t, uptr->entrytable); - else { - duprintf("get_entries: I've got %u not %u!\n", - private->size, get.size); + else ret = -EAGAIN; - } + module_put(t->me); xt_table_unlock(t); } else @@ -1203,8 +1074,6 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, /* You lied! */ if (valid_hooks != t->valid_hooks) { - duprintf("Valid hook crap: %08X vs %08X\n", - valid_hooks, t->valid_hooks); ret = -EINVAL; goto put_module; } @@ -1214,8 +1083,6 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, goto put_module; /* Update module usage count based on number of rules */ - duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n", - oldinfo->number, oldinfo->initial_entries, newinfo->number); if ((oldinfo->number > oldinfo->initial_entries) || (newinfo->number <= oldinfo->initial_entries)) module_put(t->me); @@ -1284,8 +1151,6 @@ do_replace(struct net *net, const void __user *user, unsigned int len) if (ret != 0) goto free_newinfo; - duprintf("Translated table\n"); - ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, tmp.num_counters, tmp.counters); if (ret) @@ -1411,11 +1276,9 @@ compat_find_calc_match(struct xt_entry_match *m, match = xt_request_find_match(NFPROTO_IPV4, m->u.user.name, m->u.user.revision); - if (IS_ERR(match)) { - duprintf("compat_check_calc_match: `%s' not found\n", - m->u.user.name); + if (IS_ERR(match)) return PTR_ERR(match); - } + m->u.kernel.match = match; *size += xt_compat_match_offset(match); return 0; @@ -1447,20 +1310,14 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, unsigned int j; int ret, off; - duprintf("check_compat_entry_size_and_hooks %p\n", e); if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0 || (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit || - (unsigned char *)e + e->next_offset > limit) { - duprintf("Bad offset %p, limit = %p\n", e, limit); + 
(unsigned char *)e + e->next_offset > limit) return -EINVAL; - } if (e->next_offset < sizeof(struct compat_ipt_entry) + - sizeof(struct compat_xt_entry_target)) { - duprintf("checking: element %p size %u\n", - e, e->next_offset); + sizeof(struct compat_xt_entry_target)) return -EINVAL; - } if (!ip_checkentry(&e->ip)) return -EINVAL; @@ -1484,8 +1341,6 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, target = xt_request_find_target(NFPROTO_IPV4, t->u.user.name, t->u.user.revision); if (IS_ERR(target)) { - duprintf("check_compat_entry_size_and_hooks: `%s' not found\n", - t->u.user.name); ret = PTR_ERR(target); goto release_matches; } @@ -1567,7 +1422,6 @@ translate_compat_table(struct net *net, size = compatr->size; info->number = compatr->num_entries; - duprintf("translate_compat_table: size %u\n", info->size); j = 0; xt_compat_lock(AF_INET); xt_compat_init_offsets(AF_INET, compatr->num_entries); @@ -1582,11 +1436,8 @@ translate_compat_table(struct net *net, } ret = -EINVAL; - if (j != compatr->num_entries) { - duprintf("translate_compat_table: %u not %u entries\n", - j, compatr->num_entries); + if (j != compatr->num_entries) goto out_unlock; - } ret = -ENOMEM; newinfo = xt_alloc_table_info(size); @@ -1683,8 +1534,6 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) if (ret != 0) goto free_newinfo; - duprintf("compat_do_replace: Translated table\n"); - ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, tmp.num_counters, compat_ptr(tmp.counters)); if (ret) @@ -1718,7 +1567,6 @@ compat_do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, break; default: - duprintf("do_ipt_set_ctl: unknown request %i\n", cmd); ret = -EINVAL; } @@ -1768,19 +1616,15 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr, struct compat_ipt_get_entries get; struct xt_table *t; - if (*len < sizeof(get)) { - duprintf("compat_get_entries: %u < %zu\n", *len, sizeof(get)); + if (*len < sizeof(get)) return 
-EINVAL; - } if (copy_from_user(&get, uptr, sizeof(get)) != 0) return -EFAULT; - if (*len != sizeof(struct compat_ipt_get_entries) + get.size) { - duprintf("compat_get_entries: %u != %zu\n", - *len, sizeof(get) + get.size); + if (*len != sizeof(struct compat_ipt_get_entries) + get.size) return -EINVAL; - } + get.name[sizeof(get.name) - 1] = '\0'; xt_compat_lock(AF_INET); @@ -1788,16 +1632,13 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr, if (!IS_ERR_OR_NULL(t)) { const struct xt_table_info *private = t->private; struct xt_table_info info; - duprintf("t->private->number = %u\n", private->number); ret = compat_table_info(private, &info); - if (!ret && get.size == info.size) { + if (!ret && get.size == info.size) ret = compat_copy_entries_to_user(private->size, t, uptr->entrytable); - } else if (!ret) { - duprintf("compat_get_entries: I've got %u not %u!\n", - private->size, get.size); + else if (!ret) ret = -EAGAIN; - } + xt_compat_flush_offsets(AF_INET); module_put(t->me); xt_table_unlock(t); @@ -1850,7 +1691,6 @@ do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) break; default: - duprintf("do_ipt_set_ctl: unknown request %i\n", cmd); ret = -EINVAL; } @@ -1902,7 +1742,6 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) } default: - duprintf("do_ipt_get_ctl: unknown request %i\n", cmd); ret = -EINVAL; } @@ -2004,7 +1843,6 @@ icmp_match(const struct sk_buff *skb, struct xt_action_param *par) /* We've been asked to examine this packet, and we * can't. Hence, no choice but to drop. 
*/ - duprintf("Dropping evil ICMP tinygram.\n"); par->hotdrop = true; return false; } diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index e3c46e8e2762..ae1a71a97132 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -360,7 +360,7 @@ static int ipv4_init_net(struct net *net) in->ctl_table[0].data = &nf_conntrack_max; in->ctl_table[1].data = &net->ct.count; - in->ctl_table[2].data = &net->ct.htable_size; + in->ctl_table[2].data = &nf_conntrack_htable_size; in->ctl_table[3].data = &net->ct.sysctl_checksum; in->ctl_table[4].data = &net->ct.sysctl_log_invalid; #endif diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index f0dfe92a00d6..c6f3c406f707 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -31,15 +31,14 @@ struct ct_iter_state { static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) { - struct net *net = seq_file_net(seq); struct ct_iter_state *st = seq->private; struct hlist_nulls_node *n; for (st->bucket = 0; - st->bucket < net->ct.htable_size; + st->bucket < nf_conntrack_htable_size; st->bucket++) { n = rcu_dereference( - hlist_nulls_first_rcu(&net->ct.hash[st->bucket])); + hlist_nulls_first_rcu(&nf_conntrack_hash[st->bucket])); if (!is_a_nulls(n)) return n; } @@ -49,17 +48,16 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) static struct hlist_nulls_node *ct_get_next(struct seq_file *seq, struct hlist_nulls_node *head) { - struct net *net = seq_file_net(seq); struct ct_iter_state *st = seq->private; head = rcu_dereference(hlist_nulls_next_rcu(head)); while (is_a_nulls(head)) { if (likely(get_nulls_value(head) == st->bucket)) { - if (++st->bucket >= net->ct.htable_size) + if (++st->bucket >= nf_conntrack_htable_size) return NULL; } head = 
rcu_dereference( - hlist_nulls_first_rcu(&net->ct.hash[st->bucket])); + hlist_nulls_first_rcu(&nf_conntrack_hash[st->bucket])); } return head; } @@ -114,6 +112,23 @@ static inline void ct_show_secctx(struct seq_file *s, const struct nf_conn *ct) } #endif +static bool ct_seq_should_skip(const struct nf_conn *ct, + const struct net *net, + const struct nf_conntrack_tuple_hash *hash) +{ + /* we only want to print DIR_ORIGINAL */ + if (NF_CT_DIRECTION(hash)) + return true; + + if (nf_ct_l3num(ct) != AF_INET) + return true; + + if (!net_eq(nf_ct_net(ct), net)) + return true; + + return false; +} + static int ct_seq_show(struct seq_file *s, void *v) { struct nf_conntrack_tuple_hash *hash = v; @@ -123,14 +138,15 @@ static int ct_seq_show(struct seq_file *s, void *v) int ret = 0; NF_CT_ASSERT(ct); - if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use))) + if (ct_seq_should_skip(ct, seq_file_net(s), hash)) return 0; + if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use))) + return 0; - /* we only want to print DIR_ORIGINAL */ - if (NF_CT_DIRECTION(hash)) - goto release; - if (nf_ct_l3num(ct) != AF_INET) + /* check if we raced w. 
object reuse */ + if (!nf_ct_is_confirmed(ct) || + ct_seq_should_skip(ct, seq_file_net(s), hash)) goto release; l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct)); @@ -220,13 +236,12 @@ struct ct_expect_iter_state { static struct hlist_node *ct_expect_get_first(struct seq_file *seq) { - struct net *net = seq_file_net(seq); struct ct_expect_iter_state *st = seq->private; struct hlist_node *n; for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { n = rcu_dereference( - hlist_first_rcu(&net->ct.expect_hash[st->bucket])); + hlist_first_rcu(&nf_ct_expect_hash[st->bucket])); if (n) return n; } @@ -236,7 +251,6 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq) static struct hlist_node *ct_expect_get_next(struct seq_file *seq, struct hlist_node *head) { - struct net *net = seq_file_net(seq); struct ct_expect_iter_state *st = seq->private; head = rcu_dereference(hlist_next_rcu(head)); @@ -244,7 +258,7 @@ static struct hlist_node *ct_expect_get_next(struct seq_file *seq, if (++st->bucket >= nf_ct_expect_hsize) return NULL; head = rcu_dereference( - hlist_first_rcu(&net->ct.expect_hash[st->bucket])); + hlist_first_rcu(&nf_ct_expect_hash[st->bucket])); } return head; } @@ -285,6 +299,9 @@ static int exp_seq_show(struct seq_file *s, void *v) exp = hlist_entry(n, struct nf_conntrack_expect, hnode); + if (!net_eq(nf_ct_net(exp->master), seq_file_net(s))) + return 0; + if (exp->tuple.src.l3num != AF_INET) return 0; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 8c8c655bb2c4..a1f2830d8110 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2146,6 +2146,7 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, unsigned int flags = 0; struct fib_result res; struct rtable *rth; + int master_idx; int orig_oif; int err = -ENETUNREACH; @@ -2155,6 +2156,9 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, orig_oif = fl4->flowi4_oif; + master_idx = l3mdev_master_ifindex_by_index(net, 
fl4->flowi4_oif); + if (master_idx) + fl4->flowi4_oif = master_idx; fl4->flowi4_iif = LOOPBACK_IFINDEX; fl4->flowi4_tos = tos & IPTOS_RT_MASK; fl4->flowi4_scope = ((tos & RTO_ONLINK) ? diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 097060def7f0..6b7459c92bb2 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -350,6 +350,11 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff, uh->len = newlen; + /* Set encapsulation before calling into inner gro_complete() functions + * to make them set up the inner offsets. + */ + skb->encapsulation = 1; + rcu_read_lock(); sk = (*lookup)(skb, uh->source, uh->dest); if (sk && udp_sk(sk)->gro_complete) @@ -360,9 +365,6 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff, if (skb->remcsum_offload) skb_shinfo(skb)->gso_type |= SKB_GSO_TUNNEL_REMCSUM; - skb->encapsulation = 1; - skb_set_inner_mac_header(skb, nhoff + sizeof(struct udphdr)); - return err; } EXPORT_SYMBOL(udp_gro_complete); diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 9554b99a8508..4527285fcaa2 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -446,6 +446,8 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) if (__ipv6_addr_needs_scope_id(addr_type)) iif = skb->dev->ifindex; + else + iif = l3mdev_master_ifindex(skb->dev); /* * Must not send error if the source does not uniquely @@ -500,9 +502,6 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) else if (!fl6.flowi6_oif) fl6.flowi6_oif = np->ucast_oif; - if (!fl6.flowi6_oif) - fl6.flowi6_oif = l3mdev_master_ifindex(skb->dev); - dst = icmpv6_route_lookup(net, skb, sk, &fl6); if (IS_ERR(dst)) goto out; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 47b671a46dc4..ee62ec469ab3 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -343,7 +343,7 @@ static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net, goto failed_free; /* Can use a lockless transmit, unless we generate output sequences */ - if 
(!(nt->parms.o_flags & GRE_SEQ)) + if (!(nt->parms.o_flags & TUNNEL_SEQ)) dev->features |= NETIF_F_LLTX; dev_hold(dev); @@ -519,7 +519,7 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags, protocol, tunnel->parms.o_key, htonl(tunnel->o_seqno)); - skb_set_inner_protocol(skb, proto); + skb_set_inner_protocol(skb, protocol); return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu, NEXTHDR_GRE); @@ -700,7 +700,7 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu) struct net_device *dev = t->dev; struct __ip6_tnl_parm *p = &t->parms; struct flowi6 *fl6 = &t->fl.u.ip6; - int addend = sizeof(struct ipv6hdr) + 4; + int t_hlen; if (dev->type != ARPHRD_ETHER) { memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr)); @@ -727,16 +727,11 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu) else dev->flags &= ~IFF_POINTOPOINT; - /* Precalculate GRE options length */ - if (t->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) { - if (t->parms.o_flags&GRE_CSUM) - addend += 4; - if (t->parms.o_flags&GRE_KEY) - addend += 4; - if (t->parms.o_flags&GRE_SEQ) - addend += 4; - } - t->hlen = addend; + t->tun_hlen = gre_calc_hlen(t->parms.o_flags); + + t->hlen = t->tun_hlen; + + t_hlen = t->hlen + sizeof(struct ipv6hdr); if (p->flags & IP6_TNL_F_CAP_XMIT) { int strict = (ipv6_addr_type(&p->raddr) & @@ -750,10 +745,11 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu) return; if (rt->dst.dev) { - dev->hard_header_len = rt->dst.dev->hard_header_len + addend; + dev->hard_header_len = rt->dst.dev->hard_header_len + + t_hlen; if (set_mtu) { - dev->mtu = rt->dst.dev->mtu - addend; + dev->mtu = rt->dst.dev->mtu - t_hlen; if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) dev->mtu -= 8; if (dev->type == ARPHRD_ETHER) @@ -799,8 +795,8 @@ static void ip6gre_tnl_parm_from_user(struct __ip6_tnl_parm *p, p->link = u->link; p->i_key = u->i_key; p->o_key = u->o_key; - p->i_flags = 
u->i_flags; - p->o_flags = u->o_flags; + p->i_flags = gre_flags_to_tnl_flags(u->i_flags); + p->o_flags = gre_flags_to_tnl_flags(u->o_flags); memcpy(p->name, u->name, sizeof(u->name)); } @@ -817,8 +813,8 @@ static void ip6gre_tnl_parm_to_user(struct ip6_tnl_parm2 *u, u->link = p->link; u->i_key = p->i_key; u->o_key = p->o_key; - u->i_flags = p->i_flags; - u->o_flags = p->o_flags; + u->i_flags = gre_tnl_flags_to_gre_flags(p->i_flags); + u->o_flags = gre_tnl_flags_to_gre_flags(p->o_flags); memcpy(u->name, p->name, sizeof(u->name)); } @@ -1027,11 +1023,12 @@ static int ip6gre_tunnel_init_common(struct net_device *dev) tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags); - t_hlen = tunnel->hlen + sizeof(struct ipv6hdr); + tunnel->hlen = tunnel->tun_hlen; - dev->needed_headroom = LL_MAX_HEADER + t_hlen + 4; - dev->mtu = ETH_DATA_LEN - t_hlen - 4; + t_hlen = tunnel->hlen + sizeof(struct ipv6hdr); + dev->hard_header_len = LL_MAX_HEADER + t_hlen; + dev->mtu = ETH_DATA_LEN - t_hlen; if (!(tunnel->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) dev->mtu -= 8; @@ -1217,10 +1214,12 @@ static void ip6gre_netlink_parms(struct nlattr *data[], parms->link = nla_get_u32(data[IFLA_GRE_LINK]); if (data[IFLA_GRE_IFLAGS]) - parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]); + parms->i_flags = gre_flags_to_tnl_flags( + nla_get_be16(data[IFLA_GRE_IFLAGS])); if (data[IFLA_GRE_OFLAGS]) - parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]); + parms->o_flags = gre_flags_to_tnl_flags( + nla_get_be16(data[IFLA_GRE_OFLAGS])); if (data[IFLA_GRE_IKEY]) parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]); @@ -1315,7 +1314,7 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev, dev->features |= GRE6_FEATURES; dev->hw_features |= GRE6_FEATURES; - if (!(nt->parms.o_flags & GRE_SEQ)) { + if (!(nt->parms.o_flags & TUNNEL_SEQ)) { /* TCP segmentation offload is not supported when we * generate output sequences. 
*/ @@ -1412,8 +1411,10 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev) struct __ip6_tnl_parm *p = &t->parms; if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) || - nla_put_be16(skb, IFLA_GRE_IFLAGS, p->i_flags) || - nla_put_be16(skb, IFLA_GRE_OFLAGS, p->o_flags) || + nla_put_be16(skb, IFLA_GRE_IFLAGS, + gre_tnl_flags_to_gre_flags(p->i_flags)) || + nla_put_be16(skb, IFLA_GRE_OFLAGS, + gre_tnl_flags_to_gre_flags(p->o_flags)) || nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) || nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) || nla_put_in6_addr(skb, IFLA_GRE_LOCAL, &p->laddr) || diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index ade55af6ace6..50af7061ecdb 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1114,8 +1114,6 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, dst_cache_set_ip6(&t->dst_cache, ndst, &fl6->saddr); skb_dst_set(skb, dst); - skb->transport_header = skb->network_header; - if (encap_limit >= 0) { init_tel_txopt(&opt, encap_limit); ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL); diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 73e606c719ef..63e06c3dd319 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -39,34 +39,12 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); MODULE_DESCRIPTION("IPv6 packet filter"); -/*#define DEBUG_IP_FIREWALL*/ -/*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */ -/*#define DEBUG_IP_FIREWALL_USER*/ - -#ifdef DEBUG_IP_FIREWALL -#define dprintf(format, args...) pr_info(format , ## args) -#else -#define dprintf(format, args...) -#endif - -#ifdef DEBUG_IP_FIREWALL_USER -#define duprintf(format, args...) pr_info(format , ## args) -#else -#define duprintf(format, args...) -#endif - #ifdef CONFIG_NETFILTER_DEBUG #define IP_NF_ASSERT(x) WARN_ON(!(x)) #else #define IP_NF_ASSERT(x) #endif -#if 0 -/* All the better to debug you with... 
*/ -#define static -#define inline -#endif - void *ip6t_alloc_initial_table(const struct xt_table *info) { return xt_alloc_initial_table(ip6t, IP6T); @@ -100,35 +78,18 @@ ip6_packet_match(const struct sk_buff *skb, if (FWINV(ipv6_masked_addr_cmp(&ipv6->saddr, &ip6info->smsk, &ip6info->src), IP6T_INV_SRCIP) || FWINV(ipv6_masked_addr_cmp(&ipv6->daddr, &ip6info->dmsk, - &ip6info->dst), IP6T_INV_DSTIP)) { - dprintf("Source or dest mismatch.\n"); -/* - dprintf("SRC: %u. Mask: %u. Target: %u.%s\n", ip->saddr, - ipinfo->smsk.s_addr, ipinfo->src.s_addr, - ipinfo->invflags & IP6T_INV_SRCIP ? " (INV)" : ""); - dprintf("DST: %u. Mask: %u. Target: %u.%s\n", ip->daddr, - ipinfo->dmsk.s_addr, ipinfo->dst.s_addr, - ipinfo->invflags & IP6T_INV_DSTIP ? " (INV)" : "");*/ + &ip6info->dst), IP6T_INV_DSTIP)) return false; - } ret = ifname_compare_aligned(indev, ip6info->iniface, ip6info->iniface_mask); - if (FWINV(ret != 0, IP6T_INV_VIA_IN)) { - dprintf("VIA in mismatch (%s vs %s).%s\n", - indev, ip6info->iniface, - ip6info->invflags & IP6T_INV_VIA_IN ? " (INV)" : ""); + if (FWINV(ret != 0, IP6T_INV_VIA_IN)) return false; - } ret = ifname_compare_aligned(outdev, ip6info->outiface, ip6info->outiface_mask); - if (FWINV(ret != 0, IP6T_INV_VIA_OUT)) { - dprintf("VIA out mismatch (%s vs %s).%s\n", - outdev, ip6info->outiface, - ip6info->invflags & IP6T_INV_VIA_OUT ? " (INV)" : ""); + if (FWINV(ret != 0, IP6T_INV_VIA_OUT)) return false; - } /* ... might want to do something with class and flowlabel here ... */ @@ -145,11 +106,6 @@ ip6_packet_match(const struct sk_buff *skb, } *fragoff = _frag_off; - dprintf("Packet protocol %hi ?= %s%hi.\n", - protohdr, - ip6info->invflags & IP6T_INV_PROTO ? 
"!":"", - ip6info->proto); - if (ip6info->proto == protohdr) { if (ip6info->invflags & IP6T_INV_PROTO) return false; @@ -169,16 +125,11 @@ ip6_packet_match(const struct sk_buff *skb, static bool ip6_checkentry(const struct ip6t_ip6 *ipv6) { - if (ipv6->flags & ~IP6T_F_MASK) { - duprintf("Unknown flag bits set: %08X\n", - ipv6->flags & ~IP6T_F_MASK); + if (ipv6->flags & ~IP6T_F_MASK) return false; - } - if (ipv6->invflags & ~IP6T_INV_MASK) { - duprintf("Unknown invflag bits set: %08X\n", - ipv6->invflags & ~IP6T_INV_MASK); + if (ipv6->invflags & ~IP6T_INV_MASK) return false; - } + return true; } @@ -446,13 +397,9 @@ ip6t_do_table(struct sk_buff *skb, xt_write_recseq_end(addend); local_bh_enable(); -#ifdef DEBUG_ALLOW_ALL - return NF_ACCEPT; -#else if (acpar.hotdrop) return NF_DROP; else return verdict; -#endif } static bool find_jump_target(const struct xt_table_info *t, @@ -492,11 +439,9 @@ mark_source_chains(const struct xt_table_info *newinfo, = (void *)ip6t_get_target_c(e); int visited = e->comefrom & (1 << hook); - if (e->comefrom & (1 << NF_INET_NUMHOOKS)) { - pr_err("iptables: loop hook %u pos %u %08X.\n", - hook, pos, e->comefrom); + if (e->comefrom & (1 << NF_INET_NUMHOOKS)) return 0; - } + e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS)); /* Unconditional return/END. */ @@ -508,26 +453,13 @@ mark_source_chains(const struct xt_table_info *newinfo, if ((strcmp(t->target.u.user.name, XT_STANDARD_TARGET) == 0) && - t->verdict < -NF_MAX_VERDICT - 1) { - duprintf("mark_source_chains: bad " - "negative verdict (%i)\n", - t->verdict); + t->verdict < -NF_MAX_VERDICT - 1) return 0; - } /* Return: backtrack through the last big jump. 
*/ do { e->comefrom ^= (1<<NF_INET_NUMHOOKS); -#ifdef DEBUG_IP_FIREWALL_USER - if (e->comefrom - & (1 << NF_INET_NUMHOOKS)) { - duprintf("Back unset " - "on hook %u " - "rule %u\n", - hook, pos); - } -#endif oldpos = pos; pos = e->counters.pcnt; e->counters.pcnt = 0; @@ -555,8 +487,6 @@ mark_source_chains(const struct xt_table_info *newinfo, XT_STANDARD_TARGET) == 0 && newpos >= 0) { /* This a jump; chase it. */ - duprintf("Jump rule %u -> %u\n", - pos, newpos); e = (struct ip6t_entry *) (entry0 + newpos); if (!find_jump_target(newinfo, e)) @@ -573,8 +503,7 @@ mark_source_chains(const struct xt_table_info *newinfo, pos = newpos; } } -next: - duprintf("Finished chain %u\n", hook); +next: ; } return 1; } @@ -595,19 +524,12 @@ static void cleanup_match(struct xt_entry_match *m, struct net *net) static int check_match(struct xt_entry_match *m, struct xt_mtchk_param *par) { const struct ip6t_ip6 *ipv6 = par->entryinfo; - int ret; par->match = m->u.kernel.match; par->matchinfo = m->data; - ret = xt_check_match(par, m->u.match_size - sizeof(*m), - ipv6->proto, ipv6->invflags & IP6T_INV_PROTO); - if (ret < 0) { - duprintf("ip_tables: check failed for `%s'.\n", - par.match->name); - return ret; - } - return 0; + return xt_check_match(par, m->u.match_size - sizeof(*m), + ipv6->proto, ipv6->invflags & IP6T_INV_PROTO); } static int @@ -618,10 +540,9 @@ find_check_match(struct xt_entry_match *m, struct xt_mtchk_param *par) match = xt_request_find_match(NFPROTO_IPV6, m->u.user.name, m->u.user.revision); - if (IS_ERR(match)) { - duprintf("find_check_match: `%s' not found\n", m->u.user.name); + if (IS_ERR(match)) return PTR_ERR(match); - } + m->u.kernel.match = match; ret = check_match(m, par); @@ -646,17 +567,11 @@ static int check_target(struct ip6t_entry *e, struct net *net, const char *name) .hook_mask = e->comefrom, .family = NFPROTO_IPV6, }; - int ret; t = ip6t_get_target(e); - ret = xt_check_target(&par, t->u.target_size - sizeof(*t), - e->ipv6.proto, e->ipv6.invflags & 
IP6T_INV_PROTO); - if (ret < 0) { - duprintf("ip_tables: check failed for `%s'.\n", - t->u.kernel.target->name); - return ret; - } - return 0; + return xt_check_target(&par, t->u.target_size - sizeof(*t), + e->ipv6.proto, + e->ipv6.invflags & IP6T_INV_PROTO); } static int @@ -669,10 +584,12 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name, unsigned int j; struct xt_mtchk_param mtpar; struct xt_entry_match *ematch; + unsigned long pcnt; - e->counters.pcnt = xt_percpu_counter_alloc(); - if (IS_ERR_VALUE(e->counters.pcnt)) + pcnt = xt_percpu_counter_alloc(); + if (IS_ERR_VALUE(pcnt)) return -ENOMEM; + e->counters.pcnt = pcnt; j = 0; mtpar.net = net; @@ -691,7 +608,6 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name, target = xt_request_find_target(NFPROTO_IPV6, t->u.user.name, t->u.user.revision); if (IS_ERR(target)) { - duprintf("find_check_entry: `%s' not found\n", t->u.user.name); ret = PTR_ERR(target); goto cleanup_matches; } @@ -744,17 +660,12 @@ check_entry_size_and_hooks(struct ip6t_entry *e, if ((unsigned long)e % __alignof__(struct ip6t_entry) != 0 || (unsigned char *)e + sizeof(struct ip6t_entry) >= limit || - (unsigned char *)e + e->next_offset > limit) { - duprintf("Bad offset %p\n", e); + (unsigned char *)e + e->next_offset > limit) return -EINVAL; - } if (e->next_offset - < sizeof(struct ip6t_entry) + sizeof(struct xt_entry_target)) { - duprintf("checking: element %p size %u\n", - e, e->next_offset); + < sizeof(struct ip6t_entry) + sizeof(struct xt_entry_target)) return -EINVAL; - } if (!ip6_checkentry(&e->ipv6)) return -EINVAL; @@ -771,12 +682,9 @@ check_entry_size_and_hooks(struct ip6t_entry *e, if ((unsigned char *)e - base == hook_entries[h]) newinfo->hook_entry[h] = hook_entries[h]; if ((unsigned char *)e - base == underflows[h]) { - if (!check_underflow(e)) { - pr_debug("Underflows must be unconditional and " - "use the STANDARD target with " - "ACCEPT/DROP\n"); + if (!check_underflow(e)) return 
-EINVAL; - } + newinfo->underflow[h] = underflows[h]; } } @@ -828,7 +736,6 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, newinfo->underflow[i] = 0xFFFFFFFF; } - duprintf("translate_table: size %u\n", newinfo->size); i = 0; /* Walk through entries, checking offsets. */ xt_entry_foreach(iter, entry0, newinfo->size) { @@ -845,27 +752,18 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, ++newinfo->stacksize; } - if (i != repl->num_entries) { - duprintf("translate_table: %u not %u entries\n", - i, repl->num_entries); + if (i != repl->num_entries) return -EINVAL; - } /* Check hooks all assigned */ for (i = 0; i < NF_INET_NUMHOOKS; i++) { /* Only hooks which are valid */ if (!(repl->valid_hooks & (1 << i))) continue; - if (newinfo->hook_entry[i] == 0xFFFFFFFF) { - duprintf("Invalid hook entry %u %u\n", - i, repl->hook_entry[i]); + if (newinfo->hook_entry[i] == 0xFFFFFFFF) return -EINVAL; - } - if (newinfo->underflow[i] == 0xFFFFFFFF) { - duprintf("Invalid underflow %u %u\n", - i, repl->underflow[i]); + if (newinfo->underflow[i] == 0xFFFFFFFF) return -EINVAL; - } } if (!mark_source_chains(newinfo, repl->valid_hooks, entry0)) @@ -1093,11 +991,8 @@ static int get_info(struct net *net, void __user *user, struct xt_table *t; int ret; - if (*len != sizeof(struct ip6t_getinfo)) { - duprintf("length %u != %zu\n", *len, - sizeof(struct ip6t_getinfo)); + if (*len != sizeof(struct ip6t_getinfo)) return -EINVAL; - } if (copy_from_user(name, user, sizeof(name)) != 0) return -EFAULT; @@ -1155,31 +1050,24 @@ get_entries(struct net *net, struct ip6t_get_entries __user *uptr, struct ip6t_get_entries get; struct xt_table *t; - if (*len < sizeof(get)) { - duprintf("get_entries: %u < %zu\n", *len, sizeof(get)); + if (*len < sizeof(get)) return -EINVAL; - } if (copy_from_user(&get, uptr, sizeof(get)) != 0) return -EFAULT; - if (*len != sizeof(struct ip6t_get_entries) + get.size) { - duprintf("get_entries: %u != %zu\n", - *len, 
sizeof(get) + get.size); + if (*len != sizeof(struct ip6t_get_entries) + get.size) return -EINVAL; - } + get.name[sizeof(get.name) - 1] = '\0'; t = xt_find_table_lock(net, AF_INET6, get.name); if (!IS_ERR_OR_NULL(t)) { struct xt_table_info *private = t->private; - duprintf("t->private->number = %u\n", private->number); if (get.size == private->size) ret = copy_entries_to_user(private->size, t, uptr->entrytable); - else { - duprintf("get_entries: I've got %u not %u!\n", - private->size, get.size); + else ret = -EAGAIN; - } + module_put(t->me); xt_table_unlock(t); } else @@ -1215,8 +1103,6 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, /* You lied! */ if (valid_hooks != t->valid_hooks) { - duprintf("Valid hook crap: %08X vs %08X\n", - valid_hooks, t->valid_hooks); ret = -EINVAL; goto put_module; } @@ -1226,8 +1112,6 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, goto put_module; /* Update module usage count based on number of rules */ - duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n", - oldinfo->number, oldinfo->initial_entries, newinfo->number); if ((oldinfo->number > oldinfo->initial_entries) || (newinfo->number <= oldinfo->initial_entries)) module_put(t->me); @@ -1296,8 +1180,6 @@ do_replace(struct net *net, const void __user *user, unsigned int len) if (ret != 0) goto free_newinfo; - duprintf("ip_tables: Translated table\n"); - ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, tmp.num_counters, tmp.counters); if (ret) @@ -1422,11 +1304,9 @@ compat_find_calc_match(struct xt_entry_match *m, match = xt_request_find_match(NFPROTO_IPV6, m->u.user.name, m->u.user.revision); - if (IS_ERR(match)) { - duprintf("compat_check_calc_match: `%s' not found\n", - m->u.user.name); + if (IS_ERR(match)) return PTR_ERR(match); - } + m->u.kernel.match = match; *size += xt_compat_match_offset(match); return 0; @@ -1458,20 +1338,14 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, unsigned 
int j; int ret, off; - duprintf("check_compat_entry_size_and_hooks %p\n", e); if ((unsigned long)e % __alignof__(struct compat_ip6t_entry) != 0 || (unsigned char *)e + sizeof(struct compat_ip6t_entry) >= limit || - (unsigned char *)e + e->next_offset > limit) { - duprintf("Bad offset %p, limit = %p\n", e, limit); + (unsigned char *)e + e->next_offset > limit) return -EINVAL; - } if (e->next_offset < sizeof(struct compat_ip6t_entry) + - sizeof(struct compat_xt_entry_target)) { - duprintf("checking: element %p size %u\n", - e, e->next_offset); + sizeof(struct compat_xt_entry_target)) return -EINVAL; - } if (!ip6_checkentry(&e->ipv6)) return -EINVAL; @@ -1495,8 +1369,6 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, target = xt_request_find_target(NFPROTO_IPV6, t->u.user.name, t->u.user.revision); if (IS_ERR(target)) { - duprintf("check_compat_entry_size_and_hooks: `%s' not found\n", - t->u.user.name); ret = PTR_ERR(target); goto release_matches; } @@ -1575,7 +1447,6 @@ translate_compat_table(struct net *net, size = compatr->size; info->number = compatr->num_entries; - duprintf("translate_compat_table: size %u\n", info->size); j = 0; xt_compat_lock(AF_INET6); xt_compat_init_offsets(AF_INET6, compatr->num_entries); @@ -1590,11 +1461,8 @@ translate_compat_table(struct net *net, } ret = -EINVAL; - if (j != compatr->num_entries) { - duprintf("translate_compat_table: %u not %u entries\n", - j, compatr->num_entries); + if (j != compatr->num_entries) goto out_unlock; - } ret = -ENOMEM; newinfo = xt_alloc_table_info(size); @@ -1685,8 +1553,6 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) if (ret != 0) goto free_newinfo; - duprintf("compat_do_replace: Translated table\n"); - ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, tmp.num_counters, compat_ptr(tmp.counters)); if (ret) @@ -1720,7 +1586,6 @@ compat_do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user, break; default: - duprintf("do_ip6t_set_ctl: unknown 
request %i\n", cmd); ret = -EINVAL; } @@ -1770,19 +1635,15 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr, struct compat_ip6t_get_entries get; struct xt_table *t; - if (*len < sizeof(get)) { - duprintf("compat_get_entries: %u < %zu\n", *len, sizeof(get)); + if (*len < sizeof(get)) return -EINVAL; - } if (copy_from_user(&get, uptr, sizeof(get)) != 0) return -EFAULT; - if (*len != sizeof(struct compat_ip6t_get_entries) + get.size) { - duprintf("compat_get_entries: %u != %zu\n", - *len, sizeof(get) + get.size); + if (*len != sizeof(struct compat_ip6t_get_entries) + get.size) return -EINVAL; - } + get.name[sizeof(get.name) - 1] = '\0'; xt_compat_lock(AF_INET6); @@ -1790,16 +1651,13 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr, if (!IS_ERR_OR_NULL(t)) { const struct xt_table_info *private = t->private; struct xt_table_info info; - duprintf("t->private->number = %u\n", private->number); ret = compat_table_info(private, &info); - if (!ret && get.size == info.size) { + if (!ret && get.size == info.size) ret = compat_copy_entries_to_user(private->size, t, uptr->entrytable); - } else if (!ret) { - duprintf("compat_get_entries: I've got %u not %u!\n", - private->size, get.size); + else if (!ret) ret = -EAGAIN; - } + xt_compat_flush_offsets(AF_INET6); module_put(t->me); xt_table_unlock(t); @@ -1852,7 +1710,6 @@ do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) break; default: - duprintf("do_ip6t_set_ctl: unknown request %i\n", cmd); ret = -EINVAL; } @@ -1904,7 +1761,6 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) } default: - duprintf("do_ip6t_get_ctl: unknown request %i\n", cmd); ret = -EINVAL; } @@ -2006,7 +1862,6 @@ icmp6_match(const struct sk_buff *skb, struct xt_action_param *par) /* We've been asked to examine this packet, and we * can't. Hence, no choice but to drop. 
*/ - duprintf("Dropping evil ICMP tinygram.\n"); par->hotdrop = true; return false; } diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c index 5d778dd11f66..06bed74cf5ee 100644 --- a/net/ipv6/netfilter/ip6t_SYNPROXY.c +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c @@ -60,7 +60,7 @@ synproxy_send_tcp(struct net *net, fl6.fl6_dport = nth->dest; security_skb_classify_flow((struct sk_buff *)skb, flowi6_to_flowi(&fl6)); dst = ip6_route_output(net, NULL, &fl6); - if (dst == NULL || dst->error) { + if (dst->error) { dst_release(dst); goto free_nskb; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index af46e19205f5..c42fa1deb152 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1190,7 +1190,7 @@ struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk, struct dst_entry *dst; bool any_src; - dst = l3mdev_rt6_dst_by_oif(net, fl6); + dst = l3mdev_get_rt6_dst(net, fl6); if (dst) return dst; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 7bdc9c9c231b..c4efaa97280c 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -810,8 +810,13 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 fl6.flowi6_proto = IPPROTO_TCP; if (rt6_need_strict(&fl6.daddr) && !oif) fl6.flowi6_oif = tcp_v6_iif(skb); - else + else { + if (!oif && netif_index_is_l3_master(net, skb->skb_iif)) + oif = skb->skb_iif; + fl6.flowi6_oif = oif; + } + fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark); fl6.fl6_dport = t1->dest; fl6.fl6_sport = t1->source; diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c index e925037fa0df..6651a78e100c 100644 --- a/net/l3mdev/l3mdev.c +++ b/net/l3mdev/l3mdev.c @@ -97,3 +97,66 @@ u32 l3mdev_fib_table_by_index(struct net *net, int ifindex) return tb_id; } EXPORT_SYMBOL_GPL(l3mdev_fib_table_by_index); + +/** + * l3mdev_get_rt6_dst - IPv6 route lookup based on flow. 
Returns + * cached route for L3 master device if relevant + * to flow + * @net: network namespace for device index lookup + * @fl6: IPv6 flow struct for lookup + */ + +struct dst_entry *l3mdev_get_rt6_dst(struct net *net, + const struct flowi6 *fl6) +{ + struct dst_entry *dst = NULL; + struct net_device *dev; + + if (fl6->flowi6_oif) { + rcu_read_lock(); + + dev = dev_get_by_index_rcu(net, fl6->flowi6_oif); + if (dev && netif_is_l3_slave(dev)) + dev = netdev_master_upper_dev_get_rcu(dev); + + if (dev && netif_is_l3_master(dev) && + dev->l3mdev_ops->l3mdev_get_rt6_dst) + dst = dev->l3mdev_ops->l3mdev_get_rt6_dst(dev, fl6); + + rcu_read_unlock(); + } + + return dst; +} +EXPORT_SYMBOL_GPL(l3mdev_get_rt6_dst); + +/** + * l3mdev_get_saddr - get source address for a flow based on an interface + * enslaved to an L3 master device + * @net: network namespace for device index lookup + * @ifindex: Interface index + * @fl4: IPv4 flow struct + */ + +int l3mdev_get_saddr(struct net *net, int ifindex, struct flowi4 *fl4) +{ + struct net_device *dev; + int rc = 0; + + if (ifindex) { + rcu_read_lock(); + + dev = dev_get_by_index_rcu(net, ifindex); + if (dev && netif_is_l3_slave(dev)) + dev = netdev_master_upper_dev_get_rcu(dev); + + if (dev && netif_is_l3_master(dev) && + dev->l3mdev_ops->l3mdev_get_saddr) + rc = dev->l3mdev_ops->l3mdev_get_saddr(dev, fl4); + + rcu_read_unlock(); + } + + return rc; +} +EXPORT_SYMBOL_GPL(l3mdev_get_saddr); diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index b3c52e3f689a..8ae3ed97d95c 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -626,6 +626,7 @@ static void llc_cmsg_rcv(struct msghdr *msg, struct sk_buff *skb) if (llc->cmsg_flags & LLC_CMSG_PKTINFO) { struct llc_pktinfo info; + memset(&info, 0, sizeof(info)); info.lpi_ifindex = llc_sk(skb->sk)->dev->ifindex; llc_pdu_decode_dsap(skb, &info.lpi_sap); llc_pdu_decode_da(skb, info.lpi_mac); diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 
85ca189bdc3d..2cb3c626cd43 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -104,6 +104,7 @@ static inline void ct_write_unlock_bh(unsigned int key) spin_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); } +static void ip_vs_conn_expire(unsigned long data); /* * Returns hash value for IPVS connection entry @@ -453,10 +454,16 @@ ip_vs_conn_out_get_proto(struct netns_ipvs *ipvs, int af, } EXPORT_SYMBOL_GPL(ip_vs_conn_out_get_proto); +static void __ip_vs_conn_put_notimer(struct ip_vs_conn *cp) +{ + __ip_vs_conn_put(cp); + ip_vs_conn_expire((unsigned long)cp); +} + /* * Put back the conn and restart its timer with its timeout */ -void ip_vs_conn_put(struct ip_vs_conn *cp) +static void __ip_vs_conn_put_timer(struct ip_vs_conn *cp) { unsigned long t = (cp->flags & IP_VS_CONN_F_ONE_PACKET) ? 0 : cp->timeout; @@ -465,6 +472,16 @@ void ip_vs_conn_put(struct ip_vs_conn *cp) __ip_vs_conn_put(cp); } +void ip_vs_conn_put(struct ip_vs_conn *cp) +{ + if ((cp->flags & IP_VS_CONN_F_ONE_PACKET) && + (atomic_read(&cp->refcnt) == 1) && + !timer_pending(&cp->timer)) + /* expire connection immediately */ + __ip_vs_conn_put_notimer(cp); + else + __ip_vs_conn_put_timer(cp); +} /* * Fill a no_client_port connection with a client port number @@ -819,7 +836,8 @@ static void ip_vs_conn_expire(unsigned long data) if (cp->control) ip_vs_control_del(cp); - if (cp->flags & IP_VS_CONN_F_NFCT) { + if ((cp->flags & IP_VS_CONN_F_NFCT) && + !(cp->flags & IP_VS_CONN_F_ONE_PACKET)) { /* Do not access conntracks during subsys cleanup * because nf_conntrack_find_get can not be used after * conntrack cleanup for the net. 
@@ -834,7 +852,10 @@ static void ip_vs_conn_expire(unsigned long data) ip_vs_unbind_dest(cp); if (cp->flags & IP_VS_CONN_F_NO_CPORT) atomic_dec(&ip_vs_conn_no_cport_cnt); - call_rcu(&cp->rcu_head, ip_vs_conn_rcu_free); + if (cp->flags & IP_VS_CONN_F_ONE_PACKET) + ip_vs_conn_rcu_free(&cp->rcu_head); + else + call_rcu(&cp->rcu_head, ip_vs_conn_rcu_free); atomic_dec(&ipvs->conn_count); return; } @@ -850,7 +871,7 @@ static void ip_vs_conn_expire(unsigned long data) if (ipvs->sync_state & IP_VS_STATE_MASTER) ip_vs_sync_conn(ipvs, cp, sysctl_sync_threshold(ipvs)); - ip_vs_conn_put(cp); + __ip_vs_conn_put_timer(cp); } /* Modify timer, so that it expires as soon as possible. @@ -1240,6 +1261,16 @@ static inline int todrop_entry(struct ip_vs_conn *cp) return 1; } +static inline bool ip_vs_conn_ops_mode(struct ip_vs_conn *cp) +{ + struct ip_vs_service *svc; + + if (!cp->dest) + return false; + svc = rcu_dereference(cp->dest->svc); + return svc && (svc->flags & IP_VS_SVC_F_ONEPACKET); +} + /* Called from keventd and must protect itself from softirqs */ void ip_vs_random_dropentry(struct netns_ipvs *ipvs) { @@ -1254,11 +1285,16 @@ void ip_vs_random_dropentry(struct netns_ipvs *ipvs) unsigned int hash = prandom_u32() & ip_vs_conn_tab_mask; hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { - if (cp->flags & IP_VS_CONN_F_TEMPLATE) - /* connection template */ - continue; if (cp->ipvs != ipvs) continue; + if (cp->flags & IP_VS_CONN_F_TEMPLATE) { + if (atomic_read(&cp->n_control) || + !ip_vs_conn_ops_mode(cp)) + continue; + else + /* connection template of OPS */ + goto try_drop; + } if (cp->protocol == IPPROTO_TCP) { switch(cp->state) { case IP_VS_TCP_S_SYN_RECV: @@ -1286,6 +1322,7 @@ void ip_vs_random_dropentry(struct netns_ipvs *ipvs) continue; } } else { +try_drop: if (!todrop_entry(cp)) continue; } diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index b9a4082afa3a..1207f20d24e4 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ 
b/net/netfilter/ipvs/ip_vs_core.c @@ -68,6 +68,7 @@ EXPORT_SYMBOL(ip_vs_conn_put); #ifdef CONFIG_IP_VS_DEBUG EXPORT_SYMBOL(ip_vs_get_debug_level); #endif +EXPORT_SYMBOL(ip_vs_new_conn_out); static int ip_vs_net_id __read_mostly; /* netns cnt used for uniqueness */ @@ -611,7 +612,10 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, ret = cp->packet_xmit(skb, cp, pd->pp, iph); /* do not touch skb anymore */ - atomic_inc(&cp->in_pkts); + if ((cp->flags & IP_VS_CONN_F_ONE_PACKET) && cp->control) + atomic_inc(&cp->control->in_pkts); + else + atomic_inc(&cp->in_pkts); ip_vs_conn_put(cp); return ret; } @@ -1100,6 +1104,143 @@ static inline bool is_new_conn_expected(const struct ip_vs_conn *cp, } } +/* Generic function to create new connections for outgoing RS packets + * + * Pre-requisites for successful connection creation: + * 1) Virtual Service is NOT fwmark based: + * In fwmark-VS actual vaddr and vport are unknown to IPVS + * 2) Real Server and Virtual Service were NOT configured without port: + * This is to allow match of different VS to the same RS ip-addr + */ +struct ip_vs_conn *ip_vs_new_conn_out(struct ip_vs_service *svc, + struct ip_vs_dest *dest, + struct sk_buff *skb, + const struct ip_vs_iphdr *iph, + __be16 dport, + __be16 cport) +{ + struct ip_vs_conn_param param; + struct ip_vs_conn *ct = NULL, *cp = NULL; + const union nf_inet_addr *vaddr, *daddr, *caddr; + union nf_inet_addr snet; + __be16 vport; + unsigned int flags; + + EnterFunction(12); + vaddr = &svc->addr; + vport = svc->port; + daddr = &iph->saddr; + caddr = &iph->daddr; + + /* check pre-requisites are satisfied */ + if (svc->fwmark) + return NULL; + if (!vport || !dport) + return NULL; + + /* for persistent service first create connection template */ + if (svc->flags & IP_VS_SVC_F_PERSISTENT) { + /* apply netmask the same way ingress-side does */ +#ifdef CONFIG_IP_VS_IPV6 + if (svc->af == AF_INET6) + ipv6_addr_prefix(&snet.in6, &caddr->in6, + (__force __u32)svc->netmask); + 
else +#endif + snet.ip = caddr->ip & svc->netmask; + /* fill params and create template if not existent */ + if (ip_vs_conn_fill_param_persist(svc, skb, iph->protocol, + &snet, 0, vaddr, + vport, &param) < 0) + return NULL; + ct = ip_vs_ct_in_get(&param); + if (!ct) { + ct = ip_vs_conn_new(&param, dest->af, daddr, dport, + IP_VS_CONN_F_TEMPLATE, dest, 0); + if (!ct) { + kfree(param.pe_data); + return NULL; + } + ct->timeout = svc->timeout; + } else { + kfree(param.pe_data); + } + } + + /* connection flags */ + flags = ((svc->flags & IP_VS_SVC_F_ONEPACKET) && + iph->protocol == IPPROTO_UDP) ? IP_VS_CONN_F_ONE_PACKET : 0; + /* create connection */ + ip_vs_conn_fill_param(svc->ipvs, svc->af, iph->protocol, + caddr, cport, vaddr, vport, &param); + cp = ip_vs_conn_new(&param, dest->af, daddr, dport, flags, dest, 0); + if (!cp) { + if (ct) + ip_vs_conn_put(ct); + return NULL; + } + if (ct) { + ip_vs_control_add(cp, ct); + ip_vs_conn_put(ct); + } + ip_vs_conn_stats(cp, svc); + + /* return connection (will be used to handle outgoing packet) */ + IP_VS_DBG_BUF(6, "New connection RS-initiated:%c c:%s:%u v:%s:%u " + "d:%s:%u conn->flags:%X conn->refcnt:%d\n", + ip_vs_fwd_tag(cp), + IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport), + IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport), + IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport), + cp->flags, atomic_read(&cp->refcnt)); + LeaveFunction(12); + return cp; +} + +/* Handle outgoing packets which are considered requests initiated by + * real servers, so that subsequent responses from external client can be + * routed to the right real server. + * Used also for outgoing responses in OPS mode. + * + * Connection management is handled by persistent-engine specific callback. 
+ */ +static struct ip_vs_conn *__ip_vs_rs_conn_out(unsigned int hooknum, + struct netns_ipvs *ipvs, + int af, struct sk_buff *skb, + const struct ip_vs_iphdr *iph) +{ + struct ip_vs_dest *dest; + struct ip_vs_conn *cp = NULL; + __be16 _ports[2], *pptr; + + if (hooknum == NF_INET_LOCAL_IN) + return NULL; + + pptr = frag_safe_skb_hp(skb, iph->len, + sizeof(_ports), _ports, iph); + if (!pptr) + return NULL; + + rcu_read_lock(); + dest = ip_vs_find_real_service(ipvs, af, iph->protocol, + &iph->saddr, pptr[0]); + if (dest) { + struct ip_vs_service *svc; + struct ip_vs_pe *pe; + + svc = rcu_dereference(dest->svc); + if (svc) { + pe = rcu_dereference(svc->pe); + if (pe && pe->conn_out) + cp = pe->conn_out(svc, dest, skb, iph, + pptr[0], pptr[1]); + } + } + rcu_read_unlock(); + + return cp; +} + /* Handle response packets: rewrite addresses and send away... */ static unsigned int @@ -1245,6 +1386,22 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in if (likely(cp)) return handle_response(af, skb, pd, cp, &iph, hooknum); + + /* Check for real-server-started requests */ + if (atomic_read(&ipvs->conn_out_counter)) { + /* Currently only for UDP: + * connection oriented protocols typically use + * ephemeral ports for outgoing connections, so + * related incoming responses would not match any VS + */ + if (pp->protocol == IPPROTO_UDP) { + cp = __ip_vs_rs_conn_out(hooknum, ipvs, af, skb, &iph); + if (likely(cp)) + return handle_response(af, skb, pd, cp, &iph, + hooknum); + } + } + if (sysctl_nat_icmp_send(ipvs) && (pp->protocol == IPPROTO_TCP || pp->protocol == IPPROTO_UDP || @@ -1837,6 +1994,9 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int if (ipvs->sync_state & IP_VS_STATE_MASTER) ip_vs_sync_conn(ipvs, cp, pkts); + else if ((cp->flags & IP_VS_CONN_F_ONE_PACKET) && cp->control) + /* increment is done inside ip_vs_sync_conn too */ + atomic_inc(&cp->control->in_pkts); ip_vs_conn_put(cp); return ret; diff --git 
a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index f35ebc02fa5c..c3c809b2e712 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -567,6 +567,36 @@ bool ip_vs_has_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol, return false; } +/* Find real service record by <proto,addr,port>. + * In case of multiple records with the same <proto,addr,port>, only + * the first found record is returned. + * + * To be called under RCU lock. + */ +struct ip_vs_dest *ip_vs_find_real_service(struct netns_ipvs *ipvs, int af, + __u16 protocol, + const union nf_inet_addr *daddr, + __be16 dport) +{ + unsigned int hash; + struct ip_vs_dest *dest; + + /* Check for "full" addressed entries */ + hash = ip_vs_rs_hashkey(af, daddr, dport); + + hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) { + if (dest->port == dport && + dest->af == af && + ip_vs_addr_equal(af, &dest->addr, daddr) && + (dest->protocol == protocol || dest->vfwmark)) { + /* HIT */ + return dest; + } + } + + return NULL; +} + /* Lookup destination by {addr,port} in the given service * Called under RCU lock. 
*/ @@ -1253,6 +1283,8 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, atomic_inc(&ipvs->ftpsvc_counter); else if (svc->port == 0) atomic_inc(&ipvs->nullsvc_counter); + if (svc->pe && svc->pe->conn_out) + atomic_inc(&ipvs->conn_out_counter); ip_vs_start_estimator(ipvs, &svc->stats); @@ -1293,6 +1325,7 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) struct ip_vs_scheduler *sched = NULL, *old_sched; struct ip_vs_pe *pe = NULL, *old_pe = NULL; int ret = 0; + bool new_pe_conn_out, old_pe_conn_out; /* * Lookup the scheduler, by 'u->sched_name' @@ -1355,8 +1388,16 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) svc->netmask = u->netmask; old_pe = rcu_dereference_protected(svc->pe, 1); - if (pe != old_pe) + if (pe != old_pe) { rcu_assign_pointer(svc->pe, pe); + /* check for optional methods in new pe */ + new_pe_conn_out = (pe && pe->conn_out) ? true : false; + old_pe_conn_out = (old_pe && old_pe->conn_out) ? 
true : false; + if (new_pe_conn_out && !old_pe_conn_out) + atomic_inc(&svc->ipvs->conn_out_counter); + if (old_pe_conn_out && !new_pe_conn_out) + atomic_dec(&svc->ipvs->conn_out_counter); + } out: ip_vs_scheduler_put(old_sched); @@ -1389,6 +1430,8 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup) /* Unbind persistence engine, keep svc->pe */ old_pe = rcu_dereference_protected(svc->pe, 1); + if (old_pe && old_pe->conn_out) + atomic_dec(&ipvs->conn_out_counter); ip_vs_pe_put(old_pe); /* @@ -3969,6 +4012,7 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs) (unsigned long) ipvs); atomic_set(&ipvs->ftpsvc_counter, 0); atomic_set(&ipvs->nullsvc_counter, 0); + atomic_set(&ipvs->conn_out_counter, 0); /* procfs stats */ ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats); diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c index 30434fb133df..f04fd8df210b 100644 --- a/net/netfilter/ipvs/ip_vs_nfct.c +++ b/net/netfilter/ipvs/ip_vs_nfct.c @@ -93,6 +93,10 @@ ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin) if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) return; + /* Never alter conntrack for OPS conns (no reply is expected) */ + if (cp->flags & IP_VS_CONN_F_ONE_PACKET) + return; + /* Alter reply only in original direction */ if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) return; diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c index 0a6eb5c0d9e9..d07ef9e31c12 100644 --- a/net/netfilter/ipvs/ip_vs_pe_sip.c +++ b/net/netfilter/ipvs/ip_vs_pe_sip.c @@ -143,6 +143,20 @@ static int ip_vs_sip_show_pe_data(const struct ip_vs_conn *cp, char *buf) return cp->pe_data_len; } +static struct ip_vs_conn * +ip_vs_sip_conn_out(struct ip_vs_service *svc, + struct ip_vs_dest *dest, + struct sk_buff *skb, + const struct ip_vs_iphdr *iph, + __be16 dport, + __be16 cport) +{ + if (likely(iph->protocol == IPPROTO_UDP)) + return ip_vs_new_conn_out(svc, dest, skb, 
iph, dport, cport); + /* currently no need to handle other than UDP */ + return NULL; +} + static struct ip_vs_pe ip_vs_sip_pe = { .name = "sip", @@ -153,6 +167,7 @@ static struct ip_vs_pe ip_vs_sip_pe = .ct_match = ip_vs_sip_ct_match, .hashkey_raw = ip_vs_sip_hashkey_raw, .show_pe_data = ip_vs_sip_show_pe_data, + .conn_out = ip_vs_sip_conn_out, }; static int __init ip_vs_sip_init(void) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 2fd607408998..566c64e3ec50 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -54,6 +54,7 @@ #include <net/netfilter/nf_nat.h> #include <net/netfilter/nf_nat_core.h> #include <net/netfilter/nf_nat_helper.h> +#include <net/netns/hash.h> #define NF_CONNTRACK_VERSION "0.5.0" @@ -68,7 +69,12 @@ EXPORT_SYMBOL_GPL(nf_conntrack_locks); __cacheline_aligned_in_smp DEFINE_SPINLOCK(nf_conntrack_expect_lock); EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock); +struct hlist_nulls_head *nf_conntrack_hash __read_mostly; +EXPORT_SYMBOL_GPL(nf_conntrack_hash); + +static __read_mostly struct kmem_cache *nf_conntrack_cachep; static __read_mostly spinlock_t nf_conntrack_locks_all_lock; +static __read_mostly seqcount_t nf_conntrack_generation; static __read_mostly bool nf_conntrack_locks_all; void nf_conntrack_lock(spinlock_t *lock) __acquires(lock) @@ -107,7 +113,7 @@ static bool nf_conntrack_double_lock(struct net *net, unsigned int h1, spin_lock_nested(&nf_conntrack_locks[h1], SINGLE_DEPTH_NESTING); } - if (read_seqcount_retry(&net->ct.generation, sequence)) { + if (read_seqcount_retry(&nf_conntrack_generation, sequence)) { nf_conntrack_double_unlock(h1, h2); return true; } @@ -141,43 +147,43 @@ EXPORT_SYMBOL_GPL(nf_conntrack_max); DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked); EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked); -unsigned int nf_conntrack_hash_rnd __read_mostly; -EXPORT_SYMBOL_GPL(nf_conntrack_hash_rnd); +static unsigned int nf_conntrack_hash_rnd 
__read_mostly; -static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple) +static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple, + const struct net *net) { unsigned int n; + u32 seed; + + get_random_once(&nf_conntrack_hash_rnd, sizeof(nf_conntrack_hash_rnd)); /* The direction must be ignored, so we hash everything up to the * destination ports (which is a multiple of 4) and treat the last * three bytes manually. */ + seed = nf_conntrack_hash_rnd ^ net_hash_mix(net); n = (sizeof(tuple->src) + sizeof(tuple->dst.u3)) / sizeof(u32); - return jhash2((u32 *)tuple, n, nf_conntrack_hash_rnd ^ + return jhash2((u32 *)tuple, n, seed ^ (((__force __u16)tuple->dst.u.all << 16) | tuple->dst.protonum)); } -static u32 __hash_bucket(u32 hash, unsigned int size) -{ - return reciprocal_scale(hash, size); -} - -static u32 hash_bucket(u32 hash, const struct net *net) +static u32 scale_hash(u32 hash) { - return __hash_bucket(hash, net->ct.htable_size); + return reciprocal_scale(hash, nf_conntrack_htable_size); } -static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple, - unsigned int size) +static u32 __hash_conntrack(const struct net *net, + const struct nf_conntrack_tuple *tuple, + unsigned int size) { - return __hash_bucket(hash_conntrack_raw(tuple), size); + return reciprocal_scale(hash_conntrack_raw(tuple, net), size); } -static inline u_int32_t hash_conntrack(const struct net *net, - const struct nf_conntrack_tuple *tuple) +static u32 hash_conntrack(const struct net *net, + const struct nf_conntrack_tuple *tuple) { - return __hash_conntrack(tuple, net->ct.htable_size); + return scale_hash(hash_conntrack_raw(tuple, net)); } bool @@ -358,7 +364,7 @@ destroy_conntrack(struct nf_conntrack *nfct) } rcu_read_lock(); l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); - if (l4proto && l4proto->destroy) + if (l4proto->destroy) l4proto->destroy(ct); rcu_read_unlock(); @@ -393,7 +399,7 @@ static void 
nf_ct_delete_from_lists(struct nf_conn *ct) local_bh_disable(); do { - sequence = read_seqcount_begin(&net->ct.generation); + sequence = read_seqcount_begin(&nf_conntrack_generation); hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); reply_hash = hash_conntrack(net, @@ -445,7 +451,8 @@ static void death_by_timeout(unsigned long ul_conntrack) static inline bool nf_ct_key_equal(struct nf_conntrack_tuple_hash *h, const struct nf_conntrack_tuple *tuple, - const struct nf_conntrack_zone *zone) + const struct nf_conntrack_zone *zone, + const struct net *net) { struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); @@ -454,7 +461,8 @@ nf_ct_key_equal(struct nf_conntrack_tuple_hash *h, */ return nf_ct_tuple_equal(tuple, &h->tuple) && nf_ct_zone_equal(ct, zone, NF_CT_DIRECTION(h)) && - nf_ct_is_confirmed(ct); + nf_ct_is_confirmed(ct) && + net_eq(net, nf_ct_net(ct)); } /* @@ -467,21 +475,23 @@ ____nf_conntrack_find(struct net *net, const struct nf_conntrack_zone *zone, const struct nf_conntrack_tuple *tuple, u32 hash) { struct nf_conntrack_tuple_hash *h; + struct hlist_nulls_head *ct_hash; struct hlist_nulls_node *n; - unsigned int bucket = hash_bucket(hash, net); + unsigned int bucket, sequence; - /* Disable BHs the entire time since we normally need to disable them - * at least once for the stats anyway. 
- */ - local_bh_disable(); begin: - hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[bucket], hnnode) { - if (nf_ct_key_equal(h, tuple, zone)) { - NF_CT_STAT_INC(net, found); - local_bh_enable(); + do { + sequence = read_seqcount_begin(&nf_conntrack_generation); + bucket = scale_hash(hash); + ct_hash = nf_conntrack_hash; + } while (read_seqcount_retry(&nf_conntrack_generation, sequence)); + + hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[bucket], hnnode) { + if (nf_ct_key_equal(h, tuple, zone, net)) { + NF_CT_STAT_INC_ATOMIC(net, found); return h; } - NF_CT_STAT_INC(net, searched); + NF_CT_STAT_INC_ATOMIC(net, searched); } /* * if the nulls value we got at the end of this lookup is @@ -489,10 +499,9 @@ begin: * We probably met an item that was moved to another chain. */ if (get_nulls_value(n) != bucket) { - NF_CT_STAT_INC(net, search_restart); + NF_CT_STAT_INC_ATOMIC(net, search_restart); goto begin; } - local_bh_enable(); return NULL; } @@ -514,7 +523,7 @@ begin: !atomic_inc_not_zero(&ct->ct_general.use))) h = NULL; else { - if (unlikely(!nf_ct_key_equal(h, tuple, zone))) { + if (unlikely(!nf_ct_key_equal(h, tuple, zone, net))) { nf_ct_put(ct); goto begin; } @@ -530,7 +539,7 @@ nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone, const struct nf_conntrack_tuple *tuple) { return __nf_conntrack_find_get(net, zone, tuple, - hash_conntrack_raw(tuple)); + hash_conntrack_raw(tuple, net)); } EXPORT_SYMBOL_GPL(nf_conntrack_find_get); @@ -538,12 +547,10 @@ static void __nf_conntrack_hash_insert(struct nf_conn *ct, unsigned int hash, unsigned int reply_hash) { - struct net *net = nf_ct_net(ct); - hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, - &net->ct.hash[hash]); + &nf_conntrack_hash[hash]); hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode, - &net->ct.hash[reply_hash]); + &nf_conntrack_hash[reply_hash]); } int @@ -560,7 +567,7 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct) local_bh_disable(); do { - 
sequence = read_seqcount_begin(&net->ct.generation); + sequence = read_seqcount_begin(&nf_conntrack_generation); hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); reply_hash = hash_conntrack(net, @@ -568,17 +575,14 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct) } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence)); /* See if there's one in the list already, including reverse */ - hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode) - if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, - &h->tuple) && - nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone, - NF_CT_DIRECTION(h))) + hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode) + if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, + zone, net)) goto out; - hlist_nulls_for_each_entry(h, n, &net->ct.hash[reply_hash], hnnode) - if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple, - &h->tuple) && - nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone, - NF_CT_DIRECTION(h))) + + hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode) + if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, + zone, net)) goto out; add_timer(&ct->timeout); @@ -599,6 +603,62 @@ out: } EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert); +static inline void nf_ct_acct_update(struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int len) +{ + struct nf_conn_acct *acct; + + acct = nf_conn_acct_find(ct); + if (acct) { + struct nf_conn_counter *counter = acct->counter; + + atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets); + atomic64_add(len, &counter[CTINFO2DIR(ctinfo)].bytes); + } +} + +static void nf_ct_acct_merge(struct nf_conn *ct, enum ip_conntrack_info ctinfo, + const struct nf_conn *loser_ct) +{ + struct nf_conn_acct *acct; + + acct = nf_conn_acct_find(loser_ct); + if (acct) { + struct nf_conn_counter *counter = acct->counter; + unsigned int bytes; + + /* u32 should be fine since we must have seen one 
packet. */ + bytes = atomic64_read(&counter[CTINFO2DIR(ctinfo)].bytes); + nf_ct_acct_update(ct, ctinfo, bytes); + } +} + +/* Resolve race on insertion if this protocol allows this. */ +static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb, + enum ip_conntrack_info ctinfo, + struct nf_conntrack_tuple_hash *h) +{ + /* This is the conntrack entry already in hashes that won race. */ + struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); + struct nf_conntrack_l4proto *l4proto; + + l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); + if (l4proto->allow_clash && + !nf_ct_is_dying(ct) && + atomic_inc_not_zero(&ct->ct_general.use)) { + nf_ct_acct_merge(ct, ctinfo, (struct nf_conn *)skb->nfct); + nf_conntrack_put(skb->nfct); + /* Assign conntrack already in hashes to this skbuff. Don't + * modify skb->nfctinfo to ensure consistent stateful filtering. + */ + skb->nfct = &ct->ct_general; + return NF_ACCEPT; + } + NF_CT_STAT_INC(net, drop); + return NF_DROP; +} + /* Confirm a connection given skb; places it in hash table */ int __nf_conntrack_confirm(struct sk_buff *skb) @@ -613,6 +673,7 @@ __nf_conntrack_confirm(struct sk_buff *skb) enum ip_conntrack_info ctinfo; struct net *net; unsigned int sequence; + int ret = NF_DROP; ct = nf_ct_get(skb, &ctinfo); net = nf_ct_net(ct); @@ -628,10 +689,10 @@ __nf_conntrack_confirm(struct sk_buff *skb) local_bh_disable(); do { - sequence = read_seqcount_begin(&net->ct.generation); + sequence = read_seqcount_begin(&nf_conntrack_generation); /* reuse the hash saved before */ hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev; - hash = hash_bucket(hash, net); + hash = scale_hash(hash); reply_hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); @@ -655,23 +716,22 @@ __nf_conntrack_confirm(struct sk_buff *skb) */ nf_ct_del_from_dying_or_unconfirmed_list(ct); - if (unlikely(nf_ct_is_dying(ct))) - goto out; + if (unlikely(nf_ct_is_dying(ct))) { + nf_ct_add_to_dying_list(ct); + goto 
dying; + } /* See if there's one in the list already, including reverse: NAT could have grabbed it without realizing, since we're not in the hash. If there is, we lost race. */ - hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode) - if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, - &h->tuple) && - nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone, - NF_CT_DIRECTION(h))) + hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode) + if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, + zone, net)) goto out; - hlist_nulls_for_each_entry(h, n, &net->ct.hash[reply_hash], hnnode) - if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple, - &h->tuple) && - nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone, - NF_CT_DIRECTION(h))) + + hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode) + if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, + zone, net)) goto out; /* Timer relative to confirmation time, not original @@ -710,10 +770,12 @@ __nf_conntrack_confirm(struct sk_buff *skb) out: nf_ct_add_to_dying_list(ct); + ret = nf_ct_resolve_clash(net, skb, ctinfo, h); +dying: nf_conntrack_double_unlock(hash, reply_hash); NF_CT_STAT_INC(net, insert_failed); local_bh_enable(); - return NF_DROP; + return ret; } EXPORT_SYMBOL_GPL(__nf_conntrack_confirm); @@ -726,29 +788,31 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, struct net *net = nf_ct_net(ignored_conntrack); const struct nf_conntrack_zone *zone; struct nf_conntrack_tuple_hash *h; + struct hlist_nulls_head *ct_hash; + unsigned int hash, sequence; struct hlist_nulls_node *n; struct nf_conn *ct; - unsigned int hash; zone = nf_ct_zone(ignored_conntrack); - hash = hash_conntrack(net, tuple); - /* Disable BHs the entire time since we need to disable them at - * least once for the stats anyway. 
- */ - rcu_read_lock_bh(); - hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnnode) { + rcu_read_lock(); + do { + sequence = read_seqcount_begin(&nf_conntrack_generation); + hash = hash_conntrack(net, tuple); + ct_hash = nf_conntrack_hash; + } while (read_seqcount_retry(&nf_conntrack_generation, sequence)); + + hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[hash], hnnode) { ct = nf_ct_tuplehash_to_ctrack(h); if (ct != ignored_conntrack && - nf_ct_tuple_equal(tuple, &h->tuple) && - nf_ct_zone_equal(ct, zone, NF_CT_DIRECTION(h))) { - NF_CT_STAT_INC(net, found); - rcu_read_unlock_bh(); + nf_ct_key_equal(h, tuple, zone, net)) { + NF_CT_STAT_INC_ATOMIC(net, found); + rcu_read_unlock(); return 1; } - NF_CT_STAT_INC(net, searched); + NF_CT_STAT_INC_ATOMIC(net, searched); } - rcu_read_unlock_bh(); + rcu_read_unlock(); return 0; } @@ -762,71 +826,63 @@ static noinline int early_drop(struct net *net, unsigned int _hash) { /* Use oldest entry, which is roughly LRU */ struct nf_conntrack_tuple_hash *h; - struct nf_conn *ct = NULL, *tmp; + struct nf_conn *tmp; struct hlist_nulls_node *n; - unsigned int i = 0, cnt = 0; - int dropped = 0; - unsigned int hash, sequence; + unsigned int i, hash, sequence; + struct nf_conn *ct = NULL; spinlock_t *lockp; + bool ret = false; + + i = 0; local_bh_disable(); restart: - sequence = read_seqcount_begin(&net->ct.generation); - hash = hash_bucket(_hash, net); - for (; i < net->ct.htable_size; i++) { + sequence = read_seqcount_begin(&nf_conntrack_generation); + for (; i < NF_CT_EVICTION_RANGE; i++) { + hash = scale_hash(_hash++); lockp = &nf_conntrack_locks[hash % CONNTRACK_LOCKS]; nf_conntrack_lock(lockp); - if (read_seqcount_retry(&net->ct.generation, sequence)) { + if (read_seqcount_retry(&nf_conntrack_generation, sequence)) { spin_unlock(lockp); goto restart; } - hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], - hnnode) { + hlist_nulls_for_each_entry_rcu(h, n, &nf_conntrack_hash[hash], + hnnode) { tmp = 
nf_ct_tuplehash_to_ctrack(h); - if (!test_bit(IPS_ASSURED_BIT, &tmp->status) && - !nf_ct_is_dying(tmp) && - atomic_inc_not_zero(&tmp->ct_general.use)) { + + if (test_bit(IPS_ASSURED_BIT, &tmp->status) || + !net_eq(nf_ct_net(tmp), net) || + nf_ct_is_dying(tmp)) + continue; + + if (atomic_inc_not_zero(&tmp->ct_general.use)) { ct = tmp; break; } - cnt++; } - hash = (hash + 1) % net->ct.htable_size; spin_unlock(lockp); - - if (ct || cnt >= NF_CT_EVICTION_RANGE) + if (ct) break; - } + local_bh_enable(); if (!ct) - return dropped; + return false; - if (del_timer(&ct->timeout)) { + /* kill only if in same netns -- might have moved due to + * SLAB_DESTROY_BY_RCU rules + */ + if (net_eq(nf_ct_net(ct), net) && del_timer(&ct->timeout)) { if (nf_ct_delete(ct, 0, 0)) { - dropped = 1; NF_CT_STAT_INC_ATOMIC(net, early_drop); + ret = true; } } - nf_ct_put(ct); - return dropped; -} - -void init_nf_conntrack_hash_rnd(void) -{ - unsigned int rand; - /* - * Why not initialize nf_conntrack_rnd in a "init()" function ? - * Because there isn't enough entropy when system initializing, - * and we initialize it as late as possible. - */ - do { - get_random_bytes(&rand, sizeof(rand)); - } while (!rand); - cmpxchg(&nf_conntrack_hash_rnd, 0, rand); + nf_ct_put(ct); + return ret; } static struct nf_conn * @@ -838,12 +894,6 @@ __nf_conntrack_alloc(struct net *net, { struct nf_conn *ct; - if (unlikely(!nf_conntrack_hash_rnd)) { - init_nf_conntrack_hash_rnd(); - /* recompute the hash as nf_conntrack_hash_rnd is initialized */ - hash = hash_conntrack_raw(orig); - } - /* We don't want any race condition at early drop stage */ atomic_inc(&net->ct.count); @@ -860,7 +910,7 @@ __nf_conntrack_alloc(struct net *net, * Do not use kmem_cache_zalloc(), as this cache uses * SLAB_DESTROY_BY_RCU. 
*/ - ct = kmem_cache_alloc(net->ct.nf_conntrack_cachep, gfp); + ct = kmem_cache_alloc(nf_conntrack_cachep, gfp); if (ct == NULL) goto out; @@ -887,7 +937,7 @@ __nf_conntrack_alloc(struct net *net, atomic_set(&ct->ct_general.use, 0); return ct; out_free: - kmem_cache_free(net->ct.nf_conntrack_cachep, ct); + kmem_cache_free(nf_conntrack_cachep, ct); out: atomic_dec(&net->ct.count); return ERR_PTR(-ENOMEM); @@ -914,7 +964,7 @@ void nf_conntrack_free(struct nf_conn *ct) nf_ct_ext_destroy(ct); nf_ct_ext_free(ct); - kmem_cache_free(net->ct.nf_conntrack_cachep, ct); + kmem_cache_free(nf_conntrack_cachep, ct); smp_mb__before_atomic(); atomic_dec(&net->ct.count); } @@ -1061,7 +1111,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl, /* look for tuple match */ zone = nf_ct_zone_tmpl(tmpl, skb, &tmp); - hash = hash_conntrack_raw(&tuple); + hash = hash_conntrack_raw(&tuple, net); h = __nf_conntrack_find_get(net, zone, &tuple, hash); if (!h) { h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto, @@ -1270,17 +1320,8 @@ void __nf_ct_refresh_acct(struct nf_conn *ct, } acct: - if (do_acct) { - struct nf_conn_acct *acct; - - acct = nf_conn_acct_find(ct); - if (acct) { - struct nf_conn_counter *counter = acct->counter; - - atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets); - atomic64_add(skb->len, &counter[CTINFO2DIR(ctinfo)].bytes); - } - } + if (do_acct) + nf_ct_acct_update(ct, ctinfo, skb->len); } EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct); @@ -1289,18 +1330,8 @@ bool __nf_ct_kill_acct(struct nf_conn *ct, const struct sk_buff *skb, int do_acct) { - if (do_acct) { - struct nf_conn_acct *acct; - - acct = nf_conn_acct_find(ct); - if (acct) { - struct nf_conn_counter *counter = acct->counter; - - atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets); - atomic64_add(skb->len - skb_network_offset(skb), - &counter[CTINFO2DIR(ctinfo)].bytes); - } - } + if (do_acct) + nf_ct_acct_update(ct, ctinfo, skb->len); if (del_timer(&ct->timeout)) { ct->timeout.function((unsigned long)ct); @@ 
-1396,16 +1427,17 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data), int cpu; spinlock_t *lockp; - for (; *bucket < net->ct.htable_size; (*bucket)++) { + for (; *bucket < nf_conntrack_htable_size; (*bucket)++) { lockp = &nf_conntrack_locks[*bucket % CONNTRACK_LOCKS]; local_bh_disable(); nf_conntrack_lock(lockp); - if (*bucket < net->ct.htable_size) { - hlist_nulls_for_each_entry(h, n, &net->ct.hash[*bucket], hnnode) { + if (*bucket < nf_conntrack_htable_size) { + hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[*bucket], hnnode) { if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL) continue; ct = nf_ct_tuplehash_to_ctrack(h); - if (iter(ct, data)) + if (net_eq(nf_ct_net(ct), net) && + iter(ct, data)) goto found; } } @@ -1443,6 +1475,9 @@ void nf_ct_iterate_cleanup(struct net *net, might_sleep(); + if (atomic_read(&net->ct.count) == 0) + return; + while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) { /* Time to push up daises... */ if (del_timer(&ct->timeout)) @@ -1494,6 +1529,8 @@ void nf_conntrack_cleanup_end(void) while (untrack_refs() > 0) schedule(); + nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size); + #ifdef CONFIG_NF_CONNTRACK_ZONES nf_ct_extend_unregister(&nf_ct_zone_extend); #endif @@ -1544,15 +1581,12 @@ i_see_dead_people: } list_for_each_entry(net, net_exit_list, exit_list) { - nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size); nf_conntrack_proto_pernet_fini(net); nf_conntrack_helper_pernet_fini(net); nf_conntrack_ecache_pernet_fini(net); nf_conntrack_tstamp_pernet_fini(net); nf_conntrack_acct_pernet_fini(net); nf_conntrack_expect_pernet_fini(net); - kmem_cache_destroy(net->ct.nf_conntrack_cachep); - kfree(net->ct.slabname); free_percpu(net->ct.stat); free_percpu(net->ct.pcpu_lists); } @@ -1607,7 +1641,7 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) local_bh_disable(); nf_conntrack_all_lock(); - write_seqcount_begin(&init_net.ct.generation); + 
write_seqcount_begin(&nf_conntrack_generation); /* Lookups in the old hash might happen in parallel, which means we * might get false negatives during connection lookup. New connections @@ -1615,26 +1649,28 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) * though since that required taking the locks. */ - for (i = 0; i < init_net.ct.htable_size; i++) { - while (!hlist_nulls_empty(&init_net.ct.hash[i])) { - h = hlist_nulls_entry(init_net.ct.hash[i].first, - struct nf_conntrack_tuple_hash, hnnode); + for (i = 0; i < nf_conntrack_htable_size; i++) { + while (!hlist_nulls_empty(&nf_conntrack_hash[i])) { + h = hlist_nulls_entry(nf_conntrack_hash[i].first, + struct nf_conntrack_tuple_hash, hnnode); ct = nf_ct_tuplehash_to_ctrack(h); hlist_nulls_del_rcu(&h->hnnode); - bucket = __hash_conntrack(&h->tuple, hashsize); + bucket = __hash_conntrack(nf_ct_net(ct), + &h->tuple, hashsize); hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]); } } - old_size = init_net.ct.htable_size; - old_hash = init_net.ct.hash; + old_size = nf_conntrack_htable_size; + old_hash = nf_conntrack_hash; - init_net.ct.htable_size = nf_conntrack_htable_size = hashsize; - init_net.ct.hash = hash; + nf_conntrack_hash = hash; + nf_conntrack_htable_size = hashsize; - write_seqcount_end(&init_net.ct.generation); + write_seqcount_end(&nf_conntrack_generation); nf_conntrack_all_unlock(); local_bh_enable(); + synchronize_net(); nf_ct_free_hashtable(old_hash, old_size); return 0; } @@ -1655,7 +1691,10 @@ EXPORT_SYMBOL_GPL(nf_ct_untracked_status_or); int nf_conntrack_init_start(void) { int max_factor = 8; - int i, ret, cpu; + int ret = -ENOMEM; + int i, cpu; + + seqcount_init(&nf_conntrack_generation); for (i = 0; i < CONNTRACK_LOCKS; i++) spin_lock_init(&nf_conntrack_locks[i]); @@ -1682,8 +1721,19 @@ int nf_conntrack_init_start(void) * entries. 
*/ max_factor = 4; } + + nf_conntrack_hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size, 1); + if (!nf_conntrack_hash) + return -ENOMEM; + nf_conntrack_max = max_factor * nf_conntrack_htable_size; + nf_conntrack_cachep = kmem_cache_create("nf_conntrack", + sizeof(struct nf_conn), 0, + SLAB_DESTROY_BY_RCU, NULL); + if (!nf_conntrack_cachep) + goto err_cachep; + printk(KERN_INFO "nf_conntrack version %s (%u buckets, %d max)\n", NF_CONNTRACK_VERSION, nf_conntrack_htable_size, nf_conntrack_max); @@ -1760,6 +1810,9 @@ err_tstamp: err_acct: nf_conntrack_expect_fini(); err_expect: + kmem_cache_destroy(nf_conntrack_cachep); +err_cachep: + nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size); return ret; } @@ -1783,7 +1836,6 @@ int nf_conntrack_init_net(struct net *net) int cpu; atomic_set(&net->ct.count, 0); - seqcount_init(&net->ct.generation); net->ct.pcpu_lists = alloc_percpu(struct ct_pcpu); if (!net->ct.pcpu_lists) @@ -1801,24 +1853,6 @@ int nf_conntrack_init_net(struct net *net) if (!net->ct.stat) goto err_pcpu_lists; - net->ct.slabname = kasprintf(GFP_KERNEL, "nf_conntrack_%p", net); - if (!net->ct.slabname) - goto err_slabname; - - net->ct.nf_conntrack_cachep = kmem_cache_create(net->ct.slabname, - sizeof(struct nf_conn), 0, - SLAB_DESTROY_BY_RCU, NULL); - if (!net->ct.nf_conntrack_cachep) { - printk(KERN_ERR "Unable to create nf_conn slab cache\n"); - goto err_cache; - } - - net->ct.htable_size = nf_conntrack_htable_size; - net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size, 1); - if (!net->ct.hash) { - printk(KERN_ERR "Unable to create nf_conntrack_hash\n"); - goto err_hash; - } ret = nf_conntrack_expect_pernet_init(net); if (ret < 0) goto err_expect; @@ -1850,12 +1884,6 @@ err_tstamp: err_acct: nf_conntrack_expect_pernet_fini(net); err_expect: - nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size); -err_hash: - kmem_cache_destroy(net->ct.nf_conntrack_cachep); -err_cache: - kfree(net->ct.slabname); -err_slabname: 
free_percpu(net->ct.stat); err_pcpu_lists: free_percpu(net->ct.pcpu_lists); diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 278927ab0948..9e3693128313 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -24,6 +24,7 @@ #include <linux/moduleparam.h> #include <linux/export.h> #include <net/net_namespace.h> +#include <net/netns/hash.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_core.h> @@ -35,9 +36,13 @@ unsigned int nf_ct_expect_hsize __read_mostly; EXPORT_SYMBOL_GPL(nf_ct_expect_hsize); +struct hlist_head *nf_ct_expect_hash __read_mostly; +EXPORT_SYMBOL_GPL(nf_ct_expect_hash); + unsigned int nf_ct_expect_max __read_mostly; static struct kmem_cache *nf_ct_expect_cachep __read_mostly; +static unsigned int nf_ct_expect_hashrnd __read_mostly; /* nf_conntrack_expect helper functions */ void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp, @@ -72,21 +77,32 @@ static void nf_ct_expectation_timed_out(unsigned long ul_expect) nf_ct_expect_put(exp); } -static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple) +static unsigned int nf_ct_expect_dst_hash(const struct net *n, const struct nf_conntrack_tuple *tuple) { - unsigned int hash; + unsigned int hash, seed; - if (unlikely(!nf_conntrack_hash_rnd)) { - init_nf_conntrack_hash_rnd(); - } + get_random_once(&nf_ct_expect_hashrnd, sizeof(nf_ct_expect_hashrnd)); + + seed = nf_ct_expect_hashrnd ^ net_hash_mix(n); hash = jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all), (((tuple->dst.protonum ^ tuple->src.l3num) << 16) | - (__force __u16)tuple->dst.u.all) ^ nf_conntrack_hash_rnd); + (__force __u16)tuple->dst.u.all) ^ seed); return reciprocal_scale(hash, nf_ct_expect_hsize); } +static bool +nf_ct_exp_equal(const struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_expect *i, + const struct nf_conntrack_zone *zone, + const struct net *net) +{ + return 
nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) && + net_eq(net, nf_ct_net(i->master)) && + nf_ct_zone_equal_any(i->master, zone); +} + struct nf_conntrack_expect * __nf_ct_expect_find(struct net *net, const struct nf_conntrack_zone *zone, @@ -98,10 +114,9 @@ __nf_ct_expect_find(struct net *net, if (!net->ct.expect_count) return NULL; - h = nf_ct_expect_dst_hash(tuple); - hlist_for_each_entry_rcu(i, &net->ct.expect_hash[h], hnode) { - if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) && - nf_ct_zone_equal_any(i->master, zone)) + h = nf_ct_expect_dst_hash(net, tuple); + hlist_for_each_entry_rcu(i, &nf_ct_expect_hash[h], hnode) { + if (nf_ct_exp_equal(tuple, i, zone, net)) return i; } return NULL; @@ -139,11 +154,10 @@ nf_ct_find_expectation(struct net *net, if (!net->ct.expect_count) return NULL; - h = nf_ct_expect_dst_hash(tuple); - hlist_for_each_entry(i, &net->ct.expect_hash[h], hnode) { + h = nf_ct_expect_dst_hash(net, tuple); + hlist_for_each_entry(i, &nf_ct_expect_hash[h], hnode) { if (!(i->flags & NF_CT_EXPECT_INACTIVE) && - nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) && - nf_ct_zone_equal_any(i->master, zone)) { + nf_ct_exp_equal(tuple, i, zone, net)) { exp = i; break; } @@ -223,6 +237,7 @@ static inline int expect_clash(const struct nf_conntrack_expect *a, } return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask) && + net_eq(nf_ct_net(a->master), nf_ct_net(b->master)) && nf_ct_zone_equal_any(a->master, nf_ct_zone(b->master)); } @@ -232,6 +247,7 @@ static inline int expect_matches(const struct nf_conntrack_expect *a, return a->master == b->master && a->class == b->class && nf_ct_tuple_equal(&a->tuple, &b->tuple) && nf_ct_tuple_mask_equal(&a->mask, &b->mask) && + net_eq(nf_ct_net(a->master), nf_ct_net(b->master)) && nf_ct_zone_equal_any(a->master, nf_ct_zone(b->master)); } @@ -342,7 +358,7 @@ static int nf_ct_expect_insert(struct nf_conntrack_expect *exp) struct nf_conn_help *master_help = nfct_help(exp->master); struct nf_conntrack_helper 
*helper; struct net *net = nf_ct_exp_net(exp); - unsigned int h = nf_ct_expect_dst_hash(&exp->tuple); + unsigned int h = nf_ct_expect_dst_hash(net, &exp->tuple); /* two references : one for hash insert, one for the timer */ atomic_add(2, &exp->use); @@ -350,7 +366,7 @@ static int nf_ct_expect_insert(struct nf_conntrack_expect *exp) hlist_add_head(&exp->lnode, &master_help->expectations); master_help->expecting[exp->class]++; - hlist_add_head_rcu(&exp->hnode, &net->ct.expect_hash[h]); + hlist_add_head_rcu(&exp->hnode, &nf_ct_expect_hash[h]); net->ct.expect_count++; setup_timer(&exp->timeout, nf_ct_expectation_timed_out, @@ -401,8 +417,8 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) ret = -ESHUTDOWN; goto out; } - h = nf_ct_expect_dst_hash(&expect->tuple); - hlist_for_each_entry_safe(i, next, &net->ct.expect_hash[h], hnode) { + h = nf_ct_expect_dst_hash(net, &expect->tuple); + hlist_for_each_entry_safe(i, next, &nf_ct_expect_hash[h], hnode) { if (expect_matches(i, expect)) { if (del_timer(&i->timeout)) { nf_ct_unlink_expect(i); @@ -468,12 +484,11 @@ struct ct_expect_iter_state { static struct hlist_node *ct_expect_get_first(struct seq_file *seq) { - struct net *net = seq_file_net(seq); struct ct_expect_iter_state *st = seq->private; struct hlist_node *n; for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { - n = rcu_dereference(hlist_first_rcu(&net->ct.expect_hash[st->bucket])); + n = rcu_dereference(hlist_first_rcu(&nf_ct_expect_hash[st->bucket])); if (n) return n; } @@ -483,14 +498,13 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq) static struct hlist_node *ct_expect_get_next(struct seq_file *seq, struct hlist_node *head) { - struct net *net = seq_file_net(seq); struct ct_expect_iter_state *st = seq->private; head = rcu_dereference(hlist_next_rcu(head)); while (head == NULL) { if (++st->bucket >= nf_ct_expect_hsize) return NULL; - head = 
rcu_dereference(hlist_first_rcu(&net->ct.expect_hash[st->bucket])); + head = rcu_dereference(hlist_first_rcu(&nf_ct_expect_hash[st->bucket])); } return head; } @@ -623,28 +637,13 @@ module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0400); int nf_conntrack_expect_pernet_init(struct net *net) { - int err = -ENOMEM; - net->ct.expect_count = 0; - net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, 0); - if (net->ct.expect_hash == NULL) - goto err1; - - err = exp_proc_init(net); - if (err < 0) - goto err2; - - return 0; -err2: - nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize); -err1: - return err; + return exp_proc_init(net); } void nf_conntrack_expect_pernet_fini(struct net *net) { exp_proc_remove(net); - nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize); } int nf_conntrack_expect_init(void) @@ -660,6 +659,13 @@ int nf_conntrack_expect_init(void) 0, 0, NULL); if (!nf_ct_expect_cachep) return -ENOMEM; + + nf_ct_expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, 0); + if (!nf_ct_expect_hash) { + kmem_cache_destroy(nf_ct_expect_cachep); + return -ENOMEM; + } + return 0; } @@ -667,4 +673,5 @@ void nf_conntrack_expect_fini(void) { rcu_barrier(); /* Wait for call_rcu() before destroy */ kmem_cache_destroy(nf_ct_expect_cachep); + nf_ct_free_hashtable(nf_ct_expect_hash, nf_ct_expect_hsize); } diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 3b40ec575cd5..f703adb7e5f7 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -38,10 +38,10 @@ unsigned int nf_ct_helper_hsize __read_mostly; EXPORT_SYMBOL_GPL(nf_ct_helper_hsize); static unsigned int nf_ct_helper_count __read_mostly; -static bool nf_ct_auto_assign_helper __read_mostly = true; +static bool nf_ct_auto_assign_helper __read_mostly = false; module_param_named(nf_conntrack_helper, nf_ct_auto_assign_helper, bool, 0644); MODULE_PARM_DESC(nf_conntrack_helper, - "Enable automatic conntrack 
helper assignment (default 1)"); + "Enable automatic conntrack helper assignment (default 0)"); #ifdef CONFIG_SYSCTL static struct ctl_table helper_sysctl_table[] = { @@ -400,7 +400,7 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me, spin_lock_bh(&nf_conntrack_expect_lock); for (i = 0; i < nf_ct_expect_hsize; i++) { hlist_for_each_entry_safe(exp, next, - &net->ct.expect_hash[i], hnode) { + &nf_ct_expect_hash[i], hnode) { struct nf_conn_help *help = nfct_help(exp->master); if ((rcu_dereference_protected( help->helper, @@ -424,10 +424,10 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me, spin_unlock_bh(&pcpu->lock); } local_bh_disable(); - for (i = 0; i < net->ct.htable_size; i++) { + for (i = 0; i < nf_conntrack_htable_size; i++) { nf_conntrack_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]); - if (i < net->ct.htable_size) { - hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode) + if (i < nf_conntrack_htable_size) { + hlist_nulls_for_each_entry(h, nn, &nf_conntrack_hash[i], hnnode) unhelp(h, me); } spin_unlock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 294a8e28cec4..a18d1ceabad5 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -824,19 +824,22 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) last = (struct nf_conn *)cb->args[1]; local_bh_disable(); - for (; cb->args[0] < net->ct.htable_size; cb->args[0]++) { + for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++) { restart: lockp = &nf_conntrack_locks[cb->args[0] % CONNTRACK_LOCKS]; nf_conntrack_lock(lockp); - if (cb->args[0] >= net->ct.htable_size) { + if (cb->args[0] >= nf_conntrack_htable_size) { spin_unlock(lockp); goto out; } - hlist_nulls_for_each_entry(h, n, &net->ct.hash[cb->args[0]], - hnnode) { + hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[cb->args[0]], + hnnode) 
{ if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL) continue; ct = nf_ct_tuplehash_to_ctrack(h); + if (!net_eq(net, nf_ct_net(ct))) + continue; + /* Dump entries of a given L3 protocol number. * If it is not specified, ie. l3proto == 0, * then dump everything. */ @@ -2629,10 +2632,14 @@ ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb) last = (struct nf_conntrack_expect *)cb->args[1]; for (; cb->args[0] < nf_ct_expect_hsize; cb->args[0]++) { restart: - hlist_for_each_entry(exp, &net->ct.expect_hash[cb->args[0]], + hlist_for_each_entry(exp, &nf_ct_expect_hash[cb->args[0]], hnode) { if (l3proto && exp->tuple.src.l3num != l3proto) continue; + + if (!net_eq(nf_ct_net(exp->master), net)) + continue; + if (cb->args[1]) { if (exp != last) continue; @@ -2883,8 +2890,12 @@ static int ctnetlink_del_expect(struct net *net, struct sock *ctnl, spin_lock_bh(&nf_conntrack_expect_lock); for (i = 0; i < nf_ct_expect_hsize; i++) { hlist_for_each_entry_safe(exp, next, - &net->ct.expect_hash[i], + &nf_ct_expect_hash[i], hnode) { + + if (!net_eq(nf_ct_exp_net(exp), net)) + continue; + m_help = nfct_help(exp->master); if (!strcmp(m_help->helper->name, name) && del_timer(&exp->timeout)) { @@ -2901,8 +2912,12 @@ static int ctnetlink_del_expect(struct net *net, struct sock *ctnl, spin_lock_bh(&nf_conntrack_expect_lock); for (i = 0; i < nf_ct_expect_hsize; i++) { hlist_for_each_entry_safe(exp, next, - &net->ct.expect_hash[i], + &nf_ct_expect_hash[i], hnode) { + + if (!net_eq(nf_ct_exp_net(exp), net)) + continue; + if (del_timer(&exp->timeout)) { nf_ct_unlink_expect_report(exp, NETLINK_CB(skb).portid, diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 478f92f834b6..4fd040575ffe 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -309,6 +309,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly = .l3proto = PF_INET, .l4proto = IPPROTO_UDP, .name = "udp", + 
.allow_clash = true, .pkt_to_tuple = udp_pkt_to_tuple, .invert_tuple = udp_invert_tuple, .print_tuple = udp_print_tuple, @@ -341,6 +342,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly = .l3proto = PF_INET6, .l4proto = IPPROTO_UDP, .name = "udp", + .allow_clash = true, .pkt_to_tuple = udp_pkt_to_tuple, .invert_tuple = udp_invert_tuple, .print_tuple = udp_print_tuple, diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index 1ac8ee13a873..9d692f5adb94 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -274,6 +274,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly = .l3proto = PF_INET, .l4proto = IPPROTO_UDPLITE, .name = "udplite", + .allow_clash = true, .pkt_to_tuple = udplite_pkt_to_tuple, .invert_tuple = udplite_invert_tuple, .print_tuple = udplite_print_tuple, @@ -306,6 +307,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly = .l3proto = PF_INET6, .l4proto = IPPROTO_UDPLITE, .name = "udplite", + .allow_clash = true, .pkt_to_tuple = udplite_pkt_to_tuple, .invert_tuple = udplite_invert_tuple, .print_tuple = udplite_print_tuple, diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 0f1a45bcacb2..f87e84ebcec3 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -54,14 +54,13 @@ struct ct_iter_state { static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) { - struct net *net = seq_file_net(seq); struct ct_iter_state *st = seq->private; struct hlist_nulls_node *n; for (st->bucket = 0; - st->bucket < net->ct.htable_size; + st->bucket < nf_conntrack_htable_size; st->bucket++) { - n = rcu_dereference(hlist_nulls_first_rcu(&net->ct.hash[st->bucket])); + n = rcu_dereference(hlist_nulls_first_rcu(&nf_conntrack_hash[st->bucket])); if (!is_a_nulls(n)) return n; } @@ -71,18 
+70,17 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) static struct hlist_nulls_node *ct_get_next(struct seq_file *seq, struct hlist_nulls_node *head) { - struct net *net = seq_file_net(seq); struct ct_iter_state *st = seq->private; head = rcu_dereference(hlist_nulls_next_rcu(head)); while (is_a_nulls(head)) { if (likely(get_nulls_value(head) == st->bucket)) { - if (++st->bucket >= net->ct.htable_size) + if (++st->bucket >= nf_conntrack_htable_size) return NULL; } head = rcu_dereference( hlist_nulls_first_rcu( - &net->ct.hash[st->bucket])); + &nf_conntrack_hash[st->bucket])); } return head; } @@ -458,7 +456,7 @@ static struct ctl_table nf_ct_sysctl_table[] = { }, { .procname = "nf_conntrack_buckets", - .data = &init_net.ct.htable_size, + .data = &nf_conntrack_htable_size, .maxlen = sizeof(unsigned int), .mode = 0444, .proc_handler = proc_dointvec, @@ -512,7 +510,6 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net) goto out_kmemdup; table[1].data = &net->ct.count; - table[2].data = &net->ct.htable_size; table[3].data = &net->ct.sysctl_checksum; table[4].data = &net->ct.sysctl_log_invalid; diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 06a9f45771ab..6877a396f8fc 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -38,6 +38,9 @@ static const struct nf_nat_l3proto __rcu *nf_nat_l3protos[NFPROTO_NUMPROTO] static const struct nf_nat_l4proto __rcu **nf_nat_l4protos[NFPROTO_NUMPROTO] __read_mostly; +static struct hlist_head *nf_nat_bysource __read_mostly; +static unsigned int nf_nat_htable_size __read_mostly; +static unsigned int nf_nat_hash_rnd __read_mostly; inline const struct nf_nat_l3proto * __nf_nat_l3proto_find(u8 family) @@ -118,15 +121,17 @@ EXPORT_SYMBOL(nf_xfrm_me_harder); /* We keep an extra hash for each conntrack, for fast searching. 
*/ static inline unsigned int -hash_by_src(const struct net *net, const struct nf_conntrack_tuple *tuple) +hash_by_src(const struct net *n, const struct nf_conntrack_tuple *tuple) { unsigned int hash; + get_random_once(&nf_nat_hash_rnd, sizeof(nf_nat_hash_rnd)); + /* Original src, to ensure we map it consistently if poss. */ hash = jhash2((u32 *)&tuple->src, sizeof(tuple->src) / sizeof(u32), - tuple->dst.protonum ^ nf_conntrack_hash_rnd); + tuple->dst.protonum ^ nf_nat_hash_rnd ^ net_hash_mix(n)); - return reciprocal_scale(hash, net->ct.nat_htable_size); + return reciprocal_scale(hash, nf_nat_htable_size); } /* Is this tuple already taken? (not by us) */ @@ -196,9 +201,10 @@ find_appropriate_src(struct net *net, const struct nf_conn_nat *nat; const struct nf_conn *ct; - hlist_for_each_entry_rcu(nat, &net->ct.nat_bysource[h], bysource) { + hlist_for_each_entry_rcu(nat, &nf_nat_bysource[h], bysource) { ct = nat->ct; if (same_src(ct, tuple) && + net_eq(net, nf_ct_net(ct)) && nf_ct_zone_equal(ct, zone, IP_CT_DIR_ORIGINAL)) { /* Copy source part from reply tuple. */ nf_ct_invert_tuplepr(result, @@ -431,7 +437,7 @@ nf_nat_setup_info(struct nf_conn *ct, nat = nfct_nat(ct); nat->ct = ct; hlist_add_head_rcu(&nat->bysource, - &net->ct.nat_bysource[srchash]); + &nf_nat_bysource[srchash]); spin_unlock_bh(&nf_nat_lock); } @@ -819,27 +825,14 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct, } #endif -static int __net_init nf_nat_net_init(struct net *net) -{ - /* Leave them the same for the moment. 
*/ - net->ct.nat_htable_size = net->ct.htable_size; - net->ct.nat_bysource = nf_ct_alloc_hashtable(&net->ct.nat_htable_size, 0); - if (!net->ct.nat_bysource) - return -ENOMEM; - return 0; -} - static void __net_exit nf_nat_net_exit(struct net *net) { struct nf_nat_proto_clean clean = {}; nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean, 0, 0); - synchronize_rcu(); - nf_ct_free_hashtable(net->ct.nat_bysource, net->ct.nat_htable_size); } static struct pernet_operations nf_nat_net_ops = { - .init = nf_nat_net_init, .exit = nf_nat_net_exit, }; @@ -852,8 +845,16 @@ static int __init nf_nat_init(void) { int ret; + /* Leave them the same for the moment. */ + nf_nat_htable_size = nf_conntrack_htable_size; + + nf_nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, 0); + if (!nf_nat_bysource) + return -ENOMEM; + ret = nf_ct_extend_register(&nat_extend); if (ret < 0) { + nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size); printk(KERN_ERR "nf_nat_core: Unable to register extension\n"); return ret; } @@ -877,6 +878,7 @@ static int __init nf_nat_init(void) return 0; cleanup_extend: + nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size); nf_ct_extend_unregister(&nat_extend); return ret; } @@ -895,6 +897,7 @@ static void __exit nf_nat_cleanup(void) for (i = 0; i < NFPROTO_NUMPROTO; i++) kfree(nf_nat_l4protos[i]); synchronize_net(); + nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size); } MODULE_LICENSE("GPL"); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 7a85a9dd37ad..4d292b933b5c 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2317,7 +2317,7 @@ nft_select_set_ops(const struct nlattr * const nla[], static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = { [NFTA_SET_TABLE] = { .type = NLA_STRING }, [NFTA_SET_NAME] = { .type = NLA_STRING, - .len = IFNAMSIZ - 1 }, + .len = NFT_SET_MAXNAMELEN - 1 }, [NFTA_SET_FLAGS] = { .type = NLA_U32 }, [NFTA_SET_KEY_TYPE] = { .type = 
NLA_U32 }, [NFTA_SET_KEY_LEN] = { .type = NLA_U32 }, @@ -2401,7 +2401,7 @@ static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set, unsigned long *inuse; unsigned int n = 0, min = 0; - p = strnchr(name, IFNAMSIZ, '%'); + p = strnchr(name, NFT_SET_MAXNAMELEN, '%'); if (p != NULL) { if (p[1] != 'd' || strchr(p + 2, '%')) return -EINVAL; @@ -2696,7 +2696,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, struct nft_table *table; struct nft_set *set; struct nft_ctx ctx; - char name[IFNAMSIZ]; + char name[NFT_SET_MAXNAMELEN]; unsigned int size; bool create; u64 timeout; @@ -3375,6 +3375,22 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem) } EXPORT_SYMBOL_GPL(nft_set_elem_destroy); +static int nft_setelem_parse_flags(const struct nft_set *set, + const struct nlattr *attr, u32 *flags) +{ + if (attr == NULL) + return 0; + + *flags = ntohl(nla_get_be32(attr)); + if (*flags & ~NFT_SET_ELEM_INTERVAL_END) + return -EINVAL; + if (!(set->flags & NFT_SET_INTERVAL) && + *flags & NFT_SET_ELEM_INTERVAL_END) + return -EINVAL; + + return 0; +} + static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, const struct nlattr *attr) { @@ -3388,8 +3404,8 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, struct nft_data data; enum nft_registers dreg; struct nft_trans *trans; + u32 flags = 0; u64 timeout; - u32 flags; u8 ulen; int err; @@ -3403,17 +3419,11 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, nft_set_ext_prepare(&tmpl); - flags = 0; - if (nla[NFTA_SET_ELEM_FLAGS] != NULL) { - flags = ntohl(nla_get_be32(nla[NFTA_SET_ELEM_FLAGS])); - if (flags & ~NFT_SET_ELEM_INTERVAL_END) - return -EINVAL; - if (!(set->flags & NFT_SET_INTERVAL) && - flags & NFT_SET_ELEM_INTERVAL_END) - return -EINVAL; - if (flags != 0) - nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS); - } + err = nft_setelem_parse_flags(set, nla[NFTA_SET_ELEM_FLAGS], &flags); + if (err < 0) + return err; + if (flags != 0) 
+ nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS); if (set->flags & NFT_SET_MAP) { if (nla[NFTA_SET_ELEM_DATA] == NULL && @@ -3582,9 +3592,13 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, const struct nlattr *attr) { struct nlattr *nla[NFTA_SET_ELEM_MAX + 1]; + struct nft_set_ext_tmpl tmpl; struct nft_data_desc desc; struct nft_set_elem elem; + struct nft_set_ext *ext; struct nft_trans *trans; + u32 flags = 0; + void *priv; int err; err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr, @@ -3596,6 +3610,14 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, if (nla[NFTA_SET_ELEM_KEY] == NULL) goto err1; + nft_set_ext_prepare(&tmpl); + + err = nft_setelem_parse_flags(set, nla[NFTA_SET_ELEM_FLAGS], &flags); + if (err < 0) + return err; + if (flags != 0) + nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS); + err = nft_data_init(ctx, &elem.key.val, sizeof(elem.key), &desc, nla[NFTA_SET_ELEM_KEY]); if (err < 0) @@ -3605,24 +3627,40 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, if (desc.type != NFT_DATA_VALUE || desc.len != set->klen) goto err2; + nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, desc.len); + + err = -ENOMEM; + elem.priv = nft_set_elem_init(set, &tmpl, elem.key.val.data, NULL, 0, + GFP_KERNEL); + if (elem.priv == NULL) + goto err2; + + ext = nft_set_elem_ext(set, elem.priv); + if (flags) + *nft_set_ext_flags(ext) = flags; + trans = nft_trans_elem_alloc(ctx, NFT_MSG_DELSETELEM, set); if (trans == NULL) { err = -ENOMEM; - goto err2; + goto err3; } - elem.priv = set->ops->deactivate(set, &elem); - if (elem.priv == NULL) { + priv = set->ops->deactivate(set, &elem); + if (priv == NULL) { err = -ENOENT; - goto err3; + goto err4; } + kfree(elem.priv); + elem.priv = priv; nft_trans_elem(trans) = elem; list_add_tail(&trans->list, &ctx->net->nft.commit_list); return 0; -err3: +err4: kfree(trans); +err3: + kfree(elem.priv); err2: nft_data_uninit(&elem.key.val, desc.type); err1: diff --git 
a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index 2671b9deb103..3c84f14326f5 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -306,10 +306,10 @@ static void ctnl_untimeout(struct net *net, struct ctnl_timeout *timeout) int i; local_bh_disable(); - for (i = 0; i < net->ct.htable_size; i++) { + for (i = 0; i < nf_conntrack_htable_size; i++) { nf_conntrack_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]); - if (i < net->ct.htable_size) { - hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode) + if (i < nf_conntrack_htable_size) { + hlist_nulls_for_each_entry(h, nn, &nf_conntrack_hash[i], hnnode) untimeout(h, timeout); } spin_unlock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]); diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 25998facefd0..137e308d5b24 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -198,6 +198,14 @@ static void nft_ct_set_eval(const struct nft_expr *expr, } break; #endif +#ifdef CONFIG_NF_CONNTRACK_LABELS + case NFT_CT_LABELS: + nf_connlabels_replace(ct, + &regs->data[priv->sreg], + &regs->data[priv->sreg], + NF_CT_LABELS_MAX_SIZE / sizeof(u32)); + break; +#endif default: break; } @@ -365,6 +373,16 @@ static int nft_ct_set_init(const struct nft_ctx *ctx, len = FIELD_SIZEOF(struct nf_conn, mark); break; #endif +#ifdef CONFIG_NF_CONNTRACK_LABELS + case NFT_CT_LABELS: + if (tb[NFTA_CT_DIRECTION]) + return -EINVAL; + len = NF_CT_LABELS_MAX_SIZE; + err = nf_connlabels_get(ctx->net, (len * BITS_PER_BYTE) - 1); + if (err) + return err; + break; +#endif default: return -EOPNOTSUPP; } @@ -384,6 +402,18 @@ static int nft_ct_set_init(const struct nft_ctx *ctx, static void nft_ct_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { + struct nft_ct *priv = nft_expr_priv(expr); + + switch (priv->key) { +#ifdef CONFIG_NF_CONNTRACK_LABELS + case NFT_CT_LABELS: + nf_connlabels_put(ctx->net); + break; +#endif + default: + break; + } +
nft_ct_l3proto_module_put(ctx->afi->family); } diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c index 1c30f41cff5b..f762094af7c1 100644 --- a/net/netfilter/nft_rbtree.c +++ b/net/netfilter/nft_rbtree.c @@ -29,6 +29,17 @@ struct nft_rbtree_elem { struct nft_set_ext ext; }; +static bool nft_rbtree_interval_end(const struct nft_rbtree_elem *rbe) +{ + return nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) && + (*nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END); +} + +static bool nft_rbtree_equal(const struct nft_set *set, const void *this, + const struct nft_rbtree_elem *interval) +{ + return memcmp(this, nft_set_ext_key(&interval->ext), set->klen) == 0; +} static bool nft_rbtree_lookup(const struct nft_set *set, const u32 *key, const struct nft_set_ext **ext) @@ -37,6 +48,7 @@ static bool nft_rbtree_lookup(const struct nft_set *set, const u32 *key, const struct nft_rbtree_elem *rbe, *interval = NULL; const struct rb_node *parent; u8 genmask = nft_genmask_cur(read_pnet(&set->pnet)); + const void *this; int d; spin_lock_bh(&nft_rbtree_lock); @@ -44,9 +56,16 @@ static bool nft_rbtree_lookup(const struct nft_set *set, const u32 *key, while (parent != NULL) { rbe = rb_entry(parent, struct nft_rbtree_elem, node); - d = memcmp(nft_set_ext_key(&rbe->ext), key, set->klen); + this = nft_set_ext_key(&rbe->ext); + d = memcmp(this, key, set->klen); if (d < 0) { parent = parent->rb_left; + /* In case of adjacent ranges, we always see the high + * part of the range in first place, before the low one. + * So don't update interval if the keys are equal. 
+ */ + if (interval && nft_rbtree_equal(set, this, interval)) + continue; interval = rbe; } else if (d > 0) parent = parent->rb_right; @@ -56,9 +75,7 @@ found: parent = parent->rb_left; continue; } - if (nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) && - *nft_set_ext_flags(&rbe->ext) & - NFT_SET_ELEM_INTERVAL_END) + if (nft_rbtree_interval_end(rbe)) goto out; spin_unlock_bh(&nft_rbtree_lock); @@ -98,9 +115,16 @@ static int __nft_rbtree_insert(const struct nft_set *set, else if (d > 0) p = &parent->rb_right; else { - if (nft_set_elem_active(&rbe->ext, genmask)) - return -EEXIST; - p = &parent->rb_left; + if (nft_set_elem_active(&rbe->ext, genmask)) { + if (nft_rbtree_interval_end(rbe) && + !nft_rbtree_interval_end(new)) + p = &parent->rb_left; + else if (!nft_rbtree_interval_end(rbe) && + nft_rbtree_interval_end(new)) + p = &parent->rb_right; + else + return -EEXIST; + } } } rb_link_node(&new->node, parent, p); @@ -145,7 +169,7 @@ static void *nft_rbtree_deactivate(const struct nft_set *set, { const struct nft_rbtree *priv = nft_set_priv(set); const struct rb_node *parent = priv->root.rb_node; - struct nft_rbtree_elem *rbe; + struct nft_rbtree_elem *rbe, *this = elem->priv; u8 genmask = nft_genmask_cur(read_pnet(&set->pnet)); int d; @@ -163,6 +187,15 @@ static void *nft_rbtree_deactivate(const struct nft_set *set, parent = parent->rb_left; continue; } + if (nft_rbtree_interval_end(rbe) && + !nft_rbtree_interval_end(this)) { + parent = parent->rb_left; + continue; + } else if (!nft_rbtree_interval_end(rbe) && + nft_rbtree_interval_end(this)) { + parent = parent->rb_right; + continue; + } nft_set_elem_change_active(set, &rbe->ext); return rbe; } diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 9741a76c7405..9f0bc49fa969 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -439,20 +439,12 @@ ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone, u8 protonum; l3proto = 
__nf_ct_l3proto_find(l3num); - if (!l3proto) { - pr_debug("ovs_ct_find_existing: Can't get l3proto\n"); - return NULL; - } if (l3proto->get_l4proto(skb, skb_network_offset(skb), &dataoff, &protonum) <= 0) { pr_debug("ovs_ct_find_existing: Can't get protonum\n"); return NULL; } l4proto = __nf_ct_l4proto_find(l3num, protonum); - if (!l4proto) { - pr_debug("ovs_ct_find_existing: Can't get l4proto\n"); - return NULL; - } if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num, protonum, net, &tuple, l3proto, l4proto)) { pr_debug("ovs_ct_find_existing: Can't get tuple\n"); diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 3dce53ebea92..b5f1221f48d4 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1808,27 +1808,8 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, else if (sk->sk_shutdown & RCV_SHUTDOWN) err = 0; - if (copied > 0) { - /* We only do these additional bookkeeping/notification steps - * if we actually copied something out of the queue pair - * instead of just peeking ahead. - */ - - if (!(flags & MSG_PEEK)) { - /* If the other side has shutdown for sending and there - * is nothing more to read, then modify the socket - * state. - */ - if (vsk->peer_shutdown & SEND_SHUTDOWN) { - if (vsock_stream_has_data(vsk) <= 0) { - sk->sk_state = SS_UNCONNECTED; - sock_set_flag(sk, SOCK_DONE); - sk->sk_state_change(sk); - } - } - } + if (copied > 0) err = copied; - } out: release_sock(sk); diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index ff4a91fcab9f..637387bbaaea 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -99,6 +99,9 @@ static int xfrm_output_one(struct sk_buff *skb, int err) skb_dst_force(skb); + /* Inner headers are invalid now. 
*/ + skb->encapsulation = 0; + err = x->type->output(x, skb); if (err == -EINPROGRESS) goto out; diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c index 161dd0d67da8..a9155077feef 100644 --- a/scripts/mod/file2alias.c +++ b/scripts/mod/file2alias.c @@ -371,6 +371,49 @@ static void do_usb_table(void *symval, unsigned long size, do_usb_entry_multi(symval + i, mod); } +static void do_of_entry_multi(void *symval, struct module *mod) +{ + char alias[500]; + int len; + char *tmp; + + DEF_FIELD_ADDR(symval, of_device_id, name); + DEF_FIELD_ADDR(symval, of_device_id, type); + DEF_FIELD_ADDR(symval, of_device_id, compatible); + + len = sprintf(alias, "of:N%sT%s", (*name)[0] ? *name : "*", + (*type)[0] ? *type : "*"); + + if (compatible[0]) + sprintf(&alias[len], "%sC%s", (*type)[0] ? "*" : "", + *compatible); + + /* Replace all whitespace with underscores */ + for (tmp = alias; tmp && *tmp; tmp++) + if (isspace(*tmp)) + *tmp = '_'; + + buf_printf(&mod->dev_table_buf, "MODULE_ALIAS(\"%s\");\n", alias); + strcat(alias, "C"); + add_wildcard(alias); + buf_printf(&mod->dev_table_buf, "MODULE_ALIAS(\"%s\");\n", alias); +} + +static void do_of_table(void *symval, unsigned long size, + struct module *mod) +{ + unsigned int i; + const unsigned long id_size = SIZE_of_device_id; + + device_id_check(mod->name, "of", size, id_size, symval); + + /* Leave last one: it's the terminator. 
*/ + size -= id_size; + + for (i = 0; i < size; i += id_size) + do_of_entry_multi(symval + i, mod); +} + /* Looks like: hid:bNvNpN */ static int do_hid_entry(const char *filename, void *symval, char *alias) @@ -684,30 +727,6 @@ static int do_pcmcia_entry(const char *filename, } ADD_TO_DEVTABLE("pcmcia", pcmcia_device_id, do_pcmcia_entry); -static int do_of_entry (const char *filename, void *symval, char *alias) -{ - int len; - char *tmp; - DEF_FIELD_ADDR(symval, of_device_id, name); - DEF_FIELD_ADDR(symval, of_device_id, type); - DEF_FIELD_ADDR(symval, of_device_id, compatible); - - len = sprintf(alias, "of:N%sT%s", (*name)[0] ? *name : "*", - (*type)[0] ? *type : "*"); - - if (compatible[0]) - sprintf(&alias[len], "%sC%s", (*type)[0] ? "*" : "", - *compatible); - - /* Replace all whitespace with underscores */ - for (tmp = alias; tmp && *tmp; tmp++) - if (isspace (*tmp)) - *tmp = '_'; - - return 1; -} -ADD_TO_DEVTABLE("of", of_device_id, do_of_entry); - static int do_vio_entry(const char *filename, void *symval, char *alias) { @@ -1348,6 +1367,8 @@ void handle_moddevtable(struct module *mod, struct elf_info *info, /* First handle the "special" cases */ if (sym_is(name, namelen, "usb")) do_usb_table(symval, sym->st_size, mod); + if (sym_is(name, namelen, "of")) + do_of_table(symval, sym->st_size, mod); else if (sym_is(name, namelen, "pnp")) do_pnp_device_entry(symval, sym->st_size, mod); else if (sym_is(name, namelen, "pnp_card")) diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c index be09e2cacf82..3cd0a58672dd 100644 --- a/security/integrity/ima/ima_policy.c +++ b/security/integrity/ima/ima_policy.c @@ -884,10 +884,10 @@ static char *func_tokens[] = { "BPRM_CHECK", "MODULE_CHECK", "FIRMWARE_CHECK", + "POST_SETATTR", "KEXEC_KERNEL_CHECK", "KEXEC_INITRAMFS_CHECK", - "POLICY_CHECK", - "POST_SETATTR" + "POLICY_CHECK" }; void *ima_policy_start(struct seq_file *m, loff_t *pos) diff --git a/tools/net/bpf_jit_disasm.c 
b/tools/net/bpf_jit_disasm.c index 5b3241340945..544b05a53b70 100644 --- a/tools/net/bpf_jit_disasm.c +++ b/tools/net/bpf_jit_disasm.c @@ -98,6 +98,9 @@ static char *get_klog_buff(unsigned int *klen) char *buff; len = klogctl(CMD_ACTION_SIZE_BUFFER, NULL, 0); + if (len < 0) + return NULL; + buff = malloc(len); if (!buff) return NULL; |