813 files changed, 16416 insertions, 9229 deletions
@@ -88,7 +88,6 @@ Antonio Quartulli <antonio@mandelbit.com> <antonio@open-mesh.com> Antonio Quartulli <antonio@mandelbit.com> <antonio.quartulli@open-mesh.com> Antonio Quartulli <antonio@mandelbit.com> <ordex@autistici.org> Antonio Quartulli <antonio@mandelbit.com> <ordex@ritirata.org> -Antonio Quartulli <antonio@mandelbit.com> <antonio@openvpn.net> Antonio Quartulli <antonio@mandelbit.com> <a@unstable.cc> Anup Patel <anup@brainfault.org> <anup.patel@wdc.com> Archit Taneja <archit@ti.com> @@ -522,6 +521,7 @@ Nadav Amit <nadav.amit@gmail.com> <namit@cs.technion.ac.il> Nadia Yvette Chambers <nyc@holomorphy.com> William Lee Irwin III <wli@holomorphy.com> Naoya Horiguchi <nao.horiguchi@gmail.com> <n-horiguchi@ah.jp.nec.com> Naoya Horiguchi <nao.horiguchi@gmail.com> <naoya.horiguchi@nec.com> +Natalie Vock <natalie.vock@gmx.de> <friedrich.vock@gmx.de> Nathan Chancellor <nathan@kernel.org> <natechancellor@gmail.com> Naveen N Rao <naveen@kernel.org> <naveen.n.rao@linux.ibm.com> Naveen N Rao <naveen@kernel.org> <naveen.n.rao@linux.vnet.ibm.com> @@ -613,6 +613,8 @@ Richard Leitner <richard.leitner@linux.dev> <me@g0hl1n.net> Richard Leitner <richard.leitner@linux.dev> <richard.leitner@skidata.com> Robert Foss <rfoss@kernel.org> <robert.foss@linaro.org> Rocky Liao <quic_rjliao@quicinc.com> <rjliao@codeaurora.org> +Rodrigo Siqueira <siqueira@igalia.com> <rodrigosiqueiramelo@gmail.com> +Rodrigo Siqueira <siqueira@igalia.com> <Rodrigo.Siqueira@amd.com> Roman Gushchin <roman.gushchin@linux.dev> <guro@fb.com> Roman Gushchin <roman.gushchin@linux.dev> <guroan@gmail.com> Roman Gushchin <roman.gushchin@linux.dev> <klamm@yandex-team.ru> @@ -689,6 +691,7 @@ Subbaraman Narayanamurthy <quic_subbaram@quicinc.com> <subbaram@codeaurora.org> Subhash Jadavani <subhashj@codeaurora.org> Sudarshan Rajagopalan <quic_sudaraja@quicinc.com> <sudaraja@codeaurora.org> Sudeep Holla <sudeep.holla@arm.com> Sudeep KarkadaNagesha <sudeep.karkadanagesha@arm.com> +Sumit Garg <sumit.garg@kernel.org> <sumit.garg@linaro.org> Sumit Semwal <sumit.semwal@ti.com> Surabhi Vishnoi <quic_svishnoi@quicinc.com> <svishnoi@codeaurora.org> Sven Eckelmann <sven@narfation.org> <seckelmann@datto.com> diff --git a/Documentation/admin-guide/README.rst b/Documentation/admin-guide/README.rst index eb9452668909..b557cf1c820d 100644 --- a/Documentation/admin-guide/README.rst +++ b/Documentation/admin-guide/README.rst @@ -176,7 +176,7 @@ Configuring the kernel values without prompting. "make defconfig" Create a ./.config file by using the default - symbol values from either arch/$ARCH/defconfig + symbol values from either arch/$ARCH/configs/defconfig or arch/$ARCH/configs/${PLATFORM}_defconfig, depending on the architecture. diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index a43b78b4b646..dd49a89a62d3 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -212,6 +212,17 @@ pid>/``). This value defaults to 0. +core_sort_vma +============= + +The default coredump writes VMAs in address order. By setting +``core_sort_vma`` to 1, VMAs will be written from smallest size +to largest size. This is known to break at least elfutils, but +can be handy when dealing with very large (and truncated) +coredumps where the more useful debugging details are included +in the smaller VMAs. 
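[Editor's note, not part of the patch: a minimal usage sketch of the new sysctl described above, assuming a standard procfs mount — ``kernel.core_sort_vma`` corresponds to ``/proc/sys/kernel/core_sort_vma`` under the usual sysctl naming]::

    $ sysctl -w kernel.core_sort_vma=1
    $ cat /proc/sys/kernel/core_sort_vma
    1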
+ + core_uses_pid ============= diff --git a/Documentation/devicetree/bindings/iio/adc/adi,ad7606.yaml b/Documentation/devicetree/bindings/iio/adc/adi,ad7606.yaml index ab5881d0d017..52d3f1ce3367 100644 --- a/Documentation/devicetree/bindings/iio/adc/adi,ad7606.yaml +++ b/Documentation/devicetree/bindings/iio/adc/adi,ad7606.yaml @@ -146,6 +146,7 @@ properties: maxItems: 2 pwm-names: + minItems: 1 items: - const: convst1 - const: convst2 diff --git a/Documentation/devicetree/bindings/net/amlogic,meson-dwmac.yaml b/Documentation/devicetree/bindings/net/amlogic,meson-dwmac.yaml index 798a4c19f18c..0cd78d71768c 100644 --- a/Documentation/devicetree/bindings/net/amlogic,meson-dwmac.yaml +++ b/Documentation/devicetree/bindings/net/amlogic,meson-dwmac.yaml @@ -152,6 +152,12 @@ properties: The second range is is for the Amlogic specific configuration (for example the PRG_ETHERNET register range on Meson8b and newer) + interrupts: + maxItems: 1 + + interrupt-names: + const: macirq + required: - compatible - reg diff --git a/Documentation/devicetree/bindings/net/can/fsl,flexcan.yaml b/Documentation/devicetree/bindings/net/can/fsl,flexcan.yaml index 73252fe56fe6..f81d56f7c12a 100644 --- a/Documentation/devicetree/bindings/net/can/fsl,flexcan.yaml +++ b/Documentation/devicetree/bindings/net/can/fsl,flexcan.yaml @@ -45,6 +45,10 @@ properties: - enum: - nxp,s32g3-flexcan - const: nxp,s32g2-flexcan + - items: + - enum: + - fsl,imx94-flexcan + - const: fsl,imx95-flexcan reg: maxItems: 1 @@ -77,6 +81,9 @@ properties: xceiver-supply: description: Regulator that powers the CAN transceiver. + phys: + maxItems: 1 + big-endian: $ref: /schemas/types.yaml#/definitions/flag description: | @@ -171,6 +178,12 @@ allOf: interrupts: maxItems: 1 interrupt-names: false + - if: + required: + - xceiver-supply + then: + properties: + phys: false additionalProperties: false diff --git a/Documentation/devicetree/bindings/net/intel,dwmac-plat.yaml b/Documentation/devicetree/bindings/net/intel,dwmac-plat.yaml index 42a0bc94312c..62c1da36a2b5 100644 --- a/Documentation/devicetree/bindings/net/intel,dwmac-plat.yaml +++ b/Documentation/devicetree/bindings/net/intel,dwmac-plat.yaml @@ -41,6 +41,12 @@ properties: - const: ptp_ref - const: tx_clk + interrupts: + maxItems: 1 + + interrupt-names: + const: macirq + required: - compatible - clocks diff --git a/Documentation/devicetree/bindings/net/mediatek-dwmac.yaml b/Documentation/devicetree/bindings/net/mediatek-dwmac.yaml index ed9d845f6008..3aab21b8e8de 100644 --- a/Documentation/devicetree/bindings/net/mediatek-dwmac.yaml +++ b/Documentation/devicetree/bindings/net/mediatek-dwmac.yaml @@ -64,6 +64,12 @@ properties: - const: rmii_internal - const: mac_cg + interrupts: + maxItems: 1 + + interrupt-names: + const: macirq + power-domains: maxItems: 1 diff --git a/Documentation/devicetree/bindings/net/nxp,dwmac-imx.yaml b/Documentation/devicetree/bindings/net/nxp,dwmac-imx.yaml index 87bc4416eadf..e5db346beca9 100644 --- a/Documentation/devicetree/bindings/net/nxp,dwmac-imx.yaml +++ b/Documentation/devicetree/bindings/net/nxp,dwmac-imx.yaml @@ -56,6 +56,14 @@ properties: - tx - mem + interrupts: + maxItems: 2 + + interrupt-names: + items: + - const: macirq + - const: eth_wake_irq + intf_mode: $ref: /schemas/types.yaml#/definitions/phandle-array items: diff --git a/Documentation/devicetree/bindings/net/rockchip-dwmac.yaml b/Documentation/devicetree/bindings/net/rockchip-dwmac.yaml index f8a576611d6c..63ee0cc7a8f8 100644 --- a/Documentation/devicetree/bindings/net/rockchip-dwmac.yaml +++ 
b/Documentation/devicetree/bindings/net/rockchip-dwmac.yaml @@ -32,9 +32,6 @@ select: required: - compatible -allOf: - - $ref: snps,dwmac.yaml# - properties: compatible: oneOf: @@ -58,6 +55,16 @@ properties: - rockchip,rv1126-gmac - const: snps,dwmac-4.20a + interrupts: + minItems: 1 + maxItems: 2 + + interrupt-names: + minItems: 1 + items: + - const: macirq + - const: eth_wake_irq + clocks: minItems: 5 maxItems: 8 @@ -114,6 +121,24 @@ required: - compatible - clocks - clock-names + - rockchip,grf + +allOf: + - $ref: snps,dwmac.yaml# + + - if: + properties: + compatible: + contains: + enum: + - rockchip,rk3576-gmac + - rockchip,rk3588-gmac + then: + required: + - rockchip,php-grf + else: + properties: + rockchip,php-grf: false unevaluatedProperties: false diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml index 91e75eb3f329..3f0aa46d798e 100644 --- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml +++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml @@ -32,6 +32,7 @@ select: - snps,dwmac-4.20a - snps,dwmac-5.10a - snps,dwmac-5.20 + - snps,dwmac-5.30a - snps,dwxgmac - snps,dwxgmac-2.10 @@ -98,10 +99,13 @@ properties: - snps,dwmac-4.20a - snps,dwmac-5.10a - snps,dwmac-5.20 + - snps,dwmac-5.30a - snps,dwxgmac - snps,dwxgmac-2.10 + - sophgo,sg2044-dwmac - starfive,jh7100-dwmac - starfive,jh7110-dwmac + - tesla,fsd-ethqos - thead,th1520-gmac reg: @@ -126,7 +130,7 @@ properties: clocks: minItems: 1 - maxItems: 8 + maxItems: 10 additionalItems: true items: - description: GMAC main clock @@ -138,7 +142,7 @@ properties: clock-names: minItems: 1 - maxItems: 8 + maxItems: 10 additionalItems: true contains: enum: @@ -631,6 +635,7 @@ allOf: - snps,dwmac-4.20a - snps,dwmac-5.10a - snps,dwmac-5.20 + - snps,dwmac-5.30a - snps,dwxgmac - snps,dwxgmac-2.10 - st,spear600-gmac diff --git a/Documentation/devicetree/bindings/net/sophgo,sg2044-dwmac.yaml b/Documentation/devicetree/bindings/net/sophgo,sg2044-dwmac.yaml new file mode 100644 index 000000000000..4dd2dc9c678b --- /dev/null +++ b/Documentation/devicetree/bindings/net/sophgo,sg2044-dwmac.yaml @@ -0,0 +1,126 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/sophgo,sg2044-dwmac.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Sophgo SG2044 DWMAC glue layer + +maintainers: + - Inochi Amaoto <inochiama@gmail.com> + +select: + properties: + compatible: + contains: + enum: + - sophgo,sg2044-dwmac + required: + - compatible + +properties: + compatible: + items: + - const: sophgo,sg2044-dwmac + - const: snps,dwmac-5.30a + + reg: + maxItems: 1 + + clocks: + items: + - description: GMAC main clock + - description: PTP clock + - description: TX clock + + clock-names: + items: + - const: stmmaceth + - const: ptp_ref + - const: tx + + dma-noncoherent: true + + interrupts: + maxItems: 1 + + interrupt-names: + maxItems: 1 + + resets: + maxItems: 1 + + reset-names: + const: stmmaceth + +required: + - compatible + - reg + - clocks + - clock-names + - interrupts + - interrupt-names + - resets + - reset-names + +allOf: + - $ref: snps,dwmac.yaml# + +unevaluatedProperties: false + +examples: + - | + #include <dt-bindings/interrupt-controller/irq.h> + + ethernet@30006000 { + compatible = "sophgo,sg2044-dwmac", "snps,dwmac-5.30a"; + reg = <0x30006000 0x4000>; + clocks = <&clk 151>, <&clk 152>, <&clk 154>; + clock-names = "stmmaceth", "ptp_ref", "tx"; + interrupt-parent = <&intc>; + interrupts = <296 
IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "macirq"; + resets = <&rst 30>; + reset-names = "stmmaceth"; + snps,multicast-filter-bins = <0>; + snps,perfect-filter-entries = <1>; + snps,aal; + snps,tso; + snps,txpbl = <32>; + snps,rxpbl = <32>; + snps,mtl-rx-config = <&gmac0_mtl_rx_setup>; + snps,mtl-tx-config = <&gmac0_mtl_tx_setup>; + snps,axi-config = <&gmac0_stmmac_axi_setup>; + status = "disabled"; + + gmac0_mtl_rx_setup: rx-queues-config { + snps,rx-queues-to-use = <8>; + snps,rx-sched-wsp; + queue0 {}; + queue1 {}; + queue2 {}; + queue3 {}; + queue4 {}; + queue5 {}; + queue6 {}; + queue7 {}; + }; + + gmac0_mtl_tx_setup: tx-queues-config { + snps,tx-queues-to-use = <8>; + queue0 {}; + queue1 {}; + queue2 {}; + queue3 {}; + queue4 {}; + queue5 {}; + queue6 {}; + queue7 {}; + }; + + gmac0_stmmac_axi_setup: stmmac-axi-config { + snps,blen = <16 8 4 0 0 0 0>; + snps,wr_osr_lmt = <1>; + snps,rd_osr_lmt = <2>; + }; + }; diff --git a/Documentation/devicetree/bindings/net/stm32-dwmac.yaml b/Documentation/devicetree/bindings/net/stm32-dwmac.yaml index 85cea9966a27..987254900d0d 100644 --- a/Documentation/devicetree/bindings/net/stm32-dwmac.yaml +++ b/Documentation/devicetree/bindings/net/stm32-dwmac.yaml @@ -54,6 +54,16 @@ properties: items: - const: stmmaceth + interrupts: + minItems: 1 + maxItems: 2 + + interrupt-names: + minItems: 1 + items: + - const: macirq + - const: eth_wake_irq + clocks: minItems: 3 items: diff --git a/Documentation/devicetree/bindings/net/tesla,fsd-ethqos.yaml b/Documentation/devicetree/bindings/net/tesla,fsd-ethqos.yaml new file mode 100644 index 000000000000..dd7481bb16e5 --- /dev/null +++ b/Documentation/devicetree/bindings/net/tesla,fsd-ethqos.yaml @@ -0,0 +1,118 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/tesla,fsd-ethqos.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: FSD Ethernet Quality of Service + +maintainers: + - Swathi K S <swathi.ks@samsung.com> + +description: + Tesla ethernet devices based on dwmmac support Gigabit ethernet. 
+ +allOf: + - $ref: snps,dwmac.yaml# + +properties: + compatible: + const: tesla,fsd-ethqos + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + interrupt-names: + items: + - const: macirq + + clocks: + minItems: 5 + items: + - description: PTP clock + - description: Master bus clock + - description: Slave bus clock + - description: MAC TX clock + - description: MAC RX clock + - description: Master2 bus clock + - description: Slave2 bus clock + - description: RX MUX clock + - description: PHY RX clock + - description: PERIC RGMII clock + + clock-names: + minItems: 5 + items: + - const: ptp_ref + - const: master_bus + - const: slave_bus + - const: tx + - const: rx + - const: master2_bus + - const: slave2_bus + - const: eqos_rxclk_mux + - const: eqos_phyrxclk + - const: dout_peric_rgmii_clk + + iommus: + maxItems: 1 + + phy-mode: + enum: + - rgmii + - rgmii-id + - rgmii-rxid + - rgmii-txid + +required: + - compatible + - reg + - interrupts + - clocks + - clock-names + - iommus + - phy-mode + +unevaluatedProperties: false + +examples: + - | + #include <dt-bindings/clock/fsd-clk.h> + #include <dt-bindings/interrupt-controller/arm-gic.h> + soc { + #address-cells = <2>; + #size-cells = <2>; + ethernet1: ethernet@14300000 { + compatible = "tesla,fsd-ethqos"; + reg = <0x0 0x14300000 0x0 0x10000>; + interrupts = <GIC_SPI 176 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "macirq"; + clocks = <&clock_peric PERIC_EQOS_TOP_IPCLKPORT_CLK_PTP_REF_I>, + <&clock_peric PERIC_EQOS_TOP_IPCLKPORT_ACLK_I>, + <&clock_peric PERIC_EQOS_TOP_IPCLKPORT_HCLK_I>, + <&clock_peric PERIC_EQOS_TOP_IPCLKPORT_RGMII_CLK_I>, + <&clock_peric PERIC_EQOS_TOP_IPCLKPORT_CLK_RX_I>, + <&clock_peric PERIC_BUS_D_PERIC_IPCLKPORT_EQOSCLK>, + <&clock_peric PERIC_BUS_P_PERIC_IPCLKPORT_EQOSCLK>, + <&clock_peric PERIC_EQOS_PHYRXCLK_MUX>, + <&clock_peric PERIC_EQOS_PHYRXCLK>, + <&clock_peric PERIC_DOUT_RGMII_CLK>; + clock-names = "ptp_ref", "master_bus", "slave_bus","tx", + "rx", "master2_bus", "slave2_bus", "eqos_rxclk_mux", + "eqos_phyrxclk","dout_peric_rgmii_clk"; + assigned-clocks = <&clock_peric PERIC_EQOS_PHYRXCLK_MUX>, + <&clock_peric PERIC_EQOS_PHYRXCLK>; + assigned-clock-parents = <&clock_peric PERIC_EQOS_PHYRXCLK>; + pinctrl-names = "default"; + pinctrl-0 = <ð1_tx_clk>, <ð1_tx_data>, <ð1_tx_ctrl>, + <ð1_phy_intr>, <ð1_rx_clk>, <ð1_rx_data>, + <ð1_rx_ctrl>, <ð1_mdio>; + iommus = <&smmu_peric 0x0 0x1>; + phy-mode = "rgmii-id"; + }; + }; + +... diff --git a/Documentation/devicetree/bindings/net/toshiba,visconti-dwmac.yaml b/Documentation/devicetree/bindings/net/toshiba,visconti-dwmac.yaml index 052f636158b3..f0f32e18fc85 100644 --- a/Documentation/devicetree/bindings/net/toshiba,visconti-dwmac.yaml +++ b/Documentation/devicetree/bindings/net/toshiba,visconti-dwmac.yaml @@ -42,6 +42,12 @@ properties: - const: stmmaceth - const: phy_ref_clk + interrupts: + maxItems: 1 + + interrupt-names: + const: macirq + required: - compatible - reg diff --git a/Documentation/filesystems/idmappings.rst b/Documentation/filesystems/idmappings.rst index 77930c77fcfe..2a206129f828 100644 --- a/Documentation/filesystems/idmappings.rst +++ b/Documentation/filesystems/idmappings.rst @@ -63,8 +63,8 @@ what id ``k11000`` corresponds to in the second or third idmapping. The straightforward algorithm to use is to apply the inverse of the first idmapping, mapping ``k11000`` up to ``u1000``. Afterwards, we can map ``u1000`` down using either the second idmapping mapping or third idmapping mapping. The second -idmapping would map ``u1000`` down to ``21000``. 
The third idmapping would map -``u1000`` down to ``u31000``. +idmapping would map ``u1000`` down to ``k21000``. The third idmapping would map +``u1000`` down to ``k31000``. If we were given the same task for the following three idmappings:: diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index 36f1152bfac3..f5e0750ab71d 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -745,8 +745,8 @@ operations: - irq-suspend-timeout kernel-family: - headers: [ "linux/list.h"] - sock-priv: struct list_head + headers: [ "net/netdev_netlink.h"] + sock-priv: struct netdev_nl_sock mcast-groups: list: diff --git a/Documentation/networking/batman-adv.rst b/Documentation/networking/batman-adv.rst index 44b9b5cc0e24..ec53a42499c1 100644 --- a/Documentation/networking/batman-adv.rst +++ b/Documentation/networking/batman-adv.rst @@ -27,7 +27,7 @@ Load the batman-adv module into your kernel:: $ insmod batman-adv.ko The module is now waiting for activation. You must add some interfaces on which -batman-adv can operate. The batman-adv soft-interface can be created using the +batman-adv can operate. The batman-adv mesh-interface can be created using the iproute2 tool ``ip``:: $ ip link add name bat0 type batadv diff --git a/Documentation/networking/devlink/bnxt.rst b/Documentation/networking/devlink/bnxt.rst index a4fb27663cd6..9a8b3d76d11f 100644 --- a/Documentation/networking/devlink/bnxt.rst +++ b/Documentation/networking/devlink/bnxt.rst @@ -24,6 +24,8 @@ Parameters - Permanent * - ``enable_remote_dev_reset`` - Runtime + * - ``enable_roce`` + - Permanent The ``bnxt`` driver also implements the following driver-specific parameters. diff --git a/Documentation/networking/netdevices.rst b/Documentation/networking/netdevices.rst index 1d37038e9fbe..ebb868f50ac2 100644 --- a/Documentation/networking/netdevices.rst +++ b/Documentation/networking/netdevices.rst @@ -210,49 +210,55 @@ packets is preferred. struct net_device synchronization rules ======================================= ndo_open: - Synchronization: rtnl_lock() semaphore. + Synchronization: rtnl_lock() semaphore. In addition, netdev instance + lock if the driver implements queue management or shaper API. Context: process ndo_stop: - Synchronization: rtnl_lock() semaphore. + Synchronization: rtnl_lock() semaphore. In addition, netdev instance + lock if the driver implements queue management or shaper API. Context: process Note: netif_running() is guaranteed false ndo_do_ioctl: Synchronization: rtnl_lock() semaphore. - Context: process - This is only called by network subsystems internally, - not by user space calling ioctl as it was in before - linux-5.14. + This is only called by network subsystems internally, + not by user space calling ioctl as it was in before + linux-5.14. ndo_siocbond: - Synchronization: rtnl_lock() semaphore. + Synchronization: rtnl_lock() semaphore. In addition, netdev instance + lock if the driver implements queue management or shaper API. Context: process - Used by the bonding driver for the SIOCBOND family of - ioctl commands. + Used by the bonding driver for the SIOCBOND family of + ioctl commands. ndo_siocwandev: - Synchronization: rtnl_lock() semaphore. + Synchronization: rtnl_lock() semaphore. In addition, netdev instance + lock if the driver implements queue management or shaper API. Context: process Used by the drivers/net/wan framework to handle the SIOCWANDEV ioctl with the if_settings structure. 
ndo_siocdevprivate: - Synchronization: rtnl_lock() semaphore. + Synchronization: rtnl_lock() semaphore. In addition, netdev instance + lock if the driver implements queue management or shaper API. Context: process This is used to implement SIOCDEVPRIVATE ioctl helpers. These should not be added to new drivers, so don't use. ndo_eth_ioctl: - Synchronization: rtnl_lock() semaphore. + Synchronization: rtnl_lock() semaphore. In addition, netdev instance + lock if the driver implements queue management or shaper API. Context: process ndo_get_stats: - Synchronization: rtnl_lock() semaphore, or RCU. + Synchronization: RCU (can be called concurrently with the stats + update path). Context: atomic (can't sleep under RCU) ndo_start_xmit: @@ -284,6 +290,16 @@ ndo_set_rx_mode: Synchronization: netif_addr_lock spinlock. Context: BHs disabled +ndo_setup_tc: + ``TC_SETUP_BLOCK`` and ``TC_SETUP_FT`` are running under NFT locks + (i.e. no ``rtnl_lock`` and no device instance lock). The rest of + ``tc_setup_type`` types run under netdev instance lock if the driver + implements queue management or shaper API. + +Most ndo callbacks not specified in the list above are running +under ``rtnl_lock``. In addition, netdev instance lock is taken as well if +the driver implements queue management or shaper API. + struct napi_struct synchronization rules ======================================== napi->poll: @@ -298,6 +314,35 @@ napi->poll: softirq will be called with interrupts disabled by netconsole. +struct netdev_queue_mgmt_ops synchronization rules +================================================== + +All queue management ndo callbacks are holding netdev instance lock. + +RTNL and netdev instance lock +============================= + +Historically, all networking control operations were protected by a single +global lock known as ``rtnl_lock``. There is an ongoing effort to replace this +global lock with separate locks for each network namespace. Additionally, +properties of individual netdev are increasingly protected by per-netdev locks. + +For device drivers that implement shaping or queue management APIs, all control +operations will be performed under the netdev instance lock. Currently, this +instance lock is acquired within the context of ``rtnl_lock``. The drivers +can also explicitly request instance lock to be acquired via +``request_ops_lock``. In the future, there will be an option for individual +drivers to opt out of using ``rtnl_lock`` and instead perform their control +operations directly under the netdev instance lock. + +Devices drivers are encouraged to rely on the instance lock where possible. + +For the (mostly software) drivers that need to interact with the core stack, +there are two sets of interfaces: ``dev_xxx`` and ``netif_xxx`` (e.g., +``dev_set_mtu`` and ``netif_set_mtu``). The ``dev_xxx`` functions handle +acquiring the instance lock themselves, while the ``netif_xxx`` functions +assume that the driver has already acquired the instance lock. + NETDEV_INTERNAL symbol namespace ================================ diff --git a/Documentation/process/maintainer-netdev.rst b/Documentation/process/maintainer-netdev.rst index e497729525d5..1ac62dc3a66f 100644 --- a/Documentation/process/maintainer-netdev.rst +++ b/Documentation/process/maintainer-netdev.rst @@ -311,6 +311,14 @@ to the mailing list, e.g.:: Posting as one thread is discouraged because it confuses patchwork (as of patchwork 2.2.2). 
+Co-posting selftests +-------------------- + +Selftests should be part of the same series as the code changes. +Specifically for fixes both code change and related test should go into +the same tree (the tests may lack a Fixes tag, which is expected). +Mixing code changes and test changes in a single commit is discouraged. + Preparing changes ----------------- diff --git a/Documentation/scheduler/sched-rt-group.rst b/Documentation/scheduler/sched-rt-group.rst index 80b05a3009ea..ab464335d320 100644 --- a/Documentation/scheduler/sched-rt-group.rst +++ b/Documentation/scheduler/sched-rt-group.rst @@ -102,6 +102,9 @@ The system wide settings are configured under the /proc virtual file system: * sched_rt_period_us takes values from 1 to INT_MAX. * sched_rt_runtime_us takes values from -1 to sched_rt_period_us. * A run time of -1 specifies runtime == period, ie. no limit. + * sched_rt_runtime_us/sched_rt_period_us > 0.05 inorder to preserve + bandwidth for fair dl_server. For accurate value check average of + runtime/period in /sys/kernel/debug/sched/fair_server/cpuX/ 2.2 Default behaviour diff --git a/MAINTAINERS b/MAINTAINERS index 2eccaff95ca2..241ca9e260a2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -124,6 +124,7 @@ F: include/net/ieee80211_radiotap.h F: include/net/iw_handler.h F: include/net/wext.h F: include/uapi/linux/nl80211.h +N: include/uapi/linux/nl80211-.* F: include/uapi/linux/wireless.h F: net/wireless/ @@ -514,7 +515,7 @@ F: drivers/hwmon/adm1029.c ADM8211 WIRELESS DRIVER L: linux-wireless@vger.kernel.org S: Orphan -F: drivers/net/wireless/admtek/adm8211.* +F: drivers/net/wireless/admtek/ ADP1050 HARDWARE MONITOR DRIVER M: Radu Sabau <radu.sabau@analog.com> @@ -726,7 +727,7 @@ L: linux-mediatek@lists.infradead.org (moderated for non-subscribers) L: netdev@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/net/airoha,en7581-eth.yaml -F: drivers/net/ethernet/mediatek/airoha_eth.c +F: drivers/net/ethernet/airoha/ AIROHA PCIE PHY DRIVER M: Lorenzo Bianconi <lorenzo@kernel.org> @@ -1046,14 +1047,14 @@ F: drivers/crypto/ccp/hsti.* AMD DISPLAY CORE M: Harry Wentland <harry.wentland@amd.com> M: Leo Li <sunpeng.li@amd.com> -M: Rodrigo Siqueira <Rodrigo.Siqueira@amd.com> +R: Rodrigo Siqueira <siqueira@igalia.com> L: amd-gfx@lists.freedesktop.org S: Supported T: git https://gitlab.freedesktop.org/agd5f/linux.git F: drivers/gpu/drm/amd/display/ AMD DISPLAY CORE - DML -M: Chaitanya Dhere <chaitanya.dhere@amd.com> +M: Austin Zheng <austin.zheng@amd.com> M: Jun Lei <jun.lei@amd.com> S: Supported F: drivers/gpu/drm/amd/display/dc/dml/ @@ -5775,6 +5776,7 @@ X: drivers/clk/clkdev.c COMMON INTERNET FILE SYSTEM CLIENT (CIFS and SMB3) M: Steve French <sfrench@samba.org> +M: Steve French <smfrench@gmail.com> R: Paulo Alcantara <pc@manguebit.com> (DFS, global name space) R: Ronnie Sahlberg <ronniesahlberg@gmail.com> (directory leases, sparse files) R: Shyam Prasad N <sprasad@microsoft.com> (multichannel) @@ -5926,6 +5928,17 @@ F: tools/testing/selftests/cgroup/test_cpuset.c F: tools/testing/selftests/cgroup/test_cpuset_prs.sh F: tools/testing/selftests/cgroup/test_cpuset_v1_base.sh +CONTROL GROUP - DEVICE MEMORY CONTROLLER (DMEM) +M: Maarten Lankhorst <dev@lankhorst.se> +M: Maxime Ripard <mripard@kernel.org> +M: Natalie Vock <natalie.vock@gmx.de> +L: cgroups@vger.kernel.org +L: dri-devel@lists.freedesktop.org +S: Maintained +T: git https://gitlab.freedesktop.org/drm/misc/kernel.git +F: include/linux/cgroup_dmem.h +F: kernel/cgroup/dmem.c + CONTROL GROUP - MEMORY RESOURCE CONTROLLER 
(MEMCG) M: Johannes Weiner <hannes@cmpxchg.org> M: Michal Hocko <mhocko@kernel.org> @@ -6195,7 +6208,7 @@ F: Documentation/process/cve.rst CW1200 WLAN driver S: Orphan -F: drivers/net/wireless/st/cw1200/ +F: drivers/net/wireless/st/ F: include/linux/platform_data/net-cw1200.h CX18 VIDEO4LINUX DRIVER @@ -9431,14 +9444,11 @@ F: include/linux/fscrypt.h F: include/uapi/linux/fscrypt.h FSI SUBSYSTEM -M: Jeremy Kerr <jk@ozlabs.org> -M: Joel Stanley <joel@jms.id.au> -R: Alistar Popple <alistair@popple.id.au> -R: Eddie James <eajames@linux.ibm.com> +M: Eddie James <eajames@linux.ibm.com> +R: Ninad Palsule <ninad@linux.ibm.com> L: linux-fsi@lists.ozlabs.org S: Supported Q: http://patchwork.ozlabs.org/project/linux-fsi/list/ -T: git git://git.kernel.org/pub/scm/linux/kernel/git/joel/fsi.git F: drivers/fsi/ F: include/linux/fsi*.h F: include/trace/events/fsi*.h @@ -12645,7 +12655,9 @@ F: tools/testing/selftests/ KERNEL SMB3 SERVER (KSMBD) M: Namjae Jeon <linkinjeon@kernel.org> +M: Namjae Jeon <linkinjeon@samba.org> M: Steve French <sfrench@samba.org> +M: Steve French <smfrench@gmail.com> R: Sergey Senozhatsky <senozhatsky@chromium.org> R: Tom Talpey <tom@talpey.com> L: linux-cifs@vger.kernel.org @@ -12862,7 +12874,7 @@ F: include/keys/trusted_dcp.h F: security/keys/trusted-keys/trusted_dcp.c KEYS-TRUSTED-TEE -M: Sumit Garg <sumit.garg@linaro.org> +M: Sumit Garg <sumit.garg@kernel.org> L: linux-integrity@vger.kernel.org L: keyrings@vger.kernel.org S: Supported @@ -13985,6 +13997,7 @@ MARVELL LIBERTAS WIRELESS DRIVER L: libertas-dev@lists.infradead.org S: Orphan F: drivers/net/wireless/marvell/libertas/ +F: drivers/net/wireless/marvell/libertas_tf/ MARVELL MACCHIATOBIN SUPPORT M: Russell King <linux@armlinux.org.uk> @@ -15654,7 +15667,7 @@ M: Ajay Singh <ajay.kathat@microchip.com> M: Claudiu Beznea <claudiu.beznea@tuxon.dev> L: linux-wireless@vger.kernel.org S: Supported -F: drivers/net/wireless/microchip/wilc1000/ +F: drivers/net/wireless/microchip/ MICROSEMI MIPS SOCS M: Alexandre Belloni <alexandre.belloni@bootlin.com> @@ -16440,6 +16453,23 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless.git T: git git://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless-next.git F: Documentation/devicetree/bindings/net/wireless/ F: drivers/net/wireless/ +X: drivers/net/wireless/ath/ +X: drivers/net/wireless/broadcom/ +X: drivers/net/wireless/intel/ +X: drivers/net/wireless/intersil/ +X: drivers/net/wireless/marvell/ +X: drivers/net/wireless/mediatek/mt76/ +X: drivers/net/wireless/mediatek/mt7601u/ +X: drivers/net/wireless/microchip/ +X: drivers/net/wireless/purelifi/ +X: drivers/net/wireless/quantenna/ +X: drivers/net/wireless/ralink/ +X: drivers/net/wireless/realtek/ +X: drivers/net/wireless/rsi/ +X: drivers/net/wireless/silabs/ +X: drivers/net/wireless/st/ +X: drivers/net/wireless/ti/ +X: drivers/net/wireless/zydas/ NETWORKING [DSA] M: Andrew Lunn <andrew@lunn.ch> @@ -17663,7 +17693,7 @@ F: Documentation/ABI/testing/sysfs-bus-optee-devices F: drivers/tee/optee/ OP-TEE RANDOM NUMBER GENERATOR (RNG) DRIVER -M: Sumit Garg <sumit.garg@linaro.org> +M: Sumit Garg <sumit.garg@kernel.org> L: op-tee@lists.trustedfirmware.org S: Maintained F: drivers/char/hw_random/optee-rng.c @@ -17824,7 +17854,7 @@ M: Christian Lamparter <chunkeey@googlemail.com> L: linux-wireless@vger.kernel.org S: Maintained W: https://wireless.wiki.kernel.org/en/users/Drivers/p54 -F: drivers/net/wireless/intersil/p54/ +F: drivers/net/wireless/intersil/ PACKET SOCKETS M: Willem de Bruijn 
<willemdebruijn.kernel@gmail.com> @@ -19101,7 +19131,7 @@ PURELIFI PLFXLC DRIVER M: Srinivasan Raju <srini.raju@purelifi.com> L: linux-wireless@vger.kernel.org S: Supported -F: drivers/net/wireless/purelifi/plfxlc/ +F: drivers/net/wireless/purelifi/ PVRUSB2 VIDEO4LINUX DRIVER M: Mike Isely <isely@pobox.com> @@ -19652,12 +19682,11 @@ M: Igor Mitsyanko <imitsyanko@quantenna.com> R: Sergey Matyukevich <geomatsi@gmail.com> L: linux-wireless@vger.kernel.org S: Maintained -F: drivers/net/wireless/quantenna +F: drivers/net/wireless/quantenna/ RADEON and AMDGPU DRM DRIVERS M: Alex Deucher <alexander.deucher@amd.com> M: Christian König <christian.koenig@amd.com> -M: Xinhui Pan <Xinhui.Pan@amd.com> L: amd-gfx@lists.freedesktop.org S: Supported B: https://gitlab.freedesktop.org/drm/amd/-/issues @@ -19733,7 +19762,7 @@ RALINK RT2X00 WIRELESS LAN DRIVER M: Stanislaw Gruszka <stf_xl@wp.pl> L: linux-wireless@vger.kernel.org S: Maintained -F: drivers/net/wireless/ralink/rt2x00/ +F: drivers/net/wireless/ralink/ RAMDISK RAM BLOCK DEVICE DRIVER M: Jens Axboe <axboe@kernel.dk> @@ -21081,6 +21110,7 @@ F: include/linux/clk/samsung.h SAMSUNG SPI DRIVERS M: Andi Shyti <andi.shyti@kernel.org> +R: Tudor Ambarus <tudor.ambarus@linaro.org> L: linux-spi@vger.kernel.org L: linux-samsung-soc@vger.kernel.org S: Maintained @@ -21491,7 +21521,6 @@ F: include/linux/slimbus.h SFC NETWORK DRIVER M: Edward Cree <ecree.xilinx@gmail.com> -M: Martin Habets <habetsm.xilinx@gmail.com> L: netdev@vger.kernel.org L: linux-net-drivers@amd.com S: Maintained @@ -21700,7 +21729,7 @@ SILICON LABS WIRELESS DRIVERS (for WFxxx series) M: Jérôme Pouiller <jerome.pouiller@silabs.com> S: Supported F: Documentation/devicetree/bindings/net/wireless/silabs,wfx.yaml -F: drivers/net/wireless/silabs/wfx/ +F: drivers/net/wireless/silabs/ SILICON MOTION SM712 FRAME BUFFER DRIVER M: Sudip Mukherjee <sudipm.mukherjee@gmail.com> @@ -23277,7 +23306,7 @@ F: include/media/i2c/tw9910.h TEE SUBSYSTEM M: Jens Wiklander <jens.wiklander@linaro.org> -R: Sumit Garg <sumit.garg@linaro.org> +R: Sumit Garg <sumit.garg@kernel.org> L: op-tee@lists.trustedfirmware.org S: Maintained F: Documentation/ABI/testing/sysfs-class-tee @@ -26200,7 +26229,7 @@ F: mm/zbud.c ZD1211RW WIRELESS DRIVER L: linux-wireless@vger.kernel.org S: Orphan -F: drivers/net/wireless/zydas/zd1211rw/ +F: drivers/net/wireless/zydas/ ZD1301 MEDIA DRIVER L: linux-media@vger.kernel.org @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 14 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc6 NAME = Baby Opossum Posse # *DOCUMENTATION* @@ -1123,6 +1123,11 @@ endif KBUILD_USERCFLAGS += $(filter -m32 -m64 --target=%, $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS)) KBUILD_USERLDFLAGS += $(filter -m32 -m64 --target=%, $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS)) +# userspace programs are linked via the compiler, use the correct linker +ifeq ($(CONFIG_CC_IS_CLANG)$(CONFIG_LD_IS_LLD),yy) +KBUILD_USERLDFLAGS += --ld-path=$(LD) +endif + # make the checker run with the right architecture CHECKFLAGS += --arch=$(ARCH) diff --git a/arch/arm/mm/fault-armv.c b/arch/arm/mm/fault-armv.c index 2bec87c3327d..39fd5df73317 100644 --- a/arch/arm/mm/fault-armv.c +++ b/arch/arm/mm/fault-armv.c @@ -62,7 +62,7 @@ static int do_adjust_pte(struct vm_area_struct *vma, unsigned long address, } static int adjust_pte(struct vm_area_struct *vma, unsigned long address, - unsigned long pfn, struct vm_fault *vmf) + unsigned long pfn, bool need_lock) { spinlock_t *ptl; pgd_t *pgd; @@ -99,12 +99,11 @@ again: if (!pte) return 0; - /* - * If we are using split PTE locks, 
then we need to take the page - * lock here. Otherwise we are using shared mm->page_table_lock - * which is already locked, thus cannot take it. - */ - if (ptl != vmf->ptl) { + if (need_lock) { + /* + * Use nested version here to indicate that we are already + * holding one similar spinlock. + */ spin_lock_nested(ptl, SINGLE_DEPTH_NESTING); if (unlikely(!pmd_same(pmdval, pmdp_get_lockless(pmd)))) { pte_unmap_unlock(pte, ptl); @@ -114,7 +113,7 @@ again: ret = do_adjust_pte(vma, address, pfn, pte); - if (ptl != vmf->ptl) + if (need_lock) spin_unlock(ptl); pte_unmap(pte); @@ -123,9 +122,10 @@ again: static void make_coherent(struct address_space *mapping, struct vm_area_struct *vma, - unsigned long addr, pte_t *ptep, unsigned long pfn, - struct vm_fault *vmf) + unsigned long addr, pte_t *ptep, unsigned long pfn) { + const unsigned long pmd_start_addr = ALIGN_DOWN(addr, PMD_SIZE); + const unsigned long pmd_end_addr = pmd_start_addr + PMD_SIZE; struct mm_struct *mm = vma->vm_mm; struct vm_area_struct *mpnt; unsigned long offset; @@ -142,6 +142,14 @@ make_coherent(struct address_space *mapping, struct vm_area_struct *vma, flush_dcache_mmap_lock(mapping); vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) { /* + * If we are using split PTE locks, then we need to take the pte + * lock. Otherwise we are using shared mm->page_table_lock which + * is already locked, thus cannot take it. + */ + bool need_lock = IS_ENABLED(CONFIG_SPLIT_PTE_PTLOCKS); + unsigned long mpnt_addr; + + /* * If this VMA is not in our MM, we can ignore it. * Note that we intentionally mask out the VMA * that we are fixing up. @@ -151,7 +159,12 @@ make_coherent(struct address_space *mapping, struct vm_area_struct *vma, if (!(mpnt->vm_flags & VM_MAYSHARE)) continue; offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT; - aliases += adjust_pte(mpnt, mpnt->vm_start + offset, pfn, vmf); + mpnt_addr = mpnt->vm_start + offset; + + /* Avoid deadlocks by not grabbing the same PTE lock again. */ + if (mpnt_addr >= pmd_start_addr && mpnt_addr < pmd_end_addr) + need_lock = false; + aliases += adjust_pte(mpnt, mpnt_addr, pfn, need_lock); } flush_dcache_mmap_unlock(mapping); if (aliases) @@ -194,7 +207,7 @@ void update_mmu_cache_range(struct vm_fault *vmf, struct vm_area_struct *vma, __flush_dcache_folio(mapping, folio); if (mapping) { if (cache_is_vivt()) - make_coherent(mapping, vma, addr, ptep, pfn, vmf); + make_coherent(mapping, vma, addr, ptep, pfn); else if (vma->vm_flags & VM_EXEC) __flush_icache_all(); } diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index 25e162651750..555c613fd232 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -16,6 +16,32 @@ #include <asm/sysreg.h> #include <linux/irqchip/arm-gic-v3.h> +.macro init_el2_hcr val + mov_q x0, \val + + /* + * Compliant CPUs advertise their VHE-onlyness with + * ID_AA64MMFR4_EL1.E2H0 < 0. On such CPUs HCR_EL2.E2H is RES1, but it + * can reset into an UNKNOWN state and might not read as 1 until it has + * been initialized explicitly. + * + * Fruity CPUs seem to have HCR_EL2.E2H set to RAO/WI, but + * don't advertise it (they predate this relaxation). + * + * Initalize HCR_EL2.E2H so that later code can rely upon HCR_EL2.E2H + * indicating whether the CPU is running in E2H mode. 
+ */ + mrs_s x1, SYS_ID_AA64MMFR4_EL1 + sbfx x1, x1, #ID_AA64MMFR4_EL1_E2H0_SHIFT, #ID_AA64MMFR4_EL1_E2H0_WIDTH + cmp x1, #0 + b.ge .LnVHE_\@ + + orr x0, x0, #HCR_E2H +.LnVHE_\@: + msr hcr_el2, x0 + isb +.endm + .macro __init_el2_sctlr mov_q x0, INIT_SCTLR_EL2_MMU_OFF msr sctlr_el2, x0 @@ -244,11 +270,6 @@ .Lskip_gcs_\@: .endm -.macro __init_el2_nvhe_prepare_eret - mov x0, #INIT_PSTATE_EL1 - msr spsr_el2, x0 -.endm - .macro __init_el2_mpam /* Memory Partitioning And Monitoring: disable EL2 traps */ mrs x1, id_aa64pfr0_el1 diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h index c6dff3e69539..07fbf5bf85a7 100644 --- a/arch/arm64/include/asm/hugetlb.h +++ b/arch/arm64/include/asm/hugetlb.h @@ -42,8 +42,8 @@ extern int huge_ptep_set_access_flags(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t pte, int dirty); #define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR -extern pte_t huge_ptep_get_and_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep); +extern pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, unsigned long sz); #define __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT extern void huge_ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep); @@ -76,12 +76,22 @@ static inline void flush_hugetlb_tlb_range(struct vm_area_struct *vma, { unsigned long stride = huge_page_size(hstate_vma(vma)); - if (stride == PMD_SIZE) - __flush_tlb_range(vma, start, end, stride, false, 2); - else if (stride == PUD_SIZE) - __flush_tlb_range(vma, start, end, stride, false, 1); - else - __flush_tlb_range(vma, start, end, PAGE_SIZE, false, 0); + switch (stride) { +#ifndef __PAGETABLE_PMD_FOLDED + case PUD_SIZE: + __flush_tlb_range(vma, start, end, PUD_SIZE, false, 1); + break; +#endif + case CONT_PMD_SIZE: + case PMD_SIZE: + __flush_tlb_range(vma, start, end, PMD_SIZE, false, 2); + break; + case CONT_PTE_SIZE: + __flush_tlb_range(vma, start, end, PAGE_SIZE, false, 3); + break; + default: + __flush_tlb_range(vma, start, end, PAGE_SIZE, false, TLBI_TTL_UNKNOWN); + } } #endif /* __ASM_HUGETLB_H */ diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 8d94a6c0ed5c..c2417a424b98 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -119,7 +119,7 @@ #define TCR_EL2_IRGN0_MASK TCR_IRGN0_MASK #define TCR_EL2_T0SZ_MASK 0x3f #define TCR_EL2_MASK (TCR_EL2_TG0_MASK | TCR_EL2_SH0_MASK | \ - TCR_EL2_ORGN0_MASK | TCR_EL2_IRGN0_MASK | TCR_EL2_T0SZ_MASK) + TCR_EL2_ORGN0_MASK | TCR_EL2_IRGN0_MASK) /* VTCR_EL2 Registers bits */ #define VTCR_EL2_DS TCR_EL2_DS diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 3a7ec98ef123..d919557af5e5 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -1259,7 +1259,7 @@ int kvm_arm_pvtime_has_attr(struct kvm_vcpu *vcpu, extern unsigned int __ro_after_init kvm_arm_vmid_bits; int __init kvm_arm_vmid_alloc_init(void); void __init kvm_arm_vmid_alloc_free(void); -bool kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid); +void kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid); void kvm_arm_vmid_clear_active(void); static inline void kvm_arm_pvtime_vcpu_init(struct kvm_vcpu_arch *vcpu_arch) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 5ab1970ee543..2ce73525de2c 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -298,25 +298,8 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL) msr sctlr_el2, x0 isb 0: - mov_q x0, HCR_HOST_NVHE_FLAGS - - /* - 
* Compliant CPUs advertise their VHE-onlyness with - * ID_AA64MMFR4_EL1.E2H0 < 0. HCR_EL2.E2H can be - * RES1 in that case. Publish the E2H bit early so that - * it can be picked up by the init_el2_state macro. - * - * Fruity CPUs seem to have HCR_EL2.E2H set to RAO/WI, but - * don't advertise it (they predate this relaxation). - */ - mrs_s x1, SYS_ID_AA64MMFR4_EL1 - tbz x1, #(ID_AA64MMFR4_EL1_E2H0_SHIFT + ID_AA64MMFR4_EL1_E2H0_WIDTH - 1), 1f - - orr x0, x0, #HCR_E2H -1: - msr hcr_el2, x0 - isb + init_el2_hcr HCR_HOST_NVHE_FLAGS init_el2_state /* Hypervisor stub */ @@ -339,7 +322,8 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL) msr sctlr_el1, x1 mov x2, xzr 3: - __init_el2_nvhe_prepare_eret + mov x0, #INIT_PSTATE_EL1 + msr spsr_el2, x0 mov w0, #BOOT_CPU_MODE_EL2 orr x0, x0, x2 diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index b8e55a441282..0160b4924351 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -560,6 +560,16 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) last_ran = this_cpu_ptr(mmu->last_vcpu_ran); /* + * Ensure a VMID is allocated for the MMU before programming VTTBR_EL2, + * which happens eagerly in VHE. + * + * Also, the VMID allocator only preserves VMIDs that are active at the + * time of rollover, so KVM might need to grab a new VMID for the MMU if + * this is called from kvm_sched_in(). + */ + kvm_arm_vmid_update(&mmu->vmid); + + /* * We guarantee that both TLBs and I-cache are private to each * vcpu. If detecting that a vcpu from the same VM has * previously run on the same physical CPU, call into the @@ -1138,18 +1148,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) */ preempt_disable(); - /* - * The VMID allocator only tracks active VMIDs per - * physical CPU, and therefore the VMID allocated may not be - * preserved on VMID roll-over if the task was preempted, - * making a thread's VMID inactive. So we need to call - * kvm_arm_vmid_update() in non-premptible context. 
- */ - if (kvm_arm_vmid_update(&vcpu->arch.hw_mmu->vmid) && - has_vhe()) - __load_stage2(vcpu->arch.hw_mmu, - vcpu->arch.hw_mmu->arch); - kvm_pmu_flush_hwstate(vcpu); local_irq_disable(); @@ -1980,7 +1978,7 @@ static int kvm_init_vector_slots(void) static void __init cpu_prepare_hyp_mode(int cpu, u32 hyp_va_bits) { struct kvm_nvhe_init_params *params = per_cpu_ptr_nvhe_sym(kvm_init_params, cpu); - unsigned long tcr, ips; + unsigned long tcr; /* * Calculate the raw per-cpu offset without a translation from the @@ -1994,19 +1992,18 @@ static void __init cpu_prepare_hyp_mode(int cpu, u32 hyp_va_bits) params->mair_el2 = read_sysreg(mair_el1); tcr = read_sysreg(tcr_el1); - ips = FIELD_GET(TCR_IPS_MASK, tcr); if (cpus_have_final_cap(ARM64_KVM_HVHE)) { + tcr &= ~(TCR_HD | TCR_HA | TCR_A1 | TCR_T0SZ_MASK); tcr |= TCR_EPD1_MASK; } else { + unsigned long ips = FIELD_GET(TCR_IPS_MASK, tcr); + tcr &= TCR_EL2_MASK; - tcr |= TCR_EL2_RES1; + tcr |= TCR_EL2_RES1 | FIELD_PREP(TCR_EL2_PS_MASK, ips); + if (lpa2_is_enabled()) + tcr |= TCR_EL2_DS; } - tcr &= ~TCR_T0SZ_MASK; tcr |= TCR_T0SZ(hyp_va_bits); - tcr &= ~TCR_EL2_PS_MASK; - tcr |= FIELD_PREP(TCR_EL2_PS_MASK, ips); - if (lpa2_is_enabled()) - tcr |= TCR_EL2_DS; params->tcr_el2 = tcr; params->pgd_pa = kvm_mmu_get_httbr(); diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S index fc1866226067..f8af11189572 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S +++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S @@ -73,8 +73,12 @@ __do_hyp_init: eret SYM_CODE_END(__kvm_hyp_init) +/* + * Initialize EL2 CPU state to sane values. + * + * HCR_EL2.E2H must have been initialized already. + */ SYM_CODE_START_LOCAL(__kvm_init_el2_state) - /* Initialize EL2 CPU state to sane values. */ init_el2_state // Clobbers x0..x2 finalise_el2_state ret @@ -206,9 +210,9 @@ SYM_CODE_START_LOCAL(__kvm_hyp_init_cpu) 2: msr SPsel, #1 // We want to use SP_EL{1,2} - bl __kvm_init_el2_state + init_el2_hcr 0 - __init_el2_nvhe_prepare_eret + bl __kvm_init_el2_state /* Enable MMU, set vectors and stack. */ mov x0, x28 diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c index 9c2ce1e0e99a..c3e196fb8b18 100644 --- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c +++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c @@ -218,6 +218,9 @@ asmlinkage void __noreturn __kvm_host_psci_cpu_entry(bool is_cpu_on) if (is_cpu_on) release_boot_args(boot_args); + write_sysreg_el1(INIT_SCTLR_EL1_MMU_OFF, SYS_SCTLR); + write_sysreg(INIT_PSTATE_EL1, SPSR_EL2); + __host_enter(host_ctxt); } diff --git a/arch/arm64/kvm/vmid.c b/arch/arm64/kvm/vmid.c index 806223b7022a..7fe8ba1a2851 100644 --- a/arch/arm64/kvm/vmid.c +++ b/arch/arm64/kvm/vmid.c @@ -135,11 +135,10 @@ void kvm_arm_vmid_clear_active(void) atomic64_set(this_cpu_ptr(&active_vmids), VMID_ACTIVE_INVALID); } -bool kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid) +void kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid) { unsigned long flags; u64 vmid, old_active_vmid; - bool updated = false; vmid = atomic64_read(&kvm_vmid->id); @@ -157,21 +156,17 @@ bool kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid) if (old_active_vmid != 0 && vmid_gen_match(vmid) && 0 != atomic64_cmpxchg_relaxed(this_cpu_ptr(&active_vmids), old_active_vmid, vmid)) - return false; + return; raw_spin_lock_irqsave(&cpu_vmid_lock, flags); /* Check that our VMID belongs to the current generation. 
*/ vmid = atomic64_read(&kvm_vmid->id); - if (!vmid_gen_match(vmid)) { + if (!vmid_gen_match(vmid)) vmid = new_vmid(kvm_vmid); - updated = true; - } atomic64_set(this_cpu_ptr(&active_vmids), vmid); raw_spin_unlock_irqrestore(&cpu_vmid_lock, flags); - - return updated; } /* diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 98a2a0e64e25..b3a7fafe8892 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -100,20 +100,11 @@ static int find_num_contig(struct mm_struct *mm, unsigned long addr, static inline int num_contig_ptes(unsigned long size, size_t *pgsize) { - int contig_ptes = 0; + int contig_ptes = 1; *pgsize = size; switch (size) { -#ifndef __PAGETABLE_PMD_FOLDED - case PUD_SIZE: - if (pud_sect_supported()) - contig_ptes = 1; - break; -#endif - case PMD_SIZE: - contig_ptes = 1; - break; case CONT_PMD_SIZE: *pgsize = PMD_SIZE; contig_ptes = CONT_PMDS; @@ -122,6 +113,8 @@ static inline int num_contig_ptes(unsigned long size, size_t *pgsize) *pgsize = PAGE_SIZE; contig_ptes = CONT_PTES; break; + default: + WARN_ON(!__hugetlb_valid_size(size)); } return contig_ptes; @@ -163,24 +156,23 @@ static pte_t get_clear_contig(struct mm_struct *mm, unsigned long pgsize, unsigned long ncontig) { - pte_t orig_pte = __ptep_get(ptep); - unsigned long i; - - for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) { - pte_t pte = __ptep_get_and_clear(mm, addr, ptep); - - /* - * If HW_AFDBM is enabled, then the HW could turn on - * the dirty or accessed bit for any page in the set, - * so check them all. - */ - if (pte_dirty(pte)) - orig_pte = pte_mkdirty(orig_pte); - - if (pte_young(pte)) - orig_pte = pte_mkyoung(orig_pte); + pte_t pte, tmp_pte; + bool present; + + pte = __ptep_get_and_clear(mm, addr, ptep); + present = pte_present(pte); + while (--ncontig) { + ptep++; + addr += pgsize; + tmp_pte = __ptep_get_and_clear(mm, addr, ptep); + if (present) { + if (pte_dirty(tmp_pte)) + pte = pte_mkdirty(pte); + if (pte_young(tmp_pte)) + pte = pte_mkyoung(pte); + } } - return orig_pte; + return pte; } static pte_t get_clear_contig_flush(struct mm_struct *mm, @@ -396,18 +388,13 @@ void huge_pte_clear(struct mm_struct *mm, unsigned long addr, __pte_clear(mm, addr, ptep); } -pte_t huge_ptep_get_and_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) +pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, unsigned long sz) { int ncontig; size_t pgsize; - pte_t orig_pte = __ptep_get(ptep); - - if (!pte_cont(orig_pte)) - return __ptep_get_and_clear(mm, addr, ptep); - - ncontig = find_num_contig(mm, addr, ptep, &pgsize); + ncontig = num_contig_ptes(sz, &pgsize); return get_clear_contig(mm, addr, ptep, pgsize, ncontig); } @@ -549,6 +536,8 @@ bool __init arch_hugetlb_valid_size(unsigned long size) pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { + unsigned long psize = huge_page_size(hstate_vma(vma)); + if (alternative_has_cap_unlikely(ARM64_WORKAROUND_2645198)) { /* * Break-before-make (BBM) is required for all user space mappings @@ -558,7 +547,7 @@ pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr if (pte_user_exec(__ptep_get(ptep))) return huge_ptep_clear_flush(vma, addr, ptep); } - return huge_ptep_get_and_clear(vma->vm_mm, addr, ptep); + return huge_ptep_get_and_clear(vma->vm_mm, addr, ptep, psize); } void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, diff --git a/arch/arm64/mm/init.c 
b/arch/arm64/mm/init.c index 9c0b8d9558fc..ccdef53872a0 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -279,12 +279,7 @@ void __init arm64_memblock_init(void) if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) { extern u16 memstart_offset_seed; - - /* - * Use the sanitised version of id_aa64mmfr0_el1 so that linear - * map randomization can be enabled by shrinking the IPA space. - */ - u64 mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); + u64 mmfr0 = read_cpuid(ID_AA64MMFR0_EL1); int parange = cpuid_feature_extract_unsigned_field( mmfr0, ID_AA64MMFR0_EL1_PARANGE_SHIFT); s64 range = linear_region_size - diff --git a/arch/loongarch/include/asm/hugetlb.h b/arch/loongarch/include/asm/hugetlb.h index c8e4057734d0..4dc4b3e04225 100644 --- a/arch/loongarch/include/asm/hugetlb.h +++ b/arch/loongarch/include/asm/hugetlb.h @@ -36,7 +36,8 @@ static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr, #define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) + unsigned long addr, pte_t *ptep, + unsigned long sz) { pte_t clear; pte_t pte = ptep_get(ptep); @@ -51,8 +52,9 @@ static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { pte_t pte; + unsigned long sz = huge_page_size(hstate_vma(vma)); - pte = huge_ptep_get_and_clear(vma->vm_mm, addr, ptep); + pte = huge_ptep_get_and_clear(vma->vm_mm, addr, ptep, sz); flush_tlb_page(vma, addr); return pte; } diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c index 382a09a7152c..1120ac2824f6 100644 --- a/arch/loongarch/kernel/acpi.c +++ b/arch/loongarch/kernel/acpi.c @@ -249,18 +249,6 @@ static __init int setup_node(int pxm) return acpi_map_pxm_to_node(pxm); } -/* - * Callback for SLIT parsing. pxm_to_node() returns NUMA_NO_NODE for - * I/O localities since SRAT does not list them. I/O localities are - * not supported at this point. 
- */ -unsigned int numa_distance_cnt; - -static inline unsigned int get_numa_distances_cnt(struct acpi_table_slit *slit) -{ - return slit->locality_count; -} - void __init numa_set_distance(int from, int to, int distance) { if ((u8)distance != distance || (from == to && distance != LOCAL_DISTANCE)) { diff --git a/arch/loongarch/kernel/machine_kexec.c b/arch/loongarch/kernel/machine_kexec.c index 8ae641dc53bb..f9381800e291 100644 --- a/arch/loongarch/kernel/machine_kexec.c +++ b/arch/loongarch/kernel/machine_kexec.c @@ -126,14 +126,14 @@ void kexec_reboot(void) /* All secondary cpus go to kexec_smp_wait */ if (smp_processor_id() > 0) { relocated_kexec_smp_wait(NULL); - unreachable(); + BUG(); } #endif do_kexec = (void *)reboot_code_buffer; do_kexec(efi_boot, cmdline_ptr, systable_ptr, start_addr, first_ind_entry); - unreachable(); + BUG(); } diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index edcfdfcad7d2..90cb3ca96f08 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -387,6 +387,9 @@ static void __init check_kernel_sections_mem(void) */ static void __init arch_mem_init(char **cmdline_p) { + /* Recalculate max_low_pfn for "mem=xxx" */ + max_pfn = max_low_pfn = PHYS_PFN(memblock_end_of_DRAM()); + if (usermem) pr_info("User-defined physical RAM map overwrite\n"); diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index fbf747447f13..4b24589c0b56 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -19,6 +19,7 @@ #include <linux/smp.h> #include <linux/threads.h> #include <linux/export.h> +#include <linux/suspend.h> #include <linux/syscore_ops.h> #include <linux/time.h> #include <linux/tracepoint.h> @@ -423,7 +424,7 @@ void loongson_cpu_die(unsigned int cpu) mb(); } -void __noreturn arch_cpu_idle_dead(void) +static void __noreturn idle_play_dead(void) { register uint64_t addr; register void (*init_fn)(void); @@ -447,6 +448,50 @@ void __noreturn arch_cpu_idle_dead(void) BUG(); } +#ifdef CONFIG_HIBERNATION +static void __noreturn poll_play_dead(void) +{ + register uint64_t addr; + register void (*init_fn)(void); + + idle_task_exit(); + __this_cpu_write(cpu_state, CPU_DEAD); + + __smp_mb(); + do { + __asm__ __volatile__("nop\n\t"); + addr = iocsr_read64(LOONGARCH_IOCSR_MBUF0); + } while (addr == 0); + + init_fn = (void *)TO_CACHE(addr); + iocsr_write32(0xffffffff, LOONGARCH_IOCSR_IPI_CLEAR); + + init_fn(); + BUG(); +} +#endif + +static void (*play_dead)(void) = idle_play_dead; + +void __noreturn arch_cpu_idle_dead(void) +{ + play_dead(); + BUG(); /* play_dead() doesn't return */ +} + +#ifdef CONFIG_HIBERNATION +int hibernate_resume_nonboot_cpu_disable(void) +{ + int ret; + + play_dead = poll_play_dead; + ret = suspend_disable_secondary_cpus(); + play_dead = idle_play_dead; + + return ret; +} +#endif + #endif /* diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c index c1e8ec5b941b..ea321403644a 100644 --- a/arch/loongarch/kvm/exit.c +++ b/arch/loongarch/kvm/exit.c @@ -669,6 +669,12 @@ static int kvm_handle_rdwr_fault(struct kvm_vcpu *vcpu, bool write) struct kvm_run *run = vcpu->run; unsigned long badv = vcpu->arch.badv; + /* Inject ADE exception if exceed max GPA size */ + if (unlikely(badv >= vcpu->kvm->arch.gpa_size)) { + kvm_queue_exception(vcpu, EXCCODE_ADE, EXSUBCODE_ADEM); + return RESUME_GUEST; + } + ret = kvm_handle_mm_fault(vcpu, badv, write); if (ret) { /* Treat as MMIO */ diff --git a/arch/loongarch/kvm/main.c b/arch/loongarch/kvm/main.c index 
f6d3242b9234..b6864d6e5ec8 100644 --- a/arch/loongarch/kvm/main.c +++ b/arch/loongarch/kvm/main.c @@ -317,6 +317,13 @@ int kvm_arch_enable_virtualization_cpu(void) kvm_debug("GCFG:%lx GSTAT:%lx GINTC:%lx GTLBC:%lx", read_csr_gcfg(), read_csr_gstat(), read_csr_gintc(), read_csr_gtlbc()); + /* + * HW Guest CSR registers are lost after CPU suspend and resume. + * Clear last_vcpu so that Guest CSR registers forced to reload + * from vCPU SW state. + */ + this_cpu_ptr(vmcs)->last_vcpu = NULL; + return 0; } diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index 20f941af3e9e..9e1a9b4aa4c6 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -311,7 +311,7 @@ static int kvm_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) { int ret = RESUME_GUEST; unsigned long estat = vcpu->arch.host_estat; - u32 intr = estat & 0x1fff; /* Ignore NMI */ + u32 intr = estat & CSR_ESTAT_IS; u32 ecode = (estat & CSR_ESTAT_EXC) >> CSR_ESTAT_EXC_SHIFT; vcpu->mode = OUTSIDE_GUEST_MODE; diff --git a/arch/loongarch/kvm/vm.c b/arch/loongarch/kvm/vm.c index b8b3e1972d6e..edccfc8c9cd8 100644 --- a/arch/loongarch/kvm/vm.c +++ b/arch/loongarch/kvm/vm.c @@ -48,7 +48,11 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (kvm_pvtime_supported()) kvm->arch.pv_features |= BIT(KVM_FEATURE_STEAL_TIME); - kvm->arch.gpa_size = BIT(cpu_vabits - 1); + /* + * cpu_vabits means user address space only (a half of total). + * GPA size of VM is the same with the size of user address space. + */ + kvm->arch.gpa_size = BIT(cpu_vabits); kvm->arch.root_level = CONFIG_PGTABLE_LEVELS - 1; kvm->arch.invalid_ptes[0] = 0; kvm->arch.invalid_ptes[1] = (unsigned long)invalid_pte_table; diff --git a/arch/loongarch/mm/mmap.c b/arch/loongarch/mm/mmap.c index 914e82ff3f65..1df9e99582cc 100644 --- a/arch/loongarch/mm/mmap.c +++ b/arch/loongarch/mm/mmap.c @@ -3,6 +3,7 @@ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited */ #include <linux/export.h> +#include <linux/hugetlb.h> #include <linux/io.h> #include <linux/kfence.h> #include <linux/memblock.h> @@ -63,8 +64,11 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp, } info.length = len; - info.align_mask = do_color_align ? (PAGE_MASK & SHM_ALIGN_MASK) : 0; info.align_offset = pgoff << PAGE_SHIFT; + if (filp && is_file_hugepages(filp)) + info.align_mask = huge_page_mask_align(filp); + else + info.align_mask = do_color_align ? (PAGE_MASK & SHM_ALIGN_MASK) : 0; if (dir == DOWN) { info.flags = VM_UNMAPPED_AREA_TOPDOWN; diff --git a/arch/m68k/include/asm/sun3_pgalloc.h b/arch/m68k/include/asm/sun3_pgalloc.h index f1ae4ed890db..80afc3a18724 100644 --- a/arch/m68k/include/asm/sun3_pgalloc.h +++ b/arch/m68k/include/asm/sun3_pgalloc.h @@ -44,8 +44,10 @@ static inline pgd_t * pgd_alloc(struct mm_struct *mm) pgd_t *new_pgd; new_pgd = __pgd_alloc(mm, 0); - memcpy(new_pgd, swapper_pg_dir, PAGE_SIZE); - memset(new_pgd, 0, (PAGE_OFFSET >> PGDIR_SHIFT)); + if (likely(new_pgd != NULL)) { + memcpy(new_pgd, swapper_pg_dir, PAGE_SIZE); + memset(new_pgd, 0, (PAGE_OFFSET >> PGDIR_SHIFT)); + } return new_pgd; } diff --git a/arch/mips/boot/tools/relocs.c b/arch/mips/boot/tools/relocs.c index a88d66c46d7f..9863e1d5c62e 100644 --- a/arch/mips/boot/tools/relocs.c +++ b/arch/mips/boot/tools/relocs.c @@ -468,6 +468,8 @@ static void walk_relocs(int (*process)(struct section *sec, Elf_Rel *rel, Elf_Sym *sym, const char *symname)) { int i; + struct section *extab_sec = sec_lookup("__ex_table"); + int extab_index = extab_sec ? 
extab_sec - secs : -1; /* Walk through the relocations */ for (i = 0; i < ehdr.e_shnum; i++) { @@ -480,6 +482,9 @@ static void walk_relocs(int (*process)(struct section *sec, Elf_Rel *rel, if (sec->shdr.sh_type != SHT_REL_TYPE) continue; + if (sec->shdr.sh_info == extab_index) + continue; + sec_symtab = sec->link; sec_applies = &secs[sec->shdr.sh_info]; if (!(sec_applies->shdr.sh_flags & SHF_ALLOC)) diff --git a/arch/mips/include/asm/hugetlb.h b/arch/mips/include/asm/hugetlb.h index d0a86ce83de9..fbc71ddcf0f6 100644 --- a/arch/mips/include/asm/hugetlb.h +++ b/arch/mips/include/asm/hugetlb.h @@ -27,7 +27,8 @@ static inline int prepare_hugepage_range(struct file *file, #define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) + unsigned long addr, pte_t *ptep, + unsigned long sz) { pte_t clear; pte_t pte = *ptep; @@ -42,13 +43,14 @@ static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { pte_t pte; + unsigned long sz = huge_page_size(hstate_vma(vma)); /* * clear the huge pte entry firstly, so that the other smp threads will * not get old pte entry after finishing flush_tlb_page and before * setting new huge pte entry */ - pte = huge_ptep_get_and_clear(vma->vm_mm, addr, ptep); + pte = huge_ptep_get_and_clear(vma->vm_mm, addr, ptep, sz); flush_tlb_page(vma, addr); return pte; } diff --git a/arch/parisc/include/asm/hugetlb.h b/arch/parisc/include/asm/hugetlb.h index 5b3a5429f71b..21e9ace17739 100644 --- a/arch/parisc/include/asm/hugetlb.h +++ b/arch/parisc/include/asm/hugetlb.h @@ -10,7 +10,7 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, #define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, - pte_t *ptep); + pte_t *ptep, unsigned long sz); #define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, diff --git a/arch/parisc/mm/hugetlbpage.c b/arch/parisc/mm/hugetlbpage.c index e9d18cf25b79..a94fe546d434 100644 --- a/arch/parisc/mm/hugetlbpage.c +++ b/arch/parisc/mm/hugetlbpage.c @@ -126,7 +126,7 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, - pte_t *ptep) + pte_t *ptep, unsigned long sz) { pte_t entry; diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index dad2e7980f24..86326587e58d 100644 --- a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h @@ -45,7 +45,8 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, #define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) + unsigned long addr, pte_t *ptep, + unsigned long sz) { return __pte(pte_update(mm, addr, ptep, ~0UL, 0, 1)); } @@ -55,8 +56,9 @@ static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { pte_t pte; + unsigned long sz = huge_page_size(hstate_vma(vma)); - pte = huge_ptep_get_and_clear(vma->vm_mm, addr, ptep); + pte = huge_ptep_get_and_clear(vma->vm_mm, addr, ptep, sz); flush_hugetlb_page(vma, addr); return pte; } diff --git a/arch/riscv/Kconfig.socs b/arch/riscv/Kconfig.socs index 1916cf7ba450..17606940bb52 100644 --- a/arch/riscv/Kconfig.socs +++ b/arch/riscv/Kconfig.socs @@ -26,6 +26,7 @@ config ARCH_SOPHGO config ARCH_SPACEMIT bool "SpacemiT SoCs" + select 
PINCTRL help This enables support for SpacemiT SoC platform hardware. diff --git a/arch/riscv/include/asm/hugetlb.h b/arch/riscv/include/asm/hugetlb.h index faf3624d8057..446126497768 100644 --- a/arch/riscv/include/asm/hugetlb.h +++ b/arch/riscv/include/asm/hugetlb.h @@ -28,7 +28,8 @@ void set_huge_pte_at(struct mm_struct *mm, #define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR pte_t huge_ptep_get_and_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep); + unsigned long addr, pte_t *ptep, + unsigned long sz); #define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, diff --git a/arch/riscv/kvm/aia_imsic.c b/arch/riscv/kvm/aia_imsic.c index a8085cd8215e..29ef9c2133a9 100644 --- a/arch/riscv/kvm/aia_imsic.c +++ b/arch/riscv/kvm/aia_imsic.c @@ -974,7 +974,6 @@ int kvm_riscv_vcpu_aia_imsic_inject(struct kvm_vcpu *vcpu, if (imsic->vsfile_cpu >= 0) { writel(iid, imsic->vsfile_va + IMSIC_MMIO_SETIPNUM_LE); - kvm_vcpu_kick(vcpu); } else { eix = &imsic->swfile->eix[iid / BITS_PER_TYPE(u64)]; set_bit(iid & (BITS_PER_TYPE(u64) - 1), eix->eip); diff --git a/arch/riscv/kvm/vcpu_sbi_hsm.c b/arch/riscv/kvm/vcpu_sbi_hsm.c index dce667f4b6ab..3070bb31745d 100644 --- a/arch/riscv/kvm/vcpu_sbi_hsm.c +++ b/arch/riscv/kvm/vcpu_sbi_hsm.c @@ -9,6 +9,7 @@ #include <linux/errno.h> #include <linux/err.h> #include <linux/kvm_host.h> +#include <linux/wordpart.h> #include <asm/sbi.h> #include <asm/kvm_vcpu_sbi.h> @@ -79,12 +80,12 @@ static int kvm_sbi_hsm_vcpu_get_status(struct kvm_vcpu *vcpu) target_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, target_vcpuid); if (!target_vcpu) return SBI_ERR_INVALID_PARAM; - if (!kvm_riscv_vcpu_stopped(target_vcpu)) - return SBI_HSM_STATE_STARTED; - else if (vcpu->stat.generic.blocking) + if (kvm_riscv_vcpu_stopped(target_vcpu)) + return SBI_HSM_STATE_STOPPED; + else if (target_vcpu->stat.generic.blocking) return SBI_HSM_STATE_SUSPENDED; else - return SBI_HSM_STATE_STOPPED; + return SBI_HSM_STATE_STARTED; } static int kvm_sbi_ext_hsm_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, @@ -109,7 +110,7 @@ static int kvm_sbi_ext_hsm_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, } return 0; case SBI_EXT_HSM_HART_SUSPEND: - switch (cp->a0) { + switch (lower_32_bits(cp->a0)) { case SBI_HSM_SUSPEND_RET_DEFAULT: kvm_riscv_vcpu_wfi(vcpu); break; diff --git a/arch/riscv/kvm/vcpu_sbi_replace.c b/arch/riscv/kvm/vcpu_sbi_replace.c index 9c2ab3dfa93a..5fbf3f94f1e8 100644 --- a/arch/riscv/kvm/vcpu_sbi_replace.c +++ b/arch/riscv/kvm/vcpu_sbi_replace.c @@ -21,7 +21,7 @@ static int kvm_sbi_ext_time_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, u64 next_cycle; if (cp->a6 != SBI_EXT_TIME_SET_TIMER) { - retdata->err_val = SBI_ERR_INVALID_PARAM; + retdata->err_val = SBI_ERR_NOT_SUPPORTED; return 0; } @@ -51,9 +51,10 @@ static int kvm_sbi_ext_ipi_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, struct kvm_cpu_context *cp = &vcpu->arch.guest_context; unsigned long hmask = cp->a0; unsigned long hbase = cp->a1; + unsigned long hart_bit = 0, sentmask = 0; if (cp->a6 != SBI_EXT_IPI_SEND_IPI) { - retdata->err_val = SBI_ERR_INVALID_PARAM; + retdata->err_val = SBI_ERR_NOT_SUPPORTED; return 0; } @@ -62,15 +63,23 @@ static int kvm_sbi_ext_ipi_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, if (hbase != -1UL) { if (tmp->vcpu_id < hbase) continue; - if (!(hmask & (1UL << (tmp->vcpu_id - hbase)))) + hart_bit = tmp->vcpu_id - hbase; + if (hart_bit >= __riscv_xlen) + goto done; + if (!(hmask & (1UL << hart_bit))) continue; } ret = kvm_riscv_vcpu_set_interrupt(tmp, 
IRQ_VS_SOFT); if (ret < 0) break; + sentmask |= 1UL << hart_bit; kvm_riscv_vcpu_pmu_incr_fw(tmp, SBI_PMU_FW_IPI_RCVD); } +done: + if (hbase != -1UL && (hmask ^ sentmask)) + retdata->err_val = SBI_ERR_INVALID_PARAM; + return ret; } diff --git a/arch/riscv/kvm/vcpu_sbi_system.c b/arch/riscv/kvm/vcpu_sbi_system.c index 5d55e08791fa..bc0ebba89003 100644 --- a/arch/riscv/kvm/vcpu_sbi_system.c +++ b/arch/riscv/kvm/vcpu_sbi_system.c @@ -4,6 +4,7 @@ */ #include <linux/kvm_host.h> +#include <linux/wordpart.h> #include <asm/kvm_vcpu_sbi.h> #include <asm/sbi.h> @@ -19,7 +20,7 @@ static int kvm_sbi_ext_susp_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, switch (funcid) { case SBI_EXT_SUSP_SYSTEM_SUSPEND: - if (cp->a0 != SBI_SUSP_SLEEP_TYPE_SUSPEND_TO_RAM) { + if (lower_32_bits(cp->a0) != SBI_SUSP_SLEEP_TYPE_SUSPEND_TO_RAM) { retdata->err_val = SBI_ERR_INVALID_PARAM; return 0; } diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c index 42314f093922..b4a78a4b35cf 100644 --- a/arch/riscv/mm/hugetlbpage.c +++ b/arch/riscv/mm/hugetlbpage.c @@ -293,7 +293,7 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma, pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, - pte_t *ptep) + pte_t *ptep, unsigned long sz) { pte_t orig_pte = ptep_get(ptep); int pte_num; diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index 7c52acaf9f82..663e87220e89 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -25,8 +25,16 @@ void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr, #define __HAVE_ARCH_HUGE_PTEP_GET pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep); +pte_t __huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep); + #define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR -pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep); +static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep, + unsigned long sz) +{ + return __huge_ptep_get_and_clear(mm, addr, ptep); +} static inline void arch_clear_hugetlb_flags(struct folio *folio) { @@ -48,7 +56,7 @@ static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr, static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) { - return huge_ptep_get_and_clear(vma->vm_mm, address, ptep); + return __huge_ptep_get_and_clear(vma->vm_mm, address, ptep); } #define __HAVE_ARCH_HUGE_PTEP_SET_ACCESS_FLAGS @@ -59,7 +67,7 @@ static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, int changed = !pte_same(huge_ptep_get(vma->vm_mm, addr, ptep), pte); if (changed) { - huge_ptep_get_and_clear(vma->vm_mm, addr, ptep); + __huge_ptep_get_and_clear(vma->vm_mm, addr, ptep); __set_huge_pte_at(vma->vm_mm, addr, ptep, pte); } return changed; @@ -69,7 +77,7 @@ static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - pte_t pte = huge_ptep_get_and_clear(mm, addr, ptep); + pte_t pte = __huge_ptep_get_and_clear(mm, addr, ptep); __set_huge_pte_at(mm, addr, ptep, pte_wrprotect(pte)); } diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 63ba6306632e..e540b022ceb2 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -266,12 +266,13 @@ void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs 
*fregs) { unsigned long *parent = &arch_ftrace_regs(fregs)->regs.gprs[14]; + unsigned long sp = arch_ftrace_regs(fregs)->regs.gprs[15]; if (unlikely(ftrace_graph_is_dead())) return; if (unlikely(atomic_read(&current->tracing_graph_pause))) return; - if (!function_graph_enter_regs(*parent, ip, 0, parent, fregs)) + if (!function_graph_enter_regs(*parent, ip, 0, (unsigned long *)sp, fregs)) *parent = (unsigned long)&return_to_handler; } diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 24fee11b030d..b746213d3110 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -285,10 +285,10 @@ static void __init test_monitor_call(void) return; asm volatile( " mc 0,0\n" - "0: xgr %0,%0\n" + "0: lhi %[val],0\n" "1:\n" - EX_TABLE(0b,1b) - : "+d" (val)); + EX_TABLE(0b, 1b) + : [val] "+d" (val)); if (!val) panic("Monitor call doesn't work!\n"); } diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index d9ce199953de..2e568f175cd4 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -188,8 +188,8 @@ pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep) return __rste_to_pte(pte_val(*ptep)); } -pte_t huge_ptep_get_and_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) +pte_t __huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) { pte_t pte = huge_ptep_get(mm, addr, ptep); pmd_t *pmdp = (pmd_t *) ptep; diff --git a/arch/sparc/include/asm/hugetlb.h b/arch/sparc/include/asm/hugetlb.h index c714ca6a05aa..e7a9cdd498dc 100644 --- a/arch/sparc/include/asm/hugetlb.h +++ b/arch/sparc/include/asm/hugetlb.h @@ -20,7 +20,7 @@ void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr, #define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, - pte_t *ptep); + pte_t *ptep, unsigned long sz); #define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c index eee601a0d2cf..80504148d8a5 100644 --- a/arch/sparc/mm/hugetlbpage.c +++ b/arch/sparc/mm/hugetlbpage.c @@ -260,7 +260,7 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, } pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, - pte_t *ptep) + pte_t *ptep, unsigned long sz) { unsigned int i, nptes, orig_shift, shift; unsigned long size; diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index be2c311f5118..0e27ebd7e36a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1341,6 +1341,7 @@ config X86_REBOOTFIXUPS config MICROCODE def_bool y depends on CPU_SUP_AMD || CPU_SUP_INTEL + select CRYPTO_LIB_SHA256 if CPU_SUP_AMD config MICROCODE_INITRD32 def_bool y diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c index c882e1f67af0..d8c5de40669d 100644 --- a/arch/x86/boot/compressed/pgtable_64.c +++ b/arch/x86/boot/compressed/pgtable_64.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include "misc.h" #include <asm/bootparam.h> +#include <asm/bootparam_utils.h> #include <asm/e820/types.h> #include <asm/processor.h> #include "pgtable.h" @@ -107,6 +108,7 @@ asmlinkage void configure_5level_paging(struct boot_params *bp, void *pgtable) bool l5_required = false; /* Initialize boot_params. Required for cmdline_find_option_bool().
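The sanitize_boot_params() call added just after this comment guards against bootloaders that hand over a struct boot_params with stale garbage in fields they never filled in. A standalone C sketch of the underlying pattern follows; the struct layout and field names here are invented for illustration (the real struct boot_params is far larger and the real helper zeroes several ranges keyed off a sentinel byte):

#include <stdint.h>
#include <string.h>
#include <stdio.h>

/* Hypothetical, greatly simplified stand-in for struct boot_params. */
struct fake_boot_params {
	uint8_t  sentinel;      /* non-zero if the loader did not clear the struct */
	uint8_t  _pad[7];       /* fields the loader is not required to set */
	uint64_t cmd_line_ptr;  /* field that later code will trust */
};

/* If the sentinel is still set, the loader left the structure dirty,
 * so the unowned fields must be zeroed before anything reads them. */
static void sanitize(struct fake_boot_params *bp)
{
	if (bp->sentinel) {
		memset(bp->_pad, 0, sizeof(bp->_pad));
		bp->sentinel = 0;
	}
}

int main(void)
{
	struct fake_boot_params bp = { .sentinel = 0xff, .cmd_line_ptr = 0 };

	sanitize(&bp);
	printf("sentinel honoured, stale fields cleared\n");
	return 0;
}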
*/ + sanitize_boot_params(bp); boot_params_ptr = bp; /* diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c index 82492efc5d94..96c7bc698e6b 100644 --- a/arch/x86/coco/sev/core.c +++ b/arch/x86/coco/sev/core.c @@ -2853,19 +2853,8 @@ struct snp_msg_desc *snp_msg_alloc(void) if (!mdesc->response) goto e_free_request; - mdesc->certs_data = alloc_shared_pages(SEV_FW_BLOB_MAX_SIZE); - if (!mdesc->certs_data) - goto e_free_response; - - /* initial the input address for guest request */ - mdesc->input.req_gpa = __pa(mdesc->request); - mdesc->input.resp_gpa = __pa(mdesc->response); - mdesc->input.data_gpa = __pa(mdesc->certs_data); - return mdesc; -e_free_response: - free_shared_pages(mdesc->response, sizeof(struct snp_guest_msg)); e_free_request: free_shared_pages(mdesc->request, sizeof(struct snp_guest_msg)); e_unmap: @@ -2885,7 +2874,6 @@ void snp_msg_free(struct snp_msg_desc *mdesc) kfree(mdesc->ctx); free_shared_pages(mdesc->response, sizeof(struct snp_guest_msg)); free_shared_pages(mdesc->request, sizeof(struct snp_guest_msg)); - free_shared_pages(mdesc->certs_data, SEV_FW_BLOB_MAX_SIZE); iounmap((__force void __iomem *)mdesc->secrets); memset(mdesc, 0, sizeof(*mdesc)); @@ -3054,7 +3042,7 @@ retry_request: * sequence number must be incremented or the VMPCK must be deleted to * prevent reuse of the IV. */ - rc = snp_issue_guest_request(req, &mdesc->input, rio); + rc = snp_issue_guest_request(req, &req->input, rio); switch (rc) { case -ENOSPC: /* @@ -3064,7 +3052,7 @@ retry_request: * order to increment the sequence number and thus avoid * IV reuse. */ - override_npages = mdesc->input.data_npages; + override_npages = req->input.data_npages; req->exit_code = SVM_VMGEXIT_GUEST_REQUEST; /* @@ -3120,7 +3108,7 @@ retry_request: } if (override_npages) - mdesc->input.data_npages = override_npages; + req->input.data_npages = override_npages; return rc; } @@ -3158,6 +3146,11 @@ int snp_send_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req */ memcpy(mdesc->request, &mdesc->secret_request, sizeof(mdesc->secret_request)); + /* Initialize the input address for guest request */ + req->input.req_gpa = __pa(mdesc->request); + req->input.resp_gpa = __pa(mdesc->response); + req->input.data_gpa = req->certs_data ? __pa(req->certs_data) : 0; + rc = __handle_guest_request(mdesc, req, rio); if (rc) { if (rc == -EIO && diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 94941c5a10ac..14db5b85114c 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -190,6 +190,7 @@ static __always_inline bool int80_is_external(void) /** * do_int80_emulation - 32-bit legacy syscall C entry from asm + * @regs: syscall arguments in struct pt_args on the stack. * * This entry point can be used by 32-bit and 64-bit programs to perform * 32-bit system calls. 
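Worth noting on the SEV-SNP hunks above: the request, response and certificate GPAs now live in struct snp_guest_req and are filled in at send time, so a certs buffer is referenced only when the caller actually supplied one. A simplified userspace sketch of that per-request setup; __pa() is faked here and none of this is the kernel's actual helper code:

#include <stdint.h>
#include <stdio.h>

/* Stand-in for the kernel's virt-to-phys conversion, illustration only. */
#define __pa(x) ((uint64_t)(uintptr_t)(x))

struct snp_req_data { uint64_t req_gpa, resp_gpa, data_gpa; };

struct guest_req {
	struct snp_req_data input;  /* now per-request, not per-device */
	void *certs_data;           /* optional, request-scoped buffer */
};

/* Fill the GPAs when the request is sent, so each request carries its
 * own addresses and data_gpa is zero when no certs buffer exists. */
static void prepare(struct guest_req *req, void *tx, void *rx)
{
	req->input.req_gpa  = __pa(tx);
	req->input.resp_gpa = __pa(rx);
	req->input.data_gpa = req->certs_data ? __pa(req->certs_data) : 0;
}

int main(void)
{
	char tx[16], rx[16];
	struct guest_req req = { .certs_data = NULL };

	prepare(&req, tx, rx);
	printf("data_gpa=%llu (no certs buffer attached)\n",
	       (unsigned long long)req.input.data_gpa);
	return 0;
}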
Instances of INT $0x80 can be found inline in diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 8f218ac0d445..2092d615333d 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -628,7 +628,7 @@ int x86_pmu_hw_config(struct perf_event *event) if (event->attr.type == event->pmu->type) event->hw.config |= x86_pmu_get_event_config(event); - if (event->attr.sample_period && x86_pmu.limit_period) { + if (!event->attr.freq && x86_pmu.limit_period) { s64 left = event->attr.sample_period; x86_pmu.limit_period(event, &left); if (left > event->attr.sample_period) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index cdcebf30468a..cdb19e3ba3aa 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3952,6 +3952,85 @@ static inline bool intel_pmu_has_cap(struct perf_event *event, int idx) return test_bit(idx, (unsigned long *)&intel_cap->capabilities); } +static u64 intel_pmu_freq_start_period(struct perf_event *event) +{ + int type = event->attr.type; + u64 config, factor; + s64 start; + + /* + * The 127 is the lowest possible recommended SAV (sample after value) + * for a 4000 freq (default freq), according to the event list JSON file. + * Also, assume the workload is idle 50% time. + */ + factor = 64 * 4000; + if (type != PERF_TYPE_HARDWARE && type != PERF_TYPE_HW_CACHE) + goto end; + + /* + * The estimation of the start period in the freq mode is + * based on the below assumption. + * + * For a cycles or an instructions event, 1GHZ of the + * underlying platform, 1 IPC. The workload is idle 50% time. + * The start period = 1,000,000,000 * 1 / freq / 2. + * = 500,000,000 / freq + * + * Usually, the branch-related events occur less than the + * instructions event. According to the Intel event list JSON + * file, the SAV (sample after value) of a branch-related event + * is usually 1/4 of an instruction event. + * The start period of branch-related events = 125,000,000 / freq. + * + * The cache-related events occurs even less. The SAV is usually + * 1/20 of an instruction event. + * The start period of cache-related events = 25,000,000 / freq. + */ + config = event->attr.config & PERF_HW_EVENT_MASK; + if (type == PERF_TYPE_HARDWARE) { + switch (config) { + case PERF_COUNT_HW_CPU_CYCLES: + case PERF_COUNT_HW_INSTRUCTIONS: + case PERF_COUNT_HW_BUS_CYCLES: + case PERF_COUNT_HW_STALLED_CYCLES_FRONTEND: + case PERF_COUNT_HW_STALLED_CYCLES_BACKEND: + case PERF_COUNT_HW_REF_CPU_CYCLES: + factor = 500000000; + break; + case PERF_COUNT_HW_BRANCH_INSTRUCTIONS: + case PERF_COUNT_HW_BRANCH_MISSES: + factor = 125000000; + break; + case PERF_COUNT_HW_CACHE_REFERENCES: + case PERF_COUNT_HW_CACHE_MISSES: + factor = 25000000; + break; + default: + goto end; + } + } + + if (type == PERF_TYPE_HW_CACHE) + factor = 25000000; +end: + /* + * Usually, a prime or a number with less factors (close to prime) + * is chosen as an SAV, which makes it less likely that the sampling + * period synchronizes with some periodic event in the workload. + * Minus 1 to make it at least avoiding values near power of twos + * for the default freq. 
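To make the arithmetic above concrete: with the default 4000 Hz frequency, the three factors give start periods of 124999, 31249 and 6249 samples. A standalone check, with DIV_ROUND_UP_ULL re-derived in userspace (not kernel code):

#include <stdio.h>
#include <stdint.h>

/* Userspace rendition of the kernel's DIV_ROUND_UP_ULL. */
static uint64_t div_round_up(uint64_t n, uint64_t d)
{
	return (n + d - 1) / d;
}

int main(void)
{
	uint64_t freq = 4000;               /* default perf sampling freq */
	uint64_t factors[] = {
		500000000,                  /* cycles, instructions */
		125000000,                  /* branch events */
		25000000,                   /* cache events */
	};

	for (int i = 0; i < 3; i++)
		printf("factor=%9llu -> start period=%llu\n",
		       (unsigned long long)factors[i],
		       (unsigned long long)(div_round_up(factors[i], freq) - 1));
	return 0;
}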
+ */ + start = DIV_ROUND_UP_ULL(factor, event->attr.sample_freq) - 1; + + if (start > x86_pmu.max_period) + start = x86_pmu.max_period; + + if (x86_pmu.limit_period) + x86_pmu.limit_period(event, &start); + + return start; +} + static int intel_pmu_hw_config(struct perf_event *event) { int ret = x86_pmu_hw_config(event); @@ -3963,6 +4042,12 @@ static int intel_pmu_hw_config(struct perf_event *event) if (ret) return ret; + if (event->attr.freq && event->attr.sample_freq) { + event->hw.sample_period = intel_pmu_freq_start_period(event); + event->hw.last_period = event->hw.sample_period; + local64_set(&event->hw.period_left, event->hw.sample_period); + } + if (event->attr.precise_ip) { if ((event->attr.config & INTEL_ARCH_EVENT_MASK) == INTEL_FIXED_VLBR_EVENT) return -EINVAL; diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c index 4952faf03e82..6941f4811bec 100644 --- a/arch/x86/events/rapl.c +++ b/arch/x86/events/rapl.c @@ -879,6 +879,7 @@ static const struct x86_cpu_id rapl_model_match[] __initconst = { X86_MATCH_VFM(INTEL_METEORLAKE_L, &model_skl), X86_MATCH_VFM(INTEL_ARROWLAKE_H, &model_skl), X86_MATCH_VFM(INTEL_ARROWLAKE, &model_skl), + X86_MATCH_VFM(INTEL_ARROWLAKE_U, &model_skl), X86_MATCH_VFM(INTEL_LUNARLAKE_M, &model_skl), {}, }; diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c index 4e1b1e3b5658..3f4e20d7b724 100644 --- a/arch/x86/hyperv/hv_vtl.c +++ b/arch/x86/hyperv/hv_vtl.c @@ -30,6 +30,7 @@ void __init hv_vtl_init_platform(void) x86_platform.realmode_init = x86_init_noop; x86_init.irqs.pre_vector_init = x86_init_noop; x86_init.timers.timer_init = x86_init_noop; + x86_init.resources.probe_roms = x86_init_noop; /* Avoid searching for BIOS MP tables */ x86_init.mpparse.find_mptable = x86_init_noop; diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c index dd68d9ad9b22..ec7880271cf9 100644 --- a/arch/x86/hyperv/ivm.c +++ b/arch/x86/hyperv/ivm.c @@ -464,7 +464,6 @@ static int hv_mark_gpa_visibility(u16 count, const u64 pfn[], enum hv_mem_host_visibility visibility) { struct hv_gpa_range_for_visibility *input; - u16 pages_processed; u64 hv_status; unsigned long flags; @@ -493,7 +492,7 @@ static int hv_mark_gpa_visibility(u16 count, const u64 pfn[], memcpy((void *)input->gpa_page_list, pfn, count * sizeof(*pfn)); hv_status = hv_do_rep_hypercall( HVCALL_MODIFY_SPARSE_GPA_PAGE_HOST_VISIBILITY, count, - 0, input, &pages_processed); + 0, input, NULL); local_irq_restore(flags); if (hv_result_success(hv_status)) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 0b7af5902ff7..32ae3aa50c7e 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -780,6 +780,7 @@ struct kvm_vcpu_arch { u32 pkru; u32 hflags; u64 efer; + u64 host_debugctl; u64 apic_base; struct kvm_lapic *apic; /* kernel irqchip context */ bool load_eoi_exitmap_pending; diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 7e8bf78c03d5..aee26bb8230f 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -198,9 +198,8 @@ .endm /* - * Equivalent to -mindirect-branch-cs-prefix; emit the 5 byte jmp/call - * to the retpoline thunk with a CS prefix when the register requires - * a RAX prefix byte to encode. Also see apply_retpolines(). + * Emits a conditional CS prefix that is compatible with + * -mindirect-branch-cs-prefix. 
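The net effect of the CALL_NOSPEC rework just below is that the choice between the retpoline thunk and a plain indirect call moves from runtime alternatives patching to the preprocessor. A loose userspace analogue of that compile-time selection pattern; the thunk here is a plain stand-in, not a real retpoline:

#include <stdio.h>

/* Compile-time selection standing in for the kernel's #ifdef
 * CONFIG_MITIGATION_RETPOLINE choice: either route the call through a
 * named wrapper or call through the pointer directly. */
#ifdef MITIGATION_RETPOLINE
static int thunk(int (*target)(void)) { return target(); } /* stand-in */
#define CALL_NOSPEC(fp) thunk(fp)
#else
#define CALL_NOSPEC(fp) (fp)()
#endif

static int hello(void) { return 42; }

int main(void)
{
	printf("%d\n", CALL_NOSPEC(hello));
	return 0;
}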
*/ .macro __CS_PREFIX reg:req .irp rs,r8,r9,r10,r11,r12,r13,r14,r15 @@ -421,19 +420,26 @@ static inline void call_depth_return_thunk(void) {} #ifdef CONFIG_X86_64 /* + * Emits a conditional CS prefix that is compatible with + * -mindirect-branch-cs-prefix. + */ +#define __CS_PREFIX(reg) \ + ".irp rs,r8,r9,r10,r11,r12,r13,r14,r15\n" \ + ".ifc \\rs," reg "\n" \ + ".byte 0x2e\n" \ + ".endif\n" \ + ".endr\n" + +/* * Inline asm uses the %V modifier which is only in newer GCC * which is ensured when CONFIG_MITIGATION_RETPOLINE is defined. */ -# define CALL_NOSPEC \ - ALTERNATIVE_2( \ - ANNOTATE_RETPOLINE_SAFE \ - "call *%[thunk_target]\n", \ - "call __x86_indirect_thunk_%V[thunk_target]\n", \ - X86_FEATURE_RETPOLINE, \ - "lfence;\n" \ - ANNOTATE_RETPOLINE_SAFE \ - "call *%[thunk_target]\n", \ - X86_FEATURE_RETPOLINE_LFENCE) +#ifdef CONFIG_MITIGATION_RETPOLINE +#define CALL_NOSPEC __CS_PREFIX("%V[thunk_target]") \ + "call __x86_indirect_thunk_%V[thunk_target]\n" +#else +#define CALL_NOSPEC "call *%[thunk_target]\n" +#endif # define THUNK_TARGET(addr) [thunk_target] "r" (addr) diff --git a/arch/x86/include/asm/pgtable-2level_types.h b/arch/x86/include/asm/pgtable-2level_types.h index 7f6ccff0ba72..4a12c276b181 100644 --- a/arch/x86/include/asm/pgtable-2level_types.h +++ b/arch/x86/include/asm/pgtable-2level_types.h @@ -23,17 +23,17 @@ typedef union { #define ARCH_PAGE_TABLE_SYNC_MASK PGTBL_PMD_MODIFIED /* - * traditional i386 two-level paging structure: + * Traditional i386 two-level paging structure: */ #define PGDIR_SHIFT 22 #define PTRS_PER_PGD 1024 - /* - * the i386 is two-level, so we don't really have any - * PMD directory physically. + * The i386 is two-level, so we don't really have any + * PMD directory physically: */ +#define PTRS_PER_PMD 1 #define PTRS_PER_PTE 1024 diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 1581246491b5..ba7999f66abe 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -203,6 +203,9 @@ struct snp_guest_req { unsigned int vmpck_id; u8 msg_version; u8 msg_type; + + struct snp_req_data input; + void *certs_data; }; /* @@ -263,9 +266,6 @@ struct snp_msg_desc { struct snp_guest_msg secret_request, secret_response; struct snp_secrets_page *secrets; - struct snp_req_data input; - - void *certs_data; struct aesgcm_ctx *ctx; diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 11fac09e3a8c..67e773744edb 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -143,7 +143,6 @@ bool __init early_is_amd_nb(u32 device) struct resource *amd_get_mmconfig_range(struct resource *res) { - u32 address; u64 base, msr; unsigned int segn_busn_bits; @@ -151,13 +150,11 @@ struct resource *amd_get_mmconfig_range(struct resource *res) boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) return NULL; - /* assume all cpus from fam10h have mmconfig */ - if (boot_cpu_data.x86 < 0x10) + /* Assume CPUs from Fam10h have mmconfig, although not all VMs do */ + if (boot_cpu_data.x86 < 0x10 || + rdmsrl_safe(MSR_FAM10H_MMIO_CONF_BASE, &msr)) return NULL; - address = MSR_FAM10H_MMIO_CONF_BASE; - rdmsrl(address, msr); - /* mmconfig is not enabled */ if (!(msr & FAM10H_MMIO_CONF_ENABLE)) return NULL; diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index e6fa03ed9172..a6c6bccfa8b8 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -808,7 +808,7 @@ void init_intel_cacheinfo(struct cpuinfo_x86 *c) cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]); /* If bit 31 is set, this is an
unknown format */ - for (j = 0 ; j < 3 ; j++) + for (j = 0 ; j < 4 ; j++) if (regs[j] & (1 << 31)) regs[j] = 0; diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 9651275aecd1..dfec2c61e354 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -153,8 +153,8 @@ static void geode_configure(void) u8 ccr3; local_irq_save(flags); - /* Suspend on halt power saving and enable #SUSP pin */ - setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x88); + /* Suspend on halt power saving */ + setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x08); ccr3 = getCx86(CX86_CCR3); setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 3dce22f00dc3..134368a3f4b1 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -635,26 +635,37 @@ static unsigned int intel_size_cache(struct cpuinfo_x86 *c, unsigned int size) } #endif -#define TLB_INST_4K 0x01 -#define TLB_INST_4M 0x02 -#define TLB_INST_2M_4M 0x03 +#define TLB_INST_4K 0x01 +#define TLB_INST_4M 0x02 +#define TLB_INST_2M_4M 0x03 -#define TLB_INST_ALL 0x05 -#define TLB_INST_1G 0x06 +#define TLB_INST_ALL 0x05 +#define TLB_INST_1G 0x06 -#define TLB_DATA_4K 0x11 -#define TLB_DATA_4M 0x12 -#define TLB_DATA_2M_4M 0x13 -#define TLB_DATA_4K_4M 0x14 +#define TLB_DATA_4K 0x11 +#define TLB_DATA_4M 0x12 +#define TLB_DATA_2M_4M 0x13 +#define TLB_DATA_4K_4M 0x14 -#define TLB_DATA_1G 0x16 +#define TLB_DATA_1G 0x16 +#define TLB_DATA_1G_2M_4M 0x17 -#define TLB_DATA0_4K 0x21 -#define TLB_DATA0_4M 0x22 -#define TLB_DATA0_2M_4M 0x23 +#define TLB_DATA0_4K 0x21 +#define TLB_DATA0_4M 0x22 +#define TLB_DATA0_2M_4M 0x23 -#define STLB_4K 0x41 -#define STLB_4K_2M 0x42 +#define STLB_4K 0x41 +#define STLB_4K_2M 0x42 + +/* + * All of leaf 0x2's one-byte TLB descriptors implies the same number of + * entries for their respective TLB types. The 0x63 descriptor is an + * exception: it implies 4 dTLB entries for 1GB pages 32 dTLB entries + * for 2MB or 4MB pages. Encode descriptor 0x63 dTLB entry count for + * 2MB/4MB pages here, as its count for dTLB 1GB pages is already at the + * intel_tlb_table[] mapping. 
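Because descriptor 0x63 encodes two TLB types at once, the lookup below credits the fixed 2M/4M entry count first and then falls through to the ordinary 1G accounting. A self-contained sketch of that switch shape, with counter names simplified:

#include <stdio.h>

enum { TLB_DATA_1G = 0x16, TLB_DATA_1G_2M_4M = 0x17 };
#define TLB_0x63_2M_4M_ENTRIES 32

static unsigned int lld_2m, lld_4m, lld_1g;

/* Simplified form of the intel_tlb_lookup() switch: 0x63 first credits
 * the hidden 2M/4M entries, then falls through so the 1G count from the
 * descriptor table is applied as usual. */
static void account(int type, unsigned int entries)
{
	switch (type) {
	case TLB_DATA_1G_2M_4M:
		if (lld_2m < TLB_0x63_2M_4M_ENTRIES)
			lld_2m = TLB_0x63_2M_4M_ENTRIES;
		if (lld_4m < TLB_0x63_2M_4M_ENTRIES)
			lld_4m = TLB_0x63_2M_4M_ENTRIES;
		/* fallthrough */
	case TLB_DATA_1G:
		if (lld_1g < entries)
			lld_1g = entries;
		break;
	}
}

int main(void)
{
	account(TLB_DATA_1G_2M_4M, 4);	/* descriptor 0x63 */
	printf("2M=%u 4M=%u 1G=%u\n", lld_2m, lld_4m, lld_1g);
	return 0;
}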
+ */ +#define TLB_0x63_2M_4M_ENTRIES 32 static const struct _tlb_table intel_tlb_table[] = { { 0x01, TLB_INST_4K, 32, " TLB_INST 4 KByte pages, 4-way set associative" }, @@ -676,7 +687,8 @@ static const struct _tlb_table intel_tlb_table[] = { { 0x5c, TLB_DATA_4K_4M, 128, " TLB_DATA 4 KByte and 4 MByte pages" }, { 0x5d, TLB_DATA_4K_4M, 256, " TLB_DATA 4 KByte and 4 MByte pages" }, { 0x61, TLB_INST_4K, 48, " TLB_INST 4 KByte pages, full associative" }, - { 0x63, TLB_DATA_1G, 4, " TLB_DATA 1 GByte pages, 4-way set associative" }, + { 0x63, TLB_DATA_1G_2M_4M, 4, " TLB_DATA 1 GByte pages, 4-way set associative" + " (plus 32 entries TLB_DATA 2 MByte or 4 MByte pages, not encoded here)" }, { 0x6b, TLB_DATA_4K, 256, " TLB_DATA 4 KByte pages, 8-way associative" }, { 0x6c, TLB_DATA_2M_4M, 128, " TLB_DATA 2 MByte or 4 MByte pages, 8-way associative" }, { 0x6d, TLB_DATA_1G, 16, " TLB_DATA 1 GByte pages, fully associative" }, @@ -776,6 +788,12 @@ static void intel_tlb_lookup(const unsigned char desc) if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries) tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries; break; + case TLB_DATA_1G_2M_4M: + if (tlb_lld_2m[ENTRIES] < TLB_0x63_2M_4M_ENTRIES) + tlb_lld_2m[ENTRIES] = TLB_0x63_2M_4M_ENTRIES; + if (tlb_lld_4m[ENTRIES] < TLB_0x63_2M_4M_ENTRIES) + tlb_lld_4m[ENTRIES] = TLB_0x63_2M_4M_ENTRIES; + fallthrough; case TLB_DATA_1G: if (tlb_lld_1g[ENTRIES] < intel_tlb_table[k].entries) tlb_lld_1g[ENTRIES] = intel_tlb_table[k].entries; @@ -799,7 +817,7 @@ static void intel_detect_tlb(struct cpuinfo_x86 *c) cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]); /* If bit 31 is set, this is an unknown format */ - for (j = 0 ; j < 3 ; j++) + for (j = 0 ; j < 4 ; j++) if (regs[j] & (1 << 31)) regs[j] = 0; diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index a5dac7f3c0a0..138689b8e1d8 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -23,14 +23,18 @@ #include <linux/earlycpio.h> #include <linux/firmware.h> +#include <linux/bsearch.h> #include <linux/uaccess.h> #include <linux/vmalloc.h> #include <linux/initrd.h> #include <linux/kernel.h> #include <linux/pci.h> +#include <crypto/sha2.h> + #include <asm/microcode.h> #include <asm/processor.h> +#include <asm/cmdline.h> #include <asm/setup.h> #include <asm/cpu.h> #include <asm/msr.h> @@ -145,6 +149,113 @@ ucode_path[] __maybe_unused = "kernel/x86/microcode/AuthenticAMD.bin"; */ static u32 bsp_cpuid_1_eax __ro_after_init; +static bool sha_check = true; + +struct patch_digest { + u32 patch_id; + u8 sha256[SHA256_DIGEST_SIZE]; +}; + +#include "amd_shas.c" + +static int cmp_id(const void *key, const void *elem) +{ + struct patch_digest *pd = (struct patch_digest *)elem; + u32 patch_id = *(u32 *)key; + + if (patch_id == pd->patch_id) + return 0; + else if (patch_id < pd->patch_id) + return -1; + else + return 1; +} + +static bool need_sha_check(u32 cur_rev) +{ + switch (cur_rev >> 8) { + case 0x80012: return cur_rev <= 0x800126f; break; + case 0x80082: return cur_rev <= 0x800820f; break; + case 0x83010: return cur_rev <= 0x830107c; break; + case 0x86001: return cur_rev <= 0x860010e; break; + case 0x86081: return cur_rev <= 0x8608108; break; + case 0x87010: return cur_rev <= 0x8701034; break; + case 0x8a000: return cur_rev <= 0x8a0000a; break; + case 0xa0010: return cur_rev <= 0xa00107a; break; + case 0xa0011: return cur_rev <= 0xa0011da; break; + case 0xa0012: return cur_rev <= 0xa001243; break; + case 0xa0082: return cur_rev <= 0xa00820e; break; + case 0xa1011: return
cur_rev <= 0xa101153; break; + case 0xa1012: return cur_rev <= 0xa10124e; break; + case 0xa1081: return cur_rev <= 0xa108109; break; + case 0xa2010: return cur_rev <= 0xa20102f; break; + case 0xa2012: return cur_rev <= 0xa201212; break; + case 0xa4041: return cur_rev <= 0xa404109; break; + case 0xa5000: return cur_rev <= 0xa500013; break; + case 0xa6012: return cur_rev <= 0xa60120a; break; + case 0xa7041: return cur_rev <= 0xa704109; break; + case 0xa7052: return cur_rev <= 0xa705208; break; + case 0xa7080: return cur_rev <= 0xa708009; break; + case 0xa70c0: return cur_rev <= 0xa70C009; break; + case 0xaa001: return cur_rev <= 0xaa00116; break; + case 0xaa002: return cur_rev <= 0xaa00218; break; + default: break; + } + + pr_info("You should not be seeing this. Please send the following couple of lines to x86-<at>-kernel.org\n"); + pr_info("CPUID(1).EAX: 0x%x, current revision: 0x%x\n", bsp_cpuid_1_eax, cur_rev); + return true; +} + +static bool verify_sha256_digest(u32 patch_id, u32 cur_rev, const u8 *data, unsigned int len) +{ + struct patch_digest *pd = NULL; + u8 digest[SHA256_DIGEST_SIZE]; + struct sha256_state s; + int i; + + if (x86_family(bsp_cpuid_1_eax) < 0x17 || + x86_family(bsp_cpuid_1_eax) > 0x19) + return true; + + if (!need_sha_check(cur_rev)) + return true; + + if (!sha_check) + return true; + + pd = bsearch(&patch_id, phashes, ARRAY_SIZE(phashes), sizeof(struct patch_digest), cmp_id); + if (!pd) { + pr_err("No sha256 digest for patch ID: 0x%x found\n", patch_id); + return false; + } + + sha256_init(&s); + sha256_update(&s, data, len); + sha256_final(&s, digest); + + if (memcmp(digest, pd->sha256, sizeof(digest))) { + pr_err("Patch 0x%x SHA256 digest mismatch!\n", patch_id); + + for (i = 0; i < SHA256_DIGEST_SIZE; i++) + pr_cont("0x%x ", digest[i]); + pr_info("\n"); + + return false; + } + + return true; +} + +static u32 get_patch_level(void) +{ + u32 rev, dummy __always_unused; + + native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); + + return rev; +} + static union cpuid_1_eax ucode_rev_to_cpuid(unsigned int val) { union zen_patch_rev p; @@ -246,8 +357,7 @@ static bool verify_equivalence_table(const u8 *buf, size_t buf_size) * On success, @sh_psize returns the patch size according to the section header, * to the caller. */ -static bool -__verify_patch_section(const u8 *buf, size_t buf_size, u32 *sh_psize) +static bool __verify_patch_section(const u8 *buf, size_t buf_size, u32 *sh_psize) { u32 p_type, p_size; const u32 *hdr; @@ -484,10 +594,13 @@ static void scan_containers(u8 *ucode, size_t size, struct cont_desc *desc) } } -static bool __apply_microcode_amd(struct microcode_amd *mc, unsigned int psize) +static bool __apply_microcode_amd(struct microcode_amd *mc, u32 *cur_rev, + unsigned int psize) { unsigned long p_addr = (unsigned long)&mc->hdr.data_code; - u32 rev, dummy; + + if (!verify_sha256_digest(mc->hdr.patch_id, *cur_rev, (const u8 *)p_addr, psize)) + return -1; native_wrmsrl(MSR_AMD64_PATCH_LOADER, p_addr); @@ -505,47 +618,13 @@ static bool __apply_microcode_amd(struct microcode_amd *mc, unsigned int psize) } /* verify patch application was successful */ - native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); - - if (rev != mc->hdr.patch_id) + *cur_rev = get_patch_level(); + if (*cur_rev != mc->hdr.patch_id) return false; return true; } -/* - * Early load occurs before we can vmalloc(). So we look for the microcode - * patch container file in initrd, traverse equivalent cpu table, look for a - * matching microcode patch, and update, all in initrd memory in place. 
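verify_sha256_digest() above is the standard bsearch()-over-a-sorted-table idiom keyed on patch_id. Stripped of the hashing, the lookup half looks roughly like this; the digest bytes below are zeroed placeholders, not real values:

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

struct patch_digest {
	uint32_t patch_id;
	uint8_t sha256[32];
};

/* Table must stay sorted by patch_id -- bsearch()'s precondition. */
static const struct patch_digest phashes[] = {
	{ 0x8001227, { 0 /* placeholder digest bytes */ } },
	{ 0x8001250, { 0 } },
	{ 0xa705208, { 0 } },
};

static int cmp_id(const void *key, const void *elem)
{
	uint32_t patch_id = *(const uint32_t *)key;
	const struct patch_digest *pd = elem;

	if (patch_id == pd->patch_id)
		return 0;
	return patch_id < pd->patch_id ? -1 : 1;
}

int main(void)
{
	uint32_t id = 0x8001250;
	const struct patch_digest *pd =
		bsearch(&id, phashes, sizeof(phashes) / sizeof(phashes[0]),
			sizeof(phashes[0]), cmp_id);

	printf(pd ? "digest found for 0x%x\n" : "no digest for 0x%x\n", id);
	return 0;
}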
- * When vmalloc() is available for use later -- on 64-bit during first AP load, - * and on 32-bit during save_microcode_in_initrd_amd() -- we can call - * load_microcode_amd() to save equivalent cpu table and microcode patches in - * kernel heap memory. - * - * Returns true if container found (sets @desc), false otherwise. - */ -static bool early_apply_microcode(u32 old_rev, void *ucode, size_t size) -{ - struct cont_desc desc = { 0 }; - struct microcode_amd *mc; - - scan_containers(ucode, size, &desc); - - mc = desc.mc; - if (!mc) - return false; - - /* - * Allow application of the same revision to pick up SMT-specific - * changes even if the revision of the other SMT thread is already - * up-to-date. - */ - if (old_rev > mc->hdr.patch_id) - return false; - - return __apply_microcode_amd(mc, desc.psize); -} - static bool get_builtin_microcode(struct cpio_data *cp) { char fw_name[36] = "amd-ucode/microcode_amd.bin"; @@ -583,14 +662,35 @@ static bool __init find_blobs_in_containers(struct cpio_data *ret) return found; } +/* + * Early load occurs before we can vmalloc(). So we look for the microcode + * patch container file in initrd, traverse equivalent cpu table, look for a + * matching microcode patch, and update, all in initrd memory in place. + * When vmalloc() is available for use later -- on 64-bit during first AP load, + * and on 32-bit during save_microcode_in_initrd() -- we can call + * load_microcode_amd() to save equivalent cpu table and microcode patches in + * kernel heap memory. + */ void __init load_ucode_amd_bsp(struct early_load_data *ed, unsigned int cpuid_1_eax) { + struct cont_desc desc = { }; + struct microcode_amd *mc; struct cpio_data cp = { }; - u32 dummy; + char buf[4]; + u32 rev; + + if (cmdline_find_option(boot_command_line, "microcode.amd_sha_check", buf, 4)) { + if (!strncmp(buf, "off", 3)) { + sha_check = false; + pr_warn_once("It is a very very bad idea to disable the blobs SHA check!\n"); + add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK); + } + } bsp_cpuid_1_eax = cpuid_1_eax; - native_rdmsr(MSR_AMD64_PATCH_LEVEL, ed->old_rev, dummy); + rev = get_patch_level(); + ed->old_rev = rev; /* Needed in load_microcode_amd() */ ucode_cpu_info[0].cpu_sig.sig = cpuid_1_eax; @@ -598,37 +698,23 @@ void __init load_ucode_amd_bsp(struct early_load_data *ed, unsigned int cpuid_1_ if (!find_blobs_in_containers(&cp)) return; - if (early_apply_microcode(ed->old_rev, cp.data, cp.size)) - native_rdmsr(MSR_AMD64_PATCH_LEVEL, ed->new_rev, dummy); -} - -static enum ucode_state _load_microcode_amd(u8 family, const u8 *data, size_t size); - -static int __init save_microcode_in_initrd(void) -{ - unsigned int cpuid_1_eax = native_cpuid_eax(1); - struct cpuinfo_x86 *c = &boot_cpu_data; - struct cont_desc desc = { 0 }; - enum ucode_state ret; - struct cpio_data cp; - - if (dis_ucode_ldr || c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) - return 0; - - if (!find_blobs_in_containers(&cp)) - return -EINVAL; - scan_containers(cp.data, cp.size, &desc); - if (!desc.mc) - return -EINVAL; - ret = _load_microcode_amd(x86_family(cpuid_1_eax), desc.data, desc.size); - if (ret > UCODE_UPDATED) - return -EINVAL; + mc = desc.mc; + if (!mc) + return; - return 0; + /* + * Allow application of the same revision to pick up SMT-specific + * changes even if the revision of the other SMT thread is already + * up-to-date. 
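The revision guard that follows this comment deliberately uses a strict greater-than, so an equal revision is still applied and the second SMT sibling can load its own copy. A toy demonstration, with made-up revision numbers:

#include <stdio.h>
#include <stdint.h>

/* Strict '>' rejects only genuinely older patches; an equal revision
 * is (re)applied so each SMT thread picks up thread-local state. */
static int should_apply(uint32_t current_rev, uint32_t patch_rev)
{
	return !(current_rev > patch_rev);
}

int main(void)
{
	printf("older: %d\n", should_apply(0x0a00121a, 0x0a001219)); /* 0 */
	printf("equal: %d\n", should_apply(0x0a001219, 0x0a001219)); /* 1 */
	printf("newer: %d\n", should_apply(0x0a001218, 0x0a001219)); /* 1 */
	return 0;
}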
+ */ + if (ed->old_rev > mc->hdr.patch_id) + return; + + if (__apply_microcode_amd(mc, &rev, desc.psize)) + ed->new_rev = rev; } -early_initcall(save_microcode_in_initrd); static inline bool patch_cpus_equivalent(struct ucode_patch *p, struct ucode_patch *n, @@ -729,14 +815,9 @@ static void free_cache(void) static struct ucode_patch *find_patch(unsigned int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - u32 rev, dummy __always_unused; u16 equiv_id = 0; - /* fetch rev if not populated yet: */ - if (!uci->cpu_sig.rev) { - rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); - uci->cpu_sig.rev = rev; - } + uci->cpu_sig.rev = get_patch_level(); if (x86_family(bsp_cpuid_1_eax) < 0x17) { equiv_id = find_equiv_id(&equiv_table, uci->cpu_sig.sig); @@ -759,22 +840,20 @@ void reload_ucode_amd(unsigned int cpu) mc = p->data; - rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); - + rev = get_patch_level(); if (rev < mc->hdr.patch_id) { - if (__apply_microcode_amd(mc, p->size)) - pr_info_once("reload revision: 0x%08x\n", mc->hdr.patch_id); + if (__apply_microcode_amd(mc, &rev, p->size)) + pr_info_once("reload revision: 0x%08x\n", rev); } } static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) { - struct cpuinfo_x86 *c = &cpu_data(cpu); struct ucode_cpu_info *uci = ucode_cpu_info + cpu; struct ucode_patch *p; csig->sig = cpuid_eax(0x00000001); - csig->rev = c->microcode; + csig->rev = get_patch_level(); /* * a patch could have been loaded early, set uci->mc so that @@ -815,7 +894,7 @@ static enum ucode_state apply_microcode_amd(int cpu) goto out; } - if (!__apply_microcode_amd(mc_amd, p->size)) { + if (!__apply_microcode_amd(mc_amd, &rev, p->size)) { pr_err("CPU%d: update failed for patch_level=0x%08x\n", cpu, mc_amd->hdr.patch_id); return UCODE_ERROR; @@ -937,8 +1016,7 @@ static int verify_and_add_patch(u8 family, u8 *fw, unsigned int leftover, } /* Scan the blob in @data and add microcode patches to the cache. */ -static enum ucode_state __load_microcode_amd(u8 family, const u8 *data, - size_t size) +static enum ucode_state __load_microcode_amd(u8 family, const u8 *data, size_t size) { u8 *fw = (u8 *)data; size_t offset; @@ -996,7 +1074,7 @@ static enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t siz if (ret != UCODE_OK) return ret; - for_each_node(nid) { + for_each_node_with_cpus(nid) { cpu = cpumask_first(cpumask_of_node(nid)); c = &cpu_data(cpu); @@ -1013,6 +1091,32 @@ static enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t siz return ret; } +static int __init save_microcode_in_initrd(void) +{ + unsigned int cpuid_1_eax = native_cpuid_eax(1); + struct cpuinfo_x86 *c = &boot_cpu_data; + struct cont_desc desc = { 0 }; + enum ucode_state ret; + struct cpio_data cp; + + if (dis_ucode_ldr || c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) + return 0; + + if (!find_blobs_in_containers(&cp)) + return -EINVAL; + + scan_containers(cp.data, cp.size, &desc); + if (!desc.mc) + return -EINVAL; + + ret = _load_microcode_amd(x86_family(cpuid_1_eax), desc.data, desc.size); + if (ret > UCODE_UPDATED) + return -EINVAL; + + return 0; +} +early_initcall(save_microcode_in_initrd); + /* * AMD microcode firmware naming convention, up to family 15h they are in * the legacy file: diff --git a/arch/x86/kernel/cpu/microcode/amd_shas.c b/arch/x86/kernel/cpu/microcode/amd_shas.c new file mode 100644 index 000000000000..2a1655b1fdd8 --- /dev/null +++ b/arch/x86/kernel/cpu/microcode/amd_shas.c @@ -0,0 +1,444 @@ +/* Keep 'em sorted. 
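The "Keep 'em sorted" rule opening amd_shas.c is load-bearing: bsearch() silently returns garbage on unsorted input. A minimal self-check one could run over such a table; this is a sketch only, the kernel does not carry this check:

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

struct patch_digest { uint32_t patch_id; uint8_t sha256[32]; };

static const struct patch_digest phashes[] = {
	{ 0x8001227, { 0 } }, { 0x8001250, { 0 } }, { 0x800126e, { 0 } },
};

/* Returns 0 when every patch_id is strictly increasing. */
static int check_sorted(const struct patch_digest *t, size_t n)
{
	for (size_t i = 1; i < n; i++)
		if (t[i - 1].patch_id >= t[i].patch_id)
			return -1;
	return 0;
}

int main(void)
{
	size_t n = sizeof(phashes) / sizeof(phashes[0]);

	printf("table %s\n", check_sorted(phashes, n) ? "NOT sorted" : "sorted");
	return 0;
}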
*/ +static const struct patch_digest phashes[] = { + { 0x8001227, { + 0x99,0xc0,0x9b,0x2b,0xcc,0x9f,0x52,0x1b, + 0x1a,0x5f,0x1d,0x83,0xa1,0x6c,0xc4,0x46, + 0xe2,0x6c,0xda,0x73,0xfb,0x2d,0x23,0xa8, + 0x77,0xdc,0x15,0x31,0x33,0x4a,0x46,0x18, + } + }, + { 0x8001250, { + 0xc0,0x0b,0x6b,0x19,0xfd,0x5c,0x39,0x60, + 0xd5,0xc3,0x57,0x46,0x54,0xe4,0xd1,0xaa, + 0xa8,0xf7,0x1f,0xa8,0x6a,0x60,0x3e,0xe3, + 0x27,0x39,0x8e,0x53,0x30,0xf8,0x49,0x19, + } + }, + { 0x800126e, { + 0xf3,0x8b,0x2b,0xb6,0x34,0xe3,0xc8,0x2c, + 0xef,0xec,0x63,0x6d,0xc8,0x76,0x77,0xb3, + 0x25,0x5a,0xb7,0x52,0x8c,0x83,0x26,0xe6, + 0x4c,0xbe,0xbf,0xe9,0x7d,0x22,0x6a,0x43, + } + }, + { 0x800126f, { + 0x2b,0x5a,0xf2,0x9c,0xdd,0xd2,0x7f,0xec, + 0xec,0x96,0x09,0x57,0xb0,0x96,0x29,0x8b, + 0x2e,0x26,0x91,0xf0,0x49,0x33,0x42,0x18, + 0xdd,0x4b,0x65,0x5a,0xd4,0x15,0x3d,0x33, + } + }, + { 0x800820d, { + 0x68,0x98,0x83,0xcd,0x22,0x0d,0xdd,0x59, + 0x73,0x2c,0x5b,0x37,0x1f,0x84,0x0e,0x67, + 0x96,0x43,0x83,0x0c,0x46,0x44,0xab,0x7c, + 0x7b,0x65,0x9e,0x57,0xb5,0x90,0x4b,0x0e, + } + }, + { 0x8301025, { + 0xe4,0x7d,0xdb,0x1e,0x14,0xb4,0x5e,0x36, + 0x8f,0x3e,0x48,0x88,0x3c,0x6d,0x76,0xa1, + 0x59,0xc6,0xc0,0x72,0x42,0xdf,0x6c,0x30, + 0x6f,0x0b,0x28,0x16,0x61,0xfc,0x79,0x77, + } + }, + { 0x8301055, { + 0x81,0x7b,0x99,0x1b,0xae,0x2d,0x4f,0x9a, + 0xef,0x13,0xce,0xb5,0x10,0xaf,0x6a,0xea, + 0xe5,0xb0,0x64,0x98,0x10,0x68,0x34,0x3b, + 0x9d,0x7a,0xd6,0x22,0x77,0x5f,0xb3,0x5b, + } + }, + { 0x8301072, { + 0xcf,0x76,0xa7,0x1a,0x49,0xdf,0x2a,0x5e, + 0x9e,0x40,0x70,0xe5,0xdd,0x8a,0xa8,0x28, + 0x20,0xdc,0x91,0xd8,0x2c,0xa6,0xa0,0xb1, + 0x2d,0x22,0x26,0x94,0x4b,0x40,0x85,0x30, + } + }, + { 0x830107a, { + 0x2a,0x65,0x8c,0x1a,0x5e,0x07,0x21,0x72, + 0xdf,0x90,0xa6,0x51,0x37,0xd3,0x4b,0x34, + 0xc4,0xda,0x03,0xe1,0x8a,0x6c,0xfb,0x20, + 0x04,0xb2,0x81,0x05,0xd4,0x87,0xf4,0x0a, + } + }, + { 0x830107b, { + 0xb3,0x43,0x13,0x63,0x56,0xc1,0x39,0xad, + 0x10,0xa6,0x2b,0xcc,0x02,0xe6,0x76,0x2a, + 0x1e,0x39,0x58,0x3e,0x23,0x6e,0xa4,0x04, + 0x95,0xea,0xf9,0x6d,0xc2,0x8a,0x13,0x19, + } + }, + { 0x830107c, { + 0x21,0x64,0xde,0xfb,0x9f,0x68,0x96,0x47, + 0x70,0x5c,0xe2,0x8f,0x18,0x52,0x6a,0xac, + 0xa4,0xd2,0x2e,0xe0,0xde,0x68,0x66,0xc3, + 0xeb,0x1e,0xd3,0x3f,0xbc,0x51,0x1d,0x38, + } + }, + { 0x860010d, { + 0x86,0xb6,0x15,0x83,0xbc,0x3b,0x9c,0xe0, + 0xb3,0xef,0x1d,0x99,0x84,0x35,0x15,0xf7, + 0x7c,0x2a,0xc6,0x42,0xdb,0x73,0x07,0x5c, + 0x7d,0xc3,0x02,0xb5,0x43,0x06,0x5e,0xf8, + } + }, + { 0x8608108, { + 0x14,0xfe,0x57,0x86,0x49,0xc8,0x68,0xe2, + 0x11,0xa3,0xcb,0x6e,0xff,0x6e,0xd5,0x38, + 0xfe,0x89,0x1a,0xe0,0x67,0xbf,0xc4,0xcc, + 0x1b,0x9f,0x84,0x77,0x2b,0x9f,0xaa,0xbd, + } + }, + { 0x8701034, { + 0xc3,0x14,0x09,0xa8,0x9c,0x3f,0x8d,0x83, + 0x9b,0x4c,0xa5,0xb7,0x64,0x8b,0x91,0x5d, + 0x85,0x6a,0x39,0x26,0x1e,0x14,0x41,0xa8, + 0x75,0xea,0xa6,0xf9,0xc9,0xd1,0xea,0x2b, + } + }, + { 0x8a00008, { + 0xd7,0x2a,0x93,0xdc,0x05,0x2f,0xa5,0x6e, + 0x0c,0x61,0x2c,0x07,0x9f,0x38,0xe9,0x8e, + 0xef,0x7d,0x2a,0x05,0x4d,0x56,0xaf,0x72, + 0xe7,0x56,0x47,0x6e,0x60,0x27,0xd5,0x8c, + } + }, + { 0x8a0000a, { + 0x73,0x31,0x26,0x22,0xd4,0xf9,0xee,0x3c, + 0x07,0x06,0xe7,0xb9,0xad,0xd8,0x72,0x44, + 0x33,0x31,0xaa,0x7d,0xc3,0x67,0x0e,0xdb, + 0x47,0xb5,0xaa,0xbc,0xf5,0xbb,0xd9,0x20, + } + }, + { 0xa00104c, { + 0x3c,0x8a,0xfe,0x04,0x62,0xd8,0x6d,0xbe, + 0xa7,0x14,0x28,0x64,0x75,0xc0,0xa3,0x76, + 0xb7,0x92,0x0b,0x97,0x0a,0x8e,0x9c,0x5b, + 0x1b,0xc8,0x9d,0x3a,0x1e,0x81,0x3d,0x3b, + } + }, + { 0xa00104e, { + 0xc4,0x35,0x82,0x67,0xd2,0x86,0xe5,0xb2, + 0xfd,0x69,0x12,0x38,0xc8,0x77,0xba,0xe0, + 
0x70,0xf9,0x77,0x89,0x10,0xa6,0x74,0x4e, + 0x56,0x58,0x13,0xf5,0x84,0x70,0x28,0x0b, + } + }, + { 0xa001053, { + 0x92,0x0e,0xf4,0x69,0x10,0x3b,0xf9,0x9d, + 0x31,0x1b,0xa6,0x99,0x08,0x7d,0xd7,0x25, + 0x7e,0x1e,0x89,0xba,0x35,0x8d,0xac,0xcb, + 0x3a,0xb4,0xdf,0x58,0x12,0xcf,0xc0,0xc3, + } + }, + { 0xa001058, { + 0x33,0x7d,0xa9,0xb5,0x4e,0x62,0x13,0x36, + 0xef,0x66,0xc9,0xbd,0x0a,0xa6,0x3b,0x19, + 0xcb,0xf5,0xc2,0xc3,0x55,0x47,0x20,0xec, + 0x1f,0x7b,0xa1,0x44,0x0e,0x8e,0xa4,0xb2, + } + }, + { 0xa001075, { + 0x39,0x02,0x82,0xd0,0x7c,0x26,0x43,0xe9, + 0x26,0xa3,0xd9,0x96,0xf7,0x30,0x13,0x0a, + 0x8a,0x0e,0xac,0xe7,0x1d,0xdc,0xe2,0x0f, + 0xcb,0x9e,0x8d,0xbc,0xd2,0xa2,0x44,0xe0, + } + }, + { 0xa001078, { + 0x2d,0x67,0xc7,0x35,0xca,0xef,0x2f,0x25, + 0x4c,0x45,0x93,0x3f,0x36,0x01,0x8c,0xce, + 0xa8,0x5b,0x07,0xd3,0xc1,0x35,0x3c,0x04, + 0x20,0xa2,0xfc,0xdc,0xe6,0xce,0x26,0x3e, + } + }, + { 0xa001079, { + 0x43,0xe2,0x05,0x9c,0xfd,0xb7,0x5b,0xeb, + 0x5b,0xe9,0xeb,0x3b,0x96,0xf4,0xe4,0x93, + 0x73,0x45,0x3e,0xac,0x8d,0x3b,0xe4,0xdb, + 0x10,0x31,0xc1,0xe4,0xa2,0xd0,0x5a,0x8a, + } + }, + { 0xa00107a, { + 0x5f,0x92,0xca,0xff,0xc3,0x59,0x22,0x5f, + 0x02,0xa0,0x91,0x3b,0x4a,0x45,0x10,0xfd, + 0x19,0xe1,0x8a,0x6d,0x9a,0x92,0xc1,0x3f, + 0x75,0x78,0xac,0x78,0x03,0x1d,0xdb,0x18, + } + }, + { 0xa001143, { + 0x56,0xca,0xf7,0x43,0x8a,0x4c,0x46,0x80, + 0xec,0xde,0xe5,0x9c,0x50,0x84,0x9a,0x42, + 0x27,0xe5,0x51,0x84,0x8f,0x19,0xc0,0x8d, + 0x0c,0x25,0xb4,0xb0,0x8f,0x10,0xf3,0xf8, + } + }, + { 0xa001144, { + 0x42,0xd5,0x9b,0xa7,0xd6,0x15,0x29,0x41, + 0x61,0xc4,0x72,0x3f,0xf3,0x06,0x78,0x4b, + 0x65,0xf3,0x0e,0xfa,0x9c,0x87,0xde,0x25, + 0xbd,0xb3,0x9a,0xf4,0x75,0x13,0x53,0xdc, + } + }, + { 0xa00115d, { + 0xd4,0xc4,0x49,0x36,0x89,0x0b,0x47,0xdd, + 0xfb,0x2f,0x88,0x3b,0x5f,0xf2,0x8e,0x75, + 0xc6,0x6c,0x37,0x5a,0x90,0x25,0x94,0x3e, + 0x36,0x9c,0xae,0x02,0x38,0x6c,0xf5,0x05, + } + }, + { 0xa001173, { + 0x28,0xbb,0x9b,0xd1,0xa0,0xa0,0x7e,0x3a, + 0x59,0x20,0xc0,0xa9,0xb2,0x5c,0xc3,0x35, + 0x53,0x89,0xe1,0x4c,0x93,0x2f,0x1d,0xc3, + 0xe5,0xf7,0xf3,0xc8,0x9b,0x61,0xaa,0x9e, + } + }, + { 0xa0011a8, { + 0x97,0xc6,0x16,0x65,0x99,0xa4,0x85,0x3b, + 0xf6,0xce,0xaa,0x49,0x4a,0x3a,0xc5,0xb6, + 0x78,0x25,0xbc,0x53,0xaf,0x5d,0xcf,0xf4, + 0x23,0x12,0xbb,0xb1,0xbc,0x8a,0x02,0x2e, + } + }, + { 0xa0011ce, { + 0xcf,0x1c,0x90,0xa3,0x85,0x0a,0xbf,0x71, + 0x94,0x0e,0x80,0x86,0x85,0x4f,0xd7,0x86, + 0xae,0x38,0x23,0x28,0x2b,0x35,0x9b,0x4e, + 0xfe,0xb8,0xcd,0x3d,0x3d,0x39,0xc9,0x6a, + } + }, + { 0xa0011d1, { + 0xdf,0x0e,0xca,0xde,0xf6,0xce,0x5c,0x1e, + 0x4c,0xec,0xd7,0x71,0x83,0xcc,0xa8,0x09, + 0xc7,0xc5,0xfe,0xb2,0xf7,0x05,0xd2,0xc5, + 0x12,0xdd,0xe4,0xf3,0x92,0x1c,0x3d,0xb8, + } + }, + { 0xa0011d3, { + 0x91,0xe6,0x10,0xd7,0x57,0xb0,0x95,0x0b, + 0x9a,0x24,0xee,0xf7,0xcf,0x56,0xc1,0xa6, + 0x4a,0x52,0x7d,0x5f,0x9f,0xdf,0xf6,0x00, + 0x65,0xf7,0xea,0xe8,0x2a,0x88,0xe2,0x26, + } + }, + { 0xa0011d5, { + 0xed,0x69,0x89,0xf4,0xeb,0x64,0xc2,0x13, + 0xe0,0x51,0x1f,0x03,0x26,0x52,0x7d,0xb7, + 0x93,0x5d,0x65,0xca,0xb8,0x12,0x1d,0x62, + 0x0d,0x5b,0x65,0x34,0x69,0xb2,0x62,0x21, + } + }, + { 0xa001223, { + 0xfb,0x32,0x5f,0xc6,0x83,0x4f,0x8c,0xb8, + 0xa4,0x05,0xf9,0x71,0x53,0x01,0x16,0xc4, + 0x83,0x75,0x94,0xdd,0xeb,0x7e,0xb7,0x15, + 0x8e,0x3b,0x50,0x29,0x8a,0x9c,0xcc,0x45, + } + }, + { 0xa001224, { + 0x0e,0x0c,0xdf,0xb4,0x89,0xee,0x35,0x25, + 0xdd,0x9e,0xdb,0xc0,0x69,0x83,0x0a,0xad, + 0x26,0xa9,0xaa,0x9d,0xfc,0x3c,0xea,0xf9, + 0x6c,0xdc,0xd5,0x6d,0x8b,0x6e,0x85,0x4a, + } + }, + { 0xa001227, { + 0xab,0xc6,0x00,0x69,0x4b,0x50,0x87,0xad, + 
0x5f,0x0e,0x8b,0xea,0x57,0x38,0xce,0x1d, + 0x0f,0x75,0x26,0x02,0xf6,0xd6,0x96,0xe9, + 0x87,0xb9,0xd6,0x20,0x27,0x7c,0xd2,0xe0, + } + }, + { 0xa001229, { + 0x7f,0x49,0x49,0x48,0x46,0xa5,0x50,0xa6, + 0x28,0x89,0x98,0xe2,0x9e,0xb4,0x7f,0x75, + 0x33,0xa7,0x04,0x02,0xe4,0x82,0xbf,0xb4, + 0xa5,0x3a,0xba,0x24,0x8d,0x31,0x10,0x1d, + } + }, + { 0xa00122e, { + 0x56,0x94,0xa9,0x5d,0x06,0x68,0xfe,0xaf, + 0xdf,0x7a,0xff,0x2d,0xdf,0x74,0x0f,0x15, + 0x66,0xfb,0x00,0xb5,0x51,0x97,0x9b,0xfa, + 0xcb,0x79,0x85,0x46,0x25,0xb4,0xd2,0x10, + } + }, + { 0xa001231, { + 0x0b,0x46,0xa5,0xfc,0x18,0x15,0xa0,0x9e, + 0xa6,0xdc,0xb7,0xff,0x17,0xf7,0x30,0x64, + 0xd4,0xda,0x9e,0x1b,0xc3,0xfc,0x02,0x3b, + 0xe2,0xc6,0x0e,0x41,0x54,0xb5,0x18,0xdd, + } + }, + { 0xa001234, { + 0x88,0x8d,0xed,0xab,0xb5,0xbd,0x4e,0xf7, + 0x7f,0xd4,0x0e,0x95,0x34,0x91,0xff,0xcc, + 0xfb,0x2a,0xcd,0xf7,0xd5,0xdb,0x4c,0x9b, + 0xd6,0x2e,0x73,0x50,0x8f,0x83,0x79,0x1a, + } + }, + { 0xa001236, { + 0x3d,0x30,0x00,0xb9,0x71,0xba,0x87,0x78, + 0xa8,0x43,0x55,0xc4,0x26,0x59,0xcf,0x9d, + 0x93,0xce,0x64,0x0e,0x8b,0x72,0x11,0x8b, + 0xa3,0x8f,0x51,0xe9,0xca,0x98,0xaa,0x25, + } + }, + { 0xa001238, { + 0x72,0xf7,0x4b,0x0c,0x7d,0x58,0x65,0xcc, + 0x00,0xcc,0x57,0x16,0x68,0x16,0xf8,0x2a, + 0x1b,0xb3,0x8b,0xe1,0xb6,0x83,0x8c,0x7e, + 0xc0,0xcd,0x33,0xf2,0x8d,0xf9,0xef,0x59, + } + }, + { 0xa00820c, { + 0xa8,0x0c,0x81,0xc0,0xa6,0x00,0xe7,0xf3, + 0x5f,0x65,0xd3,0xb9,0x6f,0xea,0x93,0x63, + 0xf1,0x8c,0x88,0x45,0xd7,0x82,0x80,0xd1, + 0xe1,0x3b,0x8d,0xb2,0xf8,0x22,0x03,0xe2, + } + }, + { 0xa10113e, { + 0x05,0x3c,0x66,0xd7,0xa9,0x5a,0x33,0x10, + 0x1b,0xf8,0x9c,0x8f,0xed,0xfc,0xa7,0xa0, + 0x15,0xe3,0x3f,0x4b,0x1d,0x0d,0x0a,0xd5, + 0xfa,0x90,0xc4,0xed,0x9d,0x90,0xaf,0x53, + } + }, + { 0xa101144, { + 0xb3,0x0b,0x26,0x9a,0xf8,0x7c,0x02,0x26, + 0x35,0x84,0x53,0xa4,0xd3,0x2c,0x7c,0x09, + 0x68,0x7b,0x96,0xb6,0x93,0xef,0xde,0xbc, + 0xfd,0x4b,0x15,0xd2,0x81,0xd3,0x51,0x47, + } + }, + { 0xa101148, { + 0x20,0xd5,0x6f,0x40,0x4a,0xf6,0x48,0x90, + 0xc2,0x93,0x9a,0xc2,0xfd,0xac,0xef,0x4f, + 0xfa,0xc0,0x3d,0x92,0x3c,0x6d,0x01,0x08, + 0xf1,0x5e,0xb0,0xde,0xb4,0x98,0xae,0xc4, + } + }, + { 0xa10123e, { + 0x03,0xb9,0x2c,0x76,0x48,0x93,0xc9,0x18, + 0xfb,0x56,0xfd,0xf7,0xe2,0x1d,0xca,0x4d, + 0x1d,0x13,0x53,0x63,0xfe,0x42,0x6f,0xfc, + 0x19,0x0f,0xf1,0xfc,0xa7,0xdd,0x89,0x1b, + } + }, + { 0xa101244, { + 0x71,0x56,0xb5,0x9f,0x21,0xbf,0xb3,0x3c, + 0x8c,0xd7,0x36,0xd0,0x34,0x52,0x1b,0xb1, + 0x46,0x2f,0x04,0xf0,0x37,0xd8,0x1e,0x72, + 0x24,0xa2,0x80,0x84,0x83,0x65,0x84,0xc0, + } + }, + { 0xa101248, { + 0xed,0x3b,0x95,0xa6,0x68,0xa7,0x77,0x3e, + 0xfc,0x17,0x26,0xe2,0x7b,0xd5,0x56,0x22, + 0x2c,0x1d,0xef,0xeb,0x56,0xdd,0xba,0x6e, + 0x1b,0x7d,0x64,0x9d,0x4b,0x53,0x13,0x75, + } + }, + { 0xa108108, { + 0xed,0xc2,0xec,0xa1,0x15,0xc6,0x65,0xe9, + 0xd0,0xef,0x39,0xaa,0x7f,0x55,0x06,0xc6, + 0xf5,0xd4,0x3f,0x7b,0x14,0xd5,0x60,0x2c, + 0x28,0x1e,0x9c,0x59,0x69,0x99,0x4d,0x16, + } + }, + { 0xa20102d, { + 0xf9,0x6e,0xf2,0x32,0xd3,0x0f,0x5f,0x11, + 0x59,0xa1,0xfe,0xcc,0xcd,0x9b,0x42,0x89, + 0x8b,0x89,0x2f,0xb5,0xbb,0x82,0xef,0x23, + 0x8c,0xe9,0x19,0x3e,0xcc,0x3f,0x7b,0xb4, + } + }, + { 0xa201210, { + 0xe8,0x6d,0x51,0x6a,0x8e,0x72,0xf3,0xfe, + 0x6e,0x16,0xbc,0x62,0x59,0x40,0x17,0xe9, + 0x6d,0x3d,0x0e,0x6b,0xa7,0xac,0xe3,0x68, + 0xf7,0x55,0xf0,0x13,0xbb,0x22,0xf6,0x41, + } + }, + { 0xa404107, { + 0xbb,0x04,0x4e,0x47,0xdd,0x5e,0x26,0x45, + 0x1a,0xc9,0x56,0x24,0xa4,0x4c,0x82,0xb0, + 0x8b,0x0d,0x9f,0xf9,0x3a,0xdf,0xc6,0x81, + 0x13,0xbc,0xc5,0x25,0xe4,0xc5,0xc3,0x99, + } + }, + { 0xa500011, { + 
0x23,0x3d,0x70,0x7d,0x03,0xc3,0xc4,0xf4, + 0x2b,0x82,0xc6,0x05,0xda,0x80,0x0a,0xf1, + 0xd7,0x5b,0x65,0x3a,0x7d,0xab,0xdf,0xa2, + 0x11,0x5e,0x96,0x7e,0x71,0xe9,0xfc,0x74, + } + }, + { 0xa601209, { + 0x66,0x48,0xd4,0x09,0x05,0xcb,0x29,0x32, + 0x66,0xb7,0x9a,0x76,0xcd,0x11,0xf3,0x30, + 0x15,0x86,0xcc,0x5d,0x97,0x0f,0xc0,0x46, + 0xe8,0x73,0xe2,0xd6,0xdb,0xd2,0x77,0x1d, + } + }, + { 0xa704107, { + 0xf3,0xc6,0x58,0x26,0xee,0xac,0x3f,0xd6, + 0xce,0xa1,0x72,0x47,0x3b,0xba,0x2b,0x93, + 0x2a,0xad,0x8e,0x6b,0xea,0x9b,0xb7,0xc2, + 0x64,0x39,0x71,0x8c,0xce,0xe7,0x41,0x39, + } + }, + { 0xa705206, { + 0x8d,0xc0,0x76,0xbd,0x58,0x9f,0x8f,0xa4, + 0x12,0x9d,0x21,0xfb,0x48,0x21,0xbc,0xe7, + 0x67,0x6f,0x04,0x18,0xae,0x20,0x87,0x4b, + 0x03,0x35,0xe9,0xbe,0xfb,0x06,0xdf,0xfc, + } + }, + { 0xa708007, { + 0x6b,0x76,0xcc,0x78,0xc5,0x8a,0xa3,0xe3, + 0x32,0x2d,0x79,0xe4,0xc3,0x80,0xdb,0xb2, + 0x07,0xaa,0x3a,0xe0,0x57,0x13,0x72,0x80, + 0xdf,0x92,0x73,0x84,0x87,0x3c,0x73,0x93, + } + }, + { 0xa70c005, { + 0x88,0x5d,0xfb,0x79,0x64,0xd8,0x46,0x3b, + 0x4a,0x83,0x8e,0x77,0x7e,0xcf,0xb3,0x0f, + 0x1f,0x1f,0xf1,0x97,0xeb,0xfe,0x56,0x55, + 0xee,0x49,0xac,0xe1,0x8b,0x13,0xc5,0x13, + } + }, + { 0xaa00116, { + 0xe8,0x4c,0x2c,0x88,0xa1,0xac,0x24,0x63, + 0x65,0xe5,0xaa,0x2d,0x16,0xa9,0xc3,0xf5, + 0xfe,0x1d,0x5e,0x65,0xc7,0xaa,0x92,0x4d, + 0x91,0xee,0x76,0xbb,0x4c,0x66,0x78,0xc9, + } + }, + { 0xaa00212, { + 0xbd,0x57,0x5d,0x0a,0x0a,0x30,0xc1,0x75, + 0x95,0x58,0x5e,0x93,0x02,0x28,0x43,0x71, + 0xed,0x42,0x29,0xc8,0xec,0x34,0x2b,0xb2, + 0x1a,0x65,0x4b,0xfe,0x07,0x0f,0x34,0xa1, + } + }, + { 0xaa00213, { + 0xed,0x58,0xb7,0x76,0x81,0x7f,0xd9,0x3a, + 0x1a,0xff,0x8b,0x34,0xb8,0x4a,0x99,0x0f, + 0x28,0x49,0x6c,0x56,0x2b,0xdc,0xb7,0xed, + 0x96,0xd5,0x9d,0xc1,0x7a,0xd4,0x51,0x9b, + } + }, + { 0xaa00215, { + 0x55,0xd3,0x28,0xcb,0x87,0xa9,0x32,0xe9, + 0x4e,0x85,0x4b,0x7c,0x6b,0xd5,0x7c,0xd4, + 0x1b,0x51,0x71,0x3a,0x0e,0x0b,0xdc,0x9b, + 0x68,0x2f,0x46,0xee,0xfe,0xc6,0x6d,0xef, + } + }, +}; diff --git a/arch/x86/kernel/cpu/microcode/internal.h b/arch/x86/kernel/cpu/microcode/internal.h index 21776c529fa9..5df621752fef 100644 --- a/arch/x86/kernel/cpu/microcode/internal.h +++ b/arch/x86/kernel/cpu/microcode/internal.h @@ -100,14 +100,12 @@ extern bool force_minrev; #ifdef CONFIG_CPU_SUP_AMD void load_ucode_amd_bsp(struct early_load_data *ed, unsigned int family); void load_ucode_amd_ap(unsigned int family); -int save_microcode_in_initrd_amd(unsigned int family); void reload_ucode_amd(unsigned int cpu); struct microcode_ops *init_amd_microcode(void); void exit_amd_microcode(void); #else /* CONFIG_CPU_SUP_AMD */ static inline void load_ucode_amd_bsp(struct early_load_data *ed, unsigned int family) { } static inline void load_ucode_amd_ap(unsigned int family) { } -static inline int save_microcode_in_initrd_amd(unsigned int family) { return -EINVAL; } static inline void reload_ucode_amd(unsigned int cpu) { } static inline struct microcode_ops *init_amd_microcode(void) { return NULL; } static inline void exit_amd_microcode(void) { } diff --git a/arch/x86/kernel/cpu/sgx/driver.c b/arch/x86/kernel/cpu/sgx/driver.c index 22b65a5f5ec6..7f8d1e11dbee 100644 --- a/arch/x86/kernel/cpu/sgx/driver.c +++ b/arch/x86/kernel/cpu/sgx/driver.c @@ -150,13 +150,15 @@ int __init sgx_drv_init(void) u64 xfrm_mask; int ret; - if (!cpu_feature_enabled(X86_FEATURE_SGX_LC)) + if (!cpu_feature_enabled(X86_FEATURE_SGX_LC)) { + pr_info("SGX disabled: SGX launch control CPU feature is not available, /dev/sgx_enclave disabled.\n"); return -ENODEV; + } cpuid_count(SGX_CPUID, 0, &eax, 
&ebx, &ecx, &edx); if (!(eax & 1)) { - pr_err("SGX disabled: SGX1 instruction support not available.\n"); + pr_info("SGX disabled: SGX1 instruction support not available, /dev/sgx_enclave disabled.\n"); return -ENODEV; } @@ -173,8 +175,10 @@ int __init sgx_drv_init(void) } ret = misc_register(&sgx_dev_enclave); - if (ret) + if (ret) { + pr_info("SGX disabled: Unable to register the /dev/sgx_enclave driver (%d).\n", ret); return ret; + } return 0; } diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c index b65ab214bdf5..776a20172867 100644 --- a/arch/x86/kernel/cpu/sgx/ioctl.c +++ b/arch/x86/kernel/cpu/sgx/ioctl.c @@ -64,6 +64,13 @@ static int sgx_encl_create(struct sgx_encl *encl, struct sgx_secs *secs) struct file *backing; long ret; + /* + * ECREATE would detect this too, but checking here also ensures + * that the 'encl_size' calculations below can never overflow. + */ + if (!is_power_of_2(secs->size)) + return -EINVAL; + va_page = sgx_encl_grow(encl, true); if (IS_ERR(va_page)) return PTR_ERR(va_page); diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index 59d23cdf4ed0..dd8748c45529 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c @@ -2,6 +2,7 @@ /* * Architecture specific OF callbacks. */ +#include <linux/acpi.h> #include <linux/export.h> #include <linux/io.h> #include <linux/interrupt.h> @@ -313,6 +314,6 @@ void __init x86_flattree_get_config(void) if (initial_dtb) early_memunmap(dt, map_len); #endif - if (of_have_populated_dt()) + if (acpi_disabled && of_have_populated_dt()) x86_init.mpparse.parse_smp_cfg = x86_dtb_parse_smp_config; } diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 385e3a5fc304..feca4f20b06a 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -25,8 +25,10 @@ #include <asm/posted_intr.h> #include <asm/irq_remapping.h> +#if defined(CONFIG_X86_LOCAL_APIC) || defined(CONFIG_X86_THERMAL_VECTOR) #define CREATE_TRACE_POINTS #include <asm/trace/irq_vectors.h> +#endif DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); EXPORT_PER_CPU_SYMBOL(irq_stat); diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 8eb3a88707f2..121edf1f2a79 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -1763,7 +1763,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) entry->ecx = entry->edx = 0; if (!enable_pmu || !kvm_cpu_cap_has(X86_FEATURE_PERFMON_V2)) { - entry->eax = entry->ebx; + entry->eax = entry->ebx = 0; break; } diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index d4ac4a1f8b81..8160870398b9 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -7460,7 +7460,7 @@ static bool kvm_nx_huge_page_recovery_worker(void *data) return true; } -static void kvm_mmu_start_lpage_recovery(struct once *once) +static int kvm_mmu_start_lpage_recovery(struct once *once) { struct kvm_arch *ka = container_of(once, struct kvm_arch, nx_once); struct kvm *kvm = container_of(ka, struct kvm, arch); @@ -7471,13 +7471,14 @@ static void kvm_mmu_start_lpage_recovery(struct once *once) kvm_nx_huge_page_recovery_worker_kill, kvm, "kvm-nx-lpage-recovery"); - if (!nx_thread) - return; + if (IS_ERR(nx_thread)) + return PTR_ERR(nx_thread); vhost_task_start(nx_thread); /* Make the task visible only once it is fully started. 
*/ WRITE_ONCE(kvm->arch.nx_huge_page_recovery_thread, nx_thread); + return 0; } int kvm_mmu_post_init_vm(struct kvm *kvm) @@ -7485,10 +7486,7 @@ int kvm_mmu_post_init_vm(struct kvm *kvm) if (nx_hugepage_mitigation_hard_disabled) return 0; - call_once(&kvm->arch.nx_once, kvm_mmu_start_lpage_recovery); - if (!kvm->arch.nx_huge_page_recovery_thread) - return -ENOMEM; - return 0; + return call_once(&kvm->arch.nx_once, kvm_mmu_start_lpage_recovery); } void kvm_mmu_pre_destroy_vm(struct kvm *kvm) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 0dbb25442ec1..661108d65ee7 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -4590,6 +4590,8 @@ void sev_es_vcpu_reset(struct vcpu_svm *svm) void sev_es_prepare_switch_to_guest(struct vcpu_svm *svm, struct sev_es_save_area *hostsa) { + struct kvm *kvm = svm->vcpu.kvm; + /* * All host state for SEV-ES guests is categorized into three swap types * based on how it is handled by hardware during a world switch: @@ -4613,14 +4615,22 @@ void sev_es_prepare_switch_to_guest(struct vcpu_svm *svm, struct sev_es_save_are /* * If DebugSwap is enabled, debug registers are loaded but NOT saved by - * the CPU (Type-B). If DebugSwap is disabled/unsupported, the CPU both - * saves and loads debug registers (Type-A). + * the CPU (Type-B). If DebugSwap is disabled/unsupported, the CPU does + * not save or load debug registers. Sadly, KVM can't prevent SNP + * guests from lying about DebugSwap on secondary vCPUs, i.e. the + * SEV_FEATURES provided at "AP Create" isn't guaranteed to match what + * the guest has actually enabled (or not!) in the VMSA. + * + * If DebugSwap is *possible*, save the masks so that they're restored + * if the guest enables DebugSwap. But for the DRs themselves, do NOT + * rely on the CPU to restore the host values; KVM will restore them as + * needed in common code, via hw_breakpoint_restore(). Note, KVM does + * NOT support virtualizing Breakpoint Extensions, i.e. the mask MSRs + * don't need to be restored per se, KVM just needs to ensure they are + * loaded with the correct values *if* the CPU writes the MSRs. */ - if (sev_vcpu_has_debug_swap(svm)) { - hostsa->dr0 = native_get_debugreg(0); - hostsa->dr1 = native_get_debugreg(1); - hostsa->dr2 = native_get_debugreg(2); - hostsa->dr3 = native_get_debugreg(3); + if (sev_vcpu_has_debug_swap(svm) || + (sev_snp_guest(kvm) && cpu_feature_enabled(X86_FEATURE_DEBUG_SWAP))) { hostsa->dr0_addr_mask = amd_get_dr_addr_mask(0); hostsa->dr1_addr_mask = amd_get_dr_addr_mask(1); hostsa->dr2_addr_mask = amd_get_dr_addr_mask(2); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index a713c803a3a3..e67de787fc71 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3165,6 +3165,27 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) kvm_pr_unimpl_wrmsr(vcpu, ecx, data); break; } + + /* + * AMD changed the architectural behavior of bits 5:2. On CPUs + * without BusLockTrap, bits 5:2 control "external pins", but + * on CPUs that support BusLockDetect, bit 2 enables BusLockTrap + * and bits 5:3 are reserved-to-zero. Sadly, old KVM allowed + * the guest to set bits 5:2 despite not actually virtualizing + * Performance-Monitoring/Breakpoint external pins. Drop bits + * 5:2 for backwards compatibility. + */ + data &= ~GENMASK(5, 2); + + /* + * Suppress BTF as KVM doesn't virtualize BTF, but there's no + * way to communicate lack of support to the guest. 
+ */ + if (data & DEBUGCTLMSR_BTF) { + kvm_pr_unimpl_wrmsr(vcpu, MSR_IA32_DEBUGCTLMSR, data); + data &= ~DEBUGCTLMSR_BTF; + } + if (data & DEBUGCTL_RESERVED_BITS) return 1; @@ -4189,6 +4210,18 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_in guest_state_enter_irqoff(); + /* + * Set RFLAGS.IF prior to VMRUN, as the host's RFLAGS.IF at the time of + * VMRUN controls whether or not physical IRQs are masked (KVM always + * runs with V_INTR_MASKING_MASK). Toggle RFLAGS.IF here to avoid the + * temptation to do STI+VMRUN+CLI, as AMD CPUs bleed the STI shadow + * into guest state if delivery of an event during VMRUN triggers a + * #VMEXIT, and the guest_state transitions already tell lockdep that + * IRQs are being enabled/disabled. Note! GIF=0 for the entirety of + * this path, so IRQs aren't actually unmasked while running host code. + */ + raw_local_irq_enable(); + amd_clear_divider(); if (sev_es_guest(vcpu->kvm)) @@ -4197,6 +4230,8 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_in else __svm_vcpu_run(svm, spec_ctrl_intercepted); + raw_local_irq_disable(); + guest_state_exit_irqoff(); } @@ -4253,6 +4288,16 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu, clgi(); kvm_load_guest_xsave_state(vcpu); + /* + * Hardware only context switches DEBUGCTL if LBR virtualization is + * enabled. Manually load DEBUGCTL if necessary (and restore it after + * VM-Exit), as running with the host's DEBUGCTL can negatively affect + * guest state and can even be fatal, e.g. due to Bus Lock Detect. + */ + if (!(svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK) && + vcpu->arch.host_debugctl != svm->vmcb->save.dbgctl) + update_debugctlmsr(svm->vmcb->save.dbgctl); + kvm_wait_lapic_expire(vcpu); /* @@ -4280,6 +4325,10 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu, if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI)) kvm_before_interrupt(vcpu, KVM_HANDLING_NMI); + if (!(svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK) && + vcpu->arch.host_debugctl != svm->vmcb->save.dbgctl) + update_debugctlmsr(vcpu->arch.host_debugctl); + kvm_load_host_xsave_state(vcpu); stgi(); diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 9d7cdb8fbf87..ea44c1da5a7c 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -584,7 +584,7 @@ static inline bool is_vnmi_enabled(struct vcpu_svm *svm) /* svm.c */ #define MSR_INVALID 0xffffffffU -#define DEBUGCTL_RESERVED_BITS (~(0x3fULL)) +#define DEBUGCTL_RESERVED_BITS (~DEBUGCTLMSR_LBR) extern bool dump_invalid_vmcb; diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S index 2ed80aea3bb1..0c61153b275f 100644 --- a/arch/x86/kvm/svm/vmenter.S +++ b/arch/x86/kvm/svm/vmenter.S @@ -170,12 +170,8 @@ SYM_FUNC_START(__svm_vcpu_run) mov VCPU_RDI(%_ASM_DI), %_ASM_DI /* Enter guest mode */ - sti - 3: vmrun %_ASM_AX 4: - cli - /* Pop @svm to RAX while it's the only available register. */ pop %_ASM_AX @@ -340,12 +336,8 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run) mov KVM_VMCB_pa(%rax), %rax /* Enter guest mode */ - sti - 1: vmrun %rax - -2: cli - +2: /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! 
*/ FILL_RETURN_BUFFER %rax, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 8a7af02d466e..ed8a3cb53961 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -5084,6 +5084,17 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason, load_vmcs12_host_state(vcpu, vmcs12); + /* + * Process events if an injectable IRQ or NMI is pending, even + * if the event is blocked (RFLAGS.IF is cleared on VM-Exit). + * If an event became pending while L2 was active, KVM needs to + * either inject the event or request an IRQ/NMI window. SMIs + * don't need to be processed as SMM is mutually exclusive with + * non-root mode. INIT/SIPI don't need to be checked as INIT + * is blocked post-VMXON, and SIPIs are ignored. + */ + if (kvm_cpu_has_injectable_intr(vcpu) || vcpu->arch.nmi_pending) + kvm_make_request(KVM_REQ_EVENT, vcpu); return; } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 6c56d5235f0f..3b92f893b239 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1514,16 +1514,12 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu, */ void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { - struct vcpu_vmx *vmx = to_vmx(vcpu); - if (vcpu->scheduled_out && !kvm_pause_in_guest(vcpu->kvm)) shrink_ple_window(vcpu); vmx_vcpu_load_vmcs(vcpu, cpu, NULL); vmx_vcpu_pi_load(vcpu, cpu); - - vmx->host_debugctlmsr = get_debugctlmsr(); } void vmx_vcpu_put(struct kvm_vcpu *vcpu) @@ -7458,8 +7454,8 @@ fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit) } /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */ - if (vmx->host_debugctlmsr) - update_debugctlmsr(vmx->host_debugctlmsr); + if (vcpu->arch.host_debugctl) + update_debugctlmsr(vcpu->arch.host_debugctl); #ifndef CONFIG_X86_64 /* diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 8b111ce1087c..951e44dc9d0e 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -340,8 +340,6 @@ struct vcpu_vmx { /* apic deadline value in host tsc */ u64 hv_deadline_tsc; - unsigned long host_debugctlmsr; - /* * Only bits masked by msr_ia32_feature_control_valid_bits can be set in * msr_ia32_feature_control. 
FEAT_CTL_LOCKED is always included diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 02159c967d29..4b64ab350bcd 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10968,6 +10968,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) set_debugreg(0, 7); } + vcpu->arch.host_debugctl = get_debugctlmsr(); + guest_timing_enter_irqoff(); for (;;) { @@ -12877,11 +12879,11 @@ void kvm_arch_destroy_vm(struct kvm *kvm) mutex_unlock(&kvm->slots_lock); } kvm_unload_vcpu_mmus(kvm); + kvm_destroy_vcpus(kvm); kvm_x86_call(vm_destroy)(kvm); kvm_free_msr_filter(srcu_dereference_check(kvm->arch.msr_filter, &kvm->srcu, 1)); kvm_pic_destroy(kvm); kvm_ioapic_destroy(kvm); - kvm_destroy_vcpus(kvm); kvfree(rcu_dereference_check(kvm->arch.apic_map, 1)); kfree(srcu_dereference_check(kvm->arch.pmu_event_filter, &kvm->srcu, 1)); kvm_mmu_uninit_vm(kvm); diff --git a/block/bio.c b/block/bio.c index f0c416e5931d..6ac5983ba51e 100644 --- a/block/bio.c +++ b/block/bio.c @@ -77,7 +77,7 @@ struct bio_slab { struct kmem_cache *slab; unsigned int slab_ref; unsigned int slab_size; - char name[8]; + char name[12]; }; static DEFINE_MUTEX(bio_slab_lock); static DEFINE_XARRAY(bio_slabs); diff --git a/block/blk-merge.c b/block/blk-merge.c index c7c85e10cf9c..1d1589c35297 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -329,7 +329,7 @@ int bio_split_rw_at(struct bio *bio, const struct queue_limits *lim, if (nsegs < lim->max_segments && bytes + bv.bv_len <= max_bytes && - bv.bv_offset + bv.bv_len <= PAGE_SIZE) { + bv.bv_offset + bv.bv_len <= lim->min_segment_size) { nsegs++; bytes += bv.bv_len; } else { diff --git a/block/blk-settings.c b/block/blk-settings.c index c44dadc35e1e..b9c6f0ec1c49 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -246,6 +246,7 @@ int blk_validate_limits(struct queue_limits *lim) { unsigned int max_hw_sectors; unsigned int logical_block_sectors; + unsigned long seg_size; int err; /* @@ -303,7 +304,7 @@ int blk_validate_limits(struct queue_limits *lim) max_hw_sectors = min_not_zero(lim->max_hw_sectors, lim->max_dev_sectors); if (lim->max_user_sectors) { - if (lim->max_user_sectors < PAGE_SIZE / SECTOR_SIZE) + if (lim->max_user_sectors < BLK_MIN_SEGMENT_SIZE / SECTOR_SIZE) return -EINVAL; lim->max_sectors = min(max_hw_sectors, lim->max_user_sectors); } else if (lim->io_opt > (BLK_DEF_MAX_SECTORS_CAP << SECTOR_SHIFT)) { @@ -341,7 +342,7 @@ int blk_validate_limits(struct queue_limits *lim) */ if (!lim->seg_boundary_mask) lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK; - if (WARN_ON_ONCE(lim->seg_boundary_mask < PAGE_SIZE - 1)) + if (WARN_ON_ONCE(lim->seg_boundary_mask < BLK_MIN_SEGMENT_SIZE - 1)) return -EINVAL; /* @@ -362,10 +363,17 @@ int blk_validate_limits(struct queue_limits *lim) */ if (!lim->max_segment_size) lim->max_segment_size = BLK_MAX_SEGMENT_SIZE; - if (WARN_ON_ONCE(lim->max_segment_size < PAGE_SIZE)) + if (WARN_ON_ONCE(lim->max_segment_size < BLK_MIN_SEGMENT_SIZE)) return -EINVAL; } + /* setup min segment size for building new segment in fast path */ + if (lim->seg_boundary_mask > lim->max_segment_size - 1) + seg_size = lim->max_segment_size; + else + seg_size = lim->seg_boundary_mask + 1; + lim->min_segment_size = min_t(unsigned int, seg_size, PAGE_SIZE); + /* * We require drivers to at least do logical block aligned I/O, but * historically could not check for that due to the separate calls diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 761ea662ddc3..0c77244a35c9 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -410,13 +410,14 @@ 
static bool disk_insert_zone_wplug(struct gendisk *disk, } } hlist_add_head_rcu(&zwplug->node, &disk->zone_wplugs_hash[idx]); + atomic_inc(&disk->nr_zone_wplugs); spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags); return true; } -static struct blk_zone_wplug *disk_get_zone_wplug(struct gendisk *disk, - sector_t sector) +static struct blk_zone_wplug *disk_get_hashed_zone_wplug(struct gendisk *disk, + sector_t sector) { unsigned int zno = disk_zone_no(disk, sector); unsigned int idx = hash_32(zno, disk->zone_wplugs_hash_bits); @@ -437,6 +438,15 @@ static struct blk_zone_wplug *disk_get_zone_wplug(struct gendisk *disk, return NULL; } +static inline struct blk_zone_wplug *disk_get_zone_wplug(struct gendisk *disk, + sector_t sector) +{ + if (!atomic_read(&disk->nr_zone_wplugs)) + return NULL; + + return disk_get_hashed_zone_wplug(disk, sector); +} + static void disk_free_zone_wplug_rcu(struct rcu_head *rcu_head) { struct blk_zone_wplug *zwplug = @@ -503,6 +513,7 @@ static void disk_remove_zone_wplug(struct gendisk *disk, zwplug->flags |= BLK_ZONE_WPLUG_UNHASHED; spin_lock_irqsave(&disk->zone_wplugs_lock, flags); hlist_del_init_rcu(&zwplug->node); + atomic_dec(&disk->nr_zone_wplugs); spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags); disk_put_zone_wplug(zwplug); } @@ -593,6 +604,11 @@ static void disk_zone_wplug_abort(struct blk_zone_wplug *zwplug) { struct bio *bio; + if (bio_list_empty(&zwplug->bio_list)) + return; + + pr_warn_ratelimited("%s: zone %u: Aborting plugged BIOs\n", + zwplug->disk->disk_name, zwplug->zone_no); while ((bio = bio_list_pop(&zwplug->bio_list))) blk_zone_wplug_bio_io_error(zwplug, bio); } @@ -1040,6 +1056,47 @@ plug: return true; } +static void blk_zone_wplug_handle_native_zone_append(struct bio *bio) +{ + struct gendisk *disk = bio->bi_bdev->bd_disk; + struct blk_zone_wplug *zwplug; + unsigned long flags; + + /* + * We have native support for zone append operations, so we are not + * going to handle @bio through plugging. However, we may already have a + * zone write plug for the target zone if that zone was previously + * partially written using regular writes. In such a case, we risk leaving + * the plug in the disk hash table if the zone is fully written using + * zone append operations. Avoid this by removing the zone write plug. + */ + zwplug = disk_get_zone_wplug(disk, bio->bi_iter.bi_sector); + if (likely(!zwplug)) + return; + + spin_lock_irqsave(&zwplug->lock, flags); + + /* + * We are about to remove the zone write plug. But if the user + * (mistakenly) has issued regular writes together with native zone + * append, we must abort the writes as otherwise the plugged BIOs would + * not be executed by the plug BIO work as disk_get_zone_wplug() will + * return NULL after the plug is removed. Aborting the plugged write + * BIOs is consistent with the fact that these writes will most likely + * fail anyway as there are no ordering guarantees between zone append + * operations and regular write operations. 
+ */ + if (!bio_list_empty(&zwplug->bio_list)) { + pr_warn_ratelimited("%s: zone %u: Invalid mix of zone append and regular writes\n", + disk->disk_name, zwplug->zone_no); + disk_zone_wplug_abort(zwplug); + } + disk_remove_zone_wplug(disk, zwplug); + spin_unlock_irqrestore(&zwplug->lock, flags); + + disk_put_zone_wplug(zwplug); +} + /** * blk_zone_plug_bio - Handle a zone write BIO with zone write plugging * @bio: The BIO being submitted @@ -1096,8 +1153,10 @@ bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs) */ switch (bio_op(bio)) { case REQ_OP_ZONE_APPEND: - if (!bdev_emulates_zone_append(bdev)) + if (!bdev_emulates_zone_append(bdev)) { + blk_zone_wplug_handle_native_zone_append(bio); return false; + } fallthrough; case REQ_OP_WRITE: case REQ_OP_WRITE_ZEROES: @@ -1284,6 +1343,7 @@ static int disk_alloc_zone_resources(struct gendisk *disk, { unsigned int i; + atomic_set(&disk->nr_zone_wplugs, 0); disk->zone_wplugs_hash_bits = min(ilog2(pool_size) + 1, BLK_ZONE_WPLUG_MAX_HASH_BITS); @@ -1338,6 +1398,7 @@ static void disk_destroy_zone_wplugs_hash_table(struct gendisk *disk) } } + WARN_ON_ONCE(atomic_read(&disk->nr_zone_wplugs)); kfree(disk->zone_wplugs_hash); disk->zone_wplugs_hash = NULL; disk->zone_wplugs_hash_bits = 0; @@ -1550,11 +1611,12 @@ static int blk_revalidate_seq_zone(struct blk_zone *zone, unsigned int idx, } /* - * We need to track the write pointer of all zones that are not - * empty nor full. So make sure we have a zone write plug for - * such zone if the device has a zone write plug hash table. + * If the device needs zone append emulation, we need to track the + * write pointer of all zones that are neither empty nor full. So make sure + * we have a zone write plug for such a zone if the device has a zone + * write plug hash table. 
*/ - if (!disk->zone_wplugs_hash) + if (!queue_emulates_zone_append(disk->queue) || !disk->zone_wplugs_hash) return 0; disk_zone_wplug_sync_wp_offset(disk, zone); diff --git a/block/blk.h b/block/blk.h index 90fa5f28ccab..9cf9a0099416 100644 --- a/block/blk.h +++ b/block/blk.h @@ -14,6 +14,7 @@ struct elevator_type; #define BLK_DEV_MAX_SECTORS (LLONG_MAX >> 9) +#define BLK_MIN_SEGMENT_SIZE 4096 /* Max future timer expiry for timeouts */ #define BLK_MAX_TIMEOUT (5 * HZ) @@ -358,8 +359,12 @@ struct bio *bio_split_zone_append(struct bio *bio, static inline bool bio_may_need_split(struct bio *bio, const struct queue_limits *lim) { - return lim->chunk_sectors || bio->bi_vcnt != 1 || - bio->bi_io_vec->bv_len + bio->bi_io_vec->bv_offset > PAGE_SIZE; + if (lim->chunk_sectors) + return true; + if (bio->bi_vcnt != 1) + return true; + return bio->bi_io_vec->bv_len + bio->bi_io_vec->bv_offset > + lim->min_segment_size; } /** diff --git a/block/partitions/efi.c b/block/partitions/efi.c index 5e9be13a56a8..7acba66eed48 100644 --- a/block/partitions/efi.c +++ b/block/partitions/efi.c @@ -682,7 +682,7 @@ static void utf16_le_to_7bit(const __le16 *in, unsigned int size, u8 *out) out[size] = 0; while (i < size) { - u8 c = le16_to_cpu(in[i]) & 0xff; + u8 c = le16_to_cpu(in[i]) & 0x7f; if (c && !isprint(c)) c = '!'; diff --git a/drivers/acpi/platform_profile.c b/drivers/acpi/platform_profile.c index 2ad53cc6aae5..ef9444482db1 100644 --- a/drivers/acpi/platform_profile.c +++ b/drivers/acpi/platform_profile.c @@ -21,9 +21,15 @@ struct platform_profile_handler { struct device dev; int minor; unsigned long choices[BITS_TO_LONGS(PLATFORM_PROFILE_LAST)]; + unsigned long hidden_choices[BITS_TO_LONGS(PLATFORM_PROFILE_LAST)]; const struct platform_profile_ops *ops; }; +struct aggregate_choices_data { + unsigned long aggregate[BITS_TO_LONGS(PLATFORM_PROFILE_LAST)]; + int count; +}; + static const char * const profile_names[] = { [PLATFORM_PROFILE_LOW_POWER] = "low-power", [PLATFORM_PROFILE_COOL] = "cool", @@ -73,7 +79,7 @@ static int _store_class_profile(struct device *dev, void *data) lockdep_assert_held(&profile_lock); handler = to_pprof_handler(dev); - if (!test_bit(*bit, handler->choices)) + if (!test_bit(*bit, handler->choices) && !test_bit(*bit, handler->hidden_choices)) return -EOPNOTSUPP; return handler->ops->profile_set(dev, *bit); @@ -239,21 +245,44 @@ static const struct class platform_profile_class = { /** * _aggregate_choices - Aggregate the available profile choices * @dev: The device - * @data: The available profile choices + * @arg: struct aggregate_choices_data * * Return: 0 on success, -errno on failure */ -static int _aggregate_choices(struct device *dev, void *data) +static int _aggregate_choices(struct device *dev, void *arg) { + unsigned long tmp[BITS_TO_LONGS(PLATFORM_PROFILE_LAST)]; + struct aggregate_choices_data *data = arg; struct platform_profile_handler *handler; - unsigned long *aggregate = data; lockdep_assert_held(&profile_lock); handler = to_pprof_handler(dev); - if (test_bit(PLATFORM_PROFILE_LAST, aggregate)) - bitmap_copy(aggregate, handler->choices, PLATFORM_PROFILE_LAST); + bitmap_or(tmp, handler->choices, handler->hidden_choices, PLATFORM_PROFILE_LAST); + if (test_bit(PLATFORM_PROFILE_LAST, data->aggregate)) + bitmap_copy(data->aggregate, tmp, PLATFORM_PROFILE_LAST); else - bitmap_and(aggregate, handler->choices, aggregate, PLATFORM_PROFILE_LAST); + bitmap_and(data->aggregate, tmp, data->aggregate, PLATFORM_PROFILE_LAST); + data->count++; + + return 0; +} + +/** + * 
_remove_hidden_choices - Remove hidden choices from aggregate data + * @dev: The device + * @arg: struct aggregate_choices_data + * + * Return: 0 on success, -errno on failure + */ +static int _remove_hidden_choices(struct device *dev, void *arg) +{ + struct aggregate_choices_data *data = arg; + struct platform_profile_handler *handler; + + lockdep_assert_held(&profile_lock); + handler = to_pprof_handler(dev); + bitmap_andnot(data->aggregate, handler->choices, + handler->hidden_choices, PLATFORM_PROFILE_LAST); return 0; } @@ -270,22 +299,31 @@ static ssize_t platform_profile_choices_show(struct device *dev, struct device_attribute *attr, char *buf) { - unsigned long aggregate[BITS_TO_LONGS(PLATFORM_PROFILE_LAST)]; + struct aggregate_choices_data data = { + .aggregate = { [0 ... BITS_TO_LONGS(PLATFORM_PROFILE_LAST) - 1] = ~0UL }, + .count = 0, + }; int err; - set_bit(PLATFORM_PROFILE_LAST, aggregate); + set_bit(PLATFORM_PROFILE_LAST, data.aggregate); scoped_cond_guard(mutex_intr, return -ERESTARTSYS, &profile_lock) { err = class_for_each_device(&platform_profile_class, NULL, - aggregate, _aggregate_choices); + &data, _aggregate_choices); if (err) return err; + if (data.count == 1) { + err = class_for_each_device(&platform_profile_class, NULL, + &data, _remove_hidden_choices); + if (err) + return err; + } } /* no profile handler registered any more */ - if (bitmap_empty(aggregate, PLATFORM_PROFILE_LAST)) + if (bitmap_empty(data.aggregate, PLATFORM_PROFILE_LAST)) return -EINVAL; - return _commmon_choices_show(aggregate, buf); + return _commmon_choices_show(data.aggregate, buf); } /** @@ -373,7 +411,10 @@ static ssize_t platform_profile_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - unsigned long choices[BITS_TO_LONGS(PLATFORM_PROFILE_LAST)]; + struct aggregate_choices_data data = { + .aggregate = { [0 ... BITS_TO_LONGS(PLATFORM_PROFILE_LAST) - 1] = ~0UL }, + .count = 0, + }; int ret; int i; @@ -381,13 +422,13 @@ static ssize_t platform_profile_store(struct device *dev, i = sysfs_match_string(profile_names, buf); if (i < 0 || i == PLATFORM_PROFILE_CUSTOM) return -EINVAL; - set_bit(PLATFORM_PROFILE_LAST, choices); + set_bit(PLATFORM_PROFILE_LAST, data.aggregate); scoped_cond_guard(mutex_intr, return -ERESTARTSYS, &profile_lock) { ret = class_for_each_device(&platform_profile_class, NULL, - choices, _aggregate_choices); + &data, _aggregate_choices); if (ret) return ret; - if (!test_bit(i, choices)) + if (!test_bit(i, data.aggregate)) return -EOPNOTSUPP; ret = class_for_each_device(&platform_profile_class, NULL, &i, @@ -453,12 +494,15 @@ EXPORT_SYMBOL_GPL(platform_profile_notify); */ int platform_profile_cycle(void) { + struct aggregate_choices_data data = { + .aggregate = { [0 ... 
BITS_TO_LONGS(PLATFORM_PROFILE_LAST) - 1] = ~0UL }, + .count = 0, + }; enum platform_profile_option next = PLATFORM_PROFILE_LAST; enum platform_profile_option profile = PLATFORM_PROFILE_LAST; - unsigned long choices[BITS_TO_LONGS(PLATFORM_PROFILE_LAST)]; int err; - set_bit(PLATFORM_PROFILE_LAST, choices); + set_bit(PLATFORM_PROFILE_LAST, data.aggregate); scoped_cond_guard(mutex_intr, return -ERESTARTSYS, &profile_lock) { err = class_for_each_device(&platform_profile_class, NULL, &profile, _aggregate_profiles); @@ -470,14 +514,14 @@ int platform_profile_cycle(void) return -EINVAL; err = class_for_each_device(&platform_profile_class, NULL, - choices, _aggregate_choices); + &data, _aggregate_choices); if (err) return err; /* never iterate into a custom if all drivers supported it */ - clear_bit(PLATFORM_PROFILE_CUSTOM, choices); + clear_bit(PLATFORM_PROFILE_CUSTOM, data.aggregate); - next = find_next_bit_wrap(choices, + next = find_next_bit_wrap(data.aggregate, PLATFORM_PROFILE_LAST, profile + 1); @@ -532,6 +576,14 @@ struct device *platform_profile_register(struct device *dev, const char *name, return ERR_PTR(-EINVAL); } + if (ops->hidden_choices) { + err = ops->hidden_choices(drvdata, pprof->hidden_choices); + if (err) { + dev_err(dev, "platform_profile hidden_choices failed\n"); + return ERR_PTR(err); + } + } + guard(mutex)(&profile_lock); /* create class interface for individual handler */ diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c index bc6bae76ccaf..94c6446604fc 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -274,6 +274,7 @@ static void binderfs_evict_inode(struct inode *inode) mutex_unlock(&binderfs_minors_mutex); if (refcount_dec_and_test(&device->ref)) { + hlist_del_init(&device->hlist); kfree(device->context.name); kfree(device); } diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h index 8e895ae45c86..c842e2de6ef9 100644 --- a/drivers/ata/ahci.h +++ b/drivers/ata/ahci.h @@ -386,8 +386,12 @@ struct ahci_host_priv { static inline bool ahci_ignore_port(struct ahci_host_priv *hpriv, unsigned int portid) { - return portid >= hpriv->nports || - !(hpriv->mask_port_map & (1 << portid)); + if (portid >= hpriv->nports) + return true; + /* mask_port_map not set means that all ports are available */ + if (!hpriv->mask_port_map) + return false; + return !(hpriv->mask_port_map & (1 << portid)); } extern int ahci_ignore_sss; diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index fdfa7b266218..e7ace4b10f15 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -541,6 +541,7 @@ void ahci_save_initial_config(struct device *dev, struct ahci_host_priv *hpriv) hpriv->saved_port_map = port_map; } + /* mask_port_map not set means that all ports are available */ if (hpriv->mask_port_map) { dev_warn(dev, "masking port_map 0x%lx -> 0x%lx\n", port_map, diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 63ec2f218431..c085dd81ebe7 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4143,10 +4143,6 @@ static const struct ata_dev_quirks_entry __ata_dev_quirks[] = { { "Samsung SSD 860*", NULL, ATA_QUIRK_NO_NCQ_TRIM | ATA_QUIRK_ZERO_AFTER_TRIM | ATA_QUIRK_NO_NCQ_ON_ATI }, - { "Samsung SSD 870 QVO*", NULL, ATA_QUIRK_NO_NCQ_TRIM | - ATA_QUIRK_ZERO_AFTER_TRIM | - ATA_QUIRK_NO_NCQ_ON_ATI | - ATA_QUIRK_NOLPM }, { "Samsung SSD 870*", NULL, ATA_QUIRK_NO_NCQ_TRIM | ATA_QUIRK_ZERO_AFTER_TRIM | ATA_QUIRK_NO_NCQ_ON_ATI }, diff --git a/drivers/base/core.c b/drivers/base/core.c index 
5a1f05198114..2fde698430df 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -2079,6 +2079,7 @@ static bool __fw_devlink_relax_cycles(struct fwnode_handle *con_handle, out: sup_handle->flags &= ~FWNODE_FLAG_VISITED; put_device(sup_dev); + put_device(con_dev); put_device(par_dev); return ret; } diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 529085181f35..ca9a67b5b537 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -2715,9 +2715,12 @@ static int ublk_ctrl_set_params(struct ublk_device *ub, if (ph.len > sizeof(struct ublk_params)) ph.len = sizeof(struct ublk_params); - /* parameters can only be changed when device isn't live */ mutex_lock(&ub->mutex); - if (ub->dev_info.state == UBLK_S_DEV_LIVE) { + if (test_bit(UB_STATE_USED, &ub->state)) { + /* + * Parameters can only be changed when device hasn't + * been started yet + */ ret = -EACCES; } else if (copy_from_user(&ub->params, argp, ph.len)) { ret = -EFAULT; diff --git a/drivers/bluetooth/Kconfig b/drivers/bluetooth/Kconfig index 4ab32abf0f48..7771edf54fb3 100644 --- a/drivers/bluetooth/Kconfig +++ b/drivers/bluetooth/Kconfig @@ -56,6 +56,18 @@ config BT_HCIBTUSB_POLL_SYNC Say Y here to enable USB poll_sync for Bluetooth USB devices by default. +config BT_HCIBTUSB_AUTO_ISOC_ALT + bool "Automatically adjust alternate setting for Isoc endpoints" + depends on BT_HCIBTUSB + default y if CHROME_PLATFORMS + help + Say Y here to automatically adjust the alternate setting for + HCI_USER_CHANNEL whenever a SCO link is established. + + When enabled, btusb intercepts the HCI_EV_SYNC_CONN_COMPLETE packets + and configures the isoc endpoint alternate setting automatically when + HCI_USER_CHANNEL is in use. + config BT_HCIBTUSB_BCM bool "Broadcom protocol support" depends on BT_HCIBTUSB diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 8149e53fd0a7..a0fc465458b2 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -34,6 +34,7 @@ static bool force_scofix; static bool enable_autosuspend = IS_ENABLED(CONFIG_BT_HCIBTUSB_AUTOSUSPEND); static bool enable_poll_sync = IS_ENABLED(CONFIG_BT_HCIBTUSB_POLL_SYNC); static bool reset = true; +static bool auto_isoc_alt = IS_ENABLED(CONFIG_BT_HCIBTUSB_AUTO_ISOC_ALT); static struct usb_driver btusb_driver; @@ -1085,6 +1086,42 @@ static inline void btusb_free_frags(struct btusb_data *data) spin_unlock_irqrestore(&data->rxlock, flags); } +static void btusb_sco_connected(struct btusb_data *data, struct sk_buff *skb) +{ + struct hci_event_hdr *hdr = (void *) skb->data; + struct hci_ev_sync_conn_complete *ev = + (void *) skb->data + sizeof(*hdr); + struct hci_dev *hdev = data->hdev; + unsigned int notify_air_mode; + + if (hci_skb_pkt_type(skb) != HCI_EVENT_PKT) + return; + + if (skb->len < sizeof(*hdr) || hdr->evt != HCI_EV_SYNC_CONN_COMPLETE) + return; + + if (skb->len != sizeof(*hdr) + sizeof(*ev) || ev->status) + return; + + switch (ev->air_mode) { + case BT_CODEC_CVSD: + notify_air_mode = HCI_NOTIFY_ENABLE_SCO_CVSD; + break; + + case BT_CODEC_TRANSPARENT: + notify_air_mode = HCI_NOTIFY_ENABLE_SCO_TRANSP; + break; + + default: + return; + } + + bt_dev_info(hdev, "enabling SCO with air mode %u", ev->air_mode); + data->sco_num = 1; + data->air_mode = notify_air_mode; + schedule_work(&data->work); +} + static int btusb_recv_event(struct btusb_data *data, struct sk_buff *skb) { if (data->intr_interval) {
schedule_delayed_work(&data->rx_work, 0); } + /* Configure altsetting for HCI_USER_CHANNEL on SCO connected */ + if (auto_isoc_alt && hci_dev_test_flag(data->hdev, HCI_USER_CHANNEL)) + btusb_sco_connected(data, skb); + return data->recv_event(data->hdev, skb); } @@ -3644,6 +3685,7 @@ static ssize_t force_poll_sync_write(struct file *file, } static const struct file_operations force_poll_sync_fops = { + .owner = THIS_MODULE, .open = simple_open, .read = force_poll_sync_read, .write = force_poll_sync_write, diff --git a/drivers/bus/mhi/host/pci_generic.c b/drivers/bus/mhi/host/pci_generic.c index c41119b9079f..7ffea0f98162 100644 --- a/drivers/bus/mhi/host/pci_generic.c +++ b/drivers/bus/mhi/host/pci_generic.c @@ -1095,8 +1095,9 @@ static void mhi_pci_recovery_work(struct work_struct *work) err_unprepare: mhi_unprepare_after_power_down(mhi_cntrl); err_try_reset: - if (pci_reset_function(pdev)) - dev_err(&pdev->dev, "Recovery failed\n"); + err = pci_try_reset_function(pdev); + if (err) + dev_err(&pdev->dev, "Recovery failed: %d\n", err); } static void health_check(struct timer_list *t) diff --git a/drivers/bus/simple-pm-bus.c b/drivers/bus/simple-pm-bus.c index 5dea31769f9a..d8e029e7e53f 100644 --- a/drivers/bus/simple-pm-bus.c +++ b/drivers/bus/simple-pm-bus.c @@ -109,9 +109,29 @@ static int simple_pm_bus_runtime_resume(struct device *dev) return 0; } +static int simple_pm_bus_suspend(struct device *dev) +{ + struct simple_pm_bus *bus = dev_get_drvdata(dev); + + if (!bus) + return 0; + + return pm_runtime_force_suspend(dev); +} + +static int simple_pm_bus_resume(struct device *dev) +{ + struct simple_pm_bus *bus = dev_get_drvdata(dev); + + if (!bus) + return 0; + + return pm_runtime_force_resume(dev); +} + static const struct dev_pm_ops simple_pm_bus_pm_ops = { RUNTIME_PM_OPS(simple_pm_bus_runtime_suspend, simple_pm_bus_runtime_resume, NULL) - NOIRQ_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume) + NOIRQ_SYSTEM_SLEEP_PM_OPS(simple_pm_bus_suspend, simple_pm_bus_resume) }; #define ONLY_BUS ((void *) 1) /* Match if the device is only a bus. 
*/ diff --git a/drivers/cdx/cdx.c b/drivers/cdx/cdx.c index c573ed2ee71a..7811aa734053 100644 --- a/drivers/cdx/cdx.c +++ b/drivers/cdx/cdx.c @@ -473,8 +473,12 @@ static ssize_t driver_override_show(struct device *dev, struct device_attribute *attr, char *buf) { struct cdx_device *cdx_dev = to_cdx_device(dev); + ssize_t len; - return sysfs_emit(buf, "%s\n", cdx_dev->driver_override); + device_lock(dev); + len = sysfs_emit(buf, "%s\n", cdx_dev->driver_override); + device_unlock(dev); + return len; } static DEVICE_ATTR_RW(driver_override); diff --git a/drivers/char/misc.c b/drivers/char/misc.c index 2cf595d2e10b..f7dd455dd0dd 100644 --- a/drivers/char/misc.c +++ b/drivers/char/misc.c @@ -264,8 +264,8 @@ int misc_register(struct miscdevice *misc) device_create_with_groups(&misc_class, misc->parent, dev, misc, misc->groups, "%s", misc->name); if (IS_ERR(misc->this_device)) { + misc_minor_free(misc->minor); if (is_dynamic) { - misc_minor_free(misc->minor); misc->minor = MISC_DYNAMIC_MINOR; } err = PTR_ERR(misc->this_device); diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 24442485e73e..18f92dd44d45 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -923,14 +923,14 @@ static ssize_t port_fops_splice_write(struct pipe_inode_info *pipe, pipe_lock(pipe); ret = 0; - if (pipe_empty(pipe->head, pipe->tail)) + if (pipe_is_empty(pipe)) goto error_out; ret = wait_port_writable(port, filp->f_flags & O_NONBLOCK); if (ret < 0) goto error_out; - occupancy = pipe_occupancy(pipe->head, pipe->tail); + occupancy = pipe_buf_usage(pipe); buf = alloc_buf(port->portdev->vdev, 0, occupancy); if (!buf) { diff --git a/drivers/dma/qcom/bam_dma.c b/drivers/dma/qcom/bam_dma.c index c14557efd577..bbc3276992bb 100644 --- a/drivers/dma/qcom/bam_dma.c +++ b/drivers/dma/qcom/bam_dma.c @@ -59,9 +59,6 @@ struct bam_desc_hw { #define DESC_FLAG_NWD BIT(12) #define DESC_FLAG_CMD BIT(11) -#define BAM_NDP_REVISION_START 0x20 -#define BAM_NDP_REVISION_END 0x27 - struct bam_async_desc { struct virt_dma_desc vd; @@ -401,7 +398,6 @@ struct bam_device { /* dma start transaction tasklet */ struct tasklet_struct task; - u32 bam_revision; }; /** @@ -445,10 +441,8 @@ static void bam_reset(struct bam_device *bdev) writel_relaxed(val, bam_addr(bdev, 0, BAM_CTRL)); /* set descriptor threshold, start with 4 bytes */ - if (in_range(bdev->bam_revision, BAM_NDP_REVISION_START, - BAM_NDP_REVISION_END)) - writel_relaxed(DEFAULT_CNT_THRSHLD, - bam_addr(bdev, 0, BAM_DESC_CNT_TRSHLD)); + writel_relaxed(DEFAULT_CNT_THRSHLD, + bam_addr(bdev, 0, BAM_DESC_CNT_TRSHLD)); /* Enable default set of h/w workarounds, ie all except BAM_FULL_PIPE */ writel_relaxed(BAM_CNFG_BITS_DEFAULT, bam_addr(bdev, 0, BAM_CNFG_BITS)); @@ -1006,10 +1000,9 @@ static void bam_apply_new_config(struct bam_chan *bchan, maxburst = bchan->slave.src_maxburst; else maxburst = bchan->slave.dst_maxburst; - if (in_range(bdev->bam_revision, BAM_NDP_REVISION_START, - BAM_NDP_REVISION_END)) - writel_relaxed(maxburst, - bam_addr(bdev, 0, BAM_DESC_CNT_TRSHLD)); + + writel_relaxed(maxburst, + bam_addr(bdev, 0, BAM_DESC_CNT_TRSHLD)); } bchan->reconfigure = 0; @@ -1199,11 +1192,10 @@ static int bam_init(struct bam_device *bdev) u32 val; /* read revision and configuration information */ - val = readl_relaxed(bam_addr(bdev, 0, BAM_REVISION)); - if (!bdev->num_ees) + if (!bdev->num_ees) { + val = readl_relaxed(bam_addr(bdev, 0, BAM_REVISION)); bdev->num_ees = (val >> NUM_EES_SHIFT) & NUM_EES_MASK; - - bdev->bam_revision = val & 
REVISION_MASK; + } /* check that configured EE is within range */ if (bdev->ee >= bdev->num_ees) diff --git a/drivers/dma/tegra210-adma.c b/drivers/dma/tegra210-adma.c index 5c6a5b358987..ce80ac4b1a1b 100644 --- a/drivers/dma/tegra210-adma.c +++ b/drivers/dma/tegra210-adma.c @@ -83,7 +83,9 @@ struct tegra_adma; * @nr_channels: Number of DMA channels available. * @ch_fifo_size_mask: Mask for FIFO size field. * @sreq_index_offset: Slave channel index offset. + * @max_page: Maximum ADMA Channel Page. * @has_outstanding_reqs: If DMA channel can have outstanding requests. + * @set_global_pg_config: Global page programming. */ struct tegra_adma_chip_data { unsigned int (*adma_get_burst_config)(unsigned int burst_size); @@ -99,6 +101,7 @@ struct tegra_adma_chip_data { unsigned int nr_channels; unsigned int ch_fifo_size_mask; unsigned int sreq_index_offset; + unsigned int max_page; bool has_outstanding_reqs; void (*set_global_pg_config)(struct tegra_adma *tdma); }; @@ -854,6 +857,7 @@ static const struct tegra_adma_chip_data tegra210_chip_data = { .nr_channels = 22, .ch_fifo_size_mask = 0xf, .sreq_index_offset = 2, + .max_page = 0, .has_outstanding_reqs = false, .set_global_pg_config = NULL, }; @@ -871,6 +875,7 @@ static const struct tegra_adma_chip_data tegra186_chip_data = { .nr_channels = 32, .ch_fifo_size_mask = 0x1f, .sreq_index_offset = 4, + .max_page = 4, .has_outstanding_reqs = true, .set_global_pg_config = tegra186_adma_global_page_config, }; diff --git a/drivers/firmware/efi/cper-arm.c b/drivers/firmware/efi/cper-arm.c index fa9c1c3bf168..f0a63d09d3c4 100644 --- a/drivers/firmware/efi/cper-arm.c +++ b/drivers/firmware/efi/cper-arm.c @@ -311,7 +311,7 @@ void cper_print_proc_arm(const char *pfx, ctx_info = (struct cper_arm_ctx_info *)err_info; max_ctx_type = ARRAY_SIZE(arm_reg_ctx_strs) - 1; for (i = 0; i < proc->context_info_num; i++) { - int size = sizeof(*ctx_info) + ctx_info->size; + int size = ALIGN(sizeof(*ctx_info) + ctx_info->size, 16); printk("%sContext info structure %d:\n", pfx, i); if (len < size) { diff --git a/drivers/firmware/efi/cper-x86.c b/drivers/firmware/efi/cper-x86.c index 438ed9eff6d0..3949d7b5e808 100644 --- a/drivers/firmware/efi/cper-x86.c +++ b/drivers/firmware/efi/cper-x86.c @@ -325,7 +325,7 @@ void cper_print_proc_ia(const char *pfx, const struct cper_sec_proc_ia *proc) ctx_info = (struct cper_ia_proc_ctx *)err_info; for (i = 0; i < VALID_PROC_CXT_INFO_NUM(proc->validation_bits); i++) { - int size = sizeof(*ctx_info) + ctx_info->reg_arr_size; + int size = ALIGN(sizeof(*ctx_info) + ctx_info->reg_arr_size, 16); int groupsize = 4; printk("%sContext Information Structure %d:\n", pfx, i); diff --git a/drivers/firmware/efi/mokvar-table.c b/drivers/firmware/efi/mokvar-table.c index 5ed0602c2f75..208db29613c6 100644 --- a/drivers/firmware/efi/mokvar-table.c +++ b/drivers/firmware/efi/mokvar-table.c @@ -99,14 +99,13 @@ static struct kobject *mokvar_kobj; */ void __init efi_mokvar_table_init(void) { + struct efi_mokvar_table_entry __aligned(1) *mokvar_entry, *next_entry; efi_memory_desc_t md; void *va = NULL; unsigned long cur_offset = 0; unsigned long offset_limit; - unsigned long map_size = 0; unsigned long map_size_needed = 0; unsigned long size; - struct efi_mokvar_table_entry *mokvar_entry; int err; if (!efi_enabled(EFI_MEMMAP)) @@ -134,48 +133,46 @@ void __init efi_mokvar_table_init(void) */ err = -EINVAL; while (cur_offset + sizeof(*mokvar_entry) <= offset_limit) { - mokvar_entry = va + cur_offset; - map_size_needed = cur_offset + sizeof(*mokvar_entry); - if 
(map_size_needed > map_size) { - if (va) - early_memunmap(va, map_size); - /* - * Map a little more than the fixed size entry - * header, anticipating some data. It's safe to - * do so as long as we stay within current memory - * descriptor. - */ - map_size = min(map_size_needed + 2*EFI_PAGE_SIZE, - offset_limit); - va = early_memremap(efi.mokvar_table, map_size); - if (!va) { - pr_err("Failed to map EFI MOKvar config table pa=0x%lx, size=%lu.\n", - efi.mokvar_table, map_size); - return; - } - mokvar_entry = va + cur_offset; + if (va) + early_memunmap(va, sizeof(*mokvar_entry)); + va = early_memremap(efi.mokvar_table + cur_offset, sizeof(*mokvar_entry)); + if (!va) { + pr_err("Failed to map EFI MOKvar config table pa=0x%lx, size=%zu.\n", + efi.mokvar_table + cur_offset, sizeof(*mokvar_entry)); + return; } - + mokvar_entry = va; +next: /* Check for last sentinel entry */ if (mokvar_entry->name[0] == '\0') { if (mokvar_entry->data_size != 0) break; err = 0; + map_size_needed = cur_offset + sizeof(*mokvar_entry); break; } - /* Sanity check that the name is null terminated */ - size = strnlen(mokvar_entry->name, - sizeof(mokvar_entry->name)); - if (size >= sizeof(mokvar_entry->name)) - break; + /* Enforce that the name is NUL terminated */ + mokvar_entry->name[sizeof(mokvar_entry->name) - 1] = '\0'; /* Advance to the next entry */ - cur_offset = map_size_needed + mokvar_entry->data_size; + size = sizeof(*mokvar_entry) + mokvar_entry->data_size; + cur_offset += size; + + /* + * Don't bother remapping if the current entry header and the + * next one end on the same page. + */ + next_entry = (void *)((unsigned long)mokvar_entry + size); + if (((((unsigned long)(mokvar_entry + 1) - 1) ^ + ((unsigned long)(next_entry + 1) - 1)) & PAGE_MASK) == 0) { + mokvar_entry = next_entry; + goto next; + } } if (va) - early_memunmap(va, map_size); + early_memunmap(va, sizeof(*mokvar_entry)); if (err) { pr_err("EFI MOKvar config table is not valid\n"); return; diff --git a/drivers/gpio/gpio-aggregator.c b/drivers/gpio/gpio-aggregator.c index 65f41cc3eafc..d668ddb2e81d 100644 --- a/drivers/gpio/gpio-aggregator.c +++ b/drivers/gpio/gpio-aggregator.c @@ -119,10 +119,15 @@ static ssize_t new_device_store(struct device_driver *driver, const char *buf, struct platform_device *pdev; int res, id; + if (!try_module_get(THIS_MODULE)) + return -ENOENT; + /* kernfs guarantees string termination, so count + 1 is safe */ aggr = kzalloc(sizeof(*aggr) + count + 1, GFP_KERNEL); - if (!aggr) - return -ENOMEM; + if (!aggr) { + res = -ENOMEM; + goto put_module; + } memcpy(aggr->args, buf, count + 1); @@ -161,6 +166,7 @@ static ssize_t new_device_store(struct device_driver *driver, const char *buf, } aggr->pdev = pdev; + module_put(THIS_MODULE); return count; remove_table: @@ -175,6 +181,8 @@ free_table: kfree(aggr->lookups); free_ga: kfree(aggr); +put_module: + module_put(THIS_MODULE); return res; } @@ -203,13 +211,19 @@ static ssize_t delete_device_store(struct device_driver *driver, if (error) return error; + if (!try_module_get(THIS_MODULE)) + return -ENOENT; + mutex_lock(&gpio_aggregator_lock); aggr = idr_remove(&gpio_aggregator_idr, id); mutex_unlock(&gpio_aggregator_lock); - if (!aggr) + if (!aggr) { + module_put(THIS_MODULE); return -ENOENT; + } gpio_aggregator_free(aggr); + module_put(THIS_MODULE); return count; } static DRIVER_ATTR_WO(delete_device); diff --git a/drivers/gpio/gpio-rcar.c b/drivers/gpio/gpio-rcar.c index 2ecee3269a0c..a7a1cdf7ac66 100644 --- a/drivers/gpio/gpio-rcar.c +++ b/drivers/gpio/gpio-rcar.c @@ -40,7 
+40,7 @@ struct gpio_rcar_info { struct gpio_rcar_priv { void __iomem *base; - spinlock_t lock; + raw_spinlock_t lock; struct device *dev; struct gpio_chip gpio_chip; unsigned int irq_parent; @@ -123,7 +123,7 @@ static void gpio_rcar_config_interrupt_input_mode(struct gpio_rcar_priv *p, * "Setting Level-Sensitive Interrupt Input Mode" */ - spin_lock_irqsave(&p->lock, flags); + raw_spin_lock_irqsave(&p->lock, flags); /* Configure positive or negative logic in POSNEG */ gpio_rcar_modify_bit(p, POSNEG, hwirq, !active_high_rising_edge); @@ -142,7 +142,7 @@ static void gpio_rcar_config_interrupt_input_mode(struct gpio_rcar_priv *p, if (!level_trigger) gpio_rcar_write(p, INTCLR, BIT(hwirq)); - spin_unlock_irqrestore(&p->lock, flags); + raw_spin_unlock_irqrestore(&p->lock, flags); } static int gpio_rcar_irq_set_type(struct irq_data *d, unsigned int type) @@ -246,7 +246,7 @@ static void gpio_rcar_config_general_input_output_mode(struct gpio_chip *chip, * "Setting General Input Mode" */ - spin_lock_irqsave(&p->lock, flags); + raw_spin_lock_irqsave(&p->lock, flags); /* Configure positive logic in POSNEG */ gpio_rcar_modify_bit(p, POSNEG, gpio, false); @@ -261,7 +261,7 @@ static void gpio_rcar_config_general_input_output_mode(struct gpio_chip *chip, if (p->info.has_outdtsel && output) gpio_rcar_modify_bit(p, OUTDTSEL, gpio, false); - spin_unlock_irqrestore(&p->lock, flags); + raw_spin_unlock_irqrestore(&p->lock, flags); } static int gpio_rcar_request(struct gpio_chip *chip, unsigned offset) @@ -347,7 +347,7 @@ static int gpio_rcar_get_multiple(struct gpio_chip *chip, unsigned long *mask, return 0; } - spin_lock_irqsave(&p->lock, flags); + raw_spin_lock_irqsave(&p->lock, flags); outputs = gpio_rcar_read(p, INOUTSEL); m = outputs & bankmask; if (m) @@ -356,7 +356,7 @@ static int gpio_rcar_get_multiple(struct gpio_chip *chip, unsigned long *mask, m = ~outputs & bankmask; if (m) val |= gpio_rcar_read(p, INDT) & m; - spin_unlock_irqrestore(&p->lock, flags); + raw_spin_unlock_irqrestore(&p->lock, flags); bits[0] = val; return 0; @@ -367,9 +367,9 @@ static void gpio_rcar_set(struct gpio_chip *chip, unsigned offset, int value) struct gpio_rcar_priv *p = gpiochip_get_data(chip); unsigned long flags; - spin_lock_irqsave(&p->lock, flags); + raw_spin_lock_irqsave(&p->lock, flags); gpio_rcar_modify_bit(p, OUTDT, offset, value); - spin_unlock_irqrestore(&p->lock, flags); + raw_spin_unlock_irqrestore(&p->lock, flags); } static void gpio_rcar_set_multiple(struct gpio_chip *chip, unsigned long *mask, @@ -386,12 +386,12 @@ static void gpio_rcar_set_multiple(struct gpio_chip *chip, unsigned long *mask, if (!bankmask) return; - spin_lock_irqsave(&p->lock, flags); + raw_spin_lock_irqsave(&p->lock, flags); val = gpio_rcar_read(p, OUTDT); val &= ~bankmask; val |= (bankmask & bits[0]); gpio_rcar_write(p, OUTDT, val); - spin_unlock_irqrestore(&p->lock, flags); + raw_spin_unlock_irqrestore(&p->lock, flags); } static int gpio_rcar_direction_output(struct gpio_chip *chip, unsigned offset, @@ -468,7 +468,12 @@ static int gpio_rcar_parse_dt(struct gpio_rcar_priv *p, unsigned int *npins) p->info = *info; ret = of_parse_phandle_with_fixed_args(np, "gpio-ranges", 3, 0, &args); - *npins = ret == 0 ? 
args.args[2] : RCAR_MAX_GPIO_PER_BANK; + if (ret) { + *npins = RCAR_MAX_GPIO_PER_BANK; + } else { + *npins = args.args[2]; + of_node_put(args.np); + } if (*npins == 0 || *npins > RCAR_MAX_GPIO_PER_BANK) { dev_warn(p->dev, "Invalid number of gpio lines %u, using %u\n", @@ -505,7 +510,7 @@ static int gpio_rcar_probe(struct platform_device *pdev) return -ENOMEM; p->dev = dev; - spin_lock_init(&p->lock); + raw_spin_lock_init(&p->lock); /* Get device configuration from DT node */ ret = gpio_rcar_parse_dt(p, &npins); diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index fc19df5a64c2..8741600af7ef 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -2712,7 +2712,7 @@ EXPORT_SYMBOL_GPL(gpiod_direction_input); int gpiod_direction_input_nonotify(struct gpio_desc *desc) { - int ret = 0; + int ret = 0, dir; CLASS(gpio_chip_guard, guard)(desc); if (!guard.gc) @@ -2740,12 +2740,12 @@ int gpiod_direction_input_nonotify(struct gpio_desc *desc) ret = guard.gc->direction_input(guard.gc, gpio_chip_hwgpio(desc)); } else if (guard.gc->get_direction) { - ret = guard.gc->get_direction(guard.gc, + dir = guard.gc->get_direction(guard.gc, gpio_chip_hwgpio(desc)); - if (ret < 0) - return ret; + if (dir < 0) + return dir; - if (ret != GPIO_LINE_DIRECTION_IN) { + if (dir != GPIO_LINE_DIRECTION_IN) { gpiod_warn(desc, "%s: missing direction_input() operation and line is output\n", __func__); @@ -2764,7 +2764,7 @@ int gpiod_direction_input_nonotify(struct gpio_desc *desc) static int gpiod_direction_output_raw_commit(struct gpio_desc *desc, int value) { - int val = !!value, ret = 0; + int val = !!value, ret = 0, dir; CLASS(gpio_chip_guard, guard)(desc); if (!guard.gc) @@ -2788,12 +2788,12 @@ static int gpiod_direction_output_raw_commit(struct gpio_desc *desc, int value) } else { /* Check that we are in output mode if we can */ if (guard.gc->get_direction) { - ret = guard.gc->get_direction(guard.gc, + dir = guard.gc->get_direction(guard.gc, gpio_chip_hwgpio(desc)); - if (ret < 0) - return ret; + if (dir < 0) + return dir; - if (ret != GPIO_LINE_DIRECTION_OUT) { + if (dir != GPIO_LINE_DIRECTION_OUT) { gpiod_warn(desc, "%s: missing direction_output() operation\n", __func__); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index d100bb7a137c..018dfccd771b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1638,6 +1638,13 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev) if (amdgpu_sriov_vf(adev)) return 0; + /* resizing on Dell G5 SE platforms causes problems with runtime pm */ + if ((amdgpu_runtime_pm != 0) && + adev->pdev->vendor == PCI_VENDOR_ID_ATI && + adev->pdev->device == 0x731f && + adev->pdev->subsystem_vendor == PCI_VENDOR_ID_DELL) + return 0; + /* PCI_EXT_CAP_ID_VNDR extended capability is located at 0x100 */ if (!pci_find_ext_capability(adev->pdev, PCI_EXT_CAP_ID_VNDR)) DRM_WARN("System can't access extended configuration space, please check!!\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 784b03abb3a4..c1f35ded684e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -1638,22 +1638,19 @@ static ssize_t amdgpu_gfx_set_enforce_isolation(struct device *dev, } mutex_lock(&adev->enforce_isolation_mutex); - for (i = 0; i < num_partitions; i++) { - if (adev->enforce_isolation[i] && !partition_values[i]) { + if (adev->enforce_isolation[i] && !partition_values[i]) 
/* Going from enabled to disabled */ amdgpu_vmid_free_reserved(adev, AMDGPU_GFXHUB(i)); - amdgpu_mes_set_enforce_isolation(adev, i, false); - } else if (!adev->enforce_isolation[i] && partition_values[i]) { + else if (!adev->enforce_isolation[i] && partition_values[i]) /* Going from disabled to enabled */ amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(i)); - amdgpu_mes_set_enforce_isolation(adev, i, true); - } adev->enforce_isolation[i] = partition_values[i]; } - mutex_unlock(&adev->enforce_isolation_mutex); + amdgpu_mes_update_enforce_isolation(adev); + return count; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 32b27a1658e7..709c11cbeabd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -1681,7 +1681,8 @@ bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev) } /* Fix me -- node_id is used to identify the correct MES instances in the future */ -int amdgpu_mes_set_enforce_isolation(struct amdgpu_device *adev, uint32_t node_id, bool enable) +static int amdgpu_mes_set_enforce_isolation(struct amdgpu_device *adev, + uint32_t node_id, bool enable) { struct mes_misc_op_input op_input = {0}; int r; @@ -1703,6 +1704,23 @@ error: return r; } +int amdgpu_mes_update_enforce_isolation(struct amdgpu_device *adev) +{ + int i, r = 0; + + if (adev->enable_mes && adev->gfx.enable_cleaner_shader) { + mutex_lock(&adev->enforce_isolation_mutex); + for (i = 0; i < (adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1); i++) { + if (adev->enforce_isolation[i]) + r |= amdgpu_mes_set_enforce_isolation(adev, i, true); + else + r |= amdgpu_mes_set_enforce_isolation(adev, i, false); + } + mutex_unlock(&adev->enforce_isolation_mutex); + } + return r; +} + #if defined(CONFIG_DEBUG_FS) static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void *unused) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index 2df2444ee892..e98ea7ede1ba 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -534,6 +534,6 @@ static inline void amdgpu_mes_unlock(struct amdgpu_mes *mes) bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev); -int amdgpu_mes_set_enforce_isolation(struct amdgpu_device *adev, uint32_t node_id, bool enable); +int amdgpu_mes_update_enforce_isolation(struct amdgpu_device *adev); #endif /* __AMDGPU_MES_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 01ae2f88dec8..262bd010a283 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -2281,7 +2281,7 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo, struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; struct amdgpu_res_cursor cursor; u64 addr; - int r; + int r = 0; if (!adev->mman.buffer_funcs_enabled) return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 65f389eb65e5..f9a4d08eef92 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -1633,6 +1633,10 @@ static int mes_v11_0_hw_init(struct amdgpu_ip_block *ip_block) goto failure; } + r = amdgpu_mes_update_enforce_isolation(adev); + if (r) + goto failure; + out: /* * Disable KIQ ring usage from the driver once MES is enabled. 
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index 901e924e69ad..0fd0fa6ed518 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -1743,6 +1743,10 @@ static int mes_v12_0_hw_init(struct amdgpu_ip_block *ip_block) goto failure; } + r = amdgpu_mes_update_enforce_isolation(adev); + if (r) + goto failure; + out: /* * Disable KIQ ring usage from the driver once MES is enabled. diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c index 2eff37aaf827..1695dd78ede8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c @@ -107,6 +107,8 @@ static void init_mqd(struct mqd_manager *mm, void **mqd, m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK | 0x53 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT; + m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT; + m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK; m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT; m->cp_mqd_base_addr_lo = lower_32_bits(addr); @@ -167,10 +169,10 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, m = get_mqd(mqd); - m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT; + m->cp_hqd_pq_control &= ~CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK; m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1; - m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK; + pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control); m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c index 68dbc0399c87..3c0ae28c5923 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c @@ -154,6 +154,8 @@ static void init_mqd(struct mqd_manager *mm, void **mqd, m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK | 0x55 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT; + m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT; + m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK; m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT; m->cp_mqd_base_addr_lo = lower_32_bits(addr); @@ -221,10 +223,9 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, m = get_mqd(mqd); - m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT; + m->cp_hqd_pq_control &= ~CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK; m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1; - m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK; pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control); m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c index 2b72d5b4949b..565858b9044d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c @@ -121,6 +121,8 @@ static void init_mqd(struct mqd_manager *mm, void **mqd, m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK | 0x55 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT; + m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT; + m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK; m->cp_mqd_control = 1 << 
CP_MQD_CONTROL__PRIV_STATE__SHIFT; m->cp_mqd_base_addr_lo = lower_32_bits(addr); @@ -184,10 +186,9 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, m = get_mqd(mqd); - m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT; + m->cp_hqd_pq_control &= ~CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK; m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1; - m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK; pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control); m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index ff417d5361c4..3014925d95ff 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -183,6 +183,9 @@ static void init_mqd(struct mqd_manager *mm, void **mqd, m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK | 0x53 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT; + m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT; + m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK; + m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT; m->cp_mqd_base_addr_lo = lower_32_bits(addr); @@ -245,7 +248,7 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, m = get_mqd(mqd); - m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT; + m->cp_hqd_pq_control &= ~CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK; m->cp_hqd_pq_control |= order_base_2(q->queue_size / 4) - 1; pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c index ecccd7adbab4..24396a2c77bd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c @@ -266,8 +266,8 @@ int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_prope /* EOP buffer is not required for all ASICs */ if (properties->eop_ring_buffer_address) { if (properties->eop_ring_buffer_size != topo_dev->node_props.eop_buffer_size) { - pr_debug("queue eop bo size 0x%lx not equal to node eop buf size 0x%x\n", - properties->eop_buf_bo->tbo.base.size, + pr_debug("queue eop bo size 0x%x not equal to node eop buf size 0x%x\n", + properties->eop_ring_buffer_size, topo_dev->node_props.eop_buffer_size); err = -EINVAL; goto out_err_unreserve; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index ac3fd81fecef..9d9645a2d18e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1618,75 +1618,130 @@ static bool dm_should_disable_stutter(struct pci_dev *pdev) return false; } -static const struct dmi_system_id hpd_disconnect_quirk_table[] = { +struct amdgpu_dm_quirks { + bool aux_hpd_discon; + bool support_edp0_on_dp1; +}; + +static struct amdgpu_dm_quirks quirk_entries = { + .aux_hpd_discon = false, + .support_edp0_on_dp1 = false +}; + +static int edp0_on_dp1_callback(const struct dmi_system_id *id) +{ + quirk_entries.support_edp0_on_dp1 = true; + return 0; +} + +static int aux_hpd_discon_callback(const struct dmi_system_id *id) +{ + quirk_entries.aux_hpd_discon = true; + return 0; +} + +static const struct dmi_system_id dmi_quirk_table[] = { { + .callback = aux_hpd_discon_callback, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3660"), }, }, { + 
.callback = aux_hpd_discon_callback, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3260"), }, }, { + .callback = aux_hpd_discon_callback, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3460"), }, }, { + .callback = aux_hpd_discon_callback, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Tower Plus 7010"), }, }, { + .callback = aux_hpd_discon_callback, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Tower 7010"), }, }, { + .callback = aux_hpd_discon_callback, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex SFF Plus 7010"), }, }, { + .callback = aux_hpd_discon_callback, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex SFF 7010"), }, }, { + .callback = aux_hpd_discon_callback, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Micro Plus 7010"), }, }, { + .callback = aux_hpd_discon_callback, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Micro 7010"), }, }, + { + .callback = edp0_on_dp1_callback, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP Elite mt645 G8 Mobile Thin Client"), + }, + }, + { + .callback = edp0_on_dp1_callback, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP EliteBook 665 16 inch G11 Notebook PC"), + }, + }, {} /* TODO: refactor this from a fixed table to a dynamic option */ }; -static void retrieve_dmi_info(struct amdgpu_display_manager *dm) +static void retrieve_dmi_info(struct amdgpu_display_manager *dm, struct dc_init_data *init_data) { - const struct dmi_system_id *dmi_id; + int dmi_id; + struct drm_device *dev = dm->ddev; dm->aux_hpd_discon_quirk = false; + init_data->flags.support_edp0_on_dp1 = false; + + dmi_id = dmi_check_system(dmi_quirk_table); + + if (!dmi_id) + return; - dmi_id = dmi_first_match(hpd_disconnect_quirk_table); - if (dmi_id) { + if (quirk_entries.aux_hpd_discon) { dm->aux_hpd_discon_quirk = true; - DRM_INFO("aux_hpd_discon_quirk attached\n"); + drm_info(dev, "aux_hpd_discon_quirk attached\n"); + } + if (quirk_entries.support_edp0_on_dp1) { + init_data->flags.support_edp0_on_dp1 = true; + drm_info(dev, "support_edp0_on_dp1 attached\n"); + } } @@ -1994,7 +2049,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) if (amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0)) init_data.num_virtual_links = 1; - retrieve_dmi_info(&adev->dm); + retrieve_dmi_info(&adev->dm, &init_data); if (adev->dm.bb_from_dmub) init_data.bb_from_dmub = adev->dm.bb_from_dmub; @@ -7240,8 +7295,14 @@ static void amdgpu_dm_connector_funcs_force(struct drm_connector *connector) struct dc_link *dc_link = aconnector->dc_link; struct dc_sink *dc_em_sink = aconnector->dc_em_sink; const struct drm_edid *drm_edid; + struct i2c_adapter *ddc; + + if (dc_link && dc_link->aux_mode) + ddc = &aconnector->dm_dp_aux.aux.ddc; + else + ddc = &aconnector->i2c->base; - drm_edid = drm_edid_read(connector); + drm_edid = drm_edid_read_ddc(connector, ddc); drm_edid_connector_update(connector, drm_edid); if (!drm_edid) { DRM_ERROR("No EDID found on connector: %s.\n", connector->name); @@ -7286,14 +7347,21 @@ static int get_modes(struct drm_connector *connector) static void create_eml_sink(struct amdgpu_dm_connector *aconnector) { struct drm_connector *connector = &aconnector->base;
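+ /* Pick the DDC that matches the link: DP links expose EDID over the AUX channel, while everything else uses the connector's plain i2c adapter (same choice as in amdgpu_dm_connector_funcs_force() above). */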
+ struct dc_link *dc_link = aconnector->dc_link; struct dc_sink_init_data init_params = { .link = aconnector->dc_link, .sink_signal = SIGNAL_TYPE_VIRTUAL }; const struct drm_edid *drm_edid; const struct edid *edid; + struct i2c_adapter *ddc; + + if (dc_link && dc_link->aux_mode) + ddc = &aconnector->dm_dp_aux.aux.ddc; + else + ddc = &aconnector->i2c->base; - drm_edid = drm_edid_read(connector); + drm_edid = drm_edid_read_ddc(connector, ddc); drm_edid_connector_update(connector, drm_edid); if (!drm_edid) { DRM_ERROR("No EDID found on connector: %s.\n", connector->name); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c index 3390f0d8420a..c4a7fd453e5f 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c @@ -894,6 +894,7 @@ void amdgpu_dm_hpd_init(struct amdgpu_device *adev) struct drm_device *dev = adev_to_drm(adev); struct drm_connector *connector; struct drm_connector_list_iter iter; + int i; drm_connector_list_iter_begin(dev, &iter); drm_for_each_connector_iter(connector, &iter) { @@ -920,6 +921,12 @@ void amdgpu_dm_hpd_init(struct amdgpu_device *adev) } } drm_connector_list_iter_end(&iter); + + /* Update reference counts for HPDs */ + for (i = DC_IRQ_SOURCE_HPD1; i <= adev->mode_info.num_hpd; i++) { + if (amdgpu_irq_get(adev, &adev->hpd_irq, i - DC_IRQ_SOURCE_HPD1)) + drm_err(dev, "DM_IRQ: Failed to get HPD for source=%d!\n", i); + } } /** @@ -935,6 +942,7 @@ void amdgpu_dm_hpd_fini(struct amdgpu_device *adev) struct drm_device *dev = adev_to_drm(adev); struct drm_connector *connector; struct drm_connector_list_iter iter; + int i; drm_connector_list_iter_begin(dev, &iter); drm_for_each_connector_iter(connector, &iter) { @@ -960,4 +968,10 @@ void amdgpu_dm_hpd_fini(struct amdgpu_device *adev) } } drm_connector_list_iter_end(&iter); + + /* Update reference counts for HPDs */ + for (i = DC_IRQ_SOURCE_HPD1; i <= adev->mode_info.num_hpd; i++) { + if (amdgpu_irq_put(adev, &adev->hpd_irq, i - DC_IRQ_SOURCE_HPD1)) + drm_err(dev, "DM_IRQ: Failed to put HPD for source=%d!\n", i); + } } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c index 45858bf1523d..e140b7a04d72 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c @@ -54,7 +54,8 @@ static bool link_supports_psrsu(struct dc_link *link) if (amdgpu_dc_debug_mask & DC_DISABLE_PSR_SU) return false; - return dc_dmub_check_min_version(dc->ctx->dmub_srv->dmub); + /* Temporarily disable PSR-SU to avoid glitches */ + return false; } /* diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 520a34a42827..a45037cb4cc0 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -1455,7 +1455,8 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) DC_LOGGER_INIT(pipe_ctx->stream->ctx->logger); /* Invalid input */ - if (!plane_state->dst_rect.width || + if (!plane_state || + !plane_state->dst_rect.width || !plane_state->dst_rect.height || !plane_state->src_rect.width || !plane_state->src_rect.height) { diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c index 67a8e22b1126..e237ea1185a7 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c
@@ -3042,6 +3042,7 @@ static int kv_dpm_hw_init(struct amdgpu_ip_block *ip_block) if (!amdgpu_dpm) return 0; + mutex_lock(&adev->pm.mutex); kv_dpm_setup_asic(adev); ret = kv_dpm_enable(adev); if (ret) @@ -3049,6 +3050,8 @@ static int kv_dpm_hw_init(struct amdgpu_ip_block *ip_block) else adev->pm.dpm_enabled = true; amdgpu_legacy_dpm_compute_clocks(adev); + mutex_unlock(&adev->pm.mutex); + return ret; } @@ -3066,32 +3069,42 @@ static int kv_dpm_suspend(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; + cancel_work_sync(&adev->pm.dpm.thermal.work); + if (adev->pm.dpm_enabled) { + mutex_lock(&adev->pm.mutex); + adev->pm.dpm_enabled = false; /* disable dpm */ kv_dpm_disable(adev); /* reset the power state */ adev->pm.dpm.current_ps = adev->pm.dpm.requested_ps = adev->pm.dpm.boot_ps; + mutex_unlock(&adev->pm.mutex); } return 0; } static int kv_dpm_resume(struct amdgpu_ip_block *ip_block) { - int ret; + int ret = 0; struct amdgpu_device *adev = ip_block->adev; - if (adev->pm.dpm_enabled) { + if (!amdgpu_dpm) + return 0; + + if (!adev->pm.dpm_enabled) { + mutex_lock(&adev->pm.mutex); /* asic init will reset to the boot state */ kv_dpm_setup_asic(adev); ret = kv_dpm_enable(adev); - if (ret) + if (ret) { adev->pm.dpm_enabled = false; - else + } else { adev->pm.dpm_enabled = true; - if (adev->pm.dpm_enabled) amdgpu_legacy_dpm_compute_clocks(adev); + } + mutex_unlock(&adev->pm.mutex); } - return 0; + return ret; } static bool kv_dpm_is_idle(void *handle) diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c index e861355ebd75..c7518b13e787 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c @@ -1009,9 +1009,12 @@ void amdgpu_dpm_thermal_work_handler(struct work_struct *work) enum amd_pm_state_type dpm_state = POWER_STATE_TYPE_INTERNAL_THERMAL; int temp, size = sizeof(temp); - if (!adev->pm.dpm_enabled) - return; + mutex_lock(&adev->pm.mutex); + if (!adev->pm.dpm_enabled) { + mutex_unlock(&adev->pm.mutex); + return; + } if (!pp_funcs->read_sensor(adev->powerplay.pp_handle, AMDGPU_PP_SENSOR_GPU_TEMP, (void *)&temp, @@ -1033,4 +1036,5 @@ void amdgpu_dpm_thermal_work_handler(struct work_struct *work) adev->pm.dpm.state = dpm_state; amdgpu_legacy_dpm_compute_clocks(adev->powerplay.pp_handle); + mutex_unlock(&adev->pm.mutex); } diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c index a87dcf0974bc..d6dfe2599ebe 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c @@ -7786,6 +7786,7 @@ static int si_dpm_hw_init(struct amdgpu_ip_block *ip_block) if (!amdgpu_dpm) return 0; + mutex_lock(&adev->pm.mutex); si_dpm_setup_asic(adev); ret = si_dpm_enable(adev); if (ret) @@ -7793,6 +7794,7 @@ static int si_dpm_hw_init(struct amdgpu_ip_block *ip_block) else adev->pm.dpm_enabled = true; amdgpu_legacy_dpm_compute_clocks(adev); + mutex_unlock(&adev->pm.mutex); return ret; } @@ -7810,32 +7812,44 @@ static int si_dpm_suspend(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; + cancel_work_sync(&adev->pm.dpm.thermal.work); + if (adev->pm.dpm_enabled) { + mutex_lock(&adev->pm.mutex); + adev->pm.dpm_enabled = false; /* disable dpm */ si_dpm_disable(adev); /* reset the power state */ adev->pm.dpm.current_ps = adev->pm.dpm.requested_ps = adev->pm.dpm.boot_ps; + mutex_unlock(&adev->pm.mutex); } + return 0; } static int si_dpm_resume(struct amdgpu_ip_block 
*ip_block) { - int ret; + int ret = 0; struct amdgpu_device *adev = ip_block->adev; - if (adev->pm.dpm_enabled) { + if (!amdgpu_dpm) + return 0; + + if (!adev->pm.dpm_enabled) { /* asic init will reset to the boot state */ + mutex_lock(&adev->pm.mutex); si_dpm_setup_asic(adev); ret = si_dpm_enable(adev); - if (ret) + if (ret) { adev->pm.dpm_enabled = false; - else + } else { adev->pm.dpm_enabled = true; - if (adev->pm.dpm_enabled) amdgpu_legacy_dpm_compute_clocks(adev); + } + mutex_unlock(&adev->pm.mutex); } - return 0; + + return ret; } static bool si_dpm_is_idle(void *handle) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c index 9b2f4fe1578b..ddb6444406d2 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c @@ -1895,16 +1895,6 @@ static int smu_v14_0_allow_ih_interrupt(struct smu_context *smu) NULL); } -static int smu_v14_0_process_pending_interrupt(struct smu_context *smu) -{ - int ret = 0; - - if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_ACDC_BIT)) - ret = smu_v14_0_allow_ih_interrupt(smu); - - return ret; -} - int smu_v14_0_enable_thermal_alert(struct smu_context *smu) { int ret = 0; @@ -1916,7 +1906,7 @@ int smu_v14_0_enable_thermal_alert(struct smu_context *smu) if (ret) return ret; - return smu_v14_0_process_pending_interrupt(smu); + return smu_v14_0_allow_ih_interrupt(smu); } int smu_v14_0_disable_thermal_alert(struct smu_context *smu) diff --git a/drivers/gpu/drm/drm_fbdev_dma.c b/drivers/gpu/drm/drm_fbdev_dma.c index b14b581c059d..02a516e77192 100644 --- a/drivers/gpu/drm/drm_fbdev_dma.c +++ b/drivers/gpu/drm/drm_fbdev_dma.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: MIT #include <linux/fb.h> +#include <linux/vmalloc.h> #include <drm/drm_drv.h> #include <drm/drm_fbdev_dma.h> @@ -70,37 +71,102 @@ static const struct fb_ops drm_fbdev_dma_fb_ops = { .fb_destroy = drm_fbdev_dma_fb_destroy, }; -FB_GEN_DEFAULT_DEFERRED_DMAMEM_OPS(drm_fbdev_dma, +FB_GEN_DEFAULT_DEFERRED_DMAMEM_OPS(drm_fbdev_dma_shadowed, drm_fb_helper_damage_range, drm_fb_helper_damage_area); -static int drm_fbdev_dma_deferred_fb_mmap(struct fb_info *info, struct vm_area_struct *vma) +static void drm_fbdev_dma_shadowed_fb_destroy(struct fb_info *info) { struct drm_fb_helper *fb_helper = info->par; - struct drm_framebuffer *fb = fb_helper->fb; - struct drm_gem_dma_object *dma = drm_fb_dma_get_gem_obj(fb, 0); + void *shadow = info->screen_buffer; + + if (!fb_helper->dev) + return; - if (!dma->map_noncoherent) - vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + if (info->fbdefio) + fb_deferred_io_cleanup(info); + drm_fb_helper_fini(fb_helper); + vfree(shadow); - return fb_deferred_io_mmap(info, vma); + drm_client_buffer_vunmap(fb_helper->buffer); + drm_client_framebuffer_delete(fb_helper->buffer); + drm_client_release(&fb_helper->client); + drm_fb_helper_unprepare(fb_helper); + kfree(fb_helper); } -static const struct fb_ops drm_fbdev_dma_deferred_fb_ops = { +static const struct fb_ops drm_fbdev_dma_shadowed_fb_ops = { .owner = THIS_MODULE, .fb_open = drm_fbdev_dma_fb_open, .fb_release = drm_fbdev_dma_fb_release, - __FB_DEFAULT_DEFERRED_OPS_RDWR(drm_fbdev_dma), + FB_DEFAULT_DEFERRED_OPS(drm_fbdev_dma_shadowed), DRM_FB_HELPER_DEFAULT_OPS, - __FB_DEFAULT_DEFERRED_OPS_DRAW(drm_fbdev_dma), - .fb_mmap = drm_fbdev_dma_deferred_fb_mmap, - .fb_destroy = drm_fbdev_dma_fb_destroy, + .fb_destroy = drm_fbdev_dma_shadowed_fb_destroy, }; /* * struct drm_fb_helper */ +static void 
drm_fbdev_dma_damage_blit_real(struct drm_fb_helper *fb_helper, + struct drm_clip_rect *clip, + struct iosys_map *dst) +{ + struct drm_framebuffer *fb = fb_helper->fb; + size_t offset = clip->y1 * fb->pitches[0]; + size_t len = clip->x2 - clip->x1; + unsigned int y; + void *src; + + switch (drm_format_info_bpp(fb->format, 0)) { + case 1: + offset += clip->x1 / 8; + len = DIV_ROUND_UP(len + clip->x1 % 8, 8); + break; + case 2: + offset += clip->x1 / 4; + len = DIV_ROUND_UP(len + clip->x1 % 4, 4); + break; + case 4: + offset += clip->x1 / 2; + len = DIV_ROUND_UP(len + clip->x1 % 2, 2); + break; + default: + offset += clip->x1 * fb->format->cpp[0]; + len *= fb->format->cpp[0]; + break; + } + + src = fb_helper->info->screen_buffer + offset; + iosys_map_incr(dst, offset); /* go to first pixel within clip rect */ + + for (y = clip->y1; y < clip->y2; y++) { + iosys_map_memcpy_to(dst, 0, src, len); + iosys_map_incr(dst, fb->pitches[0]); + src += fb->pitches[0]; + } +} + +static int drm_fbdev_dma_damage_blit(struct drm_fb_helper *fb_helper, + struct drm_clip_rect *clip) +{ + struct drm_client_buffer *buffer = fb_helper->buffer; + struct iosys_map dst; + + /* + * For fbdev emulation, we only have to protect against fbdev modeset + * operations. Nothing else will involve the client buffer's BO. So it + * is sufficient to acquire struct drm_fb_helper.lock here. + */ + mutex_lock(&fb_helper->lock); + + dst = buffer->map; + drm_fbdev_dma_damage_blit_real(fb_helper, clip, &dst); + + mutex_unlock(&fb_helper->lock); + + return 0; +} static int drm_fbdev_dma_helper_fb_dirty(struct drm_fb_helper *helper, struct drm_clip_rect *clip) { @@ -112,6 +178,10 @@ static int drm_fbdev_dma_helper_fb_dirty(struct drm_fb_helper *helper, return 0; if (helper->fb->funcs->dirty) { + ret = drm_fbdev_dma_damage_blit(helper, clip); + if (drm_WARN_ONCE(dev, ret, "Damage blitter failed: ret=%d\n", ret)) + return ret; + ret = helper->fb->funcs->dirty(helper->fb, NULL, 0, 0, clip, 1); if (drm_WARN_ONCE(dev, ret, "Dirty helper failed: ret=%d\n", ret)) return ret; @@ -128,14 +198,80 @@ static const struct drm_fb_helper_funcs drm_fbdev_dma_helper_funcs = { * struct drm_fb_helper */ +static int drm_fbdev_dma_driver_fbdev_probe_tail(struct drm_fb_helper *fb_helper, + struct drm_fb_helper_surface_size *sizes) +{ + struct drm_device *dev = fb_helper->dev; + struct drm_client_buffer *buffer = fb_helper->buffer; + struct drm_gem_dma_object *dma_obj = to_drm_gem_dma_obj(buffer->gem); + struct drm_framebuffer *fb = fb_helper->fb; + struct fb_info *info = fb_helper->info; + struct iosys_map map = buffer->map; + + info->fbops = &drm_fbdev_dma_fb_ops; + + /* screen */ + info->flags |= FBINFO_VIRTFB; /* system memory */ + if (dma_obj->map_noncoherent) + info->flags |= FBINFO_READS_FAST; /* signal caching */ + info->screen_size = sizes->surface_height * fb->pitches[0]; + info->screen_buffer = map.vaddr; + if (!(info->flags & FBINFO_HIDE_SMEM_START)) { + if (!drm_WARN_ON(dev, is_vmalloc_addr(info->screen_buffer))) + info->fix.smem_start = page_to_phys(virt_to_page(info->screen_buffer)); + } + info->fix.smem_len = info->screen_size; + + return 0; +} + +static int drm_fbdev_dma_driver_fbdev_probe_tail_shadowed(struct drm_fb_helper *fb_helper, + struct drm_fb_helper_surface_size *sizes) +{ + struct drm_client_buffer *buffer = fb_helper->buffer; + struct fb_info *info = fb_helper->info; + size_t screen_size = buffer->gem->size; + void *screen_buffer; + int ret; + + /* + * Deferred I/O requires struct page for framebuffer memory, + * which is not 
guaranteed for all DMA ranges. We thus create + * a shadow buffer in system memory. + */ + screen_buffer = vzalloc(screen_size); + if (!screen_buffer) + return -ENOMEM; + + info->fbops = &drm_fbdev_dma_shadowed_fb_ops; + + /* screen */ + info->flags |= FBINFO_VIRTFB; /* system memory */ + info->flags |= FBINFO_READS_FAST; /* signal caching */ + info->screen_buffer = screen_buffer; + info->fix.smem_len = screen_size; + + fb_helper->fbdefio.delay = HZ / 20; + fb_helper->fbdefio.deferred_io = drm_fb_helper_deferred_io; + + info->fbdefio = &fb_helper->fbdefio; + ret = fb_deferred_io_init(info); + if (ret) + goto err_vfree; + + return 0; + +err_vfree: + vfree(screen_buffer); + return ret; +} + int drm_fbdev_dma_driver_fbdev_probe(struct drm_fb_helper *fb_helper, struct drm_fb_helper_surface_size *sizes) { struct drm_client_dev *client = &fb_helper->client; struct drm_device *dev = fb_helper->dev; - bool use_deferred_io = false; struct drm_client_buffer *buffer; - struct drm_gem_dma_object *dma_obj; struct drm_framebuffer *fb; struct fb_info *info; u32 format; @@ -152,19 +288,9 @@ int drm_fbdev_dma_driver_fbdev_probe(struct drm_fb_helper *fb_helper, sizes->surface_height, format); if (IS_ERR(buffer)) return PTR_ERR(buffer); - dma_obj = to_drm_gem_dma_obj(buffer->gem); fb = buffer->fb; - /* - * Deferred I/O requires struct page for framebuffer memory, - * which is not guaranteed for all DMA ranges. We thus only - * install deferred I/O if we have a framebuffer that requires - * it. - */ - if (fb->funcs->dirty) - use_deferred_io = true; - ret = drm_client_buffer_vmap(buffer, &map); if (ret) { goto err_drm_client_buffer_delete; @@ -185,45 +311,12 @@ int drm_fbdev_dma_driver_fbdev_probe(struct drm_fb_helper *fb_helper, drm_fb_helper_fill_info(info, fb_helper, sizes); - if (use_deferred_io) - info->fbops = &drm_fbdev_dma_deferred_fb_ops; + if (fb->funcs->dirty) + ret = drm_fbdev_dma_driver_fbdev_probe_tail_shadowed(fb_helper, sizes); else - info->fbops = &drm_fbdev_dma_fb_ops; - - /* screen */ - info->flags |= FBINFO_VIRTFB; /* system memory */ - if (dma_obj->map_noncoherent) - info->flags |= FBINFO_READS_FAST; /* signal caching */ - info->screen_size = sizes->surface_height * fb->pitches[0]; - info->screen_buffer = map.vaddr; - if (!(info->flags & FBINFO_HIDE_SMEM_START)) { - if (!drm_WARN_ON(dev, is_vmalloc_addr(info->screen_buffer))) - info->fix.smem_start = page_to_phys(virt_to_page(info->screen_buffer)); - } - info->fix.smem_len = info->screen_size; - - /* - * Only set up deferred I/O if the screen buffer supports - * it. If this disagrees with the previous test for ->dirty, - * mmap on the /dev/fb file might not work correctly. 
- */ - if (!is_vmalloc_addr(info->screen_buffer) && info->fix.smem_start) { - unsigned long pfn = info->fix.smem_start >> PAGE_SHIFT; - - if (drm_WARN_ON(dev, !pfn_to_page(pfn))) - use_deferred_io = false; - } - - /* deferred I/O */ - if (use_deferred_io) { - fb_helper->fbdefio.delay = HZ / 20; - fb_helper->fbdefio.deferred_io = drm_fb_helper_deferred_io; - - info->fbdefio = &fb_helper->fbdefio; - ret = fb_deferred_io_init(info); - if (ret) - goto err_drm_fb_helper_release_info; - } + ret = drm_fbdev_dma_driver_fbdev_probe_tail(fb_helper, sizes); + if (ret) + goto err_drm_fb_helper_release_info; return 0; diff --git a/drivers/gpu/drm/hyperv/hyperv_drm_drv.c b/drivers/gpu/drm/hyperv/hyperv_drm_drv.c index f59abfa7622a..0d49f168a919 100644 --- a/drivers/gpu/drm/hyperv/hyperv_drm_drv.c +++ b/drivers/gpu/drm/hyperv/hyperv_drm_drv.c @@ -154,6 +154,7 @@ static int hyperv_vmbus_probe(struct hv_device *hdev, return 0; err_free_mmio: + iounmap(hv->vram); vmbus_free_mmio(hv->mem->start, hv->fb_size); err_vmbus_close: vmbus_close(hdev->channel); @@ -172,6 +173,7 @@ static void hyperv_vmbus_remove(struct hv_device *hdev) vmbus_close(hdev->channel); hv_set_drvdata(hdev, NULL); + iounmap(hv->vram); vmbus_free_mmio(hv->mem->start, hv->fb_size); } diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 18c66992aa1d..ff2cf3daa7a2 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -866,7 +866,7 @@ static void intel_ddi_get_encoder_pipes(struct intel_encoder *encoder, encoder->base.base.id, encoder->base.name); if (!mst_pipe_mask && dp128b132b_pipe_mask) { - struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + struct intel_digital_port *dig_port = enc_to_dig_port(encoder); /* * If we don't have 8b/10b MST, but have more than one @@ -878,7 +878,8 @@ static void intel_ddi_get_encoder_pipes(struct intel_encoder *encoder, * we don't expect MST to have been enabled at that point, and * can assume it's SST. */ - if (hweight8(dp128b132b_pipe_mask) > 1 || intel_dp->is_mst) + if (hweight8(dp128b132b_pipe_mask) > 1 || + intel_dp_mst_encoder_active_links(dig_port)) mst_pipe_mask = dp128b132b_pipe_mask; } @@ -4151,13 +4152,13 @@ static void intel_ddi_read_func_ctl(struct intel_encoder *encoder, } else if (ddi_mode == TRANS_DDI_MODE_SELECT_DP_MST) { intel_ddi_read_func_ctl_dp_mst(encoder, pipe_config, ddi_func_ctl); } else if (ddi_mode == TRANS_DDI_MODE_SELECT_FDI_OR_128B132B && HAS_DP20(display)) { - struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + struct intel_digital_port *dig_port = enc_to_dig_port(encoder); /* * If this is true, we know we're being called from mst stream * encoder's ->get_config(). 
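* The check below uses the digital port's active MST link count rather than the cached intel_dp->is_mst flag.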
*/ - if (intel_dp->is_mst) + if (intel_dp_mst_encoder_active_links(dig_port)) intel_ddi_read_func_ctl_dp_mst(encoder, pipe_config, ddi_func_ctl); else intel_ddi_read_func_ctl_dp_sst(encoder, pipe_config, ddi_func_ctl); diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index a65cf97ad12d..86d6185fda50 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -1867,7 +1867,8 @@ intel_dp_mst_encoder_init(struct intel_digital_port *dig_port, int conn_base_id) /* create encoders */ mst_stream_encoders_create(dig_port); ret = drm_dp_mst_topology_mgr_init(&intel_dp->mst_mgr, display->drm, - &intel_dp->aux, 16, 3, conn_base_id); + &intel_dp->aux, 16, + INTEL_NUM_PIPES(display), conn_base_id); if (ret) { intel_dp->mst_mgr.cbs = NULL; return ret; diff --git a/drivers/gpu/drm/imagination/Makefile b/drivers/gpu/drm/imagination/Makefile index 9bc6a3884c22..3d9d4d40fb80 100644 --- a/drivers/gpu/drm/imagination/Makefile +++ b/drivers/gpu/drm/imagination/Makefile @@ -1,8 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only OR MIT # Copyright (c) 2023 Imagination Technologies Ltd. -subdir-ccflags-y := -I$(src) - powervr-y := \ pvr_ccb.o \ pvr_cccb.o \ diff --git a/drivers/gpu/drm/imagination/pvr_fw_meta.c b/drivers/gpu/drm/imagination/pvr_fw_meta.c index c39beb70c317..6d13864851fc 100644 --- a/drivers/gpu/drm/imagination/pvr_fw_meta.c +++ b/drivers/gpu/drm/imagination/pvr_fw_meta.c @@ -527,8 +527,10 @@ pvr_meta_vm_map(struct pvr_device *pvr_dev, struct pvr_fw_object *fw_obj) static void pvr_meta_vm_unmap(struct pvr_device *pvr_dev, struct pvr_fw_object *fw_obj) { - pvr_vm_unmap(pvr_dev->kernel_vm_ctx, fw_obj->fw_mm_node.start, - fw_obj->fw_mm_node.size); + struct pvr_gem_object *pvr_obj = fw_obj->gem; + + pvr_vm_unmap_obj(pvr_dev->kernel_vm_ctx, pvr_obj, + fw_obj->fw_mm_node.start, fw_obj->fw_mm_node.size); } static bool diff --git a/drivers/gpu/drm/imagination/pvr_fw_trace.c b/drivers/gpu/drm/imagination/pvr_fw_trace.c index 73707daa4e52..5dbb636d7d4f 100644 --- a/drivers/gpu/drm/imagination/pvr_fw_trace.c +++ b/drivers/gpu/drm/imagination/pvr_fw_trace.c @@ -333,8 +333,8 @@ static int fw_trace_seq_show(struct seq_file *s, void *v) if (sf_id == ROGUE_FW_SF_LAST) return -EINVAL; - timestamp = read_fw_trace(trace_seq_data, 1) | - ((u64)read_fw_trace(trace_seq_data, 2) << 32); + timestamp = ((u64)read_fw_trace(trace_seq_data, 1) << 32) | + read_fw_trace(trace_seq_data, 2); timestamp = (timestamp & ~ROGUE_FWT_TIMESTAMP_TIME_CLRMSK) >> ROGUE_FWT_TIMESTAMP_TIME_SHIFT; diff --git a/drivers/gpu/drm/imagination/pvr_queue.c b/drivers/gpu/drm/imagination/pvr_queue.c index c4f08432882b..43411be930a2 100644 --- a/drivers/gpu/drm/imagination/pvr_queue.c +++ b/drivers/gpu/drm/imagination/pvr_queue.c @@ -109,12 +109,20 @@ pvr_queue_fence_get_driver_name(struct dma_fence *f) return PVR_DRIVER_NAME; } +static void pvr_queue_fence_release_work(struct work_struct *w) +{ + struct pvr_queue_fence *fence = container_of(w, struct pvr_queue_fence, release_work); + + pvr_context_put(fence->queue->ctx); + dma_fence_free(&fence->base); +} + static void pvr_queue_fence_release(struct dma_fence *f) { struct pvr_queue_fence *fence = container_of(f, struct pvr_queue_fence, base); + struct pvr_device *pvr_dev = fence->queue->ctx->pvr_dev; - pvr_context_put(fence->queue->ctx); - dma_fence_free(f); + queue_work(pvr_dev->sched_wq, &fence->release_work); } static const char * @@ -268,6 +276,7 @@ pvr_queue_fence_init(struct dma_fence *f, 
pvr_context_get(queue->ctx); fence->queue = queue; + INIT_WORK(&fence->release_work, pvr_queue_fence_release_work); dma_fence_init(&fence->base, fence_ops, &fence_ctx->lock, fence_ctx->id, atomic_inc_return(&fence_ctx->seqno)); @@ -304,8 +313,9 @@ pvr_queue_cccb_fence_init(struct dma_fence *fence, struct pvr_queue *queue) static void pvr_queue_job_fence_init(struct dma_fence *fence, struct pvr_queue *queue) { - pvr_queue_fence_init(fence, queue, &pvr_queue_job_fence_ops, - &queue->job_fence_ctx); + if (!fence->ops) + pvr_queue_fence_init(fence, queue, &pvr_queue_job_fence_ops, + &queue->job_fence_ctx); } /** diff --git a/drivers/gpu/drm/imagination/pvr_queue.h b/drivers/gpu/drm/imagination/pvr_queue.h index e06ced69302f..93fe9ac9f58c 100644 --- a/drivers/gpu/drm/imagination/pvr_queue.h +++ b/drivers/gpu/drm/imagination/pvr_queue.h @@ -5,6 +5,7 @@ #define PVR_QUEUE_H #include <drm/gpu_scheduler.h> +#include <linux/workqueue.h> #include "pvr_cccb.h" #include "pvr_device.h" @@ -63,6 +64,9 @@ struct pvr_queue_fence { /** @queue: Queue that created this fence. */ struct pvr_queue *queue; + + /** @release_work: Fence release work structure. */ + struct work_struct release_work; }; /** diff --git a/drivers/gpu/drm/imagination/pvr_vm.c b/drivers/gpu/drm/imagination/pvr_vm.c index 363f885a7098..2896fa7501b1 100644 --- a/drivers/gpu/drm/imagination/pvr_vm.c +++ b/drivers/gpu/drm/imagination/pvr_vm.c @@ -293,8 +293,9 @@ err_bind_op_fini: static int pvr_vm_bind_op_unmap_init(struct pvr_vm_bind_op *bind_op, - struct pvr_vm_context *vm_ctx, u64 device_addr, - u64 size) + struct pvr_vm_context *vm_ctx, + struct pvr_gem_object *pvr_obj, + u64 device_addr, u64 size) { int err; @@ -318,6 +319,7 @@ pvr_vm_bind_op_unmap_init(struct pvr_vm_bind_op *bind_op, goto err_bind_op_fini; } + bind_op->pvr_obj = pvr_obj; bind_op->vm_ctx = vm_ctx; bind_op->device_addr = device_addr; bind_op->size = size; @@ -598,20 +600,6 @@ err_free: } /** - * pvr_vm_unmap_all() - Unmap all mappings associated with a VM context. - * @vm_ctx: Target VM context. - * - * This function ensures that no mappings are left dangling by unmapping them - * all in order of ascending device-virtual address. - */ -void -pvr_vm_unmap_all(struct pvr_vm_context *vm_ctx) -{ - WARN_ON(pvr_vm_unmap(vm_ctx, vm_ctx->gpuvm_mgr.mm_start, - vm_ctx->gpuvm_mgr.mm_range)); -} - -/** * pvr_vm_context_release() - Teardown a VM context. * @ref_count: Pointer to reference counter of the VM context. * @@ -703,11 +691,7 @@ pvr_vm_lock_extra(struct drm_gpuvm_exec *vm_exec) struct pvr_vm_bind_op *bind_op = vm_exec->extra.priv; struct pvr_gem_object *pvr_obj = bind_op->pvr_obj; - /* Unmap operations don't have an object to lock. */ - if (!pvr_obj) - return 0; - - /* Acquire lock on the GEM being mapped. */ + /* Acquire lock on the GEM object being mapped/unmapped. */ return drm_exec_lock_obj(&vm_exec->exec, gem_from_pvr_gem(pvr_obj)); } @@ -772,8 +756,10 @@ err_cleanup: } /** - * pvr_vm_unmap() - Unmap an already mapped section of device-virtual memory. + * pvr_vm_unmap_obj_locked() - Unmap an already mapped section of device-virtual + * memory. * @vm_ctx: Target VM context. + * @pvr_obj: Target PowerVR memory object. * @device_addr: Virtual device address at the start of the target mapping. * @size: Size of the target mapping. * @@ -784,9 +770,13 @@ err_cleanup: * * Any error encountered while performing internal operations required to * destroy the mapping (returned from pvr_vm_gpuva_unmap or * pvr_vm_gpuva_remap). 
+ * + * The vm_ctx->lock must be held when calling this function. */ -int -pvr_vm_unmap(struct pvr_vm_context *vm_ctx, u64 device_addr, u64 size) +static int +pvr_vm_unmap_obj_locked(struct pvr_vm_context *vm_ctx, + struct pvr_gem_object *pvr_obj, + u64 device_addr, u64 size) { struct pvr_vm_bind_op bind_op = {0}; struct drm_gpuvm_exec vm_exec = { @@ -799,11 +789,13 @@ pvr_vm_unmap(struct pvr_vm_context *vm_ctx, u64 device_addr, u64 size) }, }; - int err = pvr_vm_bind_op_unmap_init(&bind_op, vm_ctx, device_addr, - size); + int err = pvr_vm_bind_op_unmap_init(&bind_op, vm_ctx, pvr_obj, + device_addr, size); if (err) return err; + pvr_gem_object_get(pvr_obj); + err = drm_gpuvm_exec_lock(&vm_exec); if (err) goto err_cleanup; @@ -818,6 +810,96 @@ err_cleanup: return err; } +/** + * pvr_vm_unmap_obj() - Unmap an already mapped section of device-virtual + * memory. + * @vm_ctx: Target VM context. + * @pvr_obj: Target PowerVR memory object. + * @device_addr: Virtual device address at the start of the target mapping. + * @size: Size of the target mapping. + * + * Return: + * * 0 on success, + * * Any error encountered by pvr_vm_unmap_obj_locked. + */ +int +pvr_vm_unmap_obj(struct pvr_vm_context *vm_ctx, struct pvr_gem_object *pvr_obj, + u64 device_addr, u64 size) +{ + int err; + + mutex_lock(&vm_ctx->lock); + err = pvr_vm_unmap_obj_locked(vm_ctx, pvr_obj, device_addr, size); + mutex_unlock(&vm_ctx->lock); + + return err; +} + +/** + * pvr_vm_unmap() - Unmap an already mapped section of device-virtual memory. + * @vm_ctx: Target VM context. + * @device_addr: Virtual device address at the start of the target mapping. + * @size: Size of the target mapping. + * + * Return: + * * 0 on success, + * * Any error encountered by drm_gpuva_find, + * * Any error encountered by pvr_vm_unmap_obj_locked. + */ +int +pvr_vm_unmap(struct pvr_vm_context *vm_ctx, u64 device_addr, u64 size) +{ + struct pvr_gem_object *pvr_obj; + struct drm_gpuva *va; + int err; + + mutex_lock(&vm_ctx->lock); + + va = drm_gpuva_find(&vm_ctx->gpuvm_mgr, device_addr, size); + if (va) { + pvr_obj = gem_to_pvr_gem(va->gem.obj); + err = pvr_vm_unmap_obj_locked(vm_ctx, pvr_obj, + va->va.addr, va->va.range); + } else { + err = -ENOENT; + } + + mutex_unlock(&vm_ctx->lock); + + return err; +} + +/** + * pvr_vm_unmap_all() - Unmap all mappings associated with a VM context. + * @vm_ctx: Target VM context. + * + * This function ensures that no mappings are left dangling by unmapping them + * all in order of ascending device-virtual address. + */ +void +pvr_vm_unmap_all(struct pvr_vm_context *vm_ctx) +{ + mutex_lock(&vm_ctx->lock); + + for (;;) { + struct pvr_gem_object *pvr_obj; + struct drm_gpuva *va; + + va = drm_gpuva_find_first(&vm_ctx->gpuvm_mgr, + vm_ctx->gpuvm_mgr.mm_start, + vm_ctx->gpuvm_mgr.mm_range); + if (!va) + break; + + pvr_obj = gem_to_pvr_gem(va->gem.obj); + + WARN_ON(pvr_vm_unmap_obj_locked(vm_ctx, pvr_obj, + va->va.addr, va->va.range)); + } + + mutex_unlock(&vm_ctx->lock); +} + /* Static data areas are determined by firmware. 
*/ static const struct drm_pvr_static_data_area static_data_areas[] = { { diff --git a/drivers/gpu/drm/imagination/pvr_vm.h b/drivers/gpu/drm/imagination/pvr_vm.h index 79406243617c..b0528dffa7f1 100644 --- a/drivers/gpu/drm/imagination/pvr_vm.h +++ b/drivers/gpu/drm/imagination/pvr_vm.h @@ -38,6 +38,9 @@ struct pvr_vm_context *pvr_vm_create_context(struct pvr_device *pvr_dev, int pvr_vm_map(struct pvr_vm_context *vm_ctx, struct pvr_gem_object *pvr_obj, u64 pvr_obj_offset, u64 device_addr, u64 size); +int pvr_vm_unmap_obj(struct pvr_vm_context *vm_ctx, + struct pvr_gem_object *pvr_obj, + u64 device_addr, u64 size); int pvr_vm_unmap(struct pvr_vm_context *vm_ctx, u64 device_addr, u64 size); void pvr_vm_unmap_all(struct pvr_vm_context *vm_ctx); diff --git a/drivers/gpu/drm/nouveau/Kconfig b/drivers/gpu/drm/nouveau/Kconfig index ce840300578d..1050a4617fc1 100644 --- a/drivers/gpu/drm/nouveau/Kconfig +++ b/drivers/gpu/drm/nouveau/Kconfig @@ -4,6 +4,7 @@ config DRM_NOUVEAU depends on DRM && PCI && MMU select IOMMU_API select FW_LOADER + select FW_CACHE if PM_SLEEP select DRM_CLIENT_SELECTION select DRM_DISPLAY_DP_HELPER select DRM_DISPLAY_HDMI_HELPER diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c index 8d5c9c74cbb9..eac0d1d2dbda 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.c +++ b/drivers/gpu/drm/nouveau/nouveau_connector.c @@ -775,7 +775,6 @@ nouveau_connector_force(struct drm_connector *connector) if (!nv_encoder) { NV_ERROR(drm, "can't find encoder to force %s on!\n", connector->name); - connector->status = connector_status_disconnected; return; } diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c index 05c13102a8cb..d22889fbfa9c 100644 --- a/drivers/gpu/drm/radeon/r300.c +++ b/drivers/gpu/drm/radeon/r300.c @@ -359,7 +359,8 @@ int r300_mc_wait_for_idle(struct radeon_device *rdev) return -1; } -static void r300_gpu_init(struct radeon_device *rdev) +/* rs400_gpu_init also calls this! */ +void r300_gpu_init(struct radeon_device *rdev) { uint32_t gb_tile_config, tmp; diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index 1e00f6b99f94..8f5e07834fcc 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -165,6 +165,7 @@ void r200_set_safe_registers(struct radeon_device *rdev); */ extern int r300_init(struct radeon_device *rdev); extern void r300_fini(struct radeon_device *rdev); +extern void r300_gpu_init(struct radeon_device *rdev); extern int r300_suspend(struct radeon_device *rdev); extern int r300_resume(struct radeon_device *rdev); extern int r300_asic_reset(struct radeon_device *rdev, bool hard); diff --git a/drivers/gpu/drm/radeon/rs400.c b/drivers/gpu/drm/radeon/rs400.c index d6c18fd740ec..13cd0a688a65 100644 --- a/drivers/gpu/drm/radeon/rs400.c +++ b/drivers/gpu/drm/radeon/rs400.c @@ -256,8 +256,22 @@ int rs400_mc_wait_for_idle(struct radeon_device *rdev) static void rs400_gpu_init(struct radeon_device *rdev) { - /* FIXME: is this correct ? */ - r420_pipes_init(rdev); + /* Earlier code was calling r420_pipes_init and then + * rs400_mc_wait_for_idle(rdev). The problem is that + * at least on my Mobility Radeon Xpress 200M RC410 card, + * which ends up in this code path, the r420 pipe + * initialization method ends up with num_gb_pipes == 3 + * while the card seems to have only one pipe.
+ * + * Problems showed up as HyperZ glitches, see: + * https://bugs.freedesktop.org/show_bug.cgi?id=110897 + * + * Delegating initialization to r300 code seems to work + * and results in proper pipe numbers. The rs400 cards + * are said to be r300 kind of cards, not r400. + */ + r300_gpu_init(rdev); + if (rs400_mc_wait_for_idle(rdev)) { pr_warn("rs400: Failed to wait MC idle while programming pipes. Bad things might happen. %08x\n", RREG32(RADEON_MC_STATUS)); diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h index c75302ca3427..f56e77e7f6d0 100644 --- a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h +++ b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h @@ -21,7 +21,7 @@ * */ -#if !defined(_GPU_SCHED_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) +#if !defined(_GPU_SCHED_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ) #define _GPU_SCHED_TRACE_H_ #include <linux/stringify.h> @@ -106,7 +106,7 @@ TRACE_EVENT(drm_sched_job_wait_dep, __entry->seqno) ); -#endif +#endif /* _GPU_SCHED_TRACE_H_ */ /* This part must be outside protection */ #undef TRACE_INCLUDE_PATH diff --git a/drivers/gpu/drm/tiny/bochs.c b/drivers/gpu/drm/tiny/bochs.c index c67e1f906785..8706763af8fb 100644 --- a/drivers/gpu/drm/tiny/bochs.c +++ b/drivers/gpu/drm/tiny/bochs.c @@ -335,8 +335,6 @@ static void bochs_hw_setmode(struct bochs_device *bochs, struct drm_display_mode bochs->xres, bochs->yres, bochs->bpp, bochs->yres_virtual); - bochs_hw_blank(bochs, false); - bochs_dispi_write(bochs, VBE_DISPI_INDEX_ENABLE, 0); bochs_dispi_write(bochs, VBE_DISPI_INDEX_BPP, bochs->bpp); bochs_dispi_write(bochs, VBE_DISPI_INDEX_XRES, bochs->xres); @@ -506,6 +504,9 @@ static int bochs_crtc_helper_atomic_check(struct drm_crtc *crtc, static void bochs_crtc_helper_atomic_enable(struct drm_crtc *crtc, struct drm_atomic_state *state) { + struct bochs_device *bochs = to_bochs_device(crtc->dev); + + bochs_hw_blank(bochs, false); } static void bochs_crtc_helper_atomic_disable(struct drm_crtc *crtc, diff --git a/drivers/gpu/drm/vkms/vkms_composer.c b/drivers/gpu/drm/vkms/vkms_composer.c index b20ac1705726..fa269d279e25 100644 --- a/drivers/gpu/drm/vkms/vkms_composer.c +++ b/drivers/gpu/drm/vkms/vkms_composer.c @@ -67,7 +67,7 @@ static u16 lerp_u16(u16 a, u16 b, s64 t) s64 delta = drm_fixp_mul(b_fp - a_fp, t); - return drm_fixp2int(a_fp + delta); + return drm_fixp2int_round(a_fp + delta); } static s64 get_lut_index(const struct vkms_color_lut *lut, u16 channel_value) diff --git a/drivers/gpu/drm/xe/display/xe_plane_initial.c b/drivers/gpu/drm/xe/display/xe_plane_initial.c index 2eb9633f163a..2a2f250fa495 100644 --- a/drivers/gpu/drm/xe/display/xe_plane_initial.c +++ b/drivers/gpu/drm/xe/display/xe_plane_initial.c @@ -194,8 +194,6 @@ intel_find_initial_plane_obj(struct intel_crtc *crtc, to_intel_plane(crtc->base.primary); struct intel_plane_state *plane_state = to_intel_plane_state(plane->base.state); - struct intel_crtc_state *crtc_state = - to_intel_crtc_state(crtc->base.state); struct drm_framebuffer *fb; struct i915_vma *vma; @@ -241,14 +239,6 @@ intel_find_initial_plane_obj(struct intel_crtc *crtc, atomic_or(plane->frontbuffer_bit, &to_intel_frontbuffer(fb)->bits); plane_config->vma = vma; - - /* - * Flip to the newly created mapping ASAP, so we can re-use the - * first part of GGTT for WOPCM, prevent flickering, and prevent - * the lookup of sysmem scratch pages.
- */ - plane->check_plane(crtc_state, plane_state); - plane->async_flip(NULL, plane, crtc_state, plane_state, true); return; nofb: diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index d86219dedde2..b732c89816df 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -53,7 +53,6 @@ #define RING_CTL(base) XE_REG((base) + 0x3c) #define RING_CTL_SIZE(size) ((size) - PAGE_SIZE) /* in bytes -> pages */ -#define RING_CTL_SIZE(size) ((size) - PAGE_SIZE) /* in bytes -> pages */ #define RING_START_UDW(base) XE_REG((base) + 0x48) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 5d6fb79957b6..9f4f27d1ef4a 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -380,9 +380,7 @@ int xe_gt_init_early(struct xe_gt *gt) if (err) return err; - xe_wa_process_gt(gt); xe_wa_process_oob(gt); - xe_tuning_process_gt(gt); xe_force_wake_init_gt(gt, gt_to_fw(gt)); spin_lock_init(&gt->global_invl_lock); @@ -474,6 +472,8 @@ static int all_fw_domain_init(struct xe_gt *gt) } xe_gt_mcr_set_implicit_defaults(gt); + xe_wa_process_gt(gt); + xe_tuning_process_gt(gt); xe_reg_sr_apply_mmio(&gt->reg_sr, gt); err = xe_gt_clock_init(gt); diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 913c74d6e2ae..b6a2dd742ebd 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1248,6 +1248,8 @@ static void __guc_exec_queue_fini_async(struct work_struct *w) if (xe_exec_queue_is_lr(q)) cancel_work_sync(&ge->lr_tdr); + /* Confirm no work left behind accessing device structures */ + cancel_delayed_work_sync(&ge->sched.base.work_tdr); release_guc_id(guc, q); xe_sched_entity_fini(&ge->entity); xe_sched_fini(&ge->sched); diff --git a/drivers/gpu/drm/xe/xe_hmm.c b/drivers/gpu/drm/xe/xe_hmm.c index 089834467880..392102515f3d 100644 --- a/drivers/gpu/drm/xe/xe_hmm.c +++ b/drivers/gpu/drm/xe/xe_hmm.c @@ -19,11 +19,10 @@ static u64 xe_npages_in_range(unsigned long start, unsigned long end) return (end - start) >> PAGE_SHIFT; } -/* +/** * xe_mark_range_accessed() - mark a range as accessed, so core mm * has such information for memory eviction or write back to * hard disk - * * @range: the range to mark * @write: if we write to this range, we mark pages in this range * as dirty @@ -43,15 +42,51 @@ static void xe_mark_range_accessed(struct hmm_range *range, bool write) } } -/* +static int xe_alloc_sg(struct xe_device *xe, struct sg_table *st, + struct hmm_range *range, struct rw_semaphore *notifier_sem) +{ + unsigned long i, npages, hmm_pfn; + unsigned long num_chunks = 0; + int ret; + + /* HMM docs say this is needed. */ + ret = down_read_interruptible(notifier_sem); + if (ret) + return ret; + + if (mmu_interval_read_retry(range->notifier, range->notifier_seq)) { + up_read(notifier_sem); + return -EAGAIN; + } + + npages = xe_npages_in_range(range->start, range->end); + for (i = 0; i < npages;) { + unsigned long len; + + hmm_pfn = range->hmm_pfns[i]; + xe_assert(xe, hmm_pfn & HMM_PFN_VALID); + + len = 1UL << hmm_pfn_to_map_order(hmm_pfn); + + /* If order > 0 the page may extend beyond range->start */ + len -= (hmm_pfn & ~HMM_PFN_FLAGS) & (len - 1); + i += len; + num_chunks++; + } + up_read(notifier_sem); + + return sg_alloc_table(st, num_chunks, GFP_KERNEL); +} + +/** * xe_build_sg() - build a scatter gather table for all the physical pages/pfn * in a hmm_range. dma-map pages if necessary.
dma-address is saved in the sg table and will be used to program the GPU page table later. - * @xe: the xe device that will access the dma-address in the sg table * @range: the hmm range that we build the sg table from. range->hmm_pfns[] * has the pfn numbers of pages that back up this hmm address range. * @st: pointer to the sg table. + * @notifier_sem: The xe notifier lock. * @write: whether we write to this range. This decides dma map direction * for system pages. If write we map it bidirectional; otherwise * DMA_TO_DEVICE @@ -78,43 +113,84 @@ static void xe_mark_range_accessed(struct hmm_range *range, bool write) * Returns 0 if successful; -ENOMEM if fails to allocate memory */ static int xe_build_sg(struct xe_device *xe, struct hmm_range *range, - struct sg_table *st, bool write) + struct sg_table *st, + struct rw_semaphore *notifier_sem, + bool write) { + unsigned long npages = xe_npages_in_range(range->start, range->end); struct device *dev = xe->drm.dev; - struct page **pages; - u64 i, npages; - int ret; + struct scatterlist *sgl; + struct page *page; + unsigned long i, j; - npages = xe_npages_in_range(range->start, range->end); - pages = kvmalloc_array(npages, sizeof(*pages), GFP_KERNEL); - if (!pages) - return -ENOMEM; + lockdep_assert_held(notifier_sem); - for (i = 0; i < npages; i++) { - pages[i] = hmm_pfn_to_page(range->hmm_pfns[i]); - xe_assert(xe, !is_device_private_page(pages[i])); + i = 0; + for_each_sg(st->sgl, sgl, st->nents, j) { + unsigned long hmm_pfn, size; + + hmm_pfn = range->hmm_pfns[i]; + page = hmm_pfn_to_page(hmm_pfn); + xe_assert(xe, !is_device_private_page(page)); + + size = 1UL << hmm_pfn_to_map_order(hmm_pfn); + size -= page_to_pfn(page) & (size - 1); + i += size; + + if (unlikely(j == st->nents - 1)) { + if (i > npages) + size -= (i - npages); + sg_mark_end(sgl); + } + sg_set_page(sgl, page, size << PAGE_SHIFT, 0); } + xe_assert(xe, i == npages); - ret = sg_alloc_table_from_pages_segment(st, pages, npages, 0, npages << PAGE_SHIFT, - xe_sg_segment_size(dev), GFP_KERNEL); - if (ret) - goto free_pages; + return dma_map_sgtable(dev, st, write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE, + DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_NO_KERNEL_MAPPING); +} + +static void xe_hmm_userptr_set_mapped(struct xe_userptr_vma *uvma) +{ + struct xe_userptr *userptr = &uvma->userptr; + struct xe_vm *vm = xe_vma_vm(&uvma->vma); + + lockdep_assert_held_write(&vm->lock); + lockdep_assert_held(&vm->userptr.notifier_lock); + + mutex_lock(&userptr->unmap_mutex); + xe_assert(vm->xe, !userptr->mapped); + userptr->mapped = true; + mutex_unlock(&userptr->unmap_mutex); +} + +void xe_hmm_userptr_unmap(struct xe_userptr_vma *uvma) +{ + struct xe_userptr *userptr = &uvma->userptr; + struct xe_vma *vma = &uvma->vma; + bool write = !xe_vma_read_only(vma); + struct xe_vm *vm = xe_vma_vm(vma); + struct xe_device *xe = vm->xe; - ret = dma_map_sgtable(dev, st, write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE, - DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_NO_KERNEL_MAPPING); - if (ret) { - sg_free_table(st); - st = NULL; + if (!lockdep_is_held_type(&vm->userptr.notifier_lock, 0) && + !lockdep_is_held_type(&vm->lock, 0) && + !(vma->gpuva.flags & XE_VMA_DESTROYED)) { + /* Don't unmap in exec critical section. */ + xe_vm_assert_held(vm); + /* Don't unmap while mapping the sg. */ + lockdep_assert_held(&vm->lock); } -free_pages: - kvfree(pages); - return ret; + mutex_lock(&userptr->unmap_mutex); + if (userptr->sg && userptr->mapped) + dma_unmap_sgtable(xe->drm.dev, userptr->sg, + write ?
DMA_BIDIRECTIONAL : DMA_TO_DEVICE, 0); + userptr->mapped = false; + mutex_unlock(&userptr->unmap_mutex); } -/* +/** * xe_hmm_userptr_free_sg() - Free the scatter gather table of userptr - * * @uvma: the userptr vma which holds the scatter gather table * * With function xe_userptr_populate_range, we allocate storage of @@ -124,16 +200,9 @@ free_pages: void xe_hmm_userptr_free_sg(struct xe_userptr_vma *uvma) { struct xe_userptr *userptr = &uvma->userptr; - struct xe_vma *vma = &uvma->vma; - bool write = !xe_vma_read_only(vma); - struct xe_vm *vm = xe_vma_vm(vma); - struct xe_device *xe = vm->xe; - struct device *dev = xe->drm.dev; - - xe_assert(xe, userptr->sg); - dma_unmap_sgtable(dev, userptr->sg, - write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE, 0); + xe_assert(xe_vma_vm(&uvma->vma)->xe, userptr->sg); + xe_hmm_userptr_unmap(uvma); sg_free_table(userptr->sg); userptr->sg = NULL; } @@ -166,13 +235,20 @@ int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma, { unsigned long timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); - unsigned long *pfns, flags = HMM_PFN_REQ_FAULT; + unsigned long *pfns; struct xe_userptr *userptr; struct xe_vma *vma = &uvma->vma; u64 userptr_start = xe_vma_userptr(vma); u64 userptr_end = userptr_start + xe_vma_size(vma); struct xe_vm *vm = xe_vma_vm(vma); - struct hmm_range hmm_range; + struct hmm_range hmm_range = { + .pfn_flags_mask = 0, /* ignore pfns */ + .default_flags = HMM_PFN_REQ_FAULT, + .start = userptr_start, + .end = userptr_end, + .notifier = &uvma->userptr.notifier, + .dev_private_owner = vm->xe, + }; bool write = !xe_vma_read_only(vma); unsigned long notifier_seq; u64 npages; @@ -199,19 +275,14 @@ int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma, return -ENOMEM; if (write) - flags |= HMM_PFN_REQ_WRITE; + hmm_range.default_flags |= HMM_PFN_REQ_WRITE; if (!mmget_not_zero(userptr->notifier.mm)) { ret = -EFAULT; goto free_pfns; } - hmm_range.default_flags = flags; hmm_range.hmm_pfns = pfns; - hmm_range.notifier = &userptr->notifier; - hmm_range.start = userptr_start; - hmm_range.end = userptr_end; - hmm_range.dev_private_owner = vm->xe; while (true) { hmm_range.notifier_seq = mmu_interval_read_begin(&userptr->notifier); @@ -238,16 +309,37 @@ int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma, if (ret) goto free_pfns; - ret = xe_build_sg(vm->xe, &hmm_range, &userptr->sgt, write); + ret = xe_alloc_sg(vm->xe, &userptr->sgt, &hmm_range, &vm->userptr.notifier_lock); if (ret) goto free_pfns; + ret = down_read_interruptible(&vm->userptr.notifier_lock); + if (ret) + goto free_st; + + if (mmu_interval_read_retry(hmm_range.notifier, hmm_range.notifier_seq)) { + ret = -EAGAIN; + goto out_unlock; + } + + ret = xe_build_sg(vm->xe, &hmm_range, &userptr->sgt, + &vm->userptr.notifier_lock, write); + if (ret) + goto out_unlock; + xe_mark_range_accessed(&hmm_range, write); userptr->sg = &userptr->sgt; + xe_hmm_userptr_set_mapped(uvma); userptr->notifier_seq = hmm_range.notifier_seq; + up_read(&vm->userptr.notifier_lock); + kvfree(pfns); + return 0; +out_unlock: + up_read(&vm->userptr.notifier_lock); +free_st: + sg_free_table(&userptr->sgt); free_pfns: kvfree(pfns); return ret; } - diff --git a/drivers/gpu/drm/xe/xe_hmm.h b/drivers/gpu/drm/xe/xe_hmm.h index 909dc2bdcd97..0ea98d8e7bbc 100644 --- a/drivers/gpu/drm/xe/xe_hmm.h +++ b/drivers/gpu/drm/xe/xe_hmm.h @@ -3,9 +3,16 @@ * Copyright © 2024 Intel Corporation */ +#ifndef _XE_HMM_H_ +#define _XE_HMM_H_ + #include <linux/types.h> struct xe_userptr_vma; int
xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma, bool is_mm_mmap_locked); + void xe_hmm_userptr_free_sg(struct xe_userptr_vma *uvma); + +void xe_hmm_userptr_unmap(struct xe_userptr_vma *uvma); +#endif diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index fa873f3d0a9d..eb6cd91e1e22 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -1689,7 +1689,7 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream, stream->oa_buffer.format = &stream->oa->oa_formats[param->oa_format]; stream->sample = param->sample; - stream->periodic = param->period_exponent > 0; + stream->periodic = param->period_exponent >= 0; stream->period_exponent = param->period_exponent; stream->no_preempt = param->no_preempt; stream->wait_num_reports = param->wait_num_reports; @@ -1970,6 +1970,7 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f } param.xef = xef; + param.period_exponent = -1; ret = xe_oa_user_extensions(oa, XE_OA_USER_EXTN_FROM_OPEN, data, 0, ¶m); if (ret) return ret; @@ -2024,7 +2025,7 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f goto err_exec_q; } - if (param.period_exponent > 0) { + if (param.period_exponent >= 0) { u64 oa_period, oa_freq_hz; /* Requesting samples from OAG buffer is a privileged operation */ diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 1ddcc7e79a93..dc24baa84092 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -28,6 +28,8 @@ struct xe_pt_dir { struct xe_pt pt; /** @children: Array of page-table child nodes */ struct xe_ptw *children[XE_PDES]; + /** @staging: Array of page-table staging nodes */ + struct xe_ptw *staging[XE_PDES]; }; #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM) @@ -48,9 +50,10 @@ static struct xe_pt_dir *as_xe_pt_dir(struct xe_pt *pt) return container_of(pt, struct xe_pt_dir, pt); } -static struct xe_pt *xe_pt_entry(struct xe_pt_dir *pt_dir, unsigned int index) +static struct xe_pt * +xe_pt_entry_staging(struct xe_pt_dir *pt_dir, unsigned int index) { - return container_of(pt_dir->children[index], struct xe_pt, base); + return container_of(pt_dir->staging[index], struct xe_pt, base); } static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm, @@ -125,6 +128,7 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile, } pt->bo = bo; pt->base.children = level ? as_xe_pt_dir(pt)->children : NULL; + pt->base.staging = level ? as_xe_pt_dir(pt)->staging : NULL; if (vm->xef) xe_drm_client_add_bo(vm->xef->client, pt->bo); @@ -206,8 +210,8 @@ void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred) struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt); for (i = 0; i < XE_PDES; i++) { - if (xe_pt_entry(pt_dir, i)) - xe_pt_destroy(xe_pt_entry(pt_dir, i), flags, + if (xe_pt_entry_staging(pt_dir, i)) + xe_pt_destroy(xe_pt_entry_staging(pt_dir, i), flags, deferred); } } @@ -376,8 +380,10 @@ xe_pt_insert_entry(struct xe_pt_stage_bind_walk *xe_walk, struct xe_pt *parent, /* Continue building a non-connected subtree. 
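* Within a detached subtree the entry can go straight into both arrays; updates to the connected tree are staged in staging[] and only copied into the live children[] array by xe_pt_commit().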
*/ struct iosys_map *map = &parent->bo->vmap; - if (unlikely(xe_child)) + if (unlikely(xe_child)) { parent->base.children[offset] = &xe_child->base; + parent->base.staging[offset] = &xe_child->base; + } xe_pt_write(xe_walk->vm->xe, map, offset, pte); parent->num_live++; @@ -614,6 +620,7 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, .ops = &xe_pt_stage_bind_ops, .shifts = xe_normal_pt_shifts, .max_level = XE_PT_HIGHEST_LEVEL, + .staging = true, }, .vm = xe_vma_vm(vma), .tile = tile, @@ -873,7 +880,7 @@ static void xe_pt_cancel_bind(struct xe_vma *vma, } } -static void xe_pt_commit_locks_assert(struct xe_vma *vma) +static void xe_pt_commit_prepare_locks_assert(struct xe_vma *vma) { struct xe_vm *vm = xe_vma_vm(vma); @@ -885,6 +892,16 @@ static void xe_pt_commit_locks_assert(struct xe_vma *vma) xe_vm_assert_held(vm); } +static void xe_pt_commit_locks_assert(struct xe_vma *vma) +{ + struct xe_vm *vm = xe_vma_vm(vma); + + xe_pt_commit_prepare_locks_assert(vma); + + if (xe_vma_is_userptr(vma)) + lockdep_assert_held_read(&vm->userptr.notifier_lock); +} + static void xe_pt_commit(struct xe_vma *vma, struct xe_vm_pgtable_update *entries, u32 num_entries, struct llist_head *deferred) @@ -895,13 +912,17 @@ static void xe_pt_commit(struct xe_vma *vma, for (i = 0; i < num_entries; i++) { struct xe_pt *pt = entries[i].pt; + struct xe_pt_dir *pt_dir; if (!pt->level) continue; + pt_dir = as_xe_pt_dir(pt); for (j = 0; j < entries[i].qwords; j++) { struct xe_pt *oldpte = entries[i].pt_entries[j].pt; + int j_ = j + entries[i].ofs; + pt_dir->children[j_] = pt_dir->staging[j_]; xe_pt_destroy(oldpte, xe_vma_vm(vma)->flags, deferred); } } @@ -913,7 +934,7 @@ static void xe_pt_abort_bind(struct xe_vma *vma, { int i, j; - xe_pt_commit_locks_assert(vma); + xe_pt_commit_prepare_locks_assert(vma); for (i = num_entries - 1; i >= 0; --i) { struct xe_pt *pt = entries[i].pt; @@ -928,10 +949,10 @@ static void xe_pt_abort_bind(struct xe_vma *vma, pt_dir = as_xe_pt_dir(pt); for (j = 0; j < entries[i].qwords; j++) { u32 j_ = j + entries[i].ofs; - struct xe_pt *newpte = xe_pt_entry(pt_dir, j_); + struct xe_pt *newpte = xe_pt_entry_staging(pt_dir, j_); struct xe_pt *oldpte = entries[i].pt_entries[j].pt; - pt_dir->children[j_] = oldpte ? &oldpte->base : 0; + pt_dir->staging[j_] = oldpte ? 
&oldpte->base : 0; xe_pt_destroy(newpte, xe_vma_vm(vma)->flags, NULL); } } @@ -943,7 +964,7 @@ static void xe_pt_commit_prepare_bind(struct xe_vma *vma, { u32 i, j; - xe_pt_commit_locks_assert(vma); + xe_pt_commit_prepare_locks_assert(vma); for (i = 0; i < num_entries; i++) { struct xe_pt *pt = entries[i].pt; @@ -961,10 +982,10 @@ static void xe_pt_commit_prepare_bind(struct xe_vma *vma, struct xe_pt *newpte = entries[i].pt_entries[j].pt; struct xe_pt *oldpte = NULL; - if (xe_pt_entry(pt_dir, j_)) - oldpte = xe_pt_entry(pt_dir, j_); + if (xe_pt_entry_staging(pt_dir, j_)) + oldpte = xe_pt_entry_staging(pt_dir, j_); - pt_dir->children[j_] = &newpte->base; + pt_dir->staging[j_] = &newpte->base; entries[i].pt_entries[j].pt = oldpte; } } @@ -1213,42 +1234,22 @@ static int vma_check_userptr(struct xe_vm *vm, struct xe_vma *vma, return 0; uvma = to_userptr_vma(vma); - notifier_seq = uvma->userptr.notifier_seq; + if (xe_pt_userptr_inject_eagain(uvma)) + xe_vma_userptr_force_invalidate(uvma); - if (uvma->userptr.initial_bind && !xe_vm_in_fault_mode(vm)) - return 0; + notifier_seq = uvma->userptr.notifier_seq; if (!mmu_interval_read_retry(&uvma->userptr.notifier, - notifier_seq) && - !xe_pt_userptr_inject_eagain(uvma)) + notifier_seq)) return 0; - if (xe_vm_in_fault_mode(vm)) { + if (xe_vm_in_fault_mode(vm)) return -EAGAIN; - } else { - spin_lock(&vm->userptr.invalidated_lock); - list_move_tail(&uvma->userptr.invalidate_link, - &vm->userptr.invalidated); - spin_unlock(&vm->userptr.invalidated_lock); - - if (xe_vm_in_preempt_fence_mode(vm)) { - struct dma_resv_iter cursor; - struct dma_fence *fence; - long err; - - dma_resv_iter_begin(&cursor, xe_vm_resv(vm), - DMA_RESV_USAGE_BOOKKEEP); - dma_resv_for_each_fence_unlocked(&cursor, fence) - dma_fence_enable_sw_signaling(fence); - dma_resv_iter_end(&cursor); - - err = dma_resv_wait_timeout(xe_vm_resv(vm), - DMA_RESV_USAGE_BOOKKEEP, - false, MAX_SCHEDULE_TIMEOUT); - XE_WARN_ON(err <= 0); - } - } + /* + * Just continue the operation since exec or rebind worker + * will take care of rebinding. + */ return 0; } @@ -1514,6 +1515,7 @@ static unsigned int xe_pt_stage_unbind(struct xe_tile *tile, struct xe_vma *vma, .ops = &xe_pt_stage_unbind_ops, .shifts = xe_normal_pt_shifts, .max_level = XE_PT_HIGHEST_LEVEL, + .staging = true, }, .tile = tile, .modified_start = xe_vma_start(vma), @@ -1555,7 +1557,7 @@ static void xe_pt_abort_unbind(struct xe_vma *vma, { int i, j; - xe_pt_commit_locks_assert(vma); + xe_pt_commit_prepare_locks_assert(vma); for (i = num_entries - 1; i >= 0; --i) { struct xe_vm_pgtable_update *entry = &entries[i]; @@ -1568,7 +1570,7 @@ static void xe_pt_abort_unbind(struct xe_vma *vma, continue; for (j = entry->ofs; j < entry->ofs + entry->qwords; j++) - pt_dir->children[j] = + pt_dir->staging[j] = entries[i].pt_entries[j - entry->ofs].pt ? 
&entries[i].pt_entries[j - entry->ofs].pt->base : NULL; } @@ -1581,7 +1583,7 @@ xe_pt_commit_prepare_unbind(struct xe_vma *vma, { int i, j; - xe_pt_commit_locks_assert(vma); + xe_pt_commit_prepare_locks_assert(vma); for (i = 0; i < num_entries; ++i) { struct xe_vm_pgtable_update *entry = &entries[i]; @@ -1595,8 +1597,8 @@ xe_pt_commit_prepare_unbind(struct xe_vma *vma, pt_dir = as_xe_pt_dir(pt); for (j = entry->ofs; j < entry->ofs + entry->qwords; j++) { entry->pt_entries[j - entry->ofs].pt = - xe_pt_entry(pt_dir, j); - pt_dir->children[j] = NULL; + xe_pt_entry_staging(pt_dir, j); + pt_dir->staging[j] = NULL; } } } diff --git a/drivers/gpu/drm/xe/xe_pt_walk.c b/drivers/gpu/drm/xe/xe_pt_walk.c index b8b3d2aea492..be602a763ff3 100644 --- a/drivers/gpu/drm/xe/xe_pt_walk.c +++ b/drivers/gpu/drm/xe/xe_pt_walk.c @@ -74,7 +74,8 @@ int xe_pt_walk_range(struct xe_ptw *parent, unsigned int level, u64 addr, u64 end, struct xe_pt_walk *walk) { pgoff_t offset = xe_pt_offset(addr, level, walk); - struct xe_ptw **entries = parent->children ? parent->children : NULL; + struct xe_ptw **entries = walk->staging ? (parent->staging ?: NULL) : + (parent->children ?: NULL); const struct xe_pt_walk_ops *ops = walk->ops; enum page_walk_action action; struct xe_ptw *child; diff --git a/drivers/gpu/drm/xe/xe_pt_walk.h b/drivers/gpu/drm/xe/xe_pt_walk.h index 5ecc4d2f0f65..5c02c244f7de 100644 --- a/drivers/gpu/drm/xe/xe_pt_walk.h +++ b/drivers/gpu/drm/xe/xe_pt_walk.h @@ -11,12 +11,14 @@ /** * struct xe_ptw - base class for driver pagetable subclassing. * @children: Pointer to an array of children if any. + * @staging: Pointer to an array of staging if any. * * Drivers could subclass this, and if it's a page-directory, typically * embed an array of xe_ptw pointers. */ struct xe_ptw { struct xe_ptw **children; + struct xe_ptw **staging; }; /** @@ -41,6 +43,8 @@ struct xe_pt_walk { * as shared pagetables. */ bool shared_pt_mode; + /** @staging: Walk staging PT structure */ + bool staging; }; /** diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 690330352d4c..ec6ec18ab3fa 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -579,51 +579,26 @@ out_unlock_outer: trace_xe_vm_rebind_worker_exit(vm); } -static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni, - const struct mmu_notifier_range *range, - unsigned long cur_seq) +static void __vma_userptr_invalidate(struct xe_vm *vm, struct xe_userptr_vma *uvma) { - struct xe_userptr *userptr = container_of(mni, typeof(*userptr), notifier); - struct xe_userptr_vma *uvma = container_of(userptr, typeof(*uvma), userptr); + struct xe_userptr *userptr = &uvma->userptr; struct xe_vma *vma = &uvma->vma; - struct xe_vm *vm = xe_vma_vm(vma); struct dma_resv_iter cursor; struct dma_fence *fence; long err; - xe_assert(vm->xe, xe_vma_is_userptr(vma)); - trace_xe_vma_userptr_invalidate(vma); - - if (!mmu_notifier_range_blockable(range)) - return false; - - vm_dbg(&xe_vma_vm(vma)->xe->drm, - "NOTIFIER: addr=0x%016llx, range=0x%016llx", - xe_vma_start(vma), xe_vma_size(vma)); - - down_write(&vm->userptr.notifier_lock); - mmu_interval_set_seq(mni, cur_seq); - - /* No need to stop gpu access if the userptr is not yet bound. */ - if (!userptr->initial_bind) { - up_write(&vm->userptr.notifier_lock); - return true; - } - /* * Tell exec and rebind worker they need to repin and rebind this * userptr. 
*/ if (!xe_vm_in_fault_mode(vm) && - !(vma->gpuva.flags & XE_VMA_DESTROYED) && vma->tile_present) { + !(vma->gpuva.flags & XE_VMA_DESTROYED)) { spin_lock(&vm->userptr.invalidated_lock); list_move_tail(&userptr->invalidate_link, &vm->userptr.invalidated); spin_unlock(&vm->userptr.invalidated_lock); } - up_write(&vm->userptr.notifier_lock); - /* * Preempt fences turn into schedule disables, pipeline these. * Note that even in fault mode, we need to wait for binds and @@ -641,11 +616,37 @@ static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni, false, MAX_SCHEDULE_TIMEOUT); XE_WARN_ON(err <= 0); - if (xe_vm_in_fault_mode(vm)) { + if (xe_vm_in_fault_mode(vm) && userptr->initial_bind) { err = xe_vm_invalidate_vma(vma); XE_WARN_ON(err); } + xe_hmm_userptr_unmap(uvma); +} + +static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni, + const struct mmu_notifier_range *range, + unsigned long cur_seq) +{ + struct xe_userptr_vma *uvma = container_of(mni, typeof(*uvma), userptr.notifier); + struct xe_vma *vma = &uvma->vma; + struct xe_vm *vm = xe_vma_vm(vma); + + xe_assert(vm->xe, xe_vma_is_userptr(vma)); + trace_xe_vma_userptr_invalidate(vma); + + if (!mmu_notifier_range_blockable(range)) + return false; + + vm_dbg(&xe_vma_vm(vma)->xe->drm, + "NOTIFIER: addr=0x%016llx, range=0x%016llx", + xe_vma_start(vma), xe_vma_size(vma)); + + down_write(&vm->userptr.notifier_lock); + mmu_interval_set_seq(mni, cur_seq); + + __vma_userptr_invalidate(vm, uvma); + up_write(&vm->userptr.notifier_lock); trace_xe_vma_userptr_invalidate_complete(vma); return true; @@ -655,6 +656,34 @@ static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = { .invalidate = vma_userptr_invalidate, }; +#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) +/** + * xe_vma_userptr_force_invalidate() - force invalidate a userptr + * @uvma: The userptr vma to invalidate + * + * Perform a forced userptr invalidation for testing purposes. 
+ */ +void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma) +{ + struct xe_vm *vm = xe_vma_vm(&uvma->vma); + + /* Protect against concurrent userptr pinning */ + lockdep_assert_held(&vm->lock); + /* Protect against concurrent notifiers */ + lockdep_assert_held(&vm->userptr.notifier_lock); + /* + * Protect against concurrent instances of this function and + * the critical exec sections + */ + xe_vm_assert_held(vm); + + if (!mmu_interval_read_retry(&uvma->userptr.notifier, + uvma->userptr.notifier_seq)) + uvma->userptr.notifier_seq -= 2; + __vma_userptr_invalidate(vm, uvma); +} +#endif + int xe_vm_userptr_pin(struct xe_vm *vm) { struct xe_userptr_vma *uvma, *next; @@ -666,20 +695,33 @@ int xe_vm_userptr_pin(struct xe_vm *vm) /* Collect invalidated userptrs */ spin_lock(&vm->userptr.invalidated_lock); + xe_assert(vm->xe, list_empty(&vm->userptr.repin_list)); list_for_each_entry_safe(uvma, next, &vm->userptr.invalidated, userptr.invalidate_link) { list_del_init(&uvma->userptr.invalidate_link); - list_move_tail(&uvma->userptr.repin_link, - &vm->userptr.repin_list); + list_add_tail(&uvma->userptr.repin_link, + &vm->userptr.repin_list); } spin_unlock(&vm->userptr.invalidated_lock); - /* Pin and move to temporary list */ + /* Pin and move to bind list */ list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list, userptr.repin_link) { err = xe_vma_userptr_pin_pages(uvma); if (err == -EFAULT) { list_del_init(&uvma->userptr.repin_link); + /* + * We might have done the pin once already, but + * then had to retry before the re-bind happened, due to + * some other condition in the caller, but in the + * meantime the userptr got dinged by the notifier such + * that we need to revalidate here, but this time we hit + * the EFAULT. In such a case make sure we remove + * ourselves from the rebind list to avoid going down in + * flames.
+ */ + if (!list_empty(&uvma->vma.combined_links.rebind)) + list_del_init(&uvma->vma.combined_links.rebind); /* Wait for pending binds */ xe_vm_lock(vm, false); @@ -690,10 +732,10 @@ int xe_vm_userptr_pin(struct xe_vm *vm) err = xe_vm_invalidate_vma(&uvma->vma); xe_vm_unlock(vm); if (err) - return err; + break; } else { - if (err < 0) - return err; + if (err) + break; list_del_init(&uvma->userptr.repin_link); list_move_tail(&uvma->vma.combined_links.rebind, @@ -701,7 +743,19 @@ int xe_vm_userptr_pin(struct xe_vm *vm) } } - return 0; + if (err) { + down_write(&vm->userptr.notifier_lock); + spin_lock(&vm->userptr.invalidated_lock); + list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list, + userptr.repin_link) { + list_del_init(&uvma->userptr.repin_link); + list_move_tail(&uvma->userptr.invalidate_link, + &vm->userptr.invalidated); + } + spin_unlock(&vm->userptr.invalidated_lock); + up_write(&vm->userptr.notifier_lock); + } + return err; } /** @@ -987,6 +1041,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, INIT_LIST_HEAD(&userptr->invalidate_link); INIT_LIST_HEAD(&userptr->repin_link); vma->gpuva.gem.offset = bo_offset_or_userptr; + mutex_init(&userptr->unmap_mutex); err = mmu_interval_notifier_insert(&userptr->notifier, current->mm, @@ -1028,6 +1083,7 @@ static void xe_vma_destroy_late(struct xe_vma *vma) * them anymore */ mmu_interval_notifier_remove(&userptr->notifier); + mutex_destroy(&userptr->unmap_mutex); xe_vm_put(vm); } else if (xe_vma_is_null(vma)) { xe_vm_put(vm); @@ -1066,6 +1122,7 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence) xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED); spin_lock(&vm->userptr.invalidated_lock); + xe_assert(vm->xe, list_empty(&to_userptr_vma(vma)->userptr.repin_link)); list_del(&to_userptr_vma(vma)->userptr.invalidate_link); spin_unlock(&vm->userptr.invalidated_lock); } else if (!xe_vma_is_null(vma)) { @@ -2260,8 +2317,17 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, break; } case DRM_GPUVA_OP_UNMAP: + xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); + break; case DRM_GPUVA_OP_PREFETCH: - /* FIXME: Need to skip some prefetch ops */ + vma = gpuva_to_vma(op->base.prefetch.va); + + if (xe_vma_is_userptr(vma)) { + err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); + if (err) + return err; + } + xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); break; default: diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index 23adb7442881..b882bfb31bd0 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -274,9 +274,17 @@ static inline void vm_dbg(const struct drm_device *dev, const char *format, ...) 
{ /* noop */ } #endif -#endif struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm); void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap); void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p); void xe_vm_snapshot_free(struct xe_vm_snapshot *snap); + +#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) +void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma); +#else +static inline void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma) +{ +} +#endif +#endif diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 7f9a303e51d8..a4b4091cfd0d 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -59,12 +59,16 @@ struct xe_userptr { struct sg_table *sg; /** @notifier_seq: notifier sequence number */ unsigned long notifier_seq; + /** @unmap_mutex: Mutex protecting dma-unmapping */ + struct mutex unmap_mutex; /** * @initial_bind: user pointer has been bound at least once. * write: vm->userptr.notifier_lock in read mode and vm->resv held. * read: vm->userptr.notifier_lock in write mode or vm->resv held. */ bool initial_bind; + /** @mapped: Whether the @sgt sg-table is dma-mapped. Protected by @unmap_mutex. */ + bool mapped; #if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) u32 divisor; #endif @@ -227,8 +231,8 @@ struct xe_vm { * up for revalidation. Protected from access with the * @invalidated_lock. Removing items from the list * additionally requires @lock in write mode, and adding - * items to the list requires the @userptr.notifer_lock in - * write mode. + * items to the list requires either the @userptr.notifer_lock in + * write mode, OR @lock in write mode. */ struct list_head invalidated; } userptr; diff --git a/drivers/hid/hid-apple.c b/drivers/hid/hid-apple.c index 49812a76b7ed..d900dd05c335 100644 --- a/drivers/hid/hid-apple.c +++ b/drivers/hid/hid-apple.c @@ -378,6 +378,12 @@ static bool apple_is_non_apple_keyboard(struct hid_device *hdev) return false; } +static bool apple_is_omoton_kb066(struct hid_device *hdev) +{ + return hdev->product == USB_DEVICE_ID_APPLE_ALU_WIRELESS_ANSI && + strcmp(hdev->name, "Bluetooth Keyboard") == 0; +} + static inline void apple_setup_key_translation(struct input_dev *input, const struct apple_key_translation *table) { @@ -546,9 +552,6 @@ static int hidinput_apple_event(struct hid_device *hid, struct input_dev *input, } } - if (usage->hid == 0xc0301) /* Omoton KB066 quirk */ - code = KEY_F6; - if (usage->code != code) { input_event_with_scancode(input, usage->type, code, usage->hid, value); @@ -728,7 +731,7 @@ static int apple_input_configured(struct hid_device *hdev, { struct apple_sc *asc = hid_get_drvdata(hdev); - if ((asc->quirks & APPLE_HAS_FN) && !asc->fn_found) { + if (((asc->quirks & APPLE_HAS_FN) && !asc->fn_found) || apple_is_omoton_kb066(hdev)) { hid_info(hdev, "Fn key not found (Apple Wireless Keyboard clone?), disabling Fn key handling\n"); asc->quirks &= ~APPLE_HAS_FN; } diff --git a/drivers/hid/hid-appleir.c b/drivers/hid/hid-appleir.c index 8deded185725..c45e5aa569d2 100644 --- a/drivers/hid/hid-appleir.c +++ b/drivers/hid/hid-appleir.c @@ -188,7 +188,7 @@ static int appleir_raw_event(struct hid_device *hid, struct hid_report *report, static const u8 flatbattery[] = { 0x25, 0x87, 0xe0 }; unsigned long flags; - if (len != 5) + if (len != 5 || !(hid->claimed & HID_CLAIMED_INPUT)) goto out; if (!memcmp(data, keydown, sizeof(keydown))) { diff --git a/drivers/hid/hid-corsair-void.c b/drivers/hid/hid-corsair-void.c 
b/drivers/hid/hid-corsair-void.c
index 56e858066c3c..afbd67aa9719 100644 --- a/drivers/hid/hid-corsair-void.c +++ b/drivers/hid/hid-corsair-void.c @@ -71,11 +71,9 @@ #include <linux/bitfield.h> #include <linux/bitops.h> -#include <linux/cleanup.h> #include <linux/device.h> #include <linux/hid.h> #include <linux/module.h> -#include <linux/mutex.h> #include <linux/power_supply.h> #include <linux/usb.h> #include <linux/workqueue.h> @@ -120,6 +118,12 @@ enum { CORSAIR_VOID_BATTERY_CHARGING = 5, }; +enum { + CORSAIR_VOID_ADD_BATTERY = 0, + CORSAIR_VOID_REMOVE_BATTERY = 1, + CORSAIR_VOID_UPDATE_BATTERY = 2, +}; + static enum power_supply_property corsair_void_battery_props[] = { POWER_SUPPLY_PROP_STATUS, POWER_SUPPLY_PROP_PRESENT, @@ -155,12 +159,12 @@ struct corsair_void_drvdata { struct power_supply *battery; struct power_supply_desc battery_desc; - struct mutex battery_mutex; struct delayed_work delayed_status_work; struct delayed_work delayed_firmware_work; - struct work_struct battery_remove_work; - struct work_struct battery_add_work; + + unsigned long battery_work_flags; + struct work_struct battery_work; }; /* @@ -260,11 +264,9 @@ success: /* Inform power supply if battery values changed */ if (memcmp(&orig_battery_data, battery_data, sizeof(*battery_data))) { - scoped_guard(mutex, &drvdata->battery_mutex) { - if (drvdata->battery) { - power_supply_changed(drvdata->battery); - } - } + set_bit(CORSAIR_VOID_UPDATE_BATTERY, + &drvdata->battery_work_flags); + schedule_work(&drvdata->battery_work); } } @@ -536,29 +538,11 @@ static void corsair_void_firmware_work_handler(struct work_struct *work) } -static void corsair_void_battery_remove_work_handler(struct work_struct *work) -{ - struct corsair_void_drvdata *drvdata; - - drvdata = container_of(work, struct corsair_void_drvdata, - battery_remove_work); - scoped_guard(mutex, &drvdata->battery_mutex) { - if (drvdata->battery) { - power_supply_unregister(drvdata->battery); - drvdata->battery = NULL; - } - } -} - -static void corsair_void_battery_add_work_handler(struct work_struct *work) +static void corsair_void_add_battery(struct corsair_void_drvdata *drvdata) { - struct corsair_void_drvdata *drvdata; struct power_supply_config psy_cfg = {}; struct power_supply *new_supply; - drvdata = container_of(work, struct corsair_void_drvdata, - battery_add_work); - guard(mutex)(&drvdata->battery_mutex); if (drvdata->battery) return; @@ -583,16 +567,42 @@ static void corsair_void_battery_add_work_handler(struct work_struct *work) drvdata->battery = new_supply; } +static void corsair_void_battery_work_handler(struct work_struct *work) +{ + struct corsair_void_drvdata *drvdata = container_of(work, + struct corsair_void_drvdata, battery_work); + + bool add_battery = test_and_clear_bit(CORSAIR_VOID_ADD_BATTERY, + &drvdata->battery_work_flags); + bool remove_battery = test_and_clear_bit(CORSAIR_VOID_REMOVE_BATTERY, + &drvdata->battery_work_flags); + bool update_battery = test_and_clear_bit(CORSAIR_VOID_UPDATE_BATTERY, + &drvdata->battery_work_flags); + + if (add_battery && !remove_battery) { + corsair_void_add_battery(drvdata); + } else if (remove_battery && !add_battery && drvdata->battery) { + power_supply_unregister(drvdata->battery); + drvdata->battery = NULL; + } + + if (update_battery && drvdata->battery) + power_supply_changed(drvdata->battery); + +} + static void corsair_void_headset_connected(struct corsair_void_drvdata *drvdata) { - schedule_work(&drvdata->battery_add_work); + set_bit(CORSAIR_VOID_ADD_BATTERY, &drvdata->battery_work_flags); + schedule_work(&drvdata->battery_work); 
schedule_delayed_work(&drvdata->delayed_firmware_work, msecs_to_jiffies(100)); } static void corsair_void_headset_disconnected(struct corsair_void_drvdata *drvdata) { - schedule_work(&drvdata->battery_remove_work); + set_bit(CORSAIR_VOID_REMOVE_BATTERY, &drvdata->battery_work_flags); + schedule_work(&drvdata->battery_work); corsair_void_set_unknown_wireless_data(drvdata); corsair_void_set_unknown_batt(drvdata); @@ -678,13 +688,7 @@ static int corsair_void_probe(struct hid_device *hid_dev, drvdata->battery_desc.get_property = corsair_void_battery_get_property; drvdata->battery = NULL; - INIT_WORK(&drvdata->battery_remove_work, - corsair_void_battery_remove_work_handler); - INIT_WORK(&drvdata->battery_add_work, - corsair_void_battery_add_work_handler); - ret = devm_mutex_init(drvdata->dev, &drvdata->battery_mutex); - if (ret) - return ret; + INIT_WORK(&drvdata->battery_work, corsair_void_battery_work_handler); ret = sysfs_create_group(&hid_dev->dev.kobj, &corsair_void_attr_group); if (ret) @@ -721,8 +725,7 @@ static void corsair_void_remove(struct hid_device *hid_dev) struct corsair_void_drvdata *drvdata = hid_get_drvdata(hid_dev); hid_hw_stop(hid_dev); - cancel_work_sync(&drvdata->battery_remove_work); - cancel_work_sync(&drvdata->battery_add_work); + cancel_work_sync(&drvdata->battery_work); if (drvdata->battery) power_supply_unregister(drvdata->battery); diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c index 541d682af15a..8433306148d5 100644 --- a/drivers/hid/hid-debug.c +++ b/drivers/hid/hid-debug.c @@ -3450,7 +3450,7 @@ static const char *keys[KEY_MAX + 1] = { [KEY_MACRO_RECORD_START] = "MacroRecordStart", [KEY_MACRO_RECORD_STOP] = "MacroRecordStop", [KEY_MARK_WAYPOINT] = "MarkWayPoint", [KEY_MEDIA_REPEAT] = "MediaRepeat", - [KEY_MEDIA_TOP_MENU] = "MediaTopMenu", [KEY_MESSENGER] = "Messanger", + [KEY_MEDIA_TOP_MENU] = "MediaTopMenu", [KEY_MESSENGER] = "Messenger", [KEY_NAV_CHART] = "NavChar", [KEY_NAV_INFO] = "NavInfo", [KEY_NEWS] = "News", [KEY_NEXT_ELEMENT] = "NextElement", [KEY_NEXT_FAVORITE] = "NextFavorite", [KEY_NOTIFICATION_CENTER] = "NotificationCenter", diff --git a/drivers/hid/hid-google-hammer.c b/drivers/hid/hid-google-hammer.c index 0f292b5d3e26..eb6fd2dc75d0 100644 --- a/drivers/hid/hid-google-hammer.c +++ b/drivers/hid/hid-google-hammer.c @@ -268,11 +268,13 @@ static void cbas_ec_remove(struct platform_device *pdev) mutex_unlock(&cbas_ec_reglock); } +#ifdef CONFIG_ACPI static const struct acpi_device_id cbas_ec_acpi_ids[] = { { "GOOG000B", 0 }, { } }; MODULE_DEVICE_TABLE(acpi, cbas_ec_acpi_ids); +#endif #ifdef CONFIG_OF static const struct of_device_id cbas_ec_of_match[] = { diff --git a/drivers/hid/hid-nintendo.c b/drivers/hid/hid-nintendo.c index 11ac246176ae..839d5bcd72b1 100644 --- a/drivers/hid/hid-nintendo.c +++ b/drivers/hid/hid-nintendo.c @@ -457,13 +457,13 @@ static const struct joycon_ctlr_button_mapping snescon_button_mappings[] = { }; static const struct joycon_ctlr_button_mapping gencon_button_mappings[] = { - { BTN_A, JC_BTN_A, }, - { BTN_B, JC_BTN_B, }, - { BTN_C, JC_BTN_R, }, - { BTN_X, JC_BTN_X, }, /* MD/GEN 6B Only */ - { BTN_Y, JC_BTN_Y, }, /* MD/GEN 6B Only */ - { BTN_Z, JC_BTN_L, }, /* MD/GEN 6B Only */ - { BTN_SELECT, JC_BTN_ZR, }, + { BTN_WEST, JC_BTN_A, }, /* A */ + { BTN_SOUTH, JC_BTN_B, }, /* B */ + { BTN_EAST, JC_BTN_R, }, /* C */ + { BTN_TL, JC_BTN_X, }, /* X MD/GEN 6B Only */ + { BTN_NORTH, JC_BTN_Y, }, /* Y MD/GEN 6B Only */ + { BTN_TR, JC_BTN_L, }, /* Z MD/GEN 6B Only */ + { BTN_SELECT, JC_BTN_ZR, }, /* Mode */ { BTN_START, 
JC_BTN_PLUS, }, { BTN_MODE, JC_BTN_HOME, }, { BTN_Z, JC_BTN_CAP, }, diff --git a/drivers/hid/hid-steam.c b/drivers/hid/hid-steam.c index c9e65e9088b3..10460b7bde1a 100644 --- a/drivers/hid/hid-steam.c +++ b/drivers/hid/hid-steam.c @@ -1327,11 +1327,11 @@ static void steam_remove(struct hid_device *hdev) return; } + hid_destroy_device(steam->client_hdev); cancel_delayed_work_sync(&steam->mode_switch); cancel_work_sync(&steam->work_connect); cancel_work_sync(&steam->rumble_work); cancel_work_sync(&steam->unregister_work); - hid_destroy_device(steam->client_hdev); steam->client_hdev = NULL; steam->client_opened = 0; if (steam->quirks & STEAM_QUIRK_WIRELESS) { diff --git a/drivers/hid/i2c-hid/i2c-hid-core.c b/drivers/hid/i2c-hid/i2c-hid-core.c index 75544448c239..d3912e3f2f13 100644 --- a/drivers/hid/i2c-hid/i2c-hid-core.c +++ b/drivers/hid/i2c-hid/i2c-hid-core.c @@ -290,7 +290,7 @@ static int i2c_hid_get_report(struct i2c_hid *ihid, ihid->rawbuf, recv_len + sizeof(__le16)); if (error) { dev_err(&ihid->client->dev, - "failed to set a report to device: %d\n", error); + "failed to get a report from device: %d\n", error); return error; } diff --git a/drivers/hid/intel-ish-hid/ishtp-hid-client.c b/drivers/hid/intel-ish-hid/ishtp-hid-client.c index cb04cd1d980b..6550ad5bfbb5 100644 --- a/drivers/hid/intel-ish-hid/ishtp-hid-client.c +++ b/drivers/hid/intel-ish-hid/ishtp-hid-client.c @@ -832,9 +832,9 @@ static void hid_ishtp_cl_remove(struct ishtp_cl_device *cl_device) hid_ishtp_cl); dev_dbg(ishtp_device(cl_device), "%s\n", __func__); - hid_ishtp_cl_deinit(hid_ishtp_cl); ishtp_put_device(cl_device); ishtp_hid_remove(client_data); + hid_ishtp_cl_deinit(hid_ishtp_cl); hid_ishtp_cl = NULL; diff --git a/drivers/hid/intel-ish-hid/ishtp-hid.c b/drivers/hid/intel-ish-hid/ishtp-hid.c index 00c6f0ebf356..be2c62fc8251 100644 --- a/drivers/hid/intel-ish-hid/ishtp-hid.c +++ b/drivers/hid/intel-ish-hid/ishtp-hid.c @@ -261,12 +261,14 @@ err_hid_data: */ void ishtp_hid_remove(struct ishtp_cl_data *client_data) { + void *data; int i; for (i = 0; i < client_data->num_hid_devices; ++i) { if (client_data->hid_sensor_hubs[i]) { - kfree(client_data->hid_sensor_hubs[i]->driver_data); + data = client_data->hid_sensor_hubs[i]->driver_data; hid_destroy_device(client_data->hid_sensor_hubs[i]); + kfree(data); client_data->hid_sensor_hubs[i] = NULL; } } diff --git a/drivers/hid/intel-thc-hid/intel-quickspi/pci-quickspi.c b/drivers/hid/intel-thc-hid/intel-quickspi/pci-quickspi.c index 4641e818dfa4..6b2c7620be2b 100644 --- a/drivers/hid/intel-thc-hid/intel-quickspi/pci-quickspi.c +++ b/drivers/hid/intel-thc-hid/intel-quickspi/pci-quickspi.c @@ -909,6 +909,8 @@ static int quickspi_restore(struct device *device) thc_change_ltr_mode(qsdev->thc_hw, THC_LTR_MODE_ACTIVE); + qsdev->state = QUICKSPI_ENABLED; + return 0; } diff --git a/drivers/hid/intel-thc-hid/intel-quickspi/quickspi-protocol.c b/drivers/hid/intel-thc-hid/intel-quickspi/quickspi-protocol.c index 7373238ceb18..918050af73e5 100644 --- a/drivers/hid/intel-thc-hid/intel-quickspi/quickspi-protocol.c +++ b/drivers/hid/intel-thc-hid/intel-quickspi/quickspi-protocol.c @@ -107,7 +107,7 @@ static int quickspi_get_device_descriptor(struct quickspi_device *qsdev) return 0; } - dev_err_once(qsdev->dev, "Unexpected intput report type: %d\n", input_rep_type); + dev_err_once(qsdev->dev, "Unexpected input report type: %d\n", input_rep_type); return -EINVAL; } diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 0f6cd44fff29..6e55a1a2613d 100644 --- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c @@ -2262,12 +2262,25 @@ void vmbus_free_mmio(resource_size_t start, resource_size_t size) struct resource *iter; mutex_lock(&hyperv_mmio_lock); + + /* + * If all bytes of the MMIO range to be released are within the + * special case fb_mmio shadow region, skip releasing the shadow + * region since no corresponding __request_region() was done + * in vmbus_allocate_mmio(). + */ + if (fb_mmio && start >= fb_mmio->start && + (start + size - 1 <= fb_mmio->end)) + goto skip_shadow_release; + for (iter = hyperv_mmio; iter; iter = iter->sibling) { if ((iter->start >= start + size) || (iter->end <= start)) continue; __release_region(iter, start, size); } + +skip_shadow_release: release_mem_region(start, size); mutex_unlock(&hyperv_mmio_lock); diff --git a/drivers/hwmon/ad7314.c b/drivers/hwmon/ad7314.c index 7802bbf5f958..59424103f634 100644 --- a/drivers/hwmon/ad7314.c +++ b/drivers/hwmon/ad7314.c @@ -22,11 +22,13 @@ */ #define AD7314_TEMP_MASK 0x7FE0 #define AD7314_TEMP_SHIFT 5 +#define AD7314_LEADING_ZEROS_MASK BIT(15) /* * ADT7301 and ADT7302 temperature masks */ #define ADT7301_TEMP_MASK 0x3FFF +#define ADT7301_LEADING_ZEROS_MASK (BIT(15) | BIT(14)) enum ad7314_variant { adt7301, @@ -65,12 +67,20 @@ static ssize_t ad7314_temperature_show(struct device *dev, return ret; switch (spi_get_device_id(chip->spi_dev)->driver_data) { case ad7314: + if (ret & AD7314_LEADING_ZEROS_MASK) { + /* Invalid read-out, leading zero part is missing */ + return -EIO; + } data = (ret & AD7314_TEMP_MASK) >> AD7314_TEMP_SHIFT; data = sign_extend32(data, 9); return sprintf(buf, "%d\n", 250 * data); case adt7301: case adt7302: + if (ret & ADT7301_LEADING_ZEROS_MASK) { + /* Invalid read-out, leading zero part is missing */ + return -EIO; + } /* * Documented as a 13 bit twos complement register * with a sign bit - which is a 14 bit 2's complement diff --git a/drivers/hwmon/ntc_thermistor.c b/drivers/hwmon/ntc_thermistor.c index b5352900463f..0d29c8f97ba7 100644 --- a/drivers/hwmon/ntc_thermistor.c +++ b/drivers/hwmon/ntc_thermistor.c @@ -181,40 +181,40 @@ static const struct ntc_compensation ncpXXwf104[] = { }; static const struct ntc_compensation ncpXXxh103[] = { - { .temp_c = -40, .ohm = 247565 }, - { .temp_c = -35, .ohm = 181742 }, - { .temp_c = -30, .ohm = 135128 }, - { .temp_c = -25, .ohm = 101678 }, - { .temp_c = -20, .ohm = 77373 }, - { .temp_c = -15, .ohm = 59504 }, - { .temp_c = -10, .ohm = 46222 }, - { .temp_c = -5, .ohm = 36244 }, - { .temp_c = 0, .ohm = 28674 }, - { .temp_c = 5, .ohm = 22878 }, - { .temp_c = 10, .ohm = 18399 }, - { .temp_c = 15, .ohm = 14910 }, - { .temp_c = 20, .ohm = 12169 }, + { .temp_c = -40, .ohm = 195652 }, + { .temp_c = -35, .ohm = 148171 }, + { .temp_c = -30, .ohm = 113347 }, + { .temp_c = -25, .ohm = 87559 }, + { .temp_c = -20, .ohm = 68237 }, + { .temp_c = -15, .ohm = 53650 }, + { .temp_c = -10, .ohm = 42506 }, + { .temp_c = -5, .ohm = 33892 }, + { .temp_c = 0, .ohm = 27219 }, + { .temp_c = 5, .ohm = 22021 }, + { .temp_c = 10, .ohm = 17926 }, + { .temp_c = 15, .ohm = 14674 }, + { .temp_c = 20, .ohm = 12081 }, { .temp_c = 25, .ohm = 10000 }, - { .temp_c = 30, .ohm = 8271 }, - { .temp_c = 35, .ohm = 6883 }, - { .temp_c = 40, .ohm = 5762 }, - { .temp_c = 45, .ohm = 4851 }, - { .temp_c = 50, .ohm = 4105 }, - { .temp_c = 55, .ohm = 3492 }, - { .temp_c = 60, .ohm = 2985 }, - { .temp_c = 65, .ohm = 2563 }, - { .temp_c = 70, .ohm = 2211 }, - { .temp_c = 75, .ohm = 1915 }, - { .temp_c = 80, .ohm = 1666 }, - { .temp_c = 85, .ohm = 1454 }, - { .temp_c = 90, .ohm = 
1275 }, - { .temp_c = 95, .ohm = 1121 }, - { .temp_c = 100, .ohm = 990 }, - { .temp_c = 105, .ohm = 876 }, - { .temp_c = 110, .ohm = 779 }, - { .temp_c = 115, .ohm = 694 }, - { .temp_c = 120, .ohm = 620 }, - { .temp_c = 125, .ohm = 556 }, + { .temp_c = 30, .ohm = 8315 }, + { .temp_c = 35, .ohm = 6948 }, + { .temp_c = 40, .ohm = 5834 }, + { .temp_c = 45, .ohm = 4917 }, + { .temp_c = 50, .ohm = 4161 }, + { .temp_c = 55, .ohm = 3535 }, + { .temp_c = 60, .ohm = 3014 }, + { .temp_c = 65, .ohm = 2586 }, + { .temp_c = 70, .ohm = 2228 }, + { .temp_c = 75, .ohm = 1925 }, + { .temp_c = 80, .ohm = 1669 }, + { .temp_c = 85, .ohm = 1452 }, + { .temp_c = 90, .ohm = 1268 }, + { .temp_c = 95, .ohm = 1110 }, + { .temp_c = 100, .ohm = 974 }, + { .temp_c = 105, .ohm = 858 }, + { .temp_c = 110, .ohm = 758 }, + { .temp_c = 115, .ohm = 672 }, + { .temp_c = 120, .ohm = 596 }, + { .temp_c = 125, .ohm = 531 }, }; /* diff --git a/drivers/hwmon/peci/dimmtemp.c b/drivers/hwmon/peci/dimmtemp.c index d6762259dd69..fbe82d9852e0 100644 --- a/drivers/hwmon/peci/dimmtemp.c +++ b/drivers/hwmon/peci/dimmtemp.c @@ -127,8 +127,6 @@ static int update_thresholds(struct peci_dimmtemp *priv, int dimm_no) return 0; ret = priv->gen_info->read_thresholds(priv, dimm_order, chan_rank, &data); - if (ret == -ENODATA) /* Use default or previous value */ - return 0; if (ret) return ret; @@ -509,11 +507,11 @@ read_thresholds_icx(struct peci_dimmtemp *priv, int dimm_order, int chan_rank, u ret = peci_ep_pci_local_read(priv->peci_dev, 0, 13, 0, 2, 0xd4, &reg_val); if (ret || !(reg_val & BIT(31))) - return -ENODATA; /* Use default or previous value */ + return -ENODATA; ret = peci_ep_pci_local_read(priv->peci_dev, 0, 13, 0, 2, 0xd0, &reg_val); if (ret) - return -ENODATA; /* Use default or previous value */ + return -ENODATA; /* * Device 26, Offset 224e0: IMC 0 channel 0 -> rank 0 @@ -546,11 +544,11 @@ read_thresholds_spr(struct peci_dimmtemp *priv, int dimm_order, int chan_rank, u ret = peci_ep_pci_local_read(priv->peci_dev, 0, 30, 0, 2, 0xd4, &reg_val); if (ret || !(reg_val & BIT(31))) - return -ENODATA; /* Use default or previous value */ + return -ENODATA; ret = peci_ep_pci_local_read(priv->peci_dev, 0, 30, 0, 2, 0xd0, &reg_val); if (ret) - return -ENODATA; /* Use default or previous value */ + return -ENODATA; /* * Device 26, Offset 219a8: IMC 0 channel 0 -> rank 0 diff --git a/drivers/hwmon/pmbus/pmbus.c b/drivers/hwmon/pmbus/pmbus.c index 77cf268e7d2d..920cd5408141 100644 --- a/drivers/hwmon/pmbus/pmbus.c +++ b/drivers/hwmon/pmbus/pmbus.c @@ -103,6 +103,8 @@ static int pmbus_identify(struct i2c_client *client, if (pmbus_check_byte_register(client, 0, PMBUS_PAGE)) { int page; + info->pages = PMBUS_PAGES; + for (page = 1; page < PMBUS_PAGES; page++) { if (pmbus_set_page(client, page, 0xff) < 0) break; diff --git a/drivers/hwmon/xgene-hwmon.c b/drivers/hwmon/xgene-hwmon.c index 1e3bd129a922..7087197383c9 100644 --- a/drivers/hwmon/xgene-hwmon.c +++ b/drivers/hwmon/xgene-hwmon.c @@ -706,7 +706,7 @@ static int xgene_hwmon_probe(struct platform_device *pdev) goto out; } - if (!ctx->pcc_comm_addr) { + if (IS_ERR_OR_NULL(ctx->pcc_comm_addr)) { dev_err(&pdev->dev, "Failed to ioremap PCC comm region\n"); rc = -ENOMEM; diff --git a/drivers/hwtracing/intel_th/msu.c b/drivers/hwtracing/intel_th/msu.c index 66123d684ac9..bf99d79a4192 100644 --- a/drivers/hwtracing/intel_th/msu.c +++ b/drivers/hwtracing/intel_th/msu.c @@ -105,23 +105,32 @@ struct msc_iter { /** * struct msc - MSC device representation - * @reg_base: register window base address + * @reg_base:
register window base address for the entire MSU + * @msu_base: register window base address for this MSC * @thdev: intel_th_device pointer * @mbuf: MSU buffer, if assigned - * @mbuf_priv MSU buffer's private data, if @mbuf + * @mbuf_priv: MSU buffer's private data, if @mbuf + * @work: a work to stop the trace when the buffer is full * @win_list: list of windows in multiblock mode * @single_sgt: single mode buffer * @cur_win: current window + * @switch_on_unlock: window to switch to when it becomes available * @nr_pages: total number of pages allocated for this buffer * @single_sz: amount of data in single mode * @single_wrap: single mode wrap occurred * @base: buffer's base pointer * @base_addr: buffer's base address + * @orig_addr: MSC0 buffer's base address + * @orig_sz: MSC0 buffer's size * @user_count: number of users of the buffer * @mmap_count: number of mappings * @buf_mutex: mutex to serialize access to buffer-related bits + * @iter_list: list of open file descriptor iterators + * @stop_on_full: stop the trace if the current window is full * @enabled: MSC is enabled * @wrap: wrapping is enabled + * @do_irq: IRQ resource is available, handle interrupts + * @multi_is_broken: multiblock mode enabled (not disabled by PCI drvdata) * @mode: MSC operating mode * @burst_len: write burst length * @index: number of this MSC in the MSU diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c index e9d8d28e055f..e3def163d5cf 100644 --- a/drivers/hwtracing/intel_th/pci.c +++ b/drivers/hwtracing/intel_th/pci.c @@ -335,6 +335,21 @@ static const struct pci_device_id intel_th_pci_id_table[] = { .driver_data = (kernel_ulong_t)&intel_th_2x, }, { + /* Arrow Lake */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7724), + .driver_data = (kernel_ulong_t)&intel_th_2x, + }, + { + /* Panther Lake-H */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe324), + .driver_data = (kernel_ulong_t)&intel_th_2x, + }, + { + /* Panther Lake-P/U */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe424), + .driver_data = (kernel_ulong_t)&intel_th_2x, + }, + { /* Alder Lake CPU */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x466f), .driver_data = (kernel_ulong_t)&intel_th_2x, diff --git a/drivers/i2c/busses/i2c-amd-asf-plat.c b/drivers/i2c/busses/i2c-amd-asf-plat.c index 7512614bf4b7..93ebec162c6d 100644 --- a/drivers/i2c/busses/i2c-amd-asf-plat.c +++ b/drivers/i2c/busses/i2c-amd-asf-plat.c @@ -293,6 +293,7 @@ static irqreturn_t amd_asf_irq_handler(int irq, void *ptr) amd_asf_update_ioport_target(piix4_smba, ASF_SLV_INTR, SMBHSTSTS, true); } + iowrite32(irq, dev->eoi_base); return IRQ_HANDLED; } diff --git a/drivers/i2c/busses/i2c-ls2x.c b/drivers/i2c/busses/i2c-ls2x.c index 8821cac3897b..b475dd27b7af 100644 --- a/drivers/i2c/busses/i2c-ls2x.c +++ b/drivers/i2c/busses/i2c-ls2x.c @@ -10,6 +10,7 @@ * Rewritten for mainline by Binbin Zhou <zhoubinbin@loongson.cn> */ +#include <linux/bitfield.h> #include <linux/bits.h> #include <linux/completion.h> #include <linux/device.h> @@ -26,7 +27,8 @@ #include <linux/units.h> /* I2C Registers */ -#define I2C_LS2X_PRER 0x0 /* Freq Division Register(16 bits) */ +#define I2C_LS2X_PRER_LO 0x0 /* Freq Division Low Byte Register */ +#define I2C_LS2X_PRER_HI 0x1 /* Freq Division High Byte Register */ #define I2C_LS2X_CTR 0x2 /* Control Register */ #define I2C_LS2X_TXR 0x3 /* Transport Data Register */ #define I2C_LS2X_RXR 0x3 /* Receive Data Register */ @@ -93,6 +95,7 @@ static irqreturn_t ls2x_i2c_isr(int this_irq, void *dev_id) */ static void ls2x_i2c_adjust_bus_speed(struct ls2x_i2c_priv *priv) { + u16 val; 
struct i2c_timings *t = &priv->i2c_t; struct device *dev = priv->adapter.dev.parent; u32 acpi_speed = i2c_acpi_find_bus_speed(dev); @@ -104,9 +107,14 @@ static void ls2x_i2c_adjust_bus_speed(struct ls2x_i2c_priv *priv) else t->bus_freq_hz = LS2X_I2C_FREQ_STD; - /* Calculate and set i2c frequency. */ - writew(LS2X_I2C_PCLK_FREQ / (5 * t->bus_freq_hz) - 1, - priv->base + I2C_LS2X_PRER); + /* + * According to the chip manual, we can only access the registers as bytes, + * otherwise the high bits will be truncated. + * So set the I2C frequency with a sequential writeb() instead of writew(). + */ + val = LS2X_I2C_PCLK_FREQ / (5 * t->bus_freq_hz) - 1; + writeb(FIELD_GET(GENMASK(7, 0), val), priv->base + I2C_LS2X_PRER_LO); + writeb(FIELD_GET(GENMASK(15, 8), val), priv->base + I2C_LS2X_PRER_HI); } static void ls2x_i2c_init(struct ls2x_i2c_priv *priv) diff --git a/drivers/i2c/busses/i2c-npcm7xx.c b/drivers/i2c/busses/i2c-npcm7xx.c index 3ca08b8ef8af..de713b5747fe 100644 --- a/drivers/i2c/busses/i2c-npcm7xx.c +++ b/drivers/i2c/busses/i2c-npcm7xx.c @@ -2554,6 +2554,13 @@ static int npcm_i2c_probe_bus(struct platform_device *pdev) if (irq < 0) return irq; + /* + * Disable the interrupt to avoid the interrupt handler being triggered + * incorrectly by the asynchronous interrupt status since the machine + * might do a warm reset during the last smbus/i2c transfer session. + */ + npcm_i2c_int_enable(bus, false); + ret = devm_request_irq(bus->dev, irq, npcm_i2c_bus_irq, 0, dev_name(bus->dev), bus); if (ret) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 118fe1d37c22..0fdb1d1316c4 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -56,6 +56,7 @@ #include <asm/intel-family.h> #include <asm/mwait.h> #include <asm/spec-ctrl.h> +#include <asm/tsc.h> #include <asm/fpu/api.h> #define INTEL_IDLE_VERSION "0.5.1" @@ -1799,6 +1800,9 @@ static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) if (intel_idle_state_needs_timer_stop(state)) state->flags |= CPUIDLE_FLAG_TIMER_STOP; + if (cx->type > ACPI_STATE_C1 && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) + mark_tsc_unstable("TSC halts in idle"); + state->enter = intel_idle; state->enter_s2idle = intel_idle_s2idle; } diff --git a/drivers/iio/adc/ad7192.c b/drivers/iio/adc/ad7192.c index e96a5ae92375..cfaf8f7e0a07 100644 --- a/drivers/iio/adc/ad7192.c +++ b/drivers/iio/adc/ad7192.c @@ -1084,7 +1084,7 @@ static int ad7192_update_scan_mode(struct iio_dev *indio_dev, const unsigned lon conf &= ~AD7192_CONF_CHAN_MASK; for_each_set_bit(i, scan_mask, 8) - conf |= FIELD_PREP(AD7192_CONF_CHAN_MASK, i); + conf |= FIELD_PREP(AD7192_CONF_CHAN_MASK, BIT(i)); ret = ad_sd_write_reg(&st->sd, AD7192_REG_CONF, 3, conf); if (ret < 0) diff --git a/drivers/iio/adc/ad7606.c b/drivers/iio/adc/ad7606.c index d8e3c7a43678..d39354afd539 100644 --- a/drivers/iio/adc/ad7606.c +++ b/drivers/iio/adc/ad7606.c @@ -1047,7 +1047,7 @@ static int ad7606_read_avail(struct iio_dev *indio_dev, cs = &st->chan_scales[ch]; *vals = (int *)cs->scale_avail; - *length = cs->num_scales; + *length = cs->num_scales * 2; *type = IIO_VAL_INT_PLUS_MICRO; return IIO_AVAIL_LIST; diff --git a/drivers/iio/adc/at91-sama5d2_adc.c b/drivers/iio/adc/at91-sama5d2_adc.c index 8e5aaf15a921..c3a1dea2aa82 100644 --- a/drivers/iio/adc/at91-sama5d2_adc.c +++ b/drivers/iio/adc/at91-sama5d2_adc.c @@ -329,7 +329,7 @@ static const struct at91_adc_reg_layout sama7g5_layout = { #define AT91_HWFIFO_MAX_SIZE_STR "128" #define AT91_HWFIFO_MAX_SIZE 128 -#define 
AT91_SAMA5D2_CHAN_SINGLE(index, num, addr) \ +#define AT91_SAMA_CHAN_SINGLE(index, num, addr, rbits) \ { \ .type = IIO_VOLTAGE, \ .channel = num, \ @@ -337,7 +337,7 @@ static const struct at91_adc_reg_layout sama7g5_layout = { .scan_index = index, \ .scan_type = { \ .sign = 'u', \ - .realbits = 14, \ + .realbits = rbits, \ .storagebits = 16, \ }, \ .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), \ @@ -350,7 +350,13 @@ static const struct at91_adc_reg_layout sama7g5_layout = { .indexed = 1, \ } -#define AT91_SAMA5D2_CHAN_DIFF(index, num, num2, addr) \ +#define AT91_SAMA5D2_CHAN_SINGLE(index, num, addr) \ + AT91_SAMA_CHAN_SINGLE(index, num, addr, 14) + +#define AT91_SAMA7G5_CHAN_SINGLE(index, num, addr) \ + AT91_SAMA_CHAN_SINGLE(index, num, addr, 16) + +#define AT91_SAMA_CHAN_DIFF(index, num, num2, addr, rbits) \ { \ .type = IIO_VOLTAGE, \ .differential = 1, \ @@ -360,7 +366,7 @@ static const struct at91_adc_reg_layout sama7g5_layout = { .scan_index = index, \ .scan_type = { \ .sign = 's', \ - .realbits = 14, \ + .realbits = rbits, \ .storagebits = 16, \ }, \ .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), \ @@ -373,6 +379,12 @@ static const struct at91_adc_reg_layout sama7g5_layout = { .indexed = 1, \ } +#define AT91_SAMA5D2_CHAN_DIFF(index, num, num2, addr) \ + AT91_SAMA_CHAN_DIFF(index, num, num2, addr, 14) + +#define AT91_SAMA7G5_CHAN_DIFF(index, num, num2, addr) \ + AT91_SAMA_CHAN_DIFF(index, num, num2, addr, 16) + #define AT91_SAMA5D2_CHAN_TOUCH(num, name, mod) \ { \ .type = IIO_POSITIONRELATIVE, \ @@ -666,30 +678,30 @@ static const struct iio_chan_spec at91_sama5d2_adc_channels[] = { }; static const struct iio_chan_spec at91_sama7g5_adc_channels[] = { - AT91_SAMA5D2_CHAN_SINGLE(0, 0, 0x60), - AT91_SAMA5D2_CHAN_SINGLE(1, 1, 0x64), - AT91_SAMA5D2_CHAN_SINGLE(2, 2, 0x68), - AT91_SAMA5D2_CHAN_SINGLE(3, 3, 0x6c), - AT91_SAMA5D2_CHAN_SINGLE(4, 4, 0x70), - AT91_SAMA5D2_CHAN_SINGLE(5, 5, 0x74), - AT91_SAMA5D2_CHAN_SINGLE(6, 6, 0x78), - AT91_SAMA5D2_CHAN_SINGLE(7, 7, 0x7c), - AT91_SAMA5D2_CHAN_SINGLE(8, 8, 0x80), - AT91_SAMA5D2_CHAN_SINGLE(9, 9, 0x84), - AT91_SAMA5D2_CHAN_SINGLE(10, 10, 0x88), - AT91_SAMA5D2_CHAN_SINGLE(11, 11, 0x8c), - AT91_SAMA5D2_CHAN_SINGLE(12, 12, 0x90), - AT91_SAMA5D2_CHAN_SINGLE(13, 13, 0x94), - AT91_SAMA5D2_CHAN_SINGLE(14, 14, 0x98), - AT91_SAMA5D2_CHAN_SINGLE(15, 15, 0x9c), - AT91_SAMA5D2_CHAN_DIFF(16, 0, 1, 0x60), - AT91_SAMA5D2_CHAN_DIFF(17, 2, 3, 0x68), - AT91_SAMA5D2_CHAN_DIFF(18, 4, 5, 0x70), - AT91_SAMA5D2_CHAN_DIFF(19, 6, 7, 0x78), - AT91_SAMA5D2_CHAN_DIFF(20, 8, 9, 0x80), - AT91_SAMA5D2_CHAN_DIFF(21, 10, 11, 0x88), - AT91_SAMA5D2_CHAN_DIFF(22, 12, 13, 0x90), - AT91_SAMA5D2_CHAN_DIFF(23, 14, 15, 0x98), + AT91_SAMA7G5_CHAN_SINGLE(0, 0, 0x60), + AT91_SAMA7G5_CHAN_SINGLE(1, 1, 0x64), + AT91_SAMA7G5_CHAN_SINGLE(2, 2, 0x68), + AT91_SAMA7G5_CHAN_SINGLE(3, 3, 0x6c), + AT91_SAMA7G5_CHAN_SINGLE(4, 4, 0x70), + AT91_SAMA7G5_CHAN_SINGLE(5, 5, 0x74), + AT91_SAMA7G5_CHAN_SINGLE(6, 6, 0x78), + AT91_SAMA7G5_CHAN_SINGLE(7, 7, 0x7c), + AT91_SAMA7G5_CHAN_SINGLE(8, 8, 0x80), + AT91_SAMA7G5_CHAN_SINGLE(9, 9, 0x84), + AT91_SAMA7G5_CHAN_SINGLE(10, 10, 0x88), + AT91_SAMA7G5_CHAN_SINGLE(11, 11, 0x8c), + AT91_SAMA7G5_CHAN_SINGLE(12, 12, 0x90), + AT91_SAMA7G5_CHAN_SINGLE(13, 13, 0x94), + AT91_SAMA7G5_CHAN_SINGLE(14, 14, 0x98), + AT91_SAMA7G5_CHAN_SINGLE(15, 15, 0x9c), + AT91_SAMA7G5_CHAN_DIFF(16, 0, 1, 0x60), + AT91_SAMA7G5_CHAN_DIFF(17, 2, 3, 0x68), + AT91_SAMA7G5_CHAN_DIFF(18, 4, 5, 0x70), + AT91_SAMA7G5_CHAN_DIFF(19, 6, 7, 0x78), + AT91_SAMA7G5_CHAN_DIFF(20, 8, 9, 0x80), + 
AT91_SAMA7G5_CHAN_DIFF(21, 10, 11, 0x88), + AT91_SAMA7G5_CHAN_DIFF(22, 12, 13, 0x90), + AT91_SAMA7G5_CHAN_DIFF(23, 14, 15, 0x98), IIO_CHAN_SOFT_TIMESTAMP(24), AT91_SAMA5D2_CHAN_TEMP(AT91_SAMA7G5_ADC_TEMP_CHANNEL, "temp", 0xdc), }; diff --git a/drivers/iio/adc/pac1921.c b/drivers/iio/adc/pac1921.c index 90f61c47b1c4..63f518215156 100644 --- a/drivers/iio/adc/pac1921.c +++ b/drivers/iio/adc/pac1921.c @@ -1198,11 +1198,11 @@ static int pac1921_match_acpi_device(struct iio_dev *indio_dev) label = devm_kstrdup(dev, status->package.elements[0].string.pointer, GFP_KERNEL); + ACPI_FREE(status); if (!label) return -ENOMEM; indio_dev->label = label; - ACPI_FREE(status); return 0; } diff --git a/drivers/iio/dac/ad3552r.c b/drivers/iio/dac/ad3552r.c index e7206af53af6..7944f5c1d264 100644 --- a/drivers/iio/dac/ad3552r.c +++ b/drivers/iio/dac/ad3552r.c @@ -410,6 +410,12 @@ static int ad3552r_reset(struct ad3552r_desc *dac) return ret; } + /* Clear reset error flag, see ad3552r manual, rev B table 38. */ + ret = ad3552r_write_reg(dac, AD3552R_REG_ADDR_ERR_STATUS, + AD3552R_MASK_RESET_STATUS); + if (ret) + return ret; + return ad3552r_update_reg_field(dac, AD3552R_REG_ADDR_INTERFACE_CONFIG_A, AD3552R_MASK_ADDR_ASCENSION, diff --git a/drivers/iio/filter/admv8818.c b/drivers/iio/filter/admv8818.c index 848baa6e3bbf..d85b7d3de866 100644 --- a/drivers/iio/filter/admv8818.c +++ b/drivers/iio/filter/admv8818.c @@ -574,21 +574,15 @@ static int admv8818_init(struct admv8818_state *st) struct spi_device *spi = st->spi; unsigned int chip_id; - ret = regmap_update_bits(st->regmap, ADMV8818_REG_SPI_CONFIG_A, - ADMV8818_SOFTRESET_N_MSK | - ADMV8818_SOFTRESET_MSK, - FIELD_PREP(ADMV8818_SOFTRESET_N_MSK, 1) | - FIELD_PREP(ADMV8818_SOFTRESET_MSK, 1)); + ret = regmap_write(st->regmap, ADMV8818_REG_SPI_CONFIG_A, + ADMV8818_SOFTRESET_N_MSK | ADMV8818_SOFTRESET_MSK); if (ret) { dev_err(&spi->dev, "ADMV8818 Soft Reset failed.\n"); return ret; } - ret = regmap_update_bits(st->regmap, ADMV8818_REG_SPI_CONFIG_A, - ADMV8818_SDOACTIVE_N_MSK | - ADMV8818_SDOACTIVE_MSK, - FIELD_PREP(ADMV8818_SDOACTIVE_N_MSK, 1) | - FIELD_PREP(ADMV8818_SDOACTIVE_MSK, 1)); + ret = regmap_write(st->regmap, ADMV8818_REG_SPI_CONFIG_A, + ADMV8818_SDOACTIVE_N_MSK | ADMV8818_SDOACTIVE_MSK); if (ret) { dev_err(&spi->dev, "ADMV8818 SDO Enable failed.\n"); return ret; diff --git a/drivers/iio/light/apds9306.c b/drivers/iio/light/apds9306.c index 69a0d609cffc..5ed7e17f49e7 100644 --- a/drivers/iio/light/apds9306.c +++ b/drivers/iio/light/apds9306.c @@ -108,11 +108,11 @@ static const struct part_id_gts_multiplier apds9306_gts_mul[] = { { .part_id = 0xB1, .max_scale_int = 16, - .max_scale_nano = 3264320, + .max_scale_nano = 326432000, }, { .part_id = 0xB3, .max_scale_int = 14, - .max_scale_nano = 9712000, + .max_scale_nano = 97120000, }, }; diff --git a/drivers/iio/light/hid-sensor-prox.c b/drivers/iio/light/hid-sensor-prox.c index 7ab64f5c623c..76b76d12b388 100644 --- a/drivers/iio/light/hid-sensor-prox.c +++ b/drivers/iio/light/hid-sensor-prox.c @@ -49,9 +49,10 @@ static const u32 prox_sensitivity_addresses[] = { #define PROX_CHANNEL(_is_proximity, _channel) \ {\ .type = _is_proximity ? IIO_PROXIMITY : IIO_ATTENTION,\ - .info_mask_separate = _is_proximity ? BIT(IIO_CHAN_INFO_RAW) :\ - BIT(IIO_CHAN_INFO_PROCESSED),\ - .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_OFFSET) |\ + .info_mask_separate = \ + (_is_proximity ? 
BIT(IIO_CHAN_INFO_RAW) :\ + BIT(IIO_CHAN_INFO_PROCESSED)) |\ + BIT(IIO_CHAN_INFO_OFFSET) |\ BIT(IIO_CHAN_INFO_SCALE) |\ BIT(IIO_CHAN_INFO_SAMP_FREQ) |\ BIT(IIO_CHAN_INFO_HYSTERESIS),\ diff --git a/drivers/iio/proximity/hx9023s.c b/drivers/iio/proximity/hx9023s.c index e092a935dbac..5aa8e5a22f32 100644 --- a/drivers/iio/proximity/hx9023s.c +++ b/drivers/iio/proximity/hx9023s.c @@ -1036,12 +1036,13 @@ static int hx9023s_send_cfg(const struct firmware *fw, struct hx9023s_data *data return -ENOMEM; memcpy(bin->data, fw->data, fw->size); - release_firmware(fw); bin->fw_size = fw->size; bin->fw_ver = bin->data[FW_VER_OFFSET]; bin->reg_count = get_unaligned_le16(bin->data + FW_REG_CNT_OFFSET); + release_firmware(fw); + return hx9023s_bin_load(data, bin); } diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index b48a72bd7b23..cd5116d8c3b2 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2043,12 +2043,12 @@ static void set_dte_entry(struct amd_iommu *iommu, make_clear_dte(dev_data, dte, &new); if (domain->iop.mode != PAGE_MODE_NONE) - new.data[0] = iommu_virt_to_phys(domain->iop.root); + new.data[0] |= iommu_virt_to_phys(domain->iop.root); new.data[0] |= (domain->iop.mode & DEV_ENTRY_MODE_MASK) << DEV_ENTRY_MODE_SHIFT; - new.data[0] |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V; + new.data[0] |= DTE_FLAG_IR | DTE_FLAG_IW; /* * When SNP is enabled, we can only support TV=1 with non-zero domain ID. diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 9f424acf474e..e540092d664d 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -2043,6 +2043,7 @@ int enable_drhd_fault_handling(unsigned int cpu) /* * Enable fault control interrupt. */ + guard(rwsem_read)(&dmar_global_lock); for_each_iommu(iommu, drhd) { u32 fault_status; int ret; diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index cc46098f875b..bf1f0c814348 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -3146,7 +3146,14 @@ int __init intel_iommu_init(void) iommu_device_sysfs_add(&iommu->iommu, NULL, intel_iommu_groups, "%s", iommu->name); + /* + * The iommu device probe is protected by the iommu_probe_device_lock. + * Release the dmar_global_lock before entering the device probe path + * to avoid unnecessary lock order splat. 
+ */ + up_read(&dmar_global_lock); iommu_device_register(&iommu->iommu, &intel_iommu_ops, NULL); + down_read(&dmar_global_lock); iommu_pmu_register(iommu); } @@ -4378,9 +4385,6 @@ static int context_setup_pass_through_cb(struct pci_dev *pdev, u16 alias, void * { struct device *dev = data; - if (dev != &pdev->dev) - return 0; - return context_setup_pass_through(dev, PCI_BUS_NUM(alias), alias & 0xff); } diff --git a/drivers/misc/cardreader/rtsx_usb.c b/drivers/misc/cardreader/rtsx_usb.c index e0174da5e9fc..77b0490a1b38 100644 --- a/drivers/misc/cardreader/rtsx_usb.c +++ b/drivers/misc/cardreader/rtsx_usb.c @@ -286,7 +286,6 @@ static int rtsx_usb_get_status_with_bulk(struct rtsx_ucr *ucr, u16 *status) int rtsx_usb_get_card_status(struct rtsx_ucr *ucr, u16 *status) { int ret; - u8 interrupt_val = 0; u16 *buf; if (!status) @@ -309,20 +308,6 @@ int rtsx_usb_get_card_status(struct rtsx_ucr *ucr, u16 *status) ret = rtsx_usb_get_status_with_bulk(ucr, status); } - rtsx_usb_read_register(ucr, CARD_INT_PEND, &interrupt_val); - /* Cross check presence with interrupts */ - if (*status & XD_CD) - if (!(interrupt_val & XD_INT)) - *status &= ~XD_CD; - - if (*status & SD_CD) - if (!(interrupt_val & SD_INT)) - *status &= ~SD_CD; - - if (*status & MS_CD) - if (!(interrupt_val & MS_INT)) - *status &= ~MS_CD; - /* usb_control_msg may return positive when success */ if (ret < 0) return ret; diff --git a/drivers/misc/eeprom/digsy_mtc_eeprom.c b/drivers/misc/eeprom/digsy_mtc_eeprom.c index 88888485e6f8..ee58f7ce5bfa 100644 --- a/drivers/misc/eeprom/digsy_mtc_eeprom.c +++ b/drivers/misc/eeprom/digsy_mtc_eeprom.c @@ -50,7 +50,7 @@ static struct platform_device digsy_mtc_eeprom = { }; static struct gpiod_lookup_table eeprom_spi_gpiod_table = { - .dev_id = "spi_gpio", + .dev_id = "spi_gpio.1", .table = { GPIO_LOOKUP("gpio@b00", GPIO_EEPROM_CLK, "sck", GPIO_ACTIVE_HIGH), diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h index c3a6657dcd4a..a5f88ec97df7 100644 --- a/drivers/misc/mei/hw-me-regs.h +++ b/drivers/misc/mei/hw-me-regs.h @@ -117,6 +117,8 @@ #define MEI_DEV_ID_LNL_M 0xA870 /* Lunar Lake Point M */ +#define MEI_DEV_ID_PTL_P 0xE470 /* Panther Lake P */ + /* * MEI HW Section */ diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index 6589635f8ba3..d6ff9d82ae94 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -124,6 +124,8 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_LNL_M, MEI_ME_PCH15_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_PTL_P, MEI_ME_PCH15_CFG)}, + /* required last entry */ {0, } }; diff --git a/drivers/misc/mei/vsc-tp.c b/drivers/misc/mei/vsc-tp.c index 35d349fee769..7be1649b1972 100644 --- a/drivers/misc/mei/vsc-tp.c +++ b/drivers/misc/mei/vsc-tp.c @@ -502,7 +502,7 @@ static int vsc_tp_probe(struct spi_device *spi) if (ret) return ret; - tp->wakeuphost = devm_gpiod_get(dev, "wakeuphost", GPIOD_IN); + tp->wakeuphost = devm_gpiod_get(dev, "wakeuphostint", GPIOD_IN); if (IS_ERR(tp->wakeuphost)) return PTR_ERR(tp->wakeuphost); diff --git a/drivers/misc/ntsync.c b/drivers/misc/ntsync.c index 055395cde42b..999026a1ae04 100644 --- a/drivers/misc/ntsync.c +++ b/drivers/misc/ntsync.c @@ -873,6 +873,7 @@ static int setup_wait(struct ntsync_device *dev, { int fds[NTSYNC_MAX_WAIT_COUNT + 1]; const __u32 count = args->count; + size_t size = array_size(count, sizeof(fds[0])); struct ntsync_q *q; __u32 total_count; __u32 i, j; @@ -880,15 +881,14 @@ static int setup_wait(struct ntsync_device *dev, if (args->pad || 
(args->flags & ~NTSYNC_WAIT_REALTIME)) return -EINVAL; - if (args->count > NTSYNC_MAX_WAIT_COUNT) + if (size >= sizeof(fds)) return -EINVAL; total_count = count; if (args->alert) total_count++; - if (copy_from_user(fds, u64_to_user_ptr(args->objs), - array_size(count, sizeof(*fds)))) + if (copy_from_user(fds, u64_to_user_ptr(args->objs), size)) return -EFAULT; if (args->alert) fds[count] = args->alert; @@ -1208,6 +1208,7 @@ static struct miscdevice ntsync_misc = { .minor = MISC_DYNAMIC_MINOR, .name = NTSYNC_NAME, .fops = &ntsync_fops, + .mode = 0666, }; module_misc_device(ntsync_misc); diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index fe7072336e8f..6c95f478ab80 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -90,6 +90,7 @@ #include <net/tls.h> #endif #include <net/ip6_route.h> +#include <net/netdev_lock.h> #include <net/xdp.h> #include "bonding_priv.h" @@ -855,7 +856,6 @@ static int bond_check_dev_link(struct bonding *bond, struct net_device *slave_dev, int reporting) { const struct net_device_ops *slave_ops = slave_dev->netdev_ops; - int (*ioctl)(struct net_device *, struct ifreq *, int); struct ifreq ifr; struct mii_ioctl_data *mii; @@ -871,8 +871,7 @@ static int bond_check_dev_link(struct bonding *bond, BMSR_LSTATUS : 0; /* Ethtool can't be used, fallback to MII ioctls. */ - ioctl = slave_ops->ndo_eth_ioctl; - if (ioctl) { + if (slave_ops->ndo_eth_ioctl) { /* TODO: set pointer to correct ioctl on a per team member * bases to make this more efficient. that is, once * we determine the correct ioctl, we will always @@ -888,9 +887,10 @@ static int bond_check_dev_link(struct bonding *bond, /* Yes, the mii is overlaid on the ifreq.ifr_ifru */ strscpy_pad(ifr.ifr_name, slave_dev->name, IFNAMSIZ); mii = if_mii(&ifr); - if (ioctl(slave_dev, &ifr, SIOCGMIIPHY) == 0) { + + if (dev_eth_ioctl(slave_dev, &ifr, SIOCGMIIPHY) == 0) { mii->reg_num = MII_BMSR; - if (ioctl(slave_dev, &ifr, SIOCGMIIREG) == 0) + if (dev_eth_ioctl(slave_dev, &ifr, SIOCGMIIREG) == 0) return mii->val_out & BMSR_LSTATUS; } } @@ -2645,10 +2645,13 @@ static int __bond_release_one(struct net_device *bond_dev, dev_set_mac_address(slave_dev, (struct sockaddr *)&ss, NULL); } - if (unregister) + if (unregister) { + netdev_lock_ops(slave_dev); __dev_set_mtu(slave_dev, slave->original_mtu); - else + netdev_unlock_ops(slave_dev); + } else { dev_set_mtu(slave_dev, slave->original_mtu); + } if (!netif_is_bond_master(slave_dev)) slave_dev->priv_flags &= ~IFF_BONDING; diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index 327b6ecdc77e..d1b095af253b 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -1242,10 +1242,28 @@ static bool slave_can_set_ns_maddr(const struct bonding *bond, struct slave *sla slave->dev->flags & IFF_MULTICAST; } +/** + * slave_set_ns_maddrs - add/del all NS mac addresses for slave + * @bond: bond device + * @slave: slave device + * @add: add or remove all the NS mac addresses + * + * This function tries to add or delete all the NS mac addresses on the slave + * + * Note, the IPv6 NS target address is the unicast address in Neighbor + * Solicitation (NS) message. The dest address of NS message should be + * solicited-node multicast address of the target. The dest mac of NS message + * is converted from the solicited-node multicast address. 
+ * + * This function is called when + * * arp_validate changes + * * enslaving, releasing new slaves + */ static void slave_set_ns_maddrs(struct bonding *bond, struct slave *slave, bool add) { struct in6_addr *targets = bond->params.ns_targets; char slot_maddr[MAX_ADDR_LEN]; + struct in6_addr mcaddr; int i; if (!slave_can_set_ns_maddr(bond, slave)) @@ -1255,7 +1273,8 @@ static void slave_set_ns_maddrs(struct bonding *bond, struct slave *slave, bool if (ipv6_addr_any(&targets[i])) break; - if (!ndisc_mc_map(&targets[i], slot_maddr, slave->dev, 0)) { + addrconf_addr_solict_mult(&targets[i], &mcaddr); + if (!ndisc_mc_map(&mcaddr, slot_maddr, slave->dev, 0)) { if (add) dev_mc_add(slave->dev, slot_maddr); else @@ -1278,23 +1297,43 @@ void bond_slave_ns_maddrs_del(struct bonding *bond, struct slave *slave) slave_set_ns_maddrs(bond, slave, false); } +/** + * slave_set_ns_maddr - set new NS mac address for slave + * @bond: bond device + * @slave: slave device + * @target: the new IPv6 target + * @slot: the old IPv6 target in the slot + * + * This function tries to replace the old mac address to new one on the slave. + * + * Note, the target/slot IPv6 address is the unicast address in Neighbor + * Solicitation (NS) message. The dest address of NS message should be + * solicited-node multicast address of the target. The dest mac of NS message + * is converted from the solicited-node multicast address. + * + * This function is called when + * * An IPv6 NS target is added or removed. + */ static void slave_set_ns_maddr(struct bonding *bond, struct slave *slave, struct in6_addr *target, struct in6_addr *slot) { - char target_maddr[MAX_ADDR_LEN], slot_maddr[MAX_ADDR_LEN]; + char mac_addr[MAX_ADDR_LEN]; + struct in6_addr mcast_addr; if (!bond->params.arp_validate || !slave_can_set_ns_maddr(bond, slave)) return; - /* remove the previous maddr from slave */ + /* remove the previous mac addr from slave */ + addrconf_addr_solict_mult(slot, &mcast_addr); if (!ipv6_addr_any(slot) && - !ndisc_mc_map(slot, slot_maddr, slave->dev, 0)) - dev_mc_del(slave->dev, slot_maddr); + !ndisc_mc_map(&mcast_addr, mac_addr, slave->dev, 0)) + dev_mc_del(slave->dev, mac_addr); - /* add new maddr on slave if target is set */ + /* add new mac addr on slave if target is set */ + addrconf_addr_solict_mult(target, &mcast_addr); if (!ipv6_addr_any(target) && - !ndisc_mc_map(target, target_maddr, slave->dev, 0)) - dev_mc_add(slave->dev, target_maddr); + !ndisc_mc_map(&mcast_addr, mac_addr, slave->dev, 0)) + dev_mc_add(slave->dev, mac_addr); } static void _bond_options_ns_ip6_target_set(struct bonding *bond, int slot, diff --git a/drivers/net/caif/caif_virtio.c b/drivers/net/caif/caif_virtio.c index 7fea00c7ca8a..c60386bf2d1a 100644 --- a/drivers/net/caif/caif_virtio.c +++ b/drivers/net/caif/caif_virtio.c @@ -745,7 +745,7 @@ err: if (cfv->vr_rx) vdev->vringh_config->del_vrhs(cfv->vdev); - if (cfv->vdev) + if (cfv->vq_tx) vdev->config->del_vqs(cfv->vdev); free_netdev(netdev); return err; diff --git a/drivers/net/can/flexcan/flexcan-core.c b/drivers/net/can/flexcan/flexcan-core.c index b347a1c93536..7588cb54a909 100644 --- a/drivers/net/can/flexcan/flexcan-core.c +++ b/drivers/net/can/flexcan/flexcan-core.c @@ -26,6 +26,7 @@ #include <linux/pinctrl/consumer.h> #include <linux/platform_device.h> #include <linux/can/platform/flexcan.h> +#include <linux/phy/phy.h> #include <linux/pm_runtime.h> #include <linux/property.h> #include <linux/regmap.h> @@ -644,18 +645,22 @@ static void flexcan_clks_disable(const struct flexcan_priv *priv) static 
inline int flexcan_transceiver_enable(const struct flexcan_priv *priv) { - if (!priv->reg_xceiver) - return 0; + if (priv->reg_xceiver) + return regulator_enable(priv->reg_xceiver); + else if (priv->transceiver) + return phy_power_on(priv->transceiver); - return regulator_enable(priv->reg_xceiver); + return 0; } static inline int flexcan_transceiver_disable(const struct flexcan_priv *priv) { - if (!priv->reg_xceiver) - return 0; + if (priv->reg_xceiver) + return regulator_disable(priv->reg_xceiver); + else if (priv->transceiver) + return phy_power_off(priv->transceiver); - return regulator_disable(priv->reg_xceiver); + return 0; } static int flexcan_chip_enable(struct flexcan_priv *priv) @@ -2086,6 +2091,7 @@ static int flexcan_probe(struct platform_device *pdev) struct net_device *dev; struct flexcan_priv *priv; struct regulator *reg_xceiver; + struct phy *transceiver; struct clk *clk_ipg = NULL, *clk_per = NULL; struct flexcan_regs __iomem *regs; struct flexcan_platform_data *pdata; @@ -2101,6 +2107,11 @@ static int flexcan_probe(struct platform_device *pdev) else if (IS_ERR(reg_xceiver)) return PTR_ERR(reg_xceiver); + transceiver = devm_phy_optional_get(&pdev->dev, NULL); + if (IS_ERR(transceiver)) + return dev_err_probe(&pdev->dev, PTR_ERR(transceiver), + "failed to get phy\n"); + if (pdev->dev.of_node) { of_property_read_u32(pdev->dev.of_node, "clock-frequency", &clock_freq); @@ -2198,6 +2209,10 @@ static int flexcan_probe(struct platform_device *pdev) priv->clk_per = clk_per; priv->clk_src = clk_src; priv->reg_xceiver = reg_xceiver; + priv->transceiver = transceiver; + + if (transceiver) + priv->can.bitrate_max = transceiver->attrs.max_link_rate; if (priv->devtype_data.quirks & FLEXCAN_QUIRK_NR_IRQ_3) { priv->irq_boff = platform_get_irq(pdev, 1); diff --git a/drivers/net/can/flexcan/flexcan.h b/drivers/net/can/flexcan/flexcan.h index 2cf886618c96..16692a2502eb 100644 --- a/drivers/net/can/flexcan/flexcan.h +++ b/drivers/net/can/flexcan/flexcan.h @@ -107,6 +107,7 @@ struct flexcan_priv { struct clk *clk_per; struct flexcan_devtype_data devtype_data; struct regulator *reg_xceiver; + struct phy *transceiver; struct flexcan_stop_mode stm; int irq_boff; diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index ac5d9f7a1722..d70399bce5b9 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -2474,7 +2474,8 @@ mt7531_setup_common(struct dsa_switch *ds) if (ret < 0) return ret; - return 0; + /* Setup VLAN ID 0 for VLAN-unaware bridges */ + return mt7530_setup_vlan0(priv); } static int @@ -2570,11 +2571,6 @@ mt7531_setup(struct dsa_switch *ds) if (ret) return ret; - /* Setup VLAN ID 0 for VLAN-unaware bridges */ - ret = mt7530_setup_vlan0(priv); - if (ret) - return ret; - ds->assisted_learning_on_cpu_port = true; ds->mtu_enforcement_ingress = true; diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 68d1e891752b..5db96ca52505 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -2208,13 +2208,11 @@ mv88e6xxx_port_vlan_prepare(struct dsa_switch *ds, int port, return err; } -static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port, - const unsigned char *addr, u16 vid, - u8 state) +static int mv88e6xxx_port_db_get(struct mv88e6xxx_chip *chip, + const unsigned char *addr, u16 vid, + u16 *fid, struct mv88e6xxx_atu_entry *entry) { - struct mv88e6xxx_atu_entry entry; struct mv88e6xxx_vtu_entry vlan; - u16 fid; int err; /* Ports have two private address databases: one for when 
the port is @@ -2225,7 +2223,7 @@ static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port, * VLAN ID into the port's database used for VLAN-unaware bridging. */ if (vid == 0) { - fid = MV88E6XXX_FID_BRIDGED; + *fid = MV88E6XXX_FID_BRIDGED; } else { err = mv88e6xxx_vtu_get(chip, vid, &vlan); if (err) @@ -2235,14 +2233,39 @@ static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port, if (!vlan.valid) return -EOPNOTSUPP; - fid = vlan.fid; + *fid = vlan.fid; } - entry.state = 0; - ether_addr_copy(entry.mac, addr); - eth_addr_dec(entry.mac); + entry->state = 0; + ether_addr_copy(entry->mac, addr); + eth_addr_dec(entry->mac); + + return mv88e6xxx_g1_atu_getnext(chip, *fid, entry); +} + +static bool mv88e6xxx_port_db_find(struct mv88e6xxx_chip *chip, + const unsigned char *addr, u16 vid) +{ + struct mv88e6xxx_atu_entry entry; + u16 fid; + int err; - err = mv88e6xxx_g1_atu_getnext(chip, fid, &entry); + err = mv88e6xxx_port_db_get(chip, addr, vid, &fid, &entry); + if (err) + return false; + + return entry.state && ether_addr_equal(entry.mac, addr); +} + +static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port, + const unsigned char *addr, u16 vid, + u8 state) +{ + struct mv88e6xxx_atu_entry entry; + u16 fid; + int err; + + err = mv88e6xxx_port_db_get(chip, addr, vid, &fid, &entry); if (err) return err; @@ -2846,6 +2869,13 @@ static int mv88e6xxx_port_fdb_add(struct dsa_switch *ds, int port, mv88e6xxx_reg_lock(chip); err = mv88e6xxx_port_db_load_purge(chip, port, addr, vid, MV88E6XXX_G1_ATU_DATA_STATE_UC_STATIC); + if (err) + goto out; + + if (!mv88e6xxx_port_db_find(chip, addr, vid)) + err = -ENOSPC; + +out: mv88e6xxx_reg_unlock(chip); return err; @@ -6614,6 +6644,13 @@ static int mv88e6xxx_port_mdb_add(struct dsa_switch *ds, int port, mv88e6xxx_reg_lock(chip); err = mv88e6xxx_port_db_load_purge(chip, port, mdb->addr, mdb->vid, MV88E6XXX_G1_ATU_DATA_STATE_MC_STATIC); + if (err) + goto out; + + if (!mv88e6xxx_port_db_find(chip, mdb->addr, mdb->vid)) + err = -ENOSPC; + +out: mv88e6xxx_reg_unlock(chip); return err; diff --git a/drivers/net/dsa/realtek/Kconfig b/drivers/net/dsa/realtek/Kconfig index 10687722d14c..d6eb6713e5f6 100644 --- a/drivers/net/dsa/realtek/Kconfig +++ b/drivers/net/dsa/realtek/Kconfig @@ -44,7 +44,7 @@ config NET_DSA_REALTEK_RTL8366RB Select to enable support for Realtek RTL8366RB. 
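The mv88e6xxx refactor above splits the ATU lookup out into mv88e6xxx_port_db_get() so that the fdb/mdb add paths can read an entry back after loading it: the switch reports success even when the ATU is full, so a missing entry on read-back is converted into -ENOSPC. A minimal userspace model of this verify-after-write pattern, assuming a deliberately tiny two-slot table and made-up helper names:

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define TABLE_SIZE 2                    /* tiny on purpose */

struct entry { unsigned char mac[6]; bool valid; };
static struct entry table[TABLE_SIZE];

/* Models the ATU load: a full table drops the write without an error. */
static void table_load(const unsigned char *mac)
{
        for (int i = 0; i < TABLE_SIZE; i++) {
                if (!table[i].valid) {
                        memcpy(table[i].mac, mac, 6);
                        table[i].valid = true;
                        return;
                }
        }
}

/* Models the getnext read-back used by mv88e6xxx_port_db_find(). */
static bool table_find(const unsigned char *mac)
{
        for (int i = 0; i < TABLE_SIZE; i++)
                if (table[i].valid && !memcmp(table[i].mac, mac, 6))
                        return true;
        return false;
}

static int fdb_add(const unsigned char *mac)
{
        table_load(mac);
        return table_find(mac) ? 0 : -ENOSPC;   /* the added verification */
}

int main(void)
{
        unsigned char macs[3][6] = {
                { 2, 0, 0, 0, 0, 1 }, { 2, 0, 0, 0, 0, 2 }, { 2, 0, 0, 0, 0, 3 },
        };

        for (int i = 0; i < 3; i++)
                printf("add #%d -> %d\n", i, fdb_add(macs[i]));  /* 0, 0, -ENOSPC */
        return 0;
}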
config NET_DSA_REALTEK_RTL8366RB_LEDS - bool "Support RTL8366RB LED control" + bool depends on (LEDS_CLASS=y || LEDS_CLASS=NET_DSA_REALTEK_RTL8366RB) depends on NET_DSA_REALTEK_RTL8366RB default NET_DSA_REALTEK_RTL8366RB diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c index 005d79975f3b..a4938c6a5ebb 100644 --- a/drivers/net/dummy.c +++ b/drivers/net/dummy.c @@ -38,6 +38,7 @@ #include <linux/moduleparam.h> #include <linux/rtnetlink.h> #include <linux/net_tstamp.h> +#include <net/netdev_lock.h> #include <net/rtnetlink.h> #include <linux/u64_stats_sync.h> diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig index 7941983d21e9..f86d4557d8d7 100644 --- a/drivers/net/ethernet/Kconfig +++ b/drivers/net/ethernet/Kconfig @@ -21,7 +21,6 @@ source "drivers/net/ethernet/adaptec/Kconfig" source "drivers/net/ethernet/aeroflex/Kconfig" source "drivers/net/ethernet/agere/Kconfig" source "drivers/net/ethernet/airoha/Kconfig" -source "drivers/net/ethernet/mellanox/Kconfig" source "drivers/net/ethernet/alacritech/Kconfig" source "drivers/net/ethernet/allwinner/Kconfig" source "drivers/net/ethernet/alteon/Kconfig" diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index ff837168845d..c0a642568ac1 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -138,15 +138,10 @@ static void airoha_fe_maccr_init(struct airoha_eth *eth) { int p; - for (p = 1; p <= ARRAY_SIZE(eth->ports); p++) { + for (p = 1; p <= ARRAY_SIZE(eth->ports); p++) airoha_fe_set(eth, REG_GDM_FWD_CFG(p), GDM_TCP_CKSUM | GDM_UDP_CKSUM | GDM_IP4_CKSUM | GDM_DROP_CRC_ERR); - airoha_fe_rmw(eth, REG_GDM_LEN_CFG(p), - GDM_SHORT_LEN_MASK | GDM_LONG_LEN_MASK, - FIELD_PREP(GDM_SHORT_LEN_MASK, 60) | - FIELD_PREP(GDM_LONG_LEN_MASK, 4004)); - } airoha_fe_rmw(eth, REG_CDM1_VLAN_CTRL, CDM1_VLAN_MASK, FIELD_PREP(CDM1_VLAN_MASK, 0x8100)); @@ -594,7 +589,7 @@ static int airoha_qdma_get_gdm_port(struct airoha_eth *eth, sport = FIELD_GET(QDMA_ETH_RXMSG_SPORT_MASK, msg1); switch (sport) { - case 0x10 ... 0x13: + case 0x10 ... 0x14: port = 0; break; case 0x2 ... 0x4: @@ -620,10 +615,10 @@ static int airoha_qdma_rx_process(struct airoha_queue *q, int budget) struct airoha_qdma_desc *desc = &q->desc[q->tail]; u32 hash, reason, msg1 = le32_to_cpu(desc->msg1); dma_addr_t dma_addr = le32_to_cpu(desc->addr); + struct page *page = virt_to_head_page(e->buf); u32 desc_ctrl = le32_to_cpu(desc->ctrl); struct airoha_gdm_port *port; - struct sk_buff *skb; - int len, p; + int data_len, len, p; if (!(desc_ctrl & QDMA_DESC_DONE_MASK)) break; @@ -641,30 +636,41 @@ static int airoha_qdma_rx_process(struct airoha_queue *q, int budget) dma_sync_single_for_cpu(eth->dev, dma_addr, SKB_WITH_OVERHEAD(q->buf_size), dir); + data_len = q->skb ? 
q->buf_size + : SKB_WITH_OVERHEAD(q->buf_size); + if (data_len < len) + goto free_frag; + p = airoha_qdma_get_gdm_port(eth, desc); - if (p < 0 || !eth->ports[p]) { - page_pool_put_full_page(q->page_pool, - virt_to_head_page(e->buf), - true); - continue; - } + if (p < 0 || !eth->ports[p]) + goto free_frag; port = eth->ports[p]; - skb = napi_build_skb(e->buf, q->buf_size); - if (!skb) { - page_pool_put_full_page(q->page_pool, - virt_to_head_page(e->buf), - true); - break; + if (!q->skb) { /* first buffer */ + q->skb = napi_build_skb(e->buf, q->buf_size); + if (!q->skb) + goto free_frag; + + __skb_put(q->skb, len); + skb_mark_for_recycle(q->skb); + q->skb->dev = port->dev; + q->skb->protocol = eth_type_trans(q->skb, port->dev); + q->skb->ip_summed = CHECKSUM_UNNECESSARY; + skb_record_rx_queue(q->skb, qid); + } else { /* scattered frame */ + struct skb_shared_info *shinfo = skb_shinfo(q->skb); + int nr_frags = shinfo->nr_frags; + + if (nr_frags >= ARRAY_SIZE(shinfo->frags)) + goto free_frag; + + skb_add_rx_frag(q->skb, nr_frags, page, + e->buf - page_address(page), len, + q->buf_size); } - skb_reserve(skb, 2); - __skb_put(skb, len); - skb_mark_for_recycle(skb); - skb->dev = port->dev; - skb->protocol = eth_type_trans(skb, skb->dev); - skb->ip_summed = CHECKSUM_UNNECESSARY; - skb_record_rx_queue(skb, qid); + if (FIELD_GET(QDMA_DESC_MORE_MASK, desc_ctrl)) + continue; if (netdev_uses_dsa(port->dev)) { /* PPE module requires untagged packets to work @@ -677,22 +683,27 @@ static int airoha_qdma_rx_process(struct airoha_queue *q, int budget) if (sptag < ARRAY_SIZE(port->dsa_meta) && port->dsa_meta[sptag]) - skb_dst_set_noref(skb, + skb_dst_set_noref(q->skb, &port->dsa_meta[sptag]->dst); } hash = FIELD_GET(AIROHA_RXD4_FOE_ENTRY, msg1); if (hash != AIROHA_RXD4_FOE_ENTRY) - skb_set_hash(skb, jhash_1word(hash, 0), + skb_set_hash(q->skb, jhash_1word(hash, 0), PKT_HASH_TYPE_L4); reason = FIELD_GET(AIROHA_RXD4_PPE_CPU_REASON, msg1); if (reason == PPE_CPU_REASON_HIT_UNBIND_RATE_REACHED) airoha_ppe_check_skb(eth->ppe, hash); - napi_gro_receive(&q->napi, skb); - done++; + napi_gro_receive(&q->napi, q->skb); + q->skb = NULL; + continue; +free_frag: + page_pool_put_full_page(q->page_pool, page, true); + dev_kfree_skb(q->skb); + q->skb = NULL; } airoha_qdma_fill_rx_queue(q); @@ -767,6 +778,7 @@ static int airoha_qdma_init_rx_queue(struct airoha_queue *q, FIELD_PREP(RX_RING_THR_MASK, thr)); airoha_qdma_rmw(qdma, REG_RX_DMA_IDX(qid), RX_RING_DMA_IDX_MASK, FIELD_PREP(RX_RING_DMA_IDX_MASK, q->head)); + airoha_qdma_set(qdma, REG_RX_SCATTER_CFG(qid), RX_RING_SG_EN_MASK); airoha_qdma_fill_rx_queue(q); @@ -1166,7 +1178,6 @@ static int airoha_qdma_hw_init(struct airoha_qdma *qdma) } airoha_qdma_wr(qdma, REG_QDMA_GLOBAL_CFG, - GLOBAL_CFG_RX_2B_OFFSET_MASK | FIELD_PREP(GLOBAL_CFG_DMA_PREFERENCE_MASK, 3) | GLOBAL_CFG_CPU_TXR_RR_MASK | GLOBAL_CFG_PAYLOAD_BYTE_SWAP_MASK | @@ -1520,9 +1531,9 @@ static void airoha_update_hw_stats(struct airoha_gdm_port *port) static int airoha_dev_open(struct net_device *dev) { + int err, len = ETH_HLEN + dev->mtu + ETH_FCS_LEN; struct airoha_gdm_port *port = netdev_priv(dev); struct airoha_qdma *qdma = port->qdma; - int err; netif_tx_start_all_queues(dev); err = airoha_set_vip_for_gdm_port(port, true); @@ -1536,6 +1547,11 @@ static int airoha_dev_open(struct net_device *dev) airoha_fe_clear(qdma->eth, REG_GDM_INGRESS_CFG(port->id), GDM_STAG_EN_MASK); + airoha_fe_rmw(qdma->eth, REG_GDM_LEN_CFG(port->id), + GDM_SHORT_LEN_MASK | GDM_LONG_LEN_MASK, + FIELD_PREP(GDM_SHORT_LEN_MASK, 60) | + 
FIELD_PREP(GDM_LONG_LEN_MASK, len)); + airoha_qdma_set(qdma, REG_QDMA_GLOBAL_CFG, GLOBAL_CFG_TX_DMA_EN_MASK | GLOBAL_CFG_RX_DMA_EN_MASK); @@ -1689,6 +1705,20 @@ static void airoha_dev_get_stats64(struct net_device *dev, } while (u64_stats_fetch_retry(&port->stats.syncp, start)); } +static int airoha_dev_change_mtu(struct net_device *dev, int mtu) +{ + struct airoha_gdm_port *port = netdev_priv(dev); + struct airoha_eth *eth = port->qdma->eth; + u32 len = ETH_HLEN + mtu + ETH_FCS_LEN; + + airoha_fe_rmw(eth, REG_GDM_LEN_CFG(port->id), + GDM_LONG_LEN_MASK, + FIELD_PREP(GDM_LONG_LEN_MASK, len)); + WRITE_ONCE(dev->mtu, mtu); + + return 0; +} + static u16 airoha_dev_select_queue(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev) { @@ -1711,18 +1741,13 @@ static u32 airoha_get_dsa_tag(struct sk_buff *skb, struct net_device *dev) { #if IS_ENABLED(CONFIG_NET_DSA) struct ethhdr *ehdr; - struct dsa_port *dp; u8 xmit_tpid; u16 tag; if (!netdev_uses_dsa(dev)) return 0; - dp = dev->dsa_ptr; - if (IS_ERR(dp)) - return 0; - - if (dp->tag_ops->proto != DSA_TAG_PROTO_MTK) + if (dev->dsa_ptr->tag_ops->proto != DSA_TAG_PROTO_MTK) return 0; if (skb_cow_head(skb, 0)) @@ -2384,6 +2409,7 @@ static const struct net_device_ops airoha_netdev_ops = { .ndo_init = airoha_dev_init, .ndo_open = airoha_dev_open, .ndo_stop = airoha_dev_stop, + .ndo_change_mtu = airoha_dev_change_mtu, .ndo_select_queue = airoha_dev_select_queue, .ndo_start_xmit = airoha_dev_xmit, .ndo_get_stats64 = airoha_dev_get_stats64, @@ -2474,6 +2500,7 @@ static int airoha_alloc_gdm_port(struct airoha_eth *eth, NETIF_F_SG | NETIF_F_TSO | NETIF_F_HW_TC; dev->features |= dev->hw_features; + dev->vlan_features = dev->hw_features; dev->dev.of_node = np; dev->irq = qdma->irq; SET_NETDEV_DEV(dev, eth->dev); diff --git a/drivers/net/ethernet/airoha/airoha_eth.h b/drivers/net/ethernet/airoha/airoha_eth.h index b7a3bd7a76b7..f66b9b736b94 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.h +++ b/drivers/net/ethernet/airoha/airoha_eth.h @@ -20,7 +20,7 @@ #define AIROHA_MAX_DSA_PORTS 7 #define AIROHA_MAX_NUM_RSTS 3 #define AIROHA_MAX_NUM_XSI_RSTS 5 -#define AIROHA_MAX_MTU 2000 +#define AIROHA_MAX_MTU 9216 #define AIROHA_MAX_PACKET_SIZE 2048 #define AIROHA_NUM_QOS_CHANNELS 4 #define AIROHA_NUM_QOS_QUEUES 8 @@ -176,6 +176,7 @@ struct airoha_queue { struct napi_struct napi; struct page_pool *page_pool; + struct sk_buff *skb; }; struct airoha_tx_irq_queue { diff --git a/drivers/net/ethernet/airoha/airoha_regs.h b/drivers/net/ethernet/airoha/airoha_regs.h index 1aa06cdffe23..8146cde4e8ba 100644 --- a/drivers/net/ethernet/airoha/airoha_regs.h +++ b/drivers/net/ethernet/airoha/airoha_regs.h @@ -626,10 +626,15 @@ #define REG_RX_DELAY_INT_IDX(_n) \ (((_n) < 16) ? 0x0210 + ((_n) << 5) : 0x0e10 + (((_n) - 16) << 5)) +#define REG_RX_SCATTER_CFG(_n) \ + (((_n) < 16) ? 
0x0214 + ((_n) << 5) : 0x0e14 + (((_n) - 16) << 5)) + #define RX_DELAY_INT_MASK GENMASK(15, 0) #define RX_RING_DMA_IDX_MASK GENMASK(15, 0) +#define RX_RING_SG_EN_MASK BIT(0) + #define REG_INGRESS_TRTCM_CFG 0x0070 #define INGRESS_TRTCM_EN_MASK BIT(31) #define INGRESS_TRTCM_MODE_MASK BIT(30) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 15c57a06ecaf..08a41e9c25ff 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -54,6 +54,7 @@ #include <net/pkt_cls.h> #include <net/page_pool/helpers.h> #include <linux/align.h> +#include <net/netdev_lock.h> #include <net/netdev_queues.h> #include <net/netdev_rx_queue.h> #include <linux/pci-tph.h> @@ -2041,6 +2042,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, struct rx_cmp_ext *rxcmp1; u32 tmp_raw_cons = *raw_cons; u16 cons, prod, cp_cons = RING_CMP(tmp_raw_cons); + struct skb_shared_info *sinfo; struct bnxt_sw_rx_bd *rx_buf; unsigned int len; u8 *data_ptr, agg_bufs, cmp_type; @@ -2167,6 +2169,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, false); if (!frag_len) goto oom_next_rx; + } xdp_active = true; } @@ -2176,6 +2179,12 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, rc = 1; goto next_rx; } + if (xdp_buff_has_frags(&xdp)) { + sinfo = xdp_get_shared_info_from_buff(&xdp); + agg_bufs = sinfo->nr_frags; + } else { + agg_bufs = 0; + } } if (len <= bp->rx_copybreak) { @@ -2213,7 +2222,8 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, if (!skb) goto oom_next_rx; } else { - skb = bnxt_xdp_build_skb(bp, skb, agg_bufs, rxr->page_pool, &xdp, rxcmp1); + skb = bnxt_xdp_build_skb(bp, skb, agg_bufs, + rxr->page_pool, &xdp); if (!skb) { /* we should be able to free the old skb here */ bnxt_xdp_buff_frags_free(rxr, &xdp); @@ -5246,8 +5256,10 @@ static void bnxt_free_ntp_fltrs(struct bnxt *bp, bool all) { int i; - /* Under rtnl_lock and all our NAPIs have been disabled. It's - * safe to delete the hash table. + netdev_assert_locked(bp->dev); + + /* Under netdev instance lock and all our NAPIs have been disabled. + * It's safe to delete the hash table. */ for (i = 0; i < BNXT_NTP_FLTR_HASH_SIZE; i++) { struct hlist_head *head; @@ -11378,14 +11390,14 @@ static void bnxt_irq_affinity_notify(struct irq_affinity_notify *notify, if (pcie_tph_set_st_entry(irq->bp->pdev, irq->msix_nr, tag)) return; - rtnl_lock(); + netdev_lock(irq->bp->dev); if (netif_running(irq->bp->dev)) { err = netdev_rx_queue_restart(irq->bp->dev, irq->ring_nr); if (err) netdev_err(irq->bp->dev, "RX queue restart failed: err=%d\n", err); } - rtnl_unlock(); + netdev_unlock(irq->bp->dev); } static void bnxt_irq_affinity_release(struct kref *ref) @@ -11508,7 +11520,7 @@ static int bnxt_request_irq(struct bnxt *bp) if (rc) break; - netif_napi_set_irq(&bp->bnapi[i]->napi, irq->vector); + netif_napi_set_irq_locked(&bp->bnapi[i]->napi, irq->vector); irq->requested = 1; if (zalloc_cpumask_var(&irq->cpu_mask, GFP_KERNEL)) { @@ -11557,9 +11569,9 @@ static void bnxt_del_napi(struct bnxt *bp) for (i = 0; i < bp->cp_nr_rings; i++) { struct bnxt_napi *bnapi = bp->bnapi[i]; - __netif_napi_del(&bnapi->napi); + __netif_napi_del_locked(&bnapi->napi); } - /* We called __netif_napi_del(), we need + /* We called __netif_napi_del_locked(), we need * to respect an RCU grace period before freeing napi structures. 
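The bnxt conversion above moves NAPI registration and teardown to the *_locked helper variants because these paths now run with the netdev instance lock already held; the plain helpers would try to take it again. The convention being applied: the unsuffixed helper acquires the lock itself, while the _locked variant only asserts that the caller holds it. A userspace sketch of the same convention, where a mutex plus an owned flag stand in for the instance lock and the names merely mirror the kernel's:

#include <assert.h>
#include <pthread.h>
#include <stdio.h>

struct netdev {
        pthread_mutex_t lock;   /* stand-in for the netdev instance lock */
        int locked;
        int napi_enabled;
};

static void netdev_lock(struct netdev *d)
{
        pthread_mutex_lock(&d->lock);
        d->locked = 1;
}

static void netdev_unlock(struct netdev *d)
{
        d->locked = 0;
        pthread_mutex_unlock(&d->lock);
}

/* _locked variant: documents and checks that the caller holds the lock */
static void napi_enable_locked(struct netdev *d)
{
        assert(d->locked);
        d->napi_enabled = 1;
}

/* plain variant: safe for callers that do not hold the lock */
static void napi_enable(struct netdev *d)
{
        netdev_lock(d);
        napi_enable_locked(d);
        netdev_unlock(d);
}

int main(void)
{
        struct netdev d = { .lock = PTHREAD_MUTEX_INITIALIZER };

        netdev_lock(&d);          /* open path already holds the lock */
        napi_enable_locked(&d);   /* no self-deadlock, assertion passes */
        netdev_unlock(&d);

        napi_enable(&d);          /* unlocked context uses the wrapper */
        printf("napi_enabled=%d\n", d.napi_enabled);
        return 0;
}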
*/ synchronize_net(); @@ -11578,12 +11590,12 @@ static void bnxt_init_napi(struct bnxt *bp) cp_nr_rings--; for (i = 0; i < cp_nr_rings; i++) { bnapi = bp->bnapi[i]; - netif_napi_add_config(bp->dev, &bnapi->napi, poll_fn, - bnapi->index); + netif_napi_add_config_locked(bp->dev, &bnapi->napi, poll_fn, + bnapi->index); } if (BNXT_CHIP_TYPE_NITRO_A0(bp)) { bnapi = bp->bnapi[cp_nr_rings]; - netif_napi_add(bp->dev, &bnapi->napi, bnxt_poll_nitroa0); + netif_napi_add_locked(bp->dev, &bnapi->napi, bnxt_poll_nitroa0); } } @@ -11604,7 +11616,7 @@ static void bnxt_disable_napi(struct bnxt *bp) cpr->sw_stats->tx.tx_resets++; if (bnapi->in_reset) cpr->sw_stats->rx.rx_resets++; - napi_disable(&bnapi->napi); + napi_disable_locked(&bnapi->napi); } } @@ -11626,7 +11638,7 @@ static void bnxt_enable_napi(struct bnxt *bp) INIT_WORK(&cpr->dim.work, bnxt_dim_work); cpr->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; } - napi_enable(&bnapi->napi); + napi_enable_locked(&bnapi->napi); } } @@ -12297,6 +12309,7 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up) struct hwrm_func_drv_if_change_input *req; bool fw_reset = !bp->irq_tbl; bool resc_reinit = false; + bool caps_change = false; int rc, retry = 0; u32 flags = 0; @@ -12352,8 +12365,11 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up) set_bit(BNXT_STATE_ABORT_ERR, &bp->state); return -ENODEV; } - if (resc_reinit || fw_reset) { - if (fw_reset) { + if (flags & FUNC_DRV_IF_CHANGE_RESP_FLAGS_CAPS_CHANGE) + caps_change = true; + + if (resc_reinit || fw_reset || caps_change) { + if (fw_reset || caps_change) { set_bit(BNXT_STATE_FW_RESET_DET, &bp->state); if (!test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) bnxt_ulp_irq_stop(bp); @@ -12789,7 +12805,6 @@ open_err_free_mem: return rc; } -/* rtnl_lock held */ int bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) { int rc = 0; @@ -12800,14 +12815,14 @@ int bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) rc = __bnxt_open_nic(bp, irq_re_init, link_re_init); if (rc) { netdev_err(bp->dev, "nic open fail (rc: %x)\n", rc); - dev_close(bp->dev); + netif_close(bp->dev); } return rc; } -/* rtnl_lock held, open the NIC half way by allocating all resources, but - * NAPI, IRQ, and TX are not enabled. This is mainly used for offline - * self tests. +/* netdev instance lock held, open the NIC half way by allocating all + * resources, but NAPI, IRQ, and TX are not enabled. This is mainly used + * for offline self tests. */ int bnxt_half_open_nic(struct bnxt *bp) { @@ -12838,12 +12853,12 @@ int bnxt_half_open_nic(struct bnxt *bp) half_open_err: bnxt_free_skbs(bp); bnxt_free_mem(bp, true); - dev_close(bp->dev); + netif_close(bp->dev); return rc; } -/* rtnl_lock held, this call can only be made after a previous successful - * call to bnxt_half_open_nic(). +/* netdev instance lock held, this call can only be made after a previous + * successful call to bnxt_half_open_nic(). */ void bnxt_half_close_nic(struct bnxt *bp) { @@ -12952,10 +12967,11 @@ void bnxt_close_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) if (test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) { /* If we get here, it means firmware reset is in progress * while we are trying to close. We can safely proceed with - * the close because we are holding rtnl_lock(). Some firmware - * messages may fail as we proceed to close. We set the - * ABORT_ERR flag here so that the FW reset thread will later - * abort when it gets the rtnl_lock() and sees the flag. + * the close because we are holding netdev instance lock. 
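bnxt_lock_sp(), renamed from bnxt_rtnl_lock_sp() above, preserves a subtle ordering across the lock swap: the service task must clear BNXT_STATE_IN_SP_TASK before it blocks on the instance lock, because bnxt_close() runs with that lock held while waiting for the bit to clear. A compilable sketch of why the order matters, with a mutex standing in for netdev_lock() and an atomic flag for the state bit:

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t instance_lock = PTHREAD_MUTEX_INITIALIZER;
static atomic_int in_sp_task = 1;       /* set while the service task runs */

static void *sp_task(void *arg)
{
        (void)arg;
        /* bnxt_lock_sp(): clear the flag BEFORE blocking on the lock */
        atomic_store(&in_sp_task, 0);
        pthread_mutex_lock(&instance_lock);
        /* ... work that needs the instance lock ... */
        pthread_mutex_unlock(&instance_lock);
        return NULL;
}

static void close_path(void)
{
        pthread_mutex_lock(&instance_lock);
        /* close() holds the lock while waiting for the task to step aside;
         * if sp_task took the lock before clearing the flag, neither side
         * could make progress */
        while (atomic_load(&in_sp_task))
                usleep(1000);
        /* ... teardown under the lock ... */
        pthread_mutex_unlock(&instance_lock);
}

int main(void)
{
        pthread_t t;

        pthread_create(&t, NULL, sp_task, NULL);
        close_path();
        pthread_join(t, NULL);
        puts("no deadlock");
        return 0;
}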
+ * Some firmware messages may fail as we proceed to close. + * We set the ABORT_ERR flag here so that the FW reset thread + * will later abort when it gets the netdev instance lock + * and sees the flag. */ netdev_warn(bp->dev, "FW reset in progress during close, FW reset will be aborted\n"); set_bit(BNXT_STATE_ABORT_ERR, &bp->state); @@ -13046,7 +13062,7 @@ static int bnxt_hwrm_port_phy_write(struct bnxt *bp, u16 phy_addr, u16 reg, return hwrm_req_send(bp, req); } -/* rtnl_lock held */ +/* netdev instance lock held */ static int bnxt_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { struct mii_ioctl_data *mdio = if_mii(ifr); @@ -13965,30 +13981,31 @@ bnxt_restart_timer: mod_timer(&bp->timer, jiffies + bp->current_interval); } -static void bnxt_rtnl_lock_sp(struct bnxt *bp) +static void bnxt_lock_sp(struct bnxt *bp) { /* We are called from bnxt_sp_task which has BNXT_STATE_IN_SP_TASK * set. If the device is being closed, bnxt_close() may be holding - * rtnl() and waiting for BNXT_STATE_IN_SP_TASK to clear. So we - * must clear BNXT_STATE_IN_SP_TASK before holding rtnl(). + * netdev instance lock and waiting for BNXT_STATE_IN_SP_TASK to clear. + * So we must clear BNXT_STATE_IN_SP_TASK before holding netdev + * instance lock. */ clear_bit(BNXT_STATE_IN_SP_TASK, &bp->state); - rtnl_lock(); + netdev_lock(bp->dev); } -static void bnxt_rtnl_unlock_sp(struct bnxt *bp) +static void bnxt_unlock_sp(struct bnxt *bp) { set_bit(BNXT_STATE_IN_SP_TASK, &bp->state); - rtnl_unlock(); + netdev_unlock(bp->dev); } /* Only called from bnxt_sp_task() */ static void bnxt_reset(struct bnxt *bp, bool silent) { - bnxt_rtnl_lock_sp(bp); + bnxt_lock_sp(bp); if (test_bit(BNXT_STATE_OPEN, &bp->state)) bnxt_reset_task(bp, silent); - bnxt_rtnl_unlock_sp(bp); + bnxt_unlock_sp(bp); } /* Only called from bnxt_sp_task() */ @@ -13996,9 +14013,9 @@ static void bnxt_rx_ring_reset(struct bnxt *bp) { int i; - bnxt_rtnl_lock_sp(bp); + bnxt_lock_sp(bp); if (!test_bit(BNXT_STATE_OPEN, &bp->state)) { - bnxt_rtnl_unlock_sp(bp); + bnxt_unlock_sp(bp); return; } /* Disable and flush TPA before resetting the RX ring */ @@ -14037,7 +14054,7 @@ static void bnxt_rx_ring_reset(struct bnxt *bp) } if (bp->flags & BNXT_FLAG_TPA) bnxt_set_tpa(bp, true); - bnxt_rtnl_unlock_sp(bp); + bnxt_unlock_sp(bp); } static void bnxt_fw_fatal_close(struct bnxt *bp) @@ -14093,7 +14110,7 @@ static bool is_bnxt_fw_ok(struct bnxt *bp) return false; } -/* rtnl_lock is acquired before calling this function */ +/* netdev instance lock is acquired before calling this function */ static void bnxt_force_fw_reset(struct bnxt *bp) { struct bnxt_fw_health *fw_health = bp->fw_health; @@ -14136,9 +14153,9 @@ void bnxt_fw_exception(struct bnxt *bp) netdev_warn(bp->dev, "Detected firmware fatal condition, initiating reset\n"); set_bit(BNXT_STATE_FW_FATAL_COND, &bp->state); bnxt_ulp_stop(bp); - bnxt_rtnl_lock_sp(bp); + bnxt_lock_sp(bp); bnxt_force_fw_reset(bp); - bnxt_rtnl_unlock_sp(bp); + bnxt_unlock_sp(bp); } /* Returns the number of registered VFs, or 1 if VF configuration is pending, or @@ -14168,7 +14185,7 @@ static int bnxt_get_registered_vfs(struct bnxt *bp) void bnxt_fw_reset(struct bnxt *bp) { bnxt_ulp_stop(bp); - bnxt_rtnl_lock_sp(bp); + bnxt_lock_sp(bp); if (test_bit(BNXT_STATE_OPEN, &bp->state) && !test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) { struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; @@ -14191,7 +14208,7 @@ void bnxt_fw_reset(struct bnxt *bp) netdev_err(bp->dev, "Firmware reset aborted, rc = %d\n", n); clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); - 
dev_close(bp->dev); + netif_close(bp->dev); goto fw_reset_exit; } else if (n > 0) { u16 vf_tmo_dsecs = n * 10; @@ -14214,7 +14231,7 @@ void bnxt_fw_reset(struct bnxt *bp) bnxt_queue_fw_reset_work(bp, tmo); } fw_reset_exit: - bnxt_rtnl_unlock_sp(bp); + bnxt_unlock_sp(bp); } static void bnxt_chk_missed_irq(struct bnxt *bp) @@ -14413,7 +14430,7 @@ static void bnxt_sp_task(struct work_struct *work) static void _bnxt_get_max_rings(struct bnxt *bp, int *max_rx, int *max_tx, int *max_cp); -/* Under rtnl_lock */ +/* Under netdev instance lock */ int bnxt_check_rings(struct bnxt *bp, int tx, int rx, bool sh, int tcs, int tx_xdp) { @@ -14806,7 +14823,7 @@ static void bnxt_fw_reset_abort(struct bnxt *bp, int rc) if (bp->fw_reset_state != BNXT_FW_RESET_STATE_POLL_VF) bnxt_dl_health_fw_status_update(bp, false); bp->fw_reset_state = 0; - dev_close(bp->dev); + netif_close(bp->dev); } static void bnxt_fw_reset_task(struct work_struct *work) @@ -14841,10 +14858,10 @@ static void bnxt_fw_reset_task(struct work_struct *work) return; } bp->fw_reset_timestamp = jiffies; - rtnl_lock(); + netdev_lock(bp->dev); if (test_bit(BNXT_STATE_ABORT_ERR, &bp->state)) { bnxt_fw_reset_abort(bp, rc); - rtnl_unlock(); + netdev_unlock(bp->dev); goto ulp_start; } bnxt_fw_reset_close(bp); @@ -14855,7 +14872,7 @@ static void bnxt_fw_reset_task(struct work_struct *work) bp->fw_reset_state = BNXT_FW_RESET_STATE_ENABLE_DEV; tmo = bp->fw_reset_min_dsecs * HZ / 10; } - rtnl_unlock(); + netdev_unlock(bp->dev); bnxt_queue_fw_reset_work(bp, tmo); return; } @@ -14929,7 +14946,7 @@ static void bnxt_fw_reset_task(struct work_struct *work) bp->fw_reset_state = BNXT_FW_RESET_STATE_OPENING; fallthrough; case BNXT_FW_RESET_STATE_OPENING: - while (!rtnl_trylock()) { + while (!netdev_trylock(bp->dev)) { bnxt_queue_fw_reset_work(bp, HZ / 10); return; } @@ -14937,7 +14954,7 @@ static void bnxt_fw_reset_task(struct work_struct *work) if (rc) { netdev_err(bp->dev, "bnxt_open() failed during FW reset\n"); bnxt_fw_reset_abort(bp, rc); - rtnl_unlock(); + netdev_unlock(bp->dev); goto ulp_start; } @@ -14956,13 +14973,13 @@ static void bnxt_fw_reset_task(struct work_struct *work) bnxt_dl_health_fw_recovery_done(bp); bnxt_dl_health_fw_status_update(bp, true); } - rtnl_unlock(); + netdev_unlock(bp->dev); bnxt_ulp_start(bp, 0); bnxt_reenable_sriov(bp); - rtnl_lock(); + netdev_lock(bp->dev); bnxt_vf_reps_alloc(bp); bnxt_vf_reps_open(bp); - rtnl_unlock(); + netdev_unlock(bp->dev); break; } return; @@ -14975,9 +14992,9 @@ fw_reset_abort_status: netdev_err(bp->dev, "fw_health_status 0x%x\n", sts); } fw_reset_abort: - rtnl_lock(); + netdev_lock(bp->dev); bnxt_fw_reset_abort(bp, rc); - rtnl_unlock(); + netdev_unlock(bp->dev); ulp_start: bnxt_ulp_start(bp, rc); } @@ -15069,13 +15086,14 @@ init_err: return rc; } -/* rtnl_lock held */ static int bnxt_change_mac_addr(struct net_device *dev, void *p) { struct sockaddr *addr = p; struct bnxt *bp = netdev_priv(dev); int rc = 0; + netdev_assert_locked(dev); + if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; @@ -15096,11 +15114,12 @@ static int bnxt_change_mac_addr(struct net_device *dev, void *p) return rc; } -/* rtnl_lock held */ static int bnxt_change_mtu(struct net_device *dev, int new_mtu) { struct bnxt *bp = netdev_priv(dev); + netdev_assert_locked(dev); + if (netif_running(dev)) bnxt_close_nic(bp, true, false); @@ -15595,6 +15614,9 @@ static void bnxt_get_queue_stats_rx(struct net_device *dev, int i, struct bnxt_cp_ring_info *cpr; u64 *sw; + if (!bp->bnapi) + return; + cpr = &bp->bnapi[i]->cp_ring; sw 
= cpr->stats.sw_stats; @@ -15618,6 +15640,9 @@ static void bnxt_get_queue_stats_tx(struct net_device *dev, int i, struct bnxt_napi *bnapi; u64 *sw; + if (!bp->tx_ring) + return; + bnapi = bp->tx_ring[bp->tx_ring_map[i]].bnapi; sw = bnapi->cp_ring.stats.sw_stats; @@ -15659,6 +15684,9 @@ static int bnxt_queue_mem_alloc(struct net_device *dev, void *qmem, int idx) struct bnxt_ring_struct *ring; int rc; + if (!bp->rx_ring) + return -ENETDOWN; + rxr = &bp->rx_ring[idx]; clone = qmem; memcpy(clone, rxr, sizeof(*rxr)); @@ -15741,6 +15769,7 @@ static void bnxt_queue_mem_free(struct net_device *dev, void *qmem) struct bnxt_ring_struct *ring; bnxt_free_one_rx_ring_skbs(bp, rxr); + bnxt_free_one_tpa_info(bp, rxr); xdp_rxq_info_unreg(&rxr->xdp_rxq); @@ -15872,7 +15901,7 @@ static int bnxt_queue_start(struct net_device *dev, void *qmem, int idx) napi_enable(&bnapi->napi); bnxt_db_nq_arm(bp, &cpr->cp_db, cpr->cp_raw_cons); - for (i = 0; i <= BNXT_VNIC_NTUPLE; i++) { + for (i = 0; i <= bp->nr_vnics; i++) { vnic = &bp->vnic_info[i]; rc = bnxt_hwrm_vnic_set_rss_p5(bp, vnic, true); @@ -15906,7 +15935,7 @@ static int bnxt_queue_stop(struct net_device *dev, void *qmem, int idx) struct bnxt_napi *bnapi; int i; - for (i = 0; i <= BNXT_VNIC_NTUPLE; i++) { + for (i = 0; i <= bp->nr_vnics; i++) { vnic = &bp->vnic_info[i]; vnic->mru = 0; bnxt_hwrm_vnic_update(bp, vnic, @@ -16257,7 +16286,7 @@ int bnxt_restore_pf_fw_resources(struct bnxt *bp) { int rc; - ASSERT_RTNL(); + netdev_ops_assert_locked(bp->dev); bnxt_hwrm_func_qcaps(bp); if (netif_running(bp->dev)) @@ -16270,7 +16299,7 @@ int bnxt_restore_pf_fw_resources(struct bnxt *bp) if (netif_running(bp->dev)) { if (rc) - dev_close(bp->dev); + netif_close(bp->dev); else rc = bnxt_open_nic(bp, true, false); } @@ -16607,6 +16636,7 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) bp->rss_cap |= BNXT_RSS_CAP_MULTI_RSS_CTX; if (BNXT_SUPPORTS_QUEUE_API(bp)) dev->queue_mgmt_ops = &bnxt_queue_mgmt_ops; + dev->request_ops_lock = true; rc = register_netdev(dev); if (rc) @@ -16657,13 +16687,13 @@ static void bnxt_shutdown(struct pci_dev *pdev) if (!dev) return; - rtnl_lock(); + netdev_lock(dev); bp = netdev_priv(dev); if (!bp) goto shutdown_exit; if (netif_running(dev)) - dev_close(dev); + netif_close(dev); bnxt_ptp_clear(bp); bnxt_clear_int_mode(bp); @@ -16675,7 +16705,7 @@ static void bnxt_shutdown(struct pci_dev *pdev) } shutdown_exit: - rtnl_unlock(); + netdev_unlock(dev); } #ifdef CONFIG_PM_SLEEP @@ -16687,7 +16717,7 @@ static int bnxt_suspend(struct device *device) bnxt_ulp_stop(bp); - rtnl_lock(); + netdev_lock(dev); if (netif_running(dev)) { netif_device_detach(dev); rc = bnxt_close(dev); @@ -16696,7 +16726,7 @@ static int bnxt_suspend(struct device *device) bnxt_ptp_clear(bp); pci_disable_device(bp->pdev); bnxt_free_ctx_mem(bp, false); - rtnl_unlock(); + netdev_unlock(dev); return rc; } @@ -16706,7 +16736,7 @@ static int bnxt_resume(struct device *device) struct bnxt *bp = netdev_priv(dev); int rc = 0; - rtnl_lock(); + netdev_lock(dev); rc = pci_enable_device(bp->pdev); if (rc) { netdev_err(dev, "Cannot re-enable PCI device during resume, err = %d\n", @@ -16749,7 +16779,7 @@ static int bnxt_resume(struct device *device) } resume_exit: - rtnl_unlock(); + netdev_unlock(bp->dev); bnxt_ulp_start(bp, rc); if (!rc) bnxt_reenable_sriov(bp); @@ -16784,7 +16814,7 @@ static pci_ers_result_t bnxt_io_error_detected(struct pci_dev *pdev, bnxt_ulp_stop(bp); - rtnl_lock(); + netdev_lock(netdev); netif_device_detach(netdev); if 
(test_and_set_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) { @@ -16793,7 +16823,7 @@ static pci_ers_result_t bnxt_io_error_detected(struct pci_dev *pdev, } if (abort || state == pci_channel_io_perm_failure) { - rtnl_unlock(); + netdev_unlock(netdev); return PCI_ERS_RESULT_DISCONNECT; } @@ -16812,7 +16842,7 @@ static pci_ers_result_t bnxt_io_error_detected(struct pci_dev *pdev, if (pci_is_enabled(pdev)) pci_disable_device(pdev); bnxt_free_ctx_mem(bp, false); - rtnl_unlock(); + netdev_unlock(netdev); /* Request a slot slot reset. */ return PCI_ERS_RESULT_NEED_RESET; @@ -16842,7 +16872,7 @@ static pci_ers_result_t bnxt_io_slot_reset(struct pci_dev *pdev) test_bit(BNXT_STATE_PCI_CHANNEL_IO_FROZEN, &bp->state)) msleep(900); - rtnl_lock(); + netdev_lock(netdev); if (pci_enable_device(pdev)) { dev_err(&pdev->dev, @@ -16897,7 +16927,7 @@ static pci_ers_result_t bnxt_io_slot_reset(struct pci_dev *pdev) reset_exit: clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); bnxt_clear_reservations(bp, true); - rtnl_unlock(); + netdev_unlock(netdev); return result; } @@ -16916,7 +16946,7 @@ static void bnxt_io_resume(struct pci_dev *pdev) int err; netdev_info(bp->dev, "PCI Slot Resume\n"); - rtnl_lock(); + netdev_lock(netdev); err = bnxt_hwrm_func_qcaps(bp); if (!err) { @@ -16929,7 +16959,7 @@ static void bnxt_io_resume(struct pci_dev *pdev) if (!err) netif_device_attach(netdev); - rtnl_unlock(); + netdev_unlock(netdev); bnxt_ulp_start(bp, err); if (!err) bnxt_reenable_sriov(bp); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index e85b5ce94f58..e93ba0e4f087 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -2697,6 +2697,7 @@ struct bnxt { #define BNXT_DUMP_LIVE 0 #define BNXT_DUMP_CRASH 1 #define BNXT_DUMP_DRIVER 2 +#define BNXT_DUMP_LIVE_WITH_CTX_L1_CACHE 3 struct bpf_prog *xdp_prog; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c index 7236d8e548ab..5576e7cf8463 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c @@ -159,8 +159,8 @@ static int bnxt_hwrm_dbg_coredump_list(struct bnxt *bp, return rc; } -static int bnxt_hwrm_dbg_coredump_initiate(struct bnxt *bp, u16 component_id, - u16 segment_id) +static int bnxt_hwrm_dbg_coredump_initiate(struct bnxt *bp, u16 dump_type, + u16 component_id, u16 segment_id) { struct hwrm_dbg_coredump_initiate_input *req; int rc; @@ -172,6 +172,8 @@ static int bnxt_hwrm_dbg_coredump_initiate(struct bnxt *bp, u16 component_id, hwrm_req_timeout(bp, req, bp->hwrm_cmd_max_timeout); req->component_id = cpu_to_le16(component_id); req->segment_id = cpu_to_le16(segment_id); + if (dump_type == BNXT_DUMP_LIVE_WITH_CTX_L1_CACHE) + req->seg_flags = DBG_COREDUMP_INITIATE_REQ_SEG_FLAGS_COLLECT_CTX_L1_CACHE; return hwrm_req_send(bp, req); } @@ -450,7 +452,8 @@ static int __bnxt_get_coredump(struct bnxt *bp, u16 dump_type, void *buf, start = jiffies; - rc = bnxt_hwrm_dbg_coredump_initiate(bp, comp_id, seg_id); + rc = bnxt_hwrm_dbg_coredump_initiate(bp, dump_type, comp_id, + seg_id); if (rc) { netdev_err(bp->dev, "Failed to initiate coredump for seg = %d\n", diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index ef8288fd68f4..777880594a04 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -11,6 +11,7 @@ #include 
<linux/netdevice.h> #include <linux/vmalloc.h> #include <net/devlink.h> +#include <net/netdev_lock.h> #include "bnxt_hsi.h" #include "bnxt.h" #include "bnxt_hwrm.h" @@ -439,14 +440,17 @@ static int bnxt_dl_reload_down(struct devlink *dl, bool netns_change, case DEVLINK_RELOAD_ACTION_DRIVER_REINIT: { bnxt_ulp_stop(bp); rtnl_lock(); + netdev_lock(bp->dev); if (bnxt_sriov_cfg(bp)) { NL_SET_ERR_MSG_MOD(extack, "reload is unsupported while VFs are allocated or being configured"); + netdev_unlock(bp->dev); rtnl_unlock(); bnxt_ulp_start(bp, 0); return -EOPNOTSUPP; } if (bp->dev->reg_state == NETREG_UNREGISTERED) { + netdev_unlock(bp->dev); rtnl_unlock(); bnxt_ulp_start(bp, 0); return -ENODEV; @@ -458,7 +462,8 @@ static int bnxt_dl_reload_down(struct devlink *dl, bool netns_change, if (rc) { NL_SET_ERR_MSG_MOD(extack, "Failed to deregister"); if (netif_running(bp->dev)) - dev_close(bp->dev); + netif_close(bp->dev); + netdev_unlock(bp->dev); rtnl_unlock(); break; } @@ -479,7 +484,9 @@ static int bnxt_dl_reload_down(struct devlink *dl, bool netns_change, return -EPERM; } rtnl_lock(); + netdev_lock(bp->dev); if (bp->dev->reg_state == NETREG_UNREGISTERED) { + netdev_unlock(bp->dev); rtnl_unlock(); return -ENODEV; } @@ -493,6 +500,7 @@ static int bnxt_dl_reload_down(struct devlink *dl, bool netns_change, if (rc) { NL_SET_ERR_MSG_MOD(extack, "Failed to activate firmware"); clear_bit(BNXT_STATE_FW_ACTIVATE, &bp->state); + netdev_unlock(bp->dev); rtnl_unlock(); } break; @@ -511,6 +519,8 @@ static int bnxt_dl_reload_up(struct devlink *dl, enum devlink_reload_action acti struct bnxt *bp = bnxt_get_bp_from_dl(dl); int rc = 0; + netdev_assert_locked(bp->dev); + *actions_performed = 0; switch (action) { case DEVLINK_RELOAD_ACTION_DRIVER_REINIT: { @@ -535,6 +545,7 @@ static int bnxt_dl_reload_up(struct devlink *dl, enum devlink_reload_action acti if (!netif_running(bp->dev)) NL_SET_ERR_MSG_MOD(extack, "Device is closed, not waiting for reset notice that will never come"); + netdev_unlock(bp->dev); rtnl_unlock(); while (test_bit(BNXT_STATE_FW_ACTIVATE, &bp->state)) { if (time_after(jiffies, timeout)) { @@ -550,6 +561,7 @@ static int bnxt_dl_reload_up(struct devlink *dl, enum devlink_reload_action acti msleep(50); } rtnl_lock(); + netdev_lock(bp->dev); if (!rc) *actions_performed |= BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT); clear_bit(BNXT_STATE_FW_ACTIVATE, &bp->state); @@ -568,8 +580,9 @@ static int bnxt_dl_reload_up(struct devlink *dl, enum devlink_reload_action acti } *actions_performed |= BIT(action); } else if (netif_running(bp->dev)) { - dev_close(bp->dev); + netif_close(bp->dev); } + netdev_unlock(bp->dev); rtnl_unlock(); if (action == DEVLINK_RELOAD_ACTION_DRIVER_REINIT) bnxt_ulp_start(bp, rc); @@ -666,6 +679,8 @@ static const struct bnxt_dl_nvm_param nvm_params[] = { NVM_OFF_MSIX_VEC_PER_PF_MAX, BNXT_NVM_SHARED_CFG, 10, 4}, {DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MIN, NVM_OFF_MSIX_VEC_PER_PF_MIN, BNXT_NVM_SHARED_CFG, 7, 4}, + {DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE, NVM_OFF_SUPPORT_RDMA, + BNXT_NVM_FUNC_CFG, 1, 1}, {BNXT_DEVLINK_PARAM_ID_GRE_VER_CHECK, NVM_OFF_DIS_GRE_VER_CHECK, BNXT_NVM_SHARED_CFG, 1, 1}, }; @@ -1010,37 +1025,19 @@ static int bnxt_dl_info_get(struct devlink *dl, struct devlink_info_req *req, } -static int bnxt_hwrm_nvm_req(struct bnxt *bp, u32 param_id, void *msg, - union devlink_param_value *val) +static int __bnxt_hwrm_nvm_req(struct bnxt *bp, + const struct bnxt_dl_nvm_param *nvm, void *msg, + union devlink_param_value *val) { struct hwrm_nvm_get_variable_input *req = msg; - struct 
bnxt_dl_nvm_param nvm_param; struct hwrm_err_output *resp; union bnxt_nvm_data *data; dma_addr_t data_dma_addr; - int idx = 0, rc, i; - - /* Get/Set NVM CFG parameter is supported only on PFs */ - if (BNXT_VF(bp)) { - hwrm_req_drop(bp, req); - return -EPERM; - } - - for (i = 0; i < ARRAY_SIZE(nvm_params); i++) { - if (nvm_params[i].id == param_id) { - nvm_param = nvm_params[i]; - break; - } - } + int idx = 0, rc; - if (i == ARRAY_SIZE(nvm_params)) { - hwrm_req_drop(bp, req); - return -EOPNOTSUPP; - } - - if (nvm_param.dir_type == BNXT_NVM_PORT_CFG) + if (nvm->dir_type == BNXT_NVM_PORT_CFG) idx = bp->pf.port_id; - else if (nvm_param.dir_type == BNXT_NVM_FUNC_CFG) + else if (nvm->dir_type == BNXT_NVM_FUNC_CFG) idx = bp->pf.fw_fid - BNXT_FIRST_PF_FID; data = hwrm_req_dma_slice(bp, req, sizeof(*data), &data_dma_addr); @@ -1051,23 +1048,23 @@ static int bnxt_hwrm_nvm_req(struct bnxt *bp, u32 param_id, void *msg, } req->dest_data_addr = cpu_to_le64(data_dma_addr); - req->data_len = cpu_to_le16(nvm_param.nvm_num_bits); - req->option_num = cpu_to_le16(nvm_param.offset); + req->data_len = cpu_to_le16(nvm->nvm_num_bits); + req->option_num = cpu_to_le16(nvm->offset); req->index_0 = cpu_to_le16(idx); if (idx) req->dimensions = cpu_to_le16(1); resp = hwrm_req_hold(bp, req); if (req->req_type == cpu_to_le16(HWRM_NVM_SET_VARIABLE)) { - bnxt_copy_to_nvm_data(data, val, nvm_param.nvm_num_bits, - nvm_param.dl_num_bytes); + bnxt_copy_to_nvm_data(data, val, nvm->nvm_num_bits, + nvm->dl_num_bytes); rc = hwrm_req_send(bp, msg); } else { rc = hwrm_req_send_silent(bp, msg); if (!rc) { bnxt_copy_from_nvm_data(val, data, - nvm_param.nvm_num_bits, - nvm_param.dl_num_bytes); + nvm->nvm_num_bits, + nvm->dl_num_bytes); } else { if (resp->cmd_err == NVM_GET_VARIABLE_CMD_ERR_CODE_VAR_NOT_EXIST) @@ -1080,6 +1077,27 @@ static int bnxt_hwrm_nvm_req(struct bnxt *bp, u32 param_id, void *msg, return rc; } +static int bnxt_hwrm_nvm_req(struct bnxt *bp, u32 param_id, void *msg, + union devlink_param_value *val) +{ + struct hwrm_nvm_get_variable_input *req = msg; + const struct bnxt_dl_nvm_param *nvm_param; + int i; + + /* Get/Set NVM CFG parameter is supported only on PFs */ + if (BNXT_VF(bp)) { + hwrm_req_drop(bp, req); + return -EPERM; + } + + for (i = 0; i < ARRAY_SIZE(nvm_params); i++) { + nvm_param = &nvm_params[i]; + if (nvm_param->id == param_id) + return __bnxt_hwrm_nvm_req(bp, nvm_param, msg, val); + } + return -EOPNOTSUPP; +} + static int bnxt_dl_nvm_param_get(struct devlink *dl, u32 id, struct devlink_param_gset_ctx *ctx) { @@ -1116,6 +1134,32 @@ static int bnxt_dl_nvm_param_set(struct devlink *dl, u32 id, return bnxt_hwrm_nvm_req(bp, id, req, &ctx->val); } +static int bnxt_dl_roce_validate(struct devlink *dl, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + const struct bnxt_dl_nvm_param nvm_roce_cap = {0, NVM_OFF_RDMA_CAPABLE, + BNXT_NVM_SHARED_CFG, 1, 1}; + struct bnxt *bp = bnxt_get_bp_from_dl(dl); + struct hwrm_nvm_get_variable_input *req; + union devlink_param_value roce_cap; + int rc; + + rc = hwrm_req_init(bp, req, HWRM_NVM_GET_VARIABLE); + if (rc) + return rc; + + if (__bnxt_hwrm_nvm_req(bp, &nvm_roce_cap, req, &roce_cap)) { + NL_SET_ERR_MSG_MOD(extack, "Unable to verify if device is RDMA Capable"); + return -EINVAL; + } + if (!roce_cap.vbool) { + NL_SET_ERR_MSG_MOD(extack, "Device does not support RDMA"); + return -EINVAL; + } + return 0; +} + static int bnxt_dl_msix_validate(struct devlink *dl, u32 id, union devlink_param_value val, struct netlink_ext_ack *extack) @@ -1180,6 
+1224,10 @@ static const struct devlink_param bnxt_dl_params[] = { BIT(DEVLINK_PARAM_CMODE_PERMANENT), bnxt_dl_nvm_param_get, bnxt_dl_nvm_param_set, bnxt_dl_msix_validate), + DEVLINK_PARAM_GENERIC(ENABLE_ROCE, + BIT(DEVLINK_PARAM_CMODE_PERMANENT), + bnxt_dl_nvm_param_get, bnxt_dl_nvm_param_set, + bnxt_dl_roce_validate), DEVLINK_PARAM_DRIVER(BNXT_DEVLINK_PARAM_ID_GRE_VER_CHECK, "gre_ver_check", DEVLINK_PARAM_TYPE_BOOL, BIT(DEVLINK_PARAM_CMODE_PERMANENT), diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h index b8105065367b..7f45dcd7b287 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h @@ -41,8 +41,10 @@ static inline void bnxt_dl_set_remote_reset(struct devlink *dl, bool value) #define NVM_OFF_MSIX_VEC_PER_PF_MAX 108 #define NVM_OFF_MSIX_VEC_PER_PF_MIN 114 #define NVM_OFF_IGNORE_ARI 164 +#define NVM_OFF_RDMA_CAPABLE 161 #define NVM_OFF_DIS_GRE_VER_CHECK 171 #define NVM_OFF_ENABLE_SRIOV 401 +#define NVM_OFF_SUPPORT_RDMA 506 #define NVM_OFF_NVM_CFG_VER 602 #define BNXT_NVM_CFG_VER_BITS 8 diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 9c5820839514..48dd5922e4dd 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -4541,16 +4541,16 @@ static int bnxt_get_module_status(struct bnxt *bp, struct netlink_ext_ack *extac return -EINVAL; } -static int bnxt_get_module_eeprom_by_page(struct net_device *dev, - const struct ethtool_module_eeprom *page_data, - struct netlink_ext_ack *extack) +static int +bnxt_mod_eeprom_by_page_precheck(struct bnxt *bp, + const struct ethtool_module_eeprom *page_data, + struct netlink_ext_ack *extack) { - struct bnxt *bp = netdev_priv(dev); int rc; if (BNXT_VF(bp) && !BNXT_VF_IS_TRUSTED(bp)) { NL_SET_ERR_MSG_MOD(extack, - "Module read not permitted on untrusted VF"); + "Module read/write not permitted on untrusted VF"); return -EPERM; } @@ -4567,6 +4567,19 @@ static int bnxt_get_module_eeprom_by_page(struct net_device *dev, NL_SET_ERR_MSG_MOD(extack, "Firmware not capable for bank selection"); return -EINVAL; } + return 0; +} + +static int bnxt_get_module_eeprom_by_page(struct net_device *dev, + const struct ethtool_module_eeprom *page_data, + struct netlink_ext_ack *extack) +{ + struct bnxt *bp = netdev_priv(dev); + int rc; + + rc = bnxt_mod_eeprom_by_page_precheck(bp, page_data, extack); + if (rc) + return rc; rc = bnxt_read_sfp_module_eeprom_info(bp, page_data->i2c_address << 1, page_data->page, page_data->bank, @@ -4580,6 +4593,62 @@ static int bnxt_get_module_eeprom_by_page(struct net_device *dev, return page_data->length; } +static int bnxt_write_sfp_module_eeprom_info(struct bnxt *bp, + const struct ethtool_module_eeprom *page) +{ + struct hwrm_port_phy_i2c_write_input *req; + int bytes_written = 0; + int rc; + + rc = hwrm_req_init(bp, req, HWRM_PORT_PHY_I2C_WRITE); + if (rc) + return rc; + + hwrm_req_hold(bp, req); + req->i2c_slave_addr = page->i2c_address << 1; + req->page_number = cpu_to_le16(page->page); + req->bank_number = page->bank; + req->port_id = cpu_to_le16(bp->pf.port_id); + req->enables = cpu_to_le32(PORT_PHY_I2C_WRITE_REQ_ENABLES_PAGE_OFFSET | + PORT_PHY_I2C_WRITE_REQ_ENABLES_BANK_NUMBER); + + while (bytes_written < page->length) { + u16 xfer_size; + + xfer_size = min_t(u16, page->length - bytes_written, + BNXT_MAX_PHY_I2C_RESP_SIZE); + req->page_offset = cpu_to_le16(page->offset 
+ bytes_written); + req->data_length = xfer_size; + memcpy(req->data, page->data + bytes_written, xfer_size); + rc = hwrm_req_send(bp, req); + if (rc) + break; + bytes_written += xfer_size; + } + + hwrm_req_drop(bp, req); + return rc; +} + +static int bnxt_set_module_eeprom_by_page(struct net_device *dev, + const struct ethtool_module_eeprom *page_data, + struct netlink_ext_ack *extack) +{ + struct bnxt *bp = netdev_priv(dev); + int rc; + + rc = bnxt_mod_eeprom_by_page_precheck(bp, page_data, extack); + if (rc) + return rc; + + rc = bnxt_write_sfp_module_eeprom_info(bp, page_data); + if (rc) { + NL_SET_ERR_MSG_MOD(extack, "Module`s eeprom write failed"); + return rc; + } + return page_data->length; +} + static int bnxt_nway_reset(struct net_device *dev) { int rc = 0; @@ -5077,8 +5146,9 @@ static int bnxt_set_dump(struct net_device *dev, struct ethtool_dump *dump) { struct bnxt *bp = netdev_priv(dev); - if (dump->flag > BNXT_DUMP_DRIVER) { - netdev_info(dev, "Supports only Live(0), Crash(1), Driver(2) dumps.\n"); + if (dump->flag > BNXT_DUMP_LIVE_WITH_CTX_L1_CACHE) { + netdev_info(dev, + "Supports only Live(0), Crash(1), Driver(2), Live with cached context(3) dumps.\n"); return -EINVAL; } @@ -5441,6 +5511,7 @@ const struct ethtool_ops bnxt_ethtool_ops = { .get_module_info = bnxt_get_module_info, .get_module_eeprom = bnxt_get_module_eeprom, .get_module_eeprom_by_page = bnxt_get_module_eeprom_by_page, + .set_module_eeprom_by_page = bnxt_set_module_eeprom_by_page, .nway_reset = bnxt_nway_reset, .set_phys_id = bnxt_set_phys_id, .self_test = bnxt_self_test, diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h index 5f8de1634378..549231703bce 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h @@ -2,7 +2,7 @@ * * Copyright (c) 2014-2016 Broadcom Corporation * Copyright (c) 2014-2018 Broadcom Limited - * Copyright (c) 2018-2024 Broadcom Inc. + * Copyright (c) 2018-2025 Broadcom Inc. 
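The new bnxt_write_sfp_module_eeprom_info() shown above is a plain chunking loop: an HWRM_PORT_PHY_I2C_WRITE message carries at most BNXT_MAX_PHY_I2C_RESP_SIZE bytes, so the page is sliced into xfer_size pieces, the offset advanced by the bytes already written, and the loop stops at the first failed chunk. The same loop, reduced to a standalone sketch with the firmware transport mocked out:

#include <stdio.h>
#include <string.h>

#define MAX_XFER 64     /* stand-in for BNXT_MAX_PHY_I2C_RESP_SIZE */

/* Mocked transport; the real driver sends HWRM_PORT_PHY_I2C_WRITE here. */
static int i2c_write(unsigned int offset, const unsigned char *buf,
                     unsigned int len)
{
        (void)buf;
        printf("write %3u bytes at offset %3u\n", len, offset);
        return 0;
}

static int eeprom_write(unsigned int offset, const unsigned char *data,
                        unsigned int length)
{
        unsigned int written = 0;

        while (written < length) {
                unsigned int xfer = length - written;

                if (xfer > MAX_XFER)
                        xfer = MAX_XFER;        /* same role as min_t() */
                if (i2c_write(offset + written, data + written, xfer))
                        return -1;              /* stop on first failed chunk */
                written += xfer;
        }
        return 0;
}

int main(void)
{
        unsigned char page[200];

        memset(page, 0xa5, sizeof(page));
        return eeprom_write(128, page, sizeof(page));   /* 64 + 64 + 64 + 8 */
}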
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -438,6 +438,7 @@ struct cmd_nums { #define HWRM_MFG_PRVSN_EXPORT_CERT 0x219UL #define HWRM_STAT_DB_ERROR_QSTATS 0x21aUL #define HWRM_MFG_TESTS 0x21bUL + #define HWRM_MFG_WRITE_CERT_NVM 0x21cUL #define HWRM_PORT_POE_CFG 0x230UL #define HWRM_PORT_POE_QCFG 0x231UL #define HWRM_UDCC_QCAPS 0x258UL @@ -514,6 +515,8 @@ struct cmd_nums { #define HWRM_TFC_TBL_SCOPE_CONFIG_GET 0x39aUL #define HWRM_TFC_RESC_USAGE_QUERY 0x39bUL #define HWRM_TFC_GLOBAL_ID_FREE 0x39cUL + #define HWRM_TFC_TCAM_PRI_UPDATE 0x39dUL + #define HWRM_TFC_HOT_UPGRADE_PROCESS 0x3a0UL #define HWRM_SV 0x400UL #define HWRM_DBG_SERDES_TEST 0xff0eUL #define HWRM_DBG_LOG_BUFFER_FLUSH 0xff0fUL @@ -629,8 +632,8 @@ struct hwrm_err_output { #define HWRM_VERSION_MAJOR 1 #define HWRM_VERSION_MINOR 10 #define HWRM_VERSION_UPDATE 3 -#define HWRM_VERSION_RSVD 85 -#define HWRM_VERSION_STR "1.10.3.85" +#define HWRM_VERSION_RSVD 97 +#define HWRM_VERSION_STR "1.10.3.97" /* hwrm_ver_get_input (size:192b/24B) */ struct hwrm_ver_get_input { @@ -1905,11 +1908,15 @@ struct hwrm_func_qcaps_output { __le32 roce_vf_max_srq; __le32 roce_vf_max_gid; __le32 flags_ext3; - #define FUNC_QCAPS_RESP_FLAGS_EXT3_RM_RSV_WHILE_ALLOC_CAP 0x1UL - #define FUNC_QCAPS_RESP_FLAGS_EXT3_REQUIRE_L2_FILTER 0x2UL - #define FUNC_QCAPS_RESP_FLAGS_EXT3_MAX_ROCE_VFS_SUPPORTED 0x4UL + #define FUNC_QCAPS_RESP_FLAGS_EXT3_RM_RSV_WHILE_ALLOC_CAP 0x1UL + #define FUNC_QCAPS_RESP_FLAGS_EXT3_REQUIRE_L2_FILTER 0x2UL + #define FUNC_QCAPS_RESP_FLAGS_EXT3_MAX_ROCE_VFS_SUPPORTED 0x4UL + #define FUNC_QCAPS_RESP_FLAGS_EXT3_RX_RATE_PROFILE_SEL_SUPPORTED 0x8UL + #define FUNC_QCAPS_RESP_FLAGS_EXT3_BIDI_OPT_SUPPORTED 0x10UL + #define FUNC_QCAPS_RESP_FLAGS_EXT3_MIRROR_ON_ROCE_SUPPORTED 0x20UL __le16 max_roce_vfs; - u8 unused_3[5]; + __le16 max_crypto_rx_flow_filters; + u8 unused_3[3]; u8 valid; }; @@ -1924,7 +1931,7 @@ struct hwrm_func_qcfg_input { u8 unused_0[6]; }; -/* hwrm_func_qcfg_output (size:1280b/160B) */ +/* hwrm_func_qcfg_output (size:1344b/168B) */ struct hwrm_func_qcfg_output { __le16 error_code; __le16 req_type; @@ -2087,14 +2094,18 @@ struct hwrm_func_qcfg_output { __le16 host_mtu; __le16 flags2; #define FUNC_QCFG_RESP_FLAGS2_SRIOV_DSCP_INSERT_ENABLED 0x1UL - u8 unused_4[2]; + __le16 stag_vid; u8 port_kdnet_mode; #define FUNC_QCFG_RESP_PORT_KDNET_MODE_DISABLED 0x0UL #define FUNC_QCFG_RESP_PORT_KDNET_MODE_ENABLED 0x1UL #define FUNC_QCFG_RESP_PORT_KDNET_MODE_LAST FUNC_QCFG_RESP_PORT_KDNET_MODE_ENABLED u8 kdnet_pcie_function; __le16 port_kdnet_fid; - u8 unused_5[2]; + u8 unused_5; + u8 roce_bidi_opt_mode; + #define FUNC_QCFG_RESP_ROCE_BIDI_OPT_MODE_DISABLED 0x1UL + #define FUNC_QCFG_RESP_ROCE_BIDI_OPT_MODE_DEDICATED 0x2UL + #define FUNC_QCFG_RESP_ROCE_BIDI_OPT_MODE_SHARED 0x4UL __le32 num_ktls_tx_key_ctxs; __le32 num_ktls_rx_key_ctxs; u8 lag_id; @@ -2112,7 +2123,8 @@ struct hwrm_func_qcfg_output { __le16 xid_partition_cfg; #define FUNC_QCFG_RESP_XID_PARTITION_CFG_TX_CK 0x1UL #define FUNC_QCFG_RESP_XID_PARTITION_CFG_RX_CK 0x2UL - u8 unused_7; + __le16 mirror_vnic_id; + u8 unused_7[7]; u8 valid; }; @@ -3965,7 +3977,7 @@ struct ts_split_entries { __le32 region_num_entries; u8 tsid; u8 lkup_static_bkt_cnt_exp[2]; - u8 rsvd; + u8 locked; __le32 rsvd2[2]; }; @@ -5483,6 +5495,37 @@ struct hwrm_port_phy_qcaps_output { u8 valid; }; +/* hwrm_port_phy_i2c_write_input (size:832b/104B) */ +struct hwrm_port_phy_i2c_write_input { + __le16 req_type; + __le16 
cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 flags; + __le32 enables; + #define PORT_PHY_I2C_WRITE_REQ_ENABLES_PAGE_OFFSET 0x1UL + #define PORT_PHY_I2C_WRITE_REQ_ENABLES_BANK_NUMBER 0x2UL + __le16 port_id; + u8 i2c_slave_addr; + u8 bank_number; + __le16 page_number; + __le16 page_offset; + u8 data_length; + u8 unused_1[7]; + __le32 data[16]; +}; + +/* hwrm_port_phy_i2c_write_output (size:128b/16B) */ +struct hwrm_port_phy_i2c_write_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + /* hwrm_port_phy_i2c_read_input (size:320b/40B) */ struct hwrm_port_phy_i2c_read_input { __le16 req_type; @@ -6610,8 +6653,9 @@ struct hwrm_vnic_alloc_input { __le32 flags; #define VNIC_ALLOC_REQ_FLAGS_DEFAULT 0x1UL #define VNIC_ALLOC_REQ_FLAGS_VIRTIO_NET_FID_VALID 0x2UL + #define VNIC_ALLOC_REQ_FLAGS_VNIC_ID_VALID 0x4UL __le16 virtio_net_fid; - u8 unused_0[2]; + __le16 vnic_id; }; /* hwrm_vnic_alloc_output (size:128b/16B) */ @@ -6710,6 +6754,7 @@ struct hwrm_vnic_cfg_input { #define VNIC_CFG_REQ_ENABLES_QUEUE_ID 0x80UL #define VNIC_CFG_REQ_ENABLES_RX_CSUM_V2_MODE 0x100UL #define VNIC_CFG_REQ_ENABLES_L2_CQE_MODE 0x200UL + #define VNIC_CFG_REQ_ENABLES_RAW_QP_ID 0x400UL __le16 vnic_id; __le16 dflt_ring_grp; __le16 rss_rule; @@ -6729,7 +6774,7 @@ struct hwrm_vnic_cfg_input { #define VNIC_CFG_REQ_L2_CQE_MODE_COMPRESSED 0x1UL #define VNIC_CFG_REQ_L2_CQE_MODE_MIXED 0x2UL #define VNIC_CFG_REQ_L2_CQE_MODE_LAST VNIC_CFG_REQ_L2_CQE_MODE_MIXED - u8 unused0[4]; + __le32 raw_qp_id; }; /* hwrm_vnic_cfg_output (size:128b/16B) */ @@ -7082,6 +7127,15 @@ struct hwrm_vnic_plcmodes_cfg_output { u8 valid; }; +/* hwrm_vnic_plcmodes_cfg_cmd_err (size:64b/8B) */ +struct hwrm_vnic_plcmodes_cfg_cmd_err { + u8 code; + #define VNIC_PLCMODES_CFG_CMD_ERR_CODE_UNKNOWN 0x0UL + #define VNIC_PLCMODES_CFG_CMD_ERR_CODE_INVALID_HDS_THRESHOLD 0x1UL + #define VNIC_PLCMODES_CFG_CMD_ERR_CODE_LAST VNIC_PLCMODES_CFG_CMD_ERR_CODE_INVALID_HDS_THRESHOLD + u8 unused_0[7]; +}; + /* hwrm_vnic_rss_cos_lb_ctx_alloc_input (size:128b/16B) */ struct hwrm_vnic_rss_cos_lb_ctx_alloc_input { __le16 req_type; @@ -7131,15 +7185,16 @@ struct hwrm_ring_alloc_input { __le16 target_id; __le64 resp_addr; __le32 enables; - #define RING_ALLOC_REQ_ENABLES_RING_ARB_CFG 0x2UL - #define RING_ALLOC_REQ_ENABLES_STAT_CTX_ID_VALID 0x8UL - #define RING_ALLOC_REQ_ENABLES_MAX_BW_VALID 0x20UL - #define RING_ALLOC_REQ_ENABLES_RX_RING_ID_VALID 0x40UL - #define RING_ALLOC_REQ_ENABLES_NQ_RING_ID_VALID 0x80UL - #define RING_ALLOC_REQ_ENABLES_RX_BUF_SIZE_VALID 0x100UL - #define RING_ALLOC_REQ_ENABLES_SCHQ_ID 0x200UL - #define RING_ALLOC_REQ_ENABLES_MPC_CHNLS_TYPE 0x400UL - #define RING_ALLOC_REQ_ENABLES_STEERING_TAG_VALID 0x800UL + #define RING_ALLOC_REQ_ENABLES_RING_ARB_CFG 0x2UL + #define RING_ALLOC_REQ_ENABLES_STAT_CTX_ID_VALID 0x8UL + #define RING_ALLOC_REQ_ENABLES_MAX_BW_VALID 0x20UL + #define RING_ALLOC_REQ_ENABLES_RX_RING_ID_VALID 0x40UL + #define RING_ALLOC_REQ_ENABLES_NQ_RING_ID_VALID 0x80UL + #define RING_ALLOC_REQ_ENABLES_RX_BUF_SIZE_VALID 0x100UL + #define RING_ALLOC_REQ_ENABLES_SCHQ_ID 0x200UL + #define RING_ALLOC_REQ_ENABLES_MPC_CHNLS_TYPE 0x400UL + #define RING_ALLOC_REQ_ENABLES_STEERING_TAG_VALID 0x800UL + #define RING_ALLOC_REQ_ENABLES_RX_RATE_PROFILE_VALID 0x1000UL u8 ring_type; #define RING_ALLOC_REQ_RING_TYPE_L2_CMPL 0x0UL #define RING_ALLOC_REQ_RING_TYPE_TX 0x1UL @@ -7226,7 +7281,11 @@ struct hwrm_ring_alloc_input { #define RING_ALLOC_REQ_MPC_CHNLS_TYPE_RE_CFA 0x3UL #define 
RING_ALLOC_REQ_MPC_CHNLS_TYPE_PRIMATE 0x4UL #define RING_ALLOC_REQ_MPC_CHNLS_TYPE_LAST RING_ALLOC_REQ_MPC_CHNLS_TYPE_PRIMATE - u8 unused_4[2]; + u8 rx_rate_profile_sel; + #define RING_ALLOC_REQ_RX_RATE_PROFILE_SEL_DEFAULT 0x0UL + #define RING_ALLOC_REQ_RX_RATE_PROFILE_SEL_POLL_MODE 0x1UL + #define RING_ALLOC_REQ_RX_RATE_PROFILE_SEL_LAST RING_ALLOC_REQ_RX_RATE_PROFILE_SEL_POLL_MODE + u8 unused_4; __le64 cq_handle; }; @@ -9122,6 +9181,39 @@ struct pcie_ctx_hw_stats { __le64 pcie_recovery_histogram; }; +/* pcie_ctx_hw_stats_v2 (size:4096b/512B) */ +struct pcie_ctx_hw_stats_v2 { + __le64 pcie_pl_signal_integrity; + __le64 pcie_dl_signal_integrity; + __le64 pcie_tl_signal_integrity; + __le64 pcie_link_integrity; + __le64 pcie_tx_traffic_rate; + __le64 pcie_rx_traffic_rate; + __le64 pcie_tx_dllp_statistics; + __le64 pcie_rx_dllp_statistics; + __le64 pcie_equalization_time; + __le32 pcie_ltssm_histogram[4]; + __le64 pcie_recovery_histogram; + __le32 pcie_tl_credit_nph_histogram[8]; + __le32 pcie_tl_credit_ph_histogram[8]; + __le32 pcie_tl_credit_pd_histogram[8]; + __le32 pcie_cmpl_latest_times[4]; + __le32 pcie_cmpl_longest_time; + __le32 pcie_cmpl_shortest_time; + __le32 unused_0[2]; + __le32 pcie_cmpl_latest_headers[4][4]; + __le32 pcie_cmpl_longest_headers[4][4]; + __le32 pcie_cmpl_shortest_headers[4][4]; + __le32 pcie_wr_latency_histogram[12]; + __le32 pcie_wr_latency_all_normal_count; + __le32 unused_1; + __le64 pcie_posted_packet_count; + __le64 pcie_non_posted_packet_count; + __le64 pcie_other_packet_count; + __le64 pcie_blocked_packet_count; + __le64 pcie_cmpl_packet_count; +}; + /* hwrm_stat_generic_qstats_input (size:256b/32B) */ struct hwrm_stat_generic_qstats_input { __le16 req_type; @@ -9317,6 +9409,9 @@ struct hwrm_struct_hdr { #define STRUCT_HDR_STRUCT_ID_LAST STRUCT_HDR_STRUCT_ID_UDCC_RTT_BUCKET_BOUND __le16 len; u8 version; + #define STRUCT_HDR_VERSION_0 0x0UL + #define STRUCT_HDR_VERSION_1 0x1UL + #define STRUCT_HDR_VERSION_LAST STRUCT_HDR_VERSION_1 u8 count; __le16 subtype; __le16 next_offset; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c index 12b6ed51fd88..5ddddd89052f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c @@ -946,7 +946,9 @@ void bnxt_sriov_disable(struct bnxt *bp) /* Reclaim all resources for the PF. 
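A standalone arithmetic check that the pcie_ctx_hw_stats_v2 fields above really sum to the documented 4096b/512B; the field groups below follow the struct order.

#include <assert.h>
#include <stdio.h>

int main(void)
{
	unsigned int bytes = 0;

	bytes += 9 * 8;          /* nine leading __le64 counters       */
	bytes += 4 * 4;          /* pcie_ltssm_histogram[4]            */
	bytes += 8;              /* pcie_recovery_histogram            */
	bytes += 3 * 8 * 4;      /* nph/ph/pd credit histograms        */
	bytes += 4 * 4 + 4 + 4;  /* completion latency times           */
	bytes += 2 * 4;          /* unused_0[2]                        */
	bytes += 3 * 16 * 4;     /* latest/longest/shortest headers    */
	bytes += 12 * 4 + 4 + 4; /* write-latency histogram + counters */
	bytes += 5 * 8;          /* trailing __le64 packet counters    */

	assert(bytes == 512);    /* matches the documented 4096b/512B  */
	printf("pcie_ctx_hw_stats_v2: %u bytes\n", bytes);
	return 0;
}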
*/ rtnl_lock(); + netdev_lock(bp->dev); bnxt_restore_pf_fw_resources(bp); + netdev_unlock(bp->dev); rtnl_unlock(); } @@ -956,17 +958,21 @@ int bnxt_sriov_configure(struct pci_dev *pdev, int num_vfs) struct bnxt *bp = netdev_priv(dev); rtnl_lock(); + netdev_lock(dev); if (!netif_running(dev)) { netdev_warn(dev, "Reject SRIOV config request since if is down!\n"); + netdev_unlock(dev); rtnl_unlock(); return 0; } if (test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) { netdev_warn(dev, "Reject SRIOV config request when FW reset is in progress\n"); + netdev_unlock(dev); rtnl_unlock(); return 0; } bp->sriov_cfg = true; + netdev_unlock(dev); rtnl_unlock(); if (pci_vfs_assigned(bp->pdev)) { diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c index e4a7f37036ed..a8e930d5dbb0 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c @@ -112,7 +112,7 @@ int bnxt_register_dev(struct bnxt_en_dev *edev, struct bnxt_ulp *ulp; int rc = 0; - rtnl_lock(); + netdev_lock(dev); mutex_lock(&edev->en_dev_lock); if (!bp->irq_tbl) { rc = -ENODEV; @@ -138,7 +138,7 @@ int bnxt_register_dev(struct bnxt_en_dev *edev, edev->flags |= BNXT_EN_FLAG_MSIX_REQUESTED; exit: mutex_unlock(&edev->en_dev_lock); - rtnl_unlock(); + netdev_unlock(dev); return rc; } EXPORT_SYMBOL(bnxt_register_dev); @@ -151,7 +151,7 @@ void bnxt_unregister_dev(struct bnxt_en_dev *edev) int i = 0; ulp = edev->ulp_tbl; - rtnl_lock(); + netdev_lock(dev); mutex_lock(&edev->en_dev_lock); if (ulp->msix_requested) edev->flags &= ~BNXT_EN_FLAG_MSIX_REQUESTED; @@ -169,7 +169,7 @@ void bnxt_unregister_dev(struct bnxt_en_dev *edev) i++; } mutex_unlock(&edev->en_dev_lock); - rtnl_unlock(); + netdev_unlock(dev); return; } EXPORT_SYMBOL(bnxt_unregister_dev); @@ -309,12 +309,14 @@ void bnxt_ulp_irq_stop(struct bnxt *bp) if (!ulp->msix_requested) return; - ops = rtnl_dereference(ulp->ulp_ops); + netdev_lock(bp->dev); + ops = rcu_dereference(ulp->ulp_ops); if (!ops || !ops->ulp_irq_stop) return; if (test_bit(BNXT_STATE_FW_RESET_DET, &bp->state)) reset = true; ops->ulp_irq_stop(ulp->handle, reset); + netdev_unlock(bp->dev); } } @@ -333,7 +335,8 @@ void bnxt_ulp_irq_restart(struct bnxt *bp, int err) if (!ulp->msix_requested) return; - ops = rtnl_dereference(ulp->ulp_ops); + netdev_lock(bp->dev); + ops = rcu_dereference(ulp->ulp_ops); if (!ops || !ops->ulp_irq_restart) return; @@ -345,6 +348,7 @@ void bnxt_ulp_irq_restart(struct bnxt *bp, int err) bnxt_fill_msix_vecs(bp, ent); } ops->ulp_irq_restart(ulp->handle, ent); + netdev_unlock(bp->dev); kfree(ent); } } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c index 1467b94a6427..619f0844e778 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c @@ -257,8 +257,7 @@ bool bnxt_dev_is_vf_rep(struct net_device *dev) /* Called when the parent PF interface is closed: * As the mode transition from SWITCHDEV to LEGACY - * happens under the rtnl_lock() this routine is safe - * under the rtnl_lock() + * happens under the netdev instance lock this routine is safe */ void bnxt_vf_reps_close(struct bnxt *bp) { @@ -278,8 +277,7 @@ void bnxt_vf_reps_close(struct bnxt *bp) /* Called when the parent PF interface is opened (re-opened): * As the mode transition from SWITCHDEV to LEGACY - * happen under the rtnl_lock() this routine is safe - * under the rtnl_lock() + * happen under the netdev instance lock this routine is safe 
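A hedged, kernel-context sketch (not standalone, not part of the patch) of the lock nesting the bnxt_sriov.c hunks above establish: the netdev instance lock nests inside rtnl_lock(), so every early-exit path must unlock in reverse order. example_configure() is a hypothetical helper.

#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <net/netdev_lock.h>

static int example_configure(struct net_device *dev)
{
	int err = 0;

	rtnl_lock();
	netdev_lock(dev);
	if (!netif_running(dev)) {
		err = -ENETDOWN;
		goto unlock;
	}
	/* ... reconfigure the device under both locks ... */
unlock:
	netdev_unlock(dev);	/* always drop in reverse order */
	rtnl_unlock();
	return err;
}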
*/ void bnxt_vf_reps_open(struct bnxt *bp) { @@ -348,7 +346,7 @@ void bnxt_vf_reps_destroy(struct bnxt *bp) /* Ensure that parent PF's and VF-reps' RX/TX has been quiesced * before proceeding with VF-rep cleanup. */ - rtnl_lock(); + netdev_lock(bp->dev); if (netif_running(bp->dev)) { bnxt_close_nic(bp, false, false); closed = true; @@ -365,10 +363,10 @@ void bnxt_vf_reps_destroy(struct bnxt *bp) bnxt_open_nic(bp, false, false); bp->eswitch_mode = DEVLINK_ESWITCH_MODE_SWITCHDEV; } - rtnl_unlock(); + netdev_unlock(bp->dev); - /* Need to call vf_reps_destroy() outside of rntl_lock - * as unregister_netdev takes rtnl_lock + /* Need to call vf_reps_destroy() outside of netdev instance lock + * as unregister_netdev takes it */ __bnxt_vf_reps_destroy(bp); } @@ -376,7 +374,7 @@ void bnxt_vf_reps_destroy(struct bnxt *bp) /* Free the VF-Reps in firmware, during firmware hot-reset processing. * Note that the VF-Rep netdevs are still active (not unregistered) during * this process. As the mode transition from SWITCHDEV to LEGACY happens - * under the rtnl_lock() this routine is safe under the rtnl_lock(). + * under the netdev instance lock this routine is safe. */ void bnxt_vf_reps_free(struct bnxt *bp) { @@ -413,7 +411,7 @@ static int bnxt_alloc_vf_rep(struct bnxt *bp, struct bnxt_vf_rep *vf_rep, /* Allocate the VF-Reps in firmware, during firmware hot-reset processing. * Note that the VF-Rep netdevs are still active (not unregistered) during * this process. As the mode transition from SWITCHDEV to LEGACY happens - * under the rtnl_lock() this routine is safe under the rtnl_lock(). + * under the netdev instance lock this routine is safe. */ int bnxt_vf_reps_alloc(struct bnxt *bp) { diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index e6c64e4bd66c..a7d390709e38 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -15,6 +15,7 @@ #include <linux/bpf.h> #include <linux/bpf_trace.h> #include <linux/filter.h> +#include <net/netdev_lock.h> #include <net/page_pool/helpers.h> #include "bnxt_hsi.h" #include "bnxt.h" @@ -382,13 +383,14 @@ int bnxt_xdp_xmit(struct net_device *dev, int num_frames, return nxmit; } -/* Under rtnl_lock */ static int bnxt_xdp_set(struct bnxt *bp, struct bpf_prog *prog) { struct net_device *dev = bp->dev; int tx_xdp = 0, tx_cp, rc, tc; struct bpf_prog *old; + netdev_assert_locked(dev); + if (prog && !prog->aux->xdp_has_frags && bp->dev->mtu > BNXT_MAX_PAGE_MODE_MTU) { netdev_warn(dev, "MTU %d larger than %d without XDP frag support.\n", @@ -460,23 +462,16 @@ int bnxt_xdp(struct net_device *dev, struct netdev_bpf *xdp) struct sk_buff * bnxt_xdp_build_skb(struct bnxt *bp, struct sk_buff *skb, u8 num_frags, - struct page_pool *pool, struct xdp_buff *xdp, - struct rx_cmp_ext *rxcmp1) + struct page_pool *pool, struct xdp_buff *xdp) { struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); if (!skb) return NULL; - skb_checksum_none_assert(skb); - if (RX_CMP_L4_CS_OK(rxcmp1)) { - if (bp->dev->features & NETIF_F_RXCSUM) { - skb->ip_summed = CHECKSUM_UNNECESSARY; - skb->csum_level = RX_CMP_ENCAP(rxcmp1); - } - } + xdp_update_skb_shared_info(skb, num_frags, sinfo->xdp_frags_size, - BNXT_RX_PAGE_SIZE * sinfo->nr_frags, + BNXT_RX_PAGE_SIZE * num_frags, xdp_buff_is_frag_pfmemalloc(xdp)); return skb; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h index 0122782400b8..220285e190fc 100644 --- 
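bnxt_xdp_set() above swaps the old "Under rtnl_lock" comment for a runtime assertion. The same pattern in isolation, with example_reconfigure() as a hypothetical helper documenting its locking contract the way the patch now does:

#include <linux/netdevice.h>
#include <net/netdev_lock.h>

static int example_reconfigure(struct net_device *dev)
{
	netdev_assert_locked(dev);	/* caller holds the instance lock */
	/* ... safe to touch device configuration here ... */
	return 0;
}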
a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h @@ -33,6 +33,5 @@ void bnxt_xdp_buff_frags_free(struct bnxt_rx_ring_info *rxr, struct xdp_buff *xdp); struct sk_buff *bnxt_xdp_build_skb(struct bnxt *bp, struct sk_buff *skb, u8 num_frags, struct page_pool *pool, - struct xdp_buff *xdp, - struct rx_cmp_ext *rxcmp1); + struct xdp_buff *xdp); #endif diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 3e93f957430b..73d78dcb774d 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -2,7 +2,7 @@ /* * Broadcom GENET (Gigabit Ethernet) controller driver * - * Copyright (c) 2014-2024 Broadcom + * Copyright (c) 2014-2025 Broadcom */ #define pr_fmt(fmt) "bcmgenet: " fmt @@ -41,15 +41,13 @@ #include "bcmgenet.h" -/* Maximum number of hardware queues, downsized if needed */ -#define GENET_MAX_MQ_CNT 4 - /* Default highest priority queue for multi queue support */ -#define GENET_Q0_PRIORITY 0 +#define GENET_Q1_PRIORITY 0 +#define GENET_Q0_PRIORITY 1 -#define GENET_Q16_RX_BD_CNT \ +#define GENET_Q0_RX_BD_CNT \ (TOTAL_DESC - priv->hw_params->rx_queues * priv->hw_params->rx_bds_per_q) -#define GENET_Q16_TX_BD_CNT \ +#define GENET_Q0_TX_BD_CNT \ (TOTAL_DESC - priv->hw_params->tx_queues * priv->hw_params->tx_bds_per_q) #define RX_BUF_LENGTH 2048 @@ -104,7 +102,7 @@ static inline void dmadesc_set_addr(struct bcmgenet_priv *priv, * the platform is explicitly configured for 64-bits/LPAE. */ #ifdef CONFIG_PHYS_ADDR_T_64BIT - if (priv->hw_params->flags & GENET_HAS_40BITS) + if (bcmgenet_has_40bits(priv)) bcmgenet_writel(upper_32_bits(addr), d + DMA_DESC_ADDRESS_HI); #endif } @@ -446,33 +444,48 @@ static void bcmgenet_hfb_enable_filter(struct bcmgenet_priv *priv, u32 f_index) u32 offset; u32 reg; - offset = HFB_FLT_ENABLE_V3PLUS + (f_index < 32) * sizeof(u32); - reg = bcmgenet_hfb_reg_readl(priv, offset); - reg |= (1 << (f_index % 32)); - bcmgenet_hfb_reg_writel(priv, reg, offset); - reg = bcmgenet_hfb_reg_readl(priv, HFB_CTRL); - reg |= RBUF_HFB_EN; - bcmgenet_hfb_reg_writel(priv, reg, HFB_CTRL); + if (GENET_IS_V1(priv) || GENET_IS_V2(priv)) { + reg = bcmgenet_hfb_reg_readl(priv, HFB_CTRL); + reg |= (1 << ((f_index % 32) + RBUF_HFB_FILTER_EN_SHIFT)) | + RBUF_HFB_EN; + bcmgenet_hfb_reg_writel(priv, reg, HFB_CTRL); + } else { + offset = HFB_FLT_ENABLE_V3PLUS + (f_index < 32) * sizeof(u32); + reg = bcmgenet_hfb_reg_readl(priv, offset); + reg |= (1 << (f_index % 32)); + bcmgenet_hfb_reg_writel(priv, reg, offset); + reg = bcmgenet_hfb_reg_readl(priv, HFB_CTRL); + reg |= RBUF_HFB_EN; + bcmgenet_hfb_reg_writel(priv, reg, HFB_CTRL); + } } static void bcmgenet_hfb_disable_filter(struct bcmgenet_priv *priv, u32 f_index) { u32 offset, reg, reg1; - offset = HFB_FLT_ENABLE_V3PLUS; - reg = bcmgenet_hfb_reg_readl(priv, offset); - reg1 = bcmgenet_hfb_reg_readl(priv, offset + sizeof(u32)); - if (f_index < 32) { - reg1 &= ~(1 << (f_index % 32)); - bcmgenet_hfb_reg_writel(priv, reg1, offset + sizeof(u32)); - } else { - reg &= ~(1 << (f_index % 32)); - bcmgenet_hfb_reg_writel(priv, reg, offset); - } - if (!reg && !reg1) { + if (GENET_IS_V1(priv) || GENET_IS_V2(priv)) { reg = bcmgenet_hfb_reg_readl(priv, HFB_CTRL); - reg &= ~RBUF_HFB_EN; + reg &= ~(1 << ((f_index % 32) + RBUF_HFB_FILTER_EN_SHIFT)); + if (!(reg & RBUF_HFB_FILTER_EN_MASK)) + reg &= ~RBUF_HFB_EN; bcmgenet_hfb_reg_writel(priv, reg, HFB_CTRL); + } else { + offset = HFB_FLT_ENABLE_V3PLUS; + reg = 
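A runnable illustration of the V1/V2 encoding introduced above, where the per-filter enable bits live directly in HFB_CTRL; the RBUF_* values below are assumptions for the demo, not taken from bcmgenet.h.

#include <stdint.h>
#include <stdio.h>

#define RBUF_HFB_EN              (1u << 0)  /* assumed bit position */
#define RBUF_HFB_FILTER_EN_SHIFT 16         /* assumed shift        */

int main(void)
{
	uint32_t reg = 0;
	unsigned int f_index = 3;

	/* enabling filter f sets bit (f % 32) + SHIFT plus the
	 * global HFB enable, all in the one HFB_CTRL register
	 */
	reg |= (1u << ((f_index % 32) + RBUF_HFB_FILTER_EN_SHIFT)) |
	       RBUF_HFB_EN;
	printf("HFB_CTRL after enabling filter %u: 0x%08x\n", f_index, reg);
	return 0;
}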
bcmgenet_hfb_reg_readl(priv, offset); + reg1 = bcmgenet_hfb_reg_readl(priv, offset + sizeof(u32)); + if (f_index < 32) { + reg1 &= ~(1 << (f_index % 32)); + bcmgenet_hfb_reg_writel(priv, reg1, offset + sizeof(u32)); + } else { + reg &= ~(1 << (f_index % 32)); + bcmgenet_hfb_reg_writel(priv, reg, offset); + } + if (!reg && !reg1) { + reg = bcmgenet_hfb_reg_readl(priv, HFB_CTRL); + reg &= ~RBUF_HFB_EN; + bcmgenet_hfb_reg_writel(priv, reg, HFB_CTRL); + } } } @@ -482,6 +495,9 @@ static void bcmgenet_hfb_set_filter_rx_queue_mapping(struct bcmgenet_priv *priv, u32 offset; u32 reg; + if (GENET_IS_V1(priv) || GENET_IS_V2(priv)) + return; + offset = f_index / 8; reg = bcmgenet_rdma_readl(priv, DMA_INDEX2RING_0 + offset); reg &= ~(0xF << (4 * (f_index % 8))); @@ -495,9 +511,13 @@ static void bcmgenet_hfb_set_filter_length(struct bcmgenet_priv *priv, u32 offset; u32 reg; - offset = HFB_FLT_LEN_V3PLUS + - ((priv->hw_params->hfb_filter_cnt - 1 - f_index) / 4) * - sizeof(u32); + if (GENET_IS_V1(priv) || GENET_IS_V2(priv)) + offset = HFB_FLT_LEN_V2; + else + offset = HFB_FLT_LEN_V3PLUS; + + offset += sizeof(u32) * + ((priv->hw_params->hfb_filter_cnt - 1 - f_index) / 4); reg = bcmgenet_hfb_reg_readl(priv, offset); reg &= ~(0xFF << (8 * (f_index % 4))); reg |= ((f_length & 0xFF) << (8 * (f_index % 4))); @@ -579,13 +599,13 @@ static void bcmgenet_hfb_create_rxnfc_filter(struct bcmgenet_priv *priv, struct bcmgenet_rxnfc_rule *rule) { struct ethtool_rx_flow_spec *fs = &rule->fs; - u32 offset = 0, f_length = 0, f; + u32 offset = 0, f_length = 0, f, q; u8 val_8, mask_8; __be16 val_16; u16 mask_16; size_t size; - f = fs->location; + f = fs->location + 1; if (fs->flow_type & FLOW_MAC_EXT) { bcmgenet_hfb_insert_data(priv, f, 0, &fs->h_ext.h_dest, &fs->m_ext.h_dest, @@ -667,19 +687,16 @@ static void bcmgenet_hfb_create_rxnfc_filter(struct bcmgenet_priv *priv, } bcmgenet_hfb_set_filter_length(priv, f, 2 * f_length); - if (!fs->ring_cookie || fs->ring_cookie == RX_CLS_FLOW_WAKE) { - /* Ring 0 flows can be handled by the default Descriptor Ring - * We'll map them to ring 0, but don't enable the filter - */ - bcmgenet_hfb_set_filter_rx_queue_mapping(priv, f, 0); - rule->state = BCMGENET_RXNFC_STATE_DISABLED; - } else { + if (fs->ring_cookie == RX_CLS_FLOW_WAKE) + q = 0; + else if (fs->ring_cookie == RX_CLS_FLOW_DISC) + q = priv->hw_params->rx_queues + 1; + else /* Other Rx rings are direct mapped here */ - bcmgenet_hfb_set_filter_rx_queue_mapping(priv, f, - fs->ring_cookie); - bcmgenet_hfb_enable_filter(priv, f); - rule->state = BCMGENET_RXNFC_STATE_ENABLED; - } + q = fs->ring_cookie; + bcmgenet_hfb_set_filter_rx_queue_mapping(priv, f, q); + bcmgenet_hfb_enable_filter(priv, f); + rule->state = BCMGENET_RXNFC_STATE_ENABLED; } /* bcmgenet_hfb_clear @@ -690,6 +707,7 @@ static void bcmgenet_hfb_clear_filter(struct bcmgenet_priv *priv, u32 f_index) { u32 base, i; + bcmgenet_hfb_set_filter_length(priv, f_index, 0); base = f_index * priv->hw_params->hfb_filter_size; for (i = 0; i < priv->hw_params->hfb_filter_size; i++) bcmgenet_hfb_writel(priv, 0x0, (base + i) * sizeof(u32)); @@ -699,22 +717,23 @@ static void bcmgenet_hfb_clear(struct bcmgenet_priv *priv) { u32 i; - if (GENET_IS_V1(priv) || GENET_IS_V2(priv)) - return; - - bcmgenet_hfb_reg_writel(priv, 0x0, HFB_CTRL); - bcmgenet_hfb_reg_writel(priv, 0x0, HFB_FLT_ENABLE_V3PLUS); - bcmgenet_hfb_reg_writel(priv, 0x0, HFB_FLT_ENABLE_V3PLUS + 4); - - for (i = DMA_INDEX2RING_0; i <= DMA_INDEX2RING_7; i++) - bcmgenet_rdma_writel(priv, 0x0, i); + bcmgenet_hfb_reg_writel(priv, 0, 
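The filter-length addressing above, worked as a standalone example for the V3+ layout: 48 filters, four 8-bit lengths packed per 32-bit word, stored in reverse filter order.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	unsigned int hfb_filter_cnt = 48, f_index = 5, f_length = 10;
	unsigned int word = (hfb_filter_cnt - 1 - f_index) / 4;
	unsigned int shift = 8 * (f_index % 4);
	uint32_t reg = 0;

	/* clear then set the 8-bit lane holding this filter's length */
	reg &= ~(0xFFu << shift);
	reg |= (f_length & 0xFF) << shift;
	printf("filter %u: word %u, byte lane %u, reg=0x%08x\n",
	       f_index, word, shift / 8, reg); /* word 10, lane 1, 0xa00 */
	return 0;
}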
HFB_CTRL); - for (i = 0; i < (priv->hw_params->hfb_filter_cnt / 4); i++) - bcmgenet_hfb_reg_writel(priv, 0x0, - HFB_FLT_LEN_V3PLUS + i * sizeof(u32)); + if (!GENET_IS_V1(priv) && !GENET_IS_V2(priv)) { + bcmgenet_hfb_reg_writel(priv, 0, + HFB_FLT_ENABLE_V3PLUS); + bcmgenet_hfb_reg_writel(priv, 0, + HFB_FLT_ENABLE_V3PLUS + 4); + for (i = DMA_INDEX2RING_0; i <= DMA_INDEX2RING_7; i++) + bcmgenet_rdma_writel(priv, 0, i); + } for (i = 0; i < priv->hw_params->hfb_filter_cnt; i++) bcmgenet_hfb_clear_filter(priv, i); + + /* Enable filter 0 to send default flow to ring 0 */ + bcmgenet_hfb_set_filter_length(priv, 0, 4); + bcmgenet_hfb_enable_filter(priv, 0); } static void bcmgenet_hfb_init(struct bcmgenet_priv *priv) @@ -722,9 +741,6 @@ static void bcmgenet_hfb_init(struct bcmgenet_priv *priv) int i; INIT_LIST_HEAD(&priv->rxnfc_list); - if (GENET_IS_V1(priv) || GENET_IS_V2(priv)) - return; - for (i = 0; i < MAX_NUM_OF_FS_RULES; i++) { INIT_LIST_HEAD(&priv->rxnfc_rules[i].list); priv->rxnfc_rules[i].state = BCMGENET_RXNFC_STATE_UNUSED; @@ -819,20 +835,16 @@ static int bcmgenet_get_coalesce(struct net_device *dev, unsigned int i; ec->tx_max_coalesced_frames = - bcmgenet_tdma_ring_readl(priv, DESC_INDEX, - DMA_MBUF_DONE_THRESH); + bcmgenet_tdma_ring_readl(priv, 0, DMA_MBUF_DONE_THRESH); ec->rx_max_coalesced_frames = - bcmgenet_rdma_ring_readl(priv, DESC_INDEX, - DMA_MBUF_DONE_THRESH); + bcmgenet_rdma_ring_readl(priv, 0, DMA_MBUF_DONE_THRESH); ec->rx_coalesce_usecs = - bcmgenet_rdma_readl(priv, DMA_RING16_TIMEOUT) * 8192 / 1000; + bcmgenet_rdma_readl(priv, DMA_RING0_TIMEOUT) * 8192 / 1000; - for (i = 0; i < priv->hw_params->rx_queues; i++) { + for (i = 0; i <= priv->hw_params->rx_queues; i++) { ring = &priv->rx_rings[i]; ec->use_adaptive_rx_coalesce |= ring->dim.use_dim; } - ring = &priv->rx_rings[DESC_INDEX]; - ec->use_adaptive_rx_coalesce |= ring->dim.use_dim; return 0; } @@ -902,17 +914,13 @@ static int bcmgenet_set_coalesce(struct net_device *dev, /* Program all TX queues with the same values, as there is no * ethtool knob to do coalescing on a per-queue basis */ - for (i = 0; i < priv->hw_params->tx_queues; i++) + for (i = 0; i <= priv->hw_params->tx_queues; i++) bcmgenet_tdma_ring_writel(priv, i, ec->tx_max_coalesced_frames, DMA_MBUF_DONE_THRESH); - bcmgenet_tdma_ring_writel(priv, DESC_INDEX, - ec->tx_max_coalesced_frames, - DMA_MBUF_DONE_THRESH); - for (i = 0; i < priv->hw_params->rx_queues; i++) + for (i = 0; i <= priv->hw_params->rx_queues; i++) bcmgenet_set_ring_rx_coalesce(&priv->rx_rings[i], ec); - bcmgenet_set_ring_rx_coalesce(&priv->rx_rings[DESC_INDEX], ec); return 0; } @@ -1120,7 +1128,7 @@ static const struct bcmgenet_stats bcmgenet_gstrings_stats[] = { STAT_GENET_Q(1), STAT_GENET_Q(2), STAT_GENET_Q(3), - STAT_GENET_Q(16), + STAT_GENET_Q(4), }; #define BCMGENET_STATS_LEN ARRAY_SIZE(bcmgenet_gstrings_stats) @@ -1438,7 +1446,8 @@ static int bcmgenet_insert_flow(struct net_device *dev, } if (cmd->fs.ring_cookie > priv->hw_params->rx_queues && - cmd->fs.ring_cookie != RX_CLS_FLOW_WAKE) { + cmd->fs.ring_cookie != RX_CLS_FLOW_WAKE && + cmd->fs.ring_cookie != RX_CLS_FLOW_DISC) { netdev_err(dev, "rxnfc: Unsupported action (%llu)\n", cmd->fs.ring_cookie); return -EINVAL; @@ -1472,10 +1481,10 @@ static int bcmgenet_insert_flow(struct net_device *dev, loc_rule = &priv->rxnfc_rules[cmd->fs.location]; } if (loc_rule->state == BCMGENET_RXNFC_STATE_ENABLED) - bcmgenet_hfb_disable_filter(priv, cmd->fs.location); + bcmgenet_hfb_disable_filter(priv, cmd->fs.location + 1); if (loc_rule->state != 
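bcmgenet_insert_flow() above now also accepts RX_CLS_FLOW_DISC. A standalone mirror of the resulting cookie-to-queue mapping; the special cookie values are per include/uapi/linux/ethtool.h.

#include <stdint.h>
#include <stdio.h>

#define RX_CLS_FLOW_DISC 0xffffffffffffffffULL
#define RX_CLS_FLOW_WAKE 0xfffffffffffffffeULL

/* WAKE filters land on default ring 0, DISC maps to the ring just
 * past the priority rings (so the packet is dropped), anything else
 * is a direct ring index
 */
static unsigned int map_ring_cookie(uint64_t cookie, unsigned int rx_queues)
{
	if (cookie == RX_CLS_FLOW_WAKE)
		return 0;
	if (cookie == RX_CLS_FLOW_DISC)
		return rx_queues + 1;
	return (unsigned int)cookie;
}

int main(void)
{
	printf("WAKE -> %u, DISC -> %u, 2 -> %u\n",
	       map_ring_cookie(RX_CLS_FLOW_WAKE, 4),
	       map_ring_cookie(RX_CLS_FLOW_DISC, 4),
	       map_ring_cookie(2, 4));
	return 0;
}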
BCMGENET_RXNFC_STATE_UNUSED) { list_del(&loc_rule->list); - bcmgenet_hfb_clear_filter(priv, cmd->fs.location); + bcmgenet_hfb_clear_filter(priv, cmd->fs.location + 1); } loc_rule->state = BCMGENET_RXNFC_STATE_UNUSED; memcpy(&loc_rule->fs, &cmd->fs, @@ -1505,10 +1514,10 @@ static int bcmgenet_delete_flow(struct net_device *dev, } if (rule->state == BCMGENET_RXNFC_STATE_ENABLED) - bcmgenet_hfb_disable_filter(priv, cmd->fs.location); + bcmgenet_hfb_disable_filter(priv, cmd->fs.location + 1); if (rule->state != BCMGENET_RXNFC_STATE_UNUSED) { list_del(&rule->list); - bcmgenet_hfb_clear_filter(priv, cmd->fs.location); + bcmgenet_hfb_clear_filter(priv, cmd->fs.location + 1); } rule->state = BCMGENET_RXNFC_STATE_UNUSED; memset(&rule->fs, 0, sizeof(struct ethtool_rx_flow_spec)); @@ -1651,9 +1660,9 @@ static int bcmgenet_power_down(struct bcmgenet_priv *priv, case GENET_POWER_PASSIVE: /* Power down LED */ - if (priv->hw_params->flags & GENET_HAS_EXT) { + if (bcmgenet_has_ext(priv)) { reg = bcmgenet_ext_readl(priv, EXT_EXT_PWR_MGMT); - if (GENET_IS_V5(priv) && !priv->ephy_16nm) + if (GENET_IS_V5(priv) && !bcmgenet_has_ephy_16nm(priv)) reg |= EXT_PWR_DOWN_PHY_EN | EXT_PWR_DOWN_PHY_RD | EXT_PWR_DOWN_PHY_SD | @@ -1676,13 +1685,14 @@ static int bcmgenet_power_down(struct bcmgenet_priv *priv, return ret; } -static void bcmgenet_power_up(struct bcmgenet_priv *priv, - enum bcmgenet_power_mode mode) +static int bcmgenet_power_up(struct bcmgenet_priv *priv, + enum bcmgenet_power_mode mode) { + int ret = 0; u32 reg; - if (!(priv->hw_params->flags & GENET_HAS_EXT)) - return; + if (!bcmgenet_has_ext(priv)) + return ret; reg = bcmgenet_ext_readl(priv, EXT_EXT_PWR_MGMT); @@ -1690,7 +1700,7 @@ static void bcmgenet_power_up(struct bcmgenet_priv *priv, case GENET_POWER_PASSIVE: reg &= ~(EXT_PWR_DOWN_DLL | EXT_PWR_DOWN_BIAS | EXT_ENERGY_DET_MASK); - if (GENET_IS_V5(priv) && !priv->ephy_16nm) { + if (GENET_IS_V5(priv) && !bcmgenet_has_ephy_16nm(priv)) { reg &= ~(EXT_PWR_DOWN_PHY_EN | EXT_PWR_DOWN_PHY_RD | EXT_PWR_DOWN_PHY_SD | @@ -1718,11 +1728,13 @@ static void bcmgenet_power_up(struct bcmgenet_priv *priv, } break; case GENET_POWER_WOL_MAGIC: - bcmgenet_wol_power_up_cfg(priv, mode); - return; + ret = bcmgenet_wol_power_up_cfg(priv, mode); + break; default: break; } + + return ret; } static struct enet_cb *bcmgenet_get_txcb(struct bcmgenet_priv *priv, @@ -1759,18 +1771,6 @@ static struct enet_cb *bcmgenet_put_txcb(struct bcmgenet_priv *priv, return tx_cb_ptr; } -static inline void bcmgenet_rx_ring16_int_disable(struct bcmgenet_rx_ring *ring) -{ - bcmgenet_intrl2_0_writel(ring->priv, UMAC_IRQ_RXDMA_DONE, - INTRL2_CPU_MASK_SET); -} - -static inline void bcmgenet_rx_ring16_int_enable(struct bcmgenet_rx_ring *ring) -{ - bcmgenet_intrl2_0_writel(ring->priv, UMAC_IRQ_RXDMA_DONE, - INTRL2_CPU_MASK_CLEAR); -} - static inline void bcmgenet_rx_ring_int_disable(struct bcmgenet_rx_ring *ring) { bcmgenet_intrl2_1_writel(ring->priv, @@ -1785,18 +1785,6 @@ static inline void bcmgenet_rx_ring_int_enable(struct bcmgenet_rx_ring *ring) INTRL2_CPU_MASK_CLEAR); } -static inline void bcmgenet_tx_ring16_int_disable(struct bcmgenet_tx_ring *ring) -{ - bcmgenet_intrl2_0_writel(ring->priv, UMAC_IRQ_TXDMA_DONE, - INTRL2_CPU_MASK_SET); -} - -static inline void bcmgenet_tx_ring16_int_enable(struct bcmgenet_tx_ring *ring) -{ - bcmgenet_intrl2_0_writel(ring->priv, UMAC_IRQ_TXDMA_DONE, - INTRL2_CPU_MASK_CLEAR); -} - static inline void bcmgenet_tx_ring_int_enable(struct bcmgenet_tx_ring *ring) { bcmgenet_intrl2_1_writel(ring->priv, 1 << ring->index, 
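bcmgenet_power_up() above now returns an int so callers can fall back when the WOL exit fails. A hedged kernel-context sketch of that calling convention; example_resume() is hypothetical and mirrors the resume_noirq hunk later in this patch.

static int example_resume(struct bcmgenet_priv *priv)
{
	/* zero means a clean WOL exit, nothing more to do */
	if (!bcmgenet_power_up(priv, GENET_POWER_WOL_MAGIC))
		return 0;

	/* otherwise power the PHY, then pull UniMAC out of reset */
	bcmgenet_power_up(priv, GENET_POWER_PASSIVE);
	bcmgenet_umac_reset(priv);
	return 0;
}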
@@ -1877,12 +1865,7 @@ static unsigned int __bcmgenet_tx_reclaim(struct net_device *dev, struct sk_buff *skb; /* Clear status before servicing to reduce spurious interrupts */ - if (ring->index == DESC_INDEX) - bcmgenet_intrl2_0_writel(priv, UMAC_IRQ_TXDMA_DONE, - INTRL2_CPU_CLEAR); - else - bcmgenet_intrl2_1_writel(priv, (1 << ring->index), - INTRL2_CPU_CLEAR); + bcmgenet_intrl2_1_writel(priv, (1 << ring->index), INTRL2_CPU_CLEAR); /* Compute how many buffers are transmitted since last xmit call */ c_index = bcmgenet_tdma_ring_readl(priv, ring->index, TDMA_CONS_INDEX) @@ -1916,19 +1899,46 @@ static unsigned int __bcmgenet_tx_reclaim(struct net_device *dev, ring->packets += pkts_compl; ring->bytes += bytes_compl; - netdev_tx_completed_queue(netdev_get_tx_queue(dev, ring->queue), + netdev_tx_completed_queue(netdev_get_tx_queue(dev, ring->index), pkts_compl, bytes_compl); return txbds_processed; } static unsigned int bcmgenet_tx_reclaim(struct net_device *dev, - struct bcmgenet_tx_ring *ring) + struct bcmgenet_tx_ring *ring, + bool all) { - unsigned int released; + struct bcmgenet_priv *priv = netdev_priv(dev); + struct device *kdev = &priv->pdev->dev; + unsigned int released, drop, wr_ptr; + struct enet_cb *cb_ptr; + struct sk_buff *skb; spin_lock_bh(&ring->lock); released = __bcmgenet_tx_reclaim(dev, ring); + if (all) { + skb = NULL; + drop = (ring->prod_index - ring->c_index) & DMA_C_INDEX_MASK; + released += drop; + ring->prod_index = ring->c_index & DMA_C_INDEX_MASK; + while (drop--) { + cb_ptr = bcmgenet_put_txcb(priv, ring); + skb = cb_ptr->skb; + bcmgenet_free_tx_cb(kdev, cb_ptr); + if (skb && cb_ptr == GENET_CB(skb)->first_cb) { + dev_consume_skb_any(skb); + skb = NULL; + } + } + if (skb) + dev_consume_skb_any(skb); + bcmgenet_tdma_ring_writel(priv, ring->index, + ring->prod_index, TDMA_PROD_INDEX); + wr_ptr = ring->write_ptr * WORDS_PER_BD(priv); + bcmgenet_tdma_ring_writel(priv, ring->index, wr_ptr, + TDMA_WRITE_PTR); + } spin_unlock_bh(&ring->lock); return released; @@ -1944,14 +1954,14 @@ static int bcmgenet_tx_poll(struct napi_struct *napi, int budget) spin_lock(&ring->lock); work_done = __bcmgenet_tx_reclaim(ring->priv->dev, ring); if (ring->free_bds > (MAX_SKB_FRAGS + 1)) { - txq = netdev_get_tx_queue(ring->priv->dev, ring->queue); + txq = netdev_get_tx_queue(ring->priv->dev, ring->index); netif_tx_wake_queue(txq); } spin_unlock(&ring->lock); if (work_done == 0) { napi_complete(napi); - ring->int_enable(ring); + bcmgenet_tx_ring_int_enable(ring); return 0; } @@ -1962,14 +1972,11 @@ static int bcmgenet_tx_poll(struct napi_struct *napi, int budget) static void bcmgenet_tx_reclaim_all(struct net_device *dev) { struct bcmgenet_priv *priv = netdev_priv(dev); - int i; - - if (netif_is_multiqueue(dev)) { - for (i = 0; i < priv->hw_params->tx_queues; i++) - bcmgenet_tx_reclaim(dev, &priv->tx_rings[i]); - } + int i = 0; - bcmgenet_tx_reclaim(dev, &priv->tx_rings[DESC_INDEX]); + do { + bcmgenet_tx_reclaim(dev, &priv->tx_rings[i++], true); + } while (i <= priv->hw_params->tx_queues && netif_is_multiqueue(dev)); } /* Reallocate the SKB to put enough headroom in front of it and insert @@ -2057,19 +2064,14 @@ static netdev_tx_t bcmgenet_xmit(struct sk_buff *skb, struct net_device *dev) index = skb_get_queue_mapping(skb); /* Mapping strategy: - * queue_mapping = 0, unclassified, packet xmited through ring16 - * queue_mapping = 1, goes to ring 0. (highest priority queue - * queue_mapping = 2, goes to ring 1. - * queue_mapping = 3, goes to ring 2. - * queue_mapping = 4, goes to ring 3. 
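The new reclaim-all path above sizes its drop count by subtracting free-running hardware indices under a mask. A runnable check of that wraparound arithmetic, with the mask width assumed to be 16 bits for the demo.

#include <stdint.h>
#include <stdio.h>

#define DMA_C_INDEX_MASK 0xFFFF	/* 16-bit hardware index, assumed */

int main(void)
{
	/* prod has wrapped past cons; masking the difference still
	 * yields the number of outstanding descriptors
	 */
	unsigned int prod = 0x0005, cons = 0xfffd;
	unsigned int drop = (prod - cons) & DMA_C_INDEX_MASK;

	printf("outstanding descriptors: %u\n", drop);	/* prints 8 */
	return 0;
}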
+ * queue_mapping = 0, unclassified, packet xmited through ring 0 + * queue_mapping = 1, goes to ring 1. (highest priority queue) + * queue_mapping = 2, goes to ring 2. + * queue_mapping = 3, goes to ring 3. + * queue_mapping = 4, goes to ring 4. */ - if (index == 0) - index = DESC_INDEX; - else - index -= 1; - ring = &priv->tx_rings[index]; - txq = netdev_get_tx_queue(dev, ring->queue); + txq = netdev_get_tx_queue(dev, index); nr_frags = skb_shinfo(skb)->nr_frags; @@ -2242,15 +2244,8 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring, unsigned int discards; /* Clear status before servicing to reduce spurious interrupts */ - if (ring->index == DESC_INDEX) { - bcmgenet_intrl2_0_writel(priv, UMAC_IRQ_RXDMA_DONE, - INTRL2_CPU_CLEAR); - } else { - mask = 1 << (UMAC_IRQ1_RX_INTR_SHIFT + ring->index); - bcmgenet_intrl2_1_writel(priv, - mask, - INTRL2_CPU_CLEAR); - } + mask = 1 << (UMAC_IRQ1_RX_INTR_SHIFT + ring->index); + bcmgenet_intrl2_1_writel(priv, mask, INTRL2_CPU_CLEAR); p_index = bcmgenet_rdma_ring_readl(priv, ring->index, RDMA_PROD_INDEX); @@ -2399,7 +2394,7 @@ static int bcmgenet_rx_poll(struct napi_struct *napi, int budget) if (work_done < budget) { napi_complete_done(napi, work_done); - ring->int_enable(ring); + bcmgenet_rx_ring_int_enable(ring); } if (ring->dim.use_dim) { @@ -2523,7 +2518,7 @@ static void bcmgenet_link_intr_enable(struct bcmgenet_priv *priv) } else if (priv->ext_phy) { int0_enable |= UMAC_IRQ_LINK_EVENT; } else if (priv->phy_interface == PHY_INTERFACE_MODE_MOCA) { - if (priv->hw_params->flags & GENET_HAS_MOCA_LINK_DET) + if (bcmgenet_has_moca_link_det(priv)) int0_enable |= UMAC_IRQ_LINK_EVENT; } bcmgenet_intrl2_0_writel(priv, int0_enable, INTRL2_CPU_MASK_CLEAR); @@ -2588,8 +2583,8 @@ static void init_umac(struct bcmgenet_priv *priv) } /* Enable MDIO interrupts on GENET v3+ */ - if (priv->hw_params->flags & GENET_HAS_MDIO_INTR) - int0_enable |= (UMAC_IRQ_MDIO_DONE | UMAC_IRQ_MDIO_ERROR); + if (bcmgenet_has_mdio_intr(priv)) + int0_enable |= UMAC_IRQ_MDIO_EVENT; bcmgenet_intrl2_0_writel(priv, int0_enable, INTRL2_CPU_MASK_CLEAR); @@ -2639,15 +2634,6 @@ static void bcmgenet_init_tx_ring(struct bcmgenet_priv *priv, spin_lock_init(&ring->lock); ring->priv = priv; ring->index = index; - if (index == DESC_INDEX) { - ring->queue = 0; - ring->int_enable = bcmgenet_tx_ring16_int_enable; - ring->int_disable = bcmgenet_tx_ring16_int_disable; - } else { - ring->queue = index + 1; - ring->int_enable = bcmgenet_tx_ring_int_enable; - ring->int_disable = bcmgenet_tx_ring_int_disable; - } ring->cbs = priv->tx_cbs + start_ptr; ring->size = size; ring->clean_ptr = start_ptr; @@ -2658,8 +2644,8 @@ static void bcmgenet_init_tx_ring(struct bcmgenet_priv *priv, ring->end_ptr = end_ptr - 1; ring->prod_index = 0; - /* Set flow period for ring != 16 */ - if (index != DESC_INDEX) + /* Set flow period for ring != 0 */ + if (index) flow_period_val = ENET_MAX_MTU_SIZE << 16; bcmgenet_tdma_ring_writel(priv, index, 0, TDMA_PROD_INDEX); @@ -2697,13 +2683,6 @@ static int bcmgenet_init_rx_ring(struct bcmgenet_priv *priv, ring->priv = priv; ring->index = index; - if (index == DESC_INDEX) { - ring->int_enable = bcmgenet_rx_ring16_int_enable; - ring->int_disable = bcmgenet_rx_ring16_int_disable; - } else { - ring->int_enable = bcmgenet_rx_ring_int_enable; - ring->int_disable = bcmgenet_rx_ring_int_disable; - } ring->cbs = priv->rx_cbs + start_ptr; ring->size = size; ring->c_index = 0; @@ -2749,15 +2728,11 @@ static void bcmgenet_enable_tx_napi(struct bcmgenet_priv *priv) unsigned int i; 
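The mapping-strategy comment above replaces a remap with an identity mapping; both shown side by side as plain functions for contrast.

/* old scheme: queue_mapping 0 was diverted to the ring 16 default,
 * everything else shifted down by one
 */
static unsigned int old_select_ring(unsigned int queue_mapping)
{
	return queue_mapping == 0 ? 16 /* DESC_INDEX */ : queue_mapping - 1;
}

/* new scheme: ring numbering matches queue_mapping directly,
 * with ring 0 as the default
 */
static unsigned int new_select_ring(unsigned int queue_mapping)
{
	return queue_mapping;
}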
struct bcmgenet_tx_ring *ring; - for (i = 0; i < priv->hw_params->tx_queues; ++i) { + for (i = 0; i <= priv->hw_params->tx_queues; ++i) { ring = &priv->tx_rings[i]; napi_enable(&ring->napi); - ring->int_enable(ring); + bcmgenet_tx_ring_int_enable(ring); } - - ring = &priv->tx_rings[DESC_INDEX]; - napi_enable(&ring->napi); - ring->int_enable(ring); } static void bcmgenet_disable_tx_napi(struct bcmgenet_priv *priv) @@ -2765,13 +2740,10 @@ static void bcmgenet_disable_tx_napi(struct bcmgenet_priv *priv) unsigned int i; struct bcmgenet_tx_ring *ring; - for (i = 0; i < priv->hw_params->tx_queues; ++i) { + for (i = 0; i <= priv->hw_params->tx_queues; ++i) { ring = &priv->tx_rings[i]; napi_disable(&ring->napi); } - - ring = &priv->tx_rings[DESC_INDEX]; - napi_disable(&ring->napi); } static void bcmgenet_fini_tx_napi(struct bcmgenet_priv *priv) @@ -2779,82 +2751,104 @@ static void bcmgenet_fini_tx_napi(struct bcmgenet_priv *priv) unsigned int i; struct bcmgenet_tx_ring *ring; - for (i = 0; i < priv->hw_params->tx_queues; ++i) { + for (i = 0; i <= priv->hw_params->tx_queues; ++i) { ring = &priv->tx_rings[i]; netif_napi_del(&ring->napi); } +} - ring = &priv->tx_rings[DESC_INDEX]; - netif_napi_del(&ring->napi); +static int bcmgenet_tdma_disable(struct bcmgenet_priv *priv) +{ + int timeout = 0; + u32 reg, mask; + + reg = bcmgenet_tdma_readl(priv, DMA_CTRL); + mask = (1 << (priv->hw_params->tx_queues + 1)) - 1; + mask = (mask << DMA_RING_BUF_EN_SHIFT) | DMA_EN; + reg &= ~mask; + bcmgenet_tdma_writel(priv, reg, DMA_CTRL); + + /* Check DMA status register to confirm DMA is disabled */ + while (timeout++ < DMA_TIMEOUT_VAL) { + reg = bcmgenet_tdma_readl(priv, DMA_STATUS); + if ((reg & mask) == mask) + return 0; + + udelay(1); + } + + return -ETIMEDOUT; +} + +static int bcmgenet_rdma_disable(struct bcmgenet_priv *priv) +{ + int timeout = 0; + u32 reg, mask; + + reg = bcmgenet_rdma_readl(priv, DMA_CTRL); + mask = (1 << (priv->hw_params->rx_queues + 1)) - 1; + mask = (mask << DMA_RING_BUF_EN_SHIFT) | DMA_EN; + reg &= ~mask; + bcmgenet_rdma_writel(priv, reg, DMA_CTRL); + + /* Check DMA status register to confirm DMA is disabled */ + while (timeout++ < DMA_TIMEOUT_VAL) { + reg = bcmgenet_rdma_readl(priv, DMA_STATUS); + if ((reg & mask) == mask) + return 0; + + udelay(1); + } + + return -ETIMEDOUT; } /* Initialize Tx queues * - * Queues 0-3 are priority-based, each one has 32 descriptors, - * with queue 0 being the highest priority queue. + * Queues 1-4 are priority-based, each one has 32 descriptors, + * with queue 1 being the highest priority queue. * - * Queue 16 is the default Tx queue with - * GENET_Q16_TX_BD_CNT = 256 - 4 * 32 = 128 descriptors. + * Queue 0 is the default Tx queue with + * GENET_Q0_TX_BD_CNT = 256 - 4 * 32 = 128 descriptors. 
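bcmgenet_tdma_disable()/bcmgenet_rdma_disable() above build one mask covering the global DMA enable plus every ring enable. Worked here for tx_queues = 4, with the bit positions assumed for the demo.

#include <stdint.h>
#include <stdio.h>

#define DMA_EN                (1u << 0)	/* assumed bit 0  */
#define DMA_RING_BUF_EN_SHIFT 1		/* assumed shift  */

int main(void)
{
	unsigned int tx_queues = 4;
	uint32_t mask = (1u << (tx_queues + 1)) - 1;	/* rings 0..4 */

	mask = (mask << DMA_RING_BUF_EN_SHIFT) | DMA_EN;
	printf("DMA disable mask: 0x%08x\n", mask);	/* 0x0000003f */
	return 0;
}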
* * The transmit control block pool is then partitioned as follows: - * - Tx queue 0 uses tx_cbs[0..31] - * - Tx queue 1 uses tx_cbs[32..63] - * - Tx queue 2 uses tx_cbs[64..95] - * - Tx queue 3 uses tx_cbs[96..127] - * - Tx queue 16 uses tx_cbs[128..255] + * - Tx queue 0 uses tx_cbs[0..127] + * - Tx queue 1 uses tx_cbs[128..159] + * - Tx queue 2 uses tx_cbs[160..191] + * - Tx queue 3 uses tx_cbs[192..223] + * - Tx queue 4 uses tx_cbs[224..255] */ static void bcmgenet_init_tx_queues(struct net_device *dev) { struct bcmgenet_priv *priv = netdev_priv(dev); - u32 i, dma_enable; - u32 dma_ctrl, ring_cfg; - u32 dma_priority[3] = {0, 0, 0}; - - dma_ctrl = bcmgenet_tdma_readl(priv, DMA_CTRL); - dma_enable = dma_ctrl & DMA_EN; - dma_ctrl &= ~DMA_EN; - bcmgenet_tdma_writel(priv, dma_ctrl, DMA_CTRL); - - dma_ctrl = 0; - ring_cfg = 0; + unsigned int start = 0, end = GENET_Q0_TX_BD_CNT; + u32 i, ring_mask, dma_priority[3] = {0, 0, 0}; /* Enable strict priority arbiter mode */ bcmgenet_tdma_writel(priv, DMA_ARBITER_SP, DMA_ARB_CTRL); /* Initialize Tx priority queues */ - for (i = 0; i < priv->hw_params->tx_queues; i++) { - bcmgenet_init_tx_ring(priv, i, priv->hw_params->tx_bds_per_q, - i * priv->hw_params->tx_bds_per_q, - (i + 1) * priv->hw_params->tx_bds_per_q); - ring_cfg |= (1 << i); - dma_ctrl |= (1 << (i + DMA_RING_BUF_EN_SHIFT)); + for (i = 0; i <= priv->hw_params->tx_queues; i++) { + bcmgenet_init_tx_ring(priv, i, end - start, start, end); + start = end; + end += priv->hw_params->tx_bds_per_q; dma_priority[DMA_PRIO_REG_INDEX(i)] |= - ((GENET_Q0_PRIORITY + i) << DMA_PRIO_REG_SHIFT(i)); + (i ? GENET_Q1_PRIORITY : GENET_Q0_PRIORITY) + << DMA_PRIO_REG_SHIFT(i); } - /* Initialize Tx default queue 16 */ - bcmgenet_init_tx_ring(priv, DESC_INDEX, GENET_Q16_TX_BD_CNT, - priv->hw_params->tx_queues * - priv->hw_params->tx_bds_per_q, - TOTAL_DESC); - ring_cfg |= (1 << DESC_INDEX); - dma_ctrl |= (1 << (DESC_INDEX + DMA_RING_BUF_EN_SHIFT)); - dma_priority[DMA_PRIO_REG_INDEX(DESC_INDEX)] |= - ((GENET_Q0_PRIORITY + priv->hw_params->tx_queues) << - DMA_PRIO_REG_SHIFT(DESC_INDEX)); - /* Set Tx queue priorities */ bcmgenet_tdma_writel(priv, dma_priority[0], DMA_PRIORITY_0); bcmgenet_tdma_writel(priv, dma_priority[1], DMA_PRIORITY_1); bcmgenet_tdma_writel(priv, dma_priority[2], DMA_PRIORITY_2); - /* Enable Tx queues */ - bcmgenet_tdma_writel(priv, ring_cfg, DMA_RING_CFG); + /* Configure Tx queues as descriptor rings */ + ring_mask = (1 << (priv->hw_params->tx_queues + 1)) - 1; + bcmgenet_tdma_writel(priv, ring_mask, DMA_RING_CFG); - /* Enable Tx DMA */ - if (dma_enable) - dma_ctrl |= DMA_EN; - bcmgenet_tdma_writel(priv, dma_ctrl, DMA_CTRL); + /* Enable Tx rings */ + ring_mask <<= DMA_RING_BUF_EN_SHIFT; + bcmgenet_tdma_writel(priv, ring_mask, DMA_CTRL); } static void bcmgenet_enable_rx_napi(struct bcmgenet_priv *priv) @@ -2862,15 +2856,11 @@ static void bcmgenet_enable_rx_napi(struct bcmgenet_priv *priv) unsigned int i; struct bcmgenet_rx_ring *ring; - for (i = 0; i < priv->hw_params->rx_queues; ++i) { + for (i = 0; i <= priv->hw_params->rx_queues; ++i) { ring = &priv->rx_rings[i]; napi_enable(&ring->napi); - ring->int_enable(ring); + bcmgenet_rx_ring_int_enable(ring); } - - ring = &priv->rx_rings[DESC_INDEX]; - napi_enable(&ring->napi); - ring->int_enable(ring); } static void bcmgenet_disable_rx_napi(struct bcmgenet_priv *priv) @@ -2878,15 +2868,11 @@ static void bcmgenet_disable_rx_napi(struct bcmgenet_priv *priv) unsigned int i; struct bcmgenet_rx_ring *ring; - for (i = 0; i < priv->hw_params->rx_queues; ++i) { + 
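The partitioning described in the comment above, reproduced as a runnable loop for TOTAL_DESC = 256, tx_queues = 4, tx_bds_per_q = 32: queue 0 (the default) takes the 128 leftover descriptors, queues 1-4 take 32 each, carved out back to back.

#include <stdio.h>

int main(void)
{
	unsigned int total = 256, tx_queues = 4, bds_per_q = 32;
	unsigned int start = 0, end = total - tx_queues * bds_per_q;

	for (unsigned int i = 0; i <= tx_queues; i++) {
		printf("queue %u: tx_cbs[%u..%u] (%u descriptors)\n",
		       i, start, end - 1, end - start);
		start = end;
		end += bds_per_q;
	}
	return 0;	/* prints 0..127, 128..159, ..., 224..255 */
}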
for (i = 0; i <= priv->hw_params->rx_queues; ++i) { ring = &priv->rx_rings[i]; napi_disable(&ring->napi); cancel_work_sync(&ring->dim.dim.work); } - - ring = &priv->rx_rings[DESC_INDEX]; - napi_disable(&ring->napi); - cancel_work_sync(&ring->dim.dim.work); } static void bcmgenet_fini_rx_napi(struct bcmgenet_priv *priv) @@ -2894,13 +2880,10 @@ static void bcmgenet_fini_rx_napi(struct bcmgenet_priv *priv) unsigned int i; struct bcmgenet_rx_ring *ring; - for (i = 0; i < priv->hw_params->rx_queues; ++i) { + for (i = 0; i <= priv->hw_params->rx_queues; ++i) { ring = &priv->rx_rings[i]; netif_napi_del(&ring->napi); } - - ring = &priv->rx_rings[DESC_INDEX]; - netif_napi_del(&ring->napi); } /* Initialize Rx queues @@ -2908,57 +2891,32 @@ static void bcmgenet_fini_rx_napi(struct bcmgenet_priv *priv) * Queues 0-15 are priority queues. Hardware Filtering Block (HFB) can be * used to direct traffic to these queues. * - * Queue 16 is the default Rx queue with GENET_Q16_RX_BD_CNT descriptors. + * Queue 0 is also the default Rx queue with GENET_Q0_RX_BD_CNT descriptors. */ static int bcmgenet_init_rx_queues(struct net_device *dev) { struct bcmgenet_priv *priv = netdev_priv(dev); - u32 i; - u32 dma_enable; - u32 dma_ctrl; - u32 ring_cfg; + unsigned int start = 0, end = GENET_Q0_RX_BD_CNT; + u32 i, ring_mask; int ret; - dma_ctrl = bcmgenet_rdma_readl(priv, DMA_CTRL); - dma_enable = dma_ctrl & DMA_EN; - dma_ctrl &= ~DMA_EN; - bcmgenet_rdma_writel(priv, dma_ctrl, DMA_CTRL); - - dma_ctrl = 0; - ring_cfg = 0; - /* Initialize Rx priority queues */ - for (i = 0; i < priv->hw_params->rx_queues; i++) { - ret = bcmgenet_init_rx_ring(priv, i, - priv->hw_params->rx_bds_per_q, - i * priv->hw_params->rx_bds_per_q, - (i + 1) * - priv->hw_params->rx_bds_per_q); + for (i = 0; i <= priv->hw_params->rx_queues; i++) { + ret = bcmgenet_init_rx_ring(priv, i, end - start, start, end); if (ret) return ret; - ring_cfg |= (1 << i); - dma_ctrl |= (1 << (i + DMA_RING_BUF_EN_SHIFT)); + start = end; + end += priv->hw_params->rx_bds_per_q; } - /* Initialize Rx default queue 16 */ - ret = bcmgenet_init_rx_ring(priv, DESC_INDEX, GENET_Q16_RX_BD_CNT, - priv->hw_params->rx_queues * - priv->hw_params->rx_bds_per_q, - TOTAL_DESC); - if (ret) - return ret; - - ring_cfg |= (1 << DESC_INDEX); - dma_ctrl |= (1 << (DESC_INDEX + DMA_RING_BUF_EN_SHIFT)); - - /* Enable rings */ - bcmgenet_rdma_writel(priv, ring_cfg, DMA_RING_CFG); + /* Configure Rx queues as descriptor rings */ + ring_mask = (1 << (priv->hw_params->rx_queues + 1)) - 1; + bcmgenet_rdma_writel(priv, ring_mask, DMA_RING_CFG); - /* Configure ring as descriptor ring and re-enable DMA if enabled */ - if (dma_enable) - dma_ctrl |= DMA_EN; - bcmgenet_rdma_writel(priv, dma_ctrl, DMA_CTRL); + /* Enable Rx rings */ + ring_mask <<= DMA_RING_BUF_EN_SHIFT; + bcmgenet_rdma_writel(priv, ring_mask, DMA_CTRL); return 0; } @@ -2966,26 +2924,9 @@ static int bcmgenet_init_rx_queues(struct net_device *dev) static int bcmgenet_dma_teardown(struct bcmgenet_priv *priv) { int ret = 0; - int timeout = 0; - u32 reg; - u32 dma_ctrl; - int i; /* Disable TDMA to stop add more frames in TX DMA */ - reg = bcmgenet_tdma_readl(priv, DMA_CTRL); - reg &= ~DMA_EN; - bcmgenet_tdma_writel(priv, reg, DMA_CTRL); - - /* Check TDMA status register to confirm TDMA is disabled */ - while (timeout++ < DMA_TIMEOUT_VAL) { - reg = bcmgenet_tdma_readl(priv, DMA_STATUS); - if (reg & DMA_DISABLED) - break; - - udelay(1); - } - - if (timeout == DMA_TIMEOUT_VAL) { + if (-ETIMEDOUT == bcmgenet_tdma_disable(priv)) { netdev_warn(priv->dev, 
"Timed out while disabling TX DMA\n"); ret = -ETIMEDOUT; } @@ -2994,39 +2935,11 @@ static int bcmgenet_dma_teardown(struct bcmgenet_priv *priv) usleep_range(10000, 20000); /* Disable RDMA */ - reg = bcmgenet_rdma_readl(priv, DMA_CTRL); - reg &= ~DMA_EN; - bcmgenet_rdma_writel(priv, reg, DMA_CTRL); - - timeout = 0; - /* Check RDMA status register to confirm RDMA is disabled */ - while (timeout++ < DMA_TIMEOUT_VAL) { - reg = bcmgenet_rdma_readl(priv, DMA_STATUS); - if (reg & DMA_DISABLED) - break; - - udelay(1); - } - - if (timeout == DMA_TIMEOUT_VAL) { + if (-ETIMEDOUT == bcmgenet_rdma_disable(priv)) { netdev_warn(priv->dev, "Timed out while disabling RX DMA\n"); ret = -ETIMEDOUT; } - dma_ctrl = 0; - for (i = 0; i < priv->hw_params->rx_queues; i++) - dma_ctrl |= (1 << (i + DMA_RING_BUF_EN_SHIFT)); - reg = bcmgenet_rdma_readl(priv, DMA_CTRL); - reg &= ~dma_ctrl; - bcmgenet_rdma_writel(priv, reg, DMA_CTRL); - - dma_ctrl = 0; - for (i = 0; i < priv->hw_params->tx_queues; i++) - dma_ctrl |= (1 << (i + DMA_RING_BUF_EN_SHIFT)); - reg = bcmgenet_tdma_readl(priv, DMA_CTRL); - reg &= ~dma_ctrl; - bcmgenet_tdma_writel(priv, reg, DMA_CTRL); - return ret; } @@ -3038,32 +2951,53 @@ static void bcmgenet_fini_dma(struct bcmgenet_priv *priv) bcmgenet_fini_rx_napi(priv); bcmgenet_fini_tx_napi(priv); - for (i = 0; i < priv->num_tx_bds; i++) - dev_kfree_skb(bcmgenet_free_tx_cb(&priv->pdev->dev, - priv->tx_cbs + i)); - - for (i = 0; i < priv->hw_params->tx_queues; i++) { - txq = netdev_get_tx_queue(priv->dev, priv->tx_rings[i].queue); + for (i = 0; i <= priv->hw_params->tx_queues; i++) { + txq = netdev_get_tx_queue(priv->dev, i); netdev_tx_reset_queue(txq); } - txq = netdev_get_tx_queue(priv->dev, priv->tx_rings[DESC_INDEX].queue); - netdev_tx_reset_queue(txq); - bcmgenet_free_rx_buffers(priv); kfree(priv->rx_cbs); kfree(priv->tx_cbs); } /* init_edma: Initialize DMA control register */ -static int bcmgenet_init_dma(struct bcmgenet_priv *priv) +static int bcmgenet_init_dma(struct bcmgenet_priv *priv, bool flush_rx) { - int ret; - unsigned int i; struct enet_cb *cb; + unsigned int i; + int ret; + u32 reg; netif_dbg(priv, hw, priv->dev, "%s\n", __func__); + /* Disable TX DMA */ + ret = bcmgenet_tdma_disable(priv); + if (ret) { + netdev_err(priv->dev, "failed to halt Tx DMA\n"); + return ret; + } + + /* Disable RX DMA */ + ret = bcmgenet_rdma_disable(priv); + if (ret) { + netdev_err(priv->dev, "failed to halt Rx DMA\n"); + return ret; + } + + /* Flush TX queues */ + bcmgenet_umac_writel(priv, 1, UMAC_TX_FLUSH); + udelay(10); + bcmgenet_umac_writel(priv, 0, UMAC_TX_FLUSH); + + if (flush_rx) { + reg = bcmgenet_rbuf_ctrl_get(priv); + bcmgenet_rbuf_ctrl_set(priv, reg | BIT(0)); + udelay(10); + bcmgenet_rbuf_ctrl_set(priv, reg); + udelay(10); + } + /* Initialize common Rx ring structures */ priv->rx_bds = priv->base + priv->hw_params->rdma_offset; priv->num_rx_bds = TOTAL_DESC; @@ -3113,6 +3047,15 @@ static int bcmgenet_init_dma(struct bcmgenet_priv *priv) /* Initialize Tx queues */ bcmgenet_init_tx_queues(priv->dev); + /* Enable RX/TX DMA */ + reg = bcmgenet_rdma_readl(priv, DMA_CTRL); + reg |= DMA_EN; + bcmgenet_rdma_writel(priv, reg, DMA_CTRL); + + reg = bcmgenet_tdma_readl(priv, DMA_CTRL); + reg |= DMA_EN; + bcmgenet_tdma_writel(priv, reg, DMA_CTRL); + return 0; } @@ -3142,7 +3085,7 @@ static void bcmgenet_irq_task(struct work_struct *work) } -/* bcmgenet_isr1: handle Rx and Tx priority queues */ +/* bcmgenet_isr1: handle Rx and Tx queues */ static irqreturn_t bcmgenet_isr1(int irq, void *dev_id) { struct 
bcmgenet_priv *priv = dev_id; @@ -3161,7 +3104,7 @@ static irqreturn_t bcmgenet_isr1(int irq, void *dev_id) "%s: IRQ=0x%x\n", __func__, status); /* Check Rx priority queue interrupts */ - for (index = 0; index < priv->hw_params->rx_queues; index++) { + for (index = 0; index <= priv->hw_params->rx_queues; index++) { if (!(status & BIT(UMAC_IRQ1_RX_INTR_SHIFT + index))) continue; @@ -3169,20 +3112,20 @@ static irqreturn_t bcmgenet_isr1(int irq, void *dev_id) rx_ring->dim.event_ctr++; if (likely(napi_schedule_prep(&rx_ring->napi))) { - rx_ring->int_disable(rx_ring); + bcmgenet_rx_ring_int_disable(rx_ring); __napi_schedule_irqoff(&rx_ring->napi); } } /* Check Tx priority queue interrupts */ - for (index = 0; index < priv->hw_params->tx_queues; index++) { + for (index = 0; index <= priv->hw_params->tx_queues; index++) { if (!(status & BIT(index))) continue; tx_ring = &priv->tx_rings[index]; if (likely(napi_schedule_prep(&tx_ring->napi))) { - tx_ring->int_disable(tx_ring); + bcmgenet_tx_ring_int_disable(tx_ring); __napi_schedule_irqoff(&tx_ring->napi); } } @@ -3190,12 +3133,10 @@ static irqreturn_t bcmgenet_isr1(int irq, void *dev_id) return IRQ_HANDLED; } -/* bcmgenet_isr0: handle Rx and Tx default queues + other stuff */ +/* bcmgenet_isr0: handle other stuff */ static irqreturn_t bcmgenet_isr0(int irq, void *dev_id) { struct bcmgenet_priv *priv = dev_id; - struct bcmgenet_rx_ring *rx_ring; - struct bcmgenet_tx_ring *tx_ring; unsigned int status; unsigned long flags; @@ -3209,29 +3150,8 @@ static irqreturn_t bcmgenet_isr0(int irq, void *dev_id) netif_dbg(priv, intr, priv->dev, "IRQ=0x%x\n", status); - if (status & UMAC_IRQ_RXDMA_DONE) { - rx_ring = &priv->rx_rings[DESC_INDEX]; - rx_ring->dim.event_ctr++; - - if (likely(napi_schedule_prep(&rx_ring->napi))) { - rx_ring->int_disable(rx_ring); - __napi_schedule_irqoff(&rx_ring->napi); - } - } - - if (status & UMAC_IRQ_TXDMA_DONE) { - tx_ring = &priv->tx_rings[DESC_INDEX]; - - if (likely(napi_schedule_prep(&tx_ring->napi))) { - tx_ring->int_disable(tx_ring); - __napi_schedule_irqoff(&tx_ring->napi); - } - } - - if ((priv->hw_params->flags & GENET_HAS_MDIO_INTR) && - status & (UMAC_IRQ_MDIO_DONE | UMAC_IRQ_MDIO_ERROR)) { + if (bcmgenet_has_mdio_intr(priv) && status & UMAC_IRQ_MDIO_EVENT) wake_up(&priv->wq); - } /* all other interested interrupts handled in bottom half */ status &= (UMAC_IRQ_LINK_EVENT | UMAC_IRQ_PHY_DET_R); @@ -3285,56 +3205,6 @@ static void bcmgenet_get_hw_addr(struct bcmgenet_priv *priv, put_unaligned_be16(addr_tmp, &addr[4]); } -/* Returns a reusable dma control register value */ -static u32 bcmgenet_dma_disable(struct bcmgenet_priv *priv, bool flush_rx) -{ - unsigned int i; - u32 reg; - u32 dma_ctrl; - - /* disable DMA */ - dma_ctrl = 1 << (DESC_INDEX + DMA_RING_BUF_EN_SHIFT) | DMA_EN; - for (i = 0; i < priv->hw_params->tx_queues; i++) - dma_ctrl |= (1 << (i + DMA_RING_BUF_EN_SHIFT)); - reg = bcmgenet_tdma_readl(priv, DMA_CTRL); - reg &= ~dma_ctrl; - bcmgenet_tdma_writel(priv, reg, DMA_CTRL); - - dma_ctrl = 1 << (DESC_INDEX + DMA_RING_BUF_EN_SHIFT) | DMA_EN; - for (i = 0; i < priv->hw_params->rx_queues; i++) - dma_ctrl |= (1 << (i + DMA_RING_BUF_EN_SHIFT)); - reg = bcmgenet_rdma_readl(priv, DMA_CTRL); - reg &= ~dma_ctrl; - bcmgenet_rdma_writel(priv, reg, DMA_CTRL); - - bcmgenet_umac_writel(priv, 1, UMAC_TX_FLUSH); - udelay(10); - bcmgenet_umac_writel(priv, 0, UMAC_TX_FLUSH); - - if (flush_rx) { - reg = bcmgenet_rbuf_ctrl_get(priv); - bcmgenet_rbuf_ctrl_set(priv, reg | BIT(0)); - udelay(10); - bcmgenet_rbuf_ctrl_set(priv, reg); - 
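bcmgenet_isr1() above now serves every ring, default included. A standalone decode of one INTRL2_1 status word under that scheme; the Rx shift value is an assumption for the demo.

#include <stdint.h>
#include <stdio.h>

#define UMAC_IRQ1_RX_INTR_SHIFT 16	/* assumed: Rx bits above Tx */

int main(void)
{
	/* Tx ring i raises bit i; Rx ring i raises bit SHIFT + i */
	uint32_t status = (1u << 2) | (1u << (UMAC_IRQ1_RX_INTR_SHIFT + 0));
	unsigned int queues = 4, i;

	for (i = 0; i <= queues; i++) {
		if (status & (1u << i))
			printf("Tx ring %u completed\n", i);
		if (status & (1u << (UMAC_IRQ1_RX_INTR_SHIFT + i)))
			printf("Rx ring %u has packets\n", i);
	}
	return 0;
}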
udelay(10); - } - - return dma_ctrl; -} - -static void bcmgenet_enable_dma(struct bcmgenet_priv *priv, u32 dma_ctrl) -{ - u32 reg; - - reg = bcmgenet_rdma_readl(priv, DMA_CTRL); - reg |= dma_ctrl; - bcmgenet_rdma_writel(priv, reg, DMA_CTRL); - - reg = bcmgenet_tdma_readl(priv, DMA_CTRL); - reg |= dma_ctrl; - bcmgenet_tdma_writel(priv, reg, DMA_CTRL); -} - static void bcmgenet_netif_start(struct net_device *dev) { struct bcmgenet_priv *priv = netdev_priv(dev); @@ -3358,7 +3228,6 @@ static void bcmgenet_netif_start(struct net_device *dev) static int bcmgenet_open(struct net_device *dev) { struct bcmgenet_priv *priv = netdev_priv(dev); - unsigned long dma_ctrl; int ret; netif_dbg(priv, ifup, dev, "bcmgenet_open\n"); @@ -3384,22 +3253,16 @@ static int bcmgenet_open(struct net_device *dev) bcmgenet_set_hw_addr(priv, dev->dev_addr); - /* Disable RX/TX DMA and flush TX and RX queues */ - dma_ctrl = bcmgenet_dma_disable(priv, true); + /* HFB init */ + bcmgenet_hfb_init(priv); /* Reinitialize TDMA and RDMA and SW housekeeping */ - ret = bcmgenet_init_dma(priv); + ret = bcmgenet_init_dma(priv, true); if (ret) { netdev_err(dev, "failed to initialize DMA\n"); goto err_clk_disable; } - /* Always enable ring 16 - descriptor ring */ - bcmgenet_enable_dma(priv, dma_ctrl); - - /* HFB init */ - bcmgenet_hfb_init(priv); - ret = request_irq(priv->irq0, bcmgenet_isr0, IRQF_SHARED, dev->name, priv); if (ret < 0) { @@ -3446,19 +3309,21 @@ static void bcmgenet_netif_stop(struct net_device *dev, bool stop_phy) { struct bcmgenet_priv *priv = netdev_priv(dev); - bcmgenet_disable_tx_napi(priv); netif_tx_disable(dev); /* Disable MAC receive */ + bcmgenet_hfb_reg_writel(priv, 0, HFB_CTRL); umac_enable_set(priv, CMD_RX_EN, false); + if (stop_phy) + phy_stop(dev->phydev); + bcmgenet_dma_teardown(priv); /* Disable MAC transmit. TX DMA disabled must be done before this */ umac_enable_set(priv, CMD_TX_EN, false); - if (stop_phy) - phy_stop(dev->phydev); + bcmgenet_disable_tx_napi(priv); bcmgenet_disable_rx_napi(priv); bcmgenet_intr_disable(priv); @@ -3506,16 +3371,11 @@ static void bcmgenet_dump_tx_queue(struct bcmgenet_tx_ring *ring) if (!netif_msg_tx_err(priv)) return; - txq = netdev_get_tx_queue(priv->dev, ring->queue); + txq = netdev_get_tx_queue(priv->dev, ring->index); spin_lock(&ring->lock); - if (ring->index == DESC_INDEX) { - intsts = ~bcmgenet_intrl2_0_readl(priv, INTRL2_CPU_MASK_STATUS); - intmsk = UMAC_IRQ_TXDMA_DONE | UMAC_IRQ_TXDMA_MBDONE; - } else { - intsts = ~bcmgenet_intrl2_1_readl(priv, INTRL2_CPU_MASK_STATUS); - intmsk = 1 << ring->index; - } + intsts = ~bcmgenet_intrl2_1_readl(priv, INTRL2_CPU_MASK_STATUS); + intmsk = 1 << ring->index; c_index = bcmgenet_tdma_ring_readl(priv, ring->index, TDMA_CONS_INDEX); p_index = bcmgenet_tdma_ring_readl(priv, ring->index, TDMA_PROD_INDEX); txq_stopped = netif_tx_queue_stopped(txq); @@ -3529,7 +3389,7 @@ static void bcmgenet_dump_tx_queue(struct bcmgenet_tx_ring *ring) "(sw)c_index: %d (hw)c_index: %d\n" "(sw)clean_p: %d (sw)write_p: %d\n" "(sw)cb_ptr: %d (sw)end_ptr: %d\n", - ring->index, ring->queue, + ring->index, ring->index, txq_stopped ? "stopped" : "active", intsts & intmsk ? 
"enabled" : "disabled", free_bds, ring->size, @@ -3542,25 +3402,20 @@ static void bcmgenet_dump_tx_queue(struct bcmgenet_tx_ring *ring) static void bcmgenet_timeout(struct net_device *dev, unsigned int txqueue) { struct bcmgenet_priv *priv = netdev_priv(dev); - u32 int0_enable = 0; u32 int1_enable = 0; unsigned int q; netif_dbg(priv, tx_err, dev, "bcmgenet_timeout\n"); - for (q = 0; q < priv->hw_params->tx_queues; q++) + for (q = 0; q <= priv->hw_params->tx_queues; q++) bcmgenet_dump_tx_queue(&priv->tx_rings[q]); - bcmgenet_dump_tx_queue(&priv->tx_rings[DESC_INDEX]); bcmgenet_tx_reclaim_all(dev); - for (q = 0; q < priv->hw_params->tx_queues; q++) + for (q = 0; q <= priv->hw_params->tx_queues; q++) int1_enable |= (1 << q); - int0_enable = UMAC_IRQ_TXDMA_DONE; - /* Re-enable TX interrupts if disabled */ - bcmgenet_intrl2_0_writel(priv, int0_enable, INTRL2_CPU_MASK_CLEAR); bcmgenet_intrl2_1_writel(priv, int1_enable, INTRL2_CPU_MASK_CLEAR); netif_trans_update(dev); @@ -3664,16 +3519,13 @@ static struct net_device_stats *bcmgenet_get_stats(struct net_device *dev) struct bcmgenet_rx_ring *rx_ring; unsigned int q; - for (q = 0; q < priv->hw_params->tx_queues; q++) { + for (q = 0; q <= priv->hw_params->tx_queues; q++) { tx_ring = &priv->tx_rings[q]; tx_bytes += tx_ring->bytes; tx_packets += tx_ring->packets; } - tx_ring = &priv->tx_rings[DESC_INDEX]; - tx_bytes += tx_ring->bytes; - tx_packets += tx_ring->packets; - for (q = 0; q < priv->hw_params->rx_queues; q++) { + for (q = 0; q <= priv->hw_params->rx_queues; q++) { rx_ring = &priv->rx_rings[q]; rx_bytes += rx_ring->bytes; @@ -3681,11 +3533,6 @@ static struct net_device_stats *bcmgenet_get_stats(struct net_device *dev) rx_errors += rx_ring->errors; rx_dropped += rx_ring->dropped; } - rx_ring = &priv->rx_rings[DESC_INDEX]; - rx_bytes += rx_ring->bytes; - rx_packets += rx_ring->packets; - rx_errors += rx_ring->errors; - rx_dropped += rx_ring->dropped; dev->stats.tx_bytes = tx_bytes; dev->stats.tx_packets = tx_packets; @@ -3726,128 +3573,109 @@ static const struct net_device_ops bcmgenet_netdev_ops = { .ndo_change_carrier = bcmgenet_change_carrier, }; -/* Array of GENET hardware parameters/characteristics */ -static struct bcmgenet_hw_params bcmgenet_hw_params[] = { - [GENET_V1] = { - .tx_queues = 0, - .tx_bds_per_q = 0, - .rx_queues = 0, - .rx_bds_per_q = 0, - .bp_in_en_shift = 16, - .bp_in_mask = 0xffff, - .hfb_filter_cnt = 16, - .qtag_mask = 0x1F, - .hfb_offset = 0x1000, - .rdma_offset = 0x2000, - .tdma_offset = 0x3000, - .words_per_bd = 2, - }, - [GENET_V2] = { - .tx_queues = 4, - .tx_bds_per_q = 32, - .rx_queues = 0, - .rx_bds_per_q = 0, - .bp_in_en_shift = 16, - .bp_in_mask = 0xffff, - .hfb_filter_cnt = 16, - .qtag_mask = 0x1F, - .tbuf_offset = 0x0600, - .hfb_offset = 0x1000, - .hfb_reg_offset = 0x2000, - .rdma_offset = 0x3000, - .tdma_offset = 0x4000, - .words_per_bd = 2, - .flags = GENET_HAS_EXT, - }, - [GENET_V3] = { - .tx_queues = 4, - .tx_bds_per_q = 32, - .rx_queues = 0, - .rx_bds_per_q = 0, - .bp_in_en_shift = 17, - .bp_in_mask = 0x1ffff, - .hfb_filter_cnt = 48, - .hfb_filter_size = 128, - .qtag_mask = 0x3F, - .tbuf_offset = 0x0600, - .hfb_offset = 0x8000, - .hfb_reg_offset = 0xfc00, - .rdma_offset = 0x10000, - .tdma_offset = 0x11000, - .words_per_bd = 2, - .flags = GENET_HAS_EXT | GENET_HAS_MDIO_INTR | - GENET_HAS_MOCA_LINK_DET, - }, - [GENET_V4] = { - .tx_queues = 4, - .tx_bds_per_q = 32, - .rx_queues = 0, - .rx_bds_per_q = 0, - .bp_in_en_shift = 17, - .bp_in_mask = 0x1ffff, - .hfb_filter_cnt = 48, - .hfb_filter_size = 128, - 
.qtag_mask = 0x3F, - .tbuf_offset = 0x0600, - .hfb_offset = 0x8000, - .hfb_reg_offset = 0xfc00, - .rdma_offset = 0x2000, - .tdma_offset = 0x4000, - .words_per_bd = 3, - .flags = GENET_HAS_40BITS | GENET_HAS_EXT | - GENET_HAS_MDIO_INTR | GENET_HAS_MOCA_LINK_DET, - }, - [GENET_V5] = { - .tx_queues = 4, - .tx_bds_per_q = 32, - .rx_queues = 0, - .rx_bds_per_q = 0, - .bp_in_en_shift = 17, - .bp_in_mask = 0x1ffff, - .hfb_filter_cnt = 48, - .hfb_filter_size = 128, - .qtag_mask = 0x3F, - .tbuf_offset = 0x0600, - .hfb_offset = 0x8000, - .hfb_reg_offset = 0xfc00, - .rdma_offset = 0x2000, - .tdma_offset = 0x4000, - .words_per_bd = 3, - .flags = GENET_HAS_40BITS | GENET_HAS_EXT | - GENET_HAS_MDIO_INTR | GENET_HAS_MOCA_LINK_DET, - }, +/* GENET hardware parameters/characteristics */ +static const struct bcmgenet_hw_params bcmgenet_hw_params_v1 = { + .tx_queues = 0, + .tx_bds_per_q = 0, + .rx_queues = 0, + .rx_bds_per_q = 0, + .bp_in_en_shift = 16, + .bp_in_mask = 0xffff, + .hfb_filter_cnt = 16, + .hfb_filter_size = 64, + .qtag_mask = 0x1F, + .hfb_offset = 0x1000, + .hfb_reg_offset = GENET_RBUF_OFF + RBUF_HFB_CTRL_V1, + .rdma_offset = 0x2000, + .tdma_offset = 0x3000, + .words_per_bd = 2, +}; + +static const struct bcmgenet_hw_params bcmgenet_hw_params_v2 = { + .tx_queues = 4, + .tx_bds_per_q = 32, + .rx_queues = 0, + .rx_bds_per_q = 0, + .bp_in_en_shift = 16, + .bp_in_mask = 0xffff, + .hfb_filter_cnt = 16, + .hfb_filter_size = 64, + .qtag_mask = 0x1F, + .tbuf_offset = 0x0600, + .hfb_offset = 0x1000, + .hfb_reg_offset = 0x2000, + .rdma_offset = 0x3000, + .tdma_offset = 0x4000, + .words_per_bd = 2, +}; + +static const struct bcmgenet_hw_params bcmgenet_hw_params_v3 = { + .tx_queues = 4, + .tx_bds_per_q = 32, + .rx_queues = 0, + .rx_bds_per_q = 0, + .bp_in_en_shift = 17, + .bp_in_mask = 0x1ffff, + .hfb_filter_cnt = 48, + .hfb_filter_size = 128, + .qtag_mask = 0x3F, + .tbuf_offset = 0x0600, + .hfb_offset = 0x8000, + .hfb_reg_offset = 0xfc00, + .rdma_offset = 0x10000, + .tdma_offset = 0x11000, + .words_per_bd = 2, +}; + +static const struct bcmgenet_hw_params bcmgenet_hw_params_v4 = { + .tx_queues = 4, + .tx_bds_per_q = 32, + .rx_queues = 0, + .rx_bds_per_q = 0, + .bp_in_en_shift = 17, + .bp_in_mask = 0x1ffff, + .hfb_filter_cnt = 48, + .hfb_filter_size = 128, + .qtag_mask = 0x3F, + .tbuf_offset = 0x0600, + .hfb_offset = 0x8000, + .hfb_reg_offset = 0xfc00, + .rdma_offset = 0x2000, + .tdma_offset = 0x4000, + .words_per_bd = 3, }; /* Infer hardware parameters from the detected GENET version */ static void bcmgenet_set_hw_params(struct bcmgenet_priv *priv) { - struct bcmgenet_hw_params *params; + const struct bcmgenet_hw_params *params; u32 reg; u8 major; u16 gphy_rev; - if (GENET_IS_V5(priv) || GENET_IS_V4(priv)) { - bcmgenet_dma_regs = bcmgenet_dma_regs_v3plus; - genet_dma_ring_regs = genet_dma_ring_regs_v4; - } else if (GENET_IS_V3(priv)) { + /* default to latest values */ + params = &bcmgenet_hw_params_v4; + bcmgenet_dma_regs = bcmgenet_dma_regs_v3plus; + genet_dma_ring_regs = genet_dma_ring_regs_v4; + if (GENET_IS_V3(priv)) { + params = &bcmgenet_hw_params_v3; bcmgenet_dma_regs = bcmgenet_dma_regs_v3plus; genet_dma_ring_regs = genet_dma_ring_regs_v123; } else if (GENET_IS_V2(priv)) { + params = &bcmgenet_hw_params_v2; bcmgenet_dma_regs = bcmgenet_dma_regs_v2; genet_dma_ring_regs = genet_dma_ring_regs_v123; } else if (GENET_IS_V1(priv)) { + params = &bcmgenet_hw_params_v1; bcmgenet_dma_regs = bcmgenet_dma_regs_v1; genet_dma_ring_regs = genet_dma_ring_regs_v123; } - - /* enum genet_version starts at 1 */ - 
priv->hw_params = &bcmgenet_hw_params[priv->version]; - params = priv->hw_params; + priv->hw_params = params; /* Read GENET HW version */ reg = bcmgenet_sys_readl(priv, SYS_REV_CTRL); major = (reg >> 24 & 0x0f); - if (major == 6) + if (major == 6 || major == 7) major = 5; else if (major == 5) major = 4; @@ -3898,7 +3726,7 @@ static void bcmgenet_set_hw_params(struct bcmgenet_priv *priv) } #ifdef CONFIG_PHYS_ADDR_T_64BIT - if (!(params->flags & GENET_HAS_40BITS)) + if (!bcmgenet_has_40bits(priv)) pr_warn("GENET does not support 40-bits PA\n"); #endif @@ -3923,7 +3751,7 @@ static void bcmgenet_set_hw_params(struct bcmgenet_priv *priv) struct bcmgenet_plat_data { enum bcmgenet_version version; u32 dma_max_burst_length; - bool ephy_16nm; + u32 flags; }; static const struct bcmgenet_plat_data v1_plat_data = { @@ -3934,32 +3762,43 @@ static const struct bcmgenet_plat_data v1_plat_data = { static const struct bcmgenet_plat_data v2_plat_data = { .version = GENET_V2, .dma_max_burst_length = DMA_MAX_BURST_LENGTH, + .flags = GENET_HAS_EXT, }; static const struct bcmgenet_plat_data v3_plat_data = { .version = GENET_V3, .dma_max_burst_length = DMA_MAX_BURST_LENGTH, + .flags = GENET_HAS_EXT | GENET_HAS_MDIO_INTR | + GENET_HAS_MOCA_LINK_DET, }; static const struct bcmgenet_plat_data v4_plat_data = { .version = GENET_V4, .dma_max_burst_length = DMA_MAX_BURST_LENGTH, + .flags = GENET_HAS_40BITS | GENET_HAS_EXT | + GENET_HAS_MDIO_INTR | GENET_HAS_MOCA_LINK_DET, }; static const struct bcmgenet_plat_data v5_plat_data = { .version = GENET_V5, .dma_max_burst_length = DMA_MAX_BURST_LENGTH, + .flags = GENET_HAS_40BITS | GENET_HAS_EXT | + GENET_HAS_MDIO_INTR | GENET_HAS_MOCA_LINK_DET, }; static const struct bcmgenet_plat_data bcm2711_plat_data = { .version = GENET_V5, .dma_max_burst_length = 0x08, + .flags = GENET_HAS_40BITS | GENET_HAS_EXT | + GENET_HAS_MDIO_INTR | GENET_HAS_MOCA_LINK_DET, }; static const struct bcmgenet_plat_data bcm7712_plat_data = { .version = GENET_V5, .dma_max_burst_length = DMA_MAX_BURST_LENGTH, - .ephy_16nm = true, + .flags = GENET_HAS_40BITS | GENET_HAS_EXT | + GENET_HAS_MDIO_INTR | GENET_HAS_MOCA_LINK_DET | + GENET_HAS_EPHY_16NM, }; static const struct of_device_id bcmgenet_match[] = { @@ -4057,7 +3896,7 @@ static int bcmgenet_probe(struct platform_device *pdev) if (pdata) { priv->version = pdata->version; priv->dma_max_burst_length = pdata->dma_max_burst_length; - priv->ephy_16nm = pdata->ephy_16nm; + priv->flags = pdata->flags; } else { priv->version = pd->genet_version; priv->dma_max_burst_length = DMA_MAX_BURST_LENGTH; @@ -4077,7 +3916,7 @@ static int bcmgenet_probe(struct platform_device *pdev) bcmgenet_set_hw_params(priv); err = -EIO; - if (priv->hw_params->flags & GENET_HAS_40BITS) + if (bcmgenet_has_40bits(priv)) err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(40)); if (err) err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); @@ -4132,16 +3971,13 @@ static int bcmgenet_probe(struct platform_device *pdev) if (err) goto err_clk_disable; - /* setup number of real queues + 1 (GENET_V1 has 0 hardware queues - * just the ring 16 descriptor based TX - */ + /* setup number of real queues + 1 */ netif_set_real_num_tx_queues(priv->dev, priv->hw_params->tx_queues + 1); netif_set_real_num_rx_queues(priv->dev, priv->hw_params->rx_queues + 1); /* Set default coalescing parameters */ - for (i = 0; i < priv->hw_params->rx_queues; i++) + for (i = 0; i <= priv->hw_params->rx_queues; i++) priv->rx_rings[i].rx_max_coalesced_frames = 1; - 
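/* With the default descriptor ring folded into the indexed ring arrays,
 * the queue loops in this patch become inclusive: rings 0..tx_queues-1
 * are the hardware priority queues and ring tx_queues is the former
 * DESC_INDEX (ring 16) default queue. A hedged sketch of the resulting
 * shape, mirroring the probe code above rather than quoting it:
 */
unsigned int q;

/* one netdev queue per priority ring plus one for the default ring */
netif_set_real_num_tx_queues(priv->dev, priv->hw_params->tx_queues + 1);

for (q = 0; q <= priv->hw_params->rx_queues; q++)       /* note: <=, not < */
        priv->rx_rings[q].rx_max_coalesced_frames = 1;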
priv->rx_rings[DESC_INDEX].rx_max_coalesced_frames = 1; /* libphy will determine the link state */ netif_carrier_off(dev); @@ -4205,9 +4041,22 @@ static int bcmgenet_resume_noirq(struct device *d) reg = bcmgenet_intrl2_0_readl(priv, INTRL2_CPU_STAT); if (reg & UMAC_IRQ_WAKE_EVENT) pm_wakeup_event(&priv->pdev->dev, 0); + + /* From WOL-enabled suspend, switch to regular clock */ + if (!bcmgenet_power_up(priv, GENET_POWER_WOL_MAGIC)) + return 0; + + /* Failed so fall through to reset MAC */ } - bcmgenet_intrl2_0_writel(priv, UMAC_IRQ_WAKE_EVENT, INTRL2_CPU_CLEAR); + /* If this is an internal GPHY, power it back on now, before UniMAC is + * brought out of reset as absolutely no UniMAC activity is allowed + */ + if (priv->internal_phy) + bcmgenet_power_up(priv, GENET_POWER_PASSIVE); + + /* take MAC out of reset */ + bcmgenet_umac_reset(priv); return 0; } @@ -4217,23 +4066,46 @@ static int bcmgenet_resume(struct device *d) struct net_device *dev = dev_get_drvdata(d); struct bcmgenet_priv *priv = netdev_priv(dev); struct bcmgenet_rxnfc_rule *rule; - unsigned long dma_ctrl; int ret; + u32 reg; if (!netif_running(dev)) return 0; - /* From WOL-enabled suspend, switch to regular clock */ - if (device_may_wakeup(d) && priv->wolopts) - bcmgenet_power_up(priv, GENET_POWER_WOL_MAGIC); - - /* If this is an internal GPHY, power it back on now, before UniMAC is - * brought out of reset as absolutely no UniMAC activity is allowed - */ - if (priv->internal_phy) - bcmgenet_power_up(priv, GENET_POWER_PASSIVE); - - bcmgenet_umac_reset(priv); + if (device_may_wakeup(d) && priv->wolopts) { + reg = bcmgenet_umac_readl(priv, UMAC_CMD); + if (reg & CMD_RX_EN) { + /* Successfully exited WoL, just resume data flows */ + list_for_each_entry(rule, &priv->rxnfc_list, list) + if (rule->state == BCMGENET_RXNFC_STATE_ENABLED) + bcmgenet_hfb_enable_filter(priv, + rule->fs.location + 1); + bcmgenet_hfb_enable_filter(priv, 0); + bcmgenet_set_rx_mode(dev); + bcmgenet_enable_rx_napi(priv); + + /* Reinitialize Tx flows */ + bcmgenet_tdma_disable(priv); + bcmgenet_init_tx_queues(priv->dev); + reg = bcmgenet_tdma_readl(priv, DMA_CTRL); + reg |= DMA_EN; + bcmgenet_tdma_writel(priv, reg, DMA_CTRL); + bcmgenet_enable_tx_napi(priv); + + bcmgenet_link_intr_enable(priv); + phy_start_machine(dev->phydev); + + netif_device_attach(dev); + enable_irq(priv->irq1); + return 0; + } + /* MAC was reset so complete bcmgenet_netif_stop() */ + umac_enable_set(priv, CMD_RX_EN | CMD_TX_EN, false); + bcmgenet_rdma_disable(priv); + bcmgenet_intr_disable(priv); + bcmgenet_fini_dma(priv); + enable_irq(priv->irq1); + } init_umac(priv); @@ -4254,19 +4126,13 @@ static int bcmgenet_resume(struct device *d) if (rule->state != BCMGENET_RXNFC_STATE_UNUSED) bcmgenet_hfb_create_rxnfc_filter(priv, rule); - /* Disable RX/TX DMA and flush TX queues */ - dma_ctrl = bcmgenet_dma_disable(priv, false); - /* Reinitialize TDMA and RDMA and SW housekeeping */ - ret = bcmgenet_init_dma(priv); + ret = bcmgenet_init_dma(priv, false); if (ret) { netdev_err(dev, "failed to initialize DMA\n"); goto out_clk_disable; } - /* Always enable ring 16 - descriptor ring */ - bcmgenet_enable_dma(priv, dma_ctrl); - if (!device_may_wakeup(d)) phy_resume(dev->phydev); @@ -4287,19 +4153,52 @@ static int bcmgenet_suspend(struct device *d) { struct net_device *dev = dev_get_drvdata(d); struct bcmgenet_priv *priv = netdev_priv(dev); + struct bcmgenet_rxnfc_rule *rule; + u32 reg, hfb_enable = 0; if (!netif_running(dev)) return 0; netif_device_detach(dev); - bcmgenet_netif_stop(dev, true); + if 
(device_may_wakeup(d) && priv->wolopts) { + netif_tx_disable(dev); + + /* Suspend non-wake Rx data flows */ + if (priv->wolopts & WAKE_FILTER) + list_for_each_entry(rule, &priv->rxnfc_list, list) + if (rule->fs.ring_cookie == RX_CLS_FLOW_WAKE && + rule->state == BCMGENET_RXNFC_STATE_ENABLED) + hfb_enable |= 1 << rule->fs.location; + reg = bcmgenet_hfb_reg_readl(priv, HFB_CTRL); + if (GENET_IS_V1(priv) || GENET_IS_V2(priv)) { + reg &= ~RBUF_HFB_FILTER_EN_MASK; + reg |= hfb_enable << (RBUF_HFB_FILTER_EN_SHIFT + 1); + } else { + bcmgenet_hfb_reg_writel(priv, hfb_enable << 1, + HFB_FLT_ENABLE_V3PLUS + 4); + } + if (!hfb_enable) + reg &= ~RBUF_HFB_EN; + bcmgenet_hfb_reg_writel(priv, reg, HFB_CTRL); - if (!device_may_wakeup(d)) - phy_suspend(dev->phydev); + /* Clear any old filter matches so only new matches wake */ + bcmgenet_intrl2_0_writel(priv, 0xFFFFFFFF, INTRL2_CPU_MASK_SET); + bcmgenet_intrl2_0_writel(priv, 0xFFFFFFFF, INTRL2_CPU_CLEAR); - /* Disable filtering */ - bcmgenet_hfb_reg_writel(priv, 0, HFB_CTRL); + if (-ETIMEDOUT == bcmgenet_tdma_disable(priv)) + netdev_warn(priv->dev, + "Timed out while disabling TX DMA\n"); + + bcmgenet_disable_tx_napi(priv); + bcmgenet_disable_rx_napi(priv); + disable_irq(priv->irq1); + bcmgenet_tx_reclaim_all(dev); + bcmgenet_fini_tx_napi(priv); + } else { + /* Teardown the interface */ + bcmgenet_netif_stop(dev, true); + } return 0; } diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h index 43b923c48b14..10c631bbe964 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (c) 2014-2024 Broadcom + * Copyright (c) 2014-2025 Broadcom */ #ifndef __BCMGENET_H__ @@ -18,6 +18,9 @@ #include "../unimac.h" +/* Maximum number of hardware queues, downsized if needed */ +#define GENET_MAX_MQ_CNT 4 + /* total number of Buffer Descriptors, same for Rx/Tx */ #define TOTAL_DESC 256 @@ -271,6 +274,8 @@ struct bcmgenet_mib_counters { /* Only valid for GENETv3+ */ #define UMAC_IRQ_MDIO_DONE (1 << 23) #define UMAC_IRQ_MDIO_ERROR (1 << 24) +#define UMAC_IRQ_MDIO_EVENT (UMAC_IRQ_MDIO_DONE | \ + UMAC_IRQ_MDIO_ERROR) /* INTRL2 instance 1 definitions */ #define UMAC_IRQ1_TX_INTR_MASK 0xFFFF @@ -476,6 +481,7 @@ enum bcmgenet_version { #define GENET_HAS_EXT (1 << 1) #define GENET_HAS_MDIO_INTR (1 << 2) #define GENET_HAS_MOCA_LINK_DET (1 << 3) +#define GENET_HAS_EPHY_16NM (1 << 4) /* BCMGENET hardware parameters, keep this structure nicely aligned * since it is going to be used in hot paths @@ -496,7 +502,6 @@ struct bcmgenet_hw_params { u32 rdma_offset; u32 tdma_offset; u32 words_per_bd; - u32 flags; }; struct bcmgenet_skb_cb { @@ -513,7 +518,6 @@ struct bcmgenet_tx_ring { unsigned long packets; unsigned long bytes; unsigned int index; /* ring index */ - unsigned int queue; /* queue index */ struct enet_cb *cbs; /* tx ring buffer control block*/ unsigned int size; /* size of each tx ring */ unsigned int clean_ptr; /* Tx ring clean pointer */ @@ -523,8 +527,6 @@ struct bcmgenet_tx_ring { unsigned int prod_index; /* Tx ring producer index SW copy */ unsigned int cb_ptr; /* Tx ring initial CB ptr */ unsigned int end_ptr; /* Tx ring end CB ptr */ - void (*int_enable)(struct bcmgenet_tx_ring *); - void (*int_disable)(struct bcmgenet_tx_ring *); struct bcmgenet_priv *priv; }; @@ -553,8 +555,6 @@ struct bcmgenet_rx_ring { struct bcmgenet_net_dim dim; u32 rx_max_coalesced_frames; u32 rx_coalesce_usecs; 
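/* Hedged reading of the WoL filter programming in the suspend path above:
 * an rxnfc rule stored at fs.location is enabled as HFB filter
 * (location + 1); note the "hfb_enable << 1" writes here and the matching
 * "rule->fs.location + 1" in the resume path, which keep hardware filter 0
 * outside the rxnfc namespace (resume enables filter 0 separately).
 * Illustrative helper only, not part of the driver:
 */
static inline u32 bcmgenet_rxnfc_to_hfb_filter(u32 location)
{
        return location + 1;    /* HFB filter 0 is reserved by the driver */
}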
- void (*int_enable)(struct bcmgenet_rx_ring *); - void (*int_disable)(struct bcmgenet_rx_ring *); struct bcmgenet_priv *priv; }; @@ -583,7 +583,7 @@ struct bcmgenet_priv { struct enet_cb *tx_cbs; unsigned int num_tx_bds; - struct bcmgenet_tx_ring tx_rings[DESC_INDEX + 1]; + struct bcmgenet_tx_ring tx_rings[GENET_MAX_MQ_CNT + 1]; /* receive variables */ void __iomem *rx_bds; @@ -593,10 +593,11 @@ struct bcmgenet_priv { struct bcmgenet_rxnfc_rule rxnfc_rules[MAX_NUM_OF_FS_RULES]; struct list_head rxnfc_list; - struct bcmgenet_rx_ring rx_rings[DESC_INDEX + 1]; + struct bcmgenet_rx_ring rx_rings[GENET_MAX_MQ_CNT + 1]; /* other misc variables */ - struct bcmgenet_hw_params *hw_params; + const struct bcmgenet_hw_params *hw_params; + u32 flags; unsigned autoneg_pause:1; unsigned tx_pause:1; unsigned rx_pause:1; @@ -615,7 +616,6 @@ struct bcmgenet_priv { phy_interface_t phy_interface; int phy_addr; int ext_phy; - bool ephy_16nm; /* Interrupt variables */ struct work_struct bcmgenet_irq_work; @@ -643,13 +643,37 @@ struct bcmgenet_priv { struct clk *clk_wol; u32 wolopts; u8 sopass[SOPASS_MAX]; - bool wol_active; struct bcmgenet_mib_counters mib; struct ethtool_keee eee; }; +static inline bool bcmgenet_has_40bits(struct bcmgenet_priv *priv) +{ + return !!(priv->flags & GENET_HAS_40BITS); +} + +static inline bool bcmgenet_has_ext(struct bcmgenet_priv *priv) +{ + return !!(priv->flags & GENET_HAS_EXT); +} + +static inline bool bcmgenet_has_mdio_intr(struct bcmgenet_priv *priv) +{ + return !!(priv->flags & GENET_HAS_MDIO_INTR); +} + +static inline bool bcmgenet_has_moca_link_det(struct bcmgenet_priv *priv) +{ + return !!(priv->flags & GENET_HAS_MOCA_LINK_DET); +} + +static inline bool bcmgenet_has_ephy_16nm(struct bcmgenet_priv *priv) +{ + return !!(priv->flags & GENET_HAS_EPHY_16NM); +} + #define GENET_IO_MACRO(name, offset) \ static inline u32 bcmgenet_##name##_readl(struct bcmgenet_priv *priv, \ u32 off) \ @@ -702,8 +726,8 @@ void bcmgenet_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol); int bcmgenet_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol); int bcmgenet_wol_power_down_cfg(struct bcmgenet_priv *priv, enum bcmgenet_power_mode mode); -void bcmgenet_wol_power_up_cfg(struct bcmgenet_priv *priv, - enum bcmgenet_power_mode mode); +int bcmgenet_wol_power_up_cfg(struct bcmgenet_priv *priv, + enum bcmgenet_power_mode mode); void bcmgenet_eee_enable_set(struct net_device *dev, bool enable, bool tx_lpi_enabled); diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c index 3b082114f2e5..8fb551288298 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c @@ -2,7 +2,7 @@ /* * Broadcom GENET (Gigabit Ethernet) Wake-on-LAN support * - * Copyright (c) 2014-2024 Broadcom + * Copyright (c) 2014-2025 Broadcom */ #define pr_fmt(fmt) "bcmgenet_wol: " fmt @@ -145,8 +145,7 @@ int bcmgenet_wol_power_down_cfg(struct bcmgenet_priv *priv, enum bcmgenet_power_mode mode) { struct net_device *dev = priv->dev; - struct bcmgenet_rxnfc_rule *rule; - u32 reg, hfb_ctrl_reg, hfb_enable = 0; + u32 reg, hfb_ctrl_reg; int retries = 0; if (mode != GENET_POWER_WOL_MAGIC) { @@ -154,18 +153,6 @@ int bcmgenet_wol_power_down_cfg(struct bcmgenet_priv *priv, return -EINVAL; } - /* Can't suspend with WoL if MAC is still in reset */ - spin_lock_bh(&priv->reg_lock); - reg = bcmgenet_umac_readl(priv, UMAC_CMD); - if (reg & CMD_SW_RESET) - reg &= ~CMD_SW_RESET; - - /* disable RX */ - reg &= 
~CMD_RX_EN; - bcmgenet_umac_writel(priv, reg, UMAC_CMD); - spin_unlock_bh(&priv->reg_lock); - mdelay(10); - if (priv->wolopts & (WAKE_MAGIC | WAKE_MAGICSECURE)) { reg = bcmgenet_umac_readl(priv, UMAC_MPD_CTRL); reg |= MPD_EN; @@ -177,13 +164,8 @@ int bcmgenet_wol_power_down_cfg(struct bcmgenet_priv *priv, } hfb_ctrl_reg = bcmgenet_hfb_reg_readl(priv, HFB_CTRL); - if (priv->wolopts & WAKE_FILTER) { - list_for_each_entry(rule, &priv->rxnfc_list, list) - if (rule->fs.ring_cookie == RX_CLS_FLOW_WAKE) - hfb_enable |= (1 << rule->fs.location); - reg = (hfb_ctrl_reg & ~RBUF_HFB_EN) | RBUF_ACPI_EN; - bcmgenet_hfb_reg_writel(priv, reg, HFB_CTRL); - } + reg = hfb_ctrl_reg | RBUF_ACPI_EN; + bcmgenet_hfb_reg_writel(priv, reg, HFB_CTRL); /* Do not leave UniMAC in MPD mode only */ retries = bcmgenet_poll_wol_status(priv); @@ -198,15 +180,12 @@ int bcmgenet_wol_power_down_cfg(struct bcmgenet_priv *priv, netif_dbg(priv, wol, dev, "MPD WOL-ready status set after %d msec\n", retries); - clk_prepare_enable(priv->clk_wol); - priv->wol_active = 1; + /* Disable phy status updates while suspending */ + mutex_lock(&dev->phydev->lock); + dev->phydev->state = PHY_READY; + mutex_unlock(&dev->phydev->lock); - if (hfb_enable) { - bcmgenet_hfb_reg_writel(priv, hfb_enable, - HFB_FLT_ENABLE_V3PLUS + 4); - hfb_ctrl_reg = RBUF_HFB_EN | RBUF_ACPI_EN; - bcmgenet_hfb_reg_writel(priv, hfb_ctrl_reg, HFB_CTRL); - } + clk_prepare_enable(priv->clk_wol); /* Enable CRC forward */ spin_lock_bh(&priv->reg_lock); @@ -214,13 +193,17 @@ int bcmgenet_wol_power_down_cfg(struct bcmgenet_priv *priv, priv->crc_fwd_en = 1; reg |= CMD_CRC_FWD; + /* Can't suspend with WoL if MAC is still in reset */ + if (reg & CMD_SW_RESET) + reg &= ~CMD_SW_RESET; + /* Receiver must be enabled for WOL MP detection */ reg |= CMD_RX_EN; bcmgenet_umac_writel(priv, reg, UMAC_CMD); spin_unlock_bh(&priv->reg_lock); reg = UMAC_IRQ_MPD_R; - if (hfb_enable) + if (hfb_ctrl_reg & RBUF_HFB_EN) reg |= UMAC_IRQ_HFB_SM | UMAC_IRQ_HFB_MM; bcmgenet_intrl2_0_writel(priv, reg, INTRL2_CPU_MASK_CLEAR); @@ -228,40 +211,42 @@ int bcmgenet_wol_power_down_cfg(struct bcmgenet_priv *priv, return 0; } -void bcmgenet_wol_power_up_cfg(struct bcmgenet_priv *priv, - enum bcmgenet_power_mode mode) +int bcmgenet_wol_power_up_cfg(struct bcmgenet_priv *priv, + enum bcmgenet_power_mode mode) { + struct net_device *dev = priv->dev; u32 reg; if (mode != GENET_POWER_WOL_MAGIC) { netif_err(priv, wol, priv->dev, "invalid mode: %d\n", mode); - return; + return -EINVAL; } - if (!priv->wol_active) - return; /* failed to suspend so skip the rest */ - - priv->wol_active = 0; clk_disable_unprepare(priv->clk_wol); priv->crc_fwd_en = 0; + bcmgenet_intrl2_0_writel(priv, UMAC_IRQ_WAKE_EVENT, + INTRL2_CPU_MASK_SET); + if (bcmgenet_has_mdio_intr(priv)) + bcmgenet_intrl2_0_writel(priv, + UMAC_IRQ_MDIO_EVENT, + INTRL2_CPU_MASK_CLEAR); + /* Disable Magic Packet Detection */ if (priv->wolopts & (WAKE_MAGIC | WAKE_MAGICSECURE)) { reg = bcmgenet_umac_readl(priv, UMAC_MPD_CTRL); if (!(reg & MPD_EN)) - return; /* already reset so skip the rest */ + return -EPERM; /* already reset so skip the rest */ reg &= ~(MPD_EN | MPD_PW_EN); bcmgenet_umac_writel(priv, reg, UMAC_MPD_CTRL); } - /* Disable WAKE_FILTER Detection */ - if (priv->wolopts & WAKE_FILTER) { - reg = bcmgenet_hfb_reg_readl(priv, HFB_CTRL); - if (!(reg & RBUF_ACPI_EN)) - return; /* already reset so skip the rest */ - reg &= ~(RBUF_HFB_EN | RBUF_ACPI_EN); - bcmgenet_hfb_reg_writel(priv, reg, HFB_CTRL); - } + /* Disable ACPI mode */ + reg = 
bcmgenet_hfb_reg_readl(priv, HFB_CTRL); + if (!(reg & RBUF_ACPI_EN)) + return -EPERM; /* already reset so skip the rest */ + reg &= ~RBUF_ACPI_EN; + bcmgenet_hfb_reg_writel(priv, reg, HFB_CTRL); /* Disable CRC Forward */ spin_lock_bh(&priv->reg_lock); @@ -269,4 +254,14 @@ void bcmgenet_wol_power_up_cfg(struct bcmgenet_priv *priv, reg &= ~CMD_CRC_FWD; bcmgenet_umac_writel(priv, reg, UMAC_CMD); spin_unlock_bh(&priv->reg_lock); + + /* Resume link status tracking */ + mutex_lock(&dev->phydev->lock); + if (dev->phydev->link) + dev->phydev->state = PHY_RUNNING; + else + dev->phydev->state = PHY_NOLINK; + mutex_unlock(&dev->phydev->lock); + + return 0; } diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index c4a3698cef66..71c619d2bea5 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -2,7 +2,7 @@ /* * Broadcom GENET MDIO routines * - * Copyright (c) 2014-2024 Broadcom + * Copyright (c) 2014-2025 Broadcom */ #include <linux/acpi.h> @@ -154,7 +154,7 @@ void bcmgenet_phy_power_set(struct net_device *dev, bool enable) u32 reg = 0; /* EXT_GPHY_CTRL is only valid for GENETv4 and onward */ - if (GENET_IS_V4(priv) || priv->ephy_16nm) { + if (GENET_IS_V4(priv) || bcmgenet_has_ephy_16nm(priv)) { reg = bcmgenet_ext_readl(priv, EXT_GPHY_CTRL); if (enable) { reg &= ~EXT_CK25_DIS; @@ -184,7 +184,7 @@ void bcmgenet_phy_power_set(struct net_device *dev, bool enable) static void bcmgenet_moca_phy_setup(struct bcmgenet_priv *priv) { - if (priv->hw_params->flags & GENET_HAS_MOCA_LINK_DET) + if (bcmgenet_has_moca_link_det(priv)) fixed_phy_set_link_update(priv->dev->phydev, bcmgenet_fixed_phy_link_update); } diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c index 9ad49aea2673..ff8f2f9f9cae 100644 --- a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c +++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c @@ -49,7 +49,7 @@ static int cn23xx_pf_soft_reset(struct octeon_device *oct) lio_pci_readq(oct, CN23XX_RST_SOFT_RST); lio_pci_writeq(oct, 1, CN23XX_RST_SOFT_RST); - /* Wait for 100ms as Octeon resets. */ + /* Wait for 100ms as Octeon resets */ mdelay(100); if (octeon_read_csr64(oct, CN23XX_SLI_SCRATCH1)) { @@ -61,7 +61,7 @@ static int cn23xx_pf_soft_reset(struct octeon_device *oct) dev_dbg(&oct->pci_dev->dev, "OCTEON[%d]: Reset completed\n", oct->octeon_id); - /* restore the reset value*/ + /* Restore the reset value */ octeon_write_csr64(oct, CN23XX_WIN_WR_MASK_REG, 0xFF); return 0; @@ -121,7 +121,7 @@ u32 cn23xx_pf_get_oq_ticks(struct octeon_device *oct, u32 time_intr_in_us) oqticks_per_us /= 1024; /* time_intr is in microseconds. The next 2 steps gives the oq ticks - * corressponding to time_intr. + * corresponding to time_intr. 
*/ oqticks_per_us *= time_intr_in_us; oqticks_per_us /= 1000; @@ -136,11 +136,11 @@ static void cn23xx_setup_global_mac_regs(struct octeon_device *oct) u64 reg_val; u64 temp; - /* programming SRN and TRS for each MAC(0..3) */ + /* Programming SRN and TRS for each MAC(0..3) */ dev_dbg(&oct->pci_dev->dev, "%s:Using pcie port %d\n", __func__, mac_no); - /* By default, mapping all 64 IOQs to a single MACs */ + /* By default, map all 64 IOQs to a single MAC */ reg_val = octeon_read_csr64(oct, CN23XX_SLI_PKT_MAC_RINFO64(mac_no, pf_num)); @@ -164,7 +164,7 @@ static void cn23xx_setup_global_mac_regs(struct octeon_device *oct) temp = oct->sriov_info.max_vfs & 0xff; reg_val |= (temp << CN23XX_PKT_MAC_CTL_RINFO_NVFS_BIT_POS); - /* write these settings to MAC register */ + /* Write these settings to MAC register */ octeon_write_csr64(oct, CN23XX_SLI_PKT_MAC_RINFO64(mac_no, pf_num), reg_val); @@ -183,10 +183,10 @@ static int cn23xx_reset_io_queues(struct octeon_device *oct) srn = oct->sriov_info.pf_srn; ern = srn + oct->sriov_info.num_pf_rings; - /*As per HRM reg description, s/w cant write 0 to ENB. */ - /*to make the queue off, need to set the RST bit. */ + /* As per HRM reg description, s/w can't write 0 to ENB. */ + /* We need to set the RST bit to turn the queue off. */ - /* Reset the Enable bit for all the 64 IQs. */ + /* Reset the enable bit for all the 64 IQs. */ for (q_no = srn; q_no < ern; q_no++) { /* set RST bit to 1. This bit applies to both IQ and OQ */ d64 = octeon_read_csr64(oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no)); @@ -194,7 +194,7 @@ octeon_write_csr64(oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no), d64); } - /*wait until the RST bit is clear or the RST and quite bits are set*/ + /* Wait until the RST bit is clear or the RST and quiet bits are set */ for (q_no = srn; q_no < ern; q_no++) { u64 reg_val = octeon_read_csr64(oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no)); @@ -245,15 +245,15 @@ static int cn23xx_pf_setup_global_input_regs(struct octeon_device *oct) if (cn23xx_reset_io_queues(oct)) return -1; - /** Set the MAC_NUM and PVF_NUM in IQ_PKT_CONTROL reg - * for all queues.Only PF can set these bits. + /* Set the MAC_NUM and PVF_NUM in IQ_PKT_CONTROL reg + * for all queues. Only PF can set these bits. * bits 29:30 indicate the MAC num. * bits 32:47 indicate the PVF num. */ for (q_no = 0; q_no < ern; q_no++) { reg_val = (u64)oct->pcie_port << CN23XX_PKT_INPUT_CTL_MAC_NUM_POS; - /* for VF assigned queues. */ + /* For VF-assigned queues. */ if (q_no < oct->sriov_info.pf_srn) { vf_num = q_no / oct->sriov_info.rings_per_vf; vf_num += 1; /* VF1, VF2,........
*/ @@ -268,7 +268,7 @@ static int cn23xx_pf_setup_global_input_regs(struct octeon_device *oct) reg_val); } - /* Select ES, RO, NS, RDSIZE,DPTR Fomat#0 for + /* Select ES, RO, NS, RDSIZE,DPTR Format#0 for * pf queues */ for (q_no = srn; q_no < ern; q_no++) { @@ -289,7 +289,7 @@ static int cn23xx_pf_setup_global_input_regs(struct octeon_device *oct) octeon_write_csr64(oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no), reg_val); - /* Set WMARK level for triggering PI_INT */ + /* Set WMARK level to trigger PI_INT */ /* intr_threshold = CN23XX_DEF_IQ_INTR_THRESHOLD & */ intr_threshold = CFG_GET_IQ_INTR_PKT(cn23xx->conf) & CN23XX_PKT_IN_DONE_WMARK_MASK; @@ -354,7 +354,7 @@ static void cn23xx_pf_setup_global_output_regs(struct octeon_device *oct) /* set the ES bit */ reg_val |= (CN23XX_PKT_OUTPUT_CTL_ES); - /* write all the selected settings */ + /* Write all the selected settings */ octeon_write_csr(oct, CN23XX_SLI_OQ_PKT_CONTROL(q_no), reg_val); /* Enabling these interrupt in oct->fn_list.enable_interrupt() @@ -373,7 +373,7 @@ static void cn23xx_pf_setup_global_output_regs(struct octeon_device *oct) /** Setting the water mark level for pko back pressure **/ writeq(0x40, (u8 *)oct->mmio[0].hw_addr + CN23XX_SLI_OQ_WMARK); - /** Disabling setting OQs in reset when ring has no dorebells + /* Disabling setting OQs in reset when ring has no doorbells * enabling this will cause of head of line blocking */ /* Do it only for pass1.1. and pass1.2 */ @@ -383,7 +383,7 @@ static void cn23xx_pf_setup_global_output_regs(struct octeon_device *oct) CN23XX_SLI_GBL_CONTROL) | 0x2, (u8 *)oct->mmio[0].hw_addr + CN23XX_SLI_GBL_CONTROL); - /** Enable channel-level backpressure */ + /** Enable channel-level backpressure **/ if (oct->pf_num) writeq(0xffffffffffffffffULL, (u8 *)oct->mmio[0].hw_addr + CN23XX_SLI_OUT_BP_EN2_W1S); @@ -396,7 +396,7 @@ static int cn23xx_setup_pf_device_regs(struct octeon_device *oct) { cn23xx_enable_error_reporting(oct); - /* program the MAC(0..3)_RINFO before setting up input/output regs */ + /* Program the MAC(0..3)_RINFO before setting up input/output regs */ cn23xx_setup_global_mac_regs(oct); if (cn23xx_pf_setup_global_input_regs(oct)) @@ -410,7 +410,7 @@ static int cn23xx_setup_pf_device_regs(struct octeon_device *oct) octeon_write_csr64(oct, CN23XX_SLI_WINDOW_CTL, CN23XX_SLI_WINDOW_CTL_DEFAULT); - /* set SLI_PKT_IN_JABBER to handle large VXLAN packets */ + /* Set SLI_PKT_IN_JABBER to handle large VXLAN packets */ octeon_write_csr64(oct, CN23XX_SLI_PKT_IN_JABBER, CN23XX_INPUT_JABBER); return 0; } @@ -574,7 +574,7 @@ static int cn23xx_setup_pf_mbox(struct octeon_device *oct) mbox->mbox_read_reg = (u8 *)oct->mmio[0].hw_addr + CN23XX_SLI_PKT_PF_VF_MBOX_SIG(q_no, 1); - /*Mail Box Thread creation*/ + /* Mail Box Thread creation */ INIT_DELAYED_WORK(&mbox->mbox_poll_wk.work, cn23xx_pf_mbox_thread); mbox->mbox_poll_wk.ctxptr = (void *)mbox; @@ -626,7 +626,7 @@ static int cn23xx_enable_io_queues(struct octeon_device *oct) ern = srn + oct->num_iqs; for (q_no = srn; q_no < ern; q_no++) { - /* set the corresponding IQ IS_64B bit */ + /* Set the corresponding IQ IS_64B bit */ if (oct->io_qmask.iq64B & BIT_ULL(q_no - srn)) { reg_val = octeon_read_csr64( oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no)); @@ -635,7 +635,7 @@ static int cn23xx_enable_io_queues(struct octeon_device *oct) oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no), reg_val); } - /* set the corresponding IQ ENB bit */ + /* Set the corresponding IQ ENB bit */ if (oct->io_qmask.iq & BIT_ULL(q_no - srn)) { /* IOQs are in reset by default in PEM2 mode, * clearing 
reset bit @@ -681,7 +681,7 @@ static int cn23xx_enable_io_queues(struct octeon_device *oct) } for (q_no = srn; q_no < ern; q_no++) { u32 reg_val; - /* set the corresponding OQ ENB bit */ + /* Set the corresponding OQ ENB bit */ if (oct->io_qmask.oq & BIT_ULL(q_no - srn)) { reg_val = octeon_read_csr( oct, CN23XX_SLI_OQ_PKT_CONTROL(q_no)); @@ -707,7 +707,7 @@ static void cn23xx_disable_io_queues(struct octeon_device *oct) for (q_no = srn; q_no < ern; q_no++) { loop = HZ; - /* start the Reset for a particular ring */ + /* Start the Reset for a particular ring */ WRITE_ONCE(d64, octeon_read_csr64( oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no))); WRITE_ONCE(d64, READ_ONCE(d64) & @@ -740,7 +740,7 @@ static void cn23xx_disable_io_queues(struct octeon_device *oct) loop = HZ; /* Wait until hardware indicates that the particular IQ - * is out of reset.It given that SLI_PKT_RING_RST is + * is out of reset. Given that SLI_PKT_RING_RST is * common for both IQs and OQs */ WRITE_ONCE(d64, octeon_read_csr64( @@ -760,7 +760,7 @@ static void cn23xx_disable_io_queues(struct octeon_device *oct) schedule_timeout_uninterruptible(1); } - /* clear the SLI_PKT(0..63)_CNTS[CNT] reg value */ + /* Clear the SLI_PKT(0..63)_CNTS[CNT] reg value */ WRITE_ONCE(d32, octeon_read_csr( oct, CN23XX_SLI_OQ_PKTS_SENT(q_no))); octeon_write_csr(oct, CN23XX_SLI_OQ_PKTS_SENT(q_no), @@ -793,7 +793,7 @@ static u64 cn23xx_pf_msix_interrupt_handler(void *dev) if (!pkts_sent || (pkts_sent == 0xFFFFFFFFFFFFFFFFULL)) return ret; - /* Write count reg in sli_pkt_cnts to clear these int.*/ + /* Write count reg in sli_pkt_cnts to clear these int. */ if ((pkts_sent & CN23XX_INTR_PO_INT) || (pkts_sent & CN23XX_INTR_PI_INT)) { if (pkts_sent & CN23XX_INTR_PO_INT) @@ -908,7 +908,7 @@ static u32 cn23xx_bar1_idx_read(struct octeon_device *oct, u32 idx) oct, CN23XX_PEM_BAR1_INDEX_REG(oct->pcie_port, idx)); } -/* always call with lock held */ +/* Always call with lock held */ static u32 cn23xx_update_read_index(struct octeon_instr_queue *iq) { u32 new_idx; @@ -919,7 +919,7 @@ static u32 cn23xx_update_read_index(struct octeon_instr_queue *iq) iq->pkt_in_done = pkt_in_done; /* Modulo of the new index with the IQ size will give us - * the new index. The iq->reset_instr_cnt is always zero for + * the new index. The iq->reset_instr_cnt is always zero for * cn23xx, so no extra adjustments are needed. */ new_idx = (iq->octeon_read_index + @@ -934,8 +934,8 @@ static void cn23xx_enable_pf_interrupt(struct octeon_device *oct, u8 intr_flag) struct octeon_cn23xx_pf *cn23xx = (struct octeon_cn23xx_pf *)oct->chip; u64 intr_val = 0; - /* Divide the single write to multiple writes based on the flag. */ - /* Enable Interrupt */ + /* Divide the single write to multiple writes based on the flag. 
*/ + /* Enable Interrupts */ if (intr_flag == OCTEON_ALL_INTR) { writeq(cn23xx->intr_mask64, cn23xx->intr_enb_reg64); } else if (intr_flag & OCTEON_OUTPUT_INTR) { @@ -990,7 +990,7 @@ static int cn23xx_get_pf_num(struct octeon_device *oct) ret = 0; - /** Read Function Dependency Link reg to get the function number */ + /* Read Function Dependency Link reg to get the function number */ if (pci_read_config_dword(oct->pci_dev, CN23XX_PCIE_SRIOV_FDL, &fdl_bit) == 0) { oct->pf_num = ((fdl_bit >> CN23XX_PCIE_SRIOV_FDL_BIT_POS) & @@ -1003,13 +1003,13 @@ static int cn23xx_get_pf_num(struct octeon_device *oct) * In this case, read the PF number from the * SLI_PKT0_INPUT_CONTROL reg (written by f/w) */ - pkt0_in_ctl = octeon_read_csr64(oct, - CN23XX_SLI_IQ_PKT_CONTROL64(0)); + pkt0_in_ctl = + octeon_read_csr64(oct, CN23XX_SLI_IQ_PKT_CONTROL64(0)); pfnum = (pkt0_in_ctl >> CN23XX_PKT_INPUT_CTL_PF_NUM_POS) & CN23XX_PKT_INPUT_CTL_PF_NUM_MASK; mac = (octeon_read_csr(oct, CN23XX_SLI_MAC_NUMBER)) & 0xff; - /* validate PF num by reading RINFO; f/w writes RINFO.trs == 1*/ + /* Validate PF num by reading RINFO; f/w writes RINFO.trs == 1 */ d64 = octeon_read_csr64(oct, CN23XX_SLI_PKT_MAC_RINFO64(mac, pfnum)); trs = (int)(d64 >> CN23XX_PKT_MAC_CTL_RINFO_TRS_BIT_POS) & 0xff; @@ -1252,9 +1252,9 @@ int cn23xx_fw_loaded(struct octeon_device *oct) u64 val; /* If there's more than one active PF on this NIC, then that - * implies that the NIC firmware is loaded and running. This check + * implies that the NIC firmware is loaded and running. This check * prevents a rare false negative that might occur if we only relied - * on checking the SCR2_BIT_FW_LOADED flag. The false negative would + * on checking the SCR2_BIT_FW_LOADED flag. The false negative would * happen if the PF driver sees SCR2_BIT_FW_LOADED as cleared even * though the firmware was already loaded but still booting and has yet * to set SCR2_BIT_FW_LOADED. diff --git a/drivers/net/ethernet/cisco/enic/Makefile b/drivers/net/ethernet/cisco/enic/Makefile index b3b5196b2dfc..a96b8332e6e2 100644 --- a/drivers/net/ethernet/cisco/enic/Makefile +++ b/drivers/net/ethernet/cisco/enic/Makefile @@ -3,5 +3,5 @@ obj-$(CONFIG_ENIC) := enic.o enic-y := enic_main.o vnic_cq.o vnic_intr.o vnic_wq.o \ enic_res.o enic_dev.o enic_pp.o vnic_dev.o vnic_rq.o vnic_vic.o \ - enic_ethtool.o enic_api.o enic_clsf.o enic_rq.o + enic_ethtool.o enic_api.o enic_clsf.o enic_rq.o enic_wq.o diff --git a/drivers/net/ethernet/cisco/enic/cq_desc.h b/drivers/net/ethernet/cisco/enic/cq_desc.h index 462c5435a206..bfb3f14e89f5 100644 --- a/drivers/net/ethernet/cisco/enic/cq_desc.h +++ b/drivers/net/ethernet/cisco/enic/cq_desc.h @@ -40,28 +40,7 @@ struct cq_desc { #define CQ_DESC_COMP_NDX_BITS 12 #define CQ_DESC_COMP_NDX_MASK ((1 << CQ_DESC_COMP_NDX_BITS) - 1) -static inline void cq_desc_dec(const struct cq_desc *desc_arg, - u8 *type, u8 *color, u16 *q_number, u16 *completed_index) -{ - const struct cq_desc *desc = desc_arg; - const u8 type_color = desc->type_color; - - *color = (type_color >> CQ_DESC_COLOR_SHIFT) & CQ_DESC_COLOR_MASK; - - /* - * Make sure color bit is read from desc *before* other fields - * are read from desc. Hardware guarantees color bit is last - * bit (byte) written. Adding the rmb() prevents the compiler - * and/or CPU from reordering the reads which would potentially - * result in reading stale values. 
- */ - - rmb(); - - *type = type_color & CQ_DESC_TYPE_MASK; - *q_number = le16_to_cpu(desc->q_number) & CQ_DESC_Q_NUM_MASK; - *completed_index = le16_to_cpu(desc->completed_index) & - CQ_DESC_COMP_NDX_MASK; -} +#define CQ_DESC_32_FI_MASK (BIT(0) | BIT(1)) +#define CQ_DESC_64_FI_MASK (BIT(0) | BIT(1)) #endif /* _CQ_DESC_H_ */ diff --git a/drivers/net/ethernet/cisco/enic/cq_enet_desc.h b/drivers/net/ethernet/cisco/enic/cq_enet_desc.h index d25426470a29..50787cff29db 100644 --- a/drivers/net/ethernet/cisco/enic/cq_enet_desc.h +++ b/drivers/net/ethernet/cisco/enic/cq_enet_desc.h @@ -17,12 +17,22 @@ struct cq_enet_wq_desc { u8 type_color; }; -static inline void cq_enet_wq_desc_dec(struct cq_enet_wq_desc *desc, - u8 *type, u8 *color, u16 *q_number, u16 *completed_index) -{ - cq_desc_dec((struct cq_desc *)desc, type, - color, q_number, completed_index); -} +/* + * Defines and Capabilities for CMD_CQ_ENTRY_SIZE_SET + */ +#define VNIC_RQ_ALL (~0ULL) + +#define VNIC_RQ_CQ_ENTRY_SIZE_16 0 +#define VNIC_RQ_CQ_ENTRY_SIZE_32 1 +#define VNIC_RQ_CQ_ENTRY_SIZE_64 2 + +#define VNIC_RQ_CQ_ENTRY_SIZE_16_CAPABLE BIT(VNIC_RQ_CQ_ENTRY_SIZE_16) +#define VNIC_RQ_CQ_ENTRY_SIZE_32_CAPABLE BIT(VNIC_RQ_CQ_ENTRY_SIZE_32) +#define VNIC_RQ_CQ_ENTRY_SIZE_64_CAPABLE BIT(VNIC_RQ_CQ_ENTRY_SIZE_64) + +#define VNIC_RQ_CQ_ENTRY_SIZE_ALL_BIT (VNIC_RQ_CQ_ENTRY_SIZE_16_CAPABLE | \ + VNIC_RQ_CQ_ENTRY_SIZE_32_CAPABLE | \ + VNIC_RQ_CQ_ENTRY_SIZE_64_CAPABLE) /* Completion queue descriptor: Ethernet receive queue, 16B */ struct cq_enet_rq_desc { @@ -36,6 +46,45 @@ struct cq_enet_rq_desc { u8 type_color; }; +/* Completion queue descriptor: Ethernet receive queue, 32B */ +struct cq_enet_rq_desc_32 { + __le16 completed_index_flags; + __le16 q_number_rss_type_flags; + __le32 rss_hash; + __le16 bytes_written_flags; + __le16 vlan; + __le16 checksum_fcoe; + u8 flags; + u8 fetch_index_flags; + __le32 time_stamp; + __le16 time_stamp2; + __le16 pie_info; + __le32 pie_info2; + __le16 pie_info3; + u8 pie_info4; + u8 type_color; +}; + +/* Completion queue descriptor: Ethernet receive queue, 64B */ +struct cq_enet_rq_desc_64 { + __le16 completed_index_flags; + __le16 q_number_rss_type_flags; + __le32 rss_hash; + __le16 bytes_written_flags; + __le16 vlan; + __le16 checksum_fcoe; + u8 flags; + u8 fetch_index_flags; + __le32 time_stamp; + __le16 time_stamp2; + __le16 pie_info; + __le32 pie_info2; + __le16 pie_info3; + u8 pie_info4; + u8 reserved[32]; + u8 type_color; +}; + #define CQ_ENET_RQ_DESC_FLAGS_INGRESS_PORT (0x1 << 12) #define CQ_ENET_RQ_DESC_FLAGS_FCOE (0x1 << 13) #define CQ_ENET_RQ_DESC_FLAGS_EOP (0x1 << 14) @@ -88,85 +137,4 @@ struct cq_enet_rq_desc { #define CQ_ENET_RQ_DESC_FLAGS_IPV4_FRAGMENT (0x1 << 6) #define CQ_ENET_RQ_DESC_FLAGS_FCS_OK (0x1 << 7) -static inline void cq_enet_rq_desc_dec(struct cq_enet_rq_desc *desc, - u8 *type, u8 *color, u16 *q_number, u16 *completed_index, - u8 *ingress_port, u8 *fcoe, u8 *eop, u8 *sop, u8 *rss_type, - u8 *csum_not_calc, u32 *rss_hash, u16 *bytes_written, u8 *packet_error, - u8 *vlan_stripped, u16 *vlan_tci, u16 *checksum, u8 *fcoe_sof, - u8 *fcoe_fc_crc_ok, u8 *fcoe_enc_error, u8 *fcoe_eof, - u8 *tcp_udp_csum_ok, u8 *udp, u8 *tcp, u8 *ipv4_csum_ok, - u8 *ipv6, u8 *ipv4, u8 *ipv4_fragment, u8 *fcs_ok) -{ - u16 completed_index_flags; - u16 q_number_rss_type_flags; - u16 bytes_written_flags; - - cq_desc_dec((struct cq_desc *)desc, type, - color, q_number, completed_index); - - completed_index_flags = le16_to_cpu(desc->completed_index_flags); - q_number_rss_type_flags = - 
le16_to_cpu(desc->q_number_rss_type_flags); - bytes_written_flags = le16_to_cpu(desc->bytes_written_flags); - - *ingress_port = (completed_index_flags & - CQ_ENET_RQ_DESC_FLAGS_INGRESS_PORT) ? 1 : 0; - *fcoe = (completed_index_flags & CQ_ENET_RQ_DESC_FLAGS_FCOE) ? - 1 : 0; - *eop = (completed_index_flags & CQ_ENET_RQ_DESC_FLAGS_EOP) ? - 1 : 0; - *sop = (completed_index_flags & CQ_ENET_RQ_DESC_FLAGS_SOP) ? - 1 : 0; - - *rss_type = (u8)((q_number_rss_type_flags >> CQ_DESC_Q_NUM_BITS) & - CQ_ENET_RQ_DESC_RSS_TYPE_MASK); - *csum_not_calc = (q_number_rss_type_flags & - CQ_ENET_RQ_DESC_FLAGS_CSUM_NOT_CALC) ? 1 : 0; - - *rss_hash = le32_to_cpu(desc->rss_hash); - - *bytes_written = bytes_written_flags & - CQ_ENET_RQ_DESC_BYTES_WRITTEN_MASK; - *packet_error = (bytes_written_flags & - CQ_ENET_RQ_DESC_FLAGS_TRUNCATED) ? 1 : 0; - *vlan_stripped = (bytes_written_flags & - CQ_ENET_RQ_DESC_FLAGS_VLAN_STRIPPED) ? 1 : 0; - - /* - * Tag Control Information(16) = user_priority(3) + cfi(1) + vlan(12) - */ - *vlan_tci = le16_to_cpu(desc->vlan); - - if (*fcoe) { - *fcoe_sof = (u8)(le16_to_cpu(desc->checksum_fcoe) & - CQ_ENET_RQ_DESC_FCOE_SOF_MASK); - *fcoe_fc_crc_ok = (desc->flags & - CQ_ENET_RQ_DESC_FCOE_FC_CRC_OK) ? 1 : 0; - *fcoe_enc_error = (desc->flags & - CQ_ENET_RQ_DESC_FCOE_ENC_ERROR) ? 1 : 0; - *fcoe_eof = (u8)((le16_to_cpu(desc->checksum_fcoe) >> - CQ_ENET_RQ_DESC_FCOE_EOF_SHIFT) & - CQ_ENET_RQ_DESC_FCOE_EOF_MASK); - *checksum = 0; - } else { - *fcoe_sof = 0; - *fcoe_fc_crc_ok = 0; - *fcoe_enc_error = 0; - *fcoe_eof = 0; - *checksum = le16_to_cpu(desc->checksum_fcoe); - } - - *tcp_udp_csum_ok = - (desc->flags & CQ_ENET_RQ_DESC_FLAGS_TCP_UDP_CSUM_OK) ? 1 : 0; - *udp = (desc->flags & CQ_ENET_RQ_DESC_FLAGS_UDP) ? 1 : 0; - *tcp = (desc->flags & CQ_ENET_RQ_DESC_FLAGS_TCP) ? 1 : 0; - *ipv4_csum_ok = - (desc->flags & CQ_ENET_RQ_DESC_FLAGS_IPV4_CSUM_OK) ? 1 : 0; - *ipv6 = (desc->flags & CQ_ENET_RQ_DESC_FLAGS_IPV6) ? 1 : 0; - *ipv4 = (desc->flags & CQ_ENET_RQ_DESC_FLAGS_IPV4) ? 1 : 0; - *ipv4_fragment = - (desc->flags & CQ_ENET_RQ_DESC_FLAGS_IPV4_FRAGMENT) ? 1 : 0; - *fcs_ok = (desc->flags & CQ_ENET_RQ_DESC_FLAGS_FCS_OK) ? 
1 : 0; -} - #endif /* _CQ_ENET_DESC_H_ */ diff --git a/drivers/net/ethernet/cisco/enic/enic.h b/drivers/net/ethernet/cisco/enic/enic.h index 305ed12aa031..9c12e967e9f1 100644 --- a/drivers/net/ethernet/cisco/enic/enic.h +++ b/drivers/net/ethernet/cisco/enic/enic.h @@ -31,6 +31,13 @@ #define ENIC_AIC_LARGE_PKT_DIFF 3 +enum ext_cq { + ENIC_RQ_CQ_ENTRY_SIZE_16, + ENIC_RQ_CQ_ENTRY_SIZE_32, + ENIC_RQ_CQ_ENTRY_SIZE_64, + ENIC_RQ_CQ_ENTRY_SIZE_MAX, +}; + struct enic_msix_entry { int requested; char devname[IFNAMSIZ + 8]; @@ -76,6 +83,10 @@ struct enic_rx_coal { #define ENIC_SET_INSTANCE (1 << 3) #define ENIC_SET_HOST (1 << 4) +#define MAX_TSO BIT(16) +#define WQ_ENET_MAX_DESC_LEN BIT(WQ_ENET_LEN_BITS) +#define ENIC_DESC_MAX_SPLITS (MAX_TSO / WQ_ENET_MAX_DESC_LEN + 1) + struct enic_port_profile { u32 set; u8 request; @@ -228,6 +239,7 @@ struct enic { struct enic_rfs_flw_tbl rfs_h; u8 rss_key[ENIC_RSS_LEN]; struct vnic_gen_stats gen_stats; + enum ext_cq ext_cq; }; static inline struct net_device *vnic_get_netdev(struct vnic_dev *vdev) @@ -349,5 +361,6 @@ int enic_is_valid_vf(struct enic *enic, int vf); int enic_is_dynamic(struct enic *enic); void enic_set_ethtool_ops(struct net_device *netdev); int __enic_set_rsskey(struct enic *enic); +void enic_ext_cq(struct enic *enic); #endif /* _ENIC_H_ */ diff --git a/drivers/net/ethernet/cisco/enic/enic_ethtool.c b/drivers/net/ethernet/cisco/enic/enic_ethtool.c index 18b929fc2879..529160926a96 100644 --- a/drivers/net/ethernet/cisco/enic/enic_ethtool.c +++ b/drivers/net/ethernet/cisco/enic/enic_ethtool.c @@ -222,9 +222,9 @@ static void enic_get_ringparam(struct net_device *netdev, struct enic *enic = netdev_priv(netdev); struct vnic_enet_config *c = &enic->config; - ring->rx_max_pending = ENIC_MAX_RQ_DESCS; + ring->rx_max_pending = c->max_rq_ring; ring->rx_pending = c->rq_desc_count; - ring->tx_max_pending = ENIC_MAX_WQ_DESCS; + ring->tx_max_pending = c->max_wq_ring; ring->tx_pending = c->wq_desc_count; } @@ -252,18 +252,18 @@ static int enic_set_ringparam(struct net_device *netdev, } rx_pending = c->rq_desc_count; tx_pending = c->wq_desc_count; - if (ring->rx_pending > ENIC_MAX_RQ_DESCS || + if (ring->rx_pending > c->max_rq_ring || ring->rx_pending < ENIC_MIN_RQ_DESCS) { netdev_info(netdev, "rx pending (%u) not in range [%u,%u]", ring->rx_pending, ENIC_MIN_RQ_DESCS, - ENIC_MAX_RQ_DESCS); + c->max_rq_ring); return -EINVAL; } - if (ring->tx_pending > ENIC_MAX_WQ_DESCS || + if (ring->tx_pending > c->max_wq_ring || ring->tx_pending < ENIC_MIN_WQ_DESCS) { netdev_info(netdev, "tx pending (%u) not in range [%u,%u]", ring->tx_pending, ENIC_MIN_WQ_DESCS, - ENIC_MAX_WQ_DESCS); + c->max_wq_ring); return -EINVAL; } if (running) diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index f24fd29ea207..54aa3953bf7b 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -59,11 +59,9 @@ #include "enic_pp.h" #include "enic_clsf.h" #include "enic_rq.h" +#include "enic_wq.h" #define ENIC_NOTIFY_TIMER_PERIOD (2 * HZ) -#define WQ_ENET_MAX_DESC_LEN (1 << WQ_ENET_LEN_BITS) -#define MAX_TSO (1 << 16) -#define ENIC_DESC_MAX_SPLITS (MAX_TSO / WQ_ENET_MAX_DESC_LEN + 1) #define PCI_DEVICE_ID_CISCO_VIC_ENET 0x0043 /* ethernet vnic */ #define PCI_DEVICE_ID_CISCO_VIC_ENET_DYN 0x0044 /* enet dynamic vnic */ @@ -321,54 +319,6 @@ int enic_is_valid_vf(struct enic *enic, int vf) #endif } -static void enic_free_wq_buf(struct vnic_wq *wq, struct vnic_wq_buf *buf) -{ - struct enic *enic = 
vnic_dev_priv(wq->vdev); - - if (buf->sop) - dma_unmap_single(&enic->pdev->dev, buf->dma_addr, buf->len, - DMA_TO_DEVICE); - else - dma_unmap_page(&enic->pdev->dev, buf->dma_addr, buf->len, - DMA_TO_DEVICE); - - if (buf->os_buf) - dev_kfree_skb_any(buf->os_buf); -} - -static void enic_wq_free_buf(struct vnic_wq *wq, - struct cq_desc *cq_desc, struct vnic_wq_buf *buf, void *opaque) -{ - struct enic *enic = vnic_dev_priv(wq->vdev); - - enic->wq[wq->index].stats.cq_work++; - enic->wq[wq->index].stats.cq_bytes += buf->len; - enic_free_wq_buf(wq, buf); -} - -static int enic_wq_service(struct vnic_dev *vdev, struct cq_desc *cq_desc, - u8 type, u16 q_number, u16 completed_index, void *opaque) -{ - struct enic *enic = vnic_dev_priv(vdev); - - spin_lock(&enic->wq[q_number].lock); - - vnic_wq_service(&enic->wq[q_number].vwq, cq_desc, - completed_index, enic_wq_free_buf, - opaque); - - if (netif_tx_queue_stopped(netdev_get_tx_queue(enic->netdev, q_number)) && - vnic_wq_desc_avail(&enic->wq[q_number].vwq) >= - (MAX_SKB_FRAGS + ENIC_DESC_MAX_SPLITS)) { - netif_wake_subqueue(enic->netdev, q_number); - enic->wq[q_number].stats.wake++; - } - - spin_unlock(&enic->wq[q_number].lock); - - return 0; -} - static bool enic_log_q_error(struct enic *enic) { unsigned int i; @@ -1382,12 +1332,10 @@ static int enic_poll(struct napi_struct *napi, int budget) unsigned int work_done, rq_work_done = 0, wq_work_done; int err; - wq_work_done = vnic_cq_service(&enic->cq[cq_wq], wq_work_to_do, - enic_wq_service, NULL); + wq_work_done = enic_wq_cq_service(enic, cq_wq, wq_work_to_do); if (budget > 0) - rq_work_done = vnic_cq_service(&enic->cq[cq_rq], - rq_work_to_do, enic_rq_service, NULL); + rq_work_done = enic_rq_cq_service(enic, cq_rq, rq_work_to_do); /* Accumulate intr event credits for this polling * cycle. An intr event is the completion of a @@ -1486,8 +1434,8 @@ static int enic_poll_msix_wq(struct napi_struct *napi, int budget) wq_irq = wq->index; cq = enic_cq_wq(enic, wq_irq); intr = enic_msix_wq_intr(enic, wq_irq); - wq_work_done = vnic_cq_service(&enic->cq[cq], wq_work_to_do, - enic_wq_service, NULL); + + wq_work_done = enic_wq_cq_service(enic, cq, wq_work_to_do); vnic_intr_return_credits(&enic->intr[intr], wq_work_done, 0 /* don't unmask intr */, @@ -1516,8 +1464,7 @@ static int enic_poll_msix_rq(struct napi_struct *napi, int budget) */ if (budget > 0) - work_done = vnic_cq_service(&enic->cq[cq], - work_to_do, enic_rq_service, NULL); + work_done = enic_rq_cq_service(enic, cq, work_to_do); /* Return intr event credits for this polling * cycle. 
An intr event is the completion of a @@ -2194,6 +2141,7 @@ static void enic_reset(struct work_struct *work) enic_init_vnic_resources(enic); enic_set_rss_nic_cfg(enic); enic_dev_set_ig_vlan_rewrite_mode(enic); + enic_ext_cq(enic); enic_open(enic->netdev); /* Allow infiniband to fiddle with the device again */ @@ -2220,6 +2168,7 @@ static void enic_tx_hang_reset(struct work_struct *work) enic_init_vnic_resources(enic); enic_set_rss_nic_cfg(enic); enic_dev_set_ig_vlan_rewrite_mode(enic); + enic_ext_cq(enic); enic_open(enic->netdev); /* Allow infiniband to fiddle with the device again */ @@ -2594,6 +2543,8 @@ static int enic_dev_init(struct enic *enic) enic_get_res_counts(enic); + enic_ext_cq(enic); + err = enic_alloc_enic_resources(enic); if (err) { dev_err(dev, "Failed to allocate enic resources\n"); diff --git a/drivers/net/ethernet/cisco/enic/enic_res.c b/drivers/net/ethernet/cisco/enic/enic_res.c index 126125199833..bbd3143ed73e 100644 --- a/drivers/net/ethernet/cisco/enic/enic_res.c +++ b/drivers/net/ethernet/cisco/enic/enic_res.c @@ -59,31 +59,38 @@ int enic_get_vnic_config(struct enic *enic) GET_CONFIG(intr_timer_usec); GET_CONFIG(loop_tag); GET_CONFIG(num_arfs); + GET_CONFIG(max_rq_ring); + GET_CONFIG(max_wq_ring); + GET_CONFIG(max_cq_ring); + + if (!c->max_wq_ring) + c->max_wq_ring = ENIC_MAX_WQ_DESCS_DEFAULT; + if (!c->max_rq_ring) + c->max_rq_ring = ENIC_MAX_RQ_DESCS_DEFAULT; + if (!c->max_cq_ring) + c->max_cq_ring = ENIC_MAX_CQ_DESCS_DEFAULT; c->wq_desc_count = - min_t(u32, ENIC_MAX_WQ_DESCS, - max_t(u32, ENIC_MIN_WQ_DESCS, - c->wq_desc_count)); + min_t(u32, c->max_wq_ring, + max_t(u32, ENIC_MIN_WQ_DESCS, c->wq_desc_count)); c->wq_desc_count &= 0xffffffe0; /* must be aligned to groups of 32 */ c->rq_desc_count = - min_t(u32, ENIC_MAX_RQ_DESCS, - max_t(u32, ENIC_MIN_RQ_DESCS, - c->rq_desc_count)); + min_t(u32, c->max_rq_ring, + max_t(u32, ENIC_MIN_RQ_DESCS, c->rq_desc_count)); c->rq_desc_count &= 0xffffffe0; /* must be aligned to groups of 32 */ if (c->mtu == 0) c->mtu = 1500; - c->mtu = min_t(u16, ENIC_MAX_MTU, - max_t(u16, ENIC_MIN_MTU, - c->mtu)); + c->mtu = min_t(u16, ENIC_MAX_MTU, max_t(u16, ENIC_MIN_MTU, c->mtu)); c->intr_timer_usec = min_t(u32, c->intr_timer_usec, vnic_dev_get_intr_coal_timer_max(enic->vdev)); dev_info(enic_get_dev(enic), - "vNIC MAC addr %pM wq/rq %d/%d mtu %d\n", - enic->mac_addr, c->wq_desc_count, c->rq_desc_count, c->mtu); + "vNIC MAC addr %pM wq/rq %d/%d max wq/rq/cq %d/%d/%d mtu %d\n", + enic->mac_addr, c->wq_desc_count, c->rq_desc_count, + c->max_wq_ring, c->max_rq_ring, c->max_cq_ring, c->mtu); dev_info(enic_get_dev(enic), "vNIC csum tx/rx %s/%s " "tso/lro %s/%s rss %s intr mode %s type %s timer %d usec " @@ -312,6 +319,7 @@ void enic_init_vnic_resources(struct enic *enic) int enic_alloc_vnic_resources(struct enic *enic) { enum vnic_dev_intr_mode intr_mode; + int rq_cq_desc_size; unsigned int i; int err; @@ -326,6 +334,24 @@ int enic_alloc_vnic_resources(struct enic *enic) intr_mode == VNIC_DEV_INTR_MODE_MSIX ? 
"MSI-X" : "unknown"); + switch (enic->ext_cq) { + case ENIC_RQ_CQ_ENTRY_SIZE_16: + rq_cq_desc_size = 16; + break; + case ENIC_RQ_CQ_ENTRY_SIZE_32: + rq_cq_desc_size = 32; + break; + case ENIC_RQ_CQ_ENTRY_SIZE_64: + rq_cq_desc_size = 64; + break; + default: + dev_err(enic_get_dev(enic), + "Unable to determine rq cq desc size: %d", + enic->ext_cq); + err = -ENODEV; + goto err_out; + } + /* Allocate queue resources */ @@ -348,8 +374,8 @@ int enic_alloc_vnic_resources(struct enic *enic) for (i = 0; i < enic->cq_count; i++) { if (i < enic->rq_count) err = vnic_cq_alloc(enic->vdev, &enic->cq[i], i, - enic->config.rq_desc_count, - sizeof(struct cq_enet_rq_desc)); + enic->config.rq_desc_count, + rq_cq_desc_size); else err = vnic_cq_alloc(enic->vdev, &enic->cq[i], i, enic->config.wq_desc_count, @@ -380,6 +406,39 @@ int enic_alloc_vnic_resources(struct enic *enic) err_out_cleanup: enic_free_vnic_resources(enic); - +err_out: return err; } + +/* + * CMD_CQ_ENTRY_SIZE_SET can fail on older hw generations that don't support + * that command + */ +void enic_ext_cq(struct enic *enic) +{ + u64 a0 = CMD_CQ_ENTRY_SIZE_SET, a1 = 0; + int wait = 1000; + int ret; + + spin_lock_bh(&enic->devcmd_lock); + ret = vnic_dev_cmd(enic->vdev, CMD_CAPABILITY, &a0, &a1, wait); + if (ret || a0) { + dev_info(&enic->pdev->dev, + "CMD_CQ_ENTRY_SIZE_SET not supported."); + enic->ext_cq = ENIC_RQ_CQ_ENTRY_SIZE_16; + goto out; + } + a1 &= VNIC_RQ_CQ_ENTRY_SIZE_ALL_BIT; + enic->ext_cq = fls(a1) - 1; + a0 = VNIC_RQ_ALL; + a1 = enic->ext_cq; + ret = vnic_dev_cmd(enic->vdev, CMD_CQ_ENTRY_SIZE_SET, &a0, &a1, wait); + if (ret) { + dev_info(&enic->pdev->dev, "CMD_CQ_ENTRY_SIZE_SET failed."); + enic->ext_cq = ENIC_RQ_CQ_ENTRY_SIZE_16; + } +out: + spin_unlock_bh(&enic->devcmd_lock); + dev_info(&enic->pdev->dev, "CQ entry size set to %d bytes", + 16 << enic->ext_cq); +} diff --git a/drivers/net/ethernet/cisco/enic/enic_res.h b/drivers/net/ethernet/cisco/enic/enic_res.h index b8ee42d297aa..02dca1ae4a22 100644 --- a/drivers/net/ethernet/cisco/enic/enic_res.h +++ b/drivers/net/ethernet/cisco/enic/enic_res.h @@ -12,10 +12,13 @@ #include "vnic_wq.h" #include "vnic_rq.h" -#define ENIC_MIN_WQ_DESCS 64 -#define ENIC_MAX_WQ_DESCS 4096 -#define ENIC_MIN_RQ_DESCS 64 -#define ENIC_MAX_RQ_DESCS 4096 +#define ENIC_MIN_WQ_DESCS 64 +#define ENIC_MAX_WQ_DESCS_DEFAULT 4096 +#define ENIC_MAX_WQ_DESCS 16384 +#define ENIC_MIN_RQ_DESCS 64 +#define ENIC_MAX_RQ_DESCS 16384 +#define ENIC_MAX_RQ_DESCS_DEFAULT 4096 +#define ENIC_MAX_CQ_DESCS_DEFAULT (64 * 1024) #define ENIC_MIN_MTU ETH_MIN_MTU #define ENIC_MAX_MTU 9000 diff --git a/drivers/net/ethernet/cisco/enic/enic_rq.c b/drivers/net/ethernet/cisco/enic/enic_rq.c index e3228ef7988a..ccbf5c9a21d0 100644 --- a/drivers/net/ethernet/cisco/enic/enic_rq.c +++ b/drivers/net/ethernet/cisco/enic/enic_rq.c @@ -21,14 +21,78 @@ static void enic_intr_update_pkt_size(struct vnic_rx_bytes_counter *pkt_size, pkt_size->small_pkt_bytes_cnt += pkt_len; } -int enic_rq_service(struct vnic_dev *vdev, struct cq_desc *cq_desc, u8 type, - u16 q_number, u16 completed_index, void *opaque) +static void enic_rq_cq_desc_dec(void *cq_desc, u8 cq_desc_size, u8 *type, + u8 *color, u16 *q_number, u16 *completed_index) { - struct enic *enic = vnic_dev_priv(vdev); + /* type_color is the last field for all cq structs */ + u8 type_color; - vnic_rq_service(&enic->rq[q_number].vrq, cq_desc, completed_index, - VNIC_RQ_RETURN_DESC, enic_rq_indicate_buf, opaque); - return 0; + switch (cq_desc_size) { + case VNIC_RQ_CQ_ENTRY_SIZE_16: { + struct 
cq_enet_rq_desc *desc = + (struct cq_enet_rq_desc *)cq_desc; + type_color = desc->type_color; + + /* Make sure color bit is read from desc *before* other fields + * are read from desc. Hardware guarantees color bit is last + * bit (byte) written. Adding the rmb() prevents the compiler + * and/or CPU from reordering the reads which would potentially + * result in reading stale values. + */ + rmb(); + + *q_number = le16_to_cpu(desc->q_number_rss_type_flags) & + CQ_DESC_Q_NUM_MASK; + *completed_index = le16_to_cpu(desc->completed_index_flags) & + CQ_DESC_COMP_NDX_MASK; + break; + } + case VNIC_RQ_CQ_ENTRY_SIZE_32: { + struct cq_enet_rq_desc_32 *desc = + (struct cq_enet_rq_desc_32 *)cq_desc; + type_color = desc->type_color; + + /* Make sure color bit is read from desc *before* other fields + * are read from desc. Hardware guarantees color bit is last + * bit (byte) written. Adding the rmb() prevents the compiler + * and/or CPU from reordering the reads which would potentially + * result in reading stale values. + */ + rmb(); + + *q_number = le16_to_cpu(desc->q_number_rss_type_flags) & + CQ_DESC_Q_NUM_MASK; + *completed_index = le16_to_cpu(desc->completed_index_flags) & + CQ_DESC_COMP_NDX_MASK; + *completed_index |= (desc->fetch_index_flags & CQ_DESC_32_FI_MASK) << + CQ_DESC_COMP_NDX_BITS; + break; + } + case VNIC_RQ_CQ_ENTRY_SIZE_64: { + struct cq_enet_rq_desc_64 *desc = + (struct cq_enet_rq_desc_64 *)cq_desc; + type_color = desc->type_color; + + /* Make sure color bit is read from desc *before* other fields + * are read from desc. Hardware guarantees color bit is last + * bit (byte) written. Adding the rmb() prevents the compiler + * and/or CPU from reordering the reads which would potentially + * result in reading stale values. + */ + rmb(); + + *q_number = le16_to_cpu(desc->q_number_rss_type_flags) & + CQ_DESC_Q_NUM_MASK; + *completed_index = le16_to_cpu(desc->completed_index_flags) & + CQ_DESC_COMP_NDX_MASK; + *completed_index |= (desc->fetch_index_flags & CQ_DESC_64_FI_MASK) << + CQ_DESC_COMP_NDX_BITS; + break; + } + } + + *color = (type_color >> CQ_DESC_COLOR_SHIFT) & CQ_DESC_COLOR_MASK; + *type = type_color & CQ_DESC_TYPE_MASK; } static void enic_rq_set_skb_flags(struct vnic_rq *vrq, u8 type, u32 rss_hash, @@ -101,6 +165,90 @@ static void enic_rq_set_skb_flags(struct vnic_rq *vrq, u8 type, u32 rss_hash, } } +/* + * cq_enet_rq_desc accesses section uses only the 1st 15 bytes of the cq which + * is identical for all type (16,32 and 64 byte) of cqs. + */ +static void cq_enet_rq_desc_dec(struct cq_enet_rq_desc *desc, u8 *ingress_port, + u8 *fcoe, u8 *eop, u8 *sop, u8 *rss_type, + u8 *csum_not_calc, u32 *rss_hash, + u16 *bytes_written, u8 *packet_error, + u8 *vlan_stripped, u16 *vlan_tci, + u16 *checksum, u8 *fcoe_sof, + u8 *fcoe_fc_crc_ok, u8 *fcoe_enc_error, + u8 *fcoe_eof, u8 *tcp_udp_csum_ok, u8 *udp, + u8 *tcp, u8 *ipv4_csum_ok, u8 *ipv6, u8 *ipv4, + u8 *ipv4_fragment, u8 *fcs_ok) +{ + u16 completed_index_flags; + u16 q_number_rss_type_flags; + u16 bytes_written_flags; + + completed_index_flags = le16_to_cpu(desc->completed_index_flags); + q_number_rss_type_flags = + le16_to_cpu(desc->q_number_rss_type_flags); + bytes_written_flags = le16_to_cpu(desc->bytes_written_flags); + + *ingress_port = (completed_index_flags & + CQ_ENET_RQ_DESC_FLAGS_INGRESS_PORT) ? 1 : 0; + *fcoe = (completed_index_flags & CQ_ENET_RQ_DESC_FLAGS_FCOE) ? + 1 : 0; + *eop = (completed_index_flags & CQ_ENET_RQ_DESC_FLAGS_EOP) ? + 1 : 0; + *sop = (completed_index_flags & CQ_ENET_RQ_DESC_FLAGS_SOP) ? 
+ 1 : 0; + + *rss_type = (u8)((q_number_rss_type_flags >> CQ_DESC_Q_NUM_BITS) & + CQ_ENET_RQ_DESC_RSS_TYPE_MASK); + *csum_not_calc = (q_number_rss_type_flags & + CQ_ENET_RQ_DESC_FLAGS_CSUM_NOT_CALC) ? 1 : 0; + + *rss_hash = le32_to_cpu(desc->rss_hash); + + *bytes_written = bytes_written_flags & + CQ_ENET_RQ_DESC_BYTES_WRITTEN_MASK; + *packet_error = (bytes_written_flags & + CQ_ENET_RQ_DESC_FLAGS_TRUNCATED) ? 1 : 0; + *vlan_stripped = (bytes_written_flags & + CQ_ENET_RQ_DESC_FLAGS_VLAN_STRIPPED) ? 1 : 0; + + /* + * Tag Control Information(16) = user_priority(3) + cfi(1) + vlan(12) + */ + *vlan_tci = le16_to_cpu(desc->vlan); + + if (*fcoe) { + *fcoe_sof = (u8)(le16_to_cpu(desc->checksum_fcoe) & + CQ_ENET_RQ_DESC_FCOE_SOF_MASK); + *fcoe_fc_crc_ok = (desc->flags & + CQ_ENET_RQ_DESC_FCOE_FC_CRC_OK) ? 1 : 0; + *fcoe_enc_error = (desc->flags & + CQ_ENET_RQ_DESC_FCOE_ENC_ERROR) ? 1 : 0; + *fcoe_eof = (u8)((le16_to_cpu(desc->checksum_fcoe) >> + CQ_ENET_RQ_DESC_FCOE_EOF_SHIFT) & + CQ_ENET_RQ_DESC_FCOE_EOF_MASK); + *checksum = 0; + } else { + *fcoe_sof = 0; + *fcoe_fc_crc_ok = 0; + *fcoe_enc_error = 0; + *fcoe_eof = 0; + *checksum = le16_to_cpu(desc->checksum_fcoe); + } + + *tcp_udp_csum_ok = + (desc->flags & CQ_ENET_RQ_DESC_FLAGS_TCP_UDP_CSUM_OK) ? 1 : 0; + *udp = (desc->flags & CQ_ENET_RQ_DESC_FLAGS_UDP) ? 1 : 0; + *tcp = (desc->flags & CQ_ENET_RQ_DESC_FLAGS_TCP) ? 1 : 0; + *ipv4_csum_ok = + (desc->flags & CQ_ENET_RQ_DESC_FLAGS_IPV4_CSUM_OK) ? 1 : 0; + *ipv6 = (desc->flags & CQ_ENET_RQ_DESC_FLAGS_IPV6) ? 1 : 0; + *ipv4 = (desc->flags & CQ_ENET_RQ_DESC_FLAGS_IPV4) ? 1 : 0; + *ipv4_fragment = + (desc->flags & CQ_ENET_RQ_DESC_FLAGS_IPV4_FRAGMENT) ? 1 : 0; + *fcs_ok = (desc->flags & CQ_ENET_RQ_DESC_FLAGS_FCS_OK) ? 1 : 0; +} + static bool enic_rq_pkt_error(struct vnic_rq *vrq, u8 packet_error, u8 fcs_ok, u16 bytes_written) { @@ -165,37 +313,32 @@ void enic_free_rq_buf(struct vnic_rq *rq, struct vnic_rq_buf *buf) buf->os_buf = NULL; } -void enic_rq_indicate_buf(struct vnic_rq *rq, struct cq_desc *cq_desc, - struct vnic_rq_buf *buf, int skipped, void *opaque) +static void enic_rq_indicate_buf(struct enic *enic, struct vnic_rq *rq, + struct vnic_rq_buf *buf, void *cq_desc, + u8 type, u16 q_number, u16 completed_index) { - struct enic *enic = vnic_dev_priv(rq->vdev); struct sk_buff *skb; struct vnic_cq *cq = &enic->cq[enic_cq_rq(enic, rq->index)]; struct enic_rq_stats *rqstats = &enic->rq[rq->index].stats; struct napi_struct *napi; - u8 type, color, eop, sop, ingress_port, vlan_stripped; + u8 eop, sop, ingress_port, vlan_stripped; u8 fcoe, fcoe_sof, fcoe_fc_crc_ok, fcoe_enc_error, fcoe_eof; u8 tcp_udp_csum_ok, udp, tcp, ipv4_csum_ok; u8 ipv6, ipv4, ipv4_fragment, fcs_ok, rss_type, csum_not_calc; u8 packet_error; - u16 q_number, completed_index, bytes_written, vlan_tci, checksum; + u16 bytes_written, vlan_tci, checksum; u32 rss_hash; rqstats->packets++; - if (skipped) { - rqstats->desc_skip++; - return; - } - cq_enet_rq_desc_dec((struct cq_enet_rq_desc *)cq_desc, &type, &color, - &q_number, &completed_index, &ingress_port, &fcoe, - &eop, &sop, &rss_type, &csum_not_calc, &rss_hash, - &bytes_written, &packet_error, &vlan_stripped, - &vlan_tci, &checksum, &fcoe_sof, &fcoe_fc_crc_ok, - &fcoe_enc_error, &fcoe_eof, &tcp_udp_csum_ok, &udp, - &tcp, &ipv4_csum_ok, &ipv6, &ipv4, &ipv4_fragment, - &fcs_ok); + cq_enet_rq_desc_dec((struct cq_enet_rq_desc *)cq_desc, &ingress_port, + &fcoe, &eop, &sop, &rss_type, &csum_not_calc, + &rss_hash, &bytes_written, &packet_error, + &vlan_stripped, &vlan_tci, &checksum, &fcoe_sof, 
+ &fcoe_fc_crc_ok, &fcoe_enc_error, &fcoe_eof, + &tcp_udp_csum_ok, &udp, &tcp, &ipv4_csum_ok, &ipv6, + &ipv4, &ipv4_fragment, &fcs_ok); if (enic_rq_pkt_error(rq, packet_error, fcs_ok, bytes_written)) return; @@ -240,3 +383,54 @@ void enic_rq_indicate_buf(struct vnic_rq *rq, struct cq_desc *cq_desc, rqstats->pkt_truncated++; } } + +static void enic_rq_service(struct enic *enic, void *cq_desc, u8 type, + u16 q_number, u16 completed_index) +{ + struct enic_rq_stats *rqstats = &enic->rq[q_number].stats; + struct vnic_rq *vrq = &enic->rq[q_number].vrq; + struct vnic_rq_buf *vrq_buf = vrq->to_clean; + int skipped; + + while (1) { + skipped = (vrq_buf->index != completed_index); + if (!skipped) + enic_rq_indicate_buf(enic, vrq, vrq_buf, cq_desc, type, + q_number, completed_index); + else + rqstats->desc_skip++; + + vrq->ring.desc_avail++; + vrq->to_clean = vrq_buf->next; + vrq_buf = vrq_buf->next; + if (!skipped) + break; + } +} + +unsigned int enic_rq_cq_service(struct enic *enic, unsigned int cq_index, + unsigned int work_to_do) +{ + struct vnic_cq *cq = &enic->cq[cq_index]; + void *cq_desc = vnic_cq_to_clean(cq); + u16 q_number, completed_index; + unsigned int work_done = 0; + u8 type, color; + + enic_rq_cq_desc_dec(cq_desc, enic->ext_cq, &type, &color, &q_number, + &completed_index); + + while (color != cq->last_color) { + enic_rq_service(enic, cq_desc, type, q_number, completed_index); + vnic_cq_inc_to_clean(cq); + + if (++work_done >= work_to_do) + break; + + cq_desc = vnic_cq_to_clean(cq); + enic_rq_cq_desc_dec(cq_desc, enic->ext_cq, &type, &color, + &q_number, &completed_index); + } + + return work_done; +} diff --git a/drivers/net/ethernet/cisco/enic/enic_rq.h b/drivers/net/ethernet/cisco/enic/enic_rq.h index a75d07562686..98476a7297af 100644 --- a/drivers/net/ethernet/cisco/enic/enic_rq.h +++ b/drivers/net/ethernet/cisco/enic/enic_rq.h @@ -2,9 +2,7 @@ * Copyright 2024 Cisco Systems, Inc. All rights reserved. */ -int enic_rq_service(struct vnic_dev *vdev, struct cq_desc *cq_desc, u8 type, - u16 q_number, u16 completed_index, void *opaque); -void enic_rq_indicate_buf(struct vnic_rq *rq, struct cq_desc *cq_desc, - struct vnic_rq_buf *buf, int skipped, void *opaque); +unsigned int enic_rq_cq_service(struct enic *enic, unsigned int cq_index, + unsigned int work_to_do); int enic_rq_alloc_buf(struct vnic_rq *rq); void enic_free_rq_buf(struct vnic_rq *rq, struct vnic_rq_buf *buf); diff --git a/drivers/net/ethernet/cisco/enic/enic_wq.c b/drivers/net/ethernet/cisco/enic/enic_wq.c new file mode 100644 index 000000000000..07936f8b4231 --- /dev/null +++ b/drivers/net/ethernet/cisco/enic/enic_wq.c @@ -0,0 +1,117 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright 2025 Cisco Systems, Inc. All rights reserved. + +#include <net/netdev_queues.h> +#include "enic_res.h" +#include "enic.h" +#include "enic_wq.h" + +#define ENET_CQ_DESC_COMP_NDX_BITS 14 +#define ENET_CQ_DESC_COMP_NDX_MASK GENMASK(ENET_CQ_DESC_COMP_NDX_BITS - 1, 0) + +static void enic_wq_cq_desc_dec(const struct cq_desc *desc_arg, bool ext_wq, + u8 *type, u8 *color, u16 *q_number, + u16 *completed_index) +{ + const struct cq_desc *desc = desc_arg; + const u8 type_color = desc->type_color; + + *color = (type_color >> CQ_DESC_COLOR_SHIFT) & CQ_DESC_COLOR_MASK; + + /* + * Make sure color bit is read from desc *before* other fields + * are read from desc. Hardware guarantees color bit is last + * bit (byte) written. 
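+ * The color bit alternates on each pass around the ring (see the
+ * last_color toggle in vnic_cq_inc_to_clean()), so comparing it with
+ * cq->last_color is what tells the driver whether this descriptor
+ * has been written at all.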
Adding the rmb() prevents the compiler + * and/or CPU from reordering the reads which would potentially + * result in reading stale values. + */ + rmb(); + + *type = type_color & CQ_DESC_TYPE_MASK; + *q_number = le16_to_cpu(desc->q_number) & CQ_DESC_Q_NUM_MASK; + + if (ext_wq) + *completed_index = le16_to_cpu(desc->completed_index) & + ENET_CQ_DESC_COMP_NDX_MASK; + else + *completed_index = le16_to_cpu(desc->completed_index) & + CQ_DESC_COMP_NDX_MASK; +} + +void enic_free_wq_buf(struct vnic_wq *wq, struct vnic_wq_buf *buf) +{ + struct enic *enic = vnic_dev_priv(wq->vdev); + + if (buf->sop) + dma_unmap_single(&enic->pdev->dev, buf->dma_addr, buf->len, + DMA_TO_DEVICE); + else + dma_unmap_page(&enic->pdev->dev, buf->dma_addr, buf->len, + DMA_TO_DEVICE); + + if (buf->os_buf) + dev_kfree_skb_any(buf->os_buf); +} + +static void enic_wq_free_buf(struct vnic_wq *wq, struct cq_desc *cq_desc, + struct vnic_wq_buf *buf, void *opaque) +{ + struct enic *enic = vnic_dev_priv(wq->vdev); + + enic->wq[wq->index].stats.cq_work++; + enic->wq[wq->index].stats.cq_bytes += buf->len; + enic_free_wq_buf(wq, buf); +} + +static void enic_wq_service(struct vnic_dev *vdev, struct cq_desc *cq_desc, + u8 type, u16 q_number, u16 completed_index) +{ + struct enic *enic = vnic_dev_priv(vdev); + + spin_lock(&enic->wq[q_number].lock); + + vnic_wq_service(&enic->wq[q_number].vwq, cq_desc, + completed_index, enic_wq_free_buf, NULL); + + if (netif_tx_queue_stopped(netdev_get_tx_queue(enic->netdev, q_number)) + && vnic_wq_desc_avail(&enic->wq[q_number].vwq) >= + (MAX_SKB_FRAGS + ENIC_DESC_MAX_SPLITS)) { + netif_wake_subqueue(enic->netdev, q_number); + enic->wq[q_number].stats.wake++; + } + + spin_unlock(&enic->wq[q_number].lock); +} + +unsigned int enic_wq_cq_service(struct enic *enic, unsigned int cq_index, + unsigned int work_to_do) +{ + struct vnic_cq *cq = &enic->cq[cq_index]; + u16 q_number, completed_index; + unsigned int work_done = 0; + struct cq_desc *cq_desc; + u8 type, color; + bool ext_wq; + + ext_wq = cq->ring.size > ENIC_MAX_WQ_DESCS_DEFAULT; + + cq_desc = (struct cq_desc *)vnic_cq_to_clean(cq); + enic_wq_cq_desc_dec(cq_desc, ext_wq, &type, &color, + &q_number, &completed_index); + + while (color != cq->last_color) { + enic_wq_service(cq->vdev, cq_desc, type, q_number, + completed_index); + + vnic_cq_inc_to_clean(cq); + + if (++work_done >= work_to_do) + break; + + cq_desc = (struct cq_desc *)vnic_cq_to_clean(cq); + enic_wq_cq_desc_dec(cq_desc, ext_wq, &type, &color, + &q_number, &completed_index); + } + + return work_done; +} diff --git a/drivers/net/ethernet/cisco/enic/enic_wq.h b/drivers/net/ethernet/cisco/enic/enic_wq.h new file mode 100644 index 000000000000..12acb3f2fbc9 --- /dev/null +++ b/drivers/net/ethernet/cisco/enic/enic_wq.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-only + * Copyright 2025 Cisco Systems, Inc. All rights reserved. 
+ */ + +void enic_free_wq_buf(struct vnic_wq *wq, struct vnic_wq_buf *buf); +unsigned int enic_wq_cq_service(struct enic *enic, unsigned int cq_index, + unsigned int work_to_do); diff --git a/drivers/net/ethernet/cisco/enic/vnic_cq.h b/drivers/net/ethernet/cisco/enic/vnic_cq.h index eed5bf59e5d2..0e37f5d5e527 100644 --- a/drivers/net/ethernet/cisco/enic/vnic_cq.h +++ b/drivers/net/ethernet/cisco/enic/vnic_cq.h @@ -56,45 +56,18 @@ struct vnic_cq { ktime_t prev_ts; }; -static inline unsigned int vnic_cq_service(struct vnic_cq *cq, - unsigned int work_to_do, - int (*q_service)(struct vnic_dev *vdev, struct cq_desc *cq_desc, - u8 type, u16 q_number, u16 completed_index, void *opaque), - void *opaque) +static inline void *vnic_cq_to_clean(struct vnic_cq *cq) { - struct cq_desc *cq_desc; - unsigned int work_done = 0; - u16 q_number, completed_index; - u8 type, color; - - cq_desc = (struct cq_desc *)((u8 *)cq->ring.descs + - cq->ring.desc_size * cq->to_clean); - cq_desc_dec(cq_desc, &type, &color, - &q_number, &completed_index); - - while (color != cq->last_color) { - - if ((*q_service)(cq->vdev, cq_desc, type, - q_number, completed_index, opaque)) - break; - - cq->to_clean++; - if (cq->to_clean == cq->ring.desc_count) { - cq->to_clean = 0; - cq->last_color = cq->last_color ? 0 : 1; - } - - cq_desc = (struct cq_desc *)((u8 *)cq->ring.descs + - cq->ring.desc_size * cq->to_clean); - cq_desc_dec(cq_desc, &type, &color, - &q_number, &completed_index); + return ((u8 *)cq->ring.descs + cq->ring.desc_size * cq->to_clean); +} - work_done++; - if (work_done >= work_to_do) - break; +static inline void vnic_cq_inc_to_clean(struct vnic_cq *cq) +{ + cq->to_clean++; + if (cq->to_clean == cq->ring.desc_count) { + cq->to_clean = 0; + cq->last_color = cq->last_color ? 0 : 1; } - - return work_done; } void vnic_cq_free(struct vnic_cq *cq); diff --git a/drivers/net/ethernet/cisco/enic/vnic_devcmd.h b/drivers/net/ethernet/cisco/enic/vnic_devcmd.h index db56d778877a..605ef17f967e 100644 --- a/drivers/net/ethernet/cisco/enic/vnic_devcmd.h +++ b/drivers/net/ethernet/cisco/enic/vnic_devcmd.h @@ -436,6 +436,25 @@ enum vnic_devcmd_cmd { * in: (u16) a2 = unsigned short int port information */ CMD_OVERLAY_OFFLOAD_CFG = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 73), + + /* + * Set extended CQ field in MREGS of RQ (or all RQs) + * for given vNIC + * in: (u64) a0 = RQ selection (VNIC_RQ_ALL for all RQs) + * (u32) a1 = CQ entry size + * VNIC_RQ_CQ_ENTRY_SIZE_16 --> 16 bytes + * VNIC_RQ_CQ_ENTRY_SIZE_32 --> 32 bytes + * VNIC_RQ_CQ_ENTRY_SIZE_64 --> 64 bytes + * + * Capability query: + * out: (u32) a0 = errno, 0:valid cmd + * (u32) a1 = value consisting of supported entries + * bit 0: 16 bytes + * bit 1: 32 bytes + * bit 2: 64 bytes + */ + CMD_CQ_ENTRY_SIZE_SET = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 90), + }; /* CMD_ENABLE2 flags */ diff --git a/drivers/net/ethernet/cisco/enic/vnic_enet.h b/drivers/net/ethernet/cisco/enic/vnic_enet.h index 5acc236069de..9e8e86262a3f 100644 --- a/drivers/net/ethernet/cisco/enic/vnic_enet.h +++ b/drivers/net/ethernet/cisco/enic/vnic_enet.h @@ -21,6 +21,11 @@ struct vnic_enet_config { u16 loop_tag; u16 vf_rq_count; u16 num_arfs; + u8 reserved[66]; + u32 max_rq_ring; // MAX RQ ring size + u32 max_wq_ring; // MAX WQ ring size + u32 max_cq_ring; // MAX CQ ring size + u32 rdma_rsvd_lkey; // Reserved (privileged) LKey }; #define VENETF_TSO 0x1 /* TSO enabled */ diff --git a/drivers/net/ethernet/cisco/enic/vnic_rq.h b/drivers/net/ethernet/cisco/enic/vnic_rq.h index 2ee4be2b9a34..a1cdd729caec 100644 --- 
a/drivers/net/ethernet/cisco/enic/vnic_rq.h +++ b/drivers/net/ethernet/cisco/enic/vnic_rq.h @@ -50,7 +50,7 @@ struct vnic_rq_ctrl { (VNIC_RQ_BUF_BLK_ENTRIES(entries) * sizeof(struct vnic_rq_buf)) #define VNIC_RQ_BUF_BLKS_NEEDED(entries) \ DIV_ROUND_UP(entries, VNIC_RQ_BUF_BLK_ENTRIES(entries)) -#define VNIC_RQ_BUF_BLKS_MAX VNIC_RQ_BUF_BLKS_NEEDED(4096) +#define VNIC_RQ_BUF_BLKS_MAX VNIC_RQ_BUF_BLKS_NEEDED(16384) struct vnic_rq_buf { struct vnic_rq_buf *next; diff --git a/drivers/net/ethernet/cisco/enic/vnic_wq.h b/drivers/net/ethernet/cisco/enic/vnic_wq.h index 75c526911074..3bb4758100ba 100644 --- a/drivers/net/ethernet/cisco/enic/vnic_wq.h +++ b/drivers/net/ethernet/cisco/enic/vnic_wq.h @@ -62,7 +62,7 @@ struct vnic_wq_buf { (VNIC_WQ_BUF_BLK_ENTRIES(entries) * sizeof(struct vnic_wq_buf)) #define VNIC_WQ_BUF_BLKS_NEEDED(entries) \ DIV_ROUND_UP(entries, VNIC_WQ_BUF_BLK_ENTRIES(entries)) -#define VNIC_WQ_BUF_BLKS_MAX VNIC_WQ_BUF_BLKS_NEEDED(4096) +#define VNIC_WQ_BUF_BLKS_MAX VNIC_WQ_BUF_BLKS_NEEDED(16384) struct vnic_wq { unsigned int index; diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h index e48b861e4ce1..270ff9aab335 100644 --- a/drivers/net/ethernet/emulex/benet/be.h +++ b/drivers/net/ethernet/emulex/benet/be.h @@ -562,7 +562,7 @@ struct be_adapter { struct be_dma_mem mbox_mem_alloced; struct be_mcc_obj mcc_obj; - struct mutex mcc_lock; /* For serializing mcc cmds to BE card */ + spinlock_t mcc_lock; /* For serializing mcc cmds to BE card */ spinlock_t mcc_cq_lock; u16 cfg_num_rx_irqs; /* configured via set-channels */ diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index 61adcebeef01..51b8377edd1d 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -575,7 +575,7 @@ int be_process_mcc(struct be_adapter *adapter) /* Wait till no more pending mcc requests are present */ static int be_mcc_wait_compl(struct be_adapter *adapter) { -#define mcc_timeout 12000 /* 12s timeout */ +#define mcc_timeout 120000 /* 12s timeout */ int i, status = 0; struct be_mcc_obj *mcc_obj = &adapter->mcc_obj; @@ -589,7 +589,7 @@ static int be_mcc_wait_compl(struct be_adapter *adapter) if (atomic_read(&mcc_obj->q.used) == 0) break; - usleep_range(500, 1000); + udelay(100); } if (i == mcc_timeout) { dev_err(&adapter->pdev->dev, "FW not responding\n"); @@ -866,7 +866,7 @@ static bool use_mcc(struct be_adapter *adapter) static int be_cmd_lock(struct be_adapter *adapter) { if (use_mcc(adapter)) { - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); return 0; } else { return mutex_lock_interruptible(&adapter->mbox_lock); @@ -877,7 +877,7 @@ static int be_cmd_lock(struct be_adapter *adapter) static void be_cmd_unlock(struct be_adapter *adapter) { if (use_mcc(adapter)) - return mutex_unlock(&adapter->mcc_lock); + return spin_unlock_bh(&adapter->mcc_lock); else return mutex_unlock(&adapter->mbox_lock); } @@ -1047,7 +1047,7 @@ int be_cmd_mac_addr_query(struct be_adapter *adapter, u8 *mac_addr, struct be_cmd_req_mac_query *req; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -1076,7 +1076,7 @@ int be_cmd_mac_addr_query(struct be_adapter *adapter, u8 *mac_addr, } err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -1088,7 +1088,7 @@ int be_cmd_pmac_add(struct be_adapter *adapter, const u8 *mac_addr, struct be_cmd_req_pmac_add 
*req; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -1113,7 +1113,7 @@ int be_cmd_pmac_add(struct be_adapter *adapter, const u8 *mac_addr, } err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); if (base_status(status) == MCC_STATUS_UNAUTHORIZED_REQUEST) status = -EPERM; @@ -1131,7 +1131,7 @@ int be_cmd_pmac_del(struct be_adapter *adapter, u32 if_id, int pmac_id, u32 dom) if (pmac_id == -1) return 0; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -1151,7 +1151,7 @@ int be_cmd_pmac_del(struct be_adapter *adapter, u32 if_id, int pmac_id, u32 dom) status = be_mcc_notify_wait(adapter); err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -1414,7 +1414,7 @@ int be_cmd_rxq_create(struct be_adapter *adapter, struct be_dma_mem *q_mem = &rxq->dma_mem; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -1444,7 +1444,7 @@ int be_cmd_rxq_create(struct be_adapter *adapter, } err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -1508,7 +1508,7 @@ int be_cmd_rxq_destroy(struct be_adapter *adapter, struct be_queue_info *q) struct be_cmd_req_q_destroy *req; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -1525,7 +1525,7 @@ int be_cmd_rxq_destroy(struct be_adapter *adapter, struct be_queue_info *q) q->created = false; err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -1593,7 +1593,7 @@ int be_cmd_get_stats(struct be_adapter *adapter, struct be_dma_mem *nonemb_cmd) struct be_cmd_req_hdr *hdr; int status = 0; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -1621,7 +1621,7 @@ int be_cmd_get_stats(struct be_adapter *adapter, struct be_dma_mem *nonemb_cmd) adapter->stats_cmd_sent = true; err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -1637,7 +1637,7 @@ int lancer_cmd_get_pport_stats(struct be_adapter *adapter, CMD_SUBSYSTEM_ETH)) return -EPERM; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -1660,7 +1660,7 @@ int lancer_cmd_get_pport_stats(struct be_adapter *adapter, adapter->stats_cmd_sent = true; err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -1697,7 +1697,7 @@ int be_cmd_link_status_query(struct be_adapter *adapter, u16 *link_speed, struct be_cmd_req_link_status *req; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); if (link_status) *link_status = LINK_DOWN; @@ -1736,7 +1736,7 @@ int be_cmd_link_status_query(struct be_adapter *adapter, u16 *link_speed, } err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -1747,7 +1747,7 @@ int be_cmd_get_die_temperature(struct be_adapter *adapter) struct be_cmd_req_get_cntl_addnl_attribs *req; int status = 0; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -1762,7 +1762,7 @@ int be_cmd_get_die_temperature(struct be_adapter *adapter) status = be_mcc_notify(adapter); err: - mutex_unlock(&adapter->mcc_lock); + 
spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -1811,7 +1811,7 @@ int be_cmd_get_fat_dump(struct be_adapter *adapter, u32 buf_len, void *buf) if (!get_fat_cmd.va) return -ENOMEM; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); while (total_size) { buf_size = min(total_size, (u32)60 * 1024); @@ -1849,9 +1849,9 @@ int be_cmd_get_fat_dump(struct be_adapter *adapter, u32 buf_len, void *buf) log_offset += buf_size; } err: + spin_unlock_bh(&adapter->mcc_lock); dma_free_coherent(&adapter->pdev->dev, get_fat_cmd.size, get_fat_cmd.va, get_fat_cmd.dma); - mutex_unlock(&adapter->mcc_lock); return status; } @@ -1862,7 +1862,7 @@ int be_cmd_get_fw_ver(struct be_adapter *adapter) struct be_cmd_req_get_fw_version *req; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -1885,7 +1885,7 @@ int be_cmd_get_fw_ver(struct be_adapter *adapter) sizeof(adapter->fw_on_flash)); } err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -1899,7 +1899,7 @@ static int __be_cmd_modify_eqd(struct be_adapter *adapter, struct be_cmd_req_modify_eq_delay *req; int status = 0, i; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -1922,7 +1922,7 @@ static int __be_cmd_modify_eqd(struct be_adapter *adapter, status = be_mcc_notify(adapter); err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -1949,7 +1949,7 @@ int be_cmd_vlan_config(struct be_adapter *adapter, u32 if_id, u16 *vtag_array, struct be_cmd_req_vlan_config *req; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -1971,7 +1971,7 @@ int be_cmd_vlan_config(struct be_adapter *adapter, u32 if_id, u16 *vtag_array, status = be_mcc_notify_wait(adapter); err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -1982,7 +1982,7 @@ static int __be_cmd_rx_filter(struct be_adapter *adapter, u32 flags, u32 value) struct be_cmd_req_rx_filter *req = mem->va; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -2015,7 +2015,7 @@ static int __be_cmd_rx_filter(struct be_adapter *adapter, u32 flags, u32 value) status = be_mcc_notify_wait(adapter); err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -2046,7 +2046,7 @@ int be_cmd_set_flow_control(struct be_adapter *adapter, u32 tx_fc, u32 rx_fc) CMD_SUBSYSTEM_COMMON)) return -EPERM; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -2066,7 +2066,7 @@ int be_cmd_set_flow_control(struct be_adapter *adapter, u32 tx_fc, u32 rx_fc) status = be_mcc_notify_wait(adapter); err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); if (base_status(status) == MCC_STATUS_FEATURE_NOT_SUPPORTED) return -EOPNOTSUPP; @@ -2085,7 +2085,7 @@ int be_cmd_get_flow_control(struct be_adapter *adapter, u32 *tx_fc, u32 *rx_fc) CMD_SUBSYSTEM_COMMON)) return -EPERM; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -2108,7 +2108,7 @@ int be_cmd_get_flow_control(struct be_adapter *adapter, u32 *tx_fc, u32 *rx_fc) } err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -2189,7 +2189,7 
@@ int be_cmd_rss_config(struct be_adapter *adapter, u8 *rsstable, if (!(be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS)) return 0; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -2214,7 +2214,7 @@ int be_cmd_rss_config(struct be_adapter *adapter, u8 *rsstable, status = be_mcc_notify_wait(adapter); err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -2226,7 +2226,7 @@ int be_cmd_set_beacon_state(struct be_adapter *adapter, u8 port_num, struct be_cmd_req_enable_disable_beacon *req; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -2247,7 +2247,7 @@ int be_cmd_set_beacon_state(struct be_adapter *adapter, u8 port_num, status = be_mcc_notify_wait(adapter); err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -2258,7 +2258,7 @@ int be_cmd_get_beacon_state(struct be_adapter *adapter, u8 port_num, u32 *state) struct be_cmd_req_get_beacon_state *req; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -2282,7 +2282,7 @@ int be_cmd_get_beacon_state(struct be_adapter *adapter, u8 port_num, u32 *state) } err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -2306,7 +2306,7 @@ int be_cmd_read_port_transceiver_data(struct be_adapter *adapter, return -ENOMEM; } - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -2328,7 +2328,7 @@ int be_cmd_read_port_transceiver_data(struct be_adapter *adapter, memcpy(data, resp->page_data + off, len); } err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); dma_free_coherent(&adapter->pdev->dev, cmd.size, cmd.va, cmd.dma); return status; } @@ -2345,7 +2345,7 @@ static int lancer_cmd_write_object(struct be_adapter *adapter, void *ctxt = NULL; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); adapter->flash_status = 0; wrb = wrb_from_mccq(adapter); @@ -2387,7 +2387,7 @@ static int lancer_cmd_write_object(struct be_adapter *adapter, if (status) goto err_unlock; - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); if (!wait_for_completion_timeout(&adapter->et_cmd_compl, msecs_to_jiffies(60000))) @@ -2406,7 +2406,7 @@ static int lancer_cmd_write_object(struct be_adapter *adapter, return status; err_unlock: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -2460,7 +2460,7 @@ static int lancer_cmd_delete_object(struct be_adapter *adapter, struct be_mcc_wrb *wrb; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -2478,7 +2478,7 @@ static int lancer_cmd_delete_object(struct be_adapter *adapter, status = be_mcc_notify_wait(adapter); err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -2491,7 +2491,7 @@ int lancer_cmd_read_object(struct be_adapter *adapter, struct be_dma_mem *cmd, struct lancer_cmd_resp_read_object *resp; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -2525,7 +2525,7 @@ int lancer_cmd_read_object(struct be_adapter *adapter, struct be_dma_mem *cmd, } err_unlock: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); 
return status; } @@ -2537,7 +2537,7 @@ static int be_cmd_write_flashrom(struct be_adapter *adapter, struct be_cmd_write_flashrom *req; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); adapter->flash_status = 0; wrb = wrb_from_mccq(adapter); @@ -2562,7 +2562,7 @@ static int be_cmd_write_flashrom(struct be_adapter *adapter, if (status) goto err_unlock; - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); if (!wait_for_completion_timeout(&adapter->et_cmd_compl, msecs_to_jiffies(40000))) @@ -2573,7 +2573,7 @@ static int be_cmd_write_flashrom(struct be_adapter *adapter, return status; err_unlock: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -2584,7 +2584,7 @@ static int be_cmd_get_flash_crc(struct be_adapter *adapter, u8 *flashed_crc, struct be_mcc_wrb *wrb; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -2611,7 +2611,7 @@ static int be_cmd_get_flash_crc(struct be_adapter *adapter, u8 *flashed_crc, memcpy(flashed_crc, req->crc, 4); err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -3217,7 +3217,7 @@ int be_cmd_enable_magic_wol(struct be_adapter *adapter, u8 *mac, struct be_cmd_req_acpi_wol_magic_config *req; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3234,7 +3234,7 @@ int be_cmd_enable_magic_wol(struct be_adapter *adapter, u8 *mac, status = be_mcc_notify_wait(adapter); err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -3249,7 +3249,7 @@ int be_cmd_set_loopback(struct be_adapter *adapter, u8 port_num, CMD_SUBSYSTEM_LOWLEVEL)) return -EPERM; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3272,7 +3272,7 @@ int be_cmd_set_loopback(struct be_adapter *adapter, u8 port_num, if (status) goto err_unlock; - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); if (!wait_for_completion_timeout(&adapter->et_cmd_compl, msecs_to_jiffies(SET_LB_MODE_TIMEOUT))) @@ -3281,7 +3281,7 @@ int be_cmd_set_loopback(struct be_adapter *adapter, u8 port_num, return status; err_unlock: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -3298,7 +3298,7 @@ int be_cmd_loopback_test(struct be_adapter *adapter, u32 port_num, CMD_SUBSYSTEM_LOWLEVEL)) return -EPERM; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3324,7 +3324,7 @@ int be_cmd_loopback_test(struct be_adapter *adapter, u32 port_num, if (status) goto err; - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); wait_for_completion(&adapter->et_cmd_compl); resp = embedded_payload(wrb); @@ -3332,7 +3332,7 @@ int be_cmd_loopback_test(struct be_adapter *adapter, u32 port_num, return status; err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -3348,7 +3348,7 @@ int be_cmd_ddr_dma_test(struct be_adapter *adapter, u64 pattern, CMD_SUBSYSTEM_LOWLEVEL)) return -EPERM; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3382,7 +3382,7 @@ int be_cmd_ddr_dma_test(struct be_adapter *adapter, u64 pattern, } err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } 
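The hunks above and below all make the same conversion: the MCC command path now serializes on a BH-disabling spinlock instead of a sleeping mutex, so nothing inside the locked region may sleep. A minimal sketch of the resulting pattern, reusing names from the surrounding hunks (command-specific payload setup elided):

	spin_lock_bh(&adapter->mcc_lock);	/* serialize MCC command posting */
	wrb = wrb_from_mccq(adapter);		/* reserve a work request block */
	if (!wrb) {
		status = -EBUSY;
		goto err;
	}
	/* ... build the command payload in wrb ... */
	status = be_mcc_notify_wait(adapter);	/* post and poll for completion */
err:
	spin_unlock_bh(&adapter->mcc_lock);

The same constraint explains the be_mcc_wait_compl() change earlier in the file: usleep_range() may not be called in the atomic context the spinlock creates, so the wait became a udelay(100) poll, with the iteration count raised to 120000 to preserve the 12 s budget.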
@@ -3393,7 +3393,7 @@ int be_cmd_get_seeprom_data(struct be_adapter *adapter, struct be_cmd_req_seeprom_read *req; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3409,7 +3409,7 @@ int be_cmd_get_seeprom_data(struct be_adapter *adapter, status = be_mcc_notify_wait(adapter); err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -3424,7 +3424,7 @@ int be_cmd_get_phy_info(struct be_adapter *adapter) CMD_SUBSYSTEM_COMMON)) return -EPERM; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3469,7 +3469,7 @@ int be_cmd_get_phy_info(struct be_adapter *adapter) } dma_free_coherent(&adapter->pdev->dev, cmd.size, cmd.va, cmd.dma); err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -3479,7 +3479,7 @@ static int be_cmd_set_qos(struct be_adapter *adapter, u32 bps, u32 domain) struct be_cmd_req_set_qos *req; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3499,7 +3499,7 @@ static int be_cmd_set_qos(struct be_adapter *adapter, u32 bps, u32 domain) status = be_mcc_notify_wait(adapter); err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -3611,7 +3611,7 @@ int be_cmd_get_fn_privileges(struct be_adapter *adapter, u32 *privilege, struct be_cmd_req_get_fn_privileges *req; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3643,7 +3643,7 @@ int be_cmd_get_fn_privileges(struct be_adapter *adapter, u32 *privilege, } err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -3655,7 +3655,7 @@ int be_cmd_set_fn_privileges(struct be_adapter *adapter, u32 privileges, struct be_cmd_req_set_fn_privileges *req; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3675,7 +3675,7 @@ int be_cmd_set_fn_privileges(struct be_adapter *adapter, u32 privileges, status = be_mcc_notify_wait(adapter); err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -3707,7 +3707,7 @@ int be_cmd_get_mac_from_list(struct be_adapter *adapter, u8 *mac, return -ENOMEM; } - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3771,7 +3771,7 @@ int be_cmd_get_mac_from_list(struct be_adapter *adapter, u8 *mac, } out: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); dma_free_coherent(&adapter->pdev->dev, get_mac_list_cmd.size, get_mac_list_cmd.va, get_mac_list_cmd.dma); return status; @@ -3831,7 +3831,7 @@ int be_cmd_set_mac_list(struct be_adapter *adapter, u8 *mac_array, if (!cmd.va) return -ENOMEM; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3853,7 +3853,7 @@ int be_cmd_set_mac_list(struct be_adapter *adapter, u8 *mac_array, err: dma_free_coherent(&adapter->pdev->dev, cmd.size, cmd.va, cmd.dma); - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -3889,7 +3889,7 @@ int be_cmd_set_hsw_config(struct be_adapter *adapter, u16 pvid, CMD_SUBSYSTEM_COMMON)) return -EPERM; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = 
wrb_from_mccq(adapter); if (!wrb) { @@ -3930,7 +3930,7 @@ int be_cmd_set_hsw_config(struct be_adapter *adapter, u16 pvid, status = be_mcc_notify_wait(adapter); err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -3944,7 +3944,7 @@ int be_cmd_get_hsw_config(struct be_adapter *adapter, u16 *pvid, int status; u16 vid; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3991,7 +3991,7 @@ int be_cmd_get_hsw_config(struct be_adapter *adapter, u16 *pvid, } err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -4190,7 +4190,7 @@ int be_cmd_set_ext_fat_capabilites(struct be_adapter *adapter, struct be_cmd_req_set_ext_fat_caps *req; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -4206,7 +4206,7 @@ int be_cmd_set_ext_fat_capabilites(struct be_adapter *adapter, status = be_mcc_notify_wait(adapter); err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -4684,7 +4684,7 @@ int be_cmd_manage_iface(struct be_adapter *adapter, u32 iface, u8 op) if (iface == 0xFFFFFFFF) return -1; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -4701,7 +4701,7 @@ int be_cmd_manage_iface(struct be_adapter *adapter, u32 iface, u8 op) status = be_mcc_notify_wait(adapter); err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -4735,7 +4735,7 @@ int be_cmd_get_if_id(struct be_adapter *adapter, struct be_vf_cfg *vf_cfg, struct be_cmd_resp_get_iface_list *resp; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -4756,7 +4756,7 @@ int be_cmd_get_if_id(struct be_adapter *adapter, struct be_vf_cfg *vf_cfg, } err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -4850,7 +4850,7 @@ int be_cmd_enable_vf(struct be_adapter *adapter, u8 domain) if (BEx_chip(adapter)) return 0; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -4868,7 +4868,7 @@ int be_cmd_enable_vf(struct be_adapter *adapter, u8 domain) req->enable = 1; status = be_mcc_notify_wait(adapter); err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -4941,7 +4941,7 @@ __be_cmd_set_logical_link_config(struct be_adapter *adapter, u32 link_config = 0; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -4969,7 +4969,7 @@ __be_cmd_set_logical_link_config(struct be_adapter *adapter, status = be_mcc_notify_wait(adapter); err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -5000,8 +5000,7 @@ int be_cmd_set_features(struct be_adapter *adapter) struct be_mcc_wrb *wrb; int status; - if (mutex_lock_interruptible(&adapter->mcc_lock)) - return -1; + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -5039,7 +5038,7 @@ err: dev_info(&adapter->pdev->dev, "Adapter does not support HW error recovery\n"); - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -5053,7 +5052,7 @@ int be_roce_mcc_cmd(void *netdev_handle, void *wrb_payload, struct be_cmd_resp_hdr *resp; int status; - 
mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -5076,7 +5075,7 @@ int be_roce_mcc_cmd(void *netdev_handle, void *wrb_payload, memcpy(wrb_payload, resp, sizeof(*resp) + resp->response_length); be_dws_le_to_cpu(wrb_payload, sizeof(*resp) + resp->response_length); err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } EXPORT_SYMBOL(be_roce_mcc_cmd); diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 875fe379eea2..3d2e21592119 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -5667,8 +5667,8 @@ static int be_drv_init(struct be_adapter *adapter) } mutex_init(&adapter->mbox_lock); - mutex_init(&adapter->mcc_lock); mutex_init(&adapter->rx_filter_lock); + spin_lock_init(&adapter->mcc_lock); spin_lock_init(&adapter->mcc_cq_lock); init_completion(&adapter->et_cmd_compl); diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h index 216d6e157bef..483c43bab3a9 100644 --- a/drivers/net/ethernet/google/gve/gve.h +++ b/drivers/net/ethernet/google/gve/gve.h @@ -105,7 +105,13 @@ struct gve_rx_desc_queue { /* The page info for a single slot in the RX data queue */ struct gve_rx_slot_page_info { - struct page *page; + /* netmem is used for DQO RDA mode + * page is used in all other modes + */ + union { + struct page *page; + netmem_ref netmem; + }; void *page_address; u32 page_offset; /* offset to write to in page */ unsigned int buf_size; diff --git a/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c b/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c index 403f0f335ba6..af84cb88f828 100644 --- a/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c @@ -205,32 +205,33 @@ void gve_free_to_page_pool(struct gve_rx_ring *rx, struct gve_rx_buf_state_dqo *buf_state, bool allow_direct) { - struct page *page = buf_state->page_info.page; + netmem_ref netmem = buf_state->page_info.netmem; - if (!page) + if (!netmem) return; - page_pool_put_full_page(page->pp, page, allow_direct); - buf_state->page_info.page = NULL; + page_pool_put_full_netmem(netmem_get_pp(netmem), netmem, allow_direct); + buf_state->page_info.netmem = 0; } static int gve_alloc_from_page_pool(struct gve_rx_ring *rx, struct gve_rx_buf_state_dqo *buf_state) { struct gve_priv *priv = rx->gve; - struct page *page; + netmem_ref netmem; buf_state->page_info.buf_size = priv->data_buffer_size_dqo; - page = page_pool_alloc(rx->dqo.page_pool, - &buf_state->page_info.page_offset, - &buf_state->page_info.buf_size, GFP_ATOMIC); + netmem = page_pool_alloc_netmem(rx->dqo.page_pool, + &buf_state->page_info.page_offset, + &buf_state->page_info.buf_size, + GFP_ATOMIC); - if (!page) + if (!netmem) return -ENOMEM; - buf_state->page_info.page = page; - buf_state->page_info.page_address = page_address(page); - buf_state->addr = page_pool_get_dma_addr(page); + buf_state->page_info.netmem = netmem; + buf_state->page_info.page_address = netmem_address(netmem); + buf_state->addr = page_pool_get_dma_addr_netmem(netmem); return 0; } @@ -269,7 +270,7 @@ void gve_reuse_buffer(struct gve_rx_ring *rx, struct gve_rx_buf_state_dqo *buf_state) { if (rx->dqo.page_pool) { - buf_state->page_info.page = NULL; + buf_state->page_info.netmem = 0; gve_free_buf_state(rx, buf_state); } else { gve_dec_pagecnt_bias(&buf_state->page_info); diff --git 
a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 029be8342b7b..6dcdcaf518f4 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -1968,7 +1968,7 @@ static void gve_turndown(struct gve_priv *priv) netif_queue_set_napi(priv->dev, idx, NETDEV_QUEUE_TYPE_TX, NULL); - napi_disable(&block->napi); + napi_disable_locked(&block->napi); } for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); @@ -1979,7 +1979,7 @@ static void gve_turndown(struct gve_priv *priv) netif_queue_set_napi(priv->dev, idx, NETDEV_QUEUE_TYPE_RX, NULL); - napi_disable(&block->napi); + napi_disable_locked(&block->napi); } /* Stop tx queues */ @@ -2009,7 +2009,7 @@ static void gve_turnup(struct gve_priv *priv) if (!gve_tx_was_added_to_block(priv, idx)) continue; - napi_enable(&block->napi); + napi_enable_locked(&block->napi); if (idx < priv->tx_cfg.num_queues) netif_queue_set_napi(priv->dev, idx, @@ -2037,7 +2037,7 @@ static void gve_turnup(struct gve_priv *priv) if (!gve_rx_was_added_to_block(priv, idx)) continue; - napi_enable(&block->napi); + napi_enable_locked(&block->napi); netif_queue_set_napi(priv->dev, idx, NETDEV_QUEUE_TYPE_RX, &block->napi); @@ -2887,6 +2887,7 @@ static int gve_suspend(struct pci_dev *pdev, pm_message_t state) priv->suspend_cnt++; rtnl_lock(); + netdev_lock(netdev); if (was_up && gve_close(priv->dev)) { /* If the dev was up, attempt to close, if close fails, reset */ gve_reset_and_teardown(priv, was_up); @@ -2895,6 +2896,7 @@ static int gve_suspend(struct pci_dev *pdev, pm_message_t state) gve_teardown_priv_resources(priv); } priv->up_before_suspend = was_up; + netdev_unlock(netdev); rtnl_unlock(); return 0; } @@ -2907,7 +2909,9 @@ static int gve_resume(struct pci_dev *pdev) priv->resume_cnt++; rtnl_lock(); + netdev_lock(netdev); err = gve_reset_recovery(priv, priv->up_before_suspend); + netdev_unlock(netdev); rtnl_unlock(); return err; } diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c index f0674a443567..856ade0c209f 100644 --- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c @@ -476,6 +476,24 @@ static int gve_rx_copy_ondemand(struct gve_rx_ring *rx, return 0; } +static void gve_skb_add_rx_frag(struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state, + int num_frags, u16 buf_len) +{ + if (rx->dqo.page_pool) { + skb_add_rx_frag_netmem(rx->ctx.skb_tail, num_frags, + buf_state->page_info.netmem, + buf_state->page_info.page_offset, + buf_len, + buf_state->page_info.buf_size); + } else { + skb_add_rx_frag(rx->ctx.skb_tail, num_frags, + buf_state->page_info.page, + buf_state->page_info.page_offset, + buf_len, buf_state->page_info.buf_size); + } +} + /* Chains multi skbs for single rx packet. * Returns 0 if buffer is appended, -1 otherwise. 
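+ * Buffers are attached with gve_skb_add_rx_frag() above, which picks the
+ * netmem or page variant of skb_add_rx_frag() depending on whether the
+ * ring uses a page pool.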
*/ @@ -513,10 +531,7 @@ static int gve_rx_append_frags(struct napi_struct *napi, if (gve_rx_should_trigger_copy_ondemand(rx)) return gve_rx_copy_ondemand(rx, buf_state, buf_len); - skb_add_rx_frag(rx->ctx.skb_tail, num_frags, - buf_state->page_info.page, - buf_state->page_info.page_offset, - buf_len, buf_state->page_info.buf_size); + gve_skb_add_rx_frag(rx, buf_state, num_frags, buf_len); gve_reuse_buffer(rx, buf_state); return 0; } @@ -561,7 +576,12 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx, /* Page might have not been used for awhile and was likely last written * by a different thread. */ - prefetch(buf_state->page_info.page); + if (rx->dqo.page_pool) { + if (!netmem_is_net_iov(buf_state->page_info.netmem)) + prefetch(netmem_to_page(buf_state->page_info.netmem)); + } else { + prefetch(buf_state->page_info.page); + } /* Copy the header into the skb in the case of header split */ if (hsplit) { @@ -632,9 +652,7 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx, if (rx->dqo.page_pool) skb_mark_for_recycle(rx->ctx.skb_head); - skb_add_rx_frag(rx->ctx.skb_head, 0, buf_state->page_info.page, - buf_state->page_info.page_offset, buf_len, - buf_state->page_info.buf_size); + gve_skb_add_rx_frag(rx, buf_state, 0, buf_len); gve_reuse_buffer(rx, buf_state); return 0; diff --git a/drivers/net/ethernet/google/gve/gve_utils.c b/drivers/net/ethernet/google/gve/gve_utils.c index 30fef100257e..ace9b8698021 100644 --- a/drivers/net/ethernet/google/gve/gve_utils.c +++ b/drivers/net/ethernet/google/gve/gve_utils.c @@ -110,13 +110,13 @@ void gve_add_napi(struct gve_priv *priv, int ntfy_idx, { struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; - netif_napi_add(priv->dev, &block->napi, gve_poll); - netif_napi_set_irq(&block->napi, block->irq); + netif_napi_add_locked(priv->dev, &block->napi, gve_poll); + netif_napi_set_irq_locked(&block->napi, block->irq); } void gve_remove_napi(struct gve_priv *priv, int ntfy_idx) { struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; - netif_napi_del(&block->napi); + netif_napi_del_locked(&block->napi); } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c index 9bbece25552b..09749e9f7398 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c @@ -3,6 +3,7 @@ #include <linux/debugfs.h> #include <linux/device.h> +#include <linux/string_choices.h> #include "hnae3.h" #include "hns3_debugfs.h" @@ -661,12 +662,14 @@ static void hns3_dump_rx_queue_info(struct hns3_enet_ring *ring, HNS3_RING_RX_RING_PKTNUM_RECORD_REG)); sprintf(result[j++], "%u", ring->rx_copybreak); - sprintf(result[j++], "%s", readl_relaxed(ring->tqp->io_base + - HNS3_RING_EN_REG) ? "on" : "off"); + sprintf(result[j++], "%s", + str_on_off(readl_relaxed(ring->tqp->io_base + + HNS3_RING_EN_REG))); if (hnae3_ae_dev_tqp_txrx_indep_supported(ae_dev)) - sprintf(result[j++], "%s", readl_relaxed(ring->tqp->io_base + - HNS3_RING_RX_EN_REG) ? "on" : "off"); + sprintf(result[j++], "%s", + str_on_off(readl_relaxed(ring->tqp->io_base + + HNS3_RING_RX_EN_REG))); else sprintf(result[j++], "%s", "NA"); @@ -764,12 +767,14 @@ static void hns3_dump_tx_queue_info(struct hns3_enet_ring *ring, sprintf(result[j++], "%u", readl_relaxed(ring->tqp->io_base + HNS3_RING_TX_RING_PKTNUM_RECORD_REG)); - sprintf(result[j++], "%s", readl_relaxed(ring->tqp->io_base + - HNS3_RING_EN_REG) ? 
"on" : "off"); + sprintf(result[j++], "%s", + str_on_off(readl_relaxed(ring->tqp->io_base + + HNS3_RING_EN_REG))); if (hnae3_ae_dev_tqp_txrx_indep_supported(ae_dev)) - sprintf(result[j++], "%s", readl_relaxed(ring->tqp->io_base + - HNS3_RING_TX_EN_REG) ? "on" : "off"); + sprintf(result[j++], "%s", + str_on_off(readl_relaxed(ring->tqp->io_base + + HNS3_RING_TX_EN_REG))); else sprintf(result[j++], "%s", "NA"); @@ -1030,7 +1035,6 @@ static void hns3_dbg_dev_caps(struct hnae3_handle *h, char *buf, int len, int *pos) { struct hnae3_ae_dev *ae_dev = pci_get_drvdata(h->pdev); - const char * const str[] = {"no", "yes"}; unsigned long *caps = ae_dev->caps; u32 i, state; @@ -1039,7 +1043,7 @@ hns3_dbg_dev_caps(struct hnae3_handle *h, char *buf, int len, int *pos) for (i = 0; i < ARRAY_SIZE(hns3_dbg_cap); i++) { state = test_bit(hns3_dbg_cap[i].cap_bit, caps); *pos += scnprintf(buf + *pos, len - *pos, "%s: %s\n", - hns3_dbg_cap[i].name, str[state]); + hns3_dbg_cap[i].name, str_yes_no(state)); } *pos += scnprintf(buf + *pos, len - *pos, "\n"); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c index b771a2daba43..6715222aeb66 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c @@ -3,6 +3,7 @@ #include <linux/etherdevice.h> #include <linux/string.h> +#include <linux/string_choices.h> #include <linux/phy.h> #include <linux/sfp.h> @@ -1198,7 +1199,7 @@ static int hns3_set_tx_push(struct net_device *netdev, u32 tx_push) return 0; netdev_dbg(netdev, "Changing tx push from %s to %s\n", - old_state ? "on" : "off", tx_push ? "on" : "off"); + str_on_off(old_state), str_on_off(tx_push)); if (tx_push) set_bit(HNS3_NIC_STATE_TX_PUSH_ENABLE, &priv->state); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c index debf143e9940..c46490693594 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c @@ -3,6 +3,7 @@ #include <linux/device.h> #include <linux/sched/clock.h> +#include <linux/string_choices.h> #include "hclge_debugfs.h" #include "hclge_err.h" @@ -11,7 +12,6 @@ #include "hclge_tm.h" #include "hnae3.h" -static const char * const state_str[] = { "off", "on" }; static const char * const hclge_mac_state_str[] = { "TO_ADD", "TO_DEL", "ACTIVE" }; @@ -2573,7 +2573,7 @@ static int hclge_dbg_dump_loopback(struct hclge_dev *hdev, char *buf, int len) loopback_en = hnae3_get_bit(le32_to_cpu(req_app->txrx_pad_fcs_loop_en), HCLGE_MAC_APP_LP_B); pos += scnprintf(buf + pos, len - pos, "app loopback: %s\n", - state_str[loopback_en]); + str_on_off(loopback_en)); hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_COMMON_LOOPBACK, true); ret = hclge_cmd_send(&hdev->hw, &desc, 1); @@ -2586,22 +2586,22 @@ static int hclge_dbg_dump_loopback(struct hclge_dev *hdev, char *buf, int len) loopback_en = req_common->enable & HCLGE_CMD_SERDES_SERIAL_INNER_LOOP_B; pos += scnprintf(buf + pos, len - pos, "serdes serial loopback: %s\n", - state_str[loopback_en]); + str_on_off(loopback_en)); loopback_en = req_common->enable & HCLGE_CMD_SERDES_PARALLEL_INNER_LOOP_B ? 
1 : 0; pos += scnprintf(buf + pos, len - pos, "serdes parallel loopback: %s\n", - state_str[loopback_en]); + str_on_off(loopback_en)); if (phydev) { loopback_en = phydev->loopback_enabled; pos += scnprintf(buf + pos, len - pos, "phy loopback: %s\n", - state_str[loopback_en]); + str_on_off(loopback_en)); } else if (hnae3_dev_phy_imp_supported(hdev)) { loopback_en = req_common->enable & HCLGE_CMD_GE_PHY_INNER_LOOP_B; pos += scnprintf(buf + pos, len - pos, "phy loopback: %s\n", - state_str[loopback_en]); + str_on_off(loopback_en)); } return 0; @@ -2894,9 +2894,9 @@ static int hclge_dbg_dump_vlan_filter_config(struct hclge_dev *hdev, char *buf, egress = vlan_fe & HCLGE_FILTER_FE_NIC_EGRESS_B ? 1 : 0; *pos += scnprintf(buf, len, "I_PORT_VLAN_FILTER: %s\n", - state_str[ingress]); + str_on_off(ingress)); *pos += scnprintf(buf + *pos, len - *pos, "E_PORT_VLAN_FILTER: %s\n", - state_str[egress]); + str_on_off(egress)); hclge_dbg_fill_content(content, sizeof(content), vlan_filter_items, NULL, ARRAY_SIZE(vlan_filter_items)); @@ -2915,11 +2915,11 @@ static int hclge_dbg_dump_vlan_filter_config(struct hclge_dev *hdev, char *buf, return ret; j = 0; result[j++] = hclge_dbg_get_func_id_str(str_id, i); - result[j++] = state_str[ingress]; - result[j++] = state_str[egress]; - result[j++] = - test_bit(HNAE3_DEV_SUPPORT_PORT_VLAN_BYPASS_B, - hdev->ae_dev->caps) ? state_str[bypass] : "NA"; + result[j++] = str_on_off(ingress); + result[j++] = str_on_off(egress); + result[j++] = test_bit(HNAE3_DEV_SUPPORT_PORT_VLAN_BYPASS_B, + hdev->ae_dev->caps) ? + str_on_off(bypass) : "NA"; hclge_dbg_fill_content(content, sizeof(content), vlan_filter_items, result, ARRAY_SIZE(vlan_filter_items)); @@ -2958,19 +2958,19 @@ static int hclge_dbg_dump_vlan_offload_config(struct hclge_dev *hdev, char *buf, j = 0; result[j++] = hclge_dbg_get_func_id_str(str_id, i); result[j++] = str_pvid; - result[j++] = state_str[vlan_cfg.accept_tag1]; - result[j++] = state_str[vlan_cfg.accept_tag2]; - result[j++] = state_str[vlan_cfg.accept_untag1]; - result[j++] = state_str[vlan_cfg.accept_untag2]; - result[j++] = state_str[vlan_cfg.insert_tag1]; - result[j++] = state_str[vlan_cfg.insert_tag2]; - result[j++] = state_str[vlan_cfg.shift_tag]; - result[j++] = state_str[vlan_cfg.strip_tag1]; - result[j++] = state_str[vlan_cfg.strip_tag2]; - result[j++] = state_str[vlan_cfg.drop_tag1]; - result[j++] = state_str[vlan_cfg.drop_tag2]; - result[j++] = state_str[vlan_cfg.pri_only1]; - result[j++] = state_str[vlan_cfg.pri_only2]; + result[j++] = str_on_off(vlan_cfg.accept_tag1); + result[j++] = str_on_off(vlan_cfg.accept_tag2); + result[j++] = str_on_off(vlan_cfg.accept_untag1); + result[j++] = str_on_off(vlan_cfg.accept_untag2); + result[j++] = str_on_off(vlan_cfg.insert_tag1); + result[j++] = str_on_off(vlan_cfg.insert_tag2); + result[j++] = str_on_off(vlan_cfg.shift_tag); + result[j++] = str_on_off(vlan_cfg.strip_tag1); + result[j++] = str_on_off(vlan_cfg.strip_tag2); + result[j++] = str_on_off(vlan_cfg.drop_tag1); + result[j++] = str_on_off(vlan_cfg.drop_tag2); + result[j++] = str_on_off(vlan_cfg.pri_only1); + result[j++] = str_on_off(vlan_cfg.pri_only2); hclge_dbg_fill_content(content, sizeof(content), vlan_offload_items, result, @@ -3007,14 +3007,13 @@ static int hclge_dbg_dump_ptp_info(struct hclge_dev *hdev, char *buf, int len) pos += scnprintf(buf + pos, len - pos, "phc %s's debug info:\n", ptp->info.name); pos += scnprintf(buf + pos, len - pos, "ptp enable: %s\n", - test_bit(HCLGE_PTP_FLAG_EN, &ptp->flags) ? 
- "yes" : "no"); + str_yes_no(test_bit(HCLGE_PTP_FLAG_EN, &ptp->flags))); pos += scnprintf(buf + pos, len - pos, "ptp tx enable: %s\n", - test_bit(HCLGE_PTP_FLAG_TX_EN, &ptp->flags) ? - "yes" : "no"); + str_yes_no(test_bit(HCLGE_PTP_FLAG_TX_EN, + &ptp->flags))); pos += scnprintf(buf + pos, len - pos, "ptp rx enable: %s\n", - test_bit(HCLGE_PTP_FLAG_RX_EN, &ptp->flags) ? - "yes" : "no"); + str_yes_no(test_bit(HCLGE_PTP_FLAG_RX_EN, + &ptp->flags))); last_rx = jiffies_to_msecs(ptp->last_rx); pos += scnprintf(buf + pos, len - pos, "last rx time: %lu.%lu\n", diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 3f17b3073e50..8c736a069eeb 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -8000,7 +8000,7 @@ static int hclge_set_loopback(struct hnae3_handle *handle, ret = hclge_tqp_enable(handle, en); if (ret) dev_err(&hdev->pdev->dev, "failed to %s tqp in loopback, ret = %d\n", - en ? "enable" : "disable", ret); + str_enable_disable(en), ret); return ret; } @@ -11200,9 +11200,9 @@ static void hclge_info_show(struct hclge_dev *hdev) dev_info(dev, "This is %s PF\n", hdev->flag & HCLGE_FLAG_MAIN ? "main" : "not main"); dev_info(dev, "DCB %s\n", - handle->kinfo.tc_info.dcb_ets_active ? "enable" : "disable"); + str_enable_disable(handle->kinfo.tc_info.dcb_ets_active)); dev_info(dev, "MQPRIO %s\n", - handle->kinfo.tc_info.mqprio_active ? "enable" : "disable"); + str_enable_disable(handle->kinfo.tc_info.mqprio_active)); dev_info(dev, "Default tx spare buffer size: %u\n", hdev->tx_spare_buf_size); @@ -11976,7 +11976,7 @@ static int hclge_set_vf_spoofchk_hw(struct hclge_dev *hdev, int vf, bool enable) if (ret) { dev_err(&hdev->pdev->dev, "Set vf %d mac spoof check %s failed, ret=%d\n", - vf, enable ? "on" : "off", ret); + vf, str_on_off(enable), ret); return ret; } @@ -11984,7 +11984,7 @@ static int hclge_set_vf_spoofchk_hw(struct hclge_dev *hdev, int vf, bool enable) if (ret) dev_err(&hdev->pdev->dev, "Set vf %d vlan spoof check %s failed, ret=%d\n", - vf, enable ? "on" : "off", ret); + vf, str_on_off(enable), ret); return ret; } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c index bab16c2191b2..59cc9221185f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c @@ -2,6 +2,7 @@ // Copyright (c) 2021 Hisilicon Limited. #include <linux/skbuff.h> +#include <linux/string_choices.h> #include "hclge_main.h" #include "hnae3.h" @@ -226,7 +227,7 @@ static int hclge_ptp_int_en(struct hclge_dev *hdev, bool en) if (ret) dev_err(&hdev->pdev->dev, "failed to %s ptp interrupt, ret = %d\n", - en ? "enable" : "disable", ret); + str_enable_disable(en), ret); return ret; } @@ -483,7 +484,7 @@ int hclge_ptp_init(struct hclge_dev *hdev) ret = hclge_ptp_get_cycle(hdev); if (ret) - return ret; + goto out; } ret = hclge_ptp_int_en(hdev, true); diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 71f11f64b13d..6d7ba4d67a19 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -2,6 +2,7 @@ /* Copyright(c) 2013 - 2018 Intel Corporation. 
*/ #include <linux/net/intel/libie/rx.h> +#include <net/netdev_lock.h> #include "iavf.h" #include "iavf_ptp.h" @@ -3894,10 +3895,8 @@ exit: if (test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section)) return 0; - netdev_lock(netdev); netif_set_real_num_rx_queues(netdev, total_qps); netif_set_real_num_tx_queues(netdev, total_qps); - netdev_unlock(netdev); return ret; } @@ -4562,22 +4561,21 @@ static int iavf_open(struct net_device *netdev) struct iavf_adapter *adapter = netdev_priv(netdev); int err; + netdev_assert_locked(netdev); + if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED) { dev_err(&adapter->pdev->dev, "Unable to open device due to PF driver failure.\n"); return -EIO; } - netdev_lock(netdev); while (!mutex_trylock(&adapter->crit_lock)) { /* If we are in __IAVF_INIT_CONFIG_ADAPTER state the crit_lock * is already taken and iavf_open is called from an upper * device's notifier reacting on NETDEV_REGISTER event. * We have to leave here to avoid dead lock. */ - if (adapter->state == __IAVF_INIT_CONFIG_ADAPTER) { - netdev_unlock(netdev); + if (adapter->state == __IAVF_INIT_CONFIG_ADAPTER) return -EBUSY; - } usleep_range(500, 1000); } @@ -4626,7 +4624,6 @@ static int iavf_open(struct net_device *netdev) iavf_irq_enable(adapter, true); mutex_unlock(&adapter->crit_lock); - netdev_unlock(netdev); return 0; @@ -4639,7 +4636,6 @@ err_setup_tx: iavf_free_all_tx_resources(adapter); err_unlock: mutex_unlock(&adapter->crit_lock); - netdev_unlock(netdev); return err; } @@ -4661,12 +4657,12 @@ static int iavf_close(struct net_device *netdev) u64 aq_to_restore; int status; - netdev_lock(netdev); + netdev_assert_locked(netdev); + mutex_lock(&adapter->crit_lock); if (adapter->state <= __IAVF_DOWN_PENDING) { mutex_unlock(&adapter->crit_lock); - netdev_unlock(netdev); return 0; } @@ -4719,6 +4715,7 @@ static int iavf_close(struct net_device *netdev) if (!status) netdev_warn(netdev, "Device resources not yet released\n"); + netdev_lock(netdev); mutex_lock(&adapter->crit_lock); adapter->aq_required |= aq_to_restore; mutex_unlock(&adapter->crit_lock); diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 5ceeae664598..fd083647c14a 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -201,6 +201,7 @@ enum ice_feature { ICE_F_SMA_CTRL, ICE_F_CGU, ICE_F_GNSS, + ICE_F_GCS, ICE_F_ROCE_LAG, ICE_F_SRIOV_LAG, ICE_F_MBX_LIMIT, diff --git a/drivers/net/ethernet/intel/ice/ice_arfs.c b/drivers/net/ethernet/intel/ice/ice_arfs.c index 171cdec741c2..2bc5c7f59844 100644 --- a/drivers/net/ethernet/intel/ice/ice_arfs.c +++ b/drivers/net/ethernet/intel/ice/ice_arfs.c @@ -511,7 +511,7 @@ void ice_init_arfs(struct ice_vsi *vsi) struct hlist_head *arfs_fltr_list; unsigned int i; - if (!vsi || vsi->type != ICE_VSI_PF) + if (!vsi || vsi->type != ICE_VSI_PF || ice_is_arfs_active(vsi)) return; arfs_fltr_list = kcalloc(ICE_MAX_ARFS_LIST, sizeof(*arfs_fltr_list), diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.c b/drivers/net/ethernet/intel/ice/ice_eswitch.c index d649c197cf67..ed21d7f55ac1 100644 --- a/drivers/net/ethernet/intel/ice/ice_eswitch.c +++ b/drivers/net/ethernet/intel/ice/ice_eswitch.c @@ -49,9 +49,6 @@ static int ice_eswitch_setup_env(struct ice_pf *pf) if (vlan_ops->dis_rx_filtering(uplink_vsi)) goto err_vlan_filtering; - if (ice_vsi_update_security(uplink_vsi, ice_vsi_ctx_set_allow_override)) - goto err_override_uplink; - if (ice_vsi_update_local_lb(uplink_vsi, true)) goto err_override_local_lb; @@ -63,8 +60,6 @@ static int 
ice_eswitch_setup_env(struct ice_pf *pf) err_up: ice_vsi_update_local_lb(uplink_vsi, false); err_override_local_lb: - ice_vsi_update_security(uplink_vsi, ice_vsi_ctx_clear_allow_override); -err_override_uplink: vlan_ops->ena_rx_filtering(uplink_vsi); err_vlan_filtering: ice_cfg_dflt_vsi(uplink_vsi->port_info, uplink_vsi->idx, false, @@ -275,7 +270,6 @@ static void ice_eswitch_release_env(struct ice_pf *pf) vlan_ops = ice_get_compat_vsi_vlan_ops(uplink_vsi); ice_vsi_update_local_lb(uplink_vsi, false); - ice_vsi_update_security(uplink_vsi, ice_vsi_ctx_clear_allow_override); vlan_ops->ena_rx_filtering(uplink_vsi); ice_cfg_dflt_vsi(uplink_vsi->port_info, uplink_vsi->idx, false, ICE_FLTR_TX); diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c index 1ccb572ce285..22371011c249 100644 --- a/drivers/net/ethernet/intel/ice/ice_lag.c +++ b/drivers/net/ethernet/intel/ice/ice_lag.c @@ -1001,6 +1001,28 @@ static void ice_lag_link(struct ice_lag *lag) } /** + * ice_lag_config_eswitch - configure eswitch to work with LAG + * @lag: lag info struct + * @netdev: active network interface device struct + * + * Updates all port representors in eswitch to use @netdev for Tx. + * + * Configures the netdev to keep dst metadata (also used in representor Tx). + * This is required for an uplink without switchdev mode configured. + */ +static void ice_lag_config_eswitch(struct ice_lag *lag, + struct net_device *netdev) +{ + struct ice_repr *repr; + unsigned long id; + + xa_for_each(&lag->pf->eswitch.reprs, id, repr) + repr->dst->u.port_info.lower_dev = netdev; + + netif_keep_dst(netdev); +} + +/** * ice_lag_unlink - handle unlink event * @lag: LAG info struct */ @@ -1021,6 +1043,9 @@ static void ice_lag_unlink(struct ice_lag *lag) ice_lag_move_vf_nodes(lag, act_port, pri_port); lag->primary = false; lag->active_port = ICE_LAG_INVALID_PORT; + + /* Config primary's eswitch back to normal operation. */ + ice_lag_config_eswitch(lag, lag->netdev); } else { struct ice_lag *primary_lag; @@ -1419,6 +1444,7 @@ static void ice_lag_monitor_active(struct ice_lag *lag, void *ptr) ice_lag_move_vf_nodes(lag, prim_port, event_port); lag->active_port = event_port; + ice_lag_config_eswitch(lag, event_netdev); return; } @@ -1428,6 +1454,7 @@ static void ice_lag_monitor_active(struct ice_lag *lag, void *ptr) /* new active port */ ice_lag_move_vf_nodes(lag, lag->active_port, event_port); lag->active_port = event_port; + ice_lag_config_eswitch(lag, event_netdev); } else { /* port not set as currently active (e.g. 
new active port * has already claimed the nodes and filters diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h index 1479b45738af..77ba26538b07 100644 --- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h +++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h @@ -229,7 +229,7 @@ struct ice_32b_rx_flex_desc_nic { __le16 status_error1; u8 flexi_flags2; u8 ts_low; - __le16 l2tag2_1st; + __le16 raw_csum; __le16 l2tag2_2nd; /* Qword 3 */ @@ -478,10 +478,15 @@ enum ice_tx_desc_len_fields { struct ice_tx_ctx_desc { __le32 tunneling_params; __le16 l2tag2; - __le16 rsvd; + __le16 gcs; __le64 qw1; }; +#define ICE_TX_GCS_DESC_START_M GENMASK(7, 0) +#define ICE_TX_GCS_DESC_OFFSET_M GENMASK(11, 8) +#define ICE_TX_GCS_DESC_TYPE_M GENMASK(14, 12) +#define ICE_TX_GCS_DESC_CSUM_PSH 1 + #define ICE_TXD_CTX_QW1_CMD_S 4 #define ICE_TXD_CTX_QW1_CMD_M (0x7FUL << ICE_TXD_CTX_QW1_CMD_S) diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 715efd8a359f..0bcf9d127ac9 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -1431,6 +1431,10 @@ static int ice_vsi_alloc_rings(struct ice_vsi *vsi) ring->dev = dev; ring->count = vsi->num_rx_desc; ring->cached_phctime = pf->ptp.cached_phc_time; + + if (ice_is_feature_supported(pf, ICE_F_GCS)) + ring->flags |= ICE_RX_FLAGS_RING_GCS; + WRITE_ONCE(vsi->rx_rings[i], ring); } @@ -3899,8 +3903,10 @@ void ice_init_feature_support(struct ice_pf *pf) break; } - if (pf->hw.mac_type == ICE_MAC_E830) + if (pf->hw.mac_type == ICE_MAC_E830) { ice_set_feature_support(pf, ICE_F_MBX_LIMIT); + ice_set_feature_support(pf, ICE_F_GCS); + } } /** @@ -3947,24 +3953,6 @@ void ice_vsi_ctx_clear_antispoof(struct ice_vsi_ctx *ctx) } /** - * ice_vsi_ctx_set_allow_override - allow destination override on VSI - * @ctx: pointer to VSI ctx structure - */ -void ice_vsi_ctx_set_allow_override(struct ice_vsi_ctx *ctx) -{ - ctx->info.sec_flags |= ICE_AQ_VSI_SEC_FLAG_ALLOW_DEST_OVRD; -} - -/** - * ice_vsi_ctx_clear_allow_override - turn off destination override on VSI - * @ctx: pointer to VSI ctx structure - */ -void ice_vsi_ctx_clear_allow_override(struct ice_vsi_ctx *ctx) -{ - ctx->info.sec_flags &= ~ICE_AQ_VSI_SEC_FLAG_ALLOW_DEST_OVRD; -} - -/** * ice_vsi_update_local_lb - update sw block in VSI with local loopback bit * @vsi: pointer to VSI structure * @set: set or unset the bit diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h index eabb35834a24..b4c9cb28a016 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_lib.h @@ -105,10 +105,6 @@ ice_vsi_update_security(struct ice_vsi *vsi, void (*fill)(struct ice_vsi_ctx *)) void ice_vsi_ctx_set_antispoof(struct ice_vsi_ctx *ctx); void ice_vsi_ctx_clear_antispoof(struct ice_vsi_ctx *ctx); - -void ice_vsi_ctx_set_allow_override(struct ice_vsi_ctx *ctx); - -void ice_vsi_ctx_clear_allow_override(struct ice_vsi_ctx *ctx); int ice_vsi_update_local_lb(struct ice_vsi *vsi, bool set); int ice_vsi_add_vlan_zero(struct ice_vsi *vsi); int ice_vsi_del_vlan_zero(struct ice_vsi *vsi); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index eff4afabeef6..049edeb60104 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -3634,6 +3634,12 @@ void ice_set_netdev_features(struct net_device *netdev) /* Allow core to manage IRQs affinity */ 
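
The ICE_TX_GCS_DESC_* masks added above carve the new 16-bit gcs word of the Tx context descriptor into a checksum start (bits 7:0), a checksum offset (bits 11:8) and a type (bits 14:12), with start and offset counted in 16-bit words. A userspace sketch of the packing arithmetic; pack_gcs is a hypothetical helper, the driver does this inline in ice_tx_csum() with FIELD_PREP:

#include <stdint.h>
#include <stdio.h>

#define ICE_TX_GCS_DESC_START_M  0x00ffu /* GENMASK(7, 0)   */
#define ICE_TX_GCS_DESC_OFFSET_M 0x0f00u /* GENMASK(11, 8)  */
#define ICE_TX_GCS_DESC_TYPE_M   0x7000u /* GENMASK(14, 12) */
#define ICE_TX_GCS_DESC_CSUM_PSH 1

/* Start and offset are byte offsets from the MAC header, converted to
 * 16-bit-word units, which is why ice_tx_csum() divides both by 2.
 */
static uint16_t pack_gcs(uint16_t csum_start_bytes, uint16_t csum_off_bytes)
{
	uint16_t start = csum_start_bytes / 2;
	uint16_t off = csum_off_bytes / 2;

	return (uint16_t)((start & ICE_TX_GCS_DESC_START_M) |
			  ((off << 8) & ICE_TX_GCS_DESC_OFFSET_M) |
			  ((ICE_TX_GCS_DESC_CSUM_PSH << 12) & ICE_TX_GCS_DESC_TYPE_M));
}

int main(void)
{
	/* TCP/IPv4: checksum starts 34 bytes in, TCP csum field at +16. */
	printf("gcs = 0x%04x\n", pack_gcs(34, 16)); /* -> 0x1811 */
	return 0;
}
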
netif_set_affinity_auto(netdev); + /* Mutual exclusivity for TSO and GCS is enforced by the set features + * ndo callback. + */ + if (ice_is_feature_supported(pf, ICE_F_GCS)) + netdev->hw_features |= NETIF_F_HW_CSUM; + netif_set_tso_max_size(netdev, ICE_MAX_TSO_SIZE); } @@ -5009,16 +5015,16 @@ static int ice_init_devlink(struct ice_pf *pf) return err; ice_devlink_init_regions(pf); - ice_health_init(pf); ice_devlink_register(pf); + ice_health_init(pf); return 0; } static void ice_deinit_devlink(struct ice_pf *pf) { - ice_devlink_unregister(pf); ice_health_deinit(pf); + ice_devlink_unregister(pf); ice_devlink_destroy_regions(pf); ice_devlink_unregister_params(pf); } @@ -6549,6 +6555,18 @@ ice_set_features(struct net_device *netdev, netdev_features_t features) if (changed & NETIF_F_LOOPBACK) ret = ice_set_loopback(vsi, !!(features & NETIF_F_LOOPBACK)); + /* Due to E830 hardware limitations, TSO (NETIF_F_ALL_TSO) with GCS + * (NETIF_F_HW_CSUM) is not supported. + */ + if (ice_is_feature_supported(pf, ICE_F_GCS) && + ((features & NETIF_F_HW_CSUM) && (features & NETIF_F_ALL_TSO))) { + if (netdev->features & NETIF_F_HW_CSUM) + dev_err(ice_pf_to_dev(pf), "To enable TSO, you must first disable HW checksum.\n"); + else + dev_err(ice_pf_to_dev(pf), "To enable HW checksum, you must first disable TSO.\n"); + return -EIO; + } + return ret; } diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_consts.h b/drivers/net/ethernet/intel/ice/ice_ptp_consts.h index ac46d1183300..003cdfada3ca 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp_consts.h +++ b/drivers/net/ethernet/intel/ice/ice_ptp_consts.h @@ -10,70 +10,25 @@ /* Constants defined for the PTP 1588 clock hardware. */ const struct ice_phy_reg_info_eth56g eth56g_phy_res[NUM_ETH56G_PHY_RES] = { - /* ETH56G_PHY_REG_PTP */ - { - /* base_addr */ - { - 0x092000, - 0x126000, - 0x1BA000, - 0x24E000, - 0x2E2000, - }, - /* step */ - 0x98, + [ETH56G_PHY_REG_PTP] = { + .base_addr = 0x092000, + .step = 0x98, }, - /* ETH56G_PHY_MEM_PTP */ - { - /* base_addr */ - { - 0x093000, - 0x127000, - 0x1BB000, - 0x24F000, - 0x2E3000, - }, - /* step */ - 0x200, + [ETH56G_PHY_MEM_PTP] = { + .base_addr = 0x093000, + .step = 0x200, }, - /* ETH56G_PHY_REG_XPCS */ - { - /* base_addr */ - { - 0x000000, - 0x009400, - 0x128000, - 0x1BC000, - 0x250000, - }, - /* step */ - 0x21000, + [ETH56G_PHY_REG_XPCS] = { + .base_addr = 0x000000, + .step = 0x21000, }, - /* ETH56G_PHY_REG_MAC */ - { - /* base_addr */ - { - 0x085000, - 0x119000, - 0x1AD000, - 0x241000, - 0x2D5000, - }, - /* step */ - 0x1000, + [ETH56G_PHY_REG_MAC] = { + .base_addr = 0x085000, + .step = 0x1000, }, - /* ETH56G_PHY_REG_GPCS */ - { - /* base_addr */ - { - 0x084000, - 0x118000, - 0x1AC000, - 0x240000, - 0x2D4000, - }, - /* step */ - 0x400, + [ETH56G_PHY_REG_GPCS] = { + .base_addr = 0x084000, + .step = 0x400, }, }; diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c index 3e824f7b30c0..89bb8461284a 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c @@ -1010,7 +1010,7 @@ static int ice_phy_res_address_eth56g(struct ice_hw *hw, u8 lane, /* Lanes 4..7 are in fact 0..3 on a second PHY */ lane %= hw->ptp.ports_per_phy; - *addr = eth56g_phy_res[res_type].base[0] + + *addr = eth56g_phy_res[res_type].base_addr + lane * eth56g_phy_res[res_type].step + offset; return 0; @@ -1240,7 +1240,7 @@ static int ice_write_quad_ptp_reg_eth56g(struct ice_hw *hw, u8 port, if (port >= hw->ptp.num_lports) return -EIO; - addr = 
eth56g_phy_res[ETH56G_PHY_REG_PTP].base[0] + offset; + addr = eth56g_phy_res[ETH56G_PHY_REG_PTP].base_addr + offset; return ice_write_phy_eth56g(hw, port, addr, val); } @@ -1265,7 +1265,7 @@ static int ice_read_quad_ptp_reg_eth56g(struct ice_hw *hw, u8 port, if (port >= hw->ptp.num_lports) return -EIO; - addr = eth56g_phy_res[ETH56G_PHY_REG_PTP].base[0] + offset; + addr = eth56g_phy_res[ETH56G_PHY_REG_PTP].base_addr + offset; return ice_read_phy_eth56g(hw, port, addr, val); } @@ -2650,18 +2650,17 @@ static void ice_sb_access_ena_eth56g(struct ice_hw *hw, bool enable) } /** - * ice_ptp_init_phc_eth56g - Perform E82X specific PHC initialization + * ice_ptp_init_phc_e825 - Perform E825 specific PHC initialization * @hw: pointer to HW struct * - * Perform PHC initialization steps specific to E82X devices. + * Perform E825-specific PTP hardware clock initialization steps. * - * Return: - * * %0 - success - * * %other - failed to initialize CGU + * Return: 0 on success, negative error code otherwise. */ -static int ice_ptp_init_phc_eth56g(struct ice_hw *hw) +static int ice_ptp_init_phc_e825(struct ice_hw *hw) { ice_sb_access_ena_eth56g(hw, true); + /* Initialize the Clock Generation Unit */ return ice_init_cgu_e82x(hw); } @@ -6123,7 +6122,7 @@ int ice_ptp_init_phc(struct ice_hw *hw) case ICE_MAC_GENERIC: return ice_ptp_init_phc_e82x(hw); case ICE_MAC_GENERIC_3K_E825: - return ice_ptp_init_phc_eth56g(hw); + return ice_ptp_init_phc_e825(hw); default: return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h index 8442d1d60351..e5925ccc2613 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h +++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h @@ -65,14 +65,14 @@ enum ice_eth56g_link_spd { /** * struct ice_phy_reg_info_eth56g - ETH56G PHY register parameters - * @base: base address for each PHY block + * @base_addr: base address for each PHY block * @step: step between PHY lanes * * Characteristic information for the various PHY register parameters in the * ETH56G devices */ struct ice_phy_reg_info_eth56g { - u32 base[NUM_ETH56G_PHY_RES]; + u32 base_addr; u32 step; }; @@ -780,36 +780,19 @@ static inline bool ice_is_dual(struct ice_hw *hw) #define PHY_MAC_XIF_TS_SFD_ENA_M ICE_M(0x1, 20) #define PHY_MAC_XIF_GMII_TS_SEL_M ICE_M(0x1, 21) -/* GPCS config register */ -#define PHY_GPCS_CONFIG_REG0 0x268 -#define PHY_GPCS_CONFIG_REG0_TX_THR_M ICE_M(0xF, 24) -#define PHY_GPCS_BITSLIP 0x5C - #define PHY_TS_INT_CONFIG_THRESHOLD_M ICE_M(0x3F, 0) #define PHY_TS_INT_CONFIG_ENA_M BIT(6) -/* 1-step PTP config */ -#define PHY_PTP_1STEP_CONFIG 0x270 -#define PHY_PTP_1STEP_T1S_UP64_M ICE_M(0xF, 4) -#define PHY_PTP_1STEP_T1S_DELTA_M ICE_M(0xF, 8) -#define PHY_PTP_1STEP_PEER_DELAY(_port) (0x274 + 4 * (_port)) -#define PHY_PTP_1STEP_PD_ADD_PD_M ICE_M(0x1, 0) -#define PHY_PTP_1STEP_PD_DELAY_M ICE_M(0x3fffffff, 1) -#define PHY_PTP_1STEP_PD_DLY_V_M ICE_M(0x1, 31) - /* Macros to derive offsets for TimeStampLow and TimeStampHigh */ #define PHY_TSTAMP_L(x) (((x) * 8) + 0) #define PHY_TSTAMP_U(x) (((x) * 8) + 4) -#define PHY_REG_REVISION 0x85000 - #define PHY_REG_DESKEW_0 0x94 #define PHY_REG_DESKEW_0_RLEVEL GENMASK(6, 0) #define PHY_REG_DESKEW_0_RLEVEL_FRAC GENMASK(9, 7) #define PHY_REG_DESKEW_0_RLEVEL_FRAC_W 3 #define PHY_REG_DESKEW_0_VALID GENMASK(10, 10) -#define PHY_REG_GPCS_BITSLIP 0x5C #define PHY_REG_SD_BIT_SLIP(_port_offset) (0x29C + 4 * (_port_offset)) #define PHY_REVISION_ETH56G 0x10200 #define PHY_VENDOR_TXLANE_THRESH 0x2000C @@ -829,7 +812,21 @@ 
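
With eth56g_phy_res flattened to a single base_addr plus a per-lane step, a register address is computed rather than looked up per PHY: base + (lane % ports_per_phy) * step + offset, where the modulo maps lanes 4..7 onto the second PHY. A small sketch of the arithmetic; phy_addr is a hypothetical stand-in for ice_phy_res_address_eth56g():

#include <stdint.h>
#include <stdio.h>

struct phy_res { uint32_t base_addr, step; };

/* Mirrors the address math after the table flattening: the lane index
 * is reduced modulo ports_per_phy before scaling by the per-lane step.
 */
static uint32_t phy_addr(const struct phy_res *res, uint8_t lane,
			 uint8_t ports_per_phy, uint32_t offset)
{
	return res->base_addr + (lane % ports_per_phy) * res->step + offset;
}

int main(void)
{
	const struct phy_res ptp = { .base_addr = 0x092000, .step = 0x98 };

	/* Lane 5 with 4 ports per PHY maps onto lane 1 of the second PHY. */
	printf("0x%x\n", phy_addr(&ptp, 5, 4, 0x10)); /* -> 0x920a8 */
	return 0;
}
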
static inline bool ice_is_dual(struct ice_hw *hw) #define PHY_MAC_BLOCKTIME 0x50 #define PHY_MAC_MARKERTIME 0x54 #define PHY_MAC_TX_OFFSET 0x58 +#define PHY_GPCS_BITSLIP 0x5C #define PHY_PTP_INT_STATUS 0x7FD140 +/* ETH56G registers shared per quad */ +/* GPCS config register */ +#define PHY_GPCS_CONFIG_REG0 0x268 +#define PHY_GPCS_CONFIG_REG0_TX_THR_M GENMASK(27, 24) +/* 1-step PTP config */ +#define PHY_PTP_1STEP_CONFIG 0x270 +#define PHY_PTP_1STEP_T1S_UP64_M GENMASK(7, 4) +#define PHY_PTP_1STEP_T1S_DELTA_M GENMASK(11, 8) +#define PHY_PTP_1STEP_PEER_DELAY(_quad_lane) (0x274 + 4 * (_quad_lane)) +#define PHY_PTP_1STEP_PD_ADD_PD_M BIT(0) +#define PHY_PTP_1STEP_PD_DELAY_M GENMASK(30, 1) +#define PHY_PTP_1STEP_PD_DLY_V_M BIT(31) + #endif /* _ICE_PTP_HW_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 9c9ea4c1b93b..1e4f6f6ee449 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -1809,6 +1809,7 @@ dma_error: static int ice_tx_csum(struct ice_tx_buf *first, struct ice_tx_offload_params *off) { + const struct ice_tx_ring *tx_ring = off->tx_ring; u32 l4_len = 0, l3_len = 0, l2_len = 0; struct sk_buff *skb = first->skb; union { @@ -1958,6 +1959,30 @@ int ice_tx_csum(struct ice_tx_buf *first, struct ice_tx_offload_params *off) l3_len = l4.hdr - ip.hdr; offset |= (l3_len / 4) << ICE_TX_DESC_LEN_IPLEN_S; + if ((tx_ring->netdev->features & NETIF_F_HW_CSUM) && + !(first->tx_flags & ICE_TX_FLAGS_TSO) && + !skb_csum_is_sctp(skb)) { + /* Set GCS */ + u16 csum_start = (skb->csum_start - skb->mac_header) / 2; + u16 csum_offset = skb->csum_offset / 2; + u16 gcs_params; + + gcs_params = FIELD_PREP(ICE_TX_GCS_DESC_START_M, csum_start) | + FIELD_PREP(ICE_TX_GCS_DESC_OFFSET_M, csum_offset) | + FIELD_PREP(ICE_TX_GCS_DESC_TYPE_M, + ICE_TX_GCS_DESC_CSUM_PSH); + + /* Unlike legacy HW checksums, GCS requires a context + * descriptor. 
+ */ + off->cd_qw1 |= ICE_TX_DESC_DTYPE_CTX; + off->cd_gcs_params = gcs_params; + /* Fill out CSO info in data descriptors */ + off->td_offset |= offset; + off->td_cmd |= cmd; + return 1; + } + /* Enable L4 checksum offloads */ switch (l4_proto) { case IPPROTO_TCP: @@ -2424,7 +2449,9 @@ ice_xmit_frame_ring(struct sk_buff *skb, struct ice_tx_ring *tx_ring) ICE_TXD_CTX_QW1_CMD_S); ice_tstamp(tx_ring, skb, first, &offload); - if (ice_is_switchdev_running(vsi->back) && vsi->type != ICE_VSI_SF) + if ((ice_is_switchdev_running(vsi->back) || + ice_lag_is_switchdev_running(vsi->back)) && + vsi->type != ICE_VSI_SF) ice_eswitch_set_target_vsi(skb, &offload); if (offload.cd_qw1 & ICE_TX_DESC_DTYPE_CTX) { @@ -2439,7 +2466,7 @@ ice_xmit_frame_ring(struct sk_buff *skb, struct ice_tx_ring *tx_ring) /* setup context descriptor */ cdesc->tunneling_params = cpu_to_le32(offload.cd_tunnel_params); cdesc->l2tag2 = cpu_to_le16(offload.cd_l2tag2); - cdesc->rsvd = cpu_to_le16(0); + cdesc->gcs = cpu_to_le16(offload.cd_gcs_params); cdesc->qw1 = cpu_to_le64(offload.cd_qw1); } diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index 806bce701df3..a4b1e9514632 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -193,6 +193,7 @@ struct ice_tx_offload_params { u32 td_l2tag1; u32 cd_tunnel_params; u16 cd_l2tag2; + u16 cd_gcs_params; u8 header_len; }; @@ -366,6 +367,7 @@ struct ice_rx_ring { #define ICE_RX_FLAGS_RING_BUILD_SKB BIT(1) #define ICE_RX_FLAGS_CRC_STRIP_DIS BIT(2) #define ICE_RX_FLAGS_MULTIDEV BIT(3) +#define ICE_RX_FLAGS_RING_GCS BIT(4) u8 flags; /* CL5 - 5th cacheline starts here */ struct xdp_rxq_info xdp_rxq; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c index 2719f0e20933..45cfaabc41cb 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c @@ -81,6 +81,23 @@ ice_rx_hash_to_skb(const struct ice_rx_ring *rx_ring, } /** + * ice_rx_gcs - Set generic checksum in skb + * @skb: skb currently being received and modified + * @rx_desc: receive descriptor + */ +static void ice_rx_gcs(struct sk_buff *skb, + const union ice_32b_rx_flex_desc *rx_desc) +{ + const struct ice_32b_rx_flex_desc_nic *desc; + u16 csum; + + desc = (struct ice_32b_rx_flex_desc_nic *)rx_desc; + skb->ip_summed = CHECKSUM_COMPLETE; + csum = (__force u16)desc->raw_csum; + skb->csum = csum_unfold((__force __sum16)swab16(csum)); +} + +/** * ice_rx_csum - Indicate in skb if checksum is good * @ring: the ring we care about * @skb: skb currently being received and modified @@ -107,6 +124,15 @@ ice_rx_csum(struct ice_rx_ring *ring, struct sk_buff *skb, rx_status0 = le16_to_cpu(rx_desc->wb.status_error0); rx_status1 = le16_to_cpu(rx_desc->wb.status_error1); + if ((ring->flags & ICE_RX_FLAGS_RING_GCS) && + rx_desc->wb.rxdid == ICE_RXDID_FLEX_NIC && + (decoded.inner_prot == LIBETH_RX_PT_INNER_TCP || + decoded.inner_prot == LIBETH_RX_PT_INNER_UDP || + decoded.inner_prot == LIBETH_RX_PT_INNER_ICMP)) { + ice_rx_gcs(skb, rx_desc); + return; + } + /* check if HW has decoded the packet and checksum */ if (!(rx_status0 & BIT(ICE_RX_FLEX_DESC_STATUS0_L3L4P_S))) return; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index da91c582d439..f03925c1f521 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -3185,6 +3185,7 @@ 
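
On receive, the E830 hardware drops a raw 16-bit checksum into the flex descriptor's raw_csum word and ice_rx_gcs() hands it to the stack as CHECKSUM_COMPLETE. The swab16() is there because the descriptor field is little-endian while the checksum itself is carried in network byte order. A userspace sketch of that fix-up, assuming csum_unfold() is effectively a zero-extension of the folded 16-bit sum:

#include <stdint.h>
#include <stdio.h>

/* Byte-swap the little-endian descriptor word, then widen the folded
 * sum into the 32-bit accumulator format skb->csum expects.
 */
static uint32_t rx_gcs_to_csum(uint16_t raw_le)
{
	uint16_t swapped = (uint16_t)((raw_le << 8) | (raw_le >> 8)); /* swab16 */

	return swapped; /* csum_unfold(): zero-extend the folded sum */
}

int main(void)
{
	printf("0x%08x\n", rx_gcs_to_csum(0x3412)); /* -> 0x00001234 */
	return 0;
}
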
static int ixgbe_get_ts_info(struct net_device *dev, case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_x550em_a: + case ixgbe_mac_e610: info->rx_filters |= BIT(HWTSTAMP_FILTER_ALL); break; case ixgbe_mac_X540: diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 467f81239e12..481f917f7ed2 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -3185,6 +3185,10 @@ static void ixgbe_handle_fw_event(struct ixgbe_adapter *adapter) case ixgbe_aci_opc_get_link_status: ixgbe_handle_link_status_event(adapter, &event); break; + case ixgbe_aci_opc_temp_tca_event: + e_crit(drv, "%s\n", ixgbe_overheat_msg); + ixgbe_down(adapter); + break; default: e_warn(hw, "unknown FW async event captured\n"); break; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c index 9339edbd9082..eef25e11d938 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c @@ -140,6 +140,7 @@ * proper mult and shift to convert the cycles into nanoseconds of time. */ #define IXGBE_X550_BASE_PERIOD 0xC80000000ULL +#define IXGBE_E610_BASE_PERIOD 0x333333333ULL #define INCVALUE_MASK 0x7FFFFFFF #define ISGN 0x80000000 @@ -415,6 +416,7 @@ static void ixgbe_ptp_convert_to_hwtstamp(struct ixgbe_adapter *adapter, case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_x550em_a: + case ixgbe_mac_e610: /* Upper 32 bits represent billions of cycles, lower 32 bits * represent cycles. However, we use timespec64_to_ns for the * correct math even though the units haven't been corrected @@ -492,11 +494,13 @@ static int ixgbe_ptp_adjfine_X550(struct ptp_clock_info *ptp, long scaled_ppm) struct ixgbe_adapter *adapter = container_of(ptp, struct ixgbe_adapter, ptp_caps); struct ixgbe_hw *hw = &adapter->hw; + u64 rate, base; bool neg_adj; - u64 rate; u32 inca; - neg_adj = diff_by_scaled_ppm(IXGBE_X550_BASE_PERIOD, scaled_ppm, &rate); + base = hw->mac.type == ixgbe_mac_e610 ? IXGBE_E610_BASE_PERIOD : + IXGBE_X550_BASE_PERIOD; + neg_adj = diff_by_scaled_ppm(base, scaled_ppm, &rate); /* warn if rate is too large */ if (rate >= INCVALUE_MASK) @@ -559,6 +563,7 @@ static int ixgbe_ptp_gettimex(struct ptp_clock_info *ptp, case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_x550em_a: + case ixgbe_mac_e610: /* Upper 32 bits represent billions of cycles, lower 32 bits * represent cycles. However, we use timespec64_to_ns for the * correct math even though the units haven't been corrected @@ -1067,6 +1072,7 @@ static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter, case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_x550em_a: + case ixgbe_mac_e610: /* enable timestamping all packets only if at least some * packets were requested. 
Otherwise, play nice and disable * timestamping @@ -1233,6 +1239,7 @@ void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter) fallthrough; case ixgbe_mac_x550em_a: case ixgbe_mac_X550: + case ixgbe_mac_e610: cc.read = ixgbe_ptp_read_X550; break; case ixgbe_mac_X540: @@ -1280,6 +1287,7 @@ static void ixgbe_ptp_init_systime(struct ixgbe_adapter *adapter) case ixgbe_mac_X550EM_x: case ixgbe_mac_x550em_a: case ixgbe_mac_X550: + case ixgbe_mac_e610: tsauxc = IXGBE_READ_REG(hw, IXGBE_TSAUXC); /* Reset SYSTIME registers to 0 */ @@ -1407,6 +1415,7 @@ static long ixgbe_ptp_create_clock(struct ixgbe_adapter *adapter) case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_x550em_a: + case ixgbe_mac_e610: snprintf(adapter->ptp_caps.name, 16, "%s", netdev->name); adapter->ptp_caps.owner = THIS_MODULE; adapter->ptp_caps.max_adj = 30000000; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type_e610.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type_e610.h index 8d06ade3c7cd..617e07878e4f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type_e610.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type_e610.h @@ -171,6 +171,9 @@ enum ixgbe_aci_opc { ixgbe_aci_opc_done_alt_write = 0x0904, ixgbe_aci_opc_clear_port_alt_write = 0x0906, + /* TCA Events */ + ixgbe_aci_opc_temp_tca_event = 0x0C94, + /* debug commands */ ixgbe_aci_opc_debug_dump_internals = 0xFF08, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index e733b81e18a2..e53dbdc0a7a1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -94,6 +94,11 @@ static u16 in_to_opcode(void *in) { return MLX5_GET(mbox_in, in, opcode); } +static u16 in_to_uid(void *in) +{ + return MLX5_GET(mbox_in, in, uid); +} + /* Returns true for opcodes that might be triggered very frequently and throttle * the command interface. Limit their command slots usage. */ @@ -823,7 +828,7 @@ static void cmd_status_print(struct mlx5_core_dev *dev, void *in, void *out) opcode = in_to_opcode(in); op_mod = MLX5_GET(mbox_in, in, op_mod); - uid = MLX5_GET(mbox_in, in, uid); + uid = in_to_uid(in); status = MLX5_GET(mbox_out, out, status); if (!uid && opcode != MLX5_CMD_OP_DESTROY_MKEY && @@ -1871,6 +1876,17 @@ static int is_manage_pages(void *in) { return in_to_opcode(in) == MLX5_CMD_OP_MANAGE_PAGES; } +static bool mlx5_has_privileged_uid(struct mlx5_core_dev *dev) +{ + return !xa_empty(&dev->cmd.vars.privileged_uids); +} + +static bool mlx5_cmd_is_privileged_uid(struct mlx5_core_dev *dev, + u16 uid) +{ + return !!xa_load(&dev->cmd.vars.privileged_uids, uid); +} + /* Notes: * 1. Callback functions may not sleep * 2. Page queue commands do not support asynchronous completion */ @@ -1881,7 +1897,9 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, { struct mlx5_cmd_msg *inb, *outb; u16 opcode = in_to_opcode(in); - bool throttle_op; + bool throttle_locked = false; + bool unpriv_locked = false; + u16 uid = in_to_uid(in); int pages_queue; gfp_t gfp; u8 token; @@ -1890,12 +1908,17 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, if (mlx5_cmd_is_down(dev) || !opcode_allowed(&dev->cmd, opcode)) return -ENXIO; - throttle_op = mlx5_cmd_is_throttle_opcode(opcode); - if (throttle_op) { - if (callback) { - if (down_trylock(&dev->cmd.vars.throttle_sem)) - return -EBUSY; - } else { + if (!callback) { + /* The semaphore is already held for callback commands.
It was + * acquired in mlx5_cmd_exec_cb() + */ + if (uid && mlx5_has_privileged_uid(dev)) { + if (!mlx5_cmd_is_privileged_uid(dev, uid)) { + unpriv_locked = true; + down(&dev->cmd.vars.unprivileged_sem); + } + } else if (mlx5_cmd_is_throttle_opcode(opcode)) { + throttle_locked = true; down(&dev->cmd.vars.throttle_sem); } } @@ -1941,8 +1964,11 @@ out_out: out_in: free_msg(dev, inb); out_up: - if (throttle_op) + if (throttle_locked) up(&dev->cmd.vars.throttle_sem); + if (unpriv_locked) + up(&dev->cmd.vars.unprivileged_sem); + return err; } @@ -2104,18 +2130,22 @@ static void mlx5_cmd_exec_cb_handler(int status, void *_work) struct mlx5_async_work *work = _work; struct mlx5_async_ctx *ctx; struct mlx5_core_dev *dev; - u16 opcode; + bool throttle_locked; + bool unpriv_locked; ctx = work->ctx; dev = ctx->dev; - opcode = work->opcode; + throttle_locked = work->throttle_locked; + unpriv_locked = work->unpriv_locked; status = cmd_status_err(dev, status, work->opcode, work->op_mod, work->out); work->user_callback(status, work); /* Can't access "work" from this point on. It could have been freed in * the callback. */ - if (mlx5_cmd_is_throttle_opcode(opcode)) + if (throttle_locked) up(&dev->cmd.vars.throttle_sem); + if (unpriv_locked) + up(&dev->cmd.vars.unprivileged_sem); if (atomic_dec_and_test(&ctx->num_inflight)) complete(&ctx->inflight_done); } @@ -2124,6 +2154,8 @@ int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size, void *out, int out_size, mlx5_async_cbk_t callback, struct mlx5_async_work *work) { + struct mlx5_core_dev *dev = ctx->dev; + u16 uid; int ret; work->ctx = ctx; @@ -2131,11 +2163,43 @@ int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size, work->opcode = in_to_opcode(in); work->op_mod = MLX5_GET(mbox_in, in, op_mod); work->out = out; + work->throttle_locked = false; + work->unpriv_locked = false; + uid = in_to_uid(in); + if (WARN_ON(!atomic_inc_not_zero(&ctx->num_inflight))) return -EIO; - ret = cmd_exec(ctx->dev, in, in_size, out, out_size, + + if (uid && mlx5_has_privileged_uid(dev)) { + if (!mlx5_cmd_is_privileged_uid(dev, uid)) { + if (down_trylock(&dev->cmd.vars.unprivileged_sem)) { + ret = -EBUSY; + goto dec_num_inflight; + } + work->unpriv_locked = true; + } + } else if (mlx5_cmd_is_throttle_opcode(in_to_opcode(in))) { + if (down_trylock(&dev->cmd.vars.throttle_sem)) { + ret = -EBUSY; + goto dec_num_inflight; + } + work->throttle_locked = true; + } + + ret = cmd_exec(dev, in, in_size, out, out_size, mlx5_cmd_exec_cb_handler, work, false); - if (ret && atomic_dec_and_test(&ctx->num_inflight)) + if (ret) + goto sem_up; + + return 0; + +sem_up: + if (work->throttle_locked) + up(&dev->cmd.vars.throttle_sem); + if (work->unpriv_locked) + up(&dev->cmd.vars.unprivileged_sem); +dec_num_inflight: + if (atomic_dec_and_test(&ctx->num_inflight)) complete(&ctx->inflight_done); return ret; @@ -2371,10 +2435,16 @@ int mlx5_cmd_enable(struct mlx5_core_dev *dev) sema_init(&cmd->vars.sem, cmd->vars.max_reg_cmds); sema_init(&cmd->vars.pages_sem, 1); sema_init(&cmd->vars.throttle_sem, DIV_ROUND_UP(cmd->vars.max_reg_cmds, 2)); + sema_init(&cmd->vars.unprivileged_sem, + DIV_ROUND_UP(cmd->vars.max_reg_cmds, 2)); + + xa_init(&cmd->vars.privileged_uids); cmd->pool = dma_pool_create("mlx5_cmd", mlx5_core_dma_dev(dev), size, align, 0); - if (!cmd->pool) - return -ENOMEM; + if (!cmd->pool) { + err = -ENOMEM; + goto err_destroy_xa; + } err = alloc_cmd_page(dev, cmd); if (err) @@ -2408,6 +2478,8 @@ err_cmd_page: free_cmd_page(dev, cmd); err_free_pool: 
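
The cmd.c rework above admits each command through at most one of two counting semaphores: commands from an unprivileged UID (one not recorded in the privileged_uids xarray) take unprivileged_sem, otherwise throttled opcodes take throttle_sem, and the async path uses trylock so it fails fast with -EBUSY instead of sleeping. A toy model of that admission choice, using userspace POSIX semaphores and hypothetical names (toy_cmd_vars, async_admit):

#include <errno.h>
#include <semaphore.h>
#include <stdbool.h>
#include <stdio.h>

struct toy_cmd_vars {
	sem_t throttle_sem;
	sem_t unprivileged_sem;
};

/* At most one semaphore per command; async submission must not sleep,
 * hence sem_trywait() and -EBUSY, mirroring mlx5_cmd_exec_cb().
 */
static int async_admit(struct toy_cmd_vars *v, bool unpriv_uid, bool throttled,
		       bool *unpriv_locked, bool *throttle_locked)
{
	if (unpriv_uid) {
		if (sem_trywait(&v->unprivileged_sem))
			return -EBUSY;
		*unpriv_locked = true;
	} else if (throttled) {
		if (sem_trywait(&v->throttle_sem))
			return -EBUSY;
		*throttle_locked = true;
	}
	return 0;
}

int main(void)
{
	struct toy_cmd_vars v;
	bool ul = false, tl = false;

	sem_init(&v.throttle_sem, 0, 1);
	sem_init(&v.unprivileged_sem, 0, 1);
	printf("%d\n", async_admit(&v, true, false, &ul, &tl)); /* 0 */
	printf("%d\n", async_admit(&v, true, false, &ul, &tl)); /* -EBUSY */
	return 0;
}
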
dma_pool_destroy(cmd->pool); +err_destroy_xa: + xa_destroy(&dev->cmd.vars.privileged_uids); return err; } @@ -2420,6 +2492,7 @@ void mlx5_cmd_disable(struct mlx5_core_dev *dev) destroy_msg_cache(dev); free_cmd_page(dev, cmd); dma_pool_destroy(cmd->pool); + xa_destroy(&dev->cmd.vars.privileged_uids); } void mlx5_cmd_set_state(struct mlx5_core_dev *dev, @@ -2427,3 +2500,18 @@ void mlx5_cmd_set_state(struct mlx5_core_dev *dev, { dev->cmd.state = cmdif_state; } + +int mlx5_cmd_add_privileged_uid(struct mlx5_core_dev *dev, u16 uid) +{ + return xa_insert(&dev->cmd.vars.privileged_uids, uid, + xa_mk_value(uid), GFP_KERNEL); +} +EXPORT_SYMBOL(mlx5_cmd_add_privileged_uid); + +void mlx5_cmd_remove_privileged_uid(struct mlx5_core_dev *dev, u16 uid) +{ + void *data = xa_erase(&dev->cmd.vars.privileged_uids, uid); + + WARN(!data, "Privileged UID %u does not exist\n", uid); +} +EXPORT_SYMBOL(mlx5_cmd_remove_privileged_uid); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index 98d4306929f3..73cd74644378 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -46,6 +46,9 @@ mlx5_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, u32 running_fw, stored_fw; int err; + if (!mlx5_core_is_pf(dev)) + return 0; + err = devlink_info_version_fixed_put(req, "fw.psid", dev->board_id); if (err) return err; @@ -324,7 +327,8 @@ static const struct devlink_ops mlx5_devlink_ops = { .rate_node_tx_max_set = mlx5_esw_devlink_rate_node_tx_max_set, .rate_node_new = mlx5_esw_devlink_rate_node_new, .rate_node_del = mlx5_esw_devlink_rate_node_del, - .rate_leaf_parent_set = mlx5_esw_devlink_rate_parent_set, + .rate_leaf_parent_set = mlx5_esw_devlink_rate_leaf_parent_set, + .rate_node_parent_set = mlx5_esw_devlink_rate_node_parent_set, #endif #ifdef CONFIG_MLX5_SF_MANAGER .port_new = mlx5_devlink_sf_port_new, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c index f62fbfb67a1b..6049ccf475bc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c @@ -80,6 +80,7 @@ int mlx5_port_set_eth_ptys(struct mlx5_core_dev *dev, bool an_disable, int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed) { struct mlx5_port_eth_proto eproto; + const struct mlx5_link_info *info; bool force_legacy = false; bool ext; int err; @@ -94,9 +95,13 @@ int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed) if (err) goto out; } - *speed = mlx5_port_ptys2speed(mdev, eproto.oper, force_legacy); - if (!(*speed)) + info = mlx5_port_ptys2info(mdev, eproto.oper, force_legacy); + if (!info) { + *speed = SPEED_UNKNOWN; err = -EINVAL; + goto out; + } + *speed = info->speed; out: return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c index 5d128c5b4529..0f5d7ea8956f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c @@ -48,15 +48,10 @@ mlx5_esw_bridge_lag_rep_get(struct net_device *dev, struct mlx5_eswitch *esw) struct list_head *iter; netdev_for_each_lower_dev(dev, lower, iter) { - struct mlx5_core_dev *mdev; - struct mlx5e_priv *priv; - if (!mlx5e_eswitch_rep(lower)) continue; - priv = netdev_priv(lower); - mdev = priv->mdev; - if (mlx5_lag_is_shared_fdb(mdev) && mlx5_esw_bridge_dev_same_esw(lower, esw)) + if 
(mlx5_esw_bridge_dev_same_esw(lower, esw)) return lower; } @@ -125,7 +120,7 @@ static bool mlx5_esw_bridge_is_local(struct net_device *dev, struct net_device * priv = netdev_priv(rep); mdev = priv->mdev; if (netif_is_lag_master(dev)) - return mlx5_lag_is_shared_fdb(mdev) && mlx5_lag_is_master(mdev); + return mlx5_lag_is_master(mdev); return true; } @@ -455,6 +450,9 @@ static int mlx5_esw_bridge_switchdev_event(struct notifier_block *nb, if (!rep) return NOTIFY_DONE; + if (netif_is_lag_master(dev) && !mlx5_lag_is_shared_fdb(esw->dev)) + return NOTIFY_DONE; + switch (event) { case SWITCHDEV_FDB_ADD_TO_BRIDGE: fdb_info = container_of(info, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c index 8218c892b161..7819fb297280 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c @@ -593,3 +593,8 @@ mlx5e_tc_meter_get_stats(struct mlx5e_flow_meter_handle *meter, *drops = packets2; *lastuse = max_t(u64, lastuse1, lastuse2); } + +int mlx5e_flow_meter_get_base_id(struct mlx5e_flow_meter_handle *meter) +{ + return meter->meters_obj->base_id; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.h index 9b795cd106bb..d6afb6556875 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.h @@ -72,4 +72,17 @@ void mlx5e_tc_meter_get_stats(struct mlx5e_flow_meter_handle *meter, u64 *bytes, u64 *packets, u64 *drops, u64 *lastuse); +#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) + +int mlx5e_flow_meter_get_base_id(struct mlx5e_flow_meter_handle *meter); + +#else /* CONFIG_MLX5_CLS_ACT */ + +static inline int +mlx5e_flow_meter_get_base_id(struct mlx5e_flow_meter_handle *meter) +{ + return 0; +} +#endif /* CONFIG_MLX5_CLS_ACT */ + #endif /* __MLX5_EN_FLOW_METER_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index 501709ac310f..782f6d51434d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -277,12 +277,12 @@ static void mlx5e_ipsec_init_macs(struct mlx5e_ipsec_sa_entry *sa_entry, case XFRM_DEV_OFFLOAD_IN: src = attrs->dmac; dst = attrs->smac; - pkey = &attrs->saddr.a4; + pkey = &attrs->addrs.saddr.a4; break; case XFRM_DEV_OFFLOAD_OUT: src = attrs->smac; dst = attrs->dmac; - pkey = &attrs->daddr.a4; + pkey = &attrs->addrs.daddr.a4; break; default: return; @@ -303,6 +303,16 @@ static void mlx5e_ipsec_init_macs(struct mlx5e_ipsec_sa_entry *sa_entry, neigh_release(n); } +static void mlx5e_ipsec_state_mask(struct mlx5e_ipsec_addr *addrs) +{ + /* + * State doesn't have subnet prefixes in outer headers. + * The match is performed for exact source/destination addresses.
+ */ + memset(addrs->smask.m6, 0xFF, sizeof(__be32) * 4); + memset(addrs->dmask.m6, 0xFF, sizeof(__be32) * 4); +} + void mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry, struct mlx5_accel_esp_xfrm_attrs *attrs) { @@ -374,9 +384,11 @@ skip_replay_window: attrs->spi = be32_to_cpu(x->id.spi); /* source , destination ips */ - memcpy(&attrs->saddr, x->props.saddr.a6, sizeof(attrs->saddr)); - memcpy(&attrs->daddr, x->id.daddr.a6, sizeof(attrs->daddr)); - attrs->family = x->props.family; + memcpy(&attrs->addrs.saddr, x->props.saddr.a6, + sizeof(attrs->addrs.saddr)); + memcpy(&attrs->addrs.daddr, x->id.daddr.a6, sizeof(attrs->addrs.daddr)); + attrs->addrs.family = x->props.family; + mlx5e_ipsec_state_mask(&attrs->addrs); attrs->type = x->xso.type; attrs->reqid = x->props.reqid; attrs->upspec.dport = ntohs(x->sel.dport); @@ -428,7 +440,8 @@ static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev, } if (x->encap) { if (!(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_ESPINUDP)) { - NL_SET_ERR_MSG_MOD(extack, "Encapsulation is not supported"); + NL_SET_ERR_MSG_MOD(extack, + "Encapsulation is not supported"); return -EINVAL; } @@ -853,13 +866,13 @@ static int mlx5e_ipsec_netevent_event(struct notifier_block *nb, xa_for_each_marked(&ipsec->sadb, idx, sa_entry, MLX5E_IPSEC_TUNNEL_SA) { attrs = &sa_entry->attrs; - if (attrs->family == AF_INET) { - if (!neigh_key_eq32(n, &attrs->saddr.a4) && - !neigh_key_eq32(n, &attrs->daddr.a4)) + if (attrs->addrs.family == AF_INET) { + if (!neigh_key_eq32(n, &attrs->addrs.saddr.a4) && + !neigh_key_eq32(n, &attrs->addrs.daddr.a4)) continue; } else { - if (!neigh_key_eq128(n, &attrs->saddr.a4) && - !neigh_key_eq128(n, &attrs->daddr.a4)) + if (!neigh_key_eq128(n, &attrs->addrs.saddr.a4) && + !neigh_key_eq128(n, &attrs->addrs.daddr.a4)) continue; } @@ -1035,7 +1048,7 @@ static void mlx5e_xfrm_update_stats(struct xfrm_state *x) * by removing always available headers. 
*/ headers = sizeof(struct ethhdr); - if (sa_entry->attrs.family == AF_INET) + if (sa_entry->attrs.addrs.family == AF_INET) headers += sizeof(struct iphdr); else headers += sizeof(struct ipv6hdr); @@ -1044,6 +1057,43 @@ static void mlx5e_xfrm_update_stats(struct xfrm_state *x) x->curlft.bytes += success_bytes - headers * success_packets; } +static __be32 word_to_mask(int prefix) +{ + if (prefix < 0) + return 0; + + if (!prefix || prefix > 31) + return cpu_to_be32(0xFFFFFFFF); + + return cpu_to_be32(((1U << prefix) - 1) << (32 - prefix)); +} + +static void mlx5e_ipsec_policy_mask(struct mlx5e_ipsec_addr *addrs, + struct xfrm_selector *sel) +{ + int i; + + if (addrs->family == AF_INET) { + addrs->smask.m4 = word_to_mask(sel->prefixlen_s); + addrs->saddr.a4 &= addrs->smask.m4; + addrs->dmask.m4 = word_to_mask(sel->prefixlen_d); + addrs->daddr.a4 &= addrs->dmask.m4; + return; + } + + for (i = 0; i < 4; i++) { + if (sel->prefixlen_s != 32 * i) + addrs->smask.m6[i] = + word_to_mask(sel->prefixlen_s - 32 * i); + addrs->saddr.a6[i] &= addrs->smask.m6[i]; + + if (sel->prefixlen_d != 32 * i) + addrs->dmask.m6[i] = + word_to_mask(sel->prefixlen_d - 32 * i); + addrs->daddr.a6[i] &= addrs->dmask.m6[i]; + } +} + static int mlx5e_xfrm_validate_policy(struct mlx5_core_dev *mdev, struct xfrm_policy *x, struct netlink_ext_ack *extack) @@ -1116,9 +1166,10 @@ mlx5e_ipsec_build_accel_pol_attrs(struct mlx5e_ipsec_pol_entry *pol_entry, sel = &x->selector; memset(attrs, 0, sizeof(*attrs)); - memcpy(&attrs->saddr, sel->saddr.a6, sizeof(attrs->saddr)); - memcpy(&attrs->daddr, sel->daddr.a6, sizeof(attrs->daddr)); - attrs->family = sel->family; + memcpy(&attrs->addrs.saddr, sel->saddr.a6, sizeof(attrs->addrs.saddr)); + memcpy(&attrs->addrs.daddr, sel->daddr.a6, sizeof(attrs->addrs.daddr)); + attrs->addrs.family = sel->family; + mlx5e_ipsec_policy_mask(&attrs->addrs, sel); attrs->dir = x->xdo.dir; attrs->action = x->action; attrs->type = XFRM_DEV_OFFLOAD_PACKET; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h index ad8db9e1fd1d..a63c2289f8af 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h @@ -76,27 +76,36 @@ struct mlx5_replay_esn { u8 trigger : 1; }; -struct mlx5_accel_esp_xfrm_attrs { - u32 spi; - u32 mode; - struct aes_gcm_keymat aes_gcm; - +struct mlx5e_ipsec_addr { union { __be32 a4; __be32 a6[4]; } saddr; - + union { + __be32 m4; + __be32 m6[4]; + } smask; union { __be32 a4; __be32 a6[4]; } daddr; + union { + __be32 m4; + __be32 m6[4]; + } dmask; + u8 family; +}; +struct mlx5_accel_esp_xfrm_attrs { + u32 spi; + u32 mode; + struct aes_gcm_keymat aes_gcm; + struct mlx5e_ipsec_addr addrs; struct upspec upspec; u8 dir : 2; u8 type : 2; u8 drop : 1; u8 encap : 1; - u8 family; struct mlx5_replay_esn replay_esn; u32 authsize; u32 reqid; @@ -279,18 +288,8 @@ struct mlx5e_ipsec_sa_entry { }; struct mlx5_accel_pol_xfrm_attrs { - union { - __be32 a4; - __be32 a6[4]; - } saddr; - - union { - __be32 a4; - __be32 a6[4]; - } daddr; - + struct mlx5e_ipsec_addr addrs; struct upspec upspec; - u8 family; u8 action; u8 type : 2; u8 dir : 2; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c index d51ace739637..98b6a3a623f9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c @@ -1484,9 +1484,14 @@ 
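
word_to_mask() above maps a per-word prefix length onto one 32-bit mask word: a negative value means the word lies wholly past the prefix (mask 0), 0 or greater than 31 means the word is fully covered (all ones), and anything in between sets the top prefix bits. The IPv6 loop feeds it prefixlen - 32*i, guarded by the != 32*i check so a word boundary such as /64 leaves the following word zero rather than hitting the all-ones branch. A host-endian rework with a /70 worked example, which should yield ffffffff, ffffffff, fc000000, 00000000:

#include <stdint.h>
#include <stdio.h>

/* Host-endian rewrite of word_to_mask() for readability; the driver
 * produces big-endian words via cpu_to_be32().
 */
static uint32_t word_to_mask(int prefix)
{
	if (prefix < 0)
		return 0;
	if (!prefix || prefix > 31)
		return 0xFFFFFFFFu;
	return ((1u << prefix) - 1) << (32 - prefix);
}

int main(void)
{
	int prefixlen = 70; /* IPv6 /70 selector */

	for (int i = 0; i < 4; i++)
		printf("word %d: 0x%08x\n", i,
		       32 * i == prefixlen ? 0 : word_to_mask(prefixlen - 32 * i));
	return 0;
}
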
static void tx_ft_put_policy(struct mlx5e_ipsec *ipsec, u32 prio, int type) mutex_unlock(&tx->ft.mutex); } -static void setup_fte_addr4(struct mlx5_flow_spec *spec, __be32 *saddr, - __be32 *daddr) +static void setup_fte_addr4(struct mlx5_flow_spec *spec, + struct mlx5e_ipsec_addr *addrs) { + __be32 *saddr = &addrs->saddr.a4; + __be32 *smask = &addrs->smask.m4; + __be32 *daddr = &addrs->daddr.a4; + __be32 *dmask = &addrs->dmask.m4; + if (!*saddr && !*daddr) return; @@ -1498,21 +1503,26 @@ static void setup_fte_addr4(struct mlx5_flow_spec *spec, __be32 *saddr, if (*saddr) { memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4), saddr, 4); - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, - outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4); + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4), smask, 4); } if (*daddr) { memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4), daddr, 4); - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, - outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4); + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4), dmask, 4); } } -static void setup_fte_addr6(struct mlx5_flow_spec *spec, __be32 *saddr, - __be32 *daddr) +static void setup_fte_addr6(struct mlx5_flow_spec *spec, + struct mlx5e_ipsec_addr *addrs) { + __be32 *saddr = addrs->saddr.a6; + __be32 *smask = addrs->smask.m6; + __be32 *daddr = addrs->daddr.a6; + __be32 *dmask = addrs->dmask.m6; + if (addr6_all_zero(saddr) && addr6_all_zero(daddr)) return; @@ -1524,15 +1534,15 @@ static void setup_fte_addr6(struct mlx5_flow_spec *spec, __be32 *saddr, if (!addr6_all_zero(saddr)) { memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6), saddr, 16); - memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria, - outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6), 0xff, 16); + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6), smask, 16); } if (!addr6_all_zero(daddr)) { memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), daddr, 16); - memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria, - outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), 0xff, 16); + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), dmask, 16); } } @@ -1722,7 +1732,7 @@ setup_pkt_tunnel_reformat(struct mlx5_core_dev *mdev, if (attrs->dir == XFRM_DEV_OFFLOAD_OUT) { bfflen += sizeof(*esp_hdr) + 8; - switch (attrs->family) { + switch (attrs->addrs.family) { case AF_INET: bfflen += sizeof(*iphdr); break; @@ -1739,7 +1749,7 @@ setup_pkt_tunnel_reformat(struct mlx5_core_dev *mdev, return -ENOMEM; eth_hdr = (struct ethhdr *)reformatbf; - switch (attrs->family) { + switch (attrs->addrs.family) { case AF_INET: eth_hdr->h_proto = htons(ETH_P_IP); break; @@ -1762,11 +1772,11 @@ setup_pkt_tunnel_reformat(struct mlx5_core_dev *mdev, reformat_params->param_0 = attrs->authsize; hdr = reformatbf + sizeof(*eth_hdr); - switch (attrs->family) { + switch (attrs->addrs.family) { case AF_INET: iphdr = (struct iphdr *)hdr; - memcpy(&iphdr->saddr, &attrs->saddr.a4, 4); - memcpy(&iphdr->daddr, &attrs->daddr.a4, 4); + memcpy(&iphdr->saddr, &attrs->addrs.saddr.a4, 4); + memcpy(&iphdr->daddr, &attrs->addrs.daddr.a4,
4); iphdr->version = 4; iphdr->ihl = 5; iphdr->ttl = IPSEC_TUNNEL_DEFAULT_TTL; @@ -1775,8 +1785,8 @@ setup_pkt_tunnel_reformat(struct mlx5_core_dev *mdev, break; case AF_INET6: ipv6hdr = (struct ipv6hdr *)hdr; - memcpy(&ipv6hdr->saddr, &attrs->saddr.a6, 16); - memcpy(&ipv6hdr->daddr, &attrs->daddr.a6, 16); + memcpy(&ipv6hdr->saddr, &attrs->addrs.saddr.a6, 16); + memcpy(&ipv6hdr->daddr, &attrs->addrs.daddr.a6, 16); ipv6hdr->nexthdr = IPPROTO_ESP; ipv6hdr->version = 6; ipv6hdr->hop_limit = IPSEC_TUNNEL_DEFAULT_TTL; @@ -1810,7 +1820,7 @@ static int get_reformat_type(struct mlx5_accel_esp_xfrm_attrs *attrs) return MLX5_REFORMAT_TYPE_DEL_ESP_TRANSPORT_OVER_UDP; return MLX5_REFORMAT_TYPE_DEL_ESP_TRANSPORT; case XFRM_DEV_OFFLOAD_OUT: - if (attrs->family == AF_INET) { + if (attrs->addrs.family == AF_INET) { if (attrs->encap) return MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_UDPV4; return MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV4; @@ -2003,7 +2013,7 @@ static int rx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry) struct mlx5_fc *counter; int err = 0; - rx = rx_ft_get(mdev, ipsec, attrs->family, attrs->type); + rx = rx_ft_get(mdev, ipsec, attrs->addrs.family, attrs->type); if (IS_ERR(rx)) return PTR_ERR(rx); @@ -2013,10 +2023,10 @@ static int rx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry) goto err_alloc; } - if (attrs->family == AF_INET) - setup_fte_addr4(spec, &attrs->saddr.a4, &attrs->daddr.a4); + if (attrs->addrs.family == AF_INET) + setup_fte_addr4(spec, &attrs->addrs); else - setup_fte_addr6(spec, attrs->saddr.a6, attrs->daddr.a6); + setup_fte_addr6(spec, &attrs->addrs); setup_fte_spi(spec, attrs->spi, attrs->encap); if (!attrs->encap) @@ -2116,7 +2126,7 @@ err_pkt_reformat: err_mod_header: kvfree(spec); err_alloc: - rx_ft_put(ipsec, attrs->family, attrs->type); + rx_ft_put(ipsec, attrs->addrs.family, attrs->type); return err; } @@ -2148,10 +2158,10 @@ static int tx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry) switch (attrs->type) { case XFRM_DEV_OFFLOAD_CRYPTO: - if (attrs->family == AF_INET) - setup_fte_addr4(spec, &attrs->saddr.a4, &attrs->daddr.a4); + if (attrs->addrs.family == AF_INET) + setup_fte_addr4(spec, &attrs->addrs); else - setup_fte_addr6(spec, attrs->saddr.a6, attrs->daddr.a6); + setup_fte_addr6(spec, &attrs->addrs); setup_fte_spi(spec, attrs->spi, false); setup_fte_esp(spec); setup_fte_reg_a(spec); @@ -2235,10 +2245,10 @@ static int tx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry) } tx = ipsec_tx(ipsec, attrs->type); - if (attrs->family == AF_INET) - setup_fte_addr4(spec, &attrs->saddr.a4, &attrs->daddr.a4); + if (attrs->addrs.family == AF_INET) + setup_fte_addr4(spec, &attrs->addrs); else - setup_fte_addr6(spec, attrs->saddr.a6, attrs->daddr.a6); + setup_fte_addr6(spec, &attrs->addrs); setup_fte_no_frags(spec); setup_fte_upper_proto_match(spec, &attrs->upspec); @@ -2308,12 +2318,12 @@ static int rx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry) struct mlx5e_ipsec_rx *rx; int err, dstn = 0; - ft = rx_ft_get_policy(mdev, pol_entry->ipsec, attrs->family, attrs->prio, - attrs->type); + ft = rx_ft_get_policy(mdev, pol_entry->ipsec, attrs->addrs.family, + attrs->prio, attrs->type); if (IS_ERR(ft)) return PTR_ERR(ft); - rx = ipsec_rx(pol_entry->ipsec, attrs->family, attrs->type); + rx = ipsec_rx(pol_entry->ipsec, attrs->addrs.family, attrs->type); spec = kvzalloc(sizeof(*spec), GFP_KERNEL); if (!spec) { @@ -2321,10 +2331,10 @@ static int rx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry) goto err_alloc; } - if (attrs->family == AF_INET) - 
setup_fte_addr4(spec, &attrs->saddr.a4, &attrs->daddr.a4); + if (attrs->addrs.family == AF_INET) + setup_fte_addr4(spec, &attrs->addrs); else - setup_fte_addr6(spec, attrs->saddr.a6, attrs->daddr.a6); + setup_fte_addr6(spec, &attrs->addrs); setup_fte_no_frags(spec); setup_fte_upper_proto_match(spec, &attrs->upspec); @@ -2364,7 +2374,8 @@ static int rx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry) err_action: kvfree(spec); err_alloc: - rx_ft_put_policy(pol_entry->ipsec, attrs->family, attrs->prio, attrs->type); + rx_ft_put_policy(pol_entry->ipsec, attrs->addrs.family, attrs->prio, + attrs->type); return err; } @@ -2638,7 +2649,8 @@ void mlx5e_accel_ipsec_fs_del_rule(struct mlx5e_ipsec_sa_entry *sa_entry) mlx5_fc_destroy(mdev, ipsec_rule->replay.fc); } mlx5_esw_ipsec_rx_id_mapping_remove(sa_entry); - rx_ft_put(sa_entry->ipsec, sa_entry->attrs.family, sa_entry->attrs.type); + rx_ft_put(sa_entry->ipsec, sa_entry->attrs.addrs.family, + sa_entry->attrs.type); } int mlx5e_accel_ipsec_fs_add_pol(struct mlx5e_ipsec_pol_entry *pol_entry) @@ -2674,7 +2686,8 @@ void mlx5e_accel_ipsec_fs_del_pol(struct mlx5e_ipsec_pol_entry *pol_entry) mlx5e_ipsec_unblock_tc_offload(pol_entry->ipsec->mdev); if (pol_entry->attrs.dir == XFRM_DEV_OFFLOAD_IN) { - rx_ft_put_policy(pol_entry->ipsec, pol_entry->attrs.family, + rx_ft_put_policy(pol_entry->ipsec, + pol_entry->attrs.addrs.family, pol_entry->attrs.prio, pol_entry->attrs.type); return; } @@ -2814,7 +2827,7 @@ bool mlx5e_ipsec_fs_tunnel_enabled(struct mlx5e_ipsec_sa_entry *sa_entry) struct mlx5e_ipsec_rx *rx; struct mlx5e_ipsec_tx *tx; - rx = ipsec_rx(sa_entry->ipsec, attrs->family, attrs->type); + rx = ipsec_rx(sa_entry->ipsec, attrs->addrs.family, attrs->type); tx = ipsec_tx(sa_entry->ipsec, attrs->type); if (sa_entry->attrs.dir == XFRM_DEV_OFFLOAD_OUT) return tx->allow_tunnel_mode; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 3b2efde9dda2..fdf9e9bb99ac 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -42,6 +42,9 @@ #include "lib/clock.h" #include "en/fs_ethtool.h" +#define LANES_UNKNOWN 0 +#define MAX_LANES 8 + void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv, struct ethtool_drvinfo *drvinfo) { @@ -1076,32 +1079,34 @@ static void ptys2ethtool_supported_advertised_port(struct mlx5_core_dev *mdev, } } -static void get_speed_duplex(struct net_device *netdev, - u32 eth_proto_oper, bool force_legacy, - u16 data_rate_oper, - struct ethtool_link_ksettings *link_ksettings) +static void get_link_properties(struct net_device *netdev, + u32 eth_proto_oper, bool force_legacy, + u16 data_rate_oper, + struct ethtool_link_ksettings *link_ksettings) { struct mlx5e_priv *priv = netdev_priv(netdev); - u32 speed = SPEED_UNKNOWN; + const struct mlx5_link_info *info; u8 duplex = DUPLEX_UNKNOWN; + u32 speed = SPEED_UNKNOWN; + u32 lanes = LANES_UNKNOWN; if (!netif_carrier_ok(netdev)) goto out; - speed = mlx5_port_ptys2speed(priv->mdev, eth_proto_oper, force_legacy); - if (!speed) { - if (data_rate_oper) - speed = 100 * data_rate_oper; - else - speed = SPEED_UNKNOWN; - goto out; + info = mlx5_port_ptys2info(priv->mdev, eth_proto_oper, force_legacy); + if (info) { + speed = info->speed; + lanes = info->lanes; + duplex = DUPLEX_FULL; + } else if (data_rate_oper) { + speed = 100 * data_rate_oper; + lanes = MAX_LANES; } - duplex = DUPLEX_FULL; - out: - link_ksettings->base.speed = speed; link_ksettings->base.duplex = 
duplex; + link_ksettings->base.speed = speed; + link_ksettings->lanes = lanes; } static void get_supported(struct mlx5_core_dev *mdev, u32 eth_proto_cap, @@ -1238,8 +1243,8 @@ static int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv, get_supported(mdev, eth_proto_cap, link_ksettings); get_advertising(eth_proto_admin, tx_pause, rx_pause, link_ksettings, admin_ext); - get_speed_duplex(priv->netdev, eth_proto_oper, !admin_ext, - data_rate_oper, link_ksettings); + get_link_properties(priv->netdev, eth_proto_oper, !admin_ext, + data_rate_oper, link_ksettings); eth_proto_oper = eth_proto_oper ? eth_proto_oper : eth_proto_cap; connector_type = connector_type < MLX5E_CONNECTOR_TYPE_NUMBER ? @@ -1349,6 +1354,7 @@ static int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, { struct mlx5_core_dev *mdev = priv->mdev; struct mlx5_port_eth_proto eproto; + struct mlx5_link_info info = {}; const unsigned long *adver; bool an_changes = false; u8 an_disable_admin; @@ -1359,7 +1365,6 @@ static int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, u32 link_modes; u8 an_status; u8 autoneg; - u32 speed; bool ext; int err; @@ -1367,7 +1372,8 @@ static int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, adver = link_ksettings->link_modes.advertising; autoneg = link_ksettings->base.autoneg; - speed = link_ksettings->base.speed; + info.speed = link_ksettings->base.speed; + info.lanes = link_ksettings->lanes; ext_supported = mlx5_ptys_ext_supported(mdev); ext_requested = ext_link_mode_requested(adver); @@ -1384,7 +1390,7 @@ static int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, goto out; } link_modes = autoneg == AUTONEG_ENABLE ? ethtool2ptys_adver_func(adver) : - mlx5_port_speed2linkmodes(mdev, speed, !ext); + mlx5_port_info2linkmodes(mdev, &info, !ext); err = mlx5e_speed_validate(priv->netdev, ext, link_modes, autoneg); if (err) @@ -2615,6 +2621,7 @@ static void mlx5e_get_ts_stats(struct net_device *netdev, } const struct ethtool_ops mlx5e_ethtool_ops = { + .cap_link_lanes_supported = true, .cap_rss_ctx_supported = true, .rxfh_per_ctx_key = true, .supported_coalesce_params = ETHTOOL_COALESCE_USECS | diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 6eb8642d2e2b..2edc61328749 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -5108,11 +5108,9 @@ static int mlx5e_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5_core_dev *mdev = priv->mdev; u8 mode, setting; - int err; - err = mlx5_eswitch_get_vepa(mdev->priv.eswitch, &setting); - if (err) - return err; + if (mlx5_eswitch_get_vepa(mdev->priv.eswitch, &setting)) + return -EOPNOTSUPP; mode = setting ? 
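
get_link_properties() above resolves speed, duplex and lane count from a single mlx5_link_info lookup, keeping the old fallback of 100 * data_rate_oper (apparently in units of 100 Mb/s, given the multiplication) with MAX_LANES assumed when the operational protocol bit is unrecognized; ethtool only exposes the lane count because cap_link_lanes_supported is set. A compact sketch of that resolution order, where resolve_link and struct link_info are illustrative names rather than driver API:

#include <stdint.h>
#include <stdio.h>

#define SPEED_UNKNOWN (-1)
#define LANES_UNKNOWN 0
#define MAX_LANES     8

struct link_info { uint32_t speed, lanes; };

/* Prefer the PTYS table lookup; fall back to the raw data rate with a
 * pessimistic lane count; otherwise report unknown.
 */
static void resolve_link(const struct link_info *ptys, uint16_t data_rate_oper,
			 int *speed, uint32_t *lanes)
{
	*speed = SPEED_UNKNOWN;
	*lanes = LANES_UNKNOWN;

	if (ptys) {
		*speed = ptys->speed;
		*lanes = ptys->lanes;
	} else if (data_rate_oper) {
		*speed = 100 * data_rate_oper;
		*lanes = MAX_LANES;
	}
}

int main(void)
{
	int speed;
	uint32_t lanes;

	resolve_link(NULL, 4000, &speed, &lanes); /* 400 Gb/s via fallback */
	printf("%d Mb/s, %u lanes\n", speed, lanes);
	return 0;
}
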
BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB; return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 2b229b6226c6..dfb079e59d85 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -871,8 +871,8 @@ static void comp_irq_release_sf(struct mlx5_core_dev *dev, u16 vecidx) static int comp_irq_request_sf(struct mlx5_core_dev *dev, u16 vecidx) { + struct mlx5_irq_pool *pool = mlx5_irq_table_get_comp_irq_pool(dev); struct mlx5_eq_table *table = dev->priv.eq_table; - struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev); struct irq_affinity_desc af_desc = {}; struct mlx5_irq *irq; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c index d599e50af346..3ce455c2535c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c @@ -27,7 +27,7 @@ esw_acl_table_create(struct mlx5_eswitch *esw, struct mlx5_vport *vport, int ns, esw_debug(dev, "Create vport[%d] %s ACL table\n", vport_num, ns == MLX5_FLOW_NAMESPACE_ESW_INGRESS ? "ingress" : "egress"); - root_ns = mlx5_get_flow_vport_acl_namespace(dev, ns, vport->index); + root_ns = mlx5_get_flow_vport_namespace(dev, ns, vport->index); if (!root_ns) { esw_warn(dev, "Failed to get E-Switch root namespace for vport (%d)\n", vport_num); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c index 823c1ba456cd..b6ae384396b3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -90,15 +90,30 @@ struct mlx5_esw_sched_node { struct list_head children; /* Valid only if this node is associated with a vport. */ struct mlx5_vport *vport; + /* Level in the hierarchy. The root node level is 1. */ + u8 level; }; +static void esw_qos_node_attach_to_parent(struct mlx5_esw_sched_node *node) +{ + if (!node->parent) { + /* Root children are assigned a depth level of 2. 
*/ + node->level = 2; + list_add_tail(&node->entry, &node->esw->qos.domain->nodes); + } else { + node->level = node->parent->level + 1; + list_add_tail(&node->entry, &node->parent->children); + } +} + static void esw_qos_node_set_parent(struct mlx5_esw_sched_node *node, struct mlx5_esw_sched_node *parent) { list_del_init(&node->entry); node->parent = parent; - list_add_tail(&node->entry, &parent->children); - node->esw = parent->esw; + if (parent) + node->esw = parent->esw; + esw_qos_node_attach_to_parent(node); } void mlx5_esw_qos_vport_qos_free(struct mlx5_vport *vport) @@ -305,8 +320,9 @@ static int esw_qos_set_node_min_rate(struct mlx5_esw_sched_node *node, return 0; } -static int esw_qos_create_node_sched_elem(struct mlx5_core_dev *dev, u32 parent_element_id, - u32 *tsar_ix) +static int +esw_qos_create_node_sched_elem(struct mlx5_core_dev *dev, u32 parent_element_id, + u32 max_rate, u32 bw_share, u32 *tsar_ix) { u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; void *attr; @@ -323,6 +339,8 @@ static int esw_qos_create_node_sched_elem(struct mlx5_core_dev *dev, u32 parent_ SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR); MLX5_SET(scheduling_context, tsar_ctx, parent_element_id, parent_element_id); + MLX5_SET(scheduling_context, tsar_ctx, max_average_bw, max_rate); + MLX5_SET(scheduling_context, tsar_ctx, bw_share, bw_share); attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes); MLX5_SET(tsar_element, attr, tsar_type, TSAR_ELEMENT_TSAR_TYPE_DWRR); @@ -358,7 +376,6 @@ static struct mlx5_esw_sched_node * __esw_qos_alloc_node(struct mlx5_eswitch *esw, u32 tsar_ix, enum sched_node_type type, struct mlx5_esw_sched_node *parent) { - struct list_head *parent_children; struct mlx5_esw_sched_node *node; node = kzalloc(sizeof(*node), GFP_KERNEL); @@ -370,8 +387,7 @@ __esw_qos_alloc_node(struct mlx5_eswitch *esw, u32 tsar_ix, enum sched_node_type node->type = type; node->parent = parent; INIT_LIST_HEAD(&node->children); - parent_children = parent ? 
&parent->children : &esw->qos.domain->nodes; - list_add_tail(&node->entry, parent_children); + esw_qos_node_attach_to_parent(node); return node; } @@ -396,7 +412,8 @@ __esw_qos_create_vports_sched_node(struct mlx5_eswitch *esw, struct mlx5_esw_sch u32 tsar_ix; int err; - err = esw_qos_create_node_sched_elem(esw->dev, esw->qos.root_tsar_ix, &tsar_ix); + err = esw_qos_create_node_sched_elem(esw->dev, esw->qos.root_tsar_ix, 0, + 0, &tsar_ix); if (err) { NL_SET_ERR_MSG_MOD(extack, "E-Switch create TSAR for node failed"); return ERR_PTR(err); @@ -463,7 +480,8 @@ static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *exta if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling)) return -EOPNOTSUPP; - err = esw_qos_create_node_sched_elem(esw->dev, 0, &esw->qos.root_tsar_ix); + err = esw_qos_create_node_sched_elem(esw->dev, 0, 0, 0, + &esw->qos.root_tsar_ix); if (err) { esw_warn(dev, "E-Switch create root TSAR failed (%d)\n", err); return err; @@ -986,10 +1004,10 @@ int mlx5_esw_qos_vport_update_parent(struct mlx5_vport *vport, struct mlx5_esw_s return err; } -int mlx5_esw_devlink_rate_parent_set(struct devlink_rate *devlink_rate, - struct devlink_rate *parent, - void *priv, void *parent_priv, - struct netlink_ext_ack *extack) +int mlx5_esw_devlink_rate_leaf_parent_set(struct devlink_rate *devlink_rate, + struct devlink_rate *parent, + void *priv, void *parent_priv, + struct netlink_ext_ack *extack) { struct mlx5_esw_sched_node *node; struct mlx5_vport *vport = priv; @@ -1000,3 +1018,105 @@ int mlx5_esw_devlink_rate_parent_set(struct devlink_rate *devlink_rate, node = parent_priv; return mlx5_esw_qos_vport_update_parent(vport, node, extack); } + +static int +mlx5_esw_qos_node_validate_set_parent(struct mlx5_esw_sched_node *node, + struct mlx5_esw_sched_node *parent, + struct netlink_ext_ack *extack) +{ + u8 new_level, max_level; + + if (parent && parent->esw != node->esw) { + NL_SET_ERR_MSG_MOD(extack, + "Cannot assign node to another E-Switch"); + return -EOPNOTSUPP; + } + + if (!list_empty(&node->children)) { + NL_SET_ERR_MSG_MOD(extack, + "Cannot reassign a node that contains rate objects"); + return -EOPNOTSUPP; + } + + new_level = parent ? parent->level + 1 : 2; + max_level = 1 << MLX5_CAP_QOS(node->esw->dev, log_esw_max_sched_depth); + if (new_level > max_level) { + NL_SET_ERR_MSG_MOD(extack, + "Node hierarchy depth exceeds the maximum supported level"); + return -EOPNOTSUPP; + } + + return 0; +} + +static int esw_qos_vports_node_update_parent(struct mlx5_esw_sched_node *node, + struct mlx5_esw_sched_node *parent, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_sched_node *curr_parent = node->parent; + struct mlx5_eswitch *esw = node->esw; + u32 parent_ix; + int err; + + parent_ix = parent ? 
parent->ix : node->esw->qos.root_tsar_ix; + mlx5_destroy_scheduling_element_cmd(esw->dev, + SCHEDULING_HIERARCHY_E_SWITCH, + node->ix); + err = esw_qos_create_node_sched_elem(esw->dev, parent_ix, + node->max_rate, 0, &node->ix); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Failed to create a node under the new hierarchy."); + if (esw_qos_create_node_sched_elem(esw->dev, curr_parent->ix, + node->max_rate, + node->bw_share, + &node->ix)) + esw_warn(esw->dev, "Node restore QoS failed\n"); + + return err; + } + esw_qos_node_set_parent(node, parent); + + return 0; +} + +static int mlx5_esw_qos_node_update_parent(struct mlx5_esw_sched_node *node, + struct mlx5_esw_sched_node *parent, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_sched_node *curr_parent; + struct mlx5_eswitch *esw = node->esw; + int err; + + err = mlx5_esw_qos_node_validate_set_parent(node, parent, extack); + if (err) + return err; + + esw_qos_lock(esw); + curr_parent = node->parent; + err = esw_qos_vports_node_update_parent(node, parent, extack); + if (err) + goto out; + + esw_qos_normalize_min_rate(esw, curr_parent, extack); + esw_qos_normalize_min_rate(esw, parent, extack); + +out: + esw_qos_unlock(esw); + + return err; +} + +int mlx5_esw_devlink_rate_node_parent_set(struct devlink_rate *devlink_rate, + struct devlink_rate *parent, + void *priv, void *parent_priv, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_sched_node *node = priv, *parent_node; + + if (!parent) + return mlx5_esw_qos_node_update_parent(node, NULL, extack); + + parent_node = parent_priv; + return mlx5_esw_qos_node_update_parent(node, parent_node, extack); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h index 6eb8f6a648c8..ed40ec8f027e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h @@ -29,10 +29,14 @@ int mlx5_esw_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv, struct netlink_ext_ack *extack); int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv, struct netlink_ext_ack *extack); -int mlx5_esw_devlink_rate_parent_set(struct devlink_rate *devlink_rate, - struct devlink_rate *parent, - void *priv, void *parent_priv, - struct netlink_ext_ack *extack); +int mlx5_esw_devlink_rate_leaf_parent_set(struct devlink_rate *devlink_rate, + struct devlink_rate *parent, + void *priv, void *parent_priv, + struct netlink_ext_ack *extack); +int mlx5_esw_devlink_rate_node_parent_set(struct devlink_rate *devlink_rate, + struct devlink_rate *parent, + void *priv, void *parent_priv, + struct netlink_ext_ack *extack); #endif #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 0fa0333106a2..a6a8eea5980c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -648,6 +648,7 @@ esw_setup_meter(struct mlx5_flow_attr *attr, struct mlx5_flow_act *flow_act) meter = attr->meter_attr.meter; flow_act->exe_aso.type = attr->exe_aso_type; flow_act->exe_aso.object_id = meter->obj_id; + flow_act->exe_aso.base_id = mlx5e_flow_meter_get_base_id(meter); flow_act->exe_aso.flow_meter.meter_idx = meter->idx; flow_act->exe_aso.flow_meter.init_color = MLX5_FLOW_METER_COLOR_GREEN; /* use metadata reg 5 for packet color */ @@ -2828,9 +2829,9 @@ static int esw_set_master_egress_rule(struct mlx5_core_dev *master, if (IS_ERR(vport)) 
return PTR_ERR(vport); - egress_ns = mlx5_get_flow_vport_acl_namespace(master, - MLX5_FLOW_NAMESPACE_ESW_EGRESS, - vport->index); + egress_ns = mlx5_get_flow_vport_namespace(master, + MLX5_FLOW_NAMESPACE_ESW_EGRESS, + vport->index); if (!egress_ns) return -EINVAL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index ae20c061e0fb..a47c29571f64 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -1142,6 +1142,8 @@ const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default(enum fs_flow_table_type typ case FS_FT_RDMA_RX: case FS_FT_RDMA_TX: case FS_FT_PORT_SEL: + case FS_FT_RDMA_TRANSPORT_RX: + case FS_FT_RDMA_TRANSPORT_TX: return mlx5_fs_cmd_get_fw_cmds(); default: return mlx5_fs_cmd_get_stub_cmds(); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 22dc23d991d2..6163bc98d94a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1456,7 +1456,7 @@ mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns, struct mlx5_flow_table *ft; int autogroups_max_fte; - ft = mlx5_create_flow_table(ns, ft_attr); + ft = mlx5_create_vport_flow_table(ns, ft_attr, ft_attr->vport); if (IS_ERR(ft)) return ft; @@ -2764,9 +2764,9 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, } EXPORT_SYMBOL(mlx5_get_flow_namespace); -struct mlx5_flow_namespace *mlx5_get_flow_vport_acl_namespace(struct mlx5_core_dev *dev, - enum mlx5_flow_namespace_type type, - int vport) +struct mlx5_flow_namespace * +mlx5_get_flow_vport_namespace(struct mlx5_core_dev *dev, + enum mlx5_flow_namespace_type type, int vport_idx) { struct mlx5_flow_steering *steering = dev->priv.steering; @@ -2775,25 +2775,43 @@ struct mlx5_flow_namespace *mlx5_get_flow_vport_acl_namespace(struct mlx5_core_d switch (type) { case MLX5_FLOW_NAMESPACE_ESW_EGRESS: - if (vport >= steering->esw_egress_acl_vports) + if (vport_idx >= steering->esw_egress_acl_vports) return NULL; if (steering->esw_egress_root_ns && - steering->esw_egress_root_ns[vport]) - return &steering->esw_egress_root_ns[vport]->ns; + steering->esw_egress_root_ns[vport_idx]) + return &steering->esw_egress_root_ns[vport_idx]->ns; else return NULL; case MLX5_FLOW_NAMESPACE_ESW_INGRESS: - if (vport >= steering->esw_ingress_acl_vports) + if (vport_idx >= steering->esw_ingress_acl_vports) return NULL; if (steering->esw_ingress_root_ns && - steering->esw_ingress_root_ns[vport]) - return &steering->esw_ingress_root_ns[vport]->ns; + steering->esw_ingress_root_ns[vport_idx]) + return &steering->esw_ingress_root_ns[vport_idx]->ns; + else + return NULL; + case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX: + if (vport_idx >= steering->rdma_transport_rx_vports) + return NULL; + if (steering->rdma_transport_rx_root_ns && + steering->rdma_transport_rx_root_ns[vport_idx]) + return &steering->rdma_transport_rx_root_ns[vport_idx]->ns; + else + return NULL; + case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX: + if (vport_idx >= steering->rdma_transport_tx_vports) + return NULL; + + if (steering->rdma_transport_tx_root_ns && + steering->rdma_transport_tx_root_ns[vport_idx]) + return &steering->rdma_transport_tx_root_ns[vport_idx]->ns; else return NULL; default: return NULL; } } +EXPORT_SYMBOL(mlx5_get_flow_vport_namespace); static struct fs_prio *_fs_create_prio(struct mlx5_flow_namespace *ns, unsigned int prio, @@ -3199,6 +3217,127 
@@ out_err: return err; } +static int +init_rdma_transport_rx_root_ns_one(struct mlx5_flow_steering *steering, + int vport_idx) +{ + struct fs_prio *prio; + + steering->rdma_transport_rx_root_ns[vport_idx] = + create_root_ns(steering, FS_FT_RDMA_TRANSPORT_RX); + if (!steering->rdma_transport_rx_root_ns[vport_idx]) + return -ENOMEM; + + /* create 1 prio*/ + prio = fs_create_prio(&steering->rdma_transport_rx_root_ns[vport_idx]->ns, + MLX5_RDMA_TRANSPORT_BYPASS_PRIO, 1); + return PTR_ERR_OR_ZERO(prio); +} + +static int +init_rdma_transport_tx_root_ns_one(struct mlx5_flow_steering *steering, + int vport_idx) +{ + struct fs_prio *prio; + + steering->rdma_transport_tx_root_ns[vport_idx] = + create_root_ns(steering, FS_FT_RDMA_TRANSPORT_TX); + if (!steering->rdma_transport_tx_root_ns[vport_idx]) + return -ENOMEM; + + /* create 1 prio*/ + prio = fs_create_prio(&steering->rdma_transport_tx_root_ns[vport_idx]->ns, + MLX5_RDMA_TRANSPORT_BYPASS_PRIO, 1); + return PTR_ERR_OR_ZERO(prio); +} + +static int init_rdma_transport_rx_root_ns(struct mlx5_flow_steering *steering) +{ + struct mlx5_core_dev *dev = steering->dev; + int total_vports; + int err; + int i; + + /* In case eswitch not supported and working in legacy mode */ + total_vports = mlx5_eswitch_get_total_vports(dev) ?: 1; + + steering->rdma_transport_rx_root_ns = + kcalloc(total_vports, + sizeof(*steering->rdma_transport_rx_root_ns), + GFP_KERNEL); + if (!steering->rdma_transport_rx_root_ns) + return -ENOMEM; + + for (i = 0; i < total_vports; i++) { + err = init_rdma_transport_rx_root_ns_one(steering, i); + if (err) + goto cleanup_root_ns; + } + steering->rdma_transport_rx_vports = total_vports; + return 0; + +cleanup_root_ns: + while (i--) + cleanup_root_ns(steering->rdma_transport_rx_root_ns[i]); + kfree(steering->rdma_transport_rx_root_ns); + steering->rdma_transport_rx_root_ns = NULL; + return err; +} + +static int init_rdma_transport_tx_root_ns(struct mlx5_flow_steering *steering) +{ + struct mlx5_core_dev *dev = steering->dev; + int total_vports; + int err; + int i; + + /* In case eswitch not supported and working in legacy mode */ + total_vports = mlx5_eswitch_get_total_vports(dev) ?: 1; + + steering->rdma_transport_tx_root_ns = + kcalloc(total_vports, + sizeof(*steering->rdma_transport_tx_root_ns), + GFP_KERNEL); + if (!steering->rdma_transport_tx_root_ns) + return -ENOMEM; + + for (i = 0; i < total_vports; i++) { + err = init_rdma_transport_tx_root_ns_one(steering, i); + if (err) + goto cleanup_root_ns; + } + steering->rdma_transport_tx_vports = total_vports; + return 0; + +cleanup_root_ns: + while (i--) + cleanup_root_ns(steering->rdma_transport_tx_root_ns[i]); + kfree(steering->rdma_transport_tx_root_ns); + steering->rdma_transport_tx_root_ns = NULL; + return err; +} + +static void cleanup_rdma_transport_roots_ns(struct mlx5_flow_steering *steering) +{ + int i; + + if (steering->rdma_transport_rx_root_ns) { + for (i = 0; i < steering->rdma_transport_rx_vports; i++) + cleanup_root_ns(steering->rdma_transport_rx_root_ns[i]); + + kfree(steering->rdma_transport_rx_root_ns); + steering->rdma_transport_rx_root_ns = NULL; + } + + if (steering->rdma_transport_tx_root_ns) { + for (i = 0; i < steering->rdma_transport_tx_vports; i++) + cleanup_root_ns(steering->rdma_transport_tx_root_ns[i]); + + kfree(steering->rdma_transport_tx_root_ns); + steering->rdma_transport_tx_root_ns = NULL; + } +} + /* FT and tc chains are stored in the same array so we can re-use the * mlx5_get_fdb_sub_ns() and tc api for FT chains. 
* When creating a new ns for each chain store it in the first available slot. @@ -3631,6 +3770,7 @@ void mlx5_fs_core_cleanup(struct mlx5_core_dev *dev) cleanup_root_ns(steering->rdma_rx_root_ns); cleanup_root_ns(steering->rdma_tx_root_ns); cleanup_root_ns(steering->egress_root_ns); + cleanup_rdma_transport_roots_ns(steering); devl_params_unregister(priv_to_devlink(dev), mlx5_fs_params, ARRAY_SIZE(mlx5_fs_params)); @@ -3700,6 +3840,18 @@ int mlx5_fs_core_init(struct mlx5_core_dev *dev) goto err; } + if (MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_RX(dev, ft_support)) { + err = init_rdma_transport_rx_root_ns(steering); + if (err) + goto err; + } + + if (MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_TX(dev, ft_support)) { + err = init_rdma_transport_tx_root_ns(steering); + if (err) + goto err; + } + return 0; err: @@ -3850,8 +4002,10 @@ mlx5_get_root_namespace(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type struct mlx5_flow_namespace *ns; if (ns_type == MLX5_FLOW_NAMESPACE_ESW_EGRESS || - ns_type == MLX5_FLOW_NAMESPACE_ESW_INGRESS) - ns = mlx5_get_flow_vport_acl_namespace(dev, ns_type, 0); + ns_type == MLX5_FLOW_NAMESPACE_ESW_INGRESS || + ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX || + ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX) + ns = mlx5_get_flow_vport_namespace(dev, ns_type, 0); else ns = mlx5_get_flow_namespace(dev, ns_type); if (!ns) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 20837e526679..0767239f651c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -115,7 +115,9 @@ enum fs_flow_table_type { FS_FT_PORT_SEL = 0X9, FS_FT_FDB_RX = 0xa, FS_FT_FDB_TX = 0xb, - FS_FT_MAX_TYPE = FS_FT_FDB_TX, + FS_FT_RDMA_TRANSPORT_RX = 0xd, + FS_FT_RDMA_TRANSPORT_TX = 0xe, + FS_FT_MAX_TYPE = FS_FT_RDMA_TRANSPORT_TX, }; enum fs_flow_table_op_mod { @@ -158,6 +160,10 @@ struct mlx5_flow_steering { struct mlx5_flow_root_namespace *port_sel_root_ns; int esw_egress_acl_vports; int esw_ingress_acl_vports; + struct mlx5_flow_root_namespace **rdma_transport_rx_root_ns; + struct mlx5_flow_root_namespace **rdma_transport_tx_root_ns; + int rdma_transport_rx_vports; + int rdma_transport_tx_vports; }; struct fs_node { @@ -341,16 +347,10 @@ struct mlx5_fc { u64 lastbytes; }; -struct mlx5_fc_bulk_hws_data { - struct mlx5hws_action *hws_action; - struct mutex lock; /* protects hws_action */ - refcount_t hws_action_refcount; -}; - struct mlx5_fc_bulk { struct mlx5_fs_bulk fs_bulk; u32 base_id; - struct mlx5_fc_bulk_hws_data hws_data; + struct mlx5_fs_hws_data hws_data; struct mlx5_fc fcs[]; }; @@ -434,7 +434,9 @@ struct mlx5_flow_root_namespace *find_root(struct fs_node *node); (type == FS_FT_PORT_SEL) ? MLX5_CAP_FLOWTABLE_PORT_SELECTION(mdev, cap) : \ (type == FS_FT_FDB_RX) ? MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, cap) : \ (type == FS_FT_FDB_TX) ? MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, cap) : \ - (BUILD_BUG_ON_ZERO(FS_FT_FDB_TX != FS_FT_MAX_TYPE))\ + (type == FS_FT_RDMA_TRANSPORT_RX) ? MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_RX(mdev, cap) : \ + (type == FS_FT_RDMA_TRANSPORT_TX) ? 
MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_TX(mdev, cap) : \ + (BUILD_BUG_ON_ZERO(FS_FT_RDMA_TRANSPORT_TX != FS_FT_MAX_TYPE))\ ) #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index b253d1673398..57476487e31f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -287,6 +287,13 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev) return err; } + if (MLX5_CAP_GEN(dev, adv_rdma)) { + err = mlx5_core_get_caps_mode(dev, MLX5_CAP_ADV_RDMA, + HCA_CAP_OPMOD_GET_CUR); + if (err) + return err; + } + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c index 1477db7f5307..2691d88cdee1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c @@ -175,7 +175,7 @@ unlock: void mlx5_irq_affinity_irq_release(struct mlx5_core_dev *dev, struct mlx5_irq *irq) { - struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev); + struct mlx5_irq_pool *pool = mlx5_irq_get_pool(irq); int cpu; cpu = cpumask_first(mlx5_irq_get_affinity_mask(irq)); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index cea5aa314f6c..ed2ba272946b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -951,7 +951,7 @@ void mlx5_disable_lag(struct mlx5_lag *ldev) mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch); } -static bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev) +bool mlx5_lag_shared_fdb_supported(struct mlx5_lag *ldev) { int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); struct mlx5_core_dev *dev; @@ -1038,7 +1038,7 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) } if (do_bond && !__mlx5_lag_is_active(ldev)) { - bool shared_fdb = mlx5_shared_fdb_supported(ldev); + bool shared_fdb = mlx5_lag_shared_fdb_supported(ldev); roce_lag = mlx5_lag_is_roce_lag(ldev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h index 01cf72366947..c2f256bb2bc2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h @@ -92,6 +92,7 @@ mlx5_lag_is_ready(struct mlx5_lag *ldev) return test_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags); } +bool mlx5_lag_shared_fdb_supported(struct mlx5_lag *ldev); bool mlx5_lag_check_prereq(struct mlx5_lag *ldev); void mlx5_modify_lag(struct mlx5_lag *ldev, struct lag_tracker *tracker); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c index ffac0bd6c895..aad52d3a90e6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c @@ -65,7 +65,6 @@ err_metadata: return err; } -#define MLX5_LAG_MPESW_OFFLOADS_SUPPORTED_PORTS 4 static int enable_mpesw(struct mlx5_lag *ldev) { int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); @@ -77,13 +76,11 @@ static int enable_mpesw(struct mlx5_lag *ldev) return -EINVAL; dev0 = ldev->pf[idx].dev; - if (ldev->ports > MLX5_LAG_MPESW_OFFLOADS_SUPPORTED_PORTS) - return -EOPNOTSUPP; - if (mlx5_eswitch_mode(dev0) != MLX5_ESWITCH_OFFLOADS || !MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table) || !MLX5_CAP_GEN(dev0, create_lag_when_not_master_up) || - !mlx5_lag_check_prereq(ldev)) + !mlx5_lag_check_prereq(ldev) || + 
!mlx5_lag_shared_fdb_supported(ldev)) return -EOPNOTSUPP; err = mlx5_mpesw_metadata_set(ldev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c index c862dd28c466..0a3c260af377 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c @@ -197,6 +197,11 @@ mlx5_chains_create_table(struct mlx5_fs_chains *chains, ns = mlx5_get_flow_namespace(chains->dev, chains->ns); } + if (!ns) { + mlx5_core_warn(chains->dev, "Failed to get flow namespace\n"); + return ERR_PTR(-EOPNOTSUPP); + } + ft_attr.autogroup.num_reserved_entries = 2; ft_attr.autogroup.max_num_groups = chains->group_num; ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); @@ -700,7 +705,7 @@ mlx5_chains_create_global_table(struct mlx5_fs_chains *chains) goto err_ignore; } - chain = mlx5_chains_get_chain_range(chains), + chain = mlx5_chains_get_chain_range(chains); prio = mlx5_chains_get_prio_range(chains); level = mlx5_chains_get_level_range(chains); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 710633d5fdbe..7e13c099e9fc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1804,6 +1804,7 @@ static const int types[] = { MLX5_CAP_ADV_VIRTUALIZATION, MLX5_CAP_CRYPTO, MLX5_CAP_SHAMPO, + MLX5_CAP_ADV_RDMA, }; static void mlx5_hca_caps_free(struct mlx5_core_dev *dev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 6fef1005c469..2e02bdea8361 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -114,6 +114,26 @@ struct mlx5_cmd_alias_obj_create_attr { u8 access_key[ACCESS_KEY_LEN]; }; +struct mlx5_port_eth_proto { + u32 cap; + u32 admin; + u32 oper; +}; + +struct mlx5_module_eeprom_query_params { + u16 size; + u16 offset; + u16 i2c_address; + u32 page; + u32 bank; + u32 module_number; +}; + +struct mlx5_link_info { + u32 speed; + u32 lanes; +}; + static inline void mlx5_printk(struct mlx5_core_dev *dev, int level, const char *format, ...) 
{ struct device *device = dev->device; @@ -280,6 +300,78 @@ int mlx5_set_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 arm, u8 mode); struct mlx5_dm *mlx5_dm_create(struct mlx5_core_dev *dev); void mlx5_dm_cleanup(struct mlx5_core_dev *dev); +void mlx5_toggle_port_link(struct mlx5_core_dev *dev); +int mlx5_set_port_admin_status(struct mlx5_core_dev *dev, + enum mlx5_port_status status); +int mlx5_query_port_admin_status(struct mlx5_core_dev *dev, + enum mlx5_port_status *status); +int mlx5_set_port_beacon(struct mlx5_core_dev *dev, u16 beacon_duration); + +int mlx5_set_port_mtu(struct mlx5_core_dev *dev, u16 mtu, u8 port); +int mlx5_set_port_pause(struct mlx5_core_dev *dev, u32 rx_pause, u32 tx_pause); +int mlx5_query_port_pause(struct mlx5_core_dev *dev, + u32 *rx_pause, u32 *tx_pause); + +int mlx5_set_port_pfc(struct mlx5_core_dev *dev, u8 pfc_en_tx, u8 pfc_en_rx); +int mlx5_query_port_pfc(struct mlx5_core_dev *dev, u8 *pfc_en_tx, + u8 *pfc_en_rx); + +int mlx5_set_port_stall_watermark(struct mlx5_core_dev *dev, + u16 stall_critical_watermark, + u16 stall_minor_watermark); +int mlx5_query_port_stall_watermark(struct mlx5_core_dev *dev, + u16 *stall_critical_watermark, + u16 *stall_minor_watermark); + +int mlx5_max_tc(struct mlx5_core_dev *mdev); +int mlx5_set_port_prio_tc(struct mlx5_core_dev *mdev, u8 *prio_tc); +int mlx5_query_port_prio_tc(struct mlx5_core_dev *mdev, + u8 prio, u8 *tc); +int mlx5_set_port_tc_group(struct mlx5_core_dev *mdev, u8 *tc_group); +int mlx5_query_port_tc_group(struct mlx5_core_dev *mdev, + u8 tc, u8 *tc_group); +int mlx5_set_port_tc_bw_alloc(struct mlx5_core_dev *mdev, u8 *tc_bw); +int mlx5_query_port_tc_bw_alloc(struct mlx5_core_dev *mdev, + u8 tc, u8 *bw_pct); +int mlx5_modify_port_ets_rate_limit(struct mlx5_core_dev *mdev, + u8 *max_bw_value, + u8 *max_bw_unit); +int mlx5_query_port_ets_rate_limit(struct mlx5_core_dev *mdev, + u8 *max_bw_value, + u8 *max_bw_unit); +int mlx5_set_port_wol(struct mlx5_core_dev *mdev, u8 wol_mode); +int mlx5_query_port_wol(struct mlx5_core_dev *mdev, u8 *wol_mode); + +int mlx5_query_ports_check(struct mlx5_core_dev *mdev, u32 *out, int outlen); +int mlx5_set_ports_check(struct mlx5_core_dev *mdev, u32 *in, int inlen); +int mlx5_set_port_fcs(struct mlx5_core_dev *mdev, u8 enable); +void mlx5_query_port_fcs(struct mlx5_core_dev *mdev, bool *supported, + bool *enabled); +int mlx5_query_module_eeprom(struct mlx5_core_dev *dev, + u16 offset, u16 size, u8 *data); +int +mlx5_query_module_eeprom_by_page(struct mlx5_core_dev *dev, + struct mlx5_module_eeprom_query_params *params, + u8 *data); + +int mlx5_query_port_dcbx_param(struct mlx5_core_dev *mdev, u32 *out); +int mlx5_set_port_dcbx_param(struct mlx5_core_dev *mdev, u32 *in); +int mlx5_set_trust_state(struct mlx5_core_dev *mdev, u8 trust_state); +int mlx5_query_trust_state(struct mlx5_core_dev *mdev, u8 *trust_state); +int mlx5_set_dscp2prio(struct mlx5_core_dev *mdev, u8 dscp, u8 prio); +int mlx5_query_dscp2prio(struct mlx5_core_dev *mdev, u8 *dscp2prio); + +int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, bool ext, + struct mlx5_port_eth_proto *eproto); +bool mlx5_ptys_ext_supported(struct mlx5_core_dev *mdev); +const struct mlx5_link_info *mlx5_port_ptys2info(struct mlx5_core_dev *mdev, + u32 eth_proto_oper, + bool force_legacy); +u32 mlx5_port_info2linkmodes(struct mlx5_core_dev *mdev, + struct mlx5_link_info *info, + bool force_legacy); +int mlx5_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed); + #define MLX5_PPS_CAP(mdev) (MLX5_CAP_GEN((mdev), pps) 
&& \ MLX5_CAP_GEN((mdev), pps_modify) && \ MLX5_CAP_MCAM_FEATURE((mdev), mtpps_fs) && \ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h index 0881e961d8b1..586688da9940 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h @@ -10,12 +10,15 @@ struct mlx5_irq; struct cpu_rmap; +struct mlx5_irq_pool; int mlx5_irq_table_init(struct mlx5_core_dev *dev); void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev); int mlx5_irq_table_create(struct mlx5_core_dev *dev); void mlx5_irq_table_destroy(struct mlx5_core_dev *dev); void mlx5_irq_table_free_irqs(struct mlx5_core_dev *dev); +struct mlx5_irq_pool * +mlx5_irq_table_get_comp_irq_pool(struct mlx5_core_dev *dev); int mlx5_irq_table_get_num_comp(struct mlx5_irq_table *table); int mlx5_irq_table_get_sfs_vec(struct mlx5_irq_table *table); struct mlx5_irq_table *mlx5_irq_table_get(struct mlx5_core_dev *dev); @@ -38,7 +41,6 @@ struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq); int mlx5_irq_get_index(struct mlx5_irq *irq); int mlx5_irq_get_irq(const struct mlx5_irq *irq); -struct mlx5_irq_pool; #ifdef CONFIG_MLX5_SF struct mlx5_irq *mlx5_irq_affinity_irq_request_auto(struct mlx5_core_dev *dev, struct cpumask *used_cpus, u16 vecidx); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index d9362eabc6a1..2c5f850c31f6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -378,6 +378,11 @@ int mlx5_irq_get_index(struct mlx5_irq *irq) return irq->map.index; } +struct mlx5_irq_pool *mlx5_irq_get_pool(struct mlx5_irq *irq) +{ + return irq->pool; +} + /* irq_pool API */ /* requesting an irq from a given pool according to given index */ @@ -405,18 +410,20 @@ static struct mlx5_irq_pool *sf_ctrl_irq_pool_get(struct mlx5_irq_table *irq_tab return irq_table->sf_ctrl_pool; } -static struct mlx5_irq_pool *sf_irq_pool_get(struct mlx5_irq_table *irq_table) +static struct mlx5_irq_pool * +sf_comp_irq_pool_get(struct mlx5_irq_table *irq_table) { return irq_table->sf_comp_pool; } -struct mlx5_irq_pool *mlx5_irq_pool_get(struct mlx5_core_dev *dev) +struct mlx5_irq_pool * +mlx5_irq_table_get_comp_irq_pool(struct mlx5_core_dev *dev) { struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev); struct mlx5_irq_pool *pool = NULL; if (mlx5_core_is_sf(dev)) - pool = sf_irq_pool_get(irq_table); + pool = sf_comp_irq_pool_get(irq_table); /* In some configs, there won't be a pool of SFs IRQs. Hence, returning * the PF IRQs pool in case the SF pool doesn't exist. 
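The pci_irq.c change above splits the old exported mlx5_irq_pool_get(dev) into two narrower lookups: request paths resolve the completion pool from the device's IRQ table via mlx5_irq_table_get_comp_irq_pool(), while release paths recover the pool from the IRQ itself via the new mlx5_irq_get_pool(). A minimal release-side sketch of how the helpers pair up; the wrapper function below is hypothetical, only the helpers it calls appear in this patch or the existing driver:

/* Hypothetical wrapper, not driver code: release an IRQ using the pool
 * recorded in the IRQ itself.  Asking the IRQ for its pool (rather than
 * recomputing it from the device) stays correct when an SF request was
 * satisfied from the PF pool fallback described in the comment above.
 */
static void comp_irq_release_sketch(struct mlx5_core_dev *dev,
				    struct mlx5_irq *irq)
{
	struct mlx5_irq_pool *pool = mlx5_irq_get_pool(irq);

	if (mlx5_irq_pool_is_sf_pool(pool))
		mlx5_irq_affinity_irq_release(dev, irq);
	else
		mlx5_irq_release_vector(irq);
}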
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h index c4d377f8df30..cc064425fe16 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h @@ -28,7 +28,6 @@ struct mlx5_irq_pool { struct mlx5_core_dev *dev; }; -struct mlx5_irq_pool *mlx5_irq_pool_get(struct mlx5_core_dev *dev); static inline bool mlx5_irq_pool_is_sf_pool(struct mlx5_irq_pool *pool) { return !strncmp("mlx5_sf", pool->name, strlen("mlx5_sf")); @@ -40,5 +39,6 @@ struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i, int mlx5_irq_get_locked(struct mlx5_irq *irq); int mlx5_irq_read_locked(struct mlx5_irq *irq); int mlx5_irq_put(struct mlx5_irq *irq); +struct mlx5_irq_pool *mlx5_irq_get_pool(struct mlx5_irq *irq); #endif /* __PCI_IRQ_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 3995df064101..549f1066d2a5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -196,7 +196,6 @@ void mlx5_toggle_port_link(struct mlx5_core_dev *dev) if (ps == MLX5_PORT_UP) mlx5_set_port_admin_status(dev, MLX5_PORT_UP); } -EXPORT_SYMBOL_GPL(mlx5_toggle_port_link); int mlx5_set_port_admin_status(struct mlx5_core_dev *dev, enum mlx5_port_status status) @@ -210,7 +209,6 @@ int mlx5_set_port_admin_status(struct mlx5_core_dev *dev, return mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_PAOS, 0, 1); } -EXPORT_SYMBOL_GPL(mlx5_set_port_admin_status); int mlx5_query_port_admin_status(struct mlx5_core_dev *dev, enum mlx5_port_status *status) @@ -227,7 +225,6 @@ int mlx5_query_port_admin_status(struct mlx5_core_dev *dev, *status = MLX5_GET(paos_reg, out, admin_status); return 0; } -EXPORT_SYMBOL_GPL(mlx5_query_port_admin_status); static void mlx5_query_port_mtu(struct mlx5_core_dev *dev, u16 *admin_mtu, u16 *max_mtu, u16 *oper_mtu, u8 port) @@ -257,7 +254,6 @@ int mlx5_set_port_mtu(struct mlx5_core_dev *dev, u16 mtu, u8 port) return mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_PMTU, 0, 1); } -EXPORT_SYMBOL_GPL(mlx5_set_port_mtu); void mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, u16 *max_mtu, u8 port) @@ -447,7 +443,6 @@ int mlx5_query_module_eeprom(struct mlx5_core_dev *dev, return mlx5_query_mcia(dev, &query, data); } -EXPORT_SYMBOL_GPL(mlx5_query_module_eeprom); int mlx5_query_module_eeprom_by_page(struct mlx5_core_dev *dev, struct mlx5_module_eeprom_query_params *params, @@ -467,7 +462,6 @@ int mlx5_query_module_eeprom_by_page(struct mlx5_core_dev *dev, return mlx5_query_mcia(dev, params, data); } -EXPORT_SYMBOL_GPL(mlx5_query_module_eeprom_by_page); static int mlx5_query_port_pvlc(struct mlx5_core_dev *dev, u32 *pvlc, int pvlc_size, u8 local_port) @@ -518,7 +512,6 @@ int mlx5_set_port_pause(struct mlx5_core_dev *dev, u32 rx_pause, u32 tx_pause) return mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_PFCC, 0, 1); } -EXPORT_SYMBOL_GPL(mlx5_set_port_pause); int mlx5_query_port_pause(struct mlx5_core_dev *dev, u32 *rx_pause, u32 *tx_pause) @@ -538,7 +531,6 @@ int mlx5_query_port_pause(struct mlx5_core_dev *dev, return 0; } -EXPORT_SYMBOL_GPL(mlx5_query_port_pause); int mlx5_set_port_stall_watermark(struct mlx5_core_dev *dev, u16 stall_critical_watermark, @@ -597,7 +589,6 @@ int mlx5_set_port_pfc(struct mlx5_core_dev *dev, u8 pfc_en_tx, u8 pfc_en_rx) return mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), 
MLX5_REG_PFCC, 0, 1); } -EXPORT_SYMBOL_GPL(mlx5_set_port_pfc); int mlx5_query_port_pfc(struct mlx5_core_dev *dev, u8 *pfc_en_tx, u8 *pfc_en_rx) { @@ -616,7 +607,6 @@ int mlx5_query_port_pfc(struct mlx5_core_dev *dev, u8 *pfc_en_tx, u8 *pfc_en_rx) return 0; } -EXPORT_SYMBOL_GPL(mlx5_query_port_pfc); int mlx5_max_tc(struct mlx5_core_dev *mdev) { @@ -667,7 +657,6 @@ int mlx5_set_port_prio_tc(struct mlx5_core_dev *mdev, u8 *prio_tc) return 0; } -EXPORT_SYMBOL_GPL(mlx5_set_port_prio_tc); int mlx5_query_port_prio_tc(struct mlx5_core_dev *mdev, u8 prio, u8 *tc) @@ -689,7 +678,6 @@ int mlx5_query_port_prio_tc(struct mlx5_core_dev *mdev, return err; } -EXPORT_SYMBOL_GPL(mlx5_query_port_prio_tc); static int mlx5_set_port_qetcr_reg(struct mlx5_core_dev *mdev, u32 *in, int inlen) @@ -728,7 +716,6 @@ int mlx5_set_port_tc_group(struct mlx5_core_dev *mdev, u8 *tc_group) return mlx5_set_port_qetcr_reg(mdev, in, sizeof(in)); } -EXPORT_SYMBOL_GPL(mlx5_set_port_tc_group); int mlx5_query_port_tc_group(struct mlx5_core_dev *mdev, u8 tc, u8 *tc_group) @@ -749,7 +736,6 @@ int mlx5_query_port_tc_group(struct mlx5_core_dev *mdev, return 0; } -EXPORT_SYMBOL_GPL(mlx5_query_port_tc_group); int mlx5_set_port_tc_bw_alloc(struct mlx5_core_dev *mdev, u8 *tc_bw) { @@ -763,7 +749,6 @@ int mlx5_set_port_tc_bw_alloc(struct mlx5_core_dev *mdev, u8 *tc_bw) return mlx5_set_port_qetcr_reg(mdev, in, sizeof(in)); } -EXPORT_SYMBOL_GPL(mlx5_set_port_tc_bw_alloc); int mlx5_query_port_tc_bw_alloc(struct mlx5_core_dev *mdev, u8 tc, u8 *bw_pct) @@ -784,7 +769,6 @@ int mlx5_query_port_tc_bw_alloc(struct mlx5_core_dev *mdev, return 0; } -EXPORT_SYMBOL_GPL(mlx5_query_port_tc_bw_alloc); int mlx5_modify_port_ets_rate_limit(struct mlx5_core_dev *mdev, u8 *max_bw_value, @@ -808,7 +792,6 @@ int mlx5_modify_port_ets_rate_limit(struct mlx5_core_dev *mdev, return mlx5_set_port_qetcr_reg(mdev, in, sizeof(in)); } -EXPORT_SYMBOL_GPL(mlx5_modify_port_ets_rate_limit); int mlx5_query_port_ets_rate_limit(struct mlx5_core_dev *mdev, u8 *max_bw_value, @@ -834,7 +817,6 @@ int mlx5_query_port_ets_rate_limit(struct mlx5_core_dev *mdev, return 0; } -EXPORT_SYMBOL_GPL(mlx5_query_port_ets_rate_limit); int mlx5_set_port_wol(struct mlx5_core_dev *mdev, u8 wol_mode) { @@ -845,7 +827,6 @@ int mlx5_set_port_wol(struct mlx5_core_dev *mdev, u8 wol_mode) MLX5_SET(set_wol_rol_in, in, wol_mode, wol_mode); return mlx5_cmd_exec_in(mdev, set_wol_rol, in); } -EXPORT_SYMBOL_GPL(mlx5_set_port_wol); int mlx5_query_port_wol(struct mlx5_core_dev *mdev, u8 *wol_mode) { @@ -860,7 +841,6 @@ int mlx5_query_port_wol(struct mlx5_core_dev *mdev, u8 *wol_mode) return err; } -EXPORT_SYMBOL_GPL(mlx5_query_port_wol); int mlx5_query_ports_check(struct mlx5_core_dev *mdev, u32 *out, int outlen) { @@ -1058,56 +1038,57 @@ out: } /* speed in units of 1Mb */ -static const u32 mlx5e_link_speed[MLX5E_LINK_MODES_NUMBER] = { - [MLX5E_1000BASE_CX_SGMII] = 1000, - [MLX5E_1000BASE_KX] = 1000, - [MLX5E_10GBASE_CX4] = 10000, - [MLX5E_10GBASE_KX4] = 10000, - [MLX5E_10GBASE_KR] = 10000, - [MLX5E_20GBASE_KR2] = 20000, - [MLX5E_40GBASE_CR4] = 40000, - [MLX5E_40GBASE_KR4] = 40000, - [MLX5E_56GBASE_R4] = 56000, - [MLX5E_10GBASE_CR] = 10000, - [MLX5E_10GBASE_SR] = 10000, - [MLX5E_10GBASE_ER] = 10000, - [MLX5E_40GBASE_SR4] = 40000, - [MLX5E_40GBASE_LR4] = 40000, - [MLX5E_50GBASE_SR2] = 50000, - [MLX5E_100GBASE_CR4] = 100000, - [MLX5E_100GBASE_SR4] = 100000, - [MLX5E_100GBASE_KR4] = 100000, - [MLX5E_100GBASE_LR4] = 100000, - [MLX5E_100BASE_TX] = 100, - [MLX5E_1000BASE_T] = 1000, - [MLX5E_10GBASE_T] = 10000, - 
[MLX5E_25GBASE_CR] = 25000, - [MLX5E_25GBASE_KR] = 25000, - [MLX5E_25GBASE_SR] = 25000, - [MLX5E_50GBASE_CR2] = 50000, - [MLX5E_50GBASE_KR2] = 50000, +static const struct mlx5_link_info mlx5e_link_info[MLX5E_LINK_MODES_NUMBER] = { + [MLX5E_1000BASE_CX_SGMII] = {.speed = 1000, .lanes = 1}, + [MLX5E_1000BASE_KX] = {.speed = 1000, .lanes = 1}, + [MLX5E_10GBASE_CX4] = {.speed = 10000, .lanes = 4}, + [MLX5E_10GBASE_KX4] = {.speed = 10000, .lanes = 4}, + [MLX5E_10GBASE_KR] = {.speed = 10000, .lanes = 1}, + [MLX5E_20GBASE_KR2] = {.speed = 20000, .lanes = 2}, + [MLX5E_40GBASE_CR4] = {.speed = 40000, .lanes = 4}, + [MLX5E_40GBASE_KR4] = {.speed = 40000, .lanes = 4}, + [MLX5E_56GBASE_R4] = {.speed = 56000, .lanes = 4}, + [MLX5E_10GBASE_CR] = {.speed = 10000, .lanes = 1}, + [MLX5E_10GBASE_SR] = {.speed = 10000, .lanes = 1}, + [MLX5E_10GBASE_ER] = {.speed = 10000, .lanes = 1}, + [MLX5E_40GBASE_SR4] = {.speed = 40000, .lanes = 4}, + [MLX5E_40GBASE_LR4] = {.speed = 40000, .lanes = 4}, + [MLX5E_50GBASE_SR2] = {.speed = 50000, .lanes = 2}, + [MLX5E_100GBASE_CR4] = {.speed = 100000, .lanes = 4}, + [MLX5E_100GBASE_SR4] = {.speed = 100000, .lanes = 4}, + [MLX5E_100GBASE_KR4] = {.speed = 100000, .lanes = 4}, + [MLX5E_100GBASE_LR4] = {.speed = 100000, .lanes = 4}, + [MLX5E_100BASE_TX] = {.speed = 100, .lanes = 1}, + [MLX5E_1000BASE_T] = {.speed = 1000, .lanes = 1}, + [MLX5E_10GBASE_T] = {.speed = 10000, .lanes = 1}, + [MLX5E_25GBASE_CR] = {.speed = 25000, .lanes = 1}, + [MLX5E_25GBASE_KR] = {.speed = 25000, .lanes = 1}, + [MLX5E_25GBASE_SR] = {.speed = 25000, .lanes = 1}, + [MLX5E_50GBASE_CR2] = {.speed = 50000, .lanes = 2}, + [MLX5E_50GBASE_KR2] = {.speed = 50000, .lanes = 2}, }; -static const u32 mlx5e_ext_link_speed[MLX5E_EXT_LINK_MODES_NUMBER] = { - [MLX5E_SGMII_100M] = 100, - [MLX5E_1000BASE_X_SGMII] = 1000, - [MLX5E_5GBASE_R] = 5000, - [MLX5E_10GBASE_XFI_XAUI_1] = 10000, - [MLX5E_40GBASE_XLAUI_4_XLPPI_4] = 40000, - [MLX5E_25GAUI_1_25GBASE_CR_KR] = 25000, - [MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2] = 50000, - [MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR] = 50000, - [MLX5E_CAUI_4_100GBASE_CR4_KR4] = 100000, - [MLX5E_100GAUI_2_100GBASE_CR2_KR2] = 100000, - [MLX5E_200GAUI_4_200GBASE_CR4_KR4] = 200000, - [MLX5E_400GAUI_8_400GBASE_CR8] = 400000, - [MLX5E_100GAUI_1_100GBASE_CR_KR] = 100000, - [MLX5E_200GAUI_2_200GBASE_CR2_KR2] = 200000, - [MLX5E_400GAUI_4_400GBASE_CR4_KR4] = 400000, - [MLX5E_800GAUI_8_800GBASE_CR8_KR8] = 800000, - [MLX5E_200GAUI_1_200GBASE_CR1_KR1] = 200000, - [MLX5E_400GAUI_2_400GBASE_CR2_KR2] = 400000, - [MLX5E_800GAUI_4_800GBASE_CR4_KR4] = 800000, +static const struct mlx5_link_info +mlx5e_ext_link_info[MLX5E_EXT_LINK_MODES_NUMBER] = { + [MLX5E_SGMII_100M] = {.speed = 100, .lanes = 1}, + [MLX5E_1000BASE_X_SGMII] = {.speed = 1000, .lanes = 1}, + [MLX5E_5GBASE_R] = {.speed = 5000, .lanes = 1}, + [MLX5E_10GBASE_XFI_XAUI_1] = {.speed = 10000, .lanes = 1}, + [MLX5E_40GBASE_XLAUI_4_XLPPI_4] = {.speed = 40000, .lanes = 4}, + [MLX5E_25GAUI_1_25GBASE_CR_KR] = {.speed = 25000, .lanes = 1}, + [MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2] = {.speed = 50000, .lanes = 2}, + [MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR] = {.speed = 50000, .lanes = 1}, + [MLX5E_CAUI_4_100GBASE_CR4_KR4] = {.speed = 100000, .lanes = 4}, + [MLX5E_100GAUI_2_100GBASE_CR2_KR2] = {.speed = 100000, .lanes = 2}, + [MLX5E_200GAUI_4_200GBASE_CR4_KR4] = {.speed = 200000, .lanes = 4}, + [MLX5E_400GAUI_8_400GBASE_CR8] = {.speed = 400000, .lanes = 8}, + [MLX5E_100GAUI_1_100GBASE_CR_KR] = {.speed = 100000, .lanes = 1}, + [MLX5E_200GAUI_2_200GBASE_CR2_KR2] = 
{.speed = 200000, .lanes = 2}, + [MLX5E_400GAUI_4_400GBASE_CR4_KR4] = {.speed = 400000, .lanes = 4}, + [MLX5E_800GAUI_8_800GBASE_CR8_KR8] = {.speed = 800000, .lanes = 8}, + [MLX5E_200GAUI_1_200GBASE_CR1_KR1] = {.speed = 200000, .lanes = 1}, + [MLX5E_400GAUI_2_400GBASE_CR2_KR2] = {.speed = 400000, .lanes = 2}, + [MLX5E_800GAUI_4_800GBASE_CR4_KR4] = {.speed = 800000, .lanes = 4}, }; int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, bool ext, @@ -1145,54 +1126,61 @@ bool mlx5_ptys_ext_supported(struct mlx5_core_dev *mdev) return !!eproto.cap; } -static void mlx5e_port_get_speed_arr(struct mlx5_core_dev *mdev, - const u32 **arr, u32 *size, - bool force_legacy) +static void mlx5e_port_get_link_mode_info_arr(struct mlx5_core_dev *mdev, + const struct mlx5_link_info **arr, + u32 *size, + bool force_legacy) { bool ext = force_legacy ? false : mlx5_ptys_ext_supported(mdev); - *size = ext ? ARRAY_SIZE(mlx5e_ext_link_speed) : - ARRAY_SIZE(mlx5e_link_speed); - *arr = ext ? mlx5e_ext_link_speed : mlx5e_link_speed; + *size = ext ? ARRAY_SIZE(mlx5e_ext_link_info) : + ARRAY_SIZE(mlx5e_link_info); + *arr = ext ? mlx5e_ext_link_info : mlx5e_link_info; } -u32 mlx5_port_ptys2speed(struct mlx5_core_dev *mdev, u32 eth_proto_oper, - bool force_legacy) +const struct mlx5_link_info *mlx5_port_ptys2info(struct mlx5_core_dev *mdev, + u32 eth_proto_oper, + bool force_legacy) { unsigned long temp = eth_proto_oper; - const u32 *table; - u32 speed = 0; + const struct mlx5_link_info *table; u32 max_size; int i; - mlx5e_port_get_speed_arr(mdev, &table, &max_size, force_legacy); + mlx5e_port_get_link_mode_info_arr(mdev, &table, &max_size, + force_legacy); i = find_first_bit(&temp, max_size); if (i < max_size) - speed = table[i]; - return speed; + return &table[i]; + + return NULL; } -u32 mlx5_port_speed2linkmodes(struct mlx5_core_dev *mdev, u32 speed, - bool force_legacy) +u32 mlx5_port_info2linkmodes(struct mlx5_core_dev *mdev, + struct mlx5_link_info *info, + bool force_legacy) { + const struct mlx5_link_info *table; u32 link_modes = 0; - const u32 *table; u32 max_size; int i; - mlx5e_port_get_speed_arr(mdev, &table, &max_size, force_legacy); + mlx5e_port_get_link_mode_info_arr(mdev, &table, &max_size, + force_legacy); for (i = 0; i < max_size; ++i) { - if (table[i] == speed) - link_modes |= MLX5E_PROT_MASK(i); + if (table[i].speed == info->speed) { + if (!info->lanes || table[i].lanes == info->lanes) + link_modes |= MLX5E_PROT_MASK(i); + } } return link_modes; } int mlx5_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed) { + const struct mlx5_link_info *table; struct mlx5_port_eth_proto eproto; u32 max_speed = 0; - const u32 *table; u32 max_size; bool ext; int err; @@ -1203,10 +1191,10 @@ int mlx5_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed) if (err) return err; - mlx5e_port_get_speed_arr(mdev, &table, &max_size, false); + mlx5e_port_get_link_mode_info_arr(mdev, &table, &max_size, false); for (i = 0; i < max_size; ++i) if (eproto.cap & MLX5E_PROT_MASK(i)) - max_speed = max(max_speed, table[i]); + max_speed = max(max_speed, table[i].speed); *speed = max_speed; return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h index f9f569131dde..47f7ed141553 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h @@ -24,8 +24,8 @@ struct mlx5hws_bwc_matcher { struct mlx5hws_matcher *matcher; struct mlx5hws_match_template *mt; 
struct mlx5hws_action_template *at[MLX5HWS_BWC_MATCHER_ATTACH_AT_NUM]; + u32 priority; u8 num_of_at; - u16 priority; u8 size_log; atomic_t num_of_rules; struct list_head *rules; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c index f34bbbbba1c2..1b787cd66e6f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c @@ -66,6 +66,8 @@ static int mlx5_fs_init_hws_actions_pool(struct mlx5_core_dev *dev, xa_init(&hws_pool->table_dests); xa_init(&hws_pool->vport_dests); xa_init(&hws_pool->vport_vhca_dests); + xa_init(&hws_pool->aso_meters); + xa_init(&hws_pool->sample_dests); return 0; cleanup_insert_hdr: @@ -88,10 +90,17 @@ destroy_tag: static void mlx5_fs_cleanup_hws_actions_pool(struct mlx5_fs_hws_context *fs_ctx) { struct mlx5_fs_hws_actions_pool *hws_pool = &fs_ctx->hws_pool; + struct mlx5_fs_hws_data *fs_hws_data; struct mlx5hws_action *action; struct mlx5_fs_pool *pool; unsigned long i; + xa_for_each(&hws_pool->sample_dests, i, fs_hws_data) + kfree(fs_hws_data); + xa_destroy(&hws_pool->sample_dests); + xa_for_each(&hws_pool->aso_meters, i, fs_hws_data) + kfree(fs_hws_data); + xa_destroy(&hws_pool->aso_meters); xa_for_each(&hws_pool->vport_vhca_dests, i, action) mlx5hws_action_destroy(action); xa_destroy(&hws_pool->vport_vhca_dests); @@ -459,6 +468,106 @@ mlx5_fs_create_dest_action_range(struct mlx5hws_context *ctx, flags); } +static struct mlx5_fs_hws_data * +mlx5_fs_get_cached_hws_data(struct xarray *cache_xa, unsigned long index) +{ + struct mlx5_fs_hws_data *fs_hws_data; + int err; + + xa_lock(cache_xa); + fs_hws_data = xa_load(cache_xa, index); + if (!fs_hws_data) { + fs_hws_data = kzalloc(sizeof(*fs_hws_data), GFP_ATOMIC); + if (!fs_hws_data) { + xa_unlock(cache_xa); + return NULL; + } + refcount_set(&fs_hws_data->hws_action_refcount, 0); + mutex_init(&fs_hws_data->lock); + err = __xa_insert(cache_xa, index, fs_hws_data, GFP_ATOMIC); + if (err) { + kfree(fs_hws_data); + xa_unlock(cache_xa); + return NULL; + } + } + xa_unlock(cache_xa); + + return fs_hws_data; +} + +static struct mlx5hws_action * +mlx5_fs_get_action_aso_meter(struct mlx5_fs_hws_context *fs_ctx, + struct mlx5_exe_aso *exe_aso) +{ + struct mlx5_fs_hws_create_action_ctx create_ctx; + struct mlx5hws_context *ctx = fs_ctx->hws_ctx; + struct mlx5_fs_hws_data *meter_hws_data; + u32 id = exe_aso->base_id; + struct xarray *meters_xa; + + meters_xa = &fs_ctx->hws_pool.aso_meters; + meter_hws_data = mlx5_fs_get_cached_hws_data(meters_xa, id); + if (!meter_hws_data) + return NULL; + + create_ctx.hws_ctx = ctx; + create_ctx.actions_type = MLX5HWS_ACTION_TYP_ASO_METER; + create_ctx.id = id; + create_ctx.return_reg_id = exe_aso->return_reg_id; + + return mlx5_fs_get_hws_action(meter_hws_data, &create_ctx); +} + +static void mlx5_fs_put_action_aso_meter(struct mlx5_fs_hws_context *fs_ctx, + struct mlx5_exe_aso *exe_aso) +{ + struct mlx5_fs_hws_data *meter_hws_data; + struct xarray *meters_xa; + + meters_xa = &fs_ctx->hws_pool.aso_meters; + meter_hws_data = xa_load(meters_xa, exe_aso->base_id); + if (!meter_hws_data) + return; + return mlx5_fs_put_hws_action(meter_hws_data); +} + +static struct mlx5hws_action * +mlx5_fs_get_dest_action_sampler(struct mlx5_fs_hws_context *fs_ctx, + struct mlx5_flow_rule *dst) +{ + struct mlx5_fs_hws_create_action_ctx create_ctx; + struct mlx5hws_context *ctx = fs_ctx->hws_ctx; + struct mlx5_fs_hws_data *sampler_hws_data; + u32 id 
= dst->dest_attr.sampler_id; + struct xarray *sampler_xa; + + sampler_xa = &fs_ctx->hws_pool.sample_dests; + sampler_hws_data = mlx5_fs_get_cached_hws_data(sampler_xa, id); + if (!sampler_hws_data) + return NULL; + + create_ctx.hws_ctx = ctx; + create_ctx.actions_type = MLX5HWS_ACTION_TYP_SAMPLER; + create_ctx.id = id; + + return mlx5_fs_get_hws_action(sampler_hws_data, &create_ctx); +} + +static void mlx5_fs_put_dest_action_sampler(struct mlx5_fs_hws_context *fs_ctx, + u32 sampler_id) +{ + struct mlx5_fs_hws_data *sampler_hws_data; + struct xarray *sampler_xa; + + sampler_xa = &fs_ctx->hws_pool.sample_dests; + sampler_hws_data = xa_load(sampler_xa, sampler_id); + if (!sampler_hws_data) + return; + + mlx5_fs_put_hws_action(sampler_hws_data); +} + static struct mlx5hws_action * mlx5_fs_create_action_dest_array(struct mlx5hws_context *ctx, struct mlx5hws_action_dest_attr *dests, @@ -519,26 +628,101 @@ mlx5_fs_create_action_last(struct mlx5hws_context *ctx) return mlx5hws_action_create_last(ctx, flags); } -static void mlx5_fs_destroy_fs_action(struct mlx5_fs_hws_rule_action *fs_action) +static struct mlx5hws_action * +mlx5_fs_create_hws_action(struct mlx5_fs_hws_create_action_ctx *create_ctx) +{ + u32 flags = MLX5HWS_ACTION_FLAG_HWS_FDB | MLX5HWS_ACTION_FLAG_SHARED; + + switch (create_ctx->actions_type) { + case MLX5HWS_ACTION_TYP_CTR: + return mlx5hws_action_create_counter(create_ctx->hws_ctx, + create_ctx->id, flags); + case MLX5HWS_ACTION_TYP_ASO_METER: + return mlx5hws_action_create_aso_meter(create_ctx->hws_ctx, + create_ctx->id, + create_ctx->return_reg_id, + flags); + case MLX5HWS_ACTION_TYP_SAMPLER: + return mlx5hws_action_create_flow_sampler(create_ctx->hws_ctx, + create_ctx->id, flags); + default: + return NULL; + } +} + +struct mlx5hws_action * +mlx5_fs_get_hws_action(struct mlx5_fs_hws_data *fs_hws_data, + struct mlx5_fs_hws_create_action_ctx *create_ctx) +{ + /* try avoid locking if not necessary */ + if (refcount_inc_not_zero(&fs_hws_data->hws_action_refcount)) + return fs_hws_data->hws_action; + + mutex_lock(&fs_hws_data->lock); + if (refcount_inc_not_zero(&fs_hws_data->hws_action_refcount)) { + mutex_unlock(&fs_hws_data->lock); + return fs_hws_data->hws_action; + } + fs_hws_data->hws_action = mlx5_fs_create_hws_action(create_ctx); + if (!fs_hws_data->hws_action) { + mutex_unlock(&fs_hws_data->lock); + return NULL; + } + refcount_set(&fs_hws_data->hws_action_refcount, 1); + mutex_unlock(&fs_hws_data->lock); + + return fs_hws_data->hws_action; +} + +void mlx5_fs_put_hws_action(struct mlx5_fs_hws_data *fs_hws_data) +{ + if (!fs_hws_data) + return; + + /* try avoid locking if not necessary */ + if (refcount_dec_not_one(&fs_hws_data->hws_action_refcount)) + return; + + mutex_lock(&fs_hws_data->lock); + if (!refcount_dec_and_test(&fs_hws_data->hws_action_refcount)) { + mutex_unlock(&fs_hws_data->lock); + return; + } + mlx5hws_action_destroy(fs_hws_data->hws_action); + fs_hws_data->hws_action = NULL; + mutex_unlock(&fs_hws_data->lock); +} + +static void mlx5_fs_destroy_fs_action(struct mlx5_flow_root_namespace *ns, + struct mlx5_fs_hws_rule_action *fs_action) { + struct mlx5_fs_hws_context *fs_ctx = &ns->fs_hws_context; + switch (mlx5hws_action_get_type(fs_action->action)) { case MLX5HWS_ACTION_TYP_CTR: mlx5_fc_put_hws_action(fs_action->counter); break; + case MLX5HWS_ACTION_TYP_ASO_METER: + mlx5_fs_put_action_aso_meter(fs_ctx, fs_action->exe_aso); + break; + case MLX5HWS_ACTION_TYP_SAMPLER: + mlx5_fs_put_dest_action_sampler(fs_ctx, fs_action->sampler_id); + break; default: 
mlx5hws_action_destroy(fs_action->action); } } static void -mlx5_fs_destroy_fs_actions(struct mlx5_fs_hws_rule_action **fs_actions, +mlx5_fs_destroy_fs_actions(struct mlx5_flow_root_namespace *ns, + struct mlx5_fs_hws_rule_action **fs_actions, int *num_fs_actions) { int i; /* Free in reverse order to handle action dependencies */ for (i = *num_fs_actions - 1; i >= 0; i--) - mlx5_fs_destroy_fs_action(*fs_actions + i); + mlx5_fs_destroy_fs_action(ns, *fs_actions + i); *num_fs_actions = 0; kfree(*fs_actions); *fs_actions = NULL; @@ -735,8 +919,25 @@ static int mlx5_fs_fte_get_hws_actions(struct mlx5_flow_root_namespace *ns, } if (fte_action->action & MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO) { - err = -EOPNOTSUPP; - goto free_actions; + if (fte_action->exe_aso.type != MLX5_EXE_ASO_FLOW_METER || + num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) { + err = -EOPNOTSUPP; + goto free_actions; + } + + tmp_action = mlx5_fs_get_action_aso_meter(fs_ctx, + &fte_action->exe_aso); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + (*ractions)[num_actions].aso_meter.offset = + fte_action->exe_aso.flow_meter.meter_idx; + (*ractions)[num_actions].aso_meter.init_color = + fte_action->exe_aso.flow_meter.init_color; + (*ractions)[num_actions++].action = tmp_action; + fs_actions[num_fs_actions].action = tmp_action; + fs_actions[num_fs_actions++].exe_aso = &fte_action->exe_aso; } if (fte_action->action & MLX5_FLOW_CONTEXT_ACTION_DROP) { @@ -784,6 +985,14 @@ static int mlx5_fs_fte_get_hws_actions(struct mlx5_flow_root_namespace *ns, dest_action = mlx5_fs_get_dest_action_vport(fs_ctx, dst, type_uplink); break; + case MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER: + dest_action = + mlx5_fs_get_dest_action_sampler(fs_ctx, + dst); + fs_actions[num_fs_actions].action = dest_action; + fs_actions[num_fs_actions++].sampler_id = + dst->dest_attr.sampler_id; + break; default: err = -EOPNOTSUPP; goto free_actions; @@ -850,7 +1059,7 @@ static int mlx5_fs_fte_get_hws_actions(struct mlx5_flow_root_namespace *ns, return 0; free_actions: - mlx5_fs_destroy_fs_actions(&fs_actions, &num_fs_actions); + mlx5_fs_destroy_fs_actions(ns, &fs_actions, &num_fs_actions); free_dest_actions_alloc: kfree(dest_actions); free_fs_actions_alloc: @@ -900,7 +1109,7 @@ static int mlx5_cmd_hws_create_fte(struct mlx5_flow_root_namespace *ns, return 0; free_actions: - mlx5_fs_destroy_fs_actions(&fte->fs_hws_rule.hws_fs_actions, + mlx5_fs_destroy_fs_actions(ns, &fte->fs_hws_rule.hws_fs_actions, &fte->fs_hws_rule.num_fs_actions); out_err: mlx5_core_err(ns->dev, "Failed to create hws rule err(%d)\n", err); @@ -920,7 +1129,8 @@ static int mlx5_cmd_hws_delete_fte(struct mlx5_flow_root_namespace *ns, err = mlx5hws_bwc_rule_destroy(rule->bwc_rule); rule->bwc_rule = NULL; - mlx5_fs_destroy_fs_actions(&rule->hws_fs_actions, &rule->num_fs_actions); + mlx5_fs_destroy_fs_actions(ns, &rule->hws_fs_actions, + &rule->num_fs_actions); return err; } @@ -958,11 +1168,12 @@ static int mlx5_cmd_hws_update_fte(struct mlx5_flow_root_namespace *ns, if (ret) goto restore_actions; - mlx5_fs_destroy_fs_actions(&saved_hws_fs_actions, &saved_num_fs_actions); + mlx5_fs_destroy_fs_actions(ns, &saved_hws_fs_actions, + &saved_num_fs_actions); return ret; restore_actions: - mlx5_fs_destroy_fs_actions(&fte->fs_hws_rule.hws_fs_actions, + mlx5_fs_destroy_fs_actions(ns, &fte->fs_hws_rule.hws_fs_actions, &fte->fs_hws_rule.num_fs_actions); fte->fs_hws_rule.hws_fs_actions = saved_hws_fs_actions; fte->fs_hws_rule.num_fs_actions = saved_num_fs_actions;
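The mlx5_fs_get_hws_action()/mlx5_fs_put_hws_action() pair added above factors a double-checked refcount idiom out of the counter-specific code (see the fs_hws_pools.c hunk below): getters and putters stay lockless on the fast path, and only the first getter and the last putter take the mutex to create or destroy the cached action. A distilled, generic sketch of the idiom with hypothetical names; the driver's versions operate on struct mlx5_fs_hws_data:

#include <linux/mutex.h>
#include <linux/refcount.h>

struct cached_obj {
	struct mutex lock;	/* serializes create/destroy only */
	refcount_t refs;	/* zero means "not created yet" */
	void *obj;
};

static void *cached_obj_get(struct cached_obj *c, void *(*create)(void))
{
	/* Fast path: object already exists, grab a reference locklessly. */
	if (refcount_inc_not_zero(&c->refs))
		return c->obj;

	mutex_lock(&c->lock);
	/* Re-check under the lock: another getter may have created it. */
	if (refcount_inc_not_zero(&c->refs)) {
		mutex_unlock(&c->lock);
		return c->obj;
	}
	c->obj = create();
	if (c->obj)
		refcount_set(&c->refs, 1);
	mutex_unlock(&c->lock);
	return c->obj;
}

static void cached_obj_put(struct cached_obj *c, void (*destroy)(void *))
{
	/* Fast path: not the last reference, drop it locklessly. */
	if (refcount_dec_not_one(&c->refs))
		return;

	mutex_lock(&c->lock);
	/* Re-check under the lock before tearing the object down. */
	if (!refcount_dec_and_test(&c->refs)) {
		mutex_unlock(&c->lock);
		return;
	}
	destroy(c->obj);
	c->obj = NULL;
	mutex_unlock(&c->lock);
}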
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.h index cbddb72d4362..8b56298288da 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.h @@ -22,6 +22,8 @@ struct mlx5_fs_hws_actions_pool { struct xarray table_dests; struct xarray vport_vhca_dests; struct xarray vport_dests; + struct xarray aso_meters; + struct xarray sample_dests; }; struct mlx5_fs_hws_context { @@ -49,6 +51,8 @@ struct mlx5_fs_hws_rule_action { struct mlx5hws_action *action; union { struct mlx5_fc *counter; + struct mlx5_exe_aso *exe_aso; + u32 sampler_id; }; }; @@ -58,6 +62,26 @@ struct mlx5_fs_hws_rule { int num_fs_actions; }; +struct mlx5_fs_hws_data { + struct mlx5hws_action *hws_action; + struct mutex lock; /* protects hws_action */ + refcount_t hws_action_refcount; +}; + +struct mlx5_fs_hws_create_action_ctx { + enum mlx5hws_action_type actions_type; + struct mlx5hws_context *hws_ctx; + u32 id; + union { + u8 return_reg_id; + }; +}; + +struct mlx5hws_action * +mlx5_fs_get_hws_action(struct mlx5_fs_hws_data *fs_hws_data, + struct mlx5_fs_hws_create_action_ctx *create_ctx); +void mlx5_fs_put_hws_action(struct mlx5_fs_hws_data *fs_hws_data); + #ifdef CONFIG_MLX5_HW_STEERING bool mlx5_fs_hws_is_supported(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws_pools.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws_pools.c index 2ae4ac62b0e2..f1ecdba74e1f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws_pools.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws_pools.c @@ -405,46 +405,17 @@ bool mlx5_fs_hws_mh_pool_match(struct mlx5_fs_pool *mh_pool, struct mlx5hws_action *mlx5_fc_get_hws_action(struct mlx5hws_context *ctx, struct mlx5_fc *counter) { - u32 flags = MLX5HWS_ACTION_FLAG_HWS_FDB | MLX5HWS_ACTION_FLAG_SHARED; + struct mlx5_fs_hws_create_action_ctx create_ctx; struct mlx5_fc_bulk *fc_bulk = counter->bulk; - struct mlx5_fc_bulk_hws_data *fc_bulk_hws; - fc_bulk_hws = &fc_bulk->hws_data; - /* try avoid locking if not necessary */ - if (refcount_inc_not_zero(&fc_bulk_hws->hws_action_refcount)) - return fc_bulk_hws->hws_action; + create_ctx.hws_ctx = ctx; + create_ctx.id = fc_bulk->base_id; + create_ctx.actions_type = MLX5HWS_ACTION_TYP_CTR; - mutex_lock(&fc_bulk_hws->lock); - if (refcount_inc_not_zero(&fc_bulk_hws->hws_action_refcount)) { - mutex_unlock(&fc_bulk_hws->lock); - return fc_bulk_hws->hws_action; - } - fc_bulk_hws->hws_action = - mlx5hws_action_create_counter(ctx, fc_bulk->base_id, flags); - if (!fc_bulk_hws->hws_action) { - mutex_unlock(&fc_bulk_hws->lock); - return NULL; - } - refcount_set(&fc_bulk_hws->hws_action_refcount, 1); - mutex_unlock(&fc_bulk_hws->lock); - - return fc_bulk_hws->hws_action; + return mlx5_fs_get_hws_action(&fc_bulk->hws_data, &create_ctx); } void mlx5_fc_put_hws_action(struct mlx5_fc *counter) { - struct mlx5_fc_bulk_hws_data *fc_bulk_hws = &counter->bulk->hws_data; - - /* try avoid locking if not necessary */ - if (refcount_dec_not_one(&fc_bulk_hws->hws_action_refcount)) - return; - - mutex_lock(&fc_bulk_hws->lock); - if (!refcount_dec_and_test(&fc_bulk_hws->hws_action_refcount)) { - mutex_unlock(&fc_bulk_hws->lock); - return; - } - mlx5hws_action_destroy(fc_bulk_hws->hws_action); - fc_bulk_hws->hws_action = NULL; - mutex_unlock(&fc_bulk_hws->lock); + mlx5_fs_put_hws_action(&counter->bulk->hws_data); }
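The aso_meters and sample_dests xarrays declared in the fs_hws.h hunk above are filled lazily by mlx5_fs_get_cached_hws_data() (earlier in this patch), which performs its lookup and insert under xa_lock() and must therefore allocate with GFP_ATOMIC. A distilled get-or-create sketch of that idiom, with hypothetical names:

#include <linux/slab.h>
#include <linux/xarray.h>

struct cache_entry {
	int payload;	/* stands in for struct mlx5_fs_hws_data */
};

/* Hypothetical sketch: return the entry for @id, allocating and
 * inserting it on first use.  Holding xa_lock() across xa_load() and
 * __xa_insert() closes the lookup/insert race; the cost is that the
 * allocation may not sleep, hence GFP_ATOMIC.
 */
static struct cache_entry *cache_get_or_create(struct xarray *xa,
					       unsigned long id)
{
	struct cache_entry *e;

	xa_lock(xa);
	e = xa_load(xa, id);
	if (!e) {
		e = kzalloc(sizeof(*e), GFP_ATOMIC);
		if (e && __xa_insert(xa, id, e, GFP_ATOMIC)) {
			/* Insert failed (no memory for xarray nodes);
			 * -EBUSY cannot happen while the lock is held. */
			kfree(e);
			e = NULL;
		}
	}
	xa_unlock(xa);
	return e;
}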
a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste.h index 5f409dc30aca..3d5afc832fa5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste.h @@ -210,6 +210,10 @@ struct mlx5dr_ste_ctx { void (*set_encap_l3)(u8 *hw_ste_p, u8 *frst_s_action, u8 *scnd_d_action, u32 reformat_id, int size); + void (*set_insert_hdr)(u8 *hw_ste_p, u8 *d_action, u32 reformat_id, + u8 anchor, u8 offset, int size); + void (*set_remove_hdr)(u8 *hw_ste_p, u8 *s_action, u8 anchor, + u8 offset, int size); /* Send */ void (*prepare_for_postsend)(u8 *hw_ste_p, u32 ste_size); }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v1.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v1.c index 7f83d77c43ef..6447efbae00d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v1.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v1.c @@ -266,10 +266,10 @@ void dr_ste_v1_set_encap(u8 *hw_ste_p, u8 *d_action, u32 reformat_id, int size) dr_ste_v1_set_reparse(hw_ste_p); } -static void dr_ste_v1_set_insert_hdr(u8 *hw_ste_p, u8 *d_action, - u32 reformat_id, - u8 anchor, u8 offset, - int size) +void dr_ste_v1_set_insert_hdr(u8 *hw_ste_p, u8 *d_action, + u32 reformat_id, + u8 anchor, u8 offset, + int size) { MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, action_id, DR_STE_V1_ACTION_ID_INSERT_POINTER); @@ -286,9 +286,9 @@ static void dr_ste_v1_set_insert_hdr(u8 *hw_ste_p, u8 *d_action, dr_ste_v1_set_reparse(hw_ste_p); } -static void dr_ste_v1_set_remove_hdr(u8 *hw_ste_p, u8 *s_action, - u8 anchor, u8 offset, - int size) +void dr_ste_v1_set_remove_hdr(u8 *hw_ste_p, u8 *s_action, + u8 anchor, u8 offset, + int size) { MLX5_SET(ste_single_action_remove_header_size_v1, s_action, action_id, DR_STE_V1_ACTION_ID_REMOVE_BY_SIZE); @@ -584,11 +584,11 @@ void dr_ste_v1_set_actions_tx(struct mlx5dr_ste_ctx *ste_ctx, action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); action_sz = DR_STE_ACTION_TRIPLE_SZ; } - dr_ste_v1_set_insert_hdr(last_ste, action, - attr->reformat.id, - attr->reformat.param_0, - attr->reformat.param_1, - attr->reformat.size); + ste_ctx->set_insert_hdr(last_ste, action, + attr->reformat.id, + attr->reformat.param_0, + attr->reformat.param_1, + attr->reformat.size); action_sz -= DR_STE_ACTION_DOUBLE_SZ; action += DR_STE_ACTION_DOUBLE_SZ; } else if (action_type_set[DR_ACTION_TYP_REMOVE_HDR]) { @@ -597,10 +597,10 @@ void dr_ste_v1_set_actions_tx(struct mlx5dr_ste_ctx *ste_ctx, action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); action_sz = DR_STE_ACTION_TRIPLE_SZ; } - dr_ste_v1_set_remove_hdr(last_ste, action, - attr->reformat.param_0, - attr->reformat.param_1, - attr->reformat.size); + ste_ctx->set_remove_hdr(last_ste, action, + attr->reformat.param_0, + attr->reformat.param_1, + attr->reformat.size); action_sz -= DR_STE_ACTION_SINGLE_SZ; action += DR_STE_ACTION_SINGLE_SZ; } @@ -792,11 +792,11 @@ void dr_ste_v1_set_actions_rx(struct mlx5dr_ste_ctx *ste_ctx, action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); action_sz = DR_STE_ACTION_TRIPLE_SZ; } - dr_ste_v1_set_insert_hdr(last_ste, action, - attr->reformat.id, - attr->reformat.param_0, - attr->reformat.param_1, - attr->reformat.size); + ste_ctx->set_insert_hdr(last_ste, action, + attr->reformat.id, + attr->reformat.param_0, + attr->reformat.param_1, + attr->reformat.size); action_sz -= 
DR_STE_ACTION_DOUBLE_SZ; action += DR_STE_ACTION_DOUBLE_SZ; allow_modify_hdr = false; @@ -808,10 +808,10 @@ void dr_ste_v1_set_actions_rx(struct mlx5dr_ste_ctx *ste_ctx, allow_modify_hdr = true; allow_ctr = true; } - dr_ste_v1_set_remove_hdr(last_ste, action, - attr->reformat.param_0, - attr->reformat.param_1, - attr->reformat.size); + ste_ctx->set_remove_hdr(last_ste, action, + attr->reformat.param_0, + attr->reformat.param_1, + attr->reformat.size); action_sz -= DR_STE_ACTION_SINGLE_SZ; action += DR_STE_ACTION_SINGLE_SZ; } @@ -2200,6 +2200,8 @@ static struct mlx5dr_ste_ctx ste_ctx_v1 = { .set_pop_vlan = &dr_ste_v1_set_pop_vlan, .set_rx_decap = &dr_ste_v1_set_rx_decap, .set_encap_l3 = &dr_ste_v1_set_encap_l3, + .set_insert_hdr = &dr_ste_v1_set_insert_hdr, + .set_remove_hdr = &dr_ste_v1_set_remove_hdr, /* Send */ .prepare_for_postsend = &dr_ste_v1_prepare_for_postsend, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v1.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v1.h index a8d9e308d339..591c20c95a6a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v1.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v1.h @@ -156,6 +156,10 @@ void dr_ste_v1_set_pop_vlan(u8 *hw_ste_p, u8 *s_action, u8 vlans_num); void dr_ste_v1_set_encap_l3(u8 *hw_ste_p, u8 *frst_s_action, u8 *scnd_d_action, u32 reformat_id, int size); void dr_ste_v1_set_rx_decap(u8 *hw_ste_p, u8 *s_action); +void dr_ste_v1_set_insert_hdr(u8 *hw_ste_p, u8 *d_action, u32 reformat_id, + u8 anchor, u8 offset, int size); +void dr_ste_v1_set_remove_hdr(u8 *hw_ste_p, u8 *s_action, u8 anchor, + u8 offset, int size); void dr_ste_v1_set_actions_tx(struct mlx5dr_ste_ctx *ste_ctx, struct mlx5dr_domain *dmn, u8 *action_type_set, u32 actions_caps, u8 *last_ste, struct mlx5dr_ste_actions_attr *attr, u32 *added_stes); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v2.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v2.c index 0882dba0f64b..d0ebaf820d42 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v2.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v2.c @@ -69,6 +69,8 @@ static struct mlx5dr_ste_ctx ste_ctx_v2 = { .set_pop_vlan = &dr_ste_v1_set_pop_vlan, .set_rx_decap = &dr_ste_v1_set_rx_decap, .set_encap_l3 = &dr_ste_v1_set_encap_l3, + .set_insert_hdr = &dr_ste_v1_set_insert_hdr, + .set_remove_hdr = &dr_ste_v1_set_remove_hdr, /* Send */ .prepare_for_postsend = &dr_ste_v1_prepare_for_postsend, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v3.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v3.c index cc60ce1d274e..e468a9ae44e8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v3.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v3.c @@ -79,6 +79,46 @@ static void dr_ste_v3_set_rx_decap(u8 *hw_ste_p, u8 *s_action) dr_ste_v1_set_reparse(hw_ste_p); } +static void dr_ste_v3_set_insert_hdr(u8 *hw_ste_p, u8 *d_action, + u32 reformat_id, u8 anchor, + u8 offset, int size) +{ + MLX5_SET(ste_double_action_insert_with_ptr_v3, d_action, + action_id, DR_STE_V1_ACTION_ID_INSERT_POINTER); + MLX5_SET(ste_double_action_insert_with_ptr_v3, d_action, + start_anchor, anchor); + + /* The hardware expects here size and offset in words (2 byte) */ + MLX5_SET(ste_double_action_insert_with_ptr_v3, d_action, + size, size / 2); + MLX5_SET(ste_double_action_insert_with_ptr_v3, d_action, + start_offset, offset / 2); + + 
MLX5_SET(ste_double_action_insert_with_ptr_v3, d_action, + pointer, reformat_id); + MLX5_SET(ste_double_action_insert_with_ptr_v3, d_action, + attributes, DR_STE_V1_ACTION_INSERT_PTR_ATTR_NONE); + + dr_ste_v1_set_reparse(hw_ste_p); +} + +static void dr_ste_v3_set_remove_hdr(u8 *hw_ste_p, u8 *s_action, + u8 anchor, u8 offset, int size) +{ + MLX5_SET(ste_single_action_remove_header_size_v3, s_action, + action_id, DR_STE_V1_ACTION_ID_REMOVE_BY_SIZE); + MLX5_SET(ste_single_action_remove_header_size_v3, s_action, + start_anchor, anchor); + + /* The hardware expects here size and offset in words (2 byte) */ + MLX5_SET(ste_single_action_remove_header_size_v3, s_action, + remove_size, size / 2); + MLX5_SET(ste_single_action_remove_header_size_v3, s_action, + start_offset, offset / 2); + + dr_ste_v1_set_reparse(hw_ste_p); +} + static int dr_ste_v3_set_action_decap_l3_list(void *data, u32 data_sz, u8 *hw_action, u32 hw_action_sz, @@ -211,6 +251,8 @@ static struct mlx5dr_ste_ctx ste_ctx_v3 = { .set_pop_vlan = &dr_ste_v3_set_pop_vlan, .set_rx_decap = &dr_ste_v3_set_rx_decap, .set_encap_l3 = &dr_ste_v3_set_encap_l3, + .set_insert_hdr = &dr_ste_v3_set_insert_hdr, + .set_remove_hdr = &dr_ste_v3_set_remove_hdr, /* Send */ .prepare_for_postsend = &dr_ste_v1_prepare_for_postsend, }; diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c index c1477aad98a0..0a751a2aaf73 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c @@ -191,6 +191,113 @@ static int fbnic_set_coalesce(struct net_device *netdev, return 0; } +static void +fbnic_get_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring, + struct kernel_ethtool_ringparam *kernel_ring, + struct netlink_ext_ack *extack) +{ + struct fbnic_net *fbn = netdev_priv(netdev); + + ring->rx_max_pending = FBNIC_QUEUE_SIZE_MAX; + ring->rx_mini_max_pending = FBNIC_QUEUE_SIZE_MAX; + ring->rx_jumbo_max_pending = FBNIC_QUEUE_SIZE_MAX; + ring->tx_max_pending = FBNIC_QUEUE_SIZE_MAX; + + ring->rx_pending = fbn->rcq_size; + ring->rx_mini_pending = fbn->hpq_size; + ring->rx_jumbo_pending = fbn->ppq_size; + ring->tx_pending = fbn->txq_size; +} + +static void fbnic_set_rings(struct fbnic_net *fbn, + struct ethtool_ringparam *ring) +{ + fbn->rcq_size = ring->rx_pending; + fbn->hpq_size = ring->rx_mini_pending; + fbn->ppq_size = ring->rx_jumbo_pending; + fbn->txq_size = ring->tx_pending; +} + +static int +fbnic_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring, + struct kernel_ethtool_ringparam *kernel_ring, + struct netlink_ext_ack *extack) + +{ + struct fbnic_net *fbn = netdev_priv(netdev); + struct fbnic_net *clone; + int err; + + ring->rx_pending = roundup_pow_of_two(ring->rx_pending); + ring->rx_mini_pending = roundup_pow_of_two(ring->rx_mini_pending); + ring->rx_jumbo_pending = roundup_pow_of_two(ring->rx_jumbo_pending); + ring->tx_pending = roundup_pow_of_two(ring->tx_pending); + + /* These are absolute minimums allowing the device and driver to operate + * but not necessarily guarantee reasonable performance. Settings below + * Rx queue size of 128 and BDQs smaller than 64 are likely suboptimal + * at best. 
+ */ + if (ring->rx_pending < max(FBNIC_QUEUE_SIZE_MIN, FBNIC_RX_DESC_MIN) || + ring->rx_mini_pending < FBNIC_QUEUE_SIZE_MIN || + ring->rx_jumbo_pending < FBNIC_QUEUE_SIZE_MIN || + ring->tx_pending < max(FBNIC_QUEUE_SIZE_MIN, FBNIC_TX_DESC_MIN)) { + NL_SET_ERR_MSG_MOD(extack, "requested ring size too small"); + return -EINVAL; + } + + if (!netif_running(netdev)) { + fbnic_set_rings(fbn, ring); + return 0; + } + + clone = fbnic_clone_create(fbn); + if (!clone) + return -ENOMEM; + + fbnic_set_rings(clone, ring); + + err = fbnic_alloc_napi_vectors(clone); + if (err) + goto err_free_clone; + + err = fbnic_alloc_resources(clone); + if (err) + goto err_free_napis; + + fbnic_down_noidle(fbn); + err = fbnic_wait_all_queues_idle(fbn->fbd, true); + if (err) + goto err_start_stack; + + err = fbnic_set_netif_queues(clone); + if (err) + goto err_start_stack; + + /* Nothing can fail past this point */ + fbnic_flush(fbn); + + fbnic_clone_swap(fbn, clone); + + fbnic_up(fbn); + + fbnic_free_resources(clone); + fbnic_free_napi_vectors(clone); + fbnic_clone_free(clone); + + return 0; + +err_start_stack: + fbnic_flush(fbn); + fbnic_up(fbn); + fbnic_free_resources(clone); +err_free_napis: + fbnic_free_napi_vectors(clone); +err_free_clone: + fbnic_clone_free(clone); + return err; +} + static void fbnic_get_strings(struct net_device *dev, u32 sset, u8 *data) { int i; @@ -1351,6 +1458,8 @@ static const struct ethtool_ops fbnic_ethtool_ops = { .get_regs = fbnic_get_regs, .get_coalesce = fbnic_get_coalesce, .set_coalesce = fbnic_set_coalesce, + .get_ringparam = fbnic_get_ringparam, + .set_ringparam = fbnic_set_ringparam, .get_strings = fbnic_get_strings, .get_ethtool_stats = fbnic_get_ethtool_stats, .get_sset_count = fbnic_get_sset_count, diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_tlv.c b/drivers/net/ethernet/meta/fbnic/fbnic_tlv.c index d558d176e0df..517ed8b2f1cb 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_tlv.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_tlv.c @@ -261,7 +261,7 @@ ssize_t fbnic_tlv_attr_get_string(struct fbnic_tlv_msg *attr, char *dst, return -E2BIG; srclen = le16_to_cpu(attr->hdr.len) - sizeof(*attr); - if (srclen > 0 && attr->value[srclen - 1] == '\0') + if (srclen > 0 && ((char *)attr->value)[srclen - 1] == '\0') srclen--; if (srclen >= dstsize) { diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c index aba4c65974ee..ac11389a764c 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c @@ -1221,7 +1221,7 @@ void fbnic_aggregate_ring_rx_counters(struct fbnic_net *fbn, fbn->rx_stats.rx.csum_complete += stats->rx.csum_complete; fbn->rx_stats.rx.csum_none += stats->rx.csum_none; /* Remember to add new stats here */ - BUILD_BUG_ON(sizeof(fbn->tx_stats.rx) / 8 != 3); + BUILD_BUG_ON(sizeof(fbn->rx_stats.rx) / 8 != 3); } void fbnic_aggregate_ring_tx_counters(struct fbnic_net *fbn, @@ -1316,7 +1316,9 @@ static int fbnic_alloc_nv_page_pool(struct fbnic_net *fbn, .dev = nv->dev, .dma_dir = DMA_BIDIRECTIONAL, .offset = 0, - .max_len = PAGE_SIZE + .max_len = PAGE_SIZE, + .napi = &nv->napi, + .netdev = fbn->netdev, }; struct page_pool *pp; diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h index 54368dc22328..f46616af41ea 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h @@ -24,9 +24,22 @@ struct fbnic_net; #define FBNIC_TX_DESC_WAKEUP (FBNIC_MAX_SKB_DESC * 2) #define 
FBNIC_TX_DESC_MIN roundup_pow_of_two(FBNIC_TX_DESC_WAKEUP) +/* To receive the worst case packet we need: + * 1 descriptor for primary metadata + * + 1 descriptor for optional metadata + * + 1 descriptor for headers + * + 4 descriptors for payload + */ +#define FBNIC_MAX_RX_PKT_DESC 7 +#define FBNIC_RX_DESC_MIN roundup_pow_of_two(FBNIC_MAX_RX_PKT_DESC * 2) + #define FBNIC_MAX_TXQS 128u #define FBNIC_MAX_RXQS 128u +/* These apply to TWQs, TCQ, RCQ */ +#define FBNIC_QUEUE_SIZE_MIN 16u +#define FBNIC_QUEUE_SIZE_MAX SZ_64K + #define FBNIC_TXQ_SIZE_DEFAULT 1024 #define FBNIC_HPQ_SIZE_DEFAULT 256 #define FBNIC_PPQ_SIZE_DEFAULT 256 diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c index c15a5ef4674e..f1966788c98e 100644 --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c @@ -1579,6 +1579,7 @@ unmap_bar: * adapter-MTU file and apc->mana_pci_debugfs folder. */ debugfs_remove_recursive(gc->mana_pci_debugfs); + gc->mana_pci_debugfs = NULL; pci_iounmap(pdev, bar0_va); free_gc: pci_set_drvdata(pdev, NULL); @@ -1601,6 +1602,8 @@ static void mana_gd_remove(struct pci_dev *pdev) debugfs_remove_recursive(gc->mana_pci_debugfs); + gc->mana_pci_debugfs = NULL; + pci_iounmap(pdev, gc->bar0_va); vfree(gc); @@ -1656,6 +1659,8 @@ static void mana_gd_shutdown(struct pci_dev *pdev) debugfs_remove_recursive(gc->mana_pci_debugfs); + gc->mana_pci_debugfs = NULL; + pci_disable_device(pdev); } @@ -1682,8 +1687,10 @@ static int __init mana_driver_init(void) mana_debugfs_root = debugfs_create_dir("mana", NULL); err = pci_register_driver(&mana_driver); - if (err) + if (err) { debugfs_remove(mana_debugfs_root); + mana_debugfs_root = NULL; + } return err; } @@ -1693,6 +1700,8 @@ static void __exit mana_driver_exit(void) pci_unregister_driver(&mana_driver); debugfs_remove(mana_debugfs_root); + + mana_debugfs_root = NULL; } module_init(mana_driver_init); diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index 0411a1897f57..9a8171f099b6 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -13,6 +13,7 @@ #include <net/checksum.h> #include <net/ip6_checksum.h> +#include <net/netdev_lock.h> #include <net/page_pool/helpers.h> #include <net/xdp.h> @@ -747,12 +748,11 @@ static const struct net_device_ops mana_devops = { static void mana_cleanup_port_context(struct mana_port_context *apc) { /* - * at this point all dir/files under the vport directory - * are already cleaned up. 
- * We are sure the apc->mana_port_debugfs remove will not - * cause any freed memory access issues + * make sure subsequent cleanup attempts don't end up removing already + * cleaned dentry pointer */ debugfs_remove(apc->mana_port_debugfs); + apc->mana_port_debugfs = NULL; kfree(apc->rxqs); apc->rxqs = NULL; } @@ -1263,6 +1263,7 @@ static void mana_destroy_eq(struct mana_context *ac) return; debugfs_remove_recursive(ac->mana_eqs_debugfs); + ac->mana_eqs_debugfs = NULL; for (i = 0; i < gc->max_num_queues; i++) { eq = ac->eqs[i].eq; @@ -1925,6 +1926,7 @@ static void mana_destroy_txq(struct mana_port_context *apc) for (i = 0; i < apc->num_queues; i++) { debugfs_remove_recursive(apc->tx_qp[i].mana_tx_debugfs); + apc->tx_qp[i].mana_tx_debugfs = NULL; napi = &apc->tx_qp[i].tx_cq.napi; if (apc->tx_qp[i].txq.napi_initialized) { @@ -2112,6 +2114,7 @@ static void mana_destroy_rxq(struct mana_port_context *apc, return; debugfs_remove_recursive(rxq->mana_rx_debugfs); + rxq->mana_rx_debugfs = NULL; napi = &rxq->rx_cq.napi; diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index f915c423fe70..886061d7351a 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -454,7 +454,7 @@ int qed_fill_dev_info(struct qed_dev *cdev, static void qed_free_cdev(struct qed_dev *cdev) { - kfree((void *)cdev); + kfree(cdev); } static struct qed_dev *qed_alloc_cdev(struct pci_dev *pdev) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c index f9dd50152b1e..28d24d59efb8 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c @@ -454,8 +454,10 @@ static int qlcnic_sriov_set_guest_vlan_mode(struct qlcnic_adapter *adapter, num_vlans = sriov->num_allowed_vlans; sriov->allowed_vlans = kcalloc(num_vlans, sizeof(u16), GFP_KERNEL); - if (!sriov->allowed_vlans) + if (!sriov->allowed_vlans) { + qlcnic_sriov_free_vlans(adapter); return -ENOMEM; + } vlans = (u16 *)&cmd->rsp.arg[3]; for (i = 0; i < num_vlans; i++) @@ -2167,8 +2169,10 @@ int qlcnic_sriov_alloc_vlans(struct qlcnic_adapter *adapter) vf = &sriov->vf_info[i]; vf->sriov_vlans = kcalloc(sriov->num_allowed_vlans, sizeof(*vf->sriov_vlans), GFP_KERNEL); - if (!vf->sriov_vlans) + if (!vf->sriov_vlans) { + qlcnic_sriov_free_vlans(adapter); return -ENOMEM; + } } return 0; diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index fa339bd8c775..b18daeeda40d 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -89,6 +89,7 @@ #define JUMBO_6K (6 * SZ_1K - VLAN_ETH_HLEN - ETH_FCS_LEN) #define JUMBO_7K (7 * SZ_1K - VLAN_ETH_HLEN - ETH_FCS_LEN) #define JUMBO_9K (9 * SZ_1K - VLAN_ETH_HLEN - ETH_FCS_LEN) +#define JUMBO_16K (SZ_16K - VLAN_ETH_HLEN - ETH_FCS_LEN) static const struct { const char *name; @@ -5360,6 +5361,9 @@ static int rtl_jumbo_max(struct rtl8169_private *tp) /* RTL8168c */ case RTL_GIGA_MAC_VER_18 ... RTL_GIGA_MAC_VER_24: return JUMBO_6K; + /* RTL8125/8126 */ + case RTL_GIGA_MAC_VER_61 ... 
RTL_GIGA_MAC_VER_71: + return JUMBO_16K; default: return JUMBO_9K; } diff --git a/drivers/net/ethernet/realtek/rtase/rtase_main.c b/drivers/net/ethernet/realtek/rtase/rtase_main.c index 3bd11cb56294..2aacc1996796 100644 --- a/drivers/net/ethernet/realtek/rtase/rtase_main.c +++ b/drivers/net/ethernet/realtek/rtase/rtase_main.c @@ -1501,7 +1501,10 @@ static void rtase_wait_for_quiescence(const struct net_device *dev) static void rtase_sw_reset(struct net_device *dev) { struct rtase_private *tp = netdev_priv(dev); + struct rtase_ring *ring, *tmp; + struct rtase_int_vector *ivec; int ret; + u32 i; netif_stop_queue(dev); netif_carrier_off(dev); @@ -1512,6 +1515,13 @@ static void rtase_sw_reset(struct net_device *dev) rtase_tx_clear(tp); rtase_rx_clear(tp); + for (i = 0; i < tp->int_nums; i++) { + ivec = &tp->int_vector[i]; + list_for_each_entry_safe(ring, tmp, &ivec->ring_list, + ring_entry) + list_del(&ring->ring_entry); + } + ret = rtase_init_ring(dev); if (ret) { netdev_err(dev, "unable to init ring\n"); diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig index c5f94a67b3f2..3c820ef56775 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Kconfig +++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig @@ -181,6 +181,17 @@ config DWMAC_SOCFPGA for the stmmac device driver. This driver is used for arria5 and cyclone5 FPGA SoCs. +config DWMAC_SOPHGO + tristate "Sophgo dwmac support" + depends on OF && (ARCH_SOPHGO || COMPILE_TEST) + default m if ARCH_SOPHGO + help + Support for ethernet controllers on Sophgo RISC-V SoCs + + This selects the Sophgo SoC specific glue layer support + for the stmmac device driver. This driver is used for the + ethernet controllers on various Sophgo SoCs. + config DWMAC_STARFIVE tristate "StarFive dwmac support" depends on OF && (ARCH_STARFIVE || COMPILE_TEST) diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile index b26f0e79c2b3..594883fb4164 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Makefile +++ b/drivers/net/ethernet/stmicro/stmmac/Makefile @@ -24,6 +24,7 @@ obj-$(CONFIG_DWMAC_ROCKCHIP) += dwmac-rk.o obj-$(CONFIG_DWMAC_RZN1) += dwmac-rzn1.o obj-$(CONFIG_DWMAC_S32) += dwmac-s32.o obj-$(CONFIG_DWMAC_SOCFPGA) += dwmac-altr-socfpga.o +obj-$(CONFIG_DWMAC_SOPHGO) += dwmac-sophgo.o obj-$(CONFIG_DWMAC_STARFIVE) += dwmac-starfive.o obj-$(CONFIG_DWMAC_STI) += dwmac-sti.o obj-$(CONFIG_DWMAC_STM32) += dwmac-stm32.o diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c index ef99ef3f1ab4..37fe7c288878 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c @@ -59,10 +59,11 @@ static void anarion_gmac_exit(struct platform_device *pdev, void *priv) gmac_write_reg(gmac, GMAC_RESET_CONTROL_REG, 1); } -static struct anarion_gmac *anarion_config_dt(struct platform_device *pdev) +static struct anarion_gmac * +anarion_config_dt(struct platform_device *pdev, + struct plat_stmmacenet_data *plat_dat) { struct anarion_gmac *gmac; - phy_interface_t phy_mode; void __iomem *ctl_block; int err; @@ -79,11 +80,7 @@ static struct anarion_gmac *anarion_config_dt(struct platform_device *pdev) gmac->ctl_block = ctl_block; - err = of_get_phy_mode(pdev->dev.of_node, &phy_mode); - if (err) - return ERR_PTR(err); - - switch (phy_mode) { + switch (plat_dat->phy_interface) { case PHY_INTERFACE_MODE_RGMII: fallthrough; case PHY_INTERFACE_MODE_RGMII_ID: @@ -93,7 +90,7 @@ 
static struct anarion_gmac *anarion_config_dt(struct platform_device *pdev) break; default: dev_err(&pdev->dev, "Unsupported phy-mode (%d)\n", - phy_mode); + plat_dat->phy_interface); return ERR_PTR(-ENOTSUPP); } @@ -111,14 +108,14 @@ static int anarion_dwmac_probe(struct platform_device *pdev) if (ret) return ret; - gmac = anarion_config_dt(pdev); - if (IS_ERR(gmac)) - return PTR_ERR(gmac); - plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) return PTR_ERR(plat_dat); + gmac = anarion_config_dt(pdev, plat_dat); + if (IS_ERR(gmac)) + return PTR_ERR(gmac); + plat_dat->init = anarion_gmac_init; plat_dat->exit = anarion_gmac_exit; anarion_gmac_init(pdev, gmac); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c index 3f0f4ea6cf2e..f18fc4f183fe 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c @@ -319,6 +319,10 @@ static const struct dwc_eth_dwmac_data tegra_eqos_data = { .stmmac_clk_name = "slave_bus", }; +static const struct dwc_eth_dwmac_data fsd_eqos_data = { + .stmmac_clk_name = "slave_bus", +}; + static int dwc_eth_dwmac_probe(struct platform_device *pdev) { const struct dwc_eth_dwmac_data *data; @@ -359,7 +363,8 @@ static int dwc_eth_dwmac_probe(struct platform_device *pdev) plat_dat->stmmac_clk = dwc_eth_find_clk(plat_dat, data->stmmac_clk_name); - ret = data->probe(pdev, plat_dat, &stmmac_res); + if (data->probe) + ret = data->probe(pdev, plat_dat, &stmmac_res); if (ret < 0) { dev_err_probe(&pdev->dev, ret, "failed to probe subdriver\n"); clk_bulk_disable_unprepare(plat_dat->num_clks, plat_dat->clks); @@ -400,6 +405,7 @@ static void dwc_eth_dwmac_remove(struct platform_device *pdev) static const struct of_device_id dwc_eth_dwmac_match[] = { { .compatible = "snps,dwc-qos-ethernet-4.10", .data = &dwc_qos_data }, { .compatible = "nvidia,tegra186-eqos", .data = &tegra_eqos_data }, + { .compatible = "tesla,fsd-ethqos", .data = &fsd_eqos_data }, { } }; MODULE_DEVICE_TABLE(of, dwc_eth_dwmac_match); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c index 9c8de47ee149..5910571a954f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c @@ -494,10 +494,10 @@ static int intel_tsn_lane_is_available(struct net_device *ndev, if ((rbuf.buf[0] >> (4 * (intel_priv->tsn_lane_regs[j] % 8)) & B_PCH_FIA_PCR_L0O) == 0xB) - return ret; + return 0; } - return ret; + return -EINVAL; } static int intel_set_reg_access(const struct pmc_serdes_regs *regs, int max_regs) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c index 0a9c137cc4e6..ca4035cbb55b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c @@ -211,16 +211,12 @@ static int ipq806x_gmac_set_speed(struct ipq806x_gmac *gmac, int speed) return 0; } -static int ipq806x_gmac_of_parse(struct ipq806x_gmac *gmac) +static int ipq806x_gmac_of_parse(struct ipq806x_gmac *gmac, + struct plat_stmmacenet_data *plat_dat) { struct device *dev = &gmac->pdev->dev; - int ret; - ret = of_get_phy_mode(dev->of_node, &gmac->phy_mode); - if (ret) { - dev_err(dev, "missing phy mode property\n"); - return -EINVAL; - } + gmac->phy_mode = plat_dat->phy_interface; if (of_property_read_u32(dev->of_node, "qcom,id", &gmac->id) < 0) { 
dev_err(dev, "missing qcom id property\n"); @@ -398,7 +394,7 @@ static int ipq806x_gmac_probe(struct platform_device *pdev) gmac->pdev = pdev; - err = ipq806x_gmac_of_parse(gmac); + err = ipq806x_gmac_of_parse(gmac, plat_dat); if (err) { dev_err(dev, "device tree parsing error\n"); return err; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c index 16fd24facb0e..f1c62f237e58 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c @@ -11,6 +11,8 @@ #include "dwmac_dma.h" #include "dwmac1000.h" +#define DRIVER_NAME "dwmac-loongson-pci" + /* Normal Loongson Tx Summary */ #define DMA_INTR_ENA_NIE_TX_LOONGSON 0x00040000 /* Normal Loongson Rx Summary */ @@ -567,7 +569,7 @@ static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id for (i = 0; i < PCI_STD_NUM_BARS; i++) { if (pci_resource_len(pdev, i) == 0) continue; - ret = pcim_iomap_regions(pdev, BIT(0), pci_name(pdev)); + ret = pcim_iomap_regions(pdev, BIT(0), DRIVER_NAME); if (ret) goto err_disable_device; break; @@ -689,7 +691,7 @@ static const struct pci_device_id loongson_dwmac_id_table[] = { MODULE_DEVICE_TABLE(pci, loongson_dwmac_id_table); static struct pci_driver loongson_dwmac_driver = { - .name = "dwmac-loongson-pci", + .name = DRIVER_NAME, .id_table = loongson_dwmac_id_table, .probe = loongson_dwmac_probe, .remove = loongson_dwmac_remove, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c index c9636832a570..d178d5ddc7c7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c @@ -456,7 +456,6 @@ static int mediatek_dwmac_config_dt(struct mediatek_dwmac_plat_data *plat) { struct mac_delay_struct *mac_delay = &plat->mac_delay; u32 tx_delay_ps, rx_delay_ps; - int err; plat->peri_regmap = syscon_regmap_lookup_by_phandle(plat->np, "mediatek,pericfg"); if (IS_ERR(plat->peri_regmap)) { @@ -464,12 +463,6 @@ static int mediatek_dwmac_config_dt(struct mediatek_dwmac_plat_data *plat) return PTR_ERR(plat->peri_regmap); } - err = of_get_phy_mode(plat->np, &plat->phy_mode); - if (err) { - dev_err(plat->dev, "not find phy-mode\n"); - return err; - } - if (!of_property_read_u32(plat->np, "mediatek,tx-delay-ps", &tx_delay_ps)) { if (tx_delay_ps < plat->variant->tx_delay_max) { mac_delay->tx_delay = tx_delay_ps; @@ -587,6 +580,7 @@ static int mediatek_dwmac_common_data(struct platform_device *pdev, { int i; + priv_plat->phy_mode = plat->phy_interface; plat->mac_interface = priv_plat->phy_mode; if (priv_plat->mac_wol) plat->flags &= ~STMMAC_FLAG_USE_PHY_WOL; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c index 9c2d62d133ad..a50782994b97 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c @@ -417,11 +417,7 @@ static int meson8b_dwmac_probe(struct platform_device *pdev) return PTR_ERR(dwmac->regs); dwmac->dev = &pdev->dev; - ret = of_get_phy_mode(pdev->dev.of_node, &dwmac->phy_mode); - if (ret) { - dev_err(&pdev->dev, "missing phy-mode property\n"); - return ret; - } + dwmac->phy_mode = plat_dat->phy_interface; /* use 2ns as fallback since this value was previously hardcoded */ if (of_property_read_u32(pdev->dev.of_node, "amlogic,tx-delay-ns", diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c 
b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index eafe637540b6..0e4da216f942 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -794,9 +794,7 @@ static int qcom_ethqos_probe(struct platform_device *pdev) if (!ethqos) return -ENOMEM; - ret = of_get_phy_mode(np, ðqos->phy_mode); - if (ret) - return dev_err_probe(dev, ret, "Failed to get phy mode\n"); + ethqos->phy_mode = plat_dat->phy_interface; switch (ethqos->phy_mode) { case PHY_INTERFACE_MODE_RGMII: case PHY_INTERFACE_MODE_RGMII_ID: diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c index 003fa5cf42c3..2d4fee8b9ab1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c @@ -33,6 +33,7 @@ struct rk_gmac_ops { void (*set_clock_selection)(struct rk_priv_data *bsp_priv, bool input, bool enable); void (*integrated_phy_powerup)(struct rk_priv_data *bsp_priv); + bool php_grf_required; bool regs_valid; u32 regs[]; }; @@ -101,13 +102,6 @@ struct rk_priv_data { static void px30_set_to_rmii(struct rk_priv_data *bsp_priv) { - struct device *dev = &bsp_priv->pdev->dev; - - if (IS_ERR(bsp_priv->grf)) { - dev_err(dev, "%s: Missing rockchip,grf property\n", __func__); - return; - } - regmap_write(bsp_priv->grf, PX30_GRF_GMAC_CON1, PX30_GMAC_PHY_INTF_SEL_RMII); } @@ -181,13 +175,6 @@ static const struct rk_gmac_ops px30_ops = { static void rk3128_set_to_rgmii(struct rk_priv_data *bsp_priv, int tx_delay, int rx_delay) { - struct device *dev = &bsp_priv->pdev->dev; - - if (IS_ERR(bsp_priv->grf)) { - dev_err(dev, "Missing rockchip,grf property\n"); - return; - } - regmap_write(bsp_priv->grf, RK3128_GRF_MAC_CON1, RK3128_GMAC_PHY_INTF_SEL_RGMII | RK3128_GMAC_RMII_MODE_CLR); @@ -199,13 +186,6 @@ static void rk3128_set_to_rgmii(struct rk_priv_data *bsp_priv, static void rk3128_set_to_rmii(struct rk_priv_data *bsp_priv) { - struct device *dev = &bsp_priv->pdev->dev; - - if (IS_ERR(bsp_priv->grf)) { - dev_err(dev, "Missing rockchip,grf property\n"); - return; - } - regmap_write(bsp_priv->grf, RK3128_GRF_MAC_CON1, RK3128_GMAC_PHY_INTF_SEL_RMII | RK3128_GMAC_RMII_MODE); } @@ -214,11 +194,6 @@ static void rk3128_set_rgmii_speed(struct rk_priv_data *bsp_priv, int speed) { struct device *dev = &bsp_priv->pdev->dev; - if (IS_ERR(bsp_priv->grf)) { - dev_err(dev, "Missing rockchip,grf property\n"); - return; - } - if (speed == 10) regmap_write(bsp_priv->grf, RK3128_GRF_MAC_CON1, RK3128_GMAC_CLK_2_5M); @@ -236,11 +211,6 @@ static void rk3128_set_rmii_speed(struct rk_priv_data *bsp_priv, int speed) { struct device *dev = &bsp_priv->pdev->dev; - if (IS_ERR(bsp_priv->grf)) { - dev_err(dev, "Missing rockchip,grf property\n"); - return; - } - if (speed == 10) { regmap_write(bsp_priv->grf, RK3128_GRF_MAC_CON1, RK3128_GMAC_RMII_CLK_2_5M | @@ -297,13 +267,6 @@ static const struct rk_gmac_ops rk3128_ops = { static void rk3228_set_to_rgmii(struct rk_priv_data *bsp_priv, int tx_delay, int rx_delay) { - struct device *dev = &bsp_priv->pdev->dev; - - if (IS_ERR(bsp_priv->grf)) { - dev_err(dev, "Missing rockchip,grf property\n"); - return; - } - regmap_write(bsp_priv->grf, RK3228_GRF_MAC_CON1, RK3228_GMAC_PHY_INTF_SEL_RGMII | RK3228_GMAC_RMII_MODE_CLR | @@ -316,13 +279,6 @@ static void rk3228_set_to_rgmii(struct rk_priv_data *bsp_priv, static void rk3228_set_to_rmii(struct rk_priv_data *bsp_priv) { - struct device *dev = &bsp_priv->pdev->dev; - - if (IS_ERR(bsp_priv->grf)) { - 
dev_err(dev, "Missing rockchip,grf property\n"); - return; - } - regmap_write(bsp_priv->grf, RK3228_GRF_MAC_CON1, RK3228_GMAC_PHY_INTF_SEL_RMII | RK3228_GMAC_RMII_MODE); @@ -335,11 +291,6 @@ static void rk3228_set_rgmii_speed(struct rk_priv_data *bsp_priv, int speed) { struct device *dev = &bsp_priv->pdev->dev; - if (IS_ERR(bsp_priv->grf)) { - dev_err(dev, "Missing rockchip,grf property\n"); - return; - } - if (speed == 10) regmap_write(bsp_priv->grf, RK3228_GRF_MAC_CON1, RK3228_GMAC_CLK_2_5M); @@ -357,11 +308,6 @@ static void rk3228_set_rmii_speed(struct rk_priv_data *bsp_priv, int speed) { struct device *dev = &bsp_priv->pdev->dev; - if (IS_ERR(bsp_priv->grf)) { - dev_err(dev, "Missing rockchip,grf property\n"); - return; - } - if (speed == 10) regmap_write(bsp_priv->grf, RK3228_GRF_MAC_CON1, RK3228_GMAC_RMII_CLK_2_5M | @@ -419,13 +365,6 @@ static const struct rk_gmac_ops rk3228_ops = { static void rk3288_set_to_rgmii(struct rk_priv_data *bsp_priv, int tx_delay, int rx_delay) { - struct device *dev = &bsp_priv->pdev->dev; - - if (IS_ERR(bsp_priv->grf)) { - dev_err(dev, "Missing rockchip,grf property\n"); - return; - } - regmap_write(bsp_priv->grf, RK3288_GRF_SOC_CON1, RK3288_GMAC_PHY_INTF_SEL_RGMII | RK3288_GMAC_RMII_MODE_CLR); @@ -437,13 +376,6 @@ static void rk3288_set_to_rgmii(struct rk_priv_data *bsp_priv, static void rk3288_set_to_rmii(struct rk_priv_data *bsp_priv) { - struct device *dev = &bsp_priv->pdev->dev; - - if (IS_ERR(bsp_priv->grf)) { - dev_err(dev, "Missing rockchip,grf property\n"); - return; - } - regmap_write(bsp_priv->grf, RK3288_GRF_SOC_CON1, RK3288_GMAC_PHY_INTF_SEL_RMII | RK3288_GMAC_RMII_MODE); } @@ -452,11 +384,6 @@ static void rk3288_set_rgmii_speed(struct rk_priv_data *bsp_priv, int speed) { struct device *dev = &bsp_priv->pdev->dev; - if (IS_ERR(bsp_priv->grf)) { - dev_err(dev, "Missing rockchip,grf property\n"); - return; - } - if (speed == 10) regmap_write(bsp_priv->grf, RK3288_GRF_SOC_CON1, RK3288_GMAC_CLK_2_5M); @@ -474,11 +401,6 @@ static void rk3288_set_rmii_speed(struct rk_priv_data *bsp_priv, int speed) { struct device *dev = &bsp_priv->pdev->dev; - if (IS_ERR(bsp_priv->grf)) { - dev_err(dev, "Missing rockchip,grf property\n"); - return; - } - if (speed == 10) { regmap_write(bsp_priv->grf, RK3288_GRF_SOC_CON1, RK3288_GMAC_RMII_CLK_2_5M | @@ -511,13 +433,6 @@ static const struct rk_gmac_ops rk3288_ops = { static void rk3308_set_to_rmii(struct rk_priv_data *bsp_priv) { - struct device *dev = &bsp_priv->pdev->dev; - - if (IS_ERR(bsp_priv->grf)) { - dev_err(dev, "Missing rockchip,grf property\n"); - return; - } - regmap_write(bsp_priv->grf, RK3308_GRF_MAC_CON0, RK3308_GMAC_PHY_INTF_SEL_RMII); } @@ -526,11 +441,6 @@ static void rk3308_set_rmii_speed(struct rk_priv_data *bsp_priv, int speed) { struct device *dev = &bsp_priv->pdev->dev; - if (IS_ERR(bsp_priv->grf)) { - dev_err(dev, "Missing rockchip,grf property\n"); - return; - } - if (speed == 10) { regmap_write(bsp_priv->grf, RK3308_GRF_MAC_CON0, RK3308_GMAC_SPEED_10M); @@ -583,13 +493,6 @@ static const struct rk_gmac_ops rk3308_ops = { static void rk3328_set_to_rgmii(struct rk_priv_data *bsp_priv, int tx_delay, int rx_delay) { - struct device *dev = &bsp_priv->pdev->dev; - - if (IS_ERR(bsp_priv->grf)) { - dev_err(dev, "Missing rockchip,grf property\n"); - return; - } - regmap_write(bsp_priv->grf, RK3328_GRF_MAC_CON1, RK3328_GMAC_PHY_INTF_SEL_RGMII | RK3328_GMAC_RMII_MODE_CLR | @@ -603,14 +506,8 @@ static void rk3328_set_to_rgmii(struct rk_priv_data *bsp_priv, static void rk3328_set_to_rmii(struct 
rk_priv_data *bsp_priv) { - struct device *dev = &bsp_priv->pdev->dev; unsigned int reg; - if (IS_ERR(bsp_priv->grf)) { - dev_err(dev, "Missing rockchip,grf property\n"); - return; - } - reg = bsp_priv->integrated_phy ? RK3328_GRF_MAC_CON2 : RK3328_GRF_MAC_CON1; @@ -623,11 +520,6 @@ static void rk3328_set_rgmii_speed(struct rk_priv_data *bsp_priv, int speed) { struct device *dev = &bsp_priv->pdev->dev; - if (IS_ERR(bsp_priv->grf)) { - dev_err(dev, "Missing rockchip,grf property\n"); - return; - } - if (speed == 10) regmap_write(bsp_priv->grf, RK3328_GRF_MAC_CON1, RK3328_GMAC_CLK_2_5M); @@ -646,11 +538,6 @@ static void rk3328_set_rmii_speed(struct rk_priv_data *bsp_priv, int speed) struct device *dev = &bsp_priv->pdev->dev; unsigned int reg; - if (IS_ERR(bsp_priv->grf)) { - dev_err(dev, "Missing rockchip,grf property\n"); - return; - } - reg = bsp_priv->integrated_phy ? RK3328_GRF_MAC_CON2 : RK3328_GRF_MAC_CON1; @@ -711,13 +598,6 @@ static const struct rk_gmac_ops rk3328_ops = { static void rk3366_set_to_rgmii(struct rk_priv_data *bsp_priv, int tx_delay, int rx_delay) { - struct device *dev = &bsp_priv->pdev->dev; - - if (IS_ERR(bsp_priv->grf)) { - dev_err(dev, "%s: Missing rockchip,grf property\n", __func__); - return; - } - regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON6, RK3366_GMAC_PHY_INTF_SEL_RGMII | RK3366_GMAC_RMII_MODE_CLR); @@ -729,13 +609,6 @@ static void rk3366_set_to_rgmii(struct rk_priv_data *bsp_priv, static void rk3366_set_to_rmii(struct rk_priv_data *bsp_priv) { - struct device *dev = &bsp_priv->pdev->dev; - - if (IS_ERR(bsp_priv->grf)) { - dev_err(dev, "%s: Missing rockchip,grf property\n", __func__); - return; - } - regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON6, RK3366_GMAC_PHY_INTF_SEL_RMII | RK3366_GMAC_RMII_MODE); } @@ -744,11 +617,6 @@ static void rk3366_set_rgmii_speed(struct rk_priv_data *bsp_priv, int speed) { struct device *dev = &bsp_priv->pdev->dev; - if (IS_ERR(bsp_priv->grf)) { - dev_err(dev, "%s: Missing rockchip,grf property\n", __func__); - return; - } - if (speed == 10) regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON6, RK3366_GMAC_CLK_2_5M); @@ -766,11 +634,6 @@ static void rk3366_set_rmii_speed(struct rk_priv_data *bsp_priv, int speed) { struct device *dev = &bsp_priv->pdev->dev; - if (IS_ERR(bsp_priv->grf)) { - dev_err(dev, "%s: Missing rockchip,grf property\n", __func__); - return; - } - if (speed == 10) { regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON6, RK3366_GMAC_RMII_CLK_2_5M | @@ -822,13 +685,6 @@ static const struct rk_gmac_ops rk3366_ops = { static void rk3368_set_to_rgmii(struct rk_priv_data *bsp_priv, int tx_delay, int rx_delay) { - struct device *dev = &bsp_priv->pdev->dev; - - if (IS_ERR(bsp_priv->grf)) { - dev_err(dev, "%s: Missing rockchip,grf property\n", __func__); - return; - } - regmap_write(bsp_priv->grf, RK3368_GRF_SOC_CON15, RK3368_GMAC_PHY_INTF_SEL_RGMII | RK3368_GMAC_RMII_MODE_CLR); @@ -840,13 +696,6 @@ static void rk3368_set_to_rgmii(struct rk_priv_data *bsp_priv, static void rk3368_set_to_rmii(struct rk_priv_data *bsp_priv) { - struct device *dev = &bsp_priv->pdev->dev; - - if (IS_ERR(bsp_priv->grf)) { - dev_err(dev, "%s: Missing rockchip,grf property\n", __func__); - return; - } - regmap_write(bsp_priv->grf, RK3368_GRF_SOC_CON15, RK3368_GMAC_PHY_INTF_SEL_RMII | RK3368_GMAC_RMII_MODE); } @@ -855,11 +704,6 @@ static void rk3368_set_rgmii_speed(struct rk_priv_data *bsp_priv, int speed) { struct device *dev = &bsp_priv->pdev->dev; - if (IS_ERR(bsp_priv->grf)) { - dev_err(dev, "%s: Missing rockchip,grf property\n", __func__); - 
return; - } - if (speed == 10) regmap_write(bsp_priv->grf, RK3368_GRF_SOC_CON15, RK3368_GMAC_CLK_2_5M); @@ -877,11 +721,6 @@ static void rk3368_set_rmii_speed(struct rk_priv_data *bsp_priv, int speed) { struct device *dev = &bsp_priv->pdev->dev; - if (IS_ERR(bsp_priv->grf)) { - dev_err(dev, "%s: Missing rockchip,grf property\n", __func__); - return; - } - if (speed == 10) { regmap_write(bsp_priv->grf, RK3368_GRF_SOC_CON15, RK3368_GMAC_RMII_CLK_2_5M | @@ -933,13 +772,6 @@ static const struct rk_gmac_ops rk3368_ops = { static void rk3399_set_to_rgmii(struct rk_priv_data *bsp_priv, int tx_delay, int rx_delay) { - struct device *dev = &bsp_priv->pdev->dev; - - if (IS_ERR(bsp_priv->grf)) { - dev_err(dev, "%s: Missing rockchip,grf property\n", __func__); - return; - } - regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON5, RK3399_GMAC_PHY_INTF_SEL_RGMII | RK3399_GMAC_RMII_MODE_CLR); @@ -951,13 +783,6 @@ static void rk3399_set_to_rgmii(struct rk_priv_data *bsp_priv, static void rk3399_set_to_rmii(struct rk_priv_data *bsp_priv) { - struct device *dev = &bsp_priv->pdev->dev; - - if (IS_ERR(bsp_priv->grf)) { - dev_err(dev, "%s: Missing rockchip,grf property\n", __func__); - return; - } - regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON5, RK3399_GMAC_PHY_INTF_SEL_RMII | RK3399_GMAC_RMII_MODE); } @@ -966,11 +791,6 @@ static void rk3399_set_rgmii_speed(struct rk_priv_data *bsp_priv, int speed) { struct device *dev = &bsp_priv->pdev->dev; - if (IS_ERR(bsp_priv->grf)) { - dev_err(dev, "%s: Missing rockchip,grf property\n", __func__); - return; - } - if (speed == 10) regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON5, RK3399_GMAC_CLK_2_5M); @@ -988,11 +808,6 @@ static void rk3399_set_rmii_speed(struct rk_priv_data *bsp_priv, int speed) { struct device *dev = &bsp_priv->pdev->dev; - if (IS_ERR(bsp_priv->grf)) { - dev_err(dev, "%s: Missing rockchip,grf property\n", __func__); - return; - } - if (speed == 10) { regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON5, RK3399_GMAC_RMII_CLK_2_5M | @@ -1037,14 +852,8 @@ static const struct rk_gmac_ops rk3399_ops = { static void rk3568_set_to_rgmii(struct rk_priv_data *bsp_priv, int tx_delay, int rx_delay) { - struct device *dev = &bsp_priv->pdev->dev; u32 con0, con1; - if (IS_ERR(bsp_priv->grf)) { - dev_err(dev, "Missing rockchip,grf property\n"); - return; - } - con0 = (bsp_priv->id == 1) ? RK3568_GRF_GMAC1_CON0 : RK3568_GRF_GMAC0_CON0; con1 = (bsp_priv->id == 1) ? RK3568_GRF_GMAC1_CON1 : @@ -1062,14 +871,8 @@ static void rk3568_set_to_rgmii(struct rk_priv_data *bsp_priv, static void rk3568_set_to_rmii(struct rk_priv_data *bsp_priv) { - struct device *dev = &bsp_priv->pdev->dev; u32 con1; - if (IS_ERR(bsp_priv->grf)) { - dev_err(dev, "%s: Missing rockchip,grf property\n", __func__); - return; - } - con1 = (bsp_priv->id == 1) ? RK3568_GRF_GMAC1_CON1 : RK3568_GRF_GMAC0_CON1; regmap_write(bsp_priv->grf, con1, RK3568_GMAC_PHY_INTF_SEL_RMII); @@ -1147,14 +950,8 @@ static const struct rk_gmac_ops rk3568_ops = { static void rk3576_set_to_rgmii(struct rk_priv_data *bsp_priv, int tx_delay, int rx_delay) { - struct device *dev = &bsp_priv->pdev->dev; unsigned int offset_con; - if (IS_ERR(bsp_priv->grf) || IS_ERR(bsp_priv->php_grf)) { - dev_err(dev, "Missing rockchip,grf or rockchip,php-grf property\n"); - return; - } - offset_con = bsp_priv->id == 1 ? 
RK3576_GRF_GMAC_CON1 : RK3576_GRF_GMAC_CON0; @@ -1180,14 +977,8 @@ static void rk3576_set_to_rgmii(struct rk_priv_data *bsp_priv, static void rk3576_set_to_rmii(struct rk_priv_data *bsp_priv) { - struct device *dev = &bsp_priv->pdev->dev; unsigned int offset_con; - if (IS_ERR(bsp_priv->grf)) { - dev_err(dev, "%s: Missing rockchip,grf property\n", __func__); - return; - } - offset_con = bsp_priv->id == 1 ? RK3576_GRF_GMAC_CON1 : RK3576_GRF_GMAC_CON0; @@ -1254,6 +1045,7 @@ static const struct rk_gmac_ops rk3576_ops = { .set_rgmii_speed = rk3576_set_gmac_speed, .set_rmii_speed = rk3576_set_gmac_speed, .set_clock_selection = rk3576_set_clock_selection, + .php_grf_required = true, .regs_valid = true, .regs = { 0x2a220000, /* gmac0 */ @@ -1306,14 +1098,8 @@ static const struct rk_gmac_ops rk3576_ops = { static void rk3588_set_to_rgmii(struct rk_priv_data *bsp_priv, int tx_delay, int rx_delay) { - struct device *dev = &bsp_priv->pdev->dev; u32 offset_con, id = bsp_priv->id; - if (IS_ERR(bsp_priv->grf) || IS_ERR(bsp_priv->php_grf)) { - dev_err(dev, "Missing rockchip,grf or rockchip,php_grf property\n"); - return; - } - offset_con = bsp_priv->id == 1 ? RK3588_GRF_GMAC_CON9 : RK3588_GRF_GMAC_CON8; @@ -1334,13 +1120,6 @@ static void rk3588_set_to_rgmii(struct rk_priv_data *bsp_priv, static void rk3588_set_to_rmii(struct rk_priv_data *bsp_priv) { - struct device *dev = &bsp_priv->pdev->dev; - - if (IS_ERR(bsp_priv->php_grf)) { - dev_err(dev, "%s: Missing rockchip,php_grf property\n", __func__); - return; - } - regmap_write(bsp_priv->php_grf, RK3588_GRF_GMAC_CON0, RK3588_GMAC_PHY_INTF_SEL_RMII(bsp_priv->id)); @@ -1401,6 +1180,7 @@ static const struct rk_gmac_ops rk3588_ops = { .set_rgmii_speed = rk3588_set_gmac_speed, .set_rmii_speed = rk3588_set_gmac_speed, .set_clock_selection = rk3588_set_clock_selection, + .php_grf_required = true, .regs_valid = true, .regs = { 0xfe1b0000, /* gmac0 */ @@ -1423,13 +1203,6 @@ static const struct rk_gmac_ops rk3588_ops = { static void rv1108_set_to_rmii(struct rk_priv_data *bsp_priv) { - struct device *dev = &bsp_priv->pdev->dev; - - if (IS_ERR(bsp_priv->grf)) { - dev_err(dev, "%s: Missing rockchip,grf property\n", __func__); - return; - } - regmap_write(bsp_priv->grf, RV1108_GRF_GMAC_CON0, RV1108_GMAC_PHY_INTF_SEL_RMII); } @@ -1438,11 +1211,6 @@ static void rv1108_set_rmii_speed(struct rk_priv_data *bsp_priv, int speed) { struct device *dev = &bsp_priv->pdev->dev; - if (IS_ERR(bsp_priv->grf)) { - dev_err(dev, "%s: Missing rockchip,grf property\n", __func__); - return; - } - if (speed == 10) { regmap_write(bsp_priv->grf, RV1108_GRF_GMAC_CON0, RV1108_GMAC_RMII_CLK_2_5M | @@ -1491,13 +1259,6 @@ static const struct rk_gmac_ops rv1108_ops = { static void rv1126_set_to_rgmii(struct rk_priv_data *bsp_priv, int tx_delay, int rx_delay) { - struct device *dev = &bsp_priv->pdev->dev; - - if (IS_ERR(bsp_priv->grf)) { - dev_err(dev, "Missing rockchip,grf property\n"); - return; - } - regmap_write(bsp_priv->grf, RV1126_GRF_GMAC_CON0, RV1126_GMAC_PHY_INTF_SEL_RGMII | RV1126_GMAC_M0_RXCLK_DLY_ENABLE | @@ -1516,13 +1277,6 @@ static void rv1126_set_to_rgmii(struct rk_priv_data *bsp_priv, static void rv1126_set_to_rmii(struct rk_priv_data *bsp_priv) { - struct device *dev = &bsp_priv->pdev->dev; - - if (IS_ERR(bsp_priv->grf)) { - dev_err(dev, "%s: Missing rockchip,grf property\n", __func__); - return; - } - regmap_write(bsp_priv->grf, RV1126_GRF_GMAC_CON0, RV1126_GMAC_PHY_INTF_SEL_RMII); } @@ -1749,7 +1503,7 @@ static struct rk_priv_data *rk_gmac_setup(struct platform_device *pdev, if 
(!bsp_priv) return ERR_PTR(-ENOMEM); - of_get_phy_mode(dev->of_node, &bsp_priv->phy_iface); + bsp_priv->phy_iface = plat->phy_interface; bsp_priv->ops = ops; /* Some SoCs have multiple MAC controllers, which need @@ -1812,8 +1566,22 @@ static struct rk_priv_data *rk_gmac_setup(struct platform_device *pdev, bsp_priv->grf = syscon_regmap_lookup_by_phandle(dev->of_node, "rockchip,grf"); - bsp_priv->php_grf = syscon_regmap_lookup_by_phandle(dev->of_node, - "rockchip,php-grf"); + if (IS_ERR(bsp_priv->grf)) { + dev_err_probe(dev, PTR_ERR(bsp_priv->grf), + "failed to lookup rockchip,grf\n"); + return ERR_CAST(bsp_priv->grf); + } + + if (ops->php_grf_required) { + bsp_priv->php_grf = + syscon_regmap_lookup_by_phandle(dev->of_node, + "rockchip,php-grf"); + if (IS_ERR(bsp_priv->php_grf)) { + dev_err_probe(dev, PTR_ERR(bsp_priv->php_grf), + "failed to lookup rockchip,php-grf\n"); + return ERR_CAST(bsp_priv->php_grf); + } + } if (plat->phy_node) { bsp_priv->integrated_phy = of_property_read_bool(plat->phy_node, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sophgo.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sophgo.c new file mode 100644 index 000000000000..3303784cbbf8 --- /dev/null +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sophgo.c @@ -0,0 +1,75 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Sophgo DWMAC platform driver + * + * Copyright (C) 2024 Inochi Amaoto <inochiama@gmail.com> + */ + +#include <linux/clk.h> +#include <linux/module.h> +#include <linux/mod_devicetable.h> +#include <linux/platform_device.h> + +#include "stmmac_platform.h" + +static int sophgo_sg2044_dwmac_init(struct platform_device *pdev, + struct plat_stmmacenet_data *plat_dat, + struct stmmac_resources *stmmac_res) +{ + plat_dat->clk_tx_i = devm_clk_get_enabled(&pdev->dev, "tx"); + if (IS_ERR(plat_dat->clk_tx_i)) + return dev_err_probe(&pdev->dev, PTR_ERR(plat_dat->clk_tx_i), + "failed to get tx clock\n"); + + plat_dat->flags |= STMMAC_FLAG_SPH_DISABLE; + plat_dat->set_clk_tx_rate = stmmac_set_clk_tx_rate; + plat_dat->multicast_filter_bins = 0; + plat_dat->unicast_filter_entries = 1; + + return 0; +} + +static int sophgo_dwmac_probe(struct platform_device *pdev) +{ + struct plat_stmmacenet_data *plat_dat; + struct stmmac_resources stmmac_res; + struct device *dev = &pdev->dev; + int ret; + + ret = stmmac_get_platform_resources(pdev, &stmmac_res); + if (ret) + return dev_err_probe(dev, ret, + "failed to get platform resources\n"); + + plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac); + if (IS_ERR(plat_dat)) + return dev_err_probe(dev, PTR_ERR(plat_dat), + "failed to parse DT parameters\n"); + + ret = sophgo_sg2044_dwmac_init(pdev, plat_dat, &stmmac_res); + if (ret) + return ret; + + return stmmac_dvr_probe(dev, plat_dat, &stmmac_res); +} + +static const struct of_device_id sophgo_dwmac_match[] = { + { .compatible = "sophgo,sg2044-dwmac" }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, sophgo_dwmac_match); + +static struct platform_driver sophgo_dwmac_driver = { + .probe = sophgo_dwmac_probe, + .remove = stmmac_pltfr_remove, + .driver = { + .name = "sophgo-dwmac", + .pm = &stmmac_pltfr_pm_ops, + .of_match_table = sophgo_dwmac_match, + }, +}; +module_platform_driver(sophgo_dwmac_driver); + +MODULE_AUTHOR("Inochi Amaoto <inochiama@gmail.com>"); +MODULE_DESCRIPTION("Sophgo DWMAC platform driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c index 13b9c2a51fce..be57c6c12c1c 100644 --- 
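The new dwmac-sophgo.c glue above implements no link-speed clock handling of its own: it points plat_dat->set_clk_tx_rate at the generic stmmac_set_clk_tx_rate() helper and hands over the "tx" clock. For RGMII-class links the transmit clock conventionally runs at 2.5, 25 and 125 MHz for 10, 100 and 1000 Mb/s, which is presumably what the common helper programs. A hypothetical custom callback, for glue that cannot use the helper, assuming the callback takes the same arguments as the generic one:

#include <linux/clk.h>
#include <linux/phy.h>

static int example_set_clk_tx_rate(void *bsp_priv, struct clk *clk_tx_i,
				   phy_interface_t interface, int speed)
{
	unsigned long rate;

	/* bsp_priv and interface are unused in this simple sketch. */
	switch (speed) {
	case SPEED_1000:
		rate = 125000000;
		break;
	case SPEED_100:
		rate = 25000000;
		break;
	case SPEED_10:
		rate = 2500000;
		break;
	default:
		return -EINVAL;
	}

	return clk_set_rate(clk_tx_i, rate);
}
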
a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c @@ -185,7 +185,8 @@ static int sti_dwmac_set_mode(struct sti_dwmac *dwmac) } static int sti_dwmac_parse_data(struct sti_dwmac *dwmac, - struct platform_device *pdev) + struct platform_device *pdev, + struct plat_stmmacenet_data *plat_dat) { struct resource *res; struct device *dev = &pdev->dev; @@ -204,12 +205,7 @@ static int sti_dwmac_parse_data(struct sti_dwmac *dwmac, if (IS_ERR(regmap)) return PTR_ERR(regmap); - err = of_get_phy_mode(np, &dwmac->interface); - if (err && err != -ENODEV) { - dev_err(dev, "Can't get phy-mode\n"); - return err; - } - + dwmac->interface = plat_dat->phy_interface; dwmac->regmap = regmap; dwmac->gmac_en = of_property_read_bool(np, "st,gmac_en"); dwmac->ext_phyclk = of_property_read_bool(np, "st,ext-phyclk"); @@ -268,7 +264,7 @@ static int sti_dwmac_probe(struct platform_device *pdev) if (!dwmac) return -ENOMEM; - ret = sti_dwmac_parse_data(dwmac, pdev); + ret = sti_dwmac_parse_data(dwmac, pdev, plat_dat); if (ret) { dev_err(&pdev->dev, "Unable to parse OF data\n"); return ret; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index 4b7b2582a120..85723a78793a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -1155,11 +1155,10 @@ static int sun8i_dwmac_probe(struct platform_device *pdev) struct stmmac_resources stmmac_res; struct sunxi_priv_data *gmac; struct device *dev = &pdev->dev; - phy_interface_t interface; - int ret; struct stmmac_priv *priv; struct net_device *ndev; struct regmap *regmap; + int ret; ret = stmmac_get_platform_resources(pdev, &stmmac_res); if (ret) @@ -1219,10 +1218,6 @@ static int sun8i_dwmac_probe(struct platform_device *pdev) return ret; } - ret = of_get_phy_mode(dev->of_node, &interface); - if (ret) - return -EINVAL; - plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) return PTR_ERR(plat_dat); @@ -1230,7 +1225,6 @@ static int sun8i_dwmac_probe(struct platform_device *pdev) /* platform data specifying hardware features and callbacks. * hardware features were copied from Allwinner drivers. 
*/ - plat_dat->mac_interface = interface; plat_dat->rx_coe = STMMAC_RX_COE_TYPE2; plat_dat->tx_coe = 1; plat_dat->flags |= STMMAC_FLAG_HAS_SUN8I; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c index 1b1ce2888b2e..9f098ff0ff05 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c @@ -116,11 +116,7 @@ static int sun7i_gmac_probe(struct platform_device *pdev) if (!gmac) return -ENOMEM; - ret = of_get_phy_mode(dev->of_node, &gmac->interface); - if (ret && ret != -ENODEV) { - dev_err(dev, "Can't get phy-mode\n"); - return ret; - } + gmac->interface = plat_dat->phy_interface; gmac->tx_clk = devm_clk_get(dev, "allwinner_gmac_tx"); if (IS_ERR(gmac->tx_clk)) { diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index 3a00a988cb36..d87275c1cf23 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -282,7 +282,6 @@ struct stmmac_priv { /* Generic channel for NAPI */ struct stmmac_channel channel[STMMAC_CH_MAX]; - int speed; unsigned int pause_time; struct mii_bus *mii; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 1a928e21b286..839ecebf5f5f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -101,6 +101,7 @@ static int tc = TC_DEFAULT; module_param(tc, int, 0644); MODULE_PARM_DESC(tc, "DMA threshold control value"); +/* This is unused */ #define DEFAULT_BUFSIZE 1536 static int buf_sz = DEFAULT_BUFSIZE; module_param(buf_sz, int, 0644); @@ -218,8 +219,6 @@ static void stmmac_verify_args(void) { if (unlikely(watchdog < 0)) watchdog = TX_TIMEO; - if (unlikely((buf_sz < DEFAULT_BUFSIZE) || (buf_sz > BUF_SIZE_16KiB))) - buf_sz = DEFAULT_BUFSIZE; if (unlikely((pause < 0) || (pause > 0xffff))) pause = PAUSE_TIME; @@ -1028,8 +1027,6 @@ static void stmmac_mac_link_up(struct phylink_config *config, } } - priv->speed = speed; - if (priv->plat->fix_mac_speed) priv->plat->fix_mac_speed(priv->plat->bsp_priv, speed, mode); @@ -4028,7 +4025,6 @@ static int __stmmac_open(struct net_device *dev, } } - buf_sz = dma_conf->dma_buf_sz; for (int i = 0; i < MTL_MAX_TX_QUEUES; i++) if (priv->dma_conf.tx_queue[i].tbs & STMMAC_TBS_EN) dma_conf->tx_queue[i].tbs = priv->dma_conf.tx_queue[i].tbs; @@ -4133,9 +4129,6 @@ static int stmmac_release(struct net_device *dev) /* Release and free the Rx/Tx resources */ free_dma_desc_resources(priv, &priv->dma_conf); - /* Disable the MAC Rx/Tx */ - stmmac_mac_set(priv, priv->ioaddr, false); - /* Powerdown Serdes if there is */ if (priv->plat->serdes_powerdown) priv->plat->serdes_powerdown(dev, priv->plat->bsp_priv); @@ -5485,10 +5478,10 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) struct sk_buff *skb = NULL; struct stmmac_xdp_buff ctx; int xdp_status = 0; - int buf_sz; + int bufsz; dma_dir = page_pool_get_dma_dir(rx_q->page_pool); - buf_sz = DIV_ROUND_UP(priv->dma_conf.dma_buf_sz, PAGE_SIZE) * PAGE_SIZE; + bufsz = DIV_ROUND_UP(priv->dma_conf.dma_buf_sz, PAGE_SIZE) * PAGE_SIZE; limit = min(priv->dma_conf.dma_rx_size - 1, (unsigned int)limit); if (netif_msg_rx_status(priv)) { @@ -5601,7 +5594,7 @@ read_again: net_prefetch(page_address(buf->page) + buf->page_offset); - xdp_init_buff(&ctx.xdp, buf_sz, &rx_q->xdp_rxq); + xdp_init_buff(&ctx.xdp, bufsz, &rx_q->xdp_rxq); 
xdp_prepare_buff(&ctx.xdp, page_address(buf->page), buf->page_offset, buf1_len, true); @@ -7772,8 +7765,6 @@ void stmmac_dvr_remove(struct device *dev) pm_runtime_get_sync(dev); - stmmac_stop_all_dma(priv); - stmmac_mac_set(priv, priv->ioaddr, false); unregister_netdev(ndev); #ifdef CONFIG_DEBUG_FS @@ -7844,19 +7835,16 @@ int stmmac_suspend(struct device *dev) mutex_unlock(&priv->lock); rtnl_lock(); - if (device_may_wakeup(priv->device) && priv->plat->pmt) { - phylink_suspend(priv->phylink, true); - } else { - if (device_may_wakeup(priv->device)) - phylink_speed_down(priv->phylink, false); - phylink_suspend(priv->phylink, false); - } + if (device_may_wakeup(priv->device) && !priv->plat->pmt) + phylink_speed_down(priv->phylink, false); + + phylink_suspend(priv->phylink, + device_may_wakeup(priv->device) && priv->plat->pmt); rtnl_unlock(); if (stmmac_fpe_supported(priv)) timer_shutdown_sync(&priv->fpe_cfg.verify_timer); - priv->speed = SPEED_UNKNOWN; return 0; } EXPORT_SYMBOL_GPL(stmmac_suspend); @@ -7940,13 +7928,9 @@ int stmmac_resume(struct device *dev) } rtnl_lock(); - if (device_may_wakeup(priv->device) && priv->plat->pmt) { - phylink_resume(priv->phylink); - } else { - phylink_resume(priv->phylink); - if (device_may_wakeup(priv->device)) - phylink_speed_up(priv->phylink); - } + phylink_resume(priv->phylink); + if (device_may_wakeup(priv->device) && !priv->plat->pmt) + phylink_speed_up(priv->phylink); rtnl_unlock(); rtnl_lock(); @@ -7989,9 +7973,6 @@ static int __init stmmac_cmdline_opt(char *str) } else if (!strncmp(opt, "phyaddr:", 8)) { if (kstrtoint(opt + 8, 0, &phyaddr)) goto err; - } else if (!strncmp(opt, "buf_sz:", 7)) { - if (kstrtoint(opt + 7, 0, &buf_sz)) - goto err; } else if (!strncmp(opt, "tc:", 3)) { if (kstrtoint(opt + 3, 0, &tc)) goto err; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index d0e61aa1a495..8dc3bd6946c6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -405,6 +405,17 @@ static int stmmac_of_get_mac_mode(struct device_node *np) return -ENODEV; } +/* Compatible string array for all gmac4 devices */ +static const char * const stmmac_gmac4_compats[] = { + "snps,dwmac-4.00", + "snps,dwmac-4.10a", + "snps,dwmac-4.20a", + "snps,dwmac-5.10a", + "snps,dwmac-5.20", + "snps,dwmac-5.30a", + NULL +}; + /** * stmmac_probe_config_dt - parse device-tree driver parameters * @pdev: platform_device structure @@ -538,11 +549,7 @@ stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac) plat->pmt = 1; } - if (of_device_is_compatible(np, "snps,dwmac-4.00") || - of_device_is_compatible(np, "snps,dwmac-4.10a") || - of_device_is_compatible(np, "snps,dwmac-4.20a") || - of_device_is_compatible(np, "snps,dwmac-5.10a") || - of_device_is_compatible(np, "snps,dwmac-5.20")) { + if (of_device_compatible_match(np, stmmac_gmac4_compats)) { plat->has_gmac4 = 1; plat->has_gmac = 0; plat->pmt = 1; diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig index 3a13d60a947a..a07c910c497a 100644 --- a/drivers/net/ethernet/ti/Kconfig +++ b/drivers/net/ethernet/ti/Kconfig @@ -205,6 +205,7 @@ config TI_ICSSG_PRUETH_SR1 select PHYLIB select TI_ICSS_IEP select TI_K3_CPPI_DESC_POOL + select PAGE_POOL depends on PRU_REMOTEPROC depends on NET_SWITCHDEV depends on ARCH_K3 && OF && TI_K3_UDMA_GLUE_LAYER diff --git a/drivers/net/ethernet/ti/icssg/icss_iep.c b/drivers/net/ethernet/ti/icssg/icss_iep.c index 
d59c1744840a..b4a34c57b7b4 100644 --- a/drivers/net/ethernet/ti/icssg/icss_iep.c +++ b/drivers/net/ethernet/ti/icssg/icss_iep.c @@ -406,9 +406,25 @@ static void icss_iep_update_to_next_boundary(struct icss_iep *iep, u64 start_ns) static int icss_iep_perout_enable_hw(struct icss_iep *iep, struct ptp_perout_request *req, int on) { + struct timespec64 ts; + u64 ns_start; + u64 ns_width; int ret; u64 cmp; + /* Calculate width of the signal for PPS/PEROUT handling */ + ts.tv_sec = req->on.sec; + ts.tv_nsec = req->on.nsec; + ns_width = timespec64_to_ns(&ts); + + if (req->flags & PTP_PEROUT_PHASE) { + ts.tv_sec = req->phase.sec; + ts.tv_nsec = req->phase.nsec; + ns_start = timespec64_to_ns(&ts); + } else { + ns_start = 0; + } + if (iep->ops && iep->ops->perout_enable) { ret = iep->ops->perout_enable(iep->clockops_data, req, on, &cmp); if (ret) @@ -419,10 +435,12 @@ static int icss_iep_perout_enable_hw(struct icss_iep *iep, regmap_write(iep->map, ICSS_IEP_CMP1_REG0, lower_32_bits(cmp)); if (iep->plat_data->flags & ICSS_IEP_64BIT_COUNTER_SUPPORT) regmap_write(iep->map, ICSS_IEP_CMP1_REG1, upper_32_bits(cmp)); - /* Configure SYNC, 1ms pulse width */ - regmap_write(iep->map, ICSS_IEP_SYNC_PWIDTH_REG, 1000000); + /* Configure SYNC, based on req on width */ + regmap_write(iep->map, ICSS_IEP_SYNC_PWIDTH_REG, + div_u64(ns_width, iep->def_inc)); regmap_write(iep->map, ICSS_IEP_SYNC0_PERIOD_REG, 0); - regmap_write(iep->map, ICSS_IEP_SYNC_START_REG, 0); + regmap_write(iep->map, ICSS_IEP_SYNC_START_REG, + div_u64(ns_start, iep->def_inc)); regmap_write(iep->map, ICSS_IEP_SYNC_CTRL_REG, 0); /* one-shot mode */ /* Enable CMP 1 */ regmap_update_bits(iep->map, ICSS_IEP_CMP_CFG_REG, @@ -447,6 +465,10 @@ static int icss_iep_perout_enable_hw(struct icss_iep *iep, + req->period.nsec; icss_iep_update_to_next_boundary(iep, start_ns); + regmap_write(iep->map, ICSS_IEP_SYNC_PWIDTH_REG, + div_u64(ns_width, iep->def_inc)); + regmap_write(iep->map, ICSS_IEP_SYNC_START_REG, + div_u64(ns_start, iep->def_inc)); /* Enable Sync in single shot mode */ regmap_write(iep->map, ICSS_IEP_SYNC_CTRL_REG, IEP_SYNC_CTRL_SYNC_N_EN(0) | IEP_SYNC_CTRL_SYNC_EN); @@ -474,7 +496,37 @@ static int icss_iep_perout_enable_hw(struct icss_iep *iep, static int icss_iep_perout_enable(struct icss_iep *iep, struct ptp_perout_request *req, int on) { - return -EOPNOTSUPP; + int ret = 0; + + /* Reject requests with unsupported flags */ + if (req->flags & ~(PTP_PEROUT_DUTY_CYCLE | + PTP_PEROUT_PHASE)) + return -EOPNOTSUPP; + + mutex_lock(&iep->ptp_clk_mutex); + + if (iep->pps_enabled) { + ret = -EBUSY; + goto exit; + } + + if (iep->perout_enabled == !!on) + goto exit; + + /* Set default "on" time (1ms) for the signal if not passed by the app */ + if (!(req->flags & PTP_PEROUT_DUTY_CYCLE)) { + req->on.sec = 0; + req->on.nsec = NSEC_PER_MSEC; + } + + ret = icss_iep_perout_enable_hw(iep, req, on); + if (!ret) + iep->perout_enabled = !!on; + +exit: + mutex_unlock(&iep->ptp_clk_mutex); + + return ret; } static void icss_iep_cap_cmp_work(struct work_struct *work) @@ -549,10 +601,13 @@ static int icss_iep_pps_enable(struct icss_iep *iep, int on) if (on) { ns = icss_iep_gettime(iep, NULL); ts = ns_to_timespec64(ns); + rq.perout.flags = 0; rq.perout.period.sec = 1; rq.perout.period.nsec = 0; rq.perout.start.sec = ts.tv_sec + 2; rq.perout.start.nsec = 0; + rq.perout.on.sec = 0; + rq.perout.on.nsec = NSEC_PER_MSEC; ret = icss_iep_perout_enable_hw(iep, &rq.perout, on); } else { ret = icss_iep_perout_enable_hw(iep, &rq.perout, on); diff --git 
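The icss_iep.c hunks above replace the hard-coded 1 ms SYNC pulse with a width and phase taken from the PTP_PEROUT_DUTY_CYCLE and PTP_PEROUT_PHASE request fields; both are converted from nanoseconds to IEP counter cycles by dividing by iep->def_inc (the nanoseconds the counter advances per tick). A stand-alone illustration of the conversion, using a hypothetical 5 ns increment:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint64_t ns_width = 1000000; /* default 1 ms on-time (NSEC_PER_MSEC) */
        uint64_t ns_start = 250000;  /* example PTP_PEROUT_PHASE offset */
        uint32_t def_inc = 5;        /* hypothetical: 5 ns per IEP tick */

        /* mirrors div_u64(ns_width, iep->def_inc) in the hunk above */
        printf("ICSS_IEP_SYNC_PWIDTH_REG = %llu\n",
               (unsigned long long)(ns_width / def_inc)); /* 200000 */
        printf("ICSS_IEP_SYNC_START_REG  = %llu\n",
               (unsigned long long)(ns_start / def_inc)); /* 50000 */
        return 0;
}
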
a/drivers/net/ethernet/ti/icssg/icssg_common.c b/drivers/net/ethernet/ti/icssg/icssg_common.c index 74f0f200a89d..df5da7a98abf 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_common.c +++ b/drivers/net/ethernet/ti/icssg/icssg_common.c @@ -45,6 +45,11 @@ void prueth_cleanup_rx_chns(struct prueth_emac *emac, struct prueth_rx_chn *rx_chn, int max_rflows) { + if (rx_chn->pg_pool) { + page_pool_destroy(rx_chn->pg_pool); + rx_chn->pg_pool = NULL; + } + if (rx_chn->desc_pool) k3_cppi_desc_pool_destroy(rx_chn->desc_pool); @@ -93,11 +98,20 @@ void prueth_xmit_free(struct prueth_tx_chn *tx_chn, { struct cppi5_host_desc_t *first_desc, *next_desc; dma_addr_t buf_dma, next_desc_dma; + struct prueth_swdata *swdata; + struct page *page; u32 buf_dma_len; first_desc = desc; next_desc = first_desc; + swdata = cppi5_hdesc_get_swdata(desc); + if (swdata->type == PRUETH_SWDATA_PAGE) { + page = swdata->data.page; + page_pool_recycle_direct(page->pp, swdata->data.page); + goto free_desc; + } + cppi5_hdesc_get_obuf(first_desc, &buf_dma, &buf_dma_len); k3_udma_glue_tx_cppi5_to_dma_addr(tx_chn->tx_chn, &buf_dma); @@ -121,6 +135,7 @@ void prueth_xmit_free(struct prueth_tx_chn *tx_chn, k3_cppi_desc_pool_free(tx_chn->desc_pool, next_desc); } +free_desc: k3_cppi_desc_pool_free(tx_chn->desc_pool, first_desc); } EXPORT_SYMBOL_GPL(prueth_xmit_free); @@ -131,12 +146,13 @@ int emac_tx_complete_packets(struct prueth_emac *emac, int chn, struct net_device *ndev = emac->ndev; struct cppi5_host_desc_t *desc_tx; struct netdev_queue *netif_txq; + struct prueth_swdata *swdata; struct prueth_tx_chn *tx_chn; unsigned int total_bytes = 0; + struct xdp_frame *xdpf; struct sk_buff *skb; dma_addr_t desc_dma; int res, num_tx = 0; - void **swdata; tx_chn = &emac->tx_chns[chn]; @@ -157,20 +173,27 @@ int emac_tx_complete_packets(struct prueth_emac *emac, int chn, desc_dma); swdata = cppi5_hdesc_get_swdata(desc_tx); - /* was this command's TX complete? 
*/ - if (emac->is_sr1 && *(swdata) == emac->cmd_data) { + switch (swdata->type) { + case PRUETH_SWDATA_SKB: + skb = swdata->data.skb; + dev_sw_netstats_tx_add(skb->dev, 1, skb->len); + total_bytes += skb->len; + napi_consume_skb(skb, budget); + break; + case PRUETH_SWDATA_XDPF: + xdpf = swdata->data.xdpf; + dev_sw_netstats_tx_add(ndev, 1, xdpf->len); + total_bytes += xdpf->len; + xdp_return_frame(xdpf); + break; + default: + netdev_err(ndev, "tx_complete: invalid swdata type %d\n", swdata->type); prueth_xmit_free(tx_chn, desc_tx); + ndev->stats.tx_dropped++; continue; } - skb = *(swdata); prueth_xmit_free(tx_chn, desc_tx); - - ndev = skb->dev; - ndev->stats.tx_packets++; - ndev->stats.tx_bytes += skb->len; - total_bytes += skb->len; - napi_consume_skb(skb, budget); num_tx++; } @@ -461,17 +484,17 @@ fail: } EXPORT_SYMBOL_GPL(prueth_init_rx_chns); -int prueth_dma_rx_push(struct prueth_emac *emac, - struct sk_buff *skb, - struct prueth_rx_chn *rx_chn) +int prueth_dma_rx_push_mapped(struct prueth_emac *emac, + struct prueth_rx_chn *rx_chn, + struct page *page, u32 buf_len) { struct net_device *ndev = emac->ndev; struct cppi5_host_desc_t *desc_rx; - u32 pkt_len = skb_tailroom(skb); + struct prueth_swdata *swdata; dma_addr_t desc_dma; dma_addr_t buf_dma; - void **swdata; + buf_dma = page_pool_get_dma_addr(page) + PRUETH_HEADROOM; desc_rx = k3_cppi_desc_pool_alloc(rx_chn->desc_pool); if (!desc_rx) { netdev_err(ndev, "rx push: failed to allocate descriptor\n"); @@ -479,25 +502,19 @@ int prueth_dma_rx_push(struct prueth_emac *emac, } desc_dma = k3_cppi_desc_pool_virt2dma(rx_chn->desc_pool, desc_rx); - buf_dma = dma_map_single(rx_chn->dma_dev, skb->data, pkt_len, DMA_FROM_DEVICE); - if (unlikely(dma_mapping_error(rx_chn->dma_dev, buf_dma))) { - k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx); - netdev_err(ndev, "rx push: failed to map rx pkt buffer\n"); - return -EINVAL; - } - cppi5_hdesc_init(desc_rx, CPPI5_INFO0_HDESC_EPIB_PRESENT, PRUETH_NAV_PS_DATA_SIZE); k3_udma_glue_rx_dma_to_cppi5_addr(rx_chn->rx_chn, &buf_dma); - cppi5_hdesc_attach_buf(desc_rx, buf_dma, skb_tailroom(skb), buf_dma, skb_tailroom(skb)); + cppi5_hdesc_attach_buf(desc_rx, buf_dma, buf_len, buf_dma, buf_len); swdata = cppi5_hdesc_get_swdata(desc_rx); - *swdata = skb; + swdata->type = PRUETH_SWDATA_PAGE; + swdata->data.page = page; - return k3_udma_glue_push_rx_chn(rx_chn->rx_chn, 0, + return k3_udma_glue_push_rx_chn(rx_chn->rx_chn, PRUETH_RX_FLOW_DATA, desc_rx, desc_dma); } -EXPORT_SYMBOL_GPL(prueth_dma_rx_push); +EXPORT_SYMBOL_GPL(prueth_dma_rx_push_mapped); u64 icssg_ts_to_ns(u32 hi_sw, u32 hi, u32 lo, u32 cycle_time_ns) { @@ -535,18 +552,170 @@ void emac_rx_timestamp(struct prueth_emac *emac, ssh->hwtstamp = ns_to_ktime(ns); } -static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id) +/** + * emac_xmit_xdp_frame - transmits an XDP frame + * @emac: emac device + * @xdpf: data to transmit + * @page: page from page pool if already DMA mapped + * @q_idx: queue id + * + * Return: XDP state + */ +u32 emac_xmit_xdp_frame(struct prueth_emac *emac, + struct xdp_frame *xdpf, + struct page *page, + unsigned int q_idx) +{ + struct cppi5_host_desc_t *first_desc; + struct net_device *ndev = emac->ndev; + struct prueth_tx_chn *tx_chn; + dma_addr_t desc_dma, buf_dma; + struct prueth_swdata *swdata; + u32 *epib; + int ret; + + if (q_idx >= PRUETH_MAX_TX_QUEUES) { + netdev_err(ndev, "xdp tx: invalid q_id %d\n", q_idx); + return ICSSG_XDP_CONSUMED; /* drop */ + } + + tx_chn = &emac->tx_chns[q_idx]; + + first_desc = 
k3_cppi_desc_pool_alloc(tx_chn->desc_pool); + if (!first_desc) { + netdev_dbg(ndev, "xdp tx: failed to allocate descriptor\n"); + goto drop_free_descs; /* drop */ + } + + if (page) { /* already DMA mapped by page_pool */ + buf_dma = page_pool_get_dma_addr(page); + buf_dma += xdpf->headroom + sizeof(struct xdp_frame); + } else { /* Map the linear buffer */ + buf_dma = dma_map_single(tx_chn->dma_dev, xdpf->data, xdpf->len, DMA_TO_DEVICE); + if (dma_mapping_error(tx_chn->dma_dev, buf_dma)) { + netdev_err(ndev, "xdp tx: failed to map data buffer\n"); + goto drop_free_descs; /* drop */ + } + } + + cppi5_hdesc_init(first_desc, CPPI5_INFO0_HDESC_EPIB_PRESENT, + PRUETH_NAV_PS_DATA_SIZE); + cppi5_hdesc_set_pkttype(first_desc, 0); + epib = first_desc->epib; + epib[0] = 0; + epib[1] = 0; + + /* set dst tag to indicate internal qid at the firmware which is at + * bit8..bit15. bit0..bit7 indicates port num for directed + * packets in case of switch mode operation + */ + cppi5_desc_set_tags_ids(&first_desc->hdr, 0, (emac->port_id | (q_idx << 8))); + k3_udma_glue_tx_dma_to_cppi5_addr(tx_chn->tx_chn, &buf_dma); + cppi5_hdesc_attach_buf(first_desc, buf_dma, xdpf->len, buf_dma, xdpf->len); + swdata = cppi5_hdesc_get_swdata(first_desc); + if (page) { + swdata->type = PRUETH_SWDATA_PAGE; + swdata->data.page = page; + } else { + swdata->type = PRUETH_SWDATA_XDPF; + swdata->data.xdpf = xdpf; + } + + cppi5_hdesc_set_pktlen(first_desc, xdpf->len); + desc_dma = k3_cppi_desc_pool_virt2dma(tx_chn->desc_pool, first_desc); + + ret = k3_udma_glue_push_tx_chn(tx_chn->tx_chn, first_desc, desc_dma); + if (ret) { + netdev_err(ndev, "xdp tx: push failed: %d\n", ret); + goto drop_free_descs; + } + + return ICSSG_XDP_TX; + +drop_free_descs: + prueth_xmit_free(tx_chn, first_desc); + return ICSSG_XDP_CONSUMED; +} +EXPORT_SYMBOL_GPL(emac_xmit_xdp_frame); + +/** + * emac_run_xdp - run an XDP program + * @emac: emac device + * @xdp: XDP buffer containing the frame + * @page: page with RX data if already DMA mapped + * @len: Rx descriptor packet length + * + * Return: XDP state + */ +static u32 emac_run_xdp(struct prueth_emac *emac, struct xdp_buff *xdp, + struct page *page, u32 *len) +{ + struct net_device *ndev = emac->ndev; + struct bpf_prog *xdp_prog; + struct xdp_frame *xdpf; + u32 pkt_len = *len; + u32 act, result; + int q_idx, err; + + xdp_prog = READ_ONCE(emac->xdp_prog); + act = bpf_prog_run_xdp(xdp_prog, xdp); + switch (act) { + case XDP_PASS: + return ICSSG_XDP_PASS; + case XDP_TX: + /* Send packet to TX ring for immediate transmission */ + xdpf = xdp_convert_buff_to_frame(xdp); + if (unlikely(!xdpf)) { + ndev->stats.tx_dropped++; + goto drop; + } + + q_idx = smp_processor_id() % emac->tx_ch_num; + result = emac_xmit_xdp_frame(emac, xdpf, page, q_idx); + if (result == ICSSG_XDP_CONSUMED) + goto drop; + + dev_sw_netstats_rx_add(ndev, xdpf->len); + return result; + case XDP_REDIRECT: + err = xdp_do_redirect(emac->ndev, xdp, xdp_prog); + if (err) + goto drop; + + dev_sw_netstats_rx_add(ndev, pkt_len); + return ICSSG_XDP_REDIR; + default: + bpf_warn_invalid_xdp_action(emac->ndev, xdp_prog, act); + fallthrough; + case XDP_ABORTED: +drop: + trace_xdp_exception(emac->ndev, xdp_prog, act); + fallthrough; /* handle aborts by dropping packet */ + case XDP_DROP: + ndev->stats.rx_dropped++; + page_pool_recycle_direct(emac->rx_chns.pg_pool, page); + return ICSSG_XDP_CONSUMED; + } +} + +static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id, u32 *xdp_state) { struct prueth_rx_chn *rx_chn = &emac->rx_chns; u32 buf_dma_len, 
pkt_len, port_id = 0; struct net_device *ndev = emac->ndev; struct cppi5_host_desc_t *desc_rx; - struct sk_buff *skb, *new_skb; + struct prueth_swdata *swdata; dma_addr_t desc_dma, buf_dma; - void **swdata; + struct page *page, *new_page; + struct page_pool *pool; + struct sk_buff *skb; + struct xdp_buff xdp; u32 *psdata; + void *pa; int ret; + *xdp_state = 0; + pool = rx_chn->pg_pool; ret = k3_udma_glue_pop_rx_chn(rx_chn->rx_chn, flow_id, &desc_dma); if (ret) { if (ret != -ENODATA) @@ -558,15 +727,15 @@ static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id) return 0; desc_rx = k3_cppi_desc_pool_dma2virt(rx_chn->desc_pool, desc_dma); - swdata = cppi5_hdesc_get_swdata(desc_rx); - skb = *swdata; - - psdata = cppi5_hdesc_get_psdata(desc_rx); - /* RX HW timestamp */ - if (emac->rx_ts_enabled) - emac_rx_timestamp(emac, skb, psdata); + if (swdata->type != PRUETH_SWDATA_PAGE) { + netdev_err(ndev, "rx_pkt: invalid swdata->type %d\n", swdata->type); + k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx); + return 0; + } + page = swdata->data.page; + page_pool_dma_sync_for_cpu(pool, page, 0, PAGE_SIZE); cppi5_hdesc_get_obuf(desc_rx, &buf_dma, &buf_dma_len); k3_udma_glue_rx_cppi5_to_dma_addr(rx_chn->rx_chn, &buf_dma); pkt_len = cppi5_hdesc_get_pktlen(desc_rx); @@ -574,32 +743,63 @@ static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id) pkt_len -= 4; cppi5_desc_get_tags_ids(&desc_rx->hdr, &port_id, NULL); - dma_unmap_single(rx_chn->dma_dev, buf_dma, buf_dma_len, DMA_FROM_DEVICE); k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx); - skb->dev = ndev; - new_skb = netdev_alloc_skb_ip_align(ndev, PRUETH_MAX_PKT_SIZE); /* if allocation fails we drop the packet but push the - * descriptor back to the ring with old skb to prevent a stall + * descriptor back to the ring with old page to prevent a stall */ - if (!new_skb) { + new_page = page_pool_dev_alloc_pages(pool); + if (unlikely(!new_page)) { + new_page = page; ndev->stats.rx_dropped++; - new_skb = skb; + goto requeue; + } + + pa = page_address(page); + if (emac->xdp_prog) { + xdp_init_buff(&xdp, PAGE_SIZE, &rx_chn->xdp_rxq); + xdp_prepare_buff(&xdp, pa, PRUETH_HEADROOM, pkt_len, false); + + *xdp_state = emac_run_xdp(emac, &xdp, page, &pkt_len); + if (*xdp_state == ICSSG_XDP_PASS) + skb = xdp_build_skb_from_buff(&xdp); + else + goto requeue; } else { - /* send the filled skb up the n/w stack */ - skb_put(skb, pkt_len); - if (emac->prueth->is_switch_mode) - skb->offload_fwd_mark = emac->offload_fwd_mark; - skb->protocol = eth_type_trans(skb, ndev); - napi_gro_receive(&emac->napi_rx, skb); - ndev->stats.rx_bytes += pkt_len; - ndev->stats.rx_packets++; + /* prepare skb and send to n/w stack */ + skb = napi_build_skb(pa, PAGE_SIZE); + } + + if (!skb) { + ndev->stats.rx_dropped++; + page_pool_recycle_direct(pool, page); + goto requeue; } + skb_reserve(skb, PRUETH_HEADROOM); + skb_put(skb, pkt_len); + skb->dev = ndev; + + psdata = cppi5_hdesc_get_psdata(desc_rx); + /* RX HW timestamp */ + if (emac->rx_ts_enabled) + emac_rx_timestamp(emac, skb, psdata); + + if (emac->prueth->is_switch_mode) + skb->offload_fwd_mark = emac->offload_fwd_mark; + skb->protocol = eth_type_trans(skb, ndev); + + skb_mark_for_recycle(skb); + napi_gro_receive(&emac->napi_rx, skb); + ndev->stats.rx_bytes += pkt_len; + ndev->stats.rx_packets++; + +requeue: /* queue another RX DMA */ - ret = prueth_dma_rx_push(emac, new_skb, &emac->rx_chns); + ret = prueth_dma_rx_push_mapped(emac, &emac->rx_chns, new_page, + PRUETH_MAX_PKT_SIZE); if (WARN_ON(ret < 0)) { - 
dev_kfree_skb_any(new_skb); + page_pool_recycle_direct(pool, new_page); ndev->stats.rx_errors++; ndev->stats.rx_dropped++; } @@ -611,22 +811,19 @@ static void prueth_rx_cleanup(void *data, dma_addr_t desc_dma) { struct prueth_rx_chn *rx_chn = data; struct cppi5_host_desc_t *desc_rx; - struct sk_buff *skb; - dma_addr_t buf_dma; - u32 buf_dma_len; - void **swdata; + struct prueth_swdata *swdata; + struct page_pool *pool; + struct page *page; + pool = rx_chn->pg_pool; desc_rx = k3_cppi_desc_pool_dma2virt(rx_chn->desc_pool, desc_dma); swdata = cppi5_hdesc_get_swdata(desc_rx); - skb = *swdata; - cppi5_hdesc_get_obuf(desc_rx, &buf_dma, &buf_dma_len); - k3_udma_glue_rx_cppi5_to_dma_addr(rx_chn->rx_chn, &buf_dma); + if (swdata->type == PRUETH_SWDATA_PAGE) { + page = swdata->data.page; + page_pool_recycle_direct(pool, page); + } - dma_unmap_single(rx_chn->dma_dev, buf_dma, buf_dma_len, - DMA_FROM_DEVICE); k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx); - - dev_kfree_skb_any(skb); } static int prueth_tx_ts_cookie_get(struct prueth_emac *emac) @@ -662,13 +859,13 @@ enum netdev_tx icssg_ndo_start_xmit(struct sk_buff *skb, struct net_device *ndev struct prueth_emac *emac = netdev_priv(ndev); struct prueth *prueth = emac->prueth; struct netdev_queue *netif_txq; + struct prueth_swdata *swdata; struct prueth_tx_chn *tx_chn; dma_addr_t desc_dma, buf_dma; u32 pkt_len, dst_tag_id; int i, ret = 0, q_idx; bool in_tx_ts = 0; int tx_ts_cookie; - void **swdata; u32 *epib; pkt_len = skb_headlen(skb); @@ -730,7 +927,8 @@ enum netdev_tx icssg_ndo_start_xmit(struct sk_buff *skb, struct net_device *ndev k3_udma_glue_tx_dma_to_cppi5_addr(tx_chn->tx_chn, &buf_dma); cppi5_hdesc_attach_buf(first_desc, buf_dma, pkt_len, buf_dma, pkt_len); swdata = cppi5_hdesc_get_swdata(first_desc); - *swdata = skb; + swdata->type = PRUETH_SWDATA_SKB; + swdata->data.skb = skb; /* Handle the case where skb is fragmented in pages */ cur_desc = first_desc; @@ -833,15 +1031,27 @@ static void prueth_tx_cleanup(void *data, dma_addr_t desc_dma) { struct prueth_tx_chn *tx_chn = data; struct cppi5_host_desc_t *desc_tx; + struct prueth_swdata *swdata; + struct xdp_frame *xdpf; struct sk_buff *skb; - void **swdata; desc_tx = k3_cppi_desc_pool_dma2virt(tx_chn->desc_pool, desc_dma); swdata = cppi5_hdesc_get_swdata(desc_tx); - skb = *(swdata); - prueth_xmit_free(tx_chn, desc_tx); - dev_kfree_skb_any(skb); + switch (swdata->type) { + case PRUETH_SWDATA_SKB: + skb = swdata->data.skb; + dev_kfree_skb_any(skb); + break; + case PRUETH_SWDATA_XDPF: + xdpf = swdata->data.xdpf; + xdp_return_frame(xdpf); + break; + default: + break; + } + + prueth_xmit_free(tx_chn, desc_tx); } irqreturn_t prueth_rx_irq(int irq, void *dev_id) @@ -875,15 +1085,18 @@ int icssg_napi_rx_poll(struct napi_struct *napi_rx, int budget) PRUETH_RX_FLOW_DATA_SR1 : PRUETH_RX_FLOW_DATA; int flow = emac->is_sr1 ? 
PRUETH_MAX_RX_FLOWS_SR1 : PRUETH_MAX_RX_FLOWS; + int xdp_state_or = 0; int num_rx = 0; int cur_budget; + u32 xdp_state; int ret; while (flow--) { cur_budget = budget - num_rx; while (cur_budget--) { - ret = emac_rx_packet(emac, flow); + ret = emac_rx_packet(emac, flow, &xdp_state); + xdp_state_or |= xdp_state; if (ret) break; num_rx++; @@ -893,6 +1106,9 @@ int icssg_napi_rx_poll(struct napi_struct *napi_rx, int budget) break; } + if (xdp_state_or & ICSSG_XDP_REDIR) + xdp_do_flush(); + if (num_rx < budget && napi_complete_done(napi_rx, num_rx)) { if (unlikely(emac->rx_pace_timeout_ns)) { hrtimer_start(&emac->rx_hrtimer, @@ -907,29 +1123,71 @@ int icssg_napi_rx_poll(struct napi_struct *napi_rx, int budget) } EXPORT_SYMBOL_GPL(icssg_napi_rx_poll); +static struct page_pool *prueth_create_page_pool(struct prueth_emac *emac, + struct device *dma_dev, + int size) +{ + struct page_pool_params pp_params = { 0 }; + struct page_pool *pool; + + pp_params.order = 0; + pp_params.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV; + pp_params.pool_size = size; + pp_params.nid = dev_to_node(emac->prueth->dev); + pp_params.dma_dir = DMA_BIDIRECTIONAL; + pp_params.dev = dma_dev; + pp_params.napi = &emac->napi_rx; + pp_params.max_len = PAGE_SIZE; + + pool = page_pool_create(&pp_params); + if (IS_ERR(pool)) + netdev_err(emac->ndev, "cannot create rx page pool\n"); + + return pool; +} + int prueth_prepare_rx_chan(struct prueth_emac *emac, struct prueth_rx_chn *chn, int buf_size) { - struct sk_buff *skb; + struct page_pool *pool; + struct page *page; int i, ret; + pool = prueth_create_page_pool(emac, chn->dma_dev, chn->descs_num); + if (IS_ERR(pool)) + return PTR_ERR(pool); + + chn->pg_pool = pool; + for (i = 0; i < chn->descs_num; i++) { - skb = __netdev_alloc_skb_ip_align(NULL, buf_size, GFP_KERNEL); - if (!skb) - return -ENOMEM; + /* NOTE: we're not using memory efficiently here. + * 1 full page (4KB?) used here instead of + * PRUETH_MAX_PKT_SIZE (~1.5KB?) 
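Putting rough numbers on the NOTE above (all values are assumptions: 4 KiB pages, roughly 1.5 KiB maximum frame, and a hypothetical ring of 128 descriptors):

#include <stdio.h>

int main(void)
{
        unsigned int page = 4096;  /* assumed PAGE_SIZE */
        unsigned int pkt = 1536;   /* rough PRUETH_MAX_PKT_SIZE, per the note */
        unsigned int descs = 128;  /* hypothetical descs_num */

        printf("unused per buffer: %u bytes (%.0f%%)\n", page - pkt,
               100.0 * (page - pkt) / page);
        printf("ring total: %u KiB paged vs %u KiB packed\n",
               descs * page / 1024, descs * pkt / 1024);
        return 0;
}
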
+ */ + page = page_pool_dev_alloc_pages(pool); + if (!page) { + netdev_err(emac->ndev, "couldn't allocate rx page\n"); + ret = -ENOMEM; + goto recycle_alloc_pg; + } - ret = prueth_dma_rx_push(emac, skb, chn); + ret = prueth_dma_rx_push_mapped(emac, chn, page, buf_size); if (ret < 0) { netdev_err(emac->ndev, - "cannot submit skb for rx chan %s ret %d\n", + "cannot submit page for rx chan %s ret %d\n", chn->name, ret); - kfree_skb(skb); - return ret; + page_pool_recycle_direct(pool, page); + goto recycle_alloc_pg; } } return 0; + +recycle_alloc_pg: + prueth_reset_rx_chan(&emac->rx_chns, PRUETH_MAX_RX_FLOWS, false); + + return ret; } EXPORT_SYMBOL_GPL(prueth_prepare_rx_chan); @@ -958,6 +1216,9 @@ void prueth_reset_rx_chan(struct prueth_rx_chn *chn, prueth_rx_cleanup, !!i); if (disable) k3_udma_glue_disable_rx_chn(chn->rx_chn); + + page_pool_destroy(chn->pg_pool); + chn->pg_pool = NULL; } EXPORT_SYMBOL_GPL(prueth_reset_rx_chan); diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c index 00ed97860547..c4a995630eca 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c @@ -559,6 +559,33 @@ const struct icss_iep_clockops prueth_iep_clockops = { .perout_enable = prueth_perout_enable, }; +static int prueth_create_xdp_rxqs(struct prueth_emac *emac) +{ + struct xdp_rxq_info *rxq = &emac->rx_chns.xdp_rxq; + struct page_pool *pool = emac->rx_chns.pg_pool; + int ret; + + ret = xdp_rxq_info_reg(rxq, emac->ndev, 0, emac->napi_rx.napi_id); + if (ret) + return ret; + + ret = xdp_rxq_info_reg_mem_model(rxq, MEM_TYPE_PAGE_POOL, pool); + if (ret) + xdp_rxq_info_unreg(rxq); + + return ret; +} + +static void prueth_destroy_xdp_rxqs(struct prueth_emac *emac) +{ + struct xdp_rxq_info *rxq = &emac->rx_chns.xdp_rxq; + + if (!xdp_rxq_info_is_reg(rxq)) + return; + + xdp_rxq_info_unreg(rxq); +} + static int icssg_prueth_add_mcast(struct net_device *ndev, const u8 *addr) { struct net_device *real_dev; @@ -780,10 +807,14 @@ static int emac_ndo_open(struct net_device *ndev) if (ret) goto free_tx_ts_irq; - ret = k3_udma_glue_enable_rx_chn(emac->rx_chns.rx_chn); + ret = prueth_create_xdp_rxqs(emac); if (ret) goto reset_rx_chn; + ret = k3_udma_glue_enable_rx_chn(emac->rx_chns.rx_chn); + if (ret) + goto destroy_xdp_rxqs; + for (i = 0; i < emac->tx_ch_num; i++) { ret = k3_udma_glue_enable_tx_chn(emac->tx_chns[i].tx_chn); if (ret) @@ -809,6 +840,8 @@ reset_tx_chan: * any SKB for completion. So set false to free_skb */ prueth_reset_tx_chan(emac, i, false); +destroy_xdp_rxqs: + prueth_destroy_xdp_rxqs(emac); reset_rx_chn: prueth_reset_rx_chan(&emac->rx_chns, max_rx_flows, false); free_tx_ts_irq: @@ -879,7 +912,7 @@ static int emac_ndo_stop(struct net_device *ndev) k3_udma_glue_tdown_rx_chn(emac->rx_chns.rx_chn, true); prueth_reset_rx_chan(&emac->rx_chns, max_rx_flows, true); - + prueth_destroy_xdp_rxqs(emac); napi_disable(&emac->napi_rx); hrtimer_cancel(&emac->rx_hrtimer); @@ -1024,6 +1057,93 @@ static int emac_ndo_vlan_rx_del_vid(struct net_device *ndev, return 0; } +/** + * emac_xdp_xmit - Implements ndo_xdp_xmit + * @dev: netdev + * @n: number of frames + * @frames: array of XDP buffer pointers + * @flags: XDP extra info + * + * Return: number of frames successfully sent. Failed frames + * will be free'ed by XDP core. + * + * For error cases, a negative errno code is returned and no-frames + * are transmitted (caller must handle freeing frames). 
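For context on that contract, this is roughly how a batched transmit with partial-success semantics is consumed on the caller side; a kernel-style sketch with illustrative names, not code from this patch:

        static void xmit_batch(struct net_device *dev, int n,
                               struct xdp_frame **frames)
        {
                int i, sent;

                sent = emac_xdp_xmit(dev, n, frames, 0);
                if (sent < 0)
                        sent = 0;       /* error: nothing was queued */
                for (i = sent; i < n; i++)
                        xdp_return_frame(frames[i]); /* unsent tail freed here */
        }
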
+ **/ +static int emac_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, + u32 flags) +{ + struct prueth_emac *emac = netdev_priv(dev); + struct net_device *ndev = emac->ndev; + struct xdp_frame *xdpf; + unsigned int q_idx; + int nxmit = 0; + u32 err; + int i; + + q_idx = smp_processor_id() % emac->tx_ch_num; + + if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) + return -EINVAL; + + for (i = 0; i < n; i++) { + xdpf = frames[i]; + err = emac_xmit_xdp_frame(emac, xdpf, NULL, q_idx); + if (err != ICSSG_XDP_TX) { + ndev->stats.tx_dropped++; + break; + } + nxmit++; + } + + return nxmit; +} + +/** + * emac_xdp_setup - add/remove an XDP program + * @emac: emac device + * @bpf: XDP program + * + * Return: Always 0 (Success) + **/ +static int emac_xdp_setup(struct prueth_emac *emac, struct netdev_bpf *bpf) +{ + struct bpf_prog *prog = bpf->prog; + xdp_features_t val; + + val = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_NDO_XMIT; + xdp_set_features_flag(emac->ndev, val); + + if (!emac->xdpi.prog && !prog) + return 0; + + WRITE_ONCE(emac->xdp_prog, prog); + + xdp_attachment_setup(&emac->xdpi, bpf); + + return 0; +} + +/** + * emac_ndo_bpf - implements ndo_bpf for icssg_prueth + * @ndev: network adapter device + * @bpf: XDP program + * + * Return: 0 on success, error code on failure. + **/ +static int emac_ndo_bpf(struct net_device *ndev, struct netdev_bpf *bpf) +{ + struct prueth_emac *emac = netdev_priv(ndev); + + switch (bpf->command) { + case XDP_SETUP_PROG: + return emac_xdp_setup(emac, bpf); + default: + return -EINVAL; + } +} + static const struct net_device_ops emac_netdev_ops = { .ndo_open = emac_ndo_open, .ndo_stop = emac_ndo_stop, @@ -1038,6 +1158,8 @@ static const struct net_device_ops emac_netdev_ops = { .ndo_fix_features = emac_ndo_fix_features, .ndo_vlan_rx_add_vid = emac_ndo_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = emac_ndo_vlan_rx_del_vid, + .ndo_bpf = emac_ndo_bpf, + .ndo_xdp_xmit = emac_xdp_xmit, }; static int prueth_netdev_init(struct prueth *prueth, @@ -1066,6 +1188,8 @@ static int prueth_netdev_init(struct prueth *prueth, emac->prueth = prueth; emac->ndev = ndev; emac->port_id = port; + emac->xdp_prog = NULL; + emac->ndev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS; emac->cmd_wq = create_singlethread_workqueue("icssg_cmd_wq"); if (!emac->cmd_wq) { ret = -ENOMEM; @@ -1522,6 +1646,9 @@ static int prueth_probe(struct platform_device *pdev) np = dev->of_node; + BUILD_BUG_ON_MSG((sizeof(struct prueth_swdata) > PRUETH_NAV_SW_DATA_SIZE), + "insufficient SW_DATA size"); + prueth = devm_kzalloc(dev, sizeof(*prueth), GFP_KERNEL); if (!prueth) return -ENOMEM; diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.h b/drivers/net/ethernet/ti/icssg/icssg_prueth.h index 329b46e9ee53..1dd76e2adfcd 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.h +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.h @@ -8,6 +8,8 @@ #ifndef __NET_TI_ICSSG_PRUETH_H #define __NET_TI_ICSSG_PRUETH_H +#include <linux/bpf.h> +#include <linux/bpf_trace.h> #include <linux/etherdevice.h> #include <linux/genalloc.h> #include <linux/if_vlan.h> @@ -33,6 +35,8 @@ #include <linux/dma/k3-udma-glue.h> #include <net/devlink.h> +#include <net/xdp.h> +#include <net/page_pool/helpers.h> #include "icssg_config.h" #include "icss_iep.h" @@ -131,6 +135,26 @@ struct prueth_rx_chn { u32 descs_num; unsigned int irq[ICSSG_MAX_RFLOWS]; /* separate irq per flow */ char name[32]; + struct page_pool *pg_pool; + struct xdp_rxq_info xdp_rxq; +}; + +enum prueth_swdata_type { + PRUETH_SWDATA_INVALID = 0, + 
PRUETH_SWDATA_SKB, + PRUETH_SWDATA_PAGE, + PRUETH_SWDATA_CMD, + PRUETH_SWDATA_XDPF, +}; + +struct prueth_swdata { + enum prueth_swdata_type type; + union prueth_data { + struct sk_buff *skb; + struct page *page; + u32 cmd; + struct xdp_frame *xdpf; + } data; }; /* There are 4 Tx DMA channels, but the highest priority is CH3 (thread 3) @@ -140,6 +164,12 @@ struct prueth_rx_chn { #define PRUETH_MAX_TX_TS_REQUESTS 50 /* Max simultaneous TX_TS requests */ +/* XDP BPF state */ +#define ICSSG_XDP_PASS 0 +#define ICSSG_XDP_CONSUMED BIT(0) +#define ICSSG_XDP_TX BIT(1) +#define ICSSG_XDP_REDIR BIT(2) + /* Minimum coalesce time in usecs for both Tx and Rx */ #define ICSSG_MIN_COALESCE_USECS 20 @@ -208,8 +238,14 @@ struct prueth_emac { unsigned long rx_pace_timeout_ns; struct netdev_hw_addr_list vlan_mcast_list[MAX_VLAN_ID]; + struct bpf_prog *xdp_prog; + struct xdp_attachment_info xdpi; }; +/* The buf includes headroom compatible with both skb and xdpf */ +#define PRUETH_HEADROOM_NA (max(XDP_PACKET_HEADROOM, NET_SKB_PAD) + NET_IP_ALIGN) +#define PRUETH_HEADROOM ALIGN(PRUETH_HEADROOM_NA, sizeof(long)) + /** * struct prueth_pdata - PRUeth platform data * @fdqring_mode: Free desc queue mode @@ -410,9 +446,10 @@ int prueth_init_rx_chns(struct prueth_emac *emac, struct prueth_rx_chn *rx_chn, char *name, u32 max_rflows, u32 max_desc_num); -int prueth_dma_rx_push(struct prueth_emac *emac, - struct sk_buff *skb, - struct prueth_rx_chn *rx_chn); +int prueth_dma_rx_push_mapped(struct prueth_emac *emac, + struct prueth_rx_chn *rx_chn, + struct page *page, u32 buf_len); +unsigned int prueth_rxbuf_total_len(unsigned int len); void emac_rx_timestamp(struct prueth_emac *emac, struct sk_buff *skb, u32 *psdata); enum netdev_tx icssg_ndo_start_xmit(struct sk_buff *skb, struct net_device *ndev); @@ -441,5 +478,9 @@ void prueth_put_cores(struct prueth *prueth, int slice); /* Revision specific helper */ u64 icssg_ts_to_ns(u32 hi_sw, u32 hi, u32 lo, u32 cycle_time_ns); +u32 emac_xmit_xdp_frame(struct prueth_emac *emac, + struct xdp_frame *xdpf, + struct page *page, + unsigned int q_idx); #endif /* __NET_TI_ICSSG_PRUETH_H */ diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth_sr1.c b/drivers/net/ethernet/ti/icssg/icssg_prueth_sr1.c index 64a19ff39562..ff5f41bf499e 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth_sr1.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth_sr1.c @@ -84,7 +84,7 @@ static int emac_send_command_sr1(struct prueth_emac *emac, u32 cmd) __le32 *data = emac->cmd_data; dma_addr_t desc_dma, buf_dma; struct prueth_tx_chn *tx_chn; - void **swdata; + struct prueth_swdata *swdata; int ret = 0; u32 *epib; @@ -122,7 +122,8 @@ static int emac_send_command_sr1(struct prueth_emac *emac, u32 cmd) cppi5_hdesc_attach_buf(first_desc, buf_dma, pkt_len, buf_dma, pkt_len); swdata = cppi5_hdesc_get_swdata(first_desc); - *swdata = data; + swdata->type = PRUETH_SWDATA_CMD; + swdata->data.cmd = le32_to_cpu(data[0]); cppi5_hdesc_set_pktlen(first_desc, pkt_len); desc_dma = k3_cppi_desc_pool_virt2dma(tx_chn->desc_pool, first_desc); @@ -268,16 +269,16 @@ static int emac_phy_connect(struct prueth_emac *emac) * Returns skb pointer if packet found else NULL * Caller must free the returned skb. 
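The descriptor software data used to be a bare void * that could only carry an skb pointer; the prueth_swdata definition above turns it into a tagged union so completion and cleanup paths can tell skbs, page-pool pages, firmware commands and XDP frames apart (the BUILD_BUG_ON in prueth_probe() keeps it within PRUETH_NAV_SW_DATA_SIZE). A stand-alone model of the dispatch pattern, with kernel types swapped for stand-ins so it compiles on its own:

#include <stdio.h>

enum swdata_type { SWDATA_INVALID, SWDATA_SKB, SWDATA_PAGE,
                   SWDATA_CMD, SWDATA_XDPF };

struct swdata {
        enum swdata_type type;
        union {
                void *skb;
                void *page;
                unsigned int cmd;
                void *xdpf;
        } data;
};

static void tx_complete(const struct swdata *sw)
{
        switch (sw->type) {
        case SWDATA_SKB:  printf("consume skb\n");          break;
        case SWDATA_XDPF: printf("return xdp frame\n");     break;
        default:          printf("drop: type %d\n", sw->type); break;
        }
}

int main(void)
{
        struct swdata sw = { .type = SWDATA_CMD, .data.cmd = 0x8101 };
        tx_complete(&sw);
        return 0;
}
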
*/ -static struct sk_buff *prueth_process_rx_mgm(struct prueth_emac *emac, - u32 flow_id) +static struct page *prueth_process_rx_mgm(struct prueth_emac *emac, + u32 flow_id) { struct prueth_rx_chn *rx_chn = &emac->rx_mgm_chn; struct net_device *ndev = emac->ndev; struct cppi5_host_desc_t *desc_rx; - struct sk_buff *skb, *new_skb; + struct page *page, *new_page; + struct prueth_swdata *swdata; dma_addr_t desc_dma, buf_dma; - u32 buf_dma_len, pkt_len; - void **swdata; + u32 buf_dma_len; int ret; ret = k3_udma_glue_pop_rx_chn(rx_chn->rx_chn, flow_id, &desc_dma); @@ -299,34 +300,31 @@ static struct sk_buff *prueth_process_rx_mgm(struct prueth_emac *emac, } swdata = cppi5_hdesc_get_swdata(desc_rx); - skb = *swdata; + page = swdata->data.page; cppi5_hdesc_get_obuf(desc_rx, &buf_dma, &buf_dma_len); - pkt_len = cppi5_hdesc_get_pktlen(desc_rx); dma_unmap_single(rx_chn->dma_dev, buf_dma, buf_dma_len, DMA_FROM_DEVICE); k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx); - new_skb = netdev_alloc_skb_ip_align(ndev, PRUETH_MAX_PKT_SIZE); + new_page = page_pool_dev_alloc_pages(rx_chn->pg_pool); /* if allocation fails we drop the packet but push the * descriptor back to the ring with old skb to prevent a stall */ - if (!new_skb) { + if (!new_page) { netdev_err(ndev, - "skb alloc failed, dropped mgm pkt from flow %d\n", + "page alloc failed, dropped mgm pkt from flow %d\n", flow_id); - new_skb = skb; - skb = NULL; /* return NULL */ - } else { - /* return the filled skb */ - skb_put(skb, pkt_len); + new_page = page; + page = NULL; /* return NULL */ } /* queue another DMA */ - ret = prueth_dma_rx_push(emac, new_skb, &emac->rx_mgm_chn); + ret = prueth_dma_rx_push_mapped(emac, &emac->rx_chns, new_page, + PRUETH_MAX_PKT_SIZE); if (WARN_ON(ret < 0)) - dev_kfree_skb_any(new_skb); + page_pool_recycle_direct(rx_chn->pg_pool, new_page); - return skb; + return page; } static void prueth_tx_ts_sr1(struct prueth_emac *emac, @@ -362,14 +360,14 @@ static void prueth_tx_ts_sr1(struct prueth_emac *emac, static irqreturn_t prueth_rx_mgm_ts_thread_sr1(int irq, void *dev_id) { struct prueth_emac *emac = dev_id; - struct sk_buff *skb; + struct page *page; - skb = prueth_process_rx_mgm(emac, PRUETH_RX_MGM_FLOW_TIMESTAMP_SR1); - if (!skb) + page = prueth_process_rx_mgm(emac, PRUETH_RX_MGM_FLOW_TIMESTAMP_SR1); + if (!page) return IRQ_NONE; - prueth_tx_ts_sr1(emac, (void *)skb->data); - dev_kfree_skb_any(skb); + prueth_tx_ts_sr1(emac, (void *)page_address(page)); + page_pool_recycle_direct(page->pp, page); return IRQ_HANDLED; } @@ -377,15 +375,15 @@ static irqreturn_t prueth_rx_mgm_ts_thread_sr1(int irq, void *dev_id) static irqreturn_t prueth_rx_mgm_rsp_thread(int irq, void *dev_id) { struct prueth_emac *emac = dev_id; - struct sk_buff *skb; + struct page *page; u32 rsp; - skb = prueth_process_rx_mgm(emac, PRUETH_RX_MGM_FLOW_RESPONSE_SR1); - if (!skb) + page = prueth_process_rx_mgm(emac, PRUETH_RX_MGM_FLOW_RESPONSE_SR1); + if (!page) return IRQ_NONE; /* Process command response */ - rsp = le32_to_cpu(*(__le32 *)skb->data) & 0xffff0000; + rsp = le32_to_cpu(*(__le32 *)page_address(page)) & 0xffff0000; if (rsp == ICSSG_SHUTDOWN_CMD_SR1) { netdev_dbg(emac->ndev, "f/w Shutdown cmd resp %x\n", rsp); complete(&emac->cmd_complete); @@ -394,7 +392,7 @@ static irqreturn_t prueth_rx_mgm_rsp_thread(int irq, void *dev_id) complete(&emac->cmd_complete); } - dev_kfree_skb_any(skb); + page_pool_recycle_direct(page->pp, page); return IRQ_HANDLED; } diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 2c65f867fd31..66e38ce9cd1d 100644 
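Several of the hunks above recycle with page_pool_recycle_direct(page->pp, page): a page allocated from a page pool keeps a back-pointer to its pool, so a completion path that only holds the page can return it to the correct pool without plumbing the pool pointer through. A kernel-style sketch (the helper name is illustrative, and this is only valid for pages that really came from a page pool):

        static void prueth_recycle_rx_page(struct page *page)
        {
                /* page->pp was set by the pool at allocation time */
                page_pool_recycle_direct(page->pp, page);
        }
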
--- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -18,6 +18,7 @@ #include <net/rtnetlink.h> #include <net/geneve.h> #include <net/gro.h> +#include <net/netdev_lock.h> #include <net/protocol.h> #define GENEVE_NETDEV_VER "0.6" diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c index bac1bb69d63a..0e0fe32d2da4 100644 --- a/drivers/net/hamradio/bpqether.c +++ b/drivers/net/hamradio/bpqether.c @@ -77,6 +77,7 @@ #include <net/ip.h> #include <net/arp.h> +#include <net/netdev_lock.h> #include <net/net_namespace.h> #include <linux/bpqether.h> @@ -107,27 +108,6 @@ struct bpqdev { static LIST_HEAD(bpq_devices); -/* - * bpqether network devices are paired with ethernet devices below them, so - * form a special "super class" of normal ethernet devices; split their locks - * off into a separate class since they always nest. - */ -static struct lock_class_key bpq_netdev_xmit_lock_key; -static struct lock_class_key bpq_netdev_addr_lock_key; - -static void bpq_set_lockdep_class_one(struct net_device *dev, - struct netdev_queue *txq, - void *_unused) -{ - lockdep_set_class(&txq->_xmit_lock, &bpq_netdev_xmit_lock_key); -} - -static void bpq_set_lockdep_class(struct net_device *dev) -{ - lockdep_set_class(&dev->addr_list_lock, &bpq_netdev_addr_lock_key); - netdev_for_each_tx_queue(dev, bpq_set_lockdep_class_one, NULL); -} - /* ------------------------------------------------------------------------ */ @@ -454,6 +434,8 @@ static const struct net_device_ops bpq_netdev_ops = { static void bpq_setup(struct net_device *dev) { + netdev_lockdep_set_classes(dev); + dev->netdev_ops = &bpq_netdev_ops; dev->needs_free_netdev = true; @@ -499,7 +481,6 @@ static int bpq_new_device(struct net_device *edev) err = register_netdevice(ndev); if (err) goto error; - bpq_set_lockdep_class(ndev); /* List protected by RTNL */ list_add_rcu(&bpq->bpq_list, &bpq_devices); diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 9c6501bf27bd..c51b318b8a72 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -29,6 +29,7 @@ #include <linux/bpf.h> #include <net/arp.h> +#include <net/netdev_lock.h> #include <net/route.h> #include <net/sock.h> #include <net/pkt_sched.h> diff --git a/drivers/net/ipa/data/ipa_data-v4.7.c b/drivers/net/ipa/data/ipa_data-v4.7.c index c8c23d9be961..41f212209993 100644 --- a/drivers/net/ipa/data/ipa_data-v4.7.c +++ b/drivers/net/ipa/data/ipa_data-v4.7.c @@ -28,20 +28,18 @@ enum ipa_resource_type { enum ipa_rsrc_group_id { /* Source resource group identifiers */ IPA_RSRC_GROUP_SRC_UL_DL = 0, - IPA_RSRC_GROUP_SRC_UC_RX_Q, IPA_RSRC_GROUP_SRC_COUNT, /* Last in set; not a source group */ /* Destination resource group identifiers */ - IPA_RSRC_GROUP_DST_UL_DL_DPL = 0, - IPA_RSRC_GROUP_DST_UNUSED_1, + IPA_RSRC_GROUP_DST_UL_DL = 0, IPA_RSRC_GROUP_DST_COUNT, /* Last; not a destination group */ }; /* QSB configuration data for an SoC having IPA v4.7 */ static const struct ipa_qsb_data ipa_qsb_data[] = { [IPA_QSB_MASTER_DDR] = { - .max_writes = 8, - .max_reads = 0, /* no limit (hardware max) */ + .max_writes = 12, + .max_reads = 13, .max_reads_beats = 120, }, }; @@ -81,7 +79,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = { }, .endpoint = { .config = { - .resource_group = IPA_RSRC_GROUP_DST_UL_DL_DPL, + .resource_group = IPA_RSRC_GROUP_DST_UL_DL, .aggregation = true, .status_enable = true, .rx = { @@ -106,6 +104,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = { .filter_support = 
true, .config = { .resource_group = IPA_RSRC_GROUP_SRC_UL_DL, + .checksum = true, .qmap = true, .status_enable = true, .tx = { @@ -128,7 +127,8 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = { }, .endpoint = { .config = { - .resource_group = IPA_RSRC_GROUP_DST_UL_DL_DPL, + .resource_group = IPA_RSRC_GROUP_DST_UL_DL, + .checksum = true, .qmap = true, .aggregation = true, .rx = { @@ -197,12 +197,12 @@ static const struct ipa_resource ipa_resource_src[] = { /* Destination resource configuration data for an SoC having IPA v4.7 */ static const struct ipa_resource ipa_resource_dst[] = { [IPA_RESOURCE_TYPE_DST_DATA_SECTORS] = { - .limits[IPA_RSRC_GROUP_DST_UL_DL_DPL] = { + .limits[IPA_RSRC_GROUP_DST_UL_DL] = { .min = 7, .max = 7, }, }, [IPA_RESOURCE_TYPE_DST_DPS_DMARS] = { - .limits[IPA_RSRC_GROUP_DST_UL_DL_DPL] = { + .limits[IPA_RSRC_GROUP_DST_UL_DL] = { .min = 2, .max = 2, }, }, diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index b56144ca2fde..0ed2fd833a5d 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -3,6 +3,7 @@ */ #include <linux/ethtool.h> +#include <net/netdev_lock.h> #include "ipvlan.h" diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index 201fddcd3b1e..1fb6ce6843ad 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -54,6 +54,7 @@ #include <linux/percpu.h> #include <linux/net_tstamp.h> #include <net/net_namespace.h> +#include <net/netdev_lock.h> #include <linux/u64_stats_sync.h> /* blackhole_netdev - a device used for dsts that are marked expired! diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 4de5d63fd577..3d315e30ee47 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -19,6 +19,7 @@ #include <net/gro_cells.h> #include <net/macsec.h> #include <net/dst_metadata.h> +#include <net/netdev_lock.h> #include <linux/phy.h> #include <linux/byteorder/generic.h> #include <linux/if_arp.h> diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 4e9d54be887c..d0dfa6bca6cc 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -28,6 +28,7 @@ #include <linux/if_macvlan.h> #include <linux/hash.h> #include <linux/workqueue.h> +#include <net/netdev_lock.h> #include <net/rtnetlink.h> #include <net/xfrm.h> #include <linux/netpoll.h> diff --git a/drivers/net/mctp/mctp-i2c.c b/drivers/net/mctp/mctp-i2c.c index e3dcdeacc12c..d74d47dd6e04 100644 --- a/drivers/net/mctp/mctp-i2c.c +++ b/drivers/net/mctp/mctp-i2c.c @@ -583,6 +583,7 @@ static int mctp_i2c_header_create(struct sk_buff *skb, struct net_device *dev, struct mctp_i2c_hdr *hdr; struct mctp_hdr *mhdr; u8 lldst, llsrc; + int rc; if (len > MCTP_I2C_MAXMTU) return -EMSGSIZE; @@ -593,6 +594,10 @@ static int mctp_i2c_header_create(struct sk_buff *skb, struct net_device *dev, lldst = *((u8 *)daddr); llsrc = *((u8 *)saddr); + rc = skb_cow_head(skb, sizeof(struct mctp_i2c_hdr)); + if (rc) + return rc; + skb_push(skb, sizeof(struct mctp_i2c_hdr)); skb_reset_mac_header(skb); hdr = (void *)skb_mac_header(skb); diff --git a/drivers/net/mctp/mctp-i3c.c b/drivers/net/mctp/mctp-i3c.c index d247fe483c58..c678f79aa356 100644 --- a/drivers/net/mctp/mctp-i3c.c +++ b/drivers/net/mctp/mctp-i3c.c @@ -506,6 +506,14 @@ static int mctp_i3c_header_create(struct sk_buff *skb, struct net_device *dev, const void *saddr, unsigned int len) { struct mctp_i3c_internal_hdr *ihdr; + int rc; + + if (!daddr || !saddr) + return -EINVAL; + + rc = skb_cow_head(skb, sizeof(struct mctp_i3c_internal_hdr)); + if (rc) + 
return rc; skb_push(skb, sizeof(struct mctp_i3c_internal_hdr)); skb_reset_mac_header(skb); diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c index 608953d4f98d..49537d3c4120 100644 --- a/drivers/net/netdevsim/bpf.c +++ b/drivers/net/netdevsim/bpf.c @@ -296,7 +296,8 @@ static int nsim_setup_prog_checks(struct netdevsim *ns, struct netdev_bpf *bpf) NSIM_EA(bpf->extack, "attempt to load offloaded prog to drv"); return -EINVAL; } - if (ns->netdev->mtu > NSIM_XDP_MAX_MTU) { + if (bpf->prog && !bpf->prog->aux->xdp_has_frags && + ns->netdev->mtu > NSIM_XDP_MAX_MTU) { NSIM_EA(bpf->extack, "MTU too large w/ XDP enabled"); return -EINVAL; } diff --git a/drivers/net/netdevsim/ethtool.c b/drivers/net/netdevsim/ethtool.c index 7ab358616e03..4d191a3293c7 100644 --- a/drivers/net/netdevsim/ethtool.c +++ b/drivers/net/netdevsim/ethtool.c @@ -107,10 +107,8 @@ nsim_set_channels(struct net_device *dev, struct ethtool_channels *ch) struct netdevsim *ns = netdev_priv(dev); int err; - netdev_lock(dev); err = netif_set_real_num_queues(dev, ch->combined_count, ch->combined_count); - netdev_unlock(dev); if (err) return err; diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index a41dc79e9c2e..b67af4651185 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -25,6 +25,7 @@ #include <net/page_pool/helpers.h> #include <net/netlink.h> #include <net/net_shaper.h> +#include <net/netdev_lock.h> #include <net/pkt_cls.h> #include <net/rtnetlink.h> #include <net/udp_tunnel.h> @@ -115,7 +116,8 @@ static int nsim_change_mtu(struct net_device *dev, int new_mtu) { struct netdevsim *ns = netdev_priv(dev); - if (ns->xdp.prog && new_mtu > NSIM_XDP_MAX_MTU) + if (ns->xdp.prog && !ns->xdp.prog->aux->xdp_has_frags && + new_mtu > NSIM_XDP_MAX_MTU) return -EBUSY; WRITE_ONCE(dev->mtu, new_mtu); @@ -402,7 +404,7 @@ static int nsim_init_napi(struct netdevsim *ns) for (i = 0; i < dev->num_rx_queues; i++) { rq = ns->rq[i]; - netif_napi_add_config(dev, &rq->napi, nsim_poll, i); + netif_napi_add_config_locked(dev, &rq->napi, nsim_poll, i); } for (i = 0; i < dev->num_rx_queues; i++) { @@ -422,7 +424,7 @@ err_pp_destroy: } for (i = 0; i < dev->num_rx_queues; i++) - __netif_napi_del(&ns->rq[i]->napi); + __netif_napi_del_locked(&ns->rq[i]->napi); return err; } @@ -452,7 +454,7 @@ static void nsim_enable_napi(struct netdevsim *ns) struct nsim_rq *rq = ns->rq[i]; netif_queue_set_napi(dev, i, NETDEV_QUEUE_TYPE_RX, &rq->napi); - napi_enable(&rq->napi); + napi_enable_locked(&rq->napi); } } @@ -461,6 +463,8 @@ static int nsim_open(struct net_device *dev) struct netdevsim *ns = netdev_priv(dev); int err; + netdev_assert_locked(dev); + err = nsim_init_napi(ns); if (err) return err; @@ -478,8 +482,8 @@ static void nsim_del_napi(struct netdevsim *ns) for (i = 0; i < dev->num_rx_queues; i++) { struct nsim_rq *rq = ns->rq[i]; - napi_disable(&rq->napi); - __netif_napi_del(&rq->napi); + napi_disable_locked(&rq->napi); + __netif_napi_del_locked(&rq->napi); } synchronize_net(); @@ -494,6 +498,8 @@ static int nsim_stop(struct net_device *dev) struct netdevsim *ns = netdev_priv(dev); struct netdevsim *peer; + netdev_assert_locked(dev); + netif_carrier_off(dev); peer = rtnl_dereference(ns->peer); if (peer) @@ -679,7 +685,8 @@ nsim_queue_mem_alloc(struct net_device *dev, void *per_queue_mem, int idx) goto err_free; if (!ns->rq_reset_mode) - netif_napi_add_config(dev, &qmem->rq->napi, nsim_poll, idx); + netif_napi_add_config_locked(dev, &qmem->rq->napi, nsim_poll, + idx); return 0; @@ 
-696,7 +703,7 @@ static void nsim_queue_mem_free(struct net_device *dev, void *per_queue_mem) page_pool_destroy(qmem->pp); if (qmem->rq) { if (!ns->rq_reset_mode) - netif_napi_del(&qmem->rq->napi); + netif_napi_del_locked(&qmem->rq->napi); page_pool_destroy(qmem->rq->page_pool); nsim_queue_free(qmem->rq); } @@ -708,9 +715,11 @@ nsim_queue_start(struct net_device *dev, void *per_queue_mem, int idx) struct nsim_queue_mem *qmem = per_queue_mem; struct netdevsim *ns = netdev_priv(dev); + netdev_assert_locked(dev); + if (ns->rq_reset_mode == 1) { ns->rq[idx]->page_pool = qmem->pp; - napi_enable(&ns->rq[idx]->napi); + napi_enable_locked(&ns->rq[idx]->napi); return 0; } @@ -718,15 +727,17 @@ nsim_queue_start(struct net_device *dev, void *per_queue_mem, int idx) * here we want to test various call orders. */ if (ns->rq_reset_mode == 2) { - netif_napi_del(&ns->rq[idx]->napi); - netif_napi_add_config(dev, &qmem->rq->napi, nsim_poll, idx); + netif_napi_del_locked(&ns->rq[idx]->napi); + netif_napi_add_config_locked(dev, &qmem->rq->napi, nsim_poll, + idx); } else if (ns->rq_reset_mode == 3) { - netif_napi_add_config(dev, &qmem->rq->napi, nsim_poll, idx); - netif_napi_del(&ns->rq[idx]->napi); + netif_napi_add_config_locked(dev, &qmem->rq->napi, nsim_poll, + idx); + netif_napi_del_locked(&ns->rq[idx]->napi); } ns->rq[idx] = qmem->rq; - napi_enable(&ns->rq[idx]->napi); + napi_enable_locked(&ns->rq[idx]->napi); return 0; } @@ -736,7 +747,9 @@ static int nsim_queue_stop(struct net_device *dev, void *per_queue_mem, int idx) struct nsim_queue_mem *qmem = per_queue_mem; struct netdevsim *ns = netdev_priv(dev); - napi_disable(&ns->rq[idx]->napi); + netdev_assert_locked(dev); + + napi_disable_locked(&ns->rq[idx]->napi); if (ns->rq_reset_mode == 1) { qmem->pp = ns->rq[idx]->page_pool; @@ -775,7 +788,7 @@ nsim_qreset_write(struct file *file, const char __user *data, if (ret != 2) return -EINVAL; - rtnl_lock(); + netdev_lock(ns->netdev); if (queue >= ns->netdev->real_num_rx_queues) { ret = -EINVAL; goto exit_unlock; @@ -789,7 +802,7 @@ nsim_qreset_write(struct file *file, const char __user *data, ret = count; exit_unlock: - rtnl_unlock(); + netdev_unlock(ns->netdev); return ret; } diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile index 8f9ba5e8290d..23ce205ae91d 100644 --- a/drivers/net/phy/Makefile +++ b/drivers/net/phy/Makefile @@ -3,7 +3,7 @@ libphy-y := phy.o phy-c45.o phy-core.o phy_device.o \ linkmode.o phy_link_topology.o \ - phy_package.o + phy_package.o phy_caps.o mdio-bus-y += mdio_bus.o mdio_device.o ifdef CONFIG_MDIO_DEVICE diff --git a/drivers/net/phy/nxp-c45-tja11xx.c b/drivers/net/phy/nxp-c45-tja11xx.c index 63945fe58227..250a018d5546 100644 --- a/drivers/net/phy/nxp-c45-tja11xx.c +++ b/drivers/net/phy/nxp-c45-tja11xx.c @@ -25,6 +25,11 @@ /* Same id: TJA1120, TJA1121 */ #define PHY_ID_TJA_1120 0x001BB031 +#define VEND1_DEVICE_ID3 0x0004 +#define TJA1120_DEV_ID3_SILICON_VERSION GENMASK(15, 12) +#define TJA1120_DEV_ID3_SAMPLE_TYPE GENMASK(11, 8) +#define DEVICE_ID3_SAMPLE_TYPE_R 0x9 + #define VEND1_DEVICE_CONTROL 0x0040 #define DEVICE_CONTROL_RESET BIT(15) #define DEVICE_CONTROL_CONFIG_GLOBAL_EN BIT(14) @@ -112,6 +117,9 @@ #define MII_BASIC_CONFIG_RMII 0x5 #define MII_BASIC_CONFIG_MII 0x4 +#define VEND1_SGMII_BASIC_CONTROL 0xB000 +#define SGMII_LPM BIT(11) + #define VEND1_SYMBOL_ERROR_CNT_XTD 0x8351 #define EXTENDED_CNT_EN BIT(15) #define VEND1_MONITOR_STATUS 0xAC80 @@ -1596,6 +1604,63 @@ static int nxp_c45_set_phy_mode(struct phy_device *phydev) return 0; } +/* Errata: ES_TJA1120 and 
ES_TJA1121 Rev. 1.0 — 28 November 2024 Section 3.1 & 3.2 */ +static void nxp_c45_tja1120_errata(struct phy_device *phydev) +{ + bool macsec_ability, sgmii_ability; + int silicon_version, sample_type; + int phy_abilities; + int ret = 0; + + ret = phy_read_mmd(phydev, MDIO_MMD_VEND1, VEND1_DEVICE_ID3); + if (ret < 0) + return; + + sample_type = FIELD_GET(TJA1120_DEV_ID3_SAMPLE_TYPE, ret); + if (sample_type != DEVICE_ID3_SAMPLE_TYPE_R) + return; + + silicon_version = FIELD_GET(TJA1120_DEV_ID3_SILICON_VERSION, ret); + + phy_abilities = phy_read_mmd(phydev, MDIO_MMD_VEND1, + VEND1_PORT_ABILITIES); + macsec_ability = !!(phy_abilities & MACSEC_ABILITY); + sgmii_ability = !!(phy_abilities & SGMII_ABILITY); + if ((!macsec_ability && silicon_version == 2) || + (macsec_ability && silicon_version == 1)) { + /* TJA1120/TJA1121 PHY configuration errata workaround. + * Apply PHY writes sequence before link up. + */ + if (!macsec_ability) { + phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x01F8, 0x4b95); + phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x01F9, 0xf3cd); + } else { + phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x01F8, 0x89c7); + phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x01F9, 0x0893); + } + + phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x0476, 0x58a0); + + phy_write_mmd(phydev, MDIO_MMD_PMAPMD, 0x8921, 0xa3a); + phy_write_mmd(phydev, MDIO_MMD_PMAPMD, 0x89F1, 0x16c1); + + phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x01F8, 0x0); + phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x01F9, 0x0); + + if (sgmii_ability) { + /* TJA1120B/TJA1121B SGMII PCS restart errata workaround. + * Put SGMII PCS into power down mode and back up. + */ + phy_set_bits_mmd(phydev, MDIO_MMD_VEND1, + VEND1_SGMII_BASIC_CONTROL, + SGMII_LPM); + phy_clear_bits_mmd(phydev, MDIO_MMD_VEND1, + VEND1_SGMII_BASIC_CONTROL, + SGMII_LPM); + } + } +} + static int nxp_c45_config_init(struct phy_device *phydev) { int ret; @@ -1612,6 +1677,9 @@ static int nxp_c45_config_init(struct phy_device *phydev) phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x01F8, 1); phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x01F9, 2); + if (phy_id_compare(phydev->phy_id, PHY_ID_TJA_1120, GENMASK(31, 4))) + nxp_c45_tja1120_errata(phydev); + phy_set_bits_mmd(phydev, MDIO_MMD_VEND1, VEND1_PHY_CONFIG, PHY_CONFIG_AUTO); diff --git a/drivers/net/phy/nxp-tja11xx.c b/drivers/net/phy/nxp-tja11xx.c index ed7fa26bac8e..601094fe24ca 100644 --- a/drivers/net/phy/nxp-tja11xx.c +++ b/drivers/net/phy/nxp-tja11xx.c @@ -21,12 +21,14 @@ #define PHY_ID_TJA1100 0x0180dc40 #define PHY_ID_TJA1101 0x0180dd00 #define PHY_ID_TJA1102 0x0180dc80 +#define PHY_ID_TJA1102S 0x0180dc90 #define MII_ECTRL 17 #define MII_ECTRL_LINK_CONTROL BIT(15) #define MII_ECTRL_POWER_MODE_MASK GENMASK(14, 11) #define MII_ECTRL_POWER_MODE_NO_CHANGE (0x0 << 11) #define MII_ECTRL_POWER_MODE_NORMAL (0x3 << 11) +#define MII_ECTRL_POWER_MODE_SLEEP (0xa << 11) #define MII_ECTRL_POWER_MODE_STANDBY (0xc << 11) #define MII_ECTRL_CABLE_TEST BIT(5) #define MII_ECTRL_CONFIG_EN BIT(2) @@ -78,6 +80,9 @@ #define MII_COMMCFG 27 #define MII_COMMCFG_AUTO_OP BIT(15) +#define MII_CFG3 28 +#define MII_CFG3_PHY_EN BIT(0) + /* Configure REF_CLK as input in RMII mode */ #define TJA110X_RMII_MODE_REFCLK_IN BIT(0) @@ -179,6 +184,14 @@ static int tja11xx_wakeup(struct phy_device *phydev) return ret; return tja11xx_enable_link_control(phydev); + case MII_ECTRL_POWER_MODE_SLEEP: + switch (phydev->phy_id & PHY_ID_MASK) { + case PHY_ID_TJA1102S: + /* Enable PHY, maybe it is disabled due to pin strapping */ + return phy_set_bits(phydev, MII_CFG3, MII_CFG3_PHY_EN); + default: + return 0; + } 
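The TJA1120 errata hunk above gates the workaround on fields of VEND1_DEVICE_ID3: bits 15..12 hold the silicon version and bits 11..8 the sample type, which must read 0x9 ("R" samples) for the workaround to be considered at all. A stand-alone decode of a hypothetical register value:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint16_t id3 = 0x2930; /* hypothetical VEND1_DEVICE_ID3 readout */

        unsigned silicon = (id3 >> 12) & 0xf; /* GENMASK(15, 12) -> 2 */
        unsigned sample  = (id3 >> 8) & 0xf;  /* GENMASK(11, 8)  -> 9 */

        printf("sample_type=0x%x silicon=%u -> workaround %s\n",
               sample, silicon,
               sample == 0x9 ? "considered" : "skipped");
        return 0;
}
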
default: break; } @@ -316,6 +329,7 @@ static int tja11xx_config_init(struct phy_device *phydev) if (ret) return ret; break; + case PHY_ID_TJA1102S: case PHY_ID_TJA1101: reg_mask = MII_CFG1_INTERFACE_MODE_MASK; ret = tja11xx_get_interface_mode(phydev); @@ -883,6 +897,29 @@ static struct phy_driver tja11xx_driver[] = { .handle_interrupt = tja11xx_handle_interrupt, .cable_test_start = tja11xx_cable_test_start, .cable_test_get_status = tja11xx_cable_test_get_status, + }, { + PHY_ID_MATCH_MODEL(PHY_ID_TJA1102S), + .name = "NXP TJA1102S", + .features = PHY_BASIC_T1_FEATURES, + .flags = PHY_POLL_CABLE_TEST, + .probe = tja11xx_probe, + .soft_reset = tja11xx_soft_reset, + .config_aneg = tja11xx_config_aneg, + .config_init = tja11xx_config_init, + .read_status = tja11xx_read_status, + .get_sqi = tja11xx_get_sqi, + .get_sqi_max = tja11xx_get_sqi_max, + .suspend = genphy_suspend, + .resume = genphy_resume, + .set_loopback = genphy_loopback, + /* Statistics */ + .get_sset_count = tja11xx_get_sset_count, + .get_strings = tja11xx_get_strings, + .get_stats = tja11xx_get_stats, + .config_intr = tja11xx_config_intr, + .handle_interrupt = tja11xx_handle_interrupt, + .cable_test_start = tja11xx_cable_test_start, + .cable_test_get_status = tja11xx_cable_test_get_status, } }; @@ -892,6 +929,7 @@ static const struct mdio_device_id __maybe_unused tja11xx_tbl[] = { { PHY_ID_MATCH_MODEL(PHY_ID_TJA1100) }, { PHY_ID_MATCH_MODEL(PHY_ID_TJA1101) }, { PHY_ID_MATCH_MODEL(PHY_ID_TJA1102) }, + { PHY_ID_MATCH_MODEL(PHY_ID_TJA1102S) }, { } }; diff --git a/drivers/net/phy/phy-caps.h b/drivers/net/phy/phy-caps.h new file mode 100644 index 000000000000..157759966650 --- /dev/null +++ b/drivers/net/phy/phy-caps.h @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * link caps internal header, for link modes <-> capabilities <-> interfaces + * conversions. 
+ */ + +#ifndef __PHY_CAPS_H +#define __PHY_CAPS_H + +#include <linux/ethtool.h> +#include <linux/phy.h> + +enum { + LINK_CAPA_10HD = 0, + LINK_CAPA_10FD, + LINK_CAPA_100HD, + LINK_CAPA_100FD, + LINK_CAPA_1000HD, + LINK_CAPA_1000FD, + LINK_CAPA_2500FD, + LINK_CAPA_5000FD, + LINK_CAPA_10000FD, + LINK_CAPA_20000FD, + LINK_CAPA_25000FD, + LINK_CAPA_40000FD, + LINK_CAPA_50000FD, + LINK_CAPA_56000FD, + LINK_CAPA_100000FD, + LINK_CAPA_200000FD, + LINK_CAPA_400000FD, + LINK_CAPA_800000FD, + + __LINK_CAPA_MAX, +}; + +#define LINK_CAPA_ALL GENMASK((__LINK_CAPA_MAX - 1), 0) + +struct link_capabilities { + int speed; + unsigned int duplex; + __ETHTOOL_DECLARE_LINK_MODE_MASK(linkmodes); +}; + +int phy_caps_init(void); + +size_t phy_caps_speeds(unsigned int *speeds, size_t size, + unsigned long *linkmodes); +void phy_caps_linkmode_max_speed(u32 max_speed, unsigned long *linkmodes); +bool phy_caps_valid(int speed, int duplex, const unsigned long *linkmodes); +void phy_caps_linkmodes(unsigned long caps, unsigned long *linkmodes); +unsigned long phy_caps_from_interface(phy_interface_t interface); + +const struct link_capabilities * +phy_caps_lookup_by_linkmode(const unsigned long *linkmodes); + +const struct link_capabilities * +phy_caps_lookup_by_linkmode_rev(const unsigned long *linkmodes, bool fdx_only); + +const struct link_capabilities * +phy_caps_lookup(int speed, unsigned int duplex, const unsigned long *supported, + bool exact); + +#endif /* __PHY_CAPS_H */ diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c index 154d29be6293..e177037f9110 100644 --- a/drivers/net/phy/phy-core.c +++ b/drivers/net/phy/phy-core.c @@ -8,6 +8,7 @@ #include "phylib.h" #include "phylib-internal.h" +#include "phy-caps.h" /** * phy_speed_to_str - Return a string representing the PHY link speed @@ -156,221 +157,9 @@ int phy_interface_num_ports(phy_interface_t interface) } EXPORT_SYMBOL_GPL(phy_interface_num_ports); -/* A mapping of all SUPPORTED settings to speed/duplex. This table - * must be grouped by speed and sorted in descending match priority - * - iow, descending speed. 
- */ - -#define PHY_SETTING(s, d, b) { .speed = SPEED_ ## s, .duplex = DUPLEX_ ## d, \ - .bit = ETHTOOL_LINK_MODE_ ## b ## _BIT} - -static const struct phy_setting settings[] = { - /* 800G */ - PHY_SETTING( 800000, FULL, 800000baseCR8_Full ), - PHY_SETTING( 800000, FULL, 800000baseKR8_Full ), - PHY_SETTING( 800000, FULL, 800000baseDR8_Full ), - PHY_SETTING( 800000, FULL, 800000baseDR8_2_Full ), - PHY_SETTING( 800000, FULL, 800000baseSR8_Full ), - PHY_SETTING( 800000, FULL, 800000baseVR8_Full ), - PHY_SETTING( 800000, FULL, 800000baseCR4_Full ), - PHY_SETTING( 800000, FULL, 800000baseKR4_Full ), - PHY_SETTING( 800000, FULL, 800000baseDR4_Full ), - PHY_SETTING( 800000, FULL, 800000baseDR4_2_Full ), - PHY_SETTING( 800000, FULL, 800000baseSR4_Full ), - PHY_SETTING( 800000, FULL, 800000baseVR4_Full ), - /* 400G */ - PHY_SETTING( 400000, FULL, 400000baseCR8_Full ), - PHY_SETTING( 400000, FULL, 400000baseKR8_Full ), - PHY_SETTING( 400000, FULL, 400000baseLR8_ER8_FR8_Full ), - PHY_SETTING( 400000, FULL, 400000baseDR8_Full ), - PHY_SETTING( 400000, FULL, 400000baseSR8_Full ), - PHY_SETTING( 400000, FULL, 400000baseCR4_Full ), - PHY_SETTING( 400000, FULL, 400000baseKR4_Full ), - PHY_SETTING( 400000, FULL, 400000baseLR4_ER4_FR4_Full ), - PHY_SETTING( 400000, FULL, 400000baseDR4_Full ), - PHY_SETTING( 400000, FULL, 400000baseSR4_Full ), - PHY_SETTING( 400000, FULL, 400000baseCR2_Full ), - PHY_SETTING( 400000, FULL, 400000baseKR2_Full ), - PHY_SETTING( 400000, FULL, 400000baseDR2_Full ), - PHY_SETTING( 400000, FULL, 400000baseDR2_2_Full ), - PHY_SETTING( 400000, FULL, 400000baseSR2_Full ), - PHY_SETTING( 400000, FULL, 400000baseVR2_Full ), - /* 200G */ - PHY_SETTING( 200000, FULL, 200000baseCR4_Full ), - PHY_SETTING( 200000, FULL, 200000baseKR4_Full ), - PHY_SETTING( 200000, FULL, 200000baseLR4_ER4_FR4_Full ), - PHY_SETTING( 200000, FULL, 200000baseDR4_Full ), - PHY_SETTING( 200000, FULL, 200000baseSR4_Full ), - PHY_SETTING( 200000, FULL, 200000baseCR2_Full ), - PHY_SETTING( 200000, FULL, 200000baseKR2_Full ), - PHY_SETTING( 200000, FULL, 200000baseLR2_ER2_FR2_Full ), - PHY_SETTING( 200000, FULL, 200000baseDR2_Full ), - PHY_SETTING( 200000, FULL, 200000baseSR2_Full ), - PHY_SETTING( 200000, FULL, 200000baseCR_Full ), - PHY_SETTING( 200000, FULL, 200000baseKR_Full ), - PHY_SETTING( 200000, FULL, 200000baseDR_Full ), - PHY_SETTING( 200000, FULL, 200000baseDR_2_Full ), - PHY_SETTING( 200000, FULL, 200000baseSR_Full ), - PHY_SETTING( 200000, FULL, 200000baseVR_Full ), - /* 100G */ - PHY_SETTING( 100000, FULL, 100000baseCR4_Full ), - PHY_SETTING( 100000, FULL, 100000baseKR4_Full ), - PHY_SETTING( 100000, FULL, 100000baseLR4_ER4_Full ), - PHY_SETTING( 100000, FULL, 100000baseSR4_Full ), - PHY_SETTING( 100000, FULL, 100000baseCR2_Full ), - PHY_SETTING( 100000, FULL, 100000baseKR2_Full ), - PHY_SETTING( 100000, FULL, 100000baseLR2_ER2_FR2_Full ), - PHY_SETTING( 100000, FULL, 100000baseDR2_Full ), - PHY_SETTING( 100000, FULL, 100000baseSR2_Full ), - PHY_SETTING( 100000, FULL, 100000baseCR_Full ), - PHY_SETTING( 100000, FULL, 100000baseKR_Full ), - PHY_SETTING( 100000, FULL, 100000baseLR_ER_FR_Full ), - PHY_SETTING( 100000, FULL, 100000baseDR_Full ), - PHY_SETTING( 100000, FULL, 100000baseSR_Full ), - /* 56G */ - PHY_SETTING( 56000, FULL, 56000baseCR4_Full ), - PHY_SETTING( 56000, FULL, 56000baseKR4_Full ), - PHY_SETTING( 56000, FULL, 56000baseLR4_Full ), - PHY_SETTING( 56000, FULL, 56000baseSR4_Full ), - /* 50G */ - PHY_SETTING( 50000, FULL, 50000baseCR2_Full ), - PHY_SETTING( 50000, FULL, 50000baseKR2_Full ), 
- PHY_SETTING( 50000, FULL, 50000baseSR2_Full ), - PHY_SETTING( 50000, FULL, 50000baseCR_Full ), - PHY_SETTING( 50000, FULL, 50000baseKR_Full ), - PHY_SETTING( 50000, FULL, 50000baseLR_ER_FR_Full ), - PHY_SETTING( 50000, FULL, 50000baseDR_Full ), - PHY_SETTING( 50000, FULL, 50000baseSR_Full ), - /* 40G */ - PHY_SETTING( 40000, FULL, 40000baseCR4_Full ), - PHY_SETTING( 40000, FULL, 40000baseKR4_Full ), - PHY_SETTING( 40000, FULL, 40000baseLR4_Full ), - PHY_SETTING( 40000, FULL, 40000baseSR4_Full ), - /* 25G */ - PHY_SETTING( 25000, FULL, 25000baseCR_Full ), - PHY_SETTING( 25000, FULL, 25000baseKR_Full ), - PHY_SETTING( 25000, FULL, 25000baseSR_Full ), - /* 20G */ - PHY_SETTING( 20000, FULL, 20000baseKR2_Full ), - PHY_SETTING( 20000, FULL, 20000baseMLD2_Full ), - /* 10G */ - PHY_SETTING( 10000, FULL, 10000baseCR_Full ), - PHY_SETTING( 10000, FULL, 10000baseER_Full ), - PHY_SETTING( 10000, FULL, 10000baseKR_Full ), - PHY_SETTING( 10000, FULL, 10000baseKX4_Full ), - PHY_SETTING( 10000, FULL, 10000baseLR_Full ), - PHY_SETTING( 10000, FULL, 10000baseLRM_Full ), - PHY_SETTING( 10000, FULL, 10000baseR_FEC ), - PHY_SETTING( 10000, FULL, 10000baseSR_Full ), - PHY_SETTING( 10000, FULL, 10000baseT_Full ), - /* 5G */ - PHY_SETTING( 5000, FULL, 5000baseT_Full ), - /* 2.5G */ - PHY_SETTING( 2500, FULL, 2500baseT_Full ), - PHY_SETTING( 2500, FULL, 2500baseX_Full ), - /* 1G */ - PHY_SETTING( 1000, FULL, 1000baseT_Full ), - PHY_SETTING( 1000, HALF, 1000baseT_Half ), - PHY_SETTING( 1000, FULL, 1000baseT1_Full ), - PHY_SETTING( 1000, FULL, 1000baseX_Full ), - PHY_SETTING( 1000, FULL, 1000baseKX_Full ), - /* 100M */ - PHY_SETTING( 100, FULL, 100baseT_Full ), - PHY_SETTING( 100, FULL, 100baseT1_Full ), - PHY_SETTING( 100, HALF, 100baseT_Half ), - PHY_SETTING( 100, HALF, 100baseFX_Half ), - PHY_SETTING( 100, FULL, 100baseFX_Full ), - /* 10M */ - PHY_SETTING( 10, FULL, 10baseT_Full ), - PHY_SETTING( 10, HALF, 10baseT_Half ), - PHY_SETTING( 10, FULL, 10baseT1L_Full ), - PHY_SETTING( 10, FULL, 10baseT1S_Full ), - PHY_SETTING( 10, HALF, 10baseT1S_Half ), - PHY_SETTING( 10, HALF, 10baseT1S_P2MP_Half ), - PHY_SETTING( 10, FULL, 10baseT1BRR_Full ), -}; -#undef PHY_SETTING - -/** - * phy_lookup_setting - lookup a PHY setting - * @speed: speed to match - * @duplex: duplex to match - * @mask: allowed link modes - * @exact: an exact match is required - * - * Search the settings array for a setting that matches the speed and - * duplex, and which is supported. - * - * If @exact is unset, either an exact match or %NULL for no match will - * be returned. - * - * If @exact is set, an exact match, the fastest supported setting at - * or below the specified speed, the slowest supported setting, or if - * they all fail, %NULL will be returned. 
- */ -const struct phy_setting * -phy_lookup_setting(int speed, int duplex, const unsigned long *mask, bool exact) -{ - const struct phy_setting *p, *match = NULL, *last = NULL; - int i; - - for (i = 0, p = settings; i < ARRAY_SIZE(settings); i++, p++) { - if (p->bit < __ETHTOOL_LINK_MODE_MASK_NBITS && - test_bit(p->bit, mask)) { - last = p; - if (p->speed == speed && p->duplex == duplex) { - /* Exact match for speed and duplex */ - match = p; - break; - } else if (!exact) { - if (!match && p->speed <= speed) - /* Candidate */ - match = p; - - if (p->speed < speed) - break; - } - } - } - - if (!match && !exact) - match = last; - - return match; -} -EXPORT_SYMBOL_GPL(phy_lookup_setting); - -size_t phy_speeds(unsigned int *speeds, size_t size, - unsigned long *mask) -{ - size_t count; - int i; - - for (i = 0, count = 0; i < ARRAY_SIZE(settings) && count < size; i++) - if (settings[i].bit < __ETHTOOL_LINK_MODE_MASK_NBITS && - test_bit(settings[i].bit, mask) && - (count == 0 || speeds[count - 1] != settings[i].speed)) - speeds[count++] = settings[i].speed; - - return count; -} - -static void __set_linkmode_max_speed(u32 max_speed, unsigned long *addr) -{ - const struct phy_setting *p; - int i; - - for (i = 0, p = settings; i < ARRAY_SIZE(settings); i++, p++) { - if (p->speed > max_speed) - linkmode_clear_bit(p->bit, addr); - else - break; - } -} - static void __set_phy_supported(struct phy_device *phydev, u32 max_speed) { - __set_linkmode_max_speed(max_speed, phydev->supported); + phy_caps_linkmode_max_speed(max_speed, phydev->supported); } /** @@ -496,16 +285,15 @@ EXPORT_SYMBOL_GPL(phy_resolve_aneg_pause); void phy_resolve_aneg_linkmode(struct phy_device *phydev) { __ETHTOOL_DECLARE_LINK_MODE_MASK(common); - int i; + const struct link_capabilities *c; linkmode_and(common, phydev->lp_advertising, phydev->advertising); - for (i = 0; i < ARRAY_SIZE(settings); i++) - if (test_bit(settings[i].bit, common)) { - phydev->speed = settings[i].speed; - phydev->duplex = settings[i].duplex; - break; - } + c = phy_caps_lookup_by_linkmode(common); + if (c) { + phydev->speed = c->speed; + phydev->duplex = c->duplex; + } phy_resolve_aneg_pause(phydev); } @@ -523,7 +311,8 @@ EXPORT_SYMBOL_GPL(phy_resolve_aneg_linkmode); void phy_check_downshift(struct phy_device *phydev) { __ETHTOOL_DECLARE_LINK_MODE_MASK(common); - int i, speed = SPEED_UNKNOWN; + const struct link_capabilities *c; + int speed = SPEED_UNKNOWN; phydev->downshifted_rate = 0; @@ -533,11 +322,9 @@ void phy_check_downshift(struct phy_device *phydev) linkmode_and(common, phydev->lp_advertising, phydev->advertising); - for (i = 0; i < ARRAY_SIZE(settings); i++) - if (test_bit(settings[i].bit, common)) { - speed = settings[i].speed; - break; - } + c = phy_caps_lookup_by_linkmode(common); + if (c) + speed = c->speed; if (speed == SPEED_UNKNOWN || phydev->speed >= speed) return; @@ -551,17 +338,13 @@ void phy_check_downshift(struct phy_device *phydev) static int phy_resolve_min_speed(struct phy_device *phydev, bool fdx_only) { __ETHTOOL_DECLARE_LINK_MODE_MASK(common); - int i = ARRAY_SIZE(settings); + const struct link_capabilities *c; linkmode_and(common, phydev->lp_advertising, phydev->advertising); - while (--i >= 0) { - if (test_bit(settings[i].bit, common)) { - if (fdx_only && settings[i].duplex != DUPLEX_FULL) - continue; - return settings[i].speed; - } - } + c = phy_caps_lookup_by_linkmode_rev(common, fdx_only); + if (c) + return c->speed; return SPEED_UNKNOWN; } @@ -573,7 +356,7 @@ int phy_speed_down_core(struct phy_device *phydev) if 
(min_common_speed == SPEED_UNKNOWN) return -EINVAL; - __set_linkmode_max_speed(min_common_speed, phydev->advertising); + phy_caps_linkmode_max_speed(min_common_speed, phydev->advertising); return 0; } @@ -735,43 +518,6 @@ int __phy_package_read_mmd(struct phy_device *phydev, EXPORT_SYMBOL(__phy_package_read_mmd); /** - * phy_package_read_mmd - read MMD reg relative to PHY package base addr - * @phydev: The phy_device struct - * @addr_offset: The offset to be added to PHY package base_addr - * @devad: The MMD to read from - * @regnum: The register on the MMD to read - * - * Convenience helper for reading a register of an MMD on a given PHY - * using the PHY package base address. The base address is added to - * the addr_offset value. - * - * Same calling rules as for phy_read(); - * - * NOTE: It's assumed that the entire PHY package is either C22 or C45. - */ -int phy_package_read_mmd(struct phy_device *phydev, - unsigned int addr_offset, int devad, - u32 regnum) -{ - int addr = phy_package_address(phydev, addr_offset); - int val; - - if (addr < 0) - return addr; - - if (regnum > (u16)~0 || devad > 32) - return -EINVAL; - - phy_lock_mdio_bus(phydev); - val = mmd_phy_read(phydev->mdio.bus, addr, phydev->is_c45, devad, - regnum); - phy_unlock_mdio_bus(phydev); - - return val; -} -EXPORT_SYMBOL(phy_package_read_mmd); - -/** * __phy_package_write_mmd - write MMD reg relative to PHY package base addr * @phydev: The phy_device struct * @addr_offset: The offset to be added to PHY package base_addr @@ -805,44 +551,6 @@ int __phy_package_write_mmd(struct phy_device *phydev, EXPORT_SYMBOL(__phy_package_write_mmd); /** - * phy_package_write_mmd - write MMD reg relative to PHY package base addr - * @phydev: The phy_device struct - * @addr_offset: The offset to be added to PHY package base_addr - * @devad: The MMD to write to - * @regnum: The register on the MMD to write - * @val: value to write to @regnum - * - * Convenience helper for writing a register of an MMD on a given PHY - * using the PHY package base address. The base address is added to - * the addr_offset value. - * - * Same calling rules as for phy_write(); - * - * NOTE: It's assumed that the entire PHY package is either C22 or C45. 
- */ -int phy_package_write_mmd(struct phy_device *phydev, - unsigned int addr_offset, int devad, - u32 regnum, u16 val) -{ - int addr = phy_package_address(phydev, addr_offset); - int ret; - - if (addr < 0) - return addr; - - if (regnum > (u16)~0 || devad > 32) - return -EINVAL; - - phy_lock_mdio_bus(phydev); - ret = mmd_phy_write(phydev->mdio.bus, addr, phydev->is_c45, devad, - regnum, val); - phy_unlock_mdio_bus(phydev); - - return ret; -} -EXPORT_SYMBOL(phy_package_write_mmd); - -/** * phy_modify_changed - Function for modifying a PHY register * @phydev: the phy_device struct * @regnum: register number to modify diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 16ffc00b419c..562acde89224 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -37,6 +37,7 @@ #include <net/sock.h> #include "phylib-internal.h" +#include "phy-caps.h" #define PHY_STATE_TIME HZ @@ -213,25 +214,6 @@ int phy_aneg_done(struct phy_device *phydev) EXPORT_SYMBOL(phy_aneg_done); /** - * phy_find_valid - find a PHY setting that matches the requested parameters - * @speed: desired speed - * @duplex: desired duplex - * @supported: mask of supported link modes - * - * Locate a supported phy setting that is, in priority order: - * - an exact match for the specified speed and duplex mode - * - a match for the specified speed, or slower speed - * - the slowest supported speed - * Returns the matched phy_setting entry, or %NULL if no supported phy - * settings were found. - */ -static const struct phy_setting * -phy_find_valid(int speed, int duplex, unsigned long *supported) -{ - return phy_lookup_setting(speed, duplex, supported, false); -} - -/** * phy_supported_speeds - return all speeds currently supported by a phy device * @phy: The phy device to return supported speeds of. * @speeds: buffer to store supported speeds in. @@ -245,7 +227,7 @@ unsigned int phy_supported_speeds(struct phy_device *phy, unsigned int *speeds, unsigned int size) { - return phy_speeds(speeds, size, phy->supported); + return phy_caps_speeds(speeds, size, phy->supported); } /** @@ -259,7 +241,7 @@ unsigned int phy_supported_speeds(struct phy_device *phy, */ bool phy_check_valid(int speed, int duplex, unsigned long *features) { - return !!phy_lookup_setting(speed, duplex, features, true); + return phy_caps_valid(speed, duplex, features); } EXPORT_SYMBOL(phy_check_valid); @@ -273,13 +255,14 @@ EXPORT_SYMBOL(phy_check_valid); */ static void phy_sanitize_settings(struct phy_device *phydev) { - const struct phy_setting *setting; + const struct link_capabilities *c; + + c = phy_caps_lookup(phydev->speed, phydev->duplex, phydev->supported, + false); - setting = phy_find_valid(phydev->speed, phydev->duplex, - phydev->supported); - if (setting) { - phydev->speed = setting->speed; - phydev->duplex = setting->duplex; + if (c) { + phydev->speed = c->speed; + phydev->duplex = c->duplex; } else { /* We failed to find anything (no supported speeds?) 
*/ phydev->speed = SPEED_UNKNOWN; diff --git a/drivers/net/phy/phy_caps.c b/drivers/net/phy/phy_caps.c new file mode 100644 index 000000000000..703321689726 --- /dev/null +++ b/drivers/net/phy/phy_caps.c @@ -0,0 +1,359 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <linux/ethtool.h> +#include <linux/linkmode.h> +#include <linux/phy.h> + +#include "phy-caps.h" + +static struct link_capabilities link_caps[__LINK_CAPA_MAX] __ro_after_init = { + { SPEED_10, DUPLEX_HALF, {0} }, /* LINK_CAPA_10HD */ + { SPEED_10, DUPLEX_FULL, {0} }, /* LINK_CAPA_10FD */ + { SPEED_100, DUPLEX_HALF, {0} }, /* LINK_CAPA_100HD */ + { SPEED_100, DUPLEX_FULL, {0} }, /* LINK_CAPA_100FD */ + { SPEED_1000, DUPLEX_HALF, {0} }, /* LINK_CAPA_1000HD */ + { SPEED_1000, DUPLEX_FULL, {0} }, /* LINK_CAPA_1000FD */ + { SPEED_2500, DUPLEX_FULL, {0} }, /* LINK_CAPA_2500FD */ + { SPEED_5000, DUPLEX_FULL, {0} }, /* LINK_CAPA_5000FD */ + { SPEED_10000, DUPLEX_FULL, {0} }, /* LINK_CAPA_10000FD */ + { SPEED_20000, DUPLEX_FULL, {0} }, /* LINK_CAPA_20000FD */ + { SPEED_25000, DUPLEX_FULL, {0} }, /* LINK_CAPA_25000FD */ + { SPEED_40000, DUPLEX_FULL, {0} }, /* LINK_CAPA_40000FD */ + { SPEED_50000, DUPLEX_FULL, {0} }, /* LINK_CAPA_50000FD */ + { SPEED_56000, DUPLEX_FULL, {0} }, /* LINK_CAPA_56000FD */ + { SPEED_100000, DUPLEX_FULL, {0} }, /* LINK_CAPA_100000FD */ + { SPEED_200000, DUPLEX_FULL, {0} }, /* LINK_CAPA_200000FD */ + { SPEED_400000, DUPLEX_FULL, {0} }, /* LINK_CAPA_400000FD */ + { SPEED_800000, DUPLEX_FULL, {0} }, /* LINK_CAPA_800000FD */ +}; + +static int speed_duplex_to_capa(int speed, unsigned int duplex) +{ + if (duplex == DUPLEX_UNKNOWN || + (speed > SPEED_1000 && duplex != DUPLEX_FULL)) + return -EINVAL; + + switch (speed) { + case SPEED_10: return duplex == DUPLEX_FULL ? + LINK_CAPA_10FD : LINK_CAPA_10HD; + case SPEED_100: return duplex == DUPLEX_FULL ? + LINK_CAPA_100FD : LINK_CAPA_100HD; + case SPEED_1000: return duplex == DUPLEX_FULL ? + LINK_CAPA_1000FD : LINK_CAPA_1000HD; + case SPEED_2500: return LINK_CAPA_2500FD; + case SPEED_5000: return LINK_CAPA_5000FD; + case SPEED_10000: return LINK_CAPA_10000FD; + case SPEED_20000: return LINK_CAPA_20000FD; + case SPEED_25000: return LINK_CAPA_25000FD; + case SPEED_40000: return LINK_CAPA_40000FD; + case SPEED_50000: return LINK_CAPA_50000FD; + case SPEED_56000: return LINK_CAPA_56000FD; + case SPEED_100000: return LINK_CAPA_100000FD; + case SPEED_200000: return LINK_CAPA_200000FD; + case SPEED_400000: return LINK_CAPA_400000FD; + case SPEED_800000: return LINK_CAPA_800000FD; + } + + return -EINVAL; +} + +#define for_each_link_caps_asc_speed(cap) \ + for (cap = link_caps; cap < &link_caps[__LINK_CAPA_MAX]; cap++) + +#define for_each_link_caps_desc_speed(cap) \ + for (cap = &link_caps[__LINK_CAPA_MAX - 1]; cap >= link_caps; cap--) + +/** + * phy_caps_init() - Initializes the link_caps array from the link_mode_params. + * + * Returns: 0 if phy caps init was successful, -EINVAL if we found an + * unexpected linkmode setting that requires LINK_CAPS update. 
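+ * + * Called once from phy_init() before the generic PHY drivers register, so + * the lookup helpers below always see a fully populated table.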
+ * + */ +int phy_caps_init(void) +{ + const struct link_mode_info *linkmode; + int i, capa; + + /* Fill the caps array from net/ethtool/common.c */ + for (i = 0; i < __ETHTOOL_LINK_MODE_MASK_NBITS; i++) { + linkmode = &link_mode_params[i]; + capa = speed_duplex_to_capa(linkmode->speed, linkmode->duplex); + + if (capa < 0) { + if (linkmode->speed != SPEED_UNKNOWN) { + pr_err("Unknown speed %d, please update LINK_CAPS\n", + linkmode->speed); + return -EINVAL; + } + continue; + } + + __set_bit(i, link_caps[capa].linkmodes); + } + + return 0; +} + +/** + * phy_caps_speeds() - Fill an array of supported SPEED_* values for given modes + * @speeds: Output array to store the speeds list into + * @size: Size of the output array + * @linkmodes: Linkmodes to get the speeds from + * + * Fills the speeds array with all possible speeds that can be achieved with + * the specified linkmodes. + * + * Returns: The number of speeds filled into the array. If the input array isn't + * big enough to store all speeds, it is filled with as many speeds as fit. + */ +size_t phy_caps_speeds(unsigned int *speeds, size_t size, + unsigned long *linkmodes) +{ + struct link_capabilities *lcap; + size_t count = 0; + + for_each_link_caps_asc_speed(lcap) { + if (linkmode_intersects(lcap->linkmodes, linkmodes) && + (count == 0 || speeds[count - 1] != lcap->speed)) { + speeds[count++] = lcap->speed; + if (count >= size) + break; + } + } + + return count; +} + +/** + * phy_caps_lookup_by_linkmode() - Lookup the fastest matching link_capabilities + * @linkmodes: Linkmodes to match against + * + * Returns: The highest-speed link_capabilities that intersects the given + * linkmodes. In case several DUPLEX_ options exist at that speed, + * DUPLEX_FULL is matched first. NULL is returned if no match. + */ +const struct link_capabilities * +phy_caps_lookup_by_linkmode(const unsigned long *linkmodes) +{ + struct link_capabilities *lcap; + + for_each_link_caps_desc_speed(lcap) + if (linkmode_intersects(lcap->linkmodes, linkmodes)) + return lcap; + + return NULL; +} + +/** + * phy_caps_lookup_by_linkmode_rev() - Lookup the slowest matching link_capabilities + * @linkmodes: Linkmodes to match against + * @fdx_only: Full duplex match only when set + * + * Returns: The lowest-speed link_capabilities that intersects the given + * linkmodes. When set, fdx_only will ignore half-duplex matches. + * NULL is returned if no match. + */ +const struct link_capabilities * +phy_caps_lookup_by_linkmode_rev(const unsigned long *linkmodes, bool fdx_only) +{ + struct link_capabilities *lcap; + + for_each_link_caps_asc_speed(lcap) { + if (fdx_only && lcap->duplex != DUPLEX_FULL) + continue; + + if (linkmode_intersects(lcap->linkmodes, linkmodes)) + return lcap; + } + + return NULL; +} + +/** + * phy_caps_lookup() - Lookup capabilities by speed/duplex that match a mask + * @speed: Speed to match + * @duplex: Duplex to match + * @supported: Mask of linkmodes to match + * @exact: Perform an exact match or not. + * + * Lookup a link_capabilities entry that intersects the supported linkmodes mask, + * and that matches the passed speed and duplex. + * + * When @exact is set, an exact match is performed on speed and duplex, meaning + * that if the linkmodes for the given speed and duplex intersect the supported + * mask, this capability is returned, otherwise we don't have a match and return + * NULL. + * + * When @exact is not set, we return either an exact match, or matching capabilities + * at lower speed, or the lowest matching speed, or NULL.
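+ * + * A minimal usage sketch (illustrative, mirroring the phy_sanitize_settings() + * caller in phy.c): + * + * c = phy_caps_lookup(phydev->speed, phydev->duplex, + * phydev->supported, false); + * if (c) { + * phydev->speed = c->speed; + * phydev->duplex = c->duplex; + * }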
+ * + * Returns: a matched link_capabilities according to the above process, NULL + * otherwise. + */ +const struct link_capabilities * +phy_caps_lookup(int speed, unsigned int duplex, const unsigned long *supported, + bool exact) +{ + const struct link_capabilities *lcap, *last = NULL; + + for_each_link_caps_desc_speed(lcap) { + if (linkmode_intersects(lcap->linkmodes, supported)) { + last = lcap; + /* Exact match on speed and duplex */ + if (lcap->speed == speed && lcap->duplex == duplex) { + return lcap; + } else if (!exact) { + if (lcap->speed <= speed) + return lcap; + } + } + } + + if (!exact) + return last; + + return NULL; +} +EXPORT_SYMBOL_GPL(phy_caps_lookup); + +/** + * phy_caps_linkmode_max_speed() - Clamp a linkmodes set to a max speed + * @max_speed: Speed limit for the linkmode set + * @linkmodes: Linkmodes to limit + */ +void phy_caps_linkmode_max_speed(u32 max_speed, unsigned long *linkmodes) +{ + struct link_capabilities *lcap; + + for_each_link_caps_desc_speed(lcap) + if (lcap->speed > max_speed) + linkmode_andnot(linkmodes, linkmodes, lcap->linkmodes); + else + break; +} + +/** + * phy_caps_valid() - Validate a linkmodes set against a given speed and duplex + * @speed: input speed to validate + * @duplex: input duplex to validate. Passing DUPLEX_UNKNOWN is never valid + * @linkmodes: The linkmodes to validate + * + * Returns: True if at least one of the linkmodes in @linkmodes can function at + * the given speed and duplex, false otherwise. + */ +bool phy_caps_valid(int speed, int duplex, const unsigned long *linkmodes) +{ + int capa = speed_duplex_to_capa(speed, duplex); + + if (capa < 0) + return false; + + return linkmode_intersects(link_caps[capa].linkmodes, linkmodes); +} + +/** + * phy_caps_linkmodes() - Convert a bitfield of capabilities into linkmodes + * @caps: The list of caps, each bit corresponding to a LINK_CAPA value + * @linkmodes: The set of linkmodes to fill. Must be previously initialized. + */ +void phy_caps_linkmodes(unsigned long caps, unsigned long *linkmodes) +{ + unsigned long capa; + + for_each_set_bit(capa, &caps, __LINK_CAPA_MAX) + linkmode_or(linkmodes, linkmodes, link_caps[capa].linkmodes); +} +EXPORT_SYMBOL_GPL(phy_caps_linkmodes); + +/** + * phy_caps_from_interface() - Get the link capa from a given PHY interface + * @interface: The PHY interface we want to get the possible Speed/Duplex from + * + * Returns: A bitmask of LINK_CAPA_xxx values that can be achieved with the + * provided interface.
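+ * + * For example, PHY_INTERFACE_MODE_SGMII falls through the 1000M, 10M and + * 100M groups below, yielding LINK_CAPA_1000HD/FD, LINK_CAPA_10HD/FD and + * LINK_CAPA_100HD/FD.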
+ */ +unsigned long phy_caps_from_interface(phy_interface_t interface) +{ + unsigned long link_caps = 0; + + switch (interface) { + case PHY_INTERFACE_MODE_USXGMII: + link_caps |= BIT(LINK_CAPA_10000FD) | BIT(LINK_CAPA_5000FD); + fallthrough; + + case PHY_INTERFACE_MODE_10G_QXGMII: + link_caps |= BIT(LINK_CAPA_2500FD); + fallthrough; + + case PHY_INTERFACE_MODE_RGMII_TXID: + case PHY_INTERFACE_MODE_RGMII_RXID: + case PHY_INTERFACE_MODE_RGMII_ID: + case PHY_INTERFACE_MODE_RGMII: + case PHY_INTERFACE_MODE_PSGMII: + case PHY_INTERFACE_MODE_QSGMII: + case PHY_INTERFACE_MODE_QUSGMII: + case PHY_INTERFACE_MODE_SGMII: + case PHY_INTERFACE_MODE_GMII: + link_caps |= BIT(LINK_CAPA_1000HD) | BIT(LINK_CAPA_1000FD); + fallthrough; + + case PHY_INTERFACE_MODE_REVRMII: + case PHY_INTERFACE_MODE_RMII: + case PHY_INTERFACE_MODE_SMII: + case PHY_INTERFACE_MODE_REVMII: + case PHY_INTERFACE_MODE_MII: + link_caps |= BIT(LINK_CAPA_10HD) | BIT(LINK_CAPA_10FD); + fallthrough; + + case PHY_INTERFACE_MODE_100BASEX: + link_caps |= BIT(LINK_CAPA_100HD) | BIT(LINK_CAPA_100FD); + break; + + case PHY_INTERFACE_MODE_TBI: + case PHY_INTERFACE_MODE_MOCA: + case PHY_INTERFACE_MODE_RTBI: + case PHY_INTERFACE_MODE_1000BASEX: + link_caps |= BIT(LINK_CAPA_1000HD); + fallthrough; + case PHY_INTERFACE_MODE_1000BASEKX: + case PHY_INTERFACE_MODE_TRGMII: + link_caps |= BIT(LINK_CAPA_1000FD); + break; + + case PHY_INTERFACE_MODE_2500BASEX: + link_caps |= BIT(LINK_CAPA_2500FD); + break; + + case PHY_INTERFACE_MODE_5GBASER: + link_caps |= BIT(LINK_CAPA_5000FD); + break; + + case PHY_INTERFACE_MODE_XGMII: + case PHY_INTERFACE_MODE_RXAUI: + case PHY_INTERFACE_MODE_XAUI: + case PHY_INTERFACE_MODE_10GBASER: + case PHY_INTERFACE_MODE_10GKR: + link_caps |= BIT(LINK_CAPA_10000FD); + break; + + case PHY_INTERFACE_MODE_25GBASER: + link_caps |= BIT(LINK_CAPA_25000FD); + break; + + case PHY_INTERFACE_MODE_XLGMII: + link_caps |= BIT(LINK_CAPA_40000FD); + break; + + case PHY_INTERFACE_MODE_INTERNAL: + link_caps |= LINK_CAPA_ALL; + break; + + case PHY_INTERFACE_MODE_NA: + case PHY_INTERFACE_MODE_MAX: + break; + } + + return link_caps; +} +EXPORT_SYMBOL_GPL(phy_caps_from_interface); diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index b2d32fbc8c85..3f734e847e8e 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -42,6 +42,7 @@ #include <linux/unistd.h> #include "phylib-internal.h" +#include "phy-caps.h" MODULE_DESCRIPTION("PHY library"); MODULE_AUTHOR("Andy Fleming"); @@ -2119,7 +2120,7 @@ EXPORT_SYMBOL(genphy_check_and_restart_aneg); int __genphy_config_aneg(struct phy_device *phydev, bool changed) { __ETHTOOL_DECLARE_LINK_MODE_MASK(fixed_advert); - const struct phy_setting *set; + const struct link_capabilities *c; unsigned long *advert; int err; @@ -2145,10 +2146,11 @@ int __genphy_config_aneg(struct phy_device *phydev, bool changed) } else { linkmode_zero(fixed_advert); - set = phy_lookup_setting(phydev->speed, phydev->duplex, - phydev->supported, true); - if (set) - linkmode_set_bit(set->bit, fixed_advert); + c = phy_caps_lookup(phydev->speed, phydev->duplex, + phydev->supported, true); + if (c) + linkmode_and(fixed_advert, phydev->supported, + c->linkmodes); advert = fixed_advert; } @@ -3558,6 +3560,10 @@ static int __init phy_init(void) if (rc) goto err_ethtool_phy_ops; + rc = phy_caps_init(); + if (rc) + goto err_mdio_bus; + features_init(); rc = phy_driver_register(&genphy_c45_driver, THIS_MODULE); diff --git a/drivers/net/phy/phylib.h b/drivers/net/phy/phylib.h index 
f0e499fed1f7..c15484a805b3 100644 --- a/drivers/net/phy/phylib.h +++ b/drivers/net/phy/phylib.h @@ -15,6 +15,12 @@ int __phy_package_read(struct phy_device *phydev, unsigned int addr_offset, u32 regnum); int __phy_package_write(struct phy_device *phydev, unsigned int addr_offset, u32 regnum, u16 val); +int __phy_package_read_mmd(struct phy_device *phydev, + unsigned int addr_offset, int devad, + u32 regnum); +int __phy_package_write_mmd(struct phy_device *phydev, + unsigned int addr_offset, int devad, + u32 regnum, u16 val); bool phy_package_init_once(struct phy_device *phydev); bool phy_package_probe_once(struct phy_device *phydev); int phy_package_join(struct phy_device *phydev, int base_addr, size_t priv_size); diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index a3f64b6d2d34..0f70a7f3dfcc 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -20,6 +20,7 @@ #include <linux/timer.h> #include <linux/workqueue.h> +#include "phy-caps.h" #include "sfp.h" #include "swphy.h" @@ -291,6 +292,61 @@ static int phylink_interface_max_speed(phy_interface_t interface) return SPEED_UNKNOWN; } +static struct { + unsigned long mask; + int speed; + unsigned int duplex; + unsigned int caps_bit; +} phylink_caps_params[] = { + { MAC_400000FD, SPEED_400000, DUPLEX_FULL, BIT(LINK_CAPA_400000FD) }, + { MAC_200000FD, SPEED_200000, DUPLEX_FULL, BIT(LINK_CAPA_200000FD) }, + { MAC_100000FD, SPEED_100000, DUPLEX_FULL, BIT(LINK_CAPA_100000FD) }, + { MAC_56000FD, SPEED_56000, DUPLEX_FULL, BIT(LINK_CAPA_56000FD) }, + { MAC_50000FD, SPEED_50000, DUPLEX_FULL, BIT(LINK_CAPA_50000FD) }, + { MAC_40000FD, SPEED_40000, DUPLEX_FULL, BIT(LINK_CAPA_40000FD) }, + { MAC_25000FD, SPEED_25000, DUPLEX_FULL, BIT(LINK_CAPA_25000FD) }, + { MAC_20000FD, SPEED_20000, DUPLEX_FULL, BIT(LINK_CAPA_20000FD) }, + { MAC_10000FD, SPEED_10000, DUPLEX_FULL, BIT(LINK_CAPA_10000FD) }, + { MAC_5000FD, SPEED_5000, DUPLEX_FULL, BIT(LINK_CAPA_5000FD) }, + { MAC_2500FD, SPEED_2500, DUPLEX_FULL, BIT(LINK_CAPA_2500FD) }, + { MAC_1000FD, SPEED_1000, DUPLEX_FULL, BIT(LINK_CAPA_1000FD) }, + { MAC_1000HD, SPEED_1000, DUPLEX_HALF, BIT(LINK_CAPA_1000HD) }, + { MAC_100FD, SPEED_100, DUPLEX_FULL, BIT(LINK_CAPA_100FD) }, + { MAC_100HD, SPEED_100, DUPLEX_HALF, BIT(LINK_CAPA_100HD) }, + { MAC_10FD, SPEED_10, DUPLEX_FULL, BIT(LINK_CAPA_10FD) }, + { MAC_10HD, SPEED_10, DUPLEX_HALF, BIT(LINK_CAPA_10HD) }, +}; + +/** + * phylink_caps_to_link_caps() - Convert a set of MAC capabilities to LINK caps + * @caps: A set of MAC capabilities + * + * Returns: The corresponding set of LINK_CAPA as defined in phy-caps.h + */ +static unsigned long phylink_caps_to_link_caps(unsigned long caps) +{ + unsigned long link_caps = 0; + int i; + + for (i = 0; i < ARRAY_SIZE(phylink_caps_params); i++) + if (caps & phylink_caps_params[i].mask) + link_caps |= phylink_caps_params[i].caps_bit; + + return link_caps; +} + +static unsigned long phylink_link_caps_to_mac_caps(unsigned long link_caps) +{ + unsigned long caps = 0; + int i; + + for (i = 0; i < ARRAY_SIZE(phylink_caps_params); i++) + if (link_caps & phylink_caps_params[i].caps_bit) + caps |= phylink_caps_params[i].mask; + + return caps; +} + /** * phylink_caps_to_linkmodes() - Convert capabilities to ethtool link modes * @linkmodes: ethtool linkmode mask (must be already initialised) @@ -302,172 +358,17 @@ static int phylink_interface_max_speed(phy_interface_t interface) static void phylink_caps_to_linkmodes(unsigned long *linkmodes, unsigned long caps) { + unsigned long link_caps =
phylink_caps_to_link_caps(caps); + if (caps & MAC_SYM_PAUSE) __set_bit(ETHTOOL_LINK_MODE_Pause_BIT, linkmodes); if (caps & MAC_ASYM_PAUSE) __set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, linkmodes); - if (caps & MAC_10HD) { - __set_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, linkmodes); - __set_bit(ETHTOOL_LINK_MODE_10baseT1S_Half_BIT, linkmodes); - __set_bit(ETHTOOL_LINK_MODE_10baseT1S_P2MP_Half_BIT, linkmodes); - } - - if (caps & MAC_10FD) { - __set_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT, linkmodes); - __set_bit(ETHTOOL_LINK_MODE_10baseT1L_Full_BIT, linkmodes); - __set_bit(ETHTOOL_LINK_MODE_10baseT1S_Full_BIT, linkmodes); - } - - if (caps & MAC_100HD) { - __set_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, linkmodes); - __set_bit(ETHTOOL_LINK_MODE_100baseFX_Half_BIT, linkmodes); - } - - if (caps & MAC_100FD) { - __set_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, linkmodes); - __set_bit(ETHTOOL_LINK_MODE_100baseT1_Full_BIT, linkmodes); - __set_bit(ETHTOOL_LINK_MODE_100baseFX_Full_BIT, linkmodes); - } - - if (caps & MAC_1000HD) - __set_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, linkmodes); - - if (caps & MAC_1000FD) { - __set_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, linkmodes); - __set_bit(ETHTOOL_LINK_MODE_1000baseKX_Full_BIT, linkmodes); - __set_bit(ETHTOOL_LINK_MODE_1000baseX_Full_BIT, linkmodes); - __set_bit(ETHTOOL_LINK_MODE_1000baseT1_Full_BIT, linkmodes); - } - - if (caps & MAC_2500FD) { - __set_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, linkmodes); - __set_bit(ETHTOOL_LINK_MODE_2500baseX_Full_BIT, linkmodes); - } - - if (caps & MAC_5000FD) - __set_bit(ETHTOOL_LINK_MODE_5000baseT_Full_BIT, linkmodes); - - if (caps & MAC_10000FD) { - __set_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT, linkmodes); - __set_bit(ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT, linkmodes); - __set_bit(ETHTOOL_LINK_MODE_10000baseKR_Full_BIT, linkmodes); - __set_bit(ETHTOOL_LINK_MODE_10000baseR_FEC_BIT, linkmodes); - __set_bit(ETHTOOL_LINK_MODE_10000baseCR_Full_BIT, linkmodes); - __set_bit(ETHTOOL_LINK_MODE_10000baseSR_Full_BIT, linkmodes); - __set_bit(ETHTOOL_LINK_MODE_10000baseLR_Full_BIT, linkmodes); - __set_bit(ETHTOOL_LINK_MODE_10000baseLRM_Full_BIT, linkmodes); - __set_bit(ETHTOOL_LINK_MODE_10000baseER_Full_BIT, linkmodes); - } - - if (caps & MAC_25000FD) { - __set_bit(ETHTOOL_LINK_MODE_25000baseCR_Full_BIT, linkmodes); - __set_bit(ETHTOOL_LINK_MODE_25000baseKR_Full_BIT, linkmodes); - __set_bit(ETHTOOL_LINK_MODE_25000baseSR_Full_BIT, linkmodes); - } - - if (caps & MAC_40000FD) { - __set_bit(ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT, linkmodes); - __set_bit(ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT, linkmodes); - __set_bit(ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT, linkmodes); - __set_bit(ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT, linkmodes); - } - - if (caps & MAC_50000FD) { - __set_bit(ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT, linkmodes); - __set_bit(ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT, linkmodes); - __set_bit(ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT, linkmodes); - __set_bit(ETHTOOL_LINK_MODE_50000baseKR_Full_BIT, linkmodes); - __set_bit(ETHTOOL_LINK_MODE_50000baseSR_Full_BIT, linkmodes); - __set_bit(ETHTOOL_LINK_MODE_50000baseCR_Full_BIT, linkmodes); - __set_bit(ETHTOOL_LINK_MODE_50000baseLR_ER_FR_Full_BIT, - linkmodes); - __set_bit(ETHTOOL_LINK_MODE_50000baseDR_Full_BIT, linkmodes); - } - - if (caps & MAC_56000FD) { - __set_bit(ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT, linkmodes); - __set_bit(ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT, linkmodes); - __set_bit(ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT, linkmodes); - 
__set_bit(ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT, linkmodes); - } - - if (caps & MAC_100000FD) { - __set_bit(ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT, linkmodes); - __set_bit(ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT, linkmodes); - __set_bit(ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT, linkmodes); - __set_bit(ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT, - linkmodes); - __set_bit(ETHTOOL_LINK_MODE_100000baseKR2_Full_BIT, linkmodes); - __set_bit(ETHTOOL_LINK_MODE_100000baseSR2_Full_BIT, linkmodes); - __set_bit(ETHTOOL_LINK_MODE_100000baseCR2_Full_BIT, linkmodes); - __set_bit(ETHTOOL_LINK_MODE_100000baseLR2_ER2_FR2_Full_BIT, - linkmodes); - __set_bit(ETHTOOL_LINK_MODE_100000baseDR2_Full_BIT, linkmodes); - __set_bit(ETHTOOL_LINK_MODE_100000baseKR_Full_BIT, linkmodes); - __set_bit(ETHTOOL_LINK_MODE_100000baseSR_Full_BIT, linkmodes); - __set_bit(ETHTOOL_LINK_MODE_100000baseLR_ER_FR_Full_BIT, - linkmodes); - __set_bit(ETHTOOL_LINK_MODE_100000baseCR_Full_BIT, linkmodes); - __set_bit(ETHTOOL_LINK_MODE_100000baseDR_Full_BIT, linkmodes); - } - - if (caps & MAC_200000FD) { - __set_bit(ETHTOOL_LINK_MODE_200000baseKR4_Full_BIT, linkmodes); - __set_bit(ETHTOOL_LINK_MODE_200000baseSR4_Full_BIT, linkmodes); - __set_bit(ETHTOOL_LINK_MODE_200000baseLR4_ER4_FR4_Full_BIT, - linkmodes); - __set_bit(ETHTOOL_LINK_MODE_200000baseDR4_Full_BIT, linkmodes); - __set_bit(ETHTOOL_LINK_MODE_200000baseCR4_Full_BIT, linkmodes); - __set_bit(ETHTOOL_LINK_MODE_200000baseKR2_Full_BIT, linkmodes); - __set_bit(ETHTOOL_LINK_MODE_200000baseSR2_Full_BIT, linkmodes); - __set_bit(ETHTOOL_LINK_MODE_200000baseLR2_ER2_FR2_Full_BIT, - linkmodes); - __set_bit(ETHTOOL_LINK_MODE_200000baseDR2_Full_BIT, linkmodes); - __set_bit(ETHTOOL_LINK_MODE_200000baseCR2_Full_BIT, linkmodes); - } - - if (caps & MAC_400000FD) { - __set_bit(ETHTOOL_LINK_MODE_400000baseKR8_Full_BIT, linkmodes); - __set_bit(ETHTOOL_LINK_MODE_400000baseSR8_Full_BIT, linkmodes); - __set_bit(ETHTOOL_LINK_MODE_400000baseLR8_ER8_FR8_Full_BIT, - linkmodes); - __set_bit(ETHTOOL_LINK_MODE_400000baseDR8_Full_BIT, linkmodes); - __set_bit(ETHTOOL_LINK_MODE_400000baseCR8_Full_BIT, linkmodes); - __set_bit(ETHTOOL_LINK_MODE_400000baseKR4_Full_BIT, linkmodes); - __set_bit(ETHTOOL_LINK_MODE_400000baseSR4_Full_BIT, linkmodes); - __set_bit(ETHTOOL_LINK_MODE_400000baseLR4_ER4_FR4_Full_BIT, - linkmodes); - __set_bit(ETHTOOL_LINK_MODE_400000baseDR4_Full_BIT, linkmodes); - __set_bit(ETHTOOL_LINK_MODE_400000baseCR4_Full_BIT, linkmodes); - } + phy_caps_linkmodes(link_caps, linkmodes); } -static struct { - unsigned long mask; - int speed; - unsigned int duplex; -} phylink_caps_params[] = { - { MAC_400000FD, SPEED_400000, DUPLEX_FULL }, - { MAC_200000FD, SPEED_200000, DUPLEX_FULL }, - { MAC_100000FD, SPEED_100000, DUPLEX_FULL }, - { MAC_56000FD, SPEED_56000, DUPLEX_FULL }, - { MAC_50000FD, SPEED_50000, DUPLEX_FULL }, - { MAC_40000FD, SPEED_40000, DUPLEX_FULL }, - { MAC_25000FD, SPEED_25000, DUPLEX_FULL }, - { MAC_20000FD, SPEED_20000, DUPLEX_FULL }, - { MAC_10000FD, SPEED_10000, DUPLEX_FULL }, - { MAC_5000FD, SPEED_5000, DUPLEX_FULL }, - { MAC_2500FD, SPEED_2500, DUPLEX_FULL }, - { MAC_1000FD, SPEED_1000, DUPLEX_FULL }, - { MAC_1000HD, SPEED_1000, DUPLEX_HALF }, - { MAC_100FD, SPEED_100, DUPLEX_FULL }, - { MAC_100HD, SPEED_100, DUPLEX_HALF }, - { MAC_10FD, SPEED_10, DUPLEX_FULL }, - { MAC_10HD, SPEED_10, DUPLEX_HALF }, -}; - /** * phylink_limit_mac_speed - limit the phylink_config to a maximum speed * @config: pointer to a &struct phylink_config @@ -523,86 +424,12 @@ static unsigned long 
phylink_get_capabilities(phy_interface_t interface, unsigned long mac_capabilities, int rate_matching) { + unsigned long link_caps = phy_caps_from_interface(interface); int max_speed = phylink_interface_max_speed(interface); unsigned long caps = MAC_SYM_PAUSE | MAC_ASYM_PAUSE; unsigned long matched_caps = 0; - switch (interface) { - case PHY_INTERFACE_MODE_USXGMII: - caps |= MAC_10000FD | MAC_5000FD; - fallthrough; - - case PHY_INTERFACE_MODE_10G_QXGMII: - caps |= MAC_2500FD; - fallthrough; - - case PHY_INTERFACE_MODE_RGMII_TXID: - case PHY_INTERFACE_MODE_RGMII_RXID: - case PHY_INTERFACE_MODE_RGMII_ID: - case PHY_INTERFACE_MODE_RGMII: - case PHY_INTERFACE_MODE_PSGMII: - case PHY_INTERFACE_MODE_QSGMII: - case PHY_INTERFACE_MODE_QUSGMII: - case PHY_INTERFACE_MODE_SGMII: - case PHY_INTERFACE_MODE_GMII: - caps |= MAC_1000HD | MAC_1000FD; - fallthrough; - - case PHY_INTERFACE_MODE_REVRMII: - case PHY_INTERFACE_MODE_RMII: - case PHY_INTERFACE_MODE_SMII: - case PHY_INTERFACE_MODE_REVMII: - case PHY_INTERFACE_MODE_MII: - caps |= MAC_10HD | MAC_10FD; - fallthrough; - - case PHY_INTERFACE_MODE_100BASEX: - caps |= MAC_100HD | MAC_100FD; - break; - - case PHY_INTERFACE_MODE_TBI: - case PHY_INTERFACE_MODE_MOCA: - case PHY_INTERFACE_MODE_RTBI: - case PHY_INTERFACE_MODE_1000BASEX: - caps |= MAC_1000HD; - fallthrough; - case PHY_INTERFACE_MODE_1000BASEKX: - case PHY_INTERFACE_MODE_TRGMII: - caps |= MAC_1000FD; - break; - - case PHY_INTERFACE_MODE_2500BASEX: - caps |= MAC_2500FD; - break; - - case PHY_INTERFACE_MODE_5GBASER: - caps |= MAC_5000FD; - break; - - case PHY_INTERFACE_MODE_XGMII: - case PHY_INTERFACE_MODE_RXAUI: - case PHY_INTERFACE_MODE_XAUI: - case PHY_INTERFACE_MODE_10GBASER: - case PHY_INTERFACE_MODE_10GKR: - caps |= MAC_10000FD; - break; - - case PHY_INTERFACE_MODE_25GBASER: - caps |= MAC_25000FD; - break; - - case PHY_INTERFACE_MODE_XLGMII: - caps |= MAC_40000FD; - break; - - case PHY_INTERFACE_MODE_INTERNAL: - caps |= ~0; - break; - - case PHY_INTERFACE_MODE_NA: - case PHY_INTERFACE_MODE_MAX: - break; - } + caps |= phylink_link_caps_to_mac_caps(link_caps); switch (rate_matching) { case RATE_MATCH_OPEN_LOOP: @@ -801,12 +628,26 @@ static int phylink_validate(struct phylink *pl, unsigned long *supported, return phylink_validate_mac_and_pcs(pl, supported, state); } +static void phylink_fill_fixedlink_supported(unsigned long *supported) +{ + linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, supported); + linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT, supported); + linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, supported); + linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, supported); + linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, supported); + linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, supported); + linkmode_set_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, supported); + linkmode_set_bit(ETHTOOL_LINK_MODE_5000baseT_Full_BIT, supported); + linkmode_set_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT, supported); +} + static int phylink_parse_fixedlink(struct phylink *pl, const struct fwnode_handle *fwnode) { + __ETHTOOL_DECLARE_LINK_MODE_MASK(match) = { 0, }; __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, }; + const struct link_capabilities *c; struct fwnode_handle *fixed_node; - const struct phy_setting *s; struct gpio_desc *desc; u32 speed; int ret; @@ -874,12 +715,16 @@ static int phylink_parse_fixedlink(struct phylink *pl, phylink_warn(pl, "fixed link specifies half duplex for %dMbps link?\n", pl->link_config.speed); - linkmode_fill(pl->supported); + 
linkmode_zero(pl->supported); + phylink_fill_fixedlink_supported(pl->supported); + linkmode_copy(pl->link_config.advertising, pl->supported); phylink_validate(pl, pl->supported, &pl->link_config); - s = phy_lookup_setting(pl->link_config.speed, pl->link_config.duplex, - pl->supported, true); + c = phy_caps_lookup(pl->link_config.speed, pl->link_config.duplex, + pl->supported, true); + if (c) + linkmode_and(match, pl->supported, c->linkmodes); linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, mask); linkmode_set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, mask); @@ -888,9 +733,10 @@ static int phylink_parse_fixedlink(struct phylink *pl, phylink_set(pl->supported, MII); - if (s) { - __set_bit(s->bit, pl->supported); - __set_bit(s->bit, pl->link_config.lp_advertising); + if (c) { + linkmode_or(pl->supported, pl->supported, match); + linkmode_or(pl->link_config.lp_advertising, + pl->link_config.lp_advertising, match); } else { phylink_warn(pl, "fixed link %s duplex %dMbps not recognised\n", pl->link_config.duplex == DUPLEX_FULL ? "full" : "half", @@ -1878,21 +1724,20 @@ static int phylink_register_sfp(struct phylink *pl, int phylink_set_fixed_link(struct phylink *pl, const struct phylink_link_state *state) { - const struct phy_setting *s; + const struct link_capabilities *c; unsigned long *adv; if (pl->cfg_link_an_mode != MLO_AN_PHY || !state || !test_bit(PHYLINK_DISABLE_STOPPED, &pl->phylink_disable_state)) return -EINVAL; - s = phy_lookup_setting(state->speed, state->duplex, - pl->supported, true); - if (!s) + c = phy_caps_lookup(state->speed, state->duplex, + pl->supported, true); + if (!c) return -EINVAL; adv = pl->link_config.advertising; - linkmode_zero(adv); - linkmode_set_bit(s->bit, adv); + linkmode_and(adv, pl->supported, c->linkmodes); linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, adv); pl->link_config.speed = state->speed; @@ -2852,8 +2697,8 @@ int phylink_ethtool_ksettings_set(struct phylink *pl, const struct ethtool_link_ksettings *kset) { __ETHTOOL_DECLARE_LINK_MODE_MASK(support); + const struct link_capabilities *c; struct phylink_link_state config; - const struct phy_setting *s; ASSERT_RTNL(); @@ -2896,23 +2741,23 @@ int phylink_ethtool_ksettings_set(struct phylink *pl, /* Autonegotiation disabled, select a suitable speed and * duplex. */ - s = phy_lookup_setting(kset->base.speed, kset->base.duplex, - pl->supported, false); - if (!s) + c = phy_caps_lookup(kset->base.speed, kset->base.duplex, + pl->supported, false); + if (!c) return -EINVAL; /* If we have a fixed link, refuse to change link parameters. * If the link parameters match, accept them but do nothing. 
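* A fixed link always operates at exactly the configured speed and * duplex, so any other requested combination must be rejected.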
*/ if (pl->req_link_an_mode == MLO_AN_FIXED) { - if (s->speed != pl->link_config.speed || - s->duplex != pl->link_config.duplex) + if (c->speed != pl->link_config.speed || + c->duplex != pl->link_config.duplex) return -EINVAL; return 0; } - config.speed = s->speed; - config.duplex = s->duplex; + config.speed = c->speed; + config.duplex = c->duplex; break; case AUTONEG_ENABLE: @@ -3158,24 +3003,6 @@ int phylink_get_eee_err(struct phylink *pl) EXPORT_SYMBOL_GPL(phylink_get_eee_err); /** - * phylink_init_eee() - init and check the EEE features - * @pl: a pointer to a &struct phylink returned from phylink_create() - * @clk_stop_enable: allow PHY to stop receive clock - * - * Must be called either with RTNL held or within mac_link_up() - */ -int phylink_init_eee(struct phylink *pl, bool clk_stop_enable) -{ - int ret = -EOPNOTSUPP; - - if (pl->phydev) - ret = phy_init_eee(pl->phydev, clk_stop_enable); - - return ret; -} -EXPORT_SYMBOL_GPL(phylink_init_eee); - -/** * phylink_ethtool_get_eee() - read the energy efficient ethernet parameters * @pl: a pointer to a &struct phylink returned from phylink_create() * @eee: a pointer to a &struct ethtool_keee for the read parameters diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 815108c98b78..53463767cc43 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -45,6 +45,7 @@ #include <linux/slab.h> #include <linux/file.h> #include <linux/unaligned.h> +#include <net/netdev_lock.h> #include <net/slhc_vj.h> #include <linux/atomic.h> #include <linux/refcount.h> @@ -72,6 +73,17 @@ #define PPP_PROTO_LEN 2 #define PPP_LCP_HDRLEN 4 +/* The filter instructions generated by libpcap are constructed + * assuming a four-byte PPP header on each packet, where the last + * 2 bytes are the protocol field defined in the RFC and the first + * byte of the first 2 bytes indicates the direction. + * The second byte is currently unused, but we still need to initialize + * it to prevent crafted BPF programs from reading it which would + * cause reading of uninitialized data. + */ +#define PPP_FILTER_OUTBOUND_TAG 0x0100 +#define PPP_FILTER_INBOUND_TAG 0x0000 + /* * An instance of /dev/ppp can be associated with either a ppp * interface unit or a ppp channel. In both cases, file->private_data @@ -1765,10 +1777,10 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb) if (proto < 0x8000) { #ifdef CONFIG_PPP_FILTER - /* check if we should pass this packet */ - /* the filter instructions are constructed assuming - a four-byte PPP header on each packet */ - *(u8 *)skb_push(skb, 2) = 1; + /* check if the packet passes the pass and active filters. + * See comment for PPP_FILTER_OUTBOUND_TAG above. + */ + *(__be16 *)skb_push(skb, 2) = htons(PPP_FILTER_OUTBOUND_TAG); if (ppp->pass_filter && bpf_prog_run(ppp->pass_filter, skb) == 0) { if (ppp->debug & 1) @@ -2485,14 +2497,13 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb) /* network protocol frame - give it to the kernel */ #ifdef CONFIG_PPP_FILTER - /* check if the packet passes the pass and active filters */ - /* the filter instructions are constructed assuming - a four-byte PPP header on each packet */ if (ppp->pass_filter || ppp->active_filter) { if (skb_unclone(skb, GFP_ATOMIC)) goto err; - - *(u8 *)skb_push(skb, 2) = 0; + /* Check if the packet passes the pass and active filters. + * See comment for PPP_FILTER_INBOUND_TAG above.
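+ * The tag is stored in network byte order, so the direction byte the + * filter inspects is 0x00 for inbound frames and 0x01 for outbound + * ones (see ppp_send_frame()).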
+ */ + *(__be16 *)skb_push(skb, 2) = htons(PPP_FILTER_INBOUND_TAG); if (ppp->pass_filter && bpf_prog_run(ppp->pass_filter, skb) == 0) { if (ppp->debug & 1) diff --git a/drivers/net/tap.c b/drivers/net/tap.c index d4ece538f1b2..4382f5e323b0 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -1017,7 +1017,7 @@ static long tap_ioctl(struct file *file, unsigned int cmd, rtnl_unlock(); return -ENOLINK; } - ret = dev_set_mac_address_user(tap->dev, &sa, NULL); + ret = dev_set_mac_address(tap->dev, &sa, NULL); tap_put_tap_dev(tap); rtnl_unlock(); return ret; diff --git a/drivers/net/team/team_core.c b/drivers/net/team/team_core.c index fb917560d0a2..d8fc0c79745d 100644 --- a/drivers/net/team/team_core.c +++ b/drivers/net/team/team_core.c @@ -23,6 +23,7 @@ #include <linux/rtnetlink.h> #include <net/rtnetlink.h> #include <net/genetlink.h> +#include <net/netdev_lock.h> #include <net/netlink.h> #include <net/sch_generic.h> #include <linux/if_team.h> diff --git a/drivers/net/tun.c b/drivers/net/tun.c index d8f4d3e996a7..55b60cb68d00 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1535,7 +1535,8 @@ static bool tun_can_build_skb(struct tun_struct *tun, struct tun_file *tfile, static struct sk_buff *__tun_build_skb(struct tun_file *tfile, struct page_frag *alloc_frag, char *buf, - int buflen, int len, int pad) + int buflen, int len, int pad, + int metasize) { struct sk_buff *skb = build_skb(buf, buflen); @@ -1544,6 +1545,8 @@ static struct sk_buff *__tun_build_skb(struct tun_file *tfile, skb_reserve(skb, pad); skb_put(skb, len); + if (metasize) + skb_metadata_set(skb, metasize); skb_set_owner_w(skb, tfile->socket.sk); get_page(alloc_frag->page); @@ -1603,6 +1606,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun, char *buf; size_t copied; int pad = TUN_RX_PAD; + int metasize = 0; int err = 0; rcu_read_lock(); @@ -1630,7 +1634,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun, if (hdr->gso_type || !xdp_prog) { *skb_xdp = 1; return __tun_build_skb(tfile, alloc_frag, buf, buflen, len, - pad); + pad, metasize); } *skb_xdp = 0; @@ -1644,7 +1648,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun, u32 act; xdp_init_buff(&xdp, buflen, &tfile->xdp_rxq); - xdp_prepare_buff(&xdp, buf, pad, len, false); + xdp_prepare_buff(&xdp, buf, pad, len, true); act = bpf_prog_run_xdp(xdp_prog, &xdp); if (act == XDP_REDIRECT || act == XDP_TX) { @@ -1665,12 +1669,18 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun, pad = xdp.data - xdp.data_hard_start; len = xdp.data_end - xdp.data; + + /* It is known that the xdp_buff was prepared with metadata + * support, so the metasize will never be negative. 
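+ * xdp_prepare_buff() was called above with meta_valid set, which + * starts data_meta out equal to data, so data - data_meta is zero or + * whatever size the XDP program grew the metadata to.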
+ */ + metasize = xdp.data - xdp.data_meta; } bpf_net_ctx_clear(bpf_net_ctx); rcu_read_unlock(); local_bh_enable(); - return __tun_build_skb(tfile, alloc_frag, buf, buflen, len, pad); + return __tun_build_skb(tfile, alloc_frag, buf, buflen, len, pad, + metasize); out: bpf_net_ctx_clear(bpf_net_ctx); @@ -2353,6 +2363,7 @@ static int tun_xdp_one(struct tun_struct *tun, struct sk_buff_head *queue; u32 rxhash = 0, act; int buflen = hdr->buflen; + int metasize = 0; int ret = 0; bool skb_xdp = false; struct page *page; @@ -2368,7 +2379,6 @@ static int tun_xdp_one(struct tun_struct *tun, } xdp_init_buff(xdp, buflen, &tfile->xdp_rxq); - xdp_set_data_meta_invalid(xdp); act = bpf_prog_run_xdp(xdp_prog, xdp); ret = tun_xdp_act(tun, xdp_prog, xdp, act); @@ -2408,6 +2418,14 @@ build: skb_reserve(skb, xdp->data - xdp->data_hard_start); skb_put(skb, xdp->data_end - xdp->data); + /* The externally provided xdp_buff may have no metadata support, which + * is marked by xdp->data_meta being xdp->data + 1. This will lead to a + * metasize of -1 and is the reason why the condition checks for > 0. + */ + metasize = xdp->data - xdp->data_meta; + if (metasize > 0) + skb_metadata_set(skb, metasize); + if (tun_vnet_hdr_to_skb(tun->flags, skb, gso)) { atomic_long_inc(&tun->rx_frame_errors); kfree_skb(skb); @@ -3175,7 +3193,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, case SIOCSIFHWADDR: /* Set hw address */ - ret = dev_set_mac_address_user(tun->dev, &ifr.ifr_hwaddr, NULL); + ret = dev_set_mac_address(tun->dev, &ifr.ifr_hwaddr, NULL); break; case TUNGETSNDBUF: diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index a6469235d904..a032c1ded406 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -783,6 +783,13 @@ static const struct usb_device_id products[] = { .driver_info = 0, }, +/* Lenovo ThinkPad Hybrid USB-C with USB-A Dock (40af0135eu, based on Realtek RTL8153) */ +{ + USB_DEVICE_AND_INTERFACE_INFO(LENOVO_VENDOR_ID, 0xa359, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), + .driver_info = 0, +}, + /* Aquantia AQtion USB to 5GbE Controller (based on AQC111U) */ { USB_DEVICE_AND_INTERFACE_INFO(AQUANTIA_VENDOR_ID, 0xc101, diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index a91bf9c7e31d..137adf6d5b08 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -627,7 +627,7 @@ static int lan78xx_read_reg(struct lan78xx_net *dev, u32 index, u32 *data) kfree(buf); - return ret; + return ret < 0 ? ret : 0; } static int lan78xx_write_reg(struct lan78xx_net *dev, u32 index, u32 data) @@ -658,7 +658,7 @@ static int lan78xx_write_reg(struct lan78xx_net *dev, u32 index, u32 data) kfree(buf); - return ret; + return ret < 0 ? 
ret : 0; } static int lan78xx_update_reg(struct lan78xx_net *dev, u32 reg, u32 mask, diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index e1021148d3a6..2cab046749a9 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -785,6 +785,7 @@ enum rtl8152_flags { #define DEVICE_ID_THINKPAD_USB_C_DONGLE 0x720c #define DEVICE_ID_THINKPAD_USB_C_DOCK_GEN2 0xa387 #define DEVICE_ID_THINKPAD_USB_C_DOCK_GEN3 0x3062 +#define DEVICE_ID_THINKPAD_HYBRID_USB_C_DOCK 0xa359 struct tally_counter { __le64 tx_packets; @@ -9787,6 +9788,7 @@ static bool rtl8152_supports_lenovo_macpassthru(struct usb_device *udev) case DEVICE_ID_THINKPAD_USB_C_DOCK_GEN2: case DEVICE_ID_THINKPAD_USB_C_DOCK_GEN3: case DEVICE_ID_THINKPAD_USB_C_DONGLE: + case DEVICE_ID_THINKPAD_HYBRID_USB_C_DOCK: return 1; } } else if (vendor_id == VENDOR_ID_REALTEK && parent_vendor_id == VENDOR_ID_LENOVO) { @@ -10064,6 +10066,8 @@ static const struct usb_device_id rtl8152_table[] = { { USB_DEVICE(VENDOR_ID_MICROSOFT, 0x0927) }, { USB_DEVICE(VENDOR_ID_MICROSOFT, 0x0c5e) }, { USB_DEVICE(VENDOR_ID_SAMSUNG, 0xa101) }, + + /* Lenovo */ { USB_DEVICE(VENDOR_ID_LENOVO, 0x304f) }, { USB_DEVICE(VENDOR_ID_LENOVO, 0x3054) }, { USB_DEVICE(VENDOR_ID_LENOVO, 0x3062) }, @@ -10074,7 +10078,9 @@ static const struct usb_device_id rtl8152_table[] = { { USB_DEVICE(VENDOR_ID_LENOVO, 0x720c) }, { USB_DEVICE(VENDOR_ID_LENOVO, 0x7214) }, { USB_DEVICE(VENDOR_ID_LENOVO, 0x721e) }, + { USB_DEVICE(VENDOR_ID_LENOVO, 0xa359) }, { USB_DEVICE(VENDOR_ID_LENOVO, 0xa387) }, + { USB_DEVICE(VENDOR_ID_LINKSYS, 0x0041) }, { USB_DEVICE(VENDOR_ID_NVIDIA, 0x09ff) }, { USB_DEVICE(VENDOR_ID_TPLINK, 0x0601) }, diff --git a/drivers/net/usb/r8153_ecm.c b/drivers/net/usb/r8153_ecm.c index 20b2df8d74ae..8d860dacdf49 100644 --- a/drivers/net/usb/r8153_ecm.c +++ b/drivers/net/usb/r8153_ecm.c @@ -135,6 +135,12 @@ static const struct usb_device_id products[] = { USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&r8153_info, }, +/* Lenovo ThinkPad Hybrid USB-C with USB-A Dock (40af0135eu, based on Realtek RTL8153) */ +{ + USB_DEVICE_AND_INTERFACE_INFO(VENDOR_ID_LENOVO, 0xa359, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), + .driver_info = (unsigned long)&r8153_info, +}, { }, /* END */ }; diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 05f5eeef539f..7bb53961c0ea 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -17,6 +17,7 @@ #include <net/rtnetlink.h> #include <net/dst.h> +#include <net/netdev_lock.h> #include <net/xfrm.h> #include <net/xdp.h> #include <linux/veth.h> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index ac26a6201c44..34cec2b11b74 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -2807,7 +2807,8 @@ static void skb_recv_done(struct virtqueue *rvq) virtqueue_napi_schedule(&rq->napi, rvq); } -static void virtnet_napi_enable(struct virtqueue *vq, struct napi_struct *napi) +static void virtnet_napi_do_enable(struct virtqueue *vq, + struct napi_struct *napi) { napi_enable(napi); @@ -2820,10 +2821,21 @@ static void virtnet_napi_enable(struct virtqueue *vq, struct napi_struct *napi) local_bh_enable(); } -static void virtnet_napi_tx_enable(struct virtnet_info *vi, - struct virtqueue *vq, - struct napi_struct *napi) +static void virtnet_napi_enable(struct receive_queue *rq) +{ + struct virtnet_info *vi = rq->vq->vdev->priv; + int qidx = vq2rxq(rq->vq); + + virtnet_napi_do_enable(rq->vq, &rq->napi); + netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_RX, 
&rq->napi); +} + +static void virtnet_napi_tx_enable(struct send_queue *sq) { + struct virtnet_info *vi = sq->vq->vdev->priv; + struct napi_struct *napi = &sq->napi; + int qidx = vq2txq(sq->vq); + if (!napi->weight) return; @@ -2835,13 +2847,30 @@ static void virtnet_napi_tx_enable(struct virtnet_info *vi, return; } - return virtnet_napi_enable(vq, napi); + virtnet_napi_do_enable(sq->vq, napi); + netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_TX, napi); } -static void virtnet_napi_tx_disable(struct napi_struct *napi) +static void virtnet_napi_tx_disable(struct send_queue *sq) { - if (napi->weight) + struct virtnet_info *vi = sq->vq->vdev->priv; + struct napi_struct *napi = &sq->napi; + int qidx = vq2txq(sq->vq); + + if (napi->weight) { + netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_TX, NULL); napi_disable(napi); + } +} + +static void virtnet_napi_disable(struct receive_queue *rq) +{ + struct virtnet_info *vi = rq->vq->vdev->priv; + struct napi_struct *napi = &rq->napi; + int qidx = vq2rxq(rq->vq); + + netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_RX, NULL); + napi_disable(napi); } static void refill_work(struct work_struct *work) @@ -2854,9 +2883,23 @@ static void refill_work(struct work_struct *work) for (i = 0; i < vi->curr_queue_pairs; i++) { struct receive_queue *rq = &vi->rq[i]; + /* + * When queue API support is added in the future and the call + * below becomes napi_disable_locked, this driver will need to + * be refactored. + * + * One possible solution would be to: + * - cancel refill_work with cancel_delayed_work (note: + * non-sync) + * - cancel refill_work with cancel_delayed_work_sync in + * virtnet_remove after the netdev is unregistered + * - wrap all of the work in a lock (perhaps the netdev + * instance lock) + * - check netif_running() and return early to avoid a race + */ napi_disable(&rq->napi); still_empty = !try_fill_recv(vi, rq, GFP_KERNEL); - virtnet_napi_enable(rq->vq, &rq->napi); + virtnet_napi_do_enable(rq->vq, &rq->napi); /* In theory, this can happen: if we don't get any buffers in * we will *never* try to fill again. 
@@ -3053,8 +3096,8 @@ static int virtnet_poll(struct napi_struct *napi, int budget) static void virtnet_disable_queue_pair(struct virtnet_info *vi, int qp_index) { - virtnet_napi_tx_disable(&vi->sq[qp_index].napi); - napi_disable(&vi->rq[qp_index].napi); + virtnet_napi_tx_disable(&vi->sq[qp_index]); + virtnet_napi_disable(&vi->rq[qp_index]); xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq); } @@ -3073,8 +3116,8 @@ static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index) if (err < 0) goto err_xdp_reg_mem_model; - virtnet_napi_enable(vi->rq[qp_index].vq, &vi->rq[qp_index].napi); - virtnet_napi_tx_enable(vi, vi->sq[qp_index].vq, &vi->sq[qp_index].napi); + virtnet_napi_enable(&vi->rq[qp_index]); + virtnet_napi_tx_enable(&vi->sq[qp_index]); return 0; @@ -3326,7 +3369,7 @@ static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq) bool running = netif_running(vi->dev); if (running) { - napi_disable(&rq->napi); + virtnet_napi_disable(rq); virtnet_cancel_dim(vi, &rq->dim); } } @@ -3339,7 +3382,7 @@ static void virtnet_rx_resume(struct virtnet_info *vi, struct receive_queue *rq) schedule_delayed_work(&vi->refill, 0); if (running) - virtnet_napi_enable(rq->vq, &rq->napi); + virtnet_napi_enable(rq); } static int virtnet_rx_resize(struct virtnet_info *vi, @@ -3368,7 +3411,7 @@ static void virtnet_tx_pause(struct virtnet_info *vi, struct send_queue *sq) qindex = sq - vi->sq; if (running) - virtnet_napi_tx_disable(&sq->napi); + virtnet_napi_tx_disable(sq); txq = netdev_get_tx_queue(vi->dev, qindex); @@ -3402,7 +3445,7 @@ static void virtnet_tx_resume(struct virtnet_info *vi, struct send_queue *sq) __netif_tx_unlock_bh(txq); if (running) - virtnet_napi_tx_enable(vi, sq->vq, &sq->napi); + virtnet_napi_tx_enable(sq); } static int virtnet_tx_resize(struct virtnet_info *vi, struct send_queue *sq, @@ -5634,8 +5677,11 @@ static void virtnet_freeze_down(struct virtio_device *vdev) netif_tx_lock_bh(vi->dev); netif_device_detach(vi->dev); netif_tx_unlock_bh(vi->dev); - if (netif_running(vi->dev)) + if (netif_running(vi->dev)) { + rtnl_lock(); virtnet_close(vi->dev); + rtnl_unlock(); + } } static int init_vqs(struct virtnet_info *vi); @@ -5655,7 +5701,9 @@ static int virtnet_restore_up(struct virtio_device *vdev) enable_rx_mode_work(vi); if (netif_running(vi->dev)) { + rtnl_lock(); err = virtnet_open(vi->dev); + rtnl_unlock(); if (err) return err; } @@ -5945,8 +5993,8 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, /* Make sure NAPI is not using any XDP TX queues for RX. 
*/ if (netif_running(dev)) { for (i = 0; i < vi->max_queue_pairs; i++) { - napi_disable(&vi->rq[i].napi); - virtnet_napi_tx_disable(&vi->sq[i].napi); + virtnet_napi_disable(&vi->rq[i]); + virtnet_napi_tx_disable(&vi->sq[i]); } } @@ -5983,9 +6031,8 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, if (old_prog) bpf_prog_put(old_prog); if (netif_running(dev)) { - virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); - virtnet_napi_tx_enable(vi, vi->sq[i].vq, - &vi->sq[i].napi); + virtnet_napi_enable(&vi->rq[i]); + virtnet_napi_tx_enable(&vi->sq[i]); } } @@ -6000,9 +6047,8 @@ err: if (netif_running(dev)) { for (i = 0; i < vi->max_queue_pairs; i++) { - virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); - virtnet_napi_tx_enable(vi, vi->sq[i].vq, - &vi->sq[i].napi); + virtnet_napi_enable(&vi->rq[i]); + virtnet_napi_tx_enable(&vi->sq[i]); } } if (prog) @@ -6409,8 +6455,9 @@ static int virtnet_alloc_queues(struct virtnet_info *vi) INIT_DELAYED_WORK(&vi->refill, refill_work); for (i = 0; i < vi->max_queue_pairs; i++) { vi->rq[i].pages = NULL; - netif_napi_add_weight(vi->dev, &vi->rq[i].napi, virtnet_poll, - napi_weight); + netif_napi_add_config(vi->dev, &vi->rq[i].napi, virtnet_poll, + i); + vi->rq[i].napi.weight = napi_weight; netif_napi_add_tx_weight(vi->dev, &vi->sq[i].napi, virtnet_poll_tx, napi_tx ? napi_weight : 0); diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 36cf6191335e..7168b33adadb 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -34,6 +34,7 @@ #include <net/addrconf.h> #include <net/l3mdev.h> #include <net/fib_rules.h> +#include <net/netdev_lock.h> #include <net/sch_generic.h> #include <net/netns/generic.h> #include <net/netfilter/nf_conntrack.h> diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 227d7f5a302a..8c49e903cb3a 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -25,6 +25,7 @@ #include <net/inet_ecn.h> #include <net/net_namespace.h> #include <net/netns/generic.h> +#include <net/netdev_lock.h> #include <net/tun_proto.h> #include <net/vxlan.h> #include <net/nexthop.h> diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c index 60eb95fc19a5..6bc107476a2a 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c @@ -1172,6 +1172,7 @@ static int brcmf_ops_sdio_suspend(struct device *dev) struct brcmf_bus *bus_if; struct brcmf_sdio_dev *sdiodev; mmc_pm_flag_t sdio_flags; + bool cap_power_off; int ret = 0; func = container_of(dev, struct sdio_func, dev); @@ -1179,19 +1180,23 @@ static int brcmf_ops_sdio_suspend(struct device *dev) if (func->num != 1) return 0; + cap_power_off = !!(func->card->host->caps & MMC_CAP_POWER_OFF_CARD); bus_if = dev_get_drvdata(dev); sdiodev = bus_if->bus_priv.sdio; - if (sdiodev->wowl_enabled) { + if (sdiodev->wowl_enabled || !cap_power_off) { brcmf_sdiod_freezer_on(sdiodev); brcmf_sdio_wd_timer(sdiodev->bus, 0); sdio_flags = MMC_PM_KEEP_POWER; - if (sdiodev->settings->bus.sdio.oob_irq_supported) - enable_irq_wake(sdiodev->settings->bus.sdio.oob_irq_nr); - else - sdio_flags |= MMC_PM_WAKE_SDIO_IRQ; + + if (sdiodev->wowl_enabled) { + if (sdiodev->settings->bus.sdio.oob_irq_supported) + enable_irq_wake(sdiodev->settings->bus.sdio.oob_irq_nr); + else + sdio_flags |= MMC_PM_WAKE_SDIO_IRQ; + } if (sdio_set_host_pm_flags(sdiodev->func1, sdio_flags)) brcmf_err("Failed to set pm_flags %x\n", 
sdio_flags); @@ -1213,18 +1218,19 @@ static int brcmf_ops_sdio_resume(struct device *dev) struct brcmf_sdio_dev *sdiodev = bus_if->bus_priv.sdio; struct sdio_func *func = container_of(dev, struct sdio_func, dev); int ret = 0; + bool cap_power_off = !!(func->card->host->caps & MMC_CAP_POWER_OFF_CARD); brcmf_dbg(SDIO, "Enter: F%d\n", func->num); if (func->num != 2) return 0; - if (!sdiodev->wowl_enabled) { + if (!sdiodev->wowl_enabled && cap_power_off) { /* bus was powered off and device removed, probe again */ ret = brcmf_sdiod_probe(sdiodev); if (ret) brcmf_err("Failed to probe device on resume\n"); } else { - if (sdiodev->settings->bus.sdio.oob_irq_supported) + if (sdiodev->wowl_enabled && sdiodev->settings->bus.sdio.oob_irq_supported) disable_irq_wake(sdiodev->settings->bus.sdio.oob_irq_nr); brcmf_sdiod_freezer_off(sdiodev); diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c index 634e2543ba6d..03f639fbf9b6 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c @@ -558,41 +558,71 @@ static void iwl_dump_prph(struct iwl_fw_runtime *fwrt, } /* - * alloc_sgtable - allocates scallerlist table in the given size, - * fills it with pages and returns it + * alloc_sgtable - allocates (chained) scatterlist in the given size, + * fills it with pages and returns it * @size: the size (in bytes) of the table -*/ -static struct scatterlist *alloc_sgtable(int size) + */ +static struct scatterlist *alloc_sgtable(ssize_t size) { - int alloc_size, nents, i; - struct page *new_page; - struct scatterlist *iter; - struct scatterlist *table; + struct scatterlist *result = NULL, *prev; + int nents, i, n_prev; nents = DIV_ROUND_UP(size, PAGE_SIZE); - table = kcalloc(nents, sizeof(*table), GFP_KERNEL); - if (!table) - return NULL; - sg_init_table(table, nents); - iter = table; - for_each_sg(table, iter, sg_nents(table), i) { - new_page = alloc_page(GFP_KERNEL); - if (!new_page) { - /* release all previous allocated pages in the table */ - iter = table; - for_each_sg(table, iter, sg_nents(table), i) { - new_page = sg_page(iter); - if (new_page) - __free_page(new_page); - } - kfree(table); + +#define N_ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(*result)) + /* + * We need an additional entry for table chaining, + * this ensures the loop can finish i.e. we can + * fit at least two entries per page (obviously, + * many more really fit.) 
+ */ + BUILD_BUG_ON(N_ENTRIES_PER_PAGE < 2); + + while (nents > 0) { + struct scatterlist *new, *iter; + int n_fill, n_alloc; + + if (nents <= N_ENTRIES_PER_PAGE) { + /* last needed table */ + n_fill = nents; + n_alloc = nents; + nents = 0; + } else { + /* fill a page with entries */ + n_alloc = N_ENTRIES_PER_PAGE; + /* reserve one for chaining */ + n_fill = n_alloc - 1; + nents -= n_fill; + } + + new = kcalloc(n_alloc, sizeof(*new), GFP_KERNEL); + if (!new) { + if (result) + _devcd_free_sgtable(result); return NULL; } - alloc_size = min_t(int, size, PAGE_SIZE); - size -= PAGE_SIZE; - sg_set_page(iter, new_page, alloc_size, 0); + sg_init_table(new, n_alloc); + + if (!result) + result = new; + else + sg_chain(prev, n_prev, new); + prev = new; + n_prev = n_alloc; + + for_each_sg(new, iter, n_fill, i) { + struct page *new_page = alloc_page(GFP_KERNEL); + + if (!new_page) { + _devcd_free_sgtable(result); + return NULL; + } + + sg_set_page(iter, new_page, PAGE_SIZE, 0); + } } - return table; + + return result; } static void iwl_fw_get_prph_len(struct iwl_fw_runtime *fwrt, diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dump.c b/drivers/net/wireless/intel/iwlwifi/fw/dump.c index 8e0c85a1240d..c7b261c8ec96 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dump.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/dump.c @@ -540,6 +540,9 @@ bool iwl_fwrt_read_err_table(struct iwl_trans *trans, u32 base, u32 *err_id) } err_info = {}; int ret; + if (err_id) + *err_id = 0; + if (!base) return false; diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c index d3a65f33097c..352b6e73e08f 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c @@ -1181,7 +1181,7 @@ static int iwl_parse_tlv_firmware(struct iwl_drv *drv, if (tlv_len != sizeof(*fseq_ver)) goto invalid_tlv_len; - IWL_INFO(drv, "TLV_FW_FSEQ_VERSION: %s\n", + IWL_INFO(drv, "TLV_FW_FSEQ_VERSION: %.32s\n", fseq_ver->version); } break; diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.c b/drivers/net/wireless/intel/iwlwifi/iwl-trans.c index 1632ab20a23f..2d0e09aa9cbe 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.c @@ -403,6 +403,8 @@ void iwl_trans_op_mode_leave(struct iwl_trans *trans) iwl_trans_pcie_op_mode_leave(trans); + cancel_work_sync(&trans->restart.wk); + trans->op_mode = NULL; trans->state = IWL_TRANS_NO_FW; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c index 632f1a26672c..3e8b7168af01 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c @@ -3092,8 +3092,14 @@ static void iwl_mvm_d3_disconnect_iter(void *data, u8 *mac, ieee80211_resume_disconnect(vif); } -static bool iwl_mvm_check_rt_status(struct iwl_mvm *mvm, - struct ieee80211_vif *vif) +enum rt_status { + FW_ALIVE, + FW_NEEDS_RESET, + FW_ERROR, +}; + +static enum rt_status iwl_mvm_check_rt_status(struct iwl_mvm *mvm, + struct ieee80211_vif *vif) { u32 err_id; @@ -3101,29 +3107,35 @@ static bool iwl_mvm_check_rt_status(struct iwl_mvm *mvm, if (iwl_fwrt_read_err_table(mvm->trans, mvm->trans->dbg.lmac_error_event_table[0], &err_id)) { - if (err_id == RF_KILL_INDICATOR_FOR_WOWLAN && vif) { - struct cfg80211_wowlan_wakeup wakeup = { - .rfkill_release = true, - }; - ieee80211_report_wowlan_wakeup(vif, &wakeup, - GFP_KERNEL); + if (err_id == RF_KILL_INDICATOR_FOR_WOWLAN) { + IWL_WARN(mvm, "Rfkill was toggled 
during suspend\n"); + if (vif) { + struct cfg80211_wowlan_wakeup wakeup = { + .rfkill_release = true, + }; + + ieee80211_report_wowlan_wakeup(vif, &wakeup, + GFP_KERNEL); + } + + return FW_NEEDS_RESET; } - return true; + return FW_ERROR; } /* check if we have lmac2 set and check for error */ if (iwl_fwrt_read_err_table(mvm->trans, mvm->trans->dbg.lmac_error_event_table[1], NULL)) - return true; + return FW_ERROR; /* check for umac error */ if (iwl_fwrt_read_err_table(mvm->trans, mvm->trans->dbg.umac_error_event_table, NULL)) - return true; + return FW_ERROR; - return false; + return FW_ALIVE; } /* @@ -3492,6 +3504,7 @@ static int __iwl_mvm_resume(struct iwl_mvm *mvm, bool test) bool d0i3_first = fw_has_capa(&mvm->fw->ucode_capa, IWL_UCODE_TLV_CAPA_D0I3_END_FIRST); bool resume_notif_based = iwl_mvm_d3_resume_notif_based(mvm); + enum rt_status rt_status; bool keep = false; mutex_lock(&mvm->mutex); @@ -3515,14 +3528,19 @@ static int __iwl_mvm_resume(struct iwl_mvm *mvm, bool test) iwl_fw_dbg_read_d3_debug_data(&mvm->fwrt); - if (iwl_mvm_check_rt_status(mvm, vif)) { - IWL_ERR(mvm, "FW Error occurred during suspend. Restarting.\n"); + rt_status = iwl_mvm_check_rt_status(mvm, vif); + if (rt_status != FW_ALIVE) { set_bit(STATUS_FW_ERROR, &mvm->trans->status); - iwl_mvm_dump_nic_error_log(mvm); - iwl_dbg_tlv_time_point(&mvm->fwrt, - IWL_FW_INI_TIME_POINT_FW_ASSERT, NULL); - iwl_fw_dbg_collect_desc(&mvm->fwrt, &iwl_dump_desc_assert, - false, 0); + if (rt_status == FW_ERROR) { + IWL_ERR(mvm, "FW Error occurred during suspend. Restarting.\n"); + iwl_mvm_dump_nic_error_log(mvm); + iwl_dbg_tlv_time_point(&mvm->fwrt, + IWL_FW_INI_TIME_POINT_FW_ASSERT, + NULL); + iwl_fw_dbg_collect_desc(&mvm->fwrt, + &iwl_dump_desc_assert, + false, 0); + } ret = 1; goto err; } @@ -3679,6 +3697,7 @@ int iwl_mvm_fast_resume(struct iwl_mvm *mvm) .notif_expected = IWL_D3_NOTIF_D3_END_NOTIF, }; + enum rt_status rt_status; int ret; lockdep_assert_held(&mvm->mutex); @@ -3688,14 +3707,20 @@ int iwl_mvm_fast_resume(struct iwl_mvm *mvm) mvm->last_reset_or_resume_time_jiffies = jiffies; iwl_fw_dbg_read_d3_debug_data(&mvm->fwrt); - if (iwl_mvm_check_rt_status(mvm, NULL)) { - IWL_ERR(mvm, "FW Error occurred during suspend. Restarting.\n"); + rt_status = iwl_mvm_check_rt_status(mvm, NULL); + if (rt_status != FW_ALIVE) { set_bit(STATUS_FW_ERROR, &mvm->trans->status); - iwl_mvm_dump_nic_error_log(mvm); - iwl_dbg_tlv_time_point(&mvm->fwrt, - IWL_FW_INI_TIME_POINT_FW_ASSERT, NULL); - iwl_fw_dbg_collect_desc(&mvm->fwrt, &iwl_dump_desc_assert, - false, 0); + if (rt_status == FW_ERROR) { + IWL_ERR(mvm, + "iwl_mvm_check_rt_status failed, device is gone during suspend\n"); + iwl_mvm_dump_nic_error_log(mvm); + iwl_dbg_tlv_time_point(&mvm->fwrt, + IWL_FW_INI_TIME_POINT_FW_ASSERT, + NULL); + iwl_fw_dbg_collect_desc(&mvm->fwrt, + &iwl_dump_desc_assert, + false, 0); + } mvm->trans->state = IWL_TRANS_NO_FW; ret = -ENODEV; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c index 83e3c1160362..55d035b896e9 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c @@ -1479,6 +1479,13 @@ static ssize_t iwl_dbgfs_fw_dbg_clear_write(struct iwl_mvm *mvm, if (mvm->trans->trans_cfg->device_family < IWL_DEVICE_FAMILY_9000) return -EOPNOTSUPP; + /* + * If the firmware is not running, silently succeed since there is + * no data to clear. 
+ */ + if (!iwl_mvm_firmware_running(mvm)) + return count; + mutex_lock(&mvm->mutex); iwl_fw_dbg_clear_monitor_buf(&mvm->fwrt); mutex_unlock(&mvm->mutex); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index 822244b87df3..2b5a62604fc4 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* - * Copyright (C) 2012-2014, 2018-2024 Intel Corporation + * Copyright (C) 2012-2014, 2018-2025 Intel Corporation * Copyright (C) 2013-2015 Intel Mobile Communications GmbH * Copyright (C) 2016-2017 Intel Deutschland GmbH */ @@ -422,6 +422,8 @@ static int iwl_mvm_load_ucode_wait_alive(struct iwl_mvm *mvm, /* if reached this point, Alive notification was received */ iwl_mei_alive_notif(true); + iwl_trans_fw_alive(mvm->trans, alive_data.scd_base_addr); + ret = iwl_pnvm_load(mvm->trans, &mvm->notif_wait, &mvm->fw->ucode_capa); if (ret) { @@ -430,8 +432,6 @@ static int iwl_mvm_load_ucode_wait_alive(struct iwl_mvm *mvm, return ret; } - iwl_trans_fw_alive(mvm->trans, alive_data.scd_base_addr); - /* * Note: all the queues are enabled as part of the interface * initialization, but in firmware restart scenarios they diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index 09fd8752046e..14ea89f931bb 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -995,7 +995,7 @@ iwl_mvm_decode_he_phy_ru_alloc(struct iwl_mvm_rx_phy_data *phy_data, */ u8 ru = le32_get_bits(phy_data->d1, IWL_RX_PHY_DATA1_HE_RU_ALLOC_MASK); u32 rate_n_flags = phy_data->rate_n_flags; - u32 he_type = rate_n_flags & RATE_MCS_HE_TYPE_MSK_V1; + u32 he_type = rate_n_flags & RATE_MCS_HE_TYPE_MSK; u8 offs = 0; rx_status->bw = RATE_INFO_BW_HE_RU; @@ -1050,13 +1050,13 @@ iwl_mvm_decode_he_phy_ru_alloc(struct iwl_mvm_rx_phy_data *phy_data, if (he_mu) he_mu->flags2 |= - le16_encode_bits(FIELD_GET(RATE_MCS_CHAN_WIDTH_MSK_V1, + le16_encode_bits(FIELD_GET(RATE_MCS_CHAN_WIDTH_MSK, rate_n_flags), IEEE80211_RADIOTAP_HE_MU_FLAGS2_BW_FROM_SIG_A_BW); - else if (he_type == RATE_MCS_HE_TYPE_TRIG_V1) + else if (he_type == RATE_MCS_HE_TYPE_TRIG) he->data6 |= cpu_to_le16(IEEE80211_RADIOTAP_HE_DATA6_TB_PPDU_BW_KNOWN) | - le16_encode_bits(FIELD_GET(RATE_MCS_CHAN_WIDTH_MSK_V1, + le16_encode_bits(FIELD_GET(RATE_MCS_CHAN_WIDTH_MSK, rate_n_flags), IEEE80211_RADIOTAP_HE_DATA6_TB_PPDU_BW); } diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c index 198f94292674..1a30bb1ff8ca 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c @@ -1030,6 +1030,8 @@ void iwl_mvm_rx_session_protect_notif(struct iwl_mvm *mvm, /* End TE, notify mac80211 */ mvmvif->time_event_data.id = SESSION_PROTECT_CONF_MAX_ID; mvmvif->time_event_data.link_id = -1; + /* set the bit so the ROC cleanup will actually clean up */ + set_bit(IWL_MVM_STATUS_ROC_P2P_RUNNING, &mvm->status); iwl_mvm_roc_finished(mvm); ieee80211_remain_on_channel_expired(mvm->hw); } else if (le32_to_cpu(notif->start)) { diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h index 856b7e9f717d..45460f93d24a 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h +++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h @@ -1,6 +1,6 @@ /* 
SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* - * Copyright (C) 2003-2015, 2018-2024 Intel Corporation + * Copyright (C) 2003-2015, 2018-2025 Intel Corporation * Copyright (C) 2013-2015 Intel Mobile Communications GmbH * Copyright (C) 2016-2017 Intel Deutschland GmbH */ @@ -646,7 +646,8 @@ dma_addr_t iwl_pcie_get_sgt_tb_phys(struct sg_table *sgt, unsigned int offset, unsigned int len); struct sg_table *iwl_pcie_prep_tso(struct iwl_trans *trans, struct sk_buff *skb, struct iwl_cmd_meta *cmd_meta, - u8 **hdr, unsigned int hdr_room); + u8 **hdr, unsigned int hdr_room, + unsigned int offset); void iwl_pcie_free_tso_pages(struct iwl_trans *trans, struct sk_buff *skb, struct iwl_cmd_meta *cmd_meta); diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c index 1f483f15c238..401919f9fe88 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* * Copyright (C) 2017 Intel Deutschland GmbH - * Copyright (C) 2018-2020, 2023-2024 Intel Corporation + * Copyright (C) 2018-2020, 2023-2025 Intel Corporation */ #include <net/tso.h> #include <linux/tcp.h> @@ -188,7 +188,8 @@ static int iwl_txq_gen2_build_amsdu(struct iwl_trans *trans, (3 + snap_ip_tcp_hdrlen + sizeof(struct ethhdr)); /* Our device supports 9 segments at most, it will fit in 1 page */ - sgt = iwl_pcie_prep_tso(trans, skb, out_meta, &start_hdr, hdr_room); + sgt = iwl_pcie_prep_tso(trans, skb, out_meta, &start_hdr, hdr_room, + snap_ip_tcp_hdrlen + hdr_len); if (!sgt) return -ENOMEM; @@ -347,6 +348,7 @@ iwl_tfh_tfd *iwl_txq_gen2_build_tx_amsdu(struct iwl_trans *trans, return tfd; out_err: + iwl_pcie_free_tso_pages(trans, skb, out_meta); iwl_txq_gen2_tfd_unmap(trans, out_meta, tfd); return NULL; } diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c index 334ebd4c12fa..7c1dd5cc084a 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* - * Copyright (C) 2003-2014, 2018-2021, 2023-2024 Intel Corporation + * Copyright (C) 2003-2014, 2018-2021, 2023-2025 Intel Corporation * Copyright (C) 2013-2015 Intel Mobile Communications GmbH * Copyright (C) 2016-2017 Intel Deutschland GmbH */ @@ -1855,6 +1855,7 @@ dma_addr_t iwl_pcie_get_sgt_tb_phys(struct sg_table *sgt, unsigned int offset, * @cmd_meta: command meta to store the scatter list information for unmapping * @hdr: output argument for TSO headers * @hdr_room: requested length for TSO headers + * @offset: offset into the data from which mapping should start * * Allocate space for a scatter gather list and TSO headers and map the SKB * using the scatter gather list. 
The SKB is unmapped again when the page is @@ -1864,9 +1865,12 @@ dma_addr_t iwl_pcie_get_sgt_tb_phys(struct sg_table *sgt, unsigned int offset, */ struct sg_table *iwl_pcie_prep_tso(struct iwl_trans *trans, struct sk_buff *skb, struct iwl_cmd_meta *cmd_meta, - u8 **hdr, unsigned int hdr_room) + u8 **hdr, unsigned int hdr_room, + unsigned int offset) { struct sg_table *sgt; + unsigned int n_segments = skb_shinfo(skb)->nr_frags + 1; + int orig_nents; if (WARN_ON_ONCE(skb_has_frag_list(skb))) return NULL; @@ -1874,8 +1878,7 @@ struct sg_table *iwl_pcie_prep_tso(struct iwl_trans *trans, struct sk_buff *skb, *hdr = iwl_pcie_get_page_hdr(trans, hdr_room + __alignof__(struct sg_table) + sizeof(struct sg_table) + - (skb_shinfo(skb)->nr_frags + 1) * - sizeof(struct scatterlist), + n_segments * sizeof(struct scatterlist), skb); if (!*hdr) return NULL; @@ -1883,14 +1886,15 @@ struct sg_table *iwl_pcie_prep_tso(struct iwl_trans *trans, struct sk_buff *skb, sgt = (void *)PTR_ALIGN(*hdr + hdr_room, __alignof__(struct sg_table)); sgt->sgl = (void *)(sgt + 1); - sg_init_table(sgt->sgl, skb_shinfo(skb)->nr_frags + 1); + sg_init_table(sgt->sgl, n_segments); /* Only map the data, not the header (it is copied to the TSO page) */ - sgt->orig_nents = skb_to_sgvec(skb, sgt->sgl, skb_headlen(skb), - skb->data_len); - if (WARN_ON_ONCE(sgt->orig_nents <= 0)) + orig_nents = skb_to_sgvec(skb, sgt->sgl, offset, skb->len - offset); + if (WARN_ON_ONCE(orig_nents <= 0)) return NULL; + sgt->orig_nents = orig_nents; + /* And map the entire SKB */ if (dma_map_sgtable(trans->dev, sgt, DMA_TO_DEVICE, 0) < 0) return NULL; @@ -1939,7 +1943,8 @@ static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb, (3 + snap_ip_tcp_hdrlen + sizeof(struct ethhdr)) + iv_len; /* Our device supports 9 segments at most, it will fit in 1 page */ - sgt = iwl_pcie_prep_tso(trans, skb, out_meta, &start_hdr, hdr_room); + sgt = iwl_pcie_prep_tso(trans, skb, out_meta, &start_hdr, hdr_room, + snap_ip_tcp_hdrlen + hdr_len + iv_len); if (!sgt) return -ENOMEM; diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index b1b46c2713e1..24e2c702da7a 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -128,8 +128,10 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer, if (!nvme_ctrl_sgl_supported(ctrl)) dev_warn_once(ctrl->device, "using unchecked data buffer\n"); if (has_metadata) { - if (!supports_metadata) - return -EINVAL; + if (!supports_metadata) { + ret = -EINVAL; + goto out; + } if (!nvme_ctrl_meta_sgl_supported(ctrl)) dev_warn_once(ctrl->device, "using unchecked metadata buffer\n"); @@ -139,8 +141,10 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer, struct iov_iter iter; /* fixedbufs is only for non-vectored io */ - if (WARN_ON_ONCE(flags & NVME_IOCTL_VEC)) - return -EINVAL; + if (WARN_ON_ONCE(flags & NVME_IOCTL_VEC)) { + ret = -EINVAL; + goto out; + } ret = io_uring_cmd_import_fixed(ubuffer, bufflen, rq_data_dir(req), &iter, ioucmd); if (ret < 0) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 950289405ef2..640590b21728 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1983,6 +1983,18 @@ static void nvme_map_cmb(struct nvme_dev *dev) return; /* + * Controllers may support a CMB size larger than their BAR, for + * example, due to being behind a bridge. 
Reduce the CMB to the + * reported size of the BAR + */ + size = min(size, bar_size - offset); + + if (!IS_ALIGNED(size, memremap_compat_align()) || + !IS_ALIGNED(pci_resource_start(pdev, bar), + memremap_compat_align())) + return; + + /* * Tell the controller about the host side address mapping the CMB, * and enable CMB decoding for the NVMe 1.4+ scheme: */ @@ -1992,17 +2004,10 @@ static void nvme_map_cmb(struct nvme_dev *dev) dev->bar + NVME_REG_CMBMSC); } - /* - * Controllers may support a CMB size larger than their BAR, - * for example, due to being behind a bridge. Reduce the CMB to - * the reported size of the BAR - */ - if (size > bar_size - offset) - size = bar_size - offset; - if (pci_p2pdma_add_resource(pdev, bar, size, offset)) { dev_warn(dev->ctrl.device, "failed to register the CMB\n"); + hi_lo_writeq(0, dev->bar + NVME_REG_CMBMSC); return; } diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 8a9131c95a3d..327f3f2f5399 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -217,6 +217,19 @@ static inline int nvme_tcp_queue_id(struct nvme_tcp_queue *queue) return queue - queue->ctrl->queues; } +static inline bool nvme_tcp_recv_pdu_supported(enum nvme_tcp_pdu_type type) +{ + switch (type) { + case nvme_tcp_c2h_term: + case nvme_tcp_c2h_data: + case nvme_tcp_r2t: + case nvme_tcp_rsp: + return true; + default: + return false; + } +} + /* * Check if the queue is TLS encrypted */ @@ -775,7 +788,7 @@ static void nvme_tcp_handle_c2h_term(struct nvme_tcp_queue *queue, [NVME_TCP_FES_PDU_SEQ_ERR] = "PDU Sequence Error", [NVME_TCP_FES_HDR_DIGEST_ERR] = "Header Digest Error", [NVME_TCP_FES_DATA_OUT_OF_RANGE] = "Data Transfer Out Of Range", - [NVME_TCP_FES_R2T_LIMIT_EXCEEDED] = "R2T Limit Exceeded", + [NVME_TCP_FES_DATA_LIMIT_EXCEEDED] = "Data Transfer Limit Exceeded", [NVME_TCP_FES_UNSUPPORTED_PARAM] = "Unsupported Parameter", }; @@ -818,6 +831,16 @@ static int nvme_tcp_recv_pdu(struct nvme_tcp_queue *queue, struct sk_buff *skb, return 0; hdr = queue->pdu; + if (unlikely(hdr->hlen != sizeof(struct nvme_tcp_rsp_pdu))) { + if (!nvme_tcp_recv_pdu_supported(hdr->type)) + goto unsupported_pdu; + + dev_err(queue->ctrl->ctrl.device, + "pdu type %d has unexpected header length (%d)\n", + hdr->type, hdr->hlen); + return -EPROTO; + } + if (unlikely(hdr->type == nvme_tcp_c2h_term)) { /* * C2HTermReq never includes Header or Data digests. 
@@ -850,10 +873,13 @@ static int nvme_tcp_recv_pdu(struct nvme_tcp_queue *queue, struct sk_buff *skb, nvme_tcp_init_recv_ctx(queue); return nvme_tcp_handle_r2t(queue, (void *)queue->pdu); default: - dev_err(queue->ctrl->ctrl.device, - "unsupported pdu type (%d)\n", hdr->type); - return -EINVAL; + goto unsupported_pdu; } + +unsupported_pdu: + dev_err(queue->ctrl->ctrl.device, + "unsupported pdu type (%d)\n", hdr->type); + return -EINVAL; } static inline void nvme_tcp_end_request(struct request *rq, u16 status) @@ -1495,11 +1521,11 @@ static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue) msg.msg_flags = MSG_WAITALL; ret = kernel_recvmsg(queue->sock, &msg, &iov, 1, iov.iov_len, msg.msg_flags); - if (ret < sizeof(*icresp)) { + if (ret >= 0 && ret < sizeof(*icresp)) + ret = -ECONNRESET; + if (ret < 0) { pr_warn("queue %d: failed to receive icresp, error %d\n", nvme_tcp_queue_id(queue), ret); - if (ret >= 0) - ret = -ECONNRESET; goto free_icresp; } ret = -ENOTCONN; @@ -2699,6 +2725,7 @@ static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) { struct nvme_tcp_queue *queue = hctx->driver_data; struct sock *sk = queue->sock->sk; + int ret; if (!test_bit(NVME_TCP_Q_LIVE, &queue->flags)) return 0; @@ -2706,9 +2733,9 @@ static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) set_bit(NVME_TCP_Q_POLLING, &queue->flags); if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue)) sk_busy_loop(sk, true); - nvme_tcp_try_recv(queue); + ret = nvme_tcp_try_recv(queue); clear_bit(NVME_TCP_Q_POLLING, &queue->flags); - return queue->nr_cqe; + return ret < 0 ? ret : queue->nr_cqe; } static int nvme_tcp_get_address(struct nvme_ctrl *ctrl, char *buf, int size) diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index d2c1233981e1..fcf4f460dc9a 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -647,7 +647,6 @@ void nvmet_subsys_disc_changed(struct nvmet_subsys *subsys, struct nvmet_host *host); void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type, u8 event_info, u8 log_page); -bool nvmet_subsys_nsid_exists(struct nvmet_subsys *subsys, u32 nsid); #define NVMET_MIN_QUEUE_SIZE 16 #define NVMET_MAX_QUEUE_SIZE 1024 diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 7c51c2a8c109..4f9cac8a5abe 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -571,10 +571,16 @@ static void nvmet_tcp_queue_response(struct nvmet_req *req) struct nvmet_tcp_cmd *cmd = container_of(req, struct nvmet_tcp_cmd, req); struct nvmet_tcp_queue *queue = cmd->queue; + enum nvmet_tcp_recv_state queue_state; + struct nvmet_tcp_cmd *queue_cmd; struct nvme_sgl_desc *sgl; u32 len; - if (unlikely(cmd == queue->cmd)) { + /* Pairs with store_release in nvmet_prepare_receive_pdu() */ + queue_state = smp_load_acquire(&queue->rcv_state); + queue_cmd = READ_ONCE(queue->cmd); + + if (unlikely(cmd == queue_cmd)) { sgl = &cmd->req.cmd->common.dptr.sgl; len = le32_to_cpu(sgl->length); @@ -583,7 +589,7 @@ static void nvmet_tcp_queue_response(struct nvmet_req *req) * Avoid using helpers, this might happen before * nvmet_req_init is completed. 
*/ - if (queue->rcv_state == NVMET_TCP_RECV_PDU && + if (queue_state == NVMET_TCP_RECV_PDU && len && len <= cmd->req.port->inline_data_size && nvme_is_write(cmd->req.cmd)) return; @@ -847,8 +853,9 @@ static void nvmet_prepare_receive_pdu(struct nvmet_tcp_queue *queue) { queue->offset = 0; queue->left = sizeof(struct nvme_tcp_hdr); - queue->cmd = NULL; - queue->rcv_state = NVMET_TCP_RECV_PDU; + WRITE_ONCE(queue->cmd, NULL); + /* Ensure rcv_state is visible only after queue->cmd is set */ + smp_store_release(&queue->rcv_state, NVMET_TCP_RECV_PDU); } static void nvmet_tcp_free_crypto(struct nvmet_tcp_queue *queue) diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c index 75e819f66a56..ee2e31522d7e 100644 --- a/drivers/of/of_reserved_mem.c +++ b/drivers/of/of_reserved_mem.c @@ -415,12 +415,12 @@ static int __init __reserved_mem_alloc_size(unsigned long node, const char *unam prop = of_get_flat_dt_prop(node, "alignment", &len); if (prop) { - if (len != dt_root_size_cells * sizeof(__be32)) { + if (len != dt_root_addr_cells * sizeof(__be32)) { pr_err("invalid alignment property in '%s' node.\n", uname); return -EINVAL; } - align = dt_mem_next_cell(dt_root_size_cells, &prop); + align = dt_mem_next_cell(dt_root_addr_cells, &prop); } nomap = of_get_flat_dt_prop(node, "no-map", NULL) != NULL; diff --git a/drivers/phy/freescale/phy-fsl-samsung-hdmi.c b/drivers/phy/freescale/phy-fsl-samsung-hdmi.c index 45004f598e4d..e4c0a82d16d9 100644 --- a/drivers/phy/freescale/phy-fsl-samsung-hdmi.c +++ b/drivers/phy/freescale/phy-fsl-samsung-hdmi.c @@ -325,7 +325,7 @@ to_fsl_samsung_hdmi_phy(struct clk_hw *hw) return container_of(hw, struct fsl_samsung_hdmi_phy, hw); } -static void +static int fsl_samsung_hdmi_phy_configure_pll_lock_det(struct fsl_samsung_hdmi_phy *phy, const struct phy_config *cfg) { @@ -341,6 +341,9 @@ fsl_samsung_hdmi_phy_configure_pll_lock_det(struct fsl_samsung_hdmi_phy *phy, break; } + if (unlikely(div == 4)) + return -EINVAL; + writeb(FIELD_PREP(REG12_CK_DIV_MASK, div), phy->regs + PHY_REG(12)); /* @@ -364,6 +367,8 @@ fsl_samsung_hdmi_phy_configure_pll_lock_det(struct fsl_samsung_hdmi_phy *phy, FIELD_PREP(REG14_RP_CODE_MASK, 2) | FIELD_PREP(REG14_TG_CODE_HIGH_MASK, fld_tg_code >> 8), phy->regs + PHY_REG(14)); + + return 0; } static unsigned long fsl_samsung_hdmi_phy_find_pms(unsigned long fout, u8 *p, u16 *m, u8 *s) @@ -466,7 +471,11 @@ static int fsl_samsung_hdmi_phy_configure(struct fsl_samsung_hdmi_phy *phy, writeb(REG21_SEL_TX_CK_INV | FIELD_PREP(REG21_PMS_S_MASK, cfg->pll_div_regs[2] >> 4), phy->regs + PHY_REG(21)); - fsl_samsung_hdmi_phy_configure_pll_lock_det(phy, cfg); + ret = fsl_samsung_hdmi_phy_configure_pll_lock_det(phy, cfg); + if (ret) { + dev_err(phy->dev, "pixclock too large\n"); + return ret; + } writeb(REG33_FIX_DA | REG33_MODE_SET_DONE, phy->regs + PHY_REG(33)); diff --git a/drivers/phy/rockchip/Kconfig b/drivers/phy/rockchip/Kconfig index 2f7a05f21dc5..dcb8e1628632 100644 --- a/drivers/phy/rockchip/Kconfig +++ b/drivers/phy/rockchip/Kconfig @@ -125,6 +125,7 @@ config PHY_ROCKCHIP_USBDP depends on ARCH_ROCKCHIP && OF depends on TYPEC select GENERIC_PHY + select USB_COMMON help Enable this to support the Rockchip USB3.0/DP combo PHY with Samsung IP block. This is required for USB3 support on RK3588. 
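A brief aside on the release/acquire pairing used in the nvmet_tcp queue_response change above: smp_store_release() guarantees that every store issued before it (here, the WRITE_ONCE clearing queue->cmd) is visible to any CPU that observes the released value via smp_load_acquire() on the same location. The sketch below shows only the general pattern, not the driver's code; the demo_queue type and field names are invented for illustration, and it assumes the usual kernel headers for the barrier and ONCE macros.

#include <linux/atomic.h>	/* smp_store_release(), smp_load_acquire() */
#include <linux/compiler.h>	/* WRITE_ONCE(), READ_ONCE() */

struct demo_queue {
	void *cmd;
	int rcv_state;
};

/* Writer: publish the new state only after the data it guards is set. */
static void demo_prepare(struct demo_queue *q)
{
	WRITE_ONCE(q->cmd, NULL);
	/* Pairs with smp_load_acquire() in demo_observe() below. */
	smp_store_release(&q->rcv_state, 1);
}

/* Reader: if the released rcv_state is seen, the cleared cmd is too. */
static bool demo_observe(struct demo_queue *q)
{
	int state = smp_load_acquire(&q->rcv_state);

	return state == 1 && !READ_ONCE(q->cmd);
}

Without the pairing, the reader could observe the new rcv_state while still seeing a stale queue->cmd, which is exactly the race the patch closes.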
diff --git a/drivers/phy/rockchip/phy-rockchip-naneng-combphy.c b/drivers/phy/rockchip/phy-rockchip-naneng-combphy.c index a1532ef8bbe9..8c3ce57f8915 100644 --- a/drivers/phy/rockchip/phy-rockchip-naneng-combphy.c +++ b/drivers/phy/rockchip/phy-rockchip-naneng-combphy.c @@ -324,7 +324,10 @@ static int rockchip_combphy_parse_dt(struct device *dev, struct rockchip_combphy priv->ext_refclk = device_property_present(dev, "rockchip,ext-refclk"); - priv->phy_rst = devm_reset_control_get(dev, "phy"); + priv->phy_rst = devm_reset_control_get_exclusive(dev, "phy"); + /* fallback to old behaviour */ + if (PTR_ERR(priv->phy_rst) == -ENOENT) + priv->phy_rst = devm_reset_control_array_get_exclusive(dev); if (IS_ERR(priv->phy_rst)) return dev_err_probe(dev, PTR_ERR(priv->phy_rst), "failed to get phy reset\n"); diff --git a/drivers/phy/samsung/phy-exynos5-usbdrd.c b/drivers/phy/samsung/phy-exynos5-usbdrd.c index c421b495eb0f..46b8f6987c62 100644 --- a/drivers/phy/samsung/phy-exynos5-usbdrd.c +++ b/drivers/phy/samsung/phy-exynos5-usbdrd.c @@ -488,9 +488,9 @@ exynos5_usbdrd_pipe3_set_refclk(struct phy_usb_instance *inst) reg |= PHYCLKRST_REFCLKSEL_EXT_REFCLK; /* FSEL settings corresponding to reference clock */ - reg &= ~PHYCLKRST_FSEL_PIPE_MASK | - PHYCLKRST_MPLL_MULTIPLIER_MASK | - PHYCLKRST_SSC_REFCLKSEL_MASK; + reg &= ~(PHYCLKRST_FSEL_PIPE_MASK | + PHYCLKRST_MPLL_MULTIPLIER_MASK | + PHYCLKRST_SSC_REFCLKSEL_MASK); switch (phy_drd->extrefclk) { case EXYNOS5_FSEL_50MHZ: reg |= (PHYCLKRST_MPLL_MULTIPLIER_50M_REF | @@ -532,9 +532,9 @@ exynos5_usbdrd_utmi_set_refclk(struct phy_usb_instance *inst) reg &= ~PHYCLKRST_REFCLKSEL_MASK; reg |= PHYCLKRST_REFCLKSEL_EXT_REFCLK; - reg &= ~PHYCLKRST_FSEL_UTMI_MASK | - PHYCLKRST_MPLL_MULTIPLIER_MASK | - PHYCLKRST_SSC_REFCLKSEL_MASK; + reg &= ~(PHYCLKRST_FSEL_UTMI_MASK | + PHYCLKRST_MPLL_MULTIPLIER_MASK | + PHYCLKRST_SSC_REFCLKSEL_MASK); reg |= PHYCLKRST_FSEL(phy_drd->extrefclk); return reg; @@ -1296,14 +1296,17 @@ static int exynos5_usbdrd_gs101_phy_exit(struct phy *phy) struct exynos5_usbdrd_phy *phy_drd = to_usbdrd_phy(inst); int ret; + if (inst->phy_cfg->id == EXYNOS5_DRDPHY_UTMI) { + ret = exynos850_usbdrd_phy_exit(phy); + if (ret) + return ret; + } + + exynos5_usbdrd_phy_isol(inst, true); + if (inst->phy_cfg->id != EXYNOS5_DRDPHY_UTMI) return 0; - ret = exynos850_usbdrd_phy_exit(phy); - if (ret) - return ret; - - exynos5_usbdrd_phy_isol(inst, true); return regulator_bulk_disable(phy_drd->drv_data->n_regulators, phy_drd->regulators); } diff --git a/drivers/phy/st/phy-stm32-combophy.c b/drivers/phy/st/phy-stm32-combophy.c index 49e9fa90a681..607b4d607eb5 100644 --- a/drivers/phy/st/phy-stm32-combophy.c +++ b/drivers/phy/st/phy-stm32-combophy.c @@ -111,6 +111,7 @@ static const struct clk_impedance imp_lookup[] = { { 4204000, { 511000, 609000, 706000, 802000 } }, { 3999000, { 571000, 648000, 726000, 803000 } } }; +#define DEFAULT_IMP_INDEX 3 /* Default impedance is 50 Ohm */ static int stm32_impedance_tune(struct stm32_combophy *combophy) { @@ -119,10 +120,9 @@ static int stm32_impedance_tune(struct stm32_combophy *combophy) u8 imp_of, vswing_of; u32 max_imp = imp_lookup[0].microohm; u32 min_imp = imp_lookup[imp_size - 1].microohm; - u32 max_vswing = imp_lookup[imp_size - 1].vswing[vswing_size - 1]; + u32 max_vswing; u32 min_vswing = imp_lookup[0].vswing[0]; u32 val; - u32 regval; if (!of_property_read_u32(combophy->dev->of_node, "st,output-micro-ohms", &val)) { if (val < min_imp || val > max_imp) { @@ -130,45 +130,43 @@ static int stm32_impedance_tune(struct 
stm32_combophy *combophy) return -EINVAL; } - regval = 0; - for (imp_of = 0; imp_of < ARRAY_SIZE(imp_lookup); imp_of++) { - if (imp_lookup[imp_of].microohm <= val) { - regval = FIELD_PREP(STM32MP25_PCIEPRG_IMPCTRL_OHM, imp_of); + for (imp_of = 0; imp_of < ARRAY_SIZE(imp_lookup); imp_of++) + if (imp_lookup[imp_of].microohm <= val) break; - } - } + + if (WARN_ON(imp_of == ARRAY_SIZE(imp_lookup))) + return -EINVAL; dev_dbg(combophy->dev, "Set %u micro-ohms output impedance\n", imp_lookup[imp_of].microohm); regmap_update_bits(combophy->regmap, SYSCFG_PCIEPRGCR, STM32MP25_PCIEPRG_IMPCTRL_OHM, - regval); - } else { - regmap_read(combophy->regmap, SYSCFG_PCIEPRGCR, &val); - imp_of = FIELD_GET(STM32MP25_PCIEPRG_IMPCTRL_OHM, val); - } + FIELD_PREP(STM32MP25_PCIEPRG_IMPCTRL_OHM, imp_of)); + } else + imp_of = DEFAULT_IMP_INDEX; if (!of_property_read_u32(combophy->dev->of_node, "st,output-vswing-microvolt", &val)) { + max_vswing = imp_lookup[imp_of].vswing[vswing_size - 1]; + if (val < min_vswing || val > max_vswing) { dev_err(combophy->dev, "Invalid value %u for output vswing\n", val); return -EINVAL; } - regval = 0; - for (vswing_of = 0; vswing_of < ARRAY_SIZE(imp_lookup[imp_of].vswing); vswing_of++) { - if (imp_lookup[imp_of].vswing[vswing_of] >= val) { - regval = FIELD_PREP(STM32MP25_PCIEPRG_IMPCTRL_VSWING, vswing_of); + for (vswing_of = 0; vswing_of < ARRAY_SIZE(imp_lookup[imp_of].vswing); vswing_of++) + if (imp_lookup[imp_of].vswing[vswing_of] >= val) break; - } - } + + if (WARN_ON(vswing_of == ARRAY_SIZE(imp_lookup[imp_of].vswing))) + return -EINVAL; dev_dbg(combophy->dev, "Set %u microvolt swing\n", imp_lookup[imp_of].vswing[vswing_of]); regmap_update_bits(combophy->regmap, SYSCFG_PCIEPRGCR, STM32MP25_PCIEPRG_IMPCTRL_VSWING, - regval); + FIELD_PREP(STM32MP25_PCIEPRG_IMPCTRL_VSWING, vswing_of)); } return 0; diff --git a/drivers/phy/tegra/xusb-tegra186.c b/drivers/phy/tegra/xusb-tegra186.c index 0f60d5d1c167..fae6242aa730 100644 --- a/drivers/phy/tegra/xusb-tegra186.c +++ b/drivers/phy/tegra/xusb-tegra186.c @@ -928,6 +928,7 @@ static int tegra186_utmi_phy_init(struct phy *phy) unsigned int index = lane->index; struct device *dev = padctl->dev; int err; + u32 reg; port = tegra_xusb_find_usb2_port(padctl, index); if (!port) { @@ -935,6 +936,16 @@ static int tegra186_utmi_phy_init(struct phy *phy) return -ENODEV; } + if (port->mode == USB_DR_MODE_OTG || + port->mode == USB_DR_MODE_PERIPHERAL) { + /* reset VBUS&ID OVERRIDE */ + reg = padctl_readl(padctl, USB2_VBUS_ID); + reg &= ~VBUS_OVERRIDE; + reg &= ~ID_OVERRIDE(~0); + reg |= ID_OVERRIDE_FLOATING; + padctl_writel(padctl, reg, USB2_VBUS_ID); + } + if (port->supply && port->mode == USB_DR_MODE_HOST) { err = regulator_enable(port->supply); if (err) { diff --git a/drivers/phy/ti/phy-gmii-sel.c b/drivers/phy/ti/phy-gmii-sel.c index e0ca59ae3153..ff5d5e29629f 100644 --- a/drivers/phy/ti/phy-gmii-sel.c +++ b/drivers/phy/ti/phy-gmii-sel.c @@ -424,6 +424,12 @@ static int phy_gmii_sel_init_ports(struct phy_gmii_sel_priv *priv) return 0; } +static const struct regmap_config phy_gmii_sel_regmap_cfg = { + .reg_bits = 32, + .val_bits = 32, + .reg_stride = 4, +}; + static int phy_gmii_sel_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -468,7 +474,14 @@ static int phy_gmii_sel_probe(struct platform_device *pdev) priv->regmap = syscon_node_to_regmap(node->parent); if (IS_ERR(priv->regmap)) { - priv->regmap = device_node_to_regmap(node); + void __iomem *base; + + base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(base)) + 
return dev_err_probe(dev, PTR_ERR(base), + "failed to get base memory resource\n"); + + priv->regmap = regmap_init_mmio(dev, base, &phy_gmii_sel_regmap_cfg); if (IS_ERR(priv->regmap)) return dev_err_probe(dev, PTR_ERR(priv->regmap), "Failed to get syscon\n"); diff --git a/drivers/pinctrl/bcm/pinctrl-bcm281xx.c b/drivers/pinctrl/bcm/pinctrl-bcm281xx.c index 73dbf29c002f..cf6efa9c0364 100644 --- a/drivers/pinctrl/bcm/pinctrl-bcm281xx.c +++ b/drivers/pinctrl/bcm/pinctrl-bcm281xx.c @@ -974,7 +974,7 @@ static const struct regmap_config bcm281xx_pinctrl_regmap_config = { .reg_bits = 32, .reg_stride = 4, .val_bits = 32, - .max_register = BCM281XX_PIN_VC_CAM3_SDA, + .max_register = BCM281XX_PIN_VC_CAM3_SDA * 4, }; static int bcm281xx_pinctrl_get_groups_count(struct pinctrl_dev *pctldev) diff --git a/drivers/pinctrl/nuvoton/pinctrl-npcm8xx.c b/drivers/pinctrl/nuvoton/pinctrl-npcm8xx.c index 471f644c5eef..d09a5e9b2eca 100644 --- a/drivers/pinctrl/nuvoton/pinctrl-npcm8xx.c +++ b/drivers/pinctrl/nuvoton/pinctrl-npcm8xx.c @@ -2374,6 +2374,9 @@ static int npcm8xx_gpio_fw(struct npcm8xx_pinctrl *pctrl) pctrl->gpio_bank[id].gc.parent = dev; pctrl->gpio_bank[id].gc.fwnode = child; pctrl->gpio_bank[id].gc.label = devm_kasprintf(dev, GFP_KERNEL, "%pfw", child); + if (pctrl->gpio_bank[id].gc.label == NULL) + return -ENOMEM; + pctrl->gpio_bank[id].gc.dbg_show = npcmgpio_dbg_show; pctrl->gpio_bank[id].direction_input = pctrl->gpio_bank[id].gc.direction_input; pctrl->gpio_bank[id].gc.direction_input = npcmgpio_direction_input; diff --git a/drivers/pinctrl/spacemit/Kconfig b/drivers/pinctrl/spacemit/Kconfig index 168f8a5ffbb9..a2f98b3f8a75 100644 --- a/drivers/pinctrl/spacemit/Kconfig +++ b/drivers/pinctrl/spacemit/Kconfig @@ -4,9 +4,10 @@ # config PINCTRL_SPACEMIT_K1 - tristate "SpacemiT K1 SoC Pinctrl driver" + bool "SpacemiT K1 SoC Pinctrl driver" depends on ARCH_SPACEMIT || COMPILE_TEST depends on OF + default y select GENERIC_PINCTRL_GROUPS select GENERIC_PINMUX_FUNCTIONS select GENERIC_PINCONF diff --git a/drivers/pinctrl/spacemit/pinctrl-k1.c b/drivers/pinctrl/spacemit/pinctrl-k1.c index a32579d73613..59fd555ff38d 100644 --- a/drivers/pinctrl/spacemit/pinctrl-k1.c +++ b/drivers/pinctrl/spacemit/pinctrl-k1.c @@ -1044,7 +1044,7 @@ static struct platform_driver k1_pinctrl_driver = { .of_match_table = k1_pinctrl_ids, }, }; -module_platform_driver(k1_pinctrl_driver); +builtin_platform_driver(k1_pinctrl_driver); MODULE_AUTHOR("Yixun Lan <dlan@gentoo.org>"); MODULE_DESCRIPTION("Pinctrl driver for the SpacemiT K1 SoC"); diff --git a/drivers/platform/x86/amd/pmf/core.c b/drivers/platform/x86/amd/pmf/core.c index 764cc1fe90ae..a2cb2d5544f5 100644 --- a/drivers/platform/x86/amd/pmf/core.c +++ b/drivers/platform/x86/amd/pmf/core.c @@ -452,6 +452,7 @@ static int amd_pmf_probe(struct platform_device *pdev) mutex_init(&dev->lock); mutex_init(&dev->update_mutex); + mutex_init(&dev->cb_mutex); apmf_acpi_init(dev); platform_set_drvdata(pdev, dev); @@ -477,6 +478,7 @@ static void amd_pmf_remove(struct platform_device *pdev) amd_pmf_dbgfs_unregister(dev); mutex_destroy(&dev->lock); mutex_destroy(&dev->update_mutex); + mutex_destroy(&dev->cb_mutex); kfree(dev->buf); } diff --git a/drivers/platform/x86/amd/pmf/pmf.h b/drivers/platform/x86/amd/pmf/pmf.h index 41b2b91b8fdc..e6bdee68ccf3 100644 --- a/drivers/platform/x86/amd/pmf/pmf.h +++ b/drivers/platform/x86/amd/pmf/pmf.h @@ -106,9 +106,12 @@ struct cookie_header { #define PMF_TA_IF_VERSION_MAJOR 1 #define TA_PMF_ACTION_MAX 32 #define TA_PMF_UNDO_MAX 8 -#define 
TA_OUTPUT_RESERVED_MEM 906 +#define TA_OUTPUT_RESERVED_MEM 922 #define MAX_OPERATION_PARAMS 4 +#define TA_ERROR_CRYPTO_INVALID_PARAM 0x20002 +#define TA_ERROR_CRYPTO_BIN_TOO_LARGE 0x2000d + #define PMF_IF_V1 1 #define PMF_IF_V2 2 diff --git a/drivers/platform/x86/amd/pmf/sps.c b/drivers/platform/x86/amd/pmf/sps.c index e6cf0b22dac3..d3083383f11f 100644 --- a/drivers/platform/x86/amd/pmf/sps.c +++ b/drivers/platform/x86/amd/pmf/sps.c @@ -297,12 +297,14 @@ int amd_pmf_get_pprof_modes(struct amd_pmf_dev *pmf) switch (pmf->current_profile) { case PLATFORM_PROFILE_PERFORMANCE: + case PLATFORM_PROFILE_BALANCED_PERFORMANCE: mode = POWER_MODE_PERFORMANCE; break; case PLATFORM_PROFILE_BALANCED: mode = POWER_MODE_BALANCED_POWER; break; case PLATFORM_PROFILE_LOW_POWER: + case PLATFORM_PROFILE_QUIET: mode = POWER_MODE_POWER_SAVER; break; default: @@ -387,6 +389,14 @@ static int amd_pmf_profile_set(struct device *dev, return 0; } +static int amd_pmf_hidden_choices(void *drvdata, unsigned long *choices) +{ + set_bit(PLATFORM_PROFILE_QUIET, choices); + set_bit(PLATFORM_PROFILE_BALANCED_PERFORMANCE, choices); + + return 0; +} + static int amd_pmf_profile_probe(void *drvdata, unsigned long *choices) { set_bit(PLATFORM_PROFILE_LOW_POWER, choices); @@ -398,6 +408,7 @@ static int amd_pmf_profile_probe(void *drvdata, unsigned long *choices) static const struct platform_profile_ops amd_pmf_profile_ops = { .probe = amd_pmf_profile_probe, + .hidden_choices = amd_pmf_hidden_choices, .profile_get = amd_pmf_profile_get, .profile_set = amd_pmf_profile_set, }; diff --git a/drivers/platform/x86/amd/pmf/tee-if.c b/drivers/platform/x86/amd/pmf/tee-if.c index 8c88769ea1d8..ceaff1ebb7b9 100644 --- a/drivers/platform/x86/amd/pmf/tee-if.c +++ b/drivers/platform/x86/amd/pmf/tee-if.c @@ -27,8 +27,11 @@ module_param(pb_side_load, bool, 0444); MODULE_PARM_DESC(pb_side_load, "Sideload policy binaries debug policy failures"); #endif -static const uuid_t amd_pmf_ta_uuid = UUID_INIT(0x6fd93b77, 0x3fb8, 0x524d, - 0xb1, 0x2d, 0xc5, 0x29, 0xb1, 0x3d, 0x85, 0x43); +static const uuid_t amd_pmf_ta_uuid[] = { UUID_INIT(0xd9b39bf2, 0x66bd, 0x4154, 0xaf, 0xb8, 0x8a, + 0xcc, 0x2b, 0x2b, 0x60, 0xd6), + UUID_INIT(0x6fd93b77, 0x3fb8, 0x524d, 0xb1, 0x2d, 0xc5, + 0x29, 0xb1, 0x3d, 0x85, 0x43), + }; static const char *amd_pmf_uevent_as_str(unsigned int state) { @@ -321,9 +324,9 @@ static int amd_pmf_start_policy_engine(struct amd_pmf_dev *dev) */ schedule_delayed_work(&dev->pb_work, msecs_to_jiffies(pb_actions_ms * 3)); } else { - dev_err(dev->dev, "ta invoke cmd init failed err: %x\n", res); + dev_dbg(dev->dev, "ta invoke cmd init failed err: %x\n", res); dev->smart_pc_enabled = false; - return -EIO; + return res; } return 0; @@ -390,12 +393,12 @@ static int amd_pmf_amdtee_ta_match(struct tee_ioctl_version_data *ver, const voi return ver->impl_id == TEE_IMPL_ID_AMDTEE; } -static int amd_pmf_ta_open_session(struct tee_context *ctx, u32 *id) +static int amd_pmf_ta_open_session(struct tee_context *ctx, u32 *id, const uuid_t *uuid) { struct tee_ioctl_open_session_arg sess_arg = {}; int rc; - export_uuid(sess_arg.uuid, &amd_pmf_ta_uuid); + export_uuid(sess_arg.uuid, uuid); sess_arg.clnt_login = TEE_IOCTL_LOGIN_PUBLIC; sess_arg.num_params = 0; @@ -434,7 +437,7 @@ static int amd_pmf_register_input_device(struct amd_pmf_dev *dev) return 0; } -static int amd_pmf_tee_init(struct amd_pmf_dev *dev) +static int amd_pmf_tee_init(struct amd_pmf_dev *dev, const uuid_t *uuid) { u32 size; int ret; @@ -445,7 +448,7 @@ static int amd_pmf_tee_init(struct amd_pmf_dev 
*dev) return PTR_ERR(dev->tee_ctx); } - ret = amd_pmf_ta_open_session(dev->tee_ctx, &dev->session_id); + ret = amd_pmf_ta_open_session(dev->tee_ctx, &dev->session_id, uuid); if (ret) { dev_err(dev->dev, "Failed to open TA session (%d)\n", ret); ret = -EINVAL; @@ -489,7 +492,8 @@ static void amd_pmf_tee_deinit(struct amd_pmf_dev *dev) int amd_pmf_init_smart_pc(struct amd_pmf_dev *dev) { - int ret; + bool status; + int ret, i; ret = apmf_check_smart_pc(dev); if (ret) { @@ -502,10 +506,6 @@ int amd_pmf_init_smart_pc(struct amd_pmf_dev *dev) return -ENODEV; } - ret = amd_pmf_tee_init(dev); - if (ret) - return ret; - INIT_DELAYED_WORK(&dev->pb_work, amd_pmf_invoke_cmd); ret = amd_pmf_set_dram_addr(dev, true); @@ -534,8 +534,30 @@ int amd_pmf_init_smart_pc(struct amd_pmf_dev *dev) goto error; } - ret = amd_pmf_start_policy_engine(dev); - if (ret) + for (i = 0; i < ARRAY_SIZE(amd_pmf_ta_uuid); i++) { + ret = amd_pmf_tee_init(dev, &amd_pmf_ta_uuid[i]); + if (ret) + return ret; + + ret = amd_pmf_start_policy_engine(dev); + switch (ret) { + case TA_PMF_TYPE_SUCCESS: + status = true; + break; + case TA_ERROR_CRYPTO_INVALID_PARAM: + case TA_ERROR_CRYPTO_BIN_TOO_LARGE: + amd_pmf_tee_deinit(dev); + status = false; + break; + default: + goto error; + } + + if (status) + break; + } + + if (!status && !pb_side_load) goto error; if (pb_side_load) diff --git a/drivers/platform/x86/intel/hid.c b/drivers/platform/x86/intel/hid.c index 927a2993f616..88a1a9ff2f34 100644 --- a/drivers/platform/x86/intel/hid.c +++ b/drivers/platform/x86/intel/hid.c @@ -139,6 +139,13 @@ static const struct dmi_system_id button_array_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Surface Go 3"), }, }, + { + .ident = "Microsoft Surface Go 4", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"), + DMI_MATCH(DMI_PRODUCT_NAME, "Surface Go 4"), + }, + }, { } }; diff --git a/drivers/platform/x86/intel/vsec.c b/drivers/platform/x86/intel/vsec.c index 8272f1dd0fbc..db3c031d1757 100644 --- a/drivers/platform/x86/intel/vsec.c +++ b/drivers/platform/x86/intel/vsec.c @@ -404,6 +404,11 @@ static const struct intel_vsec_platform_info oobmsm_info = { .caps = VSEC_CAP_TELEMETRY | VSEC_CAP_SDSI | VSEC_CAP_TPMI, }; +/* DMR OOBMSM info */ +static const struct intel_vsec_platform_info dmr_oobmsm_info = { + .caps = VSEC_CAP_TELEMETRY | VSEC_CAP_TPMI, +}; + /* TGL info */ static const struct intel_vsec_platform_info tgl_info = { .caps = VSEC_CAP_TELEMETRY, @@ -420,6 +425,7 @@ static const struct intel_vsec_platform_info lnl_info = { #define PCI_DEVICE_ID_INTEL_VSEC_MTL_M 0x7d0d #define PCI_DEVICE_ID_INTEL_VSEC_MTL_S 0xad0d #define PCI_DEVICE_ID_INTEL_VSEC_OOBMSM 0x09a7 +#define PCI_DEVICE_ID_INTEL_VSEC_OOBMSM_DMR 0x09a1 #define PCI_DEVICE_ID_INTEL_VSEC_RPL 0xa77d #define PCI_DEVICE_ID_INTEL_VSEC_TGL 0x9a0d #define PCI_DEVICE_ID_INTEL_VSEC_LNL_M 0x647d @@ -430,6 +436,7 @@ static const struct pci_device_id intel_vsec_pci_ids[] = { { PCI_DEVICE_DATA(INTEL, VSEC_MTL_M, &mtl_info) }, { PCI_DEVICE_DATA(INTEL, VSEC_MTL_S, &mtl_info) }, { PCI_DEVICE_DATA(INTEL, VSEC_OOBMSM, &oobmsm_info) }, + { PCI_DEVICE_DATA(INTEL, VSEC_OOBMSM_DMR, &dmr_oobmsm_info) }, { PCI_DEVICE_DATA(INTEL, VSEC_RPL, &tgl_info) }, { PCI_DEVICE_DATA(INTEL, VSEC_TGL, &tgl_info) }, { PCI_DEVICE_DATA(INTEL, VSEC_LNL_M, &lnl_info) }, diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 72a10ed2017c..1cc91173e012 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -9972,6 +9972,7 @@ static const struct 
tpacpi_quirk battery_quirk_table[] __initconst = { * Individual addressing is broken on models that expose the * primary battery as BAT1. */ + TPACPI_Q_LNV('G', '8', true), /* ThinkPad X131e */ TPACPI_Q_LNV('8', 'F', true), /* Thinkpad X120e */ TPACPI_Q_LNV('J', '7', true), /* B5400 */ TPACPI_Q_LNV('J', 'I', true), /* Thinkpad 11e */ diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c index b651087f426f..34c616bd0a02 100644 --- a/drivers/ptp/ptp_ocp.c +++ b/drivers/ptp/ptp_ocp.c @@ -3959,9 +3959,6 @@ _signal_summary_show(struct seq_file *s, struct ptp_ocp *bp, int nr) bool on; u32 val; - if (!signal) - return; - on = signal->running; sprintf(label, "GEN%d", nr + 1); seq_printf(s, "%7s: %s, period:%llu duty:%d%% phase:%llu pol:%d", diff --git a/drivers/rapidio/devices/rio_mport_cdev.c b/drivers/rapidio/devices/rio_mport_cdev.c index 27afbb9d544b..cbf531d0ba68 100644 --- a/drivers/rapidio/devices/rio_mport_cdev.c +++ b/drivers/rapidio/devices/rio_mport_cdev.c @@ -1742,7 +1742,8 @@ static int rio_mport_add_riodev(struct mport_cdev_priv *priv, err = rio_add_net(net); if (err) { rmcd_debug(RDEV, "failed to register net, err=%d", err); - kfree(net); + put_device(&net->dev); + mport->net = NULL; goto cleanup; } } diff --git a/drivers/rapidio/rio-scan.c b/drivers/rapidio/rio-scan.c index fdcf742b2adb..c12941f71e2c 100644 --- a/drivers/rapidio/rio-scan.c +++ b/drivers/rapidio/rio-scan.c @@ -871,7 +871,10 @@ static struct rio_net *rio_scan_alloc_net(struct rio_mport *mport, dev_set_name(&net->dev, "rnet_%d", net->id); net->dev.parent = &mport->dev; net->dev.release = rio_scan_release_dev; - rio_add_net(net); + if (rio_add_net(net)) { + put_device(&net->dev); + net = NULL; + } } return net; diff --git a/drivers/slimbus/messaging.c b/drivers/slimbus/messaging.c index e7aa9bd4b44b..6f01d944f9c6 100644 --- a/drivers/slimbus/messaging.c +++ b/drivers/slimbus/messaging.c @@ -148,8 +148,9 @@ int slim_do_transfer(struct slim_controller *ctrl, struct slim_msg_txn *txn) } ret = ctrl->xfer_msg(ctrl, txn); - - if (!ret && need_tid && !txn->msg->comp) { + if (ret == -ETIMEDOUT) { + slim_free_txn_tid(ctrl, txn); + } else if (!ret && need_tid && !txn->msg->comp) { unsigned long ms = txn->rl + HZ; time_left = wait_for_completion_timeout(txn->comp, diff --git a/drivers/spi/atmel-quadspi.c b/drivers/spi/atmel-quadspi.c index d8c9be64d006..244ac0106862 100644 --- a/drivers/spi/atmel-quadspi.c +++ b/drivers/spi/atmel-quadspi.c @@ -930,11 +930,8 @@ static int atmel_qspi_sama7g5_transfer(struct spi_mem *mem, /* Release the chip-select. 
*/ ret = atmel_qspi_reg_sync(aq); - if (ret) { - pm_runtime_mark_last_busy(&aq->pdev->dev); - pm_runtime_put_autosuspend(&aq->pdev->dev); + if (ret) return ret; - } atmel_qspi_write(QSPI_CR_LASTXFER, aq, QSPI_CR); return atmel_qspi_wait_for_completion(aq, QSPI_SR_CSRA); diff --git a/drivers/spi/spi-microchip-core.c b/drivers/spi/spi-microchip-core.c index 5b6af55855ef..62ba0bd9cbb7 100644 --- a/drivers/spi/spi-microchip-core.c +++ b/drivers/spi/spi-microchip-core.c @@ -70,8 +70,7 @@ #define INT_RX_CHANNEL_OVERFLOW BIT(2) #define INT_TX_CHANNEL_UNDERRUN BIT(3) -#define INT_ENABLE_MASK (CONTROL_RX_DATA_INT | CONTROL_TX_DATA_INT | \ - CONTROL_RX_OVER_INT | CONTROL_TX_UNDER_INT) +#define INT_ENABLE_MASK (CONTROL_RX_OVER_INT | CONTROL_TX_UNDER_INT) #define REG_CONTROL (0x00) #define REG_FRAME_SIZE (0x04) @@ -133,10 +132,15 @@ static inline void mchp_corespi_disable(struct mchp_corespi *spi) mchp_corespi_write(spi, REG_CONTROL, control); } -static inline void mchp_corespi_read_fifo(struct mchp_corespi *spi) +static inline void mchp_corespi_read_fifo(struct mchp_corespi *spi, int fifo_max) { - while (spi->rx_len >= spi->n_bytes && !(mchp_corespi_read(spi, REG_STATUS) & STATUS_RXFIFO_EMPTY)) { - u32 data = mchp_corespi_read(spi, REG_RX_DATA); + for (int i = 0; i < fifo_max; i++) { + u32 data; + + while (mchp_corespi_read(spi, REG_STATUS) & STATUS_RXFIFO_EMPTY) + ; + + data = mchp_corespi_read(spi, REG_RX_DATA); spi->rx_len -= spi->n_bytes; @@ -211,11 +215,10 @@ static inline void mchp_corespi_set_xfer_size(struct mchp_corespi *spi, int len) mchp_corespi_write(spi, REG_FRAMESUP, len); } -static inline void mchp_corespi_write_fifo(struct mchp_corespi *spi) +static inline void mchp_corespi_write_fifo(struct mchp_corespi *spi, int fifo_max) { - int fifo_max, i = 0; + int i = 0; - fifo_max = DIV_ROUND_UP(min(spi->tx_len, FIFO_DEPTH), spi->n_bytes); mchp_corespi_set_xfer_size(spi, fifo_max); while ((i < fifo_max) && !(mchp_corespi_read(spi, REG_STATUS) & STATUS_TXFIFO_FULL)) { @@ -413,19 +416,6 @@ static irqreturn_t mchp_corespi_interrupt(int irq, void *dev_id) if (intfield == 0) return IRQ_NONE; - if (intfield & INT_TXDONE) - mchp_corespi_write(spi, REG_INT_CLEAR, INT_TXDONE); - - if (intfield & INT_RXRDY) { - mchp_corespi_write(spi, REG_INT_CLEAR, INT_RXRDY); - - if (spi->rx_len) - mchp_corespi_read_fifo(spi); - } - - if (!spi->rx_len && !spi->tx_len) - finalise = true; - if (intfield & INT_RX_CHANNEL_OVERFLOW) { mchp_corespi_write(spi, REG_INT_CLEAR, INT_RX_CHANNEL_OVERFLOW); finalise = true; @@ -512,9 +502,14 @@ static int mchp_corespi_transfer_one(struct spi_controller *host, mchp_corespi_write(spi, REG_SLAVE_SELECT, spi->pending_slave_select); - while (spi->tx_len) - mchp_corespi_write_fifo(spi); + while (spi->tx_len) { + int fifo_max = DIV_ROUND_UP(min(spi->tx_len, FIFO_DEPTH), spi->n_bytes); + + mchp_corespi_write_fifo(spi, fifo_max); + mchp_corespi_read_fifo(spi, fifo_max); + } + spi_finalize_current_transfer(host); return 1; } diff --git a/drivers/thermal/gov_power_allocator.c b/drivers/thermal/gov_power_allocator.c index 3b644de3292e..0d9f636c80f4 100644 --- a/drivers/thermal/gov_power_allocator.c +++ b/drivers/thermal/gov_power_allocator.c @@ -370,7 +370,7 @@ static void divvy_up_power(struct power_actor *power, int num_actors, for (i = 0; i < num_actors; i++) { struct power_actor *pa = &power[i]; - u64 req_range = (u64)pa->req_power * power_range; + u64 req_range = (u64)pa->weighted_req_power * power_range; pa->granted_power = DIV_ROUND_CLOSEST_ULL(req_range, total_req_power); @@ -641,6 
+641,22 @@ clean_state: return ret; } +static void power_allocator_update_weight(struct power_allocator_params *params) +{ + const struct thermal_trip_desc *td; + struct thermal_instance *instance; + + if (!params->trip_max) + return; + + td = trip_to_trip_desc(params->trip_max); + + params->total_weight = 0; + list_for_each_entry(instance, &td->thermal_instances, trip_node) + if (power_actor_is_valid(instance)) + params->total_weight += instance->weight; +} + static void power_allocator_update_tz(struct thermal_zone_device *tz, enum thermal_notify_event reason) { @@ -656,16 +672,12 @@ static void power_allocator_update_tz(struct thermal_zone_device *tz, if (power_actor_is_valid(instance)) num_actors++; - if (num_actors == params->num_actors) - return; + if (num_actors != params->num_actors) + allocate_actors_buffer(params, num_actors); - allocate_actors_buffer(params, num_actors); - break; + fallthrough; case THERMAL_INSTANCE_WEIGHT_CHANGED: - params->total_weight = 0; - list_for_each_entry(instance, &td->thermal_instances, trip_node) - if (power_actor_is_valid(instance)) - params->total_weight += instance->weight; + power_allocator_update_weight(params); break; default: break; @@ -731,6 +743,8 @@ static int power_allocator_bind(struct thermal_zone_device *tz) tz->governor_data = params; + power_allocator_update_weight(params); + return 0; free_params: diff --git a/drivers/thermal/thermal_of.c b/drivers/thermal/thermal_of.c index 5ab4ce4daaeb..5401f03d6b6c 100644 --- a/drivers/thermal/thermal_of.c +++ b/drivers/thermal/thermal_of.c @@ -274,6 +274,34 @@ static bool thermal_of_get_cooling_spec(struct device_node *map_np, int index, return true; } +static bool thermal_of_cm_lookup(struct device_node *cm_np, + const struct thermal_trip *trip, + struct thermal_cooling_device *cdev, + struct cooling_spec *c) +{ + for_each_child_of_node_scoped(cm_np, child) { + struct device_node *tr_np; + int count, i; + + tr_np = of_parse_phandle(child, "trip", 0); + if (tr_np != trip->priv) + continue; + + /* The trip has been found, look up the cdev. */ + count = of_count_phandle_with_args(child, "cooling-device", + "#cooling-cells"); + if (count <= 0) + pr_err("Add a cooling_device property with at least one device\n"); + + for (i = 0; i < count; i++) { + if (thermal_of_get_cooling_spec(child, i, cdev, c)) + return true; + } + } + + return false; +} + static bool thermal_of_should_bind(struct thermal_zone_device *tz, const struct thermal_trip *trip, struct thermal_cooling_device *cdev, @@ -293,27 +321,7 @@ static bool thermal_of_should_bind(struct thermal_zone_device *tz, goto out; /* Look up the trip and the cdev in the cooling maps. */ - for_each_child_of_node_scoped(cm_np, child) { - struct device_node *tr_np; - int count, i; - - tr_np = of_parse_phandle(child, "trip", 0); - if (tr_np != trip->priv) - continue; - - /* The trip has been found, look up the cdev. 
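In the gov_power_allocator rework above, a change in the set of cooling instances now falls through into the weight recalculation, since a different actor set generally changes the total weight as well. The fallthrough pseudo-keyword documents that intent to both readers and the compiler; a stripped-down sketch of the control flow (event names invented for illustration):

#include <linux/compiler_attributes.h>

enum example_event {
        EV_INSTANCES_CHANGED,
        EV_WEIGHT_CHANGED,
        EV_OTHER,
};

static void example_update(enum example_event reason)
{
        switch (reason) {
        case EV_INSTANCES_CHANGED:
                /* ... resize per-actor buffers ... */
                fallthrough;    /* a new actor set invalidates the weights */
        case EV_WEIGHT_CHANGED:
                /* ... recompute the cached total weight ... */
                break;
        default:
                break;
        }
}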
*/ - count = of_count_phandle_with_args(child, "cooling-device", "#cooling-cells"); - if (count <= 0) - pr_err("Add a cooling_device property with at least one device\n"); - - for (i = 0; i < count; i++) { - result = thermal_of_get_cooling_spec(child, i, cdev, c); - if (result) - break; - } - - break; - } + result = thermal_of_cm_lookup(cm_np, trip, cdev, c); of_node_put(cm_np); out: diff --git a/drivers/usb/atm/cxacru.c b/drivers/usb/atm/cxacru.c index 0dd85d2635b9..47d06af33747 100644 --- a/drivers/usb/atm/cxacru.c +++ b/drivers/usb/atm/cxacru.c @@ -1131,7 +1131,10 @@ static int cxacru_bind(struct usbatm_data *usbatm_instance, struct cxacru_data *instance; struct usb_device *usb_dev = interface_to_usbdev(intf); struct usb_host_endpoint *cmd_ep = usb_dev->ep_in[CXACRU_EP_CMD]; - struct usb_endpoint_descriptor *in, *out; + static const u8 ep_addrs[] = { + CXACRU_EP_CMD + USB_DIR_IN, + CXACRU_EP_CMD + USB_DIR_OUT, + 0}; int ret; /* instance init */ @@ -1179,13 +1182,11 @@ static int cxacru_bind(struct usbatm_data *usbatm_instance, } if (usb_endpoint_xfer_int(&cmd_ep->desc)) - ret = usb_find_common_endpoints(intf->cur_altsetting, - NULL, NULL, &in, &out); + ret = usb_check_int_endpoints(intf, ep_addrs); else - ret = usb_find_common_endpoints(intf->cur_altsetting, - &in, &out, NULL, NULL); + ret = usb_check_bulk_endpoints(intf, ep_addrs); - if (ret) { + if (!ret) { usb_err(usbatm_instance, "cxacru_bind: interface has incorrect endpoints\n"); ret = -ENODEV; goto fail; diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index a76bb50b6202..dcba4281ea48 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -6066,6 +6066,36 @@ void usb_hub_cleanup(void) } /* usb_hub_cleanup() */ /** + * hub_hc_release_resources - clear resources used by host controller + * @udev: pointer to device being released + * + * Context: task context, might sleep + * + * Function releases the host controller resources in correct order before + * making any operation on resuming usb device. The host controller resources + * allocated for devices in tree should be released starting from the last + * usb device in tree toward the root hub. This function is used only during + * resuming device when usb device require reinitialization – that is, when + * flag udev->reset_resume is set. + * + * This call is synchronous, and may not be used in an interrupt context. 
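The cxacru change above replaces usb_find_common_endpoints() with the stricter usb_check_int_endpoints()/usb_check_bulk_endpoints() helpers, which take a zero-terminated list of endpoint addresses and return true only if every listed endpoint exists with the expected transfer type - hence the inverted if (!ret) failure test. A hedged sketch (EP_NUM is a made-up endpoint number):

#include <linux/errno.h>
#include <linux/usb.h>

#define EP_NUM  0x01    /* illustrative endpoint number */

static int example_check_eps(struct usb_interface *intf)
{
        static const u8 ep_addrs[] = {
                EP_NUM | USB_DIR_IN,    /* IN endpoint */
                EP_NUM | USB_DIR_OUT,   /* OUT endpoint */
                0                       /* list terminator */
        };

        /* Returns true when all listed endpoints are present as bulk
         * endpoints, so failure is the zero case.
         */
        if (!usb_check_bulk_endpoints(intf, ep_addrs))
                return -ENODEV;

        return 0;
}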
+ */ +static void hub_hc_release_resources(struct usb_device *udev) +{ + struct usb_hub *hub = usb_hub_to_struct_hub(udev); + struct usb_hcd *hcd = bus_to_hcd(udev->bus); + int i; + + /* Release up resources for all children before this device */ + for (i = 0; i < udev->maxchild; i++) + if (hub->ports[i]->child) + hub_hc_release_resources(hub->ports[i]->child); + + if (hcd->driver->reset_device) + hcd->driver->reset_device(hcd, udev); +} + +/** * usb_reset_and_verify_device - perform a USB port reset to reinitialize a device * @udev: device to reset (not in SUSPENDED or NOTATTACHED state) * @@ -6129,6 +6159,9 @@ static int usb_reset_and_verify_device(struct usb_device *udev) bos = udev->bos; udev->bos = NULL; + if (udev->reset_resume) + hub_hc_release_resources(udev); + mutex_lock(hcd->address0_mutex); for (i = 0; i < PORT_INIT_TRIES; ++i) { diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index dfcfc142bd5e..8efbacc5bc34 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -341,6 +341,10 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x0638, 0x0a13), .driver_info = USB_QUIRK_STRING_FETCH_255 }, + /* Prolific Single-LUN Mass Storage Card Reader */ + { USB_DEVICE(0x067b, 0x2731), .driver_info = USB_QUIRK_DELAY_INIT | + USB_QUIRK_NO_LPM }, + /* Saitek Cyborg Gold Joystick */ { USB_DEVICE(0x06a3, 0x0006), .driver_info = USB_QUIRK_CONFIG_INTF_STRINGS }, diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index dfa1b5fe48dc..66a08b527165 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -131,11 +131,24 @@ void dwc3_enable_susphy(struct dwc3 *dwc, bool enable) } } -void dwc3_set_prtcap(struct dwc3 *dwc, u32 mode) +void dwc3_set_prtcap(struct dwc3 *dwc, u32 mode, bool ignore_susphy) { + unsigned int hw_mode; u32 reg; reg = dwc3_readl(dwc->regs, DWC3_GCTL); + + /* + * For DRD controllers, GUSB3PIPECTL.SUSPENDENABLE and + * GUSB2PHYCFG.SUSPHY should be cleared during mode switching, + * and they can be set after core initialization. + */ + hw_mode = DWC3_GHWPARAMS0_MODE(dwc->hwparams.hwparams0); + if (hw_mode == DWC3_GHWPARAMS0_MODE_DRD && !ignore_susphy) { + if (DWC3_GCTL_PRTCAP(reg) != mode) + dwc3_enable_susphy(dwc, false); + } + reg &= ~(DWC3_GCTL_PRTCAPDIR(DWC3_GCTL_PRTCAP_OTG)); reg |= DWC3_GCTL_PRTCAPDIR(mode); dwc3_writel(dwc->regs, DWC3_GCTL, reg); @@ -216,7 +229,7 @@ static void __dwc3_set_mode(struct work_struct *work) spin_lock_irqsave(&dwc->lock, flags); - dwc3_set_prtcap(dwc, desired_dr_role); + dwc3_set_prtcap(dwc, desired_dr_role, false); spin_unlock_irqrestore(&dwc->lock, flags); @@ -658,16 +671,7 @@ static int dwc3_ss_phy_setup(struct dwc3 *dwc, int index) */ reg &= ~DWC3_GUSB3PIPECTL_UX_EXIT_PX; - /* - * Above DWC_usb3.0 1.94a, it is recommended to set - * DWC3_GUSB3PIPECTL_SUSPHY to '0' during coreConsultant configuration. - * So default value will be '0' when the core is reset. Application - * needs to set it to '1' after the core initialization is completed. - * - * Similarly for DRD controllers, GUSB3PIPECTL.SUSPENDENABLE must be - * cleared after power-on reset, and it can be set after core - * initialization. - */ + /* Ensure the GUSB3PIPECTL.SUSPENDENABLE is cleared prior to phy init. */ reg &= ~DWC3_GUSB3PIPECTL_SUSPHY; if (dwc->u2ss_inp3_quirk) @@ -747,15 +751,7 @@ static int dwc3_hs_phy_setup(struct dwc3 *dwc, int index) break; } - /* - * Above DWC_usb3.0 1.94a, it is recommended to set - * DWC3_GUSB2PHYCFG_SUSPHY to '0' during coreConsultant configuration. 
- * So default value will be '0' when the core is reset. Application - * needs to set it to '1' after the core initialization is completed. - * - * Similarly for DRD controllers, GUSB2PHYCFG.SUSPHY must be cleared - * after power-on reset, and it can be set after core initialization. - */ + /* Ensure the GUSB2PHYCFG.SUSPHY is cleared prior to phy init. */ reg &= ~DWC3_GUSB2PHYCFG_SUSPHY; if (dwc->dis_enblslpm_quirk) @@ -830,6 +826,25 @@ static int dwc3_phy_init(struct dwc3 *dwc) goto err_exit_usb3_phy; } + /* + * Above DWC_usb3.0 1.94a, it is recommended to set + * DWC3_GUSB3PIPECTL_SUSPHY and DWC3_GUSB2PHYCFG_SUSPHY to '0' during + * coreConsultant configuration. So default value will be '0' when the + * core is reset. Application needs to set it to '1' after the core + * initialization is completed. + * + * Certain phy requires to be in P0 power state during initialization. + * Make sure GUSB3PIPECTL.SUSPENDENABLE and GUSB2PHYCFG.SUSPHY are clear + * prior to phy init to maintain in the P0 state. + * + * After phy initialization, some phy operations can only be executed + * while in lower P states. Ensure GUSB3PIPECTL.SUSPENDENABLE and + * GUSB2PHYCFG.SUSPHY are set soon after initialization to avoid + * blocking phy ops. + */ + if (!DWC3_VER_IS_WITHIN(DWC3, ANY, 194A)) + dwc3_enable_susphy(dwc, true); + return 0; err_exit_usb3_phy: @@ -1588,7 +1603,7 @@ static int dwc3_core_init_mode(struct dwc3 *dwc) switch (dwc->dr_mode) { case USB_DR_MODE_PERIPHERAL: - dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_DEVICE); + dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_DEVICE, false); if (dwc->usb2_phy) otg_set_vbus(dwc->usb2_phy->otg, false); @@ -1600,7 +1615,7 @@ static int dwc3_core_init_mode(struct dwc3 *dwc) return dev_err_probe(dev, ret, "failed to initialize gadget\n"); break; case USB_DR_MODE_HOST: - dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_HOST); + dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_HOST, false); if (dwc->usb2_phy) otg_set_vbus(dwc->usb2_phy->otg, true); @@ -1645,7 +1660,7 @@ static void dwc3_core_exit_mode(struct dwc3 *dwc) } /* de-assert DRVVBUS for HOST and OTG mode */ - dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_DEVICE); + dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_DEVICE, true); } static void dwc3_get_software_properties(struct dwc3 *dwc) @@ -1835,8 +1850,6 @@ static void dwc3_get_properties(struct dwc3 *dwc) dwc->tx_thr_num_pkt_prd = tx_thr_num_pkt_prd; dwc->tx_max_burst_prd = tx_max_burst_prd; - dwc->imod_interval = 0; - dwc->tx_fifo_resize_max_num = tx_fifo_resize_max_num; } @@ -1854,21 +1867,19 @@ static void dwc3_check_params(struct dwc3 *dwc) unsigned int hwparam_gen = DWC3_GHWPARAMS3_SSPHY_IFC(dwc->hwparams.hwparams3); - /* Check for proper value of imod_interval */ - if (dwc->imod_interval && !dwc3_has_imod(dwc)) { - dev_warn(dwc->dev, "Interrupt moderation not supported\n"); - dwc->imod_interval = 0; - } - /* + * Enable IMOD for all supporting controllers. + * + * Particularly, DWC_usb3 v3.00a must enable this feature for + * the following reason: + * * Workaround for STAR 9000961433 which affects only version * 3.00a of the DWC_usb3 core. This prevents the controller * interrupt from being masked while handling events. IMOD * allows us to work around this issue. Enable it for the * affected version. 
*/ - if (!dwc->imod_interval && - DWC3_VER_IS(DWC3, 300A)) + if (dwc3_has_imod((dwc))) dwc->imod_interval = 1; /* Check the maximum_speed parameter */ @@ -2457,7 +2468,7 @@ static int dwc3_resume_common(struct dwc3 *dwc, pm_message_t msg) if (ret) return ret; - dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_DEVICE); + dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_DEVICE, true); dwc3_gadget_resume(dwc); break; case DWC3_GCTL_PRTCAP_HOST: @@ -2465,7 +2476,7 @@ static int dwc3_resume_common(struct dwc3 *dwc, pm_message_t msg) ret = dwc3_core_init_for_resume(dwc); if (ret) return ret; - dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_HOST); + dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_HOST, true); break; } /* Restore GUSB2PHYCFG bits that were modified in suspend */ @@ -2494,7 +2505,7 @@ static int dwc3_resume_common(struct dwc3 *dwc, pm_message_t msg) if (ret) return ret; - dwc3_set_prtcap(dwc, dwc->current_dr_role); + dwc3_set_prtcap(dwc, dwc->current_dr_role, true); dwc3_otg_init(dwc); if (dwc->current_otg_role == DWC3_OTG_ROLE_HOST) { diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index c955039bb4f6..aaa39e663f60 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -1558,7 +1558,7 @@ struct dwc3_gadget_ep_cmd_params { #define DWC3_HAS_OTG BIT(3) /* prototypes */ -void dwc3_set_prtcap(struct dwc3 *dwc, u32 mode); +void dwc3_set_prtcap(struct dwc3 *dwc, u32 mode, bool ignore_susphy); void dwc3_set_mode(struct dwc3 *dwc, u32 mode); u32 dwc3_core_fifo_space(struct dwc3_ep *dep, u8 type); diff --git a/drivers/usb/dwc3/drd.c b/drivers/usb/dwc3/drd.c index d76ae676783c..7977860932b1 100644 --- a/drivers/usb/dwc3/drd.c +++ b/drivers/usb/dwc3/drd.c @@ -173,7 +173,7 @@ void dwc3_otg_init(struct dwc3 *dwc) * block "Initialize GCTL for OTG operation". */ /* GCTL.PrtCapDir=2'b11 */ - dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_OTG); + dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_OTG, true); /* GUSB2PHYCFG0.SusPHY=0 */ reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0)); reg &= ~DWC3_GUSB2PHYCFG_SUSPHY; @@ -556,7 +556,7 @@ int dwc3_drd_init(struct dwc3 *dwc) dwc3_drd_update(dwc); } else { - dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_OTG); + dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_OTG, true); /* use OTG block to get ID event */ irq = dwc3_otg_get_irq(dwc); diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index ddd6b2ce5710..89a4dc8ebf94 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -4501,14 +4501,18 @@ static irqreturn_t dwc3_process_event_buf(struct dwc3_event_buffer *evt) dwc3_writel(dwc->regs, DWC3_GEVNTSIZ(0), DWC3_GEVNTSIZ_SIZE(evt->length)); + evt->flags &= ~DWC3_EVENT_PENDING; + /* + * Add an explicit write memory barrier to make sure that the update of + * clearing DWC3_EVENT_PENDING is observed in dwc3_check_event_buf() + */ + wmb(); + if (dwc->imod_interval) { dwc3_writel(dwc->regs, DWC3_GEVNTCOUNT(0), DWC3_GEVNTCOUNT_EHB); dwc3_writel(dwc->regs, DWC3_DEV_IMOD(0), dwc->imod_interval); } - /* Keep the clearing of DWC3_EVENT_PENDING at the end */ - evt->flags &= ~DWC3_EVENT_PENDING; - return ret; } diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index bdda8c74602d..869ad99afb48 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -1050,10 +1050,11 @@ static int set_config(struct usb_composite_dev *cdev, else usb_gadget_set_remote_wakeup(gadget, 0); done: - if (power <= USB_SELF_POWER_VBUS_MAX_DRAW) - usb_gadget_set_selfpowered(gadget); - else + if (power > USB_SELF_POWER_VBUS_MAX_DRAW || + (c && !(c->bmAttributes 
& USB_CONFIG_ATT_SELFPOWER))) usb_gadget_clear_selfpowered(gadget); + else + usb_gadget_set_selfpowered(gadget); usb_gadget_vbus_draw(gadget, power); if (result >= 0 && cdev->delayed_status) @@ -2615,7 +2616,10 @@ void composite_suspend(struct usb_gadget *gadget) cdev->suspended = 1; - usb_gadget_set_selfpowered(gadget); + if (cdev->config && + cdev->config->bmAttributes & USB_CONFIG_ATT_SELFPOWER) + usb_gadget_set_selfpowered(gadget); + usb_gadget_vbus_draw(gadget, 2); } @@ -2649,8 +2653,11 @@ void composite_resume(struct usb_gadget *gadget) else maxpower = min(maxpower, 900U); - if (maxpower > USB_SELF_POWER_VBUS_MAX_DRAW) + if (maxpower > USB_SELF_POWER_VBUS_MAX_DRAW || + !(cdev->config->bmAttributes & USB_CONFIG_ATT_SELFPOWER)) usb_gadget_clear_selfpowered(gadget); + else + usb_gadget_set_selfpowered(gadget); usb_gadget_vbus_draw(gadget, maxpower); } else { diff --git a/drivers/usb/gadget/function/u_ether.c b/drivers/usb/gadget/function/u_ether.c index 09e2838917e2..f58590bf5e02 100644 --- a/drivers/usb/gadget/function/u_ether.c +++ b/drivers/usb/gadget/function/u_ether.c @@ -1052,8 +1052,8 @@ void gether_suspend(struct gether *link) * There is a transfer in progress. So we trigger a remote * wakeup to inform the host. */ - ether_wakeup_host(dev->port_usb); - return; + if (!ether_wakeup_host(dev->port_usb)) + return; } spin_lock_irqsave(&dev->lock, flags); link->is_suspend = true; diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index 9693464c0520..69c278b64084 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -12,6 +12,7 @@ #include <linux/slab.h> #include <linux/unaligned.h> #include <linux/bitfield.h> +#include <linux/pci.h> #include "xhci.h" #include "xhci-trace.h" @@ -770,9 +771,16 @@ static int xhci_exit_test_mode(struct xhci_hcd *xhci) enum usb_link_tunnel_mode xhci_port_is_tunneled(struct xhci_hcd *xhci, struct xhci_port *port) { + struct usb_hcd *hcd; void __iomem *base; u32 offset; + /* Don't try and probe this capability for non-Intel hosts */ + hcd = xhci_to_hcd(xhci); + if (!dev_is_pci(hcd->self.controller) || + to_pci_dev(hcd->self.controller)->vendor != PCI_VENDOR_ID_INTEL) + return USB_LINK_UNKNOWN; + base = &xhci->cap_regs->hc_capbase; offset = xhci_find_next_ext_cap(base, 0, XHCI_EXT_CAPS_INTEL_SPR_SHADOW); diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 92703efda1f7..fdf0c1008225 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -2437,7 +2437,8 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags) * and our use of dma addresses in the trb_address_map radix tree needs * TRB_SEGMENT_SIZE alignment, so we pick the greater alignment need. 
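A note on the dwc3 event-buffer fix a few hunks up: clearing DWC3_EVENT_PENDING is moved before the GEVNTCOUNT/IMOD writes and followed by an explicit wmb(), because the flag update must become visible to dwc3_check_event_buf() on another CPU before the interrupt is re-armed. The generic producer/consumer shape of that barrier pairing, as an illustrative sketch (not the driver's actual code):

#include <asm/barrier.h>
#include <linux/compiler.h>

static int payload;
static int data_ready;

static void producer(int value)
{
        payload = value;
        /* Order the payload store before the flag that announces it. */
        wmb();
        WRITE_ONCE(data_ready, 1);
}

static int consumer(void)
{
        if (!READ_ONCE(data_ready))
                return -1;
        /* Pairs with the producer's wmb(); without it the payload read
         * may be satisfied from a stale value.
         */
        rmb();
        return payload;
}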
*/ - if (xhci->quirks & XHCI_ZHAOXIN_TRB_FETCH) + if (xhci->quirks & XHCI_TRB_OVERFETCH) + /* Buggy HC prefetches beyond segment bounds - allocate dummy space at the end */ xhci->segment_pool = dma_pool_create("xHCI ring segments", dev, TRB_SEGMENT_SIZE * 2, TRB_SEGMENT_SIZE * 2, xhci->page_size * 2); else diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index ad0ff356f6fa..54460d11f7ee 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -38,6 +38,8 @@ #define PCI_DEVICE_ID_ETRON_EJ168 0x7023 #define PCI_DEVICE_ID_ETRON_EJ188 0x7052 +#define PCI_DEVICE_ID_VIA_VL805 0x3483 + #define PCI_DEVICE_ID_INTEL_LYNXPOINT_XHCI 0x8c31 #define PCI_DEVICE_ID_INTEL_LYNXPOINT_LP_XHCI 0x9c31 #define PCI_DEVICE_ID_INTEL_WILDCATPOINT_LP_XHCI 0x9cb1 @@ -418,8 +420,10 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) pdev->device == 0x3432) xhci->quirks |= XHCI_BROKEN_STREAMS; - if (pdev->vendor == PCI_VENDOR_ID_VIA && pdev->device == 0x3483) + if (pdev->vendor == PCI_VENDOR_ID_VIA && pdev->device == PCI_DEVICE_ID_VIA_VL805) { xhci->quirks |= XHCI_LPM_SUPPORT; + xhci->quirks |= XHCI_TRB_OVERFETCH; + } if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA && pdev->device == PCI_DEVICE_ID_ASMEDIA_1042_XHCI) { @@ -467,11 +471,11 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) if (pdev->device == 0x9202) { xhci->quirks |= XHCI_RESET_ON_RESUME; - xhci->quirks |= XHCI_ZHAOXIN_TRB_FETCH; + xhci->quirks |= XHCI_TRB_OVERFETCH; } if (pdev->device == 0x9203) - xhci->quirks |= XHCI_ZHAOXIN_TRB_FETCH; + xhci->quirks |= XHCI_TRB_OVERFETCH; } if (pdev->vendor == PCI_VENDOR_ID_CDNS && diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 45653114ccd7..1a90ebc8a30e 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -780,8 +780,12 @@ static void xhci_clear_command_ring(struct xhci_hcd *xhci) struct xhci_segment *seg; ring = xhci->cmd_ring; - xhci_for_each_ring_seg(ring->first_seg, seg) + xhci_for_each_ring_seg(ring->first_seg, seg) { + /* erase all TRBs before the link */ memset(seg->trbs, 0, sizeof(union xhci_trb) * (TRBS_PER_SEGMENT - 1)); + /* clear link cycle bit */ + seg->trbs[TRBS_PER_SEGMENT - 1].link.control &= cpu_to_le32(~TRB_CYCLE); + } xhci_initialize_ring_info(ring); /* diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 8c164340a2c3..779b01dee068 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1632,7 +1632,7 @@ struct xhci_hcd { #define XHCI_EP_CTX_BROKEN_DCS BIT_ULL(42) #define XHCI_SUSPEND_RESUME_CLKS BIT_ULL(43) #define XHCI_RESET_TO_DEFAULT BIT_ULL(44) -#define XHCI_ZHAOXIN_TRB_FETCH BIT_ULL(45) +#define XHCI_TRB_OVERFETCH BIT_ULL(45) #define XHCI_ZHAOXIN_HOST BIT_ULL(46) #define XHCI_WRITE_64_HI_LO BIT_ULL(47) #define XHCI_CDNS_SCTX_QUIRK BIT_ULL(48) diff --git a/drivers/usb/renesas_usbhs/common.c b/drivers/usb/renesas_usbhs/common.c index 935fc496fe94..4b35ef216125 100644 --- a/drivers/usb/renesas_usbhs/common.c +++ b/drivers/usb/renesas_usbhs/common.c @@ -312,8 +312,10 @@ static int usbhsc_clk_get(struct device *dev, struct usbhs_priv *priv) priv->clks[1] = of_clk_get(dev_of_node(dev), 1); if (PTR_ERR(priv->clks[1]) == -ENOENT) priv->clks[1] = NULL; - else if (IS_ERR(priv->clks[1])) + else if (IS_ERR(priv->clks[1])) { + clk_put(priv->clks[0]); return PTR_ERR(priv->clks[1]); + } return 0; } @@ -779,6 +781,8 @@ static void usbhs_remove(struct platform_device *pdev) dev_dbg(&pdev->dev, "usb remove\n"); + flush_delayed_work(&priv->notify_hotplug_work); + 
/* power off */ if (!usbhs_get_dparam(priv, runtime_pwctrl)) usbhsc_power_ctrl(priv, 0); diff --git a/drivers/usb/renesas_usbhs/mod_gadget.c b/drivers/usb/renesas_usbhs/mod_gadget.c index 105132ae87ac..e8e5723f5412 100644 --- a/drivers/usb/renesas_usbhs/mod_gadget.c +++ b/drivers/usb/renesas_usbhs/mod_gadget.c @@ -1094,7 +1094,7 @@ int usbhs_mod_gadget_probe(struct usbhs_priv *priv) goto usbhs_mod_gadget_probe_err_gpriv; } - gpriv->transceiver = usb_get_phy(USB_PHY_TYPE_UNDEFINED); + gpriv->transceiver = devm_usb_get_phy(dev, USB_PHY_TYPE_UNDEFINED); dev_info(dev, "%stransceiver found\n", !IS_ERR(gpriv->transceiver) ? "" : "no "); diff --git a/drivers/usb/typec/tcpm/tcpci_rt1711h.c b/drivers/usb/typec/tcpm/tcpci_rt1711h.c index 64f6dd0dc660..88c50b984e8a 100644 --- a/drivers/usb/typec/tcpm/tcpci_rt1711h.c +++ b/drivers/usb/typec/tcpm/tcpci_rt1711h.c @@ -334,6 +334,11 @@ static int rt1711h_probe(struct i2c_client *client) { int ret; struct rt1711h_chip *chip; + const u16 alert_mask = TCPC_ALERT_TX_SUCCESS | TCPC_ALERT_TX_DISCARDED | + TCPC_ALERT_TX_FAILED | TCPC_ALERT_RX_HARD_RST | + TCPC_ALERT_RX_STATUS | TCPC_ALERT_POWER_STATUS | + TCPC_ALERT_CC_STATUS | TCPC_ALERT_RX_BUF_OVF | + TCPC_ALERT_FAULT; chip = devm_kzalloc(&client->dev, sizeof(*chip), GFP_KERNEL); if (!chip) @@ -382,6 +387,12 @@ static int rt1711h_probe(struct i2c_client *client) dev_name(chip->dev), chip); if (ret < 0) return ret; + + /* Enable alert interrupts */ + ret = rt1711h_write16(chip, TCPC_ALERT_MASK, alert_mask); + if (ret < 0) + return ret; + enable_irq_wake(client->irq); return 0; diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index fcf499cc9458..2a2915b0a645 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -25,7 +25,7 @@ * difficult to estimate the time it takes for the system to process the command * before it is actually passed to the PPM. 
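The mod_gadget change above switches to devm_usb_get_phy(), so the PHY reference is dropped by the driver core when the device unbinds instead of requiring a usb_put_phy() in every teardown path. The managed idiom in outline (a sketch, not the usbhs probe flow):

#include <linux/err.h>
#include <linux/usb/phy.h>

static int example_probe(struct device *dev)
{
        struct usb_phy *phy;

        /* devm_* resources are released automatically on unbind, which
         * removes an easy-to-forget put from remove() and error paths.
         */
        phy = devm_usb_get_phy(dev, USB_PHY_TYPE_UNDEFINED);
        if (IS_ERR(phy))
                return PTR_ERR(phy);

        return 0;
}

Note that the usbhs driver itself tolerates a missing transceiver; treating IS_ERR() as fatal here is only for brevity.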
*/ -#define UCSI_TIMEOUT_MS 5000 +#define UCSI_TIMEOUT_MS 10000 /* * UCSI_SWAP_TIMEOUT_MS - Timeout for role swap requests @@ -1346,7 +1346,7 @@ static int ucsi_reset_ppm(struct ucsi *ucsi) mutex_lock(&ucsi->ppm_lock); - ret = ucsi->ops->read_cci(ucsi, &cci); + ret = ucsi->ops->poll_cci(ucsi, &cci); if (ret < 0) goto out; @@ -1364,7 +1364,7 @@ static int ucsi_reset_ppm(struct ucsi *ucsi) tmo = jiffies + msecs_to_jiffies(UCSI_TIMEOUT_MS); do { - ret = ucsi->ops->read_cci(ucsi, &cci); + ret = ucsi->ops->poll_cci(ucsi, &cci); if (ret < 0) goto out; if (cci & UCSI_CCI_COMMAND_COMPLETE) @@ -1393,7 +1393,7 @@ static int ucsi_reset_ppm(struct ucsi *ucsi) /* Give the PPM time to process a reset before reading CCI */ msleep(20); - ret = ucsi->ops->read_cci(ucsi, &cci); + ret = ucsi->ops->poll_cci(ucsi, &cci); if (ret) goto out; @@ -1825,11 +1825,11 @@ static int ucsi_init(struct ucsi *ucsi) err_unregister: for (con = connector; con->port; con++) { + if (con->wq) + destroy_workqueue(con->wq); ucsi_unregister_partner(con); ucsi_unregister_altmodes(con, UCSI_RECIPIENT_CON); ucsi_unregister_port_psy(con); - if (con->wq) - destroy_workqueue(con->wq); usb_power_delivery_unregister_capabilities(con->port_sink_caps); con->port_sink_caps = NULL; @@ -1929,8 +1929,8 @@ struct ucsi *ucsi_create(struct device *dev, const struct ucsi_operations *ops) struct ucsi *ucsi; if (!ops || - !ops->read_version || !ops->read_cci || !ops->read_message_in || - !ops->sync_control || !ops->async_control) + !ops->read_version || !ops->read_cci || !ops->poll_cci || + !ops->read_message_in || !ops->sync_control || !ops->async_control) return ERR_PTR(-EINVAL); ucsi = kzalloc(sizeof(*ucsi), GFP_KERNEL); @@ -2013,10 +2013,6 @@ void ucsi_unregister(struct ucsi *ucsi) for (i = 0; i < ucsi->cap.num_connectors; i++) { cancel_work_sync(&ucsi->connector[i].work); - ucsi_unregister_partner(&ucsi->connector[i]); - ucsi_unregister_altmodes(&ucsi->connector[i], - UCSI_RECIPIENT_CON); - ucsi_unregister_port_psy(&ucsi->connector[i]); if (ucsi->connector[i].wq) { struct ucsi_work *uwork; @@ -2032,6 +2028,11 @@ void ucsi_unregister(struct ucsi *ucsi) destroy_workqueue(ucsi->connector[i].wq); } + ucsi_unregister_partner(&ucsi->connector[i]); + ucsi_unregister_altmodes(&ucsi->connector[i], + UCSI_RECIPIENT_CON); + ucsi_unregister_port_psy(&ucsi->connector[i]); + usb_power_delivery_unregister_capabilities(ucsi->connector[i].port_sink_caps); ucsi->connector[i].port_sink_caps = NULL; usb_power_delivery_unregister_capabilities(ucsi->connector[i].port_source_caps); diff --git a/drivers/usb/typec/ucsi/ucsi.h b/drivers/usb/typec/ucsi/ucsi.h index 82735eb34f0e..28780acc4af2 100644 --- a/drivers/usb/typec/ucsi/ucsi.h +++ b/drivers/usb/typec/ucsi/ucsi.h @@ -62,6 +62,7 @@ struct dentry; * struct ucsi_operations - UCSI I/O operations * @read_version: Read implemented UCSI version * @read_cci: Read CCI register + * @poll_cci: Read CCI register while polling with notifications disabled * @read_message_in: Read message data from UCSI * @sync_control: Blocking control operation * @async_control: Non-blocking control operation @@ -76,6 +77,7 @@ struct dentry; struct ucsi_operations { int (*read_version)(struct ucsi *ucsi, u16 *version); int (*read_cci)(struct ucsi *ucsi, u32 *cci); + int (*poll_cci)(struct ucsi *ucsi, u32 *cci); int (*read_message_in)(struct ucsi *ucsi, void *val, size_t val_len); int (*sync_control)(struct ucsi *ucsi, u64 command); int (*async_control)(struct ucsi *ucsi, u64 command); diff --git a/drivers/usb/typec/ucsi/ucsi_acpi.c 
b/drivers/usb/typec/ucsi/ucsi_acpi.c index 5c5515551963..ac1ebb5d9527 100644 --- a/drivers/usb/typec/ucsi/ucsi_acpi.c +++ b/drivers/usb/typec/ucsi/ucsi_acpi.c @@ -59,19 +59,24 @@ static int ucsi_acpi_read_version(struct ucsi *ucsi, u16 *version) static int ucsi_acpi_read_cci(struct ucsi *ucsi, u32 *cci) { struct ucsi_acpi *ua = ucsi_get_drvdata(ucsi); - int ret; - - if (UCSI_COMMAND(ua->cmd) == UCSI_PPM_RESET) { - ret = ucsi_acpi_dsm(ua, UCSI_DSM_FUNC_READ); - if (ret) - return ret; - } memcpy(cci, ua->base + UCSI_CCI, sizeof(*cci)); return 0; } +static int ucsi_acpi_poll_cci(struct ucsi *ucsi, u32 *cci) +{ + struct ucsi_acpi *ua = ucsi_get_drvdata(ucsi); + int ret; + + ret = ucsi_acpi_dsm(ua, UCSI_DSM_FUNC_READ); + if (ret) + return ret; + + return ucsi_acpi_read_cci(ucsi, cci); +} + static int ucsi_acpi_read_message_in(struct ucsi *ucsi, void *val, size_t val_len) { struct ucsi_acpi *ua = ucsi_get_drvdata(ucsi); @@ -94,6 +99,7 @@ static int ucsi_acpi_async_control(struct ucsi *ucsi, u64 command) static const struct ucsi_operations ucsi_acpi_ops = { .read_version = ucsi_acpi_read_version, .read_cci = ucsi_acpi_read_cci, + .poll_cci = ucsi_acpi_poll_cci, .read_message_in = ucsi_acpi_read_message_in, .sync_control = ucsi_sync_control_common, .async_control = ucsi_acpi_async_control @@ -142,6 +148,7 @@ static int ucsi_gram_sync_control(struct ucsi *ucsi, u64 command) static const struct ucsi_operations ucsi_gram_ops = { .read_version = ucsi_acpi_read_version, .read_cci = ucsi_acpi_read_cci, + .poll_cci = ucsi_acpi_poll_cci, .read_message_in = ucsi_gram_read_message_in, .sync_control = ucsi_gram_sync_control, .async_control = ucsi_acpi_async_control diff --git a/drivers/usb/typec/ucsi/ucsi_ccg.c b/drivers/usb/typec/ucsi/ucsi_ccg.c index 740171f24ef9..4b1668733a4b 100644 --- a/drivers/usb/typec/ucsi/ucsi_ccg.c +++ b/drivers/usb/typec/ucsi/ucsi_ccg.c @@ -664,6 +664,7 @@ err_put: static const struct ucsi_operations ucsi_ccg_ops = { .read_version = ucsi_ccg_read_version, .read_cci = ucsi_ccg_read_cci, + .poll_cci = ucsi_ccg_read_cci, .read_message_in = ucsi_ccg_read_message_in, .sync_control = ucsi_ccg_sync_control, .async_control = ucsi_ccg_async_control, diff --git a/drivers/usb/typec/ucsi/ucsi_glink.c b/drivers/usb/typec/ucsi/ucsi_glink.c index fed39d458090..8af79101a2fc 100644 --- a/drivers/usb/typec/ucsi/ucsi_glink.c +++ b/drivers/usb/typec/ucsi/ucsi_glink.c @@ -206,6 +206,7 @@ static void pmic_glink_ucsi_connector_status(struct ucsi_connector *con) static const struct ucsi_operations pmic_glink_ucsi_ops = { .read_version = pmic_glink_ucsi_read_version, .read_cci = pmic_glink_ucsi_read_cci, + .poll_cci = pmic_glink_ucsi_read_cci, .read_message_in = pmic_glink_ucsi_read_message_in, .sync_control = ucsi_sync_control_common, .async_control = pmic_glink_ucsi_async_control, diff --git a/drivers/usb/typec/ucsi/ucsi_stm32g0.c b/drivers/usb/typec/ucsi/ucsi_stm32g0.c index 6923fad31d79..57ef7d83a412 100644 --- a/drivers/usb/typec/ucsi/ucsi_stm32g0.c +++ b/drivers/usb/typec/ucsi/ucsi_stm32g0.c @@ -424,6 +424,7 @@ static irqreturn_t ucsi_stm32g0_irq_handler(int irq, void *data) static const struct ucsi_operations ucsi_stm32g0_ops = { .read_version = ucsi_stm32g0_read_version, .read_cci = ucsi_stm32g0_read_cci, + .poll_cci = ucsi_stm32g0_read_cci, .read_message_in = ucsi_stm32g0_read_message_in, .sync_control = ucsi_sync_control_common, .async_control = ucsi_stm32g0_async_control, diff --git a/drivers/usb/typec/ucsi/ucsi_yoga_c630.c b/drivers/usb/typec/ucsi/ucsi_yoga_c630.c index 
4cae85c0dc12..d33e3f2dd1d8 100644 --- a/drivers/usb/typec/ucsi/ucsi_yoga_c630.c +++ b/drivers/usb/typec/ucsi/ucsi_yoga_c630.c @@ -74,6 +74,7 @@ static int yoga_c630_ucsi_async_control(struct ucsi *ucsi, u64 command) static const struct ucsi_operations yoga_c630_ucsi_ops = { .read_version = yoga_c630_ucsi_read_version, .read_cci = yoga_c630_ucsi_read_cci, + .poll_cci = yoga_c630_ucsi_read_cci, .read_message_in = yoga_c630_ucsi_read_message_in, .sync_control = ucsi_sync_control_common, .async_control = yoga_c630_ucsi_async_control, diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 9ac25d08f473..63612faeab72 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -666,7 +666,7 @@ static struct vhost_worker *vhost_worker_create(struct vhost_dev *dev) vtsk = vhost_task_create(vhost_run_work_list, vhost_worker_killed, worker, name); - if (!vtsk) + if (IS_ERR(vtsk)) goto free_worker; mutex_init(&worker->mutex); diff --git a/drivers/video/fbdev/hyperv_fb.c b/drivers/video/fbdev/hyperv_fb.c index 7fdb5edd7e2e..75338ffc703f 100644 --- a/drivers/video/fbdev/hyperv_fb.c +++ b/drivers/video/fbdev/hyperv_fb.c @@ -282,6 +282,8 @@ static uint screen_depth; static uint screen_fb_size; static uint dio_fb_size; /* FB size for deferred IO */ +static void hvfb_putmem(struct fb_info *info); + /* Send message to Hyper-V host */ static inline int synthvid_send(struct hv_device *hdev, struct synthvid_msg *msg) @@ -863,6 +865,17 @@ static void hvfb_ops_damage_area(struct fb_info *info, u32 x, u32 y, u32 width, } /* + * fb_ops.fb_destroy is called by the last put_fb_info() call at the end + * of unregister_framebuffer() or fb_release(). Do any cleanup related to + * framebuffer here. + */ +static void hvfb_destroy(struct fb_info *info) +{ + hvfb_putmem(info); + framebuffer_release(info); +} + +/* * TODO: GEN1 codepaths allocate from system or DMA-able memory. Fix the * driver to use the _SYSMEM_ or _DMAMEM_ helpers in these cases. */ @@ -877,6 +890,7 @@ static const struct fb_ops hvfb_ops = { .fb_set_par = hvfb_set_par, .fb_setcolreg = hvfb_setcolreg, .fb_blank = hvfb_blank, + .fb_destroy = hvfb_destroy, }; /* Get options from kernel paramenter "video=" */ @@ -952,7 +966,7 @@ static phys_addr_t hvfb_get_phymem(struct hv_device *hdev, } /* Release contiguous physical memory */ -static void hvfb_release_phymem(struct hv_device *hdev, +static void hvfb_release_phymem(struct device *device, phys_addr_t paddr, unsigned int size) { unsigned int order = get_order(size); @@ -960,7 +974,7 @@ static void hvfb_release_phymem(struct hv_device *hdev, if (order <= MAX_PAGE_ORDER) __free_pages(pfn_to_page(paddr >> PAGE_SHIFT), order); else - dma_free_coherent(&hdev->device, + dma_free_coherent(device, round_up(size, PAGE_SIZE), phys_to_virt(paddr), paddr); @@ -989,6 +1003,7 @@ static int hvfb_getmem(struct hv_device *hdev, struct fb_info *info) base = pci_resource_start(pdev, 0); size = pci_resource_len(pdev, 0); + aperture_remove_conflicting_devices(base, size, KBUILD_MODNAME); /* * For Gen 1 VM, we can directly use the contiguous memory @@ -1010,11 +1025,21 @@ static int hvfb_getmem(struct hv_device *hdev, struct fb_info *info) goto getmem_done; } pr_info("Unable to allocate enough contiguous physical memory on Gen 1 VM. Using MMIO instead.\n"); + } else { + aperture_remove_all_conflicting_devices(KBUILD_MODNAME); } /* - * Cannot use the contiguous physical memory. - * Allocate mmio space for framebuffer. + * Cannot use contiguous physical memory, so allocate MMIO space for + * the framebuffer. 
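The one-line vhost fix above is easy to miss but important: vhost_task_create() reports failure with ERR_PTR(), not NULL, so the old !vtsk test could never fire and a failed allocation would be dereferenced later. The convention in sketch form (struct widget and widget_create() are hypothetical):

#include <linux/err.h>

struct widget;
struct widget *widget_create(void);     /* returns ERR_PTR() on failure */

static int example(void)
{
        struct widget *w = widget_create();

        /* ERR_PTR()-style APIs encode -errno in the pointer value;
         * a NULL check here would treat every failure as success.
         */
        if (IS_ERR(w))
                return PTR_ERR(w);

        /* ... use w ... */
        return 0;
}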
At this point in the function, conflicting devices + * that might have claimed the framebuffer MMIO space based on + * screen_info.lfb_base must have already been removed so that + * vmbus_allocate_mmio() does not allocate different MMIO space. If the + * kdump image were to be loaded using kexec_file_load(), the + * framebuffer location in the kdump image would be set from + * screen_info.lfb_base at the time that kdump is enabled. If the + * framebuffer has moved elsewhere, this could be the wrong location, + * causing kdump to hang when efifb (for example) loads. */ dio_fb_size = screen_width * screen_height * screen_depth / 8; @@ -1051,11 +1076,6 @@ static int hvfb_getmem(struct hv_device *hdev, struct fb_info *info) info->screen_size = dio_fb_size; getmem_done: - if (base && size) - aperture_remove_conflicting_devices(base, size, KBUILD_MODNAME); - else - aperture_remove_all_conflicting_devices(KBUILD_MODNAME); - if (!gen2vm) pci_dev_put(pdev); @@ -1074,16 +1094,16 @@ err1: } /* Release the framebuffer */ -static void hvfb_putmem(struct hv_device *hdev, struct fb_info *info) +static void hvfb_putmem(struct fb_info *info) { struct hvfb_par *par = info->par; if (par->need_docopy) { vfree(par->dio_vp); - iounmap(info->screen_base); + iounmap(par->mmio_vp); vmbus_free_mmio(par->mem->start, screen_fb_size); } else { - hvfb_release_phymem(hdev, info->fix.smem_start, + hvfb_release_phymem(info->device, info->fix.smem_start, screen_fb_size); } @@ -1172,7 +1192,7 @@ static int hvfb_probe(struct hv_device *hdev, if (ret) goto error; - ret = register_framebuffer(info); + ret = devm_register_framebuffer(&hdev->device, info); if (ret) { pr_err("Unable to register framebuffer\n"); goto error; @@ -1197,7 +1217,7 @@ static int hvfb_probe(struct hv_device *hdev, error: fb_deferred_io_cleanup(info); - hvfb_putmem(hdev, info); + hvfb_putmem(info); error2: vmbus_close(hdev->channel); error1: @@ -1220,14 +1240,10 @@ static void hvfb_remove(struct hv_device *hdev) fb_deferred_io_cleanup(info); - unregister_framebuffer(info); cancel_delayed_work_sync(&par->dwork); vmbus_close(hdev->channel); hv_set_drvdata(hdev, NULL); - - hvfb_putmem(hdev, info); - framebuffer_release(info); } static int hvfb_suspend(struct hv_device *hdev) diff --git a/drivers/virt/acrn/hsm.c b/drivers/virt/acrn/hsm.c index c24036c4e51e..e4e196abdaac 100644 --- a/drivers/virt/acrn/hsm.c +++ b/drivers/virt/acrn/hsm.c @@ -49,7 +49,7 @@ static int pmcmd_ioctl(u64 cmd, void __user *uptr) switch (cmd & PMCMD_TYPE_MASK) { case ACRN_PMCMD_GET_PX_CNT: case ACRN_PMCMD_GET_CX_CNT: - pm_info = kmalloc(sizeof(u64), GFP_KERNEL); + pm_info = kzalloc(sizeof(u64), GFP_KERNEL); if (!pm_info) return -ENOMEM; @@ -64,7 +64,7 @@ static int pmcmd_ioctl(u64 cmd, void __user *uptr) kfree(pm_info); break; case ACRN_PMCMD_GET_PX_DATA: - px_data = kmalloc(sizeof(*px_data), GFP_KERNEL); + px_data = kzalloc(sizeof(*px_data), GFP_KERNEL); if (!px_data) return -ENOMEM; @@ -79,7 +79,7 @@ static int pmcmd_ioctl(u64 cmd, void __user *uptr) kfree(px_data); break; case ACRN_PMCMD_GET_CX_DATA: - cx_data = kmalloc(sizeof(*cx_data), GFP_KERNEL); + cx_data = kzalloc(sizeof(*cx_data), GFP_KERNEL); if (!cx_data) return -ENOMEM; diff --git a/drivers/virt/coco/sev-guest/sev-guest.c b/drivers/virt/coco/sev-guest/sev-guest.c index 264b6523fe52..70fbc9a3e703 100644 --- a/drivers/virt/coco/sev-guest/sev-guest.c +++ b/drivers/virt/coco/sev-guest/sev-guest.c @@ -38,12 +38,6 @@ struct snp_guest_dev { struct miscdevice misc; struct snp_msg_desc *msg_desc; - - union { - struct 
snp_report_req report; - struct snp_derived_key_req derived_key; - struct snp_ext_report_req ext_report; - } req; }; /* @@ -71,7 +65,7 @@ struct snp_req_resp { static int get_report(struct snp_guest_dev *snp_dev, struct snp_guest_request_ioctl *arg) { - struct snp_report_req *report_req = &snp_dev->req.report; + struct snp_report_req *report_req __free(kfree) = NULL; struct snp_msg_desc *mdesc = snp_dev->msg_desc; struct snp_report_resp *report_resp; struct snp_guest_req req = {}; @@ -80,6 +74,10 @@ static int get_report(struct snp_guest_dev *snp_dev, struct snp_guest_request_io if (!arg->req_data || !arg->resp_data) return -EINVAL; + report_req = kzalloc(sizeof(*report_req), GFP_KERNEL_ACCOUNT); + if (!report_req) + return -ENOMEM; + if (copy_from_user(report_req, (void __user *)arg->req_data, sizeof(*report_req))) return -EFAULT; @@ -116,7 +114,7 @@ e_free: static int get_derived_key(struct snp_guest_dev *snp_dev, struct snp_guest_request_ioctl *arg) { - struct snp_derived_key_req *derived_key_req = &snp_dev->req.derived_key; + struct snp_derived_key_req *derived_key_req __free(kfree) = NULL; struct snp_derived_key_resp derived_key_resp = {0}; struct snp_msg_desc *mdesc = snp_dev->msg_desc; struct snp_guest_req req = {}; @@ -136,6 +134,10 @@ static int get_derived_key(struct snp_guest_dev *snp_dev, struct snp_guest_reque if (sizeof(buf) < resp_len) return -ENOMEM; + derived_key_req = kzalloc(sizeof(*derived_key_req), GFP_KERNEL_ACCOUNT); + if (!derived_key_req) + return -ENOMEM; + if (copy_from_user(derived_key_req, (void __user *)arg->req_data, sizeof(*derived_key_req))) return -EFAULT; @@ -168,16 +170,21 @@ static int get_ext_report(struct snp_guest_dev *snp_dev, struct snp_guest_reques struct snp_req_resp *io) { - struct snp_ext_report_req *report_req = &snp_dev->req.ext_report; + struct snp_ext_report_req *report_req __free(kfree) = NULL; struct snp_msg_desc *mdesc = snp_dev->msg_desc; struct snp_report_resp *report_resp; struct snp_guest_req req = {}; int ret, npages = 0, resp_len; sockptr_t certs_address; + struct page *page; if (sockptr_is_null(io->req_data) || sockptr_is_null(io->resp_data)) return -EINVAL; + report_req = kzalloc(sizeof(*report_req), GFP_KERNEL_ACCOUNT); + if (!report_req) + return -ENOMEM; + if (copy_from_sockptr(report_req, io->req_data, sizeof(*report_req))) return -EFAULT; @@ -203,8 +210,20 @@ static int get_ext_report(struct snp_guest_dev *snp_dev, struct snp_guest_reques * the host. If host does not supply any certs in it, then copy * zeros to indicate that certificate data was not provided. 
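The sev-guest rework above moves the ioctl request buffers from a long-lived union in snp_guest_dev to per-call heap allocations declared with __free(kfree), the scope-based cleanup helper from <linux/cleanup.h>: the buffer is freed automatically on every return path, including the early -EFAULT exits. A minimal sketch:

#include <linux/cleanup.h>
#include <linux/slab.h>
#include <linux/uaccess.h>

static int example_ioctl(const void __user *uptr, size_t len)
{
        /* Freed when the variable goes out of scope, on all paths. */
        void *buf __free(kfree) = kzalloc(len, GFP_KERNEL_ACCOUNT);

        if (!buf)
                return -ENOMEM;

        if (copy_from_user(buf, uptr, len))
                return -EFAULT;

        /* ... process buf; no explicit kfree() needed ... */
        return 0;
}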
*/ - memset(mdesc->certs_data, 0, report_req->certs_len); npages = report_req->certs_len >> PAGE_SHIFT; + page = alloc_pages(GFP_KERNEL_ACCOUNT | __GFP_ZERO, + get_order(report_req->certs_len)); + if (!page) + return -ENOMEM; + + req.certs_data = page_address(page); + ret = set_memory_decrypted((unsigned long)req.certs_data, npages); + if (ret) { + pr_err("failed to mark page shared, ret=%d\n", ret); + __free_pages(page, get_order(report_req->certs_len)); + return -EFAULT; + } + cmd: /* * The intermediate response buffer is used while decrypting the @@ -213,10 +232,12 @@ cmd: */ resp_len = sizeof(report_resp->data) + mdesc->ctx->authsize; report_resp = kzalloc(resp_len, GFP_KERNEL_ACCOUNT); - if (!report_resp) - return -ENOMEM; + if (!report_resp) { + ret = -ENOMEM; + goto e_free_data; + } - mdesc->input.data_npages = npages; + req.input.data_npages = npages; req.msg_version = arg->msg_version; req.msg_type = SNP_MSG_REPORT_REQ; @@ -231,7 +252,7 @@ cmd: /* If certs length is invalid then copy the returned length */ if (arg->vmm_error == SNP_GUEST_VMM_ERR_INVALID_LEN) { - report_req->certs_len = mdesc->input.data_npages << PAGE_SHIFT; + report_req->certs_len = req.input.data_npages << PAGE_SHIFT; if (copy_to_sockptr(io->req_data, report_req, sizeof(*report_req))) ret = -EFAULT; @@ -240,7 +261,7 @@ cmd: if (ret) goto e_free; - if (npages && copy_to_sockptr(certs_address, mdesc->certs_data, report_req->certs_len)) { + if (npages && copy_to_sockptr(certs_address, req.certs_data, report_req->certs_len)) { ret = -EFAULT; goto e_free; } @@ -250,6 +271,13 @@ cmd: e_free: kfree(report_resp); +e_free_data: + if (npages) { + if (set_memory_encrypted((unsigned long)req.certs_data, npages)) + WARN_ONCE(ret, "failed to restore encryption mask (leak it)\n"); + else + __free_pages(page, get_order(report_req->certs_len)); + } return ret; } diff --git a/drivers/virt/vboxguest/Kconfig b/drivers/virt/vboxguest/Kconfig index 11b153e7454e..eaba28c95e73 100644 --- a/drivers/virt/vboxguest/Kconfig +++ b/drivers/virt/vboxguest/Kconfig @@ -1,7 +1,8 @@ # SPDX-License-Identifier: GPL-2.0-only config VBOXGUEST tristate "Virtual Box Guest integration support" - depends on (ARM64 || X86) && PCI && INPUT + depends on (ARM64 || X86 || COMPILE_TEST) && PCI && INPUT + depends on HAS_IOPORT help This is a driver for the Virtual Box Guest PCI device used in Virtual Box virtual machines. 
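One subtlety in the sev-guest certificate-buffer handling above: pages shared with the host via set_memory_decrypted() must be re-encrypted before they can be returned to the page allocator, and if re-encryption fails the only safe option is to leak them - hence the WARN_ONCE() wording. The lifecycle in outline (an illustrative sketch):

#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/set_memory.h>

static int example_shared_buffer(unsigned int npages)
{
        size_t size = (size_t)npages << PAGE_SHIFT;
        struct page *page = alloc_pages(GFP_KERNEL | __GFP_ZERO,
                                        get_order(size));
        void *buf;

        if (!page)
                return -ENOMEM;
        buf = page_address(page);

        if (set_memory_decrypted((unsigned long)buf, npages)) {
                /* Never became shared; a plain free is still safe. */
                __free_pages(page, get_order(size));
                return -EIO;
        }

        /* ... exchange data with the hypervisor ... */

        if (set_memory_encrypted((unsigned long)buf, npages)) {
                /* Still mapped shared: freeing would hand host-visible
                 * memory back to the allocator, so leak it instead.
                 */
                return -EIO;
        }

        __free_pages(page, get_order(size));
        return 0;
}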
Enabling this driver will add diff --git a/fs/affs/file.c b/fs/affs/file.c index a5a861dd5223..7a71018e3f67 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -596,7 +596,7 @@ affs_extent_file_ofs(struct inode *inode, u32 newsize) BUG_ON(tmp > bsize); AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA); AFFS_DATA_HEAD(bh)->key = cpu_to_be32(inode->i_ino); - AFFS_DATA_HEAD(bh)->sequence = cpu_to_be32(bidx); + AFFS_DATA_HEAD(bh)->sequence = cpu_to_be32(bidx + 1); AFFS_DATA_HEAD(bh)->size = cpu_to_be32(tmp); affs_fix_checksum(sb, bh); bh->b_state &= ~(1UL << BH_New); @@ -724,7 +724,8 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping, tmp = min(bsize - boff, to - from); BUG_ON(boff + tmp > bsize || tmp > bsize); memcpy(AFFS_DATA(bh) + boff, data + from, tmp); - be32_add_cpu(&AFFS_DATA_HEAD(bh)->size, tmp); + AFFS_DATA_HEAD(bh)->size = cpu_to_be32( + max(boff + tmp, be32_to_cpu(AFFS_DATA_HEAD(bh)->size))); affs_fix_checksum(sb, bh); mark_buffer_dirty_inode(bh, inode); written += tmp; @@ -746,7 +747,7 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping, if (buffer_new(bh)) { AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA); AFFS_DATA_HEAD(bh)->key = cpu_to_be32(inode->i_ino); - AFFS_DATA_HEAD(bh)->sequence = cpu_to_be32(bidx); + AFFS_DATA_HEAD(bh)->sequence = cpu_to_be32(bidx + 1); AFFS_DATA_HEAD(bh)->size = cpu_to_be32(bsize); AFFS_DATA_HEAD(bh)->next = 0; bh->b_state &= ~(1UL << BH_New); @@ -780,7 +781,7 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping, if (buffer_new(bh)) { AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA); AFFS_DATA_HEAD(bh)->key = cpu_to_be32(inode->i_ino); - AFFS_DATA_HEAD(bh)->sequence = cpu_to_be32(bidx); + AFFS_DATA_HEAD(bh)->sequence = cpu_to_be32(bidx + 1); AFFS_DATA_HEAD(bh)->size = cpu_to_be32(tmp); AFFS_DATA_HEAD(bh)->next = 0; bh->b_state &= ~(1UL << BH_New); diff --git a/fs/afs/cell.c b/fs/afs/cell.c index cee42646736c..96a6781f3653 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -64,7 +64,8 @@ static struct afs_cell *afs_find_cell_locked(struct afs_net *net, return ERR_PTR(-ENAMETOOLONG); if (!name) { - cell = net->ws_cell; + cell = rcu_dereference_protected(net->ws_cell, + lockdep_is_held(&net->cells_lock)); if (!cell) return ERR_PTR(-EDESTADDRREQ); goto found; @@ -388,8 +389,8 @@ int afs_cell_init(struct afs_net *net, const char *rootcell) /* install the new cell */ down_write(&net->cells_lock); afs_see_cell(new_root, afs_cell_trace_see_ws); - old_root = net->ws_cell; - net->ws_cell = new_root; + old_root = rcu_replace_pointer(net->ws_cell, new_root, + lockdep_is_held(&net->cells_lock)); up_write(&net->cells_lock); afs_unuse_cell(net, old_root, afs_cell_trace_unuse_ws); @@ -945,8 +946,8 @@ void afs_cell_purge(struct afs_net *net) _enter(""); down_write(&net->cells_lock); - ws = net->ws_cell; - net->ws_cell = NULL; + ws = rcu_replace_pointer(net->ws_cell, NULL, + lockdep_is_held(&net->cells_lock)); up_write(&net->cells_lock); afs_unuse_cell(net, ws, afs_cell_trace_unuse_ws); diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c index d8bf52f77d93..008698d706ca 100644 --- a/fs/afs/dynroot.c +++ b/fs/afs/dynroot.c @@ -314,12 +314,23 @@ static const char *afs_atcell_get_link(struct dentry *dentry, struct inode *inod const char *name; bool dotted = vnode->fid.vnode == 3; - if (!net->ws_cell) + if (!dentry) { + /* We're in RCU-pathwalk. 
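The surrounding afs hunks convert net->ws_cell into an __rcu pointer so that RCU-pathwalk can read it locklessly: writers publish under cells_lock with rcu_replace_pointer(), lock holders read with rcu_dereference_protected(), and lockless readers use rcu_dereference() inside an RCU read-side section. The idiom in miniature (a self-contained sketch, not the afs code):

#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/string.h>

struct cell { char name[16]; };

static struct cell __rcu *ws_cell;
static DEFINE_SPINLOCK(cells_lock);

static void set_cell(struct cell *new)
{
        struct cell *old;

        spin_lock(&cells_lock);
        old = rcu_replace_pointer(ws_cell, new,
                                  lockdep_is_held(&cells_lock));
        spin_unlock(&cells_lock);

        if (old) {
                synchronize_rcu();      /* wait out existing readers */
                kfree(old);
        }
}

static size_t cell_name_len(void)
{
        struct cell *c;
        size_t len = 0;

        rcu_read_lock();
        c = rcu_dereference(ws_cell);   /* pointer valid until unlock */
        if (c)
                len = strlen(c->name);
        rcu_read_unlock();
        return len;
}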
*/ + cell = rcu_dereference(net->ws_cell); + if (dotted) + name = cell->name - 1; + else + name = cell->name; + /* Shouldn't need to set a delayed call. */ + return name; + } + + if (!rcu_access_pointer(net->ws_cell)) return ERR_PTR(-ENOENT); down_read(&net->cells_lock); - cell = net->ws_cell; + cell = rcu_dereference_protected(net->ws_cell, lockdep_is_held(&net->cells_lock)); if (dotted) name = cell->name - 1; else diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 90f407774a9a..df30bd62da79 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -287,7 +287,7 @@ struct afs_net { /* Cell database */ struct rb_root cells; - struct afs_cell *ws_cell; + struct afs_cell __rcu *ws_cell; struct work_struct cells_manager; struct timer_list cells_timer; atomic_t cells_outstanding; diff --git a/fs/afs/proc.c b/fs/afs/proc.c index e7614f4f30c2..12c88d8be3fe 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c @@ -206,7 +206,7 @@ static int afs_proc_rootcell_show(struct seq_file *m, void *v) net = afs_seq2net_single(m); down_read(&net->cells_lock); - cell = net->ws_cell; + cell = rcu_dereference_protected(net->ws_cell, lockdep_is_held(&net->cells_lock)); if (cell) seq_printf(m, "%s\n", cell->name); up_read(&net->cells_lock); @@ -242,7 +242,7 @@ static int afs_proc_rootcell_write(struct file *file, char *buf, size_t size) ret = -EEXIST; inode_lock(file_inode(file)); - if (!net->ws_cell) + if (!rcu_access_pointer(net->ws_cell)) ret = afs_cell_init(net, buf); else printk("busy\n"); diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index 5dabbf3c0965..05b1250619ec 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -1021,8 +1021,8 @@ struct journal_buf *bch2_next_write_buffer_flush_journal_buf(struct journal *j, /* allocate journal on a device: */ -static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr, - bool new_fs, struct closure *cl) +static int bch2_set_nr_journal_buckets_iter(struct bch_dev *ca, unsigned nr, + bool new_fs, struct closure *cl) { struct bch_fs *c = ca->fs; struct journal_device *ja = &ca->journal; @@ -1150,26 +1150,20 @@ err_free: return ret; } -/* - * Allocate more journal space at runtime - not currently making use if it, but - * the code works: - */ -int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca, - unsigned nr) +static int bch2_set_nr_journal_buckets_loop(struct bch_fs *c, struct bch_dev *ca, + unsigned nr, bool new_fs) { struct journal_device *ja = &ca->journal; - struct closure cl; int ret = 0; + struct closure cl; closure_init_stack(&cl); - down_write(&c->state_lock); - /* don't handle reducing nr of buckets yet: */ if (nr < ja->nr) - goto unlock; + return 0; - while (ja->nr < nr) { + while (!ret && ja->nr < nr) { struct disk_reservation disk_res = { 0, 0, 0 }; /* @@ -1182,27 +1176,38 @@ int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca, * filesystem-wide allocation will succeed, this is a device * specific allocation - we can hang here: */ + if (!new_fs) { + ret = bch2_disk_reservation_get(c, &disk_res, + bucket_to_sector(ca, nr - ja->nr), 1, 0); + if (ret) + break; + } - ret = bch2_disk_reservation_get(c, &disk_res, - bucket_to_sector(ca, nr - ja->nr), 1, 0); - if (ret) - break; + ret = bch2_set_nr_journal_buckets_iter(ca, nr, new_fs, &cl); - ret = __bch2_set_nr_journal_buckets(ca, nr, false, &cl); + if (ret == -BCH_ERR_bucket_alloc_blocked || + ret == -BCH_ERR_open_buckets_empty) + ret = 0; /* wait and retry */ bch2_disk_reservation_put(c, &disk_res); - closure_sync(&cl); - - if (ret && - 
ret != -BCH_ERR_bucket_alloc_blocked && - ret != -BCH_ERR_open_buckets_empty) - break; } - bch_err_fn(c, ret); -unlock: + return ret; +} + +/* + * Allocate more journal space at runtime - not currently making use if it, but + * the code works: + */ +int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca, + unsigned nr) +{ + down_write(&c->state_lock); + int ret = bch2_set_nr_journal_buckets_loop(c, ca, nr, false); up_write(&c->state_lock); + + bch_err_fn(c, ret); return ret; } @@ -1228,7 +1233,7 @@ int bch2_dev_journal_alloc(struct bch_dev *ca, bool new_fs) min(1 << 13, (1 << 24) / ca->mi.bucket_size)); - ret = __bch2_set_nr_journal_buckets(ca, nr, new_fs, NULL); + ret = bch2_set_nr_journal_buckets_loop(ca->fs, ca, nr, new_fs); err: bch_err_fn(ca, ret); return ret; diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index 21805509ab9e..6718dc37c5a3 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -74,20 +74,14 @@ static int bch2_bucket_is_movable(struct btree_trans *trans, struct move_bucket *b, u64 time) { struct bch_fs *c = trans->c; - struct btree_iter iter; - struct bkey_s_c k; - struct bch_alloc_v4 _a; - const struct bch_alloc_v4 *a; - int ret; - if (bch2_bucket_is_open(trans->c, - b->k.bucket.inode, - b->k.bucket.offset)) + if (bch2_bucket_is_open(c, b->k.bucket.inode, b->k.bucket.offset)) return 0; - k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_alloc, - b->k.bucket, BTREE_ITER_cached); - ret = bkey_err(k); + struct btree_iter iter; + struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_alloc, + b->k.bucket, BTREE_ITER_cached); + int ret = bkey_err(k); if (ret) return ret; @@ -95,13 +89,18 @@ static int bch2_bucket_is_movable(struct btree_trans *trans, if (!ca) goto out; - a = bch2_alloc_to_v4(k, &_a); + if (ca->mi.state != BCH_MEMBER_STATE_rw || + !bch2_dev_is_online(ca)) + goto out_put; + + struct bch_alloc_v4 _a; + const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &_a); b->k.gen = a->gen; b->sectors = bch2_bucket_sectors_dirty(*a); u64 lru_idx = alloc_lru_idx_fragmentation(*a, ca); ret = lru_idx && lru_idx <= time; - +out_put: bch2_dev_put(ca); out: bch2_trans_iter_exit(trans, &iter); diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index 8037ccbacf6a..a81a7b6c0989 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -69,14 +69,20 @@ enum bcachefs_metadata_version bch2_latest_compatible_version(enum bcachefs_meta return v; } -void bch2_set_version_incompat(struct bch_fs *c, enum bcachefs_metadata_version version) +bool bch2_set_version_incompat(struct bch_fs *c, enum bcachefs_metadata_version version) { - mutex_lock(&c->sb_lock); - SET_BCH_SB_VERSION_INCOMPAT(c->disk_sb.sb, - max(BCH_SB_VERSION_INCOMPAT(c->disk_sb.sb), version)); - c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_FEATURE_incompat_version_field); - bch2_write_super(c); - mutex_unlock(&c->sb_lock); + bool ret = (c->sb.features & BIT_ULL(BCH_FEATURE_incompat_version_field)) && + version <= c->sb.version_incompat_allowed; + + if (ret) { + mutex_lock(&c->sb_lock); + SET_BCH_SB_VERSION_INCOMPAT(c->disk_sb.sb, + max(BCH_SB_VERSION_INCOMPAT(c->disk_sb.sb), version)); + bch2_write_super(c); + mutex_unlock(&c->sb_lock); + } + + return ret; } const char * const bch2_sb_fields[] = { @@ -1219,9 +1225,11 @@ void bch2_sb_upgrade(struct bch_fs *c, unsigned new_version, bool incompat) c->disk_sb.sb->version = cpu_to_le16(new_version); c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALL); - if (incompat) + if (incompat) { 
SET_BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb, max(BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb), new_version)); + c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_FEATURE_incompat_version_field); + } } static int bch2_sb_ext_validate(struct bch_sb *sb, struct bch_sb_field *f, diff --git a/fs/bcachefs/super-io.h b/fs/bcachefs/super-io.h index f1ab4f943720..b4cff9ebdebb 100644 --- a/fs/bcachefs/super-io.h +++ b/fs/bcachefs/super-io.h @@ -21,17 +21,14 @@ static inline bool bch2_version_compatible(u16 version) void bch2_version_to_text(struct printbuf *, enum bcachefs_metadata_version); enum bcachefs_metadata_version bch2_latest_compatible_version(enum bcachefs_metadata_version); -void bch2_set_version_incompat(struct bch_fs *, enum bcachefs_metadata_version); +bool bch2_set_version_incompat(struct bch_fs *, enum bcachefs_metadata_version); static inline bool bch2_request_incompat_feature(struct bch_fs *c, enum bcachefs_metadata_version version) { - if (unlikely(version > c->sb.version_incompat)) { - if (version > c->sb.version_incompat_allowed) - return false; - bch2_set_version_incompat(c, version); - } - return true; + return likely(version <= c->sb.version_incompat) + ? true + : bch2_set_version_incompat(c, version); } static inline size_t bch2_sb_field_bytes(struct bch_sb_field *f) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a9322601ab5c..38756f8cef46 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1382,8 +1382,13 @@ static noinline int cow_file_range(struct btrfs_inode *inode, continue; } if (done_offset) { - *done_offset = start - 1; - return 0; + /* + * Move @end to the end of the processed range, + * and exit the loop to unlock the processed extents. + */ + end = start - 1; + ret = 0; + break; } ret = -ENOSPC; } diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 53b846d99ece..14f53f757555 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -1330,13 +1330,13 @@ MODULE_PARM_DESC(read_policy, int btrfs_read_policy_to_enum(const char *str, s64 *value_ret) { - char param[32] = { 0 }; + char param[32]; char __maybe_unused *value_str; if (!str || strlen(str) == 0) return 0; - strncpy(param, str, sizeof(param) - 1); + strscpy(param, str); #ifdef CONFIG_BTRFS_EXPERIMENTAL /* Separate value from input in policy:value format. 
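The btrfs/sysfs.c change above is part of the tree-wide strncpy() removal: strncpy() does not guarantee NUL-termination when the source fills the buffer and gives no truncation signal, while strscpy() always terminates and returns -E2BIG on truncation, which also makes the { 0 } initializer unnecessary. In sketch form:

#include <linux/errno.h>
#include <linux/string.h>

static int example_copy(const char *src)
{
        char param[32];
        ssize_t ret;

        /* With an array destination the size is inferred; the result
         * is always NUL-terminated.
         */
        ret = strscpy(param, src);
        if (ret < 0)
                return ret;     /* -E2BIG: src did not fit */

        /* ... parse param ... */
        return 0;
}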
*/ diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index fb22d4425cb0..3f8afbd1ebb5 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -7155,6 +7155,7 @@ static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf, btrfs_err(fs_info, "failed to add chunk map, start=%llu len=%llu: %d", map->start, map->chunk_len, ret); + btrfs_free_chunk_map(map); } return ret; diff --git a/fs/coredump.c b/fs/coredump.c index 591700e1b2ce..4375c70144d0 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -63,6 +63,7 @@ static void free_vma_snapshot(struct coredump_params *cprm); static int core_uses_pid; static unsigned int core_pipe_limit; +static unsigned int core_sort_vma; static char core_pattern[CORENAME_MAX_SIZE] = "core"; static int core_name_size = CORENAME_MAX_SIZE; unsigned int core_file_note_size_limit = CORE_FILE_NOTE_SIZE_DEFAULT; @@ -1026,6 +1027,15 @@ static const struct ctl_table coredump_sysctls[] = { .extra1 = (unsigned int *)&core_file_note_size_min, .extra2 = (unsigned int *)&core_file_note_size_max, }, + { + .procname = "core_sort_vma", + .data = &core_sort_vma, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_douintvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, }; static int __init init_fs_coredump_sysctls(void) @@ -1256,8 +1266,9 @@ static bool dump_vma_snapshot(struct coredump_params *cprm) cprm->vma_data_size += m->dump_size; } - sort(cprm->vma_meta, cprm->vma_count, sizeof(*cprm->vma_meta), - cmp_vma_size, NULL); + if (core_sort_vma) + sort(cprm->vma_meta, cprm->vma_count, sizeof(*cprm->vma_meta), + cmp_vma_size, NULL); return true; } diff --git a/fs/efivarfs/file.c b/fs/efivarfs/file.c index cb1b6d0c3454..c294a8fc566d 100644 --- a/fs/efivarfs/file.c +++ b/fs/efivarfs/file.c @@ -57,10 +57,11 @@ static ssize_t efivarfs_file_write(struct file *file, if (bytes == -ENOENT) { /* - * zero size signals to release that the write deleted - * the variable + * FIXME: temporary workaround for fwupdate, signal + * failed write with a 1 to keep created but not + * written files */ - i_size_write(inode, 0); + i_size_write(inode, 1); } else { i_size_write(inode, datasize + sizeof(attributes)); inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); @@ -124,7 +125,8 @@ static int efivarfs_file_release(struct inode *inode, struct file *file) struct efivar_entry *var = inode->i_private; inode_lock(inode); - var->removed = (--var->open_count == 0 && i_size_read(inode) == 0); + /* FIXME: temporary work around for fwupdate */ + var->removed = (--var->open_count == 0 && i_size_read(inode) == 1); inode_unlock(inode); if (var->removed) diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index 09fcf731e65d..6eae8cf655c1 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c @@ -367,6 +367,8 @@ static int efivarfs_fill_super(struct super_block *sb, struct fs_context *fc) if (err) return err; + register_pm_notifier(&sfi->pm_nb); + return efivar_init(efivarfs_callback, sb, true); } @@ -552,7 +554,6 @@ static int efivarfs_init_fs_context(struct fs_context *fc) sfi->pm_nb.notifier_call = efivarfs_pm_notify; sfi->pm_nb.priority = 0; - register_pm_notifier(&sfi->pm_nb); return 0; } diff --git a/fs/exfat/balloc.c b/fs/exfat/balloc.c index ce9be95c9172..9ff825f1502d 100644 --- a/fs/exfat/balloc.c +++ b/fs/exfat/balloc.c @@ -141,7 +141,7 @@ int exfat_set_bitmap(struct inode *inode, unsigned int clu, bool sync) return 0; } -void exfat_clear_bitmap(struct inode *inode, unsigned int clu, bool sync) +int exfat_clear_bitmap(struct inode *inode, 
unsigned int clu, bool sync) { int i, b; unsigned int ent_idx; @@ -150,13 +150,17 @@ void exfat_clear_bitmap(struct inode *inode, unsigned int clu, bool sync) struct exfat_mount_options *opts = &sbi->options; if (!is_valid_cluster(sbi, clu)) - return; + return -EIO; ent_idx = CLUSTER_TO_BITMAP_ENT(clu); i = BITMAP_OFFSET_SECTOR_INDEX(sb, ent_idx); b = BITMAP_OFFSET_BIT_IN_SECTOR(sb, ent_idx); + if (!test_bit_le(b, sbi->vol_amap[i]->b_data)) + return -EIO; + clear_bit_le(b, sbi->vol_amap[i]->b_data); + exfat_update_bh(sbi->vol_amap[i], sync); if (opts->discard) { @@ -171,6 +175,8 @@ void exfat_clear_bitmap(struct inode *inode, unsigned int clu, bool sync) opts->discard = 0; } } + + return 0; } /* diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h index 78be6964a8a0..d30ce18a88b7 100644 --- a/fs/exfat/exfat_fs.h +++ b/fs/exfat/exfat_fs.h @@ -456,7 +456,7 @@ int exfat_count_num_clusters(struct super_block *sb, int exfat_load_bitmap(struct super_block *sb); void exfat_free_bitmap(struct exfat_sb_info *sbi); int exfat_set_bitmap(struct inode *inode, unsigned int clu, bool sync); -void exfat_clear_bitmap(struct inode *inode, unsigned int clu, bool sync); +int exfat_clear_bitmap(struct inode *inode, unsigned int clu, bool sync); unsigned int exfat_find_free_bitmap(struct super_block *sb, unsigned int clu); int exfat_count_used_clusters(struct super_block *sb, unsigned int *ret_count); int exfat_trim_fs(struct inode *inode, struct fstrim_range *range); diff --git a/fs/exfat/fatent.c b/fs/exfat/fatent.c index 9e5492ac409b..6f3651c6ca91 100644 --- a/fs/exfat/fatent.c +++ b/fs/exfat/fatent.c @@ -175,6 +175,7 @@ static int __exfat_free_cluster(struct inode *inode, struct exfat_chain *p_chain BITMAP_OFFSET_SECTOR_INDEX(sb, CLUSTER_TO_BITMAP_ENT(clu)); if (p_chain->flags == ALLOC_NO_FAT_CHAIN) { + int err; unsigned int last_cluster = p_chain->dir + p_chain->size - 1; do { bool sync = false; @@ -189,7 +190,9 @@ static int __exfat_free_cluster(struct inode *inode, struct exfat_chain *p_chain cur_cmap_i = next_cmap_i; } - exfat_clear_bitmap(inode, clu, (sync && IS_DIRSYNC(inode))); + err = exfat_clear_bitmap(inode, clu, (sync && IS_DIRSYNC(inode))); + if (err) + break; clu++; num_clusters++; } while (num_clusters < p_chain->size); @@ -210,12 +213,13 @@ static int __exfat_free_cluster(struct inode *inode, struct exfat_chain *p_chain cur_cmap_i = next_cmap_i; } - exfat_clear_bitmap(inode, clu, (sync && IS_DIRSYNC(inode))); + if (exfat_clear_bitmap(inode, clu, (sync && IS_DIRSYNC(inode)))) + break; clu = n_clu; num_clusters++; if (err) - goto dec_used_clus; + break; if (num_clusters >= sbi->num_clusters - EXFAT_FIRST_CLUSTER) { /* @@ -229,7 +233,6 @@ static int __exfat_free_cluster(struct inode *inode, struct exfat_chain *p_chain } while (clu != EXFAT_EOF_CLUSTER); } -dec_used_clus: sbi->used_clusters -= num_clusters; return 0; } diff --git a/fs/exfat/file.c b/fs/exfat/file.c index 05b51e721783..807349d8ea05 100644 --- a/fs/exfat/file.c +++ b/fs/exfat/file.c @@ -587,7 +587,7 @@ static ssize_t exfat_file_write_iter(struct kiocb *iocb, struct iov_iter *iter) valid_size = ei->valid_size; ret = generic_write_checks(iocb, iter); - if (ret < 0) + if (ret <= 0) goto unlock; if (iocb->ki_flags & IOCB_DIRECT) { diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c index 691dd77b6ab5..8b30027d8251 100644 --- a/fs/exfat/namei.c +++ b/fs/exfat/namei.c @@ -232,7 +232,7 @@ static int exfat_search_empty_slot(struct super_block *sb, dentry = 0; } - while (dentry + num_entries < total_entries && + while (dentry + num_entries 
<= total_entries && clu.dir != EXFAT_EOF_CLUSTER) { i = dentry & (dentries_per_clu - 1); @@ -646,6 +646,11 @@ static int exfat_find(struct inode *dir, struct qstr *qname, info->valid_size = le64_to_cpu(ep2->dentry.stream.valid_size); info->size = le64_to_cpu(ep2->dentry.stream.size); + if (unlikely(EXFAT_B_TO_CLU_ROUND_UP(info->size, sbi) > sbi->used_clusters)) { + exfat_fs_error(sb, "data size is invalid(%lld)", info->size); + return -EIO; + } + info->start_clu = le32_to_cpu(ep2->dentry.stream.start_clu); if (!is_valid_cluster(sbi, info->start_clu) && info->size) { exfat_warn(sb, "start_clu is invalid cluster(0x%x)", diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 2b2d1b755544..2c3a4d09e500 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1457,7 +1457,7 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos, if (ret < 0) goto out; - if (pipe_occupancy(pipe->head, pipe->tail) + cs.nr_segs > pipe->max_usage) { + if (pipe_buf_usage(pipe) + cs.nr_segs > pipe->max_usage) { ret = -EIO; goto out; } @@ -2107,7 +2107,7 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, struct file *out, loff_t *ppos, size_t len, unsigned int flags) { - unsigned int head, tail, mask, count; + unsigned int head, tail, count; unsigned nbuf; unsigned idx; struct pipe_buffer *bufs; @@ -2124,8 +2124,7 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, head = pipe->head; tail = pipe->tail; - mask = pipe->ring_size - 1; - count = head - tail; + count = pipe_occupancy(head, tail); bufs = kvmalloc_array(count, sizeof(struct pipe_buffer), GFP_KERNEL); if (!bufs) { @@ -2135,8 +2134,8 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, nbuf = 0; rem = 0; - for (idx = tail; idx != head && rem < len; idx++) - rem += pipe->bufs[idx & mask].len; + for (idx = tail; !pipe_empty(head, idx) && rem < len; idx++) + rem += pipe_buf(pipe, idx)->len; ret = -EINVAL; if (rem < len) @@ -2147,10 +2146,10 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, struct pipe_buffer *ibuf; struct pipe_buffer *obuf; - if (WARN_ON(nbuf >= count || tail == head)) + if (WARN_ON(nbuf >= count || pipe_empty(head, tail))) goto out_free; - ibuf = &pipe->bufs[tail & mask]; + ibuf = pipe_buf(pipe, tail); obuf = &bufs[nbuf]; if (rem >= ibuf->len) { diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 1bb646752e46..033feeab8c34 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -29,6 +29,7 @@ #include <linux/pagemap.h> #include <linux/gfp.h> #include <linux/swap.h> +#include <linux/compaction.h> #include <linux/uaccess.h> #include <linux/filelock.h> @@ -457,7 +458,7 @@ static bool nfs_release_folio(struct folio *folio, gfp_t gfp) /* If the private flag is set, then the folio is not freeable */ if (folio_test_private(folio)) { if ((current_gfp_context(gfp) & GFP_KERNEL) != GFP_KERNEL || - current_is_kswapd()) + current_is_kswapd() || current_is_kcompactd()) return false; if (nfs_wb_folio(folio->mapping->host, folio) < 0) return false; diff --git a/fs/pipe.c b/fs/pipe.c index ce1af7592780..4d0799e4e719 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -210,11 +210,10 @@ static const struct pipe_buf_operations anon_pipe_buf_ops = { /* Done while waiting without holding the pipe lock - thus the READ_ONCE() */ static inline bool pipe_readable(const struct pipe_inode_info *pipe) { - unsigned int head = READ_ONCE(pipe->head); - unsigned int tail = READ_ONCE(pipe->tail); + union pipe_index idx = { .head_tail = READ_ONCE(pipe->head_tail) }; unsigned int writers = READ_ONCE(pipe->writers); - 
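The fuse_dev_splice_write() conversion above trades open-coded `head - tail` and `idx & mask` arithmetic for the pipe_occupancy()/pipe_buf() helpers; the invariant behind both spellings is that head and tail are free-running unsigned counters whose difference survives wraparound, with the ring slot taken modulo a power-of-two ring size. A self-contained sketch of that arithmetic (RING_SIZE, occupancy() and slot() are illustrative names, not the kernel's):

#include <assert.h>
#include <stdio.h>

#define RING_SIZE 8u    /* must be a power of two, like the pipe ring */

/* Illustrative stand-ins for the pipe_occupancy()/pipe_buf() slot math. */
static unsigned int occupancy(unsigned int head, unsigned int tail)
{
    return head - tail;                 /* well-defined modulo 2^32 */
}

static unsigned int slot(unsigned int idx)
{
    return idx & (RING_SIZE - 1);
}

int main(void)
{
    /* head has wrapped past UINT_MAX, tail has not yet: */
    unsigned int tail = 0xfffffffeu, head = 0x00000002u;

    assert(occupancy(head, tail) == 4); /* 4 buffers in flight */
    for (unsigned int idx = tail; idx != head; idx++)
        printf("idx %#x -> slot %u\n", idx, slot(idx));
    return 0;
}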
return !pipe_empty(head, tail) || !writers; + return !pipe_empty(idx.head, idx.tail) || !writers; } static inline unsigned int pipe_update_tail(struct pipe_inode_info *pipe, @@ -395,7 +394,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) wake_next_reader = true; mutex_lock(&pipe->mutex); } - if (pipe_empty(pipe->head, pipe->tail)) + if (pipe_is_empty(pipe)) wake_next_reader = false; mutex_unlock(&pipe->mutex); @@ -417,11 +416,10 @@ static inline int is_packetized(struct file *file) /* Done while waiting without holding the pipe lock - thus the READ_ONCE() */ static inline bool pipe_writable(const struct pipe_inode_info *pipe) { - unsigned int head = READ_ONCE(pipe->head); - unsigned int tail = READ_ONCE(pipe->tail); + union pipe_index idx = { .head_tail = READ_ONCE(pipe->head_tail) }; unsigned int max_usage = READ_ONCE(pipe->max_usage); - return !pipe_full(head, tail, max_usage) || + return !pipe_full(idx.head, idx.tail, max_usage) || !READ_ONCE(pipe->readers); } @@ -579,11 +577,11 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); wait_event_interruptible_exclusive(pipe->wr_wait, pipe_writable(pipe)); mutex_lock(&pipe->mutex); - was_empty = pipe_empty(pipe->head, pipe->tail); + was_empty = pipe_is_empty(pipe); wake_next_writer = true; } out: - if (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) + if (pipe_is_full(pipe)) wake_next_writer = false; mutex_unlock(&pipe->mutex); @@ -616,7 +614,7 @@ out: static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct pipe_inode_info *pipe = filp->private_data; - unsigned int count, head, tail, mask; + unsigned int count, head, tail; switch (cmd) { case FIONREAD: @@ -624,10 +622,9 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) count = 0; head = pipe->head; tail = pipe->tail; - mask = pipe->ring_size - 1; - while (tail != head) { - count += pipe->bufs[tail & mask].len; + while (!pipe_empty(head, tail)) { + count += pipe_buf(pipe, tail)->len; tail++; } mutex_unlock(&pipe->mutex); @@ -659,7 +656,7 @@ pipe_poll(struct file *filp, poll_table *wait) { __poll_t mask; struct pipe_inode_info *pipe = filp->private_data; - unsigned int head, tail; + union pipe_index idx; /* Epoll has some historical nasty semantics, this enables them */ WRITE_ONCE(pipe->poll_usage, true); @@ -680,19 +677,18 @@ pipe_poll(struct file *filp, poll_table *wait) * if something changes and you got it wrong, the poll * table entry will wake you up and fix it. 
*/ - head = READ_ONCE(pipe->head); - tail = READ_ONCE(pipe->tail); + idx.head_tail = READ_ONCE(pipe->head_tail); mask = 0; if (filp->f_mode & FMODE_READ) { - if (!pipe_empty(head, tail)) + if (!pipe_empty(idx.head, idx.tail)) mask |= EPOLLIN | EPOLLRDNORM; if (!pipe->writers && filp->f_pipe != pipe->w_counter) mask |= EPOLLHUP; } if (filp->f_mode & FMODE_WRITE) { - if (!pipe_full(head, tail, pipe->max_usage)) + if (!pipe_full(idx.head, idx.tail, pipe->max_usage)) mask |= EPOLLOUT | EPOLLWRNORM; /* * Most Unices do not set EPOLLERR for FIFOs but on Linux they diff --git a/fs/smb/client/cifsacl.c b/fs/smb/client/cifsacl.c index 699a3f76d083..64bd68f750f8 100644 --- a/fs/smb/client/cifsacl.c +++ b/fs/smb/client/cifsacl.c @@ -763,7 +763,7 @@ static void parse_dacl(struct smb_acl *pdacl, char *end_of_acl, struct cifs_fattr *fattr, bool mode_from_special_sid) { int i; - int num_aces = 0; + u16 num_aces = 0; int acl_size; char *acl_base; struct smb_ace **ppace; @@ -778,14 +778,15 @@ static void parse_dacl(struct smb_acl *pdacl, char *end_of_acl, } /* validate that we do not go past end of acl */ - if (end_of_acl < (char *)pdacl + le16_to_cpu(pdacl->size)) { + if (end_of_acl < (char *)pdacl + sizeof(struct smb_acl) || + end_of_acl < (char *)pdacl + le16_to_cpu(pdacl->size)) { cifs_dbg(VFS, "ACL too small to parse DACL\n"); return; } cifs_dbg(NOISY, "DACL revision %d size %d num aces %d\n", le16_to_cpu(pdacl->revision), le16_to_cpu(pdacl->size), - le32_to_cpu(pdacl->num_aces)); + le16_to_cpu(pdacl->num_aces)); /* reset rwx permissions for user/group/other. Also, if num_aces is 0 i.e. DACL has no ACEs, @@ -795,12 +796,15 @@ static void parse_dacl(struct smb_acl *pdacl, char *end_of_acl, acl_base = (char *)pdacl; acl_size = sizeof(struct smb_acl); - num_aces = le32_to_cpu(pdacl->num_aces); + num_aces = le16_to_cpu(pdacl->num_aces); if (num_aces > 0) { umode_t denied_mode = 0; - if (num_aces > ULONG_MAX / sizeof(struct smb_ace *)) + if (num_aces > (le16_to_cpu(pdacl->size) - sizeof(struct smb_acl)) / + (offsetof(struct smb_ace, sid) + + offsetof(struct smb_sid, sub_auth) + sizeof(__le16))) return; + ppace = kmalloc_array(num_aces, sizeof(struct smb_ace *), GFP_KERNEL); if (!ppace) @@ -937,12 +941,12 @@ unsigned int setup_special_user_owner_ACE(struct smb_ace *pntace) static void populate_new_aces(char *nacl_base, struct smb_sid *pownersid, struct smb_sid *pgrpsid, - __u64 *pnmode, u32 *pnum_aces, u16 *pnsize, + __u64 *pnmode, u16 *pnum_aces, u16 *pnsize, bool modefromsid, bool posix) { __u64 nmode; - u32 num_aces = 0; + u16 num_aces = 0; u16 nsize = 0; __u64 user_mode; __u64 group_mode; @@ -1050,7 +1054,7 @@ static __u16 replace_sids_and_copy_aces(struct smb_acl *pdacl, struct smb_acl *p u16 size = 0; struct smb_ace *pntace = NULL; char *acl_base = NULL; - u32 src_num_aces = 0; + u16 src_num_aces = 0; u16 nsize = 0; struct smb_ace *pnntace = NULL; char *nacl_base = NULL; @@ -1058,7 +1062,7 @@ static __u16 replace_sids_and_copy_aces(struct smb_acl *pdacl, struct smb_acl *p acl_base = (char *)pdacl; size = sizeof(struct smb_acl); - src_num_aces = le32_to_cpu(pdacl->num_aces); + src_num_aces = le16_to_cpu(pdacl->num_aces); nacl_base = (char *)pndacl; nsize = sizeof(struct smb_acl); @@ -1090,11 +1094,11 @@ static int set_chmod_dacl(struct smb_acl *pdacl, struct smb_acl *pndacl, u16 size = 0; struct smb_ace *pntace = NULL; char *acl_base = NULL; - u32 src_num_aces = 0; + u16 src_num_aces = 0; u16 nsize = 0; struct smb_ace *pnntace = NULL; char *nacl_base = NULL; - u32 num_aces = 0; + u16 num_aces = 0; bool 
new_aces_set = false; /* Assuming that pndacl and pnmode are never NULL */ @@ -1112,7 +1116,7 @@ static int set_chmod_dacl(struct smb_acl *pdacl, struct smb_acl *pndacl, acl_base = (char *)pdacl; size = sizeof(struct smb_acl); - src_num_aces = le32_to_cpu(pdacl->num_aces); + src_num_aces = le16_to_cpu(pdacl->num_aces); /* Retain old ACEs which we can retain */ for (i = 0; i < src_num_aces; ++i) { @@ -1158,7 +1162,7 @@ next_ace: } finalize_dacl: - pndacl->num_aces = cpu_to_le32(num_aces); + pndacl->num_aces = cpu_to_le16(num_aces); pndacl->size = cpu_to_le16(nsize); return 0; @@ -1293,7 +1297,7 @@ static int build_sec_desc(struct smb_ntsd *pntsd, struct smb_ntsd *pnntsd, dacloffset ? dacl_ptr->revision : cpu_to_le16(ACL_REVISION); ndacl_ptr->size = cpu_to_le16(0); - ndacl_ptr->num_aces = cpu_to_le32(0); + ndacl_ptr->num_aces = cpu_to_le16(0); rc = set_chmod_dacl(dacl_ptr, ndacl_ptr, owner_sid_ptr, group_sid_ptr, pnmode, mode_from_sid, posix); @@ -1653,7 +1657,7 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 *pnmode, dacl_ptr = (struct smb_acl *)((char *)pntsd + dacloffset); if (mode_from_sid) nsecdesclen += - le32_to_cpu(dacl_ptr->num_aces) * sizeof(struct smb_ace); + le16_to_cpu(dacl_ptr->num_aces) * sizeof(struct smb_ace); else /* cifsacl */ nsecdesclen += le16_to_cpu(dacl_ptr->size); } diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c index 3feaa0f68169..d07682020c64 100644 --- a/fs/smb/client/cifssmb.c +++ b/fs/smb/client/cifssmb.c @@ -1338,7 +1338,8 @@ cifs_readv_callback(struct mid_q_entry *mid) rdata->credits.value = 0; rdata->subreq.error = rdata->result; rdata->subreq.transferred += rdata->got_bytes; - queue_work(cifsiod_wq, &rdata->subreq.work); + trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_progress); + netfs_read_subreq_terminated(&rdata->subreq); release_mid(mid); add_credits(server, &credits, 0); } diff --git a/fs/smb/common/smbacl.h b/fs/smb/common/smbacl.h index 6a60698fc6f0..a624ec9e4a14 100644 --- a/fs/smb/common/smbacl.h +++ b/fs/smb/common/smbacl.h @@ -107,7 +107,8 @@ struct smb_sid { struct smb_acl { __le16 revision; /* revision level */ __le16 size; - __le32 num_aces; + __le16 num_aces; + __le16 reserved; } __attribute__((packed)); struct smb_ace { diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index f1efcd027475..c53121538990 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -7458,17 +7458,17 @@ out_check_cl: } no_check_cl: + flock = smb_lock->fl; + list_del(&smb_lock->llist); + if (smb_lock->zero_len) { err = 0; goto skip; } - - flock = smb_lock->fl; - list_del(&smb_lock->llist); retry: rc = vfs_lock_file(filp, smb_lock->cmd, flock, NULL); skip: - if (flags & SMB2_LOCKFLAG_UNLOCK) { + if (smb_lock->flags & SMB2_LOCKFLAG_UNLOCK) { if (!rc) { ksmbd_debug(SMB, "File unlocked\n"); } else if (rc == -ENOENT) { diff --git a/fs/smb/server/smbacl.c b/fs/smb/server/smbacl.c index d39d3e553366..49b128698670 100644 --- a/fs/smb/server/smbacl.c +++ b/fs/smb/server/smbacl.c @@ -333,7 +333,7 @@ void posix_state_to_acl(struct posix_acl_state *state, pace->e_perm = state->other.allow; } -int init_acl_state(struct posix_acl_state *state, int cnt) +int init_acl_state(struct posix_acl_state *state, u16 cnt) { int alloc; @@ -368,7 +368,7 @@ static void parse_dacl(struct mnt_idmap *idmap, struct smb_fattr *fattr) { int i, ret; - int num_aces = 0; + u16 num_aces = 0; unsigned int acl_size; char *acl_base; struct smb_ace **ppace; @@ -389,16 +389,18 @@ static void parse_dacl(struct mnt_idmap *idmap, 
ksmbd_debug(SMB, "DACL revision %d size %d num aces %d\n", le16_to_cpu(pdacl->revision), le16_to_cpu(pdacl->size), - le32_to_cpu(pdacl->num_aces)); + le16_to_cpu(pdacl->num_aces)); acl_base = (char *)pdacl; acl_size = sizeof(struct smb_acl); - num_aces = le32_to_cpu(pdacl->num_aces); + num_aces = le16_to_cpu(pdacl->num_aces); if (num_aces <= 0) return; - if (num_aces > ULONG_MAX / sizeof(struct smb_ace *)) + if (num_aces > (le16_to_cpu(pdacl->size) - sizeof(struct smb_acl)) / + (offsetof(struct smb_ace, sid) + + offsetof(struct smb_sid, sub_auth) + sizeof(__le16))) return; ret = init_acl_state(&acl_state, num_aces); @@ -432,6 +434,7 @@ static void parse_dacl(struct mnt_idmap *idmap, offsetof(struct smb_sid, sub_auth); if (end_of_acl - acl_base < acl_size || + ppace[i]->sid.num_subauth == 0 || ppace[i]->sid.num_subauth > SID_MAX_SUB_AUTHORITIES || (end_of_acl - acl_base < acl_size + sizeof(__le32) * ppace[i]->sid.num_subauth) || @@ -580,7 +583,7 @@ static void parse_dacl(struct mnt_idmap *idmap, static void set_posix_acl_entries_dacl(struct mnt_idmap *idmap, struct smb_ace *pndace, - struct smb_fattr *fattr, u32 *num_aces, + struct smb_fattr *fattr, u16 *num_aces, u16 *size, u32 nt_aces_num) { struct posix_acl_entry *pace; @@ -701,7 +704,7 @@ static void set_ntacl_dacl(struct mnt_idmap *idmap, struct smb_fattr *fattr) { struct smb_ace *ntace, *pndace; - int nt_num_aces = le32_to_cpu(nt_dacl->num_aces), num_aces = 0; + u16 nt_num_aces = le16_to_cpu(nt_dacl->num_aces), num_aces = 0; unsigned short size = 0; int i; @@ -728,7 +731,7 @@ static void set_ntacl_dacl(struct mnt_idmap *idmap, set_posix_acl_entries_dacl(idmap, pndace, fattr, &num_aces, &size, nt_num_aces); - pndacl->num_aces = cpu_to_le32(num_aces); + pndacl->num_aces = cpu_to_le16(num_aces); pndacl->size = cpu_to_le16(le16_to_cpu(pndacl->size) + size); } @@ -736,7 +739,7 @@ static void set_mode_dacl(struct mnt_idmap *idmap, struct smb_acl *pndacl, struct smb_fattr *fattr) { struct smb_ace *pace, *pndace; - u32 num_aces = 0; + u16 num_aces = 0; u16 size = 0, ace_size = 0; uid_t uid; const struct smb_sid *sid; @@ -792,7 +795,7 @@ static void set_mode_dacl(struct mnt_idmap *idmap, fattr->cf_mode, 0007); out: - pndacl->num_aces = cpu_to_le32(num_aces); + pndacl->num_aces = cpu_to_le16(num_aces); pndacl->size = cpu_to_le16(le16_to_cpu(pndacl->size) + size); } @@ -807,6 +810,13 @@ static int parse_sid(struct smb_sid *psid, char *end_of_acl) return -EINVAL; } + if (!psid->num_subauth) + return 0; + + if (psid->num_subauth > SID_MAX_SUB_AUTHORITIES || + end_of_acl < (char *)psid + 8 + sizeof(__le32) * psid->num_subauth) + return -EINVAL; + return 0; } @@ -848,6 +858,9 @@ int parse_sec_desc(struct mnt_idmap *idmap, struct smb_ntsd *pntsd, pntsd->type = cpu_to_le16(DACL_PRESENT); if (pntsd->osidoffset) { + if (le32_to_cpu(pntsd->osidoffset) < sizeof(struct smb_ntsd)) + return -EINVAL; + rc = parse_sid(owner_sid_ptr, end_of_acl); if (rc) { pr_err("%s: Error %d parsing Owner SID\n", __func__, rc); @@ -863,6 +876,9 @@ int parse_sec_desc(struct mnt_idmap *idmap, struct smb_ntsd *pntsd, } if (pntsd->gsidoffset) { + if (le32_to_cpu(pntsd->gsidoffset) < sizeof(struct smb_ntsd)) + return -EINVAL; + rc = parse_sid(group_sid_ptr, end_of_acl); if (rc) { pr_err("%s: Error %d mapping Owner SID to gid\n", @@ -884,6 +900,9 @@ int parse_sec_desc(struct mnt_idmap *idmap, struct smb_ntsd *pntsd, pntsd->type |= cpu_to_le16(DACL_PROTECTED); if (dacloffset) { + if (dacloffset < sizeof(struct smb_ntsd)) + return -EINVAL; + parse_dacl(idmap, dacl_ptr, end_of_acl, 
owner_sid_ptr, group_sid_ptr, fattr); } @@ -1006,8 +1025,9 @@ int smb_inherit_dacl(struct ksmbd_conn *conn, struct smb_sid owner_sid, group_sid; struct dentry *parent = path->dentry->d_parent; struct mnt_idmap *idmap = mnt_idmap(path->mnt); - int inherited_flags = 0, flags = 0, i, ace_cnt = 0, nt_size = 0, pdacl_size; - int rc = 0, num_aces, dacloffset, pntsd_type, pntsd_size, acl_len, aces_size; + int inherited_flags = 0, flags = 0, i, nt_size = 0, pdacl_size; + int rc = 0, dacloffset, pntsd_type, pntsd_size, acl_len, aces_size; + u16 num_aces, ace_cnt = 0; char *aces_base; bool is_dir = S_ISDIR(d_inode(path->dentry)->i_mode); @@ -1023,7 +1043,7 @@ int smb_inherit_dacl(struct ksmbd_conn *conn, parent_pdacl = (struct smb_acl *)((char *)parent_pntsd + dacloffset); acl_len = pntsd_size - dacloffset; - num_aces = le32_to_cpu(parent_pdacl->num_aces); + num_aces = le16_to_cpu(parent_pdacl->num_aces); pntsd_type = le16_to_cpu(parent_pntsd->type); pdacl_size = le16_to_cpu(parent_pdacl->size); @@ -1183,7 +1203,7 @@ pass: pdacl = (struct smb_acl *)((char *)pntsd + le32_to_cpu(pntsd->dacloffset)); pdacl->revision = cpu_to_le16(2); pdacl->size = cpu_to_le16(sizeof(struct smb_acl) + nt_size); - pdacl->num_aces = cpu_to_le32(ace_cnt); + pdacl->num_aces = cpu_to_le16(ace_cnt); pace = (struct smb_ace *)((char *)pdacl + sizeof(struct smb_acl)); memcpy(pace, aces_base, nt_size); pntsd_size += sizeof(struct smb_acl) + nt_size; @@ -1264,7 +1284,7 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, const struct path *path, ace = (struct smb_ace *)((char *)pdacl + sizeof(struct smb_acl)); aces_size = acl_size - sizeof(struct smb_acl); - for (i = 0; i < le32_to_cpu(pdacl->num_aces); i++) { + for (i = 0; i < le16_to_cpu(pdacl->num_aces); i++) { if (offsetof(struct smb_ace, access_req) > aces_size) break; ace_size = le16_to_cpu(ace->size); @@ -1285,7 +1305,7 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, const struct path *path, ace = (struct smb_ace *)((char *)pdacl + sizeof(struct smb_acl)); aces_size = acl_size - sizeof(struct smb_acl); - for (i = 0; i < le32_to_cpu(pdacl->num_aces); i++) { + for (i = 0; i < le16_to_cpu(pdacl->num_aces); i++) { if (offsetof(struct smb_ace, access_req) > aces_size) break; ace_size = le16_to_cpu(ace->size); diff --git a/fs/smb/server/smbacl.h b/fs/smb/server/smbacl.h index 24ce576fc292..355adaee39b8 100644 --- a/fs/smb/server/smbacl.h +++ b/fs/smb/server/smbacl.h @@ -86,7 +86,7 @@ int parse_sec_desc(struct mnt_idmap *idmap, struct smb_ntsd *pntsd, int build_sec_desc(struct mnt_idmap *idmap, struct smb_ntsd *pntsd, struct smb_ntsd *ppntsd, int ppntsd_size, int addition_info, __u32 *secdesclen, struct smb_fattr *fattr); -int init_acl_state(struct posix_acl_state *state, int cnt); +int init_acl_state(struct posix_acl_state *state, u16 cnt); void free_acl_state(struct posix_acl_state *state); void posix_state_to_acl(struct posix_acl_state *state, struct posix_acl_entry *pace); diff --git a/fs/smb/server/transport_ipc.c b/fs/smb/server/transport_ipc.c index 0460ebea6ff0..3f185ae60dc5 100644 --- a/fs/smb/server/transport_ipc.c +++ b/fs/smb/server/transport_ipc.c @@ -281,6 +281,7 @@ static int handle_response(int type, void *payload, size_t sz) if (entry->type + 1 != type) { pr_err("Waiting for IPC type %d, got %d. 
Ignore.\n", entry->type + 1, type); + continue; } entry->response = kvzalloc(sz, KSMBD_DEFAULT_GFP); diff --git a/fs/splice.c b/fs/splice.c index 28cfa63aa236..23fa5561b944 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -331,7 +331,7 @@ ssize_t copy_splice_read(struct file *in, loff_t *ppos, int i; /* Work out how much data we can actually add into the pipe */ - used = pipe_occupancy(pipe->head, pipe->tail); + used = pipe_buf_usage(pipe); npages = max_t(ssize_t, pipe->max_usage - used, 0); len = min_t(size_t, len, npages * PAGE_SIZE); npages = DIV_ROUND_UP(len, PAGE_SIZE); @@ -527,7 +527,7 @@ static int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_des return -ERESTARTSYS; repeat: - while (pipe_empty(pipe->head, pipe->tail)) { + while (pipe_is_empty(pipe)) { if (!pipe->writers) return 0; @@ -820,7 +820,7 @@ ssize_t splice_to_socket(struct pipe_inode_info *pipe, struct file *out, if (signal_pending(current)) break; - while (pipe_empty(pipe->head, pipe->tail)) { + while (pipe_is_empty(pipe)) { ret = 0; if (!pipe->writers) goto out; @@ -968,7 +968,7 @@ static ssize_t do_splice_read(struct file *in, loff_t *ppos, return 0; /* Don't try to read more the pipe has space for. */ - p_space = pipe->max_usage - pipe_occupancy(pipe->head, pipe->tail); + p_space = pipe->max_usage - pipe_buf_usage(pipe); len = min_t(size_t, len, p_space << PAGE_SHIFT); if (unlikely(len > MAX_RW_COUNT)) @@ -1080,7 +1080,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd, more = sd->flags & SPLICE_F_MORE; sd->flags |= SPLICE_F_MORE; - WARN_ON_ONCE(!pipe_empty(pipe->head, pipe->tail)); + WARN_ON_ONCE(!pipe_is_empty(pipe)); while (len) { size_t read_len; @@ -1268,7 +1268,7 @@ static int wait_for_space(struct pipe_inode_info *pipe, unsigned flags) send_sig(SIGPIPE, current, 0); return -EPIPE; } - if (!pipe_full(pipe->head, pipe->tail, pipe->max_usage)) + if (!pipe_is_full(pipe)) return 0; if (flags & SPLICE_F_NONBLOCK) return -EAGAIN; @@ -1652,13 +1652,13 @@ static int ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags) * Check the pipe occupancy without the inode lock first. This function * is speculative anyways, so missing one is ok. */ - if (!pipe_empty(pipe->head, pipe->tail)) + if (!pipe_is_empty(pipe)) return 0; ret = 0; pipe_lock(pipe); - while (pipe_empty(pipe->head, pipe->tail)) { + while (pipe_is_empty(pipe)) { if (signal_pending(current)) { ret = -ERESTARTSYS; break; @@ -1688,13 +1688,13 @@ static int opipe_prep(struct pipe_inode_info *pipe, unsigned int flags) * Check pipe occupancy without the inode lock first. This function * is speculative anyways, so missing one is ok. 
*/ - if (!pipe_full(pipe->head, pipe->tail, pipe->max_usage)) + if (!pipe_is_full(pipe)) return 0; ret = 0; pipe_lock(pipe); - while (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) { + while (pipe_is_full(pipe)) { if (!pipe->readers) { send_sig(SIGPIPE, current, 0); ret = -EPIPE; diff --git a/fs/vboxsf/super.c b/fs/vboxsf/super.c index 1d94bb784108..0bc96ab6580b 100644 --- a/fs/vboxsf/super.c +++ b/fs/vboxsf/super.c @@ -21,8 +21,7 @@ #define VBOXSF_SUPER_MAGIC 0x786f4256 /* 'VBox' little endian */ -static const unsigned char VBSF_MOUNT_SIGNATURE[4] = { '\000', '\377', '\376', - '\375' }; +static const unsigned char VBSF_MOUNT_SIGNATURE[4] __nonstring = "\000\377\376\375"; static int follow_symlinks; module_param(follow_symlinks, int, 0444); diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 15bb790359f8..5d560e9073f4 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -29,11 +29,6 @@ struct kmem_cache *xfs_buf_cache; /* * Locking orders * - * xfs_buf_ioacct_inc: - * xfs_buf_ioacct_dec: - * b_sema (caller holds) - * b_lock - * * xfs_buf_stale: * b_sema (caller holds) * b_lock @@ -82,51 +77,6 @@ xfs_buf_vmap_len( } /* - * Bump the I/O in flight count on the buftarg if we haven't yet done so for - * this buffer. The count is incremented once per buffer (per hold cycle) - * because the corresponding decrement is deferred to buffer release. Buffers - * can undergo I/O multiple times in a hold-release cycle and per buffer I/O - * tracking adds unnecessary overhead. This is used for sychronization purposes - * with unmount (see xfs_buftarg_drain()), so all we really need is a count of - * in-flight buffers. - * - * Buffers that are never released (e.g., superblock, iclog buffers) must set - * the XBF_NO_IOACCT flag before I/O submission. Otherwise, the buftarg count - * never reaches zero and unmount hangs indefinitely. - */ -static inline void -xfs_buf_ioacct_inc( - struct xfs_buf *bp) -{ - if (bp->b_flags & XBF_NO_IOACCT) - return; - - ASSERT(bp->b_flags & XBF_ASYNC); - spin_lock(&bp->b_lock); - if (!(bp->b_state & XFS_BSTATE_IN_FLIGHT)) { - bp->b_state |= XFS_BSTATE_IN_FLIGHT; - percpu_counter_inc(&bp->b_target->bt_io_count); - } - spin_unlock(&bp->b_lock); -} - -/* - * Clear the in-flight state on a buffer about to be released to the LRU or - * freed and unaccount from the buftarg. - */ -static inline void -__xfs_buf_ioacct_dec( - struct xfs_buf *bp) -{ - lockdep_assert_held(&bp->b_lock); - - if (bp->b_state & XFS_BSTATE_IN_FLIGHT) { - bp->b_state &= ~XFS_BSTATE_IN_FLIGHT; - percpu_counter_dec(&bp->b_target->bt_io_count); - } -} - -/* * When we mark a buffer stale, we remove the buffer from the LRU and clear the * b_lru_ref count so that the buffer is freed immediately when the buffer * reference count falls to zero. If the buffer is already on the LRU, we need @@ -149,15 +99,7 @@ xfs_buf_stale( */ bp->b_flags &= ~_XBF_DELWRI_Q; - /* - * Once the buffer is marked stale and unlocked, a subsequent lookup - * could reset b_flags. There is no guarantee that the buffer is - * unaccounted (released to LRU) before that occurs. Drop in-flight - * status now to preserve accounting consistency. 
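On the vboxsf hunk above: in C, a char[4] may legally be initialized from a four-character string literal, with the implicit trailing '\0' silently dropped, and the __nonstring annotation (the kernel's wrapper around the compiler's `nonstring` attribute) records that the array is intentionally unterminated so newer compilers do not warn about the dropped terminator. A standalone illustration; the fallback #define is this sketch's userspace re-creation of the kernel macro, an assumption for compilers lacking the attribute:

#include <stdio.h>
#include <string.h>

#if defined(__has_attribute)
# if __has_attribute(nonstring)
#  define __nonstring __attribute__((nonstring))
# endif
#endif
#ifndef __nonstring
# define __nonstring    /* assumption: no-op where unsupported */
#endif

/* Four bytes, deliberately no terminating NUL: the literal's implicit
 * '\0' does not fit in char[4] and is dropped, which is legal C. */
static const unsigned char SIGNATURE[4] __nonstring = "\000\377\376\375";

int main(void)
{
    const unsigned char buf[4] = { 0x00, 0xff, 0xfe, 0xfd };

    /* Always memcmp(), never str*(): there is no terminator. */
    printf("match=%d\n", memcmp(buf, SIGNATURE, sizeof(SIGNATURE)) == 0);
    return 0;
}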
- */ spin_lock(&bp->b_lock); - __xfs_buf_ioacct_dec(bp); - atomic_set(&bp->b_lru_ref, 0); if (!(bp->b_state & XFS_BSTATE_DISPOSE) && (list_lru_del_obj(&bp->b_target->bt_lru, &bp->b_lru))) @@ -794,18 +736,13 @@ out_put_perag: int _xfs_buf_read( - struct xfs_buf *bp, - xfs_buf_flags_t flags) + struct xfs_buf *bp) { - ASSERT(!(flags & XBF_WRITE)); ASSERT(bp->b_maps[0].bm_bn != XFS_BUF_DADDR_NULL); bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD | XBF_DONE); - bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD); - + bp->b_flags |= XBF_READ; xfs_buf_submit(bp); - if (flags & XBF_ASYNC) - return 0; return xfs_buf_iowait(bp); } @@ -857,6 +794,8 @@ xfs_buf_read_map( struct xfs_buf *bp; int error; + ASSERT(!(flags & (XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD))); + flags |= XBF_READ; *bpp = NULL; @@ -870,21 +809,11 @@ xfs_buf_read_map( /* Initiate the buffer read and wait. */ XFS_STATS_INC(target->bt_mount, xb_get_read); bp->b_ops = ops; - error = _xfs_buf_read(bp, flags); - - /* Readahead iodone already dropped the buffer, so exit. */ - if (flags & XBF_ASYNC) - return 0; + error = _xfs_buf_read(bp); } else { /* Buffer already read; all we need to do is check it. */ error = xfs_buf_reverify(bp, ops); - /* Readahead already finished; drop the buffer and exit. */ - if (flags & XBF_ASYNC) { - xfs_buf_relse(bp); - return 0; - } - /* We do not want read in the flags */ bp->b_flags &= ~XBF_READ; ASSERT(bp->b_ops != NULL || ops == NULL); @@ -936,6 +865,7 @@ xfs_buf_readahead_map( int nmaps, const struct xfs_buf_ops *ops) { + const xfs_buf_flags_t flags = XBF_READ | XBF_ASYNC | XBF_READ_AHEAD; struct xfs_buf *bp; /* @@ -945,9 +875,21 @@ xfs_buf_readahead_map( if (xfs_buftarg_is_mem(target)) return; - xfs_buf_read_map(target, map, nmaps, - XBF_TRYLOCK | XBF_ASYNC | XBF_READ_AHEAD, &bp, ops, - __this_address); + if (xfs_buf_get_map(target, map, nmaps, flags | XBF_TRYLOCK, &bp)) + return; + trace_xfs_buf_readahead(bp, 0, _RET_IP_); + + if (bp->b_flags & XBF_DONE) { + xfs_buf_reverify(bp, ops); + xfs_buf_relse(bp); + return; + } + XFS_STATS_INC(target->bt_mount, xb_get_read); + bp->b_ops = ops; + bp->b_flags &= ~(XBF_WRITE | XBF_DONE); + bp->b_flags |= flags; + percpu_counter_inc(&target->bt_readahead_count); + xfs_buf_submit(bp); } /* @@ -1003,10 +945,12 @@ xfs_buf_get_uncached( struct xfs_buf *bp; DEFINE_SINGLE_BUF_MAP(map, XFS_BUF_DADDR_NULL, numblks); + /* there are currently no valid flags for xfs_buf_get_uncached */ + ASSERT(flags == 0); + *bpp = NULL; - /* flags might contain irrelevant bits, pass only what we care about */ - error = _xfs_buf_alloc(target, &map, 1, flags & XBF_NO_IOACCT, &bp); + error = _xfs_buf_alloc(target, &map, 1, flags, &bp); if (error) return error; @@ -1060,7 +1004,6 @@ xfs_buf_rele_uncached( spin_unlock(&bp->b_lock); return; } - __xfs_buf_ioacct_dec(bp); spin_unlock(&bp->b_lock); xfs_buf_free(bp); } @@ -1079,20 +1022,12 @@ xfs_buf_rele_cached( spin_lock(&bp->b_lock); ASSERT(bp->b_hold >= 1); if (bp->b_hold > 1) { - /* - * Drop the in-flight state if the buffer is already on the LRU - * and it holds the only reference. This is racy because we - * haven't acquired the pag lock, but the use of _XBF_IN_FLIGHT - * ensures the decrement occurs only once per-buf. 
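The rewritten xfs_buf_readahead_map() above is now the only path that bumps the in-flight counter: it increments bt_readahead_count before submission, the completion side decrements it, and unmount polls the counter to zero (see xfs_buftarg_wait() further below) before walking the LRU, since readahead is the one case where the submitter is not around to drop the last reference. A minimal userspace analogue of that submit/complete/drain pattern, with a plain C11 atomic standing in for the kernel's percpu_counter; submit_readahead(), complete_readahead() and drain() are invented names:

#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

static atomic_int inflight;    /* models bt_readahead_count */

static void submit_readahead(void)   { atomic_fetch_add(&inflight, 1); }
static void complete_readahead(void) { atomic_fetch_sub(&inflight, 1); }

/* Teardown side: nothing new is submitted; poll until completions ran. */
static void drain(void)
{
    while (atomic_load(&inflight) != 0)
        usleep(100 * 1000);    /* the kernel side uses delay(100) */
}

int main(void)
{
    submit_readahead();
    complete_readahead();      /* in real code, from the I/O completion path */
    drain();
    puts("drained");
    return 0;
}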
- */ - if (--bp->b_hold == 1 && !list_empty(&bp->b_lru)) - __xfs_buf_ioacct_dec(bp); + bp->b_hold--; goto out_unlock; } /* we are asked to drop the last reference */ - __xfs_buf_ioacct_dec(bp); - if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) { + if (atomic_read(&bp->b_lru_ref)) { /* * If the buffer is added to the LRU, keep the reference to the * buffer for the LRU and clear the (now stale) dispose list @@ -1345,6 +1280,7 @@ xfs_buf_ioend_handle_error( resubmit: xfs_buf_ioerror(bp, 0); bp->b_flags |= (XBF_DONE | XBF_WRITE_FAIL); + reinit_completion(&bp->b_iowait); xfs_buf_submit(bp); return true; out_stale: @@ -1355,8 +1291,9 @@ out_stale: return false; } -static void -xfs_buf_ioend( +/* returns false if the caller needs to resubmit the I/O, else true */ +static bool +__xfs_buf_ioend( struct xfs_buf *bp) { trace_xfs_buf_iodone(bp, _RET_IP_); @@ -1369,6 +1306,8 @@ xfs_buf_ioend( bp->b_ops->verify_read(bp); if (!bp->b_error) bp->b_flags |= XBF_DONE; + if (bp->b_flags & XBF_READ_AHEAD) + percpu_counter_dec(&bp->b_target->bt_readahead_count); } else { if (!bp->b_error) { bp->b_flags &= ~XBF_WRITE_FAIL; @@ -1376,7 +1315,7 @@ xfs_buf_ioend( } if (unlikely(bp->b_error) && xfs_buf_ioend_handle_error(bp)) - return; + return false; /* clear the retry state */ bp->b_last_error = 0; @@ -1397,7 +1336,15 @@ xfs_buf_ioend( bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD | _XBF_LOGRECOVERY); + return true; +} +static void +xfs_buf_ioend( + struct xfs_buf *bp) +{ + if (!__xfs_buf_ioend(bp)) + return; if (bp->b_flags & XBF_ASYNC) xfs_buf_relse(bp); else @@ -1411,15 +1358,8 @@ xfs_buf_ioend_work( struct xfs_buf *bp = container_of(work, struct xfs_buf, b_ioend_work); - xfs_buf_ioend(bp); -} - -static void -xfs_buf_ioend_async( - struct xfs_buf *bp) -{ - INIT_WORK(&bp->b_ioend_work, xfs_buf_ioend_work); - queue_work(bp->b_mount->m_buf_workqueue, &bp->b_ioend_work); + if (__xfs_buf_ioend(bp)) + xfs_buf_relse(bp); } void @@ -1491,7 +1431,13 @@ xfs_buf_bio_end_io( XFS_TEST_ERROR(false, bp->b_mount, XFS_ERRTAG_BUF_IOERROR)) xfs_buf_ioerror(bp, -EIO); - xfs_buf_ioend_async(bp); + if (bp->b_flags & XBF_ASYNC) { + INIT_WORK(&bp->b_ioend_work, xfs_buf_ioend_work); + queue_work(bp->b_mount->m_buf_workqueue, &bp->b_ioend_work); + } else { + complete(&bp->b_iowait); + } + bio_put(bio); } @@ -1568,9 +1514,11 @@ xfs_buf_iowait( { ASSERT(!(bp->b_flags & XBF_ASYNC)); - trace_xfs_buf_iowait(bp, _RET_IP_); - wait_for_completion(&bp->b_iowait); - trace_xfs_buf_iowait_done(bp, _RET_IP_); + do { + trace_xfs_buf_iowait(bp, _RET_IP_); + wait_for_completion(&bp->b_iowait); + trace_xfs_buf_iowait_done(bp, _RET_IP_); + } while (!__xfs_buf_ioend(bp)); return bp->b_error; } @@ -1648,9 +1596,6 @@ xfs_buf_submit( */ bp->b_error = 0; - if (bp->b_flags & XBF_ASYNC) - xfs_buf_ioacct_inc(bp); - if ((bp->b_flags & XBF_WRITE) && !xfs_buf_verify_write(bp)) { xfs_force_shutdown(bp->b_mount, SHUTDOWN_CORRUPT_INCORE); xfs_buf_ioend(bp); @@ -1776,9 +1721,8 @@ xfs_buftarg_wait( struct xfs_buftarg *btp) { /* - * First wait on the buftarg I/O count for all in-flight buffers to be - * released. This is critical as new buffers do not make the LRU until - * they are released. + * First wait for all in-flight readahead buffers to be released. This is + * critical as new buffers do not make the LRU until they are released. * * Next, flush the buffer workqueue to ensure all completion processing * has finished. 
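The reworked xfs_buf_rele_cached() above is left with a single decision on the last put: keep the buffer cached on the LRU while b_lru_ref is set, otherwise tear it down, with no racy in-flight bookkeeping to undo. A deliberately simplified toy model of that shape; struct buf and buf_rele() are this sketch's inventions, not the kernel types:

#include <stdbool.h>
#include <stdio.h>

struct buf {            /* toy stand-in, not the kernel struct xfs_buf */
    int hold;           /* reference count; lock held by the caller */
    int lru_ref;        /* "keep me cached" hint */
    bool on_lru;
};

/* Returns true when the caller must free the buffer. */
static bool buf_rele(struct buf *bp)
{
    if (bp->hold > 1) {         /* not the last reference: just drop it */
        bp->hold--;
        return false;
    }
    if (bp->lru_ref > 0) {      /* last put, but the LRU keeps a reference */
        bp->on_lru = true;
        return false;
    }
    bp->hold = 0;               /* last put, no LRU interest: tear down */
    bp->on_lru = false;
    return true;
}

int main(void)
{
    struct buf bp = { .hold = 2, .lru_ref = 1 };

    printf("free=%d\n", buf_rele(&bp));    /* 0: a holder remains */
    printf("free=%d\n", buf_rele(&bp));    /* 0: parked on the LRU */
    bp.hold = 1;
    bp.lru_ref = 0;
    printf("free=%d\n", buf_rele(&bp));    /* 1: really gone */
    return 0;
}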
Just waiting on buffer locks is not sufficient for @@ -1787,7 +1731,7 @@ xfs_buftarg_wait( * all reference counts have been dropped before we start walking the * LRU list. */ - while (percpu_counter_sum(&btp->bt_io_count)) + while (percpu_counter_sum(&btp->bt_readahead_count)) delay(100); flush_workqueue(btp->bt_mount->m_buf_workqueue); } @@ -1904,8 +1848,8 @@ xfs_destroy_buftarg( struct xfs_buftarg *btp) { shrinker_free(btp->bt_shrinker); - ASSERT(percpu_counter_sum(&btp->bt_io_count) == 0); - percpu_counter_destroy(&btp->bt_io_count); + ASSERT(percpu_counter_sum(&btp->bt_readahead_count) == 0); + percpu_counter_destroy(&btp->bt_readahead_count); list_lru_destroy(&btp->bt_lru); } @@ -1959,7 +1903,7 @@ xfs_init_buftarg( if (list_lru_init(&btp->bt_lru)) return -ENOMEM; - if (percpu_counter_init(&btp->bt_io_count, 0, GFP_KERNEL)) + if (percpu_counter_init(&btp->bt_readahead_count, 0, GFP_KERNEL)) goto out_destroy_lru; btp->bt_shrinker = @@ -1973,7 +1917,7 @@ xfs_init_buftarg( return 0; out_destroy_io_count: - percpu_counter_destroy(&btp->bt_io_count); + percpu_counter_destroy(&btp->bt_readahead_count); out_destroy_lru: list_lru_destroy(&btp->bt_lru); return -ENOMEM; diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 3b4ed42e11c0..80e06eecaf56 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -27,7 +27,6 @@ struct xfs_buf; #define XBF_READ (1u << 0) /* buffer intended for reading from device */ #define XBF_WRITE (1u << 1) /* buffer intended for writing to device */ #define XBF_READ_AHEAD (1u << 2) /* asynchronous read-ahead */ -#define XBF_NO_IOACCT (1u << 3) /* bypass I/O accounting (non-LRU bufs) */ #define XBF_ASYNC (1u << 4) /* initiator will not wait for completion */ #define XBF_DONE (1u << 5) /* all pages in the buffer uptodate */ #define XBF_STALE (1u << 6) /* buffer has been staled, do not find it */ @@ -58,7 +57,6 @@ typedef unsigned int xfs_buf_flags_t; { XBF_READ, "READ" }, \ { XBF_WRITE, "WRITE" }, \ { XBF_READ_AHEAD, "READ_AHEAD" }, \ - { XBF_NO_IOACCT, "NO_IOACCT" }, \ { XBF_ASYNC, "ASYNC" }, \ { XBF_DONE, "DONE" }, \ { XBF_STALE, "STALE" }, \ @@ -77,7 +75,6 @@ typedef unsigned int xfs_buf_flags_t; * Internal state flags. 
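The xfs_buf.h hunk above retires XBF_NO_IOACCT from both the flag #defines and the parallel { value, "name" } table used for trace formatting, two lists that must always move together. A generic sketch of that flags-to-string pattern; the values are illustrative, with the gap a retired bit leaves behind:

#include <stddef.h>
#include <stdio.h>

#define XBF_READ   (1u << 0)
#define XBF_WRITE  (1u << 1)
#define XBF_ASYNC  (1u << 4)    /* gaps are fine; bit 3 was retired */

/* Kept next to the #defines so the two stay in sync. */
static const struct { unsigned int bit; const char *name; } flag_names[] = {
    { XBF_READ,  "READ"  },
    { XBF_WRITE, "WRITE" },
    { XBF_ASYNC, "ASYNC" },
};

static void print_flags(unsigned int flags)
{
    for (size_t i = 0; i < sizeof(flag_names) / sizeof(flag_names[0]); i++)
        if (flags & flag_names[i].bit)
            printf("%s|", flag_names[i].name);
    putchar('\n');
}

int main(void)
{
    print_flags(XBF_READ | XBF_ASYNC);    /* prints: READ|ASYNC| */
    return 0;
}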
*/ #define XFS_BSTATE_DISPOSE (1 << 0) /* buffer being discarded */ -#define XFS_BSTATE_IN_FLIGHT (1 << 1) /* I/O in flight */ struct xfs_buf_cache { struct rhashtable bc_hash; @@ -116,7 +113,7 @@ struct xfs_buftarg { struct shrinker *bt_shrinker; struct list_lru bt_lru; - struct percpu_counter bt_io_count; + struct percpu_counter bt_readahead_count; struct ratelimit_state bt_ioerror_rl; /* Atomic write unit values */ @@ -291,7 +288,7 @@ int xfs_buf_get_uncached(struct xfs_buftarg *target, size_t numblks, int xfs_buf_read_uncached(struct xfs_buftarg *target, xfs_daddr_t daddr, size_t numblks, xfs_buf_flags_t flags, struct xfs_buf **bpp, const struct xfs_buf_ops *ops); -int _xfs_buf_read(struct xfs_buf *bp, xfs_buf_flags_t flags); +int _xfs_buf_read(struct xfs_buf *bp); void xfs_buf_hold(struct xfs_buf *bp); /* Releasing Buffers */ diff --git a/fs/xfs/xfs_buf_mem.c b/fs/xfs/xfs_buf_mem.c index 07bebbfb16ee..5b64a2b3b113 100644 --- a/fs/xfs/xfs_buf_mem.c +++ b/fs/xfs/xfs_buf_mem.c @@ -117,7 +117,7 @@ xmbuf_free( struct xfs_buftarg *btp) { ASSERT(xfs_buftarg_is_mem(btp)); - ASSERT(percpu_counter_sum(&btp->bt_io_count) == 0); + ASSERT(percpu_counter_sum(&btp->bt_readahead_count) == 0); trace_xmbuf_free(btp); diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index b3c27dbccce8..2f76531842f8 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -3380,7 +3380,7 @@ xlog_do_recover( */ xfs_buf_lock(bp); xfs_buf_hold(bp); - error = _xfs_buf_read(bp, XBF_READ); + error = _xfs_buf_read(bp); if (error) { if (!xlog_is_shutdown(log)) { xfs_buf_ioerror_alert(bp, __this_address); diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 477c5262cf91..b69356582b86 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -181,14 +181,11 @@ xfs_readsb( /* * Allocate a (locked) buffer to hold the superblock. This will be kept - * around at all times to optimize access to the superblock. Therefore, - * set XBF_NO_IOACCT to make sure it doesn't hold the buftarg count - * elevated. + * around at all times to optimize access to the superblock. 
*/ reread: error = xfs_buf_read_uncached(mp->m_ddev_targp, XFS_SB_DADDR, - BTOBB(sector_size), XBF_NO_IOACCT, &bp, - buf_ops); + BTOBB(sector_size), 0, &bp, buf_ops); if (error) { if (loud) xfs_warn(mp, "SB validate failed with error %d.", error); diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index d8e6d073d64d..57bef567e011 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -1407,7 +1407,7 @@ xfs_rtmount_readsb( /* m_blkbb_log is not set up yet */ error = xfs_buf_read_uncached(mp->m_rtdev_targp, XFS_RTSB_DADDR, - mp->m_sb.sb_blocksize >> BBSHIFT, XBF_NO_IOACCT, &bp, + mp->m_sb.sb_blocksize >> BBSHIFT, 0, &bp, &xfs_rtsb_buf_ops); if (error) { xfs_warn(mp, "rt sb validate failed with error %d.", error); diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index b29462363b81..bfc2f1249022 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -593,6 +593,7 @@ DEFINE_EVENT(xfs_buf_flags_class, name, \ DEFINE_BUF_FLAGS_EVENT(xfs_buf_find); DEFINE_BUF_FLAGS_EVENT(xfs_buf_get); DEFINE_BUF_FLAGS_EVENT(xfs_buf_read); +DEFINE_BUF_FLAGS_EVENT(xfs_buf_readahead); TRACE_EVENT(xfs_buf_ioerror, TP_PROTO(struct xfs_buf *bp, int error, xfs_failaddr_t caller_ip), diff --git a/include/asm-generic/hugetlb.h b/include/asm-generic/hugetlb.h index f42133dae68e..2afc95bf1655 100644 --- a/include/asm-generic/hugetlb.h +++ b/include/asm-generic/hugetlb.h @@ -90,7 +90,7 @@ static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, #ifndef __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) + unsigned long addr, pte_t *ptep, unsigned long sz) { return ptep_get_and_clear(mm, addr, ptep); } diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 02a4adb4a999..0d5b186abee8 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -457,7 +457,7 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG) . = ALIGN((align)); \ .rodata : AT(ADDR(.rodata) - LOAD_OFFSET) { \ __start_rodata = .; \ - *(.rodata) *(.rodata.*) \ + *(.rodata) *(.rodata.*) *(.data.rel.ro*) \ SCHED_DATA \ RO_AFTER_INIT_DATA /* Read only after init */ \ . 
= ALIGN(8); \ diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index fa2a76cc2f73..71f4f0cc3dac 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -28,7 +28,7 @@ typedef enum rq_end_io_ret (rq_end_io_fn)(struct request *, blk_status_t); typedef __u32 __bitwise req_flags_t; /* Keep rqf_name[] in sync with the definitions below */ -enum { +enum rqf_flags { /* drive already may have started this one */ __RQF_STARTED, /* request for flush sequence */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 248416ecd01c..d37751789bf5 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -196,10 +196,11 @@ struct gendisk { unsigned int zone_capacity; unsigned int last_zone_capacity; unsigned long __rcu *conv_zones_bitmap; - unsigned int zone_wplugs_hash_bits; - spinlock_t zone_wplugs_lock; + unsigned int zone_wplugs_hash_bits; + atomic_t nr_zone_wplugs; + spinlock_t zone_wplugs_lock; struct mempool_s *zone_wplugs_pool; - struct hlist_head *zone_wplugs_hash; + struct hlist_head *zone_wplugs_hash; struct workqueue_struct *zone_wplugs_wq; #endif /* CONFIG_BLK_DEV_ZONED */ @@ -367,6 +368,7 @@ struct queue_limits { unsigned int max_sectors; unsigned int max_user_sectors; unsigned int max_segment_size; + unsigned int min_segment_size; unsigned int physical_block_size; unsigned int logical_block_size; unsigned int alignment_offset; diff --git a/include/linux/call_once.h b/include/linux/call_once.h index 6261aa0b3fb0..13cd6469e7e5 100644 --- a/include/linux/call_once.h +++ b/include/linux/call_once.h @@ -26,20 +26,41 @@ do { \ __once_init((once), #once, &__key); \ } while (0) -static inline void call_once(struct once *once, void (*cb)(struct once *)) +/* + * call_once - Ensure a function has been called exactly once + * + * @once: Tracking struct + * @cb: Function to be called + * + * If @once has never completed successfully before, call @cb and, if + * it returns a zero or positive value, mark @once as completed. Return + * the value returned by @cb. + * + * If @once has completed successfully before, return 0. + * + * The call to @cb is implicitly surrounded by a mutex, though for + * efficiency the function avoids taking it after the first call. + */ +static inline int call_once(struct once *once, int (*cb)(struct once *)) { - /* Pairs with atomic_set_release() below. */ - if (atomic_read_acquire(&once->state) == ONCE_COMPLETED) - return; - - guard(mutex)(&once->lock); - WARN_ON(atomic_read(&once->state) == ONCE_RUNNING); - if (atomic_read(&once->state) != ONCE_NOT_STARTED) - return; - - atomic_set(&once->state, ONCE_RUNNING); - cb(once); - atomic_set_release(&once->state, ONCE_COMPLETED); + int r, state; + + /* Pairs with atomic_set_release() below. */ + if (atomic_read_acquire(&once->state) == ONCE_COMPLETED) + return 0; + + guard(mutex)(&once->lock); + state = atomic_read(&once->state); + if (unlikely(state != ONCE_NOT_STARTED)) + return WARN_ON_ONCE(state != ONCE_COMPLETED) ? 
-EINVAL : 0; + + atomic_set(&once->state, ONCE_RUNNING); + r = cb(once); + if (r < 0) + atomic_set(&once->state, ONCE_NOT_STARTED); + else + atomic_set_release(&once->state, ONCE_COMPLETED); + return r; } #endif /* _LINUX_CALL_ONCE_H */ diff --git a/include/linux/compaction.h b/include/linux/compaction.h index e94776496049..7bf0c521db63 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -80,6 +80,11 @@ static inline unsigned long compact_gap(unsigned int order) return 2UL << order; } +static inline int current_is_kcompactd(void) +{ + return current->flags & PF_KCOMPACTD; +} + #ifdef CONFIG_COMPACTION extern unsigned int extfrag_for_order(struct zone *zone, unsigned int order); diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 200fd3c5bc70..155385754824 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -110,7 +110,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, /* Unreachable code */ #ifdef CONFIG_OBJTOOL /* Annotate a C jump table to allow objtool to follow the code flow */ -#define __annotate_jump_table __section(".rodata..c_jump_table,\"a\",@progbits #") +#define __annotate_jump_table __section(".data.rel.ro.c_jump_table") #else /* !CONFIG_OBJTOOL */ #define __annotate_jump_table #endif /* CONFIG_OBJTOOL */ diff --git a/include/linux/cred.h b/include/linux/cred.h index 0c3c4b16b469..5658a3bfe803 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -172,18 +172,12 @@ static inline bool cap_ambient_invariant_ok(const struct cred *cred) static inline const struct cred *override_creds(const struct cred *override_cred) { - const struct cred *old = current->cred; - - rcu_assign_pointer(current->cred, override_cred); - return old; + return rcu_replace_pointer(current->cred, override_cred, 1); } static inline const struct cred *revert_creds(const struct cred *revert_cred) { - const struct cred *override_cred = current->cred; - - rcu_assign_pointer(current->cred, revert_cred); - return override_cred; + return rcu_replace_pointer(current->cred, revert_cred, 1); } /** diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 7f222dccc7d1..8210ece94fa6 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -210,6 +210,14 @@ static inline u8 *ethtool_rxfh_context_key(struct ethtool_rxfh_context *ctx) void ethtool_rxfh_context_lost(struct net_device *dev, u32 context_id); +struct link_mode_info { + int speed; + u8 lanes; + u8 duplex; +}; + +extern const struct link_mode_info link_mode_params[]; + /* declare a link mode bitmap */ #define __ETHTOOL_DECLARE_LINK_MODE_MASK(name) \ DECLARE_BITMAP(name, __ETHTOOL_LINK_MODE_MASK_NBITS) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index ec8c0ccc8f95..76a75ec03dd6 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -682,6 +682,7 @@ struct huge_bootmem_page { int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list); int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn); +void wait_for_freed_hugetlb_folios(void); struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, unsigned long addr, bool cow_from_owner); struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid, @@ -1004,7 +1005,9 @@ static inline void hugetlb_count_sub(long l, struct mm_struct *mm) static inline pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { - return huge_ptep_get_and_clear(vma->vm_mm, addr, ptep); + 
unsigned long psize = huge_page_size(hstate_vma(vma)); + + return huge_ptep_get_and_clear(vma->vm_mm, addr, ptep, psize); } #endif @@ -1066,6 +1069,10 @@ static inline int replace_free_hugepage_folios(unsigned long start_pfn, return 0; } +static inline void wait_for_freed_hugetlb_folios(void) +{ +} + static inline struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, unsigned long addr, bool cow_from_owner) diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index a6e2aadbb91b..5aeeed22f35b 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -207,6 +207,7 @@ struct inet6_cork { struct ipv6_txoptions *opt; u8 hop_limit; u8 tclass; + u8 dontfrag:1; }; /* struct ipv6_pinfo - ipv6 private area */ diff --git a/include/linux/log2.h b/include/linux/log2.h index 9f30d087a128..1366cb688a6d 100644 --- a/include/linux/log2.h +++ b/include/linux/log2.h @@ -41,7 +41,7 @@ int __ilog2_u64(u64 n) * *not* considered a power of two. * Return: true if @n is a power of 2, otherwise false. */ -static inline __attribute__((const)) +static __always_inline __attribute__((const)) bool is_power_of_2(unsigned long n) { return (n != 0 && ((n & (n - 1)) == 0)); diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index fd37f4e54d76..904804e995aa 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1251,6 +1251,7 @@ enum mlx5_cap_type { MLX5_CAP_GENERAL_2 = 0x20, MLX5_CAP_PORT_SELECTION = 0x25, MLX5_CAP_ADV_VIRTUALIZATION = 0x26, + MLX5_CAP_ADV_RDMA = 0x28, /* NUM OF CAP Types */ MLX5_CAP_NUM }; @@ -1345,6 +1346,12 @@ enum mlx5_qcam_feature_groups { #define MLX5_CAP_FLOWTABLE_RDMA_TX(mdev, cap) \ MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_transmit_rdma.cap) +#define MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_RX(mdev, cap) \ + MLX5_CAP_ADV_RDMA(mdev, rdma_transport_rx_flow_table_properties.cap) + +#define MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_TX(mdev, cap) \ + MLX5_CAP_ADV_RDMA(mdev, rdma_transport_tx_flow_table_properties.cap) + #define MLX5_CAP_ESW_FLOWTABLE(mdev, cap) \ MLX5_GET(flow_table_eswitch_cap, \ mdev->caps.hca[MLX5_CAP_ESWITCH_FLOW_TABLE]->cur, cap) @@ -1384,6 +1391,10 @@ enum mlx5_qcam_feature_groups { MLX5_GET(adv_virtualization_cap, \ mdev->caps.hca[MLX5_CAP_ADV_VIRTUALIZATION]->cur, cap) +#define MLX5_CAP_ADV_RDMA(mdev, cap) \ + MLX5_GET(adv_rdma_cap, \ + mdev->caps.hca[MLX5_CAP_ADV_RDMA]->cur, cap) + #define MLX5_CAP_FLOWTABLE_PORT_SELECTION(mdev, cap) \ MLX5_CAP_PORT_SELECTION(mdev, flow_table_properties_port_selection.cap) @@ -1506,6 +1517,7 @@ enum { MLX5_PHYSICAL_LAYER_COUNTERS_GROUP = 0x12, MLX5_PER_TRAFFIC_CLASS_CONGESTION_GROUP = 0x13, MLX5_PHYSICAL_LAYER_STATISTICAL_GROUP = 0x16, + MLX5_PHYSICAL_LAYER_RECOVERY_GROUP = 0x1a, MLX5_INFINIBAND_PORT_COUNTERS_GROUP = 0x20, MLX5_INFINIBAND_EXTENDED_PORT_COUNTERS_GROUP = 0x21, }; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 46bd7550adf8..d1dfbad9a447 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -304,6 +304,8 @@ struct mlx5_cmd { struct semaphore sem; struct semaphore pages_sem; struct semaphore throttle_sem; + struct semaphore unprivileged_sem; + struct xarray privileged_uids; } vars; enum mlx5_cmdif_state state; void *cmd_alloc_buf; @@ -964,6 +966,8 @@ struct mlx5_async_work { mlx5_async_cbk_t user_callback; u16 opcode; /* cmd opcode */ u16 op_mod; /* cmd op_mod */ + u8 throttle_locked:1; + u8 unpriv_locked:1; void *out; /* pointer to the cmd output buffer */ }; @@ -994,6 +998,8 @@ int mlx5_cmd_exec(struct mlx5_core_dev *dev, 
void *in, int in_size, void *out, int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size); bool mlx5_cmd_is_down(struct mlx5_core_dev *dev); +int mlx5_cmd_add_privileged_uid(struct mlx5_core_dev *dev, u16 uid); +void mlx5_cmd_remove_privileged_uid(struct mlx5_core_dev *dev, u16 uid); void mlx5_core_uplink_netdev_set(struct mlx5_core_dev *mdev, struct net_device *netdev); void mlx5_core_uplink_netdev_event_replay(struct mlx5_core_dev *mdev); diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 01cb72d68c23..939e58c2f386 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -40,6 +40,7 @@ #define MLX5_SET_CFG(p, f, v) MLX5_SET(create_flow_group_in, p, f, v) +#define MLX5_RDMA_TRANSPORT_BYPASS_PRIO 0 #define MLX5_FS_MAX_POOL_SIZE BIT(30) enum mlx5_flow_destination_type { @@ -110,6 +111,8 @@ enum mlx5_flow_namespace_type { MLX5_FLOW_NAMESPACE_RDMA_TX_IPSEC, MLX5_FLOW_NAMESPACE_RDMA_RX_MACSEC, MLX5_FLOW_NAMESPACE_RDMA_TX_MACSEC, + MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX, + MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX, }; enum { @@ -194,9 +197,9 @@ struct mlx5_flow_namespace * mlx5_get_flow_namespace(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type type); struct mlx5_flow_namespace * -mlx5_get_flow_vport_acl_namespace(struct mlx5_core_dev *dev, - enum mlx5_flow_namespace_type type, - int vport); +mlx5_get_flow_vport_namespace(struct mlx5_core_dev *dev, + enum mlx5_flow_namespace_type type, + int vport_idx); struct mlx5_flow_table_attr { int prio; @@ -204,6 +207,7 @@ struct mlx5_flow_table_attr { u32 level; u32 flags; u16 uid; + u16 vport; struct mlx5_flow_table *next_ft; struct { @@ -240,6 +244,7 @@ void mlx5_destroy_flow_group(struct mlx5_flow_group *fg); struct mlx5_exe_aso { u32 object_id; + int base_id; u8 type; u8 return_reg_id; union { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index cc2875e843f7..2c09df4ee574 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1570,6 +1570,8 @@ enum { enum { MLX5_UCTX_CAP_RAW_TX = 1UL << 0, MLX5_UCTX_CAP_INTERNAL_DEV_RES = 1UL << 1, + MLX5_UCTX_CAP_RDMA_CTRL = 1UL << 3, + MLX5_UCTX_CAP_RDMA_CTRL_OTHER_VHCA = 1UL << 4, }; #define MLX5_FC_BULK_SIZE_FACTOR 128 @@ -1991,7 +1993,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 max_geneve_tlv_options[0x8]; u8 reserved_at_568[0x3]; u8 max_geneve_tlv_option_data_len[0x5]; - u8 reserved_at_570[0x9]; + u8 reserved_at_570[0x1]; + u8 adv_rdma[0x1]; + u8 reserved_at_572[0x7]; u8 adv_virtualization[0x1]; u8 reserved_at_57a[0x6]; @@ -2140,7 +2144,8 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 log_min_mkey_entity_size[0x5]; u8 reserved_at_1b0[0x10]; - u8 reserved_at_1c0[0x60]; + u8 general_obj_types_127_64[0x40]; + u8 reserved_at_200[0x20]; u8 reserved_at_220[0x1]; u8 sw_vhca_id_valid[0x1]; @@ -2640,6 +2645,12 @@ struct mlx5_ifc_field_select_802_1qau_rp_bits { u8 field_select_8021qaurp[0x20]; }; +struct mlx5_ifc_phys_layer_recovery_cntrs_bits { + u8 total_successful_recovery_events[0x20]; + + u8 reserved_at_20[0x7a0]; +}; + struct mlx5_ifc_phys_layer_cntrs_bits { u8 time_since_last_clear_high[0x20]; @@ -4841,6 +4852,7 @@ union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits { struct mlx5_ifc_ib_ext_port_cntrs_grp_data_layout_bits ib_ext_port_cntrs_grp_data_layout; struct mlx5_ifc_phys_layer_cntrs_bits phys_layer_cntrs; struct mlx5_ifc_phys_layer_statistical_cntrs_bits phys_layer_statistical_cntrs; + struct mlx5_ifc_phys_layer_recovery_cntrs_bits phys_layer_recovery_cntrs; u8 
reserved_at_0[0x7c0]; }; @@ -10579,7 +10591,9 @@ struct mlx5_ifc_mtutc_reg_bits { }; struct mlx5_ifc_pcam_enhanced_features_bits { - u8 reserved_at_0[0x1d]; + u8 reserved_at_0[0x10]; + u8 ppcnt_recovery_counters[0x1]; + u8 reserved_at_11[0xc]; u8 fec_200G_per_lane_in_pplm[0x1]; u8 reserved_at_1e[0x2a]; u8 fec_100G_per_lane_in_pplm[0x1]; @@ -12495,12 +12509,17 @@ enum { }; enum { + MLX5_HCA_CAP_2_GENERAL_OBJECT_TYPES_RDMA_CTRL = BIT_ULL(0x13), +}; + +enum { MLX5_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY = 0xc, MLX5_GENERAL_OBJECT_TYPES_IPSEC = 0x13, MLX5_GENERAL_OBJECT_TYPES_SAMPLER = 0x20, MLX5_GENERAL_OBJECT_TYPES_FLOW_METER_ASO = 0x24, MLX5_GENERAL_OBJECT_TYPES_MACSEC = 0x27, MLX5_GENERAL_OBJECT_TYPES_INT_KEK = 0x47, + MLX5_GENERAL_OBJECT_TYPES_RDMA_CTRL = 0x53, MLX5_GENERAL_OBJECT_TYPES_FLOW_TABLE_ALIAS = 0xff15, }; @@ -13068,6 +13087,44 @@ struct mlx5_ifc_load_vhca_state_out_bits { u8 reserved_at_40[0x40]; }; +struct mlx5_ifc_adv_rdma_cap_bits { + u8 rdma_transport_manager[0x1]; + u8 rdma_transport_manager_other_eswitch[0x1]; + u8 reserved_at_2[0x1e]; + + u8 rcx_type[0x8]; + u8 reserved_at_28[0x2]; + u8 ps_entry_log_max_value[0x6]; + u8 reserved_at_30[0x6]; + u8 qp_max_ps_num_entry[0xa]; + + u8 mp_max_num_queues[0x8]; + u8 ps_user_context_max_log_size[0x8]; + u8 message_based_qp_and_striding_wq[0x8]; + u8 reserved_at_58[0x8]; + + u8 max_receive_send_message_size_stride[0x10]; + u8 reserved_at_70[0x10]; + + u8 max_receive_send_message_size_byte[0x20]; + + u8 reserved_at_a0[0x160]; + + struct mlx5_ifc_flow_table_prop_layout_bits rdma_transport_rx_flow_table_properties; + + struct mlx5_ifc_flow_table_prop_layout_bits rdma_transport_tx_flow_table_properties; + + struct mlx5_ifc_flow_table_fields_supported_2_bits rdma_transport_rx_ft_field_support_2; + + struct mlx5_ifc_flow_table_fields_supported_2_bits rdma_transport_tx_ft_field_support_2; + + struct mlx5_ifc_flow_table_fields_supported_2_bits rdma_transport_rx_ft_field_bitmask_support_2; + + struct mlx5_ifc_flow_table_fields_supported_2_bits rdma_transport_tx_ft_field_bitmask_support_2; + + u8 reserved_at_800[0x3800]; +}; + struct mlx5_ifc_adv_virtualization_cap_bits { u8 reserved_at_0[0x3]; u8 pg_track_log_max_num[0x5]; diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index fd625e0dd869..58770b86f793 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -61,15 +61,6 @@ enum mlx5_an_status { #define MLX5_EEPROM_PAGE_LENGTH 256 #define MLX5_EEPROM_HIGH_PAGE_LENGTH 128 -struct mlx5_module_eeprom_query_params { - u16 size; - u16 offset; - u16 i2c_address; - u32 page; - u32 bank; - u32 module_number; -}; - enum mlx5e_link_mode { MLX5E_1000BASE_CX_SGMII = 0, MLX5E_1000BASE_KX = 1, @@ -145,12 +136,6 @@ enum mlx5_ptys_width { MLX5_PTYS_WIDTH_12X = 1 << 4, }; -struct mlx5_port_eth_proto { - u32 cap; - u32 admin; - u32 oper; -}; - #define MLX5E_PROT_MASK(link_mode) (1U << link_mode) #define MLX5_GET_ETH_PROTO(reg, out, ext, field) \ (ext ? 
MLX5_GET(reg, out, ext_##field) : \ @@ -163,14 +148,7 @@ int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys, int mlx5_query_ib_port_oper(struct mlx5_core_dev *dev, u16 *link_width_oper, u16 *proto_oper, u8 local_port, u8 plane_index); -void mlx5_toggle_port_link(struct mlx5_core_dev *dev); -int mlx5_set_port_admin_status(struct mlx5_core_dev *dev, - enum mlx5_port_status status); -int mlx5_query_port_admin_status(struct mlx5_core_dev *dev, - enum mlx5_port_status *status); -int mlx5_set_port_beacon(struct mlx5_core_dev *dev, u16 beacon_duration); - -int mlx5_set_port_mtu(struct mlx5_core_dev *dev, u16 mtu, u8 port); + void mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, u16 *max_mtu, u8 port); void mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, u16 *oper_mtu, u8 port); @@ -178,65 +156,4 @@ void mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, u16 *oper_mtu, int mlx5_query_port_vl_hw_cap(struct mlx5_core_dev *dev, u8 *vl_hw_cap, u8 local_port); -int mlx5_set_port_pause(struct mlx5_core_dev *dev, u32 rx_pause, u32 tx_pause); -int mlx5_query_port_pause(struct mlx5_core_dev *dev, - u32 *rx_pause, u32 *tx_pause); - -int mlx5_set_port_pfc(struct mlx5_core_dev *dev, u8 pfc_en_tx, u8 pfc_en_rx); -int mlx5_query_port_pfc(struct mlx5_core_dev *dev, u8 *pfc_en_tx, - u8 *pfc_en_rx); - -int mlx5_set_port_stall_watermark(struct mlx5_core_dev *dev, - u16 stall_critical_watermark, - u16 stall_minor_watermark); -int mlx5_query_port_stall_watermark(struct mlx5_core_dev *dev, - u16 *stall_critical_watermark, u16 *stall_minor_watermark); - -int mlx5_max_tc(struct mlx5_core_dev *mdev); - -int mlx5_set_port_prio_tc(struct mlx5_core_dev *mdev, u8 *prio_tc); -int mlx5_query_port_prio_tc(struct mlx5_core_dev *mdev, - u8 prio, u8 *tc); -int mlx5_set_port_tc_group(struct mlx5_core_dev *mdev, u8 *tc_group); -int mlx5_query_port_tc_group(struct mlx5_core_dev *mdev, - u8 tc, u8 *tc_group); -int mlx5_set_port_tc_bw_alloc(struct mlx5_core_dev *mdev, u8 *tc_bw); -int mlx5_query_port_tc_bw_alloc(struct mlx5_core_dev *mdev, - u8 tc, u8 *bw_pct); -int mlx5_modify_port_ets_rate_limit(struct mlx5_core_dev *mdev, - u8 *max_bw_value, - u8 *max_bw_unit); -int mlx5_query_port_ets_rate_limit(struct mlx5_core_dev *mdev, - u8 *max_bw_value, - u8 *max_bw_unit); -int mlx5_set_port_wol(struct mlx5_core_dev *mdev, u8 wol_mode); -int mlx5_query_port_wol(struct mlx5_core_dev *mdev, u8 *wol_mode); - -int mlx5_query_ports_check(struct mlx5_core_dev *mdev, u32 *out, int outlen); -int mlx5_set_ports_check(struct mlx5_core_dev *mdev, u32 *in, int inlen); -int mlx5_set_port_fcs(struct mlx5_core_dev *mdev, u8 enable); -void mlx5_query_port_fcs(struct mlx5_core_dev *mdev, bool *supported, - bool *enabled); -int mlx5_query_module_eeprom(struct mlx5_core_dev *dev, - u16 offset, u16 size, u8 *data); -int mlx5_query_module_eeprom_by_page(struct mlx5_core_dev *dev, - struct mlx5_module_eeprom_query_params *params, u8 *data); - -int mlx5_query_port_dcbx_param(struct mlx5_core_dev *mdev, u32 *out); -int mlx5_set_port_dcbx_param(struct mlx5_core_dev *mdev, u32 *in); - -int mlx5_set_trust_state(struct mlx5_core_dev *mdev, u8 trust_state); -int mlx5_query_trust_state(struct mlx5_core_dev *mdev, u8 *trust_state); -int mlx5_set_dscp2prio(struct mlx5_core_dev *mdev, u8 dscp, u8 prio); -int mlx5_query_dscp2prio(struct mlx5_core_dev *mdev, u8 *dscp2prio); - -int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, bool ext, - struct mlx5_port_eth_proto *eproto); -bool mlx5_ptys_ext_supported(struct mlx5_core_dev *mdev); -u32 
mlx5_port_ptys2speed(struct mlx5_core_dev *mdev, u32 eth_proto_oper, - bool force_legacy); -u32 mlx5_port_speed2linkmodes(struct mlx5_core_dev *mdev, u32 speed, - bool force_legacy); -int mlx5_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed); - #endif /* __MLX5_PORT_H__ */ diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 11be70a7929f..7a01c518e573 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -53,12 +53,12 @@ enum { NETIF_F_GSO_UDP_BIT, /* ... UFO, deprecated except tuntap */ NETIF_F_GSO_UDP_L4_BIT, /* ... UDP payload GSO (not UFO) */ NETIF_F_GSO_FRAGLIST_BIT, /* ... Fraglist GSO */ + NETIF_F_GSO_ACCECN_BIT, /* TCP AccECN w/ TSO (no clear CWR) */ /**/NETIF_F_GSO_LAST = /* last bit, see GSO_MASK */ - NETIF_F_GSO_FRAGLIST_BIT, + NETIF_F_GSO_ACCECN_BIT, NETIF_F_FCOE_CRC_BIT, /* FCoE CRC32 */ NETIF_F_SCTP_CRC_BIT, /* SCTP checksum offload */ - __UNUSED_NETIF_F_37, NETIF_F_NTUPLE_BIT, /* N-tuple filters supported */ NETIF_F_RXHASH_BIT, /* Receive hashing offload */ NETIF_F_RXCSUM_BIT, /* Receive checksumming offload */ @@ -128,6 +128,7 @@ enum { #define NETIF_F_SG __NETIF_F(SG) #define NETIF_F_TSO6 __NETIF_F(TSO6) #define NETIF_F_TSO_ECN __NETIF_F(TSO_ECN) +#define NETIF_F_GSO_ACCECN __NETIF_F(GSO_ACCECN) #define NETIF_F_TSO __NETIF_F(TSO) #define NETIF_F_VLAN_CHALLENGED __NETIF_F(VLAN_CHALLENGED) #define NETIF_F_RXFCS __NETIF_F(RXFCS) @@ -210,7 +211,8 @@ static inline int find_next_netdev_feature(u64 feature, unsigned long start) NETIF_F_TSO_ECN | NETIF_F_TSO_MANGLEID) /* List of features with software fallbacks. */ -#define NETIF_F_GSO_SOFTWARE (NETIF_F_ALL_TSO | NETIF_F_GSO_SCTP | \ +#define NETIF_F_GSO_SOFTWARE (NETIF_F_ALL_TSO | \ + NETIF_F_GSO_ACCECN | NETIF_F_GSO_SCTP | \ NETIF_F_GSO_UDP_L4 | NETIF_F_GSO_FRAGLIST) /* diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7ab86ec228b7..67527243459b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2486,19 +2486,29 @@ struct net_device { bool up; /** + * @request_ops_lock: request the core to run all @netdev_ops and + * @ethtool_ops under the @lock. + */ + bool request_ops_lock; + + /** * @lock: netdev-scope lock, protects a small selection of fields. * Should always be taken using netdev_lock() / netdev_unlock() helpers. * Drivers are free to use it for other protection. * * Protects: * @gro_flush_timeout, @napi_defer_hard_irqs, @napi_list, - * @net_shaper_hierarchy, @reg_state, @threaded + * @net_shaper_hierarchy, @reg_state, @threaded, @dev_addr * * Partially protects (writers must hold both @lock and rtnl_lock): * @up * * Also protects some fields in struct napi_struct. * + * For the drivers that implement shaper or queue API, the scope + * of this lock is expanded to cover most ndo/queue/ethtool/sysfs + * operations. + * * Ordering: take after rtnl_lock. 
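/*
 * Hedged sketch, not part of the patch above: how a driver might opt in
 * to the expanded instance-lock scope described in the @lock comment.
 * foo_setup() and foo_touch_protected() are hypothetical helpers.
 */
static void foo_setup(struct net_device *dev)
{
	/* opt in before register_netdev(); the core then runs all
	 * ndo/ethtool ops under dev->lock */
	dev->request_ops_lock = true;
}

static void foo_touch_protected(struct net_device *dev)
{
	netdev_lock(dev);
	netdev_assert_locked(dev);	/* @lock-protected fields are safe here */
	netdev_unlock(dev);
}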
*/ struct mutex lock; @@ -2620,21 +2630,6 @@ static inline void netdev_for_each_tx_queue(struct net_device *dev, f(dev, &dev->_tx[i], arg); } -#define netdev_lockdep_set_classes(dev) \ -{ \ - static struct lock_class_key qdisc_tx_busylock_key; \ - static struct lock_class_key qdisc_xmit_lock_key; \ - static struct lock_class_key dev_addr_list_lock_key; \ - unsigned int i; \ - \ - (dev)->qdisc_tx_busylock = &qdisc_tx_busylock_key; \ - lockdep_set_class(&(dev)->addr_list_lock, \ - &dev_addr_list_lock_key); \ - for (i = 0; i < (dev)->num_tx_queues; i++) \ - lockdep_set_class(&(dev)->_tx[i]._xmit_lock, \ - &qdisc_xmit_lock_key); \ -} - u16 netdev_pick_tx(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev); struct netdev_queue *netdev_core_pick_tx(struct net_device *dev, @@ -2740,18 +2735,7 @@ static inline void netdev_unlock(struct net_device *dev) { mutex_unlock(&dev->lock); } - -static inline void netdev_assert_locked(struct net_device *dev) -{ - lockdep_assert_held(&dev->lock); -} - -static inline void netdev_assert_locked_or_invisible(struct net_device *dev) -{ - if (dev->reg_state == NETREG_REGISTERED || - dev->reg_state == NETREG_UNREGISTERING) - netdev_assert_locked(dev); -} +/* Additional netdev_lock()-related helpers are in net/netdev_lock.h */ void netif_napi_set_irq_locked(struct napi_struct *napi, int irq); @@ -3327,9 +3311,12 @@ struct net_device *dev_get_by_name_rcu(struct net *net, const char *name); struct net_device *__dev_get_by_name(struct net *net, const char *name); bool netdev_name_in_use(struct net *net, const char *name); int dev_alloc_name(struct net_device *dev, const char *name); +int netif_open(struct net_device *dev, struct netlink_ext_ack *extack); int dev_open(struct net_device *dev, struct netlink_ext_ack *extack); +void netif_close(struct net_device *dev); void dev_close(struct net_device *dev); void dev_close_many(struct list_head *head, bool unlink); +void netif_disable_lro(struct net_device *dev); void dev_disable_lro(struct net_device *dev); int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *newskb); u16 dev_pick_tx_zero(struct net_device *dev, struct sk_buff *skb, @@ -4177,6 +4164,8 @@ int put_user_ifreq(struct ifreq *ifr, void __user *arg); int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, void __user *data, bool *need_copyout); int dev_ifconf(struct net *net, struct ifconf __user *ifc); +int dev_eth_ioctl(struct net_device *dev, + struct ifreq *ifr, unsigned int cmd); int generic_hwtstamp_get_lower(struct net_device *dev, struct kernel_hwtstamp_config *kernel_cfg); int generic_hwtstamp_set_lower(struct net_device *dev, @@ -4186,27 +4175,27 @@ int dev_ethtool(struct net *net, struct ifreq *ifr, void __user *userdata); unsigned int dev_get_flags(const struct net_device *); int __dev_change_flags(struct net_device *dev, unsigned int flags, struct netlink_ext_ack *extack); +int netif_change_flags(struct net_device *dev, unsigned int flags, + struct netlink_ext_ack *extack); int dev_change_flags(struct net_device *dev, unsigned int flags, struct netlink_ext_ack *extack); +int netif_set_alias(struct net_device *dev, const char *alias, size_t len); int dev_set_alias(struct net_device *, const char *, size_t); int dev_get_alias(const struct net_device *, char *, size_t); -int __dev_change_net_namespace(struct net_device *dev, struct net *net, +int netif_change_net_namespace(struct net_device *dev, struct net *net, const char *pat, int new_ifindex, struct netlink_ext_ack *extack); -static inline int 
dev_change_net_namespace(struct net_device *dev, struct net *net, - const char *pat) -{ - return __dev_change_net_namespace(dev, net, pat, 0, NULL); -} + const char *pat); int __dev_set_mtu(struct net_device *, int); +int netif_set_mtu(struct net_device *dev, int new_mtu); int dev_set_mtu(struct net_device *, int); int dev_pre_changeaddr_notify(struct net_device *dev, const char *addr, struct netlink_ext_ack *extack); +int netif_set_mac_address(struct net_device *dev, struct sockaddr *sa, + struct netlink_ext_ack *extack); int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa, struct netlink_ext_ack *extack); -int dev_set_mac_address_user(struct net_device *dev, struct sockaddr *sa, - struct netlink_ext_ack *extack); int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name); int dev_get_port_parent_id(struct net_device *dev, struct netdev_phys_item_id *ppid, bool recurse); @@ -4218,6 +4207,7 @@ struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); u8 dev_xdp_prog_count(struct net_device *dev); +int netif_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf); int dev_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf); u8 dev_xdp_sb_prog_count(struct net_device *dev); u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode); @@ -4960,6 +4950,7 @@ static inline void __dev_mc_unsync(struct net_device *dev, /* Functions used for secondary unicast and multicast support */ void dev_set_rx_mode(struct net_device *dev); int dev_set_promiscuity(struct net_device *dev, int inc); +int netif_set_allmulti(struct net_device *dev, int inc, bool notify); int dev_set_allmulti(struct net_device *dev, int inc); void netdev_state_change(struct net_device *dev); void __netdev_notify_peers(struct net_device *dev); @@ -5278,6 +5269,8 @@ static inline bool net_gso_ok(netdev_features_t features, int gso_type) BUILD_BUG_ON(SKB_GSO_UDP != (NETIF_F_GSO_UDP >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_UDP_L4 != (NETIF_F_GSO_UDP_L4 >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_FRAGLIST != (NETIF_F_GSO_FRAGLIST >> NETIF_F_GSO_SHIFT)); + BUILD_BUG_ON(SKB_GSO_TCP_ACCECN != + (NETIF_F_GSO_ACCECN >> NETIF_F_GSO_SHIFT)); return (features & feature) == feature; } diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index f91e50a76efd..f6e8abe0b1f1 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -33,6 +33,7 @@ struct netpoll { u16 local_port, remote_port; u8 remote_mac[ETH_ALEN]; struct sk_buff_head skb_pool; + struct work_struct refill_wq; }; struct netpoll_info { diff --git a/include/linux/phy.h b/include/linux/phy.h index c4a6385faf41..c24e1a565819 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1275,21 +1275,6 @@ const char *phy_rate_matching_to_str(int rate_matching); int phy_interface_num_ports(phy_interface_t interface); -/* A structure for mapping a particular speed and duplex - * combination to a particular SUPPORTED and ADVERTISED value - */ -struct phy_setting { - u32 speed; - u8 duplex; - u8 bit; -}; - -const struct phy_setting * -phy_lookup_setting(int speed, int duplex, const unsigned long *mask, - bool exact); -size_t phy_speeds(unsigned int *speeds, size_t size, - unsigned long *mask); - /** * phy_is_started - Convenience function to check whether PHY is started * @phydev: The phy_device struct @@ -2107,22 +2092,6 @@ int __phy_hwtstamp_set(struct phy_device *phydev, struct kernel_hwtstamp_config 
*config, struct netlink_ext_ack *extack); -int __phy_package_read_mmd(struct phy_device *phydev, - unsigned int addr_offset, int devad, - u32 regnum); - -int phy_package_read_mmd(struct phy_device *phydev, - unsigned int addr_offset, int devad, - u32 regnum); - -int __phy_package_write_mmd(struct phy_device *phydev, - unsigned int addr_offset, int devad, - u32 regnum, u16 val); - -int phy_package_write_mmd(struct phy_device *phydev, - unsigned int addr_offset, int devad, - u32 regnum, u16 val); - extern const struct bus_type mdio_bus_type; struct mdio_board_info { diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 08df65f6867a..79876c84ae81 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -595,6 +595,14 @@ void pcs_get_state(struct phylink_pcs *pcs, unsigned int neg_mode, * The %neg_mode argument should be tested via the phylink_mode_*() family of * functions, or for PCS that set pcs->neg_mode true, should be tested * against the PHYLINK_PCS_NEG_* definitions. + * + * pcs_config() will be called when configuration of the PCS is required + * or when the advertisement is possibly updated. It must not unnecessarily + * disrupt an established link. + * + * When an autonegotiation restart is required for 802.3z modes, .pcs_config() + * should return a positive non-zero integer (e.g. 1) to indicate to phylink + * to call the pcs_an_restart() method. */ int pcs_config(struct phylink_pcs *pcs, unsigned int neg_mode, phy_interface_t interface, const unsigned long *advertising, @@ -714,7 +722,6 @@ void phylink_ethtool_get_pauseparam(struct phylink *, int phylink_ethtool_set_pauseparam(struct phylink *, struct ethtool_pauseparam *); int phylink_get_eee_err(struct phylink *); -int phylink_init_eee(struct phylink *, bool); int phylink_ethtool_get_eee(struct phylink *link, struct ethtool_keee *eee); int phylink_ethtool_set_eee(struct phylink *link, struct ethtool_keee *eee); int phylink_mii_ioctl(struct phylink *, struct ifreq *, int); diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 8ff23bf5a819..b698758000f8 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -31,6 +31,33 @@ struct pipe_buffer { unsigned long private; }; +/* + * Really only alpha needs 32-bit fields, but + * might as well do it for 64-bit architectures + * since that's what we've historically done, + * and it makes 'head_tail' always be a simple + * 'unsigned long'. + */ +#ifdef CONFIG_64BIT +typedef unsigned int pipe_index_t; +#else +typedef unsigned short pipe_index_t; +#endif + +/* + * We have to declare this outside 'struct pipe_inode_info', + * but then we can't use 'union pipe_index' for an anonymous + * union, so we end up having to duplicate this declaration + * below. Annoying. 
+ */ +union pipe_index { + unsigned long head_tail; + struct { + pipe_index_t head; + pipe_index_t tail; + }; +}; + /** * struct pipe_inode_info - a linux kernel pipe * @mutex: mutex protecting the whole thing @@ -38,6 +65,7 @@ struct pipe_buffer { * @wr_wait: writer wait point in case of full pipe * @head: The point of buffer production * @tail: The point of buffer consumption + * @head_tail: unsigned long union of @head and @tail * @note_loss: The next read() should insert a data-lost message * @max_usage: The maximum number of slots that may be used in the ring * @ring_size: total number of buffers (should be a power of 2) @@ -58,8 +86,16 @@ struct pipe_buffer { struct pipe_inode_info { struct mutex mutex; wait_queue_head_t rd_wait, wr_wait; - unsigned int head; - unsigned int tail; + + /* This has to match the 'union pipe_index' above */ + union { + unsigned long head_tail; + struct { + pipe_index_t head; + pipe_index_t tail; + }; + }; + unsigned int max_usage; unsigned int ring_size; unsigned int nr_accounted; @@ -141,23 +177,23 @@ static inline bool pipe_has_watch_queue(const struct pipe_inode_info *pipe) } /** - * pipe_empty - Return true if the pipe is empty + * pipe_occupancy - Return number of slots used in the pipe * @head: The pipe ring head pointer * @tail: The pipe ring tail pointer */ -static inline bool pipe_empty(unsigned int head, unsigned int tail) +static inline unsigned int pipe_occupancy(unsigned int head, unsigned int tail) { - return head == tail; + return (pipe_index_t)(head - tail); } /** - * pipe_occupancy - Return number of slots used in the pipe + * pipe_empty - Return true if the pipe is empty * @head: The pipe ring head pointer * @tail: The pipe ring tail pointer */ -static inline unsigned int pipe_occupancy(unsigned int head, unsigned int tail) +static inline bool pipe_empty(unsigned int head, unsigned int tail) { - return head - tail; + return !pipe_occupancy(head, tail); } /** @@ -173,6 +209,33 @@ static inline bool pipe_full(unsigned int head, unsigned int tail, } /** + * pipe_is_full - Return true if the pipe is full + * @pipe: the pipe + */ +static inline bool pipe_is_full(const struct pipe_inode_info *pipe) +{ + return pipe_full(pipe->head, pipe->tail, pipe->max_usage); +} + +/** + * pipe_is_empty - Return true if the pipe is empty + * @pipe: the pipe + */ +static inline bool pipe_is_empty(const struct pipe_inode_info *pipe) +{ + return pipe_empty(pipe->head, pipe->tail); +} + +/** + * pipe_buf_usage - Return how many pipe buffers are in use + * @pipe: the pipe + */ +static inline unsigned int pipe_buf_usage(const struct pipe_inode_info *pipe) +{ + return pipe_occupancy(pipe->head, pipe->tail); +} + +/** * pipe_buf - Return the pipe buffer for the specified slot in the pipe ring * @pipe: The pipe to access * @slot: The slot of interest @@ -245,15 +308,6 @@ static inline bool pipe_buf_try_steal(struct pipe_inode_info *pipe, return buf->ops->try_steal(pipe, buf); } -static inline void pipe_discard_from(struct pipe_inode_info *pipe, - unsigned int old_head) -{ - unsigned int mask = pipe->ring_size - 1; - - while (pipe->head > old_head) - pipe_buf_release(pipe, &pipe->bufs[--pipe->head & mask]); -} - /* Differs from PIPE_BUF in that PIPE_SIZE is the length of the actual memory allocation, whereas PIPE_BUF makes atomicity guarantees. 
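/*
 * Illustrative sketch (an assumption, not from the patch): with
 * pipe_index_t being 16 bits on 32-bit kernels, the (pipe_index_t) cast
 * in pipe_occupancy() keeps the slot count correct across index
 * wrap-around. unsigned short models the 32-bit case here.
 */
static unsigned int pipe_occupancy_demo(void)
{
	unsigned short head = 3, tail = 65534;	/* head has wrapped past tail */

	/* 3 - 65534 wraps to 5 in 16-bit arithmetic: five slots in use */
	return (unsigned short)(head - tail);
}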
*/ #define PIPE_SIZE PAGE_SIZE diff --git a/include/linux/platform_profile.h b/include/linux/platform_profile.h index 8ab5b0e8eb2c..8c9df7dadd5d 100644 --- a/include/linux/platform_profile.h +++ b/include/linux/platform_profile.h @@ -33,6 +33,8 @@ enum platform_profile_option { * @probe: Callback to setup choices available to the new class device. These * choices will only be enforced when setting a new profile, not when * getting the current one. + * @hidden_choices: Callback to setup choices that are not visible to the user + * but can be set by the driver. * @profile_get: Callback that will be called when showing the current platform * profile in sysfs. * @profile_set: Callback that will be called when storing a new platform @@ -40,6 +42,7 @@ enum platform_profile_option { */ struct platform_profile_ops { int (*probe)(void *drvdata, unsigned long *choices); + int (*hidden_choices)(void *drvdata, unsigned long *choices); int (*profile_get)(struct device *dev, enum platform_profile_option *profile); int (*profile_set)(struct device *dev, enum platform_profile_option profile); }; diff --git a/include/linux/rcuref.h b/include/linux/rcuref.h index 2c8bfd0f1b6b..6322d8c1c6b4 100644 --- a/include/linux/rcuref.h +++ b/include/linux/rcuref.h @@ -71,27 +71,30 @@ static inline __must_check bool rcuref_get(rcuref_t *ref) return rcuref_get_slowpath(ref); } -extern __must_check bool rcuref_put_slowpath(rcuref_t *ref); +extern __must_check bool rcuref_put_slowpath(rcuref_t *ref, unsigned int cnt); /* * Internal helper. Do not invoke directly. */ static __always_inline __must_check bool __rcuref_put(rcuref_t *ref) { + int cnt; + RCU_LOCKDEP_WARN(!rcu_read_lock_held() && preemptible(), "suspicious rcuref_put_rcusafe() usage"); /* * Unconditionally decrease the reference count. The saturation and * dead zones provide enough tolerance for this. */ - if (likely(!atomic_add_negative_release(-1, &ref->refcnt))) + cnt = atomic_sub_return_release(1, &ref->refcnt); + if (likely(cnt >= 0)) return false; /* * Handle the last reference drop and cases inside the saturation * and dead zones. */ - return rcuref_put_slowpath(ref); + return rcuref_put_slowpath(ref, cnt); } /** diff --git a/include/linux/sched.h b/include/linux/sched.h index 9632e3318e0d..9c15365a30c0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1701,7 +1701,7 @@ extern struct pid *cad_pid; #define PF_USED_MATH 0x00002000 /* If unset the fpu must be initialized before use */ #define PF_USER_WORKER 0x00004000 /* Kernel thread cloned from userspace thread */ #define PF_NOFREEZE 0x00008000 /* This thread should not be frozen */ -#define PF__HOLE__00010000 0x00010000 +#define PF_KCOMPACTD 0x00010000 /* I am kcompactd */ #define PF_KSWAPD 0x00020000 /* I am kswapd */ #define PF_MEMALLOC_NOFS 0x00040000 /* All allocations inherit GFP_NOFS. See memalloc_nfs_save() */ #define PF_MEMALLOC_NOIO 0x00080000 /* All allocations inherit GFP_NOIO. 
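/*
 * Hedged usage sketch for the rcuref change above: callers are unchanged,
 * only the slowpath now receives the post-decrement count so it can tell
 * the final reference drop apart from the saturation/dead zones.
 * struct foo and foo_release() are hypothetical; foo is assumed to embed
 * an rcuref_t 'ref' and a struct rcu_head 'rcu_head'.
 */
static void foo_release(struct foo *f)
{
	if (rcuref_put(&f->ref))	/* true only for the final reference */
		kfree_rcu(f, rcu_head);
}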
See memalloc_noio_save() */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 14517e95a46c..cd8294cdc249 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -708,6 +708,8 @@ enum { SKB_GSO_UDP_L4 = 1 << 17, SKB_GSO_FRAGLIST = 1 << 18, + + SKB_GSO_TCP_ACCECN = 1 << 19, }; #if BITS_PER_LONG > 32 @@ -3865,25 +3867,6 @@ static inline int __must_check skb_put_padto(struct sk_buff *skb, unsigned int l bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i) __must_check; -static inline int skb_add_data(struct sk_buff *skb, - struct iov_iter *from, int copy) -{ - const int off = skb->len; - - if (skb->ip_summed == CHECKSUM_NONE) { - __wsum csum = 0; - if (csum_and_copy_from_iter_full(skb_put(skb, copy), copy, - &csum, from)) { - skb->csum = csum_block_add(skb->csum, csum, off); - return 0; - } - } else if (copy_from_iter_full(skb_put(skb, copy), copy, from)) - return 0; - - __skb_trim(skb, off); - return -EFAULT; -} - static inline bool skb_can_coalesce(struct sk_buff *skb, int i, const struct page *page, int off) { diff --git a/include/linux/udp.h b/include/linux/udp.h index 0807e21cfec9..895240177f4f 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -101,6 +101,13 @@ struct udp_sock { /* Cache friendly copy of sk->sk_peek_off >= 0 */ bool peeking_with_offset; + + /* + * Accounting for the tunnel GRO fastpath. + * Unprotected by compilers guard, as it uses space available in + * the last UDP socket cacheline. + */ + struct hlist_node tunnel_list; }; #define udp_test_bit(nr, sk) \ @@ -219,4 +226,13 @@ static inline void udp_allow_gso(struct sock *sk) #define IS_UDPLITE(__sk) (__sk->sk_protocol == IPPROTO_UDPLITE) +static inline struct sock *udp_tunnel_sk(const struct net *net, bool is_ipv6) +{ +#if IS_ENABLED(CONFIG_NET_UDP_TUNNEL) + return rcu_dereference(net->ipv4.udp_tunnel_gro[is_ipv6].sk); +#else + return NULL; +#endif +} + #endif /* _LINUX_UDP_H */ diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index f756fac95488..6281063cbd8e 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -804,6 +804,7 @@ struct hci_conn_params { extern struct list_head hci_dev_list; extern struct list_head hci_cb_list; extern rwlock_t hci_dev_list_lock; +extern struct mutex hci_cb_list_lock; #define hci_dev_set_flag(hdev, nr) set_bit((nr), (hdev)->dev_flags) #define hci_dev_clear_flag(hdev, nr) clear_bit((nr), (hdev)->dev_flags) @@ -2010,47 +2011,24 @@ struct hci_cb { char *name; - bool (*match) (struct hci_conn *conn); void (*connect_cfm) (struct hci_conn *conn, __u8 status); void (*disconn_cfm) (struct hci_conn *conn, __u8 status); void (*security_cfm) (struct hci_conn *conn, __u8 status, - __u8 encrypt); + __u8 encrypt); void (*key_change_cfm) (struct hci_conn *conn, __u8 status); void (*role_switch_cfm) (struct hci_conn *conn, __u8 status, __u8 role); }; -static inline void hci_cb_lookup(struct hci_conn *conn, struct list_head *list) -{ - struct hci_cb *cb, *cpy; - - rcu_read_lock(); - list_for_each_entry_rcu(cb, &hci_cb_list, list) { - if (cb->match && cb->match(conn)) { - cpy = kmalloc(sizeof(*cpy), GFP_ATOMIC); - if (!cpy) - break; - - *cpy = *cb; - INIT_LIST_HEAD(&cpy->list); - list_add_rcu(&cpy->list, list); - } - } - rcu_read_unlock(); -} - static inline void hci_connect_cfm(struct hci_conn *conn, __u8 status) { - struct list_head list; - struct hci_cb *cb, *tmp; - - INIT_LIST_HEAD(&list); - hci_cb_lookup(conn, &list); + struct hci_cb *cb; - 
list_for_each_entry_safe(cb, tmp, &list, list) { + mutex_lock(&hci_cb_list_lock); + list_for_each_entry(cb, &hci_cb_list, list) { if (cb->connect_cfm) cb->connect_cfm(conn, status); - kfree(cb); } + mutex_unlock(&hci_cb_list_lock); if (conn->connect_cfm_cb) conn->connect_cfm_cb(conn, status); @@ -2058,43 +2036,22 @@ static inline void hci_connect_cfm(struct hci_conn *conn, __u8 status) static inline void hci_disconn_cfm(struct hci_conn *conn, __u8 reason) { - struct list_head list; - struct hci_cb *cb, *tmp; - - INIT_LIST_HEAD(&list); - hci_cb_lookup(conn, &list); + struct hci_cb *cb; - list_for_each_entry_safe(cb, tmp, &list, list) { + mutex_lock(&hci_cb_list_lock); + list_for_each_entry(cb, &hci_cb_list, list) { if (cb->disconn_cfm) cb->disconn_cfm(conn, reason); - kfree(cb); } + mutex_unlock(&hci_cb_list_lock); if (conn->disconn_cfm_cb) conn->disconn_cfm_cb(conn, reason); } -static inline void hci_security_cfm(struct hci_conn *conn, __u8 status, - __u8 encrypt) -{ - struct list_head list; - struct hci_cb *cb, *tmp; - - INIT_LIST_HEAD(&list); - hci_cb_lookup(conn, &list); - - list_for_each_entry_safe(cb, tmp, &list, list) { - if (cb->security_cfm) - cb->security_cfm(conn, status, encrypt); - kfree(cb); - } - - if (conn->security_cfm_cb) - conn->security_cfm_cb(conn, status); -} - static inline void hci_auth_cfm(struct hci_conn *conn, __u8 status) { + struct hci_cb *cb; __u8 encrypt; if (test_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags)) @@ -2102,11 +2059,20 @@ static inline void hci_auth_cfm(struct hci_conn *conn, __u8 status) encrypt = test_bit(HCI_CONN_ENCRYPT, &conn->flags) ? 0x01 : 0x00; - hci_security_cfm(conn, status, encrypt); + mutex_lock(&hci_cb_list_lock); + list_for_each_entry(cb, &hci_cb_list, list) { + if (cb->security_cfm) + cb->security_cfm(conn, status, encrypt); + } + mutex_unlock(&hci_cb_list_lock); + + if (conn->security_cfm_cb) + conn->security_cfm_cb(conn, status); } static inline void hci_encrypt_cfm(struct hci_conn *conn, __u8 status) { + struct hci_cb *cb; __u8 encrypt; if (conn->state == BT_CONFIG) { @@ -2133,38 +2099,40 @@ static inline void hci_encrypt_cfm(struct hci_conn *conn, __u8 status) conn->sec_level = conn->pending_sec_level; } - hci_security_cfm(conn, status, encrypt); + mutex_lock(&hci_cb_list_lock); + list_for_each_entry(cb, &hci_cb_list, list) { + if (cb->security_cfm) + cb->security_cfm(conn, status, encrypt); + } + mutex_unlock(&hci_cb_list_lock); + + if (conn->security_cfm_cb) + conn->security_cfm_cb(conn, status); } static inline void hci_key_change_cfm(struct hci_conn *conn, __u8 status) { - struct list_head list; - struct hci_cb *cb, *tmp; - - INIT_LIST_HEAD(&list); - hci_cb_lookup(conn, &list); + struct hci_cb *cb; - list_for_each_entry_safe(cb, tmp, &list, list) { + mutex_lock(&hci_cb_list_lock); + list_for_each_entry(cb, &hci_cb_list, list) { if (cb->key_change_cfm) cb->key_change_cfm(conn, status); - kfree(cb); } + mutex_unlock(&hci_cb_list_lock); } static inline void hci_role_switch_cfm(struct hci_conn *conn, __u8 status, __u8 role) { - struct list_head list; - struct hci_cb *cb, *tmp; - - INIT_LIST_HEAD(&list); - hci_cb_lookup(conn, &list); + struct hci_cb *cb; - list_for_each_entry_safe(cb, tmp, &list, list) { + mutex_lock(&hci_cb_list_lock); + list_for_each_entry(cb, &hci_cb_list, list) { if (cb->role_switch_cfm) cb->role_switch_cfm(conn, status, role); - kfree(cb); } + mutex_unlock(&hci_cb_list_lock); } static inline bool hci_bdaddr_is_rpa(bdaddr_t *bdaddr, u8 addr_type) diff --git a/include/net/inet_connection_sock.h 
b/include/net/inet_connection_sock.h index d9978ffacc97..f736d3097e43 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -117,7 +117,8 @@ struct inet_connection_sock { #define ATO_BITS 8 __u32 ato:ATO_BITS, /* Predicted tick of soft clock */ lrcv_flowlabel:20, /* last received ipv6 flowlabel */ - unused:4; + dst_quick_ack:1, /* cache dst RTAX_QUICKACK */ + unused:3; unsigned long timeout; /* Currently scheduled timeout */ __u32 lrcvtime; /* timestamp of last received data packet */ __u16 last_seg_size; /* Size of last incoming segment */ diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 5af6eb14c5db..0eccd9c3a883 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -137,7 +137,7 @@ static inline void fqdir_pre_exit(struct fqdir *fqdir) } void fqdir_exit(struct fqdir *fqdir); -void inet_frag_kill(struct inet_frag_queue *q); +void inet_frag_kill(struct inet_frag_queue *q, int *refs); void inet_frag_destroy(struct inet_frag_queue *q); struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key); @@ -145,9 +145,9 @@ struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key); unsigned int inet_frag_rbtree_purge(struct rb_root *root, enum skb_drop_reason reason); -static inline void inet_frag_put(struct inet_frag_queue *q) +static inline void inet_frag_putn(struct inet_frag_queue *q, int refs) { - if (refcount_dec_and_test(&q->refcnt)) + if (refs && refcount_sub_and_test(refs, &q->refcnt)) inet_frag_destroy(q); } diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index f447d61d9598..949641e92539 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -527,10 +527,12 @@ static inline void sk_rcv_saddr_set(struct sock *sk, __be32 addr) int __inet_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk, u64 port_offset, + u32 hash_port0, int (*check_established)(struct inet_timewait_death_row *, struct sock *, __u16, struct inet_timewait_sock **, - bool rcu_lookup)); + bool rcu_lookup, + u32 hash)); int inet_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk); diff --git a/include/net/ip.h b/include/net/ip.h index ce5e59957dd5..8a48ade24620 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -357,7 +357,7 @@ static inline void inet_get_local_port_range(const struct net *net, int *low, in bool inet_sk_get_local_port_range(const struct sock *sk, int *low, int *high); #ifdef CONFIG_SYSCTL -static inline bool inet_is_local_reserved_port(struct net *net, unsigned short port) +static inline bool inet_is_local_reserved_port(const struct net *net, unsigned short port) { if (!net->ipv4.sysctl_local_reserved_ports) return false; diff --git a/include/net/ipv6_frag.h b/include/net/ipv6_frag.h index 7321ffe3a108..38ef66826939 100644 --- a/include/net/ipv6_frag.h +++ b/include/net/ipv6_frag.h @@ -66,6 +66,7 @@ ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq) { struct net_device *dev = NULL; struct sk_buff *head; + int refs = 1; rcu_read_lock(); /* Paired with the WRITE_ONCE() in fqdir_pre_exit(). 
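/*
 * Hedged sketch of the batched-refcount pattern introduced above: drops
 * are accumulated in a local counter under the queue lock and released
 * with a single refcount operation afterwards. foo_expire() is a
 * hypothetical caller.
 */
static void foo_expire(struct inet_frag_queue *q)
{
	int refs = 1;			/* the caller's own reference */

	spin_lock(&q->lock);
	inet_frag_kill(q, &refs);	/* unhashing adds its drops to 'refs' */
	spin_unlock(&q->lock);
	inet_frag_putn(q, refs);	/* one refcount_sub_and_test() in total */
}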
*/ @@ -77,7 +78,7 @@ ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq) goto out; fq->q.flags |= INET_FRAG_DROP; - inet_frag_kill(&fq->q); + inet_frag_kill(&fq->q, &refs); dev = dev_get_by_index_rcu(net, fq->iif); if (!dev) @@ -109,7 +110,7 @@ out: spin_unlock(&fq->q.lock); out_rcu_unlock: rcu_read_unlock(); - inet_frag_put(&fq->q); + inet_frag_putn(&fq->q, refs); } /* Check if the upper layer header is truncated in the first fragment. */ diff --git a/include/net/netdev_lock.h b/include/net/netdev_lock.h new file mode 100644 index 000000000000..99631fbd7f54 --- /dev/null +++ b/include/net/netdev_lock.h @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef _NET_NETDEV_LOCK_H +#define _NET_NETDEV_LOCK_H + +#include <linux/lockdep.h> +#include <linux/netdevice.h> + +static inline bool netdev_trylock(struct net_device *dev) +{ + return mutex_trylock(&dev->lock); +} + +static inline void netdev_assert_locked(struct net_device *dev) +{ + lockdep_assert_held(&dev->lock); +} + +static inline void netdev_assert_locked_or_invisible(struct net_device *dev) +{ + if (dev->reg_state == NETREG_REGISTERED || + dev->reg_state == NETREG_UNREGISTERING) + netdev_assert_locked(dev); +} + +static inline bool netdev_need_ops_lock(struct net_device *dev) +{ + bool ret = dev->request_ops_lock || !!dev->queue_mgmt_ops; + +#if IS_ENABLED(CONFIG_NET_SHAPER) + ret |= !!dev->netdev_ops->net_shaper_ops; +#endif + + return ret; +} + +static inline void netdev_lock_ops(struct net_device *dev) +{ + if (netdev_need_ops_lock(dev)) + netdev_lock(dev); +} + +static inline void netdev_unlock_ops(struct net_device *dev) +{ + if (netdev_need_ops_lock(dev)) + netdev_unlock(dev); +} + +static inline void netdev_ops_assert_locked(struct net_device *dev) +{ + if (netdev_need_ops_lock(dev)) + lockdep_assert_held(&dev->lock); +} + +static inline int netdev_lock_cmp_fn(const struct lockdep_map *a, + const struct lockdep_map *b) +{ + /* Only lower devices currently grab the instance lock, so no + * real ordering issues can occur. In the near future, only + * hardware devices will grab instance lock which also does not + * involve any ordering. Suppress lockdep ordering warnings + * until (if) we start grabbing instance lock on pure SW + * devices (bond/team/veth/etc). 
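/*
 * Hedged sketch: the conditional ops-lock helpers above are meant to
 * bracket ndo calls in core code. foo_do_change_mtu() is a hypothetical
 * caller and assumes the device implements ndo_change_mtu.
 */
static int foo_do_change_mtu(struct net_device *dev, int new_mtu)
{
	int err;

	netdev_lock_ops(dev);		/* no-op unless the device opted in */
	netdev_ops_assert_locked(dev);
	err = dev->netdev_ops->ndo_change_mtu(dev, new_mtu);
	netdev_unlock_ops(dev);
	return err;
}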
+ */ + if (a == b) + return 0; + return -1; +} + +#define netdev_lockdep_set_classes(dev) \ +{ \ + static struct lock_class_key qdisc_tx_busylock_key; \ + static struct lock_class_key qdisc_xmit_lock_key; \ + static struct lock_class_key dev_addr_list_lock_key; \ + static struct lock_class_key dev_instance_lock_key; \ + unsigned int i; \ + \ + (dev)->qdisc_tx_busylock = &qdisc_tx_busylock_key; \ + lockdep_set_class(&(dev)->addr_list_lock, \ + &dev_addr_list_lock_key); \ + lockdep_set_class(&(dev)->lock, \ + &dev_instance_lock_key); \ + lock_set_cmp_fn(&dev->lock, netdev_lock_cmp_fn, NULL); \ + for (i = 0; i < (dev)->num_tx_queues; i++) \ + lockdep_set_class(&(dev)->_tx[i]._xmit_lock, \ + &qdisc_xmit_lock_key); \ +} + +#endif diff --git a/include/net/netdev_netlink.h b/include/net/netdev_netlink.h new file mode 100644 index 000000000000..075962dbe743 --- /dev/null +++ b/include/net/netdev_netlink.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __NET_NETDEV_NETLINK_H +#define __NET_NETDEV_NETLINK_H + +#include <linux/list.h> + +struct netdev_nl_sock { + struct mutex lock; + struct list_head bindings; +}; + +#endif /* __NET_NETDEV_NETLINK_H */ diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 60d5dcdb289c..803d5f1601f9 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1891,7 +1891,7 @@ void nft_chain_filter_fini(void); void __init nft_chain_route_init(void); void nft_chain_route_fini(void); -void nf_tables_trans_destroy_flush_work(void); +void nf_tables_trans_destroy_flush_work(struct net *net); int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result); __be64 nf_jiffies64_to_msecs(u64 input); @@ -1905,6 +1905,7 @@ static inline int nft_request_module(struct net *net, const char *fmt, ...) { re struct nftables_pernet { struct list_head tables; struct list_head commit_list; + struct list_head destroy_list; struct list_head commit_set_list; struct list_head binding_list; struct list_head module_list; @@ -1915,6 +1916,7 @@ struct nftables_pernet { unsigned int base_seq; unsigned int gc_seq; u8 validate_state; + struct work_struct destroy_work; }; extern unsigned int nf_tables_net_id; diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 650b2dc9199f..6373e3f17da8 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -47,6 +47,11 @@ struct sysctl_fib_multipath_hash_seed { }; #endif +struct udp_tunnel_gro { + struct sock __rcu *sk; + struct hlist_head list; +}; + struct netns_ipv4 { /* Cacheline organization can be found documented in * Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst. 
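/*
 * Hedged sketch: the per-netns cached tunnel socket added above lets the
 * GRO stage skip a full UDP table lookup. foo_gro_fastpath() is
 * hypothetical; udp_tunnel_sk() dereferences an RCU pointer, so the RCU
 * read side must be held across its use.
 */
static void foo_gro_fastpath(const struct net *net, bool is_ipv6)
{
	struct sock *sk;

	rcu_read_lock();
	sk = udp_tunnel_sk(net, is_ipv6);	/* NULL unless one tunnel socket */
	if (sk) {
		/* use sk for the tunnel GRO fastpath while under RCU */
	}
	rcu_read_unlock();
}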
@@ -85,6 +90,11 @@ struct netns_ipv4 { struct inet_timewait_death_row tcp_death_row; struct udp_table *udp_table; +#if IS_ENABLED(CONFIG_NET_UDP_TUNNEL) + /* Not in a pernet subsys because need to be available at GRO stage */ + struct udp_tunnel_gro udp_tunnel_gro[2]; +#endif + #ifdef CONFIG_SYSCTL struct ctl_table_header *forw_hdr; struct ctl_table_header *frags_hdr; @@ -277,4 +287,5 @@ struct netns_ipv4 { struct hlist_head *inet_addr_lst; struct delayed_work addr_chk_work; }; + #endif diff --git a/include/net/tcp.h b/include/net/tcp.h index a9bc959fb102..d08fbf90495d 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -26,6 +26,7 @@ #include <linux/kref.h> #include <linux/ktime.h> #include <linux/indirect_call_wrapper.h> +#include <linux/bits.h> #include <net/inet_connection_sock.h> #include <net/inet_timewait_sock.h> @@ -373,16 +374,53 @@ static inline void tcp_dec_quickack_mode(struct sock *sk) } } -#define TCP_ECN_OK 1 -#define TCP_ECN_QUEUE_CWR 2 -#define TCP_ECN_DEMAND_CWR 4 -#define TCP_ECN_SEEN 8 +#define TCP_ECN_MODE_RFC3168 BIT(0) +#define TCP_ECN_QUEUE_CWR BIT(1) +#define TCP_ECN_DEMAND_CWR BIT(2) +#define TCP_ECN_SEEN BIT(3) +#define TCP_ECN_MODE_ACCECN BIT(4) + +#define TCP_ECN_DISABLED 0 +#define TCP_ECN_MODE_PENDING (TCP_ECN_MODE_RFC3168 | TCP_ECN_MODE_ACCECN) +#define TCP_ECN_MODE_ANY (TCP_ECN_MODE_RFC3168 | TCP_ECN_MODE_ACCECN) + +static inline bool tcp_ecn_mode_any(const struct tcp_sock *tp) +{ + return tp->ecn_flags & TCP_ECN_MODE_ANY; +} + +static inline bool tcp_ecn_mode_rfc3168(const struct tcp_sock *tp) +{ + return (tp->ecn_flags & TCP_ECN_MODE_ANY) == TCP_ECN_MODE_RFC3168; +} + +static inline bool tcp_ecn_mode_accecn(const struct tcp_sock *tp) +{ + return (tp->ecn_flags & TCP_ECN_MODE_ANY) == TCP_ECN_MODE_ACCECN; +} + +static inline bool tcp_ecn_disabled(const struct tcp_sock *tp) +{ + return !tcp_ecn_mode_any(tp); +} + +static inline bool tcp_ecn_mode_pending(const struct tcp_sock *tp) +{ + return (tp->ecn_flags & TCP_ECN_MODE_PENDING) == TCP_ECN_MODE_PENDING; +} + +static inline void tcp_ecn_mode_set(struct tcp_sock *tp, u8 mode) +{ + tp->ecn_flags &= ~TCP_ECN_MODE_ANY; + tp->ecn_flags |= mode; +} enum tcp_tw_status { TCP_TW_SUCCESS = 0, TCP_TW_RST = 1, TCP_TW_ACK = 2, - TCP_TW_SYN = 3 + TCP_TW_SYN = 3, + TCP_TW_ACK_OOW = 4 }; @@ -669,7 +707,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority, enum sk_rst_reason reason); int tcp_send_synack(struct sock *); void tcp_push_one(struct sock *, unsigned int mss_now); -void __tcp_send_ack(struct sock *sk, u32 rcv_nxt); +void __tcp_send_ack(struct sock *sk, u32 rcv_nxt, u16 flags); void tcp_send_ack(struct sock *sk); void tcp_send_delayed_ack(struct sock *sk); void tcp_send_loss_probe(struct sock *sk); @@ -934,15 +972,22 @@ static inline u32 tcp_rsk_tsval(const struct tcp_request_sock *treq) #define tcp_flag_byte(th) (((u_int8_t *)th)[13]) -#define TCPHDR_FIN 0x01 -#define TCPHDR_SYN 0x02 -#define TCPHDR_RST 0x04 -#define TCPHDR_PSH 0x08 -#define TCPHDR_ACK 0x10 -#define TCPHDR_URG 0x20 -#define TCPHDR_ECE 0x40 -#define TCPHDR_CWR 0x80 - +#define TCPHDR_FIN BIT(0) +#define TCPHDR_SYN BIT(1) +#define TCPHDR_RST BIT(2) +#define TCPHDR_PSH BIT(3) +#define TCPHDR_ACK BIT(4) +#define TCPHDR_URG BIT(5) +#define TCPHDR_ECE BIT(6) +#define TCPHDR_CWR BIT(7) +#define TCPHDR_AE BIT(8) +#define TCPHDR_FLAGS_MASK (TCPHDR_FIN | TCPHDR_SYN | TCPHDR_RST | \ + TCPHDR_PSH | TCPHDR_ACK | TCPHDR_URG | \ + TCPHDR_ECE | TCPHDR_CWR | TCPHDR_AE) +#define tcp_flags_ntohs(th) (ntohs(*(__be16 *)&tcp_flag_word(th)) & \ + 
TCPHDR_FLAGS_MASK) + +#define TCPHDR_ACE (TCPHDR_ECE | TCPHDR_CWR | TCPHDR_AE) #define TCPHDR_SYN_ECN (TCPHDR_SYN | TCPHDR_ECE | TCPHDR_CWR) /* State flags for sacked in struct tcp_skb_cb */ @@ -977,7 +1022,7 @@ struct tcp_skb_cb { u16 tcp_gso_size; }; }; - __u8 tcp_flags; /* TCP header flags. (tcp[13]) */ + __u16 tcp_flags; /* TCP header flags (tcp[12-13])*/ __u8 sacked; /* State flags for SACK. */ __u8 ip_dsfield; /* IPv4 tos or IPv6 dsfield */ @@ -1132,9 +1177,9 @@ enum tcp_ca_ack_event_flags { #define TCP_CA_UNSPEC 0 /* Algorithm can be set on socket without CAP_NET_ADMIN privileges */ -#define TCP_CONG_NON_RESTRICTED 0x1 +#define TCP_CONG_NON_RESTRICTED BIT(0) /* Requires ECN/ECT set on all packets */ -#define TCP_CONG_NEEDS_ECN 0x2 +#define TCP_CONG_NEEDS_ECN BIT(1) #define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | TCP_CONG_NEEDS_ECN) union tcp_cc_info; @@ -2598,8 +2643,8 @@ struct tcp_ulp_ops { /* cleanup ulp */ void (*release)(struct sock *sk); /* diagnostic */ - int (*get_info)(struct sock *sk, struct sk_buff *skb); - size_t (*get_info_size)(const struct sock *sk); + int (*get_info)(struct sock *sk, struct sk_buff *skb, bool net_admin); + size_t (*get_info_size)(const struct sock *sk, bool net_admin); /* clone ulp */ void (*clone)(const struct request_sock *req, struct sock *newsk, const gfp_t priority); diff --git a/include/net/udp.h b/include/net/udp.h index 6e89520e100d..a772510b2aa5 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -290,6 +290,7 @@ static inline void udp_lib_init_sock(struct sock *sk) struct udp_sock *up = udp_sk(sk); skb_queue_head_init(&up->reader_queue); + INIT_HLIST_NODE(&up->tunnel_list); up->forward_threshold = sk->sk_rcvbuf >> 2; set_bit(SOCK_CUSTOM_SOCKOPT, &sk->sk_socket->flags); } diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index a93dc51f6323..a7b230867eb1 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -203,6 +203,28 @@ static inline void udp_tunnel_encap_enable(struct sock *sk) udp_encap_enable(); } +#if IS_ENABLED(CONFIG_NET_UDP_TUNNEL) +void udp_tunnel_update_gro_lookup(struct net *net, struct sock *sk, bool add); +void udp_tunnel_update_gro_rcv(struct sock *sk, bool add); +#else +static inline void udp_tunnel_update_gro_lookup(struct net *net, + struct sock *sk, bool add) {} +static inline void udp_tunnel_update_gro_rcv(struct sock *sk, bool add) {} +#endif + +static inline void udp_tunnel_cleanup_gro(struct sock *sk) +{ + struct udp_sock *up = udp_sk(sk); + struct net *net = sock_net(sk); + + udp_tunnel_update_gro_rcv(sk, false); + + if (!up->tunnel_list.pprev) + return; + + udp_tunnel_update_gro_lookup(net, sk, false); +} + #define UDP_TUNNEL_NIC_MAX_TABLES 4 enum udp_tunnel_nic_info_flags { diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index 35dc016c9bb4..936bcac270b5 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -342,7 +342,7 @@ enum batadv_nl_attrs { BATADV_ATTR_MCAST_FLAGS_PRIV, /** - * @BATADV_ATTR_VLANID: VLAN id on top of soft interface + * @BATADV_ATTR_VLANID: VLAN id on top of mesh interface */ BATADV_ATTR_VLANID, @@ -380,7 +380,7 @@ enum batadv_nl_attrs { /** * @BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED: whether the bridge loop * avoidance feature is enabled. 
This feature detects and avoids loops - * between the mesh and devices bridged with the soft interface + * between the mesh and devices bridged with the mesh interface */ BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED, @@ -509,7 +509,7 @@ enum batadv_nl_commands { BATADV_CMD_UNSPEC, /** - * @BATADV_CMD_GET_MESH: Get attributes from softif/mesh + * @BATADV_CMD_GET_MESH: Get attributes from mesh(if) */ BATADV_CMD_GET_MESH, @@ -535,7 +535,7 @@ enum batadv_nl_commands { /** * @BATADV_CMD_GET_HARDIF: Get attributes from a hardif of the - * current softif + * current mesh(if) */ BATADV_CMD_GET_HARDIF, @@ -591,25 +591,25 @@ enum batadv_nl_commands { BATADV_CMD_GET_MCAST_FLAGS, /** - * @BATADV_CMD_SET_MESH: Set attributes for softif/mesh + * @BATADV_CMD_SET_MESH: Set attributes for mesh(if) */ BATADV_CMD_SET_MESH, /** * @BATADV_CMD_SET_HARDIF: Set attributes for hardif of the - * current softif + * current mesh(if) */ BATADV_CMD_SET_HARDIF, /** * @BATADV_CMD_GET_VLAN: Get attributes from a VLAN of the - * current softif + * current mesh(if) */ BATADV_CMD_GET_VLAN, /** * @BATADV_CMD_SET_VLAN: Set attributes for VLAN of the - * current softif + * current mesh(if) */ BATADV_CMD_SET_VLAN, @@ -691,7 +691,7 @@ enum batadv_ifla_attrs { */ IFLA_BATADV_ALGO_NAME, - /* add attributes above here, update the policy in soft-interface.c */ + /* add attributes above here, update the policy in mesh-interface.c */ /** * @__IFLA_BATADV_MAX: internal use diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 32a27b4a5020..92a2e79222ea 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -28,7 +28,8 @@ struct tcphdr { __be32 seq; __be32 ack_seq; #if defined(__LITTLE_ENDIAN_BITFIELD) - __u16 res1:4, + __u16 ae:1, + res1:3, doff:4, fin:1, syn:1, @@ -40,7 +41,8 @@ struct tcphdr { cwr:1; #elif defined(__BIG_ENDIAN_BITFIELD) __u16 doff:4, - res1:4, + res1:3, + ae:1, cwr:1, ece:1, urg:1, @@ -70,6 +72,7 @@ union tcp_word_hdr { #define tcp_flag_word(tp) (((union tcp_word_hdr *)(tp))->words[3]) enum { + TCP_FLAG_AE = __constant_cpu_to_be32(0x01000000), TCP_FLAG_CWR = __constant_cpu_to_be32(0x00800000), TCP_FLAG_ECE = __constant_cpu_to_be32(0x00400000), TCP_FLAG_URG = __constant_cpu_to_be32(0x00200000), @@ -78,7 +81,7 @@ enum { TCP_FLAG_RST = __constant_cpu_to_be32(0x00040000), TCP_FLAG_SYN = __constant_cpu_to_be32(0x00020000), TCP_FLAG_FIN = __constant_cpu_to_be32(0x00010000), - TCP_RESERVED_BITS = __constant_cpu_to_be32(0x0F000000), + TCP_RESERVED_BITS = __constant_cpu_to_be32(0x0E000000), TCP_DATA_OFFSET = __constant_cpu_to_be32(0xF0000000) }; diff --git a/io_uring/net.c b/io_uring/net.c index 17852a6616ff..5d0b56ff50ee 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -322,7 +322,9 @@ static int io_sendmsg_copy_hdr(struct io_kiocb *req, if (unlikely(ret)) return ret; - return __get_compat_msghdr(&iomsg->msg, &cmsg, NULL); + ret = __get_compat_msghdr(&iomsg->msg, &cmsg, NULL); + sr->msg_control = iomsg->msg.msg_control_user; + return ret; } #endif diff --git a/io_uring/rw.c b/io_uring/rw.c index 9edc6baebd01..e5528cebcd06 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -560,11 +560,10 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res) if (kiocb->ki_flags & IOCB_WRITE) io_req_end_write(req); if (unlikely(res != req->cqe.res)) { - if (res == -EAGAIN && io_rw_should_reissue(req)) { + if (res == -EAGAIN && io_rw_should_reissue(req)) req->flags |= REQ_F_REISSUE | REQ_F_BL_NO_RECYCLE; - return; - } - req->cqe.res = res; + else + req->cqe.res = res; } /* order with 
io_iopoll_complete() checking ->iopoll_completed */ diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index 1a4fec330eaa..42ae8d595c2c 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -25,6 +25,7 @@ #include <linux/rhashtable.h> #include <linux/rtnetlink.h> #include <linux/rwsem.h> +#include <net/netdev_lock.h> #include <net/xdp.h> /* Protects offdevs, members of bpf_offload_netdev and offload members @@ -528,13 +529,14 @@ struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr) return ERR_PTR(-ENOMEM); bpf_map_init_from_attr(&offmap->map, attr); - rtnl_lock(); - down_write(&bpf_devs_lock); offmap->netdev = __dev_get_by_index(net, attr->map_ifindex); err = bpf_dev_offload_check(offmap->netdev); if (err) - goto err_unlock; + goto err_unlock_rtnl; + + netdev_lock_ops(offmap->netdev); + down_write(&bpf_devs_lock); ondev = bpf_offload_find_netdev(offmap->netdev); if (!ondev) { @@ -548,12 +550,15 @@ struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr) list_add_tail(&offmap->offloads, &ondev->maps); up_write(&bpf_devs_lock); + netdev_unlock_ops(offmap->netdev); rtnl_unlock(); return &offmap->map; err_unlock: up_write(&bpf_devs_lock); + netdev_unlock_ops(offmap->netdev); +err_unlock_rtnl: rtnl_unlock(); bpf_map_area_free(offmap); return ERR_PTR(err); diff --git a/kernel/events/core.c b/kernel/events/core.c index bcb09e011e9e..823aa0824916 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -4950,7 +4950,7 @@ static struct perf_event_pmu_context * find_get_pmu_context(struct pmu *pmu, struct perf_event_context *ctx, struct perf_event *event) { - struct perf_event_pmu_context *new = NULL, *epc; + struct perf_event_pmu_context *new = NULL, *pos = NULL, *epc; void *task_ctx_data = NULL; if (!ctx->task) { @@ -5007,12 +5007,19 @@ find_get_pmu_context(struct pmu *pmu, struct perf_event_context *ctx, atomic_inc(&epc->refcount); goto found_epc; } + /* Make sure the pmu_ctx_list is sorted by PMU type: */ + if (!pos && epc->pmu->type > pmu->type) + pos = epc; } epc = new; new = NULL; - list_add(&epc->pmu_ctx_entry, &ctx->pmu_ctx_list); + if (!pos) + list_add_tail(&epc->pmu_ctx_entry, &ctx->pmu_ctx_list); + else + list_add(&epc->pmu_ctx_entry, pos->pmu_ctx_entry.prev); + epc->ctx = ctx; found_epc: @@ -5962,14 +5969,15 @@ static int _perf_event_period(struct perf_event *event, u64 value) if (!value) return -EINVAL; - if (event->attr.freq && value > sysctl_perf_event_sample_rate) - return -EINVAL; - - if (perf_event_check_period(event, value)) - return -EINVAL; - - if (!event->attr.freq && (value & (1ULL << 63))) - return -EINVAL; + if (event->attr.freq) { + if (value > sysctl_perf_event_sample_rate) + return -EINVAL; + } else { + if (perf_event_check_period(event, value)) + return -EINVAL; + if (value & (1ULL << 63)) + return -EINVAL; + } event_function_call(event, __perf_event_period, &value); @@ -8321,7 +8329,8 @@ void perf_event_exec(void) perf_event_enable_on_exec(ctx); perf_event_remove_on_exec(ctx); - perf_iterate_ctx(ctx, perf_event_addr_filters_exec, NULL, true); + scoped_guard(rcu) + perf_iterate_ctx(ctx, perf_event_addr_filters_exec, NULL, true); perf_unpin_context(ctx); put_ctx(ctx); @@ -11821,6 +11830,21 @@ free_dev: static struct lock_class_key cpuctx_mutex; static struct lock_class_key cpuctx_lock; +static bool idr_cmpxchg(struct idr *idr, unsigned long id, void *old, void *new) +{ + void *tmp, *val = idr_find(idr, id); + + if (val != old) + return false; + + tmp = idr_replace(idr, new, id); + if (IS_ERR(tmp)) + return false; + + 
WARN_ON_ONCE(tmp != val); + return true; +} + int perf_pmu_register(struct pmu *pmu, const char *name, int type) { int cpu, ret, max = PERF_TYPE_MAX; @@ -11847,7 +11871,7 @@ int perf_pmu_register(struct pmu *pmu, const char *name, int type) if (type >= 0) max = type; - ret = idr_alloc(&pmu_idr, pmu, max, 0, GFP_KERNEL); + ret = idr_alloc(&pmu_idr, NULL, max, 0, GFP_KERNEL); if (ret < 0) goto free_pdc; @@ -11855,6 +11879,7 @@ int perf_pmu_register(struct pmu *pmu, const char *name, int type) type = ret; pmu->type = type; + atomic_set(&pmu->exclusive_cnt, 0); if (pmu_bus_running && !pmu->dev) { ret = pmu_dev_alloc(pmu); @@ -11903,14 +11928,22 @@ int perf_pmu_register(struct pmu *pmu, const char *name, int type) if (!pmu->event_idx) pmu->event_idx = perf_event_idx_default; + /* + * Now that the PMU is complete, make it visible to perf_try_init_event(). + */ + if (!idr_cmpxchg(&pmu_idr, pmu->type, NULL, pmu)) + goto free_context; list_add_rcu(&pmu->entry, &pmus); - atomic_set(&pmu->exclusive_cnt, 0); + ret = 0; unlock: mutex_unlock(&pmus_lock); return ret; +free_context: + free_percpu(pmu->cpu_pmu_context); + free_dev: if (pmu->dev && pmu->dev != PMU_NULL_DEV) { device_del(pmu->dev); @@ -11930,6 +11963,8 @@ void perf_pmu_unregister(struct pmu *pmu) { mutex_lock(&pmus_lock); list_del_rcu(&pmu->entry); + idr_remove(&pmu_idr, pmu->type); + mutex_unlock(&pmus_lock); /* * We dereference the pmu list under both SRCU and regular RCU, so @@ -11939,7 +11974,6 @@ void perf_pmu_unregister(struct pmu *pmu) synchronize_rcu(); free_percpu(pmu->pmu_disable_count); - idr_remove(&pmu_idr, pmu->type); if (pmu_bus_running && pmu->dev && pmu->dev != PMU_NULL_DEV) { if (pmu->nr_addr_filters) device_remove_file(pmu->dev, &dev_attr_nr_addr_filters); @@ -11947,7 +11981,6 @@ void perf_pmu_unregister(struct pmu *pmu) put_device(pmu->dev); } free_pmu_context(pmu); - mutex_unlock(&pmus_lock); } EXPORT_SYMBOL_GPL(perf_pmu_unregister); diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index bf2a87a0a378..b4ca8898fe17 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -495,6 +495,11 @@ retry: if (ret <= 0) goto put_old; + if (is_zero_page(old_page)) { + ret = -EINVAL; + goto put_old; + } + if (WARN(!is_register && PageCompound(old_page), "uprobe unregister should never work on compound page\n")) { ret = -EINVAL; @@ -762,10 +767,14 @@ static struct uprobe *hprobe_expire(struct hprobe *hprobe, bool get) enum hprobe_state hstate; /* - * return_instance's hprobe is protected by RCU. - * Underlying uprobe is itself protected from reuse by SRCU. + * Caller should guarantee that return_instance is not going to be + * freed from under us. This can be achieved either through holding + * rcu_read_lock() or by owning return_instance in the first place. + * + * Underlying uprobe is itself protected from reuse by SRCU, so ensure + * SRCU lock is held properly. 
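/*
 * Hedged sketch of the publish ordering idr_cmpxchg() enables in
 * perf_pmu_register() above: reserve the ID with a NULL entry, finish
 * initialisation, then flip NULL -> object to make it visible to
 * lookups. foo_register() is hypothetical.
 */
static int foo_register(struct idr *idr, void *obj)
{
	int id = idr_alloc(idr, NULL, 0, 0, GFP_KERNEL);	/* reserved, invisible */

	if (id < 0)
		return id;
	/* ... complete initialisation of obj ... */
	if (!idr_cmpxchg(idr, id, NULL, obj))
		return -EBUSY;	/* should not happen for a privately held slot */
	return id;
}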
*/ - lockdep_assert(rcu_read_lock_held() && srcu_read_lock_held(&uretprobes_srcu)); + lockdep_assert(srcu_read_lock_held(&uretprobes_srcu)); hstate = READ_ONCE(hprobe->state); switch (hstate) { diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 8f6cfec87555..7098ed44e717 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -107,7 +107,7 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns goto out_free_idr; ns->ns.ops = &pidns_operations; - ns->pid_max = parent_pid_ns->pid_max; + ns->pid_max = PID_MAX_LIMIT; err = register_pidns_sysctls(ns); if (err) goto out_free_inum; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 9aecd914ac69..67189907214d 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7285,7 +7285,7 @@ out_unlock: #if !defined(CONFIG_PREEMPTION) || defined(CONFIG_PREEMPT_DYNAMIC) int __sched __cond_resched(void) { - if (should_resched(0)) { + if (should_resched(0) && !irqs_disabled()) { preempt_schedule_common(); return 1; } diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 38e4537790af..ff4df16b5186 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -3189,7 +3189,7 @@ int sched_dl_global_validate(void) * value smaller than the currently allocated bandwidth in * any of the root_domains. */ - for_each_possible_cpu(cpu) { + for_each_online_cpu(cpu) { rcu_read_lock_sched(); if (dl_bw_visited(cpu, gen)) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 0f1da199cfc7..7b9dfee858e7 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -6422,6 +6422,9 @@ static bool check_builtin_idle_enabled(void) __bpf_kfunc s32 scx_bpf_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, bool *is_idle) { + if (!ops_cpu_valid(prev_cpu, NULL)) + goto prev_cpu; + if (!check_builtin_idle_enabled()) goto prev_cpu; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 1c0ef435a7aa..c798d2795243 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4045,15 +4045,17 @@ static inline bool child_cfs_rq_on_list(struct cfs_rq *cfs_rq) { struct cfs_rq *prev_cfs_rq; struct list_head *prev; + struct rq *rq = rq_of(cfs_rq); if (cfs_rq->on_list) { prev = cfs_rq->leaf_cfs_rq_list.prev; } else { - struct rq *rq = rq_of(cfs_rq); - prev = rq->tmp_alone_branch; } + if (prev == &rq->leaf_cfs_rq_list) + return false; + prev_cfs_rq = container_of(prev, struct cfs_rq, leaf_cfs_rq_list); return (prev_cfs_rq->tg->parent == cfs_rq->tg); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 6b0c25761ccb..fc88e0688daf 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -540,6 +540,7 @@ static int function_stat_show(struct seq_file *m, void *v) static struct trace_seq s; unsigned long long avg; unsigned long long stddev; + unsigned long long stddev_denom; #endif guard(mutex)(&ftrace_profile_lock); @@ -559,23 +560,19 @@ static int function_stat_show(struct seq_file *m, void *v) #ifdef CONFIG_FUNCTION_GRAPH_TRACER seq_puts(m, " "); - /* Sample standard deviation (s^2) */ - if (rec->counter <= 1) - stddev = 0; - else { - /* - * Apply Welford's method: - * s^2 = 1 / (n * (n-1)) * (n * \Sum (x_i)^2 - (\Sum x_i)^2) - */ + /* + * Variance formula: + * s^2 = 1 / (n * (n-1)) * (n * \Sum (x_i)^2 - (\Sum x_i)^2) + * Maybe Welford's method is better here? + * Divide only by 1000 for ns^2 -> us^2 conversion. + * trace_print_graph_duration will divide by 1000 again. 
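/*
 * Worked check of the variance formula above, with illustrative numbers:
 * samples {2, 4, 6}, n = 3, sum = 12, sum of squares = 56, so
 * s^2 = (3 * 56 - 12 * 12) / (3 * 2) = 24 / 6 = 4, matching the textbook
 * sample variance of those values.
 */
static unsigned long long variance_demo(void)
{
	unsigned long long n = 3, sum = 12, sum_sq = 56;

	return (n * sum_sq - sum * sum) / (n * (n - 1));	/* == 4 */
}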
+ */ + stddev = 0; + stddev_denom = rec->counter * (rec->counter - 1) * 1000; + if (stddev_denom) { stddev = rec->counter * rec->time_squared - rec->time * rec->time; - - /* - * Divide only 1000 for ns^2 -> us^2 conversion. - * trace_print_graph_duration will divide 1000 again. - */ - stddev = div64_ul(stddev, - rec->counter * (rec->counter - 1) * 1000); + stddev = div64_ul(stddev, stddev_denom); } trace_seq_init(&s); diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 261163b00137..ad7419e24055 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -6724,27 +6724,27 @@ static int event_hist_trigger_parse(struct event_command *cmd_ops, if (existing_hist_update_only(glob, trigger_data, file)) goto out_free; - ret = event_trigger_register(cmd_ops, file, glob, trigger_data); - if (ret < 0) - goto out_free; + if (!get_named_trigger_data(trigger_data)) { - if (get_named_trigger_data(trigger_data)) - goto enable; + ret = create_actions(hist_data); + if (ret) + goto out_free; - ret = create_actions(hist_data); - if (ret) - goto out_unreg; + if (has_hist_vars(hist_data) || hist_data->n_var_refs) { + ret = save_hist_vars(hist_data); + if (ret) + goto out_free; + } - if (has_hist_vars(hist_data) || hist_data->n_var_refs) { - ret = save_hist_vars(hist_data); + ret = tracing_map_init(hist_data->map); if (ret) - goto out_unreg; + goto out_free; } - ret = tracing_map_init(hist_data->map); - if (ret) - goto out_unreg; -enable: + ret = event_trigger_register(cmd_ops, file, glob, trigger_data); + if (ret < 0) + goto out_free; + ret = hist_trigger_enable(trigger_data, file); if (ret) goto out_unreg; diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c index b8f3c4ba309b..e27305d31fc5 100644 --- a/kernel/trace/trace_fprobe.c +++ b/kernel/trace/trace_fprobe.c @@ -1049,6 +1049,19 @@ static int parse_symbol_and_return(int argc, const char *argv[], if (*is_return) return 0; + if (is_tracepoint) { + tmp = *symbol; + while (*tmp && (isalnum(*tmp) || *tmp == '_')) + tmp++; + if (*tmp) { + /* find a wrong character. */ + trace_probe_log_err(tmp - *symbol, BAD_TP_NAME); + kfree(*symbol); + *symbol = NULL; + return -EINVAL; + } + } + /* If there is $retval, this should be a return fprobe. 
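
The tracepoint-name scan added above, redone as a self-contained helper (the function name is hypothetical; the accepted character set matches the loop in the patch):

	#include <ctype.h>
	#include <stdbool.h>

	/* accept only [A-Za-z0-9_]; anything else is a BAD_TP_NAME */
	static bool tracepoint_name_ok(const char *name)
	{
		const char *p = name;

		while (*p && (isalnum((unsigned char)*p) || *p == '_'))
			p++;
		return *p == '\0';	/* stopped early: invalid character */
	}
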
*/ for (i = 2; i < argc; i++) { tmp = strstr(argv[i], "$retval"); @@ -1056,6 +1069,8 @@ static int parse_symbol_and_return(int argc, const char *argv[], if (is_tracepoint) { trace_probe_log_set_index(i); trace_probe_log_err(tmp - argv[i], RETVAL_ON_PROBE); + kfree(*symbol); + *symbol = NULL; return -EINVAL; } *is_return = true; @@ -1215,6 +1230,11 @@ static int trace_fprobe_create_internal(int argc, const char *argv[], if (is_return && tf->tp.entry_arg) { tf->fp.entry_handler = trace_fprobe_entry_handler; tf->fp.entry_data_size = traceprobe_get_entry_data_size(&tf->tp); + if (ALIGN(tf->fp.entry_data_size, sizeof(long)) > MAX_FPROBE_DATA_SIZE) { + trace_probe_log_set_index(2); + trace_probe_log_err(0, TOO_MANY_EARGS); + return -E2BIG; + } } ret = traceprobe_set_print_fmt(&tf->tp, diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 5803e6a41570..96792bc4b092 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -36,7 +36,6 @@ #define MAX_BTF_ARGS_LEN 128 #define MAX_DENTRY_ARGS_LEN 256 #define MAX_STRING_SIZE PATH_MAX -#define MAX_ARG_BUF_LEN (MAX_TRACE_ARGS * MAX_ARG_NAME_LEN) /* Reserved field names */ #define FIELD_STRING_IP "__probe_ip" @@ -481,6 +480,7 @@ extern int traceprobe_define_arg_fields(struct trace_event_call *event_call, C(NON_UNIQ_SYMBOL, "The symbol is not unique"), \ C(BAD_RETPROBE, "Retprobe address must be an function entry"), \ C(NO_TRACEPOINT, "Tracepoint is not found"), \ + C(BAD_TP_NAME, "Invalid character in tracepoint name"),\ C(BAD_ADDR_SUFFIX, "Invalid probed address suffix"), \ C(NO_GROUP_NAME, "Group name is not specified"), \ C(GROUP_TOO_LONG, "Group name is too long"), \ @@ -544,7 +544,8 @@ extern int traceprobe_define_arg_fields(struct trace_event_call *event_call, C(NO_BTF_FIELD, "This field is not found."), \ C(BAD_BTF_TID, "Failed to get BTF type info."),\ C(BAD_TYPE4STR, "This type does not fit for string."),\ - C(NEED_STRING_TYPE, "$comm and immediate-string only accepts string type"), + C(NEED_STRING_TYPE, "$comm and immediate-string only accepts string type"),\ + C(TOO_MANY_EARGS, "Too many entry arguments specified"), #undef C #define C(a, b) TP_ERR_##a diff --git a/kernel/vhost_task.c b/kernel/vhost_task.c index 8800f5acc007..2ef2e1b80091 100644 --- a/kernel/vhost_task.c +++ b/kernel/vhost_task.c @@ -133,7 +133,7 @@ struct vhost_task *vhost_task_create(bool (*fn)(void *), vtsk = kzalloc(sizeof(*vtsk), GFP_KERNEL); if (!vtsk) - return NULL; + return ERR_PTR(-ENOMEM); init_completion(&vtsk->exited); mutex_init(&vtsk->exit_mutex); vtsk->data = arg; @@ -145,7 +145,7 @@ struct vhost_task *vhost_task_create(bool (*fn)(void *), tsk = copy_process(NULL, 0, NUMA_NO_NODE, &args); if (IS_ERR(tsk)) { kfree(vtsk); - return NULL; + return ERR_PTR(PTR_ERR(tsk)); } vtsk->task = tsk; diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 238321830993..fbd42f5b8e1e 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2103,7 +2103,7 @@ config FAIL_SKB_REALLOC reallocated, catching possible invalid pointers to the skb. 
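
With vhost_task_create() now returning ERR_PTR() instead of NULL (see the hunk above), a caller would be converted roughly as follows. This is a sketch; only the first parameter of the function is visible in the hunk, the rest of the argument list is assumed:

	struct vhost_task *vtsk;

	vtsk = vhost_task_create(fn, handle_sigkill, arg, name);
	if (IS_ERR(vtsk))
		return PTR_ERR(vtsk);	/* -ENOMEM, or the copy_process() error */

This lets callers distinguish allocation failure from an error reported by copy_process(), which the old NULL return collapsed into one case.
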
For more information, check - Documentation/dev-tools/fault-injection/fault-injection.rst + Documentation/fault-injection/fault-injection.rst config FAULT_INJECTION_CONFIGFS bool "Configfs interface for fault-injection capabilities" diff --git a/lib/rcuref.c b/lib/rcuref.c index 97f300eca927..5bd726b71e39 100644 --- a/lib/rcuref.c +++ b/lib/rcuref.c @@ -220,6 +220,7 @@ EXPORT_SYMBOL_GPL(rcuref_get_slowpath); /** * rcuref_put_slowpath - Slowpath of __rcuref_put() * @ref: Pointer to the reference count + * @cnt: The resulting value of the fastpath decrement * * Invoked when the reference count is outside of the valid zone. * @@ -233,10 +234,8 @@ EXPORT_SYMBOL_GPL(rcuref_get_slowpath); * with a concurrent get()/put() pair. Caller is not allowed to * deconstruct the protected object. */ -bool rcuref_put_slowpath(rcuref_t *ref) +bool rcuref_put_slowpath(rcuref_t *ref, unsigned int cnt) { - unsigned int cnt = atomic_read(&ref->refcnt); - /* Did this drop the last reference? */ if (likely(cnt == RCUREF_NOREF)) { /* diff --git a/mm/compaction.c b/mm/compaction.c index 12ed8425fa17..a3203d97123e 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -3181,6 +3181,7 @@ static int kcompactd(void *p) long default_timeout = msecs_to_jiffies(HPAGE_FRAG_CHECK_INTERVAL_MSEC); long timeout = default_timeout; + current->flags |= PF_KCOMPACTD; set_freezable(); pgdat->kcompactd_max_order = 0; @@ -3237,6 +3238,8 @@ static int kcompactd(void *p) pgdat->proactive_compact_trigger = false; } + current->flags &= ~PF_KCOMPACTD; + return 0; } diff --git a/mm/filemap.c b/mm/filemap.c index d4564a79eb35..2974691fdfad 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2897,8 +2897,7 @@ size_t splice_folio_into_pipe(struct pipe_inode_info *pipe, size = min(size, folio_size(folio) - offset); offset %= PAGE_SIZE; - while (spliced < size && - !pipe_full(pipe->head, pipe->tail, pipe->max_usage)) { + while (spliced < size && !pipe_is_full(pipe)) { struct pipe_buffer *buf = pipe_head_buf(pipe); size_t part = min_t(size_t, PAGE_SIZE - offset, size - spliced); @@ -2955,7 +2954,7 @@ ssize_t filemap_splice_read(struct file *in, loff_t *ppos, iocb.ki_pos = *ppos; /* Work out how much data we can actually add into the pipe */ - used = pipe_occupancy(pipe->head, pipe->tail); + used = pipe_buf_usage(pipe); npages = max_t(ssize_t, pipe->max_usage - used, 0); len = min_t(size_t, len, npages * PAGE_SIZE); @@ -3015,7 +3014,7 @@ ssize_t filemap_splice_read(struct file *in, loff_t *ppos, total_spliced += n; *ppos += n; in->f_ra.prev_pos = *ppos; - if (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) + if (pipe_is_full(pipe)) goto out; } diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 163190e89ea1..97930d44d460 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2943,6 +2943,14 @@ int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn) return ret; } +void wait_for_freed_hugetlb_folios(void) +{ + if (llist_empty(&hpage_freelist)) + return; + + flush_work(&free_hpage_work); +} + typedef enum { /* * For either 0/1: we checked the per-vma resv map, and one resv @@ -5447,7 +5455,7 @@ static void move_huge_pte(struct vm_area_struct *vma, unsigned long old_addr, if (src_ptl != dst_ptl) spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); - pte = huge_ptep_get_and_clear(mm, old_addr, src_pte); + pte = huge_ptep_get_and_clear(mm, old_addr, src_pte, sz); if (need_clear_uffd_wp && pte_marker_uffd_wp(pte)) huge_pte_clear(mm, new_addr, dst_pte, sz); @@ -5622,7 +5630,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct 
vm_area_struct *vma, set_vma_resv_flags(vma, HPAGE_RESV_UNMAPPED); } - pte = huge_ptep_get_and_clear(mm, address, ptep); + pte = huge_ptep_get_and_clear(mm, address, ptep, sz); tlb_remove_huge_tlb_entry(h, tlb, ptep, address); if (huge_pte_dirty(pte)) set_page_dirty(page); diff --git a/mm/internal.h b/mm/internal.h index 109ef30fee11..20b3535935a3 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -1115,7 +1115,7 @@ static inline int find_next_best_node(int node, nodemask_t *used_node_mask) * mm/memory-failure.c */ #ifdef CONFIG_MEMORY_FAILURE -void unmap_poisoned_folio(struct folio *folio, enum ttu_flags ttu); +int unmap_poisoned_folio(struct folio *folio, unsigned long pfn, bool must_kill); void shake_folio(struct folio *folio); extern int hwpoison_filter(struct page *p); @@ -1138,8 +1138,9 @@ unsigned long page_mapped_in_vma(const struct page *page, struct vm_area_struct *vma); #else -static inline void unmap_poisoned_folio(struct folio *folio, enum ttu_flags ttu) +static inline int unmap_poisoned_folio(struct folio *folio, unsigned long pfn, bool must_kill) { + return -EBUSY; } #endif diff --git a/mm/kmsan/hooks.c b/mm/kmsan/hooks.c index 3ea50f09311f..3df45c25c1f6 100644 --- a/mm/kmsan/hooks.c +++ b/mm/kmsan/hooks.c @@ -357,6 +357,7 @@ void kmsan_handle_dma(struct page *page, size_t offset, size_t size, size -= to_go; } } +EXPORT_SYMBOL_GPL(kmsan_handle_dma); void kmsan_handle_dma_sg(struct scatterlist *sg, int nents, enum dma_data_direction dir) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 995a15eb67e2..327e02fdc029 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1556,11 +1556,35 @@ static int get_hwpoison_page(struct page *p, unsigned long flags) return ret; } -void unmap_poisoned_folio(struct folio *folio, enum ttu_flags ttu) +int unmap_poisoned_folio(struct folio *folio, unsigned long pfn, bool must_kill) { - if (folio_test_hugetlb(folio) && !folio_test_anon(folio)) { - struct address_space *mapping; + enum ttu_flags ttu = TTU_IGNORE_MLOCK | TTU_SYNC | TTU_HWPOISON; + struct address_space *mapping; + + if (folio_test_swapcache(folio)) { + pr_err("%#lx: keeping poisoned page in swap cache\n", pfn); + ttu &= ~TTU_HWPOISON; + } + /* + * Propagate the dirty bit from PTEs to struct page first, because we + * need this to decide if we should kill or just drop the page. + * XXX: the dirty test could be racy: set_page_dirty() may not always + * be called inside page lock (it's recommended but not enforced). + */ + mapping = folio_mapping(folio); + if (!must_kill && !folio_test_dirty(folio) && mapping && + mapping_can_writeback(mapping)) { + if (folio_mkclean(folio)) { + folio_set_dirty(folio); + } else { + ttu &= ~TTU_HWPOISON; + pr_info("%#lx: corrupted page was clean: dropped without side effects\n", + pfn); + } + } + + if (folio_test_hugetlb(folio) && !folio_test_anon(folio)) { /* * For hugetlb folios in shared mappings, try_to_unmap * could potentially call huge_pmd_unshare. Because of @@ -1572,7 +1596,7 @@ void unmap_poisoned_folio(struct folio *folio, enum ttu_flags ttu) if (!mapping) { pr_info("%#lx: could not lock mapping for mapped hugetlb folio\n", folio_pfn(folio)); - return; + return -EBUSY; } try_to_unmap(folio, ttu|TTU_RMAP_LOCKED); @@ -1580,6 +1604,8 @@ void unmap_poisoned_folio(struct folio *folio, enum ttu_flags ttu) } else { try_to_unmap(folio, ttu); } + + return folio_mapped(folio) ? 
-EBUSY : 0; } /* @@ -1589,8 +1615,6 @@ void unmap_poisoned_folio(struct folio *folio, enum ttu_flags ttu) static bool hwpoison_user_mappings(struct folio *folio, struct page *p, unsigned long pfn, int flags) { - enum ttu_flags ttu = TTU_IGNORE_MLOCK | TTU_SYNC | TTU_HWPOISON; - struct address_space *mapping; LIST_HEAD(tokill); bool unmap_success; int forcekill; @@ -1613,29 +1637,6 @@ static bool hwpoison_user_mappings(struct folio *folio, struct page *p, if (!folio_mapped(folio)) return true; - if (folio_test_swapcache(folio)) { - pr_err("%#lx: keeping poisoned page in swap cache\n", pfn); - ttu &= ~TTU_HWPOISON; - } - - /* - * Propagate the dirty bit from PTEs to struct page first, because we - * need this to decide if we should kill or just drop the page. - * XXX: the dirty test could be racy: set_page_dirty() may not always - * be called inside page lock (it's recommended but not enforced). - */ - mapping = folio_mapping(folio); - if (!(flags & MF_MUST_KILL) && !folio_test_dirty(folio) && mapping && - mapping_can_writeback(mapping)) { - if (folio_mkclean(folio)) { - folio_set_dirty(folio); - } else { - ttu &= ~TTU_HWPOISON; - pr_info("%#lx: corrupted page was clean: dropped without side effects\n", - pfn); - } - } - /* * First collect all the processes that have the page * mapped in dirty form. This has to be done before try_to_unmap, @@ -1643,9 +1644,7 @@ static bool hwpoison_user_mappings(struct folio *folio, struct page *p, */ collect_procs(folio, p, &tokill, flags & MF_ACTION_REQUIRED); - unmap_poisoned_folio(folio, ttu); - - unmap_success = !folio_mapped(folio); + unmap_success = !unmap_poisoned_folio(folio, pfn, flags & MF_MUST_KILL); if (!unmap_success) pr_err("%#lx: failed to unmap page (folio mapcount=%d)\n", pfn, folio_mapcount(folio)); diff --git a/mm/memory.c b/mm/memory.c index b4d3d4893267..b9661ccfa64f 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3051,8 +3051,10 @@ static int __apply_to_page_range(struct mm_struct *mm, unsigned long addr, next = pgd_addr_end(addr, end); if (pgd_none(*pgd) && !create) continue; - if (WARN_ON_ONCE(pgd_leaf(*pgd))) - return -EINVAL; + if (WARN_ON_ONCE(pgd_leaf(*pgd))) { + err = -EINVAL; + break; + } if (!pgd_none(*pgd) && WARN_ON_ONCE(pgd_bad(*pgd))) { if (!create) continue; @@ -5183,7 +5185,11 @@ vm_fault_t finish_fault(struct vm_fault *vmf) bool is_cow = (vmf->flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED); int type, nr_pages; - unsigned long addr = vmf->address; + unsigned long addr; + bool needs_fallback = false; + +fallback: + addr = vmf->address; /* Did we COW the page? */ if (is_cow) @@ -5222,7 +5228,8 @@ vm_fault_t finish_fault(struct vm_fault *vmf) * approach also applies to non-anonymous-shmem faults to avoid * inflating the RSS of the process. 
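
The finish_fault() change above turns the "PTE range already partly populated" case from a bail-out into a retry with a single page. Its control flow, reduced to a sketch (preferred_nr_pages() is a stand-in for the batching logic in the real function):

	bool needs_fallback = false;

	fallback:
		nr_pages = needs_fallback ? 1 : preferred_nr_pages(vmf);
		/* ... map the PTEs, take the PTE lock, revalidate ... */
		if (nr_pages > 1 && !pte_range_none(vmf->pte, nr_pages)) {
			/* part of the range got mapped under us: retry order-0 */
			needs_fallback = true;
			pte_unmap_unlock(vmf->pte, vmf->ptl);
			goto fallback;
		}
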
 */
-	if (!vma_is_anon_shmem(vma) || unlikely(userfaultfd_armed(vma))) {
+	if (!vma_is_anon_shmem(vma) || unlikely(userfaultfd_armed(vma)) ||
+	    unlikely(needs_fallback)) {
 		nr_pages = 1;
 	} else if (nr_pages > 1) {
 		pgoff_t idx = folio_page_idx(folio, page);
@@ -5258,9 +5265,9 @@ vm_fault_t finish_fault(struct vm_fault *vmf)
 			ret = VM_FAULT_NOPAGE;
 			goto unlock;
 		} else if (nr_pages > 1 && !pte_range_none(vmf->pte, nr_pages)) {
-			update_mmu_tlb_range(vma, addr, vmf->pte, nr_pages);
-			ret = VM_FAULT_NOPAGE;
-			goto unlock;
+			needs_fallback = true;
+			pte_unmap_unlock(vmf->pte, vmf->ptl);
+			goto fallback;
 		}
 
 		folio_ref_add(folio, nr_pages - 1);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index e3655f07dd6e..16cf9e17077e 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1822,26 +1822,24 @@ static void do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
 		if (folio_test_large(folio))
 			pfn = folio_pfn(folio) + folio_nr_pages(folio) - 1;
 
-		/*
-		 * HWPoison pages have elevated reference counts so the migration would
-		 * fail on them. It also doesn't make any sense to migrate them in the
-		 * first place. Still try to unmap such a page in case it is still mapped
-		 * (keep the unmap as the catch all safety net).
-		 */
+		if (!folio_try_get(folio))
+			continue;
+
+		if (unlikely(page_folio(page) != folio))
+			goto put_folio;
+
 		if (folio_test_hwpoison(folio) ||
 		    (folio_test_large(folio) && folio_test_has_hwpoisoned(folio))) {
 			if (WARN_ON(folio_test_lru(folio)))
 				folio_isolate_lru(folio);
-			if (folio_mapped(folio))
-				unmap_poisoned_folio(folio, TTU_IGNORE_MLOCK);
-			continue;
-		}
-
-		if (!folio_try_get(folio))
-			continue;
+			if (folio_mapped(folio)) {
+				folio_lock(folio);
+				unmap_poisoned_folio(folio, pfn, false);
+				folio_unlock(folio);
+			}
 
-		if (unlikely(page_folio(page) != folio))
 			goto put_folio;
+		}
 
 		if (!isolate_folio_to_list(folio, &source)) {
 			if (__ratelimit(&migrate_rs)) {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 579789600a3c..94917c729120 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4243,6 +4243,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 restart:
 	compaction_retries = 0;
 	no_progress_loops = 0;
+	compact_result = COMPACT_SKIPPED;
 	compact_priority = DEF_COMPACT_PRIORITY;
 	cpuset_mems_cookie = read_mems_allowed_begin();
 	zonelist_iter_cookie = zonelist_iter_begin();
@@ -5849,11 +5850,10 @@ static void setup_per_zone_lowmem_reserve(void)
 
 		for (j = i + 1; j < MAX_NR_ZONES; j++) {
 			struct zone *upper_zone = &pgdat->node_zones[j];
-			bool empty = !zone_managed_pages(upper_zone);
 
 			managed_pages += zone_managed_pages(upper_zone);
 
-			if (clear || empty)
+			if (clear)
 				zone->lowmem_reserve[j] = 0;
 			else
 				zone->lowmem_reserve[j] = managed_pages / ratio;
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index c608e9d72865..a051a29e95ad 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -608,6 +608,16 @@ int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn,
 	int ret;
 
 	/*
+	 * Due to the deferred freeing of hugetlb folios, the hugepage folios may
+	 * not be released to the buddy system immediately. This can cause PageBuddy()
+	 * to fail in __test_page_isolated_in_pageblock(). To ensure that the
+	 * hugetlb folios are properly released back to the buddy system, we
+	 * invoke the wait_for_freed_hugetlb_folios() function to wait for the
+	 * release to complete.
+	 */
+	wait_for_freed_hugetlb_folios();
+
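
The do_migrate_range() rework above also switches to the usual speculative-reference pattern; in isolation, with explanatory comments:

	if (!folio_try_get(folio))
		continue;		/* folio was freed under us: skip it */

	if (unlikely(page_folio(page) != folio))
		goto put_folio;		/* page was reassigned meanwhile, the
					 * reference is stale: drop it and move on */

Only once both checks hold a stable reference does the hwpoison handling (and, for mapped folios, the now folio-locked unmap_poisoned_folio() call) run.
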
+	/*
 	 * Note: pageblock_nr_pages != MAX_PAGE_ORDER. Then, chunks of free
 	 * pages are not aligned to pageblock_nr_pages.
 	 * Then we just check migratetype first.
diff --git a/mm/shmem.c b/mm/shmem.c
index 4ea6109a8043..1ede0800e846 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1548,7 +1548,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
 	if (WARN_ON_ONCE(!wbc->for_reclaim))
 		goto redirty;
 
-	if (WARN_ON_ONCE((info->flags & VM_LOCKED) || sbinfo->noswap))
+	if ((info->flags & VM_LOCKED) || sbinfo->noswap)
 		goto redirty;
 
 	if (!total_swap_pages)
@@ -2253,7 +2253,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
 	struct folio *folio = NULL;
 	bool skip_swapcache = false;
 	swp_entry_t swap;
-	int error, nr_pages;
+	int error, nr_pages, order, split_order;
 
 	VM_BUG_ON(!*foliop || !xa_is_value(*foliop));
 	swap = radix_to_swp_entry(*foliop);
@@ -2272,10 +2272,9 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
 
 	/* Look it up and read it in.. */
 	folio = swap_cache_get_folio(swap, NULL, 0);
+	order = xa_get_order(&mapping->i_pages, index);
 	if (!folio) {
-		int order = xa_get_order(&mapping->i_pages, index);
 		bool fallback_order0 = false;
-		int split_order;
 
 		/* Or update major stats only when swapin succeeds?? */
 		if (fault_type) {
@@ -2339,6 +2338,29 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
 			error = -ENOMEM;
 			goto failed;
 		}
+	} else if (order != folio_order(folio)) {
+		/*
+		 * Swap readahead may swap in order 0 folios into swapcache
+		 * asynchronously, while the shmem mapping can still store
+		 * large swap entries. In such cases, we should split the
+		 * large swap entry to prevent possible data corruption.
+		 */
+		split_order = shmem_split_large_entry(inode, index, swap, gfp);
+		if (split_order < 0) {
+			error = split_order;
+			goto failed;
+		}
+
+		/*
+		 * If the large swap entry has already been split, it is
+		 * necessary to recalculate the new swap entry based on
+		 * the old order alignment.
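
To make the recalculation described above concrete, with made-up numbers: for index = 103 and split_order = 2, round_down(103, 4) = 100, so offset = 3 and the order-0 swap entry lies three slots past the aligned head of the old large entry. A standalone check:

	#include <assert.h>

	int main(void)
	{
		unsigned long index = 103, split_order = 2;
		unsigned long head = index & ~((1UL << split_order) - 1);	/* round_down() */
		unsigned long offset = index - head;

		assert(head == 100 && offset == 3);
		return 0;
	}
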
+ */ + if (split_order > 0) { + pgoff_t offset = index - round_down(index, 1 << split_order); + + swap = swp_entry(swp_type(swap), swp_offset(swap) + offset); + } } alloced: @@ -2346,7 +2368,8 @@ alloced: folio_lock(folio); if ((!skip_swapcache && !folio_test_swapcache(folio)) || folio->swap.val != swap.val || - !shmem_confirm_swap(mapping, index, swap)) { + !shmem_confirm_swap(mapping, index, swap) || + xa_get_order(&mapping->i_pages, index) != folio_order(folio)) { error = -EEXIST; goto unlock; } @@ -3487,7 +3510,7 @@ static size_t splice_zeropage_into_pipe(struct pipe_inode_info *pipe, size = min_t(size_t, size, PAGE_SIZE - offset); - if (!pipe_full(pipe->head, pipe->tail, pipe->max_usage)) { + if (!pipe_is_full(pipe)) { struct pipe_buffer *buf = pipe_head_buf(pipe); *buf = (struct pipe_buffer) { @@ -3514,7 +3537,7 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos, int error = 0; /* Work out how much data we can actually add into the pipe */ - used = pipe_occupancy(pipe->head, pipe->tail); + used = pipe_buf_usage(pipe); npages = max_t(ssize_t, pipe->max_usage - used, 0); len = min_t(size_t, len, npages * PAGE_SIZE); @@ -3601,7 +3624,7 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos, total_spliced += n; *ppos += n; in->f_ra.prev_pos = *ppos; - if (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) + if (pipe_is_full(pipe)) break; cond_resched(); diff --git a/mm/slab_common.c b/mm/slab_common.c index 4030907b6b7d..4c9f0a87f733 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -1304,6 +1304,8 @@ module_param(rcu_min_cached_objs, int, 0444); static int rcu_delay_page_cache_fill_msec = 5000; module_param(rcu_delay_page_cache_fill_msec, int, 0444); +static struct workqueue_struct *rcu_reclaim_wq; + /* Maximum number of jiffies to wait before draining a batch. */ #define KFREE_DRAIN_JIFFIES (5 * HZ) #define KFREE_N_BATCHES 2 @@ -1632,10 +1634,10 @@ __schedule_delayed_monitor_work(struct kfree_rcu_cpu *krcp) if (delayed_work_pending(&krcp->monitor_work)) { delay_left = krcp->monitor_work.timer.expires - jiffies; if (delay < delay_left) - mod_delayed_work(system_unbound_wq, &krcp->monitor_work, delay); + mod_delayed_work(rcu_reclaim_wq, &krcp->monitor_work, delay); return; } - queue_delayed_work(system_unbound_wq, &krcp->monitor_work, delay); + queue_delayed_work(rcu_reclaim_wq, &krcp->monitor_work, delay); } static void @@ -1733,7 +1735,7 @@ kvfree_rcu_queue_batch(struct kfree_rcu_cpu *krcp) // "free channels", the batch can handle. Break // the loop since it is done with this CPU thus // queuing an RCU work is _always_ success here. - queued = queue_rcu_work(system_unbound_wq, &krwp->rcu_work); + queued = queue_rcu_work(rcu_reclaim_wq, &krwp->rcu_work); WARN_ON_ONCE(!queued); break; } @@ -1883,7 +1885,7 @@ run_page_cache_worker(struct kfree_rcu_cpu *krcp) if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING && !atomic_xchg(&krcp->work_in_progress, 1)) { if (atomic_read(&krcp->backoff_page_cache_fill)) { - queue_delayed_work(system_unbound_wq, + queue_delayed_work(rcu_reclaim_wq, &krcp->page_cache_work, msecs_to_jiffies(rcu_delay_page_cache_fill_msec)); } else { @@ -2120,6 +2122,10 @@ void __init kvfree_rcu_init(void) int i, j; struct shrinker *kfree_rcu_shrinker; + rcu_reclaim_wq = alloc_workqueue("kvfree_rcu_reclaim", + WQ_UNBOUND | WQ_MEM_RECLAIM, 0); + WARN_ON(!rcu_reclaim_wq); + /* Clamp it to [0:100] seconds interval. 
*/ if (rcu_delay_page_cache_fill_msec < 0 || rcu_delay_page_cache_fill_msec > 100 * MSEC_PER_SEC) { diff --git a/mm/swapfile.c b/mm/swapfile.c index ba19430dd4ea..df7c4e8b089c 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -653,7 +653,8 @@ static void relocate_cluster(struct swap_info_struct *si, return; if (!ci->count) { - free_cluster(si, ci); + if (ci->flags != CLUSTER_FLAG_FREE) + free_cluster(si, ci); } else if (ci->count != SWAPFILE_CLUSTER) { if (ci->flags != CLUSTER_FLAG_FRAG) move_cluster(si, ci, &si->frag_clusters[ci->order], @@ -858,6 +859,10 @@ static void swap_reclaim_full_clusters(struct swap_info_struct *si, bool force) offset++; } + /* in case no swap cache is reclaimed */ + if (ci->flags == CLUSTER_FLAG_NONE) + relocate_cluster(si, ci); + unlock_cluster(ci); if (to_scan <= 0) break; @@ -2641,7 +2646,6 @@ static void wait_for_allocation(struct swap_info_struct *si) for (offset = 0; offset < end; offset += SWAPFILE_CLUSTER) { ci = lock_cluster(si, offset); unlock_cluster(ci); - offset += SWAPFILE_CLUSTER; } } @@ -3542,6 +3546,10 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage, int nr) int err, i; si = swp_swap_info(entry); + if (WARN_ON_ONCE(!si)) { + pr_err("%s%08lx\n", Bad_file, entry.val); + return -EINVAL; + } offset = swp_offset(entry); VM_WARN_ON(nr > SWAPFILE_CLUSTER - offset % SWAPFILE_CLUSTER); diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index af3dfc3633db..d06453fa8aba 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -18,6 +18,7 @@ #include <asm/tlbflush.h> #include <asm/tlb.h> #include "internal.h" +#include "swap.h" static __always_inline bool validate_dst_vma(struct vm_area_struct *dst_vma, unsigned long dst_end) @@ -1076,16 +1077,14 @@ out: return err; } -static int move_swap_pte(struct mm_struct *mm, +static int move_swap_pte(struct mm_struct *mm, struct vm_area_struct *dst_vma, unsigned long dst_addr, unsigned long src_addr, pte_t *dst_pte, pte_t *src_pte, pte_t orig_dst_pte, pte_t orig_src_pte, pmd_t *dst_pmd, pmd_t dst_pmdval, - spinlock_t *dst_ptl, spinlock_t *src_ptl) + spinlock_t *dst_ptl, spinlock_t *src_ptl, + struct folio *src_folio) { - if (!pte_swp_exclusive(orig_src_pte)) - return -EBUSY; - double_pt_lock(dst_ptl, src_ptl); if (!is_pte_pages_stable(dst_pte, src_pte, orig_dst_pte, orig_src_pte, @@ -1094,6 +1093,16 @@ static int move_swap_pte(struct mm_struct *mm, return -EAGAIN; } + /* + * The src_folio resides in the swapcache, requiring an update to its + * index and mapping to align with the dst_vma, where a swap-in may + * occur and hit the swapcache after moving the PTE. + */ + if (src_folio) { + folio_move_anon_rmap(src_folio, dst_vma); + src_folio->index = linear_page_index(dst_vma, dst_addr); + } + orig_src_pte = ptep_get_and_clear(mm, src_addr, src_pte); set_pte_at(mm, dst_addr, dst_pte, orig_src_pte); double_pt_unlock(dst_ptl, src_ptl); @@ -1141,6 +1150,7 @@ static int move_pages_pte(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, __u64 mode) { swp_entry_t entry; + struct swap_info_struct *si = NULL; pte_t orig_src_pte, orig_dst_pte; pte_t src_folio_pte; spinlock_t *src_ptl, *dst_ptl; @@ -1240,6 +1250,7 @@ retry: */ if (!src_folio) { struct folio *folio; + bool locked; /* * Pin the page while holding the lock to be sure the @@ -1259,14 +1270,28 @@ retry: goto out; } + locked = folio_trylock(folio); + /* + * We avoid waiting for folio lock with a raised + * refcount for large folios because extra refcounts + * will result in split_folio() failing later and + * retrying. 
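
One hunk above is easy to miss: the wait_for_allocation() fix removes a double advance. The for-loop header already steps offset by SWAPFILE_CLUSTER, so the deleted statement made each iteration move two clusters and skip every other one. The bug shape, in a runnable miniature:

	#include <stdio.h>

	int main(void)
	{
		enum { STEP = 4, END = 32 };

		/* prints 0, 8, 16, 24: half the "clusters" are skipped */
		for (unsigned int offset = 0; offset < END; offset += STEP) {
			printf("%u\n", offset);
			offset += STEP;	/* the extra advance the patch removes */
		}
		return 0;
	}
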
If multiple tasks are trying to move a + * large folio we can end up livelocking. + */ + if (!locked && folio_test_large(folio)) { + spin_unlock(src_ptl); + err = -EAGAIN; + goto out; + } + folio_get(folio); src_folio = folio; src_folio_pte = orig_src_pte; spin_unlock(src_ptl); - if (!folio_trylock(src_folio)) { - pte_unmap(&orig_src_pte); - pte_unmap(&orig_dst_pte); + if (!locked) { + pte_unmap(src_pte); + pte_unmap(dst_pte); src_pte = dst_pte = NULL; /* now we can block and wait */ folio_lock(src_folio); @@ -1282,8 +1307,8 @@ retry: /* at this point we have src_folio locked */ if (folio_test_large(src_folio)) { /* split_folio() can block */ - pte_unmap(&orig_src_pte); - pte_unmap(&orig_dst_pte); + pte_unmap(src_pte); + pte_unmap(dst_pte); src_pte = dst_pte = NULL; err = split_folio(src_folio); if (err) @@ -1308,8 +1333,8 @@ retry: goto out; } if (!anon_vma_trylock_write(src_anon_vma)) { - pte_unmap(&orig_src_pte); - pte_unmap(&orig_dst_pte); + pte_unmap(src_pte); + pte_unmap(dst_pte); src_pte = dst_pte = NULL; /* now we can block and wait */ anon_vma_lock_write(src_anon_vma); @@ -1322,11 +1347,13 @@ retry: orig_dst_pte, orig_src_pte, dst_pmd, dst_pmdval, dst_ptl, src_ptl, src_folio); } else { + struct folio *folio = NULL; + entry = pte_to_swp_entry(orig_src_pte); if (non_swap_entry(entry)) { if (is_migration_entry(entry)) { - pte_unmap(&orig_src_pte); - pte_unmap(&orig_dst_pte); + pte_unmap(src_pte); + pte_unmap(dst_pte); src_pte = dst_pte = NULL; migration_entry_wait(mm, src_pmd, src_addr); err = -EAGAIN; @@ -1335,9 +1362,53 @@ retry: goto out; } - err = move_swap_pte(mm, dst_addr, src_addr, dst_pte, src_pte, - orig_dst_pte, orig_src_pte, dst_pmd, - dst_pmdval, dst_ptl, src_ptl); + if (!pte_swp_exclusive(orig_src_pte)) { + err = -EBUSY; + goto out; + } + + si = get_swap_device(entry); + if (unlikely(!si)) { + err = -EAGAIN; + goto out; + } + /* + * Verify the existence of the swapcache. If present, the folio's + * index and mapping must be updated even when the PTE is a swap + * entry. The anon_vma lock is not taken during this process since + * the folio has already been unmapped, and the swap entry is + * exclusive, preventing rmap walks. + * + * For large folios, return -EBUSY immediately, as split_folio() + * also returns -EBUSY when attempting to split unmapped large + * folios in the swapcache. This issue needs to be resolved + * separately to allow proper handling. 
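
The swapcache branch above applies the same drop-everything-then-block discipline as the earlier folio path. Distilled, using the patch's own names:

	if (!folio_trylock(src_folio)) {
		pte_unmap(src_pte);
		pte_unmap(dst_pte);
		src_pte = dst_pte = NULL;
		put_swap_device(si);	/* don't pin the device while sleeping,
					 * or swapoff could stall on us */
		si = NULL;
		folio_lock(src_folio);	/* now we can block and wait */
		goto retry;		/* and revalidate everything */
	}
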
+ */ + if (!src_folio) + folio = filemap_get_folio(swap_address_space(entry), + swap_cache_index(entry)); + if (!IS_ERR_OR_NULL(folio)) { + if (folio_test_large(folio)) { + err = -EBUSY; + folio_put(folio); + goto out; + } + src_folio = folio; + src_folio_pte = orig_src_pte; + if (!folio_trylock(src_folio)) { + pte_unmap(src_pte); + pte_unmap(dst_pte); + src_pte = dst_pte = NULL; + put_swap_device(si); + si = NULL; + /* now we can block and wait */ + folio_lock(src_folio); + goto retry; + } + } + err = move_swap_pte(mm, dst_vma, dst_addr, src_addr, dst_pte, src_pte, + orig_dst_pte, orig_src_pte, dst_pmd, dst_pmdval, + dst_ptl, src_ptl, src_folio); } out: @@ -1354,6 +1425,8 @@ out: if (src_pte) pte_unmap(src_pte); mmu_notifier_invalidate_range_end(&range); + if (si) + put_swap_device(si); return err; } @@ -1509,24 +1509,28 @@ int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm, static struct vm_area_struct *vma_modify(struct vma_merge_struct *vmg) { struct vm_area_struct *vma = vmg->vma; + unsigned long start = vmg->start; + unsigned long end = vmg->end; struct vm_area_struct *merged; /* First, try to merge. */ merged = vma_merge_existing_range(vmg); if (merged) return merged; + if (vmg_nomem(vmg)) + return ERR_PTR(-ENOMEM); /* Split any preceding portion of the VMA. */ - if (vma->vm_start < vmg->start) { - int err = split_vma(vmg->vmi, vma, vmg->start, 1); + if (vma->vm_start < start) { + int err = split_vma(vmg->vmi, vma, start, 1); if (err) return ERR_PTR(err); } /* Split any trailing portion of the VMA. */ - if (vma->vm_end > vmg->end) { - int err = split_vma(vmg->vmi, vma, vmg->end, 0); + if (vma->vm_end > end) { + int err = split_vma(vmg->vmi, vma, end, 0); if (err) return ERR_PTR(err); diff --git a/mm/vmalloc.c b/mm/vmalloc.c index a6e7acebe9ad..61981ee1c9d2 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -586,13 +586,13 @@ static int vmap_small_pages_range_noflush(unsigned long addr, unsigned long end, mask |= PGTBL_PGD_MODIFIED; err = vmap_pages_p4d_range(pgd, addr, next, prot, pages, &nr, &mask); if (err) - return err; + break; } while (pgd++, addr = next, addr != end); if (mask & ARCH_PAGE_TABLE_SYNC_MASK) arch_sync_kernel_mappings(start, end); - return 0; + return err; } /* diff --git a/mm/zswap.c b/mm/zswap.c index ac9d299e7d0c..23365e76a3ce 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -43,7 +43,7 @@ * statistics **********************************/ /* The number of compressed pages currently stored in zswap */ -atomic_long_t zswap_stored_pages = ATOMIC_INIT(0); +atomic_long_t zswap_stored_pages = ATOMIC_LONG_INIT(0); /* * The statistics below are not protected from concurrent access for diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index e45187b88220..41be38264493 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -131,7 +131,8 @@ int vlan_check_real_dev(struct net_device *real_dev, { const char *name = real_dev->name; - if (real_dev->features & NETIF_F_VLAN_CHALLENGED) { + if (real_dev->features & NETIF_F_VLAN_CHALLENGED || + real_dev->type != ARPHRD_ETHER) { pr_info("VLANs not supported on %s\n", name); NL_SET_ERR_MSG_MOD(extack, "VLANs not supported on device"); return -EOPNOTSUPP; diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 91d134961357..770a4dcf7f63 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -27,6 +27,7 @@ #include <linux/phy.h> #include <net/arp.h> #include <net/macsec.h> +#include <net/netdev_lock.h> #include "vlan.h" #include "vlanproc.h" @@ -377,7 +378,6 @@ static int vlan_hwtstamp_set(struct net_device *dev, 
static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; - const struct net_device_ops *ops = real_dev->netdev_ops; struct ifreq ifrr; int err = -EOPNOTSUPP; @@ -388,8 +388,7 @@ static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) case SIOCGMIIPHY: case SIOCGMIIREG: case SIOCSMIIREG: - if (netif_device_present(real_dev) && ops->ndo_eth_ioctl) - err = ops->ndo_eth_ioctl(real_dev, &ifrr, cmd); + err = dev_eth_ioctl(real_dev, &ifrr, cmd); break; } diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile index b51d8b071b56..1cc9be6de456 100644 --- a/net/batman-adv/Makefile +++ b/net/batman-adv/Makefile @@ -19,6 +19,7 @@ batman-adv-y += hard-interface.o batman-adv-y += hash.o batman-adv-$(CONFIG_BATMAN_ADV_DEBUG) += log.o batman-adv-y += main.o +batman-adv-y += mesh-interface.o batman-adv-$(CONFIG_BATMAN_ADV_MCAST) += multicast.o batman-adv-$(CONFIG_BATMAN_ADV_MCAST) += multicast_forw.o batman-adv-y += netlink.o @@ -26,7 +27,6 @@ batman-adv-$(CONFIG_BATMAN_ADV_NC) += network-coding.o batman-adv-y += originator.o batman-adv-y += routing.o batman-adv-y += send.o -batman-adv-y += soft-interface.o batman-adv-$(CONFIG_BATMAN_ADV_TRACING) += trace.o batman-adv-y += tp_meter.o batman-adv-y += translation-table.o diff --git a/net/batman-adv/bat_algo.c b/net/batman-adv/bat_algo.c index 4eee53d19eb0..c0c982b6f029 100644 --- a/net/batman-adv/bat_algo.c +++ b/net/batman-adv/bat_algo.c @@ -90,15 +90,15 @@ int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops) } /** - * batadv_algo_select() - Select algorithm of soft interface - * @bat_priv: the bat priv with all the soft interface information + * batadv_algo_select() - Select algorithm of mesh interface + * @bat_priv: the bat priv with all the mesh interface information * @name: name of the algorithm to select * - * The algorithm callbacks for the soft interface will be set when the algorithm + * The algorithm callbacks for the mesh interface will be set when the algorithm * with the correct name was found. Any previous selected algorithm will not be * deinitialized and the new selected algorithm will also not be initialized. * It is therefore not allowed to call batadv_algo_select outside the creation - * function of the soft interface. + * function of the mesh interface. 
* * Return: 0 on success or negative error number in case of failure */ diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 07ae5dd1f150..7b4f659612a3 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -23,6 +23,7 @@ #include <linux/kref.h> #include <linux/list.h> #include <linux/lockdep.h> +#include <linux/minmax.h> #include <linux/mutex.h> #include <linux/netdevice.h> #include <linux/netlink.h> @@ -129,7 +130,7 @@ static u8 batadv_ring_buffer_avg(const u8 lq_recv[]) /** * batadv_iv_ogm_orig_get() - retrieve or create (if does not exist) an * originator - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @addr: mac address of the originator * * Return: the originator object corresponding to the passed mac address or NULL @@ -333,7 +334,7 @@ batadv_iv_ogm_aggr_packet(int buff_pos, int packet_len, static void batadv_iv_ogm_send_to_if(struct batadv_forw_packet *forw_packet, struct batadv_hard_iface *hard_iface) { - struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); + struct batadv_priv *bat_priv = netdev_priv(hard_iface->mesh_iface); const char *fwd_str; u8 packet_num; s16 buff_pos; @@ -355,7 +356,7 @@ static void batadv_iv_ogm_send_to_if(struct batadv_forw_packet *forw_packet, /* we might have aggregated direct link packets with an * ordinary base packet */ - if (forw_packet->direct_link_flags & BIT(packet_num) && + if (test_bit(packet_num, forw_packet->direct_link_flags) && forw_packet->if_incoming == hard_iface) batadv_ogm_packet->flags |= BATADV_DIRECTLINK; else @@ -396,20 +397,20 @@ static void batadv_iv_ogm_send_to_if(struct batadv_forw_packet *forw_packet, /* send a batman ogm packet */ static void batadv_iv_ogm_emit(struct batadv_forw_packet *forw_packet) { - struct net_device *soft_iface; + struct net_device *mesh_iface; if (!forw_packet->if_incoming) { pr_err("Error - can't forward packet: incoming iface not specified\n"); return; } - soft_iface = forw_packet->if_incoming->soft_iface; + mesh_iface = forw_packet->if_incoming->mesh_iface; if (WARN_ON(!forw_packet->if_outgoing)) return; - if (forw_packet->if_outgoing->soft_iface != soft_iface) { - pr_warn("%s: soft interface switch for queued OGM\n", __func__); + if (forw_packet->if_outgoing->mesh_iface != mesh_iface) { + pr_warn("%s: mesh interface switch for queued OGM\n", __func__); return; } @@ -424,7 +425,7 @@ static void batadv_iv_ogm_emit(struct batadv_forw_packet *forw_packet) * batadv_iv_ogm_can_aggregate() - find out if an OGM can be aggregated on an * existing forward packet * @new_bat_ogm_packet: OGM packet to be aggregated - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @packet_len: (total) length of the OGM * @send_time: timestamp (jiffies) when the packet is to be sent * @directlink: true if this is a direct link packet @@ -444,28 +445,37 @@ batadv_iv_ogm_can_aggregate(const struct batadv_ogm_packet *new_bat_ogm_packet, const struct batadv_forw_packet *forw_packet) { struct batadv_ogm_packet *batadv_ogm_packet; - int aggregated_bytes = forw_packet->packet_len + packet_len; + unsigned int aggregated_bytes = forw_packet->packet_len + packet_len; struct batadv_hard_iface *primary_if = NULL; + u8 packet_num = forw_packet->num_packets; bool res = false; unsigned long aggregation_end_time; + unsigned int max_bytes; batadv_ogm_packet = (struct batadv_ogm_packet *)forw_packet->skb->data; 
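
The direct_link_flags conversion running through these batman-adv hunks swaps a fixed-width BIT() mask for a real bitmap, paired with the new BATADV_MAX_AGGREGATION_PACKETS bound. The before/after idiom, as a sketch (mark_direct_link() is a hypothetical consumer):

	DECLARE_BITMAP(direct_link_flags, BATADV_MAX_AGGREGATION_PACKETS);

	bitmap_zero(direct_link_flags, BATADV_MAX_AGGREGATION_PACKETS);
	set_bit(packet_num, direct_link_flags);		/* was: flags |= BIT(packet_num) */
	if (test_bit(packet_num, direct_link_flags))	/* was: flags & BIT(packet_num) */
		mark_direct_link();

A plain integer mask misbehaves once packet_num reaches the word size; the bitmap plus the explicit packet-count check make the aggregation limit enforceable.
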
aggregation_end_time = send_time; aggregation_end_time += msecs_to_jiffies(BATADV_MAX_AGGREGATION_MS); + max_bytes = min_t(unsigned int, if_outgoing->net_dev->mtu, + BATADV_MAX_AGGREGATION_BYTES); + /* we can aggregate the current packet to this aggregated packet * if: * * - the send time is within our MAX_AGGREGATION_MS time * - the resulting packet won't be bigger than - * MAX_AGGREGATION_BYTES + * MAX_AGGREGATION_BYTES and MTU of the outgoing interface + * - the number of packets is lower than MAX_AGGREGATION_PACKETS * otherwise aggregation is not possible */ if (!time_before(send_time, forw_packet->send_time) || !time_after_eq(aggregation_end_time, forw_packet->send_time)) return false; - if (aggregated_bytes > BATADV_MAX_AGGREGATION_BYTES) + if (aggregated_bytes > max_bytes) + return false; + + if (packet_num >= BATADV_MAX_AGGREGATION_PACKETS) return false; /* packet is not leaving on the same interface. */ @@ -540,16 +550,16 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, struct batadv_hard_iface *if_outgoing, int own_packet) { - struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); + struct batadv_priv *bat_priv = netdev_priv(if_incoming->mesh_iface); struct batadv_forw_packet *forw_packet_aggr; struct sk_buff *skb; unsigned char *skb_buff; unsigned int skb_size; atomic_t *queue_left = own_packet ? NULL : &bat_priv->batman_queue_left; - if (atomic_read(&bat_priv->aggregated_ogms) && - packet_len < BATADV_MAX_AGGREGATION_BYTES) - skb_size = BATADV_MAX_AGGREGATION_BYTES; + if (atomic_read(&bat_priv->aggregated_ogms)) + skb_size = max_t(unsigned int, BATADV_MAX_AGGREGATION_BYTES, + packet_len); else skb_size = packet_len; @@ -574,12 +584,13 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, memcpy(skb_buff, packet_buff, packet_len); forw_packet_aggr->own = own_packet; - forw_packet_aggr->direct_link_flags = BATADV_NO_FLAGS; + bitmap_zero(forw_packet_aggr->direct_link_flags, + BATADV_MAX_AGGREGATION_PACKETS); forw_packet_aggr->send_time = send_time; /* save packet direct link flag status */ if (direct_link) - forw_packet_aggr->direct_link_flags |= 1; + set_bit(0, forw_packet_aggr->direct_link_flags); INIT_DELAYED_WORK(&forw_packet_aggr->delayed_work, batadv_iv_send_outstanding_bat_ogm_packet); @@ -592,22 +603,20 @@ static void batadv_iv_ogm_aggregate(struct batadv_forw_packet *forw_packet_aggr, const unsigned char *packet_buff, int packet_len, bool direct_link) { - unsigned long new_direct_link_flag; - skb_put_data(forw_packet_aggr->skb, packet_buff, packet_len); forw_packet_aggr->packet_len += packet_len; - forw_packet_aggr->num_packets++; /* save packet direct link flag status */ - if (direct_link) { - new_direct_link_flag = BIT(forw_packet_aggr->num_packets); - forw_packet_aggr->direct_link_flags |= new_direct_link_flag; - } + if (direct_link) + set_bit(forw_packet_aggr->num_packets, + forw_packet_aggr->direct_link_flags); + + forw_packet_aggr->num_packets++; } /** * batadv_iv_ogm_queue_add() - queue up an OGM for transmission - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @packet_buff: pointer to the OGM * @packet_len: (total) length of the OGM * @if_incoming: interface where the packet was received @@ -686,7 +695,7 @@ static void batadv_iv_ogm_forward(struct batadv_orig_node *orig_node, struct batadv_hard_iface *if_incoming, struct batadv_hard_iface *if_outgoing) { - struct batadv_priv *bat_priv = 
netdev_priv(if_incoming->soft_iface); + struct batadv_priv *bat_priv = netdev_priv(if_incoming->mesh_iface); u16 tvlv_len; if (batadv_ogm_packet->ttl <= 1) { @@ -739,7 +748,7 @@ static void batadv_iv_ogm_forward(struct batadv_orig_node *orig_node, static void batadv_iv_ogm_slide_own_bcast_window(struct batadv_hard_iface *hard_iface) { - struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); + struct batadv_priv *bat_priv = netdev_priv(hard_iface->mesh_iface); struct batadv_hashtable *hash = bat_priv->orig_hash; struct hlist_head *head; struct batadv_orig_node *orig_node; @@ -778,7 +787,7 @@ batadv_iv_ogm_slide_own_bcast_window(struct batadv_hard_iface *hard_iface) */ static void batadv_iv_ogm_schedule_buff(struct batadv_hard_iface *hard_iface) { - struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); + struct batadv_priv *bat_priv = netdev_priv(hard_iface->mesh_iface); unsigned char **ogm_buff = &hard_iface->bat_iv.ogm_buff; struct batadv_ogm_packet *batadv_ogm_packet; struct batadv_hard_iface *primary_if, *tmp_hard_iface; @@ -840,7 +849,7 @@ static void batadv_iv_ogm_schedule_buff(struct batadv_hard_iface *hard_iface) */ rcu_read_lock(); list_for_each_entry_rcu(tmp_hard_iface, &batadv_hardif_list, list) { - if (tmp_hard_iface->soft_iface != hard_iface->soft_iface) + if (tmp_hard_iface->mesh_iface != hard_iface->mesh_iface) continue; if (!kref_get_unless_zero(&tmp_hard_iface->refcount)) @@ -901,7 +910,7 @@ static u8 batadv_iv_orig_ifinfo_sum(struct batadv_orig_node *orig_node, /** * batadv_iv_ogm_orig_update() - use OGM to update corresponding data in an * originator - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig_node: the orig node who originally emitted the ogm packet * @orig_ifinfo: ifinfo for the outgoing interface of the orig_node * @ethhdr: Ethernet header of the OGM @@ -1065,7 +1074,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node, struct batadv_hard_iface *if_incoming, struct batadv_hard_iface *if_outgoing) { - struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); + struct batadv_priv *bat_priv = netdev_priv(if_incoming->mesh_iface); struct batadv_neigh_node *neigh_node = NULL, *tmp_neigh_node; struct batadv_neigh_ifinfo *neigh_ifinfo; u8 total_count; @@ -1207,7 +1216,7 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr, const struct batadv_hard_iface *if_incoming, struct batadv_hard_iface *if_outgoing) { - struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); + struct batadv_priv *bat_priv = netdev_priv(if_incoming->mesh_iface); struct batadv_orig_node *orig_node; struct batadv_orig_ifinfo *orig_ifinfo = NULL; struct batadv_neigh_node *neigh_node; @@ -1309,7 +1318,7 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset, struct batadv_hard_iface *if_incoming, struct batadv_hard_iface *if_outgoing) { - struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); + struct batadv_priv *bat_priv = netdev_priv(if_incoming->mesh_iface); struct batadv_hardif_neigh_node *hardif_neigh = NULL; struct batadv_neigh_node *router = NULL; struct batadv_neigh_node *router_router = NULL; @@ -1549,7 +1558,7 @@ static void batadv_iv_ogm_process_reply(struct batadv_ogm_packet *ogm_packet, static void batadv_iv_ogm_process(const struct sk_buff *skb, int ogm_offset, struct batadv_hard_iface *if_incoming) { - struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); + struct 
batadv_priv *bat_priv = netdev_priv(if_incoming->mesh_iface); struct batadv_orig_node *orig_neigh_node, *orig_node; struct batadv_hard_iface *hard_iface; struct batadv_ogm_packet *ogm_packet; @@ -1599,7 +1608,7 @@ static void batadv_iv_ogm_process(const struct sk_buff *skb, int ogm_offset, if (hard_iface->if_status != BATADV_IF_ACTIVE) continue; - if (hard_iface->soft_iface != if_incoming->soft_iface) + if (hard_iface->mesh_iface != if_incoming->mesh_iface) continue; if (batadv_compare_eth(ethhdr->h_source, @@ -1664,7 +1673,7 @@ static void batadv_iv_ogm_process(const struct sk_buff *skb, int ogm_offset, if (hard_iface->if_status != BATADV_IF_ACTIVE) continue; - if (hard_iface->soft_iface != bat_priv->soft_iface) + if (hard_iface->mesh_iface != bat_priv->mesh_iface) continue; if (!kref_get_unless_zero(&hard_iface->refcount)) @@ -1690,7 +1699,7 @@ static void batadv_iv_send_outstanding_bat_ogm_packet(struct work_struct *work) delayed_work = to_delayed_work(work); forw_packet = container_of(delayed_work, struct batadv_forw_packet, delayed_work); - bat_priv = netdev_priv(forw_packet->if_incoming->soft_iface); + bat_priv = netdev_priv(forw_packet->if_incoming->mesh_iface); if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING) { dropped = true; @@ -1721,7 +1730,7 @@ out: static int batadv_iv_ogm_receive(struct sk_buff *skb, struct batadv_hard_iface *if_incoming) { - struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); + struct batadv_priv *bat_priv = netdev_priv(if_incoming->mesh_iface); struct batadv_ogm_packet *ogm_packet; u8 *packet_pos; int ogm_offset; @@ -1800,7 +1809,7 @@ batadv_iv_ogm_neigh_get_tq_avg(struct batadv_neigh_node *neigh_node, * @msg: Netlink message to dump into * @portid: Port making netlink request * @seq: Sequence number of netlink message - * @bat_priv: The bat priv with all the soft interface information + * @bat_priv: The bat priv with all the mesh interface information * @if_outgoing: Limit dump to entries with this outgoing interface * @orig_node: Originator to dump * @neigh_node: Single hops neighbour @@ -1863,7 +1872,7 @@ batadv_iv_ogm_orig_dump_subentry(struct sk_buff *msg, u32 portid, u32 seq, * @msg: Netlink message to dump into * @portid: Port making netlink request * @seq: Sequence number of netlink message - * @bat_priv: The bat priv with all the soft interface information + * @bat_priv: The bat priv with all the mesh interface information * @if_outgoing: Limit dump to entries with this outgoing interface * @orig_node: Originator to dump * @sub_s: Number of sub entries to skip @@ -1925,7 +1934,7 @@ batadv_iv_ogm_orig_dump_entry(struct sk_buff *msg, u32 portid, u32 seq, * @msg: Netlink message to dump into * @portid: Port making netlink request * @seq: Sequence number of netlink message - * @bat_priv: The bat priv with all the soft interface information + * @bat_priv: The bat priv with all the mesh interface information * @if_outgoing: Limit dump to entries with this outgoing interface * @head: Bucket to be dumped * @idx_s: Number of entries to be skipped @@ -1966,7 +1975,7 @@ batadv_iv_ogm_orig_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq, * batadv_iv_ogm_orig_dump() - Dump the originators into a message * @msg: Netlink message to dump into * @cb: Control block containing additional options - * @bat_priv: The bat priv with all the soft interface information + * @bat_priv: The bat priv with all the mesh interface information * @if_outgoing: Limit dump to entries with this outgoing interface */ static void @@ -2088,7 
+2097,7 @@ batadv_iv_ogm_neigh_dump_neigh(struct sk_buff *msg, u32 portid, u32 seq, * @msg: Netlink message to dump into * @portid: Port making netlink request * @seq: Sequence number of netlink message - * @bat_priv: The bat priv with all the soft interface information + * @bat_priv: The bat priv with all the mesh interface information * @hard_iface: Hard interface to dump the neighbours for * @idx_s: Number of entries to skip * @@ -2125,7 +2134,7 @@ batadv_iv_ogm_neigh_dump_hardif(struct sk_buff *msg, u32 portid, u32 seq, * batadv_iv_ogm_neigh_dump() - Dump the neighbours into a message * @msg: Netlink message to dump into * @cb: Control block containing additional options - * @bat_priv: The bat priv with all the soft interface information + * @bat_priv: The bat priv with all the mesh interface information * @single_hardif: Limit dump to this hard interface */ static void @@ -2152,7 +2161,7 @@ batadv_iv_ogm_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb, } else { list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { - if (hard_iface->soft_iface != bat_priv->soft_iface) + if (hard_iface->mesh_iface != bat_priv->mesh_iface) continue; if (i_hardif++ < i_hardif_s) @@ -2236,7 +2245,7 @@ static void batadv_iv_iface_enabled(struct batadv_hard_iface *hard_iface) /** * batadv_iv_init_sel_class() - initialize GW selection class - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information */ static void batadv_iv_init_sel_class(struct batadv_priv *bat_priv) { @@ -2391,7 +2400,7 @@ out: * @msg: Netlink message to dump into * @portid: Port making netlink request * @cb: Control block containing additional options - * @bat_priv: The bat priv with all the soft interface information + * @bat_priv: The bat priv with all the mesh interface information * @gw_node: Gateway to be dumped * * Return: Error code, or 0 on success @@ -2466,7 +2475,7 @@ out: * batadv_iv_gw_dump() - Dump gateways into a message * @msg: Netlink message to dump into * @cb: Control block containing additional options - * @bat_priv: The bat priv with all the soft interface information + * @bat_priv: The bat priv with all the mesh interface information */ static void batadv_iv_gw_dump(struct sk_buff *msg, struct netlink_callback *cb, struct batadv_priv *bat_priv) diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c index d35479c465e2..c16c2e60889d 100644 --- a/net/batman-adv/bat_v.c +++ b/net/batman-adv/bat_v.c @@ -43,7 +43,7 @@ static void batadv_v_iface_activate(struct batadv_hard_iface *hard_iface) { - struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); + struct batadv_priv *bat_priv = netdev_priv(hard_iface->mesh_iface); struct batadv_hard_iface *primary_if; primary_if = batadv_primary_if_get_selected(bat_priv); @@ -97,7 +97,7 @@ static void batadv_v_primary_iface_set(struct batadv_hard_iface *hard_iface) */ static void batadv_v_iface_update_mac(struct batadv_hard_iface *hard_iface) { - struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); + struct batadv_priv *bat_priv = netdev_priv(hard_iface->mesh_iface); struct batadv_hard_iface *primary_if; primary_if = batadv_primary_if_get_selected(bat_priv); @@ -166,7 +166,7 @@ batadv_v_neigh_dump_neigh(struct sk_buff *msg, u32 portid, u32 seq, * @msg: Netlink message to dump into * @portid: Port making netlink request * @seq: Sequence number of netlink message - * @bat_priv: The bat priv with all the soft interface information + * @bat_priv: The bat priv 
with all the mesh interface information * @hard_iface: The hard interface to be dumped * @idx_s: Entries to be skipped * @@ -203,7 +203,7 @@ batadv_v_neigh_dump_hardif(struct sk_buff *msg, u32 portid, u32 seq, * message * @msg: Netlink message to dump into * @cb: Control block containing additional options - * @bat_priv: The bat priv with all the soft interface information + * @bat_priv: The bat priv with all the mesh interface information * @single_hardif: Limit dumping to this hard interface */ static void @@ -228,7 +228,7 @@ batadv_v_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb, } } else { list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { - if (hard_iface->soft_iface != bat_priv->soft_iface) + if (hard_iface->mesh_iface != bat_priv->mesh_iface) continue; if (i_hardif++ < i_hardif_s) @@ -254,7 +254,7 @@ batadv_v_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb, * @msg: Netlink message to dump into * @portid: Port making netlink request * @seq: Sequence number of netlink message - * @bat_priv: The bat priv with all the soft interface information + * @bat_priv: The bat priv with all the mesh interface information * @if_outgoing: Limit dump to entries with this outgoing interface * @orig_node: Originator to dump * @neigh_node: Single hops neighbour @@ -322,7 +322,7 @@ batadv_v_orig_dump_subentry(struct sk_buff *msg, u32 portid, u32 seq, * @msg: Netlink message to dump into * @portid: Port making netlink request * @seq: Sequence number of netlink message - * @bat_priv: The bat priv with all the soft interface information + * @bat_priv: The bat priv with all the mesh interface information * @if_outgoing: Limit dump to entries with this outgoing interface * @orig_node: Originator to dump * @sub_s: Number of sub entries to skip @@ -374,7 +374,7 @@ batadv_v_orig_dump_entry(struct sk_buff *msg, u32 portid, u32 seq, * @msg: Netlink message to dump into * @portid: Port making netlink request * @seq: Sequence number of netlink message - * @bat_priv: The bat priv with all the soft interface information + * @bat_priv: The bat priv with all the mesh interface information * @if_outgoing: Limit dump to entries with this outgoing interface * @head: Bucket to be dumped * @idx_s: Number of entries to be skipped @@ -414,7 +414,7 @@ batadv_v_orig_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq, * batadv_v_orig_dump() - Dump the originators into a message * @msg: Netlink message to dump into * @cb: Control block containing additional options - * @bat_priv: The bat priv with all the soft interface information + * @bat_priv: The bat priv with all the mesh interface information * @if_outgoing: Limit dump to entries with this outgoing interface */ static void @@ -502,7 +502,7 @@ err_ifinfo1: /** * batadv_v_init_sel_class() - initialize GW selection class - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information */ static void batadv_v_init_sel_class(struct batadv_priv *bat_priv) { @@ -553,7 +553,7 @@ out: /** * batadv_v_gw_get_best_gw_node() - retrieve the best GW node - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * * Return: the GW node having the best GW-metric, NULL if no GW is known */ @@ -590,7 +590,7 @@ next: /** * batadv_v_gw_is_eligible() - check if a originator would be selected as GW - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the 
mesh interface information * @curr_gw_orig: originator representing the currently selected GW * @orig_node: the originator representing the new candidate * @@ -647,7 +647,7 @@ out: * @msg: Netlink message to dump into * @portid: Port making netlink request * @cb: Control block containing additional options - * @bat_priv: The bat priv with all the soft interface information + * @bat_priv: The bat priv with all the mesh interface information * @gw_node: Gateway to be dumped * * Return: Error code, or 0 on success @@ -746,7 +746,7 @@ out: * batadv_v_gw_dump() - Dump gateways into a message * @msg: Netlink message to dump into * @cb: Control block containing additional options - * @bat_priv: The bat priv with all the soft interface information + * @bat_priv: The bat priv with all the mesh interface information */ static void batadv_v_gw_dump(struct sk_buff *msg, struct netlink_callback *cb, struct batadv_priv *bat_priv) diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c index b065578b4436..70d6778da0d7 100644 --- a/net/batman-adv/bat_v_elp.c +++ b/net/batman-adv/bat_v_elp.c @@ -82,7 +82,7 @@ static bool batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh, u32 *pthroughput) { struct batadv_hard_iface *hard_iface = neigh->if_incoming; - struct net_device *soft_iface = hard_iface->soft_iface; + struct net_device *mesh_iface = hard_iface->mesh_iface; struct ethtool_link_ksettings link_settings; struct net_device *real_netdev; struct station_info sinfo; @@ -92,7 +92,7 @@ static bool batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh, /* don't query throughput when no longer associated with any * batman-adv interface */ - if (!soft_iface) + if (!mesh_iface) return false; /* if the user specified a customised value for this interface, then @@ -180,7 +180,7 @@ static bool batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh, default_throughput: if (!(hard_iface->bat_v.flags & BATADV_WARNING_DEFAULT)) { - batadv_info(soft_iface, + batadv_info(mesh_iface, "WiFi driver or ethtool info does not provide information about link speeds on interface %s, therefore defaulting to hardcoded throughput values of %u.%1u Mbps. 
Consider overriding the throughput manually or checking your driver.\n", hard_iface->net_dev->name, BATADV_THROUGHPUT_DEFAULT_VALUE / 10, @@ -226,7 +226,7 @@ static bool batadv_v_elp_wifi_neigh_probe(struct batadv_hardif_neigh_node *neigh) { struct batadv_hard_iface *hard_iface = neigh->if_incoming; - struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); + struct batadv_priv *bat_priv = netdev_priv(hard_iface->mesh_iface); unsigned long last_tx_diff; struct sk_buff *skb; int probe_len, i; @@ -295,7 +295,7 @@ static void batadv_v_elp_periodic_work(struct work_struct *work) bat_v = container_of(work, struct batadv_hard_iface_bat_v, elp_wq.work); hard_iface = container_of(bat_v, struct batadv_hard_iface, bat_v); - bat_priv = netdev_priv(hard_iface->soft_iface); + bat_priv = netdev_priv(hard_iface->mesh_iface); if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING) goto out; @@ -476,7 +476,7 @@ void batadv_v_elp_primary_iface_set(struct batadv_hard_iface *primary_iface) /* update orig field of every elp iface belonging to this mesh */ rcu_read_lock(); list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { - if (primary_iface->soft_iface != hard_iface->soft_iface) + if (primary_iface->mesh_iface != hard_iface->mesh_iface) continue; batadv_v_elp_iface_activate(primary_iface, hard_iface); @@ -486,7 +486,7 @@ void batadv_v_elp_primary_iface_set(struct batadv_hard_iface *primary_iface) /** * batadv_v_elp_neigh_update() - update an ELP neighbour node - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @neigh_addr: the neighbour interface address * @if_incoming: the interface the packet was received through * @elp_packet: the received ELP packet @@ -552,7 +552,7 @@ orig_free: int batadv_v_elp_packet_recv(struct sk_buff *skb, struct batadv_hard_iface *if_incoming) { - struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); + struct batadv_priv *bat_priv = netdev_priv(if_incoming->mesh_iface); struct batadv_elp_packet *elp_packet; struct batadv_hard_iface *primary_if; struct ethhdr *ethhdr; diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c index e503ee0d896b..3b9065a3c746 100644 --- a/net/batman-adv/bat_v_ogm.c +++ b/net/batman-adv/bat_v_ogm.c @@ -45,7 +45,7 @@ /** * batadv_v_ogm_orig_get() - retrieve and possibly create an originator node - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @addr: the address of the originator * * Return: the orig_node corresponding to the specified address. 
If such an @@ -96,7 +96,7 @@ static void batadv_v_ogm_start_queue_timer(struct batadv_hard_iface *hard_iface) /** * batadv_v_ogm_start_timer() - restart the OGM sending timer - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information */ static void batadv_v_ogm_start_timer(struct batadv_priv *bat_priv) { @@ -121,7 +121,7 @@ static void batadv_v_ogm_start_timer(struct batadv_priv *bat_priv) static void batadv_v_ogm_send_to_if(struct sk_buff *skb, struct batadv_hard_iface *hard_iface) { - struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); + struct batadv_priv *bat_priv = netdev_priv(hard_iface->mesh_iface); if (hard_iface->if_status != BATADV_IF_ACTIVE) { kfree_skb(skb); @@ -239,7 +239,7 @@ static void batadv_v_ogm_aggr_send(struct batadv_hard_iface *hard_iface) static void batadv_v_ogm_queue_on_if(struct sk_buff *skb, struct batadv_hard_iface *hard_iface) { - struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); + struct batadv_priv *bat_priv = netdev_priv(hard_iface->mesh_iface); if (!atomic_read(&bat_priv->aggregated_ogms)) { batadv_v_ogm_send_to_if(skb, hard_iface); @@ -256,10 +256,10 @@ static void batadv_v_ogm_queue_on_if(struct sk_buff *skb, } /** - * batadv_v_ogm_send_softif() - periodic worker broadcasting the own OGM - * @bat_priv: the bat priv with all the soft interface information + * batadv_v_ogm_send_meshif() - periodic worker broadcasting the own OGM + * @bat_priv: the bat priv with all the mesh interface information */ -static void batadv_v_ogm_send_softif(struct batadv_priv *bat_priv) +static void batadv_v_ogm_send_meshif(struct batadv_priv *bat_priv) { struct batadv_hard_iface *hard_iface; struct batadv_ogm2_packet *ogm_packet; @@ -302,7 +302,7 @@ static void batadv_v_ogm_send_softif(struct batadv_priv *bat_priv) /* broadcast on every interface */ rcu_read_lock(); list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { - if (hard_iface->soft_iface != bat_priv->soft_iface) + if (hard_iface->mesh_iface != bat_priv->mesh_iface) continue; if (!kref_get_unless_zero(&hard_iface->refcount)) @@ -373,7 +373,7 @@ static void batadv_v_ogm_send(struct work_struct *work) bat_priv = container_of(bat_v, struct batadv_priv, bat_v); mutex_lock(&bat_priv->bat_v.ogm_buff_mutex); - batadv_v_ogm_send_softif(bat_priv); + batadv_v_ogm_send_meshif(bat_priv); mutex_unlock(&bat_priv->bat_v.ogm_buff_mutex); } @@ -408,7 +408,7 @@ void batadv_v_ogm_aggr_work(struct work_struct *work) */ int batadv_v_ogm_iface_enable(struct batadv_hard_iface *hard_iface) { - struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); + struct batadv_priv *bat_priv = netdev_priv(hard_iface->mesh_iface); batadv_v_ogm_start_queue_timer(hard_iface); batadv_v_ogm_start_timer(bat_priv); @@ -435,7 +435,7 @@ void batadv_v_ogm_iface_disable(struct batadv_hard_iface *hard_iface) */ void batadv_v_ogm_primary_iface_set(struct batadv_hard_iface *primary_iface) { - struct batadv_priv *bat_priv = netdev_priv(primary_iface->soft_iface); + struct batadv_priv *bat_priv = netdev_priv(primary_iface->mesh_iface); struct batadv_ogm2_packet *ogm_packet; mutex_lock(&bat_priv->bat_v.ogm_buff_mutex); @@ -452,7 +452,7 @@ unlock: /** * batadv_v_forward_penalty() - apply a penalty to the throughput metric * forwarded with B.A.T.M.A.N. 
V OGMs - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @if_incoming: the interface where the OGM has been received * @if_outgoing: the interface where the OGM has to be forwarded to * @throughput: the current throughput @@ -505,7 +505,7 @@ static u32 batadv_v_forward_penalty(struct batadv_priv *bat_priv, /** * batadv_v_ogm_forward() - check conditions and forward an OGM to the given * outgoing interface - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @ogm_received: previously received OGM to be forwarded * @orig_node: the originator which has been updated * @neigh_node: the neigh_node through with the OGM has been received @@ -592,7 +592,7 @@ out: /** * batadv_v_ogm_metric_update() - update route metric based on OGM - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @ogm2: OGM2 structure * @orig_node: Originator structure for which the OGM has been received * @neigh_node: the neigh_node through with the OGM has been received @@ -675,7 +675,7 @@ out: /** * batadv_v_ogm_route_update() - update routes based on OGM - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @ethhdr: the Ethernet header of the OGM2 * @ogm2: OGM2 structure * @orig_node: Originator structure for which the OGM has been received @@ -770,7 +770,7 @@ out: /** * batadv_v_ogm_process_per_outif() - process a batman v OGM for an outgoing if - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @ethhdr: the Ethernet header of the OGM2 * @ogm2: OGM2 structure * @orig_node: Originator structure for which the OGM has been received @@ -852,7 +852,7 @@ batadv_v_ogm_aggr_packet(int buff_pos, int packet_len, static void batadv_v_ogm_process(const struct sk_buff *skb, int ogm_offset, struct batadv_hard_iface *if_incoming) { - struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); + struct batadv_priv *bat_priv = netdev_priv(if_incoming->mesh_iface); struct ethhdr *ethhdr; struct batadv_orig_node *orig_node = NULL; struct batadv_hardif_neigh_node *hardif_neigh = NULL; @@ -926,7 +926,7 @@ static void batadv_v_ogm_process(const struct sk_buff *skb, int ogm_offset, if (hard_iface->if_status != BATADV_IF_ACTIVE) continue; - if (hard_iface->soft_iface != bat_priv->soft_iface) + if (hard_iface->mesh_iface != bat_priv->mesh_iface) continue; if (!kref_get_unless_zero(&hard_iface->refcount)) @@ -985,7 +985,7 @@ out: int batadv_v_ogm_packet_recv(struct sk_buff *skb, struct batadv_hard_iface *if_incoming) { - struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); + struct batadv_priv *bat_priv = netdev_priv(if_incoming->mesh_iface); struct batadv_ogm2_packet *ogm_packet; struct ethhdr *ethhdr; int ogm_offset; @@ -1036,7 +1036,7 @@ free_skb: /** * batadv_v_ogm_init() - initialise the OGM2 engine - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * * Return: 0 on success or a negative error code in case of failure */ @@ -1071,7 +1071,7 @@ int batadv_v_ogm_init(struct batadv_priv *bat_priv) /** * batadv_v_ogm_free() - free OGM private resources - * @bat_priv: the bat priv with all the soft interface information + 
* @bat_priv: the bat priv with all the mesh interface information */ void batadv_v_ogm_free(struct batadv_priv *bat_priv) { diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c index 649c41f393e1..2c49b2711650 100644 --- a/net/batman-adv/bitarray.c +++ b/net/batman-adv/bitarray.c @@ -23,7 +23,7 @@ static void batadv_bitmap_shift_left(unsigned long *seq_bits, s32 n) /** * batadv_bit_get_packet() - receive and process one packet within the sequence * number window - * @priv: the bat priv with all the soft interface information + * @priv: the bat priv with all the mesh interface information * @seq_bits: pointer to the sequence number receive packet * @seq_num_diff: difference between the current/received sequence number and * the last sequence number diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 8c814f790d17..747755647c6a 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -209,7 +209,7 @@ static void batadv_claim_put(struct batadv_bla_claim *claim) /** * batadv_claim_hash_find() - looks for a claim in the claim hash - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @data: search data (may be local/static data) * * Return: claim if found or NULL otherwise. @@ -248,7 +248,7 @@ batadv_claim_hash_find(struct batadv_priv *bat_priv, /** * batadv_backbone_hash_find() - looks for a backbone gateway in the hash - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @addr: the address of the originator * @vid: the VLAN ID * @@ -332,7 +332,7 @@ batadv_bla_del_backbone_claims(struct batadv_bla_backbone_gw *backbone_gw) /** * batadv_bla_send_claim() - sends a claim frame according to the provided info - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @mac: the mac address to be announced within the claim * @vid: the VLAN ID * @claimtype: the type of the claim (CLAIM, UNCLAIM, ANNOUNCE, ...) 
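Nearly every hunk in this series is the same mechanical substitution: the batman-adv virtual interface formerly named soft_iface is now mesh_iface, both as a struct field and in the @bat_priv kernel-doc lines. The recurring access pattern, condensed into a standalone sketch (the helper name is hypothetical; the field name is the one introduced by this series):

/* illustrative only, not part of the patch: resolve the per-mesh private
 * data from a slave (hard) interface; hard_iface->mesh_iface replaces
 * hard_iface->soft_iface and is NULL while the device is not enslaved
 */
static struct batadv_priv *
example_priv_from_hardif(const struct batadv_hard_iface *hard_iface)
{
	if (!hard_iface->mesh_iface)
		return NULL;

	return netdev_priv(hard_iface->mesh_iface);
}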
@@ -343,7 +343,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, const u8 *mac, struct sk_buff *skb; struct ethhdr *ethhdr; struct batadv_hard_iface *primary_if; - struct net_device *soft_iface; + struct net_device *mesh_iface; u8 *hw_src; struct batadv_bla_claim_dst local_claim_dest; __be32 zeroip = 0; @@ -356,12 +356,12 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, const u8 *mac, sizeof(local_claim_dest)); local_claim_dest.type = claimtype; - soft_iface = primary_if->soft_iface; + mesh_iface = primary_if->mesh_iface; skb = arp_create(ARPOP_REPLY, ETH_P_ARP, /* IP DST: 0.0.0.0 */ zeroip, - primary_if->soft_iface, + primary_if->mesh_iface, /* IP SRC: 0.0.0.0 */ zeroip, /* Ethernet DST: Broadcast */ @@ -439,7 +439,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, const u8 *mac, } skb_reset_mac_header(skb); - skb->protocol = eth_type_trans(skb, soft_iface); + skb->protocol = eth_type_trans(skb, mesh_iface); batadv_inc_counter(bat_priv, BATADV_CNT_RX); batadv_add_counter(bat_priv, BATADV_CNT_RX_BYTES, skb->len + ETH_HLEN); @@ -466,7 +466,7 @@ static void batadv_bla_loopdetect_report(struct work_struct *work) report_work); bat_priv = backbone_gw->bat_priv; - batadv_info(bat_priv->soft_iface, + batadv_info(bat_priv->mesh_iface, "Possible loop on VLAN %d detected which can't be handled by BLA - please check your network setup!\n", batadv_print_vid(backbone_gw->vid)); snprintf(vid_str, sizeof(vid_str), "%d", @@ -481,7 +481,7 @@ static void batadv_bla_loopdetect_report(struct work_struct *work) /** * batadv_bla_get_backbone_gw() - finds or creates a backbone gateway - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig: the mac address of the originator * @vid: the VLAN ID * @own_backbone: set if the requested backbone is local @@ -554,7 +554,7 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, const u8 *orig, /** * batadv_bla_update_own_backbone_gw() - updates the own backbone gw for a VLAN - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @primary_if: the selected primary interface * @vid: VLAN identifier * @@ -580,7 +580,7 @@ batadv_bla_update_own_backbone_gw(struct batadv_priv *bat_priv, /** * batadv_bla_answer_request() - answer a bla request by sending own claims - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @primary_if: interface where the request came on * @vid: the vid where the request came on * @@ -657,7 +657,7 @@ static void batadv_bla_send_request(struct batadv_bla_backbone_gw *backbone_gw) /** * batadv_bla_send_announce() - Send an announcement frame - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @backbone_gw: our backbone gateway which should be announced */ static void batadv_bla_send_announce(struct batadv_priv *bat_priv, @@ -678,7 +678,7 @@ static void batadv_bla_send_announce(struct batadv_priv *bat_priv, /** * batadv_bla_add_claim() - Adds a claim in the claim hash - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @mac: the mac address of the claim * @vid: the VLAN ID of the frame * @backbone_gw: the backbone gateway which claims it @@ -788,7 +788,7 @@ 
batadv_bla_claim_get_backbone_gw(struct batadv_bla_claim *claim) /** * batadv_bla_del_claim() - delete a claim from the claim hash - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @mac: mac address of the claim to be removed * @vid: VLAN id for the claim to be removed */ @@ -826,7 +826,7 @@ free_claim: /** * batadv_handle_announce() - check for ANNOUNCE frame - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @an_addr: announcement mac address (ARP Sender HW address) * @backbone_addr: originator address of the sender (Ethernet source MAC) * @vid: the VLAN ID of the frame @@ -884,8 +884,8 @@ static bool batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr, /** * batadv_handle_request() - check for REQUEST frame - * @bat_priv: the bat priv with all the soft interface information - * @primary_if: the primary hard interface of this batman soft interface + * @bat_priv: the bat priv with all the mesh interface information + * @primary_if: the primary hard interface of this batman mesh interface * @backbone_addr: backbone address to be requested (ARP sender HW MAC) * @ethhdr: ethernet header of a packet * @vid: the VLAN ID of the frame @@ -917,8 +917,8 @@ static bool batadv_handle_request(struct batadv_priv *bat_priv, /** * batadv_handle_unclaim() - check for UNCLAIM frame - * @bat_priv: the bat priv with all the soft interface information - * @primary_if: the primary hard interface of this batman soft interface + * @bat_priv: the bat priv with all the mesh interface information + * @primary_if: the primary hard interface of this batman mesh interface * @backbone_addr: originator address of the backbone (Ethernet source) * @claim_addr: Client to be unclaimed (ARP sender HW MAC) * @vid: the VLAN ID of the frame @@ -955,8 +955,8 @@ static bool batadv_handle_unclaim(struct batadv_priv *bat_priv, /** * batadv_handle_claim() - check for CLAIM frame - * @bat_priv: the bat priv with all the soft interface information - * @primary_if: the primary hard interface of this batman soft interface + * @bat_priv: the bat priv with all the mesh interface information + * @primary_if: the primary hard interface of this batman mesh interface * @backbone_addr: originator address of the backbone (Ethernet Source) * @claim_addr: client mac address to be claimed (ARP sender HW MAC) * @vid: the VLAN ID of the frame @@ -992,7 +992,7 @@ static bool batadv_handle_claim(struct batadv_priv *bat_priv, /** * batadv_check_claim_group() - check for claim group membership - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @primary_if: the primary interface of this batman interface * @hw_src: the Hardware source in the ARP Header * @hw_dst: the Hardware destination in the ARP Header @@ -1067,8 +1067,8 @@ static int batadv_check_claim_group(struct batadv_priv *bat_priv, /** * batadv_bla_process_claim() - Check if this is a claim frame, and process it - * @bat_priv: the bat priv with all the soft interface information - * @primary_if: the primary hard interface of this batman soft interface + * @bat_priv: the bat priv with all the mesh interface information + * @primary_if: the primary hard interface of this batman mesh interface * @skb: the frame to be checked * * Return: true if it was a claim frame, otherwise return false to @@ -1210,7 +1210,7 @@ static bool 
batadv_bla_process_claim(struct batadv_priv *bat_priv, /** * batadv_bla_purge_backbone_gw() - Remove backbone gateways after a timeout or * immediately - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @now: whether the whole hash shall be wiped now * * Check when we last heard from other nodes, and remove them in case of @@ -1262,7 +1262,7 @@ purge_now: /** * batadv_bla_purge_claims() - Remove claims after a timeout or immediately - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @primary_if: the selected primary interface, may be NULL if now is set * @now: whether the whole hash shall be wiped now * @@ -1321,7 +1321,7 @@ skip: /** * batadv_bla_update_orig_address() - Update the backbone gateways when the own * originator address changes - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @primary_if: the new selected primary_if * @oldif: the old primary interface, may be NULL */ @@ -1376,7 +1376,7 @@ void batadv_bla_update_orig_address(struct batadv_priv *bat_priv, /** * batadv_bla_send_loopdetect() - send a loopdetect frame - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @backbone_gw: the backbone gateway for which a loop should be detected * * To detect loops that the bridge loop avoidance can't handle, send a loop @@ -1396,7 +1396,7 @@ batadv_bla_send_loopdetect(struct batadv_priv *bat_priv, /** * batadv_bla_status_update() - purge bla interfaces if necessary - * @net_dev: the soft interface net device + * @net_dev: the mesh interface net device */ void batadv_bla_status_update(struct net_device *net_dev) { @@ -1520,7 +1520,7 @@ static struct lock_class_key batadv_backbone_hash_lock_class_key; /** * batadv_bla_init() - initialize all bla structures - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * * Return: 0 on success, < 0 on error. */ @@ -1586,7 +1586,7 @@ int batadv_bla_init(struct batadv_priv *bat_priv) /** * batadv_bla_check_duplist() - Check if a frame is in the broadcast dup. - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: contains the multicast packet to be checked * @payload_ptr: pointer to position inside the head buffer of the skb * marking the start of the data to be CRC'ed @@ -1680,7 +1680,7 @@ out: /** * batadv_bla_check_ucast_duplist() - Check if a frame is in the broadcast dup. - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: contains the multicast packet to be checked, decapsulated from a * unicast_packet * @@ -1698,7 +1698,7 @@ static bool batadv_bla_check_ucast_duplist(struct batadv_priv *bat_priv, /** * batadv_bla_check_bcast_duplist() - Check if a frame is in the broadcast dup. - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: contains the bcast_packet to be checked * * Check if it is on our broadcast list. 
Another gateway might have sent the @@ -1723,7 +1723,7 @@ bool batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv, /** * batadv_bla_is_backbone_gw_orig() - Check if the originator is a gateway for * the VLAN identified by vid. - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig: originator mac address * @vid: VLAN identifier * @@ -1766,7 +1766,7 @@ bool batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, u8 *orig, * @orig_node: the orig_node of the frame * @hdr_size: maximum length of the frame * - * Return: true if the orig_node is also a gateway on the soft interface, + * Return: true if the orig_node is also a gateway on the mesh interface, * otherwise it returns false. */ bool batadv_bla_is_backbone_gw(struct sk_buff *skb, @@ -1796,9 +1796,9 @@ bool batadv_bla_is_backbone_gw(struct sk_buff *skb, /** * batadv_bla_free() - free all bla structures - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * - * for softinterface free or module unload + * for meshinterface free or module unload */ void batadv_bla_free(struct batadv_priv *bat_priv) { @@ -1822,7 +1822,7 @@ void batadv_bla_free(struct batadv_priv *bat_priv) /** * batadv_bla_loopdetect_check() - check and handle a detected loop - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the packet to check * @primary_if: interface where the request came on * @vid: the VLAN ID of the frame @@ -1877,7 +1877,7 @@ batadv_bla_loopdetect_check(struct batadv_priv *bat_priv, struct sk_buff *skb, /** * batadv_bla_rx() - check packets coming from the mesh. 
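Another pattern rewritten throughout bat_iv_ogm.c, bat_v.c and bat_v_elp.c above: walking the global hard-interface list under RCU and skipping slaves that belong to a different mesh interface. In isolation, a sketch under the locking rules visible in the surrounding hunks:

rcu_read_lock();
list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
	/* only slaves of this mesh interface (was: ->soft_iface) */
	if (hard_iface->mesh_iface != bat_priv->mesh_iface)
		continue;

	/* pin the entry if it must outlive the RCU section */
	if (!kref_get_unless_zero(&hard_iface->refcount))
		continue;

	/* ... per-slave work ... */

	batadv_hardif_put(hard_iface);
}
rcu_read_unlock();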
- * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the frame to be checked * @vid: the VLAN ID of the frame * @packet_type: the batman packet type this frame came in @@ -2010,7 +2010,7 @@ out: /** * batadv_bla_tx() - check packets going into the mesh - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the frame to be checked * @vid: the VLAN ID of the frame * @@ -2232,18 +2232,18 @@ int batadv_bla_claim_dump(struct sk_buff *msg, struct netlink_callback *cb) { struct batadv_hard_iface *primary_if = NULL; int portid = NETLINK_CB(cb->skb).portid; - struct net_device *soft_iface; + struct net_device *mesh_iface; struct batadv_hashtable *hash; struct batadv_priv *bat_priv; int bucket = cb->args[0]; int idx = cb->args[1]; int ret = 0; - soft_iface = batadv_netlink_get_softif(cb); - if (IS_ERR(soft_iface)) - return PTR_ERR(soft_iface); + mesh_iface = batadv_netlink_get_meshif(cb); + if (IS_ERR(mesh_iface)) + return PTR_ERR(mesh_iface); - bat_priv = netdev_priv(soft_iface); + bat_priv = netdev_priv(mesh_iface); hash = bat_priv->bla.claim_hash; primary_if = batadv_primary_if_get_selected(bat_priv); @@ -2267,7 +2267,7 @@ int batadv_bla_claim_dump(struct sk_buff *msg, struct netlink_callback *cb) out: batadv_hardif_put(primary_if); - dev_put(soft_iface); + dev_put(mesh_iface); return ret; } @@ -2393,18 +2393,18 @@ int batadv_bla_backbone_dump(struct sk_buff *msg, struct netlink_callback *cb) { struct batadv_hard_iface *primary_if = NULL; int portid = NETLINK_CB(cb->skb).portid; - struct net_device *soft_iface; + struct net_device *mesh_iface; struct batadv_hashtable *hash; struct batadv_priv *bat_priv; int bucket = cb->args[0]; int idx = cb->args[1]; int ret = 0; - soft_iface = batadv_netlink_get_softif(cb); - if (IS_ERR(soft_iface)) - return PTR_ERR(soft_iface); + mesh_iface = batadv_netlink_get_meshif(cb); + if (IS_ERR(mesh_iface)) + return PTR_ERR(mesh_iface); - bat_priv = netdev_priv(soft_iface); + bat_priv = netdev_priv(mesh_iface); hash = bat_priv->bla.backbone_hash; primary_if = batadv_primary_if_get_selected(bat_priv); @@ -2428,7 +2428,7 @@ int batadv_bla_backbone_dump(struct sk_buff *msg, struct netlink_callback *cb) out: batadv_hardif_put(primary_if); - dev_put(soft_iface); + dev_put(mesh_iface); return ret; } @@ -2437,7 +2437,7 @@ out: /** * batadv_bla_check_claim() - check if address is claimed * - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @addr: mac address of which the claim status is checked * @vid: the VLAN ID * diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index e5a07152d4ec..8b8132eb0a79 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -96,7 +96,7 @@ static void batadv_dat_purge(struct work_struct *work); /** * batadv_dat_start_timer() - initialise the DAT periodic worker - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information */ static void batadv_dat_start_timer(struct batadv_priv *bat_priv) { @@ -145,7 +145,7 @@ static bool batadv_dat_to_purge(struct batadv_dat_entry *dat_entry) /** * __batadv_dat_purge() - delete entries from the DAT local storage - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: 
the bat priv with all the mesh interface information * @to_purge: function in charge to decide whether an entry has to be purged or * not. This function takes the dat_entry as argument and has to * returns a boolean value: true is the entry has to be deleted, @@ -315,7 +315,7 @@ static u32 batadv_hash_dat(const void *data, u32 size) /** * batadv_dat_entry_hash_find() - look for a given dat_entry in the local hash * table - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @ip: search key * @vid: VLAN identifier * @@ -357,7 +357,7 @@ batadv_dat_entry_hash_find(struct batadv_priv *bat_priv, __be32 ip, /** * batadv_dat_entry_add() - add a new dat entry or update it if already exists - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @ip: ipv4 to add/edit * @mac_addr: mac address to assign to the given ipv4 * @vid: VLAN identifier @@ -414,7 +414,7 @@ out: /** * batadv_dbg_arp() - print a debug message containing all the ARP packet * details - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: ARP packet * @hdr_size: size of the possible header before the ARP packet * @msg: message to print together with the debugging information @@ -549,7 +549,7 @@ out: /** * batadv_choose_next_candidate() - select the next DHT candidate - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @cands: candidates array * @select: number of candidates already present in the array * @ip_key: key to look up in the DHT @@ -613,7 +613,7 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv, /** * batadv_dat_select_candidates() - select the nodes which the DHT message has * to be sent to - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @ip_dst: ipv4 to look up in the DHT * @vid: VLAN identifier * @@ -658,7 +658,7 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst, /** * batadv_dat_forward_data() - copy and send payload to the selected candidates - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: payload to send * @ip: the DHT key * @vid: VLAN identifier @@ -734,7 +734,7 @@ free_orig: /** * batadv_dat_tvlv_container_update() - update the dat tvlv container after dat * setting change - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information */ static void batadv_dat_tvlv_container_update(struct batadv_priv *bat_priv) { @@ -756,7 +756,7 @@ static void batadv_dat_tvlv_container_update(struct batadv_priv *bat_priv) /** * batadv_dat_status_update() - update the dat tvlv container after dat * setting change - * @net_dev: the soft interface net device + * @net_dev: the mesh interface net device */ void batadv_dat_status_update(struct net_device *net_dev) { @@ -767,7 +767,7 @@ void batadv_dat_status_update(struct net_device *net_dev) /** * batadv_dat_tvlv_ogm_handler_v1() - process incoming dat tvlv container - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig: the orig_node of the ogm * @flags: 
flags indicating the tvlv state (see batadv_tvlv_handler_flags) * @tvlv_value: tvlv buffer containing the gateway data @@ -786,7 +786,7 @@ static void batadv_dat_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, /** * batadv_dat_hash_free() - free the local DAT hash table - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information */ static void batadv_dat_hash_free(struct batadv_priv *bat_priv) { @@ -802,7 +802,7 @@ static void batadv_dat_hash_free(struct batadv_priv *bat_priv) /** * batadv_dat_init() - initialise the DAT internals - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * * Return: 0 in case of success, a negative error code otherwise */ @@ -828,7 +828,7 @@ int batadv_dat_init(struct batadv_priv *bat_priv) /** * batadv_dat_free() - free the DAT internals - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information */ void batadv_dat_free(struct batadv_priv *bat_priv) { @@ -936,18 +936,18 @@ int batadv_dat_cache_dump(struct sk_buff *msg, struct netlink_callback *cb) { struct batadv_hard_iface *primary_if = NULL; int portid = NETLINK_CB(cb->skb).portid; - struct net_device *soft_iface; + struct net_device *mesh_iface; struct batadv_hashtable *hash; struct batadv_priv *bat_priv; int bucket = cb->args[0]; int idx = cb->args[1]; int ret = 0; - soft_iface = batadv_netlink_get_softif(cb); - if (IS_ERR(soft_iface)) - return PTR_ERR(soft_iface); + mesh_iface = batadv_netlink_get_meshif(cb); + if (IS_ERR(mesh_iface)) + return PTR_ERR(mesh_iface); - bat_priv = netdev_priv(soft_iface); + bat_priv = netdev_priv(mesh_iface); hash = bat_priv->dat.hash; primary_if = batadv_primary_if_get_selected(bat_priv); @@ -973,14 +973,14 @@ int batadv_dat_cache_dump(struct sk_buff *msg, struct netlink_callback *cb) out: batadv_hardif_put(primary_if); - dev_put(soft_iface); + dev_put(mesh_iface); return ret; } /** * batadv_arp_get_type() - parse an ARP packet and gets the type - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: packet to analyse * @hdr_size: size of the possible header before the ARP packet in the skb * @@ -1080,7 +1080,7 @@ static unsigned short batadv_dat_get_vid(struct sk_buff *skb, int *hdr_size) /** * batadv_dat_arp_create_reply() - create an ARP Reply - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @ip_src: ARP sender IP * @ip_dst: ARP target IP * @hw_src: Ethernet source and ARP sender MAC @@ -1099,7 +1099,7 @@ batadv_dat_arp_create_reply(struct batadv_priv *bat_priv, __be32 ip_src, { struct sk_buff *skb; - skb = arp_create(ARPOP_REPLY, ETH_P_ARP, ip_dst, bat_priv->soft_iface, + skb = arp_create(ARPOP_REPLY, ETH_P_ARP, ip_dst, bat_priv->mesh_iface, ip_src, hw_dst, hw_src, hw_dst); if (!skb) return NULL; @@ -1116,7 +1116,7 @@ batadv_dat_arp_create_reply(struct batadv_priv *bat_priv, __be32 ip_src, /** * batadv_dat_snoop_outgoing_arp_request() - snoop the ARP request and try to * answer using DAT - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: packet to check * * Return: true if the message has been sent to the dht candidates, false @@ -1132,7 +1132,7 @@ bool 
batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv, bool ret = false; struct batadv_dat_entry *dat_entry = NULL; struct sk_buff *skb_new; - struct net_device *soft_iface = bat_priv->soft_iface; + struct net_device *mesh_iface = bat_priv->mesh_iface; int hdr_size = 0; unsigned short vid; @@ -1162,7 +1162,7 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv, * client will answer itself. DAT would only generate a * duplicate packet. * - * Moreover, if the soft-interface is enslaved into a bridge, an + * Moreover, if the mesh-interface is enslaved into a bridge, an * additional DAT answer may trigger kernel warnings about * a packet coming from the wrong port. */ @@ -1191,7 +1191,7 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv, if (!skb_new) goto out; - skb_new->protocol = eth_type_trans(skb_new, soft_iface); + skb_new->protocol = eth_type_trans(skb_new, mesh_iface); batadv_inc_counter(bat_priv, BATADV_CNT_RX); batadv_add_counter(bat_priv, BATADV_CNT_RX_BYTES, @@ -1213,7 +1213,7 @@ out: /** * batadv_dat_snoop_incoming_arp_request() - snoop the ARP request and try to * answer using the local DAT storage - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: packet to check * @hdr_size: size of the encapsulation header * @@ -1281,7 +1281,7 @@ out: /** * batadv_dat_snoop_outgoing_arp_reply() - snoop the ARP reply and fill the DHT - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: packet to check */ void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv, @@ -1324,7 +1324,7 @@ void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv, /** * batadv_dat_snoop_incoming_arp_reply() - snoop the ARP reply and fill the * local DAT storage only - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: packet to check * @hdr_size: size of the encapsulation header * @@ -1605,7 +1605,7 @@ static bool batadv_dat_get_dhcp_chaddr(struct sk_buff *skb, u8 *buf) /** * batadv_dat_put_dhcp() - puts addresses from a DHCP packet into the DHT and * DAT cache - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @chaddr: the DHCP client MAC address * @yiaddr: the DHCP client IP address * @hw_dst: the DHCP server MAC address @@ -1690,7 +1690,7 @@ batadv_dat_check_dhcp_ack(struct sk_buff *skb, __be16 proto, __be32 *ip_src, /** * batadv_dat_snoop_outgoing_dhcp_ack() - snoop DHCPACK and fill DAT with it - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the packet to snoop * @proto: ethernet protocol hint (behind a potential vlan) * @vid: VLAN identifier @@ -1723,7 +1723,7 @@ void batadv_dat_snoop_outgoing_dhcp_ack(struct batadv_priv *bat_priv, /** * batadv_dat_snoop_incoming_dhcp_ack() - snoop DHCPACK and fill DAT cache - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the packet to snoop * @hdr_size: header size, up to the tail of the batman-adv header * @@ -1771,7 +1771,7 @@ void batadv_dat_snoop_incoming_dhcp_ack(struct batadv_priv *bat_priv, /** * batadv_dat_drop_broadcast_packet() - check if an 
ARP request has to be * dropped (because the node has already obtained the reply via DAT) or not - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @forw_packet: the broadcast packet * * Return: true if the node can drop the packet, false otherwise. diff --git a/net/batman-adv/distributed-arp-table.h b/net/batman-adv/distributed-arp-table.h index bed7f3d20844..e7b75e82eb1d 100644 --- a/net/batman-adv/distributed-arp-table.h +++ b/net/batman-adv/distributed-arp-table.h @@ -56,7 +56,7 @@ batadv_dat_init_orig_node_addr(struct batadv_orig_node *orig_node) /** * batadv_dat_init_own_addr() - assign a DAT address to the node itself - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @primary_if: a pointer to the primary interface */ static inline void @@ -77,7 +77,7 @@ int batadv_dat_cache_dump(struct sk_buff *msg, struct netlink_callback *cb); /** * batadv_dat_inc_counter() - increment the correct DAT packet counter - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @subtype: the 4addr subtype of the packet to be counted * * Updates the ethtool statistics for the received packet if it is a DAT subtype diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index 757c084ac2d1..cc14bc41381e 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -349,7 +349,7 @@ bool batadv_frag_skb_fwd(struct sk_buff *skb, struct batadv_hard_iface *recv_if, struct batadv_orig_node *orig_node_src) { - struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface); + struct batadv_priv *bat_priv = netdev_priv(recv_if->mesh_iface); struct batadv_neigh_node *neigh_node = NULL; struct batadv_frag_packet *packet; u16 total_size; diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index f68e34ed1f62..7a11b245e9f4 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -71,7 +71,7 @@ void batadv_gw_node_release(struct kref *ref) /** * batadv_gw_get_selected_gw_node() - Get currently selected gateway - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * * Return: selected gateway (with increased refcnt), NULL on errors */ @@ -95,7 +95,7 @@ out: /** * batadv_gw_get_selected_orig() - Get originator of currently selected gateway - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * * Return: orig_node of selected gateway (with increased refcnt), NULL on errors */ @@ -144,7 +144,7 @@ static void batadv_gw_select(struct batadv_priv *bat_priv, /** * batadv_gw_reselect() - force a gateway reselection - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * * Set a flag to remind the GW component to perform a new gateway reselection. 
* However this function does not ensure that the current gateway is going to be @@ -160,7 +160,7 @@ void batadv_gw_reselect(struct batadv_priv *bat_priv) /** * batadv_gw_check_client_stop() - check if client mode has been switched off - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * * This function assumes the caller has checked that the gw state *is actually * changing*. This function is not supposed to be called when there is no state @@ -192,7 +192,7 @@ void batadv_gw_check_client_stop(struct batadv_priv *bat_priv) /** * batadv_gw_election() - Elect the best gateway - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information */ void batadv_gw_election(struct batadv_priv *bat_priv) { @@ -280,7 +280,7 @@ out: /** * batadv_gw_check_election() - Elect orig node as best gateway when eligible - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig_node: orig node which is to be checked */ void batadv_gw_check_election(struct batadv_priv *bat_priv, @@ -314,7 +314,7 @@ out: /** * batadv_gw_node_add() - add gateway node to list of available gateways - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig_node: originator announcing gateway capabilities * @gateway: announced bandwidth information * @@ -361,7 +361,7 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv, /** * batadv_gw_node_get() - retrieve gateway node from list of available gateways - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig_node: originator announcing gateway capabilities * * Return: gateway node if found or NULL otherwise. 
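The netlink dump callbacks in this section (claim, backbone and DAT cache above, the gateway dump below) are all converted with the same boilerplate, batadv_netlink_get_meshif() replacing batadv_netlink_get_softif(). Condensed to its skeleton, a sketch with the error paths trimmed:

struct net_device *mesh_iface;
struct batadv_priv *bat_priv;
int ret = 0;

mesh_iface = batadv_netlink_get_meshif(cb);
if (IS_ERR(mesh_iface))
	return PTR_ERR(mesh_iface);

bat_priv = netdev_priv(mesh_iface);
/* ... dump the requested table, resuming from the cb->args[] cursor ... */
dev_put(mesh_iface);
return ret;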
@@ -391,7 +391,7 @@ struct batadv_gw_node *batadv_gw_node_get(struct batadv_priv *bat_priv, /** * batadv_gw_node_update() - update list of available gateways with changed * bandwidth information - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig_node: originator announcing gateway capabilities * @gateway: announced bandwidth information */ @@ -458,7 +458,7 @@ out: /** * batadv_gw_node_delete() - Remove orig_node from gateway list - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig_node: orig node which is currently in process of being removed */ void batadv_gw_node_delete(struct batadv_priv *bat_priv, @@ -473,8 +473,8 @@ void batadv_gw_node_delete(struct batadv_priv *bat_priv, } /** - * batadv_gw_node_free() - Free gateway information from soft interface - * @bat_priv: the bat priv with all the soft interface information + * batadv_gw_node_free() - Free gateway information from mesh interface + * @bat_priv: the bat priv with all the mesh interface information */ void batadv_gw_node_free(struct batadv_priv *bat_priv) { @@ -501,15 +501,15 @@ void batadv_gw_node_free(struct batadv_priv *bat_priv) int batadv_gw_dump(struct sk_buff *msg, struct netlink_callback *cb) { struct batadv_hard_iface *primary_if = NULL; - struct net_device *soft_iface; + struct net_device *mesh_iface; struct batadv_priv *bat_priv; int ret; - soft_iface = batadv_netlink_get_softif(cb); - if (IS_ERR(soft_iface)) - return PTR_ERR(soft_iface); + mesh_iface = batadv_netlink_get_meshif(cb); + if (IS_ERR(mesh_iface)) + return PTR_ERR(mesh_iface); - bat_priv = netdev_priv(soft_iface); + bat_priv = netdev_priv(mesh_iface); primary_if = batadv_primary_if_get_selected(bat_priv); if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) { @@ -528,7 +528,7 @@ int batadv_gw_dump(struct sk_buff *msg, struct netlink_callback *cb) out: batadv_hardif_put(primary_if); - dev_put(soft_iface); + dev_put(mesh_iface); return ret; } @@ -657,7 +657,7 @@ batadv_gw_dhcp_recipient_get(struct sk_buff *skb, unsigned int *header_len, /** * batadv_gw_out_of_range() - check if the dhcp request destination is the best * gateway - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the outgoing packet * * Check if the skb is a DHCP request and if it is sent to the current best GW diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c index 2dd36ef03c84..315fa90f0c94 100644 --- a/net/batman-adv/gateway_common.c +++ b/net/batman-adv/gateway_common.c @@ -20,7 +20,7 @@ /** * batadv_gw_tvlv_container_update() - update the gw tvlv container after * gateway setting change - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information */ void batadv_gw_tvlv_container_update(struct batadv_priv *bat_priv) { @@ -48,7 +48,7 @@ void batadv_gw_tvlv_container_update(struct batadv_priv *bat_priv) /** * batadv_gw_tvlv_ogm_handler_v1() - process incoming gateway tvlv container - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig: the orig_node of the ogm * @flags: flags indicating the tvlv state (see batadv_tvlv_handler_flags) * @tvlv_value: tvlv buffer containing the gateway data @@ -89,7 +89,7 @@ static void 
batadv_gw_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, /** * batadv_gw_init() - initialise the gateway handling internals - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information */ void batadv_gw_init(struct batadv_priv *bat_priv) { @@ -105,7 +105,7 @@ void batadv_gw_init(struct batadv_priv *bat_priv) /** * batadv_gw_free() - free the gateway handling internals - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information */ void batadv_gw_free(struct batadv_priv *bat_priv) { diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 96a412beab2d..f145f9662653 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -36,9 +36,9 @@ #include "distributed-arp-table.h" #include "gateway_client.h" #include "log.h" +#include "mesh-interface.h" #include "originator.h" #include "send.h" -#include "soft-interface.h" #include "translation-table.h" /** @@ -51,7 +51,7 @@ void batadv_hardif_release(struct kref *ref) struct batadv_hard_iface *hard_iface; hard_iface = container_of(ref, struct batadv_hard_iface, refcount); - dev_put(hard_iface->net_dev); + netdev_put(hard_iface->net_dev, &hard_iface->dev_tracker); kfree_rcu(hard_iface, rcu); } @@ -141,7 +141,7 @@ static bool batadv_mutual_parents(const struct net_device *dev1, * is important to prevent this new interface from being used to create a new * mesh network (this behaviour would lead to a batman-over-batman * configuration). This function recursively checks all the fathers of the - * device passed as argument looking for a batman-adv soft interface. + * device passed as argument looking for a batman-adv mesh interface. 
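The batman-over-batman check described in the comment above amounts to climbing the device's parent chain via iflink until either a batman-adv mesh interface or a physical device (iflink pointing at itself) is reached. A simplified sketch of that recursion; the netns hops and error handling of the real function are omitted, and the caller is assumed to hold rcu_read_lock():

static bool example_is_on_batman_iface(const struct net_device *net_dev)
{
	struct net_device *parent;
	int iflink;

	/* the device itself is a batman-adv mesh interface */
	if (batadv_meshif_is_valid(net_dev))
		return true;

	iflink = dev_get_iflink(net_dev);
	/* iflink equal to the own ifindex: most likely a physical device */
	if (iflink == net_dev->ifindex)
		return false;

	parent = dev_get_by_index_rcu(dev_net(net_dev), iflink);
	if (!parent)
		return false;

	return example_is_on_batman_iface(parent);
}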
* * Return: true if the device is descendant of a batman-adv mesh interface (or * if it is a batman-adv interface itself), false otherwise @@ -155,7 +155,7 @@ static bool batadv_is_on_batman_iface(const struct net_device *net_dev) bool ret; /* check if this is a batman-adv mesh interface */ - if (batadv_softif_is_valid(net_dev)) + if (batadv_meshif_is_valid(net_dev)) return true; iflink = dev_get_iflink(net_dev); @@ -233,10 +233,10 @@ static struct net_device *batadv_get_real_netdevice(struct net_device *netdev) } hard_iface = batadv_hardif_get_by_netdev(netdev); - if (!hard_iface || !hard_iface->soft_iface) + if (!hard_iface || !hard_iface->mesh_iface) goto out; - net = dev_net(hard_iface->soft_iface); + net = dev_net(hard_iface->mesh_iface); real_net = batadv_getlink_net(netdev, net); /* iflink to itself, most likely physical device */ @@ -438,13 +438,13 @@ out: } static struct batadv_hard_iface * -batadv_hardif_get_active(const struct net_device *soft_iface) +batadv_hardif_get_active(const struct net_device *mesh_iface) { struct batadv_hard_iface *hard_iface; rcu_read_lock(); list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { - if (hard_iface->soft_iface != soft_iface) + if (hard_iface->mesh_iface != mesh_iface) continue; if (hard_iface->if_status == BATADV_IF_ACTIVE && @@ -532,9 +532,9 @@ static void batadv_check_known_mac_addr(const struct net_device *net_dev) /** * batadv_hardif_recalc_extra_skbroom() - Recalculate skbuff extra head/tailroom - * @soft_iface: netdev struct of the mesh interface + * @mesh_iface: netdev struct of the mesh interface */ -static void batadv_hardif_recalc_extra_skbroom(struct net_device *soft_iface) +static void batadv_hardif_recalc_extra_skbroom(struct net_device *mesh_iface) { const struct batadv_hard_iface *hard_iface; unsigned short lower_header_len = ETH_HLEN; @@ -547,7 +547,7 @@ static void batadv_hardif_recalc_extra_skbroom(struct net_device *soft_iface) if (hard_iface->if_status == BATADV_IF_NOT_IN_USE) continue; - if (hard_iface->soft_iface != soft_iface) + if (hard_iface->mesh_iface != mesh_iface) continue; lower_header_len = max_t(unsigned short, lower_header_len, @@ -567,20 +567,20 @@ static void batadv_hardif_recalc_extra_skbroom(struct net_device *soft_iface) /* fragmentation headers don't strip the unicast/... 
header */ needed_headroom += sizeof(struct batadv_frag_packet); - soft_iface->needed_headroom = needed_headroom; - soft_iface->needed_tailroom = lower_tailroom; + mesh_iface->needed_headroom = needed_headroom; + mesh_iface->needed_tailroom = lower_tailroom; } /** - * batadv_hardif_min_mtu() - Calculate maximum MTU for soft interface - * @soft_iface: netdev struct of the soft interface + * batadv_hardif_min_mtu() - Calculate maximum MTU for mesh interface + * @mesh_iface: netdev struct of the mesh interface * - * Return: MTU for the soft-interface (limited by the minimal MTU of all active + * Return: MTU for the mesh-interface (limited by the minimal MTU of all active * slave interfaces) */ -int batadv_hardif_min_mtu(struct net_device *soft_iface) +int batadv_hardif_min_mtu(struct net_device *mesh_iface) { - struct batadv_priv *bat_priv = netdev_priv(soft_iface); + struct batadv_priv *bat_priv = netdev_priv(mesh_iface); const struct batadv_hard_iface *hard_iface; int min_mtu = INT_MAX; @@ -590,7 +590,7 @@ int batadv_hardif_min_mtu(struct net_device *soft_iface) hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED) continue; - if (hard_iface->soft_iface != soft_iface) + if (hard_iface->mesh_iface != mesh_iface) continue; min_mtu = min_t(int, hard_iface->net_dev->mtu, min_mtu); @@ -616,26 +616,24 @@ out: */ atomic_set(&bat_priv->packet_size_max, min_mtu); - /* the real soft-interface MTU is computed by removing the payload + /* the real mesh-interface MTU is computed by removing the payload * overhead from the maximum amount of bytes that was just computed. - * - * However batman-adv does not support MTUs bigger than ETH_DATA_LEN */ - return min_t(int, min_mtu - batadv_max_header_len(), ETH_DATA_LEN); + return min_t(int, min_mtu - batadv_max_header_len(), BATADV_MAX_MTU); } /** * batadv_update_min_mtu() - Adjusts the MTU if a new interface with a smaller * MTU appeared - * @soft_iface: netdev struct of the soft interface + * @mesh_iface: netdev struct of the mesh interface */ -void batadv_update_min_mtu(struct net_device *soft_iface) +void batadv_update_min_mtu(struct net_device *mesh_iface) { - struct batadv_priv *bat_priv = netdev_priv(soft_iface); + struct batadv_priv *bat_priv = netdev_priv(mesh_iface); int limit_mtu; int mtu; - mtu = batadv_hardif_min_mtu(soft_iface); + mtu = batadv_hardif_min_mtu(mesh_iface); if (bat_priv->mtu_set_by_user) limit_mtu = bat_priv->mtu_set_by_user; @@ -643,12 +641,12 @@ void batadv_update_min_mtu(struct net_device *soft_iface) limit_mtu = ETH_DATA_LEN; mtu = min(mtu, limit_mtu); - dev_set_mtu(soft_iface, mtu); + dev_set_mtu(mesh_iface, mtu); /* Check if the local translate table should be cleaned up to match a * new (and smaller) MTU. 
*/ - batadv_tt_local_resize_to_mtu(soft_iface); + batadv_tt_local_resize_to_mtu(mesh_iface); } static void @@ -660,7 +658,7 @@ batadv_hardif_activate_interface(struct batadv_hard_iface *hard_iface) if (hard_iface->if_status != BATADV_IF_INACTIVE) goto out; - bat_priv = netdev_priv(hard_iface->soft_iface); + bat_priv = netdev_priv(hard_iface->mesh_iface); bat_priv->algo_ops->iface.update_mac(hard_iface); hard_iface->if_status = BATADV_IF_TO_BE_ACTIVATED; @@ -672,10 +670,10 @@ if (!primary_if) batadv_primary_if_select(bat_priv, hard_iface); - batadv_info(hard_iface->soft_iface, "Interface activated: %s\n", + batadv_info(hard_iface->mesh_iface, "Interface activated: %s\n", hard_iface->net_dev->name); - batadv_update_min_mtu(hard_iface->soft_iface); + batadv_update_min_mtu(hard_iface->mesh_iface); if (bat_priv->algo_ops->iface.activate) bat_priv->algo_ops->iface.activate(hard_iface); @@ -693,21 +691,21 @@ batadv_hardif_deactivate_interface(struct batadv_hard_iface *hard_iface) hard_iface->if_status = BATADV_IF_INACTIVE; - batadv_info(hard_iface->soft_iface, "Interface deactivated: %s\n", + batadv_info(hard_iface->mesh_iface, "Interface deactivated: %s\n", hard_iface->net_dev->name); - batadv_update_min_mtu(hard_iface->soft_iface); + batadv_update_min_mtu(hard_iface->mesh_iface); } /** - * batadv_hardif_enable_interface() - Enslave hard interface to soft interface - * @hard_iface: hard interface to add to soft interface - * @soft_iface: netdev struct of the mesh interface + * batadv_hardif_enable_interface() - Enslave hard interface to mesh interface + * @hard_iface: hard interface to add to mesh interface + * @mesh_iface: netdev struct of the mesh interface * * Return: 0 on success or negative error number in case of failure */ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, - struct net_device *soft_iface) + struct net_device *mesh_iface) { struct batadv_priv *bat_priv; __be16 ethertype = htons(ETH_P_BATMAN); @@ -717,7 +715,7 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, int ret; hardif_mtu = READ_ONCE(hard_iface->net_dev->mtu); - required_mtu = READ_ONCE(soft_iface->mtu) + max_header_len; + required_mtu = READ_ONCE(mesh_iface->mtu) + max_header_len; if (hardif_mtu < ETH_MIN_MTU + max_header_len) return -EINVAL; @@ -727,12 +725,12 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, kref_get(&hard_iface->refcount); - dev_hold(soft_iface); - hard_iface->soft_iface = soft_iface; - bat_priv = netdev_priv(hard_iface->soft_iface); + netdev_hold(mesh_iface, &hard_iface->meshif_dev_tracker, GFP_ATOMIC); + hard_iface->mesh_iface = mesh_iface; + bat_priv = netdev_priv(hard_iface->mesh_iface); ret = netdev_master_upper_dev_link(hard_iface->net_dev, - soft_iface, NULL, NULL, NULL); + mesh_iface, NULL, NULL, NULL); if (ret) goto err_dev; @@ -748,19 +747,19 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, hard_iface->batman_adv_ptype.dev = hard_iface->net_dev; dev_add_pack(&hard_iface->batman_adv_ptype); - batadv_info(hard_iface->soft_iface, "Adding interface: %s\n", + batadv_info(hard_iface->mesh_iface, "Adding interface: %s\n", hard_iface->net_dev->name); if (atomic_read(&bat_priv->fragmentation) && hardif_mtu < required_mtu) - batadv_info(hard_iface->soft_iface, + batadv_info(hard_iface->mesh_iface, "The MTU of interface %s is too small (%i) to handle the transport of batman-adv packets. 
Packets going over this interface will be fragmented on layer2 which could impact the performance. Setting the MTU to %i would solve the problem.\n", hard_iface->net_dev->name, hardif_mtu, required_mtu); if (!atomic_read(&bat_priv->fragmentation) && hardif_mtu < required_mtu) - batadv_info(hard_iface->soft_iface, + batadv_info(hard_iface->mesh_iface, "The MTU of interface %s is too small (%i) to handle the transport of batman-adv packets. If you experience problems getting traffic through try increasing the MTU to %i.\n", hard_iface->net_dev->name, hardif_mtu, required_mtu); @@ -768,11 +767,11 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, if (batadv_hardif_is_iface_up(hard_iface)) batadv_hardif_activate_interface(hard_iface); else - batadv_err(hard_iface->soft_iface, + batadv_err(hard_iface->mesh_iface, "Not using interface %s (retrying later): interface not active\n", hard_iface->net_dev->name); - batadv_hardif_recalc_extra_skbroom(soft_iface); + batadv_hardif_recalc_extra_skbroom(mesh_iface); if (bat_priv->algo_ops->iface.enabled) bat_priv->algo_ops->iface.enabled(hard_iface); @@ -781,17 +780,17 @@ out: return 0; err_upper: - netdev_upper_dev_unlink(hard_iface->net_dev, soft_iface); + netdev_upper_dev_unlink(hard_iface->net_dev, mesh_iface); err_dev: - hard_iface->soft_iface = NULL; - dev_put(soft_iface); + hard_iface->mesh_iface = NULL; + netdev_put(mesh_iface, &hard_iface->meshif_dev_tracker); batadv_hardif_put(hard_iface); return ret; } /** - * batadv_hardif_cnt() - get number of interfaces enslaved to soft interface - * @soft_iface: soft interface to check + * batadv_hardif_cnt() - get number of interfaces enslaved to mesh interface + * @mesh_iface: mesh interface to check * * This function is only using RCU for locking - the result can therefore be * off when another function is modifying the list at the same time. 
The @@ -799,14 +798,14 @@ err_dev: * * Return: number of connected/enslaved hard interfaces */ -static size_t batadv_hardif_cnt(const struct net_device *soft_iface) +static size_t batadv_hardif_cnt(const struct net_device *mesh_iface) { struct batadv_hard_iface *hard_iface; size_t count = 0; rcu_read_lock(); list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { - if (hard_iface->soft_iface != soft_iface) + if (hard_iface->mesh_iface != mesh_iface) continue; count++; @@ -817,12 +816,12 @@ static size_t batadv_hardif_cnt(const struct net_device *soft_iface) } /** - * batadv_hardif_disable_interface() - Remove hard interface from soft interface + * batadv_hardif_disable_interface() - Remove hard interface from mesh interface * @hard_iface: hard interface to be removed */ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface) { - struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); + struct batadv_priv *bat_priv = netdev_priv(hard_iface->mesh_iface); struct batadv_hard_iface *primary_if = NULL; batadv_hardif_deactivate_interface(hard_iface); @@ -830,7 +829,7 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface) if (hard_iface->if_status != BATADV_IF_INACTIVE) goto out; - batadv_info(hard_iface->soft_iface, "Removing interface: %s\n", + batadv_info(hard_iface->mesh_iface, "Removing interface: %s\n", hard_iface->net_dev->name); dev_remove_pack(&hard_iface->batman_adv_ptype); batadv_hardif_put(hard_iface); @@ -839,7 +838,7 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface) if (hard_iface == primary_if) { struct batadv_hard_iface *new_if; - new_if = batadv_hardif_get_active(hard_iface->soft_iface); + new_if = batadv_hardif_get_active(hard_iface->mesh_iface); batadv_primary_if_select(bat_priv, new_if); batadv_hardif_put(new_if); @@ -851,16 +850,16 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface) /* delete all references to this hard_iface */ batadv_purge_orig_ref(bat_priv); batadv_purge_outstanding_packets(bat_priv, hard_iface); - dev_put(hard_iface->soft_iface); + netdev_put(hard_iface->mesh_iface, &hard_iface->meshif_dev_tracker); - netdev_upper_dev_unlink(hard_iface->net_dev, hard_iface->soft_iface); - batadv_hardif_recalc_extra_skbroom(hard_iface->soft_iface); + netdev_upper_dev_unlink(hard_iface->net_dev, hard_iface->mesh_iface); + batadv_hardif_recalc_extra_skbroom(hard_iface->mesh_iface); /* nobody uses this interface anymore */ - if (batadv_hardif_cnt(hard_iface->soft_iface) <= 1) + if (batadv_hardif_cnt(hard_iface->mesh_iface) <= 1) batadv_gw_check_client_stop(bat_priv); - hard_iface->soft_iface = NULL; + hard_iface->mesh_iface = NULL; batadv_hardif_put(hard_iface); out: @@ -875,16 +874,16 @@ batadv_hardif_add_interface(struct net_device *net_dev) ASSERT_RTNL(); if (!batadv_is_valid_iface(net_dev)) - goto out; - - dev_hold(net_dev); + return NULL; hard_iface = kzalloc(sizeof(*hard_iface), GFP_ATOMIC); if (!hard_iface) - goto release_dev; + return NULL; + netdev_hold(net_dev, &hard_iface->dev_tracker, GFP_ATOMIC); hard_iface->net_dev = net_dev; - hard_iface->soft_iface = NULL; + + hard_iface->mesh_iface = NULL; hard_iface->if_status = BATADV_IF_NOT_IN_USE; INIT_LIST_HEAD(&hard_iface->list); @@ -909,11 +908,6 @@ batadv_hardif_add_interface(struct net_device *net_dev) batadv_hardif_generation++; return hard_iface; - -release_dev: - dev_put(net_dev); -out: - return NULL; } static void batadv_hardif_remove_interface(struct batadv_hard_iface *hard_iface) @@ -932,13 
+926,13 @@ static void batadv_hardif_remove_interface(struct batadv_hard_iface *hard_iface) } /** - * batadv_hard_if_event_softif() - Handle events for soft interfaces + * batadv_hard_if_event_meshif() - Handle events for mesh interfaces * @event: NETDEV_* event to handle * @net_dev: net_device which generated an event * * Return: NOTIFY_* result */ -static int batadv_hard_if_event_softif(unsigned long event, +static int batadv_hard_if_event_meshif(unsigned long event, struct net_device *net_dev) { struct batadv_priv *bat_priv; @@ -946,7 +940,7 @@ static int batadv_hard_if_event_softif(unsigned long event, switch (event) { case NETDEV_REGISTER: bat_priv = netdev_priv(net_dev); - batadv_softif_create_vlan(bat_priv, BATADV_NO_FLAGS); + batadv_meshif_create_vlan(bat_priv, BATADV_NO_FLAGS); break; } @@ -961,8 +955,8 @@ static int batadv_hard_if_event(struct notifier_block *this, struct batadv_hard_iface *primary_if = NULL; struct batadv_priv *bat_priv; - if (batadv_softif_is_valid(net_dev)) - return batadv_hard_if_event_softif(event, net_dev); + if (batadv_meshif_is_valid(net_dev)) + return batadv_hard_if_event_meshif(event, net_dev); hard_iface = batadv_hardif_get_by_netdev(net_dev); if (!hard_iface && (event == NETDEV_REGISTER || @@ -988,8 +982,8 @@ static int batadv_hard_if_event(struct notifier_block *this, batadv_hardif_remove_interface(hard_iface); break; case NETDEV_CHANGEMTU: - if (hard_iface->soft_iface) - batadv_update_min_mtu(hard_iface->soft_iface); + if (hard_iface->mesh_iface) + batadv_update_min_mtu(hard_iface->mesh_iface); break; case NETDEV_CHANGEADDR: if (hard_iface->if_status == BATADV_IF_NOT_IN_USE) @@ -997,7 +991,7 @@ static int batadv_hard_if_event(struct notifier_block *this, batadv_check_known_mac_addr(hard_iface->net_dev); - bat_priv = netdev_priv(hard_iface->soft_iface); + bat_priv = netdev_priv(hard_iface->mesh_iface); bat_priv->algo_ops->iface.update_mac(hard_iface); primary_if = batadv_primary_if_get_selected(bat_priv); diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h index 64f660dbbe54..262a78364742 100644 --- a/net/batman-adv/hard-interface.h +++ b/net/batman-adv/hard-interface.h @@ -23,12 +23,12 @@ enum batadv_hard_if_state { /** * @BATADV_IF_NOT_IN_USE: interface is not used as slave interface of a - * batman-adv soft interface + * batman-adv mesh interface */ BATADV_IF_NOT_IN_USE, /** - * @BATADV_IF_TO_BE_REMOVED: interface will be removed from soft + * @BATADV_IF_TO_BE_REMOVED: interface will be removed from mesh * interface */ BATADV_IF_TO_BE_REMOVED, @@ -74,10 +74,10 @@ bool batadv_is_wifi_hardif(struct batadv_hard_iface *hard_iface); struct batadv_hard_iface* batadv_hardif_get_by_netdev(const struct net_device *net_dev); int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, - struct net_device *soft_iface); + struct net_device *mesh_iface); void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface); -int batadv_hardif_min_mtu(struct net_device *soft_iface); -void batadv_update_min_mtu(struct net_device *soft_iface); +int batadv_hardif_min_mtu(struct net_device *mesh_iface); +void batadv_update_min_mtu(struct net_device *mesh_iface); void batadv_hardif_release(struct kref *ref); int batadv_hardif_no_broadcast(struct batadv_hard_iface *if_outgoing, u8 *orig_addr, u8 *orig_neigh); @@ -97,7 +97,7 @@ static inline void batadv_hardif_put(struct batadv_hard_iface *hard_iface) /** * batadv_primary_if_get_selected() - Get reference to primary interface - * @bat_priv: the bat priv with all the soft 
interface information + * @bat_priv: the bat priv with all the mesh interface information * * Return: primary interface (with increased refcnt), otherwise NULL */ diff --git a/net/batman-adv/log.c b/net/batman-adv/log.c index 7a93a1e94c40..c19d07eeb070 100644 --- a/net/batman-adv/log.c +++ b/net/batman-adv/log.c @@ -13,7 +13,7 @@ /** * batadv_debug_log() - Add debug log entry - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @fmt: format string * * Return: 0 on success or negative error number in case of failure diff --git a/net/batman-adv/log.h b/net/batman-adv/log.h index 6717c965f0fa..567afaa8df99 100644 --- a/net/batman-adv/log.h +++ b/net/batman-adv/log.h @@ -71,7 +71,7 @@ __printf(2, 3); /** * _batadv_dbg() - Store debug output with(out) rate limiting * @type: type of debug message - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @ratelimited: whether output should be rate limited * @fmt: format string * @arg: variable arguments @@ -97,7 +97,7 @@ static inline void _batadv_dbg(int type __always_unused, /** * batadv_dbg() - Store debug output without rate limiting * @type: type of debug message - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @arg: format string and variable arguments */ #define batadv_dbg(type, bat_priv, arg...) \ @@ -106,7 +106,7 @@ static inline void _batadv_dbg(int type __always_unused, /** * batadv_dbg_ratelimited() - Store debug output with rate limiting * @type: type of debug message - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @arg: format string and variable arguments */ #define batadv_dbg_ratelimited(type, bat_priv, arg...) 
\ @@ -114,7 +114,7 @@ static inline void _batadv_dbg(int type __always_unused, /** * batadv_info() - Store message in debug buffer and print it to kmsg buffer - * @net_dev: the soft interface net device + * @net_dev: the mesh interface net device * @fmt: format string * @arg: variable arguments */ @@ -128,7 +128,7 @@ static inline void _batadv_dbg(int type __always_unused, /** * batadv_err() - Store error in debug buffer and print it to kmsg buffer - * @net_dev: the soft interface net device + * @net_dev: the mesh interface net device * @fmt: format string * @arg: variable arguments */ diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index 333e947afcce..a08132888a3d 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -51,13 +51,13 @@ #include "gateway_common.h" #include "hard-interface.h" #include "log.h" +#include "mesh-interface.h" #include "multicast.h" #include "netlink.h" #include "network-coding.h" #include "originator.h" #include "routing.h" #include "send.h" -#include "soft-interface.h" #include "tp_meter.h" #include "translation-table.h" @@ -143,14 +143,14 @@ static void __exit batadv_exit(void) } /** - * batadv_mesh_init() - Initialize soft interface - * @soft_iface: netdev struct of the soft interface + * batadv_mesh_init() - Initialize mesh interface + * @mesh_iface: netdev struct of the mesh interface * * Return: 0 on success or negative error number in case of failure */ -int batadv_mesh_init(struct net_device *soft_iface) +int batadv_mesh_init(struct net_device *mesh_iface) { - struct batadv_priv *bat_priv = netdev_priv(soft_iface); + struct batadv_priv *bat_priv = netdev_priv(mesh_iface); int ret; spin_lock_init(&bat_priv->forw_bat_list_lock); @@ -167,7 +167,7 @@ int batadv_mesh_init(struct net_device *soft_iface) #endif spin_lock_init(&bat_priv->tvlv.container_list_lock); spin_lock_init(&bat_priv->tvlv.handler_list_lock); - spin_lock_init(&bat_priv->softif_vlan_list_lock); + spin_lock_init(&bat_priv->meshif_vlan_list_lock); spin_lock_init(&bat_priv->tp_list_lock); INIT_HLIST_HEAD(&bat_priv->forw_bat_list); @@ -186,7 +186,7 @@ int batadv_mesh_init(struct net_device *soft_iface) #endif INIT_HLIST_HEAD(&bat_priv->tvlv.container_list); INIT_HLIST_HEAD(&bat_priv->tvlv.handler_list); - INIT_HLIST_HEAD(&bat_priv->softif_vlan_list); + INIT_HLIST_HEAD(&bat_priv->meshif_vlan_list); INIT_HLIST_HEAD(&bat_priv->tp_list); bat_priv->gw.generation = 0; @@ -253,12 +253,12 @@ err_orig: } /** - * batadv_mesh_free() - Deinitialize soft interface - * @soft_iface: netdev struct of the soft interface + * batadv_mesh_free() - Deinitialize mesh interface + * @mesh_iface: netdev struct of the mesh interface */ -void batadv_mesh_free(struct net_device *soft_iface) +void batadv_mesh_free(struct net_device *mesh_iface) { - struct batadv_priv *bat_priv = netdev_priv(soft_iface); + struct batadv_priv *bat_priv = netdev_priv(mesh_iface); atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING); @@ -297,7 +297,7 @@ void batadv_mesh_free(struct net_device *soft_iface) /** * batadv_is_my_mac() - check if the given mac address belongs to any of the * real interfaces in the current mesh - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @addr: the address to check * * Return: 'true' if the mac address was found, false otherwise. 
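A compact userspace model may help readers skimming this rename follow what batadv_is_my_mac() does: walk every slave ("hard") interface, skip the ones that are inactive or enslaved to a different mesh interface, and compare the remaining MAC addresses, which is essentially all batadv_compare_eth() amounts to. The reduced struct and the ifindex standing in for the mesh_iface pointer are assumptions of this sketch; the kernel walks batadv_hardif_list under RCU and compares net_device pointers.

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define ETH_ALEN 6

struct hard_iface {
    unsigned char addr[ETH_ALEN];
    bool active;       /* stands in for if_status == BATADV_IF_ACTIVE */
    int mesh_ifindex;  /* stands in for hard_iface->mesh_iface */
};

static bool is_my_mac(const struct hard_iface *ifs, size_t n,
                      int mesh_ifindex, const unsigned char *addr)
{
    for (size_t i = 0; i < n; i++) {
        if (!ifs[i].active)
            continue;
        if (ifs[i].mesh_ifindex != mesh_ifindex)
            continue; /* enslaved to some other mesh interface */
        if (!memcmp(ifs[i].addr, addr, ETH_ALEN))
            return true; /* the batadv_compare_eth() equivalent */
    }
    return false;
}

int main(void)
{
    struct hard_iface ifs[] = {
        { {0x02, 0x11, 0x22, 0x33, 0x44, 0x55}, true,  1 },
        { {0x02, 0xaa, 0xbb, 0xcc, 0xdd, 0xee}, false, 1 }, /* inactive */
    };
    unsigned char probe[ETH_ALEN] = {0x02, 0x11, 0x22, 0x33, 0x44, 0x55};

    printf("mine: %d\n", is_my_mac(ifs, sizeof(ifs) / sizeof(ifs[0]),
                                   1, probe)); /* prints "mine: 1" */
    return 0;
}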
@@ -312,7 +312,7 @@ bool batadv_is_my_mac(struct batadv_priv *bat_priv, const u8 *addr) if (hard_iface->if_status != BATADV_IF_ACTIVE) continue; - if (hard_iface->soft_iface != bat_priv->soft_iface) + if (hard_iface->mesh_iface != bat_priv->mesh_iface) continue; if (batadv_compare_eth(hard_iface->net_dev->dev_addr, addr)) { @@ -457,10 +457,10 @@ int batadv_batman_skb_recv(struct sk_buff *skb, struct net_device *dev, if (unlikely(skb->mac_len != ETH_HLEN || !skb_mac_header(skb))) goto err_free; - if (!hard_iface->soft_iface) + if (!hard_iface->mesh_iface) goto err_free; - bat_priv = netdev_priv(hard_iface->soft_iface); + bat_priv = netdev_priv(hard_iface->mesh_iface); if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE) goto err_free; @@ -651,7 +651,7 @@ unsigned short batadv_get_vid(struct sk_buff *skb, size_t header_len) /** * batadv_vlan_ap_isola_get() - return AP isolation status for the given vlan - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @vid: the VLAN identifier for which the AP isolation attributed as to be * looked up * @@ -661,15 +661,15 @@ unsigned short batadv_get_vid(struct sk_buff *skb, size_t header_len) bool batadv_vlan_ap_isola_get(struct batadv_priv *bat_priv, unsigned short vid) { bool ap_isolation_enabled = false; - struct batadv_softif_vlan *vlan; + struct batadv_meshif_vlan *vlan; /* if the AP isolation is requested on a VLAN, then check for its * setting in the proper VLAN private data structure */ - vlan = batadv_softif_vlan_get(bat_priv, vid); + vlan = batadv_meshif_vlan_get(bat_priv, vid); if (vlan) { ap_isolation_enabled = atomic_read(&vlan->ap_isolation); - batadv_softif_vlan_put(vlan); + batadv_meshif_vlan_put(vlan); } return ap_isolation_enabled; @@ -677,7 +677,7 @@ bool batadv_vlan_ap_isola_get(struct batadv_priv *bat_priv, unsigned short vid) /** * batadv_throw_uevent() - Send an uevent with batman-adv specific env data - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @type: subsystem type of event. Stored in uevent's BATTYPE * @action: action type of event. Stored in uevent's BATACTION * @data: string with additional information to the event (ignored for @@ -692,7 +692,7 @@ int batadv_throw_uevent(struct batadv_priv *bat_priv, enum batadv_uev_type type, struct kobject *bat_kobj; char *uevent_env[4] = { NULL, NULL, NULL, NULL }; - bat_kobj = &bat_priv->soft_iface->dev.kobj; + bat_kobj = &bat_priv->mesh_iface->dev.kobj; uevent_env[0] = kasprintf(GFP_ATOMIC, "%s%s", BATADV_UEV_TYPE_VAR, diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 964f3088af5b..67af435ee04e 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -13,7 +13,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2025.0" +#define BATADV_SOURCE_VERSION "2025.1" #endif /* B.A.T.M.A.N. 
parameters */ @@ -22,6 +22,8 @@ #define BATADV_THROUGHPUT_MAX_VALUE 0xFFFFFFFF #define BATADV_JITTER 20 +#define BATADV_MAX_MTU (ETH_MAX_MTU - batadv_max_header_len()) + /* Time To Live of broadcast messages */ #define BATADV_TTL 50 @@ -102,9 +104,7 @@ */ #define BATADV_TQ_SIMILARITY_THRESHOLD 50 -/* should not be bigger than 512 bytes or change the size of - * forw_packet->direct_link_flags - */ +#define BATADV_MAX_AGGREGATION_PACKETS 32 #define BATADV_MAX_AGGREGATION_BYTES 512 #define BATADV_MAX_AGGREGATION_MS 100 @@ -129,10 +129,10 @@ #define BATADV_TP_MAX_NUM 5 /** - * enum batadv_mesh_state - State of a soft interface + * enum batadv_mesh_state - State of a mesh interface */ enum batadv_mesh_state { - /** @BATADV_MESH_INACTIVE: soft interface is not yet running */ + /** @BATADV_MESH_INACTIVE: mesh interface is not yet running */ BATADV_MESH_INACTIVE, /** @BATADV_MESH_ACTIVE: interface is up and running */ @@ -238,8 +238,8 @@ extern unsigned int batadv_hardif_generation; extern unsigned char batadv_broadcast_addr[]; extern struct workqueue_struct *batadv_event_workqueue; -int batadv_mesh_init(struct net_device *soft_iface); -void batadv_mesh_free(struct net_device *soft_iface); +int batadv_mesh_init(struct net_device *mesh_iface); +void batadv_mesh_free(struct net_device *mesh_iface); bool batadv_is_my_mac(struct batadv_priv *bat_priv, const u8 *addr); int batadv_max_header_len(void); void batadv_skb_set_priority(struct sk_buff *skb, int offset); @@ -345,8 +345,8 @@ static inline bool batadv_has_timed_out(unsigned long timestamp, #define batadv_seq_after(x, y) batadv_seq_before(y, x) /** - * batadv_add_counter() - Add to per cpu statistics counter of soft interface - * @bat_priv: the bat priv with all the soft interface information + * batadv_add_counter() - Add to per cpu statistics counter of mesh interface + * @bat_priv: the bat priv with all the mesh interface information * @idx: counter index which should be modified * @count: value to increase counter by * @@ -359,8 +359,8 @@ static inline void batadv_add_counter(struct batadv_priv *bat_priv, size_t idx, } /** - * batadv_inc_counter() - Increase per cpu statistics counter of soft interface - * @b: the bat priv with all the soft interface information + * batadv_inc_counter() - Increase per cpu statistics counter of mesh interface + * @b: the bat priv with all the mesh interface information * @i: counter index which should be modified */ #define batadv_inc_counter(b, i) batadv_add_counter(b, i, 1) diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/mesh-interface.c index e190fa954f22..59e7b5aacbc9 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/mesh-interface.c @@ -4,7 +4,7 @@ * Marek Lindner, Simon Wunderlich */ -#include "soft-interface.h" +#include "mesh-interface.h" #include "main.h" #include <linux/atomic.h> @@ -91,7 +91,7 @@ static int batadv_interface_release(struct net_device *dev) /** * batadv_sum_counter() - Sum the cpu-local counters for index 'idx' - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @idx: index of counter to sum up * * Return: sum of all cpu-local counters @@ -125,7 +125,7 @@ static struct net_device_stats *batadv_interface_stats(struct net_device *dev) static int batadv_interface_set_mac_addr(struct net_device *dev, void *p) { struct batadv_priv *bat_priv = netdev_priv(dev); - struct batadv_softif_vlan *vlan; + struct batadv_meshif_vlan *vlan; struct sockaddr *addr = p; u8 
old_addr[ETH_ALEN]; @@ -140,7 +140,7 @@ static int batadv_interface_set_mac_addr(struct net_device *dev, void *p) return 0; rcu_read_lock(); - hlist_for_each_entry_rcu(vlan, &bat_priv->softif_vlan_list, list) { + hlist_for_each_entry_rcu(vlan, &bat_priv->meshif_vlan_list, list) { batadv_tt_local_remove(bat_priv, old_addr, vlan->vid, "mac address changed", false); batadv_tt_local_add(dev, addr->sa_data, vlan->vid, @@ -170,7 +170,7 @@ static int batadv_interface_change_mtu(struct net_device *dev, int new_mtu) * @dev: registered network device to modify * * We do not actually need to set any rx filters for the virtual batman - * soft interface. However a dummy handler enables a user to set static + * mesh interface. However, a dummy handler enables a user to set static * multicast listeners for instance. */ static void batadv_interface_set_rx_mode(struct net_device *dev) @@ -178,10 +178,10 @@ static void batadv_interface_set_rx_mode(struct net_device *dev) } static netdev_tx_t batadv_interface_tx(struct sk_buff *skb, - struct net_device *soft_iface) + struct net_device *mesh_iface) { struct ethhdr *ethhdr; - struct batadv_priv *bat_priv = netdev_priv(soft_iface); + struct batadv_priv *bat_priv = netdev_priv(mesh_iface); struct batadv_hard_iface *primary_if = NULL; struct batadv_bcast_packet *bcast_packet; static const u8 stp_addr[ETH_ALEN] = {0x01, 0x80, 0xC2, 0x00, @@ -209,7 +209,7 @@ static netdev_tx_t batadv_interface_tx(struct sk_buff *skb, /* reset control block to avoid left overs from previous users */ memset(skb->cb, 0, sizeof(struct batadv_skb_cb)); - netif_trans_update(soft_iface); + netif_trans_update(mesh_iface); vid = batadv_get_vid(skb, 0); skb_reset_mac_header(skb); @@ -246,7 +246,7 @@ static netdev_tx_t batadv_interface_tx(struct sk_buff *skb, /* Register the client MAC in the transtable */ if (!is_multicast_ether_addr(ethhdr->h_source) && !batadv_bla_is_loopdetect_mac(ethhdr->h_source)) { - client_added = batadv_tt_local_add(soft_iface, ethhdr->h_source, + client_added = batadv_tt_local_add(mesh_iface, ethhdr->h_source, vid, skb->skb_iif, skb->mark); if (!client_added) @@ -397,12 +397,12 @@ end: /** * batadv_interface_rx() - receive ethernet frame on local batman-adv interface - * @soft_iface: local interface which will receive the ethernet frame - * @skb: ethernet frame for @soft_iface + * @mesh_iface: local interface which will receive the ethernet frame + * @skb: ethernet frame for @mesh_iface * @hdr_size: size of already parsed batman-adv header * @orig_node: originator from which the batman-adv packet was sent * - * Sends an ethernet frame to the receive path of the local @soft_iface. + * Sends an ethernet frame to the receive path of the local @mesh_iface. * skb->data still points to the batman-adv header with the size @hdr_size. * The caller has to have parsed this header already and made sure that at least * @hdr_size bytes are still available for pull in @skb. @@ -412,12 +412,12 @@ end: * unicast packets will be dropped directly when they were sent between two * isolated clients.
*/ -void batadv_interface_rx(struct net_device *soft_iface, +void batadv_interface_rx(struct net_device *mesh_iface, struct sk_buff *skb, int hdr_size, struct batadv_orig_node *orig_node) { struct batadv_bcast_packet *batadv_bcast_packet; - struct batadv_priv *bat_priv = netdev_priv(soft_iface); + struct batadv_priv *bat_priv = netdev_priv(mesh_iface); struct vlan_ethhdr *vhdr; struct ethhdr *ethhdr; unsigned short vid; @@ -457,7 +457,7 @@ void batadv_interface_rx(struct net_device *soft_iface, } /* skb->dev & skb->pkt_type are set here */ - skb->protocol = eth_type_trans(skb, soft_iface); + skb->protocol = eth_type_trans(skb, mesh_iface); skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); batadv_inc_counter(bat_priv, BATADV_CNT_RX); @@ -502,38 +502,38 @@ out: } /** - * batadv_softif_vlan_release() - release vlan from lists and queue for free + * batadv_meshif_vlan_release() - release vlan from lists and queue for free * after rcu grace period * @ref: kref pointer of the vlan object */ -void batadv_softif_vlan_release(struct kref *ref) +void batadv_meshif_vlan_release(struct kref *ref) { - struct batadv_softif_vlan *vlan; + struct batadv_meshif_vlan *vlan; - vlan = container_of(ref, struct batadv_softif_vlan, refcount); + vlan = container_of(ref, struct batadv_meshif_vlan, refcount); - spin_lock_bh(&vlan->bat_priv->softif_vlan_list_lock); + spin_lock_bh(&vlan->bat_priv->meshif_vlan_list_lock); hlist_del_rcu(&vlan->list); - spin_unlock_bh(&vlan->bat_priv->softif_vlan_list_lock); + spin_unlock_bh(&vlan->bat_priv->meshif_vlan_list_lock); kfree_rcu(vlan, rcu); } /** - * batadv_softif_vlan_get() - get the vlan object for a specific vid - * @bat_priv: the bat priv with all the soft interface information + * batadv_meshif_vlan_get() - get the vlan object for a specific vid + * @bat_priv: the bat priv with all the mesh interface information * @vid: the identifier of the vlan object to retrieve * * Return: the private data of the vlan matching the vid passed as argument or * NULL otherwise. The refcounter of the returned object is incremented by 1. */ -struct batadv_softif_vlan *batadv_softif_vlan_get(struct batadv_priv *bat_priv, +struct batadv_meshif_vlan *batadv_meshif_vlan_get(struct batadv_priv *bat_priv, unsigned short vid) { - struct batadv_softif_vlan *vlan_tmp, *vlan = NULL; + struct batadv_meshif_vlan *vlan_tmp, *vlan = NULL; rcu_read_lock(); - hlist_for_each_entry_rcu(vlan_tmp, &bat_priv->softif_vlan_list, list) { + hlist_for_each_entry_rcu(vlan_tmp, &bat_priv->meshif_vlan_list, list) { if (vlan_tmp->vid != vid) continue; @@ -549,28 +549,28 @@ struct batadv_softif_vlan *batadv_softif_vlan_get(struct batadv_priv *bat_priv, } /** - * batadv_softif_create_vlan() - allocate the needed resources for a new vlan - * @bat_priv: the bat priv with all the soft interface information + * batadv_meshif_create_vlan() - allocate the needed resources for a new vlan + * @bat_priv: the bat priv with all the mesh interface information * @vid: the VLAN identifier * * Return: 0 on success, a negative error otherwise. 
*/ -int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid) +int batadv_meshif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid) { - struct batadv_softif_vlan *vlan; + struct batadv_meshif_vlan *vlan; - spin_lock_bh(&bat_priv->softif_vlan_list_lock); + spin_lock_bh(&bat_priv->meshif_vlan_list_lock); - vlan = batadv_softif_vlan_get(bat_priv, vid); + vlan = batadv_meshif_vlan_get(bat_priv, vid); if (vlan) { - batadv_softif_vlan_put(vlan); - spin_unlock_bh(&bat_priv->softif_vlan_list_lock); + batadv_meshif_vlan_put(vlan); + spin_unlock_bh(&bat_priv->meshif_vlan_list_lock); return -EEXIST; } vlan = kzalloc(sizeof(*vlan), GFP_ATOMIC); if (!vlan) { - spin_unlock_bh(&bat_priv->softif_vlan_list_lock); + spin_unlock_bh(&bat_priv->meshif_vlan_list_lock); return -ENOMEM; } @@ -581,37 +581,37 @@ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid) atomic_set(&vlan->ap_isolation, 0); kref_get(&vlan->refcount); - hlist_add_head_rcu(&vlan->list, &bat_priv->softif_vlan_list); - spin_unlock_bh(&bat_priv->softif_vlan_list_lock); + hlist_add_head_rcu(&vlan->list, &bat_priv->meshif_vlan_list); + spin_unlock_bh(&bat_priv->meshif_vlan_list_lock); /* add a new TT local entry. This one will be marked with the NOPURGE * flag */ - batadv_tt_local_add(bat_priv->soft_iface, - bat_priv->soft_iface->dev_addr, vid, + batadv_tt_local_add(bat_priv->mesh_iface, + bat_priv->mesh_iface->dev_addr, vid, BATADV_NULL_IFINDEX, BATADV_NO_MARK); - /* don't return reference to new softif_vlan */ - batadv_softif_vlan_put(vlan); + /* don't return reference to new meshif_vlan */ + batadv_meshif_vlan_put(vlan); return 0; } /** - * batadv_softif_destroy_vlan() - remove and destroy a softif_vlan object - * @bat_priv: the bat priv with all the soft interface information + * batadv_meshif_destroy_vlan() - remove and destroy a meshif_vlan object + * @bat_priv: the bat priv with all the mesh interface information * @vlan: the object to remove */ -static void batadv_softif_destroy_vlan(struct batadv_priv *bat_priv, - struct batadv_softif_vlan *vlan) +static void batadv_meshif_destroy_vlan(struct batadv_priv *bat_priv, + struct batadv_meshif_vlan *vlan) { /* explicitly remove the associated TT local entry because it is marked * with the NOPURGE flag */ - batadv_tt_local_remove(bat_priv, bat_priv->soft_iface->dev_addr, + batadv_tt_local_remove(bat_priv, bat_priv->mesh_iface->dev_addr, vlan->vid, "vlan interface destroyed", false); - batadv_softif_vlan_put(vlan); + batadv_meshif_vlan_put(vlan); } /** @@ -629,7 +629,7 @@ static int batadv_interface_add_vid(struct net_device *dev, __be16 proto, unsigned short vid) { struct batadv_priv *bat_priv = netdev_priv(dev); - struct batadv_softif_vlan *vlan; + struct batadv_meshif_vlan *vlan; /* only 802.1Q vlans are supported. * batman-adv does not know how to handle other types @@ -648,21 +648,21 @@ static int batadv_interface_add_vid(struct net_device *dev, __be16 proto, vid |= BATADV_VLAN_HAS_TAG; /* if a new vlan is getting created and it already exists, it means that - * it was not deleted yet. batadv_softif_vlan_get() increases the + * it was not deleted yet. batadv_meshif_vlan_get() increases the * refcount in order to revive the object. * * if it does not exist then create it. */ - vlan = batadv_softif_vlan_get(bat_priv, vid); + vlan = batadv_meshif_vlan_get(bat_priv, vid); if (!vlan) - return batadv_softif_create_vlan(bat_priv, vid); + return batadv_meshif_create_vlan(bat_priv, vid); /* add a new TT local entry. 
This one will be marked with the NOPURGE * flag. This must be added again, even if the vlan object already * exists, because the entry was deleted by kill_vid() */ - batadv_tt_local_add(bat_priv->soft_iface, - bat_priv->soft_iface->dev_addr, vid, + batadv_tt_local_add(bat_priv->mesh_iface, + bat_priv->mesh_iface->dev_addr, vid, BATADV_NULL_IFINDEX, BATADV_NO_MARK); return 0; @@ -684,7 +684,7 @@ static int batadv_interface_kill_vid(struct net_device *dev, __be16 proto, unsigned short vid) { struct batadv_priv *bat_priv = netdev_priv(dev); - struct batadv_softif_vlan *vlan; + struct batadv_meshif_vlan *vlan; /* only 802.1Q vlans are supported. batman-adv does not know how to * handle other types @@ -693,19 +693,19 @@ static int batadv_interface_kill_vid(struct net_device *dev, __be16 proto, return -EINVAL; /* "priority tag" frames are handled like "untagged" frames - * and no softif_vlan needs to be destroyed + * and no meshif_vlan needs to be destroyed */ if (vid == 0) return 0; - vlan = batadv_softif_vlan_get(bat_priv, vid | BATADV_VLAN_HAS_TAG); + vlan = batadv_meshif_vlan_get(bat_priv, vid | BATADV_VLAN_HAS_TAG); if (!vlan) return -ENOENT; - batadv_softif_destroy_vlan(bat_priv, vlan); + batadv_meshif_destroy_vlan(bat_priv, vlan); /* finally free the vlan object */ - batadv_softif_vlan_put(vlan); + batadv_meshif_vlan_put(vlan); return 0; } @@ -741,12 +741,12 @@ static void batadv_set_lockdep_class(struct net_device *dev) } /** - * batadv_softif_init_late() - late stage initialization of soft interface + * batadv_meshif_init_late() - late stage initialization of mesh interface * @dev: registered network device to modify * * Return: error code on failures */ -static int batadv_softif_init_late(struct net_device *dev) +static int batadv_meshif_init_late(struct net_device *dev) { struct batadv_priv *bat_priv; u32 random_seqno; @@ -756,7 +756,7 @@ static int batadv_softif_init_late(struct net_device *dev) batadv_set_lockdep_class(dev); bat_priv = netdev_priv(dev); - bat_priv->soft_iface = dev; + bat_priv->mesh_iface = dev; /* batadv_interface_stats() needs to be available as soon as * register_netdevice() has been called @@ -790,7 +790,7 @@ static int batadv_softif_init_late(struct net_device *dev) atomic_set(&bat_priv->log_level, 0); #endif atomic_set(&bat_priv->fragmentation, 1); - atomic_set(&bat_priv->packet_size_max, ETH_DATA_LEN); + atomic_set(&bat_priv->packet_size_max, BATADV_MAX_MTU); atomic_set(&bat_priv->bcast_queue_left, BATADV_BCAST_QUEUE_LEN); atomic_set(&bat_priv->batman_queue_left, BATADV_BATMAN_QUEUE_LEN); @@ -837,14 +837,14 @@ free_bat_counters: } /** - * batadv_softif_slave_add() - Add a slave interface to a batadv_soft_interface - * @dev: batadv_soft_interface used as master interface + * batadv_meshif_slave_add() - Add a slave interface to a batadv_mesh_interface + * @dev: batadv_mesh_interface used as master interface * @slave_dev: net_device which should become the slave interface * @extack: extended ACK report struct * * Return: 0 if successful or error otherwise. 
*/ -static int batadv_softif_slave_add(struct net_device *dev, +static int batadv_meshif_slave_add(struct net_device *dev, struct net_device *slave_dev, struct netlink_ext_ack *extack) { @@ -852,7 +852,7 @@ static int batadv_softif_slave_add(struct net_device *dev, int ret = -EINVAL; hard_iface = batadv_hardif_get_by_netdev(slave_dev); - if (!hard_iface || hard_iface->soft_iface) + if (!hard_iface || hard_iface->mesh_iface) goto out; ret = batadv_hardif_enable_interface(hard_iface, dev); @@ -863,13 +863,13 @@ out: } /** - * batadv_softif_slave_del() - Delete a slave iface from a batadv_soft_interface - * @dev: batadv_soft_interface used as master interface + * batadv_meshif_slave_del() - Delete a slave iface from a batadv_mesh_interface + * @dev: batadv_mesh_interface used as master interface * @slave_dev: net_device which should be removed from the master interface * * Return: 0 if successful or error otherwise. */ -static int batadv_softif_slave_del(struct net_device *dev, +static int batadv_meshif_slave_del(struct net_device *dev, struct net_device *slave_dev) { struct batadv_hard_iface *hard_iface; @@ -877,7 +877,7 @@ static int batadv_softif_slave_del(struct net_device *dev, hard_iface = batadv_hardif_get_by_netdev(slave_dev); - if (!hard_iface || hard_iface->soft_iface != dev) + if (!hard_iface || hard_iface->mesh_iface != dev) goto out; batadv_hardif_disable_interface(hard_iface); @@ -889,7 +889,7 @@ out: } static const struct net_device_ops batadv_netdev_ops = { - .ndo_init = batadv_softif_init_late, + .ndo_init = batadv_meshif_init_late, .ndo_open = batadv_interface_open, .ndo_stop = batadv_interface_release, .ndo_get_stats = batadv_interface_stats, @@ -900,8 +900,8 @@ static const struct net_device_ops batadv_netdev_ops = { .ndo_set_rx_mode = batadv_interface_set_rx_mode, .ndo_start_xmit = batadv_interface_tx, .ndo_validate_addr = eth_validate_addr, - .ndo_add_slave = batadv_softif_slave_add, - .ndo_del_slave = batadv_softif_slave_del, + .ndo_add_slave = batadv_meshif_slave_add, + .ndo_del_slave = batadv_meshif_slave_del, }; static void batadv_get_drvinfo(struct net_device *dev, @@ -1009,10 +1009,10 @@ static const struct ethtool_ops batadv_ethtool_ops = { }; /** - * batadv_softif_free() - Deconstructor of batadv_soft_interface + * batadv_meshif_free() - Deconstructor of batadv_mesh_interface * @dev: Device to cleanup and remove */ -static void batadv_softif_free(struct net_device *dev) +static void batadv_meshif_free(struct net_device *dev) { batadv_mesh_free(dev); @@ -1024,16 +1024,16 @@ static void batadv_softif_free(struct net_device *dev) } /** - * batadv_softif_init_early() - early stage initialization of soft interface + * batadv_meshif_init_early() - early stage initialization of mesh interface * @dev: registered network device to modify */ -static void batadv_softif_init_early(struct net_device *dev) +static void batadv_meshif_init_early(struct net_device *dev) { ether_setup(dev); dev->netdev_ops = &batadv_netdev_ops; dev->needs_free_netdev = true; - dev->priv_destructor = batadv_softif_free; + dev->priv_destructor = batadv_meshif_free; dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; dev->priv_flags |= IFF_NO_QUEUE; dev->lltx = true; @@ -1043,6 +1043,7 @@ static void batadv_softif_init_early(struct net_device *dev) * have not been initialized yet */ dev->mtu = ETH_DATA_LEN; + dev->max_mtu = BATADV_MAX_MTU; /* generate random address */ eth_hw_addr_random(dev); @@ -1051,14 +1052,14 @@ static void batadv_softif_init_early(struct net_device *dev) } /** - * 
batadv_softif_validate() - validate configuration of new batadv link + * batadv_meshif_validate() - validate configuration of new batadv link * @tb: IFLA_INFO_DATA netlink attributes * @data: enum batadv_ifla_attrs attributes * @extack: extended ACK report struct * * Return: 0 if successful or error otherwise. */ -static int batadv_softif_validate(struct nlattr *tb[], struct nlattr *data[], +static int batadv_meshif_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct batadv_algo_ops *algo_ops; @@ -1076,14 +1077,14 @@ static int batadv_softif_validate(struct nlattr *tb[], struct nlattr *data[], } /** - * batadv_softif_newlink() - pre-initialize and register new batadv link + * batadv_meshif_newlink() - pre-initialize and register new batadv link * @dev: network device to register * @params: rtnl newlink parameters * @extack: extended ACK report struct * * Return: 0 if successful or error otherwise. */ -static int batadv_softif_newlink(struct net_device *dev, +static int batadv_meshif_newlink(struct net_device *dev, struct rtnl_newlink_params *params, struct netlink_ext_ack *extack) { @@ -1103,40 +1104,40 @@ static int batadv_softif_newlink(struct net_device *dev, } /** - * batadv_softif_destroy_netlink() - deletion of batadv_soft_interface via + * batadv_meshif_destroy_netlink() - deletion of batadv_mesh_interface via * netlink - * @soft_iface: the to-be-removed batman-adv interface + * @mesh_iface: the to-be-removed batman-adv interface * @head: list pointer */ -static void batadv_softif_destroy_netlink(struct net_device *soft_iface, +static void batadv_meshif_destroy_netlink(struct net_device *mesh_iface, struct list_head *head) { - struct batadv_priv *bat_priv = netdev_priv(soft_iface); + struct batadv_priv *bat_priv = netdev_priv(mesh_iface); struct batadv_hard_iface *hard_iface; - struct batadv_softif_vlan *vlan; + struct batadv_meshif_vlan *vlan; list_for_each_entry(hard_iface, &batadv_hardif_list, list) { - if (hard_iface->soft_iface == soft_iface) + if (hard_iface->mesh_iface == mesh_iface) batadv_hardif_disable_interface(hard_iface); } /* destroy the "untagged" VLAN */ - vlan = batadv_softif_vlan_get(bat_priv, BATADV_NO_FLAGS); + vlan = batadv_meshif_vlan_get(bat_priv, BATADV_NO_FLAGS); if (vlan) { - batadv_softif_destroy_vlan(bat_priv, vlan); - batadv_softif_vlan_put(vlan); + batadv_meshif_destroy_vlan(bat_priv, vlan); + batadv_meshif_vlan_put(vlan); } - unregister_netdevice_queue(soft_iface, head); + unregister_netdevice_queue(mesh_iface, head); } /** - * batadv_softif_is_valid() - Check whether device is a batadv soft interface + * batadv_meshif_is_valid() - Check whether device is a batadv mesh interface * @net_dev: device which should be checked * * Return: true when net_dev is a batman-adv interface, false otherwise */ -bool batadv_softif_is_valid(const struct net_device *net_dev) +bool batadv_meshif_is_valid(const struct net_device *net_dev) { if (net_dev->netdev_ops->ndo_start_xmit == batadv_interface_tx) return true; @@ -1151,10 +1152,10 @@ static const struct nla_policy batadv_ifla_policy[IFLA_BATADV_MAX + 1] = { struct rtnl_link_ops batadv_link_ops __read_mostly = { .kind = "batadv", .priv_size = sizeof(struct batadv_priv), - .setup = batadv_softif_init_early, + .setup = batadv_meshif_init_early, .maxtype = IFLA_BATADV_MAX, .policy = batadv_ifla_policy, - .validate = batadv_softif_validate, - .newlink = batadv_softif_newlink, - .dellink = batadv_softif_destroy_netlink, + .validate = batadv_meshif_validate, + .newlink = 
batadv_meshif_newlink, + .dellink = batadv_meshif_destroy_netlink, }; diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/mesh-interface.h index 9f2003f1a497..7ba055b2bc26 100644 --- a/net/batman-adv/soft-interface.h +++ b/net/batman-adv/mesh-interface.h @@ -4,8 +4,8 @@ * Marek Lindner */ -#ifndef _NET_BATMAN_ADV_SOFT_INTERFACE_H_ -#define _NET_BATMAN_ADV_SOFT_INTERFACE_H_ +#ifndef _NET_BATMAN_ADV_MESH_INTERFACE_H_ +#define _NET_BATMAN_ADV_MESH_INTERFACE_H_ #include "main.h" @@ -16,27 +16,27 @@ #include <net/rtnetlink.h> int batadv_skb_head_push(struct sk_buff *skb, unsigned int len); -void batadv_interface_rx(struct net_device *soft_iface, +void batadv_interface_rx(struct net_device *mesh_iface, struct sk_buff *skb, int hdr_size, struct batadv_orig_node *orig_node); -bool batadv_softif_is_valid(const struct net_device *net_dev); +bool batadv_meshif_is_valid(const struct net_device *net_dev); extern struct rtnl_link_ops batadv_link_ops; -int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid); -void batadv_softif_vlan_release(struct kref *ref); -struct batadv_softif_vlan *batadv_softif_vlan_get(struct batadv_priv *bat_priv, +int batadv_meshif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid); +void batadv_meshif_vlan_release(struct kref *ref); +struct batadv_meshif_vlan *batadv_meshif_vlan_get(struct batadv_priv *bat_priv, unsigned short vid); /** - * batadv_softif_vlan_put() - decrease the vlan object refcounter and + * batadv_meshif_vlan_put() - decrease the vlan object refcounter and * possibly release it * @vlan: the vlan object to release */ -static inline void batadv_softif_vlan_put(struct batadv_softif_vlan *vlan) +static inline void batadv_meshif_vlan_put(struct batadv_meshif_vlan *vlan) { if (!vlan) return; - kref_put(&vlan->refcount, batadv_softif_vlan_release); + kref_put(&vlan->refcount, batadv_meshif_vlan_release); } -#endif /* _NET_BATMAN_ADV_SOFT_INTERFACE_H_ */ +#endif /* _NET_BATMAN_ADV_MESH_INTERFACE_H_ */ diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index d95c418484fa..5786680aff30 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -63,7 +63,7 @@ static void batadv_mcast_mla_update(struct work_struct *work); /** * batadv_mcast_start_timer() - schedule the multicast periodic worker - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information */ static void batadv_mcast_start_timer(struct batadv_priv *bat_priv) { @@ -72,18 +72,18 @@ static void batadv_mcast_start_timer(struct batadv_priv *bat_priv) } /** - * batadv_mcast_get_bridge() - get the bridge on top of the softif if it exists - * @soft_iface: netdev struct of the mesh interface + * batadv_mcast_get_bridge() - get the bridge on top of the meshif if it exists + * @mesh_iface: netdev struct of the mesh interface * - * If the given soft interface has a bridge on top then the refcount + * If the given mesh interface has a bridge on top then the refcount * of the according net device is increased. * * Return: NULL if no such bridge exists. Otherwise the net device of the * bridge. 
*/ -static struct net_device *batadv_mcast_get_bridge(struct net_device *soft_iface) +static struct net_device *batadv_mcast_get_bridge(struct net_device *mesh_iface) { - struct net_device *upper = soft_iface; + struct net_device *upper = mesh_iface; rcu_read_lock(); do { @@ -97,7 +97,7 @@ static struct net_device *batadv_mcast_get_bridge(struct net_device *soft_iface) } /** - * batadv_mcast_mla_rtr_flags_softif_get_ipv4() - get mcast router flags from + * batadv_mcast_mla_rtr_flags_meshif_get_ipv4() - get mcast router flags from * node for IPv4 * @dev: the interface to check * @@ -107,7 +107,7 @@ static struct net_device *batadv_mcast_get_bridge(struct net_device *soft_iface) * * Return: BATADV_NO_FLAGS if present, BATADV_MCAST_WANT_NO_RTR4 otherwise. */ -static u8 batadv_mcast_mla_rtr_flags_softif_get_ipv4(struct net_device *dev) +static u8 batadv_mcast_mla_rtr_flags_meshif_get_ipv4(struct net_device *dev) { struct in_device *in_dev = __in_dev_get_rcu(dev); @@ -118,7 +118,7 @@ static u8 batadv_mcast_mla_rtr_flags_softif_get_ipv4(struct net_device *dev) } /** - * batadv_mcast_mla_rtr_flags_softif_get_ipv6() - get mcast router flags from + * batadv_mcast_mla_rtr_flags_meshif_get_ipv6() - get mcast router flags from * node for IPv6 * @dev: the interface to check * @@ -129,7 +129,7 @@ static u8 batadv_mcast_mla_rtr_flags_softif_get_ipv4(struct net_device *dev) * Return: BATADV_NO_FLAGS if present, BATADV_MCAST_WANT_NO_RTR6 otherwise. */ #if IS_ENABLED(CONFIG_IPV6_MROUTE) -static u8 batadv_mcast_mla_rtr_flags_softif_get_ipv6(struct net_device *dev) +static u8 batadv_mcast_mla_rtr_flags_meshif_get_ipv6(struct net_device *dev) { struct inet6_dev *in6_dev = __in6_dev_get(dev); @@ -140,16 +140,16 @@ static u8 batadv_mcast_mla_rtr_flags_softif_get_ipv6(struct net_device *dev) } #else static inline u8 -batadv_mcast_mla_rtr_flags_softif_get_ipv6(struct net_device *dev) +batadv_mcast_mla_rtr_flags_meshif_get_ipv6(struct net_device *dev) { return BATADV_MCAST_WANT_NO_RTR6; } #endif /** - * batadv_mcast_mla_rtr_flags_softif_get() - get mcast router flags from node - * @bat_priv: the bat priv with all the soft interface information - * @bridge: bridge interface on top of the soft_iface if present, + * batadv_mcast_mla_rtr_flags_meshif_get() - get mcast router flags from node + * @bat_priv: the bat priv with all the mesh interface information + * @bridge: bridge interface on top of the mesh_iface if present, * otherwise pass NULL * * Checks the presence of IPv4 and IPv6 multicast routers on this @@ -161,16 +161,16 @@ batadv_mcast_mla_rtr_flags_softif_get_ipv6(struct net_device *dev) * BATADV_MCAST_WANT_NO_RTR6: No IPv6 multicast router is present * The former two OR'd: no multicast router is present */ -static u8 batadv_mcast_mla_rtr_flags_softif_get(struct batadv_priv *bat_priv, +static u8 batadv_mcast_mla_rtr_flags_meshif_get(struct batadv_priv *bat_priv, struct net_device *bridge) { - struct net_device *dev = bridge ? bridge : bat_priv->soft_iface; + struct net_device *dev = bridge ? 
bridge : bat_priv->mesh_iface; u8 flags = BATADV_NO_FLAGS; rcu_read_lock(); - flags |= batadv_mcast_mla_rtr_flags_softif_get_ipv4(dev); - flags |= batadv_mcast_mla_rtr_flags_softif_get_ipv6(dev); + flags |= batadv_mcast_mla_rtr_flags_meshif_get_ipv4(dev); + flags |= batadv_mcast_mla_rtr_flags_meshif_get_ipv6(dev); rcu_read_unlock(); @@ -179,8 +179,8 @@ static u8 batadv_mcast_mla_rtr_flags_softif_get(struct batadv_priv *bat_priv, /** * batadv_mcast_mla_rtr_flags_bridge_get() - get mcast router flags from bridge - * @bat_priv: the bat priv with all the soft interface information - * @bridge: bridge interface on top of the soft_iface if present, + * @bat_priv: the bat priv with all the mesh interface information + * @bridge: bridge interface on top of the mesh_iface if present, * otherwise pass NULL * * Checks the presence of IPv4 and IPv6 multicast routers behind a bridge. @@ -194,7 +194,7 @@ static u8 batadv_mcast_mla_rtr_flags_softif_get(struct batadv_priv *bat_priv, static u8 batadv_mcast_mla_rtr_flags_bridge_get(struct batadv_priv *bat_priv, struct net_device *bridge) { - struct net_device *dev = bat_priv->soft_iface; + struct net_device *dev = bat_priv->mesh_iface; u8 flags = BATADV_NO_FLAGS; if (!bridge) @@ -210,8 +210,8 @@ static u8 batadv_mcast_mla_rtr_flags_bridge_get(struct batadv_priv *bat_priv, /** * batadv_mcast_mla_rtr_flags_get() - get multicast router flags - * @bat_priv: the bat priv with all the soft interface information - * @bridge: bridge interface on top of the soft_iface if present, + * @bat_priv: the bat priv with all the mesh interface information + * @bridge: bridge interface on top of the mesh_iface if present, * otherwise pass NULL * * Checks the presence of IPv4 and IPv6 multicast routers on this @@ -228,7 +228,7 @@ static u8 batadv_mcast_mla_rtr_flags_get(struct batadv_priv *bat_priv, { u8 flags = BATADV_MCAST_WANT_NO_RTR4 | BATADV_MCAST_WANT_NO_RTR6; - flags &= batadv_mcast_mla_rtr_flags_softif_get(bat_priv, bridge); + flags &= batadv_mcast_mla_rtr_flags_meshif_get(bat_priv, bridge); flags &= batadv_mcast_mla_rtr_flags_bridge_get(bat_priv, bridge); return flags; @@ -236,7 +236,7 @@ static u8 batadv_mcast_mla_rtr_flags_get(struct batadv_priv *bat_priv, /** * batadv_mcast_mla_forw_flags_get() - get multicast forwarding flags - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * * Checks if all active hard interfaces have an MTU larger or equal to 1280 * bytes (IPv6 minimum MTU). @@ -252,7 +252,7 @@ static u8 batadv_mcast_mla_forw_flags_get(struct batadv_priv *bat_priv) if (hard_iface->if_status != BATADV_IF_ACTIVE) continue; - if (hard_iface->soft_iface != bat_priv->soft_iface) + if (hard_iface->mesh_iface != bat_priv->mesh_iface) continue; if (hard_iface->net_dev->mtu < IPV6_MIN_MTU) { @@ -267,7 +267,7 @@ static u8 batadv_mcast_mla_forw_flags_get(struct batadv_priv *bat_priv) /** * batadv_mcast_mla_flags_get() - get the new multicast flags - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * * Return: A set of flags for the current/next TVLV, querier and * bridge state. 
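The AND-combination described above is easy to verify in isolation. The sketch below mirrors the shape of batadv_mcast_mla_rtr_flags_get(): start pessimistic with both WANT_NO_RTR bits set, then AND in what the mesh interface and the bridge each report, so a router detected on either side clears the matching bit. The numeric flag values and the rtr_probe() helper are assumptions of the sketch, not taken from the batman-adv packet headers.

#include <stdio.h>

#define BATADV_NO_FLAGS           0x00
#define BATADV_MCAST_WANT_NO_RTR4 0x10 /* value assumed for the sketch */
#define BATADV_MCAST_WANT_NO_RTR6 0x20 /* value assumed for the sketch */

/* One probe per source (mesh interface, bridge): returns the
 * WANT_NO_RTR bits that this source can still guarantee. */
static unsigned char rtr_probe(int rtr4_seen, int rtr6_seen)
{
    unsigned char flags = BATADV_NO_FLAGS;

    if (!rtr4_seen)
        flags |= BATADV_MCAST_WANT_NO_RTR4;
    if (!rtr6_seen)
        flags |= BATADV_MCAST_WANT_NO_RTR6;
    return flags;
}

int main(void)
{
    unsigned char flags = BATADV_MCAST_WANT_NO_RTR4 |
                          BATADV_MCAST_WANT_NO_RTR6;

    flags &= rtr_probe(1, 0); /* mesh iface side: IPv4 router present */
    flags &= rtr_probe(0, 0); /* bridge side: no routers seen */

    /* IPv4 bit cleared by the first probe, IPv6 bit survives both */
    printf("no IPv4 router: %d\n", !!(flags & BATADV_MCAST_WANT_NO_RTR4)); /* 0 */
    printf("no IPv6 router: %d\n", !!(flags & BATADV_MCAST_WANT_NO_RTR6)); /* 1 */
    return 0;
}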
@@ -275,7 +275,7 @@ static u8 batadv_mcast_mla_forw_flags_get(struct batadv_priv *bat_priv) static struct batadv_mcast_mla_flags batadv_mcast_mla_flags_get(struct batadv_priv *bat_priv) { - struct net_device *dev = bat_priv->soft_iface; + struct net_device *dev = bat_priv->mesh_iface; struct batadv_mcast_querier_state *qr4, *qr6; struct batadv_mcast_mla_flags mla_flags; struct net_device *bridge; @@ -351,13 +351,13 @@ static bool batadv_mcast_mla_is_duplicate(u8 *mcast_addr, } /** - * batadv_mcast_mla_softif_get_ipv4() - get softif IPv4 multicast listeners + * batadv_mcast_mla_meshif_get_ipv4() - get meshif IPv4 multicast listeners * @dev: the device to collect multicast addresses from * @mcast_list: a list to put found addresses into * @flags: flags indicating the new multicast state * * Collects multicast addresses of IPv4 multicast listeners residing - * on this kernel on the given soft interface, dev, in + * on this kernel on the given mesh interface, dev, in * the given mcast_list. In general, multicast listeners provided by * your multicast receiving applications run directly on this node. * @@ -365,7 +365,7 @@ static bool batadv_mcast_mla_is_duplicate(u8 *mcast_addr, * items added to the mcast_list otherwise. */ static int -batadv_mcast_mla_softif_get_ipv4(struct net_device *dev, +batadv_mcast_mla_meshif_get_ipv4(struct net_device *dev, struct hlist_head *mcast_list, struct batadv_mcast_mla_flags *flags) { @@ -417,13 +417,13 @@ batadv_mcast_mla_softif_get_ipv4(struct net_device *dev, } /** - * batadv_mcast_mla_softif_get_ipv6() - get softif IPv6 multicast listeners + * batadv_mcast_mla_meshif_get_ipv6() - get meshif IPv6 multicast listeners * @dev: the device to collect multicast addresses from * @mcast_list: a list to put found addresses into * @flags: flags indicating the new multicast state * * Collects multicast addresses of IPv6 multicast listeners residing - * on this kernel on the given soft interface, dev, in + * on this kernel on the given mesh interface, dev, in * the given mcast_list. In general, multicast listeners provided by * your multicast receiving applications run directly on this node. * @@ -432,7 +432,7 @@ batadv_mcast_mla_softif_get_ipv4(struct net_device *dev, */ #if IS_ENABLED(CONFIG_IPV6) static int -batadv_mcast_mla_softif_get_ipv6(struct net_device *dev, +batadv_mcast_mla_meshif_get_ipv6(struct net_device *dev, struct hlist_head *mcast_list, struct batadv_mcast_mla_flags *flags) { @@ -490,7 +490,7 @@ batadv_mcast_mla_softif_get_ipv6(struct net_device *dev, } #else static inline int -batadv_mcast_mla_softif_get_ipv6(struct net_device *dev, +batadv_mcast_mla_meshif_get_ipv6(struct net_device *dev, struct hlist_head *mcast_list, struct batadv_mcast_mla_flags *flags) { @@ -499,13 +499,13 @@ batadv_mcast_mla_softif_get_ipv6(struct net_device *dev, #endif /** - * batadv_mcast_mla_softif_get() - get softif multicast listeners + * batadv_mcast_mla_meshif_get() - get meshif multicast listeners * @dev: the device to collect multicast addresses from * @mcast_list: a list to put found addresses into * @flags: flags indicating the new multicast state * * Collects multicast addresses of multicast listeners residing - * on this kernel on the given soft interface, dev, in + * on this kernel on the given mesh interface, dev, in * the given mcast_list. In general, multicast listeners provided by * your multicast receiving applications run directly on this node. 
* @@ -518,7 +518,7 @@ batadv_mcast_mla_softif_get_ipv6(struct net_device *dev, * items added to the mcast_list otherwise. */ static int -batadv_mcast_mla_softif_get(struct net_device *dev, +batadv_mcast_mla_meshif_get(struct net_device *dev, struct hlist_head *mcast_list, struct batadv_mcast_mla_flags *flags) { @@ -528,11 +528,11 @@ batadv_mcast_mla_softif_get(struct net_device *dev, if (bridge) dev = bridge; - ret4 = batadv_mcast_mla_softif_get_ipv4(dev, mcast_list, flags); + ret4 = batadv_mcast_mla_meshif_get_ipv4(dev, mcast_list, flags); if (ret4 < 0) goto out; - ret6 = batadv_mcast_mla_softif_get_ipv6(dev, mcast_list, flags); + ret6 = batadv_mcast_mla_meshif_get_ipv6(dev, mcast_list, flags); if (ret6 < 0) { ret4 = 0; goto out; @@ -576,7 +576,7 @@ static void batadv_mcast_mla_br_addr_cpy(char *dst, const struct br_ip *src) * * Collects multicast addresses of multicast listeners residing * on foreign, non-mesh devices which we gave access to our mesh via - * a bridge on top of the given soft interface, dev, in the given + * a bridge on top of the given mesh interface, dev, in the given * mcast_list. * * Return: -ENOMEM on memory allocation error or the number of @@ -672,7 +672,7 @@ static void batadv_mcast_mla_list_free(struct hlist_head *mcast_list) /** * batadv_mcast_mla_tt_retract() - clean up multicast listener announcements - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @mcast_list: a list of addresses which should _not_ be removed * * Retracts the announcement of any multicast listener from the @@ -704,7 +704,7 @@ static void batadv_mcast_mla_tt_retract(struct batadv_priv *bat_priv, /** * batadv_mcast_mla_tt_add() - add multicast listener announcements - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @mcast_list: a list of addresses which are going to get added * * Adds multicast listener announcements from the given mcast_list to the @@ -724,7 +724,7 @@ static void batadv_mcast_mla_tt_add(struct batadv_priv *bat_priv, &bat_priv->mcast.mla_list)) continue; - if (!batadv_tt_local_add(bat_priv->soft_iface, + if (!batadv_tt_local_add(bat_priv->mesh_iface, mcast_entry->addr, BATADV_NO_FLAGS, BATADV_NULL_IFINDEX, BATADV_NO_MARK)) continue; @@ -737,7 +737,7 @@ static void batadv_mcast_mla_tt_add(struct batadv_priv *bat_priv, /** * batadv_mcast_querier_log() - debug output regarding the querier status on * link - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @str_proto: a string for the querier protocol (e.g. "IGMP" or "MLD") * @old_state: the previous querier state on our link * @new_state: the new querier state on our link @@ -754,7 +754,7 @@ static void batadv_mcast_mla_tt_add(struct batadv_priv *bat_priv, * potentially shadowing listeners from us then. * * This is only interesting for nodes with a bridge on top of their - * soft interface. + * mesh interface. 
*/ static void batadv_mcast_querier_log(struct batadv_priv *bat_priv, char *str_proto, @@ -762,14 +762,14 @@ batadv_mcast_querier_log(struct batadv_priv *bat_priv, char *str_proto, struct batadv_mcast_querier_state *new_state) { if (!old_state->exists && new_state->exists) - batadv_info(bat_priv->soft_iface, "%s Querier appeared\n", + batadv_info(bat_priv->mesh_iface, "%s Querier appeared\n", str_proto); else if (old_state->exists && !new_state->exists) - batadv_info(bat_priv->soft_iface, + batadv_info(bat_priv->mesh_iface, "%s Querier disappeared - multicast optimizations disabled\n", str_proto); else if (!bat_priv->mcast.mla_flags.bridged && !new_state->exists) - batadv_info(bat_priv->soft_iface, + batadv_info(bat_priv->mesh_iface, "No %s Querier present - multicast optimizations disabled\n", str_proto); @@ -789,7 +789,7 @@ batadv_mcast_querier_log(struct batadv_priv *bat_priv, char *str_proto, /** * batadv_mcast_bridge_log() - debug output for topology changes in bridged * setups - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @new_flags: flags indicating the new multicast state * * If no bridges are ever used on this node, then this function does nothing. @@ -798,7 +798,7 @@ batadv_mcast_querier_log(struct batadv_priv *bat_priv, char *str_proto, * which might be relevant to our multicast optimizations. * * More precisely, it outputs information when a bridge interface is added or - * removed from a soft interface. And when a bridge is present, it further + * removed from a mesh interface. And when a bridge is present, it further * outputs information about the querier state which is relevant for the * multicast flags this node is going to set. */ @@ -827,7 +827,7 @@ batadv_mcast_bridge_log(struct batadv_priv *bat_priv, /** * batadv_mcast_flags_log() - output debug information about mcast flag changes - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @flags: TVLV flags indicating the new multicast state * * Whenever the multicast TVLV flags this node announces change, this function @@ -860,7 +860,7 @@ static void batadv_mcast_flags_log(struct batadv_priv *bat_priv, u8 flags) /** * batadv_mcast_mla_flags_update() - update multicast flags - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @flags: flags indicating the new multicast state * * Updates the own multicast tvlv with our current multicast related settings, @@ -889,7 +889,7 @@ batadv_mcast_mla_flags_update(struct batadv_priv *bat_priv, /** * __batadv_mcast_mla_update() - update the own MLAs - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * * Updates the own multicast listener announcements in the translation * table as well as the own, announced multicast tvlv container. 
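Reduced to plain arrays, the retract-then-add cycle that __batadv_mcast_mla_update() drives through batadv_mcast_mla_tt_retract() and batadv_mcast_mla_tt_add() looks like the sketch below: announcements with no backing local listener are retracted first, then announcements are added for listeners that are new. The contains() helper and the fixed-size arrays are assumptions of the sketch; the kernel works on hlists of TT entries.

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define ETH_ALEN 6

static bool contains(unsigned char (*list)[ETH_ALEN], size_t n,
                     const unsigned char *addr)
{
    for (size_t i = 0; i < n; i++)
        if (!memcmp(list[i], addr, ETH_ALEN))
            return true;
    return false;
}

int main(void)
{
    /* currently announced in the translation table */
    unsigned char announced[][ETH_ALEN] = {
        {0x33, 0x33, 0x00, 0x00, 0x00, 0x01}, /* stale */
        {0x01, 0x00, 0x5e, 0x00, 0x00, 0xfb}, /* still wanted */
    };
    /* freshly collected from the mesh interface (and bridge) */
    unsigned char collected[][ETH_ALEN] = {
        {0x01, 0x00, 0x5e, 0x00, 0x00, 0xfb},
        {0x33, 0x33, 0x00, 0x00, 0x00, 0xfb}, /* new listener */
    };

    /* retract pass, as in batadv_mcast_mla_tt_retract() */
    for (size_t i = 0; i < 2; i++)
        if (!contains(collected, 2, announced[i]))
            printf("retract %zu\n", i); /* prints "retract 0" */

    /* add pass, as in batadv_mcast_mla_tt_add() */
    for (size_t i = 0; i < 2; i++)
        if (!contains(announced, 2, collected[i]))
            printf("announce %zu\n", i); /* prints "announce 1" */

    return 0;
}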
@@ -901,18 +901,18 @@ batadv_mcast_mla_flags_update(struct batadv_priv *bat_priv, */ static void __batadv_mcast_mla_update(struct batadv_priv *bat_priv) { - struct net_device *soft_iface = bat_priv->soft_iface; + struct net_device *mesh_iface = bat_priv->mesh_iface; struct hlist_head mcast_list = HLIST_HEAD_INIT; struct batadv_mcast_mla_flags flags; int ret; flags = batadv_mcast_mla_flags_get(bat_priv); - ret = batadv_mcast_mla_softif_get(soft_iface, &mcast_list, &flags); + ret = batadv_mcast_mla_meshif_get(mesh_iface, &mcast_list, &flags); if (ret < 0) goto out; - ret = batadv_mcast_mla_bridge_get(soft_iface, &mcast_list, &flags); + ret = batadv_mcast_mla_bridge_get(mesh_iface, &mcast_list, &flags); if (ret < 0) goto out; @@ -977,7 +977,7 @@ static bool batadv_mcast_is_report_ipv4(struct sk_buff *skb) /** * batadv_mcast_forw_mode_check_ipv4() - check for optimized forwarding * potential - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the IPv4 packet to check * @is_unsnoopable: stores whether the destination is snoopable * @is_routable: stores whether the destination is routable @@ -1042,7 +1042,7 @@ static bool batadv_mcast_is_report_ipv6(struct sk_buff *skb) /** * batadv_mcast_forw_mode_check_ipv6() - check for optimized forwarding * potential - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the IPv6 packet to check * @is_unsnoopable: stores whether the destination is snoopable * @is_routable: stores whether the destination is routable @@ -1084,7 +1084,7 @@ static int batadv_mcast_forw_mode_check_ipv6(struct batadv_priv *bat_priv, /** * batadv_mcast_forw_mode_check() - check for optimized forwarding potential - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the multicast frame to check * @is_unsnoopable: stores whether the destination is snoopable * @is_routable: stores whether the destination is routable @@ -1124,7 +1124,7 @@ static int batadv_mcast_forw_mode_check(struct batadv_priv *bat_priv, /** * batadv_mcast_forw_want_all_ip_count() - count nodes with unspecific mcast * interest - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @ethhdr: ethernet header of a packet * * Return: the number of nodes which want all IPv4 multicast traffic if the @@ -1147,7 +1147,7 @@ static int batadv_mcast_forw_want_all_ip_count(struct batadv_priv *bat_priv, /** * batadv_mcast_forw_rtr_count() - count nodes with a multicast router - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @protocol: the ethernet protocol type to count multicast routers for * * Return: the number of nodes which want all routable IPv4 multicast traffic @@ -1170,7 +1170,7 @@ static int batadv_mcast_forw_rtr_count(struct batadv_priv *bat_priv, /** * batadv_mcast_forw_mode_by_count() - get forwarding mode by count - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the multicast packet to check * @vid: the vlan identifier * @is_routable: stores whether the destination is routable @@ -1214,7 +1214,7 @@ batadv_mcast_forw_mode_by_count(struct batadv_priv *bat_priv, /** * 
batadv_mcast_forw_mode() - check on how to forward a multicast packet - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the multicast packet to check * @vid: the vlan identifier * @is_routable: stores whether the destination is routable @@ -1259,7 +1259,7 @@ batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb, /** * batadv_mcast_forw_send_orig() - send a multicast packet to an originator - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the multicast packet to send * @vid: the vlan identifier * @orig_node: the originator to send the packet to @@ -1288,7 +1288,7 @@ static int batadv_mcast_forw_send_orig(struct batadv_priv *bat_priv, /** * batadv_mcast_forw_tt() - forwards a packet to multicast listeners - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the multicast packet to transmit * @vid: the vlan identifier * @@ -1336,7 +1336,7 @@ out: /** * batadv_mcast_forw_want_all_ipv4() - forward to nodes with want-all-ipv4 - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the multicast packet to transmit * @vid: the vlan identifier * @@ -1373,7 +1373,7 @@ batadv_mcast_forw_want_all_ipv4(struct batadv_priv *bat_priv, /** * batadv_mcast_forw_want_all_ipv6() - forward to nodes with want-all-ipv6 - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: The multicast packet to transmit * @vid: the vlan identifier * @@ -1410,7 +1410,7 @@ batadv_mcast_forw_want_all_ipv6(struct batadv_priv *bat_priv, /** * batadv_mcast_forw_want_all() - forward packet to nodes in a want-all list - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the multicast packet to transmit * @vid: the vlan identifier * @@ -1439,7 +1439,7 @@ batadv_mcast_forw_want_all(struct batadv_priv *bat_priv, /** * batadv_mcast_forw_want_all_rtr4() - forward to nodes with want-all-rtr4 - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the multicast packet to transmit * @vid: the vlan identifier * @@ -1476,7 +1476,7 @@ batadv_mcast_forw_want_all_rtr4(struct batadv_priv *bat_priv, /** * batadv_mcast_forw_want_all_rtr6() - forward to nodes with want-all-rtr6 - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: The multicast packet to transmit * @vid: the vlan identifier * @@ -1513,7 +1513,7 @@ batadv_mcast_forw_want_all_rtr6(struct batadv_priv *bat_priv, /** * batadv_mcast_forw_want_rtr() - forward packet to nodes in a want-all-rtr list - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the multicast packet to transmit * @vid: the vlan identifier * @@ -1542,7 +1542,7 @@ batadv_mcast_forw_want_rtr(struct batadv_priv *bat_priv, /** * batadv_mcast_forw_send() - send packet to any detected multicast recipient - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all 
the mesh interface information * @skb: the multicast packet to transmit * @vid: the vlan identifier * @is_routable: stores whether the destination is routable @@ -1590,7 +1590,7 @@ skip_mc_router: /** * batadv_mcast_want_unsnoop_update() - update unsnoop counter and list - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig: the orig_node which multicast state might have changed of * @mcast_flags: flags indicating the new multicast state * @@ -1636,7 +1636,7 @@ static void batadv_mcast_want_unsnoop_update(struct batadv_priv *bat_priv, /** * batadv_mcast_want_ipv4_update() - update want-all-ipv4 counter and list - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig: the orig_node which multicast state might have changed of * @mcast_flags: flags indicating the new multicast state * @@ -1681,7 +1681,7 @@ static void batadv_mcast_want_ipv4_update(struct batadv_priv *bat_priv, /** * batadv_mcast_want_ipv6_update() - update want-all-ipv6 counter and list - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig: the orig_node which multicast state might have changed of * @mcast_flags: flags indicating the new multicast state * @@ -1726,7 +1726,7 @@ static void batadv_mcast_want_ipv6_update(struct batadv_priv *bat_priv, /** * batadv_mcast_want_rtr4_update() - update want-all-rtr4 counter and list - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig: the orig_node which multicast state might have changed of * @mcast_flags: flags indicating the new multicast state * @@ -1771,7 +1771,7 @@ static void batadv_mcast_want_rtr4_update(struct batadv_priv *bat_priv, /** * batadv_mcast_want_rtr6_update() - update want-all-rtr6 counter and list - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig: the orig_node which multicast state might have changed of * @mcast_flags: flags indicating the new multicast state * @@ -1816,7 +1816,7 @@ static void batadv_mcast_want_rtr6_update(struct batadv_priv *bat_priv, /** * batadv_mcast_have_mc_ptype_update() - update multicast packet type counter - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig: the orig_node which multicast state might have changed of * @mcast_flags: flags indicating the new multicast state * @@ -1872,7 +1872,7 @@ batadv_mcast_tvlv_flags_get(bool enabled, void *tvlv_value, u16 tvlv_value_len) /** * batadv_mcast_tvlv_ogm_handler() - process incoming multicast tvlv container - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig: the orig_node of the ogm * @flags: flags indicating the tvlv state (see batadv_tvlv_handler_flags) * @tvlv_value: tvlv buffer containing the multicast data @@ -1915,7 +1915,7 @@ static void batadv_mcast_tvlv_ogm_handler(struct batadv_priv *bat_priv, /** * batadv_mcast_init() - initialize the multicast optimizations structures - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information */ void batadv_mcast_init(struct batadv_priv 
*bat_priv) { @@ -1934,7 +1934,7 @@ void batadv_mcast_init(struct batadv_priv *bat_priv) /** * batadv_mcast_mesh_info_put() - put multicast info into a netlink message * @msg: buffer for the message - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * * Return: 0 or error code. */ @@ -2060,7 +2060,7 @@ skip: * @msg: buffer for the message * @portid: netlink port * @cb: Control block containing additional options - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @bucket: current bucket to dump * @idx: index in current bucket to the next entry to dump * @@ -2103,15 +2103,15 @@ batadv_mcast_netlink_get_primary(struct netlink_callback *cb, struct batadv_hard_iface **primary_if) { struct batadv_hard_iface *hard_iface = NULL; - struct net_device *soft_iface; + struct net_device *mesh_iface; struct batadv_priv *bat_priv; int ret = 0; - soft_iface = batadv_netlink_get_softif(cb); - if (IS_ERR(soft_iface)) - return PTR_ERR(soft_iface); + mesh_iface = batadv_netlink_get_meshif(cb); + if (IS_ERR(mesh_iface)) + return PTR_ERR(mesh_iface); - bat_priv = netdev_priv(soft_iface); + bat_priv = netdev_priv(mesh_iface); hard_iface = batadv_primary_if_get_selected(bat_priv); if (!hard_iface || hard_iface->if_status != BATADV_IF_ACTIVE) { @@ -2120,7 +2120,7 @@ batadv_mcast_netlink_get_primary(struct netlink_callback *cb, } out: - dev_put(soft_iface); + dev_put(mesh_iface); if (!ret && primary_if) *primary_if = hard_iface; @@ -2150,7 +2150,7 @@ int batadv_mcast_flags_dump(struct sk_buff *msg, struct netlink_callback *cb) if (ret) return ret; - bat_priv = netdev_priv(primary_if->soft_iface); + bat_priv = netdev_priv(primary_if->mesh_iface); ret = __batadv_mcast_flags_dump(msg, portid, cb, bat_priv, bucket, idx); batadv_hardif_put(primary_if); @@ -2159,7 +2159,7 @@ int batadv_mcast_flags_dump(struct sk_buff *msg, struct netlink_callback *cb) /** * batadv_mcast_free() - free the multicast optimizations structures - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information */ void batadv_mcast_free(struct batadv_priv *bat_priv) { diff --git a/net/batman-adv/multicast_forw.c b/net/batman-adv/multicast_forw.c index fafd6ba8c056..b8668a80b94a 100644 --- a/net/batman-adv/multicast_forw.c +++ b/net/batman-adv/multicast_forw.c @@ -131,7 +131,7 @@ batadv_mcast_forw_orig_entry(struct hlist_node *node, /** * batadv_mcast_forw_push_dest() - push an originator MAC address onto an skb - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the skb to push the destination address onto * @vid: the vlan identifier * @orig_node: the originator node to get the MAC address from @@ -174,7 +174,7 @@ static bool batadv_mcast_forw_push_dest(struct batadv_priv *bat_priv, /** * batadv_mcast_forw_push_dests_list() - push originators from list onto an skb - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the skb to push the destination addresses onto * @vid: the vlan identifier * @head: the list to gather originators from @@ -215,7 +215,7 @@ static int batadv_mcast_forw_push_dests_list(struct batadv_priv *bat_priv, /** * batadv_mcast_forw_push_tt() - push originators with interest through TT - * @bat_priv: the bat priv 
with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the skb to push the destination addresses onto * @vid: the vlan identifier * @num_dests: a pointer to store the number of pushed addresses in @@ -262,7 +262,7 @@ out: /** * batadv_mcast_forw_push_want_all() - push originators with want-all flag - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the skb to push the destination addresses onto * @vid: the vlan identifier * @num_dests: a pointer to store the number of pushed addresses in @@ -308,7 +308,7 @@ static bool batadv_mcast_forw_push_want_all(struct batadv_priv *bat_priv, /** * batadv_mcast_forw_push_want_rtr() - push originators with want-router flag - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the skb to push the destination addresses onto * @vid: the vlan identifier * @num_dests: a pointer to store the number of pushed addresses in @@ -475,7 +475,7 @@ out: /** * batadv_mcast_forw_push_dests() - push originator addresses onto an skb - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the skb to push the destination addresses onto * @vid: the vlan identifier * @is_routable: indicates whether the destination is routable @@ -567,7 +567,7 @@ static int batadv_mcast_forw_push_tracker(struct sk_buff *skb, int num_dests, /** * batadv_mcast_forw_push_tvlvs() - push a multicast tracker TVLV onto an skb - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the skb to push the tracker TVLV onto * @vid: the vlan identifier * @is_routable: indicates whether the destination is routable @@ -634,7 +634,7 @@ batadv_mcast_forw_push_hdr(struct sk_buff *skb, unsigned short tvlv_len) /** * batadv_mcast_forw_scrub_dests() - scrub destinations in a tracker TVLV - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @comp_neigh: next hop neighbor to scrub+collect destinations for * @dest: start MAC entry in original skb's tracker TVLV * @next_dest: start MAC entry in to be sent skb's tracker TVLV @@ -905,7 +905,7 @@ static void batadv_mcast_forw_shrink_tracker(struct sk_buff *skb) /** * batadv_mcast_forw_packet() - forward a batman-adv multicast packet - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the received or locally generated batman-adv multicast packet * @local_xmit: indicates that the packet was locally generated and not received * @@ -920,7 +920,7 @@ static void batadv_mcast_forw_shrink_tracker(struct sk_buff *skb) * * Return: NET_RX_SUCCESS or NET_RX_DROP on success or a negative error * code on failure. NET_RX_SUCCESS if the received packet is supposed to be - * decapsulated and forwarded to the own soft interface, NET_RX_DROP otherwise. + * decapsulated and forwarded to the own mesh interface, NET_RX_DROP otherwise. 
*/ static int batadv_mcast_forw_packet(struct batadv_priv *bat_priv, struct sk_buff *skb, bool local_xmit) @@ -1028,7 +1028,7 @@ static int batadv_mcast_forw_packet(struct batadv_priv *bat_priv, /** * batadv_mcast_forw_tracker_tvlv_handler() - handle an mcast tracker tvlv - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the received batman-adv multicast packet * * Parses the tracker TVLV of an incoming batman-adv multicast packet and @@ -1042,7 +1042,7 @@ static int batadv_mcast_forw_packet(struct batadv_priv *bat_priv, * * Return: NET_RX_SUCCESS or NET_RX_DROP on success or a negative error * code on failure. NET_RX_SUCCESS if the received packet is supposed to be - * decapsulated and forwarded to the own soft interface, NET_RX_DROP otherwise. + * decapsulated and forwarded to the own mesh interface, NET_RX_DROP otherwise. */ int batadv_mcast_forw_tracker_tvlv_handler(struct batadv_priv *bat_priv, struct sk_buff *skb) @@ -1075,7 +1075,7 @@ unsigned int batadv_mcast_forw_packet_hdrlen(unsigned int num_dests) /** * batadv_mcast_forw_expand_head() - expand headroom for an mcast packet - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the multicast packet to send * * Tries to expand an skb's headroom so that its head to tail is 1298 @@ -1110,7 +1110,7 @@ static int batadv_mcast_forw_expand_head(struct batadv_priv *bat_priv, /** * batadv_mcast_forw_push() - encapsulate skb in a batman-adv multicast packet - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the multicast packet to encapsulate and send * @vid: the vlan identifier * @is_routable: indicates whether the destination is routable @@ -1154,7 +1154,7 @@ err: /** * batadv_mcast_forw_mcsend() - send a self prepared batman-adv multicast packet - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the multicast packet to encapsulate and send * * Transmits a batman-adv multicast packet that was locally prepared and diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index eefba5600ded..e7c8f9f2bb1f 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -43,10 +43,10 @@ #include "gateway_common.h" #include "hard-interface.h" #include "log.h" +#include "mesh-interface.h" #include "multicast.h" #include "network-coding.h" #include "originator.h" -#include "soft-interface.h" #include "tp_meter.h" #include "translation-table.h" @@ -63,7 +63,7 @@ enum batadv_netlink_multicast_groups { */ enum batadv_genl_ops_flags { /** - * @BATADV_FLAG_NEED_MESH: request requires valid soft interface in + * @BATADV_FLAG_NEED_MESH: request requires valid mesh interface in * attribute BATADV_ATTR_MESH_IFINDEX and expects a pointer to it to be * saved in info->user_ptr[0] */ @@ -166,24 +166,24 @@ static int batadv_netlink_get_ifindex(const struct nlmsghdr *nlh, int attrtype) } /** - * batadv_netlink_mesh_fill_ap_isolation() - Add ap_isolation softif attribute + * batadv_netlink_mesh_fill_ap_isolation() - Add ap_isolation meshif attribute * @msg: Netlink message to dump into - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * * Return: 0 on success or negative error number in case of 
failure */ static int batadv_netlink_mesh_fill_ap_isolation(struct sk_buff *msg, struct batadv_priv *bat_priv) { - struct batadv_softif_vlan *vlan; + struct batadv_meshif_vlan *vlan; u8 ap_isolation; - vlan = batadv_softif_vlan_get(bat_priv, BATADV_NO_FLAGS); + vlan = batadv_meshif_vlan_get(bat_priv, BATADV_NO_FLAGS); if (!vlan) return 0; ap_isolation = atomic_read(&vlan->ap_isolation); - batadv_softif_vlan_put(vlan); + batadv_meshif_vlan_put(vlan); return nla_put_u8(msg, BATADV_ATTR_AP_ISOLATION_ENABLED, !!ap_isolation); @@ -192,21 +192,21 @@ static int batadv_netlink_mesh_fill_ap_isolation(struct sk_buff *msg, /** * batadv_netlink_set_mesh_ap_isolation() - Set ap_isolation from genl msg * @attr: parsed BATADV_ATTR_AP_ISOLATION_ENABLED attribute - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * * Return: 0 on success or negative error number in case of failure */ static int batadv_netlink_set_mesh_ap_isolation(struct nlattr *attr, struct batadv_priv *bat_priv) { - struct batadv_softif_vlan *vlan; + struct batadv_meshif_vlan *vlan; - vlan = batadv_softif_vlan_get(bat_priv, BATADV_NO_FLAGS); + vlan = batadv_meshif_vlan_get(bat_priv, BATADV_NO_FLAGS); if (!vlan) return -ENOENT; atomic_set(&vlan->ap_isolation, !!nla_get_u8(attr)); - batadv_softif_vlan_put(vlan); + batadv_meshif_vlan_put(vlan); return 0; } @@ -214,7 +214,7 @@ static int batadv_netlink_set_mesh_ap_isolation(struct nlattr *attr, /** * batadv_netlink_mesh_fill() - Fill message with mesh attributes * @msg: Netlink message to dump into - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @cmd: type of message to generate * @portid: Port making netlink request * @seq: sequence number for message @@ -227,7 +227,7 @@ static int batadv_netlink_mesh_fill(struct sk_buff *msg, enum batadv_nl_commands cmd, u32 portid, u32 seq, int flags) { - struct net_device *soft_iface = bat_priv->soft_iface; + struct net_device *mesh_iface = bat_priv->mesh_iface; struct batadv_hard_iface *primary_if = NULL; struct net_device *hard_iface; void *hdr; @@ -239,10 +239,10 @@ static int batadv_netlink_mesh_fill(struct sk_buff *msg, if (nla_put_string(msg, BATADV_ATTR_VERSION, BATADV_SOURCE_VERSION) || nla_put_string(msg, BATADV_ATTR_ALGO_NAME, bat_priv->algo_ops->name) || - nla_put_u32(msg, BATADV_ATTR_MESH_IFINDEX, soft_iface->ifindex) || - nla_put_string(msg, BATADV_ATTR_MESH_IFNAME, soft_iface->name) || + nla_put_u32(msg, BATADV_ATTR_MESH_IFINDEX, mesh_iface->ifindex) || + nla_put_string(msg, BATADV_ATTR_MESH_IFNAME, mesh_iface->name) || nla_put(msg, BATADV_ATTR_MESH_ADDRESS, ETH_ALEN, - soft_iface->dev_addr) || + mesh_iface->dev_addr) || nla_put_u8(msg, BATADV_ATTR_TT_TTVN, (u8)atomic_read(&bat_priv->tt.vn))) goto nla_put_failure; @@ -369,8 +369,8 @@ nla_put_failure: } /** - * batadv_netlink_notify_mesh() - send softif attributes to listener - * @bat_priv: the bat priv with all the soft interface information + * batadv_netlink_notify_mesh() - send meshif attributes to listener + * @bat_priv: the bat priv with all the mesh interface information * * Return: 0 on success, < 0 on error */ @@ -391,14 +391,14 @@ static int batadv_netlink_notify_mesh(struct batadv_priv *bat_priv) } genlmsg_multicast_netns(&batadv_netlink_family, - dev_net(bat_priv->soft_iface), msg, 0, + dev_net(bat_priv->mesh_iface), msg, 0, BATADV_NL_MCGRP_CONFIG, GFP_KERNEL); return 0; } /** - * batadv_netlink_get_mesh() - 
Get softif attributes + * batadv_netlink_get_mesh() - Get meshif attributes * @skb: Netlink message with request data * @info: receiver information * @@ -427,7 +427,7 @@ static int batadv_netlink_get_mesh(struct sk_buff *skb, struct genl_info *info) } /** - * batadv_netlink_set_mesh() - Set softif attributes + * batadv_netlink_set_mesh() - Set meshif attributes * @skb: Netlink message with request data * @info: receiver information * @@ -474,7 +474,7 @@ static int batadv_netlink_set_mesh(struct sk_buff *skb, struct genl_info *info) atomic_set(&bat_priv->bridge_loop_avoidance, !!nla_get_u8(attr)); - batadv_bla_status_update(bat_priv->soft_iface); + batadv_bla_status_update(bat_priv->mesh_iface); } #endif /* CONFIG_BATMAN_ADV_BLA */ @@ -484,7 +484,7 @@ static int batadv_netlink_set_mesh(struct sk_buff *skb, struct genl_info *info) atomic_set(&bat_priv->distributed_arp_table, !!nla_get_u8(attr)); - batadv_dat_status_update(bat_priv->soft_iface); + batadv_dat_status_update(bat_priv->mesh_iface); } #endif /* CONFIG_BATMAN_ADV_DAT */ @@ -494,7 +494,7 @@ static int batadv_netlink_set_mesh(struct sk_buff *skb, struct genl_info *info) atomic_set(&bat_priv->fragmentation, !!nla_get_u8(attr)); rtnl_lock(); - batadv_update_min_mtu(bat_priv->soft_iface); + batadv_update_min_mtu(bat_priv->mesh_iface); rtnl_unlock(); } @@ -594,7 +594,7 @@ static int batadv_netlink_set_mesh(struct sk_buff *skb, struct genl_info *info) attr = info->attrs[BATADV_ATTR_NETWORK_CODING_ENABLED]; atomic_set(&bat_priv->network_coding, !!nla_get_u8(attr)); - batadv_nc_status_update(bat_priv->soft_iface); + batadv_nc_status_update(bat_priv->mesh_iface); } #endif /* CONFIG_BATMAN_ADV_NC */ @@ -633,7 +633,7 @@ batadv_netlink_tp_meter_put(struct sk_buff *msg, u32 cookie) /** * batadv_netlink_tpmeter_notify() - send tp_meter result via netlink to client - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @dst: destination of tp_meter session * @result: reason for tp meter session stop * @test_time: total time of the tp_meter session @@ -680,7 +680,7 @@ int batadv_netlink_tpmeter_notify(struct batadv_priv *bat_priv, const u8 *dst, genlmsg_end(msg, hdr); genlmsg_multicast_netns(&batadv_netlink_family, - dev_net(bat_priv->soft_iface), msg, 0, + dev_net(bat_priv->mesh_iface), msg, 0, BATADV_NL_MCGRP_TPMETER, GFP_KERNEL); return 0; @@ -778,7 +778,7 @@ batadv_netlink_tp_meter_cancel(struct sk_buff *skb, struct genl_info *info) /** * batadv_netlink_hardif_fill() - Fill message with hardif attributes * @msg: Netlink message to dump into - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @hard_iface: hard interface which was modified * @cmd: type of message to generate * @portid: Port making netlink request @@ -806,11 +806,11 @@ static int batadv_netlink_hardif_fill(struct sk_buff *msg, genl_dump_check_consistent(cb, hdr); if (nla_put_u32(msg, BATADV_ATTR_MESH_IFINDEX, - bat_priv->soft_iface->ifindex)) + bat_priv->mesh_iface->ifindex)) goto nla_put_failure; if (nla_put_string(msg, BATADV_ATTR_MESH_IFNAME, - bat_priv->soft_iface->name)) + bat_priv->mesh_iface->name)) goto nla_put_failure; if (nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX, @@ -850,7 +850,7 @@ nla_put_failure: /** * batadv_netlink_notify_hardif() - send hardif attributes to listener - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface 
information * @hard_iface: hard interface which was modified * * Return: 0 on success, < 0 on error @@ -873,7 +873,7 @@ static int batadv_netlink_notify_hardif(struct batadv_priv *bat_priv, } genlmsg_multicast_netns(&batadv_netlink_family, - dev_net(bat_priv->soft_iface), msg, 0, + dev_net(bat_priv->mesh_iface), msg, 0, BATADV_NL_MCGRP_CONFIG, GFP_KERNEL); return 0; @@ -963,24 +963,24 @@ static int batadv_netlink_set_hardif(struct sk_buff *skb, static int batadv_netlink_dump_hardif(struct sk_buff *msg, struct netlink_callback *cb) { - struct net_device *soft_iface; + struct net_device *mesh_iface; struct batadv_hard_iface *hard_iface; struct batadv_priv *bat_priv; int portid = NETLINK_CB(cb->skb).portid; int skip = cb->args[0]; int i = 0; - soft_iface = batadv_netlink_get_softif(cb); - if (IS_ERR(soft_iface)) - return PTR_ERR(soft_iface); + mesh_iface = batadv_netlink_get_meshif(cb); + if (IS_ERR(mesh_iface)) + return PTR_ERR(mesh_iface); - bat_priv = netdev_priv(soft_iface); + bat_priv = netdev_priv(mesh_iface); rtnl_lock(); cb->seq = batadv_hardif_generation << 1 | 1; list_for_each_entry(hard_iface, &batadv_hardif_list, list) { - if (hard_iface->soft_iface != soft_iface) + if (hard_iface->mesh_iface != mesh_iface) continue; if (i++ < skip) @@ -997,7 +997,7 @@ batadv_netlink_dump_hardif(struct sk_buff *msg, struct netlink_callback *cb) rtnl_unlock(); - dev_put(soft_iface); + dev_put(mesh_iface); cb->args[0] = i; @@ -1007,7 +1007,7 @@ batadv_netlink_dump_hardif(struct sk_buff *msg, struct netlink_callback *cb) /** * batadv_netlink_vlan_fill() - Fill message with vlan attributes * @msg: Netlink message to dump into - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @vlan: vlan which was modified * @cmd: type of message to generate * @portid: Port making netlink request @@ -1018,7 +1018,7 @@ batadv_netlink_dump_hardif(struct sk_buff *msg, struct netlink_callback *cb) */ static int batadv_netlink_vlan_fill(struct sk_buff *msg, struct batadv_priv *bat_priv, - struct batadv_softif_vlan *vlan, + struct batadv_meshif_vlan *vlan, enum batadv_nl_commands cmd, u32 portid, u32 seq, int flags) { @@ -1029,11 +1029,11 @@ static int batadv_netlink_vlan_fill(struct sk_buff *msg, return -ENOBUFS; if (nla_put_u32(msg, BATADV_ATTR_MESH_IFINDEX, - bat_priv->soft_iface->ifindex)) + bat_priv->mesh_iface->ifindex)) goto nla_put_failure; if (nla_put_string(msg, BATADV_ATTR_MESH_IFNAME, - bat_priv->soft_iface->name)) + bat_priv->mesh_iface->name)) goto nla_put_failure; if (nla_put_u32(msg, BATADV_ATTR_VLANID, vlan->vid & VLAN_VID_MASK)) @@ -1053,13 +1053,13 @@ nla_put_failure: /** * batadv_netlink_notify_vlan() - send vlan attributes to listener - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @vlan: vlan which was modified * * Return: 0 on success, < 0 on error */ static int batadv_netlink_notify_vlan(struct batadv_priv *bat_priv, - struct batadv_softif_vlan *vlan) + struct batadv_meshif_vlan *vlan) { struct sk_buff *msg; int ret; @@ -1076,7 +1076,7 @@ static int batadv_netlink_notify_vlan(struct batadv_priv *bat_priv, } genlmsg_multicast_netns(&batadv_netlink_family, - dev_net(bat_priv->soft_iface), msg, 0, + dev_net(bat_priv->mesh_iface), msg, 0, BATADV_NL_MCGRP_CONFIG, GFP_KERNEL); return 0; @@ -1091,7 +1091,7 @@ static int batadv_netlink_notify_vlan(struct batadv_priv *bat_priv, */ static int batadv_netlink_get_vlan(struct sk_buff *skb, 
struct genl_info *info) { - struct batadv_softif_vlan *vlan = info->user_ptr[1]; + struct batadv_meshif_vlan *vlan = info->user_ptr[1]; struct batadv_priv *bat_priv = info->user_ptr[0]; struct sk_buff *msg; int ret; @@ -1121,7 +1121,7 @@ static int batadv_netlink_get_vlan(struct sk_buff *skb, struct genl_info *info) */ static int batadv_netlink_set_vlan(struct sk_buff *skb, struct genl_info *info) { - struct batadv_softif_vlan *vlan = info->user_ptr[1]; + struct batadv_meshif_vlan *vlan = info->user_ptr[1]; struct batadv_priv *bat_priv = info->user_ptr[0]; struct nlattr *attr; @@ -1137,43 +1137,43 @@ static int batadv_netlink_set_vlan(struct sk_buff *skb, struct genl_info *info) } /** - * batadv_netlink_get_softif_from_ifindex() - Get soft-iface from ifindex + * batadv_netlink_get_meshif_from_ifindex() - Get mesh-iface from ifindex * @net: the applicable net namespace - * @ifindex: index of the soft interface + * @ifindex: index of the mesh interface * - * Return: Pointer to soft interface (with increased refcnt) on success, error + * Return: Pointer to mesh interface (with increased refcnt) on success, error * pointer on error */ static struct net_device * -batadv_netlink_get_softif_from_ifindex(struct net *net, int ifindex) +batadv_netlink_get_meshif_from_ifindex(struct net *net, int ifindex) { - struct net_device *soft_iface; + struct net_device *mesh_iface; - soft_iface = dev_get_by_index(net, ifindex); - if (!soft_iface) + mesh_iface = dev_get_by_index(net, ifindex); + if (!mesh_iface) return ERR_PTR(-ENODEV); - if (!batadv_softif_is_valid(soft_iface)) - goto err_put_softif; + if (!batadv_meshif_is_valid(mesh_iface)) + goto err_put_meshif; - return soft_iface; + return mesh_iface; -err_put_softif: - dev_put(soft_iface); +err_put_meshif: + dev_put(mesh_iface); return ERR_PTR(-EINVAL); } /** - * batadv_netlink_get_softif_from_info() - Get soft-iface from genl attributes + * batadv_netlink_get_meshif_from_info() - Get mesh-iface from genl attributes * @net: the applicable net namespace * @info: receiver information * - * Return: Pointer to soft interface (with increased refcnt) on success, error + * Return: Pointer to mesh interface (with increased refcnt) on success, error * pointer on error */ static struct net_device * -batadv_netlink_get_softif_from_info(struct net *net, struct genl_info *info) +batadv_netlink_get_meshif_from_info(struct net *net, struct genl_info *info) { int ifindex; @@ -1182,30 +1182,30 @@ batadv_netlink_get_softif_from_info(struct net *net, struct genl_info *info) ifindex = nla_get_u32(info->attrs[BATADV_ATTR_MESH_IFINDEX]); - return batadv_netlink_get_softif_from_ifindex(net, ifindex); + return batadv_netlink_get_meshif_from_ifindex(net, ifindex); } /** - * batadv_netlink_get_softif() - Retrieve soft interface from netlink callback + * batadv_netlink_get_meshif() - Retrieve mesh interface from netlink callback * @cb: callback structure containing arguments * - * Return: Pointer to soft interface (with increased refcnt) on success, error + * Return: Pointer to mesh interface (with increased refcnt) on success, error * pointer on error */ -struct net_device *batadv_netlink_get_softif(struct netlink_callback *cb) +struct net_device *batadv_netlink_get_meshif(struct netlink_callback *cb) { int ifindex = batadv_netlink_get_ifindex(cb->nlh, BATADV_ATTR_MESH_IFINDEX); if (!ifindex) return ERR_PTR(-ENONET); - return batadv_netlink_get_softif_from_ifindex(sock_net(cb->skb->sk), + return batadv_netlink_get_meshif_from_ifindex(sock_net(cb->skb->sk), ifindex); } /** * 
batadv_netlink_get_hardif_from_ifindex() - Get hard-iface from ifindex - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @net: the applicable net namespace * @ifindex: index of the hard interface * @@ -1227,7 +1227,7 @@ batadv_netlink_get_hardif_from_ifindex(struct batadv_priv *bat_priv, if (!hard_iface) goto err_put_harddev; - if (hard_iface->soft_iface != bat_priv->soft_iface) + if (hard_iface->mesh_iface != bat_priv->mesh_iface) goto err_put_hardif; /* hard_dev is referenced by hard_iface and not needed here */ @@ -1245,7 +1245,7 @@ err_put_harddev: /** * batadv_netlink_get_hardif_from_info() - Get hard-iface from genl attributes - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @net: the applicable net namespace * @info: receiver information * @@ -1268,7 +1268,7 @@ batadv_netlink_get_hardif_from_info(struct batadv_priv *bat_priv, /** * batadv_netlink_get_hardif() - Retrieve hard interface from netlink callback - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @cb: callback structure containing arguments * * Return: Pointer to hard interface (with increased refcnt) on success, error @@ -1290,18 +1290,18 @@ batadv_netlink_get_hardif(struct batadv_priv *bat_priv, /** * batadv_get_vlan_from_info() - Retrieve vlan from genl attributes - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @net: the applicable net namespace * @info: receiver information * * Return: Pointer to vlan on success (with increased refcnt), error pointer * on error */ -static struct batadv_softif_vlan * +static struct batadv_meshif_vlan * batadv_get_vlan_from_info(struct batadv_priv *bat_priv, struct net *net, struct genl_info *info) { - struct batadv_softif_vlan *vlan; + struct batadv_meshif_vlan *vlan; u16 vid; if (!info->attrs[BATADV_ATTR_VLANID]) @@ -1309,7 +1309,7 @@ batadv_get_vlan_from_info(struct batadv_priv *bat_priv, struct net *net, vid = nla_get_u16(info->attrs[BATADV_ATTR_VLANID]); - vlan = batadv_softif_vlan_get(bat_priv, vid | BATADV_VLAN_HAS_TAG); + vlan = batadv_meshif_vlan_get(bat_priv, vid | BATADV_VLAN_HAS_TAG); if (!vlan) return ERR_PTR(-ENOENT); @@ -1331,8 +1331,8 @@ static int batadv_pre_doit(const struct genl_split_ops *ops, struct net *net = genl_info_net(info); struct batadv_hard_iface *hard_iface; struct batadv_priv *bat_priv = NULL; - struct batadv_softif_vlan *vlan; - struct net_device *soft_iface; + struct batadv_meshif_vlan *vlan; + struct net_device *mesh_iface; u8 user_ptr1_flags; u8 mesh_dep_flags; int ret; @@ -1347,11 +1347,11 @@ static int batadv_pre_doit(const struct genl_split_ops *ops, return -EINVAL; if (ops->internal_flags & BATADV_FLAG_NEED_MESH) { - soft_iface = batadv_netlink_get_softif_from_info(net, info); - if (IS_ERR(soft_iface)) - return PTR_ERR(soft_iface); + mesh_iface = batadv_netlink_get_meshif_from_info(net, info); + if (IS_ERR(mesh_iface)) + return PTR_ERR(mesh_iface); - bat_priv = netdev_priv(soft_iface); + bat_priv = netdev_priv(mesh_iface); info->user_ptr[0] = bat_priv; } @@ -1360,7 +1360,7 @@ static int batadv_pre_doit(const struct genl_split_ops *ops, info); if (IS_ERR(hard_iface)) { ret = PTR_ERR(hard_iface); - goto err_put_softif; + goto err_put_meshif; } info->user_ptr[1] = hard_iface; @@ -1370,7 +1370,7 @@ static 
int batadv_pre_doit(const struct genl_split_ops *ops, vlan = batadv_get_vlan_from_info(bat_priv, net, info); if (IS_ERR(vlan)) { ret = PTR_ERR(vlan); - goto err_put_softif; + goto err_put_meshif; } info->user_ptr[1] = vlan; @@ -1378,9 +1378,9 @@ static int batadv_pre_doit(const struct genl_split_ops *ops, return 0; -err_put_softif: +err_put_meshif: if (bat_priv) - dev_put(bat_priv->soft_iface); + dev_put(bat_priv->mesh_iface); return ret; } @@ -1396,7 +1396,7 @@ static void batadv_post_doit(const struct genl_split_ops *ops, struct genl_info *info) { struct batadv_hard_iface *hard_iface; - struct batadv_softif_vlan *vlan; + struct batadv_meshif_vlan *vlan; struct batadv_priv *bat_priv; if (ops->internal_flags & BATADV_FLAG_NEED_HARDIF && @@ -1408,12 +1408,12 @@ static void batadv_post_doit(const struct genl_split_ops *ops, if (ops->internal_flags & BATADV_FLAG_NEED_VLAN && info->user_ptr[1]) { vlan = info->user_ptr[1]; - batadv_softif_vlan_put(vlan); + batadv_meshif_vlan_put(vlan); } if (ops->internal_flags & BATADV_FLAG_NEED_MESH && info->user_ptr[0]) { bat_priv = info->user_ptr[0]; - dev_put(bat_priv->soft_iface); + dev_put(bat_priv->mesh_iface); } } @@ -1567,7 +1567,7 @@ void __init batadv_netlink_register(void) ret = genl_register_family(&batadv_netlink_family); if (ret) - pr_warn("unable to register netlink family"); + pr_warn("unable to register netlink family\n"); } /** diff --git a/net/batman-adv/netlink.h b/net/batman-adv/netlink.h index 2097c2ae98f1..fe4548b974bb 100644 --- a/net/batman-adv/netlink.h +++ b/net/batman-adv/netlink.h @@ -15,7 +15,7 @@ void batadv_netlink_register(void); void batadv_netlink_unregister(void); -struct net_device *batadv_netlink_get_softif(struct netlink_callback *cb); +struct net_device *batadv_netlink_get_meshif(struct netlink_callback *cb); struct batadv_hard_iface * batadv_netlink_get_hardif(struct batadv_priv *bat_priv, struct netlink_callback *cb); diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index 71ebd0284f95..9f56308779cc 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -65,7 +65,7 @@ int __init batadv_nc_init(void) /** * batadv_nc_start_timer() - initialise the nc periodic worker - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information */ static void batadv_nc_start_timer(struct batadv_priv *bat_priv) { @@ -76,7 +76,7 @@ static void batadv_nc_start_timer(struct batadv_priv *bat_priv) /** * batadv_nc_tvlv_container_update() - update the network coding tvlv container * after network coding setting change - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information */ static void batadv_nc_tvlv_container_update(struct batadv_priv *bat_priv) { @@ -98,7 +98,7 @@ static void batadv_nc_tvlv_container_update(struct batadv_priv *bat_priv) /** * batadv_nc_status_update() - update the network coding tvlv container after * network coding setting change - * @net_dev: the soft interface net device + * @net_dev: the mesh interface net device */ void batadv_nc_status_update(struct net_device *net_dev) { @@ -109,7 +109,7 @@ void batadv_nc_status_update(struct net_device *net_dev) /** * batadv_nc_tvlv_ogm_handler_v1() - process incoming nc tvlv container - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig: the orig_node of the ogm * @flags: flags 
indicating the tvlv state (see batadv_tvlv_handler_flags) * @tvlv_value: tvlv buffer containing the gateway data @@ -128,7 +128,7 @@ static void batadv_nc_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, /** * batadv_nc_mesh_init() - initialise coding hash table and start housekeeping - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * * Return: 0 on success or negative error number in case of failure */ @@ -171,7 +171,7 @@ err: /** * batadv_nc_init_bat_priv() - initialise the nc specific bat_priv variables - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information */ void batadv_nc_init_bat_priv(struct batadv_priv *bat_priv) { @@ -267,7 +267,7 @@ static void batadv_nc_packet_free(struct batadv_nc_packet *nc_packet, /** * batadv_nc_to_purge_nc_node() - checks whether an nc node has to be purged - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @nc_node: the nc node to check * * Return: true if the entry has to be purged now, false otherwise @@ -283,7 +283,7 @@ static bool batadv_nc_to_purge_nc_node(struct batadv_priv *bat_priv, /** * batadv_nc_to_purge_nc_path_coding() - checks whether an nc path has timed out - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @nc_path: the nc path to check * * Return: true if the entry has to be purged now, false otherwise @@ -304,7 +304,7 @@ static bool batadv_nc_to_purge_nc_path_coding(struct batadv_priv *bat_priv, /** * batadv_nc_to_purge_nc_path_decoding() - checks whether an nc path has timed * out - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @nc_path: the nc path to check * * Return: true if the entry has to be purged now, false otherwise @@ -325,7 +325,7 @@ static bool batadv_nc_to_purge_nc_path_decoding(struct batadv_priv *bat_priv, /** * batadv_nc_purge_orig_nc_nodes() - go through list of nc nodes and purge stale * entries - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @list: list of nc nodes * @lock: nc node list lock * @to_purge: function in charge to decide whether an entry has to be purged or @@ -363,7 +363,7 @@ batadv_nc_purge_orig_nc_nodes(struct batadv_priv *bat_priv, /** * batadv_nc_purge_orig() - purges all nc node data attached of the given * originator - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig_node: orig_node with the nc node entries to be purged * @to_purge: function in charge to decide whether an entry has to be purged or * not. 
This function takes the nc node as argument and has to return @@ -389,7 +389,7 @@ void batadv_nc_purge_orig(struct batadv_priv *bat_priv, /** * batadv_nc_purge_orig_hash() - traverse entire originator hash to check if * they have timed out nc nodes - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information */ static void batadv_nc_purge_orig_hash(struct batadv_priv *bat_priv) { @@ -416,7 +416,7 @@ static void batadv_nc_purge_orig_hash(struct batadv_priv *bat_priv) /** * batadv_nc_purge_paths() - traverse all nc paths part of the hash and remove * unused ones - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @hash: hash table containing the nc paths to check * @to_purge: function in charge to decide whether an entry has to be purged or * not. This function takes the nc node as argument and has to return @@ -579,7 +579,7 @@ static void batadv_nc_send_packet(struct batadv_nc_packet *nc_packet) /** * batadv_nc_sniffed_purge() - Checks timestamp of given sniffed nc_packet. - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @nc_path: the nc path the packet belongs to * @nc_packet: the nc packet to be checked * @@ -618,7 +618,7 @@ out: /** * batadv_nc_fwd_flush() - Checks the timestamp of the given nc packet. - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @nc_path: the nc path the packet belongs to * @nc_packet: the nc packet to be checked * @@ -657,7 +657,7 @@ static bool batadv_nc_fwd_flush(struct batadv_priv *bat_priv, /** * batadv_nc_process_nc_paths() - traverse given nc packet pool and free timed * out nc packets - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @hash: to be processed hash table * @process_fn: Function called to process given nc packet. Should return true * to encourage this function to proceed with the next packet. 
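The @process_fn contract documented here is what drives the nc path housekeeping: batadv_nc_process_nc_paths() calls the supplied predicate for each buffered packet and stops the walk for a path as soon as the predicate returns false (batadv_nc_fwd_flush(), for example, returns false once it hits a packet that has not timed out yet, which is safe to assume ends the work for that path because each path's queue fills in arrival order). A self-contained sketch of that callback-driven walk, with illustrative names and a hypothetical 10-tick timeout:

/* Userspace model of the process_fn walk; the types, names and the
 * 10-tick timeout are illustrative only. */
#include <stdbool.h>
#include <stdio.h>

struct nc_pkt { int queued_at; };

/* Flush timed-out packets; returning false ends the walk early. */
static bool flush_if_timed_out(struct nc_pkt *pkt, int now)
{
	if (now - pkt->queued_at <= 10)
		return false;	/* still fresh: later packets are fresher */

	printf("flushed packet queued at t=%d\n", pkt->queued_at);
	return true;
}

static void process_path(struct nc_pkt *pkts, int n, int now,
			 bool (*process_fn)(struct nc_pkt *, int))
{
	for (int i = 0; i < n; i++)
		if (!process_fn(&pkts[i], now))
			break;	/* predicate declined to continue */
}

int main(void)
{
	struct nc_pkt queue[] = { { 80 }, { 85 }, { 95 }, { 98 } };

	/* at t=100 this flushes t=80 and t=85, then stops at t=95 (age 5) */
	process_path(queue, 4, 100, flush_if_timed_out);
	return 0;
}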
@@ -744,7 +744,7 @@ static void batadv_nc_worker(struct work_struct *work) /** * batadv_can_nc_with_orig() - checks whether the given orig node is suitable * for coding or not - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig_node: neighboring orig node which may be used as nc candidate * @ogm_packet: incoming ogm packet also used for the checks * @@ -825,7 +825,7 @@ batadv_nc_find_nc_node(struct batadv_orig_node *orig_node, /** * batadv_nc_get_nc_node() - retrieves an nc node or creates the entry if it was * not found - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig_node: orig node originating the ogm packet * @orig_neigh_node: neighboring orig node from which we received the ogm packet * (can be equal to orig_node) @@ -888,7 +888,7 @@ unlock: /** * batadv_nc_update_nc_node() - updates stored incoming and outgoing nc node * structs (best called on incoming OGMs) - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig_node: orig node originating the ogm packet * @orig_neigh_node: neighboring orig node from which we received the ogm packet * (can be equal to orig_node) @@ -940,7 +940,7 @@ out: /** * batadv_nc_get_path() - get existing nc_path or allocate a new one - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @hash: hash table containing the nc path * @src: ethernet source address - first half of the nc path search key * @dst: ethernet destination address - second half of the nc path search key @@ -1032,7 +1032,7 @@ static void batadv_nc_memxor(char *dst, const char *src, unsigned int len) /** * batadv_nc_code_packets() - code a received unicast_packet with an nc packet * into a coded_packet and send it - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: data skb to forward * @ethhdr: pointer to the ethernet header inside the skb * @nc_packet: structure containing the packet to the skb can be coded with @@ -1245,7 +1245,7 @@ static bool batadv_nc_skb_coding_possible(struct sk_buff *skb, u8 *dst, u8 *src) /** * batadv_nc_path_search() - Find the coding path matching in_nc_node and * out_nc_node to retrieve a buffered packet that can be used for coding. - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @in_nc_node: pointer to skb next hop's neighbor nc node * @out_nc_node: pointer to skb source's neighbor nc node * @skb: data skb to forward @@ -1313,7 +1313,7 @@ batadv_nc_path_search(struct batadv_priv *bat_priv, /** * batadv_nc_skb_src_search() - Loops through the list of neighboring nodes of * the skb's sender (may be equal to the originator). 
- * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: data skb to forward * @eth_dst: next hop mac address of skb * @eth_src: source mac address of skb @@ -1359,7 +1359,7 @@ batadv_nc_skb_src_search(struct batadv_priv *bat_priv, /** * batadv_nc_skb_store_before_coding() - set the ethernet src and dst of the * unicast skb before it is stored for use in later decoding - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: data skb to store * @eth_dst_new: new destination mac address of skb */ @@ -1408,7 +1408,7 @@ static bool batadv_nc_skb_dst_search(struct sk_buff *skb, struct batadv_neigh_node *neigh_node, struct ethhdr *ethhdr) { - struct net_device *netdev = neigh_node->if_incoming->soft_iface; + struct net_device *netdev = neigh_node->if_incoming->mesh_iface; struct batadv_priv *bat_priv = netdev_priv(netdev); struct batadv_orig_node *orig_node = neigh_node->orig_node; struct batadv_nc_node *nc_node; @@ -1495,7 +1495,7 @@ static bool batadv_nc_skb_add_to_path(struct sk_buff *skb, bool batadv_nc_skb_forward(struct sk_buff *skb, struct batadv_neigh_node *neigh_node) { - const struct net_device *netdev = neigh_node->if_incoming->soft_iface; + const struct net_device *netdev = neigh_node->if_incoming->mesh_iface; struct batadv_priv *bat_priv = netdev_priv(netdev); struct batadv_unicast_packet *packet; struct batadv_nc_path *nc_path; @@ -1544,7 +1544,7 @@ out: /** * batadv_nc_skb_store_for_decoding() - save a clone of the skb which can be * used when decoding coded packets - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: data skb to store */ void batadv_nc_skb_store_for_decoding(struct batadv_priv *bat_priv, @@ -1605,7 +1605,7 @@ out: /** * batadv_nc_skb_store_sniffed_unicast() - check if a received unicast packet * should be saved in the decoding buffer and, if so, store it there - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: unicast skb to store */ void batadv_nc_skb_store_sniffed_unicast(struct batadv_priv *bat_priv, @@ -1625,7 +1625,7 @@ void batadv_nc_skb_store_sniffed_unicast(struct batadv_priv *bat_priv, /** * batadv_nc_skb_decode_packet() - decode given skb using the decode data stored * in nc_packet - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: unicast skb to decode * @nc_packet: decode data needed to decode the skb * @@ -1719,7 +1719,7 @@ batadv_nc_skb_decode_packet(struct batadv_priv *bat_priv, struct sk_buff *skb, /** * batadv_nc_find_decoding_packet() - search through buffered decoding data to * find the data needed to decode the coded packet - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @ethhdr: pointer to the ethernet header inside the coded packet * @coded: coded packet we try to find decode data for * @@ -1793,7 +1793,7 @@ batadv_nc_find_decoding_packet(struct batadv_priv *bat_priv, static int batadv_nc_recv_coded_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if) { - struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface); + struct batadv_priv *bat_priv = netdev_priv(recv_if->mesh_iface); struct 
batadv_unicast_packet *unicast_packet; struct batadv_coded_packet *coded_packet; struct batadv_nc_packet *nc_packet; @@ -1858,7 +1858,7 @@ free_skb: /** * batadv_nc_mesh_free() - clean up network coding memory - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information */ void batadv_nc_mesh_free(struct batadv_priv *bat_priv) { diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index bcc2e20e0cd6..d9cfc5c6b208 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -47,7 +47,7 @@ static struct lock_class_key batadv_orig_hash_lock_class_key; /** * batadv_orig_hash_find() - Find and return originator from orig_hash - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @data: mac address of the originator * * Return: orig_node (with increased refcnt), NULL on errors @@ -213,7 +213,7 @@ void batadv_orig_node_vlan_release(struct kref *ref) /** * batadv_originator_init() - Initialize all originator structures - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * * Return: 0 on success or negative error number in case of failure */ @@ -338,7 +338,7 @@ batadv_orig_router_get(struct batadv_orig_node *orig_node, /** * batadv_orig_to_router() - get next hop neighbor to an orig address - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig_addr: the originator MAC address to search the best next hop router for * @if_outgoing: the interface where the payload packet has been received or * the OGM should be sent to @@ -567,7 +567,7 @@ batadv_hardif_neigh_create(struct batadv_hard_iface *hard_iface, const u8 *neigh_addr, struct batadv_orig_node *orig_node) { - struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); + struct batadv_priv *bat_priv = netdev_priv(hard_iface->mesh_iface); struct batadv_hardif_neigh_node *hardif_neigh; spin_lock_bh(&hard_iface->neigh_list_lock); @@ -754,20 +754,20 @@ batadv_neigh_node_get_or_create(struct batadv_orig_node *orig_node, int batadv_hardif_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb) { struct batadv_hard_iface *primary_if, *hard_iface; - struct net_device *soft_iface; + struct net_device *mesh_iface; struct batadv_priv *bat_priv; int ret; - soft_iface = batadv_netlink_get_softif(cb); - if (IS_ERR(soft_iface)) - return PTR_ERR(soft_iface); + mesh_iface = batadv_netlink_get_meshif(cb); + if (IS_ERR(mesh_iface)) + return PTR_ERR(mesh_iface); - bat_priv = netdev_priv(soft_iface); + bat_priv = netdev_priv(mesh_iface); primary_if = batadv_primary_if_get_selected(bat_priv); if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) { ret = -ENOENT; - goto out_put_soft_iface; + goto out_put_mesh_iface; } hard_iface = batadv_netlink_get_hardif(bat_priv, cb); @@ -794,8 +794,8 @@ out_put_hard_iface: batadv_hardif_put(hard_iface); out_put_primary_if: batadv_hardif_put(primary_if); -out_put_soft_iface: - dev_put(soft_iface); +out_put_mesh_iface: + dev_put(mesh_iface); return ret; } @@ -892,7 +892,7 @@ void batadv_orig_node_release(struct kref *ref) /** * batadv_originator_free() - Free all originator structures - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information */ void 
batadv_originator_free(struct batadv_priv *bat_priv) { @@ -928,7 +928,7 @@ void batadv_originator_free(struct batadv_priv *bat_priv) /** * batadv_orig_node_new() - creates a new orig_node - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @addr: the mac address of the originator * * Creates a new originator object and initialises all the generic fields. @@ -1009,7 +1009,7 @@ free_orig_node: /** * batadv_purge_neigh_ifinfo() - purge obsolete ifinfo entries from neighbor - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @neigh: orig node which is to be checked */ static void @@ -1050,7 +1050,7 @@ batadv_purge_neigh_ifinfo(struct batadv_priv *bat_priv, /** * batadv_purge_orig_ifinfo() - purge obsolete ifinfo entries from originator - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig_node: orig node which is to be checked * * Return: true if any ifinfo entry was purged, false otherwise. @@ -1102,7 +1102,7 @@ batadv_purge_orig_ifinfo(struct batadv_priv *bat_priv, /** * batadv_purge_orig_neighbors() - purges neighbors from originator - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig_node: orig node which is to be checked * * Return: true if any neighbor was purged, false otherwise @@ -1160,7 +1160,7 @@ batadv_purge_orig_neighbors(struct batadv_priv *bat_priv, /** * batadv_find_best_neighbor() - finds the best neighbor after purging - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig_node: orig node which is to be checked * @if_outgoing: the interface for which the metric should be compared * @@ -1194,7 +1194,7 @@ batadv_find_best_neighbor(struct batadv_priv *bat_priv, /** * batadv_purge_orig_node() - purges obsolete information from an orig_node - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig_node: orig node which is to be checked * * This function checks if the orig_node or substructures of it have become @@ -1236,7 +1236,7 @@ static bool batadv_purge_orig_node(struct batadv_priv *bat_priv, if (hard_iface->if_status != BATADV_IF_ACTIVE) continue; - if (hard_iface->soft_iface != bat_priv->soft_iface) + if (hard_iface->mesh_iface != bat_priv->mesh_iface) continue; if (!kref_get_unless_zero(&hard_iface->refcount)) @@ -1258,7 +1258,7 @@ static bool batadv_purge_orig_node(struct batadv_priv *bat_priv, /** * batadv_purge_orig_ref() - Purge all outdated originators - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information */ void batadv_purge_orig_ref(struct batadv_priv *bat_priv) { @@ -1325,20 +1325,20 @@ static void batadv_purge_orig(struct work_struct *work) int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb) { struct batadv_hard_iface *primary_if, *hard_iface; - struct net_device *soft_iface; + struct net_device *mesh_iface; struct batadv_priv *bat_priv; int ret; - soft_iface = batadv_netlink_get_softif(cb); - if (IS_ERR(soft_iface)) - return PTR_ERR(soft_iface); + mesh_iface = batadv_netlink_get_meshif(cb); + if (IS_ERR(mesh_iface)) + return PTR_ERR(mesh_iface); 
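/* Editorial sketch: the netlink dump callbacks being renamed in these
 * hunks all share one acquire/unwind shape -- take references front to
 * back, drop them back to front on every exit path. A compilable toy
 * of that pattern; the struct and helpers are invented for
 * illustration and are not the kernel API.
 */
struct iface { int refs; int active; };

static struct iface *iface_get(struct iface *i)
{
        if (i)
                i->refs++;
        return i;
}

static void iface_put(struct iface *i)
{
        if (i)
                i->refs--;
}

static int demo_dump(struct iface *mesh, struct iface *primary)
{
        int ret = 0;

        if (!iface_get(mesh))
                return -1;              /* PTR_ERR() in the real code */

        if (!iface_get(primary)) {
                ret = -2;               /* -ENOENT in the real code */
                goto out_put_mesh_iface;
        }
        if (!primary->active) {
                ret = -2;
                iface_put(primary);
                goto out_put_mesh_iface;
        }

        /* ... dump the requested entries into the netlink message ... */

        iface_put(primary);
out_put_mesh_iface:
        iface_put(mesh);                /* drop in reverse order */
        return ret;
}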
- bat_priv = netdev_priv(soft_iface); + bat_priv = netdev_priv(mesh_iface); primary_if = batadv_primary_if_get_selected(bat_priv); if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) { ret = -ENOENT; - goto out_put_soft_iface; + goto out_put_mesh_iface; } hard_iface = batadv_netlink_get_hardif(bat_priv, cb); @@ -1365,8 +1365,8 @@ out_put_hard_iface: batadv_hardif_put(hard_iface); out_put_primary_if: batadv_hardif_put(primary_if); -out_put_soft_iface: - dev_put(soft_iface); +out_put_mesh_iface: + dev_put(mesh_iface); return ret; } diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index f1061985149f..35d8c5783999 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -30,10 +30,10 @@ #include "fragmentation.h" #include "hard-interface.h" #include "log.h" +#include "mesh-interface.h" #include "network-coding.h" #include "originator.h" #include "send.h" -#include "soft-interface.h" #include "tp_meter.h" #include "translation-table.h" #include "tvlv.h" @@ -43,7 +43,7 @@ static int batadv_route_unicast_packet(struct sk_buff *skb, /** * _batadv_update_route() - set the router for this originator - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig_node: orig node which is to be configured * @recv_if: the receive interface for which this route is set * @neigh_node: neighbor which should be the next router @@ -106,7 +106,7 @@ static void _batadv_update_route(struct batadv_priv *bat_priv, /** * batadv_update_route() - set the router for this originator - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig_node: orig node which is to be configured * @recv_if: the receive interface for which this route is set * @neigh_node: neighbor which should be the next router @@ -133,7 +133,7 @@ out: /** * batadv_window_protected() - checks whether the host restarted and is in the * protection time. 
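/* Editorial sketch of the restart protection summarised above: a
 * sequence number far outside the expected window suggests the
 * neighbour rebooted, and such packets are ignored for a grace
 * period. User-space rendering with invented constants and a plain
 * millisecond clock; not the kernel's batadv_window_protected().
 */
#include <stdbool.h>

#define SEQ_OLD_MAX_DIFF        64
#define RESET_PROTECTION_MS     30000

static bool window_protected(long seq_num_diff, unsigned long now_ms,
                             unsigned long *last_reset_ms)
{
        if (seq_num_diff <= -SEQ_OLD_MAX_DIFF ||
            seq_num_diff >= SEQ_OLD_MAX_DIFF) {
                if (now_ms - *last_reset_ms < RESET_PROTECTION_MS)
                        return true;    /* still protected: drop it */
                *last_reset_ms = now_ms;        /* grace period over:
                                                 * accept the restart */
        }
        return false;
}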
- * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @seq_num_diff: difference between the current/received sequence number and * the last sequence number * @seq_old_max_diff: maximum age of sequence number not considered as restart @@ -207,7 +207,7 @@ bool batadv_check_management_packet(struct sk_buff *skb, /** * batadv_recv_my_icmp_packet() - receive an icmp packet locally - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: icmp packet to process * * Return: NET_RX_SUCCESS if the packet has been consumed or NET_RX_DROP @@ -338,7 +338,7 @@ out: int batadv_recv_icmp_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if) { - struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface); + struct batadv_priv *bat_priv = netdev_priv(recv_if->mesh_iface); struct batadv_icmp_header *icmph; struct batadv_icmp_packet_rr *icmp_packet_rr; struct ethhdr *ethhdr; @@ -428,7 +428,7 @@ free_skb: /** * batadv_check_unicast_packet() - Check for malformed unicast packets - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: packet to check * @hdr_size: size of header to pull * @@ -511,7 +511,7 @@ batadv_last_bonding_replace(struct batadv_orig_node *orig_node, /** * batadv_find_router() - find a suitable router for this originator - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig_node: the destination node * @recv_if: pointer to interface this packet was received on * @@ -656,7 +656,7 @@ next: static int batadv_route_unicast_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if) { - struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface); + struct batadv_priv *bat_priv = netdev_priv(recv_if->mesh_iface); struct batadv_orig_node *orig_node = NULL; struct batadv_unicast_packet *unicast_packet; struct ethhdr *ethhdr = eth_hdr(skb); @@ -727,7 +727,7 @@ free_skb: /** * batadv_reroute_unicast_packet() - update the unicast header for re-routing - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: unicast packet to process * @unicast_packet: the unicast header to be updated * @dst_addr: the payload destination @@ -879,7 +879,7 @@ static bool batadv_check_unicast_ttvn(struct batadv_priv *bat_priv, return false; /* update the header in order to let the packet be delivered to this - * node's soft interface + * node's mesh interface */ primary_if = batadv_primary_if_get_selected(bat_priv); if (!primary_if) @@ -909,7 +909,7 @@ int batadv_recv_unhandled_unicast_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if) { struct batadv_unicast_packet *unicast_packet; - struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface); + struct batadv_priv *bat_priv = netdev_priv(recv_if->mesh_iface); int check, hdr_size = sizeof(*unicast_packet); check = batadv_check_unicast_packet(bat_priv, skb, hdr_size); @@ -938,7 +938,7 @@ free_skb: int batadv_recv_unicast_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if) { - struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface); + struct batadv_priv *bat_priv = netdev_priv(recv_if->mesh_iface); struct batadv_unicast_packet *unicast_packet; struct batadv_unicast_4addr_packet 
*unicast_4addr_packet; u8 *orig_addr, *orig_addr_gw; @@ -1017,7 +1017,7 @@ int batadv_recv_unicast_packet(struct sk_buff *skb, batadv_dat_snoop_incoming_dhcp_ack(bat_priv, skb, hdr_size); - batadv_interface_rx(recv_if->soft_iface, skb, hdr_size, + batadv_interface_rx(recv_if->mesh_iface, skb, hdr_size, orig_node); rx_success: @@ -1047,7 +1047,7 @@ free_skb: int batadv_recv_unicast_tvlv(struct sk_buff *skb, struct batadv_hard_iface *recv_if) { - struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface); + struct batadv_priv *bat_priv = netdev_priv(recv_if->mesh_iface); struct batadv_unicast_tvlv_packet *unicast_tvlv_packet; unsigned char *tvlv_buff; u16 tvlv_buff_len; @@ -1103,7 +1103,7 @@ free_skb: int batadv_recv_frag_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if) { - struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface); + struct batadv_priv *bat_priv = netdev_priv(recv_if->mesh_iface); struct batadv_orig_node *orig_node_src = NULL; struct batadv_frag_packet *frag_packet; int ret = NET_RX_DROP; @@ -1165,7 +1165,7 @@ free_skb: int batadv_recv_bcast_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if) { - struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface); + struct batadv_priv *bat_priv = netdev_priv(recv_if->mesh_iface); struct batadv_orig_node *orig_node = NULL; struct batadv_bcast_packet *bcast_packet; struct ethhdr *ethhdr; @@ -1255,7 +1255,7 @@ int batadv_recv_bcast_packet(struct sk_buff *skb, batadv_dat_snoop_incoming_dhcp_ack(bat_priv, skb, hdr_size); /* broadcast for me */ - batadv_interface_rx(recv_if->soft_iface, skb, hdr_size, orig_node); + batadv_interface_rx(recv_if->mesh_iface, skb, hdr_size, orig_node); rx_success: ret = NET_RX_SUCCESS; @@ -1279,14 +1279,14 @@ out: * * Parses the given, received batman-adv multicast packet. Depending on the * contents of its TVLV forwards it and/or decapsulates it to hand it to the - * soft interface. + * mesh interface. * * Return: NET_RX_DROP if the skb is not consumed, NET_RX_SUCCESS otherwise. 
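/* Editorial sketch: every receive handler in this file recovers its
 * state via netdev_priv(recv_if->mesh_iface). The kernel co-allocates
 * the driver-private area directly behind struct net_device in a
 * single allocation; below is a user-space toy of that layout trick
 * (alignment padding omitted, type names invented).
 */
#include <stdlib.h>

struct toy_net_device { int ifindex; };
struct toy_bat_priv   { int mesh_state; };

static void *toy_netdev_priv(struct toy_net_device *dev)
{
        return (char *)dev + sizeof(*dev);      /* priv follows dev */
}

static struct toy_net_device *toy_alloc_netdev(size_t priv_size)
{
        /* one allocation covers the device plus its private area */
        return calloc(1, sizeof(struct toy_net_device) + priv_size);
}

/* Usage: toy_netdev_priv(toy_alloc_netdev(sizeof(struct toy_bat_priv)))
 * yields the private struct -- the same trick the handlers rely on. */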
*/ int batadv_recv_mcast_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if) { - struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface); + struct batadv_priv *bat_priv = netdev_priv(recv_if->mesh_iface); struct batadv_mcast_packet *mcast_packet; int hdr_size = sizeof(*mcast_packet); unsigned char *tvlv_buff; @@ -1329,7 +1329,7 @@ int batadv_recv_mcast_packet(struct sk_buff *skb, batadv_add_counter(bat_priv, BATADV_CNT_MCAST_RX_LOCAL_BYTES, skb->len - hdr_size); - batadv_interface_rx(bat_priv->soft_iface, skb, hdr_size, NULL); + batadv_interface_rx(bat_priv->mesh_iface, skb, hdr_size, NULL); /* skb was consumed */ skb = NULL; } diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index 0379b126865d..735ac8077821 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -34,10 +34,10 @@ #include "gateway_client.h" #include "hard-interface.h" #include "log.h" +#include "mesh-interface.h" #include "network-coding.h" #include "originator.h" #include "routing.h" -#include "soft-interface.h" #include "translation-table.h" static void batadv_send_outstanding_bcast_packet(struct work_struct *work); @@ -68,7 +68,7 @@ int batadv_send_skb_packet(struct sk_buff *skb, struct ethhdr *ethhdr; int ret; - bat_priv = netdev_priv(hard_iface->soft_iface); + bat_priv = netdev_priv(hard_iface->mesh_iface); if (hard_iface->if_status != BATADV_IF_ACTIVE) goto send_skb_err; @@ -272,7 +272,7 @@ static bool batadv_send_skb_prepare_unicast(struct sk_buff *skb, /** * batadv_send_skb_prepare_unicast_4addr() - encapsulate an skb with a * unicast 4addr header - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the skb containing the payload to encapsulate * @orig: the destination node * @packet_subtype: the unicast 4addr packet subtype to use @@ -314,7 +314,7 @@ out: /** * batadv_send_skb_unicast() - encapsulate and send an skb via unicast - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: payload to send * @packet_type: the batman unicast packet type to use * @packet_subtype: the unicast 4addr packet subtype (only relevant for unicast @@ -384,7 +384,7 @@ out: /** * batadv_send_skb_via_tt_generic() - send an skb via TT lookup - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: payload to send * @packet_type: the batman unicast packet type to use * @packet_subtype: the unicast 4addr packet subtype (only relevant for unicast @@ -430,7 +430,7 @@ int batadv_send_skb_via_tt_generic(struct batadv_priv *bat_priv, /** * batadv_send_skb_via_gw() - send an skb via gateway lookup - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: payload to send * @vid: the vid to be used to search the translation table * @@ -532,7 +532,7 @@ batadv_forw_packet_alloc(struct batadv_hard_iface *if_incoming, forw_packet->queue_left = queue_left; forw_packet->if_incoming = if_incoming; forw_packet->if_outgoing = if_outgoing; - forw_packet->num_packets = 0; + forw_packet->num_packets = 1; return forw_packet; @@ -695,7 +695,7 @@ static void batadv_forw_packet_queue(struct batadv_forw_packet *forw_packet, /** * batadv_forw_packet_bcast_queue() - try to queue a broadcast packet - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: 
the bat priv with all the mesh interface information * @forw_packet: the forwarding packet to queue * @send_time: timestamp (jiffies) when the packet is to be sent * @@ -714,7 +714,7 @@ batadv_forw_packet_bcast_queue(struct batadv_priv *bat_priv, /** * batadv_forw_packet_ogmv1_queue() - try to queue an OGMv1 packet - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @forw_packet: the forwarding packet to queue * @send_time: timestamp (jiffies) when the packet is to be sent * @@ -732,7 +732,7 @@ void batadv_forw_packet_ogmv1_queue(struct batadv_priv *bat_priv, /** * batadv_forw_bcast_packet_to_list() - queue broadcast packet for transmissions - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: broadcast packet to add * @delay: number of jiffies to wait before sending * @own_packet: true if it is a self-generated broadcast packet @@ -787,7 +787,7 @@ err: /** * batadv_forw_bcast_packet_if() - forward and queue a broadcast packet - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: broadcast packet to add * @delay: number of jiffies to wait before sending * @own_packet: true if it is a self-generated broadcast packet @@ -838,7 +838,7 @@ static int batadv_forw_bcast_packet_if(struct batadv_priv *bat_priv, /** * batadv_send_no_broadcast() - check whether (re)broadcast is necessary - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: broadcast packet to check * @own_packet: true if it is a self-generated broadcast packet * @if_out: the outgoing interface checked and considered for (re)broadcast @@ -900,7 +900,7 @@ static bool batadv_send_no_broadcast(struct batadv_priv *bat_priv, /** * __batadv_forw_bcast_packet() - forward and queue a broadcast packet - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: broadcast packet to add * @delay: number of jiffies to wait before sending * @own_packet: true if it is a self-generated broadcast packet @@ -930,7 +930,7 @@ static int __batadv_forw_bcast_packet(struct batadv_priv *bat_priv, rcu_read_lock(); list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { - if (hard_iface->soft_iface != bat_priv->soft_iface) + if (hard_iface->mesh_iface != bat_priv->mesh_iface) continue; if (!kref_get_unless_zero(&hard_iface->refcount)) @@ -958,7 +958,7 @@ static int __batadv_forw_bcast_packet(struct batadv_priv *bat_priv, /** * batadv_forw_bcast_packet() - forward and queue a broadcast packet - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: broadcast packet to add * @delay: number of jiffies to wait before sending * @own_packet: true if it is a self-generated broadcast packet @@ -979,7 +979,7 @@ int batadv_forw_bcast_packet(struct batadv_priv *bat_priv, /** * batadv_send_bcast_packet() - send and queue a broadcast packet - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: broadcast packet to add * @delay: number of jiffies to wait before sending * @own_packet: true if it is a self-generated broadcast packet @@ -1060,7 +1060,7 @@ static 
void batadv_send_outstanding_bcast_packet(struct work_struct *work) delayed_work = to_delayed_work(work); forw_packet = container_of(delayed_work, struct batadv_forw_packet, delayed_work); - bat_priv = netdev_priv(forw_packet->if_incoming->soft_iface); + bat_priv = netdev_priv(forw_packet->if_incoming->mesh_iface); if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING) { dropped = true; @@ -1095,7 +1095,7 @@ out: /** * batadv_purge_outstanding_packets() - stop/purge scheduled bcast/OGMv1 packets - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @hard_iface: the hard interface to cancel and purge bcast/ogm packets on * * This method cancels and purges any broadcast and OGMv1 packet on the given diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h index 08af251b765c..3415afec4a0c 100644 --- a/net/batman-adv/send.h +++ b/net/batman-adv/send.h @@ -68,7 +68,7 @@ int batadv_send_skb_via_gw(struct batadv_priv *bat_priv, struct sk_buff *skb, /** * batadv_send_skb_via_tt() - send an skb via TT lookup - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the payload to send * @dst_hint: can be used to override the destination contained in the skb * @vid: the vid to be used to search the translation table @@ -89,7 +89,7 @@ static inline int batadv_send_skb_via_tt(struct batadv_priv *bat_priv, /** * batadv_send_skb_via_tt_4addr() - send an skb via TT lookup - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the payload to send * @packet_subtype: the unicast 4addr packet subtype to use * @dst_hint: can be used to override the destination contained in the skb diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c index 7f3dd3c393e0..9fb14e40e156 100644 --- a/net/batman-adv/tp_meter.c +++ b/net/batman-adv/tp_meter.c @@ -206,7 +206,7 @@ static void batadv_tp_update_rto(struct batadv_tp_vars *tp_vars, * batadv_tp_batctl_notify() - send client status result to client * @reason: reason for tp meter session stop * @dst: destination of tp_meter session - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @start_time: start of transmission in jiffies * @total_sent: bytes acked to the receiver * @cookie: cookie of tp_meter session @@ -238,7 +238,7 @@ static void batadv_tp_batctl_notify(enum batadv_tp_meter_reason reason, * batadv_tp_batctl_error_notify() - send client error result to client * @reason: reason for tp meter session stop * @dst: destination of tp_meter session - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @cookie: cookie of tp_meter session */ static void batadv_tp_batctl_error_notify(enum batadv_tp_meter_reason reason, @@ -251,7 +251,7 @@ static void batadv_tp_batctl_error_notify(enum batadv_tp_meter_reason reason, /** * batadv_tp_list_find() - find a tp_vars object in the global list - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @dst: the other endpoint MAC address to look for * * Look for a tp_vars object matching dst as end_point and return it after @@ -287,7 +287,7 @@ static struct batadv_tp_vars *batadv_tp_list_find(struct 
batadv_priv *bat_priv, /** * batadv_tp_list_find_session() - find tp_vars session object in the global * list - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @dst: the other endpoint MAC address to look for * @session: session identifier * @@ -366,7 +366,7 @@ static void batadv_tp_vars_put(struct batadv_tp_vars *tp_vars) /** * batadv_tp_sender_cleanup() - cleanup sender data and drop and timer - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @tp_vars: the private data of the current TP meter session to cleanup */ static void batadv_tp_sender_cleanup(struct batadv_priv *bat_priv, @@ -396,7 +396,7 @@ static void batadv_tp_sender_cleanup(struct batadv_priv *bat_priv, /** * batadv_tp_sender_end() - print info about ended session and inform client - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @tp_vars: the private data of the current TP meter session */ static void batadv_tp_sender_end(struct batadv_priv *bat_priv, @@ -619,7 +619,7 @@ static int batadv_tp_send_msg(struct batadv_tp_vars *tp_vars, const u8 *src, /** * batadv_tp_recv_ack() - ACK receiving function - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the buffer containing the received packet * * Process a received TP ACK packet @@ -832,7 +832,7 @@ static int batadv_tp_send(void *arg) } /* assume that all the hard_interfaces have a correctly - * configured MTU, so use the soft_iface MTU as MSS. + * configured MTU, so use the mesh_iface MTU as MSS. * This might not be true and in that case the fragmentation * should be used. * Now, try to send the packet as it is @@ -927,7 +927,7 @@ static void batadv_tp_start_kthread(struct batadv_tp_vars *tp_vars) /** * batadv_tp_start() - start a new tp meter session - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @dst: the receiver MAC address * @test_length: test length in milliseconds * @cookie: session cookie @@ -993,7 +993,7 @@ void batadv_tp_start(struct batadv_priv *bat_priv, const u8 *dst, /* initialise the CWND to 3*MSS (Section 3.1 in RFC5681). 
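/* Editorial sketch: worked numbers for the window initialisation the
 * comment above cites from RFC 5681 section 3.1. tp_meter derives its
 * MSS from the mesh interface payload size and starts with three
 * segments' worth of window. The MTU value is only an example.
 */
#include <stdio.h>

int main(void)
{
        unsigned int mtu = 1500;        /* example mesh_iface MTU     */
        unsigned int mss = mtu;         /* payload per packet         */
        unsigned int cwnd = 3 * mss;    /* 4500 bytes initial window  */
        unsigned int ss_thr = ~0u;      /* "biggest possible" start   */

        printf("cwnd=%u ss_threshold=%u\n", cwnd, ss_thr);
        return 0;
}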
* For batman-adv the MSS is the size of the payload received by the - * soft_interface, hence its MTU + * mesh_interface, hence its MTU */ tp_vars->cwnd = BATADV_TP_PLEN * 3; /* at the beginning initialise the SS threshold to the biggest possible @@ -1052,7 +1052,7 @@ void batadv_tp_start(struct batadv_priv *bat_priv, const u8 *dst, /** * batadv_tp_stop() - stop currently running tp meter session - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @dst: the receiver MAC address * @return_value: reason for tp meter session stop */ @@ -1141,7 +1141,7 @@ static void batadv_tp_receiver_shutdown(struct timer_list *t) /** * batadv_tp_send_ack() - send an ACK packet - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @dst: the mac address of the destination originator * @seq: the sequence number to ACK * @timestamp: the timestamp to echo back in the ACK @@ -1320,7 +1320,7 @@ static void batadv_tp_ack_unordered(struct batadv_tp_vars *tp_vars) /** * batadv_tp_init_recv() - return matching or create new receiver tp_vars - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @icmp: received icmp tp msg * * Return: corresponding tp_vars or NULL on errors @@ -1373,7 +1373,7 @@ out_unlock: /** * batadv_tp_recv_msg() - process a single data message - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the buffer containing the received packet * * Process a received TP MSG packet @@ -1457,7 +1457,7 @@ out: /** * batadv_tp_meter_recv() - main TP Meter receiving function - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the buffer containing the received packet */ void batadv_tp_meter_recv(struct batadv_priv *bat_priv, struct sk_buff *skb) diff --git a/net/batman-adv/trace.h b/net/batman-adv/trace.h index 6b816cf1a953..7da692ec38e9 100644 --- a/net/batman-adv/trace.h +++ b/net/batman-adv/trace.h @@ -34,7 +34,7 @@ TRACE_EVENT(batadv_dbg, TP_ARGS(bat_priv, vaf), TP_STRUCT__entry( - __string(device, bat_priv->soft_iface->name) + __string(device, bat_priv->mesh_iface->name) __string(driver, KBUILD_MODNAME) __vstring(msg, vaf->fmt, vaf->va) ), diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index d4b71d34310f..4a3165920de1 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -47,9 +47,9 @@ #include "hard-interface.h" #include "hash.h" #include "log.h" +#include "mesh-interface.h" #include "netlink.h" #include "originator.h" -#include "soft-interface.h" #include "tvlv.h" static struct kmem_cache *batadv_tl_cache __read_mostly; @@ -161,7 +161,7 @@ batadv_tt_hash_find(struct batadv_hashtable *hash, const u8 *addr, /** * batadv_tt_local_hash_find() - search the local table for a given client - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @addr: the mac address of the client to look for * @vid: VLAN identifier * @@ -186,7 +186,7 @@ batadv_tt_local_hash_find(struct batadv_priv *bat_priv, const u8 *addr, /** * batadv_tt_global_hash_find() - search the global table for a given client - * @bat_priv: the bat priv with all the 
soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @addr: the mac address of the client to look for * @vid: VLAN identifier * @@ -221,7 +221,7 @@ static void batadv_tt_local_entry_release(struct kref *ref) tt_local_entry = container_of(ref, struct batadv_tt_local_entry, common.refcount); - batadv_softif_vlan_put(tt_local_entry->vlan); + batadv_meshif_vlan_put(tt_local_entry->vlan); kfree_rcu(tt_local_entry, common.rcu); } @@ -260,7 +260,7 @@ void batadv_tt_global_entry_release(struct kref *ref) /** * batadv_tt_global_hash_count() - count the number of orig entries - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @addr: the mac address of the client to count entries for * @vid: VLAN identifier * @@ -286,28 +286,28 @@ int batadv_tt_global_hash_count(struct batadv_priv *bat_priv, /** * batadv_tt_local_size_mod() - change the size by v of the local table * identified by vid - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @vid: the VLAN identifier of the sub-table to change * @v: the amount to sum to the local table size */ static void batadv_tt_local_size_mod(struct batadv_priv *bat_priv, unsigned short vid, int v) { - struct batadv_softif_vlan *vlan; + struct batadv_meshif_vlan *vlan; - vlan = batadv_softif_vlan_get(bat_priv, vid); + vlan = batadv_meshif_vlan_get(bat_priv, vid); if (!vlan) return; atomic_add(v, &vlan->tt.num_entries); - batadv_softif_vlan_put(vlan); + batadv_meshif_vlan_put(vlan); } /** * batadv_tt_local_size_inc() - increase by one the local table size for the * given vid - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @vid: the VLAN identifier */ static void batadv_tt_local_size_inc(struct batadv_priv *bat_priv, @@ -319,7 +319,7 @@ static void batadv_tt_local_size_inc(struct batadv_priv *bat_priv, /** * batadv_tt_local_size_dec() - decrease by one the local table size for the * given vid - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @vid: the VLAN identifier */ static void batadv_tt_local_size_dec(struct batadv_priv *bat_priv, @@ -412,7 +412,7 @@ batadv_tt_orig_list_entry_put(struct batadv_tt_orig_list_entry *orig_entry) /** * batadv_tt_local_event() - store a local TT event (ADD/DEL) - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @tt_local_entry: the TT entry involved in the event * @event_flags: flags to store in the event structure */ @@ -504,7 +504,7 @@ static u16 batadv_tt_entries(u16 tt_len) /** * batadv_tt_local_table_transmit_size() - calculates the local translation * table size when transmitted over the air - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * * Return: local translation table size in bytes. 
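/* Editorial sketch: back-of-the-envelope form of the size computed
 * just below -- one TVLV header, one descriptor per VLAN that has
 * entries, and one change record per local client. The byte counts
 * are placeholders, not the real structure sizes.
 */
enum {
        HDR_LEN         = 12,   /* tvlv + tt_data header (example)   */
        VLAN_DESC_LEN   = 8,    /* per-VLAN descriptor (example)     */
        TT_CHANGE_LEN   = 12,   /* per-entry change record (example) */
};

static int tt_transmit_size(int num_vlan, int num_entries)
{
        return HDR_LEN + num_vlan * VLAN_DESC_LEN +
               num_entries * TT_CHANGE_LEN;
}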
*/ @@ -512,11 +512,11 @@ static int batadv_tt_local_table_transmit_size(struct batadv_priv *bat_priv) { u16 num_vlan = 0; u16 tt_local_entries = 0; - struct batadv_softif_vlan *vlan; + struct batadv_meshif_vlan *vlan; int hdr_size; rcu_read_lock(); - hlist_for_each_entry_rcu(vlan, &bat_priv->softif_vlan_list, list) { + hlist_for_each_entry_rcu(vlan, &bat_priv->meshif_vlan_list, list) { num_vlan++; tt_local_entries += atomic_read(&vlan->tt.num_entries); } @@ -576,7 +576,7 @@ static void batadv_tt_global_free(struct batadv_priv *bat_priv, /** * batadv_tt_local_add() - add a new client to the local table or update an * existing client - * @soft_iface: netdev struct of the mesh interface + * @mesh_iface: netdev struct of the mesh interface * @addr: the mac address of the client to add * @vid: VLAN identifier * @ifindex: index of the interface where the client is connected to (useful to @@ -586,14 +586,14 @@ static void batadv_tt_global_free(struct batadv_priv *bat_priv, * * Return: true if the client was successfully added, false otherwise. */ -bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, +bool batadv_tt_local_add(struct net_device *mesh_iface, const u8 *addr, unsigned short vid, int ifindex, u32 mark) { - struct batadv_priv *bat_priv = netdev_priv(soft_iface); + struct batadv_priv *bat_priv = netdev_priv(mesh_iface); struct batadv_tt_local_entry *tt_local; struct batadv_tt_global_entry *tt_global = NULL; - struct net *net = dev_net(soft_iface); - struct batadv_softif_vlan *vlan; + struct net *net = dev_net(mesh_iface); + struct batadv_meshif_vlan *vlan; struct net_device *in_dev = NULL; struct batadv_hard_iface *in_hardif = NULL; struct hlist_head *head; @@ -650,7 +650,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, table_size += batadv_tt_len(1); packet_size_max = atomic_read(&bat_priv->packet_size_max); if (table_size > packet_size_max) { - net_ratelimited_function(batadv_info, soft_iface, + net_ratelimited_function(batadv_info, mesh_iface, "Local translation table size (%i) exceeds maximum packet size (%i); Ignoring new local tt entry: %pM\n", table_size, packet_size_max, addr); goto out; @@ -661,9 +661,9 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, goto out; /* increase the refcounter of the related vlan */ - vlan = batadv_softif_vlan_get(bat_priv, vid); + vlan = batadv_meshif_vlan_get(bat_priv, vid); if (!vlan) { - net_ratelimited_function(batadv_info, soft_iface, + net_ratelimited_function(batadv_info, mesh_iface, "adding TT local entry %pM to non-existent VLAN %d\n", addr, batadv_print_vid(vid)); kmem_cache_free(batadv_tl_cache, tt_local); @@ -693,7 +693,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, /* the batman interface mac and multicast addresses should never be * purged */ - if (batadv_compare_eth(addr, soft_iface->dev_addr) || + if (batadv_compare_eth(addr, mesh_iface->dev_addr) || is_multicast_ether_addr(addr)) tt_local->common.flags |= BATADV_TT_CLIENT_NOPURGE; @@ -849,7 +849,7 @@ out: /** * batadv_tt_prepare_tvlv_local_data() - allocate and prepare the TT TVLV for * this node - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @tt_data: uninitialised pointer to the address of the TVLV buffer * @tt_change: uninitialised pointer to the address of the area where the TT * changes can be stored @@ -871,7 +871,7 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, s32 
*tt_len) { struct batadv_tvlv_tt_vlan_data *tt_vlan; - struct batadv_softif_vlan *vlan; + struct batadv_meshif_vlan *vlan; u16 num_vlan = 0; u16 vlan_entries = 0; u16 total_entries = 0; @@ -879,8 +879,8 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, u8 *tt_change_ptr; int change_offset; - spin_lock_bh(&bat_priv->softif_vlan_list_lock); - hlist_for_each_entry(vlan, &bat_priv->softif_vlan_list, list) { + spin_lock_bh(&bat_priv->meshif_vlan_list_lock); + hlist_for_each_entry(vlan, &bat_priv->meshif_vlan_list, list) { vlan_entries = atomic_read(&vlan->tt.num_entries); if (vlan_entries < 1) continue; @@ -909,7 +909,7 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, (*tt_data)->num_vlan = htons(num_vlan); tt_vlan = (*tt_data)->vlan_data; - hlist_for_each_entry(vlan, &bat_priv->softif_vlan_list, list) { + hlist_for_each_entry(vlan, &bat_priv->meshif_vlan_list, list) { vlan_entries = atomic_read(&vlan->tt.num_entries); if (vlan_entries < 1) continue; @@ -925,14 +925,14 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, *tt_change = (struct batadv_tvlv_tt_change *)tt_change_ptr; out: - spin_unlock_bh(&bat_priv->softif_vlan_list_lock); + spin_unlock_bh(&bat_priv->meshif_vlan_list_lock); return tvlv_len; } /** * batadv_tt_tvlv_container_update() - update the translation table tvlv * container after local tt changes have been committed - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information */ static void batadv_tt_tvlv_container_update(struct batadv_priv *bat_priv) { @@ -956,7 +956,7 @@ static void batadv_tt_tvlv_container_update(struct batadv_priv *bat_priv) * The local change history should still be cleaned up so the next * TT round can start again with a clean state. 
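/* Editorial sketch: condensed form of the fallback right below. When
 * the queued TT diff no longer fits into one mesh-interface MTU, the
 * diff is thrown away and receivers fetch the full table instead
 * (names simplified for the example).
 */
#include <stdbool.h>

static bool tt_keep_diff(unsigned int tt_diff_len, unsigned int mtu,
                         bool *drop_changes)
{
        if (tt_diff_len > mtu) {
                *drop_changes = true;   /* full-table mode this round */
                return false;
        }
        return true;
}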
*/ - if (tt_diff_len > bat_priv->soft_iface->mtu) { + if (tt_diff_len > bat_priv->mesh_iface->mtu) { tt_diff_len = 0; tt_diff_entries_num = 0; drop_changes = true; @@ -1025,7 +1025,7 @@ container_register: * @msg :Netlink message to dump into * @portid: Port making netlink request * @cb: Control block containing additional options - * @bat_priv: The bat priv with all the soft interface information + * @bat_priv: The bat priv with all the mesh interface information * @common: tt local & tt global common data * * Return: Error code, or 0 on success @@ -1037,7 +1037,7 @@ batadv_tt_local_dump_entry(struct sk_buff *msg, u32 portid, struct batadv_tt_common_entry *common) { void *hdr; - struct batadv_softif_vlan *vlan; + struct batadv_meshif_vlan *vlan; struct batadv_tt_local_entry *local; unsigned int last_seen_msecs; u32 crc; @@ -1045,13 +1045,13 @@ batadv_tt_local_dump_entry(struct sk_buff *msg, u32 portid, local = container_of(common, struct batadv_tt_local_entry, common); last_seen_msecs = jiffies_to_msecs(jiffies - local->last_seen); - vlan = batadv_softif_vlan_get(bat_priv, common->vid); + vlan = batadv_meshif_vlan_get(bat_priv, common->vid); if (!vlan) return 0; crc = vlan->tt.crc; - batadv_softif_vlan_put(vlan); + batadv_meshif_vlan_put(vlan); hdr = genlmsg_put(msg, portid, cb->nlh->nlmsg_seq, &batadv_netlink_family, NLM_F_MULTI, @@ -1084,7 +1084,7 @@ batadv_tt_local_dump_entry(struct sk_buff *msg, u32 portid, * @msg: Netlink message to dump into * @portid: Port making netlink request * @cb: Control block containing additional options - * @bat_priv: The bat priv with all the soft interface information + * @bat_priv: The bat priv with all the mesh interface information * @hash: hash to dump * @bucket: bucket index to dump * @idx_s: Number of entries to skip @@ -1130,7 +1130,7 @@ batadv_tt_local_dump_bucket(struct sk_buff *msg, u32 portid, */ int batadv_tt_local_dump(struct sk_buff *msg, struct netlink_callback *cb) { - struct net_device *soft_iface; + struct net_device *mesh_iface; struct batadv_priv *bat_priv; struct batadv_hard_iface *primary_if = NULL; struct batadv_hashtable *hash; @@ -1139,11 +1139,11 @@ int batadv_tt_local_dump(struct sk_buff *msg, struct netlink_callback *cb) int idx = cb->args[1]; int portid = NETLINK_CB(cb->skb).portid; - soft_iface = batadv_netlink_get_softif(cb); - if (IS_ERR(soft_iface)) - return PTR_ERR(soft_iface); + mesh_iface = batadv_netlink_get_meshif(cb); + if (IS_ERR(mesh_iface)) + return PTR_ERR(mesh_iface); - bat_priv = netdev_priv(soft_iface); + bat_priv = netdev_priv(mesh_iface); primary_if = batadv_primary_if_get_selected(bat_priv); if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) { @@ -1165,7 +1165,7 @@ int batadv_tt_local_dump(struct sk_buff *msg, struct netlink_callback *cb) out: batadv_hardif_put(primary_if); - dev_put(soft_iface); + dev_put(mesh_iface); cb->args[0] = bucket; cb->args[1] = idx; @@ -1194,7 +1194,7 @@ batadv_tt_local_set_pending(struct batadv_priv *bat_priv, /** * batadv_tt_local_remove() - logically remove an entry from the local table - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @addr: the MAC address of the client to remove * @vid: VLAN identifier * @message: message to append to the log on deletion @@ -1259,7 +1259,7 @@ out: /** * batadv_tt_local_purge_list() - purge inactive tt local entries - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface 
information * @head: pointer to the list containing the local tt entries * @timeout: parameter deciding whether a given tt local entry is considered * inactive or not @@ -1294,7 +1294,7 @@ static void batadv_tt_local_purge_list(struct batadv_priv *bat_priv, /** * batadv_tt_local_purge() - purge inactive tt local entries - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @timeout: parameter deciding whether a given tt local entry is considered * inactive or not */ @@ -1529,7 +1529,7 @@ out: /** * batadv_tt_global_add() - add a new TT global entry or update an existing one - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig_node: the originator announcing the client * @tt_addr: the mac address of the non-mesh client * @vid: VLAN identifier @@ -1702,7 +1702,7 @@ out: /** * batadv_transtable_best_orig() - Get best originator list entry from tt entry - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @tt_global_entry: global translation table entry to be analyzed * * This function assumes the caller holds rcu_read_lock(). @@ -1809,7 +1809,7 @@ batadv_tt_global_dump_subentry(struct sk_buff *msg, u32 portid, u32 seq, * @msg: Netlink message to dump into * @portid: Port making netlink request * @seq: Sequence number of netlink message - * @bat_priv: The bat priv with all the soft interface information + * @bat_priv: The bat priv with all the mesh interface information * @common: tt local & tt global common data * @sub_s: Number of entries to skip * @@ -1854,7 +1854,7 @@ batadv_tt_global_dump_entry(struct sk_buff *msg, u32 portid, u32 seq, * @msg: Netlink message to dump into * @portid: Port making netlink request * @seq: Sequence number of netlink message - * @bat_priv: The bat priv with all the soft interface information + * @bat_priv: The bat priv with all the mesh interface information * @head: Pointer to the list containing the global tt entries * @idx_s: Number of entries to skip * @sub: Number of entries to skip @@ -1897,7 +1897,7 @@ batadv_tt_global_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq, */ int batadv_tt_global_dump(struct sk_buff *msg, struct netlink_callback *cb) { - struct net_device *soft_iface; + struct net_device *mesh_iface; struct batadv_priv *bat_priv; struct batadv_hard_iface *primary_if = NULL; struct batadv_hashtable *hash; @@ -1908,11 +1908,11 @@ int batadv_tt_global_dump(struct sk_buff *msg, struct netlink_callback *cb) int sub = cb->args[2]; int portid = NETLINK_CB(cb->skb).portid; - soft_iface = batadv_netlink_get_softif(cb); - if (IS_ERR(soft_iface)) - return PTR_ERR(soft_iface); + mesh_iface = batadv_netlink_get_meshif(cb); + if (IS_ERR(mesh_iface)) + return PTR_ERR(mesh_iface); - bat_priv = netdev_priv(soft_iface); + bat_priv = netdev_priv(mesh_iface); primary_if = batadv_primary_if_get_selected(bat_priv); if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) { @@ -1937,7 +1937,7 @@ int batadv_tt_global_dump(struct sk_buff *msg, struct netlink_callback *cb) out: batadv_hardif_put(primary_if); - dev_put(soft_iface); + dev_put(mesh_iface); cb->args[0] = bucket; cb->args[1] = idx; @@ -1990,7 +1990,7 @@ batadv_tt_global_del_orig_list(struct batadv_tt_global_entry *tt_global_entry) /** * batadv_tt_global_del_orig_node() - remove orig_node from a global tt entry - * @bat_priv: the bat priv with 
all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @tt_global_entry: the global entry to remove the orig_node from * @orig_node: the originator announcing the client * @message: message to append to the log on deletion @@ -2069,7 +2069,7 @@ batadv_tt_global_del_roaming(struct batadv_priv *bat_priv, /** * batadv_tt_global_del() - remove a client from the global table - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig_node: an originator serving this client * @addr: the mac address of the client * @vid: VLAN identifier @@ -2134,7 +2134,7 @@ out: /** * batadv_tt_global_del_orig() - remove all the TT global entries belonging to * the given originator matching the provided vid - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig_node: the originator owning the entries to remove * @match_vid: the VLAN identifier to match. If negative all the entries will be * removed @@ -2305,7 +2305,7 @@ _batadv_is_ap_isolated(struct batadv_tt_local_entry *tt_local_entry, /** * batadv_transtable_search() - get the mesh destination for a given client - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @src: mac address of the source client * @addr: mac address of the destination client * @vid: VLAN identifier @@ -2364,7 +2364,7 @@ out: /** * batadv_tt_global_crc() - calculates the checksum of the local table belonging * to the given orig_node - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig_node: originator for which the CRC should be computed * @vid: VLAN identifier for which the CRC32 has to be computed * @@ -2458,7 +2458,7 @@ static u32 batadv_tt_global_crc(struct batadv_priv *bat_priv, /** * batadv_tt_local_crc() - calculates the checksum of the local table - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @vid: VLAN identifier for which the CRC32 has to be computed * * For details about the computation, please refer to the documentation for @@ -2593,7 +2593,7 @@ static void batadv_tt_req_purge(struct batadv_priv *bat_priv) /** * batadv_tt_req_node_new() - search and possibly create a tt_req_node object - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig_node: orig node this request is being issued for * * Return: the pointer to the new tt_req_node struct if no request @@ -2689,7 +2689,7 @@ static bool batadv_tt_global_valid(const void *entry_ptr, /** * batadv_tt_tvlv_generate() - fill the tvlv buff with the tt entries from the * specified tt hash - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @hash: hash table containing the tt entries * @tt_len: expected tvlv tt data buffer length in number of bytes * @tvlv_buff: pointer to the buffer to fill with the TT data @@ -2810,15 +2810,15 @@ static bool batadv_tt_global_check_crc(struct batadv_orig_node *orig_node, /** * batadv_tt_local_update_crc() - update all the local CRCs - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh 
interface information */ static void batadv_tt_local_update_crc(struct batadv_priv *bat_priv) { - struct batadv_softif_vlan *vlan; + struct batadv_meshif_vlan *vlan; /* recompute the global CRC for each VLAN */ rcu_read_lock(); - hlist_for_each_entry_rcu(vlan, &bat_priv->softif_vlan_list, list) { + hlist_for_each_entry_rcu(vlan, &bat_priv->meshif_vlan_list, list) { vlan->tt.crc = batadv_tt_local_crc(bat_priv, vlan->vid); } rcu_read_unlock(); @@ -2826,7 +2826,7 @@ static void batadv_tt_local_update_crc(struct batadv_priv *bat_priv) /** * batadv_tt_global_update_crc() - update all the global CRCs for this orig_node - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig_node: the orig_node for which the CRCs have to be updated */ static void batadv_tt_global_update_crc(struct batadv_priv *bat_priv, @@ -2853,7 +2853,7 @@ static void batadv_tt_global_update_crc(struct batadv_priv *bat_priv, /** * batadv_send_tt_request() - send a TT Request message to a given node - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @dst_orig_node: the destination of the message * @ttvn: the version number that the source of the message is looking for * @tt_vlan: pointer to the first tvlv VLAN object to request @@ -2938,7 +2938,7 @@ out: /** * batadv_send_other_tt_response() - send reply to tt request concerning another * node's translation table - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @tt_data: tt data containing the tt request information * @req_src: mac address of tt request sender * @req_dst: mac address of tt request recipient @@ -3029,7 +3029,7 @@ static bool batadv_send_other_tt_response(struct batadv_priv *bat_priv, /* Don't send the response, if larger than fragmented packet. 
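/* Editorial sketch: the per-VLAN checksums recomputed in
 * batadv_tt_local_update_crc() earlier in this hunk are built
 * order-independently -- one checksum per entry, folded into the table
 * value with XOR, so hash-table iteration order cannot change the
 * result. Toy version with a stand-in per-entry hash (FNV-1a here,
 * not the kernel's CRC32C).
 */
#include <stdint.h>
#include <stddef.h>

struct tt_entry { uint8_t addr[6]; uint16_t vid; };

static uint32_t entry_hash(const struct tt_entry *e)
{
        uint32_t h = 2166136261u;
        size_t i;

        for (i = 0; i < 6; i++)
                h = (h ^ e->addr[i]) * 16777619u;
        h = (h ^ (e->vid & 0xff)) * 16777619u;
        h = (h ^ (e->vid >> 8)) * 16777619u;
        return h;
}

static uint32_t table_crc(const struct tt_entry *tab, size_t n)
{
        uint32_t crc = 0;
        size_t i;

        for (i = 0; i < n; i++)
                crc ^= entry_hash(&tab[i]);     /* XOR: order-free */
        return crc;
}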
*/ tt_len = sizeof(struct batadv_unicast_tvlv_packet) + tvlv_len; if (tt_len > atomic_read(&bat_priv->packet_size_max)) { - net_ratelimited_function(batadv_info, bat_priv->soft_iface, + net_ratelimited_function(batadv_info, bat_priv->mesh_iface, "Ignoring TT_REQUEST from %pM; Response size exceeds max packet size.\n", res_dst_orig_node->orig); goto out; @@ -3068,7 +3068,7 @@ out: /** * batadv_send_my_tt_response() - send reply to tt request concerning this * node's translation table - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @tt_data: tt data containing the tt request information * @req_src: mac address of tt request sender * @@ -3185,7 +3185,7 @@ out: /** * batadv_send_tt_response() - send reply to tt request - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @tt_data: tt data containing the tt request information * @req_src: mac address of tt request sender * @req_dst: mac address of tt request recipient @@ -3280,7 +3280,7 @@ static void batadv_tt_update_changes(struct batadv_priv *bat_priv, /** * batadv_is_my_client() - check if a client is served by the local node - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @addr: the mac address of the client to check * @vid: VLAN identifier * @@ -3309,7 +3309,7 @@ out: /** * batadv_handle_tt_response() - process incoming tt reply - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @tt_data: tt data containing the tt request information * @resp_src: mac address of tt reply sender * @num_entries: number of tt change entries appended to the tt data @@ -3397,7 +3397,7 @@ static void batadv_tt_roam_purge(struct batadv_priv *bat_priv) /** * batadv_tt_check_roam_count() - check if a client has roamed too frequently - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @client: mac address of the roaming client * * This function checks whether the client already reached the @@ -3452,7 +3452,7 @@ unlock: /** * batadv_send_roam_adv() - send a roaming advertisement message - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @client: mac address of the roaming client * @vid: VLAN identifier * @orig_node: message destination @@ -3516,8 +3516,8 @@ static void batadv_tt_purge(struct work_struct *work) } /** - * batadv_tt_free() - Free translation table of soft interface - * @bat_priv: the bat priv with all the soft interface information + * batadv_tt_free() - Free translation table of mesh interface + * @bat_priv: the bat priv with all the mesh interface information */ void batadv_tt_free(struct batadv_priv *bat_priv) { @@ -3540,7 +3540,7 @@ void batadv_tt_free(struct batadv_priv *bat_priv) /** * batadv_tt_local_set_flags() - set or unset the specified flags on the local * table and possibly count them in the TT size - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @flags: the flag to switch * @enable: whether to set or unset the flag * @count: whether to increase the TT size by the number of changed entries @@ -3626,7 +3626,7 @@ static void 
batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv) /** * batadv_tt_local_commit_changes_nolock() - commit all pending local tt changes * which have been queued in the time since the last commit - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * * Caller must hold tt->commit_lock. */ @@ -3659,7 +3659,7 @@ static void batadv_tt_local_commit_changes_nolock(struct batadv_priv *bat_priv) /** * batadv_tt_local_commit_changes() - commit all pending local tt changes which * have been queued in the time since the last commit - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information */ void batadv_tt_local_commit_changes(struct batadv_priv *bat_priv) { @@ -3670,7 +3670,7 @@ void batadv_tt_local_commit_changes(struct batadv_priv *bat_priv) /** * batadv_is_ap_isolated() - Check if packet from upper layer should be dropped - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @src: source mac address of packet * @dst: destination mac address of packet * @vid: vlan id of packet @@ -3682,10 +3682,10 @@ bool batadv_is_ap_isolated(struct batadv_priv *bat_priv, u8 *src, u8 *dst, { struct batadv_tt_local_entry *tt_local_entry; struct batadv_tt_global_entry *tt_global_entry; - struct batadv_softif_vlan *vlan; + struct batadv_meshif_vlan *vlan; bool ret = false; - vlan = batadv_softif_vlan_get(bat_priv, vid); + vlan = batadv_meshif_vlan_get(bat_priv, vid); if (!vlan) return false; @@ -3707,14 +3707,14 @@ bool batadv_is_ap_isolated(struct batadv_priv *bat_priv, u8 *src, u8 *dst, local_entry_put: batadv_tt_local_entry_put(tt_local_entry); vlan_put: - batadv_softif_vlan_put(vlan); + batadv_meshif_vlan_put(vlan); return ret; } /** * batadv_tt_update_orig() - update global translation table with new tt * information received via ogms - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig_node: the orig_node of the ogm * @tt_buff: pointer to the first tvlv VLAN entry * @tt_num_vlan: number of tvlv VLAN entries @@ -3798,7 +3798,7 @@ request_table: /** * batadv_tt_global_client_is_roaming() - check if a client is marked as roaming - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @addr: the mac address of the client to check * @vid: VLAN identifier * @@ -3824,7 +3824,7 @@ out: /** * batadv_tt_local_client_is_roaming() - tells whether the client is roaming - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @addr: the mac address of the local client to query * @vid: VLAN identifier * @@ -3850,7 +3850,7 @@ out: /** * batadv_tt_add_temporary_global_entry() - Add temporary entry to global TT - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig_node: orig node which the temporary entry should be associated with * @addr: mac address of the client * @vid: VLAN id of the new temporary global translation table @@ -3883,14 +3883,14 @@ bool batadv_tt_add_temporary_global_entry(struct batadv_priv *bat_priv, /** * batadv_tt_local_resize_to_mtu() - resize the local translation table fit the * maximum packet size that can be 
transported through the mesh - * @soft_iface: netdev struct of the mesh interface + * @mesh_iface: netdev struct of the mesh interface * * Remove entries older than 'timeout' and half timeout if more entries need * to be removed. */ -void batadv_tt_local_resize_to_mtu(struct net_device *soft_iface) +void batadv_tt_local_resize_to_mtu(struct net_device *mesh_iface) { - struct batadv_priv *bat_priv = netdev_priv(soft_iface); + struct batadv_priv *bat_priv = netdev_priv(mesh_iface); int packet_size_max = atomic_read(&bat_priv->packet_size_max); int table_size, timeout = BATADV_TT_LOCAL_TIMEOUT / 2; bool reduced = false; @@ -3907,7 +3907,7 @@ void batadv_tt_local_resize_to_mtu(struct net_device *soft_iface) timeout /= 2; reduced = true; - net_ratelimited_function(batadv_info, soft_iface, + net_ratelimited_function(batadv_info, mesh_iface, "Forced to purge local tt entries to fit new maximum fragment MTU (%i)\n", packet_size_max); } @@ -3923,7 +3923,7 @@ void batadv_tt_local_resize_to_mtu(struct net_device *soft_iface) /** * batadv_tt_tvlv_ogm_handler_v1() - process incoming tt tvlv container - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig: the orig_node of the ogm * @flags: flags indicating the tvlv state (see batadv_tvlv_handler_flags) * @tvlv_value: tvlv buffer containing the gateway data @@ -3962,7 +3962,7 @@ static void batadv_tt_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, /** * batadv_tt_tvlv_unicast_handler_v1() - process incoming (unicast) tt tvlv * container - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @src: mac address of tt tvlv sender * @dst: mac address of tt tvlv recipient * @tvlv_value: tvlv buffer containing the tt data @@ -4044,7 +4044,7 @@ static int batadv_tt_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv, /** * batadv_roam_tvlv_unicast_handler_v1() - process incoming tt roam tvlv * container - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @src: mac address of tt tvlv sender * @dst: mac address of tt tvlv recipient * @tvlv_value: tvlv buffer containing the tt data @@ -4093,7 +4093,7 @@ out: /** * batadv_tt_init() - initialise the translation table internals - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * * Return: 0 on success or negative error number in case of failure. 
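/* Editorial sketch: the shape of batadv_tt_local_resize_to_mtu()
 * above -- halve the purge timeout and purge again until the table
 * fits the new maximum packet size. The table model below is a toy so
 * the loop visibly terminates; it is not the kernel's data structure.
 */
#define LOCAL_TIMEOUT_MS 600000

static int toy_table_size = 4000;       /* pretend transmit size */

static void purge_older_than(int timeout_ms)
{
        (void)timeout_ms;
        toy_table_size -= 500;          /* toy: purging shrinks it */
}

static void resize_to_mtu(int packet_size_max)
{
        int timeout = LOCAL_TIMEOUT_MS / 2;

        while (toy_table_size > packet_size_max && timeout) {
                purge_older_than(timeout);
                timeout /= 2;           /* more aggressive each pass */
        }
}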
*/ @@ -4131,7 +4131,7 @@ int batadv_tt_init(struct batadv_priv *bat_priv) /** * batadv_tt_global_is_isolated() - check if a client is marked as isolated - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @addr: the mac address of the client * @vid: the identifier of the VLAN where this client is connected * diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h index d18740d9a22b..618d9dbca5ea 100644 --- a/net/batman-adv/translation-table.h +++ b/net/batman-adv/translation-table.h @@ -16,7 +16,7 @@ #include <linux/types.h> int batadv_tt_init(struct batadv_priv *bat_priv); -bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, +bool batadv_tt_local_add(struct net_device *mesh_iface, const u8 *addr, unsigned short vid, int ifindex, u32 mark); u16 batadv_tt_local_remove(struct batadv_priv *bat_priv, const u8 *addr, unsigned short vid, @@ -45,7 +45,7 @@ bool batadv_tt_global_client_is_roaming(struct batadv_priv *bat_priv, u8 *addr, unsigned short vid); bool batadv_tt_local_client_is_roaming(struct batadv_priv *bat_priv, u8 *addr, unsigned short vid); -void batadv_tt_local_resize_to_mtu(struct net_device *soft_iface); +void batadv_tt_local_resize_to_mtu(struct net_device *mesh_iface); bool batadv_tt_add_temporary_global_entry(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, const unsigned char *addr, diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c index 2a583215d439..76dff1f9c559 100644 --- a/net/batman-adv/tvlv.c +++ b/net/batman-adv/tvlv.c @@ -59,7 +59,7 @@ static void batadv_tvlv_handler_put(struct batadv_tvlv_handler *tvlv_handler) /** * batadv_tvlv_handler_get() - retrieve tvlv handler from the tvlv handler list * based on the provided type and version (both need to match) - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @type: tvlv handler type to look for * @version: tvlv handler version to look for * @@ -118,7 +118,7 @@ static void batadv_tvlv_container_put(struct batadv_tvlv_container *tvlv) /** * batadv_tvlv_container_get() - retrieve tvlv container from the tvlv container * list based on the provided type and version (both need to match) - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @type: tvlv container type to look for * @version: tvlv container version to look for * @@ -152,7 +152,7 @@ batadv_tvlv_container_get(struct batadv_priv *bat_priv, u8 type, u8 version) /** * batadv_tvlv_container_list_size() - calculate the size of the tvlv container * list entries - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * * Has to be called with the appropriate locks being acquired * (tvlv.container_list_lock). 
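/*
 * For context, the container API whose comments are touched in these
 * hunks pairs registrations and unregistrations on a (type, version)
 * key. A minimal caller sketch; BATADV_TVLV_EXAMPLE and my_data are
 * hypothetical, used only for illustration:
 */
	batadv_tvlv_container_register(bat_priv, BATADV_TVLV_EXAMPLE, 1,
				       &my_data, sizeof(my_data));
	/* ... */
	batadv_tvlv_container_unregister(bat_priv, BATADV_TVLV_EXAMPLE, 1);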
@@ -177,7 +177,7 @@ static u16 batadv_tvlv_container_list_size(struct batadv_priv *bat_priv) /** * batadv_tvlv_container_remove() - remove tvlv container from the tvlv * container list - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @tvlv: the to be removed tvlv container * * Has to be called with the appropriate locks being acquired @@ -201,7 +201,7 @@ static void batadv_tvlv_container_remove(struct batadv_priv *bat_priv, /** * batadv_tvlv_container_unregister() - unregister tvlv container based on the * provided type and version (both need to match) - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @type: tvlv container type to unregister * @version: tvlv container type to unregister */ @@ -219,7 +219,7 @@ void batadv_tvlv_container_unregister(struct batadv_priv *bat_priv, /** * batadv_tvlv_container_register() - register tvlv type, version and content * to be propagated with each (primary interface) OGM - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @type: tvlv container type * @version: tvlv container version * @tvlv_value: tvlv container content @@ -297,7 +297,7 @@ static bool batadv_tvlv_realloc_packet_buff(unsigned char **packet_buff, /** * batadv_tvlv_container_ogm_append() - append tvlv container content to given * OGM packet buffer - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @packet_buff: ogm packet buffer * @packet_buff_len: ogm packet buffer size including ogm header and tvlv * content @@ -350,7 +350,7 @@ end: /** * batadv_tvlv_call_handler() - parse the given tvlv buffer to call the * appropriate handlers - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @tvlv_handler: tvlv callback function handling the tvlv content * @packet_type: indicates for which packet type the TVLV handler is called * @orig_node: orig node emitting the ogm packet @@ -421,7 +421,7 @@ static int batadv_tvlv_call_handler(struct batadv_priv *bat_priv, /** * batadv_tvlv_containers_process() - parse the given tvlv buffer to call the * appropriate handlers - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @packet_type: indicates for which packet type the TVLV handler is called * @orig_node: orig node emitting the ogm packet * @skb: the skb the TVLV handler is called for @@ -490,7 +490,7 @@ int batadv_tvlv_containers_process(struct batadv_priv *bat_priv, /** * batadv_tvlv_ogm_receive() - process an incoming ogm and call the appropriate * handlers - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @batadv_ogm_packet: ogm packet containing the tvlv containers * @orig_node: orig node emitting the ogm packet */ @@ -518,7 +518,7 @@ void batadv_tvlv_ogm_receive(struct batadv_priv *bat_priv, * batadv_tvlv_handler_register() - register tvlv handler based on the provided * type and version (both need to match) for ogm tvlv payload and/or unicast * payload - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @optr: ogm 
tvlv handler callback function. This function receives the orig * node, flags and the tvlv content as argument to process. * @uptr: unicast tvlv handler callback function. This function receives the @@ -583,7 +583,7 @@ void batadv_tvlv_handler_register(struct batadv_priv *bat_priv, /** * batadv_tvlv_handler_unregister() - unregister tvlv handler based on the * provided type and version (both need to match) - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @type: tvlv handler type to be unregistered * @version: tvlv handler version to be unregistered */ @@ -606,7 +606,7 @@ void batadv_tvlv_handler_unregister(struct batadv_priv *bat_priv, /** * batadv_tvlv_unicast_send() - send a unicast packet with tvlv payload to the * specified host - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @src: source mac address of the unicast packet * @dst: destination mac address of the unicast packet * @type: tvlv type diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index fe89f08533fe..0ca0fc072fc9 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -186,6 +186,9 @@ struct batadv_hard_iface { /** @net_dev: pointer to the net_device */ struct net_device *net_dev; + /** @dev_tracker: device tracker for @net_dev */ + netdevice_tracker dev_tracker; + /** @refcount: number of contexts the object is used */ struct kref refcount; @@ -196,10 +199,13 @@ struct batadv_hard_iface { struct packet_type batman_adv_ptype; /** - * @soft_iface: the batman-adv interface which uses this network + * @mesh_iface: the batman-adv interface which uses this network * interface */ - struct net_device *soft_iface; + struct net_device *mesh_iface; + + /** @meshif_dev_tracker: device tracker for @mesh_iface */ + netdevice_tracker meshif_dev_tracker; /** @rcu: struct used for freeing in an RCU-safe manner */ struct rcu_head rcu; @@ -487,7 +493,7 @@ struct batadv_orig_node { /** @hash_entry: hlist node for &batadv_priv.orig_hash */ struct hlist_node hash_entry; - /** @bat_priv: pointer to soft_iface this orig node belongs to */ + /** @bat_priv: pointer to mesh_iface this orig node belongs to */ struct batadv_priv *bat_priv; /** @bcast_seqno_lock: lock protecting bcast_bits & last_bcast_seqno */ @@ -899,13 +905,13 @@ enum batadv_counters { /** * @BATADV_CNT_MCAST_RX_LOCAL: counter for received batman-adv multicast - * packets which were forwarded to the local soft interface + * packets which were forwarded to the local mesh interface */ BATADV_CNT_MCAST_RX_LOCAL, /** * @BATADV_CNT_MCAST_RX_LOCAL_BYTES: bytes counter for received - * batman-adv multicast packets which were forwarded to the local soft + * batman-adv multicast packets which were forwarded to the local mesh * interface */ BATADV_CNT_MCAST_RX_LOCAL_BYTES, @@ -1137,29 +1143,6 @@ struct batadv_priv_bla { }; #endif -#ifdef CONFIG_BATMAN_ADV_DEBUG - -/** - * struct batadv_priv_debug_log - debug logging data - */ -struct batadv_priv_debug_log { - /** @log_buff: buffer holding the logs (ring buffer) */ - char log_buff[BATADV_LOG_BUF_LEN]; - - /** @log_start: index of next character to read */ - unsigned long log_start; - - /** @log_end: index of next character to write */ - unsigned long log_end; - - /** @lock: lock protecting log_buff, log_start & log_end */ - spinlock_t lock; - - /** @queue_wait: log reader's wait queue */ - wait_queue_head_t queue_wait; -}; -#endif - /** * struct 
batadv_priv_gw - per mesh interface gateway data */ @@ -1264,7 +1247,7 @@ struct batadv_mcast_mla_flags { /** @enabled: whether the multicast tvlv is currently enabled */ unsigned char enabled:1; - /** @bridged: whether the soft interface has a bridge on top */ + /** @bridged: whether the mesh interface has a bridge on top */ unsigned char bridged:1; /** @tvlv_flags: the flags we have last sent in our mcast tvlv */ @@ -1400,7 +1383,7 @@ struct batadv_priv_nc { /** * @decoding_hash: Hash table used to buffer skbs that might be needed * to decode a received coded skb. The buffer is used for 1) skbs - * arriving on the soft-interface; 2) skbs overheard on the + * arriving on the mesh-interface; 2) skbs overheard on the * hard-interface; and 3) skbs forwarded by batman-adv. */ struct batadv_hashtable *decoding_hash; @@ -1553,9 +1536,9 @@ struct batadv_tp_vars { }; /** - * struct batadv_softif_vlan - per VLAN attributes set + * struct batadv_meshif_vlan - per VLAN attributes set */ -struct batadv_softif_vlan { +struct batadv_meshif_vlan { /** @bat_priv: pointer to the mesh object */ struct batadv_priv *bat_priv; @@ -1568,7 +1551,7 @@ struct batadv_softif_vlan { /** @tt: TT private attributes (VLAN specific) */ struct batadv_vlan_tt tt; - /** @list: list node for &bat_priv.softif_vlan_list */ + /** @list: list node for &bat_priv.meshif_vlan_list */ struct hlist_node list; /** @@ -1581,7 +1564,7 @@ struct batadv_softif_vlan { }; /** - * struct batadv_priv_bat_v - B.A.T.M.A.N. V per soft-interface private data + * struct batadv_priv_bat_v - B.A.T.M.A.N. V per mesh-interface private data */ struct batadv_priv_bat_v { /** @ogm_buff: buffer holding the OGM packet */ @@ -1610,8 +1593,8 @@ struct batadv_priv { */ atomic_t mesh_state; - /** @soft_iface: net device which holds this struct as private data */ - struct net_device *soft_iface; + /** @mesh_iface: net device which holds this struct as private data */ + struct net_device *mesh_iface; /** * @mtu_set_by_user: MTU was set once by user @@ -1760,24 +1743,19 @@ struct batadv_priv { struct batadv_algo_ops *algo_ops; /** - * @softif_vlan_list: a list of softif_vlan structs, one per VLAN + * @meshif_vlan_list: a list of meshif_vlan structs, one per VLAN * created on top of the mesh interface represented by this object */ - struct hlist_head softif_vlan_list; + struct hlist_head meshif_vlan_list; - /** @softif_vlan_list_lock: lock protecting softif_vlan_list */ - spinlock_t softif_vlan_list_lock; + /** @meshif_vlan_list_lock: lock protecting meshif_vlan_list */ + spinlock_t meshif_vlan_list_lock; #ifdef CONFIG_BATMAN_ADV_BLA /** @bla: bridge loop avoidance data */ struct batadv_priv_bla bla; #endif -#ifdef CONFIG_BATMAN_ADV_DEBUG - /** @debug_log: holding debug logging relevant data */ - struct batadv_priv_debug_log *debug_log; -#endif - /** @gw: gateway data */ struct batadv_priv_gw gw; @@ -1808,7 +1786,7 @@ struct batadv_priv { #endif /* CONFIG_BATMAN_ADV_NC */ #ifdef CONFIG_BATMAN_ADV_BATMAN_V - /** @bat_v: B.A.T.M.A.N. V per soft-interface private data */ + /** @bat_v: B.A.T.M.A.N. 
V per mesh-interface private data */ struct batadv_priv_bat_v bat_v; #endif }; @@ -1831,7 +1809,7 @@ struct batadv_bla_backbone_gw { /** @hash_entry: hlist node for &batadv_priv_bla.backbone_hash */ struct hlist_node hash_entry; - /** @bat_priv: pointer to soft_iface this backbone gateway belongs to */ + /** @bat_priv: pointer to mesh_iface this backbone gateway belongs to */ struct batadv_priv *bat_priv; /** @lasttime: last time we heard of this backbone gw */ @@ -1936,8 +1914,8 @@ struct batadv_tt_local_entry { /** @last_seen: timestamp used for purging stale tt local entries */ unsigned long last_seen; - /** @vlan: soft-interface vlan of the entry */ - struct batadv_softif_vlan *vlan; + /** @vlan: mesh-interface vlan of the entry */ + struct batadv_meshif_vlan *vlan; }; /** @@ -2161,7 +2139,7 @@ struct batadv_forw_packet { u16 packet_len; /** @direct_link_flags: direct link flags for aggregated OGM packets */ - u32 direct_link_flags; + DECLARE_BITMAP(direct_link_flags, BATADV_MAX_AGGREGATION_PACKETS); /** @num_packets: counter for aggregated OGMv1 packets */ u8 num_packets; diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index 50cfec8ccac4..1298c8685bad 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -13,6 +13,7 @@ #include <net/ipv6.h> #include <net/ip6_route.h> #include <net/addrconf.h> +#include <net/netdev_lock.h> #include <net/pkt_sched.h> #include <net/bluetooth/bluetooth.h> diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index e7ec12437c8b..012fc107901a 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -57,6 +57,7 @@ DEFINE_RWLOCK(hci_dev_list_lock); /* HCI callback list */ LIST_HEAD(hci_cb_list); +DEFINE_MUTEX(hci_cb_list_lock); /* HCI ID Numbering */ static DEFINE_IDA(hci_index_ida); @@ -2972,7 +2973,9 @@ int hci_register_cb(struct hci_cb *cb) { BT_DBG("%p name %s", cb, cb->name); - list_add_tail_rcu(&cb->list, &hci_cb_list); + mutex_lock(&hci_cb_list_lock); + list_add_tail(&cb->list, &hci_cb_list); + mutex_unlock(&hci_cb_list_lock); return 0; } @@ -2982,8 +2985,9 @@ int hci_unregister_cb(struct hci_cb *cb) { BT_DBG("%p name %s", cb, cb->name); - list_del_rcu(&cb->list); - synchronize_rcu(); + mutex_lock(&hci_cb_list_lock); + list_del(&cb->list); + mutex_unlock(&hci_cb_list_lock); return 0; } diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 2cc7a9306350..903b0b52692a 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3391,23 +3391,30 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, void *data, hci_update_scan(hdev); } - params = hci_conn_params_lookup(hdev, &conn->dst, conn->dst_type); - if (params) { - switch (params->auto_connect) { - case HCI_AUTO_CONN_LINK_LOSS: - if (ev->reason != HCI_ERROR_CONNECTION_TIMEOUT) + /* Re-enable passive scanning if disconnected device is marked + * as auto-connectable. 
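+ * (that is, params->auto_connect is HCI_AUTO_CONN_DIRECT or
+ * HCI_AUTO_CONN_ALWAYS, or HCI_AUTO_CONN_LINK_LOSS when the link
+ * actually timed out).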
+ */ + if (conn->type == LE_LINK) { + params = hci_conn_params_lookup(hdev, &conn->dst, + conn->dst_type); + if (params) { + switch (params->auto_connect) { + case HCI_AUTO_CONN_LINK_LOSS: + if (ev->reason != HCI_ERROR_CONNECTION_TIMEOUT) + break; + fallthrough; + + case HCI_AUTO_CONN_DIRECT: + case HCI_AUTO_CONN_ALWAYS: + hci_pend_le_list_del_init(params); + hci_pend_le_list_add(params, + &hdev->pend_le_conns); + hci_update_passive_scan(hdev); break; - fallthrough; - case HCI_AUTO_CONN_DIRECT: - case HCI_AUTO_CONN_ALWAYS: - hci_pend_le_list_del_init(params); - hci_pend_le_list_add(params, &hdev->pend_le_conns); - hci_update_passive_scan(hdev); - break; - - default: - break; + default: + break; + } } } diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index 44acddf58a0c..0cb52a3308ba 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -2187,11 +2187,6 @@ done: return HCI_LM_ACCEPT; } -static bool iso_match(struct hci_conn *hcon) -{ - return hcon->type == ISO_LINK || hcon->type == LE_LINK; -} - static void iso_connect_cfm(struct hci_conn *hcon, __u8 status) { if (hcon->type != ISO_LINK) { @@ -2373,7 +2368,6 @@ drop: static struct hci_cb iso_cb = { .name = "ISO", - .match = iso_match, .connect_cfm = iso_connect_cfm, .disconn_cfm = iso_disconn_cfm, }; diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index b22078b67972..c27ea70f71e1 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -7182,11 +7182,6 @@ static struct l2cap_chan *l2cap_global_fixed_chan(struct l2cap_chan *c, return NULL; } -static bool l2cap_match(struct hci_conn *hcon) -{ - return hcon->type == ACL_LINK || hcon->type == LE_LINK; -} - static void l2cap_connect_cfm(struct hci_conn *hcon, u8 status) { struct hci_dev *hdev = hcon->hdev; @@ -7194,6 +7189,9 @@ static void l2cap_connect_cfm(struct hci_conn *hcon, u8 status) struct l2cap_chan *pchan; u8 dst_type; + if (hcon->type != ACL_LINK && hcon->type != LE_LINK) + return; + BT_DBG("hcon %p bdaddr %pMR status %d", hcon, &hcon->dst, status); if (status) { @@ -7258,6 +7256,9 @@ int l2cap_disconn_ind(struct hci_conn *hcon) static void l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason) { + if (hcon->type != ACL_LINK && hcon->type != LE_LINK) + return; + BT_DBG("hcon %p reason %d", hcon, reason); l2cap_conn_del(hcon, bt_to_errno(reason)); @@ -7565,7 +7566,6 @@ unlock: static struct hci_cb l2cap_cb = { .name = "L2CAP", - .match = l2cap_match, .connect_cfm = l2cap_connect_cfm, .disconn_cfm = l2cap_disconn_cfm, .security_cfm = l2cap_security_cfm, diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index f53304cb09db..621c555f639b 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -9660,6 +9660,9 @@ void mgmt_device_connected(struct hci_dev *hdev, struct hci_conn *conn, sizeof(*ev) + (name ? eir_precalc_len(name_len) : 0) + eir_precalc_len(sizeof(conn->dev_class))); + if (!skb) + return; + ev = skb_put(skb, sizeof(*ev)); bacpy(&ev->addr.bdaddr, &conn->dst); ev->addr.type = link_to_bdaddr(conn->type, conn->dst_type); @@ -10413,6 +10416,8 @@ void mgmt_remote_name(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, skb = mgmt_alloc_skb(hdev, MGMT_EV_DEVICE_FOUND, sizeof(*ev) + (name ? 
eir_precalc_len(name_len) : 0)); + if (!skb) + return; ev = skb_put(skb, sizeof(*ev)); bacpy(&ev->addr.bdaddr, bdaddr); diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 4c56ca5a216c..ad5177e3a69b 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -2134,11 +2134,6 @@ static int rfcomm_run(void *unused) return 0; } -static bool rfcomm_match(struct hci_conn *hcon) -{ - return hcon->type == ACL_LINK; -} - static void rfcomm_security_cfm(struct hci_conn *conn, u8 status, u8 encrypt) { struct rfcomm_session *s; @@ -2185,7 +2180,6 @@ static void rfcomm_security_cfm(struct hci_conn *conn, u8 status, u8 encrypt) static struct hci_cb rfcomm_cb = { .name = "RFCOMM", - .match = rfcomm_match, .security_cfm = rfcomm_security_cfm }; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index aa7bfe26cb40..5d1bc0d6aee0 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -107,6 +107,14 @@ static void sco_conn_put(struct sco_conn *conn) kref_put(&conn->ref, sco_conn_free); } +static struct sco_conn *sco_conn_hold(struct sco_conn *conn) +{ + BT_DBG("conn %p refcnt %u", conn, kref_read(&conn->ref)); + + kref_get(&conn->ref); + return conn; +} + static struct sco_conn *sco_conn_hold_unless_zero(struct sco_conn *conn) { if (!conn) @@ -1353,6 +1361,7 @@ static void sco_conn_ready(struct sco_conn *conn) bacpy(&sco_pi(sk)->src, &conn->hcon->src); bacpy(&sco_pi(sk)->dst, &conn->hcon->dst); + sco_conn_hold(conn); hci_conn_hold(conn->hcon); __sco_chan_add(conn, sk, parent); @@ -1398,27 +1407,30 @@ int sco_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) return lm; } -static bool sco_match(struct hci_conn *hcon) -{ - return hcon->type == SCO_LINK || hcon->type == ESCO_LINK; -} - static void sco_connect_cfm(struct hci_conn *hcon, __u8 status) { + if (hcon->type != SCO_LINK && hcon->type != ESCO_LINK) + return; + BT_DBG("hcon %p bdaddr %pMR status %u", hcon, &hcon->dst, status); if (!status) { struct sco_conn *conn; conn = sco_conn_add(hcon); - if (conn) + if (conn) { sco_conn_ready(conn); + sco_conn_put(conn); + } } else sco_conn_del(hcon, bt_to_errno(status)); } static void sco_disconn_cfm(struct hci_conn *hcon, __u8 reason) { + if (hcon->type != SCO_LINK && hcon->type != ESCO_LINK) + return; + BT_DBG("hcon %p reason %d", hcon, reason); sco_conn_del(hcon, bt_to_errno(reason)); @@ -1444,7 +1456,6 @@ drop: static struct hci_cb sco_cb = { .name = "SCO", - .match = sco_match, .connect_cfm = sco_connect_cfm, .disconn_cfm = sco_disconn_cfm, }; diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 9d8c72ed01ab..a818fdc22da9 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -16,6 +16,8 @@ #include <linux/netfilter_bridge.h> #include <linux/uaccess.h> +#include <net/netdev_lock.h> + #include "br_private.h" #define COMMON_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | \ diff --git a/net/can/af_can.c b/net/can/af_can.c index 01f3fbb3b67d..7b191dbe3693 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -172,6 +172,8 @@ static int can_create(struct net *net, struct socket *sock, int protocol, sock_orphan(sk); sock_put(sk); sock->sk = NULL; + } else { + sock_prot_inuse_add(net, sk->sk_prot, 1); } errout: diff --git a/net/can/bcm.c b/net/can/bcm.c index 217049fa496e..6dc041e054ba 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1625,6 +1625,7 @@ static int bcm_release(struct socket *sock) sock->sk = NULL; release_sock(sk); + sock_prot_inuse_add(net, sk->sk_prot, -1); sock_put(sk); return 0; diff --git 
a/net/can/isotp.c b/net/can/isotp.c index 16046931542a..789583c62f98 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -1239,6 +1239,7 @@ static int isotp_release(struct socket *sock) sock->sk = NULL; release_sock(sk); + sock_prot_inuse_add(net, sk->sk_prot, -1); sock_put(sk); return 0; diff --git a/net/can/raw.c b/net/can/raw.c index 9b1d5f036f57..020f21430b1d 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -397,11 +397,13 @@ static int raw_release(struct socket *sock) { struct sock *sk = sock->sk; struct raw_sock *ro; + struct net *net; if (!sk) return 0; ro = raw_sk(sk); + net = sock_net(sk); spin_lock(&raw_notifier_lock); while (raw_busy_notifier == ro) { @@ -421,7 +423,7 @@ static int raw_release(struct socket *sock) raw_disable_allfilters(dev_net(ro->dev), ro->dev, sk); netdev_put(ro->dev, &ro->dev_tracker); } else { - raw_disable_allfilters(sock_net(sk), NULL, sk); + raw_disable_allfilters(net, NULL, sk); } } @@ -440,6 +442,7 @@ static int raw_release(struct socket *sock) release_sock(sk); rtnl_unlock(); + sock_prot_inuse_add(net, sk->sk_prot, -1); sock_put(sk); return 0; diff --git a/net/core/Makefile b/net/core/Makefile index d9326600e289..a10c3bd96798 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -9,7 +9,7 @@ obj-y := sock.o request_sock.o skbuff.o datagram.o stream.o scm.o \ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o -obj-y += dev.o dev_addr_lists.o dst.o netevent.o \ +obj-y += dev.o dev_api.o dev_addr_lists.o dst.o netevent.o \ neighbour.o rtnetlink.o utils.o link_watch.o filter.o \ sock_diag.o dev_ioctl.o tso.o sock_reuseport.o \ fib_notifier.o xdp.o flow_offload.o gro.o \ diff --git a/net/core/dev.c b/net/core/dev.c index 2dc705604509..6fa6ed5b5798 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -156,6 +156,7 @@ #include <linux/pm_runtime.h> #include <linux/prandom.h> #include <linux/once_lite.h> +#include <net/netdev_lock.h> #include <net/netdev_rx_queue.h> #include <net/page_pool/types.h> #include <net/page_pool/helpers.h> @@ -1058,6 +1059,28 @@ struct net_device *netdev_get_by_index_lock(struct net *net, int ifindex) return __netdev_put_lock(dev); } +/** + * netdev_get_by_name_lock() - find a device by its name + * @net: the applicable net namespace + * @name: name of device + * + * Search for an interface by name. If a valid device + * with @name is found it will be returned with netdev->lock held. + * netdev_unlock() must be called to release it. + * + * Return: pointer to a device with lock held, NULL if not found. + */ +struct net_device *netdev_get_by_name_lock(struct net *net, const char *name) +{ + struct net_device *dev; + + dev = dev_get_by_name(net, name); + if (!dev) + return NULL; + + return __netdev_put_lock(dev); +} + struct net_device * netdev_xa_find_lock(struct net *net, struct net_device *dev, unsigned long *index) @@ -1371,15 +1394,7 @@ static int dev_get_valid_name(struct net *net, struct net_device *dev, return ret < 0 ? ret : 0; } -/** - * dev_change_name - change name of a device - * @dev: device - * @newname: name (or format string) must be at least IFNAMSIZ - * - * Change name of a device, can pass format strings "eth%d". - * for wildcarding. 
- */ -int dev_change_name(struct net_device *dev, const char *newname) +int netif_change_name(struct net_device *dev, const char *newname) { struct net *net = dev_net(dev); unsigned char old_assign_type; @@ -1449,15 +1464,7 @@ rollback: return err; } -/** - * dev_set_alias - change ifalias of a device - * @dev: device - * @alias: name up to IFALIASZ - * @len: limit of bytes to copy from info - * - * Set ifalias for a device, - */ -int dev_set_alias(struct net_device *dev, const char *alias, size_t len) +int netif_set_alias(struct net_device *dev, const char *alias, size_t len) { struct dev_ifalias *new_alias = NULL; @@ -1483,7 +1490,6 @@ int dev_set_alias(struct net_device *dev, const char *alias, size_t len) return len; } -EXPORT_SYMBOL(dev_set_alias); /** * dev_get_alias - get ifalias of a device @@ -1629,6 +1635,8 @@ static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack) set_bit(__LINK_STATE_START, &dev->state); + netdev_ops_assert_locked(dev); + if (ops->ndo_validate_addr) ret = ops->ndo_validate_addr(dev); @@ -1649,20 +1657,7 @@ static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack) return ret; } -/** - * dev_open - prepare an interface for use. - * @dev: device to open - * @extack: netlink extended ack - * - * Takes a device from down to up state. The device's private open - * function is invoked and then the multicast lists are loaded. Finally - * the device is moved into the up state and a %NETDEV_UP message is - * sent to the netdev notifier chain. - * - * Calling this function on an active interface is a nop. On a failure - * a negative errno code is returned. - */ -int dev_open(struct net_device *dev, struct netlink_ext_ack *extack) +int netif_open(struct net_device *dev, struct netlink_ext_ack *extack) { int ret; @@ -1678,7 +1673,6 @@ int dev_open(struct net_device *dev, struct netlink_ext_ack *extack) return ret; } -EXPORT_SYMBOL(dev_open); static void __dev_close_many(struct list_head *head) { @@ -1716,6 +1710,9 @@ static void __dev_close_many(struct list_head *head) * We allow it to be called even after a DETACH hot-plug * event. */ + + netdev_ops_assert_locked(dev); + if (ops->ndo_stop) ops->ndo_stop(dev); @@ -1753,16 +1750,7 @@ void dev_close_many(struct list_head *head, bool unlink) } EXPORT_SYMBOL(dev_close_many); -/** - * dev_close - shutdown an interface. - * @dev: device to shutdown - * - * This function moves an active device into down state. A - * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device - * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier - * chain. - */ -void dev_close(struct net_device *dev) +void netif_close(struct net_device *dev) { if (dev->flags & IFF_UP) { LIST_HEAD(single); @@ -1772,18 +1760,9 @@ void dev_close(struct net_device *dev) list_del(&single); } } -EXPORT_SYMBOL(dev_close); - +EXPORT_SYMBOL(netif_close); -/** - * dev_disable_lro - disable Large Receive Offload on a device - * @dev: device - * - * Disable Large Receive Offload (LRO) on a net device. Must be - * called under RTNL. This is needed if received packets may be - * forwarded to another interface. 
- */ -void dev_disable_lro(struct net_device *dev) +void netif_disable_lro(struct net_device *dev) { struct net_device *lower_dev; struct list_head *iter; @@ -1794,10 +1773,12 @@ void dev_disable_lro(struct net_device *dev) if (unlikely(dev->features & NETIF_F_LRO)) netdev_WARN(dev, "failed to disable LRO!\n"); - netdev_for_each_lower_dev(dev, lower_dev, iter) - dev_disable_lro(lower_dev); + netdev_for_each_lower_dev(dev, lower_dev, iter) { + netdev_lock_ops(lower_dev); + netif_disable_lro(lower_dev); + netdev_unlock_ops(lower_dev); + } } -EXPORT_SYMBOL(dev_disable_lro); /** * dev_disable_gro_hw - disable HW Generic Receive Offload on a device @@ -3873,6 +3854,9 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device { netdev_features_t features; + if (!skb_frags_readable(skb)) + goto out_kfree_skb; + features = netif_skb_features(skb); skb = validate_xmit_vlan(skb, features); if (unlikely(!skb)) @@ -6056,7 +6040,7 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp) static_branch_dec(&generic_xdp_needed_key); } else if (new && !old) { static_branch_inc(&generic_xdp_needed_key); - dev_disable_lro(dev); + netif_disable_lro(dev); dev_disable_gro_hw(dev); } break; @@ -9228,7 +9212,7 @@ int dev_set_promiscuity(struct net_device *dev, int inc) } EXPORT_SYMBOL(dev_set_promiscuity); -static int __dev_set_allmulti(struct net_device *dev, int inc, bool notify) +int netif_set_allmulti(struct net_device *dev, int inc, bool notify) { unsigned int old_flags = dev->flags, old_gflags = dev->gflags; unsigned int allmulti, flags; @@ -9263,25 +9247,6 @@ static int __dev_set_allmulti(struct net_device *dev, int inc, bool notify) return 0; } -/** - * dev_set_allmulti - update allmulti count on a device - * @dev: device - * @inc: modifier - * - * Add or remove reception of all multicast frames to a device. While the - * count in the device remains above zero the interface remains listening - * to all interfaces. Once it hits zero the device reverts back to normal - * filtering operation. A negative @inc value is used to drop the counter - * when releasing a resource needing all multicasts. - * Return 0 if successful or a negative errno code on error. - */ - -int dev_set_allmulti(struct net_device *dev, int inc) -{ - return __dev_set_allmulti(dev, inc, true); -} -EXPORT_SYMBOL(dev_set_allmulti); - /* * Upload unicast and multicast address lists to device and * configure RX filtering. When the device doesn't support unicast @@ -9414,7 +9379,7 @@ int __dev_change_flags(struct net_device *dev, unsigned int flags, int inc = (flags & IFF_ALLMULTI) ? 1 : -1; dev->gflags ^= IFF_ALLMULTI; - __dev_set_allmulti(dev, inc, false); + netif_set_allmulti(dev, inc, false); } return ret; @@ -9449,17 +9414,8 @@ void __dev_notify_flags(struct net_device *dev, unsigned int old_flags, } } -/** - * dev_change_flags - change device settings - * @dev: device - * @flags: device state flags - * @extack: netlink extended ack - * - * Change settings on device based state flags. The flags are - * in the userspace exported format. 
- */ -int dev_change_flags(struct net_device *dev, unsigned int flags, - struct netlink_ext_ack *extack) +int netif_change_flags(struct net_device *dev, unsigned int flags, + struct netlink_ext_ack *extack) { int ret; unsigned int changes, old_flags = dev->flags, old_gflags = dev->gflags; @@ -9472,7 +9428,6 @@ int dev_change_flags(struct net_device *dev, unsigned int flags, __dev_notify_flags(dev, old_flags, changes, 0, NULL); return ret; } -EXPORT_SYMBOL(dev_change_flags); int __dev_set_mtu(struct net_device *dev, int new_mtu) { @@ -9504,15 +9459,15 @@ int dev_validate_mtu(struct net_device *dev, int new_mtu, } /** - * dev_set_mtu_ext - Change maximum transfer unit + * netif_set_mtu_ext - Change maximum transfer unit * @dev: device * @new_mtu: new transfer unit * @extack: netlink extended ack * * Change the maximum transfer size of the network device. */ -int dev_set_mtu_ext(struct net_device *dev, int new_mtu, - struct netlink_ext_ack *extack) +int netif_set_mtu_ext(struct net_device *dev, int new_mtu, + struct netlink_ext_ack *extack) { int err, orig_mtu; @@ -9550,25 +9505,20 @@ int dev_set_mtu_ext(struct net_device *dev, int new_mtu, return err; } -int dev_set_mtu(struct net_device *dev, int new_mtu) +int netif_set_mtu(struct net_device *dev, int new_mtu) { struct netlink_ext_ack extack; int err; memset(&extack, 0, sizeof(extack)); - err = dev_set_mtu_ext(dev, new_mtu, &extack); + err = netif_set_mtu_ext(dev, new_mtu, &extack); if (err && extack._msg) net_err_ratelimited("%s: %s\n", dev->name, extack._msg); return err; } -EXPORT_SYMBOL(dev_set_mtu); +EXPORT_SYMBOL(netif_set_mtu); -/** - * dev_change_tx_queue_len - Change TX queue length of a netdevice - * @dev: device - * @new_len: new tx queue length - */ -int dev_change_tx_queue_len(struct net_device *dev, unsigned long new_len) +int netif_change_tx_queue_len(struct net_device *dev, unsigned long new_len) { unsigned int orig_len = dev->tx_queue_len; int res; @@ -9595,12 +9545,7 @@ err_rollback: return res; } -/** - * dev_set_group - Change group this device belongs to - * @dev: device - * @new_group: group this device should belong to - */ -void dev_set_group(struct net_device *dev, int new_group) +void netif_set_group(struct net_device *dev, int new_group) { dev->group = new_group; } @@ -9626,16 +9571,8 @@ int dev_pre_changeaddr_notify(struct net_device *dev, const char *addr, } EXPORT_SYMBOL(dev_pre_changeaddr_notify); -/** - * dev_set_mac_address - Change Media Access Control Address - * @dev: device - * @sa: new address - * @extack: netlink extended ack - * - * Change the hardware (MAC) address of the device - */ -int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa, - struct netlink_ext_ack *extack) +int netif_set_mac_address(struct net_device *dev, struct sockaddr *sa, + struct netlink_ext_ack *extack) { const struct net_device_ops *ops = dev->netdev_ops; int err; @@ -9659,58 +9596,29 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa, add_device_randomness(dev->dev_addr, dev->addr_len); return 0; } -EXPORT_SYMBOL(dev_set_mac_address); - -DECLARE_RWSEM(dev_addr_sem); - -int dev_set_mac_address_user(struct net_device *dev, struct sockaddr *sa, - struct netlink_ext_ack *extack) -{ - int ret; - - down_write(&dev_addr_sem); - ret = dev_set_mac_address(dev, sa, extack); - up_write(&dev_addr_sem); - return ret; -} -EXPORT_SYMBOL(dev_set_mac_address_user); int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name) { size_t size = sizeof(sa->sa_data_min); struct net_device *dev; - 
int ret = 0; - down_read(&dev_addr_sem); - rcu_read_lock(); + dev = netdev_get_by_name_lock(net, dev_name); + if (!dev) + return -ENODEV; - dev = dev_get_by_name_rcu(net, dev_name); - if (!dev) { - ret = -ENODEV; - goto unlock; - } if (!dev->addr_len) memset(sa->sa_data, 0, size); else memcpy(sa->sa_data, dev->dev_addr, min_t(size_t, size, dev->addr_len)); sa->sa_family = dev->type; + netdev_unlock(dev); -unlock: - rcu_read_unlock(); - up_read(&dev_addr_sem); - return ret; + return 0; } EXPORT_SYMBOL(dev_get_mac_address); -/** - * dev_change_carrier - Change device carrier - * @dev: device - * @new_carrier: new value - * - * Change device carrier - */ -int dev_change_carrier(struct net_device *dev, bool new_carrier) +int netif_change_carrier(struct net_device *dev, bool new_carrier) { const struct net_device_ops *ops = dev->netdev_ops; @@ -9821,13 +9729,7 @@ bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b) } EXPORT_SYMBOL(netdev_port_same_parent_id); -/** - * dev_change_proto_down - set carrier according to proto_down. - * - * @dev: device - * @proto_down: new value - */ -int dev_change_proto_down(struct net_device *dev, bool proto_down) +int netif_change_proto_down(struct net_device *dev, bool proto_down) { if (!dev->change_proto_down) return -EOPNOTSUPP; @@ -9842,14 +9744,14 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down) } /** - * dev_change_proto_down_reason - proto down reason + * netdev_change_proto_down_reason_locked - proto down reason * * @dev: device * @mask: proto down mask * @value: proto down value */ -void dev_change_proto_down_reason(struct net_device *dev, unsigned long mask, - u32 value) +void netdev_change_proto_down_reason_locked(struct net_device *dev, + unsigned long mask, u32 value) { u32 proto_down_reason; int b; @@ -9938,7 +9840,7 @@ u8 dev_xdp_sb_prog_count(struct net_device *dev) return count; } -int dev_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf) +int netif_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf) { if (!dev->netdev_ops->ndo_bpf) return -EOPNOTSUPP; @@ -9958,7 +9860,6 @@ int dev_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf) return dev->netdev_ops->ndo_bpf(dev, bpf); } -EXPORT_SYMBOL_GPL(dev_xdp_propagate); u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode) { @@ -9988,6 +9889,8 @@ static int dev_xdp_install(struct net_device *dev, enum bpf_xdp_mode mode, struct netdev_bpf xdp; int err; + netdev_ops_assert_locked(dev); + if (dev->cfg->hds_config == ETHTOOL_TCP_DATA_SPLIT_ENABLED && prog && !prog->aux->xdp_has_frags) { NL_SET_ERR_MSG(extack, "unable to install XDP to device using tcp-data-split"); @@ -10220,7 +10123,9 @@ static void bpf_xdp_link_release(struct bpf_link *link) * already NULL, in which case link was already auto-detached */ if (xdp_link->dev) { + netdev_lock_ops(xdp_link->dev); WARN_ON(dev_xdp_detach_link(xdp_link->dev, NULL, xdp_link)); + netdev_unlock_ops(xdp_link->dev); xdp_link->dev = NULL; } @@ -10302,10 +10207,12 @@ static int bpf_xdp_link_update(struct bpf_link *link, struct bpf_prog *new_prog, goto out_unlock; } + netdev_lock_ops(xdp_link->dev); mode = dev_xdp_mode(xdp_link->dev, xdp_link->flags); bpf_op = dev_xdp_bpf_op(xdp_link->dev, mode); err = dev_xdp_install(xdp_link->dev, mode, bpf_op, NULL, xdp_link->flags, new_prog); + netdev_unlock_ops(xdp_link->dev); if (err) goto out_unlock; @@ -10657,6 +10564,7 @@ int __netdev_update_features(struct net_device *dev) int err = -1; ASSERT_RTNL(); + netdev_ops_assert_locked(dev); 
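+	/* the recomputed features are applied through ndo_set_features(),
+	 * which for ops-locked devices must run under the per-device
+	 * instance lock in addition to RTNL */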
features = netdev_get_wanted_features(dev); @@ -11090,7 +10998,9 @@ int register_netdevice(struct net_device *dev) if (ret) goto err_uninit_notify; + netdev_lock_ops(dev); __netdev_update_features(dev); + netdev_unlock_ops(dev); /* * Default initial state at registry is that the @@ -12006,11 +11916,14 @@ void unregister_netdevice_many_notify(struct list_head *head, } /* If device is running, close it first. */ - list_for_each_entry(dev, head, unreg_list) + list_for_each_entry(dev, head, unreg_list) { list_add_tail(&dev->close_list, &close_head); + netdev_lock_ops(dev); + } dev_close_many(&close_head, true); list_for_each_entry(dev, head, unreg_list) { + netdev_unlock_ops(dev); /* And unlink it from device chain. */ unlist_netdevice(dev); netdev_lock(dev); @@ -12027,7 +11940,9 @@ void unregister_netdevice_many_notify(struct list_head *head, /* Shutdown queueing discipline. */ dev_shutdown(dev); dev_tcx_uninstall(dev); + netdev_lock_ops(dev); dev_xdp_uninstall(dev); + netdev_unlock_ops(dev); bpf_dev_bound_netdev_unregister(dev); dev_memory_provider_uninstall(dev); @@ -12123,24 +12038,7 @@ void unregister_netdev(struct net_device *dev) } EXPORT_SYMBOL(unregister_netdev); -/** - * __dev_change_net_namespace - move device to different nethost namespace - * @dev: device - * @net: network namespace - * @pat: If not NULL name pattern to try if the current device name - * is already taken in the destination network namespace. - * @new_ifindex: If not zero, specifies device index in the target - * namespace. - * @extack: netlink extended ack - * - * This function shuts down a device interface and moves it - * to a new network namespace. On success 0 is returned, on - * a failure a netagive errno code is returned. - * - * Callers must hold the rtnl semaphore. - */ - -int __dev_change_net_namespace(struct net_device *dev, struct net *net, +int netif_change_net_namespace(struct net_device *dev, struct net *net, const char *pat, int new_ifindex, struct netlink_ext_ack *extack) { @@ -12226,7 +12124,7 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net, */ /* If device is running close it first. 
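 * (netif_close() below relies on the caller, e.g. the
 * dev_change_net_namespace() wrapper in net/core/dev_api.c, already
 * holding the instance lock via netdev_lock_ops())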
*/ - dev_close(dev); + netif_close(dev); /* And unlink it from device chain */ unlist_netdevice(dev); @@ -12308,7 +12206,6 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net, out: return err; } -EXPORT_SYMBOL_GPL(__dev_change_net_namespace); static int dev_cpu_dead(unsigned int oldcpu) { diff --git a/net/core/dev.h b/net/core/dev.h index caa13e431a6b..0ddd3631acb0 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -6,6 +6,7 @@ #include <linux/types.h> #include <linux/rwsem.h> #include <linux/netdevice.h> +#include <net/netdev_lock.h> struct net; struct netlink_ext_ack; @@ -28,6 +29,7 @@ netdev_napi_by_id_lock(struct net *net, unsigned int napi_id); struct net_device *dev_get_by_napi_id(unsigned int napi_id); struct net_device *netdev_get_by_index_lock(struct net *net, int ifindex); +struct net_device *netdev_get_by_name_lock(struct net *net, const char *name); struct net_device *__netdev_put_lock(struct net_device *dev); struct net_device * netdev_xa_find_lock(struct net *net, struct net_device *dev, @@ -69,8 +71,6 @@ extern int weight_p; extern int dev_weight_rx_bias; extern int dev_weight_tx_bias; -extern struct rw_semaphore dev_addr_sem; - /* rtnl helpers */ extern struct list_head net_todo_list; void netdev_run_todo(void); @@ -85,6 +85,7 @@ struct netdev_name_node { }; int netdev_get_name(struct net *net, char *name, int ifindex); +int netif_change_name(struct net_device *dev, const char *newname); int dev_change_name(struct net_device *dev, const char *newname); #define netdev_for_each_altname(dev, namenode) \ @@ -98,24 +99,28 @@ int netdev_name_node_alt_destroy(struct net_device *dev, const char *name); int dev_validate_mtu(struct net_device *dev, int mtu, struct netlink_ext_ack *extack); -int dev_set_mtu_ext(struct net_device *dev, int mtu, - struct netlink_ext_ack *extack); +int netif_set_mtu_ext(struct net_device *dev, int new_mtu, + struct netlink_ext_ack *extack); int dev_get_phys_port_id(struct net_device *dev, struct netdev_phys_item_id *ppid); int dev_get_phys_port_name(struct net_device *dev, char *name, size_t len); +int netif_change_proto_down(struct net_device *dev, bool proto_down); int dev_change_proto_down(struct net_device *dev, bool proto_down); -void dev_change_proto_down_reason(struct net_device *dev, unsigned long mask, - u32 value); +void netdev_change_proto_down_reason_locked(struct net_device *dev, + unsigned long mask, u32 value); typedef int (*bpf_op_t)(struct net_device *dev, struct netdev_bpf *bpf); int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, int fd, int expected_fd, u32 flags); +int netif_change_tx_queue_len(struct net_device *dev, unsigned long new_len); int dev_change_tx_queue_len(struct net_device *dev, unsigned long new_len); +void netif_set_group(struct net_device *dev, int new_group); void dev_set_group(struct net_device *dev, int new_group); +int netif_change_carrier(struct net_device *dev, bool new_carrier); int dev_change_carrier(struct net_device *dev, bool new_carrier); void __dev_set_rx_mode(struct net_device *dev); @@ -134,9 +139,11 @@ static inline void netif_set_up(struct net_device *dev, bool value) else dev->flags &= ~IFF_UP; - netdev_lock(dev); + if (!netdev_need_ops_lock(dev)) + netdev_lock(dev); dev->up = value; - netdev_unlock(dev); + if (!netdev_need_ops_lock(dev)) + netdev_unlock(dev); } static inline void netif_set_gso_max_size(struct net_device *dev, diff --git a/net/core/dev_api.c b/net/core/dev_api.c new file mode 100644 index 000000000000..1f0e24849bc6 --- /dev/null +++ 
b/net/core/dev_api.c @@ -0,0 +1,320 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <linux/netdevice.h> +#include <net/netdev_lock.h> + +#include "dev.h" + +/** + * dev_change_name() - change name of a device + * @dev: device + * @newname: name (or format string) must be at least IFNAMSIZ + * + * Change name of a device; can pass format strings like "eth%d" + * for wildcarding. + * + * Return: 0 on success, -errno on failure. + */ +int dev_change_name(struct net_device *dev, const char *newname) +{ + int ret; + + netdev_lock_ops(dev); + ret = netif_change_name(dev, newname); + netdev_unlock_ops(dev); + + return ret; +} + +/** + * dev_set_alias() - change ifalias of a device + * @dev: device + * @alias: name up to IFALIASZ + * @len: limit of bytes to copy from info + * + * Set ifalias for a device. + * + * Return: 0 on success, -errno on failure. + */ +int dev_set_alias(struct net_device *dev, const char *alias, size_t len) +{ + int ret; + + netdev_lock_ops(dev); + ret = netif_set_alias(dev, alias, len); + netdev_unlock_ops(dev); + + return ret; +} +EXPORT_SYMBOL(dev_set_alias); + +/** + * dev_change_flags() - change device settings + * @dev: device + * @flags: device state flags + * @extack: netlink extended ack + * + * Change settings on device based on state flags. The flags are + * in the userspace exported format. + * + * Return: 0 on success, -errno on failure. + */ +int dev_change_flags(struct net_device *dev, unsigned int flags, + struct netlink_ext_ack *extack) +{ + int ret; + + netdev_lock_ops(dev); + ret = netif_change_flags(dev, flags, extack); + netdev_unlock_ops(dev); + + return ret; +} +EXPORT_SYMBOL(dev_change_flags); + +/** + * dev_set_group() - change group this device belongs to + * @dev: device + * @new_group: group this device should belong to + */ +void dev_set_group(struct net_device *dev, int new_group) +{ + netdev_lock_ops(dev); + netif_set_group(dev, new_group); + netdev_unlock_ops(dev); +} + +/** + * dev_change_net_namespace() - move device to a different network namespace + * @dev: device + * @net: network namespace + * @pat: If not NULL name pattern to try if the current device name + * is already taken in the destination network namespace. + * + * This function shuts down a device interface and moves it + * to a new network namespace. On success 0 is returned, on + * a failure a negative errno code is returned. + * + * Callers must hold the rtnl semaphore. + * + * Return: 0 on success, -errno on failure. + */ +int dev_change_net_namespace(struct net_device *dev, struct net *net, + const char *pat) +{ + int ret; + + netdev_lock_ops(dev); + ret = netif_change_net_namespace(dev, net, pat, 0, NULL); + netdev_unlock_ops(dev); + + return ret; +} +EXPORT_SYMBOL_GPL(dev_change_net_namespace); + +/** + * dev_change_carrier() - change device carrier + * @dev: device + * @new_carrier: new value + * + * Change device carrier + * + * Return: 0 on success, -errno on failure. + */ +int dev_change_carrier(struct net_device *dev, bool new_carrier) +{ + int ret; + + netdev_lock_ops(dev); + ret = netif_change_carrier(dev, new_carrier); + netdev_unlock_ops(dev); + + return ret; +} + +/** + * dev_change_tx_queue_len() - change TX queue length of a netdevice + * @dev: device + * @new_len: new tx queue length + * + * Return: 0 on success, -errno on failure. 
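+ *
+ * A minimal usage sketch (the queue length value is illustrative):
+ *
+ *	if (dev_change_tx_queue_len(dev, 4096))
+ *		netdev_warn(dev, "failed to resize TX queue\n");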
+ */ +int dev_change_tx_queue_len(struct net_device *dev, unsigned long new_len) +{ + int ret; + + netdev_lock_ops(dev); + ret = netif_change_tx_queue_len(dev, new_len); + netdev_unlock_ops(dev); + + return ret; +} + +/** + * dev_change_proto_down() - set carrier according to proto_down + * @dev: device + * @proto_down: new value + * + * Return: 0 on success, -errno on failure. + */ +int dev_change_proto_down(struct net_device *dev, bool proto_down) +{ + int ret; + + netdev_lock_ops(dev); + ret = netif_change_proto_down(dev, proto_down); + netdev_unlock_ops(dev); + + return ret; +} + +/** + * dev_open() - prepare an interface for use + * @dev: device to open + * @extack: netlink extended ack + * + * Takes a device from down to up state. The device's private open + * function is invoked and then the multicast lists are loaded. Finally + * the device is moved into the up state and a %NETDEV_UP message is + * sent to the netdev notifier chain. + * + * Calling this function on an active interface is a nop. On a failure + * a negative errno code is returned. + * + * Return: 0 on success, -errno on failure. + */ +int dev_open(struct net_device *dev, struct netlink_ext_ack *extack) +{ + int ret; + + netdev_lock_ops(dev); + ret = netif_open(dev, extack); + netdev_unlock_ops(dev); + + return ret; +} +EXPORT_SYMBOL(dev_open); + +/** + * dev_close() - shutdown an interface + * @dev: device to shutdown + * + * This function moves an active device into down state. A + * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device + * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier + * chain. + */ +void dev_close(struct net_device *dev) +{ + netdev_lock_ops(dev); + netif_close(dev); + netdev_unlock_ops(dev); +} +EXPORT_SYMBOL(dev_close); + +int dev_eth_ioctl(struct net_device *dev, + struct ifreq *ifr, unsigned int cmd) +{ + const struct net_device_ops *ops = dev->netdev_ops; + int ret = -ENODEV; + + if (!ops->ndo_eth_ioctl) + return -EOPNOTSUPP; + + netdev_lock_ops(dev); + if (netif_device_present(dev)) + ret = ops->ndo_eth_ioctl(dev, ifr, cmd); + netdev_unlock_ops(dev); + + return ret; +} +EXPORT_SYMBOL(dev_eth_ioctl); + +int dev_set_mtu(struct net_device *dev, int new_mtu) +{ + int ret; + + netdev_lock_ops(dev); + ret = netif_set_mtu(dev, new_mtu); + netdev_unlock_ops(dev); + + return ret; +} +EXPORT_SYMBOL(dev_set_mtu); + +/** + * dev_disable_lro() - disable Large Receive Offload on a device + * @dev: device + * + * Disable Large Receive Offload (LRO) on a net device. Must be + * called under RTNL. This is needed if received packets may be + * forwarded to another interface. + */ +void dev_disable_lro(struct net_device *dev) +{ + netdev_lock_ops(dev); + netif_disable_lro(dev); + netdev_unlock_ops(dev); +} +EXPORT_SYMBOL(dev_disable_lro); + +/** + * dev_set_allmulti() - update allmulti count on a device + * @dev: device + * @inc: modifier + * + * Add or remove reception of all multicast frames to a device. While the + * count in the device remains above zero the interface remains listening + * to all interfaces. Once it hits zero the device reverts back to normal + * filtering operation. A negative @inc value is used to drop the counter + * when releasing a resource needing all multicasts. + * + * Return: 0 on success, -errno on failure. 
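+ *
+ * A sketch of the paired usage (illustrative only):
+ *
+ *	err = dev_set_allmulti(dev, 1);	// take an allmulti reference
+ *	if (err)
+ *		return err;
+ *	// ...
+ *	dev_set_allmulti(dev, -1);	// drop it again on teardown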
+ */ + +int dev_set_allmulti(struct net_device *dev, int inc) +{ + int ret; + + netdev_lock_ops(dev); + ret = netif_set_allmulti(dev, inc, true); + netdev_unlock_ops(dev); + + return ret; +} +EXPORT_SYMBOL(dev_set_allmulti); + +/** + * dev_set_mac_address() - change Media Access Control Address + * @dev: device + * @sa: new address + * @extack: netlink extended ack + * + * Change the hardware (MAC) address of the device + * + * Return: 0 on success, -errno on failure. + */ +int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa, + struct netlink_ext_ack *extack) +{ + int ret; + + netdev_lock(dev); + ret = netif_set_mac_address(dev, sa, extack); + netdev_unlock(dev); + + return ret; +} +EXPORT_SYMBOL(dev_set_mac_address); + +int dev_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf) +{ + int ret; + + netdev_lock_ops(dev); + ret = netif_xdp_propagate(dev, bpf); + netdev_unlock_ops(dev); + + return ret; +} +EXPORT_SYMBOL_GPL(dev_xdp_propagate); diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 4c2098ac9d72..5471cf4fc984 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -10,6 +10,7 @@ #include <linux/wireless.h> #include <linux/if_bridge.h> #include <net/dsa_stubs.h> +#include <net/netdev_lock.h> #include <net/wext.h> #include "dev.h" @@ -110,7 +111,7 @@ static int dev_getifmap(struct net_device *dev, struct ifreq *ifr) return 0; } -static int dev_setifmap(struct net_device *dev, struct ifreq *ifr) +static int netif_setifmap(struct net_device *dev, struct ifreq *ifr) { struct compat_ifmap *cifmap = (struct compat_ifmap *)&ifr->ifr_map; @@ -240,20 +241,6 @@ int net_hwtstamp_validate(const struct kernel_hwtstamp_config *cfg) return 0; } -static int dev_eth_ioctl(struct net_device *dev, - struct ifreq *ifr, unsigned int cmd) -{ - const struct net_device_ops *ops = dev->netdev_ops; - - if (!ops->ndo_eth_ioctl) - return -EOPNOTSUPP; - - if (!netif_device_present(dev)) - return -ENODEV; - - return ops->ndo_eth_ioctl(dev, ifr, cmd); -} - /** * dev_get_hwtstamp_phylib() - Get hardware timestamping settings of NIC * or of attached phylib PHY @@ -305,7 +292,9 @@ static int dev_get_hwtstamp(struct net_device *dev, struct ifreq *ifr) return -ENODEV; kernel_cfg.ifr = ifr; + netdev_lock_ops(dev); err = dev_get_hwtstamp_phylib(dev, &kernel_cfg); + netdev_unlock_ops(dev); if (err) return err; @@ -429,7 +418,9 @@ static int dev_set_hwtstamp(struct net_device *dev, struct ifreq *ifr) if (!netif_device_present(dev)) return -ENODEV; + netdev_lock_ops(dev); err = dev_set_hwtstamp_phylib(dev, &kernel_cfg, &extack); + netdev_unlock_ops(dev); if (err) return err; @@ -504,10 +495,14 @@ static int dev_siocbond(struct net_device *dev, const struct net_device_ops *ops = dev->netdev_ops; if (ops->ndo_siocbond) { + int ret = -ENODEV; + + netdev_lock_ops(dev); if (netif_device_present(dev)) - return ops->ndo_siocbond(dev, ifr, cmd); - else - return -ENODEV; + ret = ops->ndo_siocbond(dev, ifr, cmd); + netdev_unlock_ops(dev); + + return ret; } return -EOPNOTSUPP; @@ -519,10 +514,14 @@ static int dev_siocdevprivate(struct net_device *dev, struct ifreq *ifr, const struct net_device_ops *ops = dev->netdev_ops; if (ops->ndo_siocdevprivate) { + int ret = -ENODEV; + + netdev_lock_ops(dev); if (netif_device_present(dev)) - return ops->ndo_siocdevprivate(dev, ifr, data, cmd); - else - return -ENODEV; + ret = ops->ndo_siocdevprivate(dev, ifr, data, cmd); + netdev_unlock_ops(dev); + + return ret; } return -EOPNOTSUPP; @@ -533,10 +532,14 @@ static int dev_siocwandev(struct net_device 
*dev, struct if_settings *ifs) const struct net_device_ops *ops = dev->netdev_ops; if (ops->ndo_siocwandev) { + int ret = -ENODEV; + + netdev_lock_ops(dev); if (netif_device_present(dev)) - return ops->ndo_siocwandev(dev, ifs); - else - return -ENODEV; + ret = ops->ndo_siocwandev(dev, ifs); + netdev_unlock_ops(dev); + + return ret; } return -EOPNOTSUPP; @@ -572,7 +575,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, void __user *data, case SIOCSIFHWADDR: if (dev->addr_len > sizeof(struct sockaddr)) return -EINVAL; - return dev_set_mac_address_user(dev, &ifr->ifr_hwaddr, NULL); + return dev_set_mac_address(dev, &ifr->ifr_hwaddr, NULL); case SIOCSIFHWBROADCAST: if (ifr->ifr_hwaddr.sa_family != dev->type) @@ -580,11 +583,16 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, void __user *data, memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, min(sizeof(ifr->ifr_hwaddr.sa_data_min), (size_t)dev->addr_len)); + netdev_lock_ops(dev); call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); + netdev_unlock_ops(dev); return 0; case SIOCSIFMAP: - return dev_setifmap(dev, ifr); + netdev_lock_ops(dev); + err = netif_setifmap(dev, ifr); + netdev_unlock_ops(dev); + return err; case SIOCADDMULTI: if (!ops->ndo_set_rx_mode || @@ -592,7 +600,10 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, void __user *data, return -EINVAL; if (!netif_device_present(dev)) return -ENODEV; - return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data); + netdev_lock_ops(dev); + err = dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data); + netdev_unlock_ops(dev); + return err; case SIOCDELMULTI: if (!ops->ndo_set_rx_mode || @@ -600,7 +611,10 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, void __user *data, return -EINVAL; if (!netif_device_present(dev)) return -ENODEV; - return dev_mc_del_global(dev, ifr->ifr_hwaddr.sa_data); + netdev_lock_ops(dev); + err = dev_mc_del_global(dev, ifr->ifr_hwaddr.sa_data); + netdev_unlock_ops(dev); + return err; case SIOCSIFTXQLEN: if (ifr->ifr_qlen < 0) diff --git a/net/core/devmem.c b/net/core/devmem.c index 7c6e0b5b6acb..6802e82a4d03 100644 --- a/net/core/devmem.c +++ b/net/core/devmem.c @@ -25,7 +25,6 @@ /* Device memory support */ -/* Protected by rtnl_lock() */ static DEFINE_XARRAY_FLAGS(net_devmem_dmabuf_bindings, XA_FLAGS_ALLOC1); static const struct memory_provider_ops dmabuf_devmem_ops; @@ -118,6 +117,7 @@ void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding) struct netdev_rx_queue *rxq; unsigned long xa_idx; unsigned int rxq_idx; + int err; if (binding->list.next) list_del(&binding->list); @@ -128,9 +128,12 @@ void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding) rxq->mp_params.mp_priv = NULL; rxq->mp_params.mp_ops = NULL; + netdev_lock(binding->dev); rxq_idx = get_netdev_rx_queue_index(rxq); - WARN_ON(netdev_rx_queue_restart(binding->dev, rxq_idx)); + err = netdev_rx_queue_restart(binding->dev, rxq_idx); + WARN_ON(err && err != -ENETDOWN); + netdev_unlock(binding->dev); } xa_erase(&net_devmem_dmabuf_bindings, binding->id); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 8d9dc048a548..529a0f721268 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -23,6 +23,7 @@ #include <linux/of.h> #include <linux/of_net.h> #include <linux/cpu.h> +#include <net/netdev_lock.h> #include <net/netdev_rx_queue.h> #include <net/rps.h> @@ -262,14 +263,11 @@ static ssize_t address_show(struct device *dev, struct device_attribute *attr, struct net_device *ndev = to_net_dev(dev); ssize_t ret = -EINVAL; - 
down_read(&dev_addr_sem); - - rcu_read_lock(); + netdev_lock(ndev); if (dev_isalive(ndev)) ret = sysfs_format_mac(buf, ndev->dev_addr, ndev->addr_len); - rcu_read_unlock(); + netdev_unlock(ndev); - up_read(&dev_addr_sem); return ret; } static DEVICE_ATTR_RO(address); @@ -1483,8 +1481,10 @@ static ssize_t tx_maxrate_store(struct kobject *kobj, struct attribute *attr, return err; err = -EOPNOTSUPP; + netdev_lock_ops(dev); if (dev->netdev_ops->ndo_set_tx_maxrate) err = dev->netdev_ops->ndo_set_tx_maxrate(dev, index, rate); + netdev_unlock_ops(dev); if (!err) { queue->tx_maxrate = rate; diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c index 996ac6a449eb..739f7b6506a6 100644 --- a/net/core/netdev-genl-gen.c +++ b/net/core/netdev-genl-gen.c @@ -9,7 +9,7 @@ #include "netdev-genl-gen.h" #include <uapi/linux/netdev.h> -#include <linux/list.h> +#include <net/netdev_netlink.h> /* Integer value ranges */ static const struct netlink_range_validation netdev_a_page_pool_id_range = { @@ -217,7 +217,7 @@ struct genl_family netdev_nl_family __ro_after_init = { .n_split_ops = ARRAY_SIZE(netdev_nl_ops), .mcgrps = netdev_nl_mcgrps, .n_mcgrps = ARRAY_SIZE(netdev_nl_mcgrps), - .sock_priv_size = sizeof(struct list_head), + .sock_priv_size = sizeof(struct netdev_nl_sock), .sock_priv_init = __netdev_nl_sock_priv_init, .sock_priv_destroy = __netdev_nl_sock_priv_destroy, }; diff --git a/net/core/netdev-genl-gen.h b/net/core/netdev-genl-gen.h index e09dd7539ff2..17d39fd64c94 100644 --- a/net/core/netdev-genl-gen.h +++ b/net/core/netdev-genl-gen.h @@ -10,7 +10,7 @@ #include <net/genetlink.h> #include <uapi/linux/netdev.h> -#include <linux/list.h> +#include <net/netdev_netlink.h> /* Common nested types */ extern const struct nla_policy netdev_page_pool_info_nl_policy[NETDEV_A_PAGE_POOL_IFINDEX + 1]; @@ -42,7 +42,7 @@ enum { extern struct genl_family netdev_nl_family; -void netdev_nl_sock_priv_init(struct list_head *priv); -void netdev_nl_sock_priv_destroy(struct list_head *priv); +void netdev_nl_sock_priv_init(struct netdev_nl_sock *priv); +void netdev_nl_sock_priv_destroy(struct netdev_nl_sock *priv); #endif /* _LINUX_NETDEV_GEN_H */ diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index 2b774183d31c..a186fea63c09 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -829,8 +829,8 @@ int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info) { struct nlattr *tb[ARRAY_SIZE(netdev_queue_id_nl_policy)]; struct net_devmem_dmabuf_binding *binding; - struct list_head *sock_binding_list; u32 ifindex, dmabuf_fd, rxq_idx; + struct netdev_nl_sock *priv; struct net_device *netdev; struct sk_buff *rsp; struct nlattr *attr; @@ -845,10 +845,9 @@ int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info) ifindex = nla_get_u32(info->attrs[NETDEV_A_DEV_IFINDEX]); dmabuf_fd = nla_get_u32(info->attrs[NETDEV_A_DMABUF_FD]); - sock_binding_list = genl_sk_priv_get(&netdev_nl_family, - NETLINK_CB(skb).sk); - if (IS_ERR(sock_binding_list)) - return PTR_ERR(sock_binding_list); + priv = genl_sk_priv_get(&netdev_nl_family, NETLINK_CB(skb).sk); + if (IS_ERR(priv)) + return PTR_ERR(priv); rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!rsp) @@ -860,12 +859,12 @@ int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info) goto err_genlmsg_free; } - rtnl_lock(); + mutex_lock(&priv->lock); - netdev = __dev_get_by_index(genl_info_net(info), ifindex); + netdev = netdev_get_by_index_lock(genl_info_net(info), ifindex); if (!netdev || 
!netif_device_present(netdev)) { err = -ENODEV; - goto err_unlock; + goto err_unlock_sock; } if (dev_xdp_prog_count(netdev)) { @@ -909,7 +908,7 @@ int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info) goto err_unbind; } - list_add(&binding->list, sock_binding_list); + list_add(&binding->list, &priv->bindings); nla_put_u32(rsp, NETDEV_A_DMABUF_ID, binding->id); genlmsg_end(rsp, hdr); @@ -918,34 +917,39 @@ int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info) if (err) goto err_unbind; - rtnl_unlock(); + netdev_unlock(netdev); + + mutex_unlock(&priv->lock); return 0; err_unbind: net_devmem_unbind_dmabuf(binding); err_unlock: - rtnl_unlock(); + netdev_unlock(netdev); +err_unlock_sock: + mutex_unlock(&priv->lock); err_genlmsg_free: nlmsg_free(rsp); return err; } -void netdev_nl_sock_priv_init(struct list_head *priv) +void netdev_nl_sock_priv_init(struct netdev_nl_sock *priv) { - INIT_LIST_HEAD(priv); + INIT_LIST_HEAD(&priv->bindings); + mutex_init(&priv->lock); } -void netdev_nl_sock_priv_destroy(struct list_head *priv) +void netdev_nl_sock_priv_destroy(struct netdev_nl_sock *priv) { struct net_devmem_dmabuf_binding *binding; struct net_devmem_dmabuf_binding *temp; - list_for_each_entry_safe(binding, temp, priv, list) { - rtnl_lock(); + mutex_lock(&priv->lock); + list_for_each_entry_safe(binding, temp, &priv->bindings, list) { net_devmem_unbind_dmabuf(binding); - rtnl_unlock(); } + mutex_unlock(&priv->lock); } static int netdev_genl_netdevice_event(struct notifier_block *nb, diff --git a/net/core/netdev_rx_queue.c b/net/core/netdev_rx_queue.c index ddd54e1e7289..a5b234b33cd5 100644 --- a/net/core/netdev_rx_queue.c +++ b/net/core/netdev_rx_queue.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include <linux/netdevice.h> +#include <net/netdev_lock.h> #include <net/netdev_queues.h> #include <net/netdev_rx_queue.h> #include <net/page_pool/memory_provider.h> @@ -18,7 +19,7 @@ int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq_idx) !qops->ndo_queue_mem_alloc || !qops->ndo_queue_start) return -EOPNOTSUPP; - ASSERT_RTNL(); + netdev_assert_locked(dev); new_mem = kvzalloc(qops->ndo_queue_mem_size, GFP_KERNEL); if (!new_mem) @@ -113,9 +114,9 @@ int net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx, { int ret; - rtnl_lock(); + netdev_lock(dev); ret = __net_mp_open_rxq(dev, ifq_idx, p); - rtnl_unlock(); + netdev_unlock(dev); return ret; } @@ -148,7 +149,7 @@ static void __net_mp_close_rxq(struct net_device *dev, unsigned ifq_idx, void net_mp_close_rxq(struct net_device *dev, unsigned ifq_idx, struct pp_memory_provider_params *old_p) { - rtnl_lock(); + netdev_lock(dev); __net_mp_close_rxq(dev, ifq_idx, old_p); - rtnl_unlock(); + netdev_unlock(dev); } diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 62b4041aae1a..3cc3eae9def3 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -284,12 +284,13 @@ static struct sk_buff *find_skb(struct netpoll *np, int len, int reserve) struct sk_buff *skb; zap_completion_queue(); - refill_skbs(np); repeat: skb = alloc_skb(len, GFP_ATOMIC); - if (!skb) + if (!skb) { skb = skb_dequeue(&np->skb_pool); + schedule_work(&np->refill_wq); + } if (!skb) { if (++count < 10) { @@ -319,6 +320,7 @@ static int netpoll_owner_active(struct net_device *dev) static netdev_tx_t __netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) { netdev_tx_t status = NETDEV_TX_BUSY; + netdev_tx_t ret = NET_XMIT_DROP; struct net_device *dev; unsigned long tries; /* It is up to the caller to keep npinfo alive. 
*/ @@ -327,11 +329,12 @@ static netdev_tx_t __netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) lockdep_assert_irqs_disabled(); dev = np->dev; + rcu_read_lock(); npinfo = rcu_dereference_bh(dev->npinfo); if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) { dev_kfree_skb_irq(skb); - return NET_XMIT_DROP; + goto out; } /* don't get messages out of order, and no recursion */ @@ -370,7 +373,10 @@ static netdev_tx_t __netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) skb_queue_tail(&npinfo->txq, skb); schedule_delayed_work(&npinfo->tx_work,0); } - return NETDEV_TX_OK; + ret = NETDEV_TX_OK; +out: + rcu_read_unlock(); + return ret; } netdev_tx_t netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) @@ -535,6 +541,7 @@ static void skb_pool_flush(struct netpoll *np) { struct sk_buff_head *skb_pool; + cancel_work_sync(&np->refill_wq); skb_pool = &np->skb_pool; skb_queue_purge_reason(skb_pool, SKB_CONSUMED); } @@ -621,6 +628,14 @@ int netpoll_parse_options(struct netpoll *np, char *opt) } EXPORT_SYMBOL(netpoll_parse_options); +static void refill_skbs_work_handler(struct work_struct *work) +{ + struct netpoll *np = + container_of(work, struct netpoll, refill_wq); + + refill_skbs(np); +} + int __netpoll_setup(struct netpoll *np, struct net_device *ndev) { struct netpoll_info *npinfo; @@ -666,6 +681,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev) /* fill up the skb queue */ refill_skbs(np); + INIT_WORK(&np->refill_wq, refill_skbs_work_handler); /* last thing to do is link it to the net device structure */ rcu_assign_pointer(ndev->npinfo, npinfo); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index b4612d305970..90597bf84e3d 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -53,6 +53,7 @@ #include <net/fib_rules.h> #include <net/rtnetlink.h> #include <net/net_namespace.h> +#include <net/netdev_lock.h> #include <net/devlink.h> #if IS_ENABLED(CONFIG_IPV6) #include <net/addrconf.h> @@ -2912,12 +2913,19 @@ static int do_set_master(struct net_device *dev, int ifindex, const struct net_device_ops *ops; int err; + /* Release the lower lock, the upper is responsible for locking + * the lower if needed. None of the existing upper devices + * use netdev instance lock, so don't grab it. 
+ */ + if (upper_dev) { if (upper_dev->ifindex == ifindex) return 0; ops = upper_dev->netdev_ops; if (ops->ndo_del_slave) { + netdev_unlock_ops(dev); err = ops->ndo_del_slave(upper_dev, dev); + netdev_lock_ops(dev); if (err) return err; } else { @@ -2931,7 +2939,9 @@ static int do_set_master(struct net_device *dev, int ifindex, return -EINVAL; ops = upper_dev->netdev_ops; if (ops->ndo_add_slave) { + netdev_unlock_ops(dev); err = ops->ndo_add_slave(upper_dev, dev, extack); + netdev_lock_ops(dev); if (err) return err; } else { @@ -2981,7 +2991,7 @@ static int do_set_proto_down(struct net_device *dev, if (pdreason[IFLA_PROTO_DOWN_REASON_MASK]) mask = nla_get_u32(pdreason[IFLA_PROTO_DOWN_REASON_MASK]); - dev_change_proto_down_reason(dev, mask, value); + netdev_change_proto_down_reason_locked(dev, mask, value); } if (nl_proto_down) { @@ -2992,8 +3002,7 @@ static int do_set_proto_down(struct net_device *dev, NL_SET_ERR_MSG(extack, "Cannot clear protodown, active reasons"); return -EBUSY; } - err = dev_change_proto_down(dev, - proto_down); + err = netif_change_proto_down(dev, proto_down); if (err) return err; } @@ -3013,6 +3022,8 @@ static int do_setlink(const struct sk_buff *skb, struct net_device *dev, char ifname[IFNAMSIZ]; int err; + netdev_lock_ops(dev); + err = validate_linkmsg(dev, tb, extack); if (err < 0) goto errout; @@ -3028,7 +3039,8 @@ static int do_setlink(const struct sk_buff *skb, struct net_device *dev, new_ifindex = nla_get_s32_default(tb[IFLA_NEW_IFINDEX], 0); - err = __dev_change_net_namespace(dev, tgt_net, pat, new_ifindex, extack); + err = netif_change_net_namespace(dev, tgt_net, pat, + new_ifindex, extack); if (err) goto errout; @@ -3078,7 +3090,11 @@ static int do_setlink(const struct sk_buff *skb, struct net_device *dev, sa->sa_family = dev->type; memcpy(sa->sa_data, nla_data(tb[IFLA_ADDRESS]), dev->addr_len); - err = dev_set_mac_address_user(dev, sa, extack); + if (!netdev_need_ops_lock(dev)) + netdev_lock(dev); + err = netif_set_mac_address(dev, sa, extack); + if (!netdev_need_ops_lock(dev)) + netdev_unlock(dev); kfree(sa); if (err) goto errout; @@ -3086,14 +3102,14 @@ static int do_setlink(const struct sk_buff *skb, struct net_device *dev, } if (tb[IFLA_MTU]) { - err = dev_set_mtu_ext(dev, nla_get_u32(tb[IFLA_MTU]), extack); + err = netif_set_mtu_ext(dev, nla_get_u32(tb[IFLA_MTU]), extack); if (err < 0) goto errout; status |= DO_SETLINK_MODIFIED; } if (tb[IFLA_GROUP]) { - dev_set_group(dev, nla_get_u32(tb[IFLA_GROUP])); + netif_set_group(dev, nla_get_u32(tb[IFLA_GROUP])); status |= DO_SETLINK_NOTIFY; } @@ -3103,15 +3119,15 @@ static int do_setlink(const struct sk_buff *skb, struct net_device *dev, * requested. 
*/ if (ifm->ifi_index > 0 && ifname[0]) { - err = dev_change_name(dev, ifname); + err = netif_change_name(dev, ifname); if (err < 0) goto errout; status |= DO_SETLINK_MODIFIED; } if (tb[IFLA_IFALIAS]) { - err = dev_set_alias(dev, nla_data(tb[IFLA_IFALIAS]), - nla_len(tb[IFLA_IFALIAS])); + err = netif_set_alias(dev, nla_data(tb[IFLA_IFALIAS]), + nla_len(tb[IFLA_IFALIAS])); if (err < 0) goto errout; status |= DO_SETLINK_NOTIFY; @@ -3123,8 +3139,8 @@ static int do_setlink(const struct sk_buff *skb, struct net_device *dev, } if (ifm->ifi_flags || ifm->ifi_change) { - err = dev_change_flags(dev, rtnl_dev_combine_flags(dev, ifm), - extack); + err = netif_change_flags(dev, rtnl_dev_combine_flags(dev, ifm), + extack); if (err < 0) goto errout; } @@ -3137,7 +3153,7 @@ static int do_setlink(const struct sk_buff *skb, struct net_device *dev, } if (tb[IFLA_CARRIER]) { - err = dev_change_carrier(dev, nla_get_u8(tb[IFLA_CARRIER])); + err = netif_change_carrier(dev, nla_get_u8(tb[IFLA_CARRIER])); if (err) goto errout; status |= DO_SETLINK_MODIFIED; @@ -3146,7 +3162,7 @@ static int do_setlink(const struct sk_buff *skb, struct net_device *dev, if (tb[IFLA_TXQLEN]) { unsigned int value = nla_get_u32(tb[IFLA_TXQLEN]); - err = dev_change_tx_queue_len(dev, value); + err = netif_change_tx_queue_len(dev, value); if (err) goto errout; status |= DO_SETLINK_MODIFIED; @@ -3377,6 +3393,8 @@ errout: dev->name); } + netdev_unlock_ops(dev); + return err; } @@ -3810,6 +3828,8 @@ static int rtnl_newlink_create(struct sk_buff *skb, struct ifinfomsg *ifm, goto out; } + netdev_lock_ops(dev); + err = rtnl_configure_link(dev, ifm, portid, nlh); if (err < 0) goto out_unregister; @@ -3818,9 +3838,12 @@ static int rtnl_newlink_create(struct sk_buff *skb, struct ifinfomsg *ifm, if (err) goto out_unregister; } + + netdev_unlock_ops(dev); out: return err; out_unregister: + netdev_unlock_ops(dev); if (ops->newlink) { LIST_HEAD(list_kill); diff --git a/net/core/sock.c b/net/core/sock.c index a0598518ce89..323892066def 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2565,8 +2565,12 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst) u32 max_segs = 1; sk->sk_route_caps = dst->dev->features; - if (sk_is_tcp(sk)) + if (sk_is_tcp(sk)) { + struct inet_connection_sock *icsk = inet_csk(sk); + sk->sk_route_caps |= NETIF_F_GSO; + icsk->icsk_ack.dst_quick_ack = dst_metric(dst, RTAX_QUICKACK); + } if (sk->sk_route_caps & NETIF_F_GSO) sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE; if (unlikely(sk->sk_gso_disabled)) diff --git a/net/dsa/conduit.c b/net/dsa/conduit.c index 3dfdb3cb47dc..4ae255cfb23f 100644 --- a/net/dsa/conduit.c +++ b/net/dsa/conduit.c @@ -10,6 +10,7 @@ #include <linux/netdevice.h> #include <linux/netlink.h> #include <net/dsa.h> +#include <net/netdev_lock.h> #include "conduit.h" #include "dsa.h" @@ -26,7 +27,9 @@ static int dsa_conduit_get_regs_len(struct net_device *dev) int len; if (ops->get_regs_len) { + netdev_lock_ops(dev); len = ops->get_regs_len(dev); + netdev_unlock_ops(dev); if (len < 0) return len; ret += len; @@ -57,11 +60,15 @@ static void dsa_conduit_get_regs(struct net_device *dev, int len; if (ops->get_regs_len && ops->get_regs) { + netdev_lock_ops(dev); len = ops->get_regs_len(dev); - if (len < 0) + if (len < 0) { + netdev_unlock_ops(dev); return; + } regs->len = len; ops->get_regs(dev, regs, data); + netdev_unlock_ops(dev); data += regs->len; } @@ -91,8 +98,10 @@ static void dsa_conduit_get_ethtool_stats(struct net_device *dev, int count = 0; if (ops->get_sset_count && ops->get_ethtool_stats) { + 
netdev_lock_ops(dev); count = ops->get_sset_count(dev, ETH_SS_STATS); ops->get_ethtool_stats(dev, stats, data); + netdev_unlock_ops(dev); } if (ds->ops->get_ethtool_stats) @@ -114,8 +123,10 @@ static void dsa_conduit_get_ethtool_phy_stats(struct net_device *dev, if (count >= 0) phy_ethtool_get_stats(dev->phydev, stats, data); } else if (ops->get_sset_count && ops->get_ethtool_phy_stats) { + netdev_lock_ops(dev); count = ops->get_sset_count(dev, ETH_SS_PHY_STATS); ops->get_ethtool_phy_stats(dev, stats, data); + netdev_unlock_ops(dev); } if (count < 0) @@ -132,11 +143,13 @@ static int dsa_conduit_get_sset_count(struct net_device *dev, int sset) struct dsa_switch *ds = cpu_dp->ds; int count = 0; + netdev_lock_ops(dev); if (sset == ETH_SS_PHY_STATS && dev->phydev && !ops->get_ethtool_phy_stats) count = phy_ethtool_get_sset_count(dev->phydev); else if (ops->get_sset_count) count = ops->get_sset_count(dev, sset); + netdev_unlock_ops(dev); if (count < 0) count = 0; @@ -163,6 +176,7 @@ static void dsa_conduit_get_strings(struct net_device *dev, uint32_t stringset, /* We do not want to be NULL-terminated, since this is a prefix */ pfx[sizeof(pfx) - 1] = '_'; + netdev_lock_ops(dev); if (stringset == ETH_SS_PHY_STATS && dev->phydev && !ops->get_ethtool_phy_stats) { mcount = phy_ethtool_get_sset_count(dev->phydev); @@ -176,6 +190,7 @@ static void dsa_conduit_get_strings(struct net_device *dev, uint32_t stringset, mcount = 0; ops->get_strings(dev, stringset, data); } + netdev_unlock_ops(dev); if (ds->ops->get_strings) { ndata = data + mcount * len; diff --git a/net/ethtool/cabletest.c b/net/ethtool/cabletest.c index f22051f33868..0364b8fb577b 100644 --- a/net/ethtool/cabletest.c +++ b/net/ethtool/cabletest.c @@ -2,6 +2,7 @@ #include <linux/phy.h> #include <linux/ethtool_netlink.h> +#include <net/netdev_lock.h> #include "netlink.h" #include "common.h" @@ -72,23 +73,24 @@ int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info *info) dev = req_info.dev; rtnl_lock(); - phydev = ethnl_req_get_phydev(&req_info, - tb[ETHTOOL_A_CABLE_TEST_HEADER], + netdev_lock_ops(dev); + phydev = ethnl_req_get_phydev(&req_info, tb, + ETHTOOL_A_CABLE_TEST_HEADER, info->extack); if (IS_ERR_OR_NULL(phydev)) { ret = -EOPNOTSUPP; - goto out_rtnl; + goto out_unlock; } ops = ethtool_phy_ops; if (!ops || !ops->start_cable_test) { ret = -EOPNOTSUPP; - goto out_rtnl; + goto out_unlock; } ret = ethnl_ops_begin(dev); if (ret < 0) - goto out_rtnl; + goto out_unlock; ret = ops->start_cable_test(phydev, info->extack); @@ -97,7 +99,8 @@ int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info *info) if (!ret) ethnl_cable_test_started(phydev, ETHTOOL_MSG_CABLE_TEST_NTF); -out_rtnl: +out_unlock: + netdev_unlock_ops(dev); rtnl_unlock(); ethnl_parse_header_dev_put(&req_info); return ret; @@ -339,23 +342,24 @@ int ethnl_act_cable_test_tdr(struct sk_buff *skb, struct genl_info *info) goto out_dev_put; rtnl_lock(); - phydev = ethnl_req_get_phydev(&req_info, - tb[ETHTOOL_A_CABLE_TEST_TDR_HEADER], + netdev_lock_ops(dev); + phydev = ethnl_req_get_phydev(&req_info, tb, + ETHTOOL_A_CABLE_TEST_TDR_HEADER, info->extack); if (IS_ERR_OR_NULL(phydev)) { ret = -EOPNOTSUPP; - goto out_rtnl; + goto out_unlock; } ops = ethtool_phy_ops; if (!ops || !ops->start_cable_test_tdr) { ret = -EOPNOTSUPP; - goto out_rtnl; + goto out_unlock; } ret = ethnl_ops_begin(dev); if (ret < 0) - goto out_rtnl; + goto out_unlock; ret = ops->start_cable_test_tdr(phydev, info->extack, &cfg); @@ -365,7 +369,8 @@ int ethnl_act_cable_test_tdr(struct sk_buff *skb, struct 
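/*
 * Condensed sketch of the locking ladder the cable-test handlers in
 * this file now share: the netdev ops lock nests inside RTNL and every
 * failure path funnels through a single unlock label. do_action() is a
 * hypothetical stand-in for the handler-specific work.
 */
	rtnl_lock();
	netdev_lock_ops(dev);
	ret = ethnl_ops_begin(dev);
	if (ret < 0)
		goto out_unlock;
	ret = do_action(dev);		/* start_cable_test(), etc. */
	ethnl_ops_complete(dev);
out_unlock:
	netdev_unlock_ops(dev);
	rtnl_unlock();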
genl_info *info) ethnl_cable_test_started(phydev, ETHTOOL_MSG_CABLE_TEST_TDR_NTF); -out_rtnl: +out_unlock: + netdev_unlock_ops(dev); rtnl_unlock(); out_dev_put: ethnl_parse_header_dev_put(&req_info); diff --git a/net/ethtool/cmis_fw_update.c b/net/ethtool/cmis_fw_update.c index 48aef6220f00..df5f344209c4 100644 --- a/net/ethtool/cmis_fw_update.c +++ b/net/ethtool/cmis_fw_update.c @@ -2,6 +2,7 @@ #include <linux/ethtool.h> #include <linux/firmware.h> +#include <net/netdev_lock.h> #include "common.h" #include "module_fw.h" @@ -418,8 +419,13 @@ cmis_fw_update_commit_image(struct ethtool_cmis_cdb *cdb, static int cmis_fw_update_reset(struct net_device *dev) { __u32 reset_data = ETH_RESET_PHY; + int ret; - return dev->ethtool_ops->reset(dev, &reset_data); + netdev_lock_ops(dev); + ret = dev->ethtool_ops->reset(dev, &reset_data); + netdev_unlock_ops(dev); + + return ret; } void diff --git a/net/ethtool/common.c b/net/ethtool/common.c index ac8b6107863e..7e3c16856c1a 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -36,6 +36,7 @@ const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] = { [NETIF_F_TSO_BIT] = "tx-tcp-segmentation", [NETIF_F_GSO_ROBUST_BIT] = "tx-gso-robust", [NETIF_F_TSO_ECN_BIT] = "tx-tcp-ecn-segmentation", + [NETIF_F_GSO_ACCECN_BIT] = "tx-tcp-accecn-segmentation", [NETIF_F_TSO_MANGLEID_BIT] = "tx-tcp-mangleid-segmentation", [NETIF_F_TSO6_BIT] = "tx-tcp6-segmentation", [NETIF_F_FSO_BIT] = "tx-fcoe-segmentation", @@ -423,6 +424,7 @@ const struct link_mode_info link_mode_params[] = { __DEFINE_LINK_MODE_PARAMS(800000, VR4, Full), }; static_assert(ARRAY_SIZE(link_mode_params) == __ETHTOOL_LINK_MODE_MASK_NBITS); +EXPORT_SYMBOL_GPL(link_mode_params); const char netif_msg_class_names[][ETH_GSTRING_LEN] = { [NETIF_MSG_DRV_BIT] = "drv", diff --git a/net/ethtool/common.h b/net/ethtool/common.h index a1088c2441d0..b4683d286a5a 100644 --- a/net/ethtool/common.h +++ b/net/ethtool/common.h @@ -15,12 +15,6 @@ #define __SOF_TIMESTAMPING_CNT (const_ilog2(SOF_TIMESTAMPING_LAST) + 1) #define __HWTSTAMP_FLAG_CNT (const_ilog2(HWTSTAMP_FLAG_LAST) + 1) -struct link_mode_info { - int speed; - u8 lanes; - u8 duplex; -}; - struct genl_info; struct hwtstamp_provider_desc; @@ -33,7 +27,6 @@ tunable_strings[__ETHTOOL_TUNABLE_COUNT][ETH_GSTRING_LEN]; extern const char phy_tunable_strings[__ETHTOOL_PHY_TUNABLE_COUNT][ETH_GSTRING_LEN]; extern const char link_mode_names[][ETH_GSTRING_LEN]; -extern const struct link_mode_info link_mode_params[]; extern const char netif_msg_class_names[][ETH_GSTRING_LEN]; extern const char wol_mode_names[][ETH_GSTRING_LEN]; extern const char sof_timestamping_names[][ETH_GSTRING_LEN]; diff --git a/net/ethtool/features.c b/net/ethtool/features.c index b6cb101d7f19..f2217983be2b 100644 --- a/net/ethtool/features.c +++ b/net/ethtool/features.c @@ -1,5 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only +#include <net/netdev_lock.h> + #include "netlink.h" #include "common.h" #include "bitset.h" @@ -234,9 +236,10 @@ int ethnl_set_features(struct sk_buff *skb, struct genl_info *info) dev = req_info.dev; rtnl_lock(); + netdev_lock_ops(dev); ret = ethnl_ops_begin(dev); if (ret < 0) - goto out_rtnl; + goto out_unlock; ethnl_features_to_bitmap(old_active, dev->features); ethnl_features_to_bitmap(old_wanted, dev->wanted_features); ret = ethnl_parse_bitset(req_wanted, req_mask, NETDEV_FEATURE_COUNT, @@ -286,7 +289,8 @@ int ethnl_set_features(struct sk_buff *skb, struct genl_info *info) out_ops: ethnl_ops_complete(dev); -out_rtnl: +out_unlock: + 
netdev_unlock_ops(dev); rtnl_unlock(); ethnl_parse_header_dev_put(&req_info); return ret; diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index e95b41f40cac..221639407c72 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -31,6 +31,7 @@ #include <net/ipv6.h> #include <net/xdp_sock_drv.h> #include <net/flow_offload.h> +#include <net/netdev_lock.h> #include <linux/ethtool_netlink.h> #include "common.h" @@ -2317,6 +2318,7 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) */ busy = true; netdev_hold(dev, &dev_tracker, GFP_KERNEL); + netdev_unlock_ops(dev); rtnl_unlock(); if (rc == 0) { @@ -2331,8 +2333,10 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) do { rtnl_lock(); + netdev_lock_ops(dev); rc = ops->set_phys_id(dev, (i++ & 1) ? ETHTOOL_ID_OFF : ETHTOOL_ID_ON); + netdev_unlock_ops(dev); rtnl_unlock(); if (rc) break; @@ -2341,6 +2345,7 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) } rtnl_lock(); + netdev_lock_ops(dev); netdev_put(dev, &dev_tracker); busy = false; @@ -3140,6 +3145,7 @@ __dev_ethtool(struct net *net, struct ifreq *ifr, void __user *useraddr, return -EPERM; } + netdev_lock_ops(dev); if (dev->dev.parent) pm_runtime_get_sync(dev->dev.parent); @@ -3373,6 +3379,7 @@ __dev_ethtool(struct net *net, struct ifreq *ifr, void __user *useraddr, out: if (dev->dev.parent) pm_runtime_put(dev->dev.parent); + netdev_unlock_ops(dev); return rc; } diff --git a/net/ethtool/linkstate.c b/net/ethtool/linkstate.c index af19e1bed303..05a5f72c99fa 100644 --- a/net/ethtool/linkstate.c +++ b/net/ethtool/linkstate.c @@ -103,7 +103,7 @@ static int linkstate_prepare_data(const struct ethnl_req_info *req_base, struct phy_device *phydev; int ret; - phydev = ethnl_req_get_phydev(req_base, tb[ETHTOOL_A_LINKSTATE_HEADER], + phydev = ethnl_req_get_phydev(req_base, tb, ETHTOOL_A_LINKSTATE_HEADER, info->extack); if (IS_ERR(phydev)) { ret = PTR_ERR(phydev); diff --git a/net/ethtool/module.c b/net/ethtool/module.c index 6988e07bdcd6..4d4e0a82579a 100644 --- a/net/ethtool/module.c +++ b/net/ethtool/module.c @@ -4,6 +4,7 @@ #include <linux/firmware.h> #include <linux/sfp.h> #include <net/devlink.h> +#include <net/netdev_lock.h> #include "netlink.h" #include "common.h" @@ -419,19 +420,21 @@ int ethnl_act_module_fw_flash(struct sk_buff *skb, struct genl_info *info) dev = req_info.dev; rtnl_lock(); + netdev_lock_ops(dev); ret = ethnl_ops_begin(dev); if (ret < 0) - goto out_rtnl; + goto out_unlock; ret = ethnl_module_fw_flash_validate(dev, info->extack); if (ret < 0) - goto out_rtnl; + goto out_unlock; ret = module_flash_fw(dev, tb, skb, info); ethnl_ops_complete(dev); -out_rtnl: +out_unlock: + netdev_unlock_ops(dev); rtnl_unlock(); ethnl_parse_header_dev_put(&req_info); return ret; diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index b4c45207fa32..a163d40c6431 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only +#include <net/netdev_lock.h> #include <net/netdev_queues.h> #include <net/sock.h> #include <linux/ethtool_netlink.h> @@ -90,6 +91,8 @@ int ethnl_ops_begin(struct net_device *dev) if (dev->dev.parent) pm_runtime_get_sync(dev->dev.parent); + netdev_ops_assert_locked(dev); + if (!netif_device_present(dev) || dev->reg_state >= NETREG_UNREGISTERING) { ret = -ENODEV; @@ -211,7 +214,7 @@ int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info, } struct phy_device *ethnl_req_get_phydev(const struct ethnl_req_info *req_info, - const 
struct nlattr *header, + struct nlattr **tb, unsigned int header, struct netlink_ext_ack *extack) { struct phy_device *phydev; @@ -225,8 +228,8 @@ struct phy_device *ethnl_req_get_phydev(const struct ethnl_req_info *req_info, return req_info->dev->phydev; phydev = phy_link_topo_get_phy(req_info->dev, req_info->phy_index); - if (!phydev) { - NL_SET_ERR_MSG_ATTR(extack, header, + if (!phydev && tb) { + NL_SET_ERR_MSG_ATTR(extack, tb[header], "no phy matching phyindex"); return ERR_PTR(-ENODEV); } @@ -490,7 +493,11 @@ static int ethnl_default_doit(struct sk_buff *skb, struct genl_info *info) ethnl_init_reply_data(reply_data, ops, req_info->dev); rtnl_lock(); + if (req_info->dev) + netdev_lock_ops(req_info->dev); ret = ops->prepare_data(req_info, reply_data, info); + if (req_info->dev) + netdev_unlock_ops(req_info->dev); rtnl_unlock(); if (ret < 0) goto err_cleanup; @@ -548,7 +555,9 @@ static int ethnl_default_dump_one(struct sk_buff *skb, struct net_device *dev, ethnl_init_reply_data(ctx->reply_data, ctx->ops, dev); rtnl_lock(); + netdev_lock_ops(dev); ret = ctx->ops->prepare_data(ctx->req_info, ctx->reply_data, info); + netdev_unlock_ops(dev); rtnl_unlock(); if (ret < 0) goto out; @@ -693,6 +702,7 @@ static int ethnl_default_set_doit(struct sk_buff *skb, struct genl_info *info) dev = req_info.dev; rtnl_lock(); + netdev_lock_ops(dev); dev->cfg_pending = kmemdup(dev->cfg, sizeof(*dev->cfg), GFP_KERNEL_ACCOUNT); if (!dev->cfg_pending) { @@ -720,6 +730,7 @@ out_free_cfg: kfree(dev->cfg_pending); out_tie_cfg: dev->cfg_pending = dev->cfg; + netdev_unlock_ops(dev); rtnl_unlock(); out_dev: ethnl_parse_header_dev_put(&req_info); @@ -777,6 +788,8 @@ static void ethnl_default_notify(struct net_device *dev, unsigned int cmd, req_info->dev = dev; req_info->flags |= ETHTOOL_FLAG_COMPACT_BITSETS; + netdev_ops_assert_locked(dev); + ethnl_init_reply_data(reply_data, ops, dev); ret = ops->prepare_data(req_info, reply_data, &info); if (ret < 0) diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index ff69ca0715de..ec6ab5443a6f 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -275,7 +275,8 @@ static inline void ethnl_parse_header_dev_put(struct ethnl_req_info *req_info) * ethnl_req_get_phydev() - Gets the phy_device targeted by this request, * if any. Must be called under rtnl_lock(). * @req_info: The ethnl request to get the phy from. - * @header: The netlink header, used for error reporting. + * @tb: The netlink attributes array, for error reporting. + * @header: The netlink header index, used for error reporting. * @extack: The netlink extended ACK, for error reporting. * * The caller must hold RTNL, until it's done interacting with the returned * @@ -289,7 +290,7 @@ static inline void ethnl_parse_header_dev_put(struct ethnl_req_info *req_info) * is returned.
*/ struct phy_device *ethnl_req_get_phydev(const struct ethnl_req_info *req_info, - const struct nlattr *header, + struct nlattr **tb, unsigned int header, struct netlink_ext_ack *extack); /** diff --git a/net/ethtool/phy.c b/net/ethtool/phy.c index ed8f690f6bac..1f590e8d75ed 100644 --- a/net/ethtool/phy.c +++ b/net/ethtool/phy.c @@ -9,6 +9,7 @@ #include <linux/phy.h> #include <linux/phy_link_topology.h> #include <linux/sfp.h> +#include <net/netdev_lock.h> struct phy_req_info { struct ethnl_req_info base; @@ -125,7 +126,7 @@ static int ethnl_phy_parse_request(struct ethnl_req_info *req_base, struct phy_req_info *req_info = PHY_REQINFO(req_base); struct phy_device *phydev; - phydev = ethnl_req_get_phydev(req_base, tb[ETHTOOL_A_PHY_HEADER], + phydev = ethnl_req_get_phydev(req_base, tb, ETHTOOL_A_PHY_HEADER, extack); if (!phydev) return 0; @@ -158,18 +159,19 @@ int ethnl_phy_doit(struct sk_buff *skb, struct genl_info *info) return ret; rtnl_lock(); + netdev_lock_ops(req_info.base.dev); ret = ethnl_phy_parse_request(&req_info.base, tb, info->extack); if (ret < 0) - goto err_unlock_rtnl; + goto err_unlock; /* No PHY, return early */ if (!req_info.pdn) - goto err_unlock_rtnl; + goto err_unlock; ret = ethnl_phy_reply_size(&req_info.base, info->extack); if (ret < 0) - goto err_unlock_rtnl; + goto err_unlock; reply_len = ret + ethnl_reply_header_size(); rskb = ethnl_reply_init(reply_len, req_info.base.dev, @@ -178,13 +180,14 @@ int ethnl_phy_doit(struct sk_buff *skb, struct genl_info *info) info, &reply_payload); if (!rskb) { ret = -ENOMEM; - goto err_unlock_rtnl; + goto err_unlock; } ret = ethnl_phy_fill_reply(&req_info.base, rskb); if (ret) goto err_free_msg; + netdev_unlock_ops(req_info.base.dev); rtnl_unlock(); ethnl_parse_header_dev_put(&req_info.base); genlmsg_end(rskb, reply_payload); @@ -193,7 +196,8 @@ int ethnl_phy_doit(struct sk_buff *skb, struct genl_info *info) err_free_msg: nlmsg_free(rskb); -err_unlock_rtnl: +err_unlock: + netdev_unlock_ops(req_info.base.dev); rtnl_unlock(); ethnl_parse_header_dev_put(&req_info.base); return ret; @@ -290,10 +294,15 @@ int ethnl_phy_dumpit(struct sk_buff *skb, struct netlink_callback *cb) rtnl_lock(); if (ctx->phy_req_info->base.dev) { - ret = ethnl_phy_dump_one_dev(skb, ctx->phy_req_info->base.dev, cb); + dev = ctx->phy_req_info->base.dev; + netdev_lock_ops(dev); + ret = ethnl_phy_dump_one_dev(skb, dev, cb); + netdev_unlock_ops(dev); } else { for_each_netdev_dump(net, dev, ctx->ifindex) { + netdev_lock_ops(dev); ret = ethnl_phy_dump_one_dev(skb, dev, cb); + netdev_unlock_ops(dev); if (ret) break; diff --git a/net/ethtool/plca.c b/net/ethtool/plca.c index d95d92f173a6..e1f7820a6158 100644 --- a/net/ethtool/plca.c +++ b/net/ethtool/plca.c @@ -62,7 +62,7 @@ static int plca_get_cfg_prepare_data(const struct ethnl_req_info *req_base, struct phy_device *phydev; int ret; - phydev = ethnl_req_get_phydev(req_base, tb[ETHTOOL_A_PLCA_HEADER], + phydev = ethnl_req_get_phydev(req_base, tb, ETHTOOL_A_PLCA_HEADER, info->extack); // check that the PHY device is available and connected if (IS_ERR_OR_NULL(phydev)) { @@ -152,7 +152,7 @@ ethnl_set_plca(struct ethnl_req_info *req_info, struct genl_info *info) bool mod = false; int ret; - phydev = ethnl_req_get_phydev(req_info, tb[ETHTOOL_A_PLCA_HEADER], + phydev = ethnl_req_get_phydev(req_info, tb, ETHTOOL_A_PLCA_HEADER, info->extack); // check that the PHY device is available and connected if (IS_ERR_OR_NULL(phydev)) @@ -211,7 +211,7 @@ static int plca_get_status_prepare_data(const struct ethnl_req_info *req_base, 
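/*
 * Sketch of a call site converted to the new ethnl_req_get_phydev()
 * signature, as done in the phy, plca, pse-pd, stats and strset hunks:
 * the parsed attribute table plus a header index replace the single
 * attribute pointer. With a NULL table the helper now skips the extack
 * annotation and returns NULL rather than -ENODEV when no PHY matches.
 */
	phydev = ethnl_req_get_phydev(req_base, tb, ETHTOOL_A_PLCA_HEADER,
				      info->extack);
	if (IS_ERR(phydev))
		return PTR_ERR(phydev);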
struct phy_device *phydev; int ret; - phydev = ethnl_req_get_phydev(req_base, tb[ETHTOOL_A_PLCA_HEADER], + phydev = ethnl_req_get_phydev(req_base, tb, ETHTOOL_A_PLCA_HEADER, info->extack); // check that the PHY device is available and connected if (IS_ERR_OR_NULL(phydev)) { diff --git a/net/ethtool/pse-pd.c b/net/ethtool/pse-pd.c index 2819e2ba6be2..4f6b99eab2a6 100644 --- a/net/ethtool/pse-pd.c +++ b/net/ethtool/pse-pd.c @@ -64,7 +64,7 @@ static int pse_prepare_data(const struct ethnl_req_info *req_base, if (ret < 0) return ret; - phydev = ethnl_req_get_phydev(req_base, tb[ETHTOOL_A_PSE_HEADER], + phydev = ethnl_req_get_phydev(req_base, tb, ETHTOOL_A_PSE_HEADER, info->extack); if (IS_ERR(phydev)) return -ENODEV; @@ -261,7 +261,7 @@ ethnl_set_pse(struct ethnl_req_info *req_info, struct genl_info *info) struct phy_device *phydev; int ret; - phydev = ethnl_req_get_phydev(req_info, tb[ETHTOOL_A_PSE_HEADER], + phydev = ethnl_req_get_phydev(req_info, tb, ETHTOOL_A_PSE_HEADER, info->extack); ret = ethnl_set_pse_validate(phydev, info); if (ret) diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c index 58df9ad02ce8..6d9b1769896b 100644 --- a/net/ethtool/rss.c +++ b/net/ethtool/rss.c @@ -1,5 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only +#include <net/netdev_lock.h> + #include "netlink.h" #include "common.h" @@ -345,7 +347,9 @@ int ethnl_rss_dumpit(struct sk_buff *skb, struct netlink_callback *cb) if (ctx->match_ifindex && ctx->match_ifindex != ctx->ifindex) break; + netdev_lock_ops(dev); ret = rss_dump_one_dev(skb, cb, dev); + netdev_unlock_ops(dev); if (ret) break; } diff --git a/net/ethtool/stats.c b/net/ethtool/stats.c index 038a2558f052..3ca8eb2a3b31 100644 --- a/net/ethtool/stats.c +++ b/net/ethtool/stats.c @@ -138,7 +138,7 @@ static int stats_prepare_data(const struct ethnl_req_info *req_base, struct phy_device *phydev; int ret; - phydev = ethnl_req_get_phydev(req_base, tb[ETHTOOL_A_STATS_HEADER], + phydev = ethnl_req_get_phydev(req_base, tb, ETHTOOL_A_STATS_HEADER, info->extack); if (IS_ERR(phydev)) return PTR_ERR(phydev); diff --git a/net/ethtool/strset.c b/net/ethtool/strset.c index 6b76c05caba4..f6a67109beda 100644 --- a/net/ethtool/strset.c +++ b/net/ethtool/strset.c @@ -309,7 +309,7 @@ static int strset_prepare_data(const struct ethnl_req_info *req_base, return 0; } - phydev = ethnl_req_get_phydev(req_base, tb[ETHTOOL_A_HEADER_FLAGS], + phydev = ethnl_req_get_phydev(req_base, tb, ETHTOOL_A_HEADER_FLAGS, info->extack); /* phydev can be NULL, check for errors only */ diff --git a/net/ethtool/tsinfo.c b/net/ethtool/tsinfo.c index 691be6c445b3..8130b406ef10 100644 --- a/net/ethtool/tsinfo.c +++ b/net/ethtool/tsinfo.c @@ -4,6 +4,7 @@ #include <linux/phy.h> #include <linux/phy_link_topology.h> #include <linux/ptp_clock_kernel.h> +#include <net/netdev_lock.h> #include "netlink.h" #include "common.h" @@ -290,7 +291,8 @@ static void *ethnl_tsinfo_prepare_dump(struct sk_buff *skb, reply_data = ctx->reply_data; memset(reply_data, 0, sizeof(*reply_data)); reply_data->base.dev = dev; - memset(&reply_data->ts_info, 0, sizeof(reply_data->ts_info)); + reply_data->ts_info.cmd = ETHTOOL_GET_TS_INFO; + reply_data->ts_info.phc_index = -1; return ehdr; } @@ -448,12 +450,15 @@ int ethnl_tsinfo_dumpit(struct sk_buff *skb, struct netlink_callback *cb) rtnl_lock(); if (ctx->req_info->base.dev) { - ret = ethnl_tsinfo_dump_one_net_topo(skb, - ctx->req_info->base.dev, - cb); + dev = ctx->req_info->base.dev; + netdev_lock_ops(dev); + ret = ethnl_tsinfo_dump_one_net_topo(skb, dev, cb); + netdev_unlock_ops(dev); } 
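/*
 * The rss and tsinfo dumpit handlers around here now take the ops lock
 * per visited device instead of relying on RTNL alone; a minimal sketch
 * of the shared loop shape (mirrored by the else branch that follows),
 * with dump_one() as a hypothetical stand-in for the per-command helper:
 */
	for_each_netdev_dump(net, dev, ctx->ifindex) {
		netdev_lock_ops(dev);
		ret = dump_one(skb, dev, cb);
		netdev_unlock_ops(dev);
		if (ret)
			break;
	}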
else { for_each_netdev_dump(net, dev, ctx->pos_ifindex) { + netdev_lock_ops(dev); ret = ethnl_tsinfo_dump_one_net_topo(skb, dev, cb); + netdev_unlock_ops(dev); if (ret < 0 && ret != -EOPNOTSUPP) break; ctx->pos_phyindex = 0; diff --git a/net/hsr/Kconfig b/net/hsr/Kconfig index 1b048c17b6c8..fcacdf4f0ffc 100644 --- a/net/hsr/Kconfig +++ b/net/hsr/Kconfig @@ -38,3 +38,21 @@ config HSR relying on this code in a safety critical system! If unsure, say N. + +if HSR + +config PRP_DUP_DISCARD_KUNIT_TEST + tristate "PRP duplicate discard KUnit tests" if !KUNIT_ALL_TESTS + depends on KUNIT + default KUNIT_ALL_TESTS + help + Covers the PRP duplicate discard algorithm. + Only useful for kernel devs running the KUnit test harness; it is + not for inclusion into a production build. + + For more information on KUnit and unit tests in general please refer + to the KUnit documentation in Documentation/dev-tools/kunit/. + + If unsure, say N. + +endif diff --git a/net/hsr/Makefile b/net/hsr/Makefile index 75df90d3b416..34e581db5c41 100644 --- a/net/hsr/Makefile +++ b/net/hsr/Makefile @@ -8,3 +8,5 @@ obj-$(CONFIG_HSR) += hsr.o hsr-y := hsr_main.o hsr_framereg.o hsr_device.o \ hsr_netlink.o hsr_slave.o hsr_forward.o hsr-$(CONFIG_DEBUG_FS) += hsr_debugfs.o + +obj-$(CONFIG_PRP_DUP_DISCARD_KUNIT_TEST) += prp_dup_discard_test.o diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index c6f8614e9ed1..439cfb7ad5d1 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -616,6 +616,7 @@ static struct hsr_proto_ops hsr_ops = { .drop_frame = hsr_drop_frame, .fill_frame_info = hsr_fill_frame_info, .invalid_dan_ingress_frame = hsr_invalid_dan_ingress_frame, + .register_frame_out = hsr_register_frame_out, }; static struct hsr_proto_ops prp_ops = { @@ -626,6 +627,7 @@ static struct hsr_proto_ops prp_ops = { .fill_frame_info = prp_fill_frame_info, .handle_san_frame = prp_handle_san_frame, .update_san_info = prp_update_san_info, + .register_frame_out = prp_register_frame_out, }; void hsr_dev_setup(struct net_device *dev) diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index a4bacf198555..c67c0d35921d 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -536,8 +536,8 @@ static void hsr_forward_do(struct hsr_frame_info *frame) * Also for SAN, this shouldn't be done.
*/ if (!frame->is_from_san && - hsr_register_frame_out(port, frame->node_src, - frame->sequence_nr)) + hsr->proto_ops->register_frame_out && + hsr->proto_ops->register_frame_out(port, frame)) continue; if (frame->is_supervision && port->type == HSR_PT_MASTER && diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index 73bc6f659812..4ce471a2f387 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -35,6 +35,7 @@ static bool seq_nr_after(u16 a, u16 b) #define seq_nr_before(a, b) seq_nr_after((b), (a)) #define seq_nr_before_or_eq(a, b) (!seq_nr_after((a), (b))) +#define PRP_DROP_WINDOW_LEN 32768 bool hsr_addr_is_redbox(struct hsr_priv *hsr, unsigned char *addr) { @@ -176,8 +177,11 @@ static struct hsr_node *hsr_add_node(struct hsr_priv *hsr, new_node->time_in[i] = now; new_node->time_out[i] = now; } - for (i = 0; i < HSR_PT_PORTS; i++) + for (i = 0; i < HSR_PT_PORTS; i++) { new_node->seq_out[i] = seq_out; + new_node->seq_expected[i] = seq_out + 1; + new_node->seq_start[i] = seq_out + 1; + } if (san && hsr->proto_ops->handle_san_frame) hsr->proto_ops->handle_san_frame(san, rx_port, new_node); @@ -482,9 +486,11 @@ void hsr_register_frame_in(struct hsr_node *node, struct hsr_port *port, * 0 otherwise, or * negative error code on error */ -int hsr_register_frame_out(struct hsr_port *port, struct hsr_node *node, - u16 sequence_nr) +int hsr_register_frame_out(struct hsr_port *port, struct hsr_frame_info *frame) { + struct hsr_node *node = frame->node_src; + u16 sequence_nr = frame->sequence_nr; + spin_lock_bh(&node->seq_out_lock); if (seq_nr_before_or_eq(sequence_nr, node->seq_out[port->type]) && time_is_after_jiffies(node->time_out[port->type] + @@ -499,6 +505,93 @@ int hsr_register_frame_out(struct hsr_port *port, struct hsr_node *node, return 0; } +/* Adaptation of the PRP duplicate discard algorithm described in wireshark + * wiki (https://wiki.wireshark.org/PRP) + * + * A drop window is maintained for both LANs with start sequence set to the + * first sequence accepted on the LAN that has not been seen on the other LAN, + * and expected sequence set to the latest received sequence number plus one. + * + * When a frame is received on either LAN it is compared against the received + * frames on the other LAN. If it is outside the drop window of the other LAN + * the frame is accepted and the drop window is updated. + * The drop window for the other LAN is reset. + * + * 'port' is the outgoing interface + * 'frame' is the frame to be sent + * + * Return: + * 1 if frame can be shown to have been sent recently on this interface, + * 0 otherwise + */ +int prp_register_frame_out(struct hsr_port *port, struct hsr_frame_info *frame) +{ + enum hsr_port_type other_port; + enum hsr_port_type rcv_port; + struct hsr_node *node; + u16 sequence_diff; + u16 sequence_exp; + u16 sequence_nr; + + /* out-going frames are always in order + * and can be checked the same way as for HSR + */ + if (frame->port_rcv->type == HSR_PT_MASTER) + return hsr_register_frame_out(port, frame); + + /* for PRP we should only forward frames from the slave ports + * to the master port + */ + if (port->type != HSR_PT_MASTER) + return 1; + + node = frame->node_src; + sequence_nr = frame->sequence_nr; + sequence_exp = sequence_nr + 1; + rcv_port = frame->port_rcv->type; + other_port = rcv_port == HSR_PT_SLAVE_A ? 
HSR_PT_SLAVE_B : + HSR_PT_SLAVE_A; + + spin_lock_bh(&node->seq_out_lock); + if (time_is_before_jiffies(node->time_out[port->type] + + msecs_to_jiffies(HSR_ENTRY_FORGET_TIME)) || + (node->seq_start[rcv_port] == node->seq_expected[rcv_port] && + node->seq_start[other_port] == node->seq_expected[other_port])) { + /* the node hasn't been sending for a while + * or both drop windows are empty, forward the frame + */ + node->seq_start[rcv_port] = sequence_nr; + } else if (seq_nr_before(sequence_nr, node->seq_expected[other_port]) && + seq_nr_before_or_eq(node->seq_start[other_port], sequence_nr)) { + /* drop the frame, update the drop window for the other port + * and reset our drop window + */ + node->seq_start[other_port] = sequence_exp; + node->seq_expected[rcv_port] = sequence_exp; + node->seq_start[rcv_port] = node->seq_expected[rcv_port]; + spin_unlock_bh(&node->seq_out_lock); + return 1; + } + + /* update the drop window for the port where this frame was received + * and clear the drop window for the other port + */ + node->seq_start[other_port] = node->seq_expected[other_port]; + node->seq_expected[rcv_port] = sequence_exp; + sequence_diff = sequence_exp - node->seq_start[rcv_port]; + if (sequence_diff > PRP_DROP_WINDOW_LEN) + node->seq_start[rcv_port] = sequence_exp - PRP_DROP_WINDOW_LEN; + + node->time_out[port->type] = jiffies; + node->seq_out[port->type] = sequence_nr; + spin_unlock_bh(&node->seq_out_lock); + return 0; +} + +#if IS_MODULE(CONFIG_PRP_DUP_DISCARD_KUNIT_TEST) +EXPORT_SYMBOL(prp_register_frame_out); +#endif + static struct hsr_port *get_late_port(struct hsr_priv *hsr, struct hsr_node *node) { diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h index 993fa950d814..b04948659d84 100644 --- a/net/hsr/hsr_framereg.h +++ b/net/hsr/hsr_framereg.h @@ -44,8 +44,7 @@ void hsr_addr_subst_dest(struct hsr_node *node_src, struct sk_buff *skb, void hsr_register_frame_in(struct hsr_node *node, struct hsr_port *port, u16 sequence_nr); -int hsr_register_frame_out(struct hsr_port *port, struct hsr_node *node, - u16 sequence_nr); +int hsr_register_frame_out(struct hsr_port *port, struct hsr_frame_info *frame); void hsr_prune_nodes(struct timer_list *t); void hsr_prune_proxy_nodes(struct timer_list *t); @@ -73,6 +72,8 @@ void prp_update_san_info(struct hsr_node *node, bool is_sup); bool hsr_is_node_in_db(struct list_head *node_db, const unsigned char addr[ETH_ALEN]); +int prp_register_frame_out(struct hsr_port *port, struct hsr_frame_info *frame); + struct hsr_node { struct list_head mac_list; /* Protect R/W access to seq_out */ @@ -89,6 +90,9 @@ struct hsr_node { bool san_b; u16 seq_out[HSR_PT_PORTS]; bool removed; + /* PRP specific duplicate handling */ + u16 seq_expected[HSR_PT_PORTS]; + u16 seq_start[HSR_PT_PORTS]; struct rcu_head rcu_head; }; diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h index 7561845b8bf6..1bc47b17a296 100644 --- a/net/hsr/hsr_main.h +++ b/net/hsr/hsr_main.h @@ -175,6 +175,8 @@ struct hsr_proto_ops { struct hsr_frame_info *frame); bool (*invalid_dan_ingress_frame)(__be16 protocol); void (*update_san_info)(struct hsr_node *node, bool is_sup); + int (*register_frame_out)(struct hsr_port *port, + struct hsr_frame_info *frame); }; struct hsr_self_node { diff --git a/net/hsr/prp_dup_discard_test.c b/net/hsr/prp_dup_discard_test.c new file mode 100644 index 000000000000..e86b7b633ae8 --- /dev/null +++ b/net/hsr/prp_dup_discard_test.c @@ -0,0 +1,212 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <kunit/test.h> + +#include "hsr_main.h" +#include 
"hsr_framereg.h" + +struct prp_test_data { + struct hsr_port port; + struct hsr_port port_rcv; + struct hsr_frame_info frame; + struct hsr_node node; +}; + +static struct prp_test_data *build_prp_test_data(struct kunit *test) +{ + struct prp_test_data *data = kunit_kzalloc(test, + sizeof(struct prp_test_data), GFP_USER); + KUNIT_EXPECT_NOT_ERR_OR_NULL(test, data); + + data->frame.node_src = &data->node; + data->frame.port_rcv = &data->port_rcv; + data->port_rcv.type = HSR_PT_SLAVE_A; + data->node.seq_start[HSR_PT_SLAVE_A] = 1; + data->node.seq_expected[HSR_PT_SLAVE_A] = 1; + data->node.seq_start[HSR_PT_SLAVE_B] = 1; + data->node.seq_expected[HSR_PT_SLAVE_B] = 1; + data->node.seq_out[HSR_PT_MASTER] = 0; + data->node.time_out[HSR_PT_MASTER] = jiffies; + data->port.type = HSR_PT_MASTER; + + return data; +} + +static void check_prp_counters(struct kunit *test, + struct prp_test_data *data, + u16 seq_start_a, u16 seq_expected_a, + u16 seq_start_b, u16 seq_expected_b) +{ + KUNIT_EXPECT_EQ(test, data->node.seq_start[HSR_PT_SLAVE_A], + seq_start_a); + KUNIT_EXPECT_EQ(test, data->node.seq_start[HSR_PT_SLAVE_B], + seq_start_b); + KUNIT_EXPECT_EQ(test, data->node.seq_expected[HSR_PT_SLAVE_A], + seq_expected_a); + KUNIT_EXPECT_EQ(test, data->node.seq_expected[HSR_PT_SLAVE_B], + seq_expected_b); +} + +static void prp_dup_discard_forward(struct kunit *test) +{ + /* Normal situation, both LANs in sync. Next frame is forwarded */ + struct prp_test_data *data = build_prp_test_data(test); + + data->frame.sequence_nr = 2; + KUNIT_EXPECT_EQ(test, 0, + prp_register_frame_out(&data->port, &data->frame)); + KUNIT_EXPECT_EQ(test, data->frame.sequence_nr, + data->node.seq_out[HSR_PT_MASTER]); + KUNIT_EXPECT_EQ(test, jiffies, data->node.time_out[HSR_PT_MASTER]); + check_prp_counters(test, data, data->frame.sequence_nr, + data->frame.sequence_nr + 1, 1, 1); +} + +static void prp_dup_discard_inside_dropwindow(struct kunit *test) +{ + /* Normal situation, other LAN ahead by one. 
Frame is dropped */ + struct prp_test_data *data = build_prp_test_data(test); + unsigned long time = jiffies - 10; + + data->frame.sequence_nr = 1; + data->node.seq_expected[HSR_PT_SLAVE_B] = 3; + data->node.seq_out[HSR_PT_MASTER] = 2; + data->node.time_out[HSR_PT_MASTER] = time; + + KUNIT_EXPECT_EQ(test, 1, + prp_register_frame_out(&data->port, &data->frame)); + KUNIT_EXPECT_EQ(test, 2, data->node.seq_out[HSR_PT_MASTER]); + KUNIT_EXPECT_EQ(test, time, data->node.time_out[HSR_PT_MASTER]); + check_prp_counters(test, data, 2, 2, 2, 3); +} + +static void prp_dup_discard_node_timeout(struct kunit *test) +{ + /* Timeout situation, node hasn't sent anything for a while */ + struct prp_test_data *data = build_prp_test_data(test); + + data->frame.sequence_nr = 7; + data->node.seq_start[HSR_PT_SLAVE_A] = 1234; + data->node.seq_expected[HSR_PT_SLAVE_A] = 1235; + data->node.seq_start[HSR_PT_SLAVE_B] = 1234; + data->node.seq_expected[HSR_PT_SLAVE_B] = 1234; + data->node.seq_out[HSR_PT_MASTER] = 1234; + data->node.time_out[HSR_PT_MASTER] = + jiffies - msecs_to_jiffies(HSR_ENTRY_FORGET_TIME) - 1; + + KUNIT_EXPECT_EQ(test, 0, + prp_register_frame_out(&data->port, &data->frame)); + KUNIT_EXPECT_EQ(test, data->frame.sequence_nr, + data->node.seq_out[HSR_PT_MASTER]); + KUNIT_EXPECT_EQ(test, jiffies, data->node.time_out[HSR_PT_MASTER]); + check_prp_counters(test, data, data->frame.sequence_nr, + data->frame.sequence_nr + 1, 1234, 1234); +} + +static void prp_dup_discard_out_of_sequence(struct kunit *test) +{ + /* One frame is received out of sequence on both LANs */ + struct prp_test_data *data = build_prp_test_data(test); + + data->node.seq_start[HSR_PT_SLAVE_A] = 10; + data->node.seq_expected[HSR_PT_SLAVE_A] = 10; + data->node.seq_start[HSR_PT_SLAVE_B] = 10; + data->node.seq_expected[HSR_PT_SLAVE_B] = 10; + data->node.seq_out[HSR_PT_MASTER] = 9; + + /* 1st old frame, should be accepted */ + data->frame.sequence_nr = 8; + KUNIT_EXPECT_EQ(test, 0, + prp_register_frame_out(&data->port, &data->frame)); + KUNIT_EXPECT_EQ(test, data->frame.sequence_nr, + data->node.seq_out[HSR_PT_MASTER]); + check_prp_counters(test, data, data->frame.sequence_nr, + data->frame.sequence_nr + 1, 10, 10); + + /* 2nd frame should be dropped */ + data->frame.sequence_nr = 8; + data->port_rcv.type = HSR_PT_SLAVE_B; + KUNIT_EXPECT_EQ(test, 1, + prp_register_frame_out(&data->port, &data->frame)); + check_prp_counters(test, data, data->frame.sequence_nr + 1, + data->frame.sequence_nr + 1, + data->frame.sequence_nr + 1, + data->frame.sequence_nr + 1); + + /* Next frame, this is forwarded */ + data->frame.sequence_nr = 10; + data->port_rcv.type = HSR_PT_SLAVE_A; + KUNIT_EXPECT_EQ(test, 0, + prp_register_frame_out(&data->port, &data->frame)); + KUNIT_EXPECT_EQ(test, data->frame.sequence_nr, + data->node.seq_out[HSR_PT_MASTER]); + check_prp_counters(test, data, data->frame.sequence_nr, + data->frame.sequence_nr + 1, 9, 9); + + /* and next one is dropped */ + data->frame.sequence_nr = 10; + data->port_rcv.type = HSR_PT_SLAVE_B; + KUNIT_EXPECT_EQ(test, 1, + prp_register_frame_out(&data->port, &data->frame)); + check_prp_counters(test, data, data->frame.sequence_nr + 1, + data->frame.sequence_nr + 1, + data->frame.sequence_nr + 1, + data->frame.sequence_nr + 1); +} + +static void prp_dup_discard_lan_b_late(struct kunit *test) +{ + /* LAN B is behind */ + struct prp_test_data *data = build_prp_test_data(test); + + data->node.seq_start[HSR_PT_SLAVE_A] = 9; + data->node.seq_expected[HSR_PT_SLAVE_A] = 9; + data->node.seq_start[HSR_PT_SLAVE_B] = 9; 
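/*
 * Illustrative walk-through of this scenario under the drop window
 * rules in hsr_framereg.c: both windows start empty at 9; frames 9 and
 * 10 from LAN A are forwarded, growing LAN A's window to [9, 11); the
 * late duplicates 9 and 10 from LAN B then fall inside that window and
 * are dropped, each drop advancing the window starts until both windows
 * are empty again at 11.
 */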
+ data->node.seq_expected[HSR_PT_SLAVE_B] = 9; + data->node.seq_out[HSR_PT_MASTER] = 8; + + data->frame.sequence_nr = 9; + KUNIT_EXPECT_EQ(test, 0, + prp_register_frame_out(&data->port, &data->frame)); + KUNIT_EXPECT_EQ(test, data->frame.sequence_nr, + data->node.seq_out[HSR_PT_MASTER]); + check_prp_counters(test, data, 9, 10, 9, 9); + + data->frame.sequence_nr = 10; + KUNIT_EXPECT_EQ(test, 0, + prp_register_frame_out(&data->port, &data->frame)); + KUNIT_EXPECT_EQ(test, data->frame.sequence_nr, + data->node.seq_out[HSR_PT_MASTER]); + check_prp_counters(test, data, 9, 11, 9, 9); + + data->frame.sequence_nr = 9; + data->port_rcv.type = HSR_PT_SLAVE_B; + KUNIT_EXPECT_EQ(test, 1, + prp_register_frame_out(&data->port, &data->frame)); + check_prp_counters(test, data, 10, 11, 10, 10); + + data->frame.sequence_nr = 10; + data->port_rcv.type = HSR_PT_SLAVE_B; + KUNIT_EXPECT_EQ(test, 1, + prp_register_frame_out(&data->port, &data->frame)); + check_prp_counters(test, data, 11, 11, 11, 11); +} + +static struct kunit_case prp_dup_discard_test_cases[] = { + KUNIT_CASE(prp_dup_discard_forward), + KUNIT_CASE(prp_dup_discard_inside_dropwindow), + KUNIT_CASE(prp_dup_discard_node_timeout), + KUNIT_CASE(prp_dup_discard_out_of_sequence), + KUNIT_CASE(prp_dup_discard_lan_b_late), + {} +}; + +static struct kunit_suite prp_dup_discard_suite = { + .name = "prp_duplicate_discard", + .test_cases = prp_dup_discard_test_cases, +}; + +kunit_test_suite(prp_dup_discard_suite); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("KUnit tests for PRP duplicate discard"); +MODULE_AUTHOR("Jaakko Karrenpalo <jkarrenpalo@gmail.com>"); diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c index 9a9da74b0a4f..018929563c6b 100644 --- a/net/ieee802154/6lowpan/core.c +++ b/net/ieee802154/6lowpan/core.c @@ -50,6 +50,7 @@ #include <linux/if_arp.h> #include <net/ipv6.h> +#include <net/netdev_lock.h> #include "6lowpan_i.h" diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index 867d637d86f0..d4b983d17038 100644 --- a/net/ieee802154/6lowpan/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -31,7 +31,8 @@ static const char lowpan_frags_cache_name[] = "lowpan-frags"; static struct inet_frags lowpan_frags; static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *skb, - struct sk_buff *prev, struct net_device *ldev); + struct sk_buff *prev, struct net_device *ldev, + int *refs); static void lowpan_frag_init(struct inet_frag_queue *q, const void *a) { @@ -45,6 +46,7 @@ static void lowpan_frag_expire(struct timer_list *t) { struct inet_frag_queue *frag = from_timer(frag, t, timer); struct frag_queue *fq; + int refs = 1; fq = container_of(frag, struct frag_queue, q); @@ -53,10 +55,10 @@ static void lowpan_frag_expire(struct timer_list *t) if (fq->q.flags & INET_FRAG_COMPLETE) goto out; - inet_frag_kill(&fq->q); + inet_frag_kill(&fq->q, &refs); out: spin_unlock(&fq->q.lock); - inet_frag_put(&fq->q); + inet_frag_putn(&fq->q, refs); } static inline struct lowpan_frag_queue * @@ -82,7 +84,8 @@ fq_find(struct net *net, const struct lowpan_802154_cb *cb, } static int lowpan_frag_queue(struct lowpan_frag_queue *fq, - struct sk_buff *skb, u8 frag_type) + struct sk_buff *skb, u8 frag_type, + int *refs) { struct sk_buff *prev_tail; struct net_device *ldev; @@ -143,7 +146,7 @@ static int lowpan_frag_queue(struct lowpan_frag_queue *fq, unsigned long orefdst = skb->_skb_refdst; skb->_skb_refdst = 0UL; - res = lowpan_frag_reasm(fq, skb, prev_tail, ldev); + res = 
lowpan_frag_reasm(fq, skb, prev_tail, ldev, refs); skb->_skb_refdst = orefdst; return res; } @@ -162,11 +165,12 @@ err: * the last and the first frames arrived and all the bits are here. */ static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *skb, - struct sk_buff *prev_tail, struct net_device *ldev) + struct sk_buff *prev_tail, struct net_device *ldev, + int *refs) { void *reasm_data; - inet_frag_kill(&fq->q); + inet_frag_kill(&fq->q, refs); reasm_data = inet_frag_reasm_prepare(&fq->q, skb, prev_tail); if (!reasm_data) @@ -300,17 +304,20 @@ int lowpan_frag_rcv(struct sk_buff *skb, u8 frag_type) goto err; } + rcu_read_lock(); fq = fq_find(net, cb, &hdr.source, &hdr.dest); if (fq != NULL) { - int ret; + int ret, refs = 0; spin_lock(&fq->q.lock); - ret = lowpan_frag_queue(fq, skb, frag_type); + ret = lowpan_frag_queue(fq, skb, frag_type, &refs); spin_unlock(&fq->q.lock); - inet_frag_put(&fq->q); + rcu_read_unlock(); + inet_frag_putn(&fq->q, refs); return ret; } + rcu_read_unlock(); err: kfree_skb(skb); diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index 554804774628..e01492234b0b 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -121,7 +121,7 @@ static int bpf_tcp_ca_btf_struct_access(struct bpf_verifier_log *log, BPF_CALL_2(bpf_tcp_send_ack, struct tcp_sock *, tp, u32, rcv_nxt) { /* bpf_tcp_ca prog cannot have NULL tp */ - __tcp_send_ack((struct sock *)tp, rcv_nxt); + __tcp_send_ack((struct sock *)tp, rcv_nxt, 0); return 0; } diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index d179a2c84222..19fae4811ab2 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -145,8 +145,7 @@ static void inet_frags_free_cb(void *ptr, void *arg) } spin_unlock_bh(&fq->lock); - if (refcount_sub_and_test(count, &fq->refcnt)) - inet_frag_destroy(fq); + inet_frag_putn(fq, count); } static LLIST_HEAD(fqdir_free_list); @@ -226,10 +225,10 @@ void fqdir_exit(struct fqdir *fqdir) } EXPORT_SYMBOL(fqdir_exit); -void inet_frag_kill(struct inet_frag_queue *fq) +void inet_frag_kill(struct inet_frag_queue *fq, int *refs) { if (del_timer(&fq->timer)) - refcount_dec(&fq->refcnt); + (*refs)++; if (!(fq->flags & INET_FRAG_COMPLETE)) { struct fqdir *fqdir = fq->fqdir; @@ -244,7 +243,7 @@ void inet_frag_kill(struct inet_frag_queue *fq) if (!READ_ONCE(fqdir->dead)) { rhashtable_remove_fast(&fqdir->rhashtable, &fq->node, fqdir->f->rhash_params); - refcount_dec(&fq->refcnt); + (*refs)++; } else { fq->flags |= INET_FRAG_HASH_DEAD; } @@ -328,7 +327,8 @@ static struct inet_frag_queue *inet_frag_alloc(struct fqdir *fqdir, timer_setup(&q->timer, f->frag_expire, 0); spin_lock_init(&q->lock); - refcount_set(&q->refcnt, 3); + /* One reference for the timer, one for the hash table. */ + refcount_set(&q->refcnt, 2); return q; } @@ -350,15 +350,20 @@ static struct inet_frag_queue *inet_frag_create(struct fqdir *fqdir, *prev = rhashtable_lookup_get_insert_key(&fqdir->rhashtable, &q->key, &q->node, f->rhash_params); if (*prev) { + /* We could not insert in the hash table, + * we need to cancel what inet_frag_alloc() + * anticipated. + */ + int refs = 1; + q->flags |= INET_FRAG_COMPLETE; - inet_frag_kill(q); - inet_frag_destroy(q); + inet_frag_kill(q, &refs); + inet_frag_putn(q, refs); return NULL; } return q; } -/* TODO : call from rcu_read_lock() and no longer use refcount_inc_not_zero() */ struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key) { /* This pairs with WRITE_ONCE() in fqdir_pre_exit(). 
*/ @@ -368,17 +373,11 @@ struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key) if (!high_thresh || frag_mem_limit(fqdir) > high_thresh) return NULL; - rcu_read_lock(); - prev = rhashtable_lookup(&fqdir->rhashtable, key, fqdir->f->rhash_params); if (!prev) fq = inet_frag_create(fqdir, key, &prev); - if (!IS_ERR_OR_NULL(prev)) { + if (!IS_ERR_OR_NULL(prev)) fq = prev; - if (!refcount_inc_not_zero(&fq->refcnt)) - fq = NULL; - } - rcu_read_unlock(); return fq; } EXPORT_SYMBOL(inet_frag_find); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index d1b5f45ee718..5bf163f756e9 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -35,8 +35,8 @@ u32 inet_ehashfn(const struct net *net, const __be32 laddr, { net_get_random_once(&inet_ehash_secret, sizeof(inet_ehash_secret)); - return __inet_ehashfn(laddr, lport, faddr, fport, - inet_ehash_secret + net_hash_mix(net)); + return lport + __inet_ehashfn(laddr, 0, faddr, fport, + inet_ehash_secret + net_hash_mix(net)); } EXPORT_SYMBOL_GPL(inet_ehashfn); @@ -538,7 +538,8 @@ EXPORT_SYMBOL_GPL(__inet_lookup_established); static int __inet_check_established(struct inet_timewait_death_row *death_row, struct sock *sk, __u16 lport, struct inet_timewait_sock **twp, - bool rcu_lookup) + bool rcu_lookup, + u32 hash) { struct inet_hashinfo *hinfo = death_row->hashinfo; struct inet_sock *inet = inet_sk(sk); @@ -549,8 +550,6 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, int sdif = l3mdev_master_ifindex_by_index(net, dif); INET_ADDR_COOKIE(acookie, saddr, daddr); const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport); - unsigned int hash = inet_ehashfn(net, daddr, lport, - saddr, inet->inet_dport); struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); struct inet_timewait_sock *tw = NULL; const struct hlist_nulls_node *node; @@ -1007,9 +1006,10 @@ static u32 *table_perturb; int __inet_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk, u64 port_offset, + u32 hash_port0, int (*check_established)(struct inet_timewait_death_row *, struct sock *, __u16, struct inet_timewait_sock **, - bool rcu_lookup)) + bool rcu_lookup, u32 hash)) { struct inet_hashinfo *hinfo = death_row->hashinfo; struct inet_bind_hashbucket *head, *head2; @@ -1027,7 +1027,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, if (port) { local_bh_disable(); - ret = check_established(death_row, sk, port, NULL, false); + ret = check_established(death_row, sk, port, NULL, false, + hash_port0 + port); local_bh_enable(); return ret; } @@ -1071,7 +1072,8 @@ other_parity_scan: rcu_read_unlock(); goto next_port; } - if (!check_established(death_row, sk, port, &tw, true)) + if (!check_established(death_row, sk, port, &tw, true, + hash_port0 + port)) break; rcu_read_unlock(); goto next_port; @@ -1090,7 +1092,8 @@ other_parity_scan: goto next_port_unlock; WARN_ON(hlist_empty(&tb->bhash2)); if (!check_established(death_row, sk, - port, &tw, false)) + port, &tw, false, + hash_port0 + port)) goto ok; goto next_port_unlock; } @@ -1197,11 +1200,18 @@ error: int inet_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk) { + const struct inet_sock *inet = inet_sk(sk); + const struct net *net = sock_net(sk); u64 port_offset = 0; + u32 hash_port0; if (!inet_sk(sk)->inet_num) port_offset = inet_sk_port_offset(sk); - return __inet_hash_connect(death_row, sk, port_offset, + + hash_port0 = inet_ehashfn(net, inet->inet_rcv_saddr, 0, + inet->inet_daddr, 
inet->inet_dport); + + return __inet_hash_connect(death_row, sk, port_offset, hash_port0, __inet_check_established); } EXPORT_SYMBOL_GPL(inet_hash_connect); @@ -1261,22 +1271,37 @@ int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo) { unsigned int locksz = sizeof(spinlock_t); unsigned int i, nblocks = 1; + spinlock_t *ptr = NULL; - if (locksz != 0) { - /* allocate 2 cache lines or at least one spinlock per cpu */ - nblocks = max(2U * L1_CACHE_BYTES / locksz, 1U); - nblocks = roundup_pow_of_two(nblocks * num_possible_cpus()); + if (locksz == 0) + goto set_mask; - /* no more locks than number of hash buckets */ - nblocks = min(nblocks, hashinfo->ehash_mask + 1); + /* Allocate 2 cache lines or at least one spinlock per cpu. */ + nblocks = max(2U * L1_CACHE_BYTES / locksz, 1U) * num_possible_cpus(); - hashinfo->ehash_locks = kvmalloc_array(nblocks, locksz, GFP_KERNEL); - if (!hashinfo->ehash_locks) - return -ENOMEM; + /* At least one page per NUMA node. */ + nblocks = max(nblocks, num_online_nodes() * PAGE_SIZE / locksz); + + nblocks = roundup_pow_of_two(nblocks); - for (i = 0; i < nblocks; i++) - spin_lock_init(&hashinfo->ehash_locks[i]); + /* No more locks than number of hash buckets. */ + nblocks = min(nblocks, hashinfo->ehash_mask + 1); + + if (num_online_nodes() > 1) { + /* Use vmalloc() to allow NUMA policy to spread pages + * on all available nodes if desired. + */ + ptr = vmalloc_array(nblocks, locksz); + } + if (!ptr) { + ptr = kvmalloc_array(nblocks, locksz, GFP_KERNEL); + if (!ptr) + return -ENOMEM; } + for (i = 0; i < nblocks; i++) + spin_lock_init(&ptr[i]); + hashinfo->ehash_locks = ptr; +set_mask: hashinfo->ehash_locks_mask = nblocks - 1; return 0; } diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 7a435746a22d..77f395b28ec7 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -76,7 +76,8 @@ static u8 ip4_frag_ecn(u8 tos) static struct inet_frags ip4_frags; static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, - struct sk_buff *prev_tail, struct net_device *dev); + struct sk_buff *prev_tail, struct net_device *dev, + int *refs); static void ip4_frag_init(struct inet_frag_queue *q, const void *a) @@ -107,22 +108,6 @@ static void ip4_frag_free(struct inet_frag_queue *q) inet_putpeer(qp->peer); } - -/* Destruction primitives. */ - -static void ipq_put(struct ipq *ipq) -{ - inet_frag_put(&ipq->q); -} - -/* Kill ipq entry. It is not destroyed immediately, - * because caller (and someone more) holds reference count. - */ -static void ipq_kill(struct ipq *ipq) -{ - inet_frag_kill(&ipq->q); -} - static bool frag_expire_skip_icmp(u32 user) { return user == IP_DEFRAG_AF_PACKET || @@ -143,6 +128,7 @@ static void ip_expire(struct timer_list *t) struct sk_buff *head = NULL; struct net *net; struct ipq *qp; + int refs = 1; qp = container_of(frag, struct ipq, q); net = qp->q.fqdir->net; @@ -159,7 +145,7 @@ static void ip_expire(struct timer_list *t) goto out; qp->q.flags |= INET_FRAG_DROP; - ipq_kill(qp); + inet_frag_kill(&qp->q, &refs); __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS); __IP_INC_STATS(net, IPSTATS_MIB_REASMTIMEOUT); @@ -202,7 +188,7 @@ out: out_rcu_unlock: rcu_read_unlock(); kfree_skb_reason(head, reason); - ipq_put(qp); + inet_frag_putn(&qp->q, refs); } /* Find the correct entry in the "incomplete datagrams" queue for @@ -278,7 +264,7 @@ static int ip_frag_reinit(struct ipq *qp) } /* Add new segment to existing queue. 
*/ -static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) +static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb, int *refs) { struct net *net = qp->q.fqdir->net; int ihl, end, flags, offset; @@ -298,7 +284,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) && unlikely(ip_frag_too_far(qp)) && unlikely(err = ip_frag_reinit(qp))) { - ipq_kill(qp); + inet_frag_kill(&qp->q, refs); goto err; } @@ -382,10 +368,10 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) unsigned long orefdst = skb->_skb_refdst; skb->_skb_refdst = 0UL; - err = ip_frag_reasm(qp, skb, prev_tail, dev); + err = ip_frag_reasm(qp, skb, prev_tail, dev, refs); skb->_skb_refdst = orefdst; if (err) - inet_frag_kill(&qp->q); + inet_frag_kill(&qp->q, refs); return err; } @@ -402,7 +388,7 @@ insert_error: err = -EINVAL; __IP_INC_STATS(net, IPSTATS_MIB_REASM_OVERLAPS); discard_qp: - inet_frag_kill(&qp->q); + inet_frag_kill(&qp->q, refs); __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS); err: kfree_skb_reason(skb, reason); @@ -416,7 +402,8 @@ static bool ip_frag_coalesce_ok(const struct ipq *qp) /* Build a new IP datagram from all its fragments. */ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, - struct sk_buff *prev_tail, struct net_device *dev) + struct sk_buff *prev_tail, struct net_device *dev, + int *refs) { struct net *net = qp->q.fqdir->net; struct iphdr *iph; @@ -424,7 +411,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, int len, err; u8 ecn; - ipq_kill(qp); + inet_frag_kill(&qp->q, refs); ecn = ip_frag_ecn_table[qp->ecn]; if (unlikely(ecn == 0xff)) { @@ -496,18 +483,21 @@ int ip_defrag(struct net *net, struct sk_buff *skb, u32 user) __IP_INC_STATS(net, IPSTATS_MIB_REASMREQDS); /* Lookup (or create) queue header */ + rcu_read_lock(); qp = ip_find(net, ip_hdr(skb), user, vif); if (qp) { - int ret; + int ret, refs = 0; spin_lock(&qp->q.lock); - ret = ip_frag_queue(qp, skb); + ret = ip_frag_queue(qp, skb, &refs); spin_unlock(&qp->q.lock); - ipq_put(qp); + rcu_read_unlock(); + inet_frag_putn(&qp->q, refs); return ret; } + rcu_read_unlock(); __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS); kfree_skb(skb); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index ea7a260bec8a..6e18d7ec5062 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -75,7 +75,6 @@ #include <net/checksum.h> #include <net/gso.h> #include <net/inetpeer.h> -#include <net/inet_ecn.h> #include <net/lwtunnel.h> #include <net/inet_dscp.h> #include <linux/bpf-cgroup.h> @@ -1640,7 +1639,7 @@ void ip_send_unicast_reply(struct sock *sk, const struct sock *orig_sk, if (IS_ERR(rt)) return; - inet_sk(sk)->tos = arg->tos & ~INET_ECN_MASK; + inet_sk(sk)->tos = arg->tos; sk->sk_protocol = ip_hdr(skb)->protocol; sk->sk_bound_dev_if = arg->bound_dev_if; diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 4b06dc7e04f2..1024f961ec9a 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -40,6 +40,7 @@ #include <net/xfrm.h> #include <net/net_namespace.h> #include <net/netns/generic.h> +#include <net/netdev_lock.h> #include <net/rtnetlink.h> #include <net/udp.h> #include <net/dst_metadata.h> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index eb5a60c7a9cc..989c3c3d8e75 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3693,7 +3693,7 @@ EXPORT_SYMBOL(tcp_sock_set_keepcnt); int tcp_set_window_clamp(struct sock *sk, int val) { - u32 old_window_clamp, new_window_clamp; + u32 old_window_clamp, new_window_clamp, new_rcv_ssthresh; 
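/*
 * Editorial sketch, not part of this patch: the hunk below avoids
 * clamp(new_window_clamp, tp->rcv_ssthresh, tp->rcv_wnd), whose low
 * bound may exceed its high bound, which clamp() does not allow. The
 * min()/max() pair only ever raises rcv_ssthresh, capped at the current
 * receive window. In isolation (hypothetical helper name and plain u32
 * operands are assumptions for illustration):
 *
 *	static u32 demo_raise_rcv_ssthresh(u32 cur, u32 clamp_val, u32 rcv_wnd)
 *	{
 *		u32 capped = min(rcv_wnd, clamp_val);	 never above rcv_wnd
 *
 *		return max(capped, cur);		 never shrink
 *	}
 */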
struct tcp_sock *tp = tcp_sk(sk); if (!val) { @@ -3714,12 +3714,12 @@ int tcp_set_window_clamp(struct sock *sk, int val) /* Need to apply the reserved mem provisioning only * when shrinking the window clamp. */ - if (new_window_clamp < old_window_clamp) + if (new_window_clamp < old_window_clamp) { __tcp_adjust_rcv_ssthresh(sk, new_window_clamp); - else - tp->rcv_ssthresh = clamp(new_window_clamp, - tp->rcv_ssthresh, - tp->rcv_wnd); + } else { + new_rcv_ssthresh = min(tp->rcv_wnd, new_window_clamp); + tp->rcv_ssthresh = max(new_rcv_ssthresh, tp->rcv_ssthresh); + } return 0; } @@ -4138,7 +4138,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_rcv_wscale = tp->rx_opt.rcv_wscale; } - if (tp->ecn_flags & TCP_ECN_OK) + if (tcp_ecn_mode_any(tp)) info->tcpi_options |= TCPI_OPT_ECN; if (tp->ecn_flags & TCP_ECN_SEEN) info->tcpi_options |= TCPI_OPT_ECN_SEEN; diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index 8a45a4aea933..03abe0848420 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -90,7 +90,7 @@ __bpf_kfunc static void dctcp_init(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); - if ((tp->ecn_flags & TCP_ECN_OK) || + if (tcp_ecn_mode_any(tp) || (sk->sk_state == TCP_LISTEN || sk->sk_state == TCP_CLOSE)) { struct dctcp *ca = inet_csk_ca(sk); diff --git a/net/ipv4/tcp_dctcp.h b/net/ipv4/tcp_dctcp.h index d69a77cbd0c7..4b0259111d81 100644 --- a/net/ipv4/tcp_dctcp.h +++ b/net/ipv4/tcp_dctcp.h @@ -28,7 +28,7 @@ static inline void dctcp_ece_ack_update(struct sock *sk, enum tcp_ca_event evt, */ if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) { dctcp_ece_ack_cwr(sk, *ce_state); - __tcp_send_ack(sk, *prior_rcv_nxt); + __tcp_send_ack(sk, *prior_rcv_nxt, 0); } inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW; } diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index f428ecf9120f..45e174b8cd22 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -83,7 +83,7 @@ static int tcp_diag_put_md5sig(struct sk_buff *skb, #endif static int tcp_diag_put_ulp(struct sk_buff *skb, struct sock *sk, - const struct tcp_ulp_ops *ulp_ops) + const struct tcp_ulp_ops *ulp_ops, bool net_admin) { struct nlattr *nest; int err; @@ -97,7 +97,7 @@ static int tcp_diag_put_ulp(struct sk_buff *skb, struct sock *sk, goto nla_failure; if (ulp_ops->get_info) - err = ulp_ops->get_info(sk, skb); + err = ulp_ops->get_info(sk, skb, net_admin); if (err) goto nla_failure; @@ -113,6 +113,7 @@ static int tcp_diag_get_aux(struct sock *sk, bool net_admin, struct sk_buff *skb) { struct inet_connection_sock *icsk = inet_csk(sk); + const struct tcp_ulp_ops *ulp_ops; int err = 0; #ifdef CONFIG_TCP_MD5SIG @@ -129,15 +130,13 @@ static int tcp_diag_get_aux(struct sock *sk, bool net_admin, } #endif - if (net_admin) { - const struct tcp_ulp_ops *ulp_ops; - - ulp_ops = icsk->icsk_ulp_ops; - if (ulp_ops) - err = tcp_diag_put_ulp(skb, sk, ulp_ops); - if (err) + ulp_ops = icsk->icsk_ulp_ops; + if (ulp_ops) { + err = tcp_diag_put_ulp(skb, sk, ulp_ops, net_admin); + if (err < 0) return err; } + return 0; } @@ -164,7 +163,7 @@ static size_t tcp_diag_get_aux_size(struct sock *sk, bool net_admin) } #endif - if (net_admin && sk_fullsock(sk)) { + if (sk_fullsock(sk)) { const struct tcp_ulp_ops *ulp_ops; ulp_ops = icsk->icsk_ulp_ops; @@ -172,7 +171,7 @@ static size_t tcp_diag_get_aux_size(struct sock *sk, bool net_admin) size += nla_total_size(0) + nla_total_size(TCP_ULP_NAME_MAX); if (ulp_ops->get_info_size) - size += ulp_ops->get_info_size(sk); + size += ulp_ops->get_info_size(sk, net_admin); } } 
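/*
 * Editorial sketch, not part of this patch: with net_admin now plumbed
 * into the ULP diag hooks above, an implementation (hypothetical
 * example, not a real ULP) can size privileged attributes conditionally:
 *
 *	static size_t demo_ulp_get_info_size(const struct sock *sk,
 *					     bool net_admin)
 *	{
 *		size_t size = nla_total_size(sizeof(u32));
 *
 *		if (net_admin)
 *			size += nla_total_size(sizeof(u64));
 *		return size;
 *	}
 */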
return size; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 4e2212348088..72382ee4456d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -102,6 +102,7 @@ int sysctl_tcp_max_orphans __read_mostly = NR_FILE; #define FLAG_NO_CHALLENGE_ACK 0x8000 /* do not call tcp_send_challenge_ack() */ #define FLAG_ACK_MAYBE_DELAYED 0x10000 /* Likely a delayed ACK */ #define FLAG_DSACK_TLP 0x20000 /* DSACK for tail loss probe */ +#define FLAG_TS_PROGRESS 0x40000 /* Positive timestamp delta */ #define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED) #define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED) @@ -333,15 +334,14 @@ static void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks) static bool tcp_in_quickack_mode(struct sock *sk) { const struct inet_connection_sock *icsk = inet_csk(sk); - const struct dst_entry *dst = __sk_dst_get(sk); - return (dst && dst_metric(dst, RTAX_QUICKACK)) || + return icsk->icsk_ack.dst_quick_ack || (icsk->icsk_ack.quick && !inet_csk_in_pingpong_mode(sk)); } static void tcp_ecn_queue_cwr(struct tcp_sock *tp) { - if (tp->ecn_flags & TCP_ECN_OK) + if (tcp_ecn_mode_rfc3168(tp)) tp->ecn_flags |= TCP_ECN_QUEUE_CWR; } @@ -364,10 +364,13 @@ static void tcp_ecn_withdraw_cwr(struct tcp_sock *tp) tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR; } -static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb) +static void tcp_data_ecn_check(struct sock *sk, const struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); + if (tcp_ecn_disabled(tp)) + return; + switch (TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK) { case INET_ECN_NOT_ECT: /* Funny extension: if ECT is not set on a segment, @@ -396,31 +399,39 @@ static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb) } } -static void tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb) -{ - if (tcp_sk(sk)->ecn_flags & TCP_ECN_OK) - __tcp_ecn_check_ce(sk, skb); -} - static void tcp_ecn_rcv_synack(struct tcp_sock *tp, const struct tcphdr *th) { - if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || th->cwr)) - tp->ecn_flags &= ~TCP_ECN_OK; + if (tcp_ecn_mode_rfc3168(tp) && (!th->ece || th->cwr)) + tcp_ecn_mode_set(tp, TCP_ECN_DISABLED); } static void tcp_ecn_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th) { - if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || !th->cwr)) - tp->ecn_flags &= ~TCP_ECN_OK; + if (tcp_ecn_mode_rfc3168(tp) && (!th->ece || !th->cwr)) + tcp_ecn_mode_set(tp, TCP_ECN_DISABLED); } static bool tcp_ecn_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr *th) { - if (th->ece && !th->syn && (tp->ecn_flags & TCP_ECN_OK)) + if (th->ece && !th->syn && tcp_ecn_mode_rfc3168(tp)) return true; return false; } +static void tcp_count_delivered_ce(struct tcp_sock *tp, u32 ecn_count) +{ + tp->delivered_ce += ecn_count; +} + +/* Updates the delivered and delivered_ce counts */ +static void tcp_count_delivered(struct tcp_sock *tp, u32 delivered, + bool ece_ack) +{ + tp->delivered += delivered; + if (ece_ack) + tcp_count_delivered_ce(tp, delivered); +} + /* Buffer size and advertised window tuning. * * 1. Tuning sk->sk_sndbuf, when connection enters established state. 
@@ -859,7 +870,7 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb) icsk->icsk_ack.lrcvtime = now; tcp_save_lrcv_flowlabel(sk, skb); - tcp_ecn_check_ce(sk, skb); + tcp_data_ecn_check(sk, skb); if (skb->len >= 128) tcp_grow_window(sk, skb, true); @@ -1156,15 +1167,6 @@ void tcp_mark_skb_lost(struct sock *sk, struct sk_buff *skb) } } -/* Updates the delivered and delivered_ce counts */ -static void tcp_count_delivered(struct tcp_sock *tp, u32 delivered, - bool ece_ack) -{ - tp->delivered += delivered; - if (ece_ack) - tp->delivered_ce += delivered; -} - /* This procedure tags the retransmission queue when SACKs arrive. * * We have three tag bits: SACKED(S), RETRANS(R) and LOST(L). @@ -3816,8 +3818,16 @@ static void tcp_store_ts_recent(struct tcp_sock *tp) tp->rx_opt.ts_recent_stamp = ktime_get_seconds(); } -static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) +static int __tcp_replace_ts_recent(struct tcp_sock *tp, s32 tstamp_delta) +{ + tcp_store_ts_recent(tp); + return tstamp_delta > 0 ? FLAG_TS_PROGRESS : 0; +} + +static int tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) { + s32 delta; + if (tp->rx_opt.saw_tstamp && !after(seq, tp->rcv_wup)) { /* PAWS bug workaround wrt. ACK frames, the PAWS discard * extra check below makes sure this can only happen @@ -3826,9 +3836,13 @@ static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) * Not only, also it occurs for expired timestamps. */ - if (tcp_paws_check(&tp->rx_opt, 0)) - tcp_store_ts_recent(tp); + if (tcp_paws_check(&tp->rx_opt, 0)) { + delta = tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent; + return __tcp_replace_ts_recent(tp, delta); + } } + + return 0; } /* This routine deals with acks during a TLP episode and ends an episode by @@ -3864,12 +3878,23 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) } } -static inline void tcp_in_ack_event(struct sock *sk, u32 flags) +static void tcp_in_ack_event(struct sock *sk, int flag) { const struct inet_connection_sock *icsk = inet_csk(sk); - if (icsk->icsk_ca_ops->in_ack_event) - icsk->icsk_ca_ops->in_ack_event(sk, flags); + if (icsk->icsk_ca_ops->in_ack_event) { + u32 ack_ev_flags = 0; + + if (flag & FLAG_WIN_UPDATE) + ack_ev_flags |= CA_ACK_WIN_UPDATE; + if (flag & FLAG_SLOWPATH) { + ack_ev_flags |= CA_ACK_SLOWPATH; + if (flag & FLAG_ECE) + ack_ev_flags |= CA_ACK_ECE; + } + + icsk->icsk_ca_ops->in_ack_event(sk, ack_ev_flags); + } } /* Congestion control has updated the cwnd already. So if we're in @@ -3974,7 +3999,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) * is in window. 
*/ if (flag & FLAG_UPDATE_TS_RECENT) - tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); + flag |= tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); if ((flag & (FLAG_SLOWPATH | FLAG_SND_UNA_ADVANCED)) == FLAG_SND_UNA_ADVANCED) { @@ -3986,12 +4011,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) tcp_snd_una_update(tp, ack); flag |= FLAG_WIN_UPDATE; - tcp_in_ack_event(sk, CA_ACK_WIN_UPDATE); - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPACKS); } else { - u32 ack_ev_flags = CA_ACK_SLOWPATH; - if (ack_seq != TCP_SKB_CB(skb)->end_seq) flag |= FLAG_DATA; else @@ -4003,19 +4024,12 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una, &sack_state); - if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb))) { + if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb))) flag |= FLAG_ECE; - ack_ev_flags |= CA_ACK_ECE; - } if (sack_state.sack_delivered) tcp_count_delivered(tp, sack_state.sack_delivered, flag & FLAG_ECE); - - if (flag & FLAG_WIN_UPDATE) - ack_ev_flags |= CA_ACK_WIN_UPDATE; - - tcp_in_ack_event(sk, ack_ev_flags); } /* This is a deviation from RFC3168 since it states that: @@ -4042,6 +4056,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) tcp_rack_update_reo_wnd(sk, &rs); + tcp_in_ack_event(sk, flag); + if (tp->tlp_high_seq) tcp_process_tlp_ack(sk, ack, flag); @@ -4073,6 +4089,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) return 1; no_queue: + tcp_in_ack_event(sk, flag); /* If data was DSACKed, see if we can undo a cwnd reduction. */ if (flag & FLAG_DSACKING_ACK) { tcp_fastretrans_alert(sk, prior_snd_una, num_dupack, &flag, @@ -5020,7 +5037,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) bool fragstolen; tcp_save_lrcv_flowlabel(sk, skb); - tcp_ecn_check_ce(sk, skb); + tcp_data_ecn_check(sk, skb); if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFODROP); @@ -6157,6 +6174,8 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb) TCP_SKB_CB(skb)->seq == tp->rcv_nxt && !after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) { int tcp_header_len = tp->tcp_header_len; + s32 delta = 0; + int flag = 0; /* Timestamp header prediction: tcp_header_len * is automatically equal to th->doff*4 due to pred_flags @@ -6169,8 +6188,10 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb) if (!tcp_parse_aligned_timestamp(tp, th)) goto slow_path; + delta = tp->rx_opt.rcv_tsval - + tp->rx_opt.ts_recent; /* If PAWS failed, check it more carefully in slow path */ - if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) < 0) + if (delta < 0) goto slow_path; /* DO NOT update ts_recent here, if checksum fails @@ -6190,12 +6211,13 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb) if (tcp_header_len == (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) && tp->rcv_nxt == tp->rcv_wup) - tcp_store_ts_recent(tp); + flag |= __tcp_replace_ts_recent(tp, + delta); /* We know that such packets are checksummed * on entry. 
*/ - tcp_ack(sk, skb, 0); + tcp_ack(sk, skb, flag); __kfree_skb(skb); tcp_data_snd_check(sk); /* When receiving pure ack in fast path, update @@ -6226,7 +6248,8 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb) if (tcp_header_len == (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) && tp->rcv_nxt == tp->rcv_wup) - tcp_store_ts_recent(tp); + flag |= __tcp_replace_ts_recent(tp, + delta); tcp_rcv_rtt_measure_ts(sk, skb); @@ -6241,7 +6264,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb) if (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) { /* Well, only one small jumplet in fast path... */ - tcp_ack(sk, skb, FLAG_DATA); + tcp_ack(sk, skb, flag | FLAG_DATA); tcp_data_snd_check(sk); if (!inet_csk_ack_scheduled(sk)) goto no_ack; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index d9405b012dff..4fa4fbb0ad12 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -66,6 +66,7 @@ #include <net/transp_v6.h> #include <net/ipv6.h> #include <net/inet_common.h> +#include <net/inet_ecn.h> #include <net/timewait_sock.h> #include <net/xfrm.h> #include <net/secure_seq.h> @@ -887,7 +888,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb, BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) != offsetof(struct inet_timewait_sock, tw_bound_dev_if)); - arg.tos = ip_hdr(skb)->tos; + /* ECN bits of TW reset are cleared */ + arg.tos = ip_hdr(skb)->tos & ~INET_ECN_MASK; arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL); local_bh_disable(); local_lock_nested_bh(&ipv4_tcp_sk.bh_lock); @@ -1033,11 +1035,21 @@ static void tcp_v4_send_ack(const struct sock *sk, local_bh_enable(); } -static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) +static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb, + enum tcp_tw_status tw_status) { struct inet_timewait_sock *tw = inet_twsk(sk); struct tcp_timewait_sock *tcptw = tcp_twsk(sk); struct tcp_key key = {}; + u8 tos = tw->tw_tos; + + /* Clear only the ECN bits of TW ACKs sent for out-of-window data or + * PAWS-rejected segments, while leaving the ECN bits of other TW ACKs + * intact, so those ACKs are not placed in a different service queue + * (Classic rather than L4S). + */ + if (tw_status == TCP_TW_ACK_OOW) + tos &= ~INET_ECN_MASK; + #ifdef CONFIG_TCP_AO struct tcp_ao_info *ao_info; @@ -1081,7 +1093,7 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) READ_ONCE(tcptw->tw_ts_recent), tw->tw_bound_dev_if, &key, tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0, - tw->tw_tos, + tos, tw->tw_txhash); inet_twsk_put(tw); @@ -1151,6 +1163,7 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, key.type = TCP_KEY_MD5; } + /* Clear the ECN bits of TW ACKs for out-of-window or PAWS-rejected data */ tcp_v4_send_ack(sk, skb, seq, tcp_rsk(req)->rcv_nxt, tcp_synack_window(req) >> inet_rsk(req)->rcv_wscale, tcp_rsk_tsval(tcp_rsk(req)), req->ts_recent, 0, &key, inet_rsk(req)->no_srccheck ?
IP_REPLY_ARG_NOSRCCHECK : 0, - ip_hdr(skb)->tos, + ip_hdr(skb)->tos & ~INET_ECN_MASK, READ_ONCE(tcp_rsk(req)->txhash)); if (tcp_key_is_ao(&key)) kfree(key.traffic_key); @@ -2051,7 +2064,8 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb, !((TCP_SKB_CB(tail)->tcp_flags & TCP_SKB_CB(skb)->tcp_flags) & TCPHDR_ACK) || ((TCP_SKB_CB(tail)->tcp_flags ^ - TCP_SKB_CB(skb)->tcp_flags) & (TCPHDR_ECE | TCPHDR_CWR)) || + TCP_SKB_CB(skb)->tcp_flags) & + (TCPHDR_ECE | TCPHDR_CWR | TCPHDR_AE)) || !tcp_skb_can_collapse_rx(tail, skb) || thtail->doff != th->doff || memcmp(thtail + 1, th + 1, hdrlen - sizeof(*th))) @@ -2159,7 +2173,7 @@ static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph, TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + skb->len - th->doff * 4); TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); - TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th); + TCP_SKB_CB(skb)->tcp_flags = tcp_flags_ntohs(th); TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph); TCP_SKB_CB(skb)->sacked = 0; TCP_SKB_CB(skb)->has_rxtstamp = @@ -2174,6 +2188,7 @@ int tcp_v4_rcv(struct sk_buff *skb) { struct net *net = dev_net_rcu(skb->dev); enum skb_drop_reason drop_reason; + enum tcp_tw_status tw_status; int sdif = inet_sdif(skb); int dif = inet_iif(skb); const struct iphdr *iph; @@ -2401,7 +2416,9 @@ do_time_wait: inet_twsk_put(inet_twsk(sk)); goto csum_error; } - switch (tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn)) { + + tw_status = tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn); + switch (tw_status) { case TCP_TW_SYN: { struct sock *sk2 = inet_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo, @@ -2422,7 +2439,8 @@ do_time_wait: /* to ACK */ fallthrough; case TCP_TW_ACK: - tcp_v4_timewait_ack(sk, skb); + case TCP_TW_ACK_OOW: + tcp_v4_timewait_ack(sk, skb, tw_status); break; case TCP_TW_RST: tcp_v4_send_reset(sk, skb, SK_RST_REASON_TCP_TIMEWAIT_SOCKET); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 3cb8f281186b..fb9349be36b8 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -44,7 +44,7 @@ tcp_timewait_check_oow_rate_limit(struct inet_timewait_sock *tw, /* Send ACK. Note, we do not put the bucket, * it will be released by caller. */ - return TCP_TW_ACK; + return TCP_TW_ACK_OOW; } /* We are rate-limiting, so just release the tw sock and drop skb. */ @@ -461,7 +461,9 @@ void tcp_openreq_init_rwin(struct request_sock *req, static void tcp_ecn_openreq_child(struct tcp_sock *tp, const struct request_sock *req) { - tp->ecn_flags = inet_rsk(req)->ecn_ok ? TCP_ECN_OK : 0; + tcp_ecn_mode_set(tp, inet_rsk(req)->ecn_ok ? 
+ TCP_ECN_MODE_RFC3168 : + TCP_ECN_DISABLED); } void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst) diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index ecef16c58c07..934f777f29d3 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -13,12 +13,15 @@ #include <net/tcp.h> #include <net/protocol.h> -static void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq, +static void tcp_gso_tstamp(struct sk_buff *skb, struct sk_buff *gso_skb, unsigned int seq, unsigned int mss) { + u32 flags = skb_shinfo(gso_skb)->tx_flags & SKBTX_ANY_TSTAMP; + u32 ts_seq = skb_shinfo(gso_skb)->tskey; + while (skb) { if (before(ts_seq, seq + mss)) { - skb_shinfo(skb)->tx_flags |= SKBTX_SW_TSTAMP; + skb_shinfo(skb)->tx_flags |= flags; skb_shinfo(skb)->tskey = ts_seq; return; } @@ -139,6 +142,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, struct sk_buff *gso_skb = skb; __sum16 newcheck; bool ooo_okay, copy_destructor; + bool ecn_cwr_mask; __wsum delta; th = tcp_hdr(skb); @@ -193,11 +197,13 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, th = tcp_hdr(skb); seq = ntohl(th->seq); - if (unlikely(skb_shinfo(gso_skb)->tx_flags & SKBTX_SW_TSTAMP)) - tcp_gso_tstamp(segs, skb_shinfo(gso_skb)->tskey, seq, mss); + if (unlikely(skb_shinfo(gso_skb)->tx_flags & SKBTX_ANY_TSTAMP)) + tcp_gso_tstamp(segs, gso_skb, seq, mss); newcheck = ~csum_fold(csum_add(csum_unfold(th->check), delta)); + ecn_cwr_mask = !!(skb_shinfo(gso_skb)->gso_type & SKB_GSO_TCP_ACCECN); + while (skb->next) { th->fin = th->psh = 0; th->check = newcheck; @@ -217,7 +223,8 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, th = tcp_hdr(skb); th->seq = htonl(seq); - th->cwr = 0; + + th->cwr &= ecn_cwr_mask; } /* Following permits TCP Small Queues to work well with GSO : @@ -325,7 +332,7 @@ struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb, th2 = tcp_hdr(p); flush = (__force int)(flags & TCP_FLAG_CWR); flush |= (__force int)((flags ^ tcp_flag_word(th2)) & - ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH)); + ~(TCP_FLAG_FIN | TCP_FLAG_PSH)); flush |= (__force int)(th->ack_seq ^ th2->ack_seq); for (i = sizeof(*th); i < thlen; i += 4) flush |= *(u32 *)((u8 *)th + i) ^ @@ -401,7 +408,7 @@ void tcp_gro_complete(struct sk_buff *skb) shinfo->gso_segs = NAPI_GRO_CB(skb)->count; if (th->cwr) - shinfo->gso_type |= SKB_GSO_TCP_ECN; + shinfo->gso_type |= SKB_GSO_TCP_ACCECN; } EXPORT_SYMBOL(tcp_gro_complete); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 24e56bf96747..e0a4e5432399 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -325,7 +325,7 @@ static void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb) const struct tcp_sock *tp = tcp_sk(sk); TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR; - if (!(tp->ecn_flags & TCP_ECN_OK)) + if (tcp_ecn_disabled(tp)) TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE; else if (tcp_ca_needs_ecn(sk) || tcp_bpf_ca_needs_ecn(sk)) @@ -351,7 +351,7 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb) if (use_ecn) { TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR; - tp->ecn_flags = TCP_ECN_OK; + tcp_ecn_mode_set(tp, TCP_ECN_MODE_RFC3168); if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn) INET_ECN_xmit(sk); } @@ -381,7 +381,7 @@ static void tcp_ecn_send(struct sock *sk, struct sk_buff *skb, { struct tcp_sock *tp = tcp_sk(sk); - if (tp->ecn_flags & TCP_ECN_OK) { + if (tcp_ecn_mode_rfc3168(tp)) { /* Not-retransmitted data segment: set ECT and inject CWR. 
*/ if (skb->len != tcp_header_len && !before(TCP_SKB_CB(skb)->seq, tp->snd_nxt)) { @@ -403,7 +403,7 @@ static void tcp_ecn_send(struct sock *sk, struct sk_buff *skb, /* Constructs common control bits of non-data skb. If SYN/FIN is present, * auto increment end seqno. */ -static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags) +static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u16 flags) { skb->ip_summed = CHECKSUM_PARTIAL; @@ -1395,7 +1395,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, th->seq = htonl(tcb->seq); th->ack_seq = htonl(rcv_nxt); *(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) | - tcb->tcp_flags); + (tcb->tcp_flags & TCPHDR_FLAGS_MASK)); th->check = 0; th->urg_ptr = 0; @@ -1616,8 +1616,8 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, struct sk_buff *buff; int old_factor; long limit; + u16 flags; int nlen; - u8 flags; if (WARN_ON(len > skb->len)) return -EINVAL; @@ -2171,7 +2171,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, { int nlen = skb->len - len; struct sk_buff *buff; - u8 flags; + u16 flags; /* All of a TSO frame must be composed of paged data. */ DEBUG_NET_WARN_ON_ONCE(skb->len != skb->data_len); @@ -4240,7 +4240,7 @@ void tcp_send_delayed_ack(struct sock *sk) } /* This routine sends an ack and also updates the window. */ -void __tcp_send_ack(struct sock *sk, u32 rcv_nxt) +void __tcp_send_ack(struct sock *sk, u32 rcv_nxt, u16 flags) { struct sk_buff *buff; @@ -4269,7 +4269,7 @@ void __tcp_send_ack(struct sock *sk, u32 rcv_nxt) /* Reserve space for headers and prepare control bits. */ skb_reserve(buff, MAX_TCP_HEADER); - tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPHDR_ACK); + tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPHDR_ACK | flags); /* We do not want pure acks influencing TCP Small Queues or fq/pacing * too much. 
@@ -4284,7 +4284,7 @@ EXPORT_SYMBOL_GPL(__tcp_send_ack); void tcp_send_ack(struct sock *sk) { - __tcp_send_ack(sk, tcp_sk(sk)->rcv_nxt); + __tcp_send_ack(sk, tcp_sk(sk)->rcv_nxt, 0); } /* This routine sends a packet with an out of date sequence diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 17c7736d8349..db606f7e4163 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1848,7 +1848,7 @@ static struct sk_buff *__first_packet_length(struct sock *sk, atomic_inc(&sk->sk_drops); __skb_unlink(skb, rcvq); *total += skb->truesize; - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_UDP_CSUM); } else { udp_skb_csum_unnecessary_set(skb); break; @@ -2002,7 +2002,7 @@ try_again: __UDP_INC_STATS(net, UDP_MIB_CSUMERRORS, is_udplite); __UDP_INC_STATS(net, UDP_MIB_INERRORS, is_udplite); atomic_inc(&sk->sk_drops); - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_UDP_CSUM); goto try_again; } @@ -2117,7 +2117,7 @@ csum_copy_err: UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); } - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_UDP_CSUM); /* starting over for a new packet, but check if we need to yield */ cond_resched(); @@ -2891,8 +2891,10 @@ void udp_destroy_sock(struct sock *sk) if (encap_destroy) encap_destroy(sk); } - if (udp_test_bit(ENCAP_ENABLED, sk)) + if (udp_test_bit(ENCAP_ENABLED, sk)) { static_branch_dec(&udp_encap_needed_key); + udp_tunnel_cleanup_gro(sk); + } } } @@ -3804,6 +3806,15 @@ fallback: static int __net_init udp_pernet_init(struct net *net) { +#if IS_ENABLED(CONFIG_NET_UDP_TUNNEL) + int i; + + /* No tunnel is configured */ + for (i = 0; i < ARRAY_SIZE(net->ipv4.udp_tunnel_gro); ++i) { + INIT_HLIST_HEAD(&net->ipv4.udp_tunnel_gro[i].list); + RCU_INIT_POINTER(net->ipv4.udp_tunnel_gro[i].sk, NULL); + } +#endif udp_sysctl_init(net); udp_set_table(net); diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index c1a85b300ee8..088aa8cb8ac0 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -12,6 +12,164 @@ #include <net/udp.h> #include <net/protocol.h> #include <net/inet_common.h> +#include <net/udp_tunnel.h> + +#if IS_ENABLED(CONFIG_NET_UDP_TUNNEL) + +/* + * Dummy GRO tunnel callback, exists mainly to avoid dangling/NULL + * values for the udp tunnel static call. 
+ */ +static struct sk_buff *dummy_gro_rcv(struct sock *sk, + struct list_head *head, + struct sk_buff *skb) +{ + NAPI_GRO_CB(skb)->flush = 1; + return NULL; +} + +typedef struct sk_buff *(*udp_tunnel_gro_rcv_t)(struct sock *sk, + struct list_head *head, + struct sk_buff *skb); + +struct udp_tunnel_type_entry { + udp_tunnel_gro_rcv_t gro_receive; + refcount_t count; +}; + +#define UDP_MAX_TUNNEL_TYPES (IS_ENABLED(CONFIG_GENEVE) + \ + IS_ENABLED(CONFIG_VXLAN) * 2 + \ + IS_ENABLED(CONFIG_NET_FOU) * 2) + +DEFINE_STATIC_CALL(udp_tunnel_gro_rcv, dummy_gro_rcv); +static DEFINE_STATIC_KEY_FALSE(udp_tunnel_static_call); +static struct mutex udp_tunnel_gro_type_lock; +static struct udp_tunnel_type_entry udp_tunnel_gro_types[UDP_MAX_TUNNEL_TYPES]; +static unsigned int udp_tunnel_gro_type_nr; +static DEFINE_SPINLOCK(udp_tunnel_gro_lock); + +void udp_tunnel_update_gro_lookup(struct net *net, struct sock *sk, bool add) +{ + bool is_ipv6 = sk->sk_family == AF_INET6; + struct udp_sock *tup, *up = udp_sk(sk); + struct udp_tunnel_gro *udp_tunnel_gro; + + spin_lock(&udp_tunnel_gro_lock); + udp_tunnel_gro = &net->ipv4.udp_tunnel_gro[is_ipv6]; + if (add) + hlist_add_head(&up->tunnel_list, &udp_tunnel_gro->list); + else + hlist_del_init(&up->tunnel_list); + + if (udp_tunnel_gro->list.first && + !udp_tunnel_gro->list.first->next) { + tup = hlist_entry(udp_tunnel_gro->list.first, struct udp_sock, + tunnel_list); + + rcu_assign_pointer(udp_tunnel_gro->sk, (struct sock *)tup); + } else { + RCU_INIT_POINTER(udp_tunnel_gro->sk, NULL); + } + + spin_unlock(&udp_tunnel_gro_lock); +} +EXPORT_SYMBOL_GPL(udp_tunnel_update_gro_lookup); + +void udp_tunnel_update_gro_rcv(struct sock *sk, bool add) +{ + struct udp_tunnel_type_entry *cur = NULL; + struct udp_sock *up = udp_sk(sk); + int i, old_gro_type_nr; + + if (!up->gro_receive) + return; + + mutex_lock(&udp_tunnel_gro_type_lock); + for (i = 0; i < udp_tunnel_gro_type_nr; i++) + if (udp_tunnel_gro_types[i].gro_receive == up->gro_receive) + cur = &udp_tunnel_gro_types[i]; + + old_gro_type_nr = udp_tunnel_gro_type_nr; + if (add) { + /* + * Update the matching entry, if found, or add a new one + * if needed. + */ + if (cur) { + refcount_inc(&cur->count); + goto out; + } + + if (unlikely(udp_tunnel_gro_type_nr == UDP_MAX_TUNNEL_TYPES)) { + pr_err_once("Too many UDP tunnel types, please increase UDP_MAX_TUNNEL_TYPES\n"); + /* Ensure static call will never be enabled */ + udp_tunnel_gro_type_nr = UDP_MAX_TUNNEL_TYPES + 2; + goto out; + } + + cur = &udp_tunnel_gro_types[udp_tunnel_gro_type_nr++]; + refcount_set(&cur->count, 1); + cur->gro_receive = up->gro_receive; + } else { + /* + * The stack only cleans up tunnels it successfully added, so + * the lookup on removal should never fail.
+ */ + if (WARN_ON_ONCE(!cur)) + goto out; + + if (!refcount_dec_and_test(&cur->count)) + goto out; + + /* avoid gaps, so that the enabled tunnel always has id 0 */ + *cur = udp_tunnel_gro_types[--udp_tunnel_gro_type_nr]; + } + + if (udp_tunnel_gro_type_nr == 1) { + static_call_update(udp_tunnel_gro_rcv, + udp_tunnel_gro_types[0].gro_receive); + static_branch_enable(&udp_tunnel_static_call); + } else if (old_gro_type_nr == 1) { + static_branch_disable(&udp_tunnel_static_call); + static_call_update(udp_tunnel_gro_rcv, dummy_gro_rcv); + } + +out: + mutex_unlock(&udp_tunnel_gro_type_lock); +} +EXPORT_SYMBOL_GPL(udp_tunnel_update_gro_rcv); + +static void udp_tunnel_gro_init(void) +{ + mutex_init(&udp_tunnel_gro_type_lock); +} + +static struct sk_buff *udp_tunnel_gro_rcv(struct sock *sk, + struct list_head *head, + struct sk_buff *skb) +{ + if (static_branch_likely(&udp_tunnel_static_call)) { + if (unlikely(gro_recursion_inc_test(skb))) { + NAPI_GRO_CB(skb)->flush |= 1; + return NULL; + } + return static_call(udp_tunnel_gro_rcv)(sk, head, skb); + } + return call_gro_receive_sk(udp_sk(sk)->gro_receive, sk, head, skb); +} + +#else + +static void udp_tunnel_gro_init(void) {} + +static struct sk_buff *udp_tunnel_gro_rcv(struct sock *sk, + struct list_head *head, + struct sk_buff *skb) +{ + return call_gro_receive_sk(udp_sk(sk)->gro_receive, sk, head, skb); +} + +#endif static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, netdev_features_t features, @@ -321,13 +479,17 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, /* clear destructor to avoid skb_segment assigning it to tail */ copy_dtor = gso_skb->destructor == sock_wfree; - if (copy_dtor) + if (copy_dtor) { gso_skb->destructor = NULL; + gso_skb->sk = NULL; + } segs = skb_segment(gso_skb, features); if (IS_ERR_OR_NULL(segs)) { - if (copy_dtor) + if (copy_dtor) { gso_skb->destructor = sock_wfree; + gso_skb->sk = sk; + } return segs; } @@ -618,7 +780,7 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb, skb_gro_pull(skb, sizeof(struct udphdr)); /* pull encapsulating udp header */ skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr)); - pp = call_gro_receive_sk(udp_sk(sk)->gro_receive, sk, head, skb); + pp = udp_tunnel_gro_rcv(sk, head, skb); out: skb_gro_flush_final(skb, pp, flush); @@ -631,8 +793,13 @@ static struct sock *udp4_gro_lookup_skb(struct sk_buff *skb, __be16 sport, { const struct iphdr *iph = skb_gro_network_header(skb); struct net *net = dev_net_rcu(skb->dev); + struct sock *sk; int iif, sdif; + sk = udp_tunnel_sk(net, false); + if (sk && dport == htons(sk->sk_num)) + return sk; + inet_get_iif_sdif(skb, &iif, &sdif); return __udp4_lib_lookup(net, iph->saddr, sport, @@ -763,5 +930,7 @@ int __init udpv4_offload_init(void) .gro_complete = udp4_gro_complete, }, }; + + udp_tunnel_gro_init(); return inet_add_offload(&net_hotdata.udpv4_offload, IPPROTO_UDP); } diff --git a/net/ipv4/udp_tunnel_core.c b/net/ipv4/udp_tunnel_core.c index 619a53eb672d..c49fceea8313 100644 --- a/net/ipv4/udp_tunnel_core.c +++ b/net/ipv4/udp_tunnel_core.c @@ -58,6 +58,15 @@ error: } EXPORT_SYMBOL(udp_sock_create4); +static bool sk_saddr_any(struct sock *sk) +{ +#if IS_ENABLED(CONFIG_IPV6) + return ipv6_addr_any(&sk->sk_v6_rcv_saddr); +#else + return !sk->sk_rcv_saddr; +#endif +} + void setup_udp_tunnel_sock(struct net *net, struct socket *sock, struct udp_tunnel_sock_cfg *cfg) { @@ -80,6 +89,11 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock, udp_sk(sk)->gro_complete = cfg->gro_complete;
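/*
 * Editorial note, not part of this patch: the two calls added below
 * register the tunnel's gro_receive with the static-call dispatcher
 * and, only when the socket can match any flow, publish it for the
 * single-socket GRO lookup. The wildcard test reduces to (names as in
 * this hunk):
 *
 *	bool wildcard = !sk->sk_dport && !sk->sk_bound_dev_if &&
 *			sk_saddr_any(sock->sk);
 */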
udp_tunnel_encap_enable(sk); + + udp_tunnel_update_gro_rcv(sock->sk, true); + + if (!sk->sk_dport && !sk->sk_bound_dev_if && sk_saddr_any(sock->sk)) + udp_tunnel_update_gro_lookup(net, sock->sk, true); } EXPORT_SYMBOL_GPL(setup_udp_tunnel_sock); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index ac8cc1076536..8b6258819dad 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3209,16 +3209,13 @@ static void add_v4_addrs(struct inet6_dev *idev) struct in6_addr addr; struct net_device *dev; struct net *net = dev_net(idev->dev); - int scope, plen, offset = 0; + int scope, plen; u32 pflags = 0; ASSERT_RTNL(); memset(&addr, 0, sizeof(struct in6_addr)); - /* in case of IP6GRE the dev_addr is an IPv6 and therefore we use only the last 4 bytes */ - if (idev->dev->addr_len == sizeof(struct in6_addr)) - offset = sizeof(struct in6_addr) - 4; - memcpy(&addr.s6_addr32[3], idev->dev->dev_addr + offset, 4); + memcpy(&addr.s6_addr32[3], idev->dev->dev_addr, 4); if (!(idev->dev->flags & IFF_POINTOPOINT) && idev->dev->type == ARPHRD_SIT) { scope = IPV6_ADDR_COMPATv4; @@ -3529,7 +3526,13 @@ static void addrconf_gre_config(struct net_device *dev) return; } - if (dev->type == ARPHRD_ETHER) { + /* Generate the IPv6 link-local address using addrconf_addr_gen(), + * unless we have an IPv4 GRE device not bound to an IP address and + * which is in EUI64 mode (as __ipv6_isatap_ifid() would fail in this + * case). Such devices fall back to add_v4_addrs() instead. + */ + if (!(dev->type == ARPHRD_IPGRE && *(__be32 *)dev->dev_addr == 0 && + idev->cnf.addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64)) { addrconf_addr_gen(idev, true); return; } diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c index ff7e734e335b..7d574f5132e2 100644 --- a/net/ipv6/ila/ila_lwt.c +++ b/net/ipv6/ila/ila_lwt.c @@ -88,13 +88,15 @@ static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb) goto drop; } - if (ilwt->connected) { + /* cache only if we don't create a dst reference loop */ + if (ilwt->connected && orig_dst->lwtstate != dst->lwtstate) { local_bh_disable(); dst_cache_set_ip6(&ilwt->dst_cache, dst, &fl6.saddr); local_bh_enable(); } } + skb_dst_drop(skb); skb_dst_set(skb, dst); return dst_output(net, sk, skb); diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 9be315496459..76ee521189eb 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -35,8 +35,8 @@ u32 inet6_ehashfn(const struct net *net, lhash = (__force u32)laddr->s6_addr32[3]; fhash = __ipv6_addr_jhash(faddr, tcp_ipv6_hash_secret); - return __inet6_ehashfn(lhash, lport, fhash, fport, - inet6_ehash_secret + net_hash_mix(net)); + return lport + __inet6_ehashfn(lhash, 0, fhash, fport, + inet6_ehash_secret + net_hash_mix(net)); } EXPORT_SYMBOL_GPL(inet6_ehashfn); @@ -264,7 +264,8 @@ EXPORT_SYMBOL_GPL(inet6_lookup); static int __inet6_check_established(struct inet_timewait_death_row *death_row, struct sock *sk, const __u16 lport, struct inet_timewait_sock **twp, - bool rcu_lookup) + bool rcu_lookup, + u32 hash) { struct inet_hashinfo *hinfo = death_row->hashinfo; struct inet_sock *inet = inet_sk(sk); @@ -274,8 +275,6 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, struct net *net = sock_net(sk); const int sdif = l3mdev_master_ifindex_by_index(net, dif); const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport); - const unsigned int hash = inet6_ehashfn(net, daddr, lport, saddr, - inet->inet_dport); struct inet_ehash_bucket *head = 
inet_ehash_bucket(hinfo, hash); struct inet_timewait_sock *tw = NULL; const struct hlist_nulls_node *node; @@ -354,11 +353,19 @@ static u64 inet6_sk_port_offset(const struct sock *sk) int inet6_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk) { + const struct in6_addr *daddr = &sk->sk_v6_rcv_saddr; + const struct in6_addr *saddr = &sk->sk_v6_daddr; + const struct inet_sock *inet = inet_sk(sk); + const struct net *net = sock_net(sk); u64 port_offset = 0; + u32 hash_port0; if (!inet_sk(sk)->inet_num) port_offset = inet6_sk_port_offset(sk); - return __inet_hash_connect(death_row, sk, port_offset, + + hash_port0 = inet6_ehashfn(net, daddr, 0, saddr, inet->inet_dport); + + return __inet_hash_connect(death_row, sk, port_offset, hash_port0, __inet6_check_established); } EXPORT_SYMBOL_GPL(inet6_hash_connect); diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index c6ebb6a6d390..957ca98fa70f 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -43,6 +43,7 @@ #include <net/xfrm.h> #include <net/net_namespace.h> #include <net/netns/generic.h> +#include <net/netdev_lock.h> #include <net/rtnetlink.h> #include <net/ipv6.h> diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index d577bf2f3053..581bc6289081 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1386,6 +1386,7 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, } v6_cork->hop_limit = ipc6->hlimit; v6_cork->tclass = ipc6->tclass; + v6_cork->dontfrag = ipc6->dontfrag; if (rt->dst.flags & DST_XFRM_TUNNEL) mtu = READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE ? READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst); @@ -1421,7 +1422,7 @@ static int __ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, size_t length, int transhdrlen, - unsigned int flags, struct ipcm6_cookie *ipc6) + unsigned int flags) { struct sk_buff *skb, *skb_prev = NULL; struct inet_cork *cork = &cork_full->base; @@ -1475,7 +1476,7 @@ static int __ip6_append_data(struct sock *sk, if (headersize + transhdrlen > mtu) goto emsgsize; - if (cork->length + length > mtu - headersize && ipc6->dontfrag && + if (cork->length + length > mtu - headersize && v6_cork->dontfrag && (sk->sk_protocol == IPPROTO_UDP || sk->sk_protocol == IPPROTO_ICMPV6 || sk->sk_protocol == IPPROTO_RAW)) { @@ -1855,7 +1856,7 @@ int ip6_append_data(struct sock *sk, return __ip6_append_data(sk, &sk->sk_write_queue, &inet->cork, &np->cork, sk_page_frag(sk), getfrag, - from, length, transhdrlen, flags, ipc6); + from, length, transhdrlen, flags); } EXPORT_SYMBOL_GPL(ip6_append_data); @@ -2054,13 +2055,11 @@ struct sk_buff *ip6_make_skb(struct sock *sk, ip6_cork_release(cork, &v6_cork); return ERR_PTR(err); } - if (ipc6->dontfrag < 0) - ipc6->dontfrag = inet6_test_bit(DONTFRAG, sk); err = __ip6_append_data(sk, &queue, cork, &v6_cork, ¤t->task_frag, getfrag, from, length + exthdrlen, transhdrlen + exthdrlen, - flags, ipc6); + flags); if (err) { __ip6_flush_pending_frames(sk, &queue, cork, &v6_cork); return ERR_PTR(err); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 170a6ac30889..a04dd1bb4b19 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -52,6 +52,7 @@ #include <net/inet_ecn.h> #include <net/net_namespace.h> #include <net/netns/generic.h> +#include <net/netdev_lock.h> #include <net/dst_metadata.h> #include <net/inet_dscp.h> diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 83c055996fbb..09ec4b0ad7dc 100644 --- 
a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -45,6 +45,7 @@ #include <net/xfrm.h> #include <net/net_namespace.h> #include <net/netns/generic.h> +#include <net/netdev_lock.h> #include <linux/etherdevice.h> #define IP6_VTI_HASH_SIZE_SHIFT 5 diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 4120e67a8ce6..d6bd8f7079bb 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -123,7 +123,8 @@ static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net) #endif static int nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *skb, - struct sk_buff *prev_tail, struct net_device *dev); + struct sk_buff *prev_tail, struct net_device *dev, + int *refs); static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h) { @@ -167,7 +168,8 @@ static struct frag_queue *fq_find(struct net *net, __be32 id, u32 user, static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, - const struct frag_hdr *fhdr, int nhoff) + const struct frag_hdr *fhdr, int nhoff, + int *refs) { unsigned int payload_len; struct net_device *dev; @@ -221,7 +223,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, * this case. -DaveM */ pr_debug("end of fragment not rounded to 8 bytes.\n"); - inet_frag_kill(&fq->q); + inet_frag_kill(&fq->q, refs); return -EPROTO; } if (end > fq->q.len) { @@ -287,7 +289,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, unsigned long orefdst = skb->_skb_refdst; skb->_skb_refdst = 0UL; - err = nf_ct_frag6_reasm(fq, skb, prev, dev); + err = nf_ct_frag6_reasm(fq, skb, prev, dev, refs); skb->_skb_refdst = orefdst; /* After queue has assumed skb ownership, only 0 or @@ -301,7 +303,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, return -EINPROGRESS; insert_error: - inet_frag_kill(&fq->q); + inet_frag_kill(&fq->q, refs); err: skb_dst_drop(skb); return -EINVAL; @@ -315,13 +317,14 @@ err: * the last and the first frames arrived and all the bits are here. */ static int nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *skb, - struct sk_buff *prev_tail, struct net_device *dev) + struct sk_buff *prev_tail, struct net_device *dev, + int *refs) { void *reasm_data; int payload_len; u8 ecn; - inet_frag_kill(&fq->q); + inet_frag_kill(&fq->q, refs); ecn = ip_frag_ecn_table[fq->ecn]; if (unlikely(ecn == 0xff)) @@ -372,7 +375,7 @@ static int nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *skb, return 0; err: - inet_frag_kill(&fq->q); + inet_frag_kill(&fq->q, refs); return -EINVAL; } @@ -447,6 +450,7 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) struct frag_hdr *fhdr; struct frag_queue *fq; struct ipv6hdr *hdr; + int refs = 0; u8 prevhdr; /* Jumbo payload inhibits frag. header */ @@ -473,23 +477,26 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) hdr = ipv6_hdr(skb); fhdr = (struct frag_hdr *)skb_transport_header(skb); + rcu_read_lock(); fq = fq_find(net, fhdr->identification, user, hdr, skb->dev ? 
skb->dev->ifindex : 0); if (fq == NULL) { + rcu_read_unlock(); pr_debug("Can't find and can't create new queue\n"); return -ENOMEM; } spin_lock_bh(&fq->q.lock); - ret = nf_ct_frag6_queue(fq, skb, fhdr, nhoff); + ret = nf_ct_frag6_queue(fq, skb, fhdr, nhoff, &refs); if (ret == -EPROTO) { skb->transport_header = savethdr; ret = 0; } spin_unlock_bh(&fq->q.lock); - inet_frag_put(&fq->q); + rcu_read_unlock(); + inet_frag_putn(&fq->q, refs); return ret; } EXPORT_SYMBOL_GPL(nf_ct_frag6_gather); diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index a48be617a8ab..49740898bc13 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -68,7 +68,8 @@ static u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h) static struct inet_frags ip6_frags; static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb, - struct sk_buff *prev_tail, struct net_device *dev); + struct sk_buff *prev_tail, struct net_device *dev, + int *refs); static void ip6_frag_expire(struct timer_list *t) { @@ -105,7 +106,7 @@ fq_find(struct net *net, __be32 id, const struct ipv6hdr *hdr, int iif) static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, struct frag_hdr *fhdr, int nhoff, - u32 *prob_offset) + u32 *prob_offset, int *refs) { struct net *net = dev_net(skb_dst(skb)->dev); int offset, end, fragsize; @@ -220,7 +221,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, unsigned long orefdst = skb->_skb_refdst; skb->_skb_refdst = 0UL; - err = ip6_frag_reasm(fq, skb, prev_tail, dev); + err = ip6_frag_reasm(fq, skb, prev_tail, dev, refs); skb->_skb_refdst = orefdst; return err; } @@ -238,7 +239,7 @@ insert_error: __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASM_OVERLAPS); discard_fq: - inet_frag_kill(&fq->q); + inet_frag_kill(&fq->q, refs); __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMFAILS); err: @@ -254,7 +255,8 @@ err: * the last and the first frames arrived and all the bits are here. */ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb, - struct sk_buff *prev_tail, struct net_device *dev) + struct sk_buff *prev_tail, struct net_device *dev, + int *refs) { struct net *net = fq->q.fqdir->net; unsigned int nhoff; @@ -262,7 +264,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb, int payload_len; u8 ecn; - inet_frag_kill(&fq->q); + inet_frag_kill(&fq->q, refs); ecn = ip_frag_ecn_table[fq->ecn]; if (unlikely(ecn == 0xff)) @@ -303,9 +305,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb, skb_postpush_rcsum(skb, skb_network_header(skb), skb_network_header_len(skb)); - rcu_read_lock(); __IP6_INC_STATS(net, __in6_dev_stats_get(dev, skb), IPSTATS_MIB_REASMOKS); - rcu_read_unlock(); fq->q.rb_fragments = RB_ROOT; fq->q.fragments_tail = NULL; fq->q.last_run_head = NULL; @@ -317,10 +317,8 @@ out_oversize: out_oom: net_dbg_ratelimited("ip6_frag_reasm: no memory for reassembly\n"); out_fail: - rcu_read_lock(); __IP6_INC_STATS(net, __in6_dev_stats_get(dev, skb), IPSTATS_MIB_REASMFAILS); - rcu_read_unlock(); - inet_frag_kill(&fq->q); + inet_frag_kill(&fq->q, refs); return -1; } @@ -377,19 +375,21 @@ static int ipv6_frag_rcv(struct sk_buff *skb) } iif = skb->dev ? 
skb->dev->ifindex : 0; + rcu_read_lock(); fq = fq_find(net, fhdr->identification, hdr, iif); if (fq) { u32 prob_offset = 0; - int ret; + int ret, refs = 0; spin_lock(&fq->q.lock); fq->iif = iif; ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff, - &prob_offset); + &prob_offset, &refs); spin_unlock(&fq->q.lock); - inet_frag_put(&fq->q); + rcu_read_unlock(); + inet_frag_putn(&fq->q, refs); if (prob_offset) { __IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev), IPSTATS_MIB_INHDRERRORS); @@ -398,6 +398,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) } return ret; } + rcu_read_unlock(); __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMFAILS); kfree_skb(skb); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 6f04703fe638..9a0f32acb750 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -51,6 +51,7 @@ #include <net/dsfield.h> #include <net/net_namespace.h> #include <net/netns/generic.h> +#include <net/netdev_lock.h> #include <net/inet_dscp.h> /* diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 85c4820bfe15..e182ee0a2330 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -999,7 +999,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 if (!IS_ERR(dst)) { skb_dst_set(buff, dst); ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, - tclass & ~INET_ECN_MASK, priority); + tclass, priority); TCP_INC_STATS(net, TCP_MIB_OUTSEGS); if (rst) TCP_INC_STATS(net, TCP_MIB_OUTRSTS); @@ -1135,7 +1135,8 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb, trace_tcp_send_reset(sk, skb, reason); tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, 1, - ipv6_get_dsfield(ipv6h), label, priority, txhash, + ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK, + label, priority, txhash, &key); #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) @@ -1155,11 +1156,16 @@ static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq, tclass, label, priority, txhash, key); } -static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) +static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb, + enum tcp_tw_status tw_status) { struct inet_timewait_sock *tw = inet_twsk(sk); struct tcp_timewait_sock *tcptw = tcp_twsk(sk); + u8 tclass = tw->tw_tclass; struct tcp_key key = {}; + + if (tw_status == TCP_TW_ACK_OOW) + tclass &= ~INET_ECN_MASK; #ifdef CONFIG_TCP_AO struct tcp_ao_info *ao_info; @@ -1203,7 +1209,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcp_tw_tsval(tcptw), READ_ONCE(tcptw->tw_ts_recent), tw->tw_bound_dev_if, - &key, tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), + &key, tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority, tw->tw_txhash); #ifdef CONFIG_TCP_AO @@ -1280,7 +1286,8 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, tcp_synack_window(req) >> inet_rsk(req)->rcv_wscale, tcp_rsk_tsval(tcp_rsk(req)), req->ts_recent, sk->sk_bound_dev_if, - &key, ipv6_get_dsfield(ipv6_hdr(skb)), 0, + &key, ipv6_get_dsfield(ipv6_hdr(skb)) & ~INET_ECN_MASK, + 0, READ_ONCE(sk->sk_priority), READ_ONCE(tcp_rsk(req)->txhash)); if (tcp_key_is_ao(&key)) @@ -1731,7 +1738,7 @@ static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr, TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + skb->len - th->doff*4); TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); - TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th); + TCP_SKB_CB(skb)->tcp_flags = tcp_flags_ntohs(th); 
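/*
 * Editorial sketch, not part of this patch: tcp_flags_ntohs() widens
 * the stored flags from the single byte returned by tcp_flag_byte() to
 * a 16-bit value, so the AccECN AE bit above CWR survives. An assumed
 * open-coded equivalent, reading the TCP header's 16-bit
 * doff/reserved/flags word and masking as done elsewhere in this
 * series:
 *
 *	u16 flags = ntohs(*(__be16 *)((u8 *)th + 12)) & TCPHDR_FLAGS_MASK;
 */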
TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr); TCP_SKB_CB(skb)->sacked = 0; TCP_SKB_CB(skb)->has_rxtstamp = @@ -1742,6 +1749,7 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) { struct net *net = dev_net_rcu(skb->dev); enum skb_drop_reason drop_reason; + enum tcp_tw_status tw_status; int sdif = inet6_sdif(skb); int dif = inet6_iif(skb); const struct tcphdr *th; @@ -1962,7 +1970,8 @@ do_time_wait: goto csum_error; } - switch (tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn)) { + tw_status = tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn); + switch (tw_status) { case TCP_TW_SYN: { struct sock *sk2; @@ -1987,7 +1996,8 @@ do_time_wait: /* to ACK */ fallthrough; case TCP_TW_ACK: - tcp_v6_timewait_ack(sk, skb); + case TCP_TW_ACK_OOW: + tcp_v6_timewait_ack(sk, skb, tw_status); break; case TCP_TW_RST: tcp_v6_send_reset(sk, skb, SK_RST_REASON_TCP_TIMEWAIT_SOCKET); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 3a0d6c5a8286..7317f8e053f1 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -46,6 +46,7 @@ #include <net/tcp_states.h> #include <net/ip6_checksum.h> #include <net/ip6_tunnel.h> +#include <net/udp_tunnel.h> #include <net/xfrm.h> #include <net/inet_hashtables.h> #include <net/inet6_hashtables.h> @@ -586,7 +587,7 @@ csum_copy_err: SNMP_INC_STATS(mib, UDP_MIB_CSUMERRORS); SNMP_INC_STATS(mib, UDP_MIB_INERRORS); } - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_UDP_CSUM); /* starting over for a new packet, but check if we need to yield */ cond_resched(); @@ -1825,6 +1826,7 @@ void udpv6_destroy_sock(struct sock *sk) if (udp_test_bit(ENCAP_ENABLED, sk)) { static_branch_dec(&udpv6_encap_needed_key); udp_encap_disable(); + udp_tunnel_cleanup_gro(sk); } } } diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 404212dfc99a..d8445ac1b2e4 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -118,8 +118,13 @@ static struct sock *udp6_gro_lookup_skb(struct sk_buff *skb, __be16 sport, { const struct ipv6hdr *iph = skb_gro_network_header(skb); struct net *net = dev_net_rcu(skb->dev); + struct sock *sk; int iif, sdif; + sk = udp_tunnel_sk(net, true); + if (sk && dport == htons(sk->sk_num)) + return sk; + inet6_get_iif_sdif(skb, &iif, &sdif); return __udp6_lib_lookup(net, &iph->saddr, sport, diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index e83691073496..cf0b66f4fb29 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -25,6 +25,7 @@ #include <net/xfrm.h> #include <net/net_namespace.h> #include <net/netns/generic.h> +#include <net/netdev_lock.h> #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/udp.h> diff --git a/net/llc/llc_s_ac.c b/net/llc/llc_s_ac.c index 06fb8e6944b0..7a0cae9a8111 100644 --- a/net/llc/llc_s_ac.c +++ b/net/llc/llc_s_ac.c @@ -24,7 +24,7 @@ #include <net/llc_s_ac.h> #include <net/llc_s_ev.h> #include <net/llc_sap.h> - +#include <net/sock.h> /** * llc_sap_action_unitdata_ind - forward UI PDU to network layer @@ -40,6 +40,26 @@ int llc_sap_action_unitdata_ind(struct llc_sap *sap, struct sk_buff *skb) return 0; } +static int llc_prepare_and_xmit(struct sk_buff *skb) +{ + struct llc_sap_state_ev *ev = llc_sap_ev(skb); + struct sk_buff *nskb; + int rc; + + rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac); + if (rc) + return rc; + + nskb = skb_clone(skb, GFP_ATOMIC); + if (!nskb) + return -ENOMEM; + + if (skb->sk) + skb_set_owner_w(nskb, skb->sk); + + return dev_queue_xmit(nskb); +} + /** * llc_sap_action_send_ui - sends UI PDU resp to UNITDATA REQ to MAC layer * @sap: SAP @@ 
-52,17 +72,12 @@ int llc_sap_action_unitdata_ind(struct llc_sap *sap, struct sk_buff *skb) int llc_sap_action_send_ui(struct llc_sap *sap, struct sk_buff *skb) { struct llc_sap_state_ev *ev = llc_sap_ev(skb); - int rc; llc_pdu_header_init(skb, LLC_PDU_TYPE_U, ev->saddr.lsap, ev->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_ui_cmd(skb); - rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac); - if (likely(!rc)) { - skb_get(skb); - rc = dev_queue_xmit(skb); - } - return rc; + + return llc_prepare_and_xmit(skb); } /** @@ -77,17 +92,12 @@ int llc_sap_action_send_ui(struct llc_sap *sap, struct sk_buff *skb) int llc_sap_action_send_xid_c(struct llc_sap *sap, struct sk_buff *skb) { struct llc_sap_state_ev *ev = llc_sap_ev(skb); - int rc; llc_pdu_header_init(skb, LLC_PDU_TYPE_U_XID, ev->saddr.lsap, ev->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_xid_cmd(skb, LLC_XID_NULL_CLASS_2, 0); - rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac); - if (likely(!rc)) { - skb_get(skb); - rc = dev_queue_xmit(skb); - } - return rc; + + return llc_prepare_and_xmit(skb); } /** @@ -133,17 +143,12 @@ out: int llc_sap_action_send_test_c(struct llc_sap *sap, struct sk_buff *skb) { struct llc_sap_state_ev *ev = llc_sap_ev(skb); - int rc; llc_pdu_header_init(skb, LLC_PDU_TYPE_U, ev->saddr.lsap, ev->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_test_cmd(skb); - rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac); - if (likely(!rc)) { - skb_get(skb); - rc = dev_queue_xmit(skb); - } - return rc; + + return llc_prepare_and_xmit(skb); } int llc_sap_action_send_test_r(struct llc_sap *sap, struct sk_buff *skb) diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c index 299d38e9e863..35349a7f16cb 100644 --- a/net/mac80211/driver-ops.c +++ b/net/mac80211/driver-ops.c @@ -116,8 +116,14 @@ void drv_remove_interface(struct ieee80211_local *local, sdata->flags &= ~IEEE80211_SDATA_IN_DRIVER; - /* Remove driver debugfs entries */ - ieee80211_debugfs_recreate_netdev(sdata, sdata->vif.valid_links); + /* + * Remove driver debugfs entries. + * The virtual monitor interface doesn't get a debugfs + * entry, so it's exempt here. + */ + if (sdata != rcu_access_pointer(local->monitor_sdata)) + ieee80211_debugfs_recreate_netdev(sdata, + sdata->vif.valid_links); trace_drv_remove_interface(local, sdata); local->ops->remove_interface(&local->hw, &sdata->vif); diff --git a/net/mac80211/eht.c b/net/mac80211/eht.c index 7a3116c36df9..fd41046e3b68 100644 --- a/net/mac80211/eht.c +++ b/net/mac80211/eht.c @@ -2,7 +2,7 @@ /* * EHT handling * - * Copyright(c) 2021-2024 Intel Corporation + * Copyright(c) 2021-2025 Intel Corporation */ #include "ieee80211_i.h" @@ -76,6 +76,13 @@ ieee80211_eht_cap_ie_to_sta_eht_cap(struct ieee80211_sub_if_data *sdata, link_sta->cur_max_bandwidth = ieee80211_sta_cap_rx_bw(link_sta); link_sta->pub->bandwidth = ieee80211_sta_cur_vht_bw(link_sta); + /* + * The MPDU length bits are reserved on all but 2.4 GHz and get set via + * VHT (5 GHz) or HE (6 GHz) capabilities. 
+ */ + if (sband->band != NL80211_BAND_2GHZ) + return; + switch (u8_get_bits(eht_cap->eht_cap_elem.mac_cap_info[0], IEEE80211_EHT_MAC_CAP0_MAX_MPDU_LEN_MASK)) { case IEEE80211_EHT_MAC_CAP0_MAX_MPDU_LEN_11454: diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 33c7c62d19f5..b0423046028c 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1208,16 +1208,17 @@ void ieee80211_del_virtual_monitor(struct ieee80211_local *local) return; } - RCU_INIT_POINTER(local->monitor_sdata, NULL); - mutex_unlock(&local->iflist_mtx); - - synchronize_net(); - + clear_bit(SDATA_STATE_RUNNING, &sdata->state); ieee80211_link_release_channel(&sdata->deflink); if (ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) drv_remove_interface(local, sdata); + RCU_INIT_POINTER(local->monitor_sdata, NULL); + mutex_unlock(&local->iflist_mtx); + + synchronize_net(); + kfree(sdata); } diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 23d85a1abbc5..2b9a36e02c62 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -5197,6 +5197,7 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link, parse_params.start = bss_ies->data; parse_params.len = bss_ies->len; parse_params.bss = cbss; + parse_params.link_id = -1; bss_elems = ieee802_11_parse_elems_full(&parse_params); if (!bss_elems) { ret = false; diff --git a/net/mac80211/parse.c b/net/mac80211/parse.c index cd318c1c67be..6da39c864f45 100644 --- a/net/mac80211/parse.c +++ b/net/mac80211/parse.c @@ -47,6 +47,9 @@ struct ieee80211_elems_parse { /* The EPCS Multi-Link element in the original elements */ const struct element *ml_epcs_elem; + bool multi_link_inner; + bool skip_vendor; + /* * scratch buffer that can be used for various element parsing related * tasks, e.g., element de-fragmentation etc. 
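The skip_vendor and multi_link_inner flags added above steer the parse.c rework in the hunks that follow: elements are parsed in an outer pass, then a single inner pass (the non-transmitted BSS profile or the basic Multi-Link per-STA profile) re-parses and overrides. A minimal user-space sketch of that two-pass override idea, with invented names and a toy TLV layout rather than mac80211's real structures:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct toy_elems {
	uint8_t val[256];	/* first payload byte seen per element ID */
	bool have[256];
};

/* One pass over a toy "ID, length, payload..." buffer; a later pass
 * overwrites what an earlier pass stored, which is the override rule. */
static void toy_parse(struct toy_elems *out, const uint8_t *buf, size_t len)
{
	for (size_t i = 0; i + 2 <= len && i + 2 + buf[i + 1] <= len;
	     i += 2 + buf[i + 1]) {
		out->val[buf[i]] = buf[i + 2];
		out->have[buf[i]] = true;
	}
}

int main(void)
{
	/* outer frame: elem 1 = 0xaa, elem 2 = 0xbb; inner profile: elem 2 = 0xcc */
	const uint8_t outer[] = { 1, 1, 0xaa, 2, 1, 0xbb };
	const uint8_t inner[] = { 2, 1, 0xcc };
	struct toy_elems e = { { 0 } };

	toy_parse(&e, outer, sizeof(outer));	/* outer pass: transmitted BSS */
	toy_parse(&e, inner, sizeof(inner));	/* inner pass: profile overrides */
	printf("elem 2 -> 0x%02x\n", e.val[2]);	/* prints 0xcc */
	return 0;
}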
@@ -152,12 +155,11 @@ ieee80211_parse_extension_element(u32 *crc, switch (le16_get_bits(mle->control, IEEE80211_ML_CONTROL_TYPE)) { case IEEE80211_ML_CONTROL_TYPE_BASIC: - if (elems_parse->ml_basic_elem) { + if (elems_parse->multi_link_inner) { elems->parse_error |= IEEE80211_PARSE_ERR_DUP_NEST_ML_BASIC; break; } - elems_parse->ml_basic_elem = elem; break; case IEEE80211_ML_CONTROL_TYPE_RECONF: elems_parse->ml_reconf_elem = elem; @@ -399,6 +401,9 @@ _ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params, IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; break; case WLAN_EID_VENDOR_SPECIFIC: + if (elems_parse->skip_vendor) + break; + if (elen >= 4 && pos[0] == 0x00 && pos[1] == 0x50 && pos[2] == 0xf2) { /* Microsoft OUI (00:50:F2) */ @@ -866,21 +871,36 @@ ieee80211_mle_get_sta_prof(struct ieee80211_elems_parse *elems_parse, } } -static void ieee80211_mle_parse_link(struct ieee80211_elems_parse *elems_parse, - struct ieee80211_elems_parse_params *params) +static const struct element * +ieee80211_prep_mle_link_parse(struct ieee80211_elems_parse *elems_parse, + struct ieee80211_elems_parse_params *params, + struct ieee80211_elems_parse_params *sub) { struct ieee802_11_elems *elems = &elems_parse->elems; struct ieee80211_mle_per_sta_profile *prof; - struct ieee80211_elems_parse_params sub = { - .mode = params->mode, - .action = params->action, - .from_ap = params->from_ap, - .link_id = -1, - }; - ssize_t ml_len = elems->ml_basic_len; - const struct element *non_inherit = NULL; + const struct element *tmp; + ssize_t ml_len; const u8 *end; + if (params->mode < IEEE80211_CONN_MODE_EHT) + return NULL; + + for_each_element_extid(tmp, WLAN_EID_EXT_EHT_MULTI_LINK, + elems->ie_start, elems->total_len) { + const struct ieee80211_multi_link_elem *mle = + (void *)tmp->data + 1; + + if (!ieee80211_mle_size_ok(tmp->data + 1, tmp->datalen - 1)) + continue; + + if (le16_get_bits(mle->control, IEEE80211_ML_CONTROL_TYPE) != + IEEE80211_ML_CONTROL_TYPE_BASIC) + continue; + + elems_parse->ml_basic_elem = tmp; + break; + } + ml_len = cfg80211_defragment_element(elems_parse->ml_basic_elem, elems->ie_start, elems->total_len, @@ -891,26 +911,26 @@ static void ieee80211_mle_parse_link(struct ieee80211_elems_parse *elems_parse, WLAN_EID_FRAGMENT); if (ml_len < 0) - return; + return NULL; elems->ml_basic = (const void *)elems_parse->scratch_pos; elems->ml_basic_len = ml_len; elems_parse->scratch_pos += ml_len; if (params->link_id == -1) - return; + return NULL; ieee80211_mle_get_sta_prof(elems_parse, params->link_id); prof = elems->prof; if (!prof) - return; + return NULL; /* check if we have the 4 bytes for the fixed part in assoc response */ if (elems->sta_prof_len < sizeof(*prof) + prof->sta_info_len - 1 + 4) { elems->prof = NULL; elems->sta_prof_len = 0; - return; + return NULL; } /* @@ -919,13 +939,17 @@ static void ieee80211_mle_parse_link(struct ieee80211_elems_parse *elems_parse, * the -1 is because the 'sta_info_len' is accounted to as part of the * per-STA profile, but not part of the 'u8 variable[]' portion. 
*/ - sub.start = prof->variable + prof->sta_info_len - 1 + 4; + sub->start = prof->variable + prof->sta_info_len - 1 + 4; end = (const u8 *)prof + elems->sta_prof_len; - sub.len = end - sub.start; + sub->len = end - sub->start; - non_inherit = cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE, - sub.start, sub.len); - _ieee802_11_parse_elems_full(&sub, elems_parse, non_inherit); + sub->mode = params->mode; + sub->action = params->action; + sub->from_ap = params->from_ap; + sub->link_id = -1; + + return cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE, + sub->start, sub->len); } static void @@ -973,15 +997,19 @@ ieee80211_mle_defrag_epcs(struct ieee80211_elems_parse *elems_parse) struct ieee802_11_elems * ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params) { + struct ieee80211_elems_parse_params sub = {}; struct ieee80211_elems_parse *elems_parse; - struct ieee802_11_elems *elems; const struct element *non_inherit = NULL; - u8 *nontransmitted_profile; - int nontransmitted_profile_len = 0; + struct ieee802_11_elems *elems; size_t scratch_len = 3 * params->len; + bool multi_link_inner = false; BUILD_BUG_ON(offsetof(typeof(*elems_parse), elems) != 0); + /* cannot parse for both a specific link and non-transmitted BSS */ + if (WARN_ON(params->link_id >= 0 && params->bss)) + return NULL; + elems_parse = kzalloc(struct_size(elems_parse, scratch, scratch_len), GFP_ATOMIC); if (!elems_parse) @@ -998,34 +1026,51 @@ ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params) ieee80211_clear_tpe(&elems->tpe); ieee80211_clear_tpe(&elems->csa_tpe); - nontransmitted_profile = elems_parse->scratch_pos; - nontransmitted_profile_len = - ieee802_11_find_bssid_profile(params->start, params->len, - elems, params->bss, - nontransmitted_profile); - elems_parse->scratch_pos += nontransmitted_profile_len; - non_inherit = cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE, - nontransmitted_profile, - nontransmitted_profile_len); + /* + * If we're looking for a non-transmitted BSS then we cannot at + * the same time be looking for a second link as the two can only + * appear in the same frame carrying info for different BSSes. + * + * In any case, we only look for one at a time, as encoded by + * the WARN_ON above. 
+ */ + if (params->bss) { + int nontx_len = + ieee802_11_find_bssid_profile(params->start, + params->len, + elems, params->bss, + elems_parse->scratch_pos); + sub.start = elems_parse->scratch_pos; + sub.mode = params->mode; + sub.len = nontx_len; + sub.action = params->action; + sub.link_id = params->link_id; + + /* consume the space used for non-transmitted profile */ + elems_parse->scratch_pos += nontx_len; + + non_inherit = cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE, + sub.start, nontx_len); + } else { + /* must always parse to get elems_parse->ml_basic_elem */ + non_inherit = ieee80211_prep_mle_link_parse(elems_parse, params, + &sub); + multi_link_inner = true; + } + elems_parse->skip_vendor = + cfg80211_find_elem(WLAN_EID_VENDOR_SPECIFIC, + sub.start, sub.len); elems->crc = _ieee802_11_parse_elems_full(params, elems_parse, non_inherit); - /* Override with nontransmitted profile, if found */ - if (nontransmitted_profile_len) { - struct ieee80211_elems_parse_params sub = { - .mode = params->mode, - .start = nontransmitted_profile, - .len = nontransmitted_profile_len, - .action = params->action, - .link_id = params->link_id, - }; - + /* Override with nontransmitted/per-STA profile if found */ + if (sub.len) { + elems_parse->multi_link_inner = multi_link_inner; + elems_parse->skip_vendor = false; _ieee802_11_parse_elems_full(&sub, elems_parse, NULL); } - ieee80211_mle_parse_link(elems_parse, params); - ieee80211_mle_defrag_reconf(elems_parse); ieee80211_mle_defrag_epcs(elems_parse); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index f40e2ea1b09a..f7f89cd1b7d7 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -6,7 +6,7 @@ * Copyright 2007-2010 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright(c) 2015 - 2017 Intel Deutschland GmbH - * Copyright (C) 2018-2024 Intel Corporation + * Copyright (C) 2018-2025 Intel Corporation */ #include <linux/jiffies.h> @@ -3321,8 +3321,8 @@ static void ieee80211_process_sa_query_req(struct ieee80211_sub_if_data *sdata, return; } - if (!ether_addr_equal(mgmt->sa, sdata->deflink.u.mgd.bssid) || - !ether_addr_equal(mgmt->bssid, sdata->deflink.u.mgd.bssid)) { + if (!ether_addr_equal(mgmt->sa, sdata->vif.cfg.ap_addr) || + !ether_addr_equal(mgmt->bssid, sdata->vif.cfg.ap_addr)) { /* Not from the current AP or not associated yet. 
*/ return; } @@ -3338,9 +3338,9 @@ static void ieee80211_process_sa_query_req(struct ieee80211_sub_if_data *sdata, skb_reserve(skb, local->hw.extra_tx_headroom); resp = skb_put_zero(skb, 24); - memcpy(resp->da, mgmt->sa, ETH_ALEN); + memcpy(resp->da, sdata->vif.cfg.ap_addr, ETH_ALEN); memcpy(resp->sa, sdata->vif.addr, ETH_ALEN); - memcpy(resp->bssid, sdata->deflink.u.mgd.bssid, ETH_ALEN); + memcpy(resp->bssid, sdata->vif.cfg.ap_addr, ETH_ALEN); resp->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION); skb_put(skb, 1 + sizeof(resp->u.action.u.sa_query)); diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index a4b4506cd35b..30cdc783999d 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -4,7 +4,7 @@ * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2015 - 2017 Intel Deutschland GmbH - * Copyright (C) 2018-2023 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation */ #include <linux/module.h> @@ -1335,9 +1335,13 @@ static int _sta_info_move_state(struct sta_info *sta, sta->sta.addr, new_state); /* notify the driver before the actual changes so it can - * fail the transition + * fail the transition if the state is increasing. + * The driver is required not to fail when the transition + * is decreasing the state, so first, do all the preparation + * work and only then, notify the driver. */ - if (test_sta_flag(sta, WLAN_STA_INSERTED)) { + if (new_state > sta->sta_state && + test_sta_flag(sta, WLAN_STA_INSERTED)) { int err = drv_sta_state(sta->local, sta->sdata, sta, sta->sta_state, new_state); if (err) @@ -1413,6 +1417,16 @@ static int _sta_info_move_state(struct sta_info *sta, break; } + if (new_state < sta->sta_state && + test_sta_flag(sta, WLAN_STA_INSERTED)) { + int err = drv_sta_state(sta->local, sta->sdata, sta, + sta->sta_state, new_state); + + WARN_ONCE(err, + "Driver is not allowed to fail if the sta_state is transitioning down the list: %d\n", + err); + } + sta->sta_state = new_state; return 0; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 49fa38fbe242..dec6e16b8c7d 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -6,7 +6,7 @@ * Copyright 2007 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2024 Intel Corporation + * Copyright (C) 2018-2025 Intel Corporation * * utilities for mac80211 */ @@ -687,7 +687,7 @@ void __ieee80211_flush_queues(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, unsigned int queues, bool drop) { - if (!local->ops->flush) + if (!local->ops->flush && !drop) return; /* @@ -714,7 +714,8 @@ void __ieee80211_flush_queues(struct ieee80211_local *local, } } - drv_flush(local, sdata, queues, drop); + if (local->ops->flush) + drv_flush(local, sdata, queues, drop); ieee80211_wake_queues_by_reason(&local->hw, queues, IEEE80211_QUEUE_STOP_REASON_FLUSH, @@ -2193,8 +2194,10 @@ int ieee80211_reconfig(struct ieee80211_local *local) ieee80211_reconfig_roc(local); /* Requeue all works */ - list_for_each_entry(sdata, &local->interfaces, list) - wiphy_work_queue(local->hw.wiphy, &sdata->work); + list_for_each_entry(sdata, &local->interfaces, list) { + if (ieee80211_sdata_running(sdata)) + wiphy_work_queue(local->hw.wiphy, &sdata->work); + } } ieee80211_wake_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP, diff --git a/net/mctp/route.c b/net/mctp/route.c index 3f2bd65ff5e3..4c460160914f 
100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -332,8 +332,14 @@ static int mctp_frag_queue(struct mctp_sk_key *key, struct sk_buff *skb) & MCTP_HDR_SEQ_MASK; if (!key->reasm_head) { - key->reasm_head = skb; - key->reasm_tailp = &(skb_shinfo(skb)->frag_list); + /* Since we're manipulating the shared frag_list, ensure it isn't + * shared with any other SKBs. + */ + key->reasm_head = skb_unshare(skb, GFP_ATOMIC); + if (!key->reasm_head) + return -ENOMEM; + + key->reasm_tailp = &(skb_shinfo(key->reasm_head)->frag_list); key->last_seq = this_seq; return 0; } diff --git a/net/mctp/test/route-test.c b/net/mctp/test/route-test.c index 17165b86ce22..06c1897b685a 100644 --- a/net/mctp/test/route-test.c +++ b/net/mctp/test/route-test.c @@ -921,6 +921,114 @@ static void mctp_test_route_input_sk_fail_frag(struct kunit *test) __mctp_route_test_fini(test, dev, rt, sock); } +/* Input route to socket, using a fragmented message created from clones. + */ +static void mctp_test_route_input_cloned_frag(struct kunit *test) +{ + /* 5 packet fragments, forming 2 complete messages */ + const struct mctp_hdr hdrs[5] = { + RX_FRAG(FL_S, 0), + RX_FRAG(0, 1), + RX_FRAG(FL_E, 2), + RX_FRAG(FL_S, 0), + RX_FRAG(FL_E, 1), + }; + struct mctp_test_route *rt; + struct mctp_test_dev *dev; + struct sk_buff *skb[5]; + struct sk_buff *rx_skb; + struct socket *sock; + size_t data_len; + u8 compare[100]; + u8 flat[100]; + size_t total; + void *p; + int rc; + + /* Arbitrary length */ + data_len = 3; + total = data_len + sizeof(struct mctp_hdr); + + __mctp_route_test_init(test, &dev, &rt, &sock, MCTP_NET_ANY); + + /* Create a single skb initially with concatenated packets */ + skb[0] = mctp_test_create_skb(&hdrs[0], 5 * total); + mctp_test_skb_set_dev(skb[0], dev); + memset(skb[0]->data, 0 * 0x11, skb[0]->len); + memcpy(skb[0]->data, &hdrs[0], sizeof(struct mctp_hdr)); + + /* Extract and populate packets */ + for (int i = 1; i < 5; i++) { + skb[i] = skb_clone(skb[i - 1], GFP_ATOMIC); + KUNIT_ASSERT_TRUE(test, skb[i]); + p = skb_pull(skb[i], total); + KUNIT_ASSERT_TRUE(test, p); + skb_reset_network_header(skb[i]); + memcpy(skb[i]->data, &hdrs[i], sizeof(struct mctp_hdr)); + memset(&skb[i]->data[sizeof(struct mctp_hdr)], i * 0x11, data_len); + } + for (int i = 0; i < 5; i++) + skb_trim(skb[i], total); + + /* SOM packets have a type byte to match the socket */ + skb[0]->data[4] = 0; + skb[3]->data[4] = 0; + + skb_dump("pkt1 ", skb[0], false); + skb_dump("pkt2 ", skb[1], false); + skb_dump("pkt3 ", skb[2], false); + skb_dump("pkt4 ", skb[3], false); + skb_dump("pkt5 ", skb[4], false); + + for (int i = 0; i < 5; i++) { + KUNIT_EXPECT_EQ(test, refcount_read(&skb[i]->users), 1); + /* Take a reference so we can check refcounts at the end */ + skb_get(skb[i]); + } + + /* Feed the fragments into MCTP core */ + for (int i = 0; i < 5; i++) { + rc = mctp_route_input(&rt->rt, skb[i]); + KUNIT_EXPECT_EQ(test, rc, 0); + } + + /* Receive first reassembled message */ + rx_skb = skb_recv_datagram(sock->sk, MSG_DONTWAIT, &rc); + KUNIT_EXPECT_EQ(test, rc, 0); + KUNIT_EXPECT_EQ(test, rx_skb->len, 3 * data_len); + rc = skb_copy_bits(rx_skb, 0, flat, rx_skb->len); + for (int i = 0; i < rx_skb->len; i++) + compare[i] = (i / data_len) * 0x11; + /* Set type byte */ + compare[0] = 0; + + KUNIT_EXPECT_MEMEQ(test, flat, compare, rx_skb->len); + KUNIT_EXPECT_EQ(test, refcount_read(&rx_skb->users), 1); + kfree_skb(rx_skb); + + /* Receive second reassembled message */ + rx_skb = skb_recv_datagram(sock->sk, MSG_DONTWAIT, &rc); + KUNIT_EXPECT_EQ(test, rc, 
0); + KUNIT_EXPECT_EQ(test, rx_skb->len, 2 * data_len); + rc = skb_copy_bits(rx_skb, 0, flat, rx_skb->len); + for (int i = 0; i < rx_skb->len; i++) + compare[i] = (i / data_len + 3) * 0x11; + /* Set type byte */ + compare[0] = 0; + + KUNIT_EXPECT_MEMEQ(test, flat, compare, rx_skb->len); + KUNIT_EXPECT_EQ(test, refcount_read(&rx_skb->users), 1); + kfree_skb(rx_skb); + + /* Check input skb refcounts */ + for (int i = 0; i < 5; i++) { + KUNIT_EXPECT_EQ(test, refcount_read(&skb[i]->users), 1); + kfree_skb(skb[i]); + } + + __mctp_route_test_fini(test, dev, rt, sock); +} + #if IS_ENABLED(CONFIG_MCTP_FLOWS) static void mctp_test_flow_init(struct kunit *test, @@ -1144,6 +1252,7 @@ static struct kunit_case mctp_test_cases[] = { KUNIT_CASE(mctp_test_packet_flow), KUNIT_CASE(mctp_test_fragment_flow), KUNIT_CASE(mctp_test_route_output_key_create), + KUNIT_CASE(mctp_test_route_input_cloned_frag), {} }; diff --git a/net/mptcp/Makefile b/net/mptcp/Makefile index bcf1dbf3a432..89bf6c47c818 100644 --- a/net/mptcp/Makefile +++ b/net/mptcp/Makefile @@ -3,7 +3,7 @@ obj-$(CONFIG_MPTCP) += mptcp.o mptcp-y := protocol.o subflow.o options.o token.o crypto.o ctrl.o pm.o diag.o \ mib.o pm_netlink.o sockopt.o pm_userspace.o fastopen.o sched.o \ - mptcp_pm_gen.o + mptcp_pm_gen.o pm_kernel.o obj-$(CONFIG_SYN_COOKIES) += syncookies.o obj-$(CONFIG_INET_MPTCP_DIAG) += mptcp_diag.o diff --git a/net/mptcp/diag.c b/net/mptcp/diag.c index 02205f7994d7..70cf9ebce833 100644 --- a/net/mptcp/diag.c +++ b/net/mptcp/diag.c @@ -12,7 +12,7 @@ #include <net/netlink.h> #include "protocol.h" -static int subflow_get_info(struct sock *sk, struct sk_buff *skb) +static int subflow_get_info(struct sock *sk, struct sk_buff *skb, bool net_admin) { struct mptcp_subflow_context *sf; struct nlattr *start; @@ -56,15 +56,6 @@ static int subflow_get_info(struct sock *sk, struct sk_buff *skb) if (nla_put_u32(skb, MPTCP_SUBFLOW_ATTR_TOKEN_REM, sf->remote_token) || nla_put_u32(skb, MPTCP_SUBFLOW_ATTR_TOKEN_LOC, sf->token) || - nla_put_u32(skb, MPTCP_SUBFLOW_ATTR_RELWRITE_SEQ, - sf->rel_write_seq) || - nla_put_u64_64bit(skb, MPTCP_SUBFLOW_ATTR_MAP_SEQ, sf->map_seq, - MPTCP_SUBFLOW_ATTR_PAD) || - nla_put_u32(skb, MPTCP_SUBFLOW_ATTR_MAP_SFSEQ, - sf->map_subflow_seq) || - nla_put_u32(skb, MPTCP_SUBFLOW_ATTR_SSN_OFFSET, sf->ssn_offset) || - nla_put_u16(skb, MPTCP_SUBFLOW_ATTR_MAP_DATALEN, - sf->map_data_len) || nla_put_u32(skb, MPTCP_SUBFLOW_ATTR_FLAGS, flags) || nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_REM, sf->remote_id) || nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_LOC, subflow_get_local_id(sf))) { @@ -72,6 +63,21 @@ static int subflow_get_info(struct sock *sk, struct sk_buff *skb) goto nla_failure; } + /* Only export seq related counters to user with CAP_NET_ADMIN */ + if (net_admin && + (nla_put_u32(skb, MPTCP_SUBFLOW_ATTR_RELWRITE_SEQ, + sf->rel_write_seq) || + nla_put_u64_64bit(skb, MPTCP_SUBFLOW_ATTR_MAP_SEQ, sf->map_seq, + MPTCP_SUBFLOW_ATTR_PAD) || + nla_put_u32(skb, MPTCP_SUBFLOW_ATTR_MAP_SFSEQ, + sf->map_subflow_seq) || + nla_put_u32(skb, MPTCP_SUBFLOW_ATTR_SSN_OFFSET, sf->ssn_offset) || + nla_put_u16(skb, MPTCP_SUBFLOW_ATTR_MAP_DATALEN, + sf->map_data_len))) { + err = -EMSGSIZE; + goto nla_failure; + } + rcu_read_unlock(); unlock_sock_fast(sk, slow); nla_nest_end(skb, start); @@ -84,22 +90,26 @@ nla_failure: return err; } -static size_t subflow_get_info_size(const struct sock *sk) +static size_t subflow_get_info_size(const struct sock *sk, bool net_admin) { size_t size = 0; size += nla_total_size(0) + /* INET_ULP_INFO_MPTCP */ nla_total_size(4) + /* 
MPTCP_SUBFLOW_ATTR_TOKEN_REM */ nla_total_size(4) + /* MPTCP_SUBFLOW_ATTR_TOKEN_LOC */ - nla_total_size(4) + /* MPTCP_SUBFLOW_ATTR_RELWRITE_SEQ */ - nla_total_size_64bit(8) + /* MPTCP_SUBFLOW_ATTR_MAP_SEQ */ - nla_total_size(4) + /* MPTCP_SUBFLOW_ATTR_MAP_SFSEQ */ - nla_total_size(4) + /* MPTCP_SUBFLOW_ATTR_SSN_OFFSET */ - nla_total_size(2) + /* MPTCP_SUBFLOW_ATTR_MAP_DATALEN */ nla_total_size(4) + /* MPTCP_SUBFLOW_ATTR_FLAGS */ nla_total_size(1) + /* MPTCP_SUBFLOW_ATTR_ID_REM */ nla_total_size(1) + /* MPTCP_SUBFLOW_ATTR_ID_LOC */ 0; + + if (net_admin) + size += nla_total_size(4) + /* MPTCP_SUBFLOW_ATTR_RELWRITE_SEQ */ + nla_total_size_64bit(8) + /* MPTCP_SUBFLOW_ATTR_MAP_SEQ */ + nla_total_size(4) + /* MPTCP_SUBFLOW_ATTR_MAP_SFSEQ */ + nla_total_size(4) + /* MPTCP_SUBFLOW_ATTR_SSN_OFFSET */ + nla_total_size(2) + /* MPTCP_SUBFLOW_ATTR_MAP_DATALEN */ + 0; + return size; } diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 6c8cadf84f31..833839d7286e 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -5,12 +5,384 @@ */ #define pr_fmt(fmt) "MPTCP: " fmt -#include <linux/kernel.h> -#include <net/mptcp.h> #include "protocol.h" - #include "mib.h" -#include "mptcp_pm_gen.h" + +#define ADD_ADDR_RETRANS_MAX 3 + +struct mptcp_pm_add_entry { + struct list_head list; + struct mptcp_addr_info addr; + u8 retrans_times; + struct timer_list add_timer; + struct mptcp_sock *sock; +}; + +/* path manager helpers */ + +/* if sk is ipv4 or ipv6_only allows only same-family local and remote addresses, + * otherwise allow any matching local/remote pair + */ +bool mptcp_pm_addr_families_match(const struct sock *sk, + const struct mptcp_addr_info *loc, + const struct mptcp_addr_info *rem) +{ + bool mptcp_is_v4 = sk->sk_family == AF_INET; + +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + bool loc_is_v4 = loc->family == AF_INET || ipv6_addr_v4mapped(&loc->addr6); + bool rem_is_v4 = rem->family == AF_INET || ipv6_addr_v4mapped(&rem->addr6); + + if (mptcp_is_v4) + return loc_is_v4 && rem_is_v4; + + if (ipv6_only_sock(sk)) + return !loc_is_v4 && !rem_is_v4; + + return loc_is_v4 == rem_is_v4; +#else + return mptcp_is_v4 && loc->family == AF_INET && rem->family == AF_INET; +#endif +} + +bool mptcp_addresses_equal(const struct mptcp_addr_info *a, + const struct mptcp_addr_info *b, bool use_port) +{ + bool addr_equals = false; + + if (a->family == b->family) { + if (a->family == AF_INET) + addr_equals = a->addr.s_addr == b->addr.s_addr; +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + else + addr_equals = ipv6_addr_equal(&a->addr6, &b->addr6); + } else if (a->family == AF_INET) { + if (ipv6_addr_v4mapped(&b->addr6)) + addr_equals = a->addr.s_addr == b->addr6.s6_addr32[3]; + } else if (b->family == AF_INET) { + if (ipv6_addr_v4mapped(&a->addr6)) + addr_equals = a->addr6.s6_addr32[3] == b->addr.s_addr; +#endif + } + + if (!addr_equals) + return false; + if (!use_port) + return true; + + return a->port == b->port; +} + +void mptcp_local_address(const struct sock_common *skc, + struct mptcp_addr_info *addr) +{ + addr->family = skc->skc_family; + addr->port = htons(skc->skc_num); + if (addr->family == AF_INET) + addr->addr.s_addr = skc->skc_rcv_saddr; +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + else if (addr->family == AF_INET6) + addr->addr6 = skc->skc_v6_rcv_saddr; +#endif +} + +void mptcp_remote_address(const struct sock_common *skc, + struct mptcp_addr_info *addr) +{ + addr->family = skc->skc_family; + addr->port = skc->skc_dport; + if (addr->family == AF_INET) + addr->addr.s_addr = skc->skc_daddr; +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + else if 
(addr->family == AF_INET6) + addr->addr6 = skc->skc_v6_daddr; +#endif +} + +static bool mptcp_pm_is_init_remote_addr(struct mptcp_sock *msk, + const struct mptcp_addr_info *remote) +{ + struct mptcp_addr_info mpc_remote; + + mptcp_remote_address((struct sock_common *)msk, &mpc_remote); + return mptcp_addresses_equal(&mpc_remote, remote, remote->port); +} + +bool mptcp_lookup_subflow_by_saddr(const struct list_head *list, + const struct mptcp_addr_info *saddr) +{ + struct mptcp_subflow_context *subflow; + struct mptcp_addr_info cur; + struct sock_common *skc; + + list_for_each_entry(subflow, list, node) { + skc = (struct sock_common *)mptcp_subflow_tcp_sock(subflow); + + mptcp_local_address(skc, &cur); + if (mptcp_addresses_equal(&cur, saddr, saddr->port)) + return true; + } + + return false; +} + +static struct mptcp_pm_add_entry * +mptcp_lookup_anno_list_by_saddr(const struct mptcp_sock *msk, + const struct mptcp_addr_info *addr) +{ + struct mptcp_pm_add_entry *entry; + + lockdep_assert_held(&msk->pm.lock); + + list_for_each_entry(entry, &msk->pm.anno_list, list) { + if (mptcp_addresses_equal(&entry->addr, addr, true)) + return entry; + } + + return NULL; +} + +bool mptcp_remove_anno_list_by_saddr(struct mptcp_sock *msk, + const struct mptcp_addr_info *addr) +{ + struct mptcp_pm_add_entry *entry; + + entry = mptcp_pm_del_add_timer(msk, addr, false); + kfree(entry); + return entry; +} + +bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk) +{ + struct mptcp_pm_add_entry *entry; + struct mptcp_addr_info saddr; + bool ret = false; + + mptcp_local_address((struct sock_common *)sk, &saddr); + + spin_lock_bh(&msk->pm.lock); + list_for_each_entry(entry, &msk->pm.anno_list, list) { + if (mptcp_addresses_equal(&entry->addr, &saddr, true)) { + ret = true; + goto out; + } + } + +out: + spin_unlock_bh(&msk->pm.lock); + return ret; +} + +static void __mptcp_pm_send_ack(struct mptcp_sock *msk, + struct mptcp_subflow_context *subflow, + bool prio, bool backup) +{ + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + bool slow; + + pr_debug("send ack for %s\n", + prio ? "mp_prio" : + (mptcp_pm_should_add_signal(msk) ? 
"add_addr" : "rm_addr")); + + slow = lock_sock_fast(ssk); + if (prio) { + subflow->send_mp_prio = 1; + subflow->request_bkup = backup; + } + + __mptcp_subflow_send_ack(ssk); + unlock_sock_fast(ssk, slow); +} + +void mptcp_pm_send_ack(struct mptcp_sock *msk, + struct mptcp_subflow_context *subflow, + bool prio, bool backup) +{ + spin_unlock_bh(&msk->pm.lock); + __mptcp_pm_send_ack(msk, subflow, prio, backup); + spin_lock_bh(&msk->pm.lock); +} + +void mptcp_pm_addr_send_ack(struct mptcp_sock *msk) +{ + struct mptcp_subflow_context *subflow, *alt = NULL; + + msk_owned_by_me(msk); + lockdep_assert_held(&msk->pm.lock); + + if (!mptcp_pm_should_add_signal(msk) && + !mptcp_pm_should_rm_signal(msk)) + return; + + mptcp_for_each_subflow(msk, subflow) { + if (__mptcp_subflow_active(subflow)) { + if (!subflow->stale) { + mptcp_pm_send_ack(msk, subflow, false, false); + return; + } + + if (!alt) + alt = subflow; + } + } + + if (alt) + mptcp_pm_send_ack(msk, alt, false, false); +} + +int mptcp_pm_mp_prio_send_ack(struct mptcp_sock *msk, + struct mptcp_addr_info *addr, + struct mptcp_addr_info *rem, + u8 bkup) +{ + struct mptcp_subflow_context *subflow; + + pr_debug("bkup=%d\n", bkup); + + mptcp_for_each_subflow(msk, subflow) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + struct mptcp_addr_info local, remote; + + mptcp_local_address((struct sock_common *)ssk, &local); + if (!mptcp_addresses_equal(&local, addr, addr->port)) + continue; + + if (rem && rem->family != AF_UNSPEC) { + mptcp_remote_address((struct sock_common *)ssk, &remote); + if (!mptcp_addresses_equal(&remote, rem, rem->port)) + continue; + } + + __mptcp_pm_send_ack(msk, subflow, true, bkup); + return 0; + } + + return -EINVAL; +} + +static void mptcp_pm_add_timer(struct timer_list *timer) +{ + struct mptcp_pm_add_entry *entry = from_timer(entry, timer, add_timer); + struct mptcp_sock *msk = entry->sock; + struct sock *sk = (struct sock *)msk; + + pr_debug("msk=%p\n", msk); + + if (!msk) + return; + + if (inet_sk_state_load(sk) == TCP_CLOSE) + return; + + if (!entry->addr.id) + return; + + if (mptcp_pm_should_add_signal_addr(msk)) { + sk_reset_timer(sk, timer, jiffies + TCP_RTO_MAX / 8); + goto out; + } + + spin_lock_bh(&msk->pm.lock); + + if (!mptcp_pm_should_add_signal_addr(msk)) { + pr_debug("retransmit ADD_ADDR id=%d\n", entry->addr.id); + mptcp_pm_announce_addr(msk, &entry->addr, false); + mptcp_pm_add_addr_send_ack(msk); + entry->retrans_times++; + } + + if (entry->retrans_times < ADD_ADDR_RETRANS_MAX) + sk_reset_timer(sk, timer, + jiffies + mptcp_get_add_addr_timeout(sock_net(sk))); + + spin_unlock_bh(&msk->pm.lock); + + if (entry->retrans_times == ADD_ADDR_RETRANS_MAX) + mptcp_pm_subflow_established(msk); + +out: + __sock_put(sk); +} + +struct mptcp_pm_add_entry * +mptcp_pm_del_add_timer(struct mptcp_sock *msk, + const struct mptcp_addr_info *addr, bool check_id) +{ + struct mptcp_pm_add_entry *entry; + struct sock *sk = (struct sock *)msk; + struct timer_list *add_timer = NULL; + + spin_lock_bh(&msk->pm.lock); + entry = mptcp_lookup_anno_list_by_saddr(msk, addr); + if (entry && (!check_id || entry->addr.id == addr->id)) { + entry->retrans_times = ADD_ADDR_RETRANS_MAX; + add_timer = &entry->add_timer; + } + if (!check_id && entry) + list_del(&entry->list); + spin_unlock_bh(&msk->pm.lock); + + /* no lock, because sk_stop_timer_sync() is calling del_timer_sync() */ + if (add_timer) + sk_stop_timer_sync(sk, add_timer); + + return entry; +} + +bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, + const struct 
mptcp_addr_info *addr) +{ + struct mptcp_pm_add_entry *add_entry = NULL; + struct sock *sk = (struct sock *)msk; + struct net *net = sock_net(sk); + + lockdep_assert_held(&msk->pm.lock); + + add_entry = mptcp_lookup_anno_list_by_saddr(msk, addr); + + if (add_entry) { + if (WARN_ON_ONCE(mptcp_pm_is_kernel(msk))) + return false; + + sk_reset_timer(sk, &add_entry->add_timer, + jiffies + mptcp_get_add_addr_timeout(net)); + return true; + } + + add_entry = kmalloc(sizeof(*add_entry), GFP_ATOMIC); + if (!add_entry) + return false; + + list_add(&add_entry->list, &msk->pm.anno_list); + + add_entry->addr = *addr; + add_entry->sock = msk; + add_entry->retrans_times = 0; + + timer_setup(&add_entry->add_timer, mptcp_pm_add_timer, 0); + sk_reset_timer(sk, &add_entry->add_timer, + jiffies + mptcp_get_add_addr_timeout(net)); + + return true; +} + +static void mptcp_pm_free_anno_list(struct mptcp_sock *msk) +{ + struct mptcp_pm_add_entry *entry, *tmp; + struct sock *sk = (struct sock *)msk; + LIST_HEAD(free_list); + + pr_debug("msk=%p\n", msk); + + spin_lock_bh(&msk->pm.lock); + list_splice_init(&msk->pm.anno_list, &free_list); + spin_unlock_bh(&msk->pm.lock); + + list_for_each_entry_safe(entry, tmp, &free_list, list) { + sk_stop_timer_sync(sk, &entry->add_timer); + kfree(entry); + } +} /* path manager command handlers */ @@ -57,7 +429,7 @@ int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_ msk->pm.rm_list_tx = *rm_list; rm_addr |= BIT(MPTCP_RM_ADDR_SIGNAL); WRITE_ONCE(msk->pm.addr_signal, rm_addr); - mptcp_pm_nl_addr_send_ack(msk); + mptcp_pm_addr_send_ack(msk); return 0; } @@ -231,7 +603,7 @@ void mptcp_pm_add_addr_received(const struct sock *ssk, __MPTCP_INC_STATS(sock_net((struct sock *)msk), MPTCP_MIB_ADDADDRDROP); } /* id0 should not have a different address */ - } else if ((addr->id == 0 && !mptcp_pm_nl_is_init_remote_addr(msk, addr)) || + } else if ((addr->id == 0 && !mptcp_pm_is_init_remote_addr(msk, addr)) || (addr->id > 0 && !READ_ONCE(pm->accept_addr))) { mptcp_pm_announce_addr(msk, addr, true); mptcp_pm_add_addr_send_ack(msk); @@ -270,6 +642,80 @@ void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk) mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_SEND_ACK); } +static void mptcp_pm_rm_addr_or_subflow(struct mptcp_sock *msk, + const struct mptcp_rm_list *rm_list, + enum linux_mptcp_mib_field rm_type) +{ + struct mptcp_subflow_context *subflow, *tmp; + struct sock *sk = (struct sock *)msk; + u8 i; + + pr_debug("%s rm_list_nr %d\n", + rm_type == MPTCP_MIB_RMADDR ? "address" : "subflow", rm_list->nr); + + msk_owned_by_me(msk); + + if (sk->sk_state == TCP_LISTEN) + return; + + if (!rm_list->nr) + return; + + if (list_empty(&msk->conn_list)) + return; + + for (i = 0; i < rm_list->nr; i++) { + u8 rm_id = rm_list->ids[i]; + bool removed = false; + + mptcp_for_each_subflow_safe(msk, subflow, tmp) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + u8 remote_id = READ_ONCE(subflow->remote_id); + int how = RCV_SHUTDOWN | SEND_SHUTDOWN; + u8 id = subflow_get_local_id(subflow); + + if ((1 << inet_sk_state_load(ssk)) & + (TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 | TCPF_CLOSING | TCPF_CLOSE)) + continue; + if (rm_type == MPTCP_MIB_RMADDR && remote_id != rm_id) + continue; + if (rm_type == MPTCP_MIB_RMSUBFLOW && id != rm_id) + continue; + + pr_debug(" -> %s rm_list_ids[%d]=%u local_id=%u remote_id=%u mpc_id=%u\n", + rm_type == MPTCP_MIB_RMADDR ? 
"address" : "subflow", + i, rm_id, id, remote_id, msk->mpc_endpoint_id); + spin_unlock_bh(&msk->pm.lock); + mptcp_subflow_shutdown(sk, ssk, how); + removed |= subflow->request_join; + + /* the following takes care of updating the subflows counter */ + mptcp_close_ssk(sk, ssk, subflow); + spin_lock_bh(&msk->pm.lock); + + if (rm_type == MPTCP_MIB_RMSUBFLOW) + __MPTCP_INC_STATS(sock_net(sk), rm_type); + } + + if (rm_type == MPTCP_MIB_RMADDR) { + __MPTCP_INC_STATS(sock_net(sk), rm_type); + if (removed && mptcp_pm_is_kernel(msk)) + mptcp_pm_nl_rm_addr(msk, rm_id); + } + } +} + +static void mptcp_pm_rm_addr_recv(struct mptcp_sock *msk) +{ + mptcp_pm_rm_addr_or_subflow(msk, &msk->pm.rm_list_rx, MPTCP_MIB_RMADDR); +} + +void mptcp_pm_rm_subflow(struct mptcp_sock *msk, + const struct mptcp_rm_list *rm_list) +{ + mptcp_pm_rm_addr_or_subflow(msk, rm_list, MPTCP_MIB_RMSUBFLOW); +} + void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list) { @@ -325,8 +771,6 @@ void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq) } } -/* path manager helpers */ - bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, const struct sk_buff *skb, unsigned int opt_size, unsigned int remaining, struct mptcp_addr_info *addr, bool *echo, @@ -406,7 +850,7 @@ out_unlock: int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc) { - struct mptcp_addr_info skc_local; + struct mptcp_pm_addr_entry skc_local = { 0 }; struct mptcp_addr_info msk_local; if (WARN_ON_ONCE(!msk)) @@ -416,10 +860,13 @@ int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc) * addr */ mptcp_local_address((struct sock_common *)msk, &msk_local); - mptcp_local_address((struct sock_common *)skc, &skc_local); - if (mptcp_addresses_equal(&msk_local, &skc_local, false)) + mptcp_local_address((struct sock_common *)skc, &skc_local.addr); + if (mptcp_addresses_equal(&msk_local, &skc_local.addr, false)) return 0; + skc_local.addr.id = 0; + skc_local.flags = MPTCP_PM_ADDR_FLAG_IMPLICIT; + if (mptcp_pm_is_userspace(msk)) return mptcp_userspace_pm_get_local_id(msk, &skc_local); return mptcp_pm_nl_get_local_id(msk, &skc_local); @@ -437,119 +884,41 @@ bool mptcp_pm_is_backup(struct mptcp_sock *msk, struct sock_common *skc) return mptcp_pm_nl_is_backup(msk, &skc_local); } -static int mptcp_pm_get_addr(u8 id, struct mptcp_pm_addr_entry *addr, - struct genl_info *info) +static void mptcp_pm_subflows_chk_stale(const struct mptcp_sock *msk, struct sock *ssk) { - if (info->attrs[MPTCP_PM_ATTR_TOKEN]) - return mptcp_userspace_pm_get_addr(id, addr, info); - return mptcp_pm_nl_get_addr(id, addr, info); -} - -int mptcp_pm_nl_get_addr_doit(struct sk_buff *skb, struct genl_info *info) -{ - struct mptcp_pm_addr_entry addr; - struct nlattr *attr; - struct sk_buff *msg; - void *reply; - int ret; - - if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ENDPOINT_ADDR)) - return -EINVAL; - - attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; - ret = mptcp_pm_parse_entry(attr, info, false, &addr); - if (ret < 0) - return ret; - - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!msg) - return -ENOMEM; - - reply = genlmsg_put_reply(msg, info, &mptcp_genl_family, 0, - info->genlhdr->cmd); - if (!reply) { - GENL_SET_ERR_MSG(info, "not enough space in Netlink message"); - ret = -EMSGSIZE; - goto fail; - } - - ret = mptcp_pm_get_addr(addr.addr.id, &addr, info); - if (ret) { - NL_SET_ERR_MSG_ATTR(info->extack, attr, "address not found"); - goto fail; - } - - ret = mptcp_nl_fill_addr(msg, &addr); - if (ret) - goto fail; - - 
genlmsg_end(msg, reply); - ret = genlmsg_reply(msg, info); - return ret; - -fail: - nlmsg_free(msg); - return ret; -} - -int mptcp_pm_genl_fill_addr(struct sk_buff *msg, - struct netlink_callback *cb, - struct mptcp_pm_addr_entry *entry) -{ - void *hdr; - - hdr = genlmsg_put(msg, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, &mptcp_genl_family, - NLM_F_MULTI, MPTCP_PM_CMD_GET_ADDR); - if (!hdr) - return -EINVAL; + struct mptcp_subflow_context *iter, *subflow = mptcp_subflow_ctx(ssk); + struct sock *sk = (struct sock *)msk; + unsigned int active_max_loss_cnt; + struct net *net = sock_net(sk); + unsigned int stale_loss_cnt; + bool slow; + + stale_loss_cnt = mptcp_stale_loss_cnt(net); + if (subflow->stale || !stale_loss_cnt || subflow->stale_count <= stale_loss_cnt) + return; - if (mptcp_nl_fill_addr(msg, entry) < 0) { - genlmsg_cancel(msg, hdr); - return -EINVAL; + /* look for another available subflow not in loss state */ + active_max_loss_cnt = max_t(int, stale_loss_cnt - 1, 1); + mptcp_for_each_subflow(msk, iter) { + if (iter != subflow && mptcp_subflow_active(iter) && + iter->stale_count < active_max_loss_cnt) { + /* we have some alternatives, try to mark this subflow as idle ...*/ + slow = lock_sock_fast(ssk); + if (!tcp_rtx_and_write_queues_empty(ssk)) { + subflow->stale = 1; + __mptcp_retransmit_pending_data(sk); + MPTCP_INC_STATS(net, MPTCP_MIB_SUBFLOWSTALE); + } + unlock_sock_fast(ssk, slow); + + /* always try to push the pending data regardless of re-injections: + * we can possibly use backup subflows now, and subflow selection + * is cheap under the msk socket lock + */ + __mptcp_push_pending(sk, 0); + return; + } } - - genlmsg_end(msg, hdr); - return 0; -} - -static int mptcp_pm_dump_addr(struct sk_buff *msg, struct netlink_callback *cb) -{ - const struct genl_info *info = genl_info_dump(cb); - - if (info->attrs[MPTCP_PM_ATTR_TOKEN]) - return mptcp_userspace_pm_dump_addr(msg, cb); - return mptcp_pm_nl_dump_addr(msg, cb); -} - -int mptcp_pm_nl_get_addr_dumpit(struct sk_buff *msg, - struct netlink_callback *cb) -{ - return mptcp_pm_dump_addr(msg, cb); -} - -static int mptcp_pm_set_flags(struct genl_info *info) -{ - struct mptcp_pm_addr_entry loc = { .addr = { .family = AF_UNSPEC }, }; - struct nlattr *attr_loc; - int ret = -EINVAL; - - if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ATTR_ADDR)) - return ret; - - attr_loc = info->attrs[MPTCP_PM_ATTR_ADDR]; - ret = mptcp_pm_parse_entry(attr_loc, info, false, &loc); - if (ret < 0) - return ret; - - if (info->attrs[MPTCP_PM_ATTR_TOKEN]) - return mptcp_userspace_pm_set_flags(&loc, info); - return mptcp_pm_nl_set_flags(&loc, info); -} - -int mptcp_pm_nl_set_flags_doit(struct sk_buff *skb, struct genl_info *info) -{ - return mptcp_pm_set_flags(info); } void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk) @@ -564,36 +933,44 @@ void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk) } else if (subflow->stale_rcv_tstamp == rcv_tstamp) { if (subflow->stale_count < U8_MAX) subflow->stale_count++; - mptcp_pm_nl_subflow_chk_stale(msk, ssk); + mptcp_pm_subflows_chk_stale(msk, ssk); } else { subflow->stale_count = 0; mptcp_subflow_set_active(subflow); } } -/* if sk is ipv4 or ipv6_only allows only same-family local and remote addresses, - * otherwise allow any matching local/remote pair - */ -bool mptcp_pm_addr_families_match(const struct sock *sk, - const struct mptcp_addr_info *loc, - const struct mptcp_addr_info *rem) +void mptcp_pm_worker(struct mptcp_sock *msk) { - bool mptcp_is_v4 = sk->sk_family 
== AF_INET; + struct mptcp_pm_data *pm = &msk->pm; -#if IS_ENABLED(CONFIG_MPTCP_IPV6) - bool loc_is_v4 = loc->family == AF_INET || ipv6_addr_v4mapped(&loc->addr6); - bool rem_is_v4 = rem->family == AF_INET || ipv6_addr_v4mapped(&rem->addr6); + msk_owned_by_me(msk); - if (mptcp_is_v4) - return loc_is_v4 && rem_is_v4; + if (!(pm->status & MPTCP_PM_WORK_MASK)) + return; - if (ipv6_only_sock(sk)) - return !loc_is_v4 && !rem_is_v4; + spin_lock_bh(&msk->pm.lock); - return loc_is_v4 == rem_is_v4; -#else - return mptcp_is_v4 && loc->family == AF_INET && rem->family == AF_INET; -#endif + pr_debug("msk=%p status=%x\n", msk, pm->status); + if (pm->status & BIT(MPTCP_PM_ADD_ADDR_SEND_ACK)) { + pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_SEND_ACK); + mptcp_pm_addr_send_ack(msk); + } + if (pm->status & BIT(MPTCP_PM_RM_ADDR_RECEIVED)) { + pm->status &= ~BIT(MPTCP_PM_RM_ADDR_RECEIVED); + mptcp_pm_rm_addr_recv(msk); + } + __mptcp_pm_kernel_worker(msk); + + spin_unlock_bh(&msk->pm.lock); +} + +void mptcp_pm_destroy(struct mptcp_sock *msk) +{ + mptcp_pm_free_anno_list(msk); + + if (mptcp_pm_is_userspace(msk)) + mptcp_userspace_pm_free_local_addr_list(msk); } void mptcp_pm_data_reset(struct mptcp_sock *msk) diff --git a/net/mptcp/pm_kernel.c b/net/mptcp/pm_kernel.c new file mode 100644 index 000000000000..daf8f98a3164 --- /dev/null +++ b/net/mptcp/pm_kernel.c @@ -0,0 +1,1410 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Multipath TCP + * + * Copyright (c) 2025, Matthieu Baerts. + */ + +#define pr_fmt(fmt) "MPTCP: " fmt + +#include <net/netns/generic.h> + +#include "protocol.h" +#include "mib.h" +#include "mptcp_pm_gen.h" + +static int pm_nl_pernet_id; + +struct pm_nl_pernet { + /* protects pernet updates */ + spinlock_t lock; + struct list_head local_addr_list; + unsigned int addrs; + unsigned int stale_loss_cnt; + unsigned int add_addr_signal_max; + unsigned int add_addr_accept_max; + unsigned int local_addr_max; + unsigned int subflows_max; + unsigned int next_id; + DECLARE_BITMAP(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); +}; + +#define MPTCP_PM_ADDR_MAX 8 + +static struct pm_nl_pernet *pm_nl_get_pernet(const struct net *net) +{ + return net_generic(net, pm_nl_pernet_id); +} + +static struct pm_nl_pernet * +pm_nl_get_pernet_from_msk(const struct mptcp_sock *msk) +{ + return pm_nl_get_pernet(sock_net((struct sock *)msk)); +} + +static struct pm_nl_pernet *genl_info_pm_nl(struct genl_info *info) +{ + return pm_nl_get_pernet(genl_info_net(info)); +} + +unsigned int mptcp_pm_get_add_addr_signal_max(const struct mptcp_sock *msk) +{ + const struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); + + return READ_ONCE(pernet->add_addr_signal_max); +} +EXPORT_SYMBOL_GPL(mptcp_pm_get_add_addr_signal_max); + +unsigned int mptcp_pm_get_add_addr_accept_max(const struct mptcp_sock *msk) +{ + struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); + + return READ_ONCE(pernet->add_addr_accept_max); +} +EXPORT_SYMBOL_GPL(mptcp_pm_get_add_addr_accept_max); + +unsigned int mptcp_pm_get_subflows_max(const struct mptcp_sock *msk) +{ + struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); + + return READ_ONCE(pernet->subflows_max); +} +EXPORT_SYMBOL_GPL(mptcp_pm_get_subflows_max); + +unsigned int mptcp_pm_get_local_addr_max(const struct mptcp_sock *msk) +{ + struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); + + return READ_ONCE(pernet->local_addr_max); +} +EXPORT_SYMBOL_GPL(mptcp_pm_get_local_addr_max); + +static bool lookup_subflow_by_daddr(const struct list_head *list, + const struct mptcp_addr_info *daddr) 
+{ + struct mptcp_subflow_context *subflow; + struct mptcp_addr_info cur; + + list_for_each_entry(subflow, list, node) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + + if (!((1 << inet_sk_state_load(ssk)) & + (TCPF_ESTABLISHED | TCPF_SYN_SENT | TCPF_SYN_RECV))) + continue; + + mptcp_remote_address((struct sock_common *)ssk, &cur); + if (mptcp_addresses_equal(&cur, daddr, daddr->port)) + return true; + } + + return false; +} + +static bool +select_local_address(const struct pm_nl_pernet *pernet, + const struct mptcp_sock *msk, + struct mptcp_pm_local *new_local) +{ + struct mptcp_pm_addr_entry *entry; + bool found = false; + + msk_owned_by_me(msk); + + rcu_read_lock(); + list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { + if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW)) + continue; + + if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap)) + continue; + + new_local->addr = entry->addr; + new_local->flags = entry->flags; + new_local->ifindex = entry->ifindex; + found = true; + break; + } + rcu_read_unlock(); + + return found; +} + +static bool +select_signal_address(struct pm_nl_pernet *pernet, const struct mptcp_sock *msk, + struct mptcp_pm_local *new_local) +{ + struct mptcp_pm_addr_entry *entry; + bool found = false; + + rcu_read_lock(); + /* do not keep any additional per socket state, just signal + * the address list in order. + * Note: removal from the local address list during the msk life-cycle + * can lead to additional addresses not being announced. + */ + list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { + if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap)) + continue; + + if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) + continue; + + new_local->addr = entry->addr; + new_local->flags = entry->flags; + new_local->ifindex = entry->ifindex; + found = true; + break; + } + rcu_read_unlock(); + + return found; +} + +/* Fill all the remote addresses into the array addrs[], + * and return the array size. 
+ */ +static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, + struct mptcp_addr_info *local, + bool fullmesh, + struct mptcp_addr_info *addrs) +{ + bool deny_id0 = READ_ONCE(msk->pm.remote_deny_join_id0); + struct sock *sk = (struct sock *)msk, *ssk; + struct mptcp_subflow_context *subflow; + struct mptcp_addr_info remote = { 0 }; + unsigned int subflows_max; + int i = 0; + + subflows_max = mptcp_pm_get_subflows_max(msk); + mptcp_remote_address((struct sock_common *)sk, &remote); + + /* Non-fullmesh endpoint, fill in the single entry + * corresponding to the primary MPC subflow remote address + */ + if (!fullmesh) { + if (deny_id0) + return 0; + + if (!mptcp_pm_addr_families_match(sk, local, &remote)) + return 0; + + msk->pm.subflows++; + addrs[i++] = remote; + } else { + DECLARE_BITMAP(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1); + + /* Forbid creation of new subflows matching existing + * ones, possibly already created by incoming ADD_ADDR + */ + bitmap_zero(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1); + mptcp_for_each_subflow(msk, subflow) + if (READ_ONCE(subflow->local_id) == local->id) + __set_bit(subflow->remote_id, unavail_id); + + mptcp_for_each_subflow(msk, subflow) { + ssk = mptcp_subflow_tcp_sock(subflow); + mptcp_remote_address((struct sock_common *)ssk, &addrs[i]); + addrs[i].id = READ_ONCE(subflow->remote_id); + if (deny_id0 && !addrs[i].id) + continue; + + if (test_bit(addrs[i].id, unavail_id)) + continue; + + if (!mptcp_pm_addr_families_match(sk, local, &addrs[i])) + continue; + + if (msk->pm.subflows < subflows_max) { + /* forbid creating multiple address towards + * this id + */ + __set_bit(addrs[i].id, unavail_id); + msk->pm.subflows++; + i++; + } + } + } + + return i; +} + +static struct mptcp_pm_addr_entry * +__lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id) +{ + struct mptcp_pm_addr_entry *entry; + + list_for_each_entry_rcu(entry, &pernet->local_addr_list, list, + lockdep_is_held(&pernet->lock)) { + if (entry->addr.id == id) + return entry; + } + return NULL; +} + +static struct mptcp_pm_addr_entry * +__lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info) +{ + struct mptcp_pm_addr_entry *entry; + + list_for_each_entry_rcu(entry, &pernet->local_addr_list, list, + lockdep_is_held(&pernet->lock)) { + if (mptcp_addresses_equal(&entry->addr, info, entry->addr.port)) + return entry; + } + return NULL; +} + +static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) +{ + struct sock *sk = (struct sock *)msk; + unsigned int add_addr_signal_max; + bool signal_and_subflow = false; + unsigned int local_addr_max; + struct pm_nl_pernet *pernet; + struct mptcp_pm_local local; + unsigned int subflows_max; + + pernet = pm_nl_get_pernet(sock_net(sk)); + + add_addr_signal_max = mptcp_pm_get_add_addr_signal_max(msk); + local_addr_max = mptcp_pm_get_local_addr_max(msk); + subflows_max = mptcp_pm_get_subflows_max(msk); + + /* do lazy endpoint usage accounting for the MPC subflows */ + if (unlikely(!(msk->pm.status & BIT(MPTCP_PM_MPC_ENDPOINT_ACCOUNTED))) && msk->first) { + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(msk->first); + struct mptcp_pm_addr_entry *entry; + struct mptcp_addr_info mpc_addr; + bool backup = false; + + mptcp_local_address((struct sock_common *)msk->first, &mpc_addr); + rcu_read_lock(); + entry = __lookup_addr(pernet, &mpc_addr); + if (entry) { + __clear_bit(entry->addr.id, msk->pm.id_avail_bitmap); + msk->mpc_endpoint_id = entry->addr.id; + backup = !!(entry->flags & 
MPTCP_PM_ADDR_FLAG_BACKUP); + } + rcu_read_unlock(); + + if (backup) + mptcp_pm_send_ack(msk, subflow, true, backup); + + msk->pm.status |= BIT(MPTCP_PM_MPC_ENDPOINT_ACCOUNTED); + } + + pr_debug("local %d:%d signal %d:%d subflows %d:%d\n", + msk->pm.local_addr_used, local_addr_max, + msk->pm.add_addr_signaled, add_addr_signal_max, + msk->pm.subflows, subflows_max); + + /* check first for announce */ + if (msk->pm.add_addr_signaled < add_addr_signal_max) { + /* due to racing events on both ends we can reach here while + * previous add address is still running: if we invoke now + * mptcp_pm_announce_addr(), that will fail and the + * corresponding id will be marked as used. + * Instead let the PM machinery reschedule us when the + * current address announce will be completed. + */ + if (msk->pm.addr_signal & BIT(MPTCP_ADD_ADDR_SIGNAL)) + return; + + if (!select_signal_address(pernet, msk, &local)) + goto subflow; + + /* If the alloc fails, we are on memory pressure, not worth + * continuing, and trying to create subflows. + */ + if (!mptcp_pm_alloc_anno_list(msk, &local.addr)) + return; + + __clear_bit(local.addr.id, msk->pm.id_avail_bitmap); + msk->pm.add_addr_signaled++; + + /* Special case for ID0: set the correct ID */ + if (local.addr.id == msk->mpc_endpoint_id) + local.addr.id = 0; + + mptcp_pm_announce_addr(msk, &local.addr, false); + mptcp_pm_addr_send_ack(msk); + + if (local.flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) + signal_and_subflow = true; + } + +subflow: + /* check if should create a new subflow */ + while (msk->pm.local_addr_used < local_addr_max && + msk->pm.subflows < subflows_max) { + struct mptcp_addr_info addrs[MPTCP_PM_ADDR_MAX]; + bool fullmesh; + int i, nr; + + if (signal_and_subflow) + signal_and_subflow = false; + else if (!select_local_address(pernet, msk, &local)) + break; + + fullmesh = !!(local.flags & MPTCP_PM_ADDR_FLAG_FULLMESH); + + __clear_bit(local.addr.id, msk->pm.id_avail_bitmap); + + /* Special case for ID0: set the correct ID */ + if (local.addr.id == msk->mpc_endpoint_id) + local.addr.id = 0; + else /* local_addr_used is not decr for ID 0 */ + msk->pm.local_addr_used++; + + nr = fill_remote_addresses_vec(msk, &local.addr, fullmesh, addrs); + if (nr == 0) + continue; + + spin_unlock_bh(&msk->pm.lock); + for (i = 0; i < nr; i++) + __mptcp_subflow_connect(sk, &local, &addrs[i]); + spin_lock_bh(&msk->pm.lock); + } + mptcp_pm_nl_check_work_pending(msk); +} + +static void mptcp_pm_nl_fully_established(struct mptcp_sock *msk) +{ + mptcp_pm_create_subflow_or_signal_addr(msk); +} + +static void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk) +{ + mptcp_pm_create_subflow_or_signal_addr(msk); +} + +/* Fill all the local addresses into the array addrs[], + * and return the array size. 
+ */ +static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk, + struct mptcp_addr_info *remote, + struct mptcp_pm_local *locals) +{ + struct sock *sk = (struct sock *)msk; + struct mptcp_pm_addr_entry *entry; + struct mptcp_addr_info mpc_addr; + struct pm_nl_pernet *pernet; + unsigned int subflows_max; + int i = 0; + + pernet = pm_nl_get_pernet_from_msk(msk); + subflows_max = mptcp_pm_get_subflows_max(msk); + + mptcp_local_address((struct sock_common *)msk, &mpc_addr); + + rcu_read_lock(); + list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { + if (!(entry->flags & MPTCP_PM_ADDR_FLAG_FULLMESH)) + continue; + + if (!mptcp_pm_addr_families_match(sk, &entry->addr, remote)) + continue; + + if (msk->pm.subflows < subflows_max) { + locals[i].addr = entry->addr; + locals[i].flags = entry->flags; + locals[i].ifindex = entry->ifindex; + + /* Special case for ID0: set the correct ID */ + if (mptcp_addresses_equal(&locals[i].addr, &mpc_addr, locals[i].addr.port)) + locals[i].addr.id = 0; + + msk->pm.subflows++; + i++; + } + } + rcu_read_unlock(); + + /* If the array is empty, fill in the single + * 'IPADDRANY' local address + */ + if (!i) { + memset(&locals[i], 0, sizeof(locals[i])); + locals[i].addr.family = +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + remote->family == AF_INET6 && + ipv6_addr_v4mapped(&remote->addr6) ? AF_INET : +#endif + remote->family; + + if (!mptcp_pm_addr_families_match(sk, &locals[i].addr, remote)) + return 0; + + msk->pm.subflows++; + i++; + } + + return i; +} + +static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) +{ + struct mptcp_pm_local locals[MPTCP_PM_ADDR_MAX]; + struct sock *sk = (struct sock *)msk; + unsigned int add_addr_accept_max; + struct mptcp_addr_info remote; + unsigned int subflows_max; + bool sf_created = false; + int i, nr; + + add_addr_accept_max = mptcp_pm_get_add_addr_accept_max(msk); + subflows_max = mptcp_pm_get_subflows_max(msk); + + pr_debug("accepted %d:%d remote family %d\n", + msk->pm.add_addr_accepted, add_addr_accept_max, + msk->pm.remote.family); + + remote = msk->pm.remote; + mptcp_pm_announce_addr(msk, &remote, true); + mptcp_pm_addr_send_ack(msk); + + if (lookup_subflow_by_daddr(&msk->conn_list, &remote)) + return; + + /* if the ADD_ADDR carries no port, use the ID 0 (initial subflow) + * destination port + */ + if (!remote.port) + remote.port = sk->sk_dport; + + /* connect to the specified remote address, using whatever + * local address the routing configuration will pick. + */ + nr = fill_local_addresses_vec(msk, &remote, locals); + if (nr == 0) + return; + + spin_unlock_bh(&msk->pm.lock); + for (i = 0; i < nr; i++) + if (__mptcp_subflow_connect(sk, &locals[i], &remote) == 0) + sf_created = true; + spin_lock_bh(&msk->pm.lock); + + if (sf_created) { + /* add_addr_accepted is not decremented for ID 0 */ + if (remote.id) + msk->pm.add_addr_accepted++; + if (msk->pm.add_addr_accepted >= add_addr_accept_max || + msk->pm.subflows >= subflows_max) + WRITE_ONCE(msk->pm.accept_addr, false); + } +} + +void mptcp_pm_nl_rm_addr(struct mptcp_sock *msk, u8 rm_id) +{ + if (rm_id && !WARN_ON_ONCE(msk->pm.add_addr_accepted == 0)) { + /* Note: if the subflow has been closed before, this + * add_addr_accepted counter will not be decremented.
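/* Toy model (author's sketch, not kernel code) of the counting rule in
 * fill_local_addresses_vec() above: each fullmesh endpoint matching the
 * announced remote consumes one subflow slot up to subflows_max; with no
 * match at all, a single wildcard ('IPADDRANY') local address is used as
 * fallback. Family mismatches on the fallback path, which make the real
 * function return 0, are ignored here.
 */
static unsigned int model_fill_locals(unsigned int fullmesh_matches,
				      unsigned int subflows,
				      unsigned int subflows_max)
{
	unsigned int room = subflows_max > subflows ? subflows_max - subflows : 0;
	unsigned int nr = fullmesh_matches < room ? fullmesh_matches : room;

	return nr ? nr : 1;	/* the "!i" wildcard branch above */
}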
+ */ + if (--msk->pm.add_addr_accepted < mptcp_pm_get_add_addr_accept_max(msk)) + WRITE_ONCE(msk->pm.accept_addr, true); + } +} + +static bool address_use_port(struct mptcp_pm_addr_entry *entry) +{ + return (entry->flags & + (MPTCP_PM_ADDR_FLAG_SIGNAL | MPTCP_PM_ADDR_FLAG_SUBFLOW)) == + MPTCP_PM_ADDR_FLAG_SIGNAL; +} + +/* caller must ensure the RCU grace period has already elapsed */ +static void __mptcp_pm_release_addr_entry(struct mptcp_pm_addr_entry *entry) +{ + if (entry->lsk) + sock_release(entry->lsk); + kfree(entry); +} + +static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet, + struct mptcp_pm_addr_entry *entry, + bool needs_id, bool replace) +{ + struct mptcp_pm_addr_entry *cur, *del_entry = NULL; + unsigned int addr_max; + int ret = -EINVAL; + + spin_lock_bh(&pernet->lock); + /* to keep the code simple, don't do IDR-like allocation for address ID, + * just bail when we exceed limits + */ + if (pernet->next_id == MPTCP_PM_MAX_ADDR_ID) + pernet->next_id = 1; + if (pernet->addrs >= MPTCP_PM_ADDR_MAX) { + ret = -ERANGE; + goto out; + } + if (test_bit(entry->addr.id, pernet->id_bitmap)) { + ret = -EBUSY; + goto out; + } + + /* do not insert a duplicate address; differentiate on port only for + * signal endpoints that use a dedicated port + */ + if (!address_use_port(entry)) + entry->addr.port = 0; + list_for_each_entry(cur, &pernet->local_addr_list, list) { + if (mptcp_addresses_equal(&cur->addr, &entry->addr, + cur->addr.port || entry->addr.port)) { + /* allow replacing the existing endpoint only if such + * endpoint is an implicit one and the user-space + * did not provide an endpoint id + */ + if (!(cur->flags & MPTCP_PM_ADDR_FLAG_IMPLICIT)) { + ret = -EEXIST; + goto out; + } + if (entry->addr.id) + goto out; + + /* allow callers that only need to look up the local + * addr's id to skip replacement. This allows them to + * avoid calling synchronize_rcu in the packet recv + * path.
+ */ + if (!replace) { + kfree(entry); + ret = cur->addr.id; + goto out; + } + + pernet->addrs--; + entry->addr.id = cur->addr.id; + list_del_rcu(&cur->list); + del_entry = cur; + break; + } + } + + if (!entry->addr.id && needs_id) { +find_next: + entry->addr.id = find_next_zero_bit(pernet->id_bitmap, + MPTCP_PM_MAX_ADDR_ID + 1, + pernet->next_id); + if (!entry->addr.id && pernet->next_id != 1) { + pernet->next_id = 1; + goto find_next; + } + } + + if (!entry->addr.id && needs_id) + goto out; + + __set_bit(entry->addr.id, pernet->id_bitmap); + if (entry->addr.id > pernet->next_id) + pernet->next_id = entry->addr.id; + + if (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL) { + addr_max = pernet->add_addr_signal_max; + WRITE_ONCE(pernet->add_addr_signal_max, addr_max + 1); + } + if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) { + addr_max = pernet->local_addr_max; + WRITE_ONCE(pernet->local_addr_max, addr_max + 1); + } + + pernet->addrs++; + if (!entry->addr.port) + list_add_tail_rcu(&entry->list, &pernet->local_addr_list); + else + list_add_rcu(&entry->list, &pernet->local_addr_list); + ret = entry->addr.id; + +out: + spin_unlock_bh(&pernet->lock); + + /* just replaced an existing entry, free it */ + if (del_entry) { + synchronize_rcu(); + __mptcp_pm_release_addr_entry(del_entry); + } + return ret; +} + +static struct lock_class_key mptcp_slock_keys[2]; +static struct lock_class_key mptcp_keys[2]; + +static int mptcp_pm_nl_create_listen_socket(struct sock *sk, + struct mptcp_pm_addr_entry *entry) +{ + bool is_ipv6 = sk->sk_family == AF_INET6; + int addrlen = sizeof(struct sockaddr_in); + struct sockaddr_storage addr; + struct sock *newsk, *ssk; + int backlog = 1024; + int err; + + err = sock_create_kern(sock_net(sk), entry->addr.family, + SOCK_STREAM, IPPROTO_MPTCP, &entry->lsk); + if (err) + return err; + + newsk = entry->lsk->sk; + if (!newsk) + return -EINVAL; + + /* The subflow socket lock is acquired in a nested fashion relative to + * the msk one in several places, even by the TCP stack, and this msk + * is a kernel socket: lockdep complains. Instead of propagating the + * _nested modifiers in several places, re-init the lock class for the + * msk socket to an MPTCP-specific one. + */ + sock_lock_init_class_and_name(newsk, + is_ipv6 ? "mlock-AF_INET6" : "mlock-AF_INET", + &mptcp_slock_keys[is_ipv6], + is_ipv6 ? "msk_lock-AF_INET6" : "msk_lock-AF_INET", + &mptcp_keys[is_ipv6]); + + lock_sock(newsk); + ssk = __mptcp_nmpc_sk(mptcp_sk(newsk)); + release_sock(newsk); + if (IS_ERR(ssk)) + return PTR_ERR(ssk); + + mptcp_info2sockaddr(&entry->addr, &addr, entry->addr.family); +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + if (entry->addr.family == AF_INET6) + addrlen = sizeof(struct sockaddr_in6); +#endif + if (ssk->sk_family == AF_INET) + err = inet_bind_sk(ssk, (struct sockaddr *)&addr, addrlen); +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + else if (ssk->sk_family == AF_INET6) + err = inet6_bind_sk(ssk, (struct sockaddr *)&addr, addrlen); +#endif + if (err) + return err; + + /* We don't use mptcp_set_state() here because it needs to be called + * under the msk socket lock. For the moment, it would not do anything + * more than calling inet_sk_state_store(), because the old state is + * known (TCP_CLOSE).
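/* Standalone sketch of the wrap-around ID scan in
 * mptcp_pm_nl_append_new_local_addr() above, with a plain byte array
 * standing in for the kernel bitmap helpers (an assumption of this
 * illustration; MAX_ADDR_ID mirrors MPTCP_PM_MAX_ADDR_ID).
 */
#define MAX_ADDR_ID 255

static unsigned int alloc_addr_id(const unsigned char used[MAX_ADDR_ID + 1],
				  unsigned int *next_id)
{
	for (int pass = 0; pass < 2; pass++) {
		for (unsigned int id = *next_id; id <= MAX_ADDR_ID; id++) {
			if (!used[id]) {
				*next_id = id;
				return id;	/* caller marks it as used */
			}
		}
		*next_id = 1;	/* wrap once; ID 0 stays reserved */
	}
	return 0;	/* exhausted: the "!entry->addr.id" failure above */
}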
+ */ + inet_sk_state_store(newsk, TCP_LISTEN); + lock_sock(ssk); + WRITE_ONCE(mptcp_subflow_ctx(ssk)->pm_listener, true); + err = __inet_listen_sk(ssk, backlog); + if (!err) + mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CREATED); + release_sock(ssk); + return err; +} + +int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, + struct mptcp_pm_addr_entry *skc) +{ + struct mptcp_pm_addr_entry *entry; + struct pm_nl_pernet *pernet; + int ret; + + pernet = pm_nl_get_pernet_from_msk(msk); + + rcu_read_lock(); + entry = __lookup_addr(pernet, &skc->addr); + ret = entry ? entry->addr.id : -1; + rcu_read_unlock(); + if (ret >= 0) + return ret; + + /* address not found, add to local list */ + entry = kmalloc(sizeof(*entry), GFP_ATOMIC); + if (!entry) + return -ENOMEM; + + *entry = *skc; + entry->addr.port = 0; + ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, true, false); + if (ret < 0) + kfree(entry); + + return ret; +} + +bool mptcp_pm_nl_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc) +{ + struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); + struct mptcp_pm_addr_entry *entry; + bool backup; + + rcu_read_lock(); + entry = __lookup_addr(pernet, skc); + backup = entry && !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP); + rcu_read_unlock(); + + return backup; +} + +static int mptcp_nl_add_subflow_or_signal_addr(struct net *net, + struct mptcp_addr_info *addr) +{ + struct mptcp_sock *msk; + long s_slot = 0, s_num = 0; + + while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { + struct sock *sk = (struct sock *)msk; + struct mptcp_addr_info mpc_addr; + + if (!READ_ONCE(msk->fully_established) || + mptcp_pm_is_userspace(msk)) + goto next; + + /* if the endp linked to the init sf is re-added with a != ID */ + mptcp_local_address((struct sock_common *)msk, &mpc_addr); + + lock_sock(sk); + spin_lock_bh(&msk->pm.lock); + if (mptcp_addresses_equal(addr, &mpc_addr, addr->port)) + msk->mpc_endpoint_id = addr->id; + mptcp_pm_create_subflow_or_signal_addr(msk); + spin_unlock_bh(&msk->pm.lock); + release_sock(sk); + +next: + sock_put(sk); + cond_resched(); + } + + return 0; +} + +static bool mptcp_pm_has_addr_attr_id(const struct nlattr *attr, + struct genl_info *info) +{ + struct nlattr *tb[MPTCP_PM_ADDR_ATTR_MAX + 1]; + + if (!nla_parse_nested_deprecated(tb, MPTCP_PM_ADDR_ATTR_MAX, attr, + mptcp_pm_address_nl_policy, info->extack) && + tb[MPTCP_PM_ADDR_ATTR_ID]) + return true; + return false; +} + +/* Add an MPTCP endpoint */ +int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct pm_nl_pernet *pernet = genl_info_pm_nl(info); + struct mptcp_pm_addr_entry addr, *entry; + struct nlattr *attr; + int ret; + + if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ENDPOINT_ADDR)) + return -EINVAL; + + attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; + ret = mptcp_pm_parse_entry(attr, info, true, &addr); + if (ret < 0) + return ret; + + if (addr.addr.port && !address_use_port(&addr)) { + NL_SET_ERR_MSG_ATTR(info->extack, attr, + "flags must have signal and not subflow when using port"); + return -EINVAL; + } + + if (addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL && + addr.flags & MPTCP_PM_ADDR_FLAG_FULLMESH) { + NL_SET_ERR_MSG_ATTR(info->extack, attr, + "flags mustn't have both signal and fullmesh"); + return -EINVAL; + } + + if (addr.flags & MPTCP_PM_ADDR_FLAG_IMPLICIT) { + NL_SET_ERR_MSG_ATTR(info->extack, attr, + "can't create IMPLICIT endpoint"); + return -EINVAL; + } + + entry = kzalloc(sizeof(*entry), GFP_KERNEL_ACCOUNT); + if (!entry) { + 
GENL_SET_ERR_MSG(info, "can't allocate addr"); + return -ENOMEM; + } + + *entry = addr; + if (entry->addr.port) { + ret = mptcp_pm_nl_create_listen_socket(skb->sk, entry); + if (ret) { + GENL_SET_ERR_MSG_FMT(info, "create listen socket error: %d", ret); + goto out_free; + } + } + ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, + !mptcp_pm_has_addr_attr_id(attr, info), + true); + if (ret < 0) { + GENL_SET_ERR_MSG_FMT(info, "too many addresses or duplicate one: %d", ret); + goto out_free; + } + + mptcp_nl_add_subflow_or_signal_addr(sock_net(skb->sk), &entry->addr); + return 0; + +out_free: + __mptcp_pm_release_addr_entry(entry); + return ret; +} + +static u8 mptcp_endp_get_local_id(struct mptcp_sock *msk, + const struct mptcp_addr_info *addr) +{ + return msk->mpc_endpoint_id == addr->id ? 0 : addr->id; +} + +static bool mptcp_pm_remove_anno_addr(struct mptcp_sock *msk, + const struct mptcp_addr_info *addr, + bool force) +{ + struct mptcp_rm_list list = { .nr = 0 }; + bool ret; + + list.ids[list.nr++] = mptcp_endp_get_local_id(msk, addr); + + ret = mptcp_remove_anno_list_by_saddr(msk, addr); + if (ret || force) { + spin_lock_bh(&msk->pm.lock); + if (ret) { + __set_bit(addr->id, msk->pm.id_avail_bitmap); + msk->pm.add_addr_signaled--; + } + mptcp_pm_remove_addr(msk, &list); + spin_unlock_bh(&msk->pm.lock); + } + return ret; +} + +static void __mark_subflow_endp_available(struct mptcp_sock *msk, u8 id) +{ + /* If it was marked as used, and not ID 0, decrement local_addr_used */ + if (!__test_and_set_bit(id ? : msk->mpc_endpoint_id, msk->pm.id_avail_bitmap) && + id && !WARN_ON_ONCE(msk->pm.local_addr_used == 0)) + msk->pm.local_addr_used--; +} + +static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net, + const struct mptcp_pm_addr_entry *entry) +{ + const struct mptcp_addr_info *addr = &entry->addr; + struct mptcp_rm_list list = { .nr = 1 }; + long s_slot = 0, s_num = 0; + struct mptcp_sock *msk; + + pr_debug("remove_id=%d\n", addr->id); + + while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { + struct sock *sk = (struct sock *)msk; + bool remove_subflow; + + if (mptcp_pm_is_userspace(msk)) + goto next; + + lock_sock(sk); + remove_subflow = mptcp_lookup_subflow_by_saddr(&msk->conn_list, addr); + mptcp_pm_remove_anno_addr(msk, addr, remove_subflow && + !(entry->flags & MPTCP_PM_ADDR_FLAG_IMPLICIT)); + + list.ids[0] = mptcp_endp_get_local_id(msk, addr); + if (remove_subflow) { + spin_lock_bh(&msk->pm.lock); + mptcp_pm_rm_subflow(msk, &list); + spin_unlock_bh(&msk->pm.lock); + } + + if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) { + spin_lock_bh(&msk->pm.lock); + __mark_subflow_endp_available(msk, list.ids[0]); + spin_unlock_bh(&msk->pm.lock); + } + + if (msk->mpc_endpoint_id == entry->addr.id) + msk->mpc_endpoint_id = 0; + release_sock(sk); + +next: + sock_put(sk); + cond_resched(); + } + + return 0; +} + +static int mptcp_nl_remove_id_zero_address(struct net *net, + struct mptcp_addr_info *addr) +{ + struct mptcp_rm_list list = { .nr = 0 }; + long s_slot = 0, s_num = 0; + struct mptcp_sock *msk; + + list.ids[list.nr++] = 0; + + while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { + struct sock *sk = (struct sock *)msk; + struct mptcp_addr_info msk_local; + + if (list_empty(&msk->conn_list) || mptcp_pm_is_userspace(msk)) + goto next; + + mptcp_local_address((struct sock_common *)msk, &msk_local); + if (!mptcp_addresses_equal(&msk_local, addr, addr->port)) + goto next; + + lock_sock(sk); + spin_lock_bh(&msk->pm.lock); + mptcp_pm_remove_addr(msk, 
&list); + mptcp_pm_rm_subflow(msk, &list); + __mark_subflow_endp_available(msk, 0); + spin_unlock_bh(&msk->pm.lock); + release_sock(sk); + +next: + sock_put(sk); + cond_resched(); + } + + return 0; +} + +/* Remove an MPTCP endpoint */ +int mptcp_pm_nl_del_addr_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct pm_nl_pernet *pernet = genl_info_pm_nl(info); + struct mptcp_pm_addr_entry addr, *entry; + unsigned int addr_max; + struct nlattr *attr; + int ret; + + if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ENDPOINT_ADDR)) + return -EINVAL; + + attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; + ret = mptcp_pm_parse_entry(attr, info, false, &addr); + if (ret < 0) + return ret; + + /* the zero id address is special: the first address used by the msk + * always gets such an id, so different subflows can have different zero + * id addresses. Additionally zero id is not accounted for in id_bitmap. + * Let's use an 'mptcp_rm_list' instead of the common remove code. + */ + if (addr.addr.id == 0) + return mptcp_nl_remove_id_zero_address(sock_net(skb->sk), &addr.addr); + + spin_lock_bh(&pernet->lock); + entry = __lookup_addr_by_id(pernet, addr.addr.id); + if (!entry) { + NL_SET_ERR_MSG_ATTR(info->extack, attr, "address not found"); + spin_unlock_bh(&pernet->lock); + return -EINVAL; + } + if (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL) { + addr_max = pernet->add_addr_signal_max; + WRITE_ONCE(pernet->add_addr_signal_max, addr_max - 1); + } + if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) { + addr_max = pernet->local_addr_max; + WRITE_ONCE(pernet->local_addr_max, addr_max - 1); + } + + pernet->addrs--; + list_del_rcu(&entry->list); + __clear_bit(entry->addr.id, pernet->id_bitmap); + spin_unlock_bh(&pernet->lock); + + mptcp_nl_remove_subflow_and_signal_addr(sock_net(skb->sk), entry); + synchronize_rcu(); + __mptcp_pm_release_addr_entry(entry); + + return ret; +} + +static void mptcp_pm_flush_addrs_and_subflows(struct mptcp_sock *msk, + struct list_head *rm_list) +{ + struct mptcp_rm_list alist = { .nr = 0 }, slist = { .nr = 0 }; + struct mptcp_pm_addr_entry *entry; + + list_for_each_entry(entry, rm_list, list) { + if (slist.nr < MPTCP_RM_IDS_MAX && + mptcp_lookup_subflow_by_saddr(&msk->conn_list, &entry->addr)) + slist.ids[slist.nr++] = mptcp_endp_get_local_id(msk, &entry->addr); + + if (alist.nr < MPTCP_RM_IDS_MAX && + mptcp_remove_anno_list_by_saddr(msk, &entry->addr)) + alist.ids[alist.nr++] = mptcp_endp_get_local_id(msk, &entry->addr); + } + + spin_lock_bh(&msk->pm.lock); + if (alist.nr) { + msk->pm.add_addr_signaled -= alist.nr; + mptcp_pm_remove_addr(msk, &alist); + } + if (slist.nr) + mptcp_pm_rm_subflow(msk, &slist); + /* Reset counters: maybe some subflows have been removed before */ + bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); + msk->pm.local_addr_used = 0; + spin_unlock_bh(&msk->pm.lock); +} + +static void mptcp_nl_flush_addrs_list(struct net *net, + struct list_head *rm_list) +{ + long s_slot = 0, s_num = 0; + struct mptcp_sock *msk; + + if (list_empty(rm_list)) + return; + + while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { + struct sock *sk = (struct sock *)msk; + + if (!mptcp_pm_is_userspace(msk)) { + lock_sock(sk); + mptcp_pm_flush_addrs_and_subflows(msk, rm_list); + release_sock(sk); + } + + sock_put(sk); + cond_resched(); + } +} + +/* caller must ensure the RCU grace period is already elapsed */ +static void __flush_addrs(struct list_head *list) +{ + while (!list_empty(list)) { + struct mptcp_pm_addr_entry *cur; + + cur = list_entry(list->next, + 
struct mptcp_pm_addr_entry, list); + list_del_rcu(&cur->list); + __mptcp_pm_release_addr_entry(cur); + } +} + +static void __reset_counters(struct pm_nl_pernet *pernet) +{ + WRITE_ONCE(pernet->add_addr_signal_max, 0); + WRITE_ONCE(pernet->add_addr_accept_max, 0); + WRITE_ONCE(pernet->local_addr_max, 0); + pernet->addrs = 0; +} + +int mptcp_pm_nl_flush_addrs_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct pm_nl_pernet *pernet = genl_info_pm_nl(info); + LIST_HEAD(free_list); + + spin_lock_bh(&pernet->lock); + list_splice_init(&pernet->local_addr_list, &free_list); + __reset_counters(pernet); + pernet->next_id = 1; + bitmap_zero(pernet->id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); + spin_unlock_bh(&pernet->lock); + mptcp_nl_flush_addrs_list(sock_net(skb->sk), &free_list); + synchronize_rcu(); + __flush_addrs(&free_list); + return 0; +} + +int mptcp_pm_nl_get_addr(u8 id, struct mptcp_pm_addr_entry *addr, + struct genl_info *info) +{ + struct pm_nl_pernet *pernet = genl_info_pm_nl(info); + struct mptcp_pm_addr_entry *entry; + int ret = -EINVAL; + + rcu_read_lock(); + entry = __lookup_addr_by_id(pernet, id); + if (entry) { + *addr = *entry; + ret = 0; + } + rcu_read_unlock(); + + return ret; +} + +int mptcp_pm_nl_dump_addr(struct sk_buff *msg, + struct netlink_callback *cb) +{ + struct net *net = sock_net(msg->sk); + struct mptcp_pm_addr_entry *entry; + struct pm_nl_pernet *pernet; + int id = cb->args[0]; + int i; + + pernet = pm_nl_get_pernet(net); + + rcu_read_lock(); + for (i = id; i < MPTCP_PM_MAX_ADDR_ID + 1; i++) { + if (test_bit(i, pernet->id_bitmap)) { + entry = __lookup_addr_by_id(pernet, i); + if (!entry) + break; + + if (entry->addr.id <= id) + continue; + + if (mptcp_pm_genl_fill_addr(msg, cb, entry) < 0) + break; + + id = entry->addr.id; + } + } + rcu_read_unlock(); + + cb->args[0] = id; + return msg->len; +} + +static int parse_limit(struct genl_info *info, int id, unsigned int *limit) +{ + struct nlattr *attr = info->attrs[id]; + + if (!attr) + return 0; + + *limit = nla_get_u32(attr); + if (*limit > MPTCP_PM_ADDR_MAX) { + NL_SET_ERR_MSG_ATTR_FMT(info->extack, attr, + "limit greater than maximum (%u)", + MPTCP_PM_ADDR_MAX); + return -EINVAL; + } + return 0; +} + +int mptcp_pm_nl_set_limits_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct pm_nl_pernet *pernet = genl_info_pm_nl(info); + unsigned int rcv_addrs, subflows; + int ret; + + spin_lock_bh(&pernet->lock); + rcv_addrs = pernet->add_addr_accept_max; + ret = parse_limit(info, MPTCP_PM_ATTR_RCV_ADD_ADDRS, &rcv_addrs); + if (ret) + goto unlock; + + subflows = pernet->subflows_max; + ret = parse_limit(info, MPTCP_PM_ATTR_SUBFLOWS, &subflows); + if (ret) + goto unlock; + + WRITE_ONCE(pernet->add_addr_accept_max, rcv_addrs); + WRITE_ONCE(pernet->subflows_max, subflows); + +unlock: + spin_unlock_bh(&pernet->lock); + return ret; +} + +int mptcp_pm_nl_get_limits_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct pm_nl_pernet *pernet = genl_info_pm_nl(info); + struct sk_buff *msg; + void *reply; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + reply = genlmsg_put_reply(msg, info, &mptcp_genl_family, 0, + MPTCP_PM_CMD_GET_LIMITS); + if (!reply) + goto fail; + + if (nla_put_u32(msg, MPTCP_PM_ATTR_RCV_ADD_ADDRS, + READ_ONCE(pernet->add_addr_accept_max))) + goto fail; + + if (nla_put_u32(msg, MPTCP_PM_ATTR_SUBFLOWS, + READ_ONCE(pernet->subflows_max))) + goto fail; + + genlmsg_end(msg, reply); + return genlmsg_reply(msg, info); + +fail: + GENL_SET_ERR_MSG(info, "not enough 
space in Netlink message"); + nlmsg_free(msg); + return -EMSGSIZE; +} + +static void mptcp_pm_nl_fullmesh(struct mptcp_sock *msk, + struct mptcp_addr_info *addr) +{ + struct mptcp_rm_list list = { .nr = 0 }; + + list.ids[list.nr++] = mptcp_endp_get_local_id(msk, addr); + + spin_lock_bh(&msk->pm.lock); + mptcp_pm_rm_subflow(msk, &list); + __mark_subflow_endp_available(msk, list.ids[0]); + mptcp_pm_create_subflow_or_signal_addr(msk); + spin_unlock_bh(&msk->pm.lock); +} + +static void mptcp_pm_nl_set_flags_all(struct net *net, + struct mptcp_pm_addr_entry *local, + u8 changed) +{ + u8 is_subflow = !!(local->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW); + u8 bkup = !!(local->flags & MPTCP_PM_ADDR_FLAG_BACKUP); + long s_slot = 0, s_num = 0; + struct mptcp_sock *msk; + + if (changed == MPTCP_PM_ADDR_FLAG_FULLMESH && !is_subflow) + return; + + while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { + struct sock *sk = (struct sock *)msk; + + if (list_empty(&msk->conn_list) || mptcp_pm_is_userspace(msk)) + goto next; + + lock_sock(sk); + if (changed & MPTCP_PM_ADDR_FLAG_BACKUP) + mptcp_pm_mp_prio_send_ack(msk, &local->addr, NULL, bkup); + /* Subflows will only be recreated if the SUBFLOW flag is set */ + if (is_subflow && (changed & MPTCP_PM_ADDR_FLAG_FULLMESH)) + mptcp_pm_nl_fullmesh(msk, &local->addr); + release_sock(sk); + +next: + sock_put(sk); + cond_resched(); + } +} + +int mptcp_pm_nl_set_flags(struct mptcp_pm_addr_entry *local, + struct genl_info *info) +{ + struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR]; + u8 changed, mask = MPTCP_PM_ADDR_FLAG_BACKUP | + MPTCP_PM_ADDR_FLAG_FULLMESH; + struct net *net = genl_info_net(info); + struct mptcp_pm_addr_entry *entry; + struct pm_nl_pernet *pernet; + u8 lookup_by_id = 0; + + pernet = pm_nl_get_pernet(net); + + if (local->addr.family == AF_UNSPEC) { + lookup_by_id = 1; + if (!local->addr.id) { + NL_SET_ERR_MSG_ATTR(info->extack, attr, + "missing address ID"); + return -EOPNOTSUPP; + } + } + + spin_lock_bh(&pernet->lock); + entry = lookup_by_id ? 
__lookup_addr_by_id(pernet, local->addr.id) : + __lookup_addr(pernet, &local->addr); + if (!entry) { + spin_unlock_bh(&pernet->lock); + NL_SET_ERR_MSG_ATTR(info->extack, attr, "address not found"); + return -EINVAL; + } + if ((local->flags & MPTCP_PM_ADDR_FLAG_FULLMESH) && + (entry->flags & (MPTCP_PM_ADDR_FLAG_SIGNAL | + MPTCP_PM_ADDR_FLAG_IMPLICIT))) { + spin_unlock_bh(&pernet->lock); + NL_SET_ERR_MSG_ATTR(info->extack, attr, "invalid addr flags"); + return -EINVAL; + } + + changed = (local->flags ^ entry->flags) & mask; + entry->flags = (entry->flags & ~mask) | (local->flags & mask); + *local = *entry; + spin_unlock_bh(&pernet->lock); + + mptcp_pm_nl_set_flags_all(net, local, changed); + return 0; +} + +bool mptcp_pm_nl_check_work_pending(struct mptcp_sock *msk) +{ + struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); + + if (msk->pm.subflows == mptcp_pm_get_subflows_max(msk) || + (find_next_and_bit(pernet->id_bitmap, msk->pm.id_avail_bitmap, + MPTCP_PM_MAX_ADDR_ID + 1, 0) == MPTCP_PM_MAX_ADDR_ID + 1)) { + WRITE_ONCE(msk->pm.work_pending, false); + return false; + } + return true; +} + +/* Called under PM lock */ +void __mptcp_pm_kernel_worker(struct mptcp_sock *msk) +{ + struct mptcp_pm_data *pm = &msk->pm; + + if (pm->status & BIT(MPTCP_PM_ADD_ADDR_RECEIVED)) { + pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_RECEIVED); + mptcp_pm_nl_add_addr_received(msk); + } + if (pm->status & BIT(MPTCP_PM_ESTABLISHED)) { + pm->status &= ~BIT(MPTCP_PM_ESTABLISHED); + mptcp_pm_nl_fully_established(msk); + } + if (pm->status & BIT(MPTCP_PM_SUBFLOW_ESTABLISHED)) { + pm->status &= ~BIT(MPTCP_PM_SUBFLOW_ESTABLISHED); + mptcp_pm_nl_subflow_established(msk); + } +} + +static int __net_init pm_nl_init_net(struct net *net) +{ + struct pm_nl_pernet *pernet = pm_nl_get_pernet(net); + + INIT_LIST_HEAD_RCU(&pernet->local_addr_list); + + /* Cit. 2 subflows ought to be enough for anybody. */ + pernet->subflows_max = 2; + pernet->next_id = 1; + pernet->stale_loss_cnt = 4; + spin_lock_init(&pernet->lock); + + /* No need to initialize other pernet fields, the struct is zeroed at + * allocation time. + */ + + return 0; +} + +static void __net_exit pm_nl_exit_net(struct list_head *net_list) +{ + struct net *net; + + list_for_each_entry(net, net_list, exit_list) { + struct pm_nl_pernet *pernet = pm_nl_get_pernet(net); + + /* net is removed from namespace list, can't race with + * other modifiers, also netns core already waited for a + * RCU grace period. 
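/* Sketch of the predicate in mptcp_pm_nl_check_work_pending() above, with
 * plain byte arrays standing in for the two bitmaps (an illustration):
 * work remains only while the subflow budget has room and some endpoint ID
 * is both configured in the netns (id_bitmap) and still unused by this
 * connection (id_avail_bitmap), which is what find_next_and_bit() probes.
 */
static int model_work_pending(unsigned int subflows, unsigned int subflows_max,
			      const unsigned char *configured,
			      const unsigned char *avail, unsigned int nids)
{
	if (subflows == subflows_max)
		return 0;
	for (unsigned int id = 0; id < nids; id++)
		if (configured[id] && avail[id])
			return 1;
	return 0;
}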
+ */ + __flush_addrs(&pernet->local_addr_list); + } +} + +static struct pernet_operations mptcp_pm_pernet_ops = { + .init = pm_nl_init_net, + .exit_batch = pm_nl_exit_net, + .id = &pm_nl_pernet_id, + .size = sizeof(struct pm_nl_pernet), +}; + +void __init mptcp_pm_nl_init(void) +{ + if (register_pernet_subsys(&mptcp_pm_pernet_ops) < 0) + panic("Failed to register MPTCP PM pernet subsystem.\n"); + + if (genl_register_family(&mptcp_genl_family)) + panic("Failed to register MPTCP PM netlink family\n"); +} diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 4bebc4963c42..b2e5bbdcd5df 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -6,1186 +6,9 @@ #define pr_fmt(fmt) "MPTCP: " fmt -#include <linux/inet.h> -#include <linux/kernel.h> -#include <net/inet_common.h> -#include <net/netns/generic.h> -#include <net/mptcp.h> - #include "protocol.h" -#include "mib.h" #include "mptcp_pm_gen.h" -static int pm_nl_pernet_id; - -struct mptcp_pm_add_entry { - struct list_head list; - struct mptcp_addr_info addr; - u8 retrans_times; - struct timer_list add_timer; - struct mptcp_sock *sock; -}; - -struct pm_nl_pernet { - /* protects pernet updates */ - spinlock_t lock; - struct list_head local_addr_list; - unsigned int addrs; - unsigned int stale_loss_cnt; - unsigned int add_addr_signal_max; - unsigned int add_addr_accept_max; - unsigned int local_addr_max; - unsigned int subflows_max; - unsigned int next_id; - DECLARE_BITMAP(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); -}; - -#define MPTCP_PM_ADDR_MAX 8 -#define ADD_ADDR_RETRANS_MAX 3 - -static struct pm_nl_pernet *pm_nl_get_pernet(const struct net *net) -{ - return net_generic(net, pm_nl_pernet_id); -} - -static struct pm_nl_pernet * -pm_nl_get_pernet_from_msk(const struct mptcp_sock *msk) -{ - return pm_nl_get_pernet(sock_net((struct sock *)msk)); -} - -bool mptcp_addresses_equal(const struct mptcp_addr_info *a, - const struct mptcp_addr_info *b, bool use_port) -{ - bool addr_equals = false; - - if (a->family == b->family) { - if (a->family == AF_INET) - addr_equals = a->addr.s_addr == b->addr.s_addr; -#if IS_ENABLED(CONFIG_MPTCP_IPV6) - else - addr_equals = ipv6_addr_equal(&a->addr6, &b->addr6); - } else if (a->family == AF_INET) { - if (ipv6_addr_v4mapped(&b->addr6)) - addr_equals = a->addr.s_addr == b->addr6.s6_addr32[3]; - } else if (b->family == AF_INET) { - if (ipv6_addr_v4mapped(&a->addr6)) - addr_equals = a->addr6.s6_addr32[3] == b->addr.s_addr; -#endif - } - - if (!addr_equals) - return false; - if (!use_port) - return true; - - return a->port == b->port; -} - -void mptcp_local_address(const struct sock_common *skc, struct mptcp_addr_info *addr) -{ - addr->family = skc->skc_family; - addr->port = htons(skc->skc_num); - if (addr->family == AF_INET) - addr->addr.s_addr = skc->skc_rcv_saddr; -#if IS_ENABLED(CONFIG_MPTCP_IPV6) - else if (addr->family == AF_INET6) - addr->addr6 = skc->skc_v6_rcv_saddr; -#endif -} - -static void remote_address(const struct sock_common *skc, - struct mptcp_addr_info *addr) -{ - addr->family = skc->skc_family; - addr->port = skc->skc_dport; - if (addr->family == AF_INET) - addr->addr.s_addr = skc->skc_daddr; -#if IS_ENABLED(CONFIG_MPTCP_IPV6) - else if (addr->family == AF_INET6) - addr->addr6 = skc->skc_v6_daddr; -#endif -} - -bool mptcp_lookup_subflow_by_saddr(const struct list_head *list, - const struct mptcp_addr_info *saddr) -{ - struct mptcp_subflow_context *subflow; - struct mptcp_addr_info cur; - struct sock_common *skc; - - list_for_each_entry(subflow, list, node) { - skc = (struct 
sock_common *)mptcp_subflow_tcp_sock(subflow); - - mptcp_local_address(skc, &cur); - if (mptcp_addresses_equal(&cur, saddr, saddr->port)) - return true; - } - - return false; -} - -static bool lookup_subflow_by_daddr(const struct list_head *list, - const struct mptcp_addr_info *daddr) -{ - struct mptcp_subflow_context *subflow; - struct mptcp_addr_info cur; - - list_for_each_entry(subflow, list, node) { - struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - - if (!((1 << inet_sk_state_load(ssk)) & - (TCPF_ESTABLISHED | TCPF_SYN_SENT | TCPF_SYN_RECV))) - continue; - - remote_address((struct sock_common *)ssk, &cur); - if (mptcp_addresses_equal(&cur, daddr, daddr->port)) - return true; - } - - return false; -} - -static bool -select_local_address(const struct pm_nl_pernet *pernet, - const struct mptcp_sock *msk, - struct mptcp_pm_local *new_local) -{ - struct mptcp_pm_addr_entry *entry; - bool found = false; - - msk_owned_by_me(msk); - - rcu_read_lock(); - list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { - if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW)) - continue; - - if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap)) - continue; - - new_local->addr = entry->addr; - new_local->flags = entry->flags; - new_local->ifindex = entry->ifindex; - found = true; - break; - } - rcu_read_unlock(); - - return found; -} - -static bool -select_signal_address(struct pm_nl_pernet *pernet, const struct mptcp_sock *msk, - struct mptcp_pm_local *new_local) -{ - struct mptcp_pm_addr_entry *entry; - bool found = false; - - rcu_read_lock(); - /* do not keep any additional per socket state, just signal - * the address list in order. - * Note: removal from the local address list during the msk life-cycle - * can lead to additional addresses not being announced. 
- */ - list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { - if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap)) - continue; - - if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) - continue; - - new_local->addr = entry->addr; - new_local->flags = entry->flags; - new_local->ifindex = entry->ifindex; - found = true; - break; - } - rcu_read_unlock(); - - return found; -} - -unsigned int mptcp_pm_get_add_addr_signal_max(const struct mptcp_sock *msk) -{ - const struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); - - return READ_ONCE(pernet->add_addr_signal_max); -} -EXPORT_SYMBOL_GPL(mptcp_pm_get_add_addr_signal_max); - -unsigned int mptcp_pm_get_add_addr_accept_max(const struct mptcp_sock *msk) -{ - struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); - - return READ_ONCE(pernet->add_addr_accept_max); -} -EXPORT_SYMBOL_GPL(mptcp_pm_get_add_addr_accept_max); - -unsigned int mptcp_pm_get_subflows_max(const struct mptcp_sock *msk) -{ - struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); - - return READ_ONCE(pernet->subflows_max); -} -EXPORT_SYMBOL_GPL(mptcp_pm_get_subflows_max); - -unsigned int mptcp_pm_get_local_addr_max(const struct mptcp_sock *msk) -{ - struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); - - return READ_ONCE(pernet->local_addr_max); -} -EXPORT_SYMBOL_GPL(mptcp_pm_get_local_addr_max); - -bool mptcp_pm_nl_check_work_pending(struct mptcp_sock *msk) -{ - struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); - - if (msk->pm.subflows == mptcp_pm_get_subflows_max(msk) || - (find_next_and_bit(pernet->id_bitmap, msk->pm.id_avail_bitmap, - MPTCP_PM_MAX_ADDR_ID + 1, 0) == MPTCP_PM_MAX_ADDR_ID + 1)) { - WRITE_ONCE(msk->pm.work_pending, false); - return false; - } - return true; -} - -struct mptcp_pm_add_entry * -mptcp_lookup_anno_list_by_saddr(const struct mptcp_sock *msk, - const struct mptcp_addr_info *addr) -{ - struct mptcp_pm_add_entry *entry; - - lockdep_assert_held(&msk->pm.lock); - - list_for_each_entry(entry, &msk->pm.anno_list, list) { - if (mptcp_addresses_equal(&entry->addr, addr, true)) - return entry; - } - - return NULL; -} - -bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk) -{ - struct mptcp_pm_add_entry *entry; - struct mptcp_addr_info saddr; - bool ret = false; - - mptcp_local_address((struct sock_common *)sk, &saddr); - - spin_lock_bh(&msk->pm.lock); - list_for_each_entry(entry, &msk->pm.anno_list, list) { - if (mptcp_addresses_equal(&entry->addr, &saddr, true)) { - ret = true; - goto out; - } - } - -out: - spin_unlock_bh(&msk->pm.lock); - return ret; -} - -static void mptcp_pm_add_timer(struct timer_list *timer) -{ - struct mptcp_pm_add_entry *entry = from_timer(entry, timer, add_timer); - struct mptcp_sock *msk = entry->sock; - struct sock *sk = (struct sock *)msk; - - pr_debug("msk=%p\n", msk); - - if (!msk) - return; - - if (inet_sk_state_load(sk) == TCP_CLOSE) - return; - - if (!entry->addr.id) - return; - - if (mptcp_pm_should_add_signal_addr(msk)) { - sk_reset_timer(sk, timer, jiffies + TCP_RTO_MAX / 8); - goto out; - } - - spin_lock_bh(&msk->pm.lock); - - if (!mptcp_pm_should_add_signal_addr(msk)) { - pr_debug("retransmit ADD_ADDR id=%d\n", entry->addr.id); - mptcp_pm_announce_addr(msk, &entry->addr, false); - mptcp_pm_add_addr_send_ack(msk); - entry->retrans_times++; - } - - if (entry->retrans_times < ADD_ADDR_RETRANS_MAX) - sk_reset_timer(sk, timer, - jiffies + mptcp_get_add_addr_timeout(sock_net(sk))); - - spin_unlock_bh(&msk->pm.lock); - - if (entry->retrans_times 
== ADD_ADDR_RETRANS_MAX) - mptcp_pm_subflow_established(msk); - -out: - __sock_put(sk); -} - -struct mptcp_pm_add_entry * -mptcp_pm_del_add_timer(struct mptcp_sock *msk, - const struct mptcp_addr_info *addr, bool check_id) -{ - struct mptcp_pm_add_entry *entry; - struct sock *sk = (struct sock *)msk; - struct timer_list *add_timer = NULL; - - spin_lock_bh(&msk->pm.lock); - entry = mptcp_lookup_anno_list_by_saddr(msk, addr); - if (entry && (!check_id || entry->addr.id == addr->id)) { - entry->retrans_times = ADD_ADDR_RETRANS_MAX; - add_timer = &entry->add_timer; - } - if (!check_id && entry) - list_del(&entry->list); - spin_unlock_bh(&msk->pm.lock); - - /* no lock, because sk_stop_timer_sync() is calling del_timer_sync() */ - if (add_timer) - sk_stop_timer_sync(sk, add_timer); - - return entry; -} - -bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, - const struct mptcp_addr_info *addr) -{ - struct mptcp_pm_add_entry *add_entry = NULL; - struct sock *sk = (struct sock *)msk; - struct net *net = sock_net(sk); - - lockdep_assert_held(&msk->pm.lock); - - add_entry = mptcp_lookup_anno_list_by_saddr(msk, addr); - - if (add_entry) { - if (WARN_ON_ONCE(mptcp_pm_is_kernel(msk))) - return false; - - sk_reset_timer(sk, &add_entry->add_timer, - jiffies + mptcp_get_add_addr_timeout(net)); - return true; - } - - add_entry = kmalloc(sizeof(*add_entry), GFP_ATOMIC); - if (!add_entry) - return false; - - list_add(&add_entry->list, &msk->pm.anno_list); - - add_entry->addr = *addr; - add_entry->sock = msk; - add_entry->retrans_times = 0; - - timer_setup(&add_entry->add_timer, mptcp_pm_add_timer, 0); - sk_reset_timer(sk, &add_entry->add_timer, - jiffies + mptcp_get_add_addr_timeout(net)); - - return true; -} - -void mptcp_pm_free_anno_list(struct mptcp_sock *msk) -{ - struct mptcp_pm_add_entry *entry, *tmp; - struct sock *sk = (struct sock *)msk; - LIST_HEAD(free_list); - - pr_debug("msk=%p\n", msk); - - spin_lock_bh(&msk->pm.lock); - list_splice_init(&msk->pm.anno_list, &free_list); - spin_unlock_bh(&msk->pm.lock); - - list_for_each_entry_safe(entry, tmp, &free_list, list) { - sk_stop_timer_sync(sk, &entry->add_timer); - kfree(entry); - } -} - -/* Fill all the remote addresses into the array addrs[], - * and return the array size. 
- */ -static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, - struct mptcp_addr_info *local, - bool fullmesh, - struct mptcp_addr_info *addrs) -{ - bool deny_id0 = READ_ONCE(msk->pm.remote_deny_join_id0); - struct sock *sk = (struct sock *)msk, *ssk; - struct mptcp_subflow_context *subflow; - struct mptcp_addr_info remote = { 0 }; - unsigned int subflows_max; - int i = 0; - - subflows_max = mptcp_pm_get_subflows_max(msk); - remote_address((struct sock_common *)sk, &remote); - - /* Non-fullmesh endpoint, fill in the single entry - * corresponding to the primary MPC subflow remote address - */ - if (!fullmesh) { - if (deny_id0) - return 0; - - if (!mptcp_pm_addr_families_match(sk, local, &remote)) - return 0; - - msk->pm.subflows++; - addrs[i++] = remote; - } else { - DECLARE_BITMAP(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1); - - /* Forbid creation of new subflows matching existing - * ones, possibly already created by incoming ADD_ADDR - */ - bitmap_zero(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1); - mptcp_for_each_subflow(msk, subflow) - if (READ_ONCE(subflow->local_id) == local->id) - __set_bit(subflow->remote_id, unavail_id); - - mptcp_for_each_subflow(msk, subflow) { - ssk = mptcp_subflow_tcp_sock(subflow); - remote_address((struct sock_common *)ssk, &addrs[i]); - addrs[i].id = READ_ONCE(subflow->remote_id); - if (deny_id0 && !addrs[i].id) - continue; - - if (test_bit(addrs[i].id, unavail_id)) - continue; - - if (!mptcp_pm_addr_families_match(sk, local, &addrs[i])) - continue; - - if (msk->pm.subflows < subflows_max) { - /* forbid creating multiple address towards - * this id - */ - __set_bit(addrs[i].id, unavail_id); - msk->pm.subflows++; - i++; - } - } - } - - return i; -} - -static void __mptcp_pm_send_ack(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow, - bool prio, bool backup) -{ - struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - bool slow; - - pr_debug("send ack for %s\n", - prio ? "mp_prio" : (mptcp_pm_should_add_signal(msk) ? 
"add_addr" : "rm_addr")); - - slow = lock_sock_fast(ssk); - if (prio) { - subflow->send_mp_prio = 1; - subflow->request_bkup = backup; - } - - __mptcp_subflow_send_ack(ssk); - unlock_sock_fast(ssk, slow); -} - -static void mptcp_pm_send_ack(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow, - bool prio, bool backup) -{ - spin_unlock_bh(&msk->pm.lock); - __mptcp_pm_send_ack(msk, subflow, prio, backup); - spin_lock_bh(&msk->pm.lock); -} - -static struct mptcp_pm_addr_entry * -__lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id) -{ - struct mptcp_pm_addr_entry *entry; - - list_for_each_entry_rcu(entry, &pernet->local_addr_list, list, - lockdep_is_held(&pernet->lock)) { - if (entry->addr.id == id) - return entry; - } - return NULL; -} - -static struct mptcp_pm_addr_entry * -__lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info) -{ - struct mptcp_pm_addr_entry *entry; - - list_for_each_entry_rcu(entry, &pernet->local_addr_list, list, - lockdep_is_held(&pernet->lock)) { - if (mptcp_addresses_equal(&entry->addr, info, entry->addr.port)) - return entry; - } - return NULL; -} - -static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) -{ - struct sock *sk = (struct sock *)msk; - unsigned int add_addr_signal_max; - bool signal_and_subflow = false; - unsigned int local_addr_max; - struct pm_nl_pernet *pernet; - struct mptcp_pm_local local; - unsigned int subflows_max; - - pernet = pm_nl_get_pernet(sock_net(sk)); - - add_addr_signal_max = mptcp_pm_get_add_addr_signal_max(msk); - local_addr_max = mptcp_pm_get_local_addr_max(msk); - subflows_max = mptcp_pm_get_subflows_max(msk); - - /* do lazy endpoint usage accounting for the MPC subflows */ - if (unlikely(!(msk->pm.status & BIT(MPTCP_PM_MPC_ENDPOINT_ACCOUNTED))) && msk->first) { - struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(msk->first); - struct mptcp_pm_addr_entry *entry; - struct mptcp_addr_info mpc_addr; - bool backup = false; - - mptcp_local_address((struct sock_common *)msk->first, &mpc_addr); - rcu_read_lock(); - entry = __lookup_addr(pernet, &mpc_addr); - if (entry) { - __clear_bit(entry->addr.id, msk->pm.id_avail_bitmap); - msk->mpc_endpoint_id = entry->addr.id; - backup = !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP); - } - rcu_read_unlock(); - - if (backup) - mptcp_pm_send_ack(msk, subflow, true, backup); - - msk->pm.status |= BIT(MPTCP_PM_MPC_ENDPOINT_ACCOUNTED); - } - - pr_debug("local %d:%d signal %d:%d subflows %d:%d\n", - msk->pm.local_addr_used, local_addr_max, - msk->pm.add_addr_signaled, add_addr_signal_max, - msk->pm.subflows, subflows_max); - - /* check first for announce */ - if (msk->pm.add_addr_signaled < add_addr_signal_max) { - /* due to racing events on both ends we can reach here while - * previous add address is still running: if we invoke now - * mptcp_pm_announce_addr(), that will fail and the - * corresponding id will be marked as used. - * Instead let the PM machinery reschedule us when the - * current address announce will be completed. - */ - if (msk->pm.addr_signal & BIT(MPTCP_ADD_ADDR_SIGNAL)) - return; - - if (!select_signal_address(pernet, msk, &local)) - goto subflow; - - /* If the alloc fails, we are on memory pressure, not worth - * continuing, and trying to create subflows. 
- */ - if (!mptcp_pm_alloc_anno_list(msk, &local.addr)) - return; - - __clear_bit(local.addr.id, msk->pm.id_avail_bitmap); - msk->pm.add_addr_signaled++; - - /* Special case for ID0: set the correct ID */ - if (local.addr.id == msk->mpc_endpoint_id) - local.addr.id = 0; - - mptcp_pm_announce_addr(msk, &local.addr, false); - mptcp_pm_nl_addr_send_ack(msk); - - if (local.flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) - signal_and_subflow = true; - } - -subflow: - /* check if should create a new subflow */ - while (msk->pm.local_addr_used < local_addr_max && - msk->pm.subflows < subflows_max) { - struct mptcp_addr_info addrs[MPTCP_PM_ADDR_MAX]; - bool fullmesh; - int i, nr; - - if (signal_and_subflow) - signal_and_subflow = false; - else if (!select_local_address(pernet, msk, &local)) - break; - - fullmesh = !!(local.flags & MPTCP_PM_ADDR_FLAG_FULLMESH); - - __clear_bit(local.addr.id, msk->pm.id_avail_bitmap); - - /* Special case for ID0: set the correct ID */ - if (local.addr.id == msk->mpc_endpoint_id) - local.addr.id = 0; - else /* local_addr_used is not decr for ID 0 */ - msk->pm.local_addr_used++; - - nr = fill_remote_addresses_vec(msk, &local.addr, fullmesh, addrs); - if (nr == 0) - continue; - - spin_unlock_bh(&msk->pm.lock); - for (i = 0; i < nr; i++) - __mptcp_subflow_connect(sk, &local, &addrs[i]); - spin_lock_bh(&msk->pm.lock); - } - mptcp_pm_nl_check_work_pending(msk); -} - -static void mptcp_pm_nl_fully_established(struct mptcp_sock *msk) -{ - mptcp_pm_create_subflow_or_signal_addr(msk); -} - -static void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk) -{ - mptcp_pm_create_subflow_or_signal_addr(msk); -} - -/* Fill all the local addresses into the array addrs[], - * and return the array size. - */ -static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk, - struct mptcp_addr_info *remote, - struct mptcp_pm_local *locals) -{ - struct sock *sk = (struct sock *)msk; - struct mptcp_pm_addr_entry *entry; - struct mptcp_addr_info mpc_addr; - struct pm_nl_pernet *pernet; - unsigned int subflows_max; - int i = 0; - - pernet = pm_nl_get_pernet_from_msk(msk); - subflows_max = mptcp_pm_get_subflows_max(msk); - - mptcp_local_address((struct sock_common *)msk, &mpc_addr); - - rcu_read_lock(); - list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { - if (!(entry->flags & MPTCP_PM_ADDR_FLAG_FULLMESH)) - continue; - - if (!mptcp_pm_addr_families_match(sk, &entry->addr, remote)) - continue; - - if (msk->pm.subflows < subflows_max) { - locals[i].addr = entry->addr; - locals[i].flags = entry->flags; - locals[i].ifindex = entry->ifindex; - - /* Special case for ID0: set the correct ID */ - if (mptcp_addresses_equal(&locals[i].addr, &mpc_addr, locals[i].addr.port)) - locals[i].addr.id = 0; - - msk->pm.subflows++; - i++; - } - } - rcu_read_unlock(); - - /* If the array is empty, fill in the single - * 'IPADDRANY' local address - */ - if (!i) { - memset(&locals[i], 0, sizeof(locals[i])); - locals[i].addr.family = -#if IS_ENABLED(CONFIG_MPTCP_IPV6) - remote->family == AF_INET6 && - ipv6_addr_v4mapped(&remote->addr6) ? 
AF_INET : -#endif - remote->family; - - if (!mptcp_pm_addr_families_match(sk, &locals[i].addr, remote)) - return 0; - - msk->pm.subflows++; - i++; - } - - return i; -} - -static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) -{ - struct mptcp_pm_local locals[MPTCP_PM_ADDR_MAX]; - struct sock *sk = (struct sock *)msk; - unsigned int add_addr_accept_max; - struct mptcp_addr_info remote; - unsigned int subflows_max; - bool sf_created = false; - int i, nr; - - add_addr_accept_max = mptcp_pm_get_add_addr_accept_max(msk); - subflows_max = mptcp_pm_get_subflows_max(msk); - - pr_debug("accepted %d:%d remote family %d\n", - msk->pm.add_addr_accepted, add_addr_accept_max, - msk->pm.remote.family); - - remote = msk->pm.remote; - mptcp_pm_announce_addr(msk, &remote, true); - mptcp_pm_nl_addr_send_ack(msk); - - if (lookup_subflow_by_daddr(&msk->conn_list, &remote)) - return; - - /* pick id 0 port, if none is provided the remote address */ - if (!remote.port) - remote.port = sk->sk_dport; - - /* connect to the specified remote address, using whatever - * local address the routing configuration will pick. - */ - nr = fill_local_addresses_vec(msk, &remote, locals); - if (nr == 0) - return; - - spin_unlock_bh(&msk->pm.lock); - for (i = 0; i < nr; i++) - if (__mptcp_subflow_connect(sk, &locals[i], &remote) == 0) - sf_created = true; - spin_lock_bh(&msk->pm.lock); - - if (sf_created) { - /* add_addr_accepted is not decr for ID 0 */ - if (remote.id) - msk->pm.add_addr_accepted++; - if (msk->pm.add_addr_accepted >= add_addr_accept_max || - msk->pm.subflows >= subflows_max) - WRITE_ONCE(msk->pm.accept_addr, false); - } -} - -bool mptcp_pm_nl_is_init_remote_addr(struct mptcp_sock *msk, - const struct mptcp_addr_info *remote) -{ - struct mptcp_addr_info mpc_remote; - - remote_address((struct sock_common *)msk, &mpc_remote); - return mptcp_addresses_equal(&mpc_remote, remote, remote->port); -} - -void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk) -{ - struct mptcp_subflow_context *subflow, *alt = NULL; - - msk_owned_by_me(msk); - lockdep_assert_held(&msk->pm.lock); - - if (!mptcp_pm_should_add_signal(msk) && - !mptcp_pm_should_rm_signal(msk)) - return; - - mptcp_for_each_subflow(msk, subflow) { - if (__mptcp_subflow_active(subflow)) { - if (!subflow->stale) { - mptcp_pm_send_ack(msk, subflow, false, false); - return; - } - - if (!alt) - alt = subflow; - } - } - - if (alt) - mptcp_pm_send_ack(msk, alt, false, false); -} - -int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, - struct mptcp_addr_info *addr, - struct mptcp_addr_info *rem, - u8 bkup) -{ - struct mptcp_subflow_context *subflow; - - pr_debug("bkup=%d\n", bkup); - - mptcp_for_each_subflow(msk, subflow) { - struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - struct mptcp_addr_info local, remote; - - mptcp_local_address((struct sock_common *)ssk, &local); - if (!mptcp_addresses_equal(&local, addr, addr->port)) - continue; - - if (rem && rem->family != AF_UNSPEC) { - remote_address((struct sock_common *)ssk, &remote); - if (!mptcp_addresses_equal(&remote, rem, rem->port)) - continue; - } - - __mptcp_pm_send_ack(msk, subflow, true, bkup); - return 0; - } - - return -EINVAL; -} - -static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, - const struct mptcp_rm_list *rm_list, - enum linux_mptcp_mib_field rm_type) -{ - struct mptcp_subflow_context *subflow, *tmp; - struct sock *sk = (struct sock *)msk; - u8 i; - - pr_debug("%s rm_list_nr %d\n", - rm_type == MPTCP_MIB_RMADDR ? 
"address" : "subflow", rm_list->nr); - - msk_owned_by_me(msk); - - if (sk->sk_state == TCP_LISTEN) - return; - - if (!rm_list->nr) - return; - - if (list_empty(&msk->conn_list)) - return; - - for (i = 0; i < rm_list->nr; i++) { - u8 rm_id = rm_list->ids[i]; - bool removed = false; - - mptcp_for_each_subflow_safe(msk, subflow, tmp) { - struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - u8 remote_id = READ_ONCE(subflow->remote_id); - int how = RCV_SHUTDOWN | SEND_SHUTDOWN; - u8 id = subflow_get_local_id(subflow); - - if ((1 << inet_sk_state_load(ssk)) & - (TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 | TCPF_CLOSING | TCPF_CLOSE)) - continue; - if (rm_type == MPTCP_MIB_RMADDR && remote_id != rm_id) - continue; - if (rm_type == MPTCP_MIB_RMSUBFLOW && id != rm_id) - continue; - - pr_debug(" -> %s rm_list_ids[%d]=%u local_id=%u remote_id=%u mpc_id=%u\n", - rm_type == MPTCP_MIB_RMADDR ? "address" : "subflow", - i, rm_id, id, remote_id, msk->mpc_endpoint_id); - spin_unlock_bh(&msk->pm.lock); - mptcp_subflow_shutdown(sk, ssk, how); - removed |= subflow->request_join; - - /* the following takes care of updating the subflows counter */ - mptcp_close_ssk(sk, ssk, subflow); - spin_lock_bh(&msk->pm.lock); - - if (rm_type == MPTCP_MIB_RMSUBFLOW) - __MPTCP_INC_STATS(sock_net(sk), rm_type); - } - - if (rm_type == MPTCP_MIB_RMADDR) - __MPTCP_INC_STATS(sock_net(sk), rm_type); - - if (!removed) - continue; - - if (!mptcp_pm_is_kernel(msk)) - continue; - - if (rm_type == MPTCP_MIB_RMADDR && rm_id && - !WARN_ON_ONCE(msk->pm.add_addr_accepted == 0)) { - /* Note: if the subflow has been closed before, this - * add_addr_accepted counter will not be decremented. - */ - if (--msk->pm.add_addr_accepted < mptcp_pm_get_add_addr_accept_max(msk)) - WRITE_ONCE(msk->pm.accept_addr, true); - } - } -} - -static void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk) -{ - mptcp_pm_nl_rm_addr_or_subflow(msk, &msk->pm.rm_list_rx, MPTCP_MIB_RMADDR); -} - -static void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, - const struct mptcp_rm_list *rm_list) -{ - mptcp_pm_nl_rm_addr_or_subflow(msk, rm_list, MPTCP_MIB_RMSUBFLOW); -} - -void mptcp_pm_nl_work(struct mptcp_sock *msk) -{ - struct mptcp_pm_data *pm = &msk->pm; - - msk_owned_by_me(msk); - - if (!(pm->status & MPTCP_PM_WORK_MASK)) - return; - - spin_lock_bh(&msk->pm.lock); - - pr_debug("msk=%p status=%x\n", msk, pm->status); - if (pm->status & BIT(MPTCP_PM_ADD_ADDR_RECEIVED)) { - pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_RECEIVED); - mptcp_pm_nl_add_addr_received(msk); - } - if (pm->status & BIT(MPTCP_PM_ADD_ADDR_SEND_ACK)) { - pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_SEND_ACK); - mptcp_pm_nl_addr_send_ack(msk); - } - if (pm->status & BIT(MPTCP_PM_RM_ADDR_RECEIVED)) { - pm->status &= ~BIT(MPTCP_PM_RM_ADDR_RECEIVED); - mptcp_pm_nl_rm_addr_received(msk); - } - if (pm->status & BIT(MPTCP_PM_ESTABLISHED)) { - pm->status &= ~BIT(MPTCP_PM_ESTABLISHED); - mptcp_pm_nl_fully_established(msk); - } - if (pm->status & BIT(MPTCP_PM_SUBFLOW_ESTABLISHED)) { - pm->status &= ~BIT(MPTCP_PM_SUBFLOW_ESTABLISHED); - mptcp_pm_nl_subflow_established(msk); - } - - spin_unlock_bh(&msk->pm.lock); -} - -static bool address_use_port(struct mptcp_pm_addr_entry *entry) -{ - return (entry->flags & - (MPTCP_PM_ADDR_FLAG_SIGNAL | MPTCP_PM_ADDR_FLAG_SUBFLOW)) == - MPTCP_PM_ADDR_FLAG_SIGNAL; -} - -/* caller must ensure the RCU grace period is already elapsed */ -static void __mptcp_pm_release_addr_entry(struct mptcp_pm_addr_entry *entry) -{ - if (entry->lsk) - sock_release(entry->lsk); - kfree(entry); -} - -static int 
mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet, - struct mptcp_pm_addr_entry *entry, - bool needs_id) -{ - struct mptcp_pm_addr_entry *cur, *del_entry = NULL; - unsigned int addr_max; - int ret = -EINVAL; - - spin_lock_bh(&pernet->lock); - /* to keep the code simple, don't do IDR-like allocation for address ID, - * just bail when we exceed limits - */ - if (pernet->next_id == MPTCP_PM_MAX_ADDR_ID) - pernet->next_id = 1; - if (pernet->addrs >= MPTCP_PM_ADDR_MAX) { - ret = -ERANGE; - goto out; - } - if (test_bit(entry->addr.id, pernet->id_bitmap)) { - ret = -EBUSY; - goto out; - } - - /* do not insert duplicate address, differentiate on port only - * singled addresses - */ - if (!address_use_port(entry)) - entry->addr.port = 0; - list_for_each_entry(cur, &pernet->local_addr_list, list) { - if (mptcp_addresses_equal(&cur->addr, &entry->addr, - cur->addr.port || entry->addr.port)) { - /* allow replacing the exiting endpoint only if such - * endpoint is an implicit one and the user-space - * did not provide an endpoint id - */ - if (!(cur->flags & MPTCP_PM_ADDR_FLAG_IMPLICIT)) { - ret = -EEXIST; - goto out; - } - if (entry->addr.id) - goto out; - - pernet->addrs--; - entry->addr.id = cur->addr.id; - list_del_rcu(&cur->list); - del_entry = cur; - break; - } - } - - if (!entry->addr.id && needs_id) { -find_next: - entry->addr.id = find_next_zero_bit(pernet->id_bitmap, - MPTCP_PM_MAX_ADDR_ID + 1, - pernet->next_id); - if (!entry->addr.id && pernet->next_id != 1) { - pernet->next_id = 1; - goto find_next; - } - } - - if (!entry->addr.id && needs_id) - goto out; - - __set_bit(entry->addr.id, pernet->id_bitmap); - if (entry->addr.id > pernet->next_id) - pernet->next_id = entry->addr.id; - - if (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL) { - addr_max = pernet->add_addr_signal_max; - WRITE_ONCE(pernet->add_addr_signal_max, addr_max + 1); - } - if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) { - addr_max = pernet->local_addr_max; - WRITE_ONCE(pernet->local_addr_max, addr_max + 1); - } - - pernet->addrs++; - if (!entry->addr.port) - list_add_tail_rcu(&entry->list, &pernet->local_addr_list); - else - list_add_rcu(&entry->list, &pernet->local_addr_list); - ret = entry->addr.id; - -out: - spin_unlock_bh(&pernet->lock); - - /* just replaced an existing entry, free it */ - if (del_entry) { - synchronize_rcu(); - __mptcp_pm_release_addr_entry(del_entry); - } - return ret; -} - -static struct lock_class_key mptcp_slock_keys[2]; -static struct lock_class_key mptcp_keys[2]; - -static int mptcp_pm_nl_create_listen_socket(struct sock *sk, - struct mptcp_pm_addr_entry *entry) -{ - bool is_ipv6 = sk->sk_family == AF_INET6; - int addrlen = sizeof(struct sockaddr_in); - struct sockaddr_storage addr; - struct sock *newsk, *ssk; - int backlog = 1024; - int err; - - err = sock_create_kern(sock_net(sk), entry->addr.family, - SOCK_STREAM, IPPROTO_MPTCP, &entry->lsk); - if (err) - return err; - - newsk = entry->lsk->sk; - if (!newsk) - return -EINVAL; - - /* The subflow socket lock is acquired in a nested to the msk one - * in several places, even by the TCP stack, and this msk is a kernel - * socket: lockdep complains. Instead of propagating the _nested - * modifiers in several places, re-init the lock class for the msk - * socket to an mptcp specific one. - */ - sock_lock_init_class_and_name(newsk, - is_ipv6 ? "mlock-AF_INET6" : "mlock-AF_INET", - &mptcp_slock_keys[is_ipv6], - is_ipv6 ? 
"msk_lock-AF_INET6" : "msk_lock-AF_INET", - &mptcp_keys[is_ipv6]); - - lock_sock(newsk); - ssk = __mptcp_nmpc_sk(mptcp_sk(newsk)); - release_sock(newsk); - if (IS_ERR(ssk)) - return PTR_ERR(ssk); - - mptcp_info2sockaddr(&entry->addr, &addr, entry->addr.family); -#if IS_ENABLED(CONFIG_MPTCP_IPV6) - if (entry->addr.family == AF_INET6) - addrlen = sizeof(struct sockaddr_in6); -#endif - if (ssk->sk_family == AF_INET) - err = inet_bind_sk(ssk, (struct sockaddr *)&addr, addrlen); -#if IS_ENABLED(CONFIG_MPTCP_IPV6) - else if (ssk->sk_family == AF_INET6) - err = inet6_bind_sk(ssk, (struct sockaddr *)&addr, addrlen); -#endif - if (err) - return err; - - /* We don't use mptcp_set_state() here because it needs to be called - * under the msk socket lock. For the moment, that will not bring - * anything more than only calling inet_sk_state_store(), because the - * old status is known (TCP_CLOSE). - */ - inet_sk_state_store(newsk, TCP_LISTEN); - lock_sock(ssk); - WRITE_ONCE(mptcp_subflow_ctx(ssk)->pm_listener, true); - err = __inet_listen_sk(ssk, backlog); - if (!err) - mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CREATED); - release_sock(ssk); - return err; -} - -int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc) -{ - struct mptcp_pm_addr_entry *entry; - struct pm_nl_pernet *pernet; - int ret; - - pernet = pm_nl_get_pernet_from_msk(msk); - - rcu_read_lock(); - entry = __lookup_addr(pernet, skc); - ret = entry ? entry->addr.id : -1; - rcu_read_unlock(); - if (ret >= 0) - return ret; - - /* address not found, add to local list */ - entry = kmalloc(sizeof(*entry), GFP_ATOMIC); - if (!entry) - return -ENOMEM; - - entry->addr = *skc; - entry->addr.id = 0; - entry->addr.port = 0; - entry->ifindex = 0; - entry->flags = MPTCP_PM_ADDR_FLAG_IMPLICIT; - entry->lsk = NULL; - ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, true); - if (ret < 0) - kfree(entry); - - return ret; -} - -bool mptcp_pm_nl_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc) -{ - struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); - struct mptcp_pm_addr_entry *entry; - bool backup; - - rcu_read_lock(); - entry = __lookup_addr(pernet, skc); - backup = entry && !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP); - rcu_read_unlock(); - - return backup; -} - #define MPTCP_PM_CMD_GRP_OFFSET 0 #define MPTCP_PM_EV_GRP_OFFSET 1 @@ -1196,43 +19,6 @@ static const struct genl_multicast_group mptcp_pm_mcgrps[] = { }, }; -void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk) -{ - struct mptcp_subflow_context *iter, *subflow = mptcp_subflow_ctx(ssk); - struct sock *sk = (struct sock *)msk; - unsigned int active_max_loss_cnt; - struct net *net = sock_net(sk); - unsigned int stale_loss_cnt; - bool slow; - - stale_loss_cnt = mptcp_stale_loss_cnt(net); - if (subflow->stale || !stale_loss_cnt || subflow->stale_count <= stale_loss_cnt) - return; - - /* look for another available subflow not in loss state */ - active_max_loss_cnt = max_t(int, stale_loss_cnt - 1, 1); - mptcp_for_each_subflow(msk, iter) { - if (iter != subflow && mptcp_subflow_active(iter) && - iter->stale_count < active_max_loss_cnt) { - /* we have some alternatives, try to mark this subflow as idle ...*/ - slow = lock_sock_fast(ssk); - if (!tcp_rtx_and_write_queues_empty(ssk)) { - subflow->stale = 1; - __mptcp_retransmit_pending_data(sk); - MPTCP_INC_STATS(net, MPTCP_MIB_SUBFLOWSTALE); - } - unlock_sock_fast(ssk, slow); - - /* always try to push the pending data regardless of re-injections: - * we can 
possibly use backup subflows now, and subflow selection - * is cheap under the msk socket lock - */ - __mptcp_push_pending(sk, 0); - return; - } - } -} - static int mptcp_pm_family_to_addr(int family) { #if IS_ENABLED(CONFIG_MPTCP_IPV6) @@ -1341,396 +127,8 @@ int mptcp_pm_parse_entry(struct nlattr *attr, struct genl_info *info, return 0; } -static struct pm_nl_pernet *genl_info_pm_nl(struct genl_info *info) -{ - return pm_nl_get_pernet(genl_info_net(info)); -} - -static int mptcp_nl_add_subflow_or_signal_addr(struct net *net, - struct mptcp_addr_info *addr) -{ - struct mptcp_sock *msk; - long s_slot = 0, s_num = 0; - - while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { - struct sock *sk = (struct sock *)msk; - struct mptcp_addr_info mpc_addr; - - if (!READ_ONCE(msk->fully_established) || - mptcp_pm_is_userspace(msk)) - goto next; - - /* if the endp linked to the init sf is re-added with a != ID */ - mptcp_local_address((struct sock_common *)msk, &mpc_addr); - - lock_sock(sk); - spin_lock_bh(&msk->pm.lock); - if (mptcp_addresses_equal(addr, &mpc_addr, addr->port)) - msk->mpc_endpoint_id = addr->id; - mptcp_pm_create_subflow_or_signal_addr(msk); - spin_unlock_bh(&msk->pm.lock); - release_sock(sk); - -next: - sock_put(sk); - cond_resched(); - } - - return 0; -} - -static bool mptcp_pm_has_addr_attr_id(const struct nlattr *attr, - struct genl_info *info) -{ - struct nlattr *tb[MPTCP_PM_ADDR_ATTR_MAX + 1]; - - if (!nla_parse_nested_deprecated(tb, MPTCP_PM_ADDR_ATTR_MAX, attr, - mptcp_pm_address_nl_policy, info->extack) && - tb[MPTCP_PM_ADDR_ATTR_ID]) - return true; - return false; -} - -int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info) -{ - struct pm_nl_pernet *pernet = genl_info_pm_nl(info); - struct mptcp_pm_addr_entry addr, *entry; - struct nlattr *attr; - int ret; - - if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ENDPOINT_ADDR)) - return -EINVAL; - - attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; - ret = mptcp_pm_parse_entry(attr, info, true, &addr); - if (ret < 0) - return ret; - - if (addr.addr.port && !address_use_port(&addr)) { - NL_SET_ERR_MSG_ATTR(info->extack, attr, - "flags must have signal and not subflow when using port"); - return -EINVAL; - } - - if (addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL && - addr.flags & MPTCP_PM_ADDR_FLAG_FULLMESH) { - NL_SET_ERR_MSG_ATTR(info->extack, attr, - "flags mustn't have both signal and fullmesh"); - return -EINVAL; - } - - if (addr.flags & MPTCP_PM_ADDR_FLAG_IMPLICIT) { - NL_SET_ERR_MSG_ATTR(info->extack, attr, - "can't create IMPLICIT endpoint"); - return -EINVAL; - } - - entry = kzalloc(sizeof(*entry), GFP_KERNEL_ACCOUNT); - if (!entry) { - GENL_SET_ERR_MSG(info, "can't allocate addr"); - return -ENOMEM; - } - - *entry = addr; - if (entry->addr.port) { - ret = mptcp_pm_nl_create_listen_socket(skb->sk, entry); - if (ret) { - GENL_SET_ERR_MSG_FMT(info, "create listen socket error: %d", ret); - goto out_free; - } - } - ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, - !mptcp_pm_has_addr_attr_id(attr, info)); - if (ret < 0) { - GENL_SET_ERR_MSG_FMT(info, "too many addresses or duplicate one: %d", ret); - goto out_free; - } - - mptcp_nl_add_subflow_or_signal_addr(sock_net(skb->sk), &entry->addr); - return 0; - -out_free: - __mptcp_pm_release_addr_entry(entry); - return ret; -} - -bool mptcp_remove_anno_list_by_saddr(struct mptcp_sock *msk, - const struct mptcp_addr_info *addr) -{ - struct mptcp_pm_add_entry *entry; - - entry = mptcp_pm_del_add_timer(msk, addr, false); - if (entry) { - kfree(entry); - return 
true; - } - - return false; -} - -static u8 mptcp_endp_get_local_id(struct mptcp_sock *msk, - const struct mptcp_addr_info *addr) -{ - return msk->mpc_endpoint_id == addr->id ? 0 : addr->id; -} - -static bool mptcp_pm_remove_anno_addr(struct mptcp_sock *msk, - const struct mptcp_addr_info *addr, - bool force) -{ - struct mptcp_rm_list list = { .nr = 0 }; - bool ret; - - list.ids[list.nr++] = mptcp_endp_get_local_id(msk, addr); - - ret = mptcp_remove_anno_list_by_saddr(msk, addr); - if (ret || force) { - spin_lock_bh(&msk->pm.lock); - if (ret) { - __set_bit(addr->id, msk->pm.id_avail_bitmap); - msk->pm.add_addr_signaled--; - } - mptcp_pm_remove_addr(msk, &list); - spin_unlock_bh(&msk->pm.lock); - } - return ret; -} - -static void __mark_subflow_endp_available(struct mptcp_sock *msk, u8 id) -{ - /* If it was marked as used, and not ID 0, decrement local_addr_used */ - if (!__test_and_set_bit(id ? : msk->mpc_endpoint_id, msk->pm.id_avail_bitmap) && - id && !WARN_ON_ONCE(msk->pm.local_addr_used == 0)) - msk->pm.local_addr_used--; -} - -static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net, - const struct mptcp_pm_addr_entry *entry) -{ - const struct mptcp_addr_info *addr = &entry->addr; - struct mptcp_rm_list list = { .nr = 1 }; - long s_slot = 0, s_num = 0; - struct mptcp_sock *msk; - - pr_debug("remove_id=%d\n", addr->id); - - while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { - struct sock *sk = (struct sock *)msk; - bool remove_subflow; - - if (mptcp_pm_is_userspace(msk)) - goto next; - - lock_sock(sk); - remove_subflow = mptcp_lookup_subflow_by_saddr(&msk->conn_list, addr); - mptcp_pm_remove_anno_addr(msk, addr, remove_subflow && - !(entry->flags & MPTCP_PM_ADDR_FLAG_IMPLICIT)); - - list.ids[0] = mptcp_endp_get_local_id(msk, addr); - if (remove_subflow) { - spin_lock_bh(&msk->pm.lock); - mptcp_pm_nl_rm_subflow_received(msk, &list); - spin_unlock_bh(&msk->pm.lock); - } - - if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) { - spin_lock_bh(&msk->pm.lock); - __mark_subflow_endp_available(msk, list.ids[0]); - spin_unlock_bh(&msk->pm.lock); - } - - if (msk->mpc_endpoint_id == entry->addr.id) - msk->mpc_endpoint_id = 0; - release_sock(sk); - -next: - sock_put(sk); - cond_resched(); - } - - return 0; -} - -static int mptcp_nl_remove_id_zero_address(struct net *net, - struct mptcp_addr_info *addr) -{ - struct mptcp_rm_list list = { .nr = 0 }; - long s_slot = 0, s_num = 0; - struct mptcp_sock *msk; - - list.ids[list.nr++] = 0; - - while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { - struct sock *sk = (struct sock *)msk; - struct mptcp_addr_info msk_local; - - if (list_empty(&msk->conn_list) || mptcp_pm_is_userspace(msk)) - goto next; - - mptcp_local_address((struct sock_common *)msk, &msk_local); - if (!mptcp_addresses_equal(&msk_local, addr, addr->port)) - goto next; - - lock_sock(sk); - spin_lock_bh(&msk->pm.lock); - mptcp_pm_remove_addr(msk, &list); - mptcp_pm_nl_rm_subflow_received(msk, &list); - __mark_subflow_endp_available(msk, 0); - spin_unlock_bh(&msk->pm.lock); - release_sock(sk); - -next: - sock_put(sk); - cond_resched(); - } - - return 0; -} - -int mptcp_pm_nl_del_addr_doit(struct sk_buff *skb, struct genl_info *info) -{ - struct pm_nl_pernet *pernet = genl_info_pm_nl(info); - struct mptcp_pm_addr_entry addr, *entry; - unsigned int addr_max; - struct nlattr *attr; - int ret; - - if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ENDPOINT_ADDR)) - return -EINVAL; - - attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; - ret = mptcp_pm_parse_entry(attr, info, 
false, &addr); - if (ret < 0) - return ret; - - /* the zero id address is special: the first address used by the msk - * always gets such an id, so different subflows can have different zero - * id addresses. Additionally zero id is not accounted for in id_bitmap. - * Let's use an 'mptcp_rm_list' instead of the common remove code. - */ - if (addr.addr.id == 0) - return mptcp_nl_remove_id_zero_address(sock_net(skb->sk), &addr.addr); - - spin_lock_bh(&pernet->lock); - entry = __lookup_addr_by_id(pernet, addr.addr.id); - if (!entry) { - NL_SET_ERR_MSG_ATTR(info->extack, attr, "address not found"); - spin_unlock_bh(&pernet->lock); - return -EINVAL; - } - if (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL) { - addr_max = pernet->add_addr_signal_max; - WRITE_ONCE(pernet->add_addr_signal_max, addr_max - 1); - } - if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) { - addr_max = pernet->local_addr_max; - WRITE_ONCE(pernet->local_addr_max, addr_max - 1); - } - - pernet->addrs--; - list_del_rcu(&entry->list); - __clear_bit(entry->addr.id, pernet->id_bitmap); - spin_unlock_bh(&pernet->lock); - - mptcp_nl_remove_subflow_and_signal_addr(sock_net(skb->sk), entry); - synchronize_rcu(); - __mptcp_pm_release_addr_entry(entry); - - return ret; -} - -static void mptcp_pm_flush_addrs_and_subflows(struct mptcp_sock *msk, - struct list_head *rm_list) -{ - struct mptcp_rm_list alist = { .nr = 0 }, slist = { .nr = 0 }; - struct mptcp_pm_addr_entry *entry; - - list_for_each_entry(entry, rm_list, list) { - if (slist.nr < MPTCP_RM_IDS_MAX && - mptcp_lookup_subflow_by_saddr(&msk->conn_list, &entry->addr)) - slist.ids[slist.nr++] = mptcp_endp_get_local_id(msk, &entry->addr); - - if (alist.nr < MPTCP_RM_IDS_MAX && - mptcp_remove_anno_list_by_saddr(msk, &entry->addr)) - alist.ids[alist.nr++] = mptcp_endp_get_local_id(msk, &entry->addr); - } - - spin_lock_bh(&msk->pm.lock); - if (alist.nr) { - msk->pm.add_addr_signaled -= alist.nr; - mptcp_pm_remove_addr(msk, &alist); - } - if (slist.nr) - mptcp_pm_nl_rm_subflow_received(msk, &slist); - /* Reset counters: maybe some subflows have been removed before */ - bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); - msk->pm.local_addr_used = 0; - spin_unlock_bh(&msk->pm.lock); -} - -static void mptcp_nl_flush_addrs_list(struct net *net, - struct list_head *rm_list) -{ - long s_slot = 0, s_num = 0; - struct mptcp_sock *msk; - - if (list_empty(rm_list)) - return; - - while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { - struct sock *sk = (struct sock *)msk; - - if (!mptcp_pm_is_userspace(msk)) { - lock_sock(sk); - mptcp_pm_flush_addrs_and_subflows(msk, rm_list); - release_sock(sk); - } - - sock_put(sk); - cond_resched(); - } -} - -/* caller must ensure the RCU grace period is already elapsed */ -static void __flush_addrs(struct list_head *list) -{ - while (!list_empty(list)) { - struct mptcp_pm_addr_entry *cur; - - cur = list_entry(list->next, - struct mptcp_pm_addr_entry, list); - list_del_rcu(&cur->list); - __mptcp_pm_release_addr_entry(cur); - } -} - -static void __reset_counters(struct pm_nl_pernet *pernet) -{ - WRITE_ONCE(pernet->add_addr_signal_max, 0); - WRITE_ONCE(pernet->add_addr_accept_max, 0); - WRITE_ONCE(pernet->local_addr_max, 0); - pernet->addrs = 0; -} - -int mptcp_pm_nl_flush_addrs_doit(struct sk_buff *skb, struct genl_info *info) -{ - struct pm_nl_pernet *pernet = genl_info_pm_nl(info); - LIST_HEAD(free_list); - - spin_lock_bh(&pernet->lock); - list_splice_init(&pernet->local_addr_list, &free_list); - __reset_counters(pernet); - pernet->next_id 
= 1; - bitmap_zero(pernet->id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); - spin_unlock_bh(&pernet->lock); - mptcp_nl_flush_addrs_list(sock_net(skb->sk), &free_list); - synchronize_rcu(); - __flush_addrs(&free_list); - return 0; -} - -int mptcp_nl_fill_addr(struct sk_buff *skb, - struct mptcp_pm_addr_entry *entry) +static int mptcp_nl_fill_addr(struct sk_buff *skb, + struct mptcp_pm_addr_entry *entry) { struct mptcp_addr_info *addr = &entry->addr; struct nlattr *attr; @@ -1768,224 +166,119 @@ nla_put_failure: return -EMSGSIZE; } -int mptcp_pm_nl_get_addr(u8 id, struct mptcp_pm_addr_entry *addr, - struct genl_info *info) +static int mptcp_pm_get_addr(u8 id, struct mptcp_pm_addr_entry *addr, + struct genl_info *info) { - struct pm_nl_pernet *pernet = genl_info_pm_nl(info); - struct mptcp_pm_addr_entry *entry; - int ret = -EINVAL; - - rcu_read_lock(); - entry = __lookup_addr_by_id(pernet, id); - if (entry) { - *addr = *entry; - ret = 0; - } - rcu_read_unlock(); - - return ret; + if (info->attrs[MPTCP_PM_ATTR_TOKEN]) + return mptcp_userspace_pm_get_addr(id, addr, info); + return mptcp_pm_nl_get_addr(id, addr, info); } -int mptcp_pm_nl_dump_addr(struct sk_buff *msg, - struct netlink_callback *cb) +int mptcp_pm_nl_get_addr_doit(struct sk_buff *skb, struct genl_info *info) { - struct net *net = sock_net(msg->sk); - struct mptcp_pm_addr_entry *entry; - struct pm_nl_pernet *pernet; - int id = cb->args[0]; - int i; - - pernet = pm_nl_get_pernet(net); - - rcu_read_lock(); - for (i = id; i < MPTCP_PM_MAX_ADDR_ID + 1; i++) { - if (test_bit(i, pernet->id_bitmap)) { - entry = __lookup_addr_by_id(pernet, i); - if (!entry) - break; - - if (entry->addr.id <= id) - continue; - - if (mptcp_pm_genl_fill_addr(msg, cb, entry) < 0) - break; - - id = entry->addr.id; - } - } - rcu_read_unlock(); - - cb->args[0] = id; - return msg->len; -} - -static int parse_limit(struct genl_info *info, int id, unsigned int *limit) -{ - struct nlattr *attr = info->attrs[id]; - - if (!attr) - return 0; - - *limit = nla_get_u32(attr); - if (*limit > MPTCP_PM_ADDR_MAX) { - NL_SET_ERR_MSG_ATTR_FMT(info->extack, attr, - "limit greater than maximum (%u)", - MPTCP_PM_ADDR_MAX); - return -EINVAL; - } - return 0; -} - -int mptcp_pm_nl_set_limits_doit(struct sk_buff *skb, struct genl_info *info) -{ - struct pm_nl_pernet *pernet = genl_info_pm_nl(info); - unsigned int rcv_addrs, subflows; + struct mptcp_pm_addr_entry addr; + struct nlattr *attr; + struct sk_buff *msg; + void *reply; int ret; - spin_lock_bh(&pernet->lock); - rcv_addrs = pernet->add_addr_accept_max; - ret = parse_limit(info, MPTCP_PM_ATTR_RCV_ADD_ADDRS, &rcv_addrs); - if (ret) - goto unlock; - - subflows = pernet->subflows_max; - ret = parse_limit(info, MPTCP_PM_ATTR_SUBFLOWS, &subflows); - if (ret) - goto unlock; - - WRITE_ONCE(pernet->add_addr_accept_max, rcv_addrs); - WRITE_ONCE(pernet->subflows_max, subflows); - -unlock: - spin_unlock_bh(&pernet->lock); - return ret; -} + if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ENDPOINT_ADDR)) + return -EINVAL; -int mptcp_pm_nl_get_limits_doit(struct sk_buff *skb, struct genl_info *info) -{ - struct pm_nl_pernet *pernet = genl_info_pm_nl(info); - struct sk_buff *msg; - void *reply; + attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; + ret = mptcp_pm_parse_entry(attr, info, false, &addr); + if (ret < 0) + return ret; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; reply = genlmsg_put_reply(msg, info, &mptcp_genl_family, 0, - MPTCP_PM_CMD_GET_LIMITS); - if (!reply) + info->genlhdr->cmd); + if (!reply) { + GENL_SET_ERR_MSG(info, 
"not enough space in Netlink message"); + ret = -EMSGSIZE; goto fail; + } - if (nla_put_u32(msg, MPTCP_PM_ATTR_RCV_ADD_ADDRS, - READ_ONCE(pernet->add_addr_accept_max))) + ret = mptcp_pm_get_addr(addr.addr.id, &addr, info); + if (ret) { + NL_SET_ERR_MSG_ATTR(info->extack, attr, "address not found"); goto fail; + } - if (nla_put_u32(msg, MPTCP_PM_ATTR_SUBFLOWS, - READ_ONCE(pernet->subflows_max))) + ret = mptcp_nl_fill_addr(msg, &addr); + if (ret) goto fail; genlmsg_end(msg, reply); - return genlmsg_reply(msg, info); + ret = genlmsg_reply(msg, info); + return ret; fail: - GENL_SET_ERR_MSG(info, "not enough space in Netlink message"); nlmsg_free(msg); - return -EMSGSIZE; + return ret; } -static void mptcp_pm_nl_fullmesh(struct mptcp_sock *msk, - struct mptcp_addr_info *addr) +int mptcp_pm_genl_fill_addr(struct sk_buff *msg, + struct netlink_callback *cb, + struct mptcp_pm_addr_entry *entry) { - struct mptcp_rm_list list = { .nr = 0 }; + void *hdr; - list.ids[list.nr++] = mptcp_endp_get_local_id(msk, addr); + hdr = genlmsg_put(msg, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, &mptcp_genl_family, + NLM_F_MULTI, MPTCP_PM_CMD_GET_ADDR); + if (!hdr) + return -EINVAL; - spin_lock_bh(&msk->pm.lock); - mptcp_pm_nl_rm_subflow_received(msk, &list); - __mark_subflow_endp_available(msk, list.ids[0]); - mptcp_pm_create_subflow_or_signal_addr(msk); - spin_unlock_bh(&msk->pm.lock); + if (mptcp_nl_fill_addr(msg, entry) < 0) { + genlmsg_cancel(msg, hdr); + return -EINVAL; + } + + genlmsg_end(msg, hdr); + return 0; } -static void mptcp_nl_set_flags(struct net *net, - struct mptcp_pm_addr_entry *local, - u8 changed) +static int mptcp_pm_dump_addr(struct sk_buff *msg, struct netlink_callback *cb) { - u8 is_subflow = !!(local->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW); - u8 bkup = !!(local->flags & MPTCP_PM_ADDR_FLAG_BACKUP); - long s_slot = 0, s_num = 0; - struct mptcp_sock *msk; + const struct genl_info *info = genl_info_dump(cb); - if (changed == MPTCP_PM_ADDR_FLAG_FULLMESH && !is_subflow) - return; + if (info->attrs[MPTCP_PM_ATTR_TOKEN]) + return mptcp_userspace_pm_dump_addr(msg, cb); + return mptcp_pm_nl_dump_addr(msg, cb); +} - while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { - struct sock *sk = (struct sock *)msk; +int mptcp_pm_nl_get_addr_dumpit(struct sk_buff *msg, + struct netlink_callback *cb) +{ + return mptcp_pm_dump_addr(msg, cb); +} - if (list_empty(&msk->conn_list) || mptcp_pm_is_userspace(msk)) - goto next; +static int mptcp_pm_set_flags(struct genl_info *info) +{ + struct mptcp_pm_addr_entry loc = { .addr = { .family = AF_UNSPEC }, }; + struct nlattr *attr_loc; + int ret = -EINVAL; - lock_sock(sk); - if (changed & MPTCP_PM_ADDR_FLAG_BACKUP) - mptcp_pm_nl_mp_prio_send_ack(msk, &local->addr, NULL, bkup); - /* Subflows will only be recreated if the SUBFLOW flag is set */ - if (is_subflow && (changed & MPTCP_PM_ADDR_FLAG_FULLMESH)) - mptcp_pm_nl_fullmesh(msk, &local->addr); - release_sock(sk); + if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ATTR_ADDR)) + return ret; -next: - sock_put(sk); - cond_resched(); - } + attr_loc = info->attrs[MPTCP_PM_ATTR_ADDR]; + ret = mptcp_pm_parse_entry(attr_loc, info, false, &loc); + if (ret < 0) + return ret; - return; + if (info->attrs[MPTCP_PM_ATTR_TOKEN]) + return mptcp_userspace_pm_set_flags(&loc, info); + return mptcp_pm_nl_set_flags(&loc, info); } -int mptcp_pm_nl_set_flags(struct mptcp_pm_addr_entry *local, - struct genl_info *info) +int mptcp_pm_nl_set_flags_doit(struct sk_buff *skb, struct genl_info *info) { - struct nlattr *attr = 
info->attrs[MPTCP_PM_ATTR_ADDR]; - u8 changed, mask = MPTCP_PM_ADDR_FLAG_BACKUP | - MPTCP_PM_ADDR_FLAG_FULLMESH; - struct net *net = genl_info_net(info); - struct mptcp_pm_addr_entry *entry; - struct pm_nl_pernet *pernet; - u8 lookup_by_id = 0; - - pernet = pm_nl_get_pernet(net); - - if (local->addr.family == AF_UNSPEC) { - lookup_by_id = 1; - if (!local->addr.id) { - NL_SET_ERR_MSG_ATTR(info->extack, attr, - "missing address ID"); - return -EOPNOTSUPP; - } - } - - spin_lock_bh(&pernet->lock); - entry = lookup_by_id ? __lookup_addr_by_id(pernet, local->addr.id) : - __lookup_addr(pernet, &local->addr); - if (!entry) { - spin_unlock_bh(&pernet->lock); - NL_SET_ERR_MSG_ATTR(info->extack, attr, "address not found"); - return -EINVAL; - } - if ((local->flags & MPTCP_PM_ADDR_FLAG_FULLMESH) && - (entry->flags & (MPTCP_PM_ADDR_FLAG_SIGNAL | - MPTCP_PM_ADDR_FLAG_IMPLICIT))) { - spin_unlock_bh(&pernet->lock); - NL_SET_ERR_MSG_ATTR(info->extack, attr, "invalid addr flags"); - return -EINVAL; - } - - changed = (local->flags ^ entry->flags) & mask; - entry->flags = (entry->flags & ~mask) | (local->flags & mask); - *local = *entry; - spin_unlock_bh(&pernet->lock); - - mptcp_nl_set_flags(net, local, changed); - return 0; + return mptcp_pm_set_flags(info); } static void mptcp_nl_mcast_send(struct net *net, struct sk_buff *nlskb, gfp_t gfp) @@ -2332,53 +625,3 @@ struct genl_family mptcp_genl_family __ro_after_init = { .mcgrps = mptcp_pm_mcgrps, .n_mcgrps = ARRAY_SIZE(mptcp_pm_mcgrps), }; - -static int __net_init pm_nl_init_net(struct net *net) -{ - struct pm_nl_pernet *pernet = pm_nl_get_pernet(net); - - INIT_LIST_HEAD_RCU(&pernet->local_addr_list); - - /* Cit. 2 subflows ought to be enough for anybody. */ - pernet->subflows_max = 2; - pernet->next_id = 1; - pernet->stale_loss_cnt = 4; - spin_lock_init(&pernet->lock); - - /* No need to initialize other pernet fields, the struct is zeroed at - * allocation time. - */ - - return 0; -} - -static void __net_exit pm_nl_exit_net(struct list_head *net_list) -{ - struct net *net; - - list_for_each_entry(net, net_list, exit_list) { - struct pm_nl_pernet *pernet = pm_nl_get_pernet(net); - - /* net is removed from namespace list, can't race with - * other modifiers, also netns core already waited for a - * RCU grace period. 
- */ - __flush_addrs(&pernet->local_addr_list); - } -} - -static struct pernet_operations mptcp_pm_pernet_ops = { - .init = pm_nl_init_net, - .exit_batch = pm_nl_exit_net, - .id = &pm_nl_pernet_id, - .size = sizeof(struct pm_nl_pernet), -}; - -void __init mptcp_pm_nl_init(void) -{ - if (register_pernet_subsys(&mptcp_pm_pernet_ops) < 0) - panic("Failed to register MPTCP PM pernet subsystem.\n"); - - if (genl_register_family(&mptcp_genl_family)) - panic("Failed to register MPTCP PM netlink family\n"); -} diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index 7e7d01bef5d4..13856df22673 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -12,15 +12,12 @@ list_for_each_entry(__entry, \ &((__msk)->pm.userspace_pm_local_addr_list), list) -void mptcp_free_local_addr_list(struct mptcp_sock *msk) +void mptcp_userspace_pm_free_local_addr_list(struct mptcp_sock *msk) { struct mptcp_pm_addr_entry *entry, *tmp; struct sock *sk = (struct sock *)msk; LIST_HEAD(free_list); - if (!mptcp_pm_is_userspace(msk)) - return; - spin_lock_bh(&msk->pm.lock); list_splice_init(&msk->pm.userspace_pm_local_addr_list, &free_list); spin_unlock_bh(&msk->pm.lock); @@ -130,27 +127,22 @@ mptcp_userspace_pm_lookup_addr_by_id(struct mptcp_sock *msk, unsigned int id) } int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, - struct mptcp_addr_info *skc) + struct mptcp_pm_addr_entry *skc) { - struct mptcp_pm_addr_entry *entry = NULL, new_entry; __be16 msk_sport = ((struct inet_sock *) inet_sk((struct sock *)msk))->inet_sport; + struct mptcp_pm_addr_entry *entry; spin_lock_bh(&msk->pm.lock); - entry = mptcp_userspace_pm_lookup_addr(msk, skc); + entry = mptcp_userspace_pm_lookup_addr(msk, &skc->addr); spin_unlock_bh(&msk->pm.lock); if (entry) return entry->addr.id; - memset(&new_entry, 0, sizeof(struct mptcp_pm_addr_entry)); - new_entry.addr = *skc; - new_entry.addr.id = 0; - new_entry.flags = MPTCP_PM_ADDR_FLAG_IMPLICIT; - - if (new_entry.addr.port == msk_sport) - new_entry.addr.port = 0; + if (skc->addr.port == msk_sport) + skc->addr.port = 0; - return mptcp_userspace_pm_append_new_local_addr(msk, &new_entry, true); + return mptcp_userspace_pm_append_new_local_addr(msk, skc, true); } bool mptcp_userspace_pm_is_backup(struct mptcp_sock *msk, @@ -239,7 +231,7 @@ int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info) if (mptcp_pm_alloc_anno_list(msk, &addr_val.addr)) { msk->pm.add_addr_signaled++; mptcp_pm_announce_addr(msk, &addr_val.addr, false); - mptcp_pm_nl_addr_send_ack(msk); + mptcp_pm_addr_send_ack(msk); } spin_unlock_bh(&msk->pm.lock); @@ -610,10 +602,10 @@ int mptcp_userspace_pm_set_flags(struct mptcp_pm_addr_entry *local, spin_unlock_bh(&msk->pm.lock); lock_sock(sk); - ret = mptcp_pm_nl_mp_prio_send_ack(msk, &local->addr, &rem, bkup); + ret = mptcp_pm_mp_prio_send_ack(msk, &local->addr, &rem, bkup); release_sock(sk); - /* mptcp_pm_nl_mp_prio_send_ack() only fails in one case */ + /* mptcp_pm_mp_prio_send_ack() only fails in one case */ if (ret < 0) GENL_SET_ERR_MSG(info, "subflow not found"); diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index ec23e65ef0f1..ad780ae1d30d 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2681,7 +2681,7 @@ static void mptcp_worker(struct work_struct *work) mptcp_check_fastclose(msk); - mptcp_pm_nl_work(msk); + mptcp_pm_worker(msk); mptcp_check_send_data_fin(sk); mptcp_check_data_fin_ack(sk); @@ -3302,8 +3302,7 @@ void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags) * inet_sock_destruct() 
will dispose it */ mptcp_token_destroy(msk); - mptcp_pm_free_anno_list(msk); - mptcp_free_local_addr_list(msk); + mptcp_pm_destroy(msk); } static void mptcp_destroy(struct sock *sk) diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 7b74dedc7936..c51b6a22d5e0 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -724,7 +724,10 @@ void mptcp_set_state(struct sock *sk, int state); bool mptcp_addresses_equal(const struct mptcp_addr_info *a, const struct mptcp_addr_info *b, bool use_port); -void mptcp_local_address(const struct sock_common *skc, struct mptcp_addr_info *addr); +void mptcp_local_address(const struct sock_common *skc, + struct mptcp_addr_info *addr); +void mptcp_remote_address(const struct sock_common *skc, + struct mptcp_addr_info *addr); /* called with sk socket lock held */ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_pm_local *local, @@ -983,6 +986,7 @@ __sum16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum su void __init mptcp_pm_init(void); void mptcp_pm_data_init(struct mptcp_sock *msk); void mptcp_pm_data_reset(struct mptcp_sock *msk); +void mptcp_pm_destroy(struct mptcp_sock *msk); int mptcp_pm_parse_addr(struct nlattr *attr, struct genl_info *info, struct mptcp_addr_info *addr); int mptcp_pm_parse_entry(struct nlattr *attr, struct genl_info *info, @@ -992,7 +996,6 @@ bool mptcp_pm_addr_families_match(const struct sock *sk, const struct mptcp_addr_info *loc, const struct mptcp_addr_info *rem); void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk); -void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk); void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side); void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk); bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk); @@ -1006,27 +1009,27 @@ void mptcp_pm_add_addr_received(const struct sock *ssk, void mptcp_pm_add_addr_echoed(struct mptcp_sock *msk, const struct mptcp_addr_info *addr); void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk); -bool mptcp_pm_nl_is_init_remote_addr(struct mptcp_sock *msk, - const struct mptcp_addr_info *remote); -void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk); +void mptcp_pm_send_ack(struct mptcp_sock *msk, + struct mptcp_subflow_context *subflow, + bool prio, bool backup); +void mptcp_pm_addr_send_ack(struct mptcp_sock *msk); +void mptcp_pm_nl_rm_addr(struct mptcp_sock *msk, u8 rm_id); +void mptcp_pm_rm_subflow(struct mptcp_sock *msk, + const struct mptcp_rm_list *rm_list); void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup); void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq); -int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, - struct mptcp_addr_info *addr, - struct mptcp_addr_info *rem, - u8 bkup); +int mptcp_pm_mp_prio_send_ack(struct mptcp_sock *msk, + struct mptcp_addr_info *addr, + struct mptcp_addr_info *rem, + u8 bkup); bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, const struct mptcp_addr_info *addr); -void mptcp_pm_free_anno_list(struct mptcp_sock *msk); bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk); struct mptcp_pm_add_entry * mptcp_pm_del_add_timer(struct mptcp_sock *msk, const struct mptcp_addr_info *addr, bool check_id); -struct mptcp_pm_add_entry * -mptcp_lookup_anno_list_by_saddr(const struct mptcp_sock *msk, - const struct 
mptcp_addr_info *addr); bool mptcp_lookup_subflow_by_saddr(const struct list_head *list, const struct mptcp_addr_info *saddr); bool mptcp_remove_anno_list_by_saddr(struct mptcp_sock *msk, @@ -1042,7 +1045,7 @@ int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_ void mptcp_pm_remove_addr_entry(struct mptcp_sock *msk, struct mptcp_pm_addr_entry *entry); -void mptcp_free_local_addr_list(struct mptcp_sock *msk); +void mptcp_userspace_pm_free_local_addr_list(struct mptcp_sock *msk); void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp); @@ -1054,8 +1057,6 @@ bool mptcp_userspace_pm_active(const struct mptcp_sock *msk); void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subflow, struct request_sock *req); -int mptcp_nl_fill_addr(struct sk_buff *skb, - struct mptcp_pm_addr_entry *entry); int mptcp_pm_genl_fill_addr(struct sk_buff *msg, struct netlink_callback *cb, struct mptcp_pm_addr_entry *entry); @@ -1121,8 +1122,10 @@ bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, const struct sk_buff *skb, bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining, struct mptcp_rm_list *rm_list); int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc); -int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc); -int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc); +int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, + struct mptcp_pm_addr_entry *skc); +int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, + struct mptcp_pm_addr_entry *skc); bool mptcp_pm_is_backup(struct mptcp_sock *msk, struct sock_common *skc); bool mptcp_pm_nl_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc); bool mptcp_userspace_pm_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc); @@ -1145,7 +1148,8 @@ static inline u8 subflow_get_local_id(const struct mptcp_subflow_context *subflo } void __init mptcp_pm_nl_init(void); -void mptcp_pm_nl_work(struct mptcp_sock *msk); +void mptcp_pm_worker(struct mptcp_sock *msk); +void __mptcp_pm_kernel_worker(struct mptcp_sock *msk); unsigned int mptcp_pm_get_add_addr_signal_max(const struct mptcp_sock *msk); unsigned int mptcp_pm_get_add_addr_accept_max(const struct mptcp_sock *msk); unsigned int mptcp_pm_get_subflows_max(const struct mptcp_sock *msk); diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 7d13110ce188..0633276d96bf 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -3091,12 +3091,12 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) case IP_VS_SO_GET_SERVICES: { struct ip_vs_get_services *get; - int size; + size_t size; get = (struct ip_vs_get_services *)arg; size = struct_size(get, entrytable, get->num_services); if (*len != size) { - pr_err("length: %u != %u\n", *len, size); + pr_err("length: %u != %zu\n", *len, size); ret = -EINVAL; goto out; } @@ -3132,12 +3132,12 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) case IP_VS_SO_GET_DESTS: { struct ip_vs_get_dests *get; - int size; + size_t size; get = (struct ip_vs_get_dests *)arg; size = struct_size(get, entrytable, get->num_dests); if (*len != size) { - pr_err("length: %u != %u\n", *len, size); + pr_err("length: %u != %zu\n", *len, size); ret = -EINVAL; goto out; } diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c index 4890af4dc263..913ede2f57f9 100644 
--- a/net/netfilter/nf_conncount.c +++ b/net/netfilter/nf_conncount.c @@ -132,7 +132,7 @@ static int __nf_conncount_add(struct net *net, struct nf_conn *found_ct; unsigned int collect = 0; - if (time_is_after_eq_jiffies((unsigned long)list->last_gc)) + if ((u32)jiffies == list->last_gc) goto add_new_node; /* check the saved connections */ @@ -234,7 +234,7 @@ bool nf_conncount_gc_list(struct net *net, bool ret = false; /* don't bother if we just did GC */ - if (time_is_after_eq_jiffies((unsigned long)READ_ONCE(list->last_gc))) + if ((u32)jiffies == READ_ONCE(list->last_gc)) return false; /* don't bother if other cpu is already doing GC */ @@ -377,6 +377,8 @@ restart: conn->tuple = *tuple; conn->zone = *zone; + conn->cpu = raw_smp_processor_id(); + conn->jiffies32 = (u32)jiffies; memcpy(rbconn->key, key, sizeof(u32) * data->keylen); nf_conncount_list_init(&rbconn->list); diff --git a/net/netfilter/nf_log_syslog.c b/net/netfilter/nf_log_syslog.c index 58402226045e..86d5fc5d28e3 100644 --- a/net/netfilter/nf_log_syslog.c +++ b/net/netfilter/nf_log_syslog.c @@ -216,7 +216,9 @@ nf_log_dump_tcp_header(struct nf_log_buf *m, /* Max length: 9 "RES=0x3C " */ nf_log_buf_add(m, "RES=0x%02x ", (u_int8_t)(ntohl(tcp_flag_word(th) & TCP_RESERVED_BITS) >> 22)); - /* Max length: 32 "CWR ECE URG ACK PSH RST SYN FIN " */ + /* Max length: 35 "AE CWR ECE URG ACK PSH RST SYN FIN " */ + if (th->ae) + nf_log_buf_add(m, "AE "); if (th->cwr) nf_log_buf_add(m, "CWR "); if (th->ece) @@ -516,7 +518,7 @@ dump_ipv4_packet(struct net *net, struct nf_log_buf *m, /* Proto Max log string length */ /* IP: 40+46+6+11+127 = 230 */ - /* TCP: 10+max(25,20+30+13+9+32+11+127) = 252 */ + /* TCP: 10+max(25,20+30+13+9+35+11+127) = 255 */ /* UDP: 10+max(25,20) = 35 */ /* UDPLITE: 14+max(25,20) = 39 */ /* ICMP: 11+max(25, 18+25+max(19,14,24+3+n+10,3+n+10)) = 91+n */ @@ -526,7 +528,7 @@ dump_ipv4_packet(struct net *net, struct nf_log_buf *m, /* (ICMP allows recursion one level deep) */ /* maxlen = IP + ICMP + IP + max(TCP,UDP,ICMP,unknown) */ - /* maxlen = 230+ 91 + 230 + 252 = 803 */ + /* maxlen = 230+ 91 + 230 + 255 = 806 */ } static noinline_for_stack void diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index a34de9c17cf1..c2df81b7e950 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -34,7 +34,6 @@ unsigned int nf_tables_net_id __read_mostly; static LIST_HEAD(nf_tables_expressions); static LIST_HEAD(nf_tables_objects); static LIST_HEAD(nf_tables_flowtables); -static LIST_HEAD(nf_tables_destroy_list); static LIST_HEAD(nf_tables_gc_list); static DEFINE_SPINLOCK(nf_tables_destroy_list_lock); static DEFINE_SPINLOCK(nf_tables_gc_list_lock); @@ -125,7 +124,6 @@ static void nft_validate_state_update(struct nft_table *table, u8 new_validate_s table->validate_state = new_validate_state; } static void nf_tables_trans_destroy_work(struct work_struct *w); -static DECLARE_WORK(trans_destroy_work, nf_tables_trans_destroy_work); static void nft_trans_gc_work(struct work_struct *work); static DECLARE_WORK(trans_gc_work, nft_trans_gc_work); @@ -10006,11 +10004,12 @@ static void nft_commit_release(struct nft_trans *trans) static void nf_tables_trans_destroy_work(struct work_struct *w) { + struct nftables_pernet *nft_net = container_of(w, struct nftables_pernet, destroy_work); struct nft_trans *trans, *next; LIST_HEAD(head); spin_lock(&nf_tables_destroy_list_lock); - list_splice_init(&nf_tables_destroy_list, &head); + list_splice_init(&nft_net->destroy_list, &head); 
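/* [editor sketch] The hunk above swaps the old global nf_tables_destroy_list
 * for the per-netns nft_net->destroy_list, but keeps the classic
 * detach-then-process shape: hold the spinlock only long enough to splice
 * the shared list onto a private head, then walk that private head with no
 * lock held. Moving the list and work item into struct nftables_pernet is
 * what lets nf_tables_exit_net() further down call cancel_work_sync() on
 * its own namespace's destroy_work instead of draining a global one.
 * Below is a minimal, runnable userspace analogue of the pattern; every
 * name in it (work_item, pending, pending_lock, destroy_work) is
 * illustrative, not kernel API.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct work_item {
	int id;
	struct work_item *next;
};

static struct work_item *pending;	/* shared, lock-protected list */
static pthread_mutex_t pending_lock = PTHREAD_MUTEX_INITIALIZER;

static void destroy_work(void)
{
	struct work_item *head, *next;

	pthread_mutex_lock(&pending_lock);
	head = pending;		/* detach the whole list at once */
	pending = NULL;		/* producers now see an empty list */
	pthread_mutex_unlock(&pending_lock);

	for (; head; head = next) {	/* no lock held while freeing */
		next = head->next;
		printf("destroying item %d\n", head->id);
		free(head);
	}
}

int main(void)
{
	for (int i = 0; i < 3; i++) {
		struct work_item *it = malloc(sizeof(*it));

		if (!it)
			return 1;
		it->id = i;
		pthread_mutex_lock(&pending_lock);
		it->next = pending;
		pending = it;
		pthread_mutex_unlock(&pending_lock);
	}
	destroy_work();
	return 0;
}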
spin_unlock(&nf_tables_destroy_list_lock); if (list_empty(&head)) @@ -10024,9 +10023,11 @@ static void nf_tables_trans_destroy_work(struct work_struct *w) } } -void nf_tables_trans_destroy_flush_work(void) +void nf_tables_trans_destroy_flush_work(struct net *net) { - flush_work(&trans_destroy_work); + struct nftables_pernet *nft_net = nft_pernet(net); + + flush_work(&nft_net->destroy_work); } EXPORT_SYMBOL_GPL(nf_tables_trans_destroy_flush_work); @@ -10484,11 +10485,11 @@ static void nf_tables_commit_release(struct net *net) trans->put_net = true; spin_lock(&nf_tables_destroy_list_lock); - list_splice_tail_init(&nft_net->commit_list, &nf_tables_destroy_list); + list_splice_tail_init(&nft_net->commit_list, &nft_net->destroy_list); spin_unlock(&nf_tables_destroy_list_lock); nf_tables_module_autoload_cleanup(net); - schedule_work(&trans_destroy_work); + schedule_work(&nft_net->destroy_work); mutex_unlock(&nft_net->commit_mutex); } @@ -11853,7 +11854,7 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event, gc_seq = nft_gc_seq_begin(nft_net); - nf_tables_trans_destroy_flush_work(); + nf_tables_trans_destroy_flush_work(net); again: list_for_each_entry(table, &nft_net->tables, list) { if (nft_table_has_owner(table) && @@ -11895,6 +11896,7 @@ static int __net_init nf_tables_init_net(struct net *net) INIT_LIST_HEAD(&nft_net->tables); INIT_LIST_HEAD(&nft_net->commit_list); + INIT_LIST_HEAD(&nft_net->destroy_list); INIT_LIST_HEAD(&nft_net->commit_set_list); INIT_LIST_HEAD(&nft_net->binding_list); INIT_LIST_HEAD(&nft_net->module_list); @@ -11903,6 +11905,7 @@ static int __net_init nf_tables_init_net(struct net *net) nft_net->base_seq = 1; nft_net->gc_seq = 0; nft_net->validate_state = NFT_VALIDATE_SKIP; + INIT_WORK(&nft_net->destroy_work, nf_tables_trans_destroy_work); return 0; } @@ -11931,14 +11934,17 @@ static void __net_exit nf_tables_exit_net(struct net *net) if (!list_empty(&nft_net->module_list)) nf_tables_module_autoload_cleanup(net); + cancel_work_sync(&nft_net->destroy_work); __nft_release_tables(net); nft_gc_seq_end(nft_net, gc_seq); mutex_unlock(&nft_net->commit_mutex); + WARN_ON_ONCE(!list_empty(&nft_net->tables)); WARN_ON_ONCE(!list_empty(&nft_net->module_list)); WARN_ON_ONCE(!list_empty(&nft_net->notify_list)); + WARN_ON_ONCE(!list_empty(&nft_net->destroy_list)); } static void nf_tables_exit_batch(struct list_head *net_exit_list) @@ -12029,10 +12035,8 @@ static void __exit nf_tables_module_exit(void) unregister_netdevice_notifier(&nf_tables_flowtable_notifier); nft_chain_filter_fini(); nft_chain_route_fini(); - nf_tables_trans_destroy_flush_work(); unregister_pernet_subsys(&nf_tables_net_ops); cancel_work_sync(&trans_gc_work); - cancel_work_sync(&trans_destroy_work); rcu_barrier(); rhltable_destroy(&nft_objname_ht); nf_tables_core_module_exit(); diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 7ca4f0d21fe2..72711d62fddf 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -228,7 +228,7 @@ static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv) return 0; } -static void nft_compat_wait_for_destructors(void) +static void nft_compat_wait_for_destructors(struct net *net) { /* xtables matches or targets can have side effects, e.g. * creation/destruction of /proc files. @@ -236,7 +236,7 @@ static void nft_compat_wait_for_destructors(void) * work queue. If we have pending invocations we thus * need to wait for those to finish. 
*/ - nf_tables_trans_destroy_flush_work(); + nf_tables_trans_destroy_flush_work(net); } static int @@ -262,7 +262,7 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr, nft_target_set_tgchk_param(&par, ctx, target, info, &e, proto, inv); - nft_compat_wait_for_destructors(); + nft_compat_wait_for_destructors(ctx->net); ret = xt_check_target(&par, size, proto, inv); if (ret < 0) { @@ -515,7 +515,7 @@ __nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, nft_match_set_mtchk_param(&par, ctx, match, info, &e, proto, inv); - nft_compat_wait_for_destructors(); + nft_compat_wait_for_destructors(ctx->net); return xt_check_match(&par, size, proto, inv); } diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 2e59aba681a1..d526e69a2a2b 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -230,6 +230,7 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr, enum ip_conntrack_info ctinfo; u16 value = nft_reg_load16(®s->data[priv->sreg]); struct nf_conn *ct; + int oldcnt; ct = nf_ct_get(skb, &ctinfo); if (ct) /* already tracked */ @@ -250,10 +251,11 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr, ct = this_cpu_read(nft_ct_pcpu_template); - if (likely(refcount_read(&ct->ct_general.use) == 1)) { - refcount_inc(&ct->ct_general.use); + __refcount_inc(&ct->ct_general.use, &oldcnt); + if (likely(oldcnt == 1)) { nf_ct_zone_add(ct, &zone); } else { + refcount_dec(&ct->ct_general.use); /* previous skb got queued to userspace, allocate temporary * one until percpu template can be reused. */ diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index b8d03364566c..c74012c99125 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -85,7 +85,6 @@ static int ipv4_find_option(struct net *net, struct sk_buff *skb, unsigned char optbuf[sizeof(struct ip_options) + 40]; struct ip_options *opt = (struct ip_options *)optbuf; struct iphdr *iph, _iph; - unsigned int start; bool found = false; __be32 info; int optlen; @@ -93,7 +92,6 @@ static int ipv4_find_option(struct net *net, struct sk_buff *skb, iph = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); if (!iph) return -EBADMSG; - start = sizeof(struct iphdr); optlen = iph->ihl * 4 - (int)sizeof(struct iphdr); if (optlen <= 0) @@ -103,7 +101,7 @@ static int ipv4_find_option(struct net *net, struct sk_buff *skb, /* Copy the options since __ip_options_compile() modifies * the options. */ - if (skb_copy_bits(skb, start, opt->__data, optlen)) + if (skb_copy_bits(skb, sizeof(struct iphdr), opt->__data, optlen)) return -EBADMSG; opt->optlen = optlen; @@ -118,18 +116,18 @@ static int ipv4_find_option(struct net *net, struct sk_buff *skb, found = target == IPOPT_SSRR ? 
opt->is_strictroute : !opt->is_strictroute; if (found) - *offset = opt->srr + start; + *offset = opt->srr; break; case IPOPT_RR: if (!opt->rr) break; - *offset = opt->rr + start; + *offset = opt->rr; found = true; break; case IPOPT_RA: if (!opt->router_alert) break; - *offset = opt->router_alert + start; + *offset = opt->router_alert; found = true; break; default: diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 3bb4810234aa..e573e9221302 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -1368,8 +1368,11 @@ bool ovs_ct_verify(struct net *net, enum ovs_key_attr attr) attr == OVS_KEY_ATTR_CT_MARK) return true; if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && - attr == OVS_KEY_ATTR_CT_LABELS) - return true; + attr == OVS_KEY_ATTR_CT_LABELS) { + struct ovs_net *ovs_net = net_generic(net, ovs_net_id); + + return ovs_net->xt_label; + } return false; } @@ -1378,7 +1381,6 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr, const struct sw_flow_key *key, struct sw_flow_actions **sfa, bool log) { - unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE; struct ovs_conntrack_info ct_info; const char *helper = NULL; u16 family; @@ -1407,12 +1409,6 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr, return -ENOMEM; } - if (nf_connlabels_get(net, n_bits - 1)) { - nf_ct_tmpl_free(ct_info.ct); - OVS_NLERR(log, "Failed to set connlabel length"); - return -EOPNOTSUPP; - } - if (ct_info.timeout[0]) { if (nf_ct_set_timeout(net, ct_info.ct, family, key->ip.proto, ct_info.timeout)) @@ -1581,7 +1577,6 @@ static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info) if (ct_info->ct) { if (ct_info->timeout[0]) nf_ct_destroy_timeout(ct_info->ct); - nf_connlabels_put(nf_ct_net(ct_info->ct)); nf_ct_tmpl_free(ct_info->ct); } } @@ -2006,9 +2001,17 @@ struct genl_family dp_ct_limit_genl_family __ro_after_init = { int ovs_ct_init(struct net *net) { -#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT) + unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE; struct ovs_net *ovs_net = net_generic(net, ovs_net_id); + if (nf_connlabels_get(net, n_bits - 1)) { + ovs_net->xt_label = false; + OVS_NLERR(true, "Failed to set connlabel length"); + } else { + ovs_net->xt_label = true; + } + +#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT) return ovs_ct_limit_init(net, ovs_net); #else return 0; @@ -2017,9 +2020,12 @@ int ovs_ct_init(struct net *net) void ovs_ct_exit(struct net *net) { -#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT) struct ovs_net *ovs_net = net_generic(net, ovs_net_id); +#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT) ovs_ct_limit_exit(net, ovs_net); #endif + + if (ovs_net->xt_label) + nf_connlabels_put(net); } diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 365b9bb7f546..9ca6231ea647 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -160,6 +160,9 @@ struct ovs_net { #if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT) struct ovs_ct_limit_info *ct_limit_info; #endif + + /* Module reference for configuring conntrack. 
*/ + bool xt_label; }; /** diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 881ddd3696d5..95e0dd14dc1a 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -2317,14 +2317,10 @@ int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb) OVS_FLOW_ATTR_MASK, true, skb); } -#define MAX_ACTIONS_BUFSIZE (32 * 1024) - static struct sw_flow_actions *nla_alloc_flow_actions(int size) { struct sw_flow_actions *sfa; - WARN_ON_ONCE(size > MAX_ACTIONS_BUFSIZE); - sfa = kmalloc(kmalloc_size_roundup(sizeof(*sfa) + size), GFP_KERNEL); if (!sfa) return ERR_PTR(-ENOMEM); @@ -2480,15 +2476,6 @@ static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, new_acts_size = max(next_offset + req_size, ksize(*sfa) * 2); - if (new_acts_size > MAX_ACTIONS_BUFSIZE) { - if ((next_offset + req_size) > MAX_ACTIONS_BUFSIZE) { - OVS_NLERR(log, "Flow action size exceeds max %u", - MAX_ACTIONS_BUFSIZE); - return ERR_PTR(-EMSGSIZE); - } - new_acts_size = MAX_ACTIONS_BUFSIZE; - } - acts = nla_alloc_flow_actions(new_acts_size); if (IS_ERR(acts)) return ERR_CAST(acts); @@ -3545,7 +3532,7 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, int err; u32 mpls_label_count = 0; - *sfa = nla_alloc_flow_actions(min(nla_len(attr), MAX_ACTIONS_BUFSIZE)); + *sfa = nla_alloc_flow_actions(nla_len(attr)); if (IS_ERR(*sfa)) return PTR_ERR(*sfa); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index e3e91cf867eb..aef39f6dc6a8 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -26,6 +26,7 @@ #include <linux/slab.h> #include <linux/hashtable.h> +#include <net/netdev_lock.h> #include <net/net_namespace.h> #include <net/sock.h> #include <net/netlink.h> @@ -1279,9 +1280,11 @@ static struct Qdisc *qdisc_create(struct net_device *dev, * We replay the request because the device may * go away in the mean time. */ + netdev_unlock_ops(dev); rtnl_unlock(); request_module(NET_SCH_ALIAS_PREFIX "%s", name); rtnl_lock(); + netdev_lock_ops(dev); ops = qdisc_lookup_ops(kind); if (ops != NULL) { /* We will try again qdisc_lookup_ops, @@ -1505,27 +1508,18 @@ const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = { * Delete/get qdisc. 
*/ -static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, - struct netlink_ext_ack *extack) +static int __tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, + struct netlink_ext_ack *extack, + struct net_device *dev, + struct nlattr *tca[TCA_MAX + 1], + struct tcmsg *tcm) { struct net *net = sock_net(skb->sk); - struct tcmsg *tcm = nlmsg_data(n); - struct nlattr *tca[TCA_MAX + 1]; - struct net_device *dev; - u32 clid; struct Qdisc *q = NULL; struct Qdisc *p = NULL; + u32 clid; int err; - err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX, - rtm_tca_policy, extack); - if (err < 0) - return err; - - dev = __dev_get_by_index(net, tcm->tcm_ifindex); - if (!dev) - return -ENODEV; - clid = tcm->tcm_parent; if (clid) { if (clid != TC_H_ROOT) { @@ -1582,6 +1576,31 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, return 0; } +static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, + struct netlink_ext_ack *extack) +{ + struct net *net = sock_net(skb->sk); + struct tcmsg *tcm = nlmsg_data(n); + struct nlattr *tca[TCA_MAX + 1]; + struct net_device *dev; + int err; + + err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX, + rtm_tca_policy, extack); + if (err < 0) + return err; + + dev = __dev_get_by_index(net, tcm->tcm_ifindex); + if (!dev) + return -ENODEV; + + netdev_lock_ops(dev); + err = __tc_get_qdisc(skb, n, extack, dev, tca, tcm); + netdev_unlock_ops(dev); + + return err; +} + static bool req_create_or_replace(struct nlmsghdr *n) { return (n->nlmsg_flags & NLM_F_CREATE && @@ -1601,35 +1620,19 @@ static bool req_change(struct nlmsghdr *n) !(n->nlmsg_flags & NLM_F_EXCL)); } -/* - * Create/change qdisc. - */ -static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, - struct netlink_ext_ack *extack) +static int __tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, + struct netlink_ext_ack *extack, + struct net_device *dev, + struct nlattr *tca[TCA_MAX + 1], + struct tcmsg *tcm, + bool *replay) { - struct net *net = sock_net(skb->sk); - struct tcmsg *tcm; - struct nlattr *tca[TCA_MAX + 1]; - struct net_device *dev; + struct Qdisc *q = NULL; + struct Qdisc *p = NULL; u32 clid; - struct Qdisc *q, *p; int err; -replay: - /* Reinit, just in case something touches this. */ - err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX, - rtm_tca_policy, extack); - if (err < 0) - return err; - - tcm = nlmsg_data(n); clid = tcm->tcm_parent; - q = p = NULL; - - dev = __dev_get_by_index(net, tcm->tcm_ifindex); - if (!dev) - return -ENODEV; - if (clid) { if (clid != TC_H_ROOT) { @@ -1755,7 +1758,7 @@ replay: } err = qdisc_change(q, tca, extack); if (err == 0) - qdisc_notify(net, skb, n, clid, NULL, q, extack); + qdisc_notify(sock_net(skb->sk), skb, n, clid, NULL, q, extack); return err; create_n_graft: @@ -1788,8 +1791,10 @@ create_n_graft2: tca, &err, extack); } if (q == NULL) { - if (err == -EAGAIN) - goto replay; + if (err == -EAGAIN) { + *replay = true; + return 0; + } return err; } @@ -1804,6 +1809,41 @@ graft: return 0; } +/* + * Create/change qdisc. + */ +static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, + struct netlink_ext_ack *extack) +{ + struct net *net = sock_net(skb->sk); + struct nlattr *tca[TCA_MAX + 1]; + struct net_device *dev; + struct tcmsg *tcm; + bool replay; + int err; + +replay: + /* Reinit, just in case something touches this. 
*/ + err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX, + rtm_tca_policy, extack); + if (err < 0) + return err; + + tcm = nlmsg_data(n); + dev = __dev_get_by_index(net, tcm->tcm_ifindex); + if (!dev) + return -ENODEV; + + replay = false; + netdev_lock_ops(dev); + err = __tc_modify_qdisc(skb, n, extack, dev, tca, tcm, &replay); + netdev_unlock_ops(dev); + if (replay) + goto replay; + + return err; +} + static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, struct netlink_callback *cb, int *q_idx_p, int s_q_idx, bool recur, @@ -1888,17 +1928,23 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) s_q_idx = 0; q_idx = 0; + netdev_lock_ops(dev); if (tc_dump_qdisc_root(rtnl_dereference(dev->qdisc), skb, cb, &q_idx, s_q_idx, - true, tca[TCA_DUMP_INVISIBLE]) < 0) + true, tca[TCA_DUMP_INVISIBLE]) < 0) { + netdev_unlock_ops(dev); goto done; + } dev_queue = dev_ingress_queue(dev); if (dev_queue && tc_dump_qdisc_root(rtnl_dereference(dev_queue->qdisc_sleeping), skb, cb, &q_idx, s_q_idx, false, - tca[TCA_DUMP_INVISIBLE]) < 0) + tca[TCA_DUMP_INVISIBLE]) < 0) { + netdev_unlock_ops(dev); goto done; + } + netdev_unlock_ops(dev); cont: idx++; @@ -2135,15 +2181,15 @@ static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid, #endif -static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, - struct netlink_ext_ack *extack) +static int __tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, + struct netlink_ext_ack *extack, + struct net_device *dev, + struct nlattr *tca[TCA_MAX + 1], + struct tcmsg *tcm) { struct net *net = sock_net(skb->sk); - struct tcmsg *tcm = nlmsg_data(n); - struct nlattr *tca[TCA_MAX + 1]; - struct net_device *dev; - struct Qdisc *q = NULL; const struct Qdisc_class_ops *cops; + struct Qdisc *q = NULL; unsigned long cl = 0; unsigned long new_cl; u32 portid; @@ -2151,15 +2197,6 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, u32 qid; int err; - err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX, - rtm_tca_policy, extack); - if (err < 0) - return err; - - dev = __dev_get_by_index(net, tcm->tcm_ifindex); - if (!dev) - return -ENODEV; - /* parent == TC_H_UNSPEC - unspecified parent. parent == TC_H_ROOT - class is root, which has no parent. 
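The sch_api.c changes above and below repeat one refactor three times: tc_get_qdisc(), tc_modify_qdisc() and tc_ctl_tclass() each become a thin entry point that parses the netlink message and resolves the device, then brackets a new __tc_*() helper with netdev_lock_ops()/netdev_unlock_ops(). The only wrinkle is tc_modify_qdisc(), whose -EAGAIN replay loop is hoisted into the wrapper via a bool *replay out-parameter, so a retry re-takes the lock from the top rather than looping with it held. A hedged userspace sketch of that wrapper shape follows, with a pthread mutex standing in for the netdev ops lock; struct device, modify() and __modify() are made-up names, not anything from the patch.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct device {
	pthread_mutex_t ops_lock;
	int qdisc_count;
};

/* Core logic, called with dev->ops_lock held. 'replay' is an
 * out-parameter so retry decisions are handed back to the wrapper,
 * mirroring how __tc_modify_qdisc() reports them instead of looping.
 */
static int __modify(struct device *dev, bool *replay)
{
	if (dev->qdisc_count == 0) {
		dev->qdisc_count = 1;	/* pretend a module just got loaded */
		*replay = true;
		return 0;
	}
	printf("modifying qdisc, count=%d\n", dev->qdisc_count);
	return 0;
}

/* Thin entry point: take the lock, run the helper, drop the lock,
 * and only then decide whether to start over.
 */
static int modify(struct device *dev)
{
	bool replay;
	int err;

replay:
	replay = false;
	pthread_mutex_lock(&dev->ops_lock);
	err = __modify(dev, &replay);
	pthread_mutex_unlock(&dev->ops_lock);
	if (replay)
		goto replay;
	return err;
}

int main(void)
{
	struct device dev = { PTHREAD_MUTEX_INITIALIZER, 0 };

	return modify(&dev);
}

The same discipline shows up in the qdisc_create() hunk further up, which drops the ops lock (and rtnl) around request_module() before replaying the qdisc_lookup_ops() call.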
@@ -2254,6 +2291,12 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, return -EOPNOTSUPP; } + /* Prevent creation of traffic classes with classid TC_H_ROOT */ + if (clid == TC_H_ROOT) { + NL_SET_ERR_MSG(extack, "Cannot create traffic class with classid TC_H_ROOT"); + return -EINVAL; + } + new_cl = cl; err = -EOPNOTSUPP; if (cops->change) @@ -2268,6 +2311,31 @@ out: return err; } +static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, + struct netlink_ext_ack *extack) +{ + struct net *net = sock_net(skb->sk); + struct tcmsg *tcm = nlmsg_data(n); + struct nlattr *tca[TCA_MAX + 1]; + struct net_device *dev; + int err; + + err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX, + rtm_tca_policy, extack); + if (err < 0) + return err; + + dev = __dev_get_by_index(net, tcm->tcm_ifindex); + if (!dev) + return -ENODEV; + + netdev_lock_ops(dev); + err = __tc_ctl_tclass(skb, n, extack, dev, tca, tcm); + netdev_unlock_ops(dev); + + return err; +} + struct qdisc_dump_args { struct qdisc_walker w; struct sk_buff *skb; @@ -2344,20 +2412,12 @@ static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb, return 0; } -static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) +static int __tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb, + struct tcmsg *tcm, struct net_device *dev) { - struct tcmsg *tcm = nlmsg_data(cb->nlh); - struct net *net = sock_net(skb->sk); struct netdev_queue *dev_queue; - struct net_device *dev; int t, s_t; - if (nlmsg_len(cb->nlh) < sizeof(*tcm)) - return 0; - dev = dev_get_by_index(net, tcm->tcm_ifindex); - if (!dev) - return 0; - s_t = cb->args[0]; t = 0; @@ -2374,10 +2434,32 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) done: cb->args[0] = t; - dev_put(dev); return skb->len; } +static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct tcmsg *tcm = nlmsg_data(cb->nlh); + struct net *net = sock_net(skb->sk); + struct net_device *dev; + int err; + + if (nlmsg_len(cb->nlh) < sizeof(*tcm)) + return 0; + + dev = dev_get_by_index(net, tcm->tcm_ifindex); + if (!dev) + return 0; + + netdev_lock_ops(dev); + err = __tc_dump_tclass(skb, cb, tcm, dev); + netdev_unlock_ops(dev); + + dev_put(dev); + + return err; +} + #ifdef CONFIG_PROC_FS static int psched_show(struct seq_file *seq, void *v) { diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index ab6234b4fcd5..532fde548b88 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -913,7 +913,8 @@ static void gred_destroy(struct Qdisc *sch) for (i = 0; i < table->DPs; i++) gred_destroy_vq(table->tab[i]); - gred_offload(sch, TC_GRED_DESTROY); + if (table->opt) + gred_offload(sch, TC_GRED_DESTROY); kfree(table->opt); } diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 716808f374a8..b391c2ef463f 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -1079,14 +1079,16 @@ static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev, struct smc_init_info *ini) { u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; + struct net_device *base_ndev; struct net *net; - ndev = pnet_find_base_ndev(ndev); + base_ndev = pnet_find_base_ndev(ndev); net = dev_net(ndev); - if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port, + if (smc_pnetid_by_dev_port(base_ndev->dev.parent, base_ndev->dev_port, ndev_pnetid) && + smc_pnet_find_ndev_pnetid_by_table(base_ndev, ndev_pnetid) && smc_pnet_find_ndev_pnetid_by_table(ndev, ndev_pnetid)) { - smc_pnet_find_rdma_dev(ndev, ini); + 
smc_pnet_find_rdma_dev(base_ndev, ini); return; /* pnetid could not be determined */ } _smc_pnet_find_roce_by_pnetid(ndev_pnetid, ini, NULL, net); diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 6488ead9e464..4d5fbacef496 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -472,7 +472,7 @@ bool switchdev_port_obj_act_is_deferred(struct net_device *dev, EXPORT_SYMBOL_GPL(switchdev_port_obj_act_is_deferred); static ATOMIC_NOTIFIER_HEAD(switchdev_notif_chain); -static BLOCKING_NOTIFIER_HEAD(switchdev_blocking_notif_chain); +static RAW_NOTIFIER_HEAD(switchdev_blocking_notif_chain); /** * register_switchdev_notifier - Register notifier @@ -518,17 +518,27 @@ EXPORT_SYMBOL_GPL(call_switchdev_notifiers); int register_switchdev_blocking_notifier(struct notifier_block *nb) { - struct blocking_notifier_head *chain = &switchdev_blocking_notif_chain; + struct raw_notifier_head *chain = &switchdev_blocking_notif_chain; + int err; + + rtnl_lock(); + err = raw_notifier_chain_register(chain, nb); + rtnl_unlock(); - return blocking_notifier_chain_register(chain, nb); + return err; } EXPORT_SYMBOL_GPL(register_switchdev_blocking_notifier); int unregister_switchdev_blocking_notifier(struct notifier_block *nb) { - struct blocking_notifier_head *chain = &switchdev_blocking_notif_chain; + struct raw_notifier_head *chain = &switchdev_blocking_notif_chain; + int err; - return blocking_notifier_chain_unregister(chain, nb); + rtnl_lock(); + err = raw_notifier_chain_unregister(chain, nb); + rtnl_unlock(); + + return err; } EXPORT_SYMBOL_GPL(unregister_switchdev_blocking_notifier); @@ -536,10 +546,11 @@ int call_switchdev_blocking_notifiers(unsigned long val, struct net_device *dev, struct switchdev_notifier_info *info, struct netlink_ext_ack *extack) { + ASSERT_RTNL(); info->dev = dev; info->extack = extack; - return blocking_notifier_call_chain(&switchdev_blocking_notif_chain, - val, info); + return raw_notifier_call_chain(&switchdev_blocking_notif_chain, + val, info); } EXPORT_SYMBOL_GPL(call_switchdev_blocking_notifiers); diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 99ca4465f702..cb86b0bf9a53 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -1057,7 +1057,7 @@ static u16 tls_user_config(struct tls_context *ctx, bool tx) return 0; } -static int tls_get_info(struct sock *sk, struct sk_buff *skb) +static int tls_get_info(struct sock *sk, struct sk_buff *skb, bool net_admin) { u16 version, cipher_type; struct tls_context *ctx; @@ -1115,7 +1115,7 @@ nla_failure: return err; } -static size_t tls_get_info_size(const struct sock *sk) +static size_t tls_get_info_size(const struct sock *sk, bool net_admin) { size_t size = 0; diff --git a/net/wireless/core.c b/net/wireless/core.c index 15bbc9d06c9e..2589ed810b85 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -1191,6 +1191,13 @@ void cfg80211_dev_free(struct cfg80211_registered_device *rdev) { struct cfg80211_internal_bss *scan, *tmp; struct cfg80211_beacon_registration *reg, *treg; + unsigned long flags; + + spin_lock_irqsave(&rdev->wiphy_work_lock, flags); + WARN_ON(!list_empty(&rdev->wiphy_work_list)); + spin_unlock_irqrestore(&rdev->wiphy_work_lock, flags); + cancel_work_sync(&rdev->wiphy_work); + rfkill_destroy(rdev->wiphy.rfkill); list_for_each_entry_safe(reg, treg, &rdev->beacon_registrations, list) { list_del(®->list); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 2c4e06610a79..34c0dfeeb7fc 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ 
-4220,6 +4220,11 @@ static int parse_monitor_flags(struct nlattr *nla, u32 *mntrflags) if (flags[flag]) *mntrflags |= (1<<flag); + /* cooked monitor mode is incompatible with other modes */ + if (*mntrflags & MONITOR_FLAG_COOK_FRAMES && + *mntrflags != MONITOR_FLAG_COOK_FRAMES) + return -EOPNOTSUPP; + *mntrflags |= MONITOR_FLAG_CHANGED; return 0; @@ -11123,6 +11128,7 @@ static struct cfg80211_bss *nl80211_assoc_bss(struct cfg80211_registered_device static int nl80211_process_links(struct cfg80211_registered_device *rdev, struct cfg80211_assoc_link *links, + int assoc_link_id, const u8 *ssid, int ssid_len, struct genl_info *info) { @@ -11153,7 +11159,7 @@ static int nl80211_process_links(struct cfg80211_registered_device *rdev, } links[link_id].bss = nl80211_assoc_bss(rdev, ssid, ssid_len, attrs, - link_id, link_id); + assoc_link_id, link_id); if (IS_ERR(links[link_id].bss)) { err = PTR_ERR(links[link_id].bss); links[link_id].bss = NULL; @@ -11350,8 +11356,8 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) req.ap_mld_addr = nla_data(info->attrs[NL80211_ATTR_MLD_ADDR]); ap_addr = req.ap_mld_addr; - err = nl80211_process_links(rdev, req.links, ssid, ssid_len, - info); + err = nl80211_process_links(rdev, req.links, req.link_id, + ssid, ssid_len, info); if (err) goto free; @@ -16506,7 +16512,10 @@ static int nl80211_assoc_ml_reconf(struct sk_buff *skb, struct genl_info *info) add_links = 0; if (info->attrs[NL80211_ATTR_MLO_LINKS]) { - err = nl80211_process_links(rdev, links, NULL, 0, info); + err = nl80211_process_links(rdev, links, + /* mark as MLO, but not assoc */ + IEEE80211_MLD_MAX_NUM_LINKS, + NULL, 0, info); if (err) return err; @@ -16534,7 +16543,7 @@ static int nl80211_assoc_ml_reconf(struct sk_buff *skb, struct genl_info *info) goto out; } - err = cfg80211_assoc_ml_reconf(rdev, dev, links, rem_links); + err = -EOPNOTSUPP; out: for (link_id = 0; link_id < ARRAY_SIZE(links); link_id++) diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 2dd0533e7660..212e9561aae7 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -407,7 +407,8 @@ static bool is_an_alpha2(const char *alpha2) { if (!alpha2) return false; - return isalpha(alpha2[0]) && isalpha(alpha2[1]); + return isascii(alpha2[0]) && isalpha(alpha2[0]) && + isascii(alpha2[1]) && isalpha(alpha2[1]); } static bool alpha2_equal(const char *alpha2_x, const char *alpha2_y) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 84bf9f1d4bf2..e5d104ce7b82 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -25,6 +25,7 @@ #include <linux/vmalloc.h> #include <net/xdp_sock_drv.h> #include <net/busy_poll.h> +#include <net/netdev_lock.h> #include <net/netdev_rx_queue.h> #include <net/xdp.h> @@ -1181,6 +1182,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) goto out_release; } + netdev_lock_ops(dev); + if (!xs->rx && !xs->tx) { err = -EINVAL; goto out_unlock; @@ -1315,6 +1318,7 @@ out_unlock: smp_wmb(); WRITE_ONCE(xs->state, XSK_BOUND); } + netdev_unlock_ops(dev); out_release: mutex_unlock(&xs->mutex); rtnl_unlock(); diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index c263fb7a68dc..14716ad3d7bc 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -1,5 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 +#include <linux/netdevice.h> +#include <net/netdev_lock.h> #include <net/xsk_buff_pool.h> #include <net/xdp_sock.h> #include <net/xdp_sock_drv.h> @@ -219,6 +221,7 @@ int xp_assign_dev(struct xsk_buff_pool *pool, bpf.xsk.pool = pool; bpf.xsk.queue_id = 
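	/* (editorial aside, not part of the diff: the assignment resumes
	 * after this comment; the netdev_ops_assert_locked() added just
	 * below documents that xp_assign_dev()'s caller -- xsk_bind(), per
	 * the net/xdp/xsk.c hunk above -- now holds the instance ops lock
	 * around ndo_bpf()) */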
queue_id; + netdev_ops_assert_locked(netdev); err = netdev->netdev_ops->ndo_bpf(netdev, &bpf); if (err) goto err_unreg_pool; diff --git a/scripts/package/install-extmod-build b/scripts/package/install-extmod-build index 2966473b4660..b96538787f3d 100755 --- a/scripts/package/install-extmod-build +++ b/scripts/package/install-extmod-build @@ -63,7 +63,7 @@ if [ "${CC}" != "${HOSTCC}" ]; then # Clear VPATH and srcroot because the source files reside in the output # directory. # shellcheck disable=SC2016 # $(MAKE) and $(build) will be expanded by Make - "${MAKE}" run-command KBUILD_RUN_COMMAND='+$(MAKE) HOSTCC='"${CC}"' VPATH= srcroot=. $(build)='"${destdir}"/scripts + "${MAKE}" run-command KBUILD_RUN_COMMAND='+$(MAKE) HOSTCC='"${CC}"' VPATH= srcroot=. $(build)='"$(realpath --relative-base=. "${destdir}")"/scripts rm -f "${destdir}/scripts/Kbuild" fi diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c index cb66ec42a3f8..706f53e39b53 100644 --- a/sound/core/seq/seq_clientmgr.c +++ b/sound/core/seq/seq_clientmgr.c @@ -106,7 +106,7 @@ static struct snd_seq_client *clientptr(int clientid) return clienttab[clientid]; } -struct snd_seq_client *snd_seq_client_use_ptr(int clientid) +static struct snd_seq_client *client_use_ptr(int clientid, bool load_module) { unsigned long flags; struct snd_seq_client *client; @@ -126,7 +126,7 @@ struct snd_seq_client *snd_seq_client_use_ptr(int clientid) } spin_unlock_irqrestore(&clients_lock, flags); #ifdef CONFIG_MODULES - if (!in_interrupt()) { + if (load_module) { static DECLARE_BITMAP(client_requested, SNDRV_SEQ_GLOBAL_CLIENTS); static DECLARE_BITMAP(card_requested, SNDRV_CARDS); @@ -168,6 +168,20 @@ struct snd_seq_client *snd_seq_client_use_ptr(int clientid) return client; } +/* get snd_seq_client object for the given id quickly */ +struct snd_seq_client *snd_seq_client_use_ptr(int clientid) +{ + return client_use_ptr(clientid, false); +} + +/* get snd_seq_client object for the given id; + * if not found, retry after loading the modules + */ +static struct snd_seq_client *client_load_and_use_ptr(int clientid) +{ + return client_use_ptr(clientid, IS_ENABLED(CONFIG_MODULES)); +} + /* Take refcount and perform ioctl_mutex lock on the given client; * used only for OSS sequencer * Unlock via snd_seq_client_ioctl_unlock() below @@ -176,7 +190,7 @@ bool snd_seq_client_ioctl_lock(int clientid) { struct snd_seq_client *client; - client = snd_seq_client_use_ptr(clientid); + client = client_load_and_use_ptr(clientid); if (!client) return false; mutex_lock(&client->ioctl_mutex); @@ -1195,7 +1209,7 @@ static int snd_seq_ioctl_running_mode(struct snd_seq_client *client, void *arg) int err = 0; /* requested client number */ - cptr = snd_seq_client_use_ptr(info->client); + cptr = client_load_and_use_ptr(info->client); if (cptr == NULL) return -ENOENT; /* don't change !!! */ @@ -1257,7 +1271,7 @@ static int snd_seq_ioctl_get_client_info(struct snd_seq_client *client, struct snd_seq_client *cptr; /* requested client number */ - cptr = snd_seq_client_use_ptr(client_info->client); + cptr = client_load_and_use_ptr(client_info->client); if (cptr == NULL) return -ENOENT; /* don't change !!! 
*/ @@ -1396,7 +1410,7 @@ static int snd_seq_ioctl_get_port_info(struct snd_seq_client *client, void *arg) struct snd_seq_client *cptr; struct snd_seq_client_port *port; - cptr = snd_seq_client_use_ptr(info->addr.client); + cptr = client_load_and_use_ptr(info->addr.client); if (cptr == NULL) return -ENXIO; @@ -1503,10 +1517,10 @@ static int snd_seq_ioctl_subscribe_port(struct snd_seq_client *client, struct snd_seq_client *receiver = NULL, *sender = NULL; struct snd_seq_client_port *sport = NULL, *dport = NULL; - receiver = snd_seq_client_use_ptr(subs->dest.client); + receiver = client_load_and_use_ptr(subs->dest.client); if (!receiver) goto __end; - sender = snd_seq_client_use_ptr(subs->sender.client); + sender = client_load_and_use_ptr(subs->sender.client); if (!sender) goto __end; sport = snd_seq_port_use_ptr(sender, subs->sender.port); @@ -1871,7 +1885,7 @@ static int snd_seq_ioctl_get_client_pool(struct snd_seq_client *client, struct snd_seq_client_pool *info = arg; struct snd_seq_client *cptr; - cptr = snd_seq_client_use_ptr(info->client); + cptr = client_load_and_use_ptr(info->client); if (cptr == NULL) return -ENOENT; memset(info, 0, sizeof(*info)); @@ -1975,7 +1989,7 @@ static int snd_seq_ioctl_get_subscription(struct snd_seq_client *client, struct snd_seq_client_port *sport = NULL; result = -EINVAL; - sender = snd_seq_client_use_ptr(subs->sender.client); + sender = client_load_and_use_ptr(subs->sender.client); if (!sender) goto __end; sport = snd_seq_port_use_ptr(sender, subs->sender.port); @@ -2006,7 +2020,7 @@ static int snd_seq_ioctl_query_subs(struct snd_seq_client *client, void *arg) struct list_head *p; int i; - cptr = snd_seq_client_use_ptr(subs->root.client); + cptr = client_load_and_use_ptr(subs->root.client); if (!cptr) goto __end; port = snd_seq_port_use_ptr(cptr, subs->root.port); @@ -2073,7 +2087,7 @@ static int snd_seq_ioctl_query_next_client(struct snd_seq_client *client, if (info->client < 0) info->client = 0; for (; info->client < SNDRV_SEQ_MAX_CLIENTS; info->client++) { - cptr = snd_seq_client_use_ptr(info->client); + cptr = client_load_and_use_ptr(info->client); if (cptr) break; /* found */ } @@ -2096,7 +2110,7 @@ static int snd_seq_ioctl_query_next_port(struct snd_seq_client *client, struct snd_seq_client *cptr; struct snd_seq_client_port *port = NULL; - cptr = snd_seq_client_use_ptr(info->addr.client); + cptr = client_load_and_use_ptr(info->addr.client); if (cptr == NULL) return -ENXIO; @@ -2193,7 +2207,7 @@ static int snd_seq_ioctl_client_ump_info(struct snd_seq_client *caller, size = sizeof(struct snd_ump_endpoint_info); else size = sizeof(struct snd_ump_block_info); - cptr = snd_seq_client_use_ptr(client); + cptr = client_load_and_use_ptr(client); if (!cptr) return -ENOENT; @@ -2475,7 +2489,7 @@ int snd_seq_kernel_client_enqueue(int client, struct snd_seq_event *ev, if (check_event_type_and_length(ev)) return -EINVAL; - cptr = snd_seq_client_use_ptr(client); + cptr = client_load_and_use_ptr(client); if (cptr == NULL) return -EINVAL; @@ -2707,7 +2721,7 @@ void snd_seq_info_clients_read(struct snd_info_entry *entry, /* list the client table */ for (c = 0; c < SNDRV_SEQ_MAX_CLIENTS; c++) { - client = snd_seq_client_use_ptr(c); + client = client_load_and_use_ptr(c); if (client == NULL) continue; if (client->type == NO_CLIENT) { diff --git a/sound/pci/hda/Kconfig b/sound/pci/hda/Kconfig index e393578cbe68..84ebf19f2883 100644 --- a/sound/pci/hda/Kconfig +++ b/sound/pci/hda/Kconfig @@ -222,6 +222,7 @@ comment "Set to Y if you want auto-loading the side codec 
driver" config SND_HDA_CODEC_REALTEK tristate "Build Realtek HD-audio codec support" + depends on INPUT select SND_HDA_GENERIC select SND_HDA_GENERIC_LEDS select SND_HDA_SCODEC_COMPONENT diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 67540e037309..e67c22c59f02 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2232,6 +2232,8 @@ static const struct snd_pci_quirk power_save_denylist[] = { SND_PCI_QUIRK(0x1631, 0xe017, "Packard Bell NEC IMEDIA 5204", 0), /* KONTRON SinglePC may cause a stall at runtime resume */ SND_PCI_QUIRK(0x1734, 0x1232, "KONTRON SinglePC", 0), + /* Dell ALC3271 */ + SND_PCI_QUIRK(0x1028, 0x0962, "Dell ALC3271", 0), {} }; diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index c735f630ecb5..d2a1f836dbbf 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -3843,6 +3843,79 @@ static void alc225_shutup(struct hda_codec *codec) } } +static void alc222_init(struct hda_codec *codec) +{ + struct alc_spec *spec = codec->spec; + hda_nid_t hp_pin = alc_get_hp_pin(spec); + bool hp1_pin_sense, hp2_pin_sense; + + if (!hp_pin) + return; + + msleep(30); + + hp1_pin_sense = snd_hda_jack_detect(codec, hp_pin); + hp2_pin_sense = snd_hda_jack_detect(codec, 0x14); + + if (hp1_pin_sense || hp2_pin_sense) { + msleep(2); + + if (hp1_pin_sense) + snd_hda_codec_write(codec, hp_pin, 0, + AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT); + if (hp2_pin_sense) + snd_hda_codec_write(codec, 0x14, 0, + AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT); + msleep(75); + + if (hp1_pin_sense) + snd_hda_codec_write(codec, hp_pin, 0, + AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE); + if (hp2_pin_sense) + snd_hda_codec_write(codec, 0x14, 0, + AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE); + + msleep(75); + } +} + +static void alc222_shutup(struct hda_codec *codec) +{ + struct alc_spec *spec = codec->spec; + hda_nid_t hp_pin = alc_get_hp_pin(spec); + bool hp1_pin_sense, hp2_pin_sense; + + if (!hp_pin) + hp_pin = 0x21; + + hp1_pin_sense = snd_hda_jack_detect(codec, hp_pin); + hp2_pin_sense = snd_hda_jack_detect(codec, 0x14); + + if (hp1_pin_sense || hp2_pin_sense) { + msleep(2); + + if (hp1_pin_sense) + snd_hda_codec_write(codec, hp_pin, 0, + AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE); + if (hp2_pin_sense) + snd_hda_codec_write(codec, 0x14, 0, + AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE); + + msleep(75); + + if (hp1_pin_sense) + snd_hda_codec_write(codec, hp_pin, 0, + AC_VERB_SET_PIN_WIDGET_CONTROL, 0x0); + if (hp2_pin_sense) + snd_hda_codec_write(codec, 0x14, 0, + AC_VERB_SET_PIN_WIDGET_CONTROL, 0x0); + + msleep(75); + } + alc_auto_setup_eapd(codec, false); + alc_shutup_pins(codec); +} + static void alc_default_init(struct hda_codec *codec) { struct alc_spec *spec = codec->spec; @@ -4927,7 +5000,6 @@ static void alc298_fixup_samsung_amp_v2_4_amps(struct hda_codec *codec, alc298_samsung_v2_init_amps(codec, 4); } -#if IS_REACHABLE(CONFIG_INPUT) static void gpio2_mic_hotkey_event(struct hda_codec *codec, struct hda_jack_callback *event) { @@ -5036,10 +5108,6 @@ static void alc233_fixup_lenovo_line2_mic_hotkey(struct hda_codec *codec, spec->kb_dev = NULL; } } -#else /* INPUT */ -#define alc280_fixup_hp_gpio2_mic_hotkey NULL -#define alc233_fixup_lenovo_line2_mic_hotkey NULL -#endif /* INPUT */ static void alc269_fixup_hp_line1_mic1_led(struct hda_codec *codec, const struct hda_fixup *fix, int action) @@ -5053,6 +5121,16 @@ static void alc269_fixup_hp_line1_mic1_led(struct hda_codec *codec, } } +static void 
alc233_fixup_lenovo_low_en_micmute_led(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + struct alc_spec *spec = codec->spec; + + if (action == HDA_FIXUP_ACT_PRE_PROBE) + spec->micmute_led_polarity = 1; + alc233_fixup_lenovo_line2_mic_hotkey(codec, fix, action); +} + static void alc_hp_mute_disable(struct hda_codec *codec, unsigned int delay) { if (delay <= 0) @@ -7621,6 +7699,7 @@ enum { ALC275_FIXUP_DELL_XPS, ALC293_FIXUP_LENOVO_SPK_NOISE, ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY, + ALC233_FIXUP_LENOVO_L2MH_LOW_ENLED, ALC255_FIXUP_DELL_SPK_NOISE, ALC225_FIXUP_DISABLE_MIC_VREF, ALC225_FIXUP_DELL1_MIC_NO_PRESENCE, @@ -7690,7 +7769,6 @@ enum { ALC285_FIXUP_THINKPAD_X1_GEN7, ALC285_FIXUP_THINKPAD_HEADSET_JACK, ALC294_FIXUP_ASUS_ALLY, - ALC294_FIXUP_ASUS_ALLY_X, ALC294_FIXUP_ASUS_ALLY_PINS, ALC294_FIXUP_ASUS_ALLY_VERBS, ALC294_FIXUP_ASUS_ALLY_SPEAKER, @@ -8616,6 +8694,10 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc233_fixup_lenovo_line2_mic_hotkey, }, + [ALC233_FIXUP_LENOVO_L2MH_LOW_ENLED] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc233_fixup_lenovo_low_en_micmute_led, + }, [ALC233_FIXUP_INTEL_NUC8_DMIC] = { .type = HDA_FIXUP_FUNC, .v.func = alc_fixup_inv_dmic, @@ -9138,12 +9220,6 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC294_FIXUP_ASUS_ALLY_PINS }, - [ALC294_FIXUP_ASUS_ALLY_X] = { - .type = HDA_FIXUP_FUNC, - .v.func = tas2781_fixup_i2c, - .chained = true, - .chain_id = ALC294_FIXUP_ASUS_ALLY_PINS - }, [ALC294_FIXUP_ASUS_ALLY_PINS] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { @@ -10600,7 +10676,9 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8e1a, "HP ZBook Firefly 14 G12A", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300), + SND_PCI_QUIRK(0x1043, 0x1054, "ASUS G614FH/FM/FP", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), + SND_PCI_QUIRK(0x1043, 0x1074, "ASUS G614PH/PM/PP", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x10a1, "ASUS UX391UA", ALC294_FIXUP_ASUS_SPK), SND_PCI_QUIRK(0x1043, 0x10a4, "ASUS TP3407SA", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x1043, 0x10c0, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC), @@ -10608,15 +10686,18 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x10d3, "ASUS K6500ZC", ALC294_FIXUP_ASUS_SPK), SND_PCI_QUIRK(0x1043, 0x1154, "ASUS TP3607SH", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x1043, 0x115d, "Asus 1015E", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), + SND_PCI_QUIRK(0x1043, 0x1194, "ASUS UM3406KA", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x11c0, "ASUS X556UR", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1204, "ASUS Strix G615JHR_JMR_JPR", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x1043, 0x1214, "ASUS Strix G615LH_LM_LP", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x1043, 0x125e, "ASUS Q524UQK", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1271, "ASUS X430UN", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1290, "ASUS X441SA", ALC233_FIXUP_EAPD_COEF_AND_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1043, 0x1294, "ASUS B3405CVA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x12a0, "ASUS X441UV", ALC233_FIXUP_EAPD_COEF_AND_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x12a3, "Asus N7691ZM", ALC269_FIXUP_ASUS_N7601ZM), SND_PCI_QUIRK(0x1043, 0x12af, "ASUS 
UX582ZS", ALC245_FIXUP_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x12b4, "ASUS B3405CCA / P3405CCA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x12e0, "ASUS X541SA", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x12f0, "ASUS X541UV", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x1313, "Asus K42JZ", ALC269VB_FIXUP_ASUS_MIC_NO_PRESENCE), @@ -10645,7 +10726,6 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1740, "ASUS UX430UA", ALC295_FIXUP_ASUS_DACS), SND_PCI_QUIRK(0x1043, 0x17d1, "ASUS UX431FL", ALC294_FIXUP_ASUS_DUAL_SPK), SND_PCI_QUIRK(0x1043, 0x17f3, "ROG Ally NR2301L/X", ALC294_FIXUP_ASUS_ALLY), - SND_PCI_QUIRK(0x1043, 0x1eb3, "ROG Ally X RC72LA", ALC294_FIXUP_ASUS_ALLY_X), SND_PCI_QUIRK(0x1043, 0x1863, "ASUS UX6404VI/VV", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1881, "ASUS Zephyrus S/M", ALC294_FIXUP_ASUS_GX502_PINS), SND_PCI_QUIRK(0x1043, 0x18b1, "Asus MJ401TA", ALC256_FIXUP_ASUS_HEADSET_MIC), @@ -10699,14 +10779,28 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1f12, "ASUS UM5302", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x1f1f, "ASUS H7604JI/JV/J3D", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1f62, "ASUS UX7602ZM", ALC245_FIXUP_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x1f63, "ASUS P5405CSA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1f92, "ASUS ROG Flow X16", ALC289_FIXUP_ASUS_GA401), + SND_PCI_QUIRK(0x1043, 0x1fb3, "ASUS ROG Flow Z13 GZ302EA", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x1043, 0x3011, "ASUS B5605CVA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x3030, "ASUS ZN270IE", ALC256_FIXUP_ASUS_AIO_GPIO2), + SND_PCI_QUIRK(0x1043, 0x3061, "ASUS B3405CCA", ALC245_FIXUP_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x3071, "ASUS B5405CCA", ALC245_FIXUP_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x30c1, "ASUS B3605CCA / P3605CCA", ALC245_FIXUP_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x30d1, "ASUS B5405CCA", ALC245_FIXUP_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x30e1, "ASUS B5605CCA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x31d0, "ASUS Zen AIO 27 Z272SD_A272SD", ALC274_FIXUP_ASUS_ZEN_AIO_27), + SND_PCI_QUIRK(0x1043, 0x31e1, "ASUS B5605CCA", ALC245_FIXUP_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x31f1, "ASUS B3605CCA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x3a20, "ASUS G614JZR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), SND_PCI_QUIRK(0x1043, 0x3a30, "ASUS G814JVR/JIR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), SND_PCI_QUIRK(0x1043, 0x3a40, "ASUS G814JZR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), SND_PCI_QUIRK(0x1043, 0x3a50, "ASUS G834JYR/JZR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), SND_PCI_QUIRK(0x1043, 0x3a60, "ASUS G634JYR/JZR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), + SND_PCI_QUIRK(0x1043, 0x3d78, "ASUS GA603KH", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x1043, 0x3d88, "ASUS GA603KM", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x1043, 0x3e00, "ASUS G814FH/FM/FP", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x1043, 0x3e20, "ASUS G814PH/PM/PP", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x3e30, "ASUS TP3607SA", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x1043, 0x3ee0, "ASUS Strix G815_JHR_JMR_JPR", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x1043, 0x3ef0, "ASUS Strix G635LR_LW_LX", ALC287_FIXUP_TAS2781_I2C), @@ -10714,6 +10808,8 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x3f10, "ASUS Strix G835LR_LW_LX", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x1043, 0x3f20, "ASUS Strix G615LR_LW", 
ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x1043, 0x3f30, "ASUS Strix G815LR_LW", ALC287_FIXUP_TAS2781_I2C), + SND_PCI_QUIRK(0x1043, 0x3fd0, "ASUS B3605CVA", ALC245_FIXUP_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x3ff0, "ASUS B5405CVA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x831a, "ASUS P901", ALC269_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x1043, 0x834a, "ASUS S101", ALC269_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x1043, 0x8398, "ASUS P1005", ALC269_FIXUP_STEREO_DMIC), @@ -10912,6 +11008,9 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x3178, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x17aa, 0x31af, "ThinkCentre Station", ALC623_FIXUP_LENOVO_THINKSTATION_P340), SND_PCI_QUIRK(0x17aa, 0x334b, "Lenovo ThinkCentre M70 Gen5", ALC283_FIXUP_HEADSET_MIC), + SND_PCI_QUIRK(0x17aa, 0x3384, "ThinkCentre M90a PRO", ALC233_FIXUP_LENOVO_L2MH_LOW_ENLED), + SND_PCI_QUIRK(0x17aa, 0x3386, "ThinkCentre M90a Gen6", ALC233_FIXUP_LENOVO_L2MH_LOW_ENLED), + SND_PCI_QUIRK(0x17aa, 0x3387, "ThinkCentre M70a Gen6", ALC233_FIXUP_LENOVO_L2MH_LOW_ENLED), SND_PCI_QUIRK(0x17aa, 0x3801, "Lenovo Yoga9 14IAP7", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN), HDA_CODEC_QUIRK(0x17aa, 0x3802, "DuetITL 2021", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x3802, "Lenovo Yoga Pro 9 14IRP8", ALC287_FIXUP_TAS2781_I2C), @@ -11900,8 +11999,11 @@ static int patch_alc269(struct hda_codec *codec) spec->codec_variant = ALC269_TYPE_ALC300; spec->gen.mixer_nid = 0; /* no loopback on ALC300 */ break; + case 0x10ec0222: case 0x10ec0623: spec->codec_variant = ALC269_TYPE_ALC623; + spec->shutup = alc222_shutup; + spec->init_hook = alc222_init; break; case 0x10ec0700: case 0x10ec0701: diff --git a/sound/usb/usx2y/usbusx2y.c b/sound/usb/usx2y/usbusx2y.c index 5f81c68fd42b..5756ff3528a2 100644 --- a/sound/usb/usx2y/usbusx2y.c +++ b/sound/usb/usx2y/usbusx2y.c @@ -151,6 +151,12 @@ static int snd_usx2y_card_used[SNDRV_CARDS]; static void snd_usx2y_card_private_free(struct snd_card *card); static void usx2y_unlinkseq(struct snd_usx2y_async_seq *s); +#ifdef USX2Y_NRPACKS_VARIABLE +int nrpacks = USX2Y_NRPACKS; /* number of packets per urb */ +module_param(nrpacks, int, 0444); +MODULE_PARM_DESC(nrpacks, "Number of packets per URB."); +#endif + /* * pipe 4 is used for switching the lamps, setting samplerate, volumes .... */ @@ -432,6 +438,11 @@ static int snd_usx2y_probe(struct usb_interface *intf, struct snd_card *card; int err; +#ifdef USX2Y_NRPACKS_VARIABLE + if (nrpacks < 0 || nrpacks > USX2Y_NRPACKS_MAX) + return -EINVAL; +#endif + if (le16_to_cpu(device->descriptor.idVendor) != 0x1604 || (le16_to_cpu(device->descriptor.idProduct) != USB_ID_US122 && le16_to_cpu(device->descriptor.idProduct) != USB_ID_US224 && diff --git a/sound/usb/usx2y/usbusx2y.h b/sound/usb/usx2y/usbusx2y.h index 391fd7b4ed5e..6a76d04bf1c7 100644 --- a/sound/usb/usx2y/usbusx2y.h +++ b/sound/usb/usx2y/usbusx2y.h @@ -7,6 +7,32 @@ #define NRURBS 2 +/* Default value used for nr of packs per urb. + * 1 to 4 have been tested ok on uhci. + * To use 3 on ohci, you'd need a patch: + * look for "0000425-linux-2.6.9-rc4-mm1_ohci-hcd.patch.gz" on + * "https://bugtrack.alsa-project.org/alsa-bug/bug_view_page.php?bug_id=0000425" + * + * 1, 2 and 4 work out of the box on ohci, if I recall correctly. + * Bigger is safer operation, smaller gives lower latencies. 
+ */ +#define USX2Y_NRPACKS 4 + +#define USX2Y_NRPACKS_MAX 1024 + +/* If your system works ok with this module's parameter + * nrpacks set to 1, you might as well comment + * this define out, and thereby produce smaller, faster code. + * You'd also set USX2Y_NRPACKS to 1 then. + */ +#define USX2Y_NRPACKS_VARIABLE 1 + +#ifdef USX2Y_NRPACKS_VARIABLE +extern int nrpacks; +#define nr_of_packs() nrpacks +#else +#define nr_of_packs() USX2Y_NRPACKS +#endif #define URBS_ASYNC_SEQ 10 #define URB_DATA_LEN_ASYNC_SEQ 32 diff --git a/sound/usb/usx2y/usbusx2yaudio.c b/sound/usb/usx2y/usbusx2yaudio.c index f540f46a0b14..acca8bead82e 100644 --- a/sound/usb/usx2y/usbusx2yaudio.c +++ b/sound/usb/usx2y/usbusx2yaudio.c @@ -28,33 +28,6 @@ #include "usx2y.h" #include "usbusx2y.h" -/* Default value used for nr of packs per urb. - * 1 to 4 have been tested ok on uhci. - * To use 3 on ohci, you'd need a patch: - * look for "0000425-linux-2.6.9-rc4-mm1_ohci-hcd.patch.gz" on - * "https://bugtrack.alsa-project.org/alsa-bug/bug_view_page.php?bug_id=0000425" - * - * 1, 2 and 4 work out of the box on ohci, if I recall correctly. - * Bigger is safer operation, smaller gives lower latencies. - */ -#define USX2Y_NRPACKS 4 - -/* If your system works ok with this module's parameter - * nrpacks set to 1, you might as well comment - * this define out, and thereby produce smaller, faster code. - * You'd also set USX2Y_NRPACKS to 1 then. - */ -#define USX2Y_NRPACKS_VARIABLE 1 - -#ifdef USX2Y_NRPACKS_VARIABLE -static int nrpacks = USX2Y_NRPACKS; /* number of packets per urb */ -#define nr_of_packs() nrpacks -module_param(nrpacks, int, 0444); -MODULE_PARM_DESC(nrpacks, "Number of packets per URB."); -#else -#define nr_of_packs() USX2Y_NRPACKS -#endif - static int usx2y_urb_capt_retire(struct snd_usx2y_substream *subs) { struct urb *urb = subs->completed_urb; diff --git a/tools/objtool/check.c b/tools/objtool/check.c index be18a0489303..ce973d9d8e6d 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -2472,13 +2472,14 @@ static void mark_rodata(struct objtool_file *file) * * - .rodata: can contain GCC switch tables * - .rodata.<func>: same, if -fdata-sections is being used - * - .rodata..c_jump_table: contains C annotated jump tables + * - .data.rel.ro.c_jump_table: contains C annotated jump tables * * .rodata.str1.* sections are ignored; they don't contain jump tables. 
*/ for_each_sec(file, sec) { - if (!strncmp(sec->name, ".rodata", 7) && - !strstr(sec->name, ".str1.")) { + if ((!strncmp(sec->name, ".rodata", 7) && + !strstr(sec->name, ".str1.")) || + !strncmp(sec->name, ".data.rel.ro", 12)) { sec->rodata = true; found = true; } diff --git a/tools/objtool/include/objtool/special.h b/tools/objtool/include/objtool/special.h index e7ee7ffccefd..e049679bb17b 100644 --- a/tools/objtool/include/objtool/special.h +++ b/tools/objtool/include/objtool/special.h @@ -10,7 +10,7 @@ #include <objtool/check.h> #include <objtool/elf.h> -#define C_JUMP_TABLE_SECTION ".rodata..c_jump_table" +#define C_JUMP_TABLE_SECTION ".data.rel.ro.c_jump_table" struct special_alt { struct list_head list; diff --git a/tools/objtool/noreturns.h b/tools/objtool/noreturns.h index b2174894f9f7..6bb7edda3094 100644 --- a/tools/objtool/noreturns.h +++ b/tools/objtool/noreturns.h @@ -19,7 +19,7 @@ NORETURN(__x64_sys_exit_group) NORETURN(arch_cpu_idle_dead) NORETURN(bch2_trans_in_restart_error) NORETURN(bch2_trans_restart_error) -NORETURN(bch2_trans_unlocked_error) +NORETURN(bch2_trans_unlocked_or_in_restart_error) NORETURN(cpu_bringup_and_idle) NORETURN(cpu_startup_entry) NORETURN(do_exit) diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c index 80844a5fb1fe..29541d486c5e 100644 --- a/tools/testing/selftests/bpf/network_helpers.c +++ b/tools/testing/selftests/bpf/network_helpers.c @@ -548,6 +548,34 @@ void close_netns(struct nstoken *token) free(token); } +int open_tuntap(const char *dev_name, bool need_mac) +{ + int err = 0; + struct ifreq ifr; + int fd = open("/dev/net/tun", O_RDWR); + + if (!ASSERT_GE(fd, 0, "open(/dev/net/tun)")) + return -1; + + ifr.ifr_flags = IFF_NO_PI | (need_mac ? IFF_TAP : IFF_TUN); + strncpy(ifr.ifr_name, dev_name, IFNAMSIZ - 1); + ifr.ifr_name[IFNAMSIZ - 1] = '\0'; + + err = ioctl(fd, TUNSETIFF, &ifr); + if (!ASSERT_OK(err, "ioctl(TUNSETIFF)")) { + close(fd); + return -1; + } + + err = fcntl(fd, F_SETFL, O_NONBLOCK); + if (!ASSERT_OK(err, "fcntl(O_NONBLOCK)")) { + close(fd); + return -1; + } + + return fd; +} + int get_socket_local_port(int sock_fd) { struct sockaddr_storage addr; diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h index ebec8a8d6f81..9235976d0c50 100644 --- a/tools/testing/selftests/bpf/network_helpers.h +++ b/tools/testing/selftests/bpf/network_helpers.h @@ -8,6 +8,7 @@ typedef __u16 __sum16; #include <linux/if_ether.h> #include <linux/if_packet.h> +#include <linux/if_tun.h> #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/ethtool.h> @@ -85,6 +86,8 @@ int get_socket_local_port(int sock_fd); int get_hw_ring_size(char *ifname, struct ethtool_ringparam *ring_param); int set_hw_ring_size(char *ifname, struct ethtool_ringparam *ring_param); +int open_tuntap(const char *dev_name, bool need_mac); + struct nstoken; /** * open_netns() - Switch to specified network namespace by name. 
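For context on the helper the two network_helpers hunks above export: open_tuntap() opens /dev/net/tun, selects IFF_TAP when need_mac is true (so frames carry an Ethernet header), puts the fd in non-blocking mode and returns it. A hypothetical caller, assuming only the signature from network_helpers.h -- the "tap0" name and frame layout are made up for illustration:

#include <string.h>
#include <unistd.h>
#include <linux/if_ether.h>
#include "network_helpers.h"

static int inject_dummy_frame(void)
{
	__u8 frame[ETH_HLEN + 32];
	ssize_t n;
	int fd;

	memset(frame, 0, sizeof(frame));	/* zero ethertype + payload */

	fd = open_tuntap("tap0", true);		/* true: TAP, keep the L2 header */
	if (fd < 0)
		return -1;

	/* the helper sets O_NONBLOCK; a single write() queues one frame */
	n = write(fd, frame, sizeof(frame));
	close(fd);
	return n == (ssize_t)sizeof(frame) ? 0 : -1;
}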
diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h b/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h index fb1eb8c67361..ccec0fcdabc1 100644 --- a/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h +++ b/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h @@ -5,7 +5,6 @@ #include <time.h> #include <net/if.h> -#include <linux/if_tun.h> #include <linux/icmp.h> #include "test_progs.h" @@ -37,34 +36,6 @@ static inline int netns_delete(void) return system("ip netns del " NETNS ">/dev/null 2>&1"); } -static int open_tuntap(const char *dev_name, bool need_mac) -{ - int err = 0; - struct ifreq ifr; - int fd = open("/dev/net/tun", O_RDWR); - - if (!ASSERT_GT(fd, 0, "open(/dev/net/tun)")) - return -1; - - ifr.ifr_flags = IFF_NO_PI | (need_mac ? IFF_TAP : IFF_TUN); - strncpy(ifr.ifr_name, dev_name, IFNAMSIZ - 1); - ifr.ifr_name[IFNAMSIZ - 1] = '\0'; - - err = ioctl(fd, TUNSETIFF, &ifr); - if (!ASSERT_OK(err, "ioctl(TUNSETIFF)")) { - close(fd); - return -1; - } - - err = fcntl(fd, F_SETFL, O_NONBLOCK); - if (!ASSERT_OK(err, "fcntl(O_NONBLOCK)")) { - close(fd); - return -1; - } - - return fd; -} - #define ICMP_PAYLOAD_SIZE 100 /* Match an ICMP packet with payload len ICMP_PAYLOAD_SIZE */ diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c index 937da9b7532a..b9d9f0a502ce 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c @@ -4,12 +4,20 @@ #include "test_xdp_context_test_run.skel.h" #include "test_xdp_meta.skel.h" -#define TX_ADDR "10.0.0.1" -#define RX_ADDR "10.0.0.2" #define RX_NAME "veth0" #define TX_NAME "veth1" #define TX_NETNS "xdp_context_tx" #define RX_NETNS "xdp_context_rx" +#define TAP_NAME "tap0" +#define TAP_NETNS "xdp_context_tuntap" + +#define TEST_PAYLOAD_LEN 32 +static const __u8 test_payload[TEST_PAYLOAD_LEN] = { + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, + 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, + 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, + 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, +}; void test_xdp_context_error(int prog_fd, struct bpf_test_run_opts opts, __u32 data_meta, __u32 data, __u32 data_end, @@ -112,7 +120,59 @@ void test_xdp_context_test_run(void) test_xdp_context_test_run__destroy(skel); } -void test_xdp_context_functional(void) +static int send_test_packet(int ifindex) +{ + int n, sock = -1; + __u8 packet[sizeof(struct ethhdr) + TEST_PAYLOAD_LEN]; + + /* The ethernet header is not relevant for this test and doesn't need to + * be meaningful. 
+ */ + struct ethhdr eth = { 0 }; + + memcpy(packet, ð, sizeof(eth)); + memcpy(packet + sizeof(eth), test_payload, TEST_PAYLOAD_LEN); + + sock = socket(AF_PACKET, SOCK_RAW, IPPROTO_RAW); + if (!ASSERT_GE(sock, 0, "socket")) + goto err; + + struct sockaddr_ll saddr = { + .sll_family = PF_PACKET, + .sll_ifindex = ifindex, + .sll_halen = ETH_ALEN + }; + n = sendto(sock, packet, sizeof(packet), 0, (struct sockaddr *)&saddr, + sizeof(saddr)); + if (!ASSERT_EQ(n, sizeof(packet), "sendto")) + goto err; + + close(sock); + return 0; + +err: + if (sock >= 0) + close(sock); + return -1; +} + +static void assert_test_result(struct test_xdp_meta *skel) +{ + int err; + __u32 map_key = 0; + __u8 map_value[TEST_PAYLOAD_LEN]; + + err = bpf_map__lookup_elem(skel->maps.test_result, &map_key, + sizeof(map_key), &map_value, + TEST_PAYLOAD_LEN, BPF_ANY); + if (!ASSERT_OK(err, "lookup test_result")) + return; + + ASSERT_MEMEQ(&map_value, &test_payload, TEST_PAYLOAD_LEN, + "test_result map contains test payload"); +} + +void test_xdp_context_veth(void) { LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS); LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1); @@ -120,7 +180,7 @@ void test_xdp_context_functional(void) struct bpf_program *tc_prog, *xdp_prog; struct test_xdp_meta *skel = NULL; struct nstoken *nstoken = NULL; - int rx_ifindex; + int rx_ifindex, tx_ifindex; int ret; tx_ns = netns_new(TX_NETNS, false); @@ -138,7 +198,6 @@ void test_xdp_context_functional(void) if (!ASSERT_OK_PTR(nstoken, "setns rx_ns")) goto close; - SYS(close, "ip addr add " RX_ADDR "/24 dev " RX_NAME); SYS(close, "ip link set dev " RX_NAME " up"); skel = test_xdp_meta__open_and_load(); @@ -179,9 +238,17 @@ void test_xdp_context_functional(void) if (!ASSERT_OK_PTR(nstoken, "setns tx_ns")) goto close; - SYS(close, "ip addr add " TX_ADDR "/24 dev " TX_NAME); SYS(close, "ip link set dev " TX_NAME " up"); - ASSERT_OK(SYS_NOFAIL("ping -c 1 " RX_ADDR), "ping"); + + tx_ifindex = if_nametoindex(TX_NAME); + if (!ASSERT_GE(tx_ifindex, 0, "if_nametoindex tx")) + goto close; + + ret = send_test_packet(tx_ifindex); + if (!ASSERT_OK(ret, "send_test_packet")) + goto close; + + assert_test_result(skel); close: close_netns(nstoken); @@ -190,3 +257,67 @@ close: netns_free(tx_ns); } +void test_xdp_context_tuntap(void) +{ + LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS); + LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1); + struct netns_obj *ns = NULL; + struct test_xdp_meta *skel = NULL; + __u8 packet[sizeof(struct ethhdr) + TEST_PAYLOAD_LEN]; + int tap_fd = -1; + int tap_ifindex; + int ret; + + ns = netns_new(TAP_NETNS, true); + if (!ASSERT_OK_PTR(ns, "create and open ns")) + return; + + tap_fd = open_tuntap(TAP_NAME, true); + if (!ASSERT_GE(tap_fd, 0, "open_tuntap")) + goto close; + + SYS(close, "ip link set dev " TAP_NAME " up"); + + skel = test_xdp_meta__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open and load skeleton")) + goto close; + + tap_ifindex = if_nametoindex(TAP_NAME); + if (!ASSERT_GE(tap_ifindex, 0, "if_nametoindex")) + goto close; + + tc_hook.ifindex = tap_ifindex; + ret = bpf_tc_hook_create(&tc_hook); + if (!ASSERT_OK(ret, "bpf_tc_hook_create")) + goto close; + + tc_opts.prog_fd = bpf_program__fd(skel->progs.ing_cls); + ret = bpf_tc_attach(&tc_hook, &tc_opts); + if (!ASSERT_OK(ret, "bpf_tc_attach")) + goto close; + + ret = bpf_xdp_attach(tap_ifindex, bpf_program__fd(skel->progs.ing_xdp), + 0, NULL); + if (!ASSERT_GE(ret, 0, "bpf_xdp_attach")) + goto close; + + /* The ethernet 
header is not relevant for this test and doesn't need to + * be meaningful. + */ + struct ethhdr eth = { 0 }; + + memcpy(packet, ð, sizeof(eth)); + memcpy(packet + sizeof(eth), test_payload, TEST_PAYLOAD_LEN); + + ret = write(tap_fd, packet, sizeof(packet)); + if (!ASSERT_EQ(ret, sizeof(packet), "write packet")) + goto close; + + assert_test_result(skel); + +close: + if (tap_fd >= 0) + close(tap_fd); + test_xdp_meta__destroy(skel); + netns_free(ns); +} diff --git a/tools/testing/selftests/bpf/progs/test_xdp_meta.c b/tools/testing/selftests/bpf/progs/test_xdp_meta.c index fe2d71ae0e71..fcf6ca14f2ea 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_meta.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_meta.c @@ -4,37 +4,50 @@ #include <bpf/bpf_helpers.h> -#define __round_mask(x, y) ((__typeof__(x))((y) - 1)) -#define round_up(x, y) ((((x) - 1) | __round_mask(x, y)) + 1) +#define META_SIZE 32 + #define ctx_ptr(ctx, mem) (void *)(unsigned long)ctx->mem +/* Demonstrates how metadata can be passed from an XDP program to a TC program + * using bpf_xdp_adjust_meta. + * For the sake of testing the metadata support in drivers, the XDP program uses + * a fixed-size payload after the Ethernet header as metadata. The TC program + * copies the metadata it receives into a map so it can be checked from + * userspace. + */ + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __uint(value_size, META_SIZE); +} test_result SEC(".maps"); + SEC("tc") int ing_cls(struct __sk_buff *ctx) { - __u8 *data, *data_meta, *data_end; - __u32 diff = 0; + __u8 *data, *data_meta; + __u32 key = 0; data_meta = ctx_ptr(ctx, data_meta); - data_end = ctx_ptr(ctx, data_end); data = ctx_ptr(ctx, data); - if (data + ETH_ALEN > data_end || - data_meta + round_up(ETH_ALEN, 4) > data) + if (data_meta + META_SIZE > data) return TC_ACT_SHOT; - diff |= ((__u32 *)data_meta)[0] ^ ((__u32 *)data)[0]; - diff |= ((__u16 *)data_meta)[2] ^ ((__u16 *)data)[2]; + bpf_map_update_elem(&test_result, &key, data_meta, BPF_ANY); - return diff ? TC_ACT_SHOT : TC_ACT_OK; + return TC_ACT_SHOT; } SEC("xdp") int ing_xdp(struct xdp_md *ctx) { - __u8 *data, *data_meta, *data_end; + __u8 *data, *data_meta, *data_end, *payload; + struct ethhdr *eth; int ret; - ret = bpf_xdp_adjust_meta(ctx, -round_up(ETH_ALEN, 4)); + ret = bpf_xdp_adjust_meta(ctx, -META_SIZE); if (ret < 0) return XDP_DROP; @@ -42,11 +55,21 @@ int ing_xdp(struct xdp_md *ctx) data_end = ctx_ptr(ctx, data_end); data = ctx_ptr(ctx, data); - if (data + ETH_ALEN > data_end || - data_meta + round_up(ETH_ALEN, 4) > data) + eth = (struct ethhdr *)data; + payload = data + sizeof(struct ethhdr); + + if (payload + META_SIZE > data_end || + data_meta + META_SIZE > data) + return XDP_DROP; + + /* The Linux networking stack may send other packets on the test + * interface that interfere with the test. Just drop them. + * The test packets can be recognized by their ethertype of zero. 
+ */ + if (eth->h_proto != 0) return XDP_DROP; - __builtin_memcpy(data_meta, data, ETH_ALEN); + __builtin_memcpy(data_meta, payload, META_SIZE); return XDP_PASS; } diff --git a/tools/testing/selftests/damon/damon_nr_regions.py b/tools/testing/selftests/damon/damon_nr_regions.py index 2e8a74aff543..58f3291fed12 100755 --- a/tools/testing/selftests/damon/damon_nr_regions.py +++ b/tools/testing/selftests/damon/damon_nr_regions.py @@ -65,6 +65,7 @@ def test_nr_regions(real_nr_regions, min_nr_regions, max_nr_regions): test_name = 'nr_regions test with %d/%d/%d real/min/max nr_regions' % ( real_nr_regions, min_nr_regions, max_nr_regions) + collected_nr_regions.sort() if (collected_nr_regions[0] < min_nr_regions or collected_nr_regions[-1] > max_nr_regions): print('fail %s' % test_name) @@ -109,6 +110,7 @@ def main(): attrs = kdamonds.kdamonds[0].contexts[0].monitoring_attrs attrs.min_nr_regions = 3 attrs.max_nr_regions = 7 + attrs.update_us = 100000 err = kdamonds.kdamonds[0].commit() if err is not None: proc.terminate() diff --git a/tools/testing/selftests/damon/damos_quota.py b/tools/testing/selftests/damon/damos_quota.py index 7d4c6bb2e3cd..57c4937aaed2 100755 --- a/tools/testing/selftests/damon/damos_quota.py +++ b/tools/testing/selftests/damon/damos_quota.py @@ -51,16 +51,19 @@ def main(): nr_quota_exceeds = scheme.stats.qt_exceeds wss_collected.sort() + nr_expected_quota_exceeds = 0 for wss in wss_collected: if wss > sz_quota: print('quota is not kept: %s > %s' % (wss, sz_quota)) print('collected samples are as below') print('\n'.join(['%d' % wss for wss in wss_collected])) exit(1) + if wss == sz_quota: + nr_expected_quota_exceeds += 1 - if nr_quota_exceeds < len(wss_collected): - print('quota is not always exceeded: %d > %d' % - (len(wss_collected), nr_quota_exceeds)) + if nr_quota_exceeds < nr_expected_quota_exceeds: + print('quota is exceeded less than expected: %d < %d' % + (nr_quota_exceeds, nr_expected_quota_exceeds)) exit(1) if __name__ == '__main__': diff --git a/tools/testing/selftests/damon/damos_quota_goal.py b/tools/testing/selftests/damon/damos_quota_goal.py index 18246f3b62f7..f76e0412b564 100755 --- a/tools/testing/selftests/damon/damos_quota_goal.py +++ b/tools/testing/selftests/damon/damos_quota_goal.py @@ -63,6 +63,9 @@ def main(): if last_effective_bytes != 0 else -1.0)) if last_effective_bytes == goal.effective_bytes: + # effective quota was already minimum that cannot be more reduced + if expect_increase is False and last_effective_bytes == 1: + continue print('efective bytes not changed: %d' % goal.effective_bytes) exit(1) diff --git a/tools/testing/selftests/drivers/net/bonding/bond_options.sh b/tools/testing/selftests/drivers/net/bonding/bond_options.sh index edc56e2cc606..7bc148889ca7 100755 --- a/tools/testing/selftests/drivers/net/bonding/bond_options.sh +++ b/tools/testing/selftests/drivers/net/bonding/bond_options.sh @@ -11,8 +11,8 @@ ALL_TESTS=" lib_dir=$(dirname "$0") source ${lib_dir}/bond_topo_3d1c.sh -c_maddr="33:33:00:00:00:10" -g_maddr="33:33:00:00:02:54" +c_maddr="33:33:ff:00:00:10" +g_maddr="33:33:ff:00:02:54" skip_prio() { diff --git a/tools/testing/selftests/drivers/net/ping.py b/tools/testing/selftests/drivers/net/ping.py index 17dc11e9b6dd..79f07e0510ec 100755 --- a/tools/testing/selftests/drivers/net/ping.py +++ b/tools/testing/selftests/drivers/net/ping.py @@ -1,49 +1,219 @@ #!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0 +import os +import random, string, time from lib.py import ksft_run, ksft_exit -from lib.py import ksft_eq -from 
lib.py import NetDrvEpEnv +from lib.py import ksft_eq, KsftSkipEx, KsftFailEx +from lib.py import EthtoolFamily, NetDrvEpEnv from lib.py import bkg, cmd, wait_port_listen, rand_port +from lib.py import ethtool, ip +remote_ifname="" +no_sleep=False -def test_v4(cfg) -> None: +def _test_v4(cfg) -> None: cfg.require_ipver("4") cmd("ping -c 1 -W0.5 " + cfg.remote_addr_v["4"]) cmd("ping -c 1 -W0.5 " + cfg.addr_v["4"], host=cfg.remote) + cmd("ping -s 65000 -c 1 -W0.5 " + cfg.remote_addr_v["4"]) + cmd("ping -s 65000 -c 1 -W0.5 " + cfg.addr_v["4"], host=cfg.remote) - -def test_v6(cfg) -> None: +def _test_v6(cfg) -> None: cfg.require_ipver("6") - cmd("ping -c 1 -W0.5 " + cfg.remote_addr_v["6"]) - cmd("ping -c 1 -W0.5 " + cfg.addr_v["6"], host=cfg.remote) - + cmd("ping -c 1 -W5 " + cfg.remote_addr_v["6"]) + cmd("ping -c 1 -W5 " + cfg.addr_v["6"], host=cfg.remote) + cmd("ping -s 65000 -c 1 -W0.5 " + cfg.remote_addr_v["6"]) + cmd("ping -s 65000 -c 1 -W0.5 " + cfg.addr_v["6"], host=cfg.remote) -def test_tcp(cfg) -> None: +def _test_tcp(cfg) -> None: cfg.require_cmd("socat", remote=True) port = rand_port() listen_cmd = f"socat -{cfg.addr_ipver} -t 2 -u TCP-LISTEN:{port},reuseport STDOUT" + test_string = ''.join(random.choice(string.ascii_lowercase) for _ in range(65536)) with bkg(listen_cmd, exit_wait=True) as nc: wait_port_listen(port) - cmd(f"echo ping | socat -t 2 -u STDIN TCP:{cfg.baddr}:{port}", + cmd(f"echo {test_string} | socat -t 2 -u STDIN TCP:{cfg.baddr}:{port}", shell=True, host=cfg.remote) - ksft_eq(nc.stdout.strip(), "ping") + ksft_eq(nc.stdout.strip(), test_string) + test_string = ''.join(random.choice(string.ascii_lowercase) for _ in range(65536)) with bkg(listen_cmd, host=cfg.remote, exit_wait=True) as nc: wait_port_listen(port, host=cfg.remote) - cmd(f"echo ping | socat -t 2 -u STDIN TCP:{cfg.remote_baddr}:{port}", shell=True) - ksft_eq(nc.stdout.strip(), "ping") - + cmd(f"echo {test_string} | socat -t 2 -u STDIN TCP:{cfg.remote_baddr}:{port}", shell=True) + ksft_eq(nc.stdout.strip(), test_string) + +def _set_offload_checksum(cfg, netnl, on) -> None: + try: + ethtool(f" -K {cfg.ifname} rx {on} tx {on} ") + except: + return + +def _set_xdp_generic_sb_on(cfg) -> None: + test_dir = os.path.dirname(os.path.realpath(__file__)) + prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o" + cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote) + cmd(f"ip link set dev {cfg.ifname} mtu 1500 xdpgeneric obj {prog} sec xdp", shell=True) + + if no_sleep != True: + time.sleep(10) + +def _set_xdp_generic_mb_on(cfg) -> None: + test_dir = os.path.dirname(os.path.realpath(__file__)) + prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o" + cmd(f"ip link set dev {remote_ifname} mtu 9000", shell=True, host=cfg.remote) + ip("link set dev %s mtu 9000 xdpgeneric obj %s sec xdp.frags" % (cfg.ifname, prog)) + + if no_sleep != True: + time.sleep(10) + +def _set_xdp_native_sb_on(cfg) -> None: + test_dir = os.path.dirname(os.path.realpath(__file__)) + prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o" + cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote) + cmd(f"ip -j link set dev {cfg.ifname} mtu 1500 xdp obj {prog} sec xdp", shell=True) + xdp_info = ip("-d link show %s" % (cfg.ifname), json=True)[0] + if xdp_info['xdp']['mode'] != 1: + """ + If the interface doesn't support native-mode, it falls back to generic mode. + The mode value 1 is native and 2 is generic. + So it raises an exception if mode is not 1(native mode). 
+ """ + raise KsftSkipEx('device does not support native-XDP') + + if no_sleep != True: + time.sleep(10) + +def _set_xdp_native_mb_on(cfg) -> None: + test_dir = os.path.dirname(os.path.realpath(__file__)) + prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o" + cmd(f"ip link set dev {remote_ifname} mtu 9000", shell=True, host=cfg.remote) + try: + cmd(f"ip link set dev {cfg.ifname} mtu 9000 xdp obj {prog} sec xdp.frags", shell=True) + except Exception as e: + cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote) + raise KsftSkipEx('device does not support native-multi-buffer XDP') + + if no_sleep != True: + time.sleep(10) + +def _set_xdp_offload_on(cfg) -> None: + test_dir = os.path.dirname(os.path.realpath(__file__)) + prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o" + cmd(f"ip link set dev {cfg.ifname} mtu 1500", shell=True) + try: + cmd(f"ip link set dev {cfg.ifname} xdpoffload obj {prog} sec xdp", shell=True) + except Exception as e: + raise KsftSkipEx('device does not support offloaded XDP') + cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote) + + if no_sleep != True: + time.sleep(10) + +def get_interface_info(cfg) -> None: + global remote_ifname + global no_sleep + + remote_info = cmd(f"ip -4 -o addr show to {cfg.remote_addr_v['4']} | awk '{{print $2}}'", shell=True, host=cfg.remote).stdout + remote_ifname = remote_info.rstrip('\n') + if remote_ifname == "": + raise KsftFailEx('Can not get remote interface') + local_info = ip("-d link show %s" % (cfg.ifname), json=True)[0] + if 'parentbus' in local_info and local_info['parentbus'] == "netdevsim": + no_sleep=True + if 'linkinfo' in local_info and local_info['linkinfo']['info_kind'] == "veth": + no_sleep=True + +def set_interface_init(cfg) -> None: + cmd(f"ip link set dev {cfg.ifname} mtu 1500", shell=True) + cmd(f"ip link set dev {cfg.ifname} xdp off ", shell=True) + cmd(f"ip link set dev {cfg.ifname} xdpgeneric off ", shell=True) + cmd(f"ip link set dev {cfg.ifname} xdpoffload off", shell=True) + cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote) + +def test_default(cfg, netnl) -> None: + _set_offload_checksum(cfg, netnl, "off") + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) + _set_offload_checksum(cfg, netnl, "on") + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) + +def test_xdp_generic_sb(cfg, netnl) -> None: + _set_xdp_generic_sb_on(cfg) + _set_offload_checksum(cfg, netnl, "off") + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) + _set_offload_checksum(cfg, netnl, "on") + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) + ip("link set dev %s xdpgeneric off" % cfg.ifname) + +def test_xdp_generic_mb(cfg, netnl) -> None: + _set_xdp_generic_mb_on(cfg) + _set_offload_checksum(cfg, netnl, "off") + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) + _set_offload_checksum(cfg, netnl, "on") + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) + ip("link set dev %s xdpgeneric off" % cfg.ifname) + +def test_xdp_native_sb(cfg, netnl) -> None: + _set_xdp_native_sb_on(cfg) + _set_offload_checksum(cfg, netnl, "off") + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) + _set_offload_checksum(cfg, netnl, "on") + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) + ip("link set dev %s xdp off" % cfg.ifname) + +def test_xdp_native_mb(cfg, netnl) -> None: + _set_xdp_native_mb_on(cfg) + _set_offload_checksum(cfg, netnl, "off") + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) + _set_offload_checksum(cfg, netnl, "on") + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) + ip("link set dev %s xdp 
off" % cfg.ifname) + +def test_xdp_offload(cfg, netnl) -> None: + _set_xdp_offload_on(cfg) + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) + ip("link set dev %s xdpoffload off" % cfg.ifname) def main() -> None: with NetDrvEpEnv(__file__) as cfg: - ksft_run(globs=globals(), case_pfx={"test_"}, args=(cfg, )) + get_interface_info(cfg) + set_interface_init(cfg) + ksft_run([test_default, + test_xdp_generic_sb, + test_xdp_generic_mb, + test_xdp_native_sb, + test_xdp_native_mb, + test_xdp_offload], + args=(cfg, EthtoolFamily())) + set_interface_init(cfg) ksft_exit() diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc index 449f9d8be746..73f6c6fcecab 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc @@ -10,12 +10,16 @@ PLACE=$FUNCTION_FORK PLACE2="kmem_cache_free" PLACE3="schedule_timeout" +# Some functions may have BPF programs attached, therefore +# count already enabled_functions before tests start +ocnt=`cat enabled_functions | wc -l` + echo "f:myevent1 $PLACE" >> dynamic_events # Make sure the event is attached and is the only one grep -q $PLACE enabled_functions cnt=`cat enabled_functions | wc -l` -if [ $cnt -ne 1 ]; then +if [ $cnt -ne $((ocnt + 1)) ]; then exit_fail fi @@ -23,7 +27,7 @@ echo "f:myevent2 $PLACE%return" >> dynamic_events # It should till be the only attached function cnt=`cat enabled_functions | wc -l` -if [ $cnt -ne 1 ]; then +if [ $cnt -ne $((ocnt + 1)) ]; then exit_fail fi @@ -32,7 +36,7 @@ echo "f:myevent3 $PLACE2" >> dynamic_events grep -q $PLACE2 enabled_functions cnt=`cat enabled_functions | wc -l` -if [ $cnt -ne 2 ]; then +if [ $cnt -ne $((ocnt + 2)) ]; then exit_fail fi @@ -49,7 +53,7 @@ grep -q myevent1 dynamic_events # should still have 2 left cnt=`cat enabled_functions | wc -l` -if [ $cnt -ne 2 ]; then +if [ $cnt -ne $((ocnt + 2)) ]; then exit_fail fi @@ -57,7 +61,7 @@ echo > dynamic_events # Should have none left cnt=`cat enabled_functions | wc -l` -if [ $cnt -ne 0 ]; then +if [ $cnt -ne $ocnt ]; then exit_fail fi @@ -65,7 +69,7 @@ echo "f:myevent4 $PLACE" >> dynamic_events # Should only have one enabled cnt=`cat enabled_functions | wc -l` -if [ $cnt -ne 1 ]; then +if [ $cnt -ne $((ocnt + 1)) ]; then exit_fail fi @@ -73,7 +77,7 @@ echo > dynamic_events # Should have none left cnt=`cat enabled_functions | wc -l` -if [ $cnt -ne 0 ]; then +if [ $cnt -ne $ocnt ]; then exit_fail fi diff --git a/tools/testing/selftests/kvm/mmu_stress_test.c b/tools/testing/selftests/kvm/mmu_stress_test.c index d9c76b4c0d88..6a437d2be9fa 100644 --- a/tools/testing/selftests/kvm/mmu_stress_test.c +++ b/tools/testing/selftests/kvm/mmu_stress_test.c @@ -18,6 +18,7 @@ #include "ucall_common.h" static bool mprotect_ro_done; +static bool all_vcpus_hit_ro_fault; static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride) { @@ -36,9 +37,9 @@ static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride) /* * Write to the region while mprotect(PROT_READ) is underway. Keep - * looping until the memory is guaranteed to be read-only, otherwise - * vCPUs may complete their writes and advance to the next stage - * prematurely. + * looping until the memory is guaranteed to be read-only and a fault + * has occurred, otherwise vCPUs may complete their writes and advance + * to the next stage prematurely. 
* * For architectures that support skipping the faulting instruction, * generate the store via inline assembly to ensure the exact length @@ -56,7 +57,7 @@ static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride) #else vcpu_arch_put_guest(*((volatile uint64_t *)gpa), gpa); #endif - } while (!READ_ONCE(mprotect_ro_done)); + } while (!READ_ONCE(mprotect_ro_done) || !READ_ONCE(all_vcpus_hit_ro_fault)); /* * Only architectures that write the entire range can explicitly sync, @@ -81,6 +82,7 @@ struct vcpu_info { static int nr_vcpus; static atomic_t rendezvous; +static atomic_t nr_ro_faults; static void rendezvous_with_boss(void) { @@ -148,12 +150,16 @@ static void *vcpu_worker(void *data) * be stuck on the faulting instruction for other architectures. Go to * stage 3 without a rendezvous */ - do { - r = _vcpu_run(vcpu); - } while (!r); + r = _vcpu_run(vcpu); TEST_ASSERT(r == -1 && errno == EFAULT, "Expected EFAULT on write to RO memory, got r = %d, errno = %d", r, errno); + atomic_inc(&nr_ro_faults); + if (atomic_read(&nr_ro_faults) == nr_vcpus) { + WRITE_ONCE(all_vcpus_hit_ro_fault, true); + sync_global_to_guest(vm, all_vcpus_hit_ro_fault); + } + #if defined(__x86_64__) || defined(__aarch64__) /* * Verify *all* writes from the guest hit EFAULT due to the VMA now @@ -378,7 +384,6 @@ int main(int argc, char *argv[]) rendezvous_with_vcpus(&time_run2, "run 2"); mprotect(mem, slot_size, PROT_READ); - usleep(10); mprotect_ro_done = true; sync_global_to_guest(vm, mprotect_ro_done); diff --git a/tools/testing/selftests/kvm/x86/nested_exceptions_test.c b/tools/testing/selftests/kvm/x86/nested_exceptions_test.c index 3eb0313ffa39..3641a42934ac 100644 --- a/tools/testing/selftests/kvm/x86/nested_exceptions_test.c +++ b/tools/testing/selftests/kvm/x86/nested_exceptions_test.c @@ -85,6 +85,7 @@ static void svm_run_l2(struct svm_test_data *svm, void *l2_code, int vector, GUEST_ASSERT_EQ(ctrl->exit_code, (SVM_EXIT_EXCP_BASE + vector)); GUEST_ASSERT_EQ(ctrl->exit_info_1, error_code); + GUEST_ASSERT(!ctrl->int_state); } static void l1_svm_code(struct svm_test_data *svm) @@ -122,6 +123,7 @@ static void vmx_run_l2(void *l2_code, int vector, uint32_t error_code) GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_EXCEPTION_NMI); GUEST_ASSERT_EQ((vmreadz(VM_EXIT_INTR_INFO) & 0xff), vector); GUEST_ASSERT_EQ(vmreadz(VM_EXIT_INTR_ERROR_CODE), error_code); + GUEST_ASSERT(!vmreadz(GUEST_INTERRUPTIBILITY_INFO)); } static void l1_vmx_code(struct vmx_pages *vmx) diff --git a/tools/testing/selftests/kvm/x86/sev_smoke_test.c b/tools/testing/selftests/kvm/x86/sev_smoke_test.c index a1a688e75266..d97816dc476a 100644 --- a/tools/testing/selftests/kvm/x86/sev_smoke_test.c +++ b/tools/testing/selftests/kvm/x86/sev_smoke_test.c @@ -52,7 +52,8 @@ static void compare_xsave(u8 *from_host, u8 *from_guest) bool bad = false; for (i = 0; i < 4095; i++) { if (from_host[i] != from_guest[i]) { - printf("mismatch at %02hhx | %02hhx %02hhx\n", i, from_host[i], from_guest[i]); + printf("mismatch at %u | %02hhx %02hhx\n", + i, from_host[i], from_guest[i]); bad = true; } } diff --git a/tools/testing/selftests/mm/hugepage-mremap.c b/tools/testing/selftests/mm/hugepage-mremap.c index ada9156cc497..c463d1c09c9b 100644 --- a/tools/testing/selftests/mm/hugepage-mremap.c +++ b/tools/testing/selftests/mm/hugepage-mremap.c @@ -15,7 +15,7 @@ #define _GNU_SOURCE #include <stdlib.h> #include <stdio.h> -#include <asm-generic/unistd.h> +#include <unistd.h> #include <sys/mman.h> #include <errno.h> #include <fcntl.h> /* Definition of O_* 
constants */ diff --git a/tools/testing/selftests/mm/ksm_functional_tests.c b/tools/testing/selftests/mm/ksm_functional_tests.c index 66b4e111b5a2..b61803e36d1c 100644 --- a/tools/testing/selftests/mm/ksm_functional_tests.c +++ b/tools/testing/selftests/mm/ksm_functional_tests.c @@ -11,7 +11,7 @@ #include <string.h> #include <stdbool.h> #include <stdint.h> -#include <asm-generic/unistd.h> +#include <unistd.h> #include <errno.h> #include <fcntl.h> #include <sys/mman.h> @@ -369,6 +369,7 @@ unmap: munmap(map, size); } +#ifdef __NR_userfaultfd static void test_unmerge_uffd_wp(void) { struct uffdio_writeprotect uffd_writeprotect; @@ -429,6 +430,7 @@ close_uffd: unmap: munmap(map, size); } +#endif /* Verify that KSM can be enabled / queried with prctl. */ static void test_prctl(void) @@ -684,7 +686,9 @@ int main(int argc, char **argv) exit(test_child_ksm()); } +#ifdef __NR_userfaultfd tests++; +#endif ksft_print_header(); ksft_set_plan(tests); @@ -696,7 +700,9 @@ int main(int argc, char **argv) test_unmerge(); test_unmerge_zero_pages(); test_unmerge_discarded(); +#ifdef __NR_userfaultfd test_unmerge_uffd_wp(); +#endif test_prot_none(); diff --git a/tools/testing/selftests/mm/memfd_secret.c b/tools/testing/selftests/mm/memfd_secret.c index 74c911aa3aea..9a0597310a76 100644 --- a/tools/testing/selftests/mm/memfd_secret.c +++ b/tools/testing/selftests/mm/memfd_secret.c @@ -17,7 +17,7 @@ #include <stdlib.h> #include <string.h> -#include <asm-generic/unistd.h> +#include <unistd.h> #include <errno.h> #include <stdio.h> #include <fcntl.h> @@ -28,6 +28,8 @@ #define pass(fmt, ...) ksft_test_result_pass(fmt, ##__VA_ARGS__) #define skip(fmt, ...) ksft_test_result_skip(fmt, ##__VA_ARGS__) +#ifdef __NR_memfd_secret + #define PATTERN 0x55 static const int prot = PROT_READ | PROT_WRITE; @@ -332,3 +334,13 @@ int main(int argc, char *argv[]) ksft_finished(); } + +#else /* __NR_memfd_secret */ + +int main(int argc, char *argv[]) +{ + printf("skip: skipping memfd_secret test (missing __NR_memfd_secret)\n"); + return KSFT_SKIP; +} + +#endif /* __NR_memfd_secret */ diff --git a/tools/testing/selftests/mm/mkdirty.c b/tools/testing/selftests/mm/mkdirty.c index af2fce496912..09feeb453646 100644 --- a/tools/testing/selftests/mm/mkdirty.c +++ b/tools/testing/selftests/mm/mkdirty.c @@ -9,7 +9,7 @@ */ #include <fcntl.h> #include <signal.h> -#include <asm-generic/unistd.h> +#include <unistd.h> #include <string.h> #include <errno.h> #include <stdlib.h> @@ -265,6 +265,7 @@ munmap: munmap(mmap_mem, mmap_size); } +#ifdef __NR_userfaultfd static void test_uffdio_copy(void) { struct uffdio_register uffdio_register; @@ -322,6 +323,7 @@ munmap: munmap(dst, pagesize); free(src); } +#endif /* __NR_userfaultfd */ int main(void) { @@ -334,7 +336,9 @@ int main(void) thpsize / 1024); tests += 3; } +#ifdef __NR_userfaultfd tests += 1; +#endif /* __NR_userfaultfd */ ksft_print_header(); ksft_set_plan(tests); @@ -364,7 +368,9 @@ int main(void) if (thpsize) test_pte_mapped_thp(); /* Placing a fresh page via userfaultfd may set the PTE dirty. 
*/ +#ifdef __NR_userfaultfd test_uffdio_copy(); +#endif /* __NR_userfaultfd */ err = ksft_get_fail_cnt(); if (err) diff --git a/tools/testing/selftests/mm/mlock2.h b/tools/testing/selftests/mm/mlock2.h index 1e5731bab499..4417eaa5cfb7 100644 --- a/tools/testing/selftests/mm/mlock2.h +++ b/tools/testing/selftests/mm/mlock2.h @@ -3,7 +3,6 @@ #include <errno.h> #include <stdio.h> #include <stdlib.h> -#include <asm-generic/unistd.h> static int mlock2_(void *start, size_t len, int flags) { diff --git a/tools/testing/selftests/mm/protection_keys.c b/tools/testing/selftests/mm/protection_keys.c index a4683f2476f2..35565af308af 100644 --- a/tools/testing/selftests/mm/protection_keys.c +++ b/tools/testing/selftests/mm/protection_keys.c @@ -42,7 +42,7 @@ #include <sys/wait.h> #include <sys/stat.h> #include <fcntl.h> -#include <asm-generic/unistd.h> +#include <unistd.h> #include <sys/ptrace.h> #include <setjmp.h> diff --git a/tools/testing/selftests/mm/uffd-common.c b/tools/testing/selftests/mm/uffd-common.c index 717539eddf98..7ad6ba660c7d 100644 --- a/tools/testing/selftests/mm/uffd-common.c +++ b/tools/testing/selftests/mm/uffd-common.c @@ -673,7 +673,11 @@ int uffd_open_dev(unsigned int flags) int uffd_open_sys(unsigned int flags) { +#ifdef __NR_userfaultfd return syscall(__NR_userfaultfd, flags); +#else + return -1; +#endif } int uffd_open(unsigned int flags) diff --git a/tools/testing/selftests/mm/uffd-stress.c b/tools/testing/selftests/mm/uffd-stress.c index a4b83280998a..944d559ade21 100644 --- a/tools/testing/selftests/mm/uffd-stress.c +++ b/tools/testing/selftests/mm/uffd-stress.c @@ -33,10 +33,11 @@ * pthread_mutex_lock will also verify the atomicity of the memory * transfer (UFFDIO_COPY). */ -#include <asm-generic/unistd.h> + #include "uffd-common.h" uint64_t features; +#ifdef __NR_userfaultfd #define BOUNCE_RANDOM (1<<0) #define BOUNCE_RACINGFAULTS (1<<1) @@ -471,3 +472,15 @@ int main(int argc, char **argv) nr_pages, nr_pages_per_cpu); return userfaultfd_stress(); } + +#else /* __NR_userfaultfd */ + +#warning "missing __NR_userfaultfd definition" + +int main(void) +{ + printf("skip: Skipping userfaultfd test (missing __NR_userfaultfd)\n"); + return KSFT_SKIP; +} + +#endif /* __NR_userfaultfd */ diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c index 9ff71fa1f9bf..74c8bc02b506 100644 --- a/tools/testing/selftests/mm/uffd-unit-tests.c +++ b/tools/testing/selftests/mm/uffd-unit-tests.c @@ -5,11 +5,12 @@ * Copyright (C) 2015-2023 Red Hat, Inc. */ -#include <asm-generic/unistd.h> #include "uffd-common.h" #include "../../../../mm/gup_test.h" +#ifdef __NR_userfaultfd + /* The unit test doesn't need a large or random size, make it 32MB for now */ #define UFFD_TEST_MEM_SIZE (32UL << 20) @@ -1558,3 +1559,14 @@ int main(int argc, char *argv[]) return ksft_get_fail_cnt() ? 
KSFT_FAIL : KSFT_PASS; } +#else /* __NR_userfaultfd */ + +#warning "missing __NR_userfaultfd definition" + +int main(void) +{ + printf("Skipping %s (missing __NR_userfaultfd)\n", __FILE__); + return KSFT_SKIP; +} + +#endif /* __NR_userfaultfd */ diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 80dcae53ef55..679542f565a4 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -21,6 +21,7 @@ msg_oob msg_zerocopy netlink-dumps nettest +proc_net_pktgen psock_fanout psock_snd psock_tpacket diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index d5aa16185f42..f03a0399e7a3 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -31,6 +31,7 @@ TEST_PROGS += veth.sh TEST_PROGS += ioam6.sh TEST_PROGS += gro.sh TEST_PROGS += gre_gso.sh +TEST_PROGS += gre_ipv6_lladdr.sh TEST_PROGS += cmsg_so_mark.sh TEST_PROGS += cmsg_so_priority.sh TEST_PROGS += test_so_rcv.sh diff --git a/tools/testing/selftests/net/bpf_offload.py b/tools/testing/selftests/net/bpf_offload.py index fd0d959914e4..b2c271b79240 100755 --- a/tools/testing/selftests/net/bpf_offload.py +++ b/tools/testing/selftests/net/bpf_offload.py @@ -207,9 +207,11 @@ def bpftool_prog_list_wait(expected=0, n_retry=20): raise Exception("Time out waiting for program counts to stabilize want %d, have %d" % (expected, nprogs)) def bpftool_map_list_wait(expected=0, n_retry=20, ns=""): + nmaps = None for i in range(n_retry): maps = bpftool_map_list(ns=ns) - if len(maps) == expected: + nmaps = len(maps) + if nmaps == expected: return maps time.sleep(0.05) raise Exception("Time out waiting for map counts to stabilize want %d, have %d" % (expected, nmaps)) @@ -710,6 +712,7 @@ _, base_maps = bpftool("map") base_map_names = [ 'pid_iter.rodata', # created on each bpftool invocation 'libbpf_det_bind', # created on each bpftool invocation + 'libbpf_global', ] # Check netdevsim diff --git a/tools/testing/selftests/net/cmsg_ip.sh b/tools/testing/selftests/net/cmsg_ip.sh index 2a52520aca32..b55680e081ad 100755 --- a/tools/testing/selftests/net/cmsg_ip.sh +++ b/tools/testing/selftests/net/cmsg_ip.sh @@ -50,8 +50,9 @@ check_result() { # IPV6_DONTFRAG for ovr in setsock cmsg both diff; do for df in 0 1; do - for p in u i r; do + for p in u U i r; do [ $p == "u" ] && prot=UDP + [ $p == "U" ] && prot=UDP [ $p == "i" ] && prot=ICMP [ $p == "r" ] && prot=RAW @@ -81,8 +82,9 @@ test_dscp() { ip $IPVER -netns $NS route add table 300 prohibit any for ovr in setsock cmsg both diff; do - for p in u i r; do + for p in u U i r; do [ $p == "u" ] && prot=UDP + [ $p == "U" ] && prot=UDP [ $p == "i" ] && prot=ICMP [ $p == "r" ] && prot=RAW @@ -134,8 +136,9 @@ test_ttl_hoplimit() { local -r LIM=4 for ovr in setsock cmsg both diff; do - for p in u i r; do + for p in u U i r; do [ $p == "u" ] && prot=UDP + [ $p == "U" ] && prot=UDP [ $p == "i" ] && prot=ICMP [ $p == "r" ] && prot=RAW @@ -166,7 +169,7 @@ test_ttl_hoplimit -4 $TGT4 ttl test_ttl_hoplimit -6 $TGT6 hlim # IPV6 exthdr -for p in u i r; do +for p in u U i r; do # Very basic "does it crash" test for h in h d r; do $NSEXE ./cmsg_sender -p $p -6 -H $h $TGT6 1234 diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c index 19bd8499031b..a825e628aee7 100644 --- a/tools/testing/selftests/net/cmsg_sender.c +++ b/tools/testing/selftests/net/cmsg_sender.c @@ -33,6 +33,7 @@ enum { ERN_RECVERR, ERN_CMSG_RD, ERN_CMSG_RCV,
ERN_SEND_MORE, }; struct option_cmsg_u32 { @@ -46,6 +47,7 @@ struct options { const char *service; unsigned int size; unsigned int num_pkt; + bool msg_more; struct { unsigned int mark; unsigned int dontfrag; @@ -94,7 +96,8 @@ static void __attribute__((noreturn)) cs_usage(const char *bin) "\t\t-S send() size\n" "\t\t-4/-6 Force IPv4 / IPv6 only\n" "\t\t-p prot Socket protocol\n" - "\t\t (u = UDP (default); i = ICMP; r = RAW)\n" + "\t\t (u = UDP (default); i = ICMP; r = RAW;\n" + "\t\t U = UDP with MSG_MORE)\n" "\n" "\t\t-m val Set SO_MARK with given value\n" "\t\t-M val Set SO_MARK via setsockopt\n" @@ -109,8 +112,8 @@ static void __attribute__((noreturn)) cs_usage(const char *bin) "\t\t-l val Set TTL/HOPLIMIT via cmsg\n" "\t\t-L val Set TTL/HOPLIMIT via setsockopt\n" "\t\t-H type Add an IPv6 header option\n" - "\t\t (h = HOP; d = DST; r = RTDST)" - ""); + "\t\t (h = HOP; d = DST; r = RTDST)\n" + "\n"); exit(ERN_HELP); } @@ -133,8 +136,11 @@ static void cs_parse_args(int argc, char *argv[]) opt.sock.family = AF_INET6; break; case 'p': - if (*optarg == 'u' || *optarg == 'U') { + if (*optarg == 'u') { opt.sock.proto = IPPROTO_UDP; + } else if (*optarg == 'U') { + opt.sock.proto = IPPROTO_UDP; + opt.msg_more = true; } else if (*optarg == 'i' || *optarg == 'I') { opt.sock.proto = IPPROTO_ICMP; } else if (*optarg == 'r') { @@ -531,7 +537,7 @@ int main(int argc, char *argv[]) cs_write_cmsg(fd, &msg, cbuf, sizeof(cbuf)); for (i = 0; i < opt.num_pkt; i++) { - err = sendmsg(fd, &msg, 0); + err = sendmsg(fd, &msg, opt.msg_more ? MSG_MORE : 0); if (err < 0) { if (!opt.silent_send) fprintf(stderr, "send failed: %s\n", strerror(errno)); @@ -542,6 +548,14 @@ int main(int argc, char *argv[]) err = ERN_SEND_SHORT; goto err_out; } + if (opt.msg_more) { + err = write(fd, NULL, 0); + if (err < 0) { + fprintf(stderr, "send more: %s\n", strerror(errno)); + err = ERN_SEND_MORE; + goto err_out; + } + } } err = ERN_SUCCESS; diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index a86f3e752ce2..b0d0eda829d0 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -18,6 +18,8 @@ CONFIG_DUMMY=y CONFIG_BRIDGE_VLAN_FILTERING=y CONFIG_BRIDGE=y CONFIG_CRYPTO_CHACHA20POLY1305=m +CONFIG_DEBUG_INFO_BTF=y +CONFIG_DEBUG_INFO_BTF_MODULES=n CONFIG_VLAN_8021Q=y CONFIG_GENEVE=m CONFIG_IFB=y diff --git a/tools/testing/selftests/net/gre_ipv6_lladdr.sh b/tools/testing/selftests/net/gre_ipv6_lladdr.sh new file mode 100755 index 000000000000..5b34f6e1f831 --- /dev/null +++ b/tools/testing/selftests/net/gre_ipv6_lladdr.sh @@ -0,0 +1,177 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source ./lib.sh + +PAUSE_ON_FAIL="no" + +# The trap function handler +# +exit_cleanup_all() +{ + cleanup_all_ns + + exit "${EXIT_STATUS}" +} + +# Add fake IPv4 and IPv6 networks on the loopback device, to be used as +# underlay by future GRE devices. +# +setup_basenet() +{ + ip -netns "${NS0}" link set dev lo up + ip -netns "${NS0}" address add dev lo 192.0.2.10/24 + ip -netns "${NS0}" address add dev lo 2001:db8::10/64 nodad +} + +# Check if network device has an IPv6 link-local address assigned. 
+# +# Parameters: +# +# * $1: The network device to test +# * $2: An extra regular expression that should be matched (to verify the +# presence of extra attributes) +# * $3: The expected return code from grep (to allow checking the absence of +# a link-local address) +# * $4: The user visible name for the scenario being tested +# +check_ipv6_ll_addr() +{ + local DEV="$1" + local EXTRA_MATCH="$2" + local XRET="$3" + local MSG="$4" + + RET=0 + set +e + ip -netns "${NS0}" -6 address show dev "${DEV}" scope link | grep "fe80::" | grep -q "${EXTRA_MATCH}" + check_err_fail "${XRET}" $? "" + log_test "${MSG}" + set -e +} + +# Create a GRE device and verify that it gets an IPv6 link-local address as +# expected. +# +# Parameters: +# +# * $1: The device type (gre, ip6gre, gretap or ip6gretap) +# * $2: The local underlay IP address (can be an IPv4, an IPv6 or "any") +# * $3: The remote underlay IP address (can be an IPv4, an IPv6 or "any") +# * $4: The IPv6 interface identifier generation mode to use for the GRE +# device (eui64, none, stable-privacy or random). +# +test_gre_device() +{ + local GRE_TYPE="$1" + local LOCAL_IP="$2" + local REMOTE_IP="$3" + local MODE="$4" + local ADDR_GEN_MODE + local MATCH_REGEXP + local MSG + + ip link add netns "${NS0}" name gretest type "${GRE_TYPE}" local "${LOCAL_IP}" remote "${REMOTE_IP}" + + case "${MODE}" in + "eui64") + ADDR_GEN_MODE=0 + MATCH_REGEXP="" + MSG="${GRE_TYPE}, mode: 0 (EUI64), ${LOCAL_IP} -> ${REMOTE_IP}" + XRET=0 + ;; + "none") + ADDR_GEN_MODE=1 + MATCH_REGEXP="" + MSG="${GRE_TYPE}, mode: 1 (none), ${LOCAL_IP} -> ${REMOTE_IP}" + XRET=1 # No link-local address should be generated + ;; + "stable-privacy") + ADDR_GEN_MODE=2 + MATCH_REGEXP="stable-privacy" + MSG="${GRE_TYPE}, mode: 2 (stable privacy), ${LOCAL_IP} -> ${REMOTE_IP}" + XRET=0 + # Initialise stable_secret (required for stable-privacy mode) + ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.stable_secret="2001:db8::abcd" + ;; + "random") + ADDR_GEN_MODE=3 + MATCH_REGEXP="stable-privacy" + MSG="${GRE_TYPE}, mode: 3 (random), ${LOCAL_IP} -> ${REMOTE_IP}" + XRET=0 + ;; + esac + + # Check that IPv6 link-local address is generated when device goes up + ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode="${ADDR_GEN_MODE}" + ip -netns "${NS0}" link set dev gretest up + check_ipv6_ll_addr gretest "${MATCH_REGEXP}" "${XRET}" "config: ${MSG}" + + # Now disable link-local address generation + ip -netns "${NS0}" link set dev gretest down + ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode=1 + ip -netns "${NS0}" link set dev gretest up + + # Check that link-local address generation works when re-enabled while + # the device is already up + ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode="${ADDR_GEN_MODE}" + check_ipv6_ll_addr gretest "${MATCH_REGEXP}" "${XRET}" "update: ${MSG}" + + ip -netns "${NS0}" link del dev gretest +} + +test_gre4() +{ + local GRE_TYPE + local MODE + + for GRE_TYPE in "gre" "gretap"; do + printf "\n####\nTesting IPv6 link-local address generation on ${GRE_TYPE} devices\n####\n\n" + + for MODE in "eui64" "none" "stable-privacy" "random"; do + test_gre_device "${GRE_TYPE}" 192.0.2.10 192.0.2.11 "${MODE}" + test_gre_device "${GRE_TYPE}" any 192.0.2.11 "${MODE}" + test_gre_device "${GRE_TYPE}" 192.0.2.10 any "${MODE}" + done + done +} + +test_gre6() +{ + local GRE_TYPE + local MODE + + for GRE_TYPE in "ip6gre" "ip6gretap"; do + printf "\n####\nTesting IPv6 link-local address generation on ${GRE_TYPE} 
devices\n####\n\n" + + for MODE in "eui64" "none" "stable-privacy" "random"; do + test_gre_device "${GRE_TYPE}" 2001:db8::10 2001:db8::11 "${MODE}" + test_gre_device "${GRE_TYPE}" any 2001:db8::11 "${MODE}" + test_gre_device "${GRE_TYPE}" 2001:db8::10 any "${MODE}" + done + done +} + +usage() +{ + echo "Usage: $0 [-p]" + exit 1 +} + +while getopts :p o +do + case $o in + p) PAUSE_ON_FAIL="yes";; + *) usage;; + esac +done + +setup_ns NS0 + +set -e +trap exit_cleanup_all EXIT + +setup_basenet + +test_gre4 +test_gre6 diff --git a/tools/testing/selftests/net/lib/xdp_dummy.bpf.c b/tools/testing/selftests/net/lib/xdp_dummy.bpf.c index d988b2e0cee8..e73fab3edd9f 100644 --- a/tools/testing/selftests/net/lib/xdp_dummy.bpf.c +++ b/tools/testing/selftests/net/lib/xdp_dummy.bpf.c @@ -10,4 +10,10 @@ int xdp_dummy_prog(struct xdp_md *ctx) return XDP_PASS; } +SEC("xdp.frags") +int xdp_dummy_prog_frags(struct xdp_md *ctx) +{ + return XDP_PASS; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/net/netfilter/br_netfilter.sh b/tools/testing/selftests/net/netfilter/br_netfilter.sh index c28379a965d8..1559ba275105 100755 --- a/tools/testing/selftests/net/netfilter/br_netfilter.sh +++ b/tools/testing/selftests/net/netfilter/br_netfilter.sh @@ -13,6 +13,12 @@ source lib.sh checktool "nft --version" "run test without nft tool" +read t < /proc/sys/kernel/tainted +if [ "$t" -ne 0 ];then + echo SKIP: kernel is tainted + exit $ksft_skip +fi + cleanup() { cleanup_all_ns } @@ -165,6 +171,7 @@ if [ "$t" -eq 0 ];then echo PASS: kernel not tainted else echo ERROR: kernel is tainted + dmesg ret=1 fi diff --git a/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh b/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh index 6a764d70ab06..4788641717d9 100755 --- a/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh +++ b/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh @@ -4,6 +4,12 @@ source lib.sh checktool "nft --version" "run test without nft tool" +read t < /proc/sys/kernel/tainted +if [ "$t" -ne 0 ];then + echo SKIP: kernel is tainted + exit $ksft_skip +fi + cleanup() { cleanup_all_ns } @@ -72,6 +78,7 @@ if [ "$t" -eq 0 ];then echo PASS: kernel not tainted else echo ERROR: kernel is tainted + dmesg exit 1 fi diff --git a/tools/testing/selftests/net/netfilter/nft_queue.sh b/tools/testing/selftests/net/netfilter/nft_queue.sh index 785e3875a6da..784d1b46912b 100755 --- a/tools/testing/selftests/net/netfilter/nft_queue.sh +++ b/tools/testing/selftests/net/netfilter/nft_queue.sh @@ -593,6 +593,7 @@ EOF echo "PASS: queue program exiting while packets queued" else echo "TAINT: queue program exiting while packets queued" + dmesg ret=1 fi } diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh index 960e1ab4dd04..3c8d3455d8e7 100755 --- a/tools/testing/selftests/net/openvswitch/openvswitch.sh +++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh @@ -330,6 +330,11 @@ test_psample() { # - drop packets and verify the right drop reason is reported test_drop_reason() { which perf >/dev/null 2>&1 || return $ksft_skip + which pahole >/dev/null 2>&1 || return $ksft_skip + + ovs_drop_subsys=$(pahole -C skb_drop_reason_subsys | + awk '/OPENVSWITCH/ { print $3; }' | + tr -d ,) sbx_add "test_drop_reason" || return $? 
@@ -373,7 +378,7 @@ test_drop_reason() { "in_port(2),eth(),eth_type(0x0800),ipv4(src=172.31.110.20,proto=1),icmp()" 'drop' ovs_drop_record_and_run "test_drop_reason" ip netns exec client ping -c 2 172.31.110.20 - ovs_drop_reason_count 0x30001 # OVS_DROP_FLOW_ACTION + ovs_drop_reason_count 0x${ovs_drop_subsys}0001 # OVS_DROP_FLOW_ACTION if [[ "$?" -ne "2" ]]; then info "Did not detect expected drops: $?" return 1 @@ -390,7 +395,7 @@ test_drop_reason() { ovs_drop_record_and_run \ "test_drop_reason" ip netns exec client nc -i 1 -zuv 172.31.110.20 6000 - ovs_drop_reason_count 0x30004 # OVS_DROP_EXPLICIT_ACTION_ERROR + ovs_drop_reason_count 0x${ovs_drop_subsys}0004 # OVS_DROP_EXPLICIT_ACTION_ERROR if [[ "$?" -ne "1" ]]; then info "Did not detect expected explicit error drops: $?" return 1 @@ -398,7 +403,7 @@ test_drop_reason() { ovs_drop_record_and_run \ "test_drop_reason" ip netns exec client nc -i 1 -zuv 172.31.110.20 7000 - ovs_drop_reason_count 0x30003 # OVS_DROP_EXPLICIT_ACTION + ovs_drop_reason_count 0x${ovs_drop_subsys}0003 # OVS_DROP_EXPLICIT_ACTION if [[ "$?" -ne "1" ]]; then info "Did not detect expected explicit drops: $?" return 1 diff --git a/tools/testing/selftests/net/setup_veth.sh b/tools/testing/selftests/net/setup_veth.sh index eb3182066d12..152bf4c65747 100644 --- a/tools/testing/selftests/net/setup_veth.sh +++ b/tools/testing/selftests/net/setup_veth.sh @@ -11,7 +11,7 @@ setup_veth_ns() { local -r ns_mac="$4" [[ -e /var/run/netns/"${ns_name}" ]] || ip netns add "${ns_name}" - echo 100000 > "/sys/class/net/${ns_dev}/gro_flush_timeout" + echo 200000 > "/sys/class/net/${ns_dev}/gro_flush_timeout" echo 1 > "/sys/class/net/${ns_dev}/napi_defer_hard_irqs" ip link set dev "${ns_dev}" netns "${ns_name}" mtu 65535 ip -netns "${ns_name}" link set dev "${ns_dev}" up diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/drr.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/drr.json index 7126ec3485cb..2b61d8d79bde 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/drr.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/drr.json @@ -61,5 +61,30 @@ "teardown": [ "$TC qdisc del dev $DUMMY handle 1: root" ] + }, + { + "id": "4009", + "name": "Reject creation of DRR class with classid TC_H_ROOT", + "category": [ + "qdisc", + "drr" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DUMMY root handle ffff: drr", + "$TC filter add dev $DUMMY parent ffff: basic classid ffff:1", + "$TC class add dev $DUMMY parent ffff: classid ffff:1 drr", + "$TC filter add dev $DUMMY parent ffff: prio 1 u32 match u16 0x0000 0xfe00 at 2 flowid ffff:ffff" + ], + "cmdUnderTest": "$TC class add dev $DUMMY parent ffff: classid ffff:ffff drr", + "expExitCode": "2", + "verifyCmd": "$TC class show dev $DUMMY", + "matchPattern": "class drr ffff:ffff", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DUMMY root" + ] } ] diff --git a/tools/testing/selftests/vDSO/parse_vdso.c b/tools/testing/selftests/vDSO/parse_vdso.c index 2fe5e983cb22..f89d052c730e 100644 --- a/tools/testing/selftests/vDSO/parse_vdso.c +++ b/tools/testing/selftests/vDSO/parse_vdso.c @@ -53,7 +53,7 @@ static struct vdso_info /* Symbol table */ ELF(Sym) *symtab; const char *symstrings; - ELF(Word) *gnu_hash; + ELF(Word) *gnu_hash, *gnu_bucket; ELF_HASH_ENTRY *bucket, *chain; ELF_HASH_ENTRY nbucket, nchain; @@ -185,8 +185,8 @@ void vdso_init_from_sysinfo_ehdr(uintptr_t base) /* The bucket array is located after the header (4 uint32) and the bloom * filter (size_t array of 
gnu_hash[2] elements). */ - vdso_info.bucket = vdso_info.gnu_hash + 4 + - sizeof(size_t) / 4 * vdso_info.gnu_hash[2]; + vdso_info.gnu_bucket = vdso_info.gnu_hash + 4 + + sizeof(size_t) / 4 * vdso_info.gnu_hash[2]; } else { vdso_info.nbucket = hash[0]; vdso_info.nchain = hash[1]; @@ -268,11 +268,11 @@ void *vdso_sym(const char *version, const char *name) if (vdso_info.gnu_hash) { uint32_t h1 = gnu_hash(name), h2, *hashval; - i = vdso_info.bucket[h1 % vdso_info.nbucket]; + i = vdso_info.gnu_bucket[h1 % vdso_info.nbucket]; if (i == 0) return 0; h1 |= 1; - hashval = vdso_info.bucket + vdso_info.nbucket + + hashval = vdso_info.gnu_bucket + vdso_info.nbucket + (i - vdso_info.gnu_hash[1]); for (;; i++) { ELF(Sym) *sym = &vdso_info.symtab[i]; diff --git a/usr/include/Makefile b/usr/include/Makefile index 6c6de1b1622b..e3d6b03527fe 100644 --- a/usr/include/Makefile +++ b/usr/include/Makefile @@ -10,7 +10,7 @@ UAPI_CFLAGS := -std=c90 -Wall -Werror=implicit-function-declaration # In theory, we do not care -m32 or -m64 for header compile tests. # It is here just because CONFIG_CC_CAN_LINK is tested with -m32 or -m64. -UAPI_CFLAGS += $(filter -m32 -m64 --target=%, $(KBUILD_CFLAGS)) +UAPI_CFLAGS += $(filter -m32 -m64 --target=%, $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS)) # USERCFLAGS might contain sysroot location for CC. UAPI_CFLAGS += $(USERCFLAGS)
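
The mmu_stress_test hunks above replace a fixed usleep(10) window with an explicit rendezvous: each vCPU worker bumps an atomic fault counter after asserting EFAULT, and whichever worker observes the counter reach nr_vcpus publishes the all_vcpus_hit_ro_fault flag that the guest loop polls. Below is a minimal host-side sketch of that counter-then-flag pattern, assuming plain pthreads and an ordinary global instead of KVM's sync_global_to_guest() plumbing; all names here are illustrative, not the selftest's.

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define NR_THREADS 4

static atomic_int nr_faults;
static atomic_bool all_threads_hit_fault;

static void *worker(void *arg)
{
	/* Real test: run the vCPU, assert r == -1 && errno == EFAULT. */

	/* fetch_add returns the old count; the thread that brings the
	 * count to NR_THREADS publishes the flag exactly once. */
	if (atomic_fetch_add(&nr_faults, 1) + 1 == NR_THREADS)
		atomic_store(&all_threads_hit_fault, true);
	return NULL;
}

int main(void)
{
	pthread_t threads[NR_THREADS];
	int i;

	for (i = 0; i < NR_THREADS; i++)
		pthread_create(&threads[i], NULL, worker, NULL);
	for (i = 0; i < NR_THREADS; i++)
		pthread_join(threads[i], NULL);

	printf("all threads faulted: %d\n", (int)atomic_load(&all_threads_hit_fault));
	return 0;
}

The patch itself uses a separate atomic_inc() followed by atomic_read(); that is equally safe here because publishing the flag twice is idempotent.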
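The repeated #ifdef __NR_userfaultfd and #ifdef __NR_memfd_secret hunks in the mm selftests all apply one shape: compile the real test only when libc's syscall headers define the syscall number, and otherwise build a stub main() that exits with the kselftest skip code. A condensed sketch of that shape, assuming KSFT_SKIP is 4 as in kselftest.h (hard-coded here to stay self-contained):

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/syscall.h>	/* defines __NR_* where the arch provides them */
#include <unistd.h>

#define KSFT_SKIP 4		/* kselftest exit code for a skipped test */

#ifdef __NR_userfaultfd

int main(void)
{
	/* The real test body goes here and may use __NR_userfaultfd freely. */
	long fd = syscall(__NR_userfaultfd, 0);

	printf("userfaultfd syscall returned %ld\n", fd);
	return 0;
}

#else /* __NR_userfaultfd */

int main(void)
{
	printf("Skipping %s (missing __NR_userfaultfd)\n", __FILE__);
	return KSFT_SKIP;
}

#endif /* __NR_userfaultfd */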
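On the cmsg_sender.c change: MSG_MORE asks UDP to keep the datagram open so that subsequent sends append to the same packet, and the zero-length write() afterwards is what finally closes and transmits it; the new 'U' protocol letter simply routes sends through this path. A minimal sketch of that send-then-flush sequence, assuming fd is an already-connected SOCK_DGRAM socket:

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

/* Queue 'len' bytes into a pending UDP datagram, then flush it. */
static int send_with_msg_more(int fd, const void *buf, size_t len)
{
	/* MSG_MORE: hold the datagram; the payload is buffered, not sent. */
	if (send(fd, buf, len, MSG_MORE) < 0) {
		fprintf(stderr, "send failed: %s\n", strerror(errno));
		return -1;
	}

	/* A zero-length write completes the datagram and puts it on the wire. */
	if (write(fd, NULL, 0) < 0) {
		fprintf(stderr, "send more: %s\n", strerror(errno));
		return -1;
	}
	return 0;
}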
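The parse_vdso.c fix above hinges on the DT_GNU_HASH layout: the bucket array does not start at the section header, it sits after the four-word header plus a bloom filter of gnu_hash[2] machine-sized words, and chain values live right after the buckets, indexed relative to the first hashed symbol index gnu_hash[1]. A standalone sketch of just that offset arithmetic, viewing the section as uint32_t words the way the selftest does:

#include <stddef.h>
#include <stdint.h>

/*
 * DT_GNU_HASH layout:
 *   [0] nbucket  [1] symndx  [2] bloom_size  [3] bloom_shift
 *   bloom_size bloom-filter words, each sizeof(size_t) bytes
 *   nbucket uint32 bucket entries
 *   chain entries, one per symbol from symndx onward
 */
static const uint32_t *gnu_hash_buckets(const uint32_t *gnu_hash)
{
	/* Skip the 4-word header and the bloom filter, counted in
	 * uint32 units (sizeof(size_t) / 4 words per filter entry). */
	return gnu_hash + 4 + sizeof(size_t) / 4 * gnu_hash[2];
}

static uint32_t gnu_hash_chain(const uint32_t *gnu_hash, uint32_t i)
{
	const uint32_t *bucket = gnu_hash_buckets(gnu_hash);
	uint32_t nbucket = gnu_hash[0];

	/* Chains follow the buckets and are offset by symndx. */
	return bucket[nbucket + (i - gnu_hash[1])];
}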