diff options
93 files changed, 3429 insertions, 890 deletions
diff --git a/Documentation/devicetree/bindings/net/ethernet-phy.yaml b/Documentation/devicetree/bindings/net/ethernet-phy.yaml index 824bbe4333b7..71e2cd32580f 100644 --- a/Documentation/devicetree/bindings/net/ethernet-phy.yaml +++ b/Documentation/devicetree/bindings/net/ethernet-phy.yaml @@ -238,6 +238,16 @@ properties: peak-to-peak specified in ANSI X3.263. When omitted, the PHYs default will be left as is. + mac-termination-ohms: + maximum: 200 + description: + The xMII signals need series termination on the driver side to match both + the output driver impedance and the line characteristic impedance, to + prevent reflections and EMI problems. Select a resistance value which is + supported by the builtin resistors of the PHY, otherwise the resistors may + have to be placed on board. When omitted, the PHYs default will be left as + is. + leds: type: object diff --git a/Documentation/devicetree/bindings/net/renesas,r9a09g057-gbeth.yaml b/Documentation/devicetree/bindings/net/renesas,r9a09g057-gbeth.yaml new file mode 100644 index 000000000000..02a6793c26f5 --- /dev/null +++ b/Documentation/devicetree/bindings/net/renesas,r9a09g057-gbeth.yaml @@ -0,0 +1,201 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/renesas,r9a09g057-gbeth.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: GBETH glue layer for Renesas RZ/V2H(P) (and similar SoCs) + +maintainers: + - Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com> + +select: + properties: + compatible: + contains: + enum: + - renesas,r9a09g057-gbeth + - renesas,rzv2h-gbeth + required: + - compatible + +properties: + compatible: + items: + - enum: + - renesas,r9a09g057-gbeth # RZ/V2H(P) + - const: renesas,rzv2h-gbeth + - const: snps,dwmac-5.20 + + reg: + maxItems: 1 + + clocks: + items: + - description: CSR clock + - description: AXI system clock + - description: PTP clock + - description: TX clock + - description: RX clock + - description: TX clock phase-shifted by 180 degrees + - description: RX clock phase-shifted by 180 degrees + + clock-names: + items: + - const: stmmaceth + - const: pclk + - const: ptp_ref + - const: tx + - const: rx + - const: tx-180 + - const: rx-180 + + interrupts: + minItems: 11 + + interrupt-names: + items: + - const: macirq + - const: eth_wake_irq + - const: eth_lpi + - const: rx-queue-0 + - const: rx-queue-1 + - const: rx-queue-2 + - const: rx-queue-3 + - const: tx-queue-0 + - const: tx-queue-1 + - const: tx-queue-2 + - const: tx-queue-3 + + resets: + items: + - description: AXI power-on system reset + +required: + - compatible + - reg + - clocks + - clock-names + - interrupts + - interrupt-names + - resets + +allOf: + - $ref: snps,dwmac.yaml# + +unevaluatedProperties: false + +examples: + - | + #include <dt-bindings/clock/renesas-cpg-mssr.h> + #include <dt-bindings/interrupt-controller/arm-gic.h> + + ethernet@15c30000 { + compatible = "renesas,r9a09g057-gbeth", "renesas,rzv2h-gbeth", "snps,dwmac-5.20"; + reg = <0x15c30000 0x10000>; + clocks = <&cpg CPG_MOD 0xbd>, <&cpg CPG_MOD 0xbc>, + <&ptp_clock>, <&cpg CPG_MOD 0xb8>, + <&cpg CPG_MOD 0xb9>, <&cpg CPG_MOD 0xba>, + <&cpg CPG_MOD 0xbb>; + clock-names = "stmmaceth", "pclk", "ptp_ref", + "tx", "rx", "tx-180", "rx-180"; + resets = <&cpg 0xb0>; + interrupts = <GIC_SPI 765 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 767 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 766 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 772 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 773 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 774 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 745 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 768 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 769 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 770 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 771 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "macirq", "eth_wake_irq", "eth_lpi", + "rx-queue-0", "rx-queue-1", "rx-queue-2", + "rx-queue-3", "tx-queue-0", "tx-queue-1", + "tx-queue-2", "tx-queue-3"; + phy-mode = "rgmii-id"; + snps,multicast-filter-bins = <256>; + snps,perfect-filter-entries = <128>; + rx-fifo-depth = <8192>; + tx-fifo-depth = <8192>; + snps,fixed-burst; + snps,force_thresh_dma_mode; + snps,axi-config = <&stmmac_axi_setup>; + snps,mtl-rx-config = <&mtl_rx_setup>; + snps,mtl-tx-config = <&mtl_tx_setup>; + snps,txpbl = <32>; + snps,rxpbl = <32>; + phy-handle = <&phy0>; + + stmmac_axi_setup: stmmac-axi-config { + snps,lpi_en; + snps,wr_osr_lmt = <0xf>; + snps,rd_osr_lmt = <0xf>; + snps,blen = <16 8 4 0 0 0 0>; + }; + + mtl_rx_setup: rx-queues-config { + snps,rx-queues-to-use = <4>; + snps,rx-sched-sp; + + queue0 { + snps,dcb-algorithm; + snps,priority = <0x1>; + snps,map-to-dma-channel = <0>; + }; + + queue1 { + snps,dcb-algorithm; + snps,priority = <0x2>; + snps,map-to-dma-channel = <1>; + }; + + queue2 { + snps,dcb-algorithm; + snps,priority = <0x4>; + snps,map-to-dma-channel = <2>; + }; + + queue3 { + snps,dcb-algorithm; + snps,priority = <0x8>; + snps,map-to-dma-channel = <3>; + }; + }; + + mtl_tx_setup: tx-queues-config { + snps,tx-queues-to-use = <4>; + + queue0 { + snps,dcb-algorithm; + snps,priority = <0x1>; + }; + + queue1 { + snps,dcb-algorithm; + snps,priority = <0x2>; + }; + + queue2 { + snps,dcb-algorithm; + snps,priority = <0x4>; + }; + + queue3 { + snps,dcb-algorithm; + snps,priority = <0x1>; + }; + }; + + mdio { + #address-cells = <1>; + #size-cells = <0>; + compatible = "snps,dwmac-mdio"; + + phy0: ethernet-phy@0 { + reg = <0>; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml index 78b3030dc56d..b525eca53850 100644 --- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml +++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml @@ -75,6 +75,7 @@ properties: - qcom,sm8150-ethqos - renesas,r9a06g032-gmac - renesas,rzn1-gmac + - renesas,rzv2h-gbeth - rockchip,px30-gmac - rockchip,rk3128-gmac - rockchip,rk3228-gmac @@ -114,19 +115,25 @@ properties: interrupts: minItems: 1 - items: - - description: Combined signal for various interrupt events - - description: The interrupt to manage the remote wake-up packet detection - - description: The interrupt that occurs when Rx exits the LPI state - - description: The interrupt that occurs when HW safety error triggered + maxItems: 11 interrupt-names: minItems: 1 + maxItems: 11 items: - - const: macirq - - enum: [eth_wake_irq, eth_lpi, sfty] - - enum: [eth_wake_irq, eth_lpi, sfty] - - enum: [eth_wake_irq, eth_lpi, sfty] + oneOf: + - description: Combined signal for various interrupt events + const: macirq + - description: The interrupt to manage the remote wake-up packet detection + const: eth_wake_irq + - description: The interrupt that occurs when Rx exits the LPI state + const: eth_lpi + - description: The interrupt that occurs when HW safety error triggered + const: sfty + - description: Per channel receive completion interrupt + pattern: '^rx-queue-[0-3]$' + - description: Per channel transmit completion interrupt + pattern: '^tx-queue-[0-3]$' clocks: minItems: 1 diff --git a/Documentation/devicetree/bindings/net/ti,dp83822.yaml b/Documentation/devicetree/bindings/net/ti,dp83822.yaml index 50c24248df26..28a0bddb9af9 100644 --- a/Documentation/devicetree/bindings/net/ti,dp83822.yaml +++ b/Documentation/devicetree/bindings/net/ti,dp83822.yaml @@ -122,6 +122,9 @@ properties: - free-running - recovered + mac-termination-ohms: + enum: [43, 44, 46, 48, 50, 53, 55, 58, 61, 65, 69, 73, 78, 84, 91, 99] + required: - reg @@ -137,6 +140,7 @@ examples: rx-internal-delay-ps = <1>; tx-internal-delay-ps = <1>; ti,gpio2-clk-out = "xi"; + mac-termination-ohms = <43>; }; }; diff --git a/Documentation/netlink/genetlink-c.yaml b/Documentation/netlink/genetlink-c.yaml index 96fa1f1522ed..5a234e9b5fa2 100644 --- a/Documentation/netlink/genetlink-c.yaml +++ b/Documentation/netlink/genetlink-c.yaml @@ -148,6 +148,9 @@ properties: attr-max-name: description: The explicit name for last member of attribute enum. type: string + header: + description: For C-compatible languages, header which already defines this attribute set. + type: string # End genetlink-c attributes: description: List of attributes in the space. diff --git a/Documentation/netlink/genetlink-legacy.yaml b/Documentation/netlink/genetlink-legacy.yaml index a8c5b521937d..4cbfe666e6f5 100644 --- a/Documentation/netlink/genetlink-legacy.yaml +++ b/Documentation/netlink/genetlink-legacy.yaml @@ -193,6 +193,9 @@ properties: attr-max-name: description: The explicit name for last member of attribute enum. type: string + header: + description: For C-compatible languages, header which already defines this attribute set. + type: string # End genetlink-c attributes: description: List of attributes in the space. diff --git a/Documentation/netlink/netlink-raw.yaml b/Documentation/netlink/netlink-raw.yaml index 1b0772c8e333..e34bf23897fa 100644 --- a/Documentation/netlink/netlink-raw.yaml +++ b/Documentation/netlink/netlink-raw.yaml @@ -207,6 +207,9 @@ properties: attr-max-name: description: The explicit name for last member of attribute enum. type: string + header: + description: For C-compatible languages, header which already defines this attribute set. + type: string # End genetlink-c attributes: description: List of attributes in the space. diff --git a/Documentation/netlink/specs/rt-link.yaml b/Documentation/netlink/specs/rt-link.yaml index 726dfa083d14..25f0c3c6a886 100644 --- a/Documentation/netlink/specs/rt-link.yaml +++ b/Documentation/netlink/specs/rt-link.yaml @@ -2,6 +2,7 @@ name: rt-link protocol: netlink-raw +uapi-header: linux/rtnetlink.h protonum: 0 doc: @@ -11,6 +12,9 @@ definitions: - name: ifinfo-flags type: flags + header: linux/if.h + enum-name: net-device-flags + name-prefix: iff- entries: - name: up @@ -53,6 +57,7 @@ definitions: - name: vlan-protocols type: enum + enum-name: entries: - name: 8021q @@ -754,6 +759,7 @@ definitions: - name: vlan-flags type: flags + enum-name: entries: - reorder-hdr - gvrp @@ -840,6 +846,7 @@ definitions: - name: ifla-vf-link-state-enum type: enum + enum-name: entries: - auto - enable @@ -906,6 +913,7 @@ definitions: - name: rtext-filter type: flags + enum-name: entries: - vf - brvlan @@ -918,6 +926,7 @@ definitions: - name: netkit-policy type: enum + enum-name: entries: - name: forward @@ -928,6 +937,7 @@ definitions: - name: netkit-mode type: enum + enum-name: netkit-mode entries: - name: l2 - name: l3 @@ -935,6 +945,7 @@ definitions: - name: netkit-scrub type: enum + enum-name: entries: - name: none - name: default @@ -1177,24 +1188,27 @@ attribute-sets: multi-attr: true - name: af-spec-attrs + name-prefix: af- + attr-max-name: af-max attributes: - - name: "inet" + name: inet type: nest value: 2 nested-attributes: ifla-attrs - - name: "inet6" + name: inet6 type: nest value: 10 nested-attributes: ifla6-attrs - - name: "mctp" + name: mctp type: nest value: 45 nested-attributes: mctp-attrs - name: vfinfo-list-attrs + name-prefix: ifla-vf- attributes: - name: info @@ -1203,6 +1217,7 @@ attribute-sets: multi-attr: true - name: vfinfo-attrs + name-prefix: ifla-vf- attributes: - name: mac @@ -1257,6 +1272,7 @@ attribute-sets: type: binary - name: vf-stats-attrs + name-prefix: ifla-vf-stats- attributes: - name: rx-packets @@ -1288,6 +1304,8 @@ attribute-sets: type: u64 - name: vf-vlan-attrs + name-prefix: ifla-vf-vlan- + attr-max-name: ifla-vf-vlan-info-max attributes: - name: info @@ -1296,12 +1314,15 @@ attribute-sets: multi-attr: true - name: vf-ports-attrs + name-prefix: ifla- attributes: [] - name: port-self-attrs + name-prefix: ifla- attributes: [] - name: linkinfo-attrs + name-prefix: ifla-info- attributes: - name: kind @@ -1426,6 +1447,8 @@ attribute-sets: type: indexed-array sub-type: binary display-hint: ipv6 + checks: + exact-len: 16 - name: coupled-control type: u8 @@ -1855,6 +1878,7 @@ attribute-sets: - name: linkinfo-vti-attrs name-prefix: ifla-vti- + header: linux/if_tunnel.h attributes: - name: link @@ -2107,7 +2131,7 @@ attribute-sets: byte-order: big-endian - name: ifla-vlan-qos - name-prefix: ifla-vlan-qos + name-prefix: ifla-vlan-qos- attributes: - name: mapping @@ -2123,6 +2147,7 @@ attribute-sets: type: u32 - name: xdp-attrs + name-prefix: ifla-xdp- attributes: - name: fd @@ -2150,6 +2175,7 @@ attribute-sets: type: s32 - name: ifla-attrs + name-prefix: ifla-inet- attributes: - name: conf @@ -2157,6 +2183,7 @@ attribute-sets: struct: ipv4-devconf - name: ifla6-attrs + name-prefix: ifla-inet6- attributes: - name: flags @@ -2222,6 +2249,7 @@ attribute-sets: type: binary - name: link-offload-xstats + name-prefix: ifla-offload-xstats- attributes: - name: cpu-hit @@ -2236,6 +2264,7 @@ attribute-sets: type: binary - name: hw-s-info-one + name-prefix: ifla-offload-xstats-hw-s-info- attributes: - name: request @@ -2245,6 +2274,8 @@ attribute-sets: type: u8 - name: link-dpll-pin-attrs + name-prefix: dpll-a- + header: linux/dpll.h attributes: - name: id @@ -2357,6 +2388,7 @@ sub-messages: operations: enum-model: directional + name-prefix: rtm- list: - name: newlink @@ -2367,7 +2399,6 @@ operations: request: value: 16 attributes: &link-new-attrs - - ifi-index - ifname - net-ns-pid - net-ns-fd @@ -2383,7 +2414,6 @@ operations: - txqlen - operstate - linkmode - - group - gso-max-size - gso-max-segs - gro-max-size @@ -2391,6 +2421,12 @@ operations: - gro-ipv4-max-size - af-spec - + name: newlink-ntf + doc: Notify that a link has been created + value: 16 + notify: getlink + fixed-header: ifinfomsg + - name: dellink doc: Delete an existing link. attribute-set: link-attrs @@ -2399,7 +2435,6 @@ operations: request: value: 17 attributes: - - ifi-index - ifname - name: getlink @@ -2410,7 +2445,6 @@ operations: request: value: 18 attributes: - - ifi-index - ifname - alt-ifname - ext-mask @@ -2418,11 +2452,6 @@ operations: reply: value: 16 attributes: &link-all-attrs - - ifi-family - - ifi-type - - ifi-index - - ifi-flags - - ifi-change - address - broadcast - ifname @@ -2468,7 +2497,6 @@ operations: - xdp - event - new-netnsid - - if-netnsid - target-netnsid - carrier-up-count - carrier-down-count @@ -2515,14 +2543,9 @@ operations: do: request: value: 94 - attributes: - - ifindex reply: value: 92 attributes: &link-stats-attrs - - family - - ifindex - - filter-mask - link-64 - link-xstats - link-xstats-slave diff --git a/Documentation/netlink/specs/rt-neigh.yaml b/Documentation/netlink/specs/rt-neigh.yaml index a843caa72259..e9cba164e3d1 100644 --- a/Documentation/netlink/specs/rt-neigh.yaml +++ b/Documentation/netlink/specs/rt-neigh.yaml @@ -2,6 +2,7 @@ name: rt-neigh protocol: netlink-raw +uapi-header: linux/rtnetlink.h protonum: 0 doc: @@ -48,6 +49,7 @@ definitions: - name: nud-state type: flags + enum-name: entries: - incomplete - reachable @@ -60,6 +62,7 @@ definitions: - name: ntf-flags type: flags + enum-name: entries: - use - self @@ -72,12 +75,14 @@ definitions: - name: ntf-ext-flags type: flags + enum-name: entries: - managed - locked - name: rtm-type type: enum + enum-name: entries: - unspec - unicast @@ -179,6 +184,7 @@ definitions: attribute-sets: - name: neighbour-attrs + name-prefix: nda- attributes: - name: unspec @@ -241,6 +247,7 @@ attribute-sets: type: u8 - name: ndt-attrs + name-prefix: ndta- attributes: - name: name @@ -274,6 +281,7 @@ attribute-sets: type: pad - name: ndtpa-attrs + name-prefix: ndtpa- attributes: - name: ifindex @@ -335,6 +343,7 @@ attribute-sets: operations: enum-model: directional + name-prefix: rtm- list: - name: newneigh @@ -372,7 +381,7 @@ operations: name: delneigh-ntf doc: Notify a neighbour deletion value: 29 - notify: delneigh + notify: getneigh fixed-header: ndmsg - name: getneigh @@ -393,6 +402,7 @@ operations: - ifindex - master reply: + value: 28 attributes: *neighbour-all - name: newneigh-ntf diff --git a/Documentation/netlink/specs/rt-rule.yaml b/Documentation/netlink/specs/rt-rule.yaml index de0938d36541..003707ca4a3e 100644 --- a/Documentation/netlink/specs/rt-rule.yaml +++ b/Documentation/netlink/specs/rt-rule.yaml @@ -2,6 +2,7 @@ name: rt-rule protocol: netlink-raw +uapi-header: linux/fib_rules.h protonum: 0 doc: @@ -56,6 +57,7 @@ definitions: - name: fr-act type: enum + enum-name: entries: - unspec - to-tbl @@ -90,6 +92,7 @@ definitions: attribute-sets: - name: fib-rule-attrs + name-prefix: fra- attributes: - name: dst @@ -198,6 +201,7 @@ attribute-sets: operations: enum-model: directional fixed-header: fib-rule-hdr + name-prefix: rtm- list: - name: newrule @@ -234,7 +238,7 @@ operations: name: newrule-ntf doc: Notify a rule creation value: 32 - notify: newrule + notify: getrule - name: delrule doc: Remove an existing FIB rule @@ -247,7 +251,7 @@ operations: name: delrule-ntf doc: Notify a rule deletion value: 33 - notify: delrule + notify: getrule - name: getrule doc: Dump all FIB rules diff --git a/MAINTAINERS b/MAINTAINERS index 657a67f9031e..82e4b96030df 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -20626,6 +20626,14 @@ S: Maintained F: Documentation/devicetree/bindings/usb/renesas,rzn1-usbf.yaml F: drivers/usb/gadget/udc/renesas_usbf.c +RENESAS RZ/V2H(P) DWMAC GBETH GLUE LAYER DRIVER +M: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com> +L: netdev@vger.kernel.org +L: linux-renesas-soc@vger.kernel.org +S: Maintained +F: Documentation/devicetree/bindings/net/renesas,r9a09g057-gbeth.yaml +F: drivers/net/ethernet/stmicro/stmmac/dwmac-renesas-gbeth.c + RENESAS RZ/V2M I2C DRIVER M: Fabrizio Castro <fabrizio.castro.jz@renesas.com> L: linux-i2c@vger.kernel.org diff --git a/drivers/net/dsa/rzn1_a5psw.c b/drivers/net/dsa/rzn1_a5psw.c index 31ea8130a495..df7466d4fe8f 100644 --- a/drivers/net/dsa/rzn1_a5psw.c +++ b/drivers/net/dsa/rzn1_a5psw.c @@ -337,8 +337,9 @@ static void a5psw_port_rx_block_set(struct a5psw *a5psw, int port, bool block) static void a5psw_flooding_set_resolution(struct a5psw *a5psw, int port, bool set) { - u8 offsets[] = {A5PSW_UCAST_DEF_MASK, A5PSW_BCAST_DEF_MASK, - A5PSW_MCAST_DEF_MASK}; + static const u8 offsets[] = { + A5PSW_UCAST_DEF_MASK, A5PSW_BCAST_DEF_MASK, A5PSW_MCAST_DEF_MASK + }; int i; for (i = 0; i < ARRAY_SIZE(offsets); i++) diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index c773b5ea9c05..16c7896f931f 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -34,37 +34,40 @@ u32 airoha_rmw(void __iomem *base, u32 offset, u32 mask, u32 val) return val; } -static void airoha_qdma_set_irqmask(struct airoha_qdma *qdma, int index, - u32 clear, u32 set) +static void airoha_qdma_set_irqmask(struct airoha_irq_bank *irq_bank, + int index, u32 clear, u32 set) { + struct airoha_qdma *qdma = irq_bank->qdma; + int bank = irq_bank - &qdma->irq_banks[0]; unsigned long flags; - if (WARN_ON_ONCE(index >= ARRAY_SIZE(qdma->irqmask))) + if (WARN_ON_ONCE(index >= ARRAY_SIZE(irq_bank->irqmask))) return; - spin_lock_irqsave(&qdma->irq_lock, flags); + spin_lock_irqsave(&irq_bank->irq_lock, flags); - qdma->irqmask[index] &= ~clear; - qdma->irqmask[index] |= set; - airoha_qdma_wr(qdma, REG_INT_ENABLE(index), qdma->irqmask[index]); + irq_bank->irqmask[index] &= ~clear; + irq_bank->irqmask[index] |= set; + airoha_qdma_wr(qdma, REG_INT_ENABLE(bank, index), + irq_bank->irqmask[index]); /* Read irq_enable register in order to guarantee the update above * completes in the spinlock critical section. */ - airoha_qdma_rr(qdma, REG_INT_ENABLE(index)); + airoha_qdma_rr(qdma, REG_INT_ENABLE(bank, index)); - spin_unlock_irqrestore(&qdma->irq_lock, flags); + spin_unlock_irqrestore(&irq_bank->irq_lock, flags); } -static void airoha_qdma_irq_enable(struct airoha_qdma *qdma, int index, - u32 mask) +static void airoha_qdma_irq_enable(struct airoha_irq_bank *irq_bank, + int index, u32 mask) { - airoha_qdma_set_irqmask(qdma, index, 0, mask); + airoha_qdma_set_irqmask(irq_bank, index, 0, mask); } -static void airoha_qdma_irq_disable(struct airoha_qdma *qdma, int index, - u32 mask) +static void airoha_qdma_irq_disable(struct airoha_irq_bank *irq_bank, + int index, u32 mask) { - airoha_qdma_set_irqmask(qdma, index, mask, 0); + airoha_qdma_set_irqmask(irq_bank, index, mask, 0); } static bool airhoa_is_lan_gdm_port(struct airoha_gdm_port *port) @@ -739,9 +742,20 @@ static int airoha_qdma_rx_napi_poll(struct napi_struct *napi, int budget) done += cur; } while (cur && done < budget); - if (done < budget && napi_complete(napi)) - airoha_qdma_irq_enable(q->qdma, QDMA_INT_REG_IDX1, - RX_DONE_INT_MASK); + if (done < budget && napi_complete(napi)) { + struct airoha_qdma *qdma = q->qdma; + int i, qid = q - &qdma->q_rx[0]; + int intr_reg = qid < RX_DONE_HIGH_OFFSET ? QDMA_INT_REG_IDX1 + : QDMA_INT_REG_IDX2; + + for (i = 0; i < ARRAY_SIZE(qdma->irq_banks); i++) { + if (!(BIT(qid) & RX_IRQ_BANK_PIN_MASK(i))) + continue; + + airoha_qdma_irq_enable(&qdma->irq_banks[i], intr_reg, + BIT(qid % RX_DONE_HIGH_OFFSET)); + } + } return done; } @@ -944,7 +958,7 @@ unlock: } if (done < budget && napi_complete(napi)) - airoha_qdma_irq_enable(qdma, QDMA_INT_REG_IDX0, + airoha_qdma_irq_enable(&qdma->irq_banks[0], QDMA_INT_REG_IDX0, TX_DONE_INT_MASK(id)); return done; @@ -1174,14 +1188,24 @@ static int airoha_qdma_hw_init(struct airoha_qdma *qdma) { int i; - /* clear pending irqs */ - for (i = 0; i < ARRAY_SIZE(qdma->irqmask); i++) + for (i = 0; i < ARRAY_SIZE(qdma->irq_banks); i++) { + /* clear pending irqs */ airoha_qdma_wr(qdma, REG_INT_STATUS(i), 0xffffffff); - - /* setup irqs */ - airoha_qdma_irq_enable(qdma, QDMA_INT_REG_IDX0, INT_IDX0_MASK); - airoha_qdma_irq_enable(qdma, QDMA_INT_REG_IDX1, INT_IDX1_MASK); - airoha_qdma_irq_enable(qdma, QDMA_INT_REG_IDX4, INT_IDX4_MASK); + /* setup rx irqs */ + airoha_qdma_irq_enable(&qdma->irq_banks[i], QDMA_INT_REG_IDX0, + INT_RX0_MASK(RX_IRQ_BANK_PIN_MASK(i))); + airoha_qdma_irq_enable(&qdma->irq_banks[i], QDMA_INT_REG_IDX1, + INT_RX1_MASK(RX_IRQ_BANK_PIN_MASK(i))); + airoha_qdma_irq_enable(&qdma->irq_banks[i], QDMA_INT_REG_IDX2, + INT_RX2_MASK(RX_IRQ_BANK_PIN_MASK(i))); + airoha_qdma_irq_enable(&qdma->irq_banks[i], QDMA_INT_REG_IDX3, + INT_RX3_MASK(RX_IRQ_BANK_PIN_MASK(i))); + } + /* setup tx irqs */ + airoha_qdma_irq_enable(&qdma->irq_banks[0], QDMA_INT_REG_IDX0, + TX_COHERENT_LOW_INT_MASK | INT_TX_MASK); + airoha_qdma_irq_enable(&qdma->irq_banks[0], QDMA_INT_REG_IDX4, + TX_COHERENT_HIGH_INT_MASK); /* setup irq binding */ for (i = 0; i < ARRAY_SIZE(qdma->q_tx); i++) { @@ -1226,30 +1250,39 @@ static int airoha_qdma_hw_init(struct airoha_qdma *qdma) static irqreturn_t airoha_irq_handler(int irq, void *dev_instance) { - struct airoha_qdma *qdma = dev_instance; - u32 intr[ARRAY_SIZE(qdma->irqmask)]; + struct airoha_irq_bank *irq_bank = dev_instance; + struct airoha_qdma *qdma = irq_bank->qdma; + u32 rx_intr_mask = 0, rx_intr1, rx_intr2; + u32 intr[ARRAY_SIZE(irq_bank->irqmask)]; int i; - for (i = 0; i < ARRAY_SIZE(qdma->irqmask); i++) { + for (i = 0; i < ARRAY_SIZE(intr); i++) { intr[i] = airoha_qdma_rr(qdma, REG_INT_STATUS(i)); - intr[i] &= qdma->irqmask[i]; + intr[i] &= irq_bank->irqmask[i]; airoha_qdma_wr(qdma, REG_INT_STATUS(i), intr[i]); } if (!test_bit(DEV_STATE_INITIALIZED, &qdma->eth->state)) return IRQ_NONE; - if (intr[1] & RX_DONE_INT_MASK) { - airoha_qdma_irq_disable(qdma, QDMA_INT_REG_IDX1, - RX_DONE_INT_MASK); + rx_intr1 = intr[1] & RX_DONE_LOW_INT_MASK; + if (rx_intr1) { + airoha_qdma_irq_disable(irq_bank, QDMA_INT_REG_IDX1, rx_intr1); + rx_intr_mask |= rx_intr1; + } + + rx_intr2 = intr[2] & RX_DONE_HIGH_INT_MASK; + if (rx_intr2) { + airoha_qdma_irq_disable(irq_bank, QDMA_INT_REG_IDX2, rx_intr2); + rx_intr_mask |= (rx_intr2 << 16); + } - for (i = 0; i < ARRAY_SIZE(qdma->q_rx); i++) { - if (!qdma->q_rx[i].ndesc) - continue; + for (i = 0; rx_intr_mask && i < ARRAY_SIZE(qdma->q_rx); i++) { + if (!qdma->q_rx[i].ndesc) + continue; - if (intr[1] & BIT(i)) - napi_schedule(&qdma->q_rx[i].napi); - } + if (rx_intr_mask & BIT(i)) + napi_schedule(&qdma->q_rx[i].napi); } if (intr[0] & INT_TX_MASK) { @@ -1257,7 +1290,7 @@ static irqreturn_t airoha_irq_handler(int irq, void *dev_instance) if (!(intr[0] & TX_DONE_INT_MASK(i))) continue; - airoha_qdma_irq_disable(qdma, QDMA_INT_REG_IDX0, + airoha_qdma_irq_disable(irq_bank, QDMA_INT_REG_IDX0, TX_DONE_INT_MASK(i)); napi_schedule(&qdma->q_tx_irq[i].napi); } @@ -1266,6 +1299,39 @@ static irqreturn_t airoha_irq_handler(int irq, void *dev_instance) return IRQ_HANDLED; } +static int airoha_qdma_init_irq_banks(struct platform_device *pdev, + struct airoha_qdma *qdma) +{ + struct airoha_eth *eth = qdma->eth; + int i, id = qdma - ð->qdma[0]; + + for (i = 0; i < ARRAY_SIZE(qdma->irq_banks); i++) { + struct airoha_irq_bank *irq_bank = &qdma->irq_banks[i]; + int err, irq_index = 4 * id + i; + const char *name; + + spin_lock_init(&irq_bank->irq_lock); + irq_bank->qdma = qdma; + + irq_bank->irq = platform_get_irq(pdev, irq_index); + if (irq_bank->irq < 0) + return irq_bank->irq; + + name = devm_kasprintf(eth->dev, GFP_KERNEL, + KBUILD_MODNAME ".%d", irq_index); + if (!name) + return -ENOMEM; + + err = devm_request_irq(eth->dev, irq_bank->irq, + airoha_irq_handler, IRQF_SHARED, name, + irq_bank); + if (err) + return err; + } + + return 0; +} + static int airoha_qdma_init(struct platform_device *pdev, struct airoha_eth *eth, struct airoha_qdma *qdma) @@ -1273,9 +1339,7 @@ static int airoha_qdma_init(struct platform_device *pdev, int err, id = qdma - ð->qdma[0]; const char *res; - spin_lock_init(&qdma->irq_lock); qdma->eth = eth; - res = devm_kasprintf(eth->dev, GFP_KERNEL, "qdma%d", id); if (!res) return -ENOMEM; @@ -1285,12 +1349,7 @@ static int airoha_qdma_init(struct platform_device *pdev, return dev_err_probe(eth->dev, PTR_ERR(qdma->regs), "failed to iomap qdma%d regs\n", id); - qdma->irq = platform_get_irq(pdev, 4 * id); - if (qdma->irq < 0) - return qdma->irq; - - err = devm_request_irq(eth->dev, qdma->irq, airoha_irq_handler, - IRQF_SHARED, KBUILD_MODNAME, qdma); + err = airoha_qdma_init_irq_banks(pdev, qdma); if (err) return err; @@ -2784,7 +2843,7 @@ static int airoha_alloc_gdm_port(struct airoha_eth *eth, dev->features |= dev->hw_features; dev->vlan_features = dev->hw_features; dev->dev.of_node = np; - dev->irq = qdma->irq; + dev->irq = qdma->irq_banks[0].irq; SET_NETDEV_DEV(dev, eth->dev); /* reserve hw queues for HTB offloading */ diff --git a/drivers/net/ethernet/airoha/airoha_eth.h b/drivers/net/ethernet/airoha/airoha_eth.h index da5371bcd147..53f39083a8b0 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.h +++ b/drivers/net/ethernet/airoha/airoha_eth.h @@ -17,6 +17,7 @@ #define AIROHA_MAX_NUM_GDM_PORTS 4 #define AIROHA_MAX_NUM_QDMA 2 +#define AIROHA_MAX_NUM_IRQ_BANKS 4 #define AIROHA_MAX_DSA_PORTS 7 #define AIROHA_MAX_NUM_RSTS 3 #define AIROHA_MAX_NUM_XSI_RSTS 5 @@ -452,17 +453,34 @@ struct airoha_flow_table_entry { unsigned long cookie; }; -struct airoha_qdma { - struct airoha_eth *eth; - void __iomem *regs; +/* RX queue to IRQ mapping: BIT(q) in IRQ(n) */ +#define RX_IRQ0_BANK_PIN_MASK 0x839f +#define RX_IRQ1_BANK_PIN_MASK 0x7fe00000 +#define RX_IRQ2_BANK_PIN_MASK 0x20 +#define RX_IRQ3_BANK_PIN_MASK 0x40 +#define RX_IRQ_BANK_PIN_MASK(_n) \ + (((_n) == 3) ? RX_IRQ3_BANK_PIN_MASK : \ + ((_n) == 2) ? RX_IRQ2_BANK_PIN_MASK : \ + ((_n) == 1) ? RX_IRQ1_BANK_PIN_MASK : \ + RX_IRQ0_BANK_PIN_MASK) + +struct airoha_irq_bank { + struct airoha_qdma *qdma; /* protect concurrent irqmask accesses */ spinlock_t irq_lock; u32 irqmask[QDMA_INT_REG_MAX]; int irq; +}; + +struct airoha_qdma { + struct airoha_eth *eth; + void __iomem *regs; atomic_t users; + struct airoha_irq_bank irq_banks[AIROHA_MAX_NUM_IRQ_BANKS]; + struct airoha_tx_irq_queue q_tx_irq[AIROHA_NUM_TX_IRQ]; struct airoha_queue q_tx[AIROHA_NUM_TX_RING]; diff --git a/drivers/net/ethernet/airoha/airoha_regs.h b/drivers/net/ethernet/airoha/airoha_regs.h index 29c8f046b991..d931530fc96f 100644 --- a/drivers/net/ethernet/airoha/airoha_regs.h +++ b/drivers/net/ethernet/airoha/airoha_regs.h @@ -423,11 +423,12 @@ ((_n) == 2) ? 0x0720 : \ ((_n) == 1) ? 0x0024 : 0x0020) -#define REG_INT_ENABLE(_n) \ - (((_n) == 4) ? 0x0750 : \ - ((_n) == 3) ? 0x0744 : \ - ((_n) == 2) ? 0x0740 : \ - ((_n) == 1) ? 0x002c : 0x0028) +#define REG_INT_ENABLE(_b, _n) \ + (((_n) == 4) ? 0x0750 + ((_b) << 5) : \ + ((_n) == 3) ? 0x0744 + ((_b) << 5) : \ + ((_n) == 2) ? 0x0740 + ((_b) << 5) : \ + ((_n) == 1) ? 0x002c + ((_b) << 3) : \ + 0x0028 + ((_b) << 3)) /* QDMA_CSR_INT_ENABLE1 */ #define RX15_COHERENT_INT_MASK BIT(31) @@ -462,6 +463,26 @@ #define IRQ0_FULL_INT_MASK BIT(1) #define IRQ0_INT_MASK BIT(0) +#define RX_COHERENT_LOW_INT_MASK \ + (RX15_COHERENT_INT_MASK | RX14_COHERENT_INT_MASK | \ + RX13_COHERENT_INT_MASK | RX12_COHERENT_INT_MASK | \ + RX11_COHERENT_INT_MASK | RX10_COHERENT_INT_MASK | \ + RX9_COHERENT_INT_MASK | RX8_COHERENT_INT_MASK | \ + RX7_COHERENT_INT_MASK | RX6_COHERENT_INT_MASK | \ + RX5_COHERENT_INT_MASK | RX4_COHERENT_INT_MASK | \ + RX3_COHERENT_INT_MASK | RX2_COHERENT_INT_MASK | \ + RX1_COHERENT_INT_MASK | RX0_COHERENT_INT_MASK) + +#define RX_COHERENT_LOW_OFFSET __ffs(RX_COHERENT_LOW_INT_MASK) +#define INT_RX0_MASK(_n) \ + (((_n) << RX_COHERENT_LOW_OFFSET) & RX_COHERENT_LOW_INT_MASK) + +#define TX_COHERENT_LOW_INT_MASK \ + (TX7_COHERENT_INT_MASK | TX6_COHERENT_INT_MASK | \ + TX5_COHERENT_INT_MASK | TX4_COHERENT_INT_MASK | \ + TX3_COHERENT_INT_MASK | TX2_COHERENT_INT_MASK | \ + TX1_COHERENT_INT_MASK | TX0_COHERENT_INT_MASK) + #define TX_DONE_INT_MASK(_n) \ ((_n) ? IRQ1_INT_MASK | IRQ1_FULL_INT_MASK \ : IRQ0_INT_MASK | IRQ0_FULL_INT_MASK) @@ -470,17 +491,6 @@ (IRQ1_INT_MASK | IRQ1_FULL_INT_MASK | \ IRQ0_INT_MASK | IRQ0_FULL_INT_MASK) -#define INT_IDX0_MASK \ - (TX0_COHERENT_INT_MASK | TX1_COHERENT_INT_MASK | \ - TX2_COHERENT_INT_MASK | TX3_COHERENT_INT_MASK | \ - TX4_COHERENT_INT_MASK | TX5_COHERENT_INT_MASK | \ - TX6_COHERENT_INT_MASK | TX7_COHERENT_INT_MASK | \ - RX0_COHERENT_INT_MASK | RX1_COHERENT_INT_MASK | \ - RX2_COHERENT_INT_MASK | RX3_COHERENT_INT_MASK | \ - RX4_COHERENT_INT_MASK | RX7_COHERENT_INT_MASK | \ - RX8_COHERENT_INT_MASK | RX9_COHERENT_INT_MASK | \ - RX15_COHERENT_INT_MASK | INT_TX_MASK) - /* QDMA_CSR_INT_ENABLE2 */ #define RX15_NO_CPU_DSCP_INT_MASK BIT(31) #define RX14_NO_CPU_DSCP_INT_MASK BIT(30) @@ -515,19 +525,121 @@ #define RX1_DONE_INT_MASK BIT(1) #define RX0_DONE_INT_MASK BIT(0) -#define RX_DONE_INT_MASK \ - (RX0_DONE_INT_MASK | RX1_DONE_INT_MASK | \ - RX2_DONE_INT_MASK | RX3_DONE_INT_MASK | \ - RX4_DONE_INT_MASK | RX7_DONE_INT_MASK | \ - RX8_DONE_INT_MASK | RX9_DONE_INT_MASK | \ - RX15_DONE_INT_MASK) -#define INT_IDX1_MASK \ - (RX_DONE_INT_MASK | \ - RX0_NO_CPU_DSCP_INT_MASK | RX1_NO_CPU_DSCP_INT_MASK | \ - RX2_NO_CPU_DSCP_INT_MASK | RX3_NO_CPU_DSCP_INT_MASK | \ - RX4_NO_CPU_DSCP_INT_MASK | RX7_NO_CPU_DSCP_INT_MASK | \ - RX8_NO_CPU_DSCP_INT_MASK | RX9_NO_CPU_DSCP_INT_MASK | \ - RX15_NO_CPU_DSCP_INT_MASK) +#define RX_NO_CPU_DSCP_LOW_INT_MASK \ + (RX15_NO_CPU_DSCP_INT_MASK | RX14_NO_CPU_DSCP_INT_MASK | \ + RX13_NO_CPU_DSCP_INT_MASK | RX12_NO_CPU_DSCP_INT_MASK | \ + RX11_NO_CPU_DSCP_INT_MASK | RX10_NO_CPU_DSCP_INT_MASK | \ + RX9_NO_CPU_DSCP_INT_MASK | RX8_NO_CPU_DSCP_INT_MASK | \ + RX7_NO_CPU_DSCP_INT_MASK | RX6_NO_CPU_DSCP_INT_MASK | \ + RX5_NO_CPU_DSCP_INT_MASK | RX4_NO_CPU_DSCP_INT_MASK | \ + RX3_NO_CPU_DSCP_INT_MASK | RX2_NO_CPU_DSCP_INT_MASK | \ + RX1_NO_CPU_DSCP_INT_MASK | RX0_NO_CPU_DSCP_INT_MASK) + +#define RX_DONE_LOW_INT_MASK \ + (RX15_DONE_INT_MASK | RX14_DONE_INT_MASK | \ + RX13_DONE_INT_MASK | RX12_DONE_INT_MASK | \ + RX11_DONE_INT_MASK | RX10_DONE_INT_MASK | \ + RX9_DONE_INT_MASK | RX8_DONE_INT_MASK | \ + RX7_DONE_INT_MASK | RX6_DONE_INT_MASK | \ + RX5_DONE_INT_MASK | RX4_DONE_INT_MASK | \ + RX3_DONE_INT_MASK | RX2_DONE_INT_MASK | \ + RX1_DONE_INT_MASK | RX0_DONE_INT_MASK) + +#define RX_NO_CPU_DSCP_LOW_OFFSET __ffs(RX_NO_CPU_DSCP_LOW_INT_MASK) +#define INT_RX1_MASK(_n) \ + ((((_n) << RX_NO_CPU_DSCP_LOW_OFFSET) & RX_NO_CPU_DSCP_LOW_INT_MASK) | \ + (RX_DONE_LOW_INT_MASK & (_n))) + +/* QDMA_CSR_INT_ENABLE3 */ +#define RX31_NO_CPU_DSCP_INT_MASK BIT(31) +#define RX30_NO_CPU_DSCP_INT_MASK BIT(30) +#define RX29_NO_CPU_DSCP_INT_MASK BIT(29) +#define RX28_NO_CPU_DSCP_INT_MASK BIT(28) +#define RX27_NO_CPU_DSCP_INT_MASK BIT(27) +#define RX26_NO_CPU_DSCP_INT_MASK BIT(26) +#define RX25_NO_CPU_DSCP_INT_MASK BIT(25) +#define RX24_NO_CPU_DSCP_INT_MASK BIT(24) +#define RX23_NO_CPU_DSCP_INT_MASK BIT(23) +#define RX22_NO_CPU_DSCP_INT_MASK BIT(22) +#define RX21_NO_CPU_DSCP_INT_MASK BIT(21) +#define RX20_NO_CPU_DSCP_INT_MASK BIT(20) +#define RX19_NO_CPU_DSCP_INT_MASK BIT(19) +#define RX18_NO_CPU_DSCP_INT_MASK BIT(18) +#define RX17_NO_CPU_DSCP_INT_MASK BIT(17) +#define RX16_NO_CPU_DSCP_INT_MASK BIT(16) +#define RX31_DONE_INT_MASK BIT(15) +#define RX30_DONE_INT_MASK BIT(14) +#define RX29_DONE_INT_MASK BIT(13) +#define RX28_DONE_INT_MASK BIT(12) +#define RX27_DONE_INT_MASK BIT(11) +#define RX26_DONE_INT_MASK BIT(10) +#define RX25_DONE_INT_MASK BIT(9) +#define RX24_DONE_INT_MASK BIT(8) +#define RX23_DONE_INT_MASK BIT(7) +#define RX22_DONE_INT_MASK BIT(6) +#define RX21_DONE_INT_MASK BIT(5) +#define RX20_DONE_INT_MASK BIT(4) +#define RX19_DONE_INT_MASK BIT(3) +#define RX18_DONE_INT_MASK BIT(2) +#define RX17_DONE_INT_MASK BIT(1) +#define RX16_DONE_INT_MASK BIT(0) + +#define RX_NO_CPU_DSCP_HIGH_INT_MASK \ + (RX31_NO_CPU_DSCP_INT_MASK | RX30_NO_CPU_DSCP_INT_MASK | \ + RX29_NO_CPU_DSCP_INT_MASK | RX28_NO_CPU_DSCP_INT_MASK | \ + RX27_NO_CPU_DSCP_INT_MASK | RX26_NO_CPU_DSCP_INT_MASK | \ + RX25_NO_CPU_DSCP_INT_MASK | RX24_NO_CPU_DSCP_INT_MASK | \ + RX23_NO_CPU_DSCP_INT_MASK | RX22_NO_CPU_DSCP_INT_MASK | \ + RX21_NO_CPU_DSCP_INT_MASK | RX20_NO_CPU_DSCP_INT_MASK | \ + RX19_NO_CPU_DSCP_INT_MASK | RX18_NO_CPU_DSCP_INT_MASK | \ + RX17_NO_CPU_DSCP_INT_MASK | RX16_NO_CPU_DSCP_INT_MASK) + +#define RX_DONE_HIGH_INT_MASK \ + (RX31_DONE_INT_MASK | RX30_DONE_INT_MASK | \ + RX29_DONE_INT_MASK | RX28_DONE_INT_MASK | \ + RX27_DONE_INT_MASK | RX26_DONE_INT_MASK | \ + RX25_DONE_INT_MASK | RX24_DONE_INT_MASK | \ + RX23_DONE_INT_MASK | RX22_DONE_INT_MASK | \ + RX21_DONE_INT_MASK | RX20_DONE_INT_MASK | \ + RX19_DONE_INT_MASK | RX18_DONE_INT_MASK | \ + RX17_DONE_INT_MASK | RX16_DONE_INT_MASK) + +#define RX_DONE_INT_MASK (RX_DONE_HIGH_INT_MASK | RX_DONE_LOW_INT_MASK) +#define RX_DONE_HIGH_OFFSET fls(RX_DONE_HIGH_INT_MASK) + +#define INT_RX2_MASK(_n) \ + ((RX_NO_CPU_DSCP_HIGH_INT_MASK & (_n)) | \ + (((_n) >> RX_DONE_HIGH_OFFSET) & RX_DONE_HIGH_INT_MASK)) + +/* QDMA_CSR_INT_ENABLE4 */ +#define RX31_COHERENT_INT_MASK BIT(31) +#define RX30_COHERENT_INT_MASK BIT(30) +#define RX29_COHERENT_INT_MASK BIT(29) +#define RX28_COHERENT_INT_MASK BIT(28) +#define RX27_COHERENT_INT_MASK BIT(27) +#define RX26_COHERENT_INT_MASK BIT(26) +#define RX25_COHERENT_INT_MASK BIT(25) +#define RX24_COHERENT_INT_MASK BIT(24) +#define RX23_COHERENT_INT_MASK BIT(23) +#define RX22_COHERENT_INT_MASK BIT(22) +#define RX21_COHERENT_INT_MASK BIT(21) +#define RX20_COHERENT_INT_MASK BIT(20) +#define RX19_COHERENT_INT_MASK BIT(19) +#define RX18_COHERENT_INT_MASK BIT(18) +#define RX17_COHERENT_INT_MASK BIT(17) +#define RX16_COHERENT_INT_MASK BIT(16) + +#define RX_COHERENT_HIGH_INT_MASK \ + (RX31_COHERENT_INT_MASK | RX30_COHERENT_INT_MASK | \ + RX29_COHERENT_INT_MASK | RX28_COHERENT_INT_MASK | \ + RX27_COHERENT_INT_MASK | RX26_COHERENT_INT_MASK | \ + RX25_COHERENT_INT_MASK | RX24_COHERENT_INT_MASK | \ + RX23_COHERENT_INT_MASK | RX22_COHERENT_INT_MASK | \ + RX21_COHERENT_INT_MASK | RX20_COHERENT_INT_MASK | \ + RX19_COHERENT_INT_MASK | RX18_COHERENT_INT_MASK | \ + RX17_COHERENT_INT_MASK | RX16_COHERENT_INT_MASK) + +#define INT_RX3_MASK(_n) (RX_COHERENT_HIGH_INT_MASK & (_n)) /* QDMA_CSR_INT_ENABLE5 */ #define TX31_COHERENT_INT_MASK BIT(31) @@ -555,19 +667,19 @@ #define TX9_COHERENT_INT_MASK BIT(9) #define TX8_COHERENT_INT_MASK BIT(8) -#define INT_IDX4_MASK \ - (TX8_COHERENT_INT_MASK | TX9_COHERENT_INT_MASK | \ - TX10_COHERENT_INT_MASK | TX11_COHERENT_INT_MASK | \ - TX12_COHERENT_INT_MASK | TX13_COHERENT_INT_MASK | \ - TX14_COHERENT_INT_MASK | TX15_COHERENT_INT_MASK | \ - TX16_COHERENT_INT_MASK | TX17_COHERENT_INT_MASK | \ - TX18_COHERENT_INT_MASK | TX19_COHERENT_INT_MASK | \ - TX20_COHERENT_INT_MASK | TX21_COHERENT_INT_MASK | \ - TX22_COHERENT_INT_MASK | TX23_COHERENT_INT_MASK | \ - TX24_COHERENT_INT_MASK | TX25_COHERENT_INT_MASK | \ - TX26_COHERENT_INT_MASK | TX27_COHERENT_INT_MASK | \ - TX28_COHERENT_INT_MASK | TX29_COHERENT_INT_MASK | \ - TX30_COHERENT_INT_MASK | TX31_COHERENT_INT_MASK) +#define TX_COHERENT_HIGH_INT_MASK \ + (TX31_COHERENT_INT_MASK | TX30_COHERENT_INT_MASK | \ + TX29_COHERENT_INT_MASK | TX28_COHERENT_INT_MASK | \ + TX27_COHERENT_INT_MASK | TX26_COHERENT_INT_MASK | \ + TX25_COHERENT_INT_MASK | TX24_COHERENT_INT_MASK | \ + TX23_COHERENT_INT_MASK | TX22_COHERENT_INT_MASK | \ + TX21_COHERENT_INT_MASK | TX20_COHERENT_INT_MASK | \ + TX19_COHERENT_INT_MASK | TX18_COHERENT_INT_MASK | \ + TX17_COHERENT_INT_MASK | TX16_COHERENT_INT_MASK | \ + TX15_COHERENT_INT_MASK | TX14_COHERENT_INT_MASK | \ + TX13_COHERENT_INT_MASK | TX12_COHERENT_INT_MASK | \ + TX11_COHERENT_INT_MASK | TX10_COHERENT_INT_MASK | \ + TX9_COHERENT_INT_MASK | TX8_COHERENT_INT_MASK) #define REG_TX_IRQ_BASE(_n) ((_n) ? 0x0048 : 0x0050) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 3d9205b9a8c3..18359fabe087 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -10110,7 +10110,7 @@ static int bnxt_hwrm_ver_get(struct bnxt *bp) struct hwrm_ver_get_input *req; u16 fw_maj, fw_min, fw_bld, fw_rsv; u32 dev_caps_cfg, hwrm_ver; - int rc, len; + int rc, len, max_tmo_secs; rc = hwrm_req_init(bp, req, HWRM_VER_GET); if (rc) @@ -10183,9 +10183,14 @@ static int bnxt_hwrm_ver_get(struct bnxt *bp) bp->hwrm_cmd_max_timeout = le16_to_cpu(resp->max_req_timeout) * 1000; if (!bp->hwrm_cmd_max_timeout) bp->hwrm_cmd_max_timeout = HWRM_CMD_MAX_TIMEOUT; - else if (bp->hwrm_cmd_max_timeout > HWRM_CMD_MAX_TIMEOUT) - netdev_warn(bp->dev, "Device requests max timeout of %d seconds, may trigger hung task watchdog\n", - bp->hwrm_cmd_max_timeout / 1000); + max_tmo_secs = bp->hwrm_cmd_max_timeout / 1000; +#ifdef CONFIG_DETECT_HUNG_TASK + if (bp->hwrm_cmd_max_timeout > HWRM_CMD_MAX_TIMEOUT || + max_tmo_secs > CONFIG_DEFAULT_HUNG_TASK_TIMEOUT) { + netdev_warn(bp->dev, "Device requests max timeout of %d seconds, may trigger hung task watchdog (kernel default %ds)\n", + max_tmo_secs, CONFIG_DEFAULT_HUNG_TASK_TIMEOUT); + } +#endif if (resp->hwrm_intf_maj_8b >= 1) { bp->hwrm_max_req_len = le16_to_cpu(resp->max_req_win_len); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c index 5576e7cf8463..9b6489e417fc 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c @@ -496,9 +496,16 @@ err: start_utc, coredump.total_segs + 1, rc); kfree(coredump.data); - *dump_len += sizeof(struct bnxt_coredump_record); - if (rc == -ENOBUFS) + if (!rc) { + *dump_len += sizeof(struct bnxt_coredump_record); + /* The actual coredump length can be smaller than the FW + * reported length earlier. Use the ethtool provided length. + */ + if (buf_len) + *dump_len = buf_len; + } else if (rc == -ENOBUFS) { netdev_err(bp->dev, "Firmware returned large coredump buffer\n"); + } return rc; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.h index 15ca51b5d204..fb5f5b063c3d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.h @@ -58,7 +58,7 @@ void hwrm_update_token(struct bnxt *bp, u16 seq, enum bnxt_hwrm_wait_state s); #define BNXT_HWRM_MAX_REQ_LEN (bp->hwrm_max_req_len) #define BNXT_HWRM_SHORT_REQ_LEN sizeof(struct hwrm_short_input) -#define HWRM_CMD_MAX_TIMEOUT 40000U +#define HWRM_CMD_MAX_TIMEOUT 60000U #define SHORT_HWRM_CMD_TIMEOUT 20 #define HWRM_CMD_TIMEOUT (bp->hwrm_cmd_timeout) #define HWRM_RESET_TIMEOUT ((HWRM_CMD_TIMEOUT) * 4) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c index a8e930d5dbb0..238db9a1aebf 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c @@ -148,7 +148,6 @@ void bnxt_unregister_dev(struct bnxt_en_dev *edev) struct net_device *dev = edev->net; struct bnxt *bp = netdev_priv(dev); struct bnxt_ulp *ulp; - int i = 0; ulp = edev->ulp_tbl; netdev_lock(dev); @@ -164,10 +163,6 @@ void bnxt_unregister_dev(struct bnxt_en_dev *edev) synchronize_rcu(); ulp->max_async_event_id = 0; ulp->async_events_bmap = NULL; - while (atomic_read(&ulp->ref_count) != 0 && i < 10) { - msleep(100); - i++; - } mutex_unlock(&edev->en_dev_lock); netdev_unlock(dev); return; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h index 7fa3b8d1ebd2..7b9dd8ebe4bc 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h @@ -10,9 +10,6 @@ #ifndef BNXT_ULP_H #define BNXT_ULP_H -#define BNXT_ROCE_ULP 0 -#define BNXT_MAX_ULP 1 - #define BNXT_MIN_ROCE_CP_RINGS 2 #define BNXT_MIN_ROCE_STAT_CTXS 1 @@ -50,7 +47,6 @@ struct bnxt_ulp { unsigned long *async_events_bmap; u16 max_async_event_id; u16 msix_requested; - atomic_t ref_count; }; struct bnxt_en_dev { diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index 51b8377edd1d..adb441b36581 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -2615,7 +2615,11 @@ err: return status; } -static char flash_cookie[2][16] = {"*** SE FLAS", "H DIRECTORY *** "}; +/* + * Since the cookie is text, add a parsing-skipped space to keep it from + * ever being matched on storage holding this source file. + */ +static const char flash_cookie[32] __nonstring = "*** SE FLAS" "H DIRECTORY *** "; static bool phy_flashing_required(struct be_adapter *adapter) { diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h index d70818f06be7..5e2d3ddb5d43 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.h +++ b/drivers/net/ethernet/emulex/benet/be_cmds.h @@ -1415,7 +1415,7 @@ struct flash_section_entry { } __packed; struct flash_section_info { - u8 cookie[32]; + u8 cookie[32] __nonstring; struct flash_section_hdr fsec_hdr; struct flash_section_entry fsec_entry[32]; } __packed; diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 8aaac9101377..446e4b6fd3f1 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -1830,7 +1830,7 @@ static void gve_turndown(struct gve_priv *priv) /* Stop tx queues */ netif_tx_disable(priv->dev); - xdp_features_clear_redirect_target(priv->dev); + xdp_features_clear_redirect_target_locked(priv->dev); gve_clear_napi_enabled(priv); gve_clear_report_stats(priv); @@ -1902,7 +1902,7 @@ static void gve_turnup(struct gve_priv *priv) } if (priv->tx_cfg.num_xdp_queues && gve_supports_xdp_xmit(priv)) - xdp_features_set_redirect_target(priv->dev, false); + xdp_features_set_redirect_target_locked(priv->dev, false); gve_set_napi_enabled(priv); } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index 60f085b00a8c..147d7f5c1fcc 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -969,8 +969,6 @@ void rvu_npc_enable_promisc_entry(struct rvu *rvu, u16 pcifunc, int nixlf, bool enable); void rvu_npc_install_bcast_match_entry(struct rvu *rvu, u16 pcifunc, int nixlf, u64 chan); -void rvu_npc_enable_bcast_entry(struct rvu *rvu, u16 pcifunc, int nixlf, - bool enable); void rvu_npc_install_allmulti_entry(struct rvu *rvu, u16 pcifunc, int nixlf, u64 chan); void rvu_npc_enable_allmulti_entry(struct rvu *rvu, u16 pcifunc, int nixlf, diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index 821fe242f821..6296a3cdabbb 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -820,24 +820,6 @@ void rvu_npc_install_bcast_match_entry(struct rvu *rvu, u16 pcifunc, rvu_mbox_handler_npc_install_flow(rvu, &req, &rsp); } -void rvu_npc_enable_bcast_entry(struct rvu *rvu, u16 pcifunc, int nixlf, - bool enable) -{ - struct npc_mcam *mcam = &rvu->hw->mcam; - int blkaddr, index; - - blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); - if (blkaddr < 0) - return; - - /* Get 'pcifunc' of PF device */ - pcifunc = pcifunc & ~RVU_PFVF_FUNC_MASK; - - index = npc_get_nixlf_mcam_index(mcam, pcifunc, nixlf, - NIXLF_BCAST_ENTRY); - npc_enable_mcam_entry(rvu, mcam, blkaddr, index, enable); -} - void rvu_npc_install_allmulti_entry(struct rvu *rvu, u16 pcifunc, int nixlf, u64 chan) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index 2c5f850c31f6..40024cfa3099 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -148,7 +148,7 @@ out: * Free the IRQ and other resources such as rmap from the system. * BUT doesn't free or remove reference from mlx5. * This function is very important for the shutdown flow, where we need to - * cleanup system resoruces but keep mlx5 objects alive, + * cleanup system resources but keep mlx5 objects alive, * see mlx5_irq_table_free_irqs(). */ static void mlx5_system_free_irq(struct mlx5_irq *irq) @@ -588,7 +588,7 @@ static void irq_pool_free(struct mlx5_irq_pool *pool) struct mlx5_irq *irq; unsigned long index; - /* There are cases in which we are destrying the irq_table before + /* There are cases in which we are destroying the irq_table before * freeing all the IRQs, fast teardown for example. Hence, free the irqs * which might not have been freed. */ @@ -617,7 +617,7 @@ static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pcif_vec, if (!mlx5_sf_max_functions(dev)) return 0; if (sf_vec < MLX5_IRQ_VEC_COMP_BASE_SF) { - mlx5_core_dbg(dev, "Not enught IRQs for SFs. SF may run at lower performance\n"); + mlx5_core_dbg(dev, "Not enough IRQs for SFs. SF may run at lower performance\n"); return 0; } diff --git a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c index a2d4336d2766..92f30ff2d631 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c @@ -948,64 +948,20 @@ static int ionic_get_tunable(struct net_device *netdev, return 0; } -static int ionic_get_module_info(struct net_device *netdev, - struct ethtool_modinfo *modinfo) - -{ - struct ionic_lif *lif = netdev_priv(netdev); - struct ionic_dev *idev = &lif->ionic->idev; - struct ionic_xcvr_status *xcvr; - struct sfp_eeprom_base *sfp; - - xcvr = &idev->port_info->status.xcvr; - sfp = (struct sfp_eeprom_base *) xcvr->sprom; - - /* report the module data type and length */ - switch (sfp->phys_id) { - case SFF8024_ID_SFP: - modinfo->type = ETH_MODULE_SFF_8079; - modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN; - break; - case SFF8024_ID_QSFP_8436_8636: - case SFF8024_ID_QSFP28_8636: - case SFF8024_ID_QSFP_PLUS_CMIS: - modinfo->type = ETH_MODULE_SFF_8436; - modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN; - break; - default: - netdev_info(netdev, "unknown xcvr type 0x%02x\n", - xcvr->sprom[0]); - modinfo->type = 0; - modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN; - break; - } - - return 0; -} - -static int ionic_get_module_eeprom(struct net_device *netdev, - struct ethtool_eeprom *ee, - u8 *data) +static int ionic_do_module_copy(u8 *dst, u8 *src, u32 len) { - struct ionic_lif *lif = netdev_priv(netdev); - struct ionic_dev *idev = &lif->ionic->idev; - struct ionic_xcvr_status *xcvr; - char tbuf[sizeof(xcvr->sprom)]; + char tbuf[sizeof_field(struct ionic_xcvr_status, sprom)]; int count = 10; - u32 len; /* The NIC keeps the module prom up-to-date in the DMA space * so we can simply copy the module bytes into the data buffer. */ - xcvr = &idev->port_info->status.xcvr; - len = min_t(u32, sizeof(xcvr->sprom), ee->len); - do { - memcpy(data, &xcvr->sprom[ee->offset], len); - memcpy(tbuf, &xcvr->sprom[ee->offset], len); + memcpy(dst, src, len); + memcpy(tbuf, src, len); /* Let's make sure we got a consistent copy */ - if (!memcmp(data, tbuf, len)) + if (!memcmp(dst, tbuf, len)) break; } while (--count); @@ -1016,6 +972,48 @@ static int ionic_get_module_eeprom(struct net_device *netdev, return 0; } +static int ionic_get_module_eeprom_by_page(struct net_device *netdev, + const struct ethtool_module_eeprom *page_data, + struct netlink_ext_ack *extack) +{ + struct ionic_lif *lif = netdev_priv(netdev); + struct ionic_dev *idev = &lif->ionic->idev; + u32 err = -EINVAL; + u8 *src; + + if (!page_data->length) + return -EINVAL; + + if (page_data->bank != 0) { + NL_SET_ERR_MSG_MOD(extack, "Only bank 0 is supported"); + return -EINVAL; + } + + switch (page_data->page) { + case 0: + src = &idev->port_info->status.xcvr.sprom[page_data->offset]; + break; + case 1: + src = &idev->port_info->sprom_page1[page_data->offset - 128]; + break; + case 2: + src = &idev->port_info->sprom_page2[page_data->offset - 128]; + break; + case 17: + src = &idev->port_info->sprom_page17[page_data->offset - 128]; + break; + default: + return -EOPNOTSUPP; + } + + memset(page_data->data, 0, page_data->length); + err = ionic_do_module_copy(page_data->data, src, page_data->length); + if (err) + return err; + + return page_data->length; +} + static int ionic_get_ts_info(struct net_device *netdev, struct kernel_ethtool_ts_info *info) { @@ -1161,8 +1159,7 @@ static const struct ethtool_ops ionic_ethtool_ops = { .set_rxfh = ionic_set_rxfh, .get_tunable = ionic_get_tunable, .set_tunable = ionic_set_tunable, - .get_module_info = ionic_get_module_info, - .get_module_eeprom = ionic_get_module_eeprom, + .get_module_eeprom_by_page = ionic_get_module_eeprom_by_page, .get_pauseparam = ionic_get_pauseparam, .set_pauseparam = ionic_set_pauseparam, .get_fecparam = ionic_get_fecparam, diff --git a/drivers/net/ethernet/pensando/ionic/ionic_if.h b/drivers/net/ethernet/pensando/ionic/ionic_if.h index 830c8adbfbee..f1ddbe9994a3 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_if.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_if.h @@ -2839,6 +2839,10 @@ union ionic_port_identity { * @status: Port status data * @stats: Port statistics data * @mgmt_stats: Port management statistics data + * @sprom_epage: Extended Transceiver sprom + * @sprom_page1: Extended Transceiver sprom, page 1 + * @sprom_page2: Extended Transceiver sprom, page 2 + * @sprom_page17: Extended Transceiver sprom, page 17 * @rsvd: reserved byte(s) * @pb_stats: uplink pb drop stats */ @@ -2849,8 +2853,17 @@ struct ionic_port_info { struct ionic_port_stats stats; struct ionic_mgmt_port_stats mgmt_stats; }; - /* room for pb_stats to start at 2k offset */ - u8 rsvd[760]; + union { + u8 sprom_epage[384]; + struct { + u8 sprom_page1[128]; + u8 sprom_page2[128]; + u8 sprom_page17[128]; + }; + }; + u8 rsvd[376]; + + /* pb_stats must start at 2k offset */ struct ionic_port_pb_stats pb_stats; }; diff --git a/drivers/net/ethernet/realtek/r8169.h b/drivers/net/ethernet/realtek/r8169.h index 9f784840ea0d..f05231030925 100644 --- a/drivers/net/ethernet/realtek/r8169.h +++ b/drivers/net/ethernet/realtek/r8169.h @@ -64,15 +64,12 @@ enum mac_version { /* support for RTL_GIGA_MAC_VER_50 has been removed */ RTL_GIGA_MAC_VER_51, RTL_GIGA_MAC_VER_52, - RTL_GIGA_MAC_VER_53, /* support for RTL_GIGA_MAC_VER_60 has been removed */ RTL_GIGA_MAC_VER_61, RTL_GIGA_MAC_VER_63, RTL_GIGA_MAC_VER_64, - RTL_GIGA_MAC_VER_65, RTL_GIGA_MAC_VER_66, RTL_GIGA_MAC_VER_70, - RTL_GIGA_MAC_VER_71, RTL_GIGA_MAC_NONE, RTL_GIGA_MAC_VER_LAST = RTL_GIGA_MAC_NONE - 1 }; diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 635785d91790..7bf71a675362 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -99,14 +99,14 @@ static const struct rtl_chip_info { const char *fw_name; } rtl_chip_infos[] = { /* 8126A family. */ - { 0x7cf, 0x64a, RTL_GIGA_MAC_VER_71, "RTL8126A", FIRMWARE_8126A_3 }, + { 0x7cf, 0x64a, RTL_GIGA_MAC_VER_70, "RTL8126A", FIRMWARE_8126A_3 }, { 0x7cf, 0x649, RTL_GIGA_MAC_VER_70, "RTL8126A", FIRMWARE_8126A_2 }, /* 8125BP family. */ { 0x7cf, 0x681, RTL_GIGA_MAC_VER_66, "RTL8125BP", FIRMWARE_8125BP_2 }, /* 8125D family. */ - { 0x7cf, 0x689, RTL_GIGA_MAC_VER_65, "RTL8125D", FIRMWARE_8125D_2 }, + { 0x7cf, 0x689, RTL_GIGA_MAC_VER_64, "RTL8125D", FIRMWARE_8125D_2 }, { 0x7cf, 0x688, RTL_GIGA_MAC_VER_64, "RTL8125D", FIRMWARE_8125D_1 }, /* 8125B family. */ @@ -116,7 +116,7 @@ static const struct rtl_chip_info { { 0x7cf, 0x609, RTL_GIGA_MAC_VER_61, "RTL8125A", FIRMWARE_8125A_3 }, /* RTL8117 */ - { 0x7cf, 0x54b, RTL_GIGA_MAC_VER_53, "RTL8168fp/RTL8117" }, + { 0x7cf, 0x54b, RTL_GIGA_MAC_VER_52, "RTL8168fp/RTL8117" }, { 0x7cf, 0x54a, RTL_GIGA_MAC_VER_52, "RTL8168fp/RTL8117", FIRMWARE_8168FP_3 }, @@ -830,7 +830,7 @@ static bool rtl_is_8168evl_up(struct rtl8169_private *tp) { return tp->mac_version >= RTL_GIGA_MAC_VER_34 && tp->mac_version != RTL_GIGA_MAC_VER_39 && - tp->mac_version <= RTL_GIGA_MAC_VER_53; + tp->mac_version <= RTL_GIGA_MAC_VER_52; } static bool rtl_supports_eee(struct rtl8169_private *tp) @@ -998,9 +998,7 @@ void r8169_get_led_name(struct rtl8169_private *tp, int idx, static void r8168fp_adjust_ocp_cmd(struct rtl8169_private *tp, u32 *cmd, int type) { /* based on RTL8168FP_OOBMAC_BASE in vendor driver */ - if (type == ERIAR_OOB && - (tp->mac_version == RTL_GIGA_MAC_VER_52 || - tp->mac_version == RTL_GIGA_MAC_VER_53)) + if (type == ERIAR_OOB && tp->mac_version == RTL_GIGA_MAC_VER_52) *cmd |= 0xf70 << 18; } @@ -1500,7 +1498,7 @@ static enum rtl_dash_type rtl_get_dash_type(struct rtl8169_private *tp) case RTL_GIGA_MAC_VER_28: case RTL_GIGA_MAC_VER_31: return RTL_DASH_DP; - case RTL_GIGA_MAC_VER_51 ... RTL_GIGA_MAC_VER_53: + case RTL_GIGA_MAC_VER_51 ... RTL_GIGA_MAC_VER_52: return RTL_DASH_EP; case RTL_GIGA_MAC_VER_66: return RTL_DASH_25_BP; @@ -2485,7 +2483,7 @@ static void rtl_init_rxcfg(struct rtl8169_private *tp) case RTL_GIGA_MAC_VER_38: RTL_W32(tp, RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST); break; - case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_53: + case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_52: RTL_W32(tp, RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST | RX_EARLY_OFF); break; case RTL_GIGA_MAC_VER_61: @@ -2616,7 +2614,7 @@ DECLARE_RTL_COND(rtl_rxtx_empty_cond_2) static void rtl_wait_txrx_fifo_empty(struct rtl8169_private *tp) { switch (tp->mac_version) { - case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_53: + case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_52: rtl_loop_wait_high(tp, &rtl_txcfg_empty_cond, 100, 42); rtl_loop_wait_high(tp, &rtl_rxtx_empty_cond, 100, 42); break; @@ -2939,7 +2937,6 @@ static void rtl_hw_aspm_clkreq_enable(struct rtl8169_private *tp, bool enable) rtl_mod_config5(tp, 0, ASPM_en); switch (tp->mac_version) { case RTL_GIGA_MAC_VER_70: - case RTL_GIGA_MAC_VER_71: val8 = RTL_R8(tp, INT_CFG0_8125) | INT_CFG0_CLKREQEN; RTL_W8(tp, INT_CFG0_8125, val8); break; @@ -2971,7 +2968,6 @@ static void rtl_hw_aspm_clkreq_enable(struct rtl8169_private *tp, bool enable) switch (tp->mac_version) { case RTL_GIGA_MAC_VER_70: - case RTL_GIGA_MAC_VER_71: val8 = RTL_R8(tp, INT_CFG0_8125) & ~INT_CFG0_CLKREQEN; RTL_W8(tp, INT_CFG0_8125, val8); break; @@ -3691,12 +3687,10 @@ static void rtl_hw_start_8125_common(struct rtl8169_private *tp) /* disable new tx descriptor format */ r8168_mac_ocp_modify(tp, 0xeb58, 0x0001, 0x0000); - if (tp->mac_version == RTL_GIGA_MAC_VER_70 || - tp->mac_version == RTL_GIGA_MAC_VER_71) + if (tp->mac_version == RTL_GIGA_MAC_VER_70) RTL_W8(tp, 0xD8, RTL_R8(tp, 0xD8) & ~0x02); - if (tp->mac_version == RTL_GIGA_MAC_VER_70 || - tp->mac_version == RTL_GIGA_MAC_VER_71) + if (tp->mac_version == RTL_GIGA_MAC_VER_70) r8168_mac_ocp_modify(tp, 0xe614, 0x0700, 0x0400); else if (tp->mac_version == RTL_GIGA_MAC_VER_63) r8168_mac_ocp_modify(tp, 0xe614, 0x0700, 0x0200); @@ -3714,8 +3708,7 @@ static void rtl_hw_start_8125_common(struct rtl8169_private *tp) r8168_mac_ocp_modify(tp, 0xe056, 0x00f0, 0x0030); r8168_mac_ocp_modify(tp, 0xe040, 0x1000, 0x0000); r8168_mac_ocp_modify(tp, 0xea1c, 0x0003, 0x0001); - if (tp->mac_version == RTL_GIGA_MAC_VER_70 || - tp->mac_version == RTL_GIGA_MAC_VER_71) + if (tp->mac_version == RTL_GIGA_MAC_VER_70) r8168_mac_ocp_modify(tp, 0xea1c, 0x0300, 0x0000); else r8168_mac_ocp_modify(tp, 0xea1c, 0x0004, 0x0000); @@ -3831,14 +3824,11 @@ static void rtl_hw_config(struct rtl8169_private *tp) [RTL_GIGA_MAC_VER_48] = rtl_hw_start_8168h_1, [RTL_GIGA_MAC_VER_51] = rtl_hw_start_8168ep_3, [RTL_GIGA_MAC_VER_52] = rtl_hw_start_8117, - [RTL_GIGA_MAC_VER_53] = rtl_hw_start_8117, [RTL_GIGA_MAC_VER_61] = rtl_hw_start_8125a_2, [RTL_GIGA_MAC_VER_63] = rtl_hw_start_8125b, [RTL_GIGA_MAC_VER_64] = rtl_hw_start_8125d, - [RTL_GIGA_MAC_VER_65] = rtl_hw_start_8125d, [RTL_GIGA_MAC_VER_66] = rtl_hw_start_8125d, [RTL_GIGA_MAC_VER_70] = rtl_hw_start_8126a, - [RTL_GIGA_MAC_VER_71] = rtl_hw_start_8126a, }; if (hw_configs[tp->mac_version]) @@ -3855,14 +3845,12 @@ static void rtl_hw_start_8125(struct rtl8169_private *tp) switch (tp->mac_version) { case RTL_GIGA_MAC_VER_61: case RTL_GIGA_MAC_VER_64: - case RTL_GIGA_MAC_VER_65: case RTL_GIGA_MAC_VER_66: for (i = 0xa00; i < 0xb00; i += 4) RTL_W32(tp, i, 0); break; case RTL_GIGA_MAC_VER_63: case RTL_GIGA_MAC_VER_70: - case RTL_GIGA_MAC_VER_71: for (i = 0xa00; i < 0xa80; i += 4) RTL_W32(tp, i, 0); RTL_W16(tp, INT_CFG1_8125, 0x0000); @@ -5039,10 +5027,8 @@ static void rtl_shutdown(struct pci_dev *pdev) /* Restore original MAC address */ rtl_rar_set(tp, tp->dev->perm_addr); - if (system_state == SYSTEM_POWER_OFF && !tp->dash_enabled) { - pci_wake_from_d3(pdev, tp->saved_wolopts); - pci_set_power_state(pdev, PCI_D3hot); - } + if (system_state == SYSTEM_POWER_OFF && !tp->dash_enabled) + pci_prepare_to_sleep(pdev); } static void rtl_remove_one(struct pci_dev *pdev) @@ -5296,7 +5282,7 @@ static void rtl_hw_init_8125(struct rtl8169_private *tp) static void rtl_hw_initialize(struct rtl8169_private *tp) { switch (tp->mac_version) { - case RTL_GIGA_MAC_VER_51 ... RTL_GIGA_MAC_VER_53: + case RTL_GIGA_MAC_VER_51 ... RTL_GIGA_MAC_VER_52: rtl8168ep_stop_cmac(tp); fallthrough; case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_48: diff --git a/drivers/net/ethernet/realtek/r8169_phy_config.c b/drivers/net/ethernet/realtek/r8169_phy_config.c index 748ca8b212ba..5403f8202c79 100644 --- a/drivers/net/ethernet/realtek/r8169_phy_config.c +++ b/drivers/net/ethernet/realtek/r8169_phy_config.c @@ -1176,14 +1176,11 @@ void r8169_hw_phy_config(struct rtl8169_private *tp, struct phy_device *phydev, [RTL_GIGA_MAC_VER_48] = rtl8168h_2_hw_phy_config, [RTL_GIGA_MAC_VER_51] = rtl8168ep_2_hw_phy_config, [RTL_GIGA_MAC_VER_52] = rtl8117_hw_phy_config, - [RTL_GIGA_MAC_VER_53] = rtl8117_hw_phy_config, [RTL_GIGA_MAC_VER_61] = rtl8125a_2_hw_phy_config, [RTL_GIGA_MAC_VER_63] = rtl8125b_hw_phy_config, [RTL_GIGA_MAC_VER_64] = rtl8125d_hw_phy_config, - [RTL_GIGA_MAC_VER_65] = rtl8125d_hw_phy_config, [RTL_GIGA_MAC_VER_66] = rtl8125bp_hw_phy_config, [RTL_GIGA_MAC_VER_70] = rtl8126a_hw_phy_config, - [RTL_GIGA_MAC_VER_71] = rtl8126a_hw_phy_config, }; if (phy_configs[ver]) diff --git a/drivers/net/ethernet/realtek/rtase/rtase.h b/drivers/net/ethernet/realtek/rtase/rtase.h index 2bbfcad613ab..498cfe4d0cac 100644 --- a/drivers/net/ethernet/realtek/rtase/rtase.h +++ b/drivers/net/ethernet/realtek/rtase/rtase.h @@ -170,6 +170,7 @@ enum rtase_registers { #define RTASE_TC_MODE_MASK GENMASK(11, 10) RTASE_TOKSEL = 0x2046, + RTASE_TXQCRDT_0 = 0x2500, RTASE_RFIFONFULL = 0x4406, RTASE_INT_MITI_TX = 0x0A00, RTASE_INT_MITI_RX = 0x0A80, @@ -259,6 +260,12 @@ union rtase_rx_desc { #define RTASE_VLAN_TAG_MASK GENMASK(15, 0) #define RTASE_RX_PKT_SIZE_MASK GENMASK(13, 0) +/* txqos hardware definitions */ +#define RTASE_1T_CLOCK 64 +#define RTASE_1T_POWER 10000000 +#define RTASE_IDLESLOPE_INT_SHIFT 25 +#define RTASE_IDLESLOPE_INT_MASK GENMASK(31, 25) + #define RTASE_IVEC_NAME_SIZE (IFNAMSIZ + 10) struct rtase_int_vector { @@ -294,6 +301,13 @@ struct rtase_ring { u64 alloc_fail; }; +struct rtase_txqos { + int hicredit; + int locredit; + int idleslope; + int sendslope; +}; + struct rtase_stats { u64 tx_dropped; u64 rx_dropped; @@ -313,6 +327,7 @@ struct rtase_private { struct page_pool *page_pool; struct rtase_ring tx_ring[RTASE_NUM_TX_QUEUE]; + struct rtase_txqos tx_qos[RTASE_NUM_TX_QUEUE]; struct rtase_ring rx_ring[RTASE_NUM_RX_QUEUE]; struct rtase_counters *tally_vaddr; dma_addr_t tally_paddr; diff --git a/drivers/net/ethernet/realtek/rtase/rtase_main.c b/drivers/net/ethernet/realtek/rtase/rtase_main.c index 2aacc1996796..6251548d50ff 100644 --- a/drivers/net/ethernet/realtek/rtase/rtase_main.c +++ b/drivers/net/ethernet/realtek/rtase/rtase_main.c @@ -1661,6 +1661,65 @@ static void rtase_get_stats64(struct net_device *dev, stats->rx_length_errors = tp->stats.rx_length_errors; } +static void rtase_set_hw_cbs(const struct rtase_private *tp, u32 queue) +{ + u32 idle = tp->tx_qos[queue].idleslope * RTASE_1T_CLOCK; + u32 val, i; + + val = u32_encode_bits(idle / RTASE_1T_POWER, RTASE_IDLESLOPE_INT_MASK); + idle %= RTASE_1T_POWER; + + for (i = 1; i <= RTASE_IDLESLOPE_INT_SHIFT; i++) { + idle *= 2; + if ((idle / RTASE_1T_POWER) == 1) + val |= BIT(RTASE_IDLESLOPE_INT_SHIFT - i); + + idle %= RTASE_1T_POWER; + } + + rtase_w32(tp, RTASE_TXQCRDT_0 + queue * 4, val); +} + +static int rtase_setup_tc_cbs(struct rtase_private *tp, + const struct tc_cbs_qopt_offload *qopt) +{ + int queue = qopt->queue; + + if (queue < 0 || queue >= tp->func_tx_queue_num) + return -EINVAL; + + if (!qopt->enable) { + tp->tx_qos[queue].hicredit = 0; + tp->tx_qos[queue].locredit = 0; + tp->tx_qos[queue].idleslope = 0; + tp->tx_qos[queue].sendslope = 0; + + rtase_w32(tp, RTASE_TXQCRDT_0 + queue * 4, 0); + } else { + tp->tx_qos[queue].hicredit = qopt->hicredit; + tp->tx_qos[queue].locredit = qopt->locredit; + tp->tx_qos[queue].idleslope = qopt->idleslope; + tp->tx_qos[queue].sendslope = qopt->sendslope; + + rtase_set_hw_cbs(tp, queue); + } + + return 0; +} + +static int rtase_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data) +{ + struct rtase_private *tp = netdev_priv(dev); + + switch (type) { + case TC_SETUP_QDISC_CBS: + return rtase_setup_tc_cbs(tp, type_data); + default: + return -EOPNOTSUPP; + } +} + static netdev_features_t rtase_fix_features(struct net_device *dev, netdev_features_t features) { @@ -1696,6 +1755,7 @@ static const struct net_device_ops rtase_netdev_ops = { .ndo_change_mtu = rtase_change_mtu, .ndo_tx_timeout = rtase_tx_timeout, .ndo_get_stats64 = rtase_get_stats64, + .ndo_setup_tc = rtase_setup_tc, .ndo_fix_features = rtase_fix_features, .ndo_set_features = rtase_set_features, }; diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig index 59c742a8b3ad..67fa879b1e52 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Kconfig +++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig @@ -132,6 +132,17 @@ config DWMAC_QCOM_ETHQOS This selects the Qualcomm ETHQOS glue layer support for the stmmac device driver. +config DWMAC_RENESAS_GBETH + tristate "Renesas RZ/V2H(P) GBETH support" + default ARCH_RENESAS + depends on OF && (ARCH_RENESAS || COMPILE_TEST) + help + Support for Gigabit Ethernet Interface (GBETH) on Renesas + RZ/V2H(P) SoCs. + + This selects the Renesas RZ/V2H(P) Soc specific glue layer support + for the stmmac device driver. + config DWMAC_ROCKCHIP tristate "Rockchip dwmac support" default ARCH_ROCKCHIP diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile index 594883fb4164..91050215511b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Makefile +++ b/drivers/net/ethernet/stmicro/stmmac/Makefile @@ -20,6 +20,7 @@ obj-$(CONFIG_DWMAC_LPC18XX) += dwmac-lpc18xx.o obj-$(CONFIG_DWMAC_MEDIATEK) += dwmac-mediatek.o obj-$(CONFIG_DWMAC_MESON) += dwmac-meson.o dwmac-meson8b.o obj-$(CONFIG_DWMAC_QCOM_ETHQOS) += dwmac-qcom-ethqos.o +obj-$(CONFIG_DWMAC_RENESAS_GBETH) += dwmac-renesas-gbeth.o obj-$(CONFIG_DWMAC_ROCKCHIP) += dwmac-rk.o obj-$(CONFIG_DWMAC_RZN1) += dwmac-rzn1.o obj-$(CONFIG_DWMAC_S32) += dwmac-s32.o diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c index fa900b4991d0..09ae16e026eb 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c @@ -136,10 +136,11 @@ static int dwc_qos_probe(struct platform_device *pdev, #define AUTO_CAL_STATUS 0x880c #define AUTO_CAL_STATUS_ACTIVE BIT(31) -static void tegra_eqos_fix_speed(void *priv, int speed, unsigned int mode) +static void tegra_eqos_fix_speed(void *bsp_priv, int speed, unsigned int mode) { - struct tegra_eqos *eqos = priv; + struct tegra_eqos *eqos = bsp_priv; bool needs_calibration = false; + struct stmmac_priv *priv; u32 value; int err; @@ -158,6 +159,11 @@ static void tegra_eqos_fix_speed(void *priv, int speed, unsigned int mode) } if (needs_calibration) { + priv = netdev_priv(dev_get_drvdata(eqos->dev)); + + /* Calibration should be done with the MDIO bus idle */ + mutex_lock(&priv->mii->mdio_lock); + /* calibrate */ value = readl(eqos->regs + SDMEMCOMPPADCTRL); value |= SDMEMCOMPPADCTRL_PAD_E_INPUT_OR_E_PWRD; @@ -191,6 +197,8 @@ static void tegra_eqos_fix_speed(void *priv, int speed, unsigned int mode) value = readl(eqos->regs + SDMEMCOMPPADCTRL); value &= ~SDMEMCOMPPADCTRL_PAD_E_INPUT_OR_E_PWRD; writel(value, eqos->regs + SDMEMCOMPPADCTRL); + + mutex_unlock(&priv->mii->mdio_lock); } else { value = readl(eqos->regs + AUTO_CAL_CONFIG); value &= ~AUTO_CAL_CONFIG_ENABLE; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-renesas-gbeth.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-renesas-gbeth.c new file mode 100644 index 000000000000..9a774046455b --- /dev/null +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-renesas-gbeth.c @@ -0,0 +1,146 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * dwmac-renesas-gbeth.c - DWMAC Specific Glue layer for Renesas GBETH + * + * The Rx and Tx clocks are supplied as follows for the GBETH IP. + * + * Rx / Tx + * -------+------------- on / off ------- + * | + * | Rx-180 / Tx-180 + * +---- not ---- on / off ------- + * + * Copyright (C) 2025 Renesas Electronics Corporation + */ + +#include <linux/clk.h> +#include <linux/device.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/reset.h> + +#include "stmmac_platform.h" + +struct renesas_gbeth { + struct plat_stmmacenet_data *plat_dat; + struct reset_control *rstc; + struct device *dev; +}; + +static const char *const renesas_gbeth_clks[] = { + "tx", "tx-180", "rx", "rx-180", +}; + +static int renesas_gbeth_init(struct platform_device *pdev, void *priv) +{ + struct plat_stmmacenet_data *plat_dat; + struct renesas_gbeth *gbeth = priv; + int ret; + + plat_dat = gbeth->plat_dat; + + ret = reset_control_deassert(gbeth->rstc); + if (ret) { + dev_err(gbeth->dev, "Reset deassert failed\n"); + return ret; + } + + ret = clk_bulk_prepare_enable(plat_dat->num_clks, + plat_dat->clks); + if (ret) + reset_control_assert(gbeth->rstc); + + return ret; +} + +static void renesas_gbeth_exit(struct platform_device *pdev, void *priv) +{ + struct plat_stmmacenet_data *plat_dat; + struct renesas_gbeth *gbeth = priv; + int ret; + + plat_dat = gbeth->plat_dat; + + clk_bulk_disable_unprepare(plat_dat->num_clks, plat_dat->clks); + + ret = reset_control_assert(gbeth->rstc); + if (ret) + dev_err(gbeth->dev, "Reset assert failed\n"); +} + +static int renesas_gbeth_probe(struct platform_device *pdev) +{ + struct plat_stmmacenet_data *plat_dat; + struct stmmac_resources stmmac_res; + struct device *dev = &pdev->dev; + struct renesas_gbeth *gbeth; + unsigned int i; + int err; + + err = stmmac_get_platform_resources(pdev, &stmmac_res); + if (err) + return dev_err_probe(dev, err, + "failed to get resources\n"); + + plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac); + if (IS_ERR(plat_dat)) + return dev_err_probe(dev, PTR_ERR(plat_dat), + "dt configuration failed\n"); + + gbeth = devm_kzalloc(dev, sizeof(*gbeth), GFP_KERNEL); + if (!gbeth) + return -ENOMEM; + + plat_dat->num_clks = ARRAY_SIZE(renesas_gbeth_clks); + plat_dat->clks = devm_kcalloc(dev, plat_dat->num_clks, + sizeof(*plat_dat->clks), GFP_KERNEL); + if (!plat_dat->clks) + return -ENOMEM; + + for (i = 0; i < plat_dat->num_clks; i++) + plat_dat->clks[i].id = renesas_gbeth_clks[i]; + + err = devm_clk_bulk_get(dev, plat_dat->num_clks, plat_dat->clks); + if (err < 0) + return err; + + plat_dat->clk_tx_i = stmmac_pltfr_find_clk(plat_dat, "tx"); + if (!plat_dat->clk_tx_i) + return dev_err_probe(dev, -EINVAL, + "error finding tx clock\n"); + + gbeth->rstc = devm_reset_control_get_exclusive(dev, NULL); + if (IS_ERR(gbeth->rstc)) + return PTR_ERR(gbeth->rstc); + + gbeth->dev = dev; + gbeth->plat_dat = plat_dat; + plat_dat->bsp_priv = gbeth; + plat_dat->set_clk_tx_rate = stmmac_set_clk_tx_rate; + plat_dat->init = renesas_gbeth_init; + plat_dat->exit = renesas_gbeth_exit; + plat_dat->flags |= STMMAC_FLAG_HWTSTAMP_CORRECT_LATENCY | + STMMAC_FLAG_EN_TX_LPI_CLK_PHY_CAP | + STMMAC_FLAG_SPH_DISABLE; + + return devm_stmmac_pltfr_probe(pdev, plat_dat, &stmmac_res); +} + +static const struct of_device_id renesas_gbeth_match[] = { + { .compatible = "renesas,rzv2h-gbeth", }, + { /* Sentinel */ } +}; +MODULE_DEVICE_TABLE(of, renesas_gbeth_match); + +static struct platform_driver renesas_gbeth_driver = { + .probe = renesas_gbeth_probe, + .driver = { + .name = "renesas-gbeth", + .of_match_table = renesas_gbeth_match, + }, +}; +module_platform_driver(renesas_gbeth_driver); + +MODULE_AUTHOR("Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>"); +MODULE_DESCRIPTION("Renesas GBETH DWMAC Specific Glue layer"); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c index 116855658559..59f90b123c5b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c @@ -50,6 +50,7 @@ struct socfpga_dwmac { u32 reg_offset; u32 reg_shift; struct device *dev; + struct plat_stmmacenet_data *plat_dat; struct regmap *sys_mgr_base_addr; struct reset_control *stmmac_rst; struct reset_control *stmmac_ocp_rst; @@ -233,10 +234,7 @@ err_node_put: static int socfpga_get_plat_phymode(struct socfpga_dwmac *dwmac) { - struct net_device *ndev = dev_get_drvdata(dwmac->dev); - struct stmmac_priv *priv = netdev_priv(ndev); - - return priv->plat->mac_interface; + return dwmac->plat_dat->mac_interface; } static void socfpga_sgmii_config(struct socfpga_dwmac *dwmac, bool enable) @@ -435,6 +433,13 @@ static struct phylink_pcs *socfpga_dwmac_select_pcs(struct stmmac_priv *priv, return priv->hw->phylink_pcs; } +static int socfpga_dwmac_init(struct platform_device *pdev, void *bsp_priv) +{ + struct socfpga_dwmac *dwmac = bsp_priv; + + return dwmac->ops->set_phy_mode(dwmac); +} + static int socfpga_dwmac_probe(struct platform_device *pdev) { struct plat_stmmacenet_data *plat_dat; @@ -442,8 +447,6 @@ static int socfpga_dwmac_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; int ret; struct socfpga_dwmac *dwmac; - struct net_device *ndev; - struct stmmac_priv *stpriv; const struct socfpga_dwmac_ops *ops; ops = device_get_match_data(&pdev->dev); @@ -479,9 +482,17 @@ static int socfpga_dwmac_probe(struct platform_device *pdev) return ret; } + /* The socfpga driver needs to control the stmmac reset to set the phy + * mode. Create a copy of the core reset handle so it can be used by + * the driver later. + */ + dwmac->stmmac_rst = plat_dat->stmmac_rst; dwmac->ops = ops; + dwmac->plat_dat = plat_dat; + plat_dat->bsp_priv = dwmac; plat_dat->fix_mac_speed = socfpga_dwmac_fix_mac_speed; + plat_dat->init = socfpga_dwmac_init; plat_dat->pcs_init = socfpga_dwmac_pcs_init; plat_dat->pcs_exit = socfpga_dwmac_pcs_exit; plat_dat->select_pcs = socfpga_dwmac_select_pcs; @@ -489,66 +500,8 @@ static int socfpga_dwmac_probe(struct platform_device *pdev) plat_dat->riwt_off = 1; - ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); - if (ret) - return ret; - - ndev = platform_get_drvdata(pdev); - stpriv = netdev_priv(ndev); - - /* The socfpga driver needs to control the stmmac reset to set the phy - * mode. Create a copy of the core reset handle so it can be used by - * the driver later. - */ - dwmac->stmmac_rst = stpriv->plat->stmmac_rst; - - ret = ops->set_phy_mode(dwmac); - if (ret) - goto err_dvr_remove; - - return 0; - -err_dvr_remove: - stmmac_dvr_remove(&pdev->dev); - - return ret; -} - -#ifdef CONFIG_PM_SLEEP -static int socfpga_dwmac_resume(struct device *dev) -{ - struct net_device *ndev = dev_get_drvdata(dev); - struct stmmac_priv *priv = netdev_priv(ndev); - struct socfpga_dwmac *dwmac_priv = get_stmmac_bsp_priv(dev); - - dwmac_priv->ops->set_phy_mode(priv->plat->bsp_priv); - - return stmmac_resume(dev); + return devm_stmmac_pltfr_probe(pdev, plat_dat, &stmmac_res); } -#endif /* CONFIG_PM_SLEEP */ - -static int __maybe_unused socfpga_dwmac_runtime_suspend(struct device *dev) -{ - struct net_device *ndev = dev_get_drvdata(dev); - struct stmmac_priv *priv = netdev_priv(ndev); - - stmmac_bus_clks_config(priv, false); - - return 0; -} - -static int __maybe_unused socfpga_dwmac_runtime_resume(struct device *dev) -{ - struct net_device *ndev = dev_get_drvdata(dev); - struct stmmac_priv *priv = netdev_priv(ndev); - - return stmmac_bus_clks_config(priv, true); -} - -static const struct dev_pm_ops socfpga_dwmac_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(stmmac_suspend, socfpga_dwmac_resume) - SET_RUNTIME_PM_OPS(socfpga_dwmac_runtime_suspend, socfpga_dwmac_runtime_resume, NULL) -}; static const struct socfpga_dwmac_ops socfpga_gen5_ops = { .set_phy_mode = socfpga_gen5_set_phy_mode, @@ -567,10 +520,9 @@ MODULE_DEVICE_TABLE(of, socfpga_dwmac_match); static struct platform_driver socfpga_dwmac_driver = { .probe = socfpga_dwmac_probe, - .remove = stmmac_pltfr_remove, .driver = { .name = "socfpga-dwmac", - .pm = &socfpga_dwmac_pm_ops, + .pm = &stmmac_pltfr_pm_ops, .of_match_table = socfpga_dwmac_match, }, }; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c index 33cf99797df5..5e6ac82a89b9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c @@ -51,21 +51,14 @@ struct visconti_eth { u32 phy_intf_sel; struct clk *phy_ref_clk; struct device *dev; - spinlock_t lock; /* lock to protect register update */ }; -static void visconti_eth_fix_mac_speed(void *priv, int speed, unsigned int mode) +static int visconti_eth_set_clk_tx_rate(void *bsp_priv, struct clk *clk_tx_i, + phy_interface_t interface, int speed) { - struct visconti_eth *dwmac = priv; + struct visconti_eth *dwmac = bsp_priv; struct net_device *netdev = dev_get_drvdata(dwmac->dev); unsigned int val, clk_sel_val = 0; - unsigned long flags; - - spin_lock_irqsave(&dwmac->lock, flags); - - /* adjust link */ - val = readl(dwmac->reg + MAC_CTRL_REG); - val &= ~(GMAC_CONFIG_PS | GMAC_CONFIG_FES); switch (speed) { case SPEED_1000: @@ -77,24 +70,19 @@ static void visconti_eth_fix_mac_speed(void *priv, int speed, unsigned int mode) clk_sel_val = ETHER_CLK_SEL_FREQ_SEL_25M; if (dwmac->phy_intf_sel == ETHER_CONFIG_INTF_RMII) clk_sel_val = ETHER_CLK_SEL_DIV_SEL_2; - val |= GMAC_CONFIG_PS | GMAC_CONFIG_FES; break; case SPEED_10: if (dwmac->phy_intf_sel == ETHER_CONFIG_INTF_RGMII) clk_sel_val = ETHER_CLK_SEL_FREQ_SEL_2P5M; if (dwmac->phy_intf_sel == ETHER_CONFIG_INTF_RMII) clk_sel_val = ETHER_CLK_SEL_DIV_SEL_20; - val |= GMAC_CONFIG_PS; break; default: /* No bit control */ netdev_err(netdev, "Unsupported speed request (%d)", speed); - spin_unlock_irqrestore(&dwmac->lock, flags); - return; + return -EINVAL; } - writel(val, dwmac->reg + MAC_CTRL_REG); - /* Stop internal clock */ val = readl(dwmac->reg + REG_ETHER_CLOCK_SEL); val &= ~(ETHER_CLK_SEL_RMII_CLK_EN | ETHER_CLK_SEL_RX_TX_CLK_EN); @@ -136,7 +124,7 @@ static void visconti_eth_fix_mac_speed(void *priv, int speed, unsigned int mode) break; } - spin_unlock_irqrestore(&dwmac->lock, flags); + return 0; } static int visconti_eth_init_hw(struct platform_device *pdev, struct plat_stmmacenet_data *plat_dat) @@ -228,11 +216,10 @@ static int visconti_eth_dwmac_probe(struct platform_device *pdev) if (!dwmac) return -ENOMEM; - spin_lock_init(&dwmac->lock); dwmac->reg = stmmac_res.addr; dwmac->dev = &pdev->dev; plat_dat->bsp_priv = dwmac; - plat_dat->fix_mac_speed = visconti_eth_fix_mac_speed; + plat_dat->set_clk_tx_rate = visconti_eth_set_clk_tx_rate; ret = visconti_eth_clock_probe(pdev, plat_dat); if (ret) diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c index 18422b940dbe..2a808afeb414 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c @@ -3000,6 +3000,33 @@ netdev_features_t wx_fix_features(struct net_device *netdev, } EXPORT_SYMBOL(wx_fix_features); +#define WX_MAX_TUNNEL_HDR_LEN 80 +netdev_features_t wx_features_check(struct sk_buff *skb, + struct net_device *netdev, + netdev_features_t features) +{ + struct wx *wx = netdev_priv(netdev); + + if (!skb->encapsulation) + return features; + + if (wx->mac.type == wx_mac_em) + return features & ~NETIF_F_CSUM_MASK; + + if (unlikely(skb_inner_mac_header(skb) - skb_transport_header(skb) > + WX_MAX_TUNNEL_HDR_LEN)) + return features & ~NETIF_F_CSUM_MASK; + + if (skb->inner_protocol_type == ENCAP_TYPE_ETHER && + skb->inner_protocol != htons(ETH_P_IP) && + skb->inner_protocol != htons(ETH_P_IPV6) && + skb->inner_protocol != htons(ETH_P_TEB)) + return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); + + return features; +} +EXPORT_SYMBOL(wx_features_check); + void wx_set_ring(struct wx *wx, u32 new_tx_count, u32 new_rx_count, struct wx_ring *temp_ring) { diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.h b/drivers/net/ethernet/wangxun/libwx/wx_lib.h index fdeb0c315b75..919f49999308 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_lib.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.h @@ -33,6 +33,9 @@ void wx_get_stats64(struct net_device *netdev, int wx_set_features(struct net_device *netdev, netdev_features_t features); netdev_features_t wx_fix_features(struct net_device *netdev, netdev_features_t features); +netdev_features_t wx_features_check(struct sk_buff *skb, + struct net_device *netdev, + netdev_features_t features); void wx_set_ring(struct wx *wx, u32 new_tx_count, u32 new_rx_count, struct wx_ring *temp_ring); diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c index 9c18203c8ce1..b5022c49dc5e 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c @@ -587,6 +587,7 @@ static const struct net_device_ops ngbe_netdev_ops = { .ndo_set_rx_mode = wx_set_rx_mode, .ndo_set_features = wx_set_features, .ndo_fix_features = wx_fix_features, + .ndo_features_check = wx_features_check, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = wx_set_mac, .ndo_get_stats64 = wx_get_stats64, diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c index d40db6b42200..f57d84628e07 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c @@ -8,6 +8,7 @@ #include <linux/string.h> #include <linux/etherdevice.h> #include <linux/phylink.h> +#include <net/udp_tunnel.h> #include <net/ip.h> #include <linux/if_vlan.h> @@ -537,6 +538,39 @@ void txgbe_do_reset(struct net_device *netdev) txgbe_reset(wx); } +static int txgbe_udp_tunnel_sync(struct net_device *dev, unsigned int table) +{ + struct wx *wx = netdev_priv(dev); + struct udp_tunnel_info ti; + + udp_tunnel_nic_get_port(dev, table, 0, &ti); + switch (ti.type) { + case UDP_TUNNEL_TYPE_VXLAN: + wr32(wx, TXGBE_CFG_VXLAN, ntohs(ti.port)); + break; + case UDP_TUNNEL_TYPE_VXLAN_GPE: + wr32(wx, TXGBE_CFG_VXLAN_GPE, ntohs(ti.port)); + break; + case UDP_TUNNEL_TYPE_GENEVE: + wr32(wx, TXGBE_CFG_GENEVE, ntohs(ti.port)); + break; + default: + break; + } + + return 0; +} + +static const struct udp_tunnel_nic_info txgbe_udp_tunnels = { + .sync_table = txgbe_udp_tunnel_sync, + .flags = UDP_TUNNEL_NIC_INFO_OPEN_ONLY, + .tables = { + { .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, }, + { .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN_GPE, }, + { .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_GENEVE, }, + }, +}; + static const struct net_device_ops txgbe_netdev_ops = { .ndo_open = txgbe_open, .ndo_stop = txgbe_close, @@ -545,6 +579,7 @@ static const struct net_device_ops txgbe_netdev_ops = { .ndo_set_rx_mode = wx_set_rx_mode, .ndo_set_features = wx_set_features, .ndo_fix_features = wx_fix_features, + .ndo_features_check = wx_features_check, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = wx_set_mac, .ndo_get_stats64 = wx_get_stats64, @@ -632,6 +667,7 @@ static int txgbe_probe(struct pci_dev *pdev, wx->driver_name = txgbe_driver_name; txgbe_set_ethtool_ops(netdev); netdev->netdev_ops = &txgbe_netdev_ops; + netdev->udp_tunnel_nic_info = &txgbe_udp_tunnels; /* setup the private structure */ err = txgbe_sw_init(wx); @@ -677,6 +713,7 @@ static int txgbe_probe(struct pci_dev *pdev, netdev->features |= NETIF_F_HIGHDMA; netdev->hw_features |= NETIF_F_GRO; netdev->features |= NETIF_F_GRO; + netdev->features |= NETIF_F_RX_UDP_TUNNEL_PORT; netdev->priv_flags |= IFF_UNICAST_FLT; netdev->priv_flags |= IFF_SUPP_NOFCS; diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h index 5937cbc6bd05..cb553318641d 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h @@ -88,6 +88,9 @@ /* Port cfg registers */ #define TXGBE_CFG_PORT_ST 0x14404 #define TXGBE_CFG_PORT_ST_LINK_UP BIT(0) +#define TXGBE_CFG_VXLAN 0x14410 +#define TXGBE_CFG_VXLAN_GPE 0x14414 +#define TXGBE_CFG_GENEVE 0x14418 /* I2C registers */ #define TXGBE_I2C_BASE 0x14900 diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 054abf283ab3..1b7a653c1f4e 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -2980,7 +2980,7 @@ static int axienet_probe(struct platform_device *pdev) } } if (!IS_ENABLED(CONFIG_64BIT) && lp->features & XAE_FEATURE_DMA_64BIT) { - dev_err(&pdev->dev, "64-bit addressable DMA is not compatible with 32-bit archecture\n"); + dev_err(&pdev->dev, "64-bit addressable DMA is not compatible with 32-bit architecture\n"); ret = -EINVAL; goto cleanup_clk; } diff --git a/drivers/net/mdio/Kconfig b/drivers/net/mdio/Kconfig index 058fcdaf6c18..38a4901da32f 100644 --- a/drivers/net/mdio/Kconfig +++ b/drivers/net/mdio/Kconfig @@ -57,6 +57,7 @@ config MDIO_SUN4I config MDIO_XGENE tristate "APM X-Gene SoC MDIO bus controller" depends on ARCH_XGENE || COMPILE_TEST + depends on PHYLIB help This module provides a driver for the MDIO busses found in the APM X-Gene SoC's. diff --git a/drivers/net/pfcp.c b/drivers/net/pfcp.c index f873a92d2445..28e6bc4a1f14 100644 --- a/drivers/net/pfcp.c +++ b/drivers/net/pfcp.c @@ -245,30 +245,21 @@ static int __net_init pfcp_net_init(struct net *net) return 0; } -static void __net_exit pfcp_net_exit(struct net *net) +static void __net_exit pfcp_net_exit_rtnl(struct net *net, + struct list_head *dev_to_kill) { struct pfcp_net *pn = net_generic(net, pfcp_net_id); struct pfcp_dev *pfcp, *pfcp_next; - struct net_device *dev; - LIST_HEAD(list); - - rtnl_lock(); - for_each_netdev(net, dev) - if (dev->rtnl_link_ops == &pfcp_link_ops) - pfcp_dellink(dev, &list); list_for_each_entry_safe(pfcp, pfcp_next, &pn->pfcp_dev_list, list) - pfcp_dellink(pfcp->dev, &list); - - unregister_netdevice_many(&list); - rtnl_unlock(); + pfcp_dellink(pfcp->dev, dev_to_kill); } static struct pernet_operations pfcp_net_ops = { - .init = pfcp_net_init, - .exit = pfcp_net_exit, - .id = &pfcp_net_id, - .size = sizeof(struct pfcp_net), + .init = pfcp_net_init, + .exit_rtnl = pfcp_net_exit_rtnl, + .id = &pfcp_net_id, + .size = sizeof(struct pfcp_net), }; static int __init pfcp_init(void) diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c index 14f361549638..490c9f4e5d4e 100644 --- a/drivers/net/phy/dp83822.c +++ b/drivers/net/phy/dp83822.c @@ -33,6 +33,7 @@ #define MII_DP83822_MLEDCR 0x25 #define MII_DP83822_LDCTRL 0x403 #define MII_DP83822_LEDCFG1 0x460 +#define MII_DP83822_IOCTRL 0x461 #define MII_DP83822_IOCTRL1 0x462 #define MII_DP83822_IOCTRL2 0x463 #define MII_DP83822_GENCFG 0x465 @@ -118,6 +119,9 @@ #define DP83822_LEDCFG1_LED1_CTRL GENMASK(11, 8) #define DP83822_LEDCFG1_LED3_CTRL GENMASK(7, 4) +/* IOCTRL bits */ +#define DP83822_IOCTRL_MAC_IMPEDANCE_CTRL GENMASK(4, 1) + /* IOCTRL1 bits */ #define DP83822_IOCTRL1_GPIO3_CTRL GENMASK(10, 8) #define DP83822_IOCTRL1_GPIO3_CTRL_LED3 BIT(0) @@ -202,6 +206,7 @@ struct dp83822_private { u32 gpio2_clk_out; bool led_pin_enable[DP83822_MAX_LED_PINS]; int tx_amplitude_100base_tx_index; + int mac_termination_index; }; static int dp83822_config_wol(struct phy_device *phydev, @@ -533,6 +538,12 @@ static int dp83822_config_init(struct phy_device *phydev) FIELD_PREP(DP83822_100BASE_TX_LINE_DRIVER_SWING, dp83822->tx_amplitude_100base_tx_index)); + if (dp83822->mac_termination_index >= 0) + phy_modify_mmd(phydev, MDIO_MMD_VEND2, MII_DP83822_IOCTRL, + DP83822_IOCTRL_MAC_IMPEDANCE_CTRL, + FIELD_PREP(DP83822_IOCTRL_MAC_IMPEDANCE_CTRL, + dp83822->mac_termination_index)); + err = dp83822_config_init_leds(phydev); if (err) return err; @@ -736,6 +747,10 @@ static const u32 tx_amplitude_100base_tx_gain[] = { 93, 95, 97, 98, 100, 102, 103, 105, }; +static const u32 mac_termination[] = { + 99, 91, 84, 78, 73, 69, 65, 61, 58, 55, 53, 50, 48, 46, 44, 43, +}; + static int dp83822_of_init_leds(struct phy_device *phydev) { struct device_node *node = phydev->mdio.dev.of_node; @@ -852,6 +867,23 @@ static int dp83822_of_init(struct phy_device *phydev) } } + ret = phy_get_mac_termination(phydev, dev, &val); + if (!ret) { + for (i = 0; i < ARRAY_SIZE(mac_termination); i++) { + if (mac_termination[i] == val) { + dp83822->mac_termination_index = i; + break; + } + } + + if (dp83822->mac_termination_index < 0) { + phydev_err(phydev, + "Invalid value for mac-termination-ohms property (%u)\n", + val); + return -EINVAL; + } + } + return dp83822_of_init_leds(phydev); } @@ -931,6 +963,7 @@ static int dp8382x_probe(struct phy_device *phydev) return -ENOMEM; dp83822->tx_amplitude_100base_tx_index = -1; + dp83822->mac_termination_index = -1; phydev->priv = dp83822; return 0; diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index cc6c209fe702..f85c172c446c 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -2975,6 +2975,21 @@ int phy_get_tx_amplitude_gain(struct phy_device *phydev, struct device *dev, } EXPORT_SYMBOL_GPL(phy_get_tx_amplitude_gain); +/** + * phy_get_mac_termination - stores MAC termination in @val + * @phydev: phy_device struct + * @dev: pointer to the devices device struct + * @val: MAC termination + * + * Returns: 0 on success, < 0 on failure + */ +int phy_get_mac_termination(struct phy_device *phydev, struct device *dev, + u32 *val) +{ + return phy_get_u32_property(dev, "mac-termination-ohms", val); +} +EXPORT_SYMBOL_GPL(phy_get_mac_termination); + static int phy_led_set_brightness(struct led_classdev *led_cdev, enum led_brightness value) { diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 53463767cc43..def84e87e05b 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -1131,6 +1131,8 @@ static const struct file_operations ppp_device_fops = { .llseek = noop_llseek, }; +static void ppp_nl_dellink(struct net_device *dev, struct list_head *head); + static __net_init int ppp_init_net(struct net *net) { struct ppp_net *pn = net_generic(net, ppp_net_id); @@ -1146,28 +1148,20 @@ static __net_init int ppp_init_net(struct net *net) return 0; } -static __net_exit void ppp_exit_net(struct net *net) +static __net_exit void ppp_exit_rtnl_net(struct net *net, + struct list_head *dev_to_kill) { struct ppp_net *pn = net_generic(net, ppp_net_id); - struct net_device *dev; - struct net_device *aux; struct ppp *ppp; - LIST_HEAD(list); int id; - rtnl_lock(); - for_each_netdev_safe(net, dev, aux) { - if (dev->netdev_ops == &ppp_netdev_ops) - unregister_netdevice_queue(dev, &list); - } - idr_for_each_entry(&pn->units_idr, ppp, id) - /* Skip devices already unregistered by previous loop */ - if (!net_eq(dev_net(ppp->dev), net)) - unregister_netdevice_queue(ppp->dev, &list); + ppp_nl_dellink(ppp->dev, dev_to_kill); +} - unregister_netdevice_many(&list); - rtnl_unlock(); +static __net_exit void ppp_exit_net(struct net *net) +{ + struct ppp_net *pn = net_generic(net, ppp_net_id); mutex_destroy(&pn->all_ppp_mutex); idr_destroy(&pn->units_idr); @@ -1177,6 +1171,7 @@ static __net_exit void ppp_exit_net(struct net *net) static struct pernet_operations ppp_net_ops = { .init = ppp_init_net, + .exit_rtnl = ppp_exit_rtnl_net, .exit = ppp_exit_net, .id = &ppp_net_id, .size = sizeof(struct ppp_net), diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 56aee539c235..a56d7239b127 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -15,6 +15,7 @@ #include <linux/igmp.h> #include <linux/if_ether.h> #include <linux/ethtool.h> +#include <linux/rhashtable.h> #include <net/arp.h> #include <net/ndisc.h> #include <net/gro.h> @@ -63,8 +64,12 @@ static int vxlan_sock_add(struct vxlan_dev *vxlan); static void vxlan_vs_del_dev(struct vxlan_dev *vxlan); -/* salt for hash table */ -static u32 vxlan_salt __read_mostly; +static const struct rhashtable_params vxlan_fdb_rht_params = { + .head_offset = offsetof(struct vxlan_fdb, rhnode), + .key_offset = offsetof(struct vxlan_fdb, key), + .key_len = sizeof(struct vxlan_fdb_key), + .automatic_shrinking = true, +}; static inline bool vxlan_collect_metadata(struct vxlan_sock *vs) { @@ -186,7 +191,7 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan, } else if (nh) { ndm->ndm_family = nh_family; } - send_eth = !is_zero_ether_addr(fdb->eth_addr); + send_eth = !is_zero_ether_addr(fdb->key.eth_addr); } else ndm->ndm_family = AF_BRIDGE; ndm->ndm_state = fdb->state; @@ -201,7 +206,7 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan, peernet2id(dev_net(vxlan->dev), vxlan->net))) goto nla_put_failure; - if (send_eth && nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->eth_addr)) + if (send_eth && nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->key.eth_addr)) goto nla_put_failure; if (nh) { if (nla_put_u32(skb, NDA_NH_ID, nh_id)) @@ -223,9 +228,9 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan, goto nla_put_failure; } - if ((vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA) && fdb->vni && + if ((vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA) && fdb->key.vni && nla_put_u32(skb, NDA_SRC_VNI, - be32_to_cpu(fdb->vni))) + be32_to_cpu(fdb->key.vni))) goto nla_put_failure; ci.ndm_used = jiffies_to_clock_t(now - READ_ONCE(fdb->used)); @@ -293,8 +298,8 @@ static void vxlan_fdb_switchdev_notifier_info(const struct vxlan_dev *vxlan, fdb_info->remote_port = rd->remote_port; fdb_info->remote_vni = rd->remote_vni; fdb_info->remote_ifindex = rd->remote_ifindex; - memcpy(fdb_info->eth_addr, fdb->eth_addr, ETH_ALEN); - fdb_info->vni = fdb->vni; + memcpy(fdb_info->eth_addr, fdb->key.eth_addr, ETH_ALEN); + fdb_info->vni = fdb->key.vni; fdb_info->offloaded = rd->offloaded; fdb_info->added_by_user = fdb->flags & NTF_VXLAN_ADDED_BY_USER; } @@ -366,67 +371,42 @@ static void vxlan_fdb_miss(struct vxlan_dev *vxlan, const u8 eth_addr[ETH_ALEN]) }; struct vxlan_rdst remote = { }; - memcpy(f.eth_addr, eth_addr, ETH_ALEN); + memcpy(f.key.eth_addr, eth_addr, ETH_ALEN); vxlan_fdb_notify(vxlan, &f, &remote, RTM_GETNEIGH, true, NULL); } -/* Hash Ethernet address */ -static u32 eth_hash(const unsigned char *addr) -{ - u64 value = get_unaligned((u64 *)addr); - - /* only want 6 bytes */ -#ifdef __BIG_ENDIAN - value >>= 16; -#else - value <<= 16; -#endif - return hash_64(value, FDB_HASH_BITS); -} - -u32 eth_vni_hash(const unsigned char *addr, __be32 vni) +/* Look up Ethernet address in forwarding table */ +static struct vxlan_fdb *vxlan_find_mac_rcu(struct vxlan_dev *vxlan, + const u8 *mac, __be32 vni) { - /* use 1 byte of OUI and 3 bytes of NIC */ - u32 key = get_unaligned((u32 *)(addr + 2)); - - return jhash_2words(key, vni, vxlan_salt) & (FDB_HASH_SIZE - 1); -} + struct vxlan_fdb_key key; -u32 fdb_head_index(struct vxlan_dev *vxlan, const u8 *mac, __be32 vni) -{ - if (vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA) - return eth_vni_hash(mac, vni); + memset(&key, 0, sizeof(key)); + memcpy(key.eth_addr, mac, sizeof(key.eth_addr)); + if (!(vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA)) + key.vni = vxlan->default_dst.remote_vni; else - return eth_hash(mac); -} + key.vni = vni; -/* Hash chain to use given mac address */ -static inline struct hlist_head *vxlan_fdb_head(struct vxlan_dev *vxlan, - const u8 *mac, __be32 vni) -{ - return &vxlan->fdb_head[fdb_head_index(vxlan, mac, vni)]; + return rhashtable_lookup(&vxlan->fdb_hash_tbl, &key, + vxlan_fdb_rht_params); } -/* Look up Ethernet address in forwarding table */ -static struct vxlan_fdb *__vxlan_find_mac(struct vxlan_dev *vxlan, - const u8 *mac, __be32 vni) +static struct vxlan_fdb *vxlan_find_mac_tx(struct vxlan_dev *vxlan, + const u8 *mac, __be32 vni) { - struct hlist_head *head = vxlan_fdb_head(vxlan, mac, vni); struct vxlan_fdb *f; - hlist_for_each_entry_rcu(f, head, hlist) { - if (ether_addr_equal(mac, f->eth_addr)) { - if (vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA) { - if (vni == f->vni) - return f; - } else { - return f; - } - } + f = vxlan_find_mac_rcu(vxlan, mac, vni); + if (f) { + unsigned long now = jiffies; + + if (READ_ONCE(f->used) != now) + WRITE_ONCE(f->used, now); } - return NULL; + return f; } static struct vxlan_fdb *vxlan_find_mac(struct vxlan_dev *vxlan, @@ -434,13 +414,11 @@ static struct vxlan_fdb *vxlan_find_mac(struct vxlan_dev *vxlan, { struct vxlan_fdb *f; - f = __vxlan_find_mac(vxlan, mac, vni); - if (f) { - unsigned long now = jiffies; + lockdep_assert_held_once(&vxlan->hash_lock); - if (READ_ONCE(f->used) != now) - WRITE_ONCE(f->used, now); - } + rcu_read_lock(); + f = vxlan_find_mac_rcu(vxlan, mac, vni); + rcu_read_unlock(); return f; } @@ -480,7 +458,7 @@ int vxlan_fdb_find_uc(struct net_device *dev, const u8 *mac, __be32 vni, rcu_read_lock(); - f = __vxlan_find_mac(vxlan, eth_addr, vni); + f = vxlan_find_mac_rcu(vxlan, eth_addr, vni); if (!f) { rc = -ENOENT; goto out; @@ -517,32 +495,28 @@ int vxlan_fdb_replay(const struct net_device *dev, __be32 vni, struct vxlan_dev *vxlan; struct vxlan_rdst *rdst; struct vxlan_fdb *f; - unsigned int h; int rc = 0; if (!netif_is_vxlan(dev)) return -EINVAL; vxlan = netdev_priv(dev); - for (h = 0; h < FDB_HASH_SIZE; ++h) { - spin_lock_bh(&vxlan->hash_lock[h]); - hlist_for_each_entry(f, &vxlan->fdb_head[h], hlist) { - if (f->vni == vni) { - list_for_each_entry(rdst, &f->remotes, list) { - rc = vxlan_fdb_notify_one(nb, vxlan, - f, rdst, - extack); - if (rc) - goto unlock; - } + spin_lock_bh(&vxlan->hash_lock); + hlist_for_each_entry(f, &vxlan->fdb_list, fdb_node) { + if (f->key.vni == vni) { + list_for_each_entry(rdst, &f->remotes, list) { + rc = vxlan_fdb_notify_one(nb, vxlan, f, rdst, + extack); + if (rc) + goto unlock; } } - spin_unlock_bh(&vxlan->hash_lock[h]); } + spin_unlock_bh(&vxlan->hash_lock); return 0; unlock: - spin_unlock_bh(&vxlan->hash_lock[h]); + spin_unlock_bh(&vxlan->hash_lock); return rc; } EXPORT_SYMBOL_GPL(vxlan_fdb_replay); @@ -552,20 +526,19 @@ void vxlan_fdb_clear_offload(const struct net_device *dev, __be32 vni) struct vxlan_dev *vxlan; struct vxlan_rdst *rdst; struct vxlan_fdb *f; - unsigned int h; if (!netif_is_vxlan(dev)) return; vxlan = netdev_priv(dev); - for (h = 0; h < FDB_HASH_SIZE; ++h) { - spin_lock_bh(&vxlan->hash_lock[h]); - hlist_for_each_entry(f, &vxlan->fdb_head[h], hlist) - if (f->vni == vni) - list_for_each_entry(rdst, &f->remotes, list) - rdst->offloaded = false; - spin_unlock_bh(&vxlan->hash_lock[h]); + spin_lock_bh(&vxlan->hash_lock); + hlist_for_each_entry(f, &vxlan->fdb_list, fdb_node) { + if (f->key.vni == vni) { + list_for_each_entry(rdst, &f->remotes, list) + rdst->offloaded = false; + } } + spin_unlock_bh(&vxlan->hash_lock); } EXPORT_SYMBOL_GPL(vxlan_fdb_clear_offload); @@ -610,10 +583,10 @@ static int vxlan_fdb_append(struct vxlan_fdb *f, if (rd == NULL) return -ENOMEM; - if (dst_cache_init(&rd->dst_cache, GFP_ATOMIC)) { - kfree(rd); - return -ENOMEM; - } + /* The driver can work correctly without a dst cache, so do not treat + * dst cache initialization errors as fatal. + */ + dst_cache_init(&rd->dst_cache, GFP_ATOMIC | __GFP_NOWARN); rd->remote_ip = *ip; rd->remote_port = port; @@ -803,27 +776,20 @@ static struct vxlan_fdb *vxlan_fdb_alloc(struct vxlan_dev *vxlan, const u8 *mac, f = kmalloc(sizeof(*f), GFP_ATOMIC); if (!f) return NULL; + memset(&f->key, 0, sizeof(f->key)); f->state = state; f->flags = ndm_flags; f->updated = f->used = jiffies; - f->vni = src_vni; + f->key.vni = src_vni; f->nh = NULL; RCU_INIT_POINTER(f->vdev, vxlan); INIT_LIST_HEAD(&f->nh_list); INIT_LIST_HEAD(&f->remotes); - memcpy(f->eth_addr, mac, ETH_ALEN); + memcpy(f->key.eth_addr, mac, ETH_ALEN); return f; } -static void vxlan_fdb_insert(struct vxlan_dev *vxlan, const u8 *mac, - __be32 src_vni, struct vxlan_fdb *f) -{ - ++vxlan->addrcnt; - hlist_add_head_rcu(&f->hlist, - vxlan_fdb_head(vxlan, mac, src_vni)); -} - static int vxlan_fdb_nh_update(struct vxlan_dev *vxlan, struct vxlan_fdb *fdb, u32 nhid, struct netlink_ext_ack *extack) { @@ -913,10 +879,27 @@ int vxlan_fdb_create(struct vxlan_dev *vxlan, if (rc < 0) goto errout; + rc = rhashtable_lookup_insert_fast(&vxlan->fdb_hash_tbl, &f->rhnode, + vxlan_fdb_rht_params); + if (rc) + goto destroy_remote; + + ++vxlan->addrcnt; + hlist_add_head_rcu(&f->fdb_node, &vxlan->fdb_list); + *fdb = f; return 0; +destroy_remote: + if (rcu_access_pointer(f->nh)) { + list_del_rcu(&f->nh_list); + nexthop_put(rtnl_dereference(f->nh)); + } else { + list_del(&rd->list); + dst_cache_destroy(&rd->dst_cache); + kfree(rd); + } errout: kfree(f); return rc; @@ -953,7 +936,7 @@ static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f, { struct vxlan_rdst *rd; - netdev_dbg(vxlan->dev, "delete %pM\n", f->eth_addr); + netdev_dbg(vxlan->dev, "delete %pM\n", f->key.eth_addr); --vxlan->addrcnt; if (do_notify) { @@ -966,7 +949,9 @@ static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f, swdev_notify, NULL); } - hlist_del_rcu(&f->hlist); + hlist_del_init_rcu(&f->fdb_node); + rhashtable_remove_fast(&vxlan->fdb_hash_tbl, &f->rhnode, + vxlan_fdb_rht_params); list_del_rcu(&f->nh_list); call_rcu(&f->rcu, vxlan_fdb_free); } @@ -1024,8 +1009,8 @@ static int vxlan_fdb_update_existing(struct vxlan_dev *vxlan, if ((flags & NLM_F_REPLACE)) { /* Only change unicasts */ - if (!(is_multicast_ether_addr(f->eth_addr) || - is_zero_ether_addr(f->eth_addr))) { + if (!(is_multicast_ether_addr(f->key.eth_addr) || + is_zero_ether_addr(f->key.eth_addr))) { if (nhid) { rc = vxlan_fdb_nh_update(vxlan, f, nhid, extack); if (rc < 0) @@ -1041,8 +1026,8 @@ static int vxlan_fdb_update_existing(struct vxlan_dev *vxlan, } } if ((flags & NLM_F_APPEND) && - (is_multicast_ether_addr(f->eth_addr) || - is_zero_ether_addr(f->eth_addr))) { + (is_multicast_ether_addr(f->key.eth_addr) || + is_zero_ether_addr(f->key.eth_addr))) { rc = vxlan_fdb_append(f, ip, port, vni, ifindex, &rd); if (rc < 0) @@ -1101,7 +1086,6 @@ static int vxlan_fdb_update_create(struct vxlan_dev *vxlan, if (rc < 0) return rc; - vxlan_fdb_insert(vxlan, mac, src_vni, f); rc = vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_NEWNEIGH, swdev_notify, extack); if (rc) @@ -1125,7 +1109,7 @@ int vxlan_fdb_update(struct vxlan_dev *vxlan, { struct vxlan_fdb *f; - f = __vxlan_find_mac(vxlan, mac, src_vni); + f = vxlan_find_mac(vxlan, mac, src_vni); if (f) { if (flags & NLM_F_EXCL) { netdev_dbg(vxlan->dev, @@ -1253,7 +1237,6 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], __be16 port; __be32 src_vni, vni; u32 ifindex, nhid; - u32 hash_index; int err; if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_REACHABLE))) { @@ -1273,13 +1256,12 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], if (vxlan->default_dst.remote_ip.sa.sa_family != ip.sa.sa_family) return -EAFNOSUPPORT; - hash_index = fdb_head_index(vxlan, addr, src_vni); - spin_lock_bh(&vxlan->hash_lock[hash_index]); + spin_lock_bh(&vxlan->hash_lock); err = vxlan_fdb_update(vxlan, addr, &ip, ndm->ndm_state, flags, port, src_vni, vni, ifindex, ndm->ndm_flags | NTF_VXLAN_ADDED_BY_USER, nhid, true, extack); - spin_unlock_bh(&vxlan->hash_lock[hash_index]); + spin_unlock_bh(&vxlan->hash_lock); if (!err) *notified = true; @@ -1296,7 +1278,7 @@ int __vxlan_fdb_delete(struct vxlan_dev *vxlan, struct vxlan_fdb *f; int err = -ENOENT; - f = __vxlan_find_mac(vxlan, addr, src_vni); + f = vxlan_find_mac(vxlan, addr, src_vni); if (!f) return err; @@ -1330,7 +1312,6 @@ static int vxlan_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], union vxlan_addr ip; __be32 src_vni, vni; u32 ifindex, nhid; - u32 hash_index; __be16 port; int err; @@ -1339,11 +1320,10 @@ static int vxlan_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], if (err) return err; - hash_index = fdb_head_index(vxlan, addr, src_vni); - spin_lock_bh(&vxlan->hash_lock[hash_index]); + spin_lock_bh(&vxlan->hash_lock); err = __vxlan_fdb_delete(vxlan, addr, ip, port, src_vni, vni, ifindex, true); - spin_unlock_bh(&vxlan->hash_lock[hash_index]); + spin_unlock_bh(&vxlan->hash_lock); if (!err) *notified = true; @@ -1358,52 +1338,46 @@ static int vxlan_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, { struct ndo_fdb_dump_context *ctx = (void *)cb->ctx; struct vxlan_dev *vxlan = netdev_priv(dev); - unsigned int h; + struct vxlan_fdb *f; int err = 0; - for (h = 0; h < FDB_HASH_SIZE; ++h) { - struct vxlan_fdb *f; - - rcu_read_lock(); - hlist_for_each_entry_rcu(f, &vxlan->fdb_head[h], hlist) { - struct vxlan_rdst *rd; - - if (rcu_access_pointer(f->nh)) { - if (*idx < ctx->fdb_idx) - goto skip_nh; - err = vxlan_fdb_info(skb, vxlan, f, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - RTM_NEWNEIGH, - NLM_F_MULTI, NULL); - if (err < 0) { - rcu_read_unlock(); - goto out; - } -skip_nh: - *idx += 1; - continue; + rcu_read_lock(); + hlist_for_each_entry_rcu(f, &vxlan->fdb_list, fdb_node) { + struct vxlan_rdst *rd; + + if (rcu_access_pointer(f->nh)) { + if (*idx < ctx->fdb_idx) + goto skip_nh; + err = vxlan_fdb_info(skb, vxlan, f, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + RTM_NEWNEIGH, NLM_F_MULTI, NULL); + if (err < 0) { + rcu_read_unlock(); + goto out; } +skip_nh: + *idx += 1; + continue; + } - list_for_each_entry_rcu(rd, &f->remotes, list) { - if (*idx < ctx->fdb_idx) - goto skip; - - err = vxlan_fdb_info(skb, vxlan, f, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - RTM_NEWNEIGH, - NLM_F_MULTI, rd); - if (err < 0) { - rcu_read_unlock(); - goto out; - } -skip: - *idx += 1; + list_for_each_entry_rcu(rd, &f->remotes, list) { + if (*idx < ctx->fdb_idx) + goto skip; + + err = vxlan_fdb_info(skb, vxlan, f, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + RTM_NEWNEIGH, NLM_F_MULTI, rd); + if (err < 0) { + rcu_read_unlock(); + goto out; } +skip: + *idx += 1; } - rcu_read_unlock(); } + rcu_read_unlock(); out: return err; } @@ -1427,7 +1401,7 @@ static int vxlan_fdb_get(struct sk_buff *skb, rcu_read_lock(); - f = __vxlan_find_mac(vxlan, addr, vni); + f = vxlan_find_mac_rcu(vxlan, addr, vni); if (!f) { NL_SET_ERR_MSG(extack, "Fdb entry not found"); err = -ENOENT; @@ -1463,7 +1437,7 @@ static enum skb_drop_reason vxlan_snoop(struct net_device *dev, ifindex = src_ifindex; #endif - f = __vxlan_find_mac(vxlan, src_mac, vni); + f = vxlan_find_mac_rcu(vxlan, src_mac, vni); if (likely(f)) { struct vxlan_rdst *rdst = first_remote_rcu(f); unsigned long now = jiffies; @@ -1491,10 +1465,8 @@ static enum skb_drop_reason vxlan_snoop(struct net_device *dev, rdst->remote_ip = *src_ip; vxlan_fdb_notify(vxlan, f, rdst, RTM_NEWNEIGH, true, NULL); } else { - u32 hash_index = fdb_head_index(vxlan, src_mac, vni); - /* learned new entry */ - spin_lock(&vxlan->hash_lock[hash_index]); + spin_lock(&vxlan->hash_lock); /* close off race between vxlan_flush and incoming packets */ if (netif_running(dev)) @@ -1505,7 +1477,7 @@ static enum skb_drop_reason vxlan_snoop(struct net_device *dev, vni, vxlan->default_dst.remote_vni, ifindex, NTF_SELF, 0, true, NULL); - spin_unlock(&vxlan->hash_lock[hash_index]); + spin_unlock(&vxlan->hash_lock); } return SKB_NOT_DROPPED_YET; @@ -1916,12 +1888,15 @@ static int arp_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni) goto out; } - f = vxlan_find_mac(vxlan, n->ha, vni); + rcu_read_lock(); + f = vxlan_find_mac_tx(vxlan, n->ha, vni); if (f && vxlan_addr_any(&(first_remote_rcu(f)->remote_ip))) { /* bridge-local neighbor */ neigh_release(n); + rcu_read_unlock(); goto out; } + rcu_read_unlock(); reply = arp_create(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip, sha, n->ha, sha); @@ -2080,7 +2055,7 @@ static int neigh_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni) goto out; } - f = vxlan_find_mac(vxlan, n->ha, vni); + f = vxlan_find_mac_tx(vxlan, n->ha, vni); if (f && vxlan_addr_any(&(first_remote_rcu(f)->remote_ip))) { /* bridge-local neighbor */ neigh_release(n); @@ -2648,14 +2623,10 @@ static void vxlan_xmit_nh(struct sk_buff *skb, struct net_device *dev, memset(&nh_rdst, 0, sizeof(struct vxlan_rdst)); hash = skb_get_hash(skb); - rcu_read_lock(); nh = rcu_dereference(f->nh); - if (!nh) { - rcu_read_unlock(); + if (!nh) goto drop; - } do_xmit = vxlan_fdb_nh_path_select(nh, hash, &nh_rdst); - rcu_read_unlock(); if (likely(do_xmit)) vxlan_xmit_one(skb, dev, vni, &nh_rdst, did_rsc); @@ -2782,7 +2753,8 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) } eth = eth_hdr(skb); - f = vxlan_find_mac(vxlan, eth->h_dest, vni); + rcu_read_lock(); + f = vxlan_find_mac_tx(vxlan, eth->h_dest, vni); did_rsc = false; if (f && (f->flags & NTF_ROUTER) && (vxlan->cfg.flags & VXLAN_F_RSC) && @@ -2790,11 +2762,11 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) ntohs(eth->h_proto) == ETH_P_IPV6)) { did_rsc = route_shortcircuit(dev, skb); if (did_rsc) - f = vxlan_find_mac(vxlan, eth->h_dest, vni); + f = vxlan_find_mac_tx(vxlan, eth->h_dest, vni); } if (f == NULL) { - f = vxlan_find_mac(vxlan, all_zeros_mac, vni); + f = vxlan_find_mac_tx(vxlan, all_zeros_mac, vni); if (f == NULL) { if ((vxlan->cfg.flags & VXLAN_F_L2MISS) && !is_multicast_ether_addr(eth->h_dest)) @@ -2804,7 +2776,7 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) vxlan_vnifilter_count(vxlan, vni, NULL, VXLAN_VNI_STATS_TX_DROPS, 0); kfree_skb_reason(skb, SKB_DROP_REASON_NO_TX_TARGET); - return NETDEV_TX_OK; + goto out; } } @@ -2829,6 +2801,8 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) kfree_skb_reason(skb, SKB_DROP_REASON_NO_TX_TARGET); } +out: + rcu_read_unlock(); return NETDEV_TX_OK; } @@ -2837,38 +2811,36 @@ static void vxlan_cleanup(struct timer_list *t) { struct vxlan_dev *vxlan = from_timer(vxlan, t, age_timer); unsigned long next_timer = jiffies + FDB_AGE_INTERVAL; - unsigned int h; + struct vxlan_fdb *f; if (!netif_running(vxlan->dev)) return; - for (h = 0; h < FDB_HASH_SIZE; ++h) { - struct hlist_node *p, *n; - - spin_lock(&vxlan->hash_lock[h]); - hlist_for_each_safe(p, n, &vxlan->fdb_head[h]) { - struct vxlan_fdb *f - = container_of(p, struct vxlan_fdb, hlist); - unsigned long timeout; + rcu_read_lock(); + hlist_for_each_entry_rcu(f, &vxlan->fdb_list, fdb_node) { + unsigned long timeout; - if (f->state & (NUD_PERMANENT | NUD_NOARP)) - continue; + if (f->state & (NUD_PERMANENT | NUD_NOARP)) + continue; - if (f->flags & NTF_EXT_LEARNED) - continue; + if (f->flags & NTF_EXT_LEARNED) + continue; - timeout = READ_ONCE(f->updated) + vxlan->cfg.age_interval * HZ; - if (time_before_eq(timeout, jiffies)) { - netdev_dbg(vxlan->dev, - "garbage collect %pM\n", - f->eth_addr); + timeout = READ_ONCE(f->updated) + vxlan->cfg.age_interval * HZ; + if (time_before_eq(timeout, jiffies)) { + spin_lock(&vxlan->hash_lock); + if (!hlist_unhashed(&f->fdb_node)) { + netdev_dbg(vxlan->dev, "garbage collect %pM\n", + f->key.eth_addr); f->state = NUD_STALE; vxlan_fdb_destroy(vxlan, f, true, true); - } else if (time_before(timeout, next_timer)) - next_timer = timeout; + } + spin_unlock(&vxlan->hash_lock); + } else if (time_before(timeout, next_timer)) { + next_timer = timeout; } - spin_unlock(&vxlan->hash_lock[h]); } + rcu_read_unlock(); mod_timer(&vxlan->age_timer, next_timer); } @@ -2903,10 +2875,14 @@ static int vxlan_init(struct net_device *dev) struct vxlan_dev *vxlan = netdev_priv(dev); int err; + err = rhashtable_init(&vxlan->fdb_hash_tbl, &vxlan_fdb_rht_params); + if (err) + return err; + if (vxlan->cfg.flags & VXLAN_F_VNIFILTER) { err = vxlan_vnigroup_init(vxlan); if (err) - return err; + goto err_rhashtable_destroy; } err = gro_cells_init(&vxlan->gro_cells, dev); @@ -2925,21 +2901,11 @@ err_gro_cells_destroy: err_vnigroup_uninit: if (vxlan->cfg.flags & VXLAN_F_VNIFILTER) vxlan_vnigroup_uninit(vxlan); +err_rhashtable_destroy: + rhashtable_destroy(&vxlan->fdb_hash_tbl); return err; } -static void vxlan_fdb_delete_default(struct vxlan_dev *vxlan, __be32 vni) -{ - struct vxlan_fdb *f; - u32 hash_index = fdb_head_index(vxlan, all_zeros_mac, vni); - - spin_lock_bh(&vxlan->hash_lock[hash_index]); - f = __vxlan_find_mac(vxlan, all_zeros_mac, vni); - if (f) - vxlan_fdb_destroy(vxlan, f, true, true); - spin_unlock_bh(&vxlan->hash_lock[hash_index]); -} - static void vxlan_uninit(struct net_device *dev) { struct vxlan_dev *vxlan = netdev_priv(dev); @@ -2951,7 +2917,7 @@ static void vxlan_uninit(struct net_device *dev) gro_cells_destroy(&vxlan->gro_cells); - vxlan_fdb_delete_default(vxlan, vxlan->cfg.vni); + rhashtable_destroy(&vxlan->fdb_hash_tbl); } /* Start ageing timer and join group when device is brought up */ @@ -2992,7 +2958,8 @@ struct vxlan_fdb_flush_desc { static bool vxlan_fdb_is_default_entry(const struct vxlan_fdb *f, const struct vxlan_dev *vxlan) { - return is_zero_ether_addr(f->eth_addr) && f->vni == vxlan->cfg.vni; + return is_zero_ether_addr(f->key.eth_addr) && + f->key.vni == vxlan->cfg.vni; } static bool vxlan_fdb_nhid_matches(const struct vxlan_fdb *f, u32 nhid) @@ -3015,7 +2982,7 @@ static bool vxlan_fdb_flush_matches(const struct vxlan_fdb *f, if (desc->ignore_default_entry && vxlan_fdb_is_default_entry(f, vxlan)) return false; - if (desc->src_vni && f->vni != desc->src_vni) + if (desc->src_vni && f->key.vni != desc->src_vni) return false; if (desc->nhid && !vxlan_fdb_nhid_matches(f, desc->nhid)) @@ -3071,33 +3038,32 @@ static void vxlan_flush(struct vxlan_dev *vxlan, const struct vxlan_fdb_flush_desc *desc) { bool match_remotes = vxlan_fdb_flush_should_match_remotes(desc); - unsigned int h; - - for (h = 0; h < FDB_HASH_SIZE; ++h) { - struct hlist_node *p, *n; - - spin_lock_bh(&vxlan->hash_lock[h]); - hlist_for_each_safe(p, n, &vxlan->fdb_head[h]) { - struct vxlan_fdb *f - = container_of(p, struct vxlan_fdb, hlist); + struct vxlan_fdb *f; - if (!vxlan_fdb_flush_matches(f, vxlan, desc)) - continue; + rcu_read_lock(); + hlist_for_each_entry_rcu(f, &vxlan->fdb_list, fdb_node) { + if (!vxlan_fdb_flush_matches(f, vxlan, desc)) + continue; - if (match_remotes) { - bool destroy_fdb = false; + spin_lock_bh(&vxlan->hash_lock); + if (hlist_unhashed(&f->fdb_node)) + goto unlock; - vxlan_fdb_flush_match_remotes(f, vxlan, desc, - &destroy_fdb); + if (match_remotes) { + bool destroy_fdb = false; - if (!destroy_fdb) - continue; - } + vxlan_fdb_flush_match_remotes(f, vxlan, desc, + &destroy_fdb); - vxlan_fdb_destroy(vxlan, f, true, true); + if (!destroy_fdb) + goto unlock; } - spin_unlock_bh(&vxlan->hash_lock[h]); + + vxlan_fdb_destroy(vxlan, f, true, true); +unlock: + spin_unlock_bh(&vxlan->hash_lock); } + rcu_read_unlock(); } static const struct nla_policy vxlan_del_bulk_policy[NDA_MAX + 1] = { @@ -3185,7 +3151,7 @@ static int vxlan_stop(struct net_device *dev) { struct vxlan_dev *vxlan = netdev_priv(dev); struct vxlan_fdb_flush_desc desc = { - /* Default entry is deleted at vxlan_uninit. */ + /* Default entry is deleted at vxlan_dellink. */ .ignore_default_entry = true, .state = 0, .state_mask = NUD_PERMANENT | NUD_NOARP, @@ -3348,7 +3314,6 @@ static void vxlan_offload_rx_ports(struct net_device *dev, bool push) static void vxlan_setup(struct net_device *dev) { struct vxlan_dev *vxlan = netdev_priv(dev); - unsigned int h; eth_hw_addr_random(dev); ether_setup(dev); @@ -3375,15 +3340,13 @@ static void vxlan_setup(struct net_device *dev) dev->pcpu_stat_type = NETDEV_PCPU_STAT_DSTATS; INIT_LIST_HEAD(&vxlan->next); + spin_lock_init(&vxlan->hash_lock); timer_setup(&vxlan->age_timer, vxlan_cleanup, TIMER_DEFERRABLE); vxlan->dev = dev; - for (h = 0; h < FDB_HASH_SIZE; ++h) { - spin_lock_init(&vxlan->hash_lock[h]); - INIT_HLIST_HEAD(&vxlan->fdb_head[h]); - } + INIT_HLIST_HEAD(&vxlan->fdb_list); } static void vxlan_ether_setup(struct net_device *dev) @@ -3960,8 +3923,6 @@ static int __vxlan_dev_create(struct net *net, struct net_device *dev, struct vxlan_net *vn = net_generic(net, vxlan_net_id); struct vxlan_dev *vxlan = netdev_priv(dev); struct net_device *remote_dev = NULL; - struct vxlan_fdb *f = NULL; - bool unregister = false; struct vxlan_rdst *dst; int err; @@ -3972,72 +3933,54 @@ static int __vxlan_dev_create(struct net *net, struct net_device *dev, dev->ethtool_ops = &vxlan_ethtool_ops; - /* create an fdb entry for a valid default destination */ - if (!vxlan_addr_any(&dst->remote_ip)) { - err = vxlan_fdb_create(vxlan, all_zeros_mac, - &dst->remote_ip, - NUD_REACHABLE | NUD_PERMANENT, - vxlan->cfg.dst_port, - dst->remote_vni, - dst->remote_vni, - dst->remote_ifindex, - NTF_SELF, 0, &f, extack); - if (err) - return err; - } - err = register_netdevice(dev); if (err) - goto errout; - unregister = true; + return err; if (dst->remote_ifindex) { remote_dev = __dev_get_by_index(net, dst->remote_ifindex); if (!remote_dev) { err = -ENODEV; - goto errout; + goto unregister; } err = netdev_upper_dev_link(remote_dev, dev, extack); if (err) - goto errout; + goto unregister; + + dst->remote_dev = remote_dev; } err = rtnl_configure_link(dev, NULL, 0, NULL); if (err < 0) goto unlink; - if (f) { - vxlan_fdb_insert(vxlan, all_zeros_mac, dst->remote_vni, f); - - /* notify default fdb entry */ - err = vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), - RTM_NEWNEIGH, true, extack); - if (err) { - vxlan_fdb_destroy(vxlan, f, false, false); - if (remote_dev) - netdev_upper_dev_unlink(remote_dev, dev); - goto unregister; - } + /* create an fdb entry for a valid default destination */ + if (!vxlan_addr_any(&dst->remote_ip)) { + spin_lock_bh(&vxlan->hash_lock); + err = vxlan_fdb_update(vxlan, all_zeros_mac, + &dst->remote_ip, + NUD_REACHABLE | NUD_PERMANENT, + NLM_F_EXCL | NLM_F_CREATE, + vxlan->cfg.dst_port, + dst->remote_vni, + dst->remote_vni, + dst->remote_ifindex, + NTF_SELF, 0, true, extack); + spin_unlock_bh(&vxlan->hash_lock); + if (err) + goto unlink; } list_add(&vxlan->next, &vn->vxlan_list); - if (remote_dev) - dst->remote_dev = remote_dev; + return 0; + unlink: if (remote_dev) netdev_upper_dev_unlink(remote_dev, dev); -errout: - /* unregister_netdevice() destroys the default FDB entry with deletion - * notification. But the addition notification was not sent yet, so - * destroy the entry by hand here. - */ - if (f) - __vxlan_fdb_free(f); unregister: - if (unregister) - unregister_netdevice(dev); + unregister_netdevice(dev); return err; } @@ -4454,9 +4397,7 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[], /* handle default dst entry */ if (rem_ip_changed) { - u32 hash_index = fdb_head_index(vxlan, all_zeros_mac, conf.vni); - - spin_lock_bh(&vxlan->hash_lock[hash_index]); + spin_lock_bh(&vxlan->hash_lock); if (!vxlan_addr_any(&conf.remote_ip)) { err = vxlan_fdb_update(vxlan, all_zeros_mac, &conf.remote_ip, @@ -4467,7 +4408,7 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[], conf.remote_ifindex, NTF_SELF, 0, true, extack); if (err) { - spin_unlock_bh(&vxlan->hash_lock[hash_index]); + spin_unlock_bh(&vxlan->hash_lock); netdev_adjacent_change_abort(dst->remote_dev, lowerdev, dev); return err; @@ -4481,7 +4422,7 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[], dst->remote_vni, dst->remote_ifindex, true); - spin_unlock_bh(&vxlan->hash_lock[hash_index]); + spin_unlock_bh(&vxlan->hash_lock); /* If vni filtering device, also update fdb entries of * all vnis that were using default remote ip @@ -4518,10 +4459,7 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[], static void vxlan_dellink(struct net_device *dev, struct list_head *head) { struct vxlan_dev *vxlan = netdev_priv(dev); - struct vxlan_fdb_flush_desc desc = { - /* Default entry is deleted at vxlan_uninit. */ - .ignore_default_entry = true, - }; + struct vxlan_fdb_flush_desc desc = {}; vxlan_flush(vxlan, &desc); @@ -4784,13 +4722,10 @@ vxlan_fdb_offloaded_set(struct net_device *dev, struct vxlan_dev *vxlan = netdev_priv(dev); struct vxlan_rdst *rdst; struct vxlan_fdb *f; - u32 hash_index; - hash_index = fdb_head_index(vxlan, fdb_info->eth_addr, fdb_info->vni); + spin_lock_bh(&vxlan->hash_lock); - spin_lock_bh(&vxlan->hash_lock[hash_index]); - - f = __vxlan_find_mac(vxlan, fdb_info->eth_addr, fdb_info->vni); + f = vxlan_find_mac(vxlan, fdb_info->eth_addr, fdb_info->vni); if (!f) goto out; @@ -4804,7 +4739,7 @@ vxlan_fdb_offloaded_set(struct net_device *dev, rdst->offloaded = fdb_info->offloaded; out: - spin_unlock_bh(&vxlan->hash_lock[hash_index]); + spin_unlock_bh(&vxlan->hash_lock); } static int @@ -4813,13 +4748,11 @@ vxlan_fdb_external_learn_add(struct net_device *dev, { struct vxlan_dev *vxlan = netdev_priv(dev); struct netlink_ext_ack *extack; - u32 hash_index; int err; - hash_index = fdb_head_index(vxlan, fdb_info->eth_addr, fdb_info->vni); extack = switchdev_notifier_info_to_extack(&fdb_info->info); - spin_lock_bh(&vxlan->hash_lock[hash_index]); + spin_lock_bh(&vxlan->hash_lock); err = vxlan_fdb_update(vxlan, fdb_info->eth_addr, &fdb_info->remote_ip, NUD_REACHABLE, NLM_F_CREATE | NLM_F_REPLACE, @@ -4829,7 +4762,7 @@ vxlan_fdb_external_learn_add(struct net_device *dev, fdb_info->remote_ifindex, NTF_USE | NTF_SELF | NTF_EXT_LEARNED, 0, false, extack); - spin_unlock_bh(&vxlan->hash_lock[hash_index]); + spin_unlock_bh(&vxlan->hash_lock); return err; } @@ -4840,13 +4773,11 @@ vxlan_fdb_external_learn_del(struct net_device *dev, { struct vxlan_dev *vxlan = netdev_priv(dev); struct vxlan_fdb *f; - u32 hash_index; int err = 0; - hash_index = fdb_head_index(vxlan, fdb_info->eth_addr, fdb_info->vni); - spin_lock_bh(&vxlan->hash_lock[hash_index]); + spin_lock_bh(&vxlan->hash_lock); - f = __vxlan_find_mac(vxlan, fdb_info->eth_addr, fdb_info->vni); + f = vxlan_find_mac(vxlan, fdb_info->eth_addr, fdb_info->vni); if (!f) err = -ENOENT; else if (f->flags & NTF_EXT_LEARNED) @@ -4858,7 +4789,7 @@ vxlan_fdb_external_learn_del(struct net_device *dev, fdb_info->remote_ifindex, false); - spin_unlock_bh(&vxlan->hash_lock[hash_index]); + spin_unlock_bh(&vxlan->hash_lock); return err; } @@ -4907,18 +4838,15 @@ static void vxlan_fdb_nh_flush(struct nexthop *nh) { struct vxlan_fdb *fdb; struct vxlan_dev *vxlan; - u32 hash_index; rcu_read_lock(); list_for_each_entry_rcu(fdb, &nh->fdb_list, nh_list) { vxlan = rcu_dereference(fdb->vdev); WARN_ON(!vxlan); - hash_index = fdb_head_index(vxlan, fdb->eth_addr, - vxlan->default_dst.remote_vni); - spin_lock_bh(&vxlan->hash_lock[hash_index]); - if (!hlist_unhashed(&fdb->hlist)) + spin_lock_bh(&vxlan->hash_lock); + if (!hlist_unhashed(&fdb->fdb_node)) vxlan_fdb_destroy(vxlan, fdb, false, false); - spin_unlock_bh(&vxlan->hash_lock[hash_index]); + spin_unlock_bh(&vxlan->hash_lock); } rcu_read_unlock(); } @@ -4998,8 +4926,6 @@ static int __init vxlan_init_module(void) { int rc; - get_random_bytes(&vxlan_salt, sizeof(vxlan_salt)); - rc = register_pernet_subsys(&vxlan_net_ops); if (rc) goto out1; diff --git a/drivers/net/vxlan/vxlan_private.h b/drivers/net/vxlan/vxlan_private.h index 76a351a997d5..d328aed9feef 100644 --- a/drivers/net/vxlan/vxlan_private.h +++ b/drivers/net/vxlan/vxlan_private.h @@ -24,18 +24,23 @@ struct vxlan_net { struct notifier_block nexthop_notifier_block; }; +struct vxlan_fdb_key { + u8 eth_addr[ETH_ALEN]; + __be32 vni; +}; + /* Forwarding table entry */ struct vxlan_fdb { - struct hlist_node hlist; /* linked list of entries */ + struct rhash_head rhnode; struct rcu_head rcu; unsigned long updated; /* jiffies */ unsigned long used; struct list_head remotes; - u8 eth_addr[ETH_ALEN]; + struct vxlan_fdb_key key; u16 state; /* see ndm_state */ - __be32 vni; u16 flags; /* see ndm_flags and below */ struct list_head nh_list; + struct hlist_node fdb_node; struct nexthop __rcu *nh; struct vxlan_dev __rcu *vdev; }; diff --git a/drivers/net/vxlan/vxlan_vnifilter.c b/drivers/net/vxlan/vxlan_vnifilter.c index d0753776d339..336cd0e20309 100644 --- a/drivers/net/vxlan/vxlan_vnifilter.c +++ b/drivers/net/vxlan/vxlan_vnifilter.c @@ -482,11 +482,9 @@ static int vxlan_update_default_fdb_entry(struct vxlan_dev *vxlan, __be32 vni, struct netlink_ext_ack *extack) { struct vxlan_rdst *dst = &vxlan->default_dst; - u32 hash_index; int err = 0; - hash_index = fdb_head_index(vxlan, all_zeros_mac, vni); - spin_lock_bh(&vxlan->hash_lock[hash_index]); + spin_lock_bh(&vxlan->hash_lock); if (remote_ip && !vxlan_addr_any(remote_ip)) { err = vxlan_fdb_update(vxlan, all_zeros_mac, remote_ip, @@ -498,7 +496,7 @@ static int vxlan_update_default_fdb_entry(struct vxlan_dev *vxlan, __be32 vni, dst->remote_ifindex, NTF_SELF, 0, true, extack); if (err) { - spin_unlock_bh(&vxlan->hash_lock[hash_index]); + spin_unlock_bh(&vxlan->hash_lock); return err; } } @@ -511,7 +509,7 @@ static int vxlan_update_default_fdb_entry(struct vxlan_dev *vxlan, __be32 vni, dst->remote_ifindex, true); } - spin_unlock_bh(&vxlan->hash_lock[hash_index]); + spin_unlock_bh(&vxlan->hash_lock); return err; } diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig index 07bf7f9aae01..204278eb215e 100644 --- a/drivers/ptp/Kconfig +++ b/drivers/ptp/Kconfig @@ -44,7 +44,7 @@ config PTP_1588_CLOCK_DTE depends on PTP_1588_CLOCK depends on NET && HAS_IOMEM depends on ARCH_BCM_MOBILE || (ARCH_BCM_IPROC && !(ARCH_BCM_NSP || ARCH_BCM_5301X)) || COMPILE_TEST - default y + default y if ARCH_BCM_MOBILE || ARCH_BCM_IPROC help This driver adds support for using the Digital timing engine (DTE) in the Broadcom SoC's as a PTP clock. @@ -59,7 +59,7 @@ config PTP_1588_CLOCK_QORIQ tristate "Freescale QorIQ 1588 timer as PTP clock" depends on GIANFAR || FSL_DPAA_ETH || FSL_DPAA2_ETH || FSL_ENETC || FSL_ENETC_VF || COMPILE_TEST depends on PTP_1588_CLOCK - default y + default y if GIANFAR || FSL_DPAA_ETH || FSL_DPAA2_ETH || FSL_ENETC || FSL_ENETC_VF help This driver adds support for using the Freescale QorIQ 1588 timer as a PTP clock. This clock is only useful if your PTP diff --git a/drivers/s390/net/ism_drv.c b/drivers/s390/net/ism_drv.c index 60ed70a39d2c..b7f15f303ea2 100644 --- a/drivers/s390/net/ism_drv.c +++ b/drivers/s390/net/ism_drv.c @@ -611,7 +611,7 @@ static int ism_probe(struct pci_dev *pdev, const struct pci_device_id *id) ism->dev.parent = &pdev->dev; ism->dev.release = ism_dev_release; device_initialize(&ism->dev); - dev_set_name(&ism->dev, dev_name(&pdev->dev)); + dev_set_name(&ism->dev, "%s", dev_name(&pdev->dev)); ret = device_add(&ism->dev); if (ret) goto err_dev; diff --git a/include/linux/btf.h b/include/linux/btf.h index ebc0c0c9b944..b2983706292f 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -522,6 +522,7 @@ bool btf_param_match_suffix(const struct btf *btf, const char *suffix); int btf_ctx_arg_offset(const struct btf *btf, const struct btf_type *func_proto, u32 arg_no); +u32 btf_ctx_arg_idx(struct btf *btf, const struct btf_type *func_proto, int off); struct bpf_verifier_log; diff --git a/include/linux/phy.h b/include/linux/phy.h index fb755358d965..3beaf225ee88 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1753,7 +1753,6 @@ int phy_modify_paged(struct phy_device *phydev, int page, u32 regnum, struct phy_device *phy_device_create(struct mii_bus *bus, int addr, u32 phy_id, bool is_c45, struct phy_c45_device_ids *c45_ids); -#if IS_ENABLED(CONFIG_PHYLIB) int fwnode_get_phy_id(struct fwnode_handle *fwnode, u32 *phy_id); struct mdio_device *fwnode_mdio_find_device(struct fwnode_handle *fwnode); struct phy_device *fwnode_phy_find_device(struct fwnode_handle *phy_fwnode); @@ -1761,42 +1760,6 @@ struct fwnode_handle *fwnode_get_phy_node(const struct fwnode_handle *fwnode); struct phy_device *get_phy_device(struct mii_bus *bus, int addr, bool is_c45); int phy_device_register(struct phy_device *phy); void phy_device_free(struct phy_device *phydev); -#else -static inline int fwnode_get_phy_id(struct fwnode_handle *fwnode, u32 *phy_id) -{ - return 0; -} -static inline -struct mdio_device *fwnode_mdio_find_device(struct fwnode_handle *fwnode) -{ - return 0; -} - -static inline -struct phy_device *fwnode_phy_find_device(struct fwnode_handle *phy_fwnode) -{ - return NULL; -} - -static inline -struct fwnode_handle *fwnode_get_phy_node(struct fwnode_handle *fwnode) -{ - return NULL; -} - -static inline -struct phy_device *get_phy_device(struct mii_bus *bus, int addr, bool is_c45) -{ - return NULL; -} - -static inline int phy_device_register(struct phy_device *phy) -{ - return 0; -} - -static inline void phy_device_free(struct phy_device *phydev) { } -#endif /* CONFIG_PHYLIB */ void phy_device_remove(struct phy_device *phydev); int phy_get_c45_ids(struct phy_device *phydev); int phy_init_hw(struct phy_device *phydev); @@ -2040,6 +2003,9 @@ int phy_get_tx_amplitude_gain(struct phy_device *phydev, struct device *dev, enum ethtool_link_mode_bit_indices linkmode, u32 *val); +int phy_get_mac_termination(struct phy_device *phydev, struct device *dev, + u32 *val); + void phy_resolve_pause(unsigned long *local_adv, unsigned long *partner_adv, bool *tx_pause, bool *rx_pause); diff --git a/include/net/p8022.h b/include/net/p8022.h deleted file mode 100644 index a29e224ac498..000000000000 --- a/include/net/p8022.h +++ /dev/null @@ -1,16 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _NET_P8022_H -#define _NET_P8022_H - -struct net_device; -struct packet_type; -struct sk_buff; - -struct datalink_proto * -register_8022_client(unsigned char type, - int (*func)(struct sk_buff *skb, - struct net_device *dev, - struct packet_type *pt, - struct net_device *orig_dev)); -void unregister_8022_client(struct datalink_proto *proto); -#endif diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 2dd23ee2bacd..e2f7ca045d3e 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -296,7 +296,7 @@ struct vxlan_dev { struct vxlan_rdst default_dst; /* default destination */ struct timer_list age_timer; - spinlock_t hash_lock[FDB_HASH_SIZE]; + spinlock_t hash_lock; unsigned int addrcnt; struct gro_cells gro_cells; @@ -304,9 +304,10 @@ struct vxlan_dev { struct vxlan_vni_group __rcu *vnigrp; - struct hlist_head fdb_head[FDB_HASH_SIZE]; + struct rhashtable fdb_hash_tbl; struct rhashtable mdb_tbl; + struct hlist_head fdb_list; struct hlist_head mdb_list; unsigned int mdb_seq; }; diff --git a/include/net/xdp.h b/include/net/xdp.h index 20e41b5ff319..b40f1f96cb11 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -618,7 +618,10 @@ bool bpf_dev_bound_kfunc_id(u32 btf_id); void xdp_set_features_flag(struct net_device *dev, xdp_features_t val); void xdp_set_features_flag_locked(struct net_device *dev, xdp_features_t val); void xdp_features_set_redirect_target(struct net_device *dev, bool support_sg); +void xdp_features_set_redirect_target_locked(struct net_device *dev, + bool support_sg); void xdp_features_clear_redirect_target(struct net_device *dev); +void xdp_features_clear_redirect_target_locked(struct net_device *dev); #else static inline u32 bpf_xdp_metadata_kfunc_id(int id) { return 0; } static inline bool bpf_dev_bound_kfunc_id(u32 btf_id) { return false; } diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 24a26b4bb0b8..324c47ab377a 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -6391,8 +6391,8 @@ static bool is_int_ptr(struct btf *btf, const struct btf_type *t) return btf_type_is_int(t); } -static u32 get_ctx_arg_idx(struct btf *btf, const struct btf_type *func_proto, - int off) +u32 btf_ctx_arg_idx(struct btf *btf, const struct btf_type *func_proto, + int off) { const struct btf_param *args; const struct btf_type *t; @@ -6672,7 +6672,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, tname, off); return false; } - arg = get_ctx_arg_idx(btf, t, off); + arg = btf_ctx_arg_idx(btf, t, off); args = (const struct btf_param *)(t + 1); /* if (t == NULL) Fall back to default BPF prog with * MAX_BPF_FUNC_REG_ARGS u64 arguments. diff --git a/net/802/Makefile b/net/802/Makefile index bfed80221b8b..99abc29d537c 100644 --- a/net/802/Makefile +++ b/net/802/Makefile @@ -3,12 +3,11 @@ # Makefile for the Linux 802.x protocol layers. # -# Check the p8022 selections against net/core/Makefile. -obj-$(CONFIG_LLC) += p8022.o psnap.o +obj-$(CONFIG_LLC) += psnap.o obj-$(CONFIG_NET_FC) += fc.o obj-$(CONFIG_FDDI) += fddi.o obj-$(CONFIG_HIPPI) += hippi.o -obj-$(CONFIG_ATALK) += p8022.o psnap.o +obj-$(CONFIG_ATALK) += psnap.o obj-$(CONFIG_STP) += stp.o obj-$(CONFIG_GARP) += garp.o obj-$(CONFIG_MRP) += mrp.o diff --git a/net/802/p8022.c b/net/802/p8022.c deleted file mode 100644 index 78c25168d7c9..000000000000 --- a/net/802/p8022.c +++ /dev/null @@ -1,64 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * NET3: Support for 802.2 demultiplexing off Ethernet - * - * Demultiplex 802.2 encoded protocols. We match the entry by the - * SSAP/DSAP pair and then deliver to the registered datalink that - * matches. The control byte is ignored and handling of such items - * is up to the routine passed the frame. - * - * Unlike the 802.3 datalink we have a list of 802.2 entries as - * there are multiple protocols to demux. The list is currently - * short (3 or 4 entries at most). The current demux assumes this. - */ -#include <linux/module.h> -#include <linux/netdevice.h> -#include <linux/skbuff.h> -#include <linux/slab.h> -#include <net/datalink.h> -#include <linux/mm.h> -#include <linux/in.h> -#include <linux/init.h> -#include <net/llc.h> -#include <net/p8022.h> - -static int p8022_request(struct datalink_proto *dl, struct sk_buff *skb, - const unsigned char *dest) -{ - llc_build_and_send_ui_pkt(dl->sap, skb, dest, dl->sap->laddr.lsap); - return 0; -} - -struct datalink_proto *register_8022_client(unsigned char type, - int (*func)(struct sk_buff *skb, - struct net_device *dev, - struct packet_type *pt, - struct net_device *orig_dev)) -{ - struct datalink_proto *proto; - - proto = kmalloc(sizeof(*proto), GFP_ATOMIC); - if (proto) { - proto->type[0] = type; - proto->header_length = 3; - proto->request = p8022_request; - proto->sap = llc_sap_open(type, func); - if (!proto->sap) { - kfree(proto); - proto = NULL; - } - } - return proto; -} - -void unregister_8022_client(struct datalink_proto *proto) -{ - llc_sap_put(proto->sap); - kfree(proto); -} - -EXPORT_SYMBOL(register_8022_client); -EXPORT_SYMBOL(unregister_8022_client); - -MODULE_DESCRIPTION("Support for 802.2 demultiplexing off Ethernet"); -MODULE_LICENSE("GPL"); diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 41be38264493..06908e37c3d9 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -23,7 +23,6 @@ #include <linux/slab.h> #include <linux/init.h> #include <linux/rculist.h> -#include <net/p8022.h> #include <net/arp.h> #include <linux/rtnetlink.h> #include <linux/notifier.h> diff --git a/net/bridge/br_mst.c b/net/bridge/br_mst.c index 1820f09ff59c..3f24b4ee49c2 100644 --- a/net/bridge/br_mst.c +++ b/net/bridge/br_mst.c @@ -80,10 +80,10 @@ static void br_mst_vlan_set_state(struct net_bridge_vlan_group *vg, if (br_vlan_get_state(v) == state) return; - br_vlan_set_state(v, state); - if (v->vid == vg->pvid) br_vlan_set_pvid_state(vg, state); + + br_vlan_set_state(v, state); } int br_mst_set_state(struct net_bridge_port *p, u16 msti, u8 state, diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index dcbf058de1e3..7e0b2362b9ee 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -2105,12 +2105,17 @@ static void __br_multicast_enable_port_ctx(struct net_bridge_mcast_port *pmctx) } } -void br_multicast_enable_port(struct net_bridge_port *port) +static void br_multicast_enable_port_ctx(struct net_bridge_mcast_port *pmctx) { - struct net_bridge *br = port->br; + struct net_bridge *br = pmctx->port->br; spin_lock_bh(&br->multicast_lock); - __br_multicast_enable_port_ctx(&port->multicast_ctx); + if (br_multicast_port_ctx_is_vlan(pmctx) && + !(pmctx->vlan->priv_flags & BR_VLFLAG_MCAST_ENABLED)) { + spin_unlock_bh(&br->multicast_lock); + return; + } + __br_multicast_enable_port_ctx(pmctx); spin_unlock_bh(&br->multicast_lock); } @@ -2137,11 +2142,67 @@ static void __br_multicast_disable_port_ctx(struct net_bridge_mcast_port *pmctx) br_multicast_rport_del_notify(pmctx, del); } +static void br_multicast_disable_port_ctx(struct net_bridge_mcast_port *pmctx) +{ + struct net_bridge *br = pmctx->port->br; + + spin_lock_bh(&br->multicast_lock); + if (br_multicast_port_ctx_is_vlan(pmctx) && + !(pmctx->vlan->priv_flags & BR_VLFLAG_MCAST_ENABLED)) { + spin_unlock_bh(&br->multicast_lock); + return; + } + + __br_multicast_disable_port_ctx(pmctx); + spin_unlock_bh(&br->multicast_lock); +} + +static void br_multicast_toggle_port(struct net_bridge_port *port, bool on) +{ +#if IS_ENABLED(CONFIG_BRIDGE_VLAN_FILTERING) + if (br_opt_get(port->br, BROPT_MCAST_VLAN_SNOOPING_ENABLED)) { + struct net_bridge_vlan_group *vg; + struct net_bridge_vlan *vlan; + + rcu_read_lock(); + vg = nbp_vlan_group_rcu(port); + if (!vg) { + rcu_read_unlock(); + return; + } + + /* iterate each vlan, toggle vlan multicast context */ + list_for_each_entry_rcu(vlan, &vg->vlan_list, vlist) { + struct net_bridge_mcast_port *pmctx = + &vlan->port_mcast_ctx; + u8 state = br_vlan_get_state(vlan); + /* enable vlan multicast context when state is + * LEARNING or FORWARDING + */ + if (on && br_vlan_state_allowed(state, true)) + br_multicast_enable_port_ctx(pmctx); + else + br_multicast_disable_port_ctx(pmctx); + } + rcu_read_unlock(); + return; + } +#endif + /* toggle port multicast context when vlan snooping is disabled */ + if (on) + br_multicast_enable_port_ctx(&port->multicast_ctx); + else + br_multicast_disable_port_ctx(&port->multicast_ctx); +} + +void br_multicast_enable_port(struct net_bridge_port *port) +{ + br_multicast_toggle_port(port, true); +} + void br_multicast_disable_port(struct net_bridge_port *port) { - spin_lock_bh(&port->br->multicast_lock); - __br_multicast_disable_port_ctx(&port->multicast_ctx); - spin_unlock_bh(&port->br->multicast_lock); + br_multicast_toggle_port(port, false); } static int __grp_src_delete_marked(struct net_bridge_port_group *pg) @@ -4211,6 +4272,32 @@ static void __br_multicast_stop(struct net_bridge_mcast *brmctx) #endif } +void br_multicast_update_vlan_mcast_ctx(struct net_bridge_vlan *v, u8 state) +{ +#if IS_ENABLED(CONFIG_BRIDGE_VLAN_FILTERING) + struct net_bridge *br; + + if (!br_vlan_should_use(v)) + return; + + if (br_vlan_is_master(v)) + return; + + br = v->port->br; + + if (!br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED)) + return; + + if (br_vlan_state_allowed(state, true)) + br_multicast_enable_port_ctx(&v->port_mcast_ctx); + + /* Multicast is not disabled for the vlan when it goes in + * blocking state because the timers will expire and stop by + * themselves without sending more queries. + */ +#endif +} + void br_multicast_toggle_one_vlan(struct net_bridge_vlan *vlan, bool on) { struct net_bridge *br; @@ -4304,9 +4391,9 @@ int br_multicast_toggle_vlan_snooping(struct net_bridge *br, bool on, __br_multicast_open(&br->multicast_ctx); list_for_each_entry(p, &br->port_list, list) { if (on) - br_multicast_disable_port(p); + br_multicast_disable_port_ctx(&p->multicast_ctx); else - br_multicast_enable_port(p); + br_multicast_enable_port_ctx(&p->multicast_ctx); } list_for_each_entry(vlan, &vg->vlan_list, vlist) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 71f351a6ce1b..db1bddb330ff 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -1055,6 +1055,7 @@ void br_multicast_port_ctx_init(struct net_bridge_port *port, struct net_bridge_vlan *vlan, struct net_bridge_mcast_port *pmctx); void br_multicast_port_ctx_deinit(struct net_bridge_mcast_port *pmctx); +void br_multicast_update_vlan_mcast_ctx(struct net_bridge_vlan *v, u8 state); void br_multicast_toggle_one_vlan(struct net_bridge_vlan *vlan, bool on); int br_multicast_toggle_vlan_snooping(struct net_bridge *br, bool on, struct netlink_ext_ack *extack); @@ -1521,6 +1522,11 @@ static inline void br_multicast_port_ctx_deinit(struct net_bridge_mcast_port *pm { } +static inline void br_multicast_update_vlan_mcast_ctx(struct net_bridge_vlan *v, + u8 state) +{ +} + static inline void br_multicast_toggle_one_vlan(struct net_bridge_vlan *vlan, bool on) { @@ -1881,7 +1887,9 @@ bool br_vlan_global_opts_can_enter_range(const struct net_bridge_vlan *v_curr, bool br_vlan_global_opts_fill(struct sk_buff *skb, u16 vid, u16 vid_range, const struct net_bridge_vlan *v_opts); -/* vlan state manipulation helpers using *_ONCE to annotate lock-free access */ +/* vlan state manipulation helpers using *_ONCE to annotate lock-free access, + * while br_vlan_set_state() may access data protected by multicast_lock. + */ static inline u8 br_vlan_get_state(const struct net_bridge_vlan *v) { return READ_ONCE(v->state); @@ -1890,6 +1898,7 @@ static inline u8 br_vlan_get_state(const struct net_bridge_vlan *v) static inline void br_vlan_set_state(struct net_bridge_vlan *v, u8 state) { WRITE_ONCE(v->state, state); + br_multicast_update_vlan_mcast_ctx(v, state); } static inline u8 br_vlan_get_pvid_state(const struct net_bridge_vlan_group *vg) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 65cf582b5dac..254067b719da 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2747,12 +2747,12 @@ static int neigh_valid_dump_req(const struct nlmsghdr *nlh, if (strict_check) { struct ndmsg *ndm; - if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) { + ndm = nlmsg_payload(nlh, sizeof(*ndm)); + if (!ndm) { NL_SET_ERR_MSG(extack, "Invalid header for neighbor dump request"); return -EINVAL; } - ndm = nlmsg_data(nlh); if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_ifindex || ndm->ndm_state || ndm->ndm_type) { NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor dump request"); diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 0a2b24af4028..42ee7fce3d95 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -220,17 +220,20 @@ static void ops_free_list(const struct pernet_operations *ops, static void ops_undo_list(const struct list_head *ops_list, const struct pernet_operations *ops, struct list_head *net_exit_list, - bool expedite_rcu, bool hold_rtnl) + bool expedite_rcu) { const struct pernet_operations *saved_ops; + bool hold_rtnl = false; if (!ops) ops = list_entry(ops_list, typeof(*ops), list); saved_ops = ops; - list_for_each_entry_continue_reverse(ops, ops_list, list) + list_for_each_entry_continue_reverse(ops, ops_list, list) { + hold_rtnl |= !!ops->exit_rtnl; ops_pre_exit_list(ops, net_exit_list); + } /* Another CPU might be rcu-iterating the list, wait for it. * This needs to be before calling the exit() notifiers, so the @@ -257,11 +260,10 @@ static void ops_undo_list(const struct list_head *ops_list, static void ops_undo_single(struct pernet_operations *ops, struct list_head *net_exit_list) { - bool hold_rtnl = !!ops->exit_rtnl; LIST_HEAD(ops_list); list_add(&ops->list, &ops_list); - ops_undo_list(&ops_list, NULL, net_exit_list, false, hold_rtnl); + ops_undo_list(&ops_list, NULL, net_exit_list, false); list_del(&ops->list); } @@ -452,7 +454,7 @@ out_undo: * for the pernet modules whose init functions did not fail. */ list_add(&net->exit_list, &net_exit_list); - ops_undo_list(&pernet_list, ops, &net_exit_list, false, true); + ops_undo_list(&pernet_list, ops, &net_exit_list, false); rcu_barrier(); goto out; } @@ -681,7 +683,7 @@ static void cleanup_net(struct work_struct *work) list_add_tail(&net->exit_list, &net_exit_list); } - ops_undo_list(&pernet_list, NULL, &net_exit_list, true, true); + ops_undo_list(&pernet_list, NULL, &net_exit_list, true); up_read(&pernet_ops_rwsem); @@ -1312,19 +1314,19 @@ static void __unregister_pernet_operations(struct pernet_operations *ops) static int __register_pernet_operations(struct list_head *list, struct pernet_operations *ops) { - list_add_tail(&ops->list, list); - - if (!init_net_initialized) + if (!init_net_initialized) { + list_add_tail(&ops->list, list); return 0; + } return ops_init(ops, &init_net); } static void __unregister_pernet_operations(struct pernet_operations *ops) { - list_del(&ops->list); - - if (init_net_initialized) { + if (!init_net_initialized) { + list_del(&ops->list); + } else { LIST_HEAD(net_exit_list); list_add(&init_net.exit_list, &net_exit_list); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index ea2e15de8d99..8a914b37ef6e 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2390,12 +2390,12 @@ static int rtnl_valid_dump_ifinfo_req(const struct nlmsghdr *nlh, if (strict_check) { struct ifinfomsg *ifm; - if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) { + ifm = nlmsg_payload(nlh, sizeof(*ifm)); + if (!ifm) { NL_SET_ERR_MSG(extack, "Invalid header for link dump"); return -EINVAL; } - ifm = nlmsg_data(nlh); if (ifm->__ifi_pad || ifm->ifi_type || ifm->ifi_flags || ifm->ifi_change) { NL_SET_ERR_MSG(extack, "Invalid values in header for link dump request"); @@ -4084,7 +4084,8 @@ static int rtnl_valid_getlink_req(struct sk_buff *skb, struct ifinfomsg *ifm; int i, err; - if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) { + ifm = nlmsg_payload(nlh, sizeof(*ifm)); + if (!ifm) { NL_SET_ERR_MSG(extack, "Invalid header for get link"); return -EINVAL; } @@ -4093,7 +4094,6 @@ static int rtnl_valid_getlink_req(struct sk_buff *skb, return nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, extack); - ifm = nlmsg_data(nlh); if (ifm->__ifi_pad || ifm->ifi_type || ifm->ifi_flags || ifm->ifi_change) { NL_SET_ERR_MSG(extack, "Invalid values in header for get link request"); @@ -5052,12 +5052,12 @@ static int valid_fdb_get_strict(const struct nlmsghdr *nlh, struct ndmsg *ndm; int err, i; - if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) { + ndm = nlmsg_payload(nlh, sizeof(*ndm)); + if (!ndm) { NL_SET_ERR_MSG(extack, "Invalid header for fdb get request"); return -EINVAL; } - ndm = nlmsg_data(nlh); if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_state || ndm->ndm_type) { NL_SET_ERR_MSG(extack, "Invalid values in header for fdb get request"); @@ -5324,12 +5324,12 @@ static int valid_bridge_getlink_req(const struct nlmsghdr *nlh, if (strict_check) { struct ifinfomsg *ifm; - if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) { + ifm = nlmsg_payload(nlh, sizeof(*ifm)); + if (!ifm) { NL_SET_ERR_MSG(extack, "Invalid header for bridge link dump"); return -EINVAL; } - ifm = nlmsg_data(nlh); if (ifm->__ifi_pad || ifm->ifi_type || ifm->ifi_flags || ifm->ifi_change || ifm->ifi_index) { NL_SET_ERR_MSG(extack, "Invalid values in header for bridge link dump request"); @@ -6221,7 +6221,8 @@ static int rtnl_valid_stats_req(const struct nlmsghdr *nlh, bool strict_check, { struct if_stats_msg *ifsm; - if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifsm))) { + ifsm = nlmsg_payload(nlh, sizeof(*ifsm)); + if (!ifsm) { NL_SET_ERR_MSG(extack, "Invalid header for stats dump"); return -EINVAL; } @@ -6229,8 +6230,6 @@ static int rtnl_valid_stats_req(const struct nlmsghdr *nlh, bool strict_check, if (!strict_check) return 0; - ifsm = nlmsg_data(nlh); - /* only requests using strict checks can pass data to influence * the dump. The legacy exception is filter_mask. */ @@ -6458,12 +6457,12 @@ static int rtnl_mdb_valid_dump_req(const struct nlmsghdr *nlh, { struct br_port_msg *bpm; - if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*bpm))) { + bpm = nlmsg_payload(nlh, sizeof(*bpm)); + if (!bpm) { NL_SET_ERR_MSG(extack, "Invalid header for mdb dump request"); return -EINVAL; } - bpm = nlmsg_data(nlh); if (bpm->ifindex) { NL_SET_ERR_MSG(extack, "Filtering by device index is not supported for mdb dump request"); return -EINVAL; diff --git a/net/core/xdp.c b/net/core/xdp.c index a014df88620c..ea819764ae39 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -1014,21 +1014,38 @@ void xdp_set_features_flag(struct net_device *dev, xdp_features_t val) } EXPORT_SYMBOL_GPL(xdp_set_features_flag); -void xdp_features_set_redirect_target(struct net_device *dev, bool support_sg) +void xdp_features_set_redirect_target_locked(struct net_device *dev, + bool support_sg) { xdp_features_t val = (dev->xdp_features | NETDEV_XDP_ACT_NDO_XMIT); if (support_sg) val |= NETDEV_XDP_ACT_NDO_XMIT_SG; - xdp_set_features_flag(dev, val); + xdp_set_features_flag_locked(dev, val); +} +EXPORT_SYMBOL_GPL(xdp_features_set_redirect_target_locked); + +void xdp_features_set_redirect_target(struct net_device *dev, bool support_sg) +{ + netdev_lock(dev); + xdp_features_set_redirect_target_locked(dev, support_sg); + netdev_unlock(dev); } EXPORT_SYMBOL_GPL(xdp_features_set_redirect_target); -void xdp_features_clear_redirect_target(struct net_device *dev) +void xdp_features_clear_redirect_target_locked(struct net_device *dev) { xdp_features_t val = dev->xdp_features; val &= ~(NETDEV_XDP_ACT_NDO_XMIT | NETDEV_XDP_ACT_NDO_XMIT_SG); - xdp_set_features_flag(dev, val); + xdp_set_features_flag_locked(dev, val); +} +EXPORT_SYMBOL_GPL(xdp_features_clear_redirect_target_locked); + +void xdp_features_clear_redirect_target(struct net_device *dev) +{ + netdev_lock(dev); + xdp_features_clear_redirect_target_locked(dev); + netdev_unlock(dev); } EXPORT_SYMBOL_GPL(xdp_features_clear_redirect_target); diff --git a/net/rxrpc/rxgk_app.c b/net/rxrpc/rxgk_app.c index 6206a84395b8..b94b77a1c317 100644 --- a/net/rxrpc/rxgk_app.c +++ b/net/rxrpc/rxgk_app.c @@ -141,6 +141,7 @@ int rxgk_yfs_decode_ticket(struct rxrpc_connection *conn, struct sk_buff *skb, KEY_ALLOC_NOT_IN_QUOTA, NULL); if (IS_ERR(key)) { _leave(" = -ENOMEM [alloc %ld]", PTR_ERR(key)); + ret = PTR_ERR(key); goto error; } diff --git a/net/sched/Kconfig b/net/sched/Kconfig index a800127effcd..9f0b3f943fca 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -403,6 +403,18 @@ config NET_SCH_ETS If unsure, say N. +config NET_SCH_BPF + bool "BPF-based Qdisc" + depends on BPF_SYSCALL && BPF_JIT && DEBUG_INFO_BTF + help + This option allows BPF-based queueing disiplines. With BPF struct_ops, + users can implement supported operators in Qdisc_ops using BPF programs. + The queue holding skb can be built with BPF maps or graphs. + + Say Y here if you want to use BPF-based Qdisc. + + If unsure, say N. + menuconfig NET_SCH_DEFAULT bool "Allow override default queue discipline" help diff --git a/net/sched/Makefile b/net/sched/Makefile index 82c3f78ca486..904d784902d1 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -62,6 +62,7 @@ obj-$(CONFIG_NET_SCH_FQ_PIE) += sch_fq_pie.o obj-$(CONFIG_NET_SCH_CBS) += sch_cbs.o obj-$(CONFIG_NET_SCH_ETF) += sch_etf.o obj-$(CONFIG_NET_SCH_TAPRIO) += sch_taprio.o +obj-$(CONFIG_NET_SCH_BPF) += bpf_qdisc.o obj-$(CONFIG_NET_CLS_U32) += cls_u32.o obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o diff --git a/net/sched/bpf_qdisc.c b/net/sched/bpf_qdisc.c new file mode 100644 index 000000000000..9f32b305636f --- /dev/null +++ b/net/sched/bpf_qdisc.c @@ -0,0 +1,461 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/types.h> +#include <linux/bpf_verifier.h> +#include <linux/bpf.h> +#include <linux/btf.h> +#include <linux/filter.h> +#include <net/pkt_sched.h> +#include <net/pkt_cls.h> + +#define QDISC_OP_IDX(op) (offsetof(struct Qdisc_ops, op) / sizeof(void (*)(void))) +#define QDISC_MOFF_IDX(moff) (moff / sizeof(void (*)(void))) + +static struct bpf_struct_ops bpf_Qdisc_ops; + +struct bpf_sched_data { + struct qdisc_watchdog watchdog; +}; + +struct bpf_sk_buff_ptr { + struct sk_buff *skb; +}; + +static int bpf_qdisc_init(struct btf *btf) +{ + return 0; +} + +BTF_ID_LIST_SINGLE(bpf_qdisc_ids, struct, Qdisc) +BTF_ID_LIST_SINGLE(bpf_sk_buff_ids, struct, sk_buff) +BTF_ID_LIST_SINGLE(bpf_sk_buff_ptr_ids, struct, bpf_sk_buff_ptr) + +static bool bpf_qdisc_is_valid_access(int off, int size, + enum bpf_access_type type, + const struct bpf_prog *prog, + struct bpf_insn_access_aux *info) +{ + struct btf *btf = prog->aux->attach_btf; + u32 arg; + + arg = btf_ctx_arg_idx(btf, prog->aux->attach_func_proto, off); + if (prog->aux->attach_st_ops_member_off == offsetof(struct Qdisc_ops, enqueue)) { + if (arg == 2 && type == BPF_READ) { + info->reg_type = PTR_TO_BTF_ID | PTR_TRUSTED; + info->btf = btf; + info->btf_id = bpf_sk_buff_ptr_ids[0]; + return true; + } + } + + return bpf_tracing_btf_ctx_access(off, size, type, prog, info); +} + +static int bpf_qdisc_qdisc_access(struct bpf_verifier_log *log, + const struct bpf_reg_state *reg, + int off, size_t *end) +{ + switch (off) { + case offsetof(struct Qdisc, limit): + *end = offsetofend(struct Qdisc, limit); + break; + case offsetof(struct Qdisc, q) + offsetof(struct qdisc_skb_head, qlen): + *end = offsetof(struct Qdisc, q) + offsetofend(struct qdisc_skb_head, qlen); + break; + case offsetof(struct Qdisc, qstats) ... offsetofend(struct Qdisc, qstats) - 1: + *end = offsetofend(struct Qdisc, qstats); + break; + default: + return -EACCES; + } + + return 0; +} + +static int bpf_qdisc_sk_buff_access(struct bpf_verifier_log *log, + const struct bpf_reg_state *reg, + int off, size_t *end) +{ + switch (off) { + case offsetof(struct sk_buff, tstamp): + *end = offsetofend(struct sk_buff, tstamp); + break; + case offsetof(struct sk_buff, cb) + offsetof(struct qdisc_skb_cb, data[0]) ... + offsetof(struct sk_buff, cb) + offsetof(struct qdisc_skb_cb, + data[QDISC_CB_PRIV_LEN - 1]): + *end = offsetof(struct sk_buff, cb) + + offsetofend(struct qdisc_skb_cb, data[QDISC_CB_PRIV_LEN - 1]); + break; + default: + return -EACCES; + } + + return 0; +} + +static int bpf_qdisc_btf_struct_access(struct bpf_verifier_log *log, + const struct bpf_reg_state *reg, + int off, int size) +{ + const struct btf_type *t, *skbt, *qdisct; + size_t end; + int err; + + skbt = btf_type_by_id(reg->btf, bpf_sk_buff_ids[0]); + qdisct = btf_type_by_id(reg->btf, bpf_qdisc_ids[0]); + t = btf_type_by_id(reg->btf, reg->btf_id); + + if (t == skbt) { + err = bpf_qdisc_sk_buff_access(log, reg, off, &end); + } else if (t == qdisct) { + err = bpf_qdisc_qdisc_access(log, reg, off, &end); + } else { + bpf_log(log, "only read is supported\n"); + return -EACCES; + } + + if (err) { + bpf_log(log, "no write support to %s at off %d\n", + btf_name_by_offset(reg->btf, t->name_off), off); + return -EACCES; + } + + if (off + size > end) { + bpf_log(log, + "write access at off %d with size %d beyond the member of %s ended at %zu\n", + off, size, btf_name_by_offset(reg->btf, t->name_off), end); + return -EACCES; + } + + return 0; +} + +BTF_ID_LIST(bpf_qdisc_init_prologue_ids) +BTF_ID(func, bpf_qdisc_init_prologue) + +static int bpf_qdisc_gen_prologue(struct bpf_insn *insn_buf, bool direct_write, + const struct bpf_prog *prog) +{ + struct bpf_insn *insn = insn_buf; + + if (prog->aux->attach_st_ops_member_off != offsetof(struct Qdisc_ops, init)) + return 0; + + /* r6 = r1; // r6 will be "u64 *ctx". r1 is "u64 *ctx". + * r2 = r1[16]; // r2 will be "struct netlink_ext_ack *extack" + * r1 = r1[0]; // r1 will be "struct Qdisc *sch" + * r0 = bpf_qdisc_init_prologue(r1, r2); + * if r0 == 0 goto pc+1; + * BPF_EXIT; + * r1 = r6; // r1 will be "u64 *ctx". + */ + *insn++ = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1); + *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 16); + *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0); + *insn++ = BPF_CALL_KFUNC(0, bpf_qdisc_init_prologue_ids[0]); + *insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1); + *insn++ = BPF_EXIT_INSN(); + *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6); + *insn++ = prog->insnsi[0]; + + return insn - insn_buf; +} + +BTF_ID_LIST(bpf_qdisc_reset_destroy_epilogue_ids) +BTF_ID(func, bpf_qdisc_reset_destroy_epilogue) + +static int bpf_qdisc_gen_epilogue(struct bpf_insn *insn_buf, const struct bpf_prog *prog, + s16 ctx_stack_off) +{ + struct bpf_insn *insn = insn_buf; + + if (prog->aux->attach_st_ops_member_off != offsetof(struct Qdisc_ops, reset) && + prog->aux->attach_st_ops_member_off != offsetof(struct Qdisc_ops, destroy)) + return 0; + + /* r1 = stack[ctx_stack_off]; // r1 will be "u64 *ctx" + * r1 = r1[0]; // r1 will be "struct Qdisc *sch" + * r0 = bpf_qdisc_reset_destroy_epilogue(r1); + * BPF_EXIT; + */ + *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_FP, ctx_stack_off); + *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0); + *insn++ = BPF_CALL_KFUNC(0, bpf_qdisc_reset_destroy_epilogue_ids[0]); + *insn++ = BPF_EXIT_INSN(); + + return insn - insn_buf; +} + +__bpf_kfunc_start_defs(); + +/* bpf_skb_get_hash - Get the flow hash of an skb. + * @skb: The skb to get the flow hash from. + */ +__bpf_kfunc u32 bpf_skb_get_hash(struct sk_buff *skb) +{ + return skb_get_hash(skb); +} + +/* bpf_kfree_skb - Release an skb's reference and drop it immediately. + * @skb: The skb whose reference to be released and dropped. + */ +__bpf_kfunc void bpf_kfree_skb(struct sk_buff *skb) +{ + kfree_skb(skb); +} + +/* bpf_qdisc_skb_drop - Drop an skb by adding it to a deferred free list. + * @skb: The skb whose reference to be released and dropped. + * @to_free_list: The list of skbs to be dropped. + */ +__bpf_kfunc void bpf_qdisc_skb_drop(struct sk_buff *skb, + struct bpf_sk_buff_ptr *to_free_list) +{ + __qdisc_drop(skb, (struct sk_buff **)to_free_list); +} + +/* bpf_qdisc_watchdog_schedule - Schedule a qdisc to a later time using a timer. + * @sch: The qdisc to be scheduled. + * @expire: The expiry time of the timer. + * @delta_ns: The slack range of the timer. + */ +__bpf_kfunc void bpf_qdisc_watchdog_schedule(struct Qdisc *sch, u64 expire, u64 delta_ns) +{ + struct bpf_sched_data *q = qdisc_priv(sch); + + qdisc_watchdog_schedule_range_ns(&q->watchdog, expire, delta_ns); +} + +/* bpf_qdisc_init_prologue - Hidden kfunc called in prologue of .init. */ +__bpf_kfunc int bpf_qdisc_init_prologue(struct Qdisc *sch, + struct netlink_ext_ack *extack) +{ + struct bpf_sched_data *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + struct Qdisc *p; + + if (sch->parent != TC_H_ROOT) { + p = qdisc_lookup(dev, TC_H_MAJ(sch->parent)); + if (!p) + return -ENOENT; + + if (!(p->flags & TCQ_F_MQROOT)) { + NL_SET_ERR_MSG(extack, "BPF qdisc only supported on root or mq"); + return -EINVAL; + } + } + + qdisc_watchdog_init(&q->watchdog, sch); + return 0; +} + +/* bpf_qdisc_reset_destroy_epilogue - Hidden kfunc called in epilogue of .reset + * and .destroy + */ +__bpf_kfunc void bpf_qdisc_reset_destroy_epilogue(struct Qdisc *sch) +{ + struct bpf_sched_data *q = qdisc_priv(sch); + + qdisc_watchdog_cancel(&q->watchdog); +} + +/* bpf_qdisc_bstats_update - Update Qdisc basic statistics + * @sch: The qdisc from which an skb is dequeued. + * @skb: The skb to be dequeued. + */ +__bpf_kfunc void bpf_qdisc_bstats_update(struct Qdisc *sch, const struct sk_buff *skb) +{ + bstats_update(&sch->bstats, skb); +} + +__bpf_kfunc_end_defs(); + +BTF_KFUNCS_START(qdisc_kfunc_ids) +BTF_ID_FLAGS(func, bpf_skb_get_hash, KF_TRUSTED_ARGS) +BTF_ID_FLAGS(func, bpf_kfree_skb, KF_RELEASE) +BTF_ID_FLAGS(func, bpf_qdisc_skb_drop, KF_RELEASE) +BTF_ID_FLAGS(func, bpf_dynptr_from_skb, KF_TRUSTED_ARGS) +BTF_ID_FLAGS(func, bpf_qdisc_watchdog_schedule, KF_TRUSTED_ARGS) +BTF_ID_FLAGS(func, bpf_qdisc_init_prologue, KF_TRUSTED_ARGS) +BTF_ID_FLAGS(func, bpf_qdisc_reset_destroy_epilogue, KF_TRUSTED_ARGS) +BTF_ID_FLAGS(func, bpf_qdisc_bstats_update, KF_TRUSTED_ARGS) +BTF_KFUNCS_END(qdisc_kfunc_ids) + +BTF_SET_START(qdisc_common_kfunc_set) +BTF_ID(func, bpf_skb_get_hash) +BTF_ID(func, bpf_kfree_skb) +BTF_ID(func, bpf_dynptr_from_skb) +BTF_SET_END(qdisc_common_kfunc_set) + +BTF_SET_START(qdisc_enqueue_kfunc_set) +BTF_ID(func, bpf_qdisc_skb_drop) +BTF_ID(func, bpf_qdisc_watchdog_schedule) +BTF_SET_END(qdisc_enqueue_kfunc_set) + +BTF_SET_START(qdisc_dequeue_kfunc_set) +BTF_ID(func, bpf_qdisc_watchdog_schedule) +BTF_ID(func, bpf_qdisc_bstats_update) +BTF_SET_END(qdisc_dequeue_kfunc_set) + +enum qdisc_ops_kf_flags { + QDISC_OPS_KF_COMMON = 0, + QDISC_OPS_KF_ENQUEUE = 1 << 0, + QDISC_OPS_KF_DEQUEUE = 1 << 1, +}; + +static const u32 qdisc_ops_context_flags[] = { + [QDISC_OP_IDX(enqueue)] = QDISC_OPS_KF_ENQUEUE, + [QDISC_OP_IDX(dequeue)] = QDISC_OPS_KF_DEQUEUE, + [QDISC_OP_IDX(init)] = QDISC_OPS_KF_COMMON, + [QDISC_OP_IDX(reset)] = QDISC_OPS_KF_COMMON, + [QDISC_OP_IDX(destroy)] = QDISC_OPS_KF_COMMON, +}; + +static int bpf_qdisc_kfunc_filter(const struct bpf_prog *prog, u32 kfunc_id) +{ + u32 moff, flags; + + if (!btf_id_set8_contains(&qdisc_kfunc_ids, kfunc_id)) + return 0; + + if (prog->aux->st_ops != &bpf_Qdisc_ops) + return -EACCES; + + moff = prog->aux->attach_st_ops_member_off; + flags = qdisc_ops_context_flags[QDISC_MOFF_IDX(moff)]; + + if ((flags & QDISC_OPS_KF_ENQUEUE) && + btf_id_set_contains(&qdisc_enqueue_kfunc_set, kfunc_id)) + return 0; + + if ((flags & QDISC_OPS_KF_DEQUEUE) && + btf_id_set_contains(&qdisc_dequeue_kfunc_set, kfunc_id)) + return 0; + + if (btf_id_set_contains(&qdisc_common_kfunc_set, kfunc_id)) + return 0; + + return -EACCES; +} + +static const struct btf_kfunc_id_set bpf_qdisc_kfunc_set = { + .owner = THIS_MODULE, + .set = &qdisc_kfunc_ids, + .filter = bpf_qdisc_kfunc_filter, +}; + +static const struct bpf_verifier_ops bpf_qdisc_verifier_ops = { + .get_func_proto = bpf_base_func_proto, + .is_valid_access = bpf_qdisc_is_valid_access, + .btf_struct_access = bpf_qdisc_btf_struct_access, + .gen_prologue = bpf_qdisc_gen_prologue, + .gen_epilogue = bpf_qdisc_gen_epilogue, +}; + +static int bpf_qdisc_init_member(const struct btf_type *t, + const struct btf_member *member, + void *kdata, const void *udata) +{ + const struct Qdisc_ops *uqdisc_ops; + struct Qdisc_ops *qdisc_ops; + u32 moff; + + uqdisc_ops = (const struct Qdisc_ops *)udata; + qdisc_ops = (struct Qdisc_ops *)kdata; + + moff = __btf_member_bit_offset(t, member) / 8; + switch (moff) { + case offsetof(struct Qdisc_ops, priv_size): + if (uqdisc_ops->priv_size) + return -EINVAL; + qdisc_ops->priv_size = sizeof(struct bpf_sched_data); + return 1; + case offsetof(struct Qdisc_ops, peek): + qdisc_ops->peek = qdisc_peek_dequeued; + return 0; + case offsetof(struct Qdisc_ops, id): + if (bpf_obj_name_cpy(qdisc_ops->id, uqdisc_ops->id, + sizeof(qdisc_ops->id)) <= 0) + return -EINVAL; + return 1; + } + + return 0; +} + +static int bpf_qdisc_reg(void *kdata, struct bpf_link *link) +{ + return register_qdisc(kdata); +} + +static void bpf_qdisc_unreg(void *kdata, struct bpf_link *link) +{ + return unregister_qdisc(kdata); +} + +static int Qdisc_ops__enqueue(struct sk_buff *skb__ref, struct Qdisc *sch, + struct sk_buff **to_free) +{ + return 0; +} + +static struct sk_buff *Qdisc_ops__dequeue(struct Qdisc *sch) +{ + return NULL; +} + +static int Qdisc_ops__init(struct Qdisc *sch, struct nlattr *arg, + struct netlink_ext_ack *extack) +{ + return 0; +} + +static void Qdisc_ops__reset(struct Qdisc *sch) +{ +} + +static void Qdisc_ops__destroy(struct Qdisc *sch) +{ +} + +static struct Qdisc_ops __bpf_ops_qdisc_ops = { + .enqueue = Qdisc_ops__enqueue, + .dequeue = Qdisc_ops__dequeue, + .init = Qdisc_ops__init, + .reset = Qdisc_ops__reset, + .destroy = Qdisc_ops__destroy, +}; + +static struct bpf_struct_ops bpf_Qdisc_ops = { + .verifier_ops = &bpf_qdisc_verifier_ops, + .reg = bpf_qdisc_reg, + .unreg = bpf_qdisc_unreg, + .init_member = bpf_qdisc_init_member, + .init = bpf_qdisc_init, + .name = "Qdisc_ops", + .cfi_stubs = &__bpf_ops_qdisc_ops, + .owner = THIS_MODULE, +}; + +BTF_ID_LIST(bpf_sk_buff_dtor_ids) +BTF_ID(func, bpf_kfree_skb) + +static int __init bpf_qdisc_kfunc_init(void) +{ + int ret; + const struct btf_id_dtor_kfunc skb_kfunc_dtors[] = { + { + .btf_id = bpf_sk_buff_ids[0], + .kfunc_btf_id = bpf_sk_buff_dtor_ids[0] + }, + }; + + ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &bpf_qdisc_kfunc_set); + ret = ret ?: register_btf_id_dtor_kfuncs(skb_kfunc_dtors, + ARRAY_SIZE(skb_kfunc_dtors), + THIS_MODULE); + ret = ret ?: register_bpf_struct_ops(&bpf_Qdisc_ops, Qdisc_ops); + + return ret; +} +late_initcall(bpf_qdisc_kfunc_init); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index f74a097f54ae..db6330258dda 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -25,6 +25,7 @@ #include <linux/hrtimer.h> #include <linux/slab.h> #include <linux/hashtable.h> +#include <linux/bpf.h> #include <net/netdev_lock.h> #include <net/net_namespace.h> @@ -359,7 +360,7 @@ static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind) read_lock(&qdisc_mod_lock); for (q = qdisc_base; q; q = q->next) { if (nla_strcmp(kind, q->id) == 0) { - if (!try_module_get(q->owner)) + if (!bpf_try_module_get(q, q->owner)) q = NULL; break; } @@ -1370,7 +1371,7 @@ err_out3: netdev_put(dev, &sch->dev_tracker); qdisc_free(sch); err_out2: - module_put(ops->owner); + bpf_module_put(ops, ops->owner); err_out: *errp = err; return NULL; @@ -1782,7 +1783,7 @@ static void request_qdisc_module(struct nlattr *kind) ops = qdisc_lookup_ops(kind); if (ops) { - module_put(ops->owner); + bpf_module_put(ops, ops->owner); return; } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 514b1b6ac681..fc5ef69359db 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -24,6 +24,7 @@ #include <linux/if_vlan.h> #include <linux/skb_array.h> #include <linux/if_macvlan.h> +#include <linux/bpf.h> #include <net/sch_generic.h> #include <net/pkt_sched.h> #include <net/dst.h> @@ -1078,7 +1079,7 @@ static void __qdisc_destroy(struct Qdisc *qdisc) ops->destroy(qdisc); lockdep_unregister_key(&qdisc->root_lock_key); - module_put(ops->owner); + bpf_module_put(ops, ops->owner); netdev_put(dev, &qdisc->dev_tracker); trace_qdisc_destroy(qdisc); diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index e0605403f977..fdcee6a71e0f 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -1283,6 +1283,7 @@ enum bpf_tc_attach_point { BPF_TC_INGRESS = 1 << 0, BPF_TC_EGRESS = 1 << 1, BPF_TC_CUSTOM = 1 << 2, + BPF_TC_QDISC = 1 << 3, }; #define BPF_TC_PARENT(a, b) \ @@ -1297,9 +1298,11 @@ struct bpf_tc_hook { int ifindex; enum bpf_tc_attach_point attach_point; __u32 parent; + __u32 handle; + const char *qdisc; size_t :0; }; -#define bpf_tc_hook__last_field parent +#define bpf_tc_hook__last_field qdisc struct bpf_tc_opts { size_t sz; diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c index 68a2def17175..c997e69d507f 100644 --- a/tools/lib/bpf/netlink.c +++ b/tools/lib/bpf/netlink.c @@ -529,9 +529,9 @@ int bpf_xdp_query_id(int ifindex, int flags, __u32 *prog_id) } -typedef int (*qdisc_config_t)(struct libbpf_nla_req *req); +typedef int (*qdisc_config_t)(struct libbpf_nla_req *req, const struct bpf_tc_hook *hook); -static int clsact_config(struct libbpf_nla_req *req) +static int clsact_config(struct libbpf_nla_req *req, const struct bpf_tc_hook *hook) { req->tc.tcm_parent = TC_H_CLSACT; req->tc.tcm_handle = TC_H_MAKE(TC_H_CLSACT, 0); @@ -539,6 +539,16 @@ static int clsact_config(struct libbpf_nla_req *req) return nlattr_add(req, TCA_KIND, "clsact", sizeof("clsact")); } +static int qdisc_config(struct libbpf_nla_req *req, const struct bpf_tc_hook *hook) +{ + const char *qdisc = OPTS_GET(hook, qdisc, NULL); + + req->tc.tcm_parent = OPTS_GET(hook, parent, TC_H_ROOT); + req->tc.tcm_handle = OPTS_GET(hook, handle, 0); + + return nlattr_add(req, TCA_KIND, qdisc, strlen(qdisc) + 1); +} + static int attach_point_to_config(struct bpf_tc_hook *hook, qdisc_config_t *config) { @@ -552,6 +562,9 @@ static int attach_point_to_config(struct bpf_tc_hook *hook, return 0; case BPF_TC_CUSTOM: return -EOPNOTSUPP; + case BPF_TC_QDISC: + *config = &qdisc_config; + return 0; default: return -EINVAL; } @@ -596,7 +609,7 @@ static int tc_qdisc_modify(struct bpf_tc_hook *hook, int cmd, int flags) req.tc.tcm_family = AF_UNSPEC; req.tc.tcm_ifindex = OPTS_GET(hook, ifindex, 0); - ret = config(&req); + ret = config(&req, hook); if (ret < 0) return ret; @@ -639,6 +652,7 @@ int bpf_tc_hook_destroy(struct bpf_tc_hook *hook) case BPF_TC_INGRESS: case BPF_TC_EGRESS: return libbpf_err(__bpf_tc_detach(hook, NULL, true)); + case BPF_TC_QDISC: case BPF_TC_INGRESS | BPF_TC_EGRESS: return libbpf_err(tc_qdisc_delete(hook)); case BPF_TC_CUSTOM: diff --git a/tools/net/ynl/Makefile.deps b/tools/net/ynl/Makefile.deps index 385783489f84..8b7bf673b686 100644 --- a/tools/net/ynl/Makefile.deps +++ b/tools/net/ynl/Makefile.deps @@ -20,15 +20,18 @@ CFLAGS_ethtool:=$(call get_hdr_inc,_LINUX_ETHTOOL_H,ethtool.h) \ $(call get_hdr_inc,_LINUX_ETHTOOL_NETLINK_H_,ethtool_netlink.h) \ $(call get_hdr_inc,_LINUX_ETHTOOL_NETLINK_GENERATED_H,ethtool_netlink_generated.h) CFLAGS_handshake:=$(call get_hdr_inc,_LINUX_HANDSHAKE_H,handshake.h) +CFLAGS_lockd_netlink:=$(call get_hdr_inc,_LINUX_LOCKD_NETLINK_H,lockd_netlink.h) CFLAGS_mptcp_pm:=$(call get_hdr_inc,_LINUX_MPTCP_PM_H,mptcp_pm.h) CFLAGS_net_shaper:=$(call get_hdr_inc,_LINUX_NET_SHAPER_H,net_shaper.h) CFLAGS_netdev:=$(call get_hdr_inc,_LINUX_NETDEV_H,netdev.h) CFLAGS_nl80211:=$(call get_hdr_inc,__LINUX_NL802121_H,nl80211.h) CFLAGS_nlctrl:=$(call get_hdr_inc,__LINUX_GENERIC_NETLINK_H,genetlink.h) CFLAGS_nfsd:=$(call get_hdr_inc,_LINUX_NFSD_NETLINK_H,nfsd_netlink.h) +CFLAGS_ovpn:=$(call get_hdr_inc,_LINUX_OVPN,ovpn.h) CFLAGS_ovs_datapath:=$(call get_hdr_inc,__LINUX_OPENVSWITCH_H,openvswitch.h) CFLAGS_ovs_flow:=$(call get_hdr_inc,__LINUX_OPENVSWITCH_H,openvswitch.h) CFLAGS_ovs_vport:=$(call get_hdr_inc,__LINUX_OPENVSWITCH_H,openvswitch.h) -CFLAGS_rt-addr:=$(call get_hdr_inc,__LINUX_RTNETLINK_H,rtnetlink.h) +CFLAGS_rt-addr:=$(call get_hdr_inc,__LINUX_RTNETLINK_H,rtnetlink.h) \ + $(call get_hdr_inc,__LINUX_IF_ADDR_H,if_addr.h) CFLAGS_rt-route:=$(call get_hdr_inc,__LINUX_RTNETLINK_H,rtnetlink.h) CFLAGS_tcp_metrics:=$(call get_hdr_inc,_LINUX_TCP_METRICS_H,tcp_metrics.h) diff --git a/tools/net/ynl/pyynl/ynl_gen_c.py b/tools/net/ynl/pyynl/ynl_gen_c.py index 0d930c17f963..9613a6135003 100755 --- a/tools/net/ynl/pyynl/ynl_gen_c.py +++ b/tools/net/ynl/pyynl/ynl_gen_c.py @@ -2909,7 +2909,7 @@ def main(): cw.p(f'#include "{hdr_file}"') cw.p('#include "ynl.h"') headers = [] - for definition in parsed['definitions']: + for definition in parsed['definitions'] + parsed['attribute-sets']: if 'header' in definition: headers.append(definition['header']) if args.mode == 'user': diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index c378d5d07e02..3201a962b3dc 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -71,8 +71,10 @@ CONFIG_NET_IPGRE=y CONFIG_NET_IPGRE_DEMUX=y CONFIG_NET_IPIP=y CONFIG_NET_MPLS_GSO=y +CONFIG_NET_SCH_BPF=y CONFIG_NET_SCH_FQ=y CONFIG_NET_SCH_INGRESS=y +CONFIG_NET_SCH_HTB=y CONFIG_NET_SCHED=y CONFIG_NETDEVSIM=y CONFIG_NETFILTER=y diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_qdisc.c b/tools/testing/selftests/bpf/prog_tests/bpf_qdisc.c new file mode 100644 index 000000000000..c9a54177c84e --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/bpf_qdisc.c @@ -0,0 +1,180 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/pkt_sched.h> +#include <linux/rtnetlink.h> +#include <test_progs.h> + +#include "network_helpers.h" +#include "bpf_qdisc_fifo.skel.h" +#include "bpf_qdisc_fq.skel.h" + +#define LO_IFINDEX 1 + +static const unsigned int total_bytes = 10 * 1024 * 1024; + +static void do_test(char *qdisc) +{ + DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = LO_IFINDEX, + .attach_point = BPF_TC_QDISC, + .parent = TC_H_ROOT, + .handle = 0x8000000, + .qdisc = qdisc); + int srv_fd = -1, cli_fd = -1; + int err; + + err = bpf_tc_hook_create(&hook); + if (!ASSERT_OK(err, "attach qdisc")) + return; + + srv_fd = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0); + if (!ASSERT_OK_FD(srv_fd, "start server")) + goto done; + + cli_fd = connect_to_fd(srv_fd, 0); + if (!ASSERT_OK_FD(cli_fd, "connect to client")) + goto done; + + err = send_recv_data(srv_fd, cli_fd, total_bytes); + ASSERT_OK(err, "send_recv_data"); + +done: + if (srv_fd != -1) + close(srv_fd); + if (cli_fd != -1) + close(cli_fd); + + bpf_tc_hook_destroy(&hook); +} + +static void test_fifo(void) +{ + struct bpf_qdisc_fifo *fifo_skel; + struct bpf_link *link; + + fifo_skel = bpf_qdisc_fifo__open_and_load(); + if (!ASSERT_OK_PTR(fifo_skel, "bpf_qdisc_fifo__open_and_load")) + return; + + link = bpf_map__attach_struct_ops(fifo_skel->maps.fifo); + if (!ASSERT_OK_PTR(link, "bpf_map__attach_struct_ops")) { + bpf_qdisc_fifo__destroy(fifo_skel); + return; + } + + do_test("bpf_fifo"); + + bpf_link__destroy(link); + bpf_qdisc_fifo__destroy(fifo_skel); +} + +static void test_fq(void) +{ + struct bpf_qdisc_fq *fq_skel; + struct bpf_link *link; + + fq_skel = bpf_qdisc_fq__open_and_load(); + if (!ASSERT_OK_PTR(fq_skel, "bpf_qdisc_fq__open_and_load")) + return; + + link = bpf_map__attach_struct_ops(fq_skel->maps.fq); + if (!ASSERT_OK_PTR(link, "bpf_map__attach_struct_ops")) { + bpf_qdisc_fq__destroy(fq_skel); + return; + } + + do_test("bpf_fq"); + + bpf_link__destroy(link); + bpf_qdisc_fq__destroy(fq_skel); +} + +static void test_qdisc_attach_to_mq(void) +{ + DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, + .attach_point = BPF_TC_QDISC, + .parent = TC_H_MAKE(1 << 16, 1), + .handle = 0x11 << 16, + .qdisc = "bpf_fifo"); + struct bpf_qdisc_fifo *fifo_skel; + struct bpf_link *link; + int err; + + fifo_skel = bpf_qdisc_fifo__open_and_load(); + if (!ASSERT_OK_PTR(fifo_skel, "bpf_qdisc_fifo__open_and_load")) + return; + + link = bpf_map__attach_struct_ops(fifo_skel->maps.fifo); + if (!ASSERT_OK_PTR(link, "bpf_map__attach_struct_ops")) { + bpf_qdisc_fifo__destroy(fifo_skel); + return; + } + + SYS(out, "ip link add veth0 type veth peer veth1"); + hook.ifindex = if_nametoindex("veth0"); + SYS(out, "tc qdisc add dev veth0 root handle 1: mq"); + + err = bpf_tc_hook_create(&hook); + ASSERT_OK(err, "attach qdisc"); + + bpf_tc_hook_destroy(&hook); + + SYS(out, "tc qdisc delete dev veth0 root mq"); +out: + bpf_link__destroy(link); + bpf_qdisc_fifo__destroy(fifo_skel); +} + +static void test_qdisc_attach_to_non_root(void) +{ + DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = LO_IFINDEX, + .attach_point = BPF_TC_QDISC, + .parent = TC_H_MAKE(1 << 16, 1), + .handle = 0x11 << 16, + .qdisc = "bpf_fifo"); + struct bpf_qdisc_fifo *fifo_skel; + struct bpf_link *link; + int err; + + fifo_skel = bpf_qdisc_fifo__open_and_load(); + if (!ASSERT_OK_PTR(fifo_skel, "bpf_qdisc_fifo__open_and_load")) + return; + + link = bpf_map__attach_struct_ops(fifo_skel->maps.fifo); + if (!ASSERT_OK_PTR(link, "bpf_map__attach_struct_ops")) { + bpf_qdisc_fifo__destroy(fifo_skel); + return; + } + + SYS(out, "tc qdisc add dev lo root handle 1: htb"); + SYS(out_del_htb, "tc class add dev lo parent 1: classid 1:1 htb rate 75Kbit"); + + err = bpf_tc_hook_create(&hook); + if (!ASSERT_ERR(err, "attach qdisc")) + bpf_tc_hook_destroy(&hook); + +out_del_htb: + SYS(out, "tc qdisc delete dev lo root htb"); +out: + bpf_link__destroy(link); + bpf_qdisc_fifo__destroy(fifo_skel); +} + +void test_bpf_qdisc(void) +{ + struct netns_obj *netns; + + netns = netns_new("bpf_qdisc_ns", true); + if (!ASSERT_OK_PTR(netns, "netns_new")) + return; + + if (test__start_subtest("fifo")) + test_fifo(); + if (test__start_subtest("fq")) + test_fq(); + if (test__start_subtest("attach to mq")) + test_qdisc_attach_to_mq(); + if (test__start_subtest("attach to non root")) + test_qdisc_attach_to_non_root(); + + netns_free(netns); +} diff --git a/tools/testing/selftests/bpf/progs/bpf_qdisc_common.h b/tools/testing/selftests/bpf/progs/bpf_qdisc_common.h new file mode 100644 index 000000000000..65a2c561c0bb --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_qdisc_common.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _BPF_QDISC_COMMON_H +#define _BPF_QDISC_COMMON_H + +#define NET_XMIT_SUCCESS 0x00 +#define NET_XMIT_DROP 0x01 /* skb dropped */ +#define NET_XMIT_CN 0x02 /* congestion notification */ + +#define TC_PRIO_CONTROL 7 +#define TC_PRIO_MAX 15 + +#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8))) + +u32 bpf_skb_get_hash(struct sk_buff *p) __ksym; +void bpf_kfree_skb(struct sk_buff *p) __ksym; +void bpf_qdisc_skb_drop(struct sk_buff *p, struct bpf_sk_buff_ptr *to_free) __ksym; +void bpf_qdisc_watchdog_schedule(struct Qdisc *sch, u64 expire, u64 delta_ns) __ksym; +void bpf_qdisc_bstats_update(struct Qdisc *sch, const struct sk_buff *skb) __ksym; + +static struct qdisc_skb_cb *qdisc_skb_cb(const struct sk_buff *skb) +{ + return (struct qdisc_skb_cb *)skb->cb; +} + +static inline unsigned int qdisc_pkt_len(const struct sk_buff *skb) +{ + return qdisc_skb_cb(skb)->pkt_len; +} + +#endif diff --git a/tools/testing/selftests/bpf/progs/bpf_qdisc_fifo.c b/tools/testing/selftests/bpf/progs/bpf_qdisc_fifo.c new file mode 100644 index 000000000000..0c7cfb82dae1 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_qdisc_fifo.c @@ -0,0 +1,117 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <vmlinux.h> +#include "bpf_experimental.h" +#include "bpf_qdisc_common.h" + +char _license[] SEC("license") = "GPL"; + +struct skb_node { + struct sk_buff __kptr * skb; + struct bpf_list_node node; +}; + +private(A) struct bpf_spin_lock q_fifo_lock; +private(A) struct bpf_list_head q_fifo __contains(skb_node, node); + +SEC("struct_ops/bpf_fifo_enqueue") +int BPF_PROG(bpf_fifo_enqueue, struct sk_buff *skb, struct Qdisc *sch, + struct bpf_sk_buff_ptr *to_free) +{ + struct skb_node *skbn; + u32 pkt_len; + + if (sch->q.qlen == sch->limit) + goto drop; + + skbn = bpf_obj_new(typeof(*skbn)); + if (!skbn) + goto drop; + + pkt_len = qdisc_pkt_len(skb); + + sch->q.qlen++; + skb = bpf_kptr_xchg(&skbn->skb, skb); + if (skb) + bpf_qdisc_skb_drop(skb, to_free); + + bpf_spin_lock(&q_fifo_lock); + bpf_list_push_back(&q_fifo, &skbn->node); + bpf_spin_unlock(&q_fifo_lock); + + sch->qstats.backlog += pkt_len; + return NET_XMIT_SUCCESS; +drop: + bpf_qdisc_skb_drop(skb, to_free); + return NET_XMIT_DROP; +} + +SEC("struct_ops/bpf_fifo_dequeue") +struct sk_buff *BPF_PROG(bpf_fifo_dequeue, struct Qdisc *sch) +{ + struct bpf_list_node *node; + struct sk_buff *skb = NULL; + struct skb_node *skbn; + + bpf_spin_lock(&q_fifo_lock); + node = bpf_list_pop_front(&q_fifo); + bpf_spin_unlock(&q_fifo_lock); + if (!node) + return NULL; + + skbn = container_of(node, struct skb_node, node); + skb = bpf_kptr_xchg(&skbn->skb, skb); + bpf_obj_drop(skbn); + if (!skb) + return NULL; + + sch->qstats.backlog -= qdisc_pkt_len(skb); + bpf_qdisc_bstats_update(sch, skb); + sch->q.qlen--; + + return skb; +} + +SEC("struct_ops/bpf_fifo_init") +int BPF_PROG(bpf_fifo_init, struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) +{ + sch->limit = 1000; + return 0; +} + +SEC("struct_ops/bpf_fifo_reset") +void BPF_PROG(bpf_fifo_reset, struct Qdisc *sch) +{ + struct bpf_list_node *node; + struct skb_node *skbn; + int i; + + bpf_for(i, 0, sch->q.qlen) { + struct sk_buff *skb = NULL; + + bpf_spin_lock(&q_fifo_lock); + node = bpf_list_pop_front(&q_fifo); + bpf_spin_unlock(&q_fifo_lock); + + if (!node) + break; + + skbn = container_of(node, struct skb_node, node); + skb = bpf_kptr_xchg(&skbn->skb, skb); + if (skb) + bpf_kfree_skb(skb); + bpf_obj_drop(skbn); + } + sch->q.qlen = 0; +} + +SEC(".struct_ops") +struct Qdisc_ops fifo = { + .enqueue = (void *)bpf_fifo_enqueue, + .dequeue = (void *)bpf_fifo_dequeue, + .init = (void *)bpf_fifo_init, + .reset = (void *)bpf_fifo_reset, + .id = "bpf_fifo", +}; + diff --git a/tools/testing/selftests/bpf/progs/bpf_qdisc_fq.c b/tools/testing/selftests/bpf/progs/bpf_qdisc_fq.c new file mode 100644 index 000000000000..7c110a156224 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_qdisc_fq.c @@ -0,0 +1,750 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* bpf_fq is intended for testing the bpf qdisc infrastructure and not a direct + * copy of sch_fq. bpf_fq implements the scheduling algorithm of sch_fq before + * 29f834aa326e ("net_sched: sch_fq: add 3 bands and WRR scheduling") was + * introduced. It gives each flow a fair chance to transmit packets in a + * round-robin fashion. Note that for flow pacing, bpf_fq currently only + * respects skb->tstamp but not skb->sk->sk_pacing_rate. In addition, if there + * are multiple bpf_fq instances, they will have a shared view of flows and + * configuration since some key data structure such as fq_prio_flows, + * fq_nonprio_flows, and fq_bpf_data are global. + * + * To use bpf_fq alone without running selftests, use the following commands. + * + * 1. Register bpf_fq to the kernel + * bpftool struct_ops register bpf_qdisc_fq.bpf.o /sys/fs/bpf + * 2. Add bpf_fq to an interface + * tc qdisc add dev <interface name> root handle <handle> bpf_fq + * 3. Delete bpf_fq attached to the interface + * tc qdisc delete dev <interface name> root + * 4. Unregister bpf_fq + * bpftool struct_ops unregister name fq + * + * The qdisc name, bpf_fq, used in tc commands is defined by Qdisc_ops.id. + * The struct_ops_map_name, fq, used in the bpftool command is the name of the + * Qdisc_ops. + * + * SEC(".struct_ops") + * struct Qdisc_ops fq = { + * ... + * .id = "bpf_fq", + * }; + */ + +#include <vmlinux.h> +#include <errno.h> +#include <bpf/bpf_helpers.h> +#include "bpf_experimental.h" +#include "bpf_qdisc_common.h" + +char _license[] SEC("license") = "GPL"; + +#define NSEC_PER_USEC 1000L +#define NSEC_PER_SEC 1000000000L + +#define NUM_QUEUE (1 << 20) + +struct fq_bpf_data { + u32 quantum; + u32 initial_quantum; + u32 flow_refill_delay; + u32 flow_plimit; + u64 horizon; + u32 orphan_mask; + u32 timer_slack; + u64 time_next_delayed_flow; + u64 unthrottle_latency_ns; + u8 horizon_drop; + u32 new_flow_cnt; + u32 old_flow_cnt; + u64 ktime_cache; +}; + +enum { + CLS_RET_PRIO = 0, + CLS_RET_NONPRIO = 1, + CLS_RET_ERR = 2, +}; + +struct skb_node { + u64 tstamp; + struct sk_buff __kptr * skb; + struct bpf_rb_node node; +}; + +struct fq_flow_node { + int credit; + u32 qlen; + u64 age; + u64 time_next_packet; + struct bpf_list_node list_node; + struct bpf_rb_node rb_node; + struct bpf_rb_root queue __contains(skb_node, node); + struct bpf_spin_lock lock; + struct bpf_refcount refcount; +}; + +struct dequeue_nonprio_ctx { + bool stop_iter; + u64 expire; + u64 now; +}; + +struct remove_flows_ctx { + bool gc_only; + u32 reset_cnt; + u32 reset_max; +}; + +struct unset_throttled_flows_ctx { + bool unset_all; + u64 now; +}; + +struct fq_stashed_flow { + struct fq_flow_node __kptr * flow; +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, __u64); + __type(value, struct fq_stashed_flow); + __uint(max_entries, NUM_QUEUE); +} fq_nonprio_flows SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, __u64); + __type(value, struct fq_stashed_flow); + __uint(max_entries, 1); +} fq_prio_flows SEC(".maps"); + +private(A) struct bpf_spin_lock fq_delayed_lock; +private(A) struct bpf_rb_root fq_delayed __contains(fq_flow_node, rb_node); + +private(B) struct bpf_spin_lock fq_new_flows_lock; +private(B) struct bpf_list_head fq_new_flows __contains(fq_flow_node, list_node); + +private(C) struct bpf_spin_lock fq_old_flows_lock; +private(C) struct bpf_list_head fq_old_flows __contains(fq_flow_node, list_node); + +private(D) struct fq_bpf_data q; + +/* Wrapper for bpf_kptr_xchg that expects NULL dst */ +static void bpf_kptr_xchg_back(void *map_val, void *ptr) +{ + void *ret; + + ret = bpf_kptr_xchg(map_val, ptr); + if (ret) + bpf_obj_drop(ret); +} + +static bool skbn_tstamp_less(struct bpf_rb_node *a, const struct bpf_rb_node *b) +{ + struct skb_node *skbn_a; + struct skb_node *skbn_b; + + skbn_a = container_of(a, struct skb_node, node); + skbn_b = container_of(b, struct skb_node, node); + + return skbn_a->tstamp < skbn_b->tstamp; +} + +static bool fn_time_next_packet_less(struct bpf_rb_node *a, const struct bpf_rb_node *b) +{ + struct fq_flow_node *flow_a; + struct fq_flow_node *flow_b; + + flow_a = container_of(a, struct fq_flow_node, rb_node); + flow_b = container_of(b, struct fq_flow_node, rb_node); + + return flow_a->time_next_packet < flow_b->time_next_packet; +} + +static void +fq_flows_add_head(struct bpf_list_head *head, struct bpf_spin_lock *lock, + struct fq_flow_node *flow, u32 *flow_cnt) +{ + bpf_spin_lock(lock); + bpf_list_push_front(head, &flow->list_node); + bpf_spin_unlock(lock); + *flow_cnt += 1; +} + +static void +fq_flows_add_tail(struct bpf_list_head *head, struct bpf_spin_lock *lock, + struct fq_flow_node *flow, u32 *flow_cnt) +{ + bpf_spin_lock(lock); + bpf_list_push_back(head, &flow->list_node); + bpf_spin_unlock(lock); + *flow_cnt += 1; +} + +static void +fq_flows_remove_front(struct bpf_list_head *head, struct bpf_spin_lock *lock, + struct bpf_list_node **node, u32 *flow_cnt) +{ + bpf_spin_lock(lock); + *node = bpf_list_pop_front(head); + bpf_spin_unlock(lock); + *flow_cnt -= 1; +} + +static bool +fq_flows_is_empty(struct bpf_list_head *head, struct bpf_spin_lock *lock) +{ + struct bpf_list_node *node; + + bpf_spin_lock(lock); + node = bpf_list_pop_front(head); + if (node) { + bpf_list_push_front(head, node); + bpf_spin_unlock(lock); + return false; + } + bpf_spin_unlock(lock); + + return true; +} + +/* flow->age is used to denote the state of the flow (not-detached, detached, throttled) + * as well as the timestamp when the flow is detached. + * + * 0: not-detached + * 1 - (~0ULL-1): detached + * ~0ULL: throttled + */ +static void fq_flow_set_detached(struct fq_flow_node *flow) +{ + flow->age = bpf_jiffies64(); +} + +static bool fq_flow_is_detached(struct fq_flow_node *flow) +{ + return flow->age != 0 && flow->age != ~0ULL; +} + +static bool sk_listener(struct sock *sk) +{ + return (1 << sk->__sk_common.skc_state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV); +} + +static void fq_gc(void); + +static int fq_new_flow(void *flow_map, struct fq_stashed_flow **sflow, u64 hash) +{ + struct fq_stashed_flow tmp = {}; + struct fq_flow_node *flow; + int ret; + + flow = bpf_obj_new(typeof(*flow)); + if (!flow) + return -ENOMEM; + + flow->credit = q.initial_quantum, + flow->qlen = 0, + flow->age = 1, + flow->time_next_packet = 0, + + ret = bpf_map_update_elem(flow_map, &hash, &tmp, 0); + if (ret == -ENOMEM || ret == -E2BIG) { + fq_gc(); + bpf_map_update_elem(&fq_nonprio_flows, &hash, &tmp, 0); + } + + *sflow = bpf_map_lookup_elem(flow_map, &hash); + if (!*sflow) { + bpf_obj_drop(flow); + return -ENOMEM; + } + + bpf_kptr_xchg_back(&(*sflow)->flow, flow); + return 0; +} + +static int +fq_classify(struct sk_buff *skb, struct fq_stashed_flow **sflow) +{ + struct sock *sk = skb->sk; + int ret = CLS_RET_NONPRIO; + u64 hash = 0; + + if ((skb->priority & TC_PRIO_MAX) == TC_PRIO_CONTROL) { + *sflow = bpf_map_lookup_elem(&fq_prio_flows, &hash); + ret = CLS_RET_PRIO; + } else { + if (!sk || sk_listener(sk)) { + hash = bpf_skb_get_hash(skb) & q.orphan_mask; + /* Avoid collision with an existing flow hash, which + * only uses the lower 32 bits of hash, by setting the + * upper half of hash to 1. + */ + hash |= (1ULL << 32); + } else if (sk->__sk_common.skc_state == TCP_CLOSE) { + hash = bpf_skb_get_hash(skb) & q.orphan_mask; + hash |= (1ULL << 32); + } else { + hash = sk->__sk_common.skc_hash; + } + *sflow = bpf_map_lookup_elem(&fq_nonprio_flows, &hash); + } + + if (!*sflow) + ret = fq_new_flow(&fq_nonprio_flows, sflow, hash) < 0 ? + CLS_RET_ERR : CLS_RET_NONPRIO; + + return ret; +} + +static bool fq_packet_beyond_horizon(struct sk_buff *skb) +{ + return (s64)skb->tstamp > (s64)(q.ktime_cache + q.horizon); +} + +SEC("struct_ops/bpf_fq_enqueue") +int BPF_PROG(bpf_fq_enqueue, struct sk_buff *skb, struct Qdisc *sch, + struct bpf_sk_buff_ptr *to_free) +{ + struct fq_flow_node *flow = NULL, *flow_copy; + struct fq_stashed_flow *sflow; + u64 time_to_send, jiffies; + struct skb_node *skbn; + int ret; + + if (sch->q.qlen >= sch->limit) + goto drop; + + if (!skb->tstamp) { + time_to_send = q.ktime_cache = bpf_ktime_get_ns(); + } else { + if (fq_packet_beyond_horizon(skb)) { + q.ktime_cache = bpf_ktime_get_ns(); + if (fq_packet_beyond_horizon(skb)) { + if (q.horizon_drop) + goto drop; + + skb->tstamp = q.ktime_cache + q.horizon; + } + } + time_to_send = skb->tstamp; + } + + ret = fq_classify(skb, &sflow); + if (ret == CLS_RET_ERR) + goto drop; + + flow = bpf_kptr_xchg(&sflow->flow, flow); + if (!flow) + goto drop; + + if (ret == CLS_RET_NONPRIO) { + if (flow->qlen >= q.flow_plimit) { + bpf_kptr_xchg_back(&sflow->flow, flow); + goto drop; + } + + if (fq_flow_is_detached(flow)) { + flow_copy = bpf_refcount_acquire(flow); + + jiffies = bpf_jiffies64(); + if ((s64)(jiffies - (flow_copy->age + q.flow_refill_delay)) > 0) { + if (flow_copy->credit < q.quantum) + flow_copy->credit = q.quantum; + } + flow_copy->age = 0; + fq_flows_add_tail(&fq_new_flows, &fq_new_flows_lock, flow_copy, + &q.new_flow_cnt); + } + } + + skbn = bpf_obj_new(typeof(*skbn)); + if (!skbn) { + bpf_kptr_xchg_back(&sflow->flow, flow); + goto drop; + } + + skbn->tstamp = skb->tstamp = time_to_send; + + sch->qstats.backlog += qdisc_pkt_len(skb); + + skb = bpf_kptr_xchg(&skbn->skb, skb); + if (skb) + bpf_qdisc_skb_drop(skb, to_free); + + bpf_spin_lock(&flow->lock); + bpf_rbtree_add(&flow->queue, &skbn->node, skbn_tstamp_less); + bpf_spin_unlock(&flow->lock); + + flow->qlen++; + bpf_kptr_xchg_back(&sflow->flow, flow); + + sch->q.qlen++; + return NET_XMIT_SUCCESS; + +drop: + bpf_qdisc_skb_drop(skb, to_free); + sch->qstats.drops++; + return NET_XMIT_DROP; +} + +static int fq_unset_throttled_flows(u32 index, struct unset_throttled_flows_ctx *ctx) +{ + struct bpf_rb_node *node = NULL; + struct fq_flow_node *flow; + + bpf_spin_lock(&fq_delayed_lock); + + node = bpf_rbtree_first(&fq_delayed); + if (!node) { + bpf_spin_unlock(&fq_delayed_lock); + return 1; + } + + flow = container_of(node, struct fq_flow_node, rb_node); + if (!ctx->unset_all && flow->time_next_packet > ctx->now) { + q.time_next_delayed_flow = flow->time_next_packet; + bpf_spin_unlock(&fq_delayed_lock); + return 1; + } + + node = bpf_rbtree_remove(&fq_delayed, &flow->rb_node); + + bpf_spin_unlock(&fq_delayed_lock); + + if (!node) + return 1; + + flow = container_of(node, struct fq_flow_node, rb_node); + flow->age = 0; + fq_flows_add_tail(&fq_old_flows, &fq_old_flows_lock, flow, &q.old_flow_cnt); + + return 0; +} + +static void fq_flow_set_throttled(struct fq_flow_node *flow) +{ + flow->age = ~0ULL; + + if (q.time_next_delayed_flow > flow->time_next_packet) + q.time_next_delayed_flow = flow->time_next_packet; + + bpf_spin_lock(&fq_delayed_lock); + bpf_rbtree_add(&fq_delayed, &flow->rb_node, fn_time_next_packet_less); + bpf_spin_unlock(&fq_delayed_lock); +} + +static void fq_check_throttled(u64 now) +{ + struct unset_throttled_flows_ctx ctx = { + .unset_all = false, + .now = now, + }; + unsigned long sample; + + if (q.time_next_delayed_flow > now) + return; + + sample = (unsigned long)(now - q.time_next_delayed_flow); + q.unthrottle_latency_ns -= q.unthrottle_latency_ns >> 3; + q.unthrottle_latency_ns += sample >> 3; + + q.time_next_delayed_flow = ~0ULL; + bpf_loop(NUM_QUEUE, fq_unset_throttled_flows, &ctx, 0); +} + +static struct sk_buff* +fq_dequeue_nonprio_flows(u32 index, struct dequeue_nonprio_ctx *ctx) +{ + u64 time_next_packet, time_to_send; + struct bpf_rb_node *rb_node; + struct sk_buff *skb = NULL; + struct bpf_list_head *head; + struct bpf_list_node *node; + struct bpf_spin_lock *lock; + struct fq_flow_node *flow; + struct skb_node *skbn; + bool is_empty; + u32 *cnt; + + if (q.new_flow_cnt) { + head = &fq_new_flows; + lock = &fq_new_flows_lock; + cnt = &q.new_flow_cnt; + } else if (q.old_flow_cnt) { + head = &fq_old_flows; + lock = &fq_old_flows_lock; + cnt = &q.old_flow_cnt; + } else { + if (q.time_next_delayed_flow != ~0ULL) + ctx->expire = q.time_next_delayed_flow; + goto break_loop; + } + + fq_flows_remove_front(head, lock, &node, cnt); + if (!node) + goto break_loop; + + flow = container_of(node, struct fq_flow_node, list_node); + if (flow->credit <= 0) { + flow->credit += q.quantum; + fq_flows_add_tail(&fq_old_flows, &fq_old_flows_lock, flow, &q.old_flow_cnt); + return NULL; + } + + bpf_spin_lock(&flow->lock); + rb_node = bpf_rbtree_first(&flow->queue); + if (!rb_node) { + bpf_spin_unlock(&flow->lock); + is_empty = fq_flows_is_empty(&fq_old_flows, &fq_old_flows_lock); + if (head == &fq_new_flows && !is_empty) { + fq_flows_add_tail(&fq_old_flows, &fq_old_flows_lock, flow, &q.old_flow_cnt); + } else { + fq_flow_set_detached(flow); + bpf_obj_drop(flow); + } + return NULL; + } + + skbn = container_of(rb_node, struct skb_node, node); + time_to_send = skbn->tstamp; + + time_next_packet = (time_to_send > flow->time_next_packet) ? + time_to_send : flow->time_next_packet; + if (ctx->now < time_next_packet) { + bpf_spin_unlock(&flow->lock); + flow->time_next_packet = time_next_packet; + fq_flow_set_throttled(flow); + return NULL; + } + + rb_node = bpf_rbtree_remove(&flow->queue, rb_node); + bpf_spin_unlock(&flow->lock); + + if (!rb_node) + goto add_flow_and_break; + + skbn = container_of(rb_node, struct skb_node, node); + skb = bpf_kptr_xchg(&skbn->skb, skb); + bpf_obj_drop(skbn); + + if (!skb) + goto add_flow_and_break; + + flow->credit -= qdisc_skb_cb(skb)->pkt_len; + flow->qlen--; + +add_flow_and_break: + fq_flows_add_head(head, lock, flow, cnt); + +break_loop: + ctx->stop_iter = true; + return skb; +} + +static struct sk_buff *fq_dequeue_prio(void) +{ + struct fq_flow_node *flow = NULL; + struct fq_stashed_flow *sflow; + struct bpf_rb_node *rb_node; + struct sk_buff *skb = NULL; + struct skb_node *skbn; + u64 hash = 0; + + sflow = bpf_map_lookup_elem(&fq_prio_flows, &hash); + if (!sflow) + return NULL; + + flow = bpf_kptr_xchg(&sflow->flow, flow); + if (!flow) + return NULL; + + bpf_spin_lock(&flow->lock); + rb_node = bpf_rbtree_first(&flow->queue); + if (!rb_node) { + bpf_spin_unlock(&flow->lock); + goto out; + } + + skbn = container_of(rb_node, struct skb_node, node); + rb_node = bpf_rbtree_remove(&flow->queue, &skbn->node); + bpf_spin_unlock(&flow->lock); + + if (!rb_node) + goto out; + + skbn = container_of(rb_node, struct skb_node, node); + skb = bpf_kptr_xchg(&skbn->skb, skb); + bpf_obj_drop(skbn); + +out: + bpf_kptr_xchg_back(&sflow->flow, flow); + + return skb; +} + +SEC("struct_ops/bpf_fq_dequeue") +struct sk_buff *BPF_PROG(bpf_fq_dequeue, struct Qdisc *sch) +{ + struct dequeue_nonprio_ctx cb_ctx = {}; + struct sk_buff *skb = NULL; + int i; + + if (!sch->q.qlen) + goto out; + + skb = fq_dequeue_prio(); + if (skb) + goto dequeue; + + q.ktime_cache = cb_ctx.now = bpf_ktime_get_ns(); + fq_check_throttled(q.ktime_cache); + bpf_for(i, 0, sch->limit) { + skb = fq_dequeue_nonprio_flows(i, &cb_ctx); + if (cb_ctx.stop_iter) + break; + }; + + if (skb) { +dequeue: + sch->q.qlen--; + sch->qstats.backlog -= qdisc_pkt_len(skb); + bpf_qdisc_bstats_update(sch, skb); + return skb; + } + + if (cb_ctx.expire) + bpf_qdisc_watchdog_schedule(sch, cb_ctx.expire, q.timer_slack); +out: + return NULL; +} + +static int fq_remove_flows_in_list(u32 index, void *ctx) +{ + struct bpf_list_node *node; + struct fq_flow_node *flow; + + bpf_spin_lock(&fq_new_flows_lock); + node = bpf_list_pop_front(&fq_new_flows); + bpf_spin_unlock(&fq_new_flows_lock); + if (!node) { + bpf_spin_lock(&fq_old_flows_lock); + node = bpf_list_pop_front(&fq_old_flows); + bpf_spin_unlock(&fq_old_flows_lock); + if (!node) + return 1; + } + + flow = container_of(node, struct fq_flow_node, list_node); + bpf_obj_drop(flow); + + return 0; +} + +extern unsigned CONFIG_HZ __kconfig; + +/* limit number of collected flows per round */ +#define FQ_GC_MAX 8 +#define FQ_GC_AGE (3*CONFIG_HZ) + +static bool fq_gc_candidate(struct fq_flow_node *flow) +{ + u64 jiffies = bpf_jiffies64(); + + return fq_flow_is_detached(flow) && + ((s64)(jiffies - (flow->age + FQ_GC_AGE)) > 0); +} + +static int +fq_remove_flows(struct bpf_map *flow_map, u64 *hash, + struct fq_stashed_flow *sflow, struct remove_flows_ctx *ctx) +{ + if (sflow->flow && + (!ctx->gc_only || fq_gc_candidate(sflow->flow))) { + bpf_map_delete_elem(flow_map, hash); + ctx->reset_cnt++; + } + + return ctx->reset_cnt < ctx->reset_max ? 0 : 1; +} + +static void fq_gc(void) +{ + struct remove_flows_ctx cb_ctx = { + .gc_only = true, + .reset_cnt = 0, + .reset_max = FQ_GC_MAX, + }; + + bpf_for_each_map_elem(&fq_nonprio_flows, fq_remove_flows, &cb_ctx, 0); +} + +SEC("struct_ops/bpf_fq_reset") +void BPF_PROG(bpf_fq_reset, struct Qdisc *sch) +{ + struct unset_throttled_flows_ctx utf_ctx = { + .unset_all = true, + }; + struct remove_flows_ctx rf_ctx = { + .gc_only = false, + .reset_cnt = 0, + .reset_max = NUM_QUEUE, + }; + struct fq_stashed_flow *sflow; + u64 hash = 0; + + sch->q.qlen = 0; + sch->qstats.backlog = 0; + + bpf_for_each_map_elem(&fq_nonprio_flows, fq_remove_flows, &rf_ctx, 0); + + rf_ctx.reset_cnt = 0; + bpf_for_each_map_elem(&fq_prio_flows, fq_remove_flows, &rf_ctx, 0); + fq_new_flow(&fq_prio_flows, &sflow, hash); + + bpf_loop(NUM_QUEUE, fq_remove_flows_in_list, NULL, 0); + q.new_flow_cnt = 0; + q.old_flow_cnt = 0; + + bpf_loop(NUM_QUEUE, fq_unset_throttled_flows, &utf_ctx, 0); +} + +SEC("struct_ops/bpf_fq_init") +int BPF_PROG(bpf_fq_init, struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) +{ + struct net_device *dev = sch->dev_queue->dev; + u32 psched_mtu = dev->mtu + dev->hard_header_len; + struct fq_stashed_flow *sflow; + u64 hash = 0; + + if (fq_new_flow(&fq_prio_flows, &sflow, hash) < 0) + return -ENOMEM; + + sch->limit = 10000; + q.initial_quantum = 10 * psched_mtu; + q.quantum = 2 * psched_mtu; + q.flow_refill_delay = 40; + q.flow_plimit = 100; + q.horizon = 10ULL * NSEC_PER_SEC; + q.horizon_drop = 1; + q.orphan_mask = 1024 - 1; + q.timer_slack = 10 * NSEC_PER_USEC; + q.time_next_delayed_flow = ~0ULL; + q.unthrottle_latency_ns = 0ULL; + q.new_flow_cnt = 0; + q.old_flow_cnt = 0; + + return 0; +} + +SEC(".struct_ops") +struct Qdisc_ops fq = { + .enqueue = (void *)bpf_fq_enqueue, + .dequeue = (void *)bpf_fq_dequeue, + .reset = (void *)bpf_fq_reset, + .init = (void *)bpf_fq_init, + .id = "bpf_fq", +}; diff --git a/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c b/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c index ccde6a4c6319..683306db8594 100644 --- a/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c +++ b/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c @@ -4,6 +4,8 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> #include <linux/if_ether.h> +#include <linux/ip.h> +#include <linux/errno.h> #include "xsk_xdp_common.h" struct { @@ -14,6 +16,7 @@ struct { } xsk SEC(".maps"); static unsigned int idx; +int adjust_value = 0; int count = 0; SEC("xdp.frags") int xsk_def_prog(struct xdp_md *xdp) @@ -70,4 +73,51 @@ SEC("xdp") int xsk_xdp_shared_umem(struct xdp_md *xdp) return bpf_redirect_map(&xsk, idx, XDP_DROP); } +SEC("xdp.frags") int xsk_xdp_adjust_tail(struct xdp_md *xdp) +{ + __u32 buff_len, curr_buff_len; + int ret; + + buff_len = bpf_xdp_get_buff_len(xdp); + if (buff_len == 0) + return XDP_DROP; + + ret = bpf_xdp_adjust_tail(xdp, adjust_value); + if (ret < 0) { + /* Handle unsupported cases */ + if (ret == -EOPNOTSUPP) { + /* Set adjust_value to -EOPNOTSUPP to indicate to userspace that this case + * is unsupported + */ + adjust_value = -EOPNOTSUPP; + return bpf_redirect_map(&xsk, 0, XDP_DROP); + } + + return XDP_DROP; + } + + curr_buff_len = bpf_xdp_get_buff_len(xdp); + if (curr_buff_len != buff_len + adjust_value) + return XDP_DROP; + + if (curr_buff_len > buff_len) { + __u32 *pkt_data = (void *)(long)xdp->data; + __u32 len, words_to_end, seq_num; + + len = curr_buff_len - PKT_HDR_ALIGN; + words_to_end = len / sizeof(*pkt_data) - 1; + seq_num = words_to_end; + + /* Convert sequence number to network byte order. Store this in the last 4 bytes of + * the packet. Use 'adjust_value' to determine the position at the end of the + * packet for storing the sequence number. + */ + seq_num = __constant_htonl(words_to_end); + bpf_xdp_store_bytes(xdp, curr_buff_len - sizeof(seq_num), &seq_num, + sizeof(seq_num)); + } + + return bpf_redirect_map(&xsk, 0, XDP_DROP); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/xsk_xdp_common.h b/tools/testing/selftests/bpf/xsk_xdp_common.h index 5a6f36f07383..45810ff552da 100644 --- a/tools/testing/selftests/bpf/xsk_xdp_common.h +++ b/tools/testing/selftests/bpf/xsk_xdp_common.h @@ -4,6 +4,7 @@ #define XSK_XDP_COMMON_H_ #define MAX_SOCKETS 2 +#define PKT_HDR_ALIGN (sizeof(struct ethhdr) + 2) /* Just to align the data in the packet */ struct xdp_info { __u64 count; diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c index 11f047b8af75..0ced4026ee44 100644 --- a/tools/testing/selftests/bpf/xskxceiver.c +++ b/tools/testing/selftests/bpf/xskxceiver.c @@ -524,6 +524,8 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx, test->nb_sockets = 1; test->fail = false; test->set_ring = false; + test->adjust_tail = false; + test->adjust_tail_support = false; test->mtu = MAX_ETH_PKT_SIZE; test->xdp_prog_rx = ifobj_rx->xdp_progs->progs.xsk_def_prog; test->xskmap_rx = ifobj_rx->xdp_progs->maps.xsk; @@ -757,14 +759,15 @@ static struct pkt_stream *pkt_stream_clone(struct pkt_stream *pkt_stream) return pkt_stream_generate(pkt_stream->nb_pkts, pkt_stream->pkts[0].len); } -static void pkt_stream_replace(struct test_spec *test, u32 nb_pkts, u32 pkt_len) +static void pkt_stream_replace_ifobject(struct ifobject *ifobj, u32 nb_pkts, u32 pkt_len) { - struct pkt_stream *pkt_stream; + ifobj->xsk->pkt_stream = pkt_stream_generate(nb_pkts, pkt_len); +} - pkt_stream = pkt_stream_generate(nb_pkts, pkt_len); - test->ifobj_tx->xsk->pkt_stream = pkt_stream; - pkt_stream = pkt_stream_generate(nb_pkts, pkt_len); - test->ifobj_rx->xsk->pkt_stream = pkt_stream; +static void pkt_stream_replace(struct test_spec *test, u32 nb_pkts, u32 pkt_len) +{ + pkt_stream_replace_ifobject(test->ifobj_tx, nb_pkts, pkt_len); + pkt_stream_replace_ifobject(test->ifobj_rx, nb_pkts, pkt_len); } static void __pkt_stream_replace_half(struct ifobject *ifobj, u32 pkt_len, @@ -991,6 +994,31 @@ static bool is_metadata_correct(struct pkt *pkt, void *buffer, u64 addr) return true; } +static bool is_adjust_tail_supported(struct xsk_xdp_progs *skel_rx) +{ + struct bpf_map *data_map; + int adjust_value = 0; + int key = 0; + int ret; + + data_map = bpf_object__find_map_by_name(skel_rx->obj, "xsk_xdp_.bss"); + if (!data_map || !bpf_map__is_internal(data_map)) { + ksft_print_msg("Error: could not find bss section of XDP program\n"); + exit_with_error(errno); + } + + ret = bpf_map_lookup_elem(bpf_map__fd(data_map), &key, &adjust_value); + if (ret) { + ksft_print_msg("Error: bpf_map_lookup_elem failed with error %d\n", ret); + exit_with_error(errno); + } + + /* Set the 'adjust_value' variable to -EOPNOTSUPP in the XDP program if the adjust_tail + * helper is not supported. Skip the adjust_tail test case in this scenario. + */ + return adjust_value != -EOPNOTSUPP; +} + static bool is_frag_valid(struct xsk_umem_info *umem, u64 addr, u32 len, u32 expected_pkt_nb, u32 bytes_processed) { @@ -1767,8 +1795,13 @@ static void *worker_testapp_validate_rx(void *arg) if (!err && ifobject->validation_func) err = ifobject->validation_func(ifobject); - if (err) - report_failure(test); + + if (err) { + if (test->adjust_tail && !is_adjust_tail_supported(ifobject->xdp_progs)) + test->adjust_tail_support = false; + else + report_failure(test); + } pthread_exit(NULL); } @@ -2515,6 +2548,71 @@ static int testapp_hw_sw_max_ring_size(struct test_spec *test) return testapp_validate_traffic(test); } +static int testapp_xdp_adjust_tail(struct test_spec *test, int adjust_value) +{ + struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs; + struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs; + + test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_adjust_tail, + skel_tx->progs.xsk_xdp_adjust_tail, + skel_rx->maps.xsk, skel_tx->maps.xsk); + + skel_rx->bss->adjust_value = adjust_value; + + return testapp_validate_traffic(test); +} + +static int testapp_adjust_tail(struct test_spec *test, u32 value, u32 pkt_len) +{ + int ret; + + test->adjust_tail_support = true; + test->adjust_tail = true; + test->total_steps = 1; + + pkt_stream_replace_ifobject(test->ifobj_tx, DEFAULT_BATCH_SIZE, pkt_len); + pkt_stream_replace_ifobject(test->ifobj_rx, DEFAULT_BATCH_SIZE, pkt_len + value); + + ret = testapp_xdp_adjust_tail(test, value); + if (ret) + return ret; + + if (!test->adjust_tail_support) { + ksft_test_result_skip("%s %sResize pkt with bpf_xdp_adjust_tail() not supported\n", + mode_string(test), busy_poll_string(test)); + return TEST_SKIP; + } + + return 0; +} + +static int testapp_adjust_tail_shrink(struct test_spec *test) +{ + /* Shrink by 4 bytes for testing purpose */ + return testapp_adjust_tail(test, -4, MIN_PKT_SIZE * 2); +} + +static int testapp_adjust_tail_shrink_mb(struct test_spec *test) +{ + test->mtu = MAX_ETH_JUMBO_SIZE; + /* Shrink by the frag size */ + return testapp_adjust_tail(test, -XSK_UMEM__MAX_FRAME_SIZE, XSK_UMEM__LARGE_FRAME_SIZE * 2); +} + +static int testapp_adjust_tail_grow(struct test_spec *test) +{ + /* Grow by 4 bytes for testing purpose */ + return testapp_adjust_tail(test, 4, MIN_PKT_SIZE * 2); +} + +static int testapp_adjust_tail_grow_mb(struct test_spec *test) +{ + test->mtu = MAX_ETH_JUMBO_SIZE; + /* Grow by (frag_size - last_frag_Size) - 1 to stay inside the last fragment */ + return testapp_adjust_tail(test, (XSK_UMEM__MAX_FRAME_SIZE / 2) - 1, + XSK_UMEM__LARGE_FRAME_SIZE * 2); +} + static void run_pkt_test(struct test_spec *test) { int ret; @@ -2621,6 +2719,10 @@ static const struct test_spec tests[] = { {.name = "TOO_MANY_FRAGS", .test_func = testapp_too_many_frags}, {.name = "HW_SW_MIN_RING_SIZE", .test_func = testapp_hw_sw_min_ring_size}, {.name = "HW_SW_MAX_RING_SIZE", .test_func = testapp_hw_sw_max_ring_size}, + {.name = "XDP_ADJUST_TAIL_SHRINK", .test_func = testapp_adjust_tail_shrink}, + {.name = "XDP_ADJUST_TAIL_SHRINK_MULTI_BUFF", .test_func = testapp_adjust_tail_shrink_mb}, + {.name = "XDP_ADJUST_TAIL_GROW", .test_func = testapp_adjust_tail_grow}, + {.name = "XDP_ADJUST_TAIL_GROW_MULTI_BUFF", .test_func = testapp_adjust_tail_grow_mb}, }; static void print_tests(void) diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h index e46e823f6a1a..67fc44b2813b 100644 --- a/tools/testing/selftests/bpf/xskxceiver.h +++ b/tools/testing/selftests/bpf/xskxceiver.h @@ -173,6 +173,8 @@ struct test_spec { u16 nb_sockets; bool fail; bool set_ring; + bool adjust_tail; + bool adjust_tail_support; enum test_mode mode; char name[MAX_TEST_NAME_SIZE]; }; diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh index e6a3e04fd83f..d4e7dd659354 100755 --- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh +++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh @@ -1,10 +1,24 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="v2reportleave_test v3include_test v3inc_allow_test v3inc_is_include_test \ - v3inc_is_exclude_test v3inc_to_exclude_test v3exc_allow_test v3exc_is_include_test \ - v3exc_is_exclude_test v3exc_to_exclude_test v3inc_block_test v3exc_block_test \ - v3exc_timeout_test v3star_ex_auto_add_test" +ALL_TESTS=" + v2reportleave_test + v3include_test + v3inc_allow_test + v3inc_is_include_test + v3inc_is_exclude_test + v3inc_to_exclude_test + v3exc_allow_test + v3exc_is_include_test + v3exc_is_exclude_test + v3exc_to_exclude_test + v3inc_block_test + v3exc_block_test + v3exc_timeout_test + v3star_ex_auto_add_test + v2per_vlan_snooping_port_stp_test + v2per_vlan_snooping_vlan_stp_test +" NUM_NETIFS=4 CHECK_TC="yes" TEST_GROUP="239.10.10.10" @@ -554,6 +568,64 @@ v3star_ex_auto_add_test() v3cleanup $swp2 $TEST_GROUP } +v2per_vlan_snooping_stp_test() +{ + local is_port=$1 + + local msg="port" + [[ $is_port -ne 1 ]] && msg="vlan" + + ip link set br0 up type bridge vlan_filtering 1 \ + mcast_igmp_version 2 \ + mcast_snooping 1 \ + mcast_vlan_snooping 1 \ + mcast_querier 1 \ + mcast_stats_enabled 1 + bridge vlan global set vid 1 dev br0 \ + mcast_snooping 1 \ + mcast_querier 1 \ + mcast_query_interval 100 \ + mcast_startup_query_count 0 + [[ $is_port -eq 1 ]] && bridge link set dev $swp1 state 0 + [[ $is_port -ne 1 ]] && bridge vlan set vid 1 dev $swp1 state 4 + sleep 5 + local tx_s=$(ip -j -p stats show dev $swp1 \ + group xstats_slave subgroup bridge suite mcast \ + | jq '.[]["multicast"]["igmp_queries"]["tx_v2"]') + + [[ $is_port -eq 1 ]] && bridge link set dev $swp1 state 3 + [[ $is_port -ne 1 ]] && bridge vlan set vid 1 dev $swp1 state 3 + sleep 5 + local tx_e=$(ip -j -p stats show dev $swp1 \ + group xstats_slave subgroup bridge suite mcast \ + | jq '.[]["multicast"]["igmp_queries"]["tx_v2"]') + + RET=0 + local tx=$(expr $tx_e - $tx_s) + test $tx -gt 0 + check_err $? "No IGMP queries after STP state becomes forwarding" + log_test "per vlan snooping with $msg stp state change" + + # restore settings + bridge vlan global set vid 1 dev br0 \ + mcast_querier 0 \ + mcast_query_interval 12500 \ + mcast_startup_query_count 2 + ip link set br0 up type bridge vlan_filtering 0 \ + mcast_vlan_snooping 0 \ + mcast_stats_enabled 0 +} + +v2per_vlan_snooping_port_stp_test() +{ + v2per_vlan_snooping_stp_test 1 +} + +v2per_vlan_snooping_vlan_stp_test() +{ + v2per_vlan_snooping_stp_test 0 +} + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh index f84ab2e65754..4cacef5a813a 100755 --- a/tools/testing/selftests/net/forwarding/bridge_mld.sh +++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh @@ -1,10 +1,23 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="mldv2include_test mldv2inc_allow_test mldv2inc_is_include_test mldv2inc_is_exclude_test \ - mldv2inc_to_exclude_test mldv2exc_allow_test mldv2exc_is_include_test \ - mldv2exc_is_exclude_test mldv2exc_to_exclude_test mldv2inc_block_test \ - mldv2exc_block_test mldv2exc_timeout_test mldv2star_ex_auto_add_test" +ALL_TESTS=" + mldv2include_test + mldv2inc_allow_test + mldv2inc_is_include_test + mldv2inc_is_exclude_test + mldv2inc_to_exclude_test + mldv2exc_allow_test + mldv2exc_is_include_test + mldv2exc_is_exclude_test + mldv2exc_to_exclude_test + mldv2inc_block_test + mldv2exc_block_test + mldv2exc_timeout_test + mldv2star_ex_auto_add_test + mldv2per_vlan_snooping_port_stp_test + mldv2per_vlan_snooping_vlan_stp_test +" NUM_NETIFS=4 CHECK_TC="yes" TEST_GROUP="ff02::cc" @@ -554,6 +567,66 @@ mldv2star_ex_auto_add_test() mldv2cleanup $swp2 } +mldv2per_vlan_snooping_stp_test() +{ + local is_port=$1 + + local msg="port" + [[ $is_port -ne 1 ]] && msg="vlan" + + ip link set br0 up type bridge vlan_filtering 1 \ + mcast_mld_version 2 \ + mcast_snooping 1 \ + mcast_vlan_snooping 1 \ + mcast_querier 1 \ + mcast_stats_enabled 1 + bridge vlan global set vid 1 dev br0 \ + mcast_mld_version 2 \ + mcast_snooping 1 \ + mcast_querier 1 \ + mcast_query_interval 100 \ + mcast_startup_query_count 0 + + [[ $is_port -eq 1 ]] && bridge link set dev $swp1 state 0 + [[ $is_port -ne 1 ]] && bridge vlan set vid 1 dev $swp1 state 4 + sleep 5 + local tx_s=$(ip -j -p stats show dev $swp1 \ + group xstats_slave subgroup bridge suite mcast \ + | jq '.[]["multicast"]["mld_queries"]["tx_v2"]') + [[ $is_port -eq 1 ]] && bridge link set dev $swp1 state 3 + [[ $is_port -ne 1 ]] && bridge vlan set vid 1 dev $swp1 state 3 + sleep 5 + local tx_e=$(ip -j -p stats show dev $swp1 \ + group xstats_slave subgroup bridge suite mcast \ + | jq '.[]["multicast"]["mld_queries"]["tx_v2"]') + + RET=0 + local tx=$(expr $tx_e - $tx_s) + test $tx -gt 0 + check_err $? "No MLD queries after STP state becomes forwarding" + log_test "per vlan snooping with $msg stp state change" + + # restore settings + bridge vlan global set vid 1 dev br0 \ + mcast_querier 0 \ + mcast_query_interval 12500 \ + mcast_startup_query_count 2 \ + mcast_mld_version 1 + ip link set br0 up type bridge vlan_filtering 0 \ + mcast_vlan_snooping 0 \ + mcast_stats_enabled 0 +} + +mldv2per_vlan_snooping_port_stp_test() +{ + mldv2per_vlan_snooping_stp_test 1 +} + +mldv2per_vlan_snooping_vlan_stp_test() +{ + mldv2per_vlan_snooping_stp_test 0 +} + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/net/forwarding/config b/tools/testing/selftests/net/forwarding/config index 8d7a1a004b7c..18fd69d8d937 100644 --- a/tools/testing/selftests/net/forwarding/config +++ b/tools/testing/selftests/net/forwarding/config @@ -1,6 +1,7 @@ CONFIG_BRIDGE=m CONFIG_VLAN_8021Q=m CONFIG_BRIDGE_VLAN_FILTERING=y +CONFIG_BRIDGE_IGMP_SNOOPING=y CONFIG_NET_L3_MASTER_DEV=y CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_NET_VRF=m |